Import Cobalt 11.62465

Change-Id: Ia510c39a570aace4d9feb784efa530cd0888a9f8
diff --git a/src/base/message_pump_ui_starboard.cc b/src/base/message_pump_ui_starboard.cc
index b3c696f..efecba0 100644
--- a/src/base/message_pump_ui_starboard.cc
+++ b/src/base/message_pump_ui_starboard.cc
@@ -119,6 +119,8 @@
     base::AutoLock auto_lock(outstanding_events_lock_);
     // Make sure any outstanding delayed event is canceled.
     CancelDelayedLocked();
+
+    TRACK_MEMORY_SCOPE("MessageLoop");
     outstanding_delayed_events_.insert(
         SbEventSchedule(&CallMessagePumpDelayed, this, delay.ToSbTime()));
   }
@@ -151,6 +153,7 @@
 }
 
 void MessagePumpUIStarboard::CancelDelayedLocked() {
+  TRACK_MEMORY_SCOPE("MessageLoop");
   outstanding_events_lock_.AssertAcquired();
   for (SbEventIdSet::iterator it = outstanding_delayed_events_.begin();
        it != outstanding_delayed_events_.end(); ++it) {
diff --git a/src/base/timer.cc b/src/base/timer.cc
index db670f1..b2b7954 100644
--- a/src/base/timer.cc
+++ b/src/base/timer.cc
@@ -225,12 +225,14 @@
   base::Closure task = user_task_;
 
   if (!is_repeating_) {
+    TRACK_MEMORY_SCOPE("MessageLoop");
     Stop();
     task.Run();
     return;
   }
 
   if (is_task_run_before_scheduling_next_) {
+    TRACK_MEMORY_SCOPE("MessageLoop");
     // Setup member variables and the next tasks before the current one runs as
     // we cannot access any member variables after calling task.Run().
     NewScheduledTaskInfo task_info = SetupNewScheduledTask(delay_);
@@ -243,6 +245,7 @@
       PostNewScheduledTask(task_info, delay_ - task_duration);
     }
   } else {
+    TRACK_MEMORY_SCOPE("MessageLoop");
     PostNewScheduledTask(delay_);
     task.Run();
   }
diff --git a/src/cobalt/CHANGELOG.md b/src/cobalt/CHANGELOG.md
index 085b9a2..1c6e3c6 100644
--- a/src/cobalt/CHANGELOG.md
+++ b/src/cobalt/CHANGELOG.md
@@ -4,8 +4,9 @@
 
 ## Version 10
 
-### Dummy example change
-Here we would put a description of a notable change that was made in Cobalt 10
-but did not make it into Cobalt 9.  This is currently the only item in a list
-that will expand to multiple items as we work on Cobalt.  When the first
-legitimate change is documented here, it should replace this example change.
+### AutoMem - Memory Configuration
+AutoMem has been added which assists developers in tuning the memory
+settings of the Cobalt app. On startup, memory settings are now printed
+for all builds except gold. Memory settings can be altered via the
+command line, build files and in some instances the Starboard API.
+*For more information, see [doc/memory_tuning.md](doc/memory_tuning.md).*
diff --git a/src/cobalt/browser/browser.gyp b/src/cobalt/browser/browser.gyp
index 938907a..85a49b2 100644
--- a/src/cobalt/browser/browser.gyp
+++ b/src/cobalt/browser/browser.gyp
@@ -119,6 +119,7 @@
         '<(DEPTH)/cobalt/debug/debug.gyp:debug',
         '<(DEPTH)/cobalt/dom/dom.gyp:dom',
         '<(DEPTH)/cobalt/dom_parser/dom_parser.gyp:dom_parser',
+        '<(DEPTH)/cobalt/fetch/fetch.gyp:fetch',
         '<(DEPTH)/cobalt/h5vcc/h5vcc.gyp:h5vcc',
         '<(DEPTH)/cobalt/input/input.gyp:input',
         '<(DEPTH)/cobalt/layout/layout.gyp:layout',
diff --git a/src/cobalt/browser/browser_bindings_gen.gyp b/src/cobalt/browser/browser_bindings_gen.gyp
index 294a22c..b954983 100644
--- a/src/cobalt/browser/browser_bindings_gen.gyp
+++ b/src/cobalt/browser/browser_bindings_gen.gyp
@@ -148,6 +148,8 @@
         '../dom/xml_document.idl',
         '../dom/xml_serializer.idl',
 
+        '../fetch/fetch_internal.idl',
+
         '../h5vcc/dial/dial_http_request.idl',
         '../h5vcc/dial/dial_http_response.idl',
         '../h5vcc/dial/dial_server.idl',
diff --git a/src/cobalt/build/build.id b/src/cobalt/build/build.id
index 85e3cbf..8a3b29a 100644
--- a/src/cobalt/build/build.id
+++ b/src/cobalt/build/build.id
@@ -1 +1 @@
-61806
\ No newline at end of file
+62465
\ No newline at end of file
diff --git a/src/cobalt/doc/memory_tuning.md b/src/cobalt/doc/memory_tuning.md
new file mode 100644
index 0000000..1e9993e
--- /dev/null
+++ b/src/cobalt/doc/memory_tuning.md
@@ -0,0 +1,320 @@
+# Memory Tuning #
+
+Cobalt is designed to choose sensible values for memory-related options and
+parameters through a system called "AutoMem".
+
+On startup, AutoMem will print a memory table to the output console detailing
+the memory allocations that will be assigned to the various subsystems in
+cobalt.
+
+As an example, at the cost of performance you can reduce CPU memory on your
+platform by 5MB and GPU memory usage on your platform by 10MB using these
+command line flags:
+
+`cobalt --reduce_cpu_memory_by=5MB --reduce_gpu_memory_by=10MB`
+
+Some settings will be "fixed" while others will be "flexible" so that their
+memory consumption will scale down for memory constrained platforms.
+
+Read on for more information.
+
+### Memory Settings Table ###
+
+A table similar to the one below will be printed on startup.
+
+~~~
+AutoMem:
+
+ SETTING NAME                           VALUE                    TYPE   SOURCE
+ ________________________________________________________________________________
+|                                      |             |          |      |         |
+| image_cache_size_in_bytes            |    33554432 |  32.0 MB |  GPU | AutoSet |
+|______________________________________|_____________|__________|______|_________|
+|                                      |             |          |      |         |
+| javascript_gc_threshold_in_bytes     |     8388608 |   8.0 MB |  CPU |   Build |
+|______________________________________|_____________|__________|______|_________|
+|                                      |             |          |      |         |
+| misc_cobalt_cpu_size_in_bytes        |   124780544 | 119.0 MB |  CPU | AutoSet |
+|______________________________________|_____________|__________|______|_________|
+|                                      |             |          |      |         |
+| misc_cobalt_gpu_size_in_bytes        |    25165824 |  24.0 MB |  GPU | AutoSet |
+|______________________________________|_____________|__________|______|_________|
+|                                      |             |          |      |         |
+| remote_typeface_cache_size_in_bytes  |     4194304 |   4.0 MB |  CPU |   Build |
+|______________________________________|_____________|__________|______|_________|
+|                                      |             |          |      |         |
+| skia_atlas_texture_dimensions        | 4096x8192x2 |  64.0 MB |  GPU |   Build |
+|______________________________________|_____________|__________|______|_________|
+|                                      |             |          |      |         |
+| skia_cache_size_in_bytes             |     4194304 |   4.0 MB |  GPU |   Build |
+|______________________________________|_____________|__________|______|_________|
+|                                      |             |          |      |         |
+| software_surface_cache_size_in_bytes |         N/A |      N/A |  N/A |     N/A |
+|______________________________________|_____________|__________|______|_________|
+
+~~~
+This table shows the breakdown of how much memory is being allocated to each
+sub-system, the type, and where it came from.
+
+**SETTING NAME:** This is the name of the memory setting. If a setting can be
+manually set through the command line or the build system, then it will be
+accessible by using this name. For example adding the command line argument
+`--image_cache_size_in_bytes=25165824` will manually set the Image Cache Size to
+24 megabytes. The following form is equivalent:
+`--image_cache_size_in_bytes=24MB`. Note that the numerical value can include
+the suffix kb/mb/gb to specify kilo/mega/giga-bytes. The numerical value can
+be a floating point value. For example `--image_cache_size_in_bytes=.1GB` is
+equivalent to `--image_cache_size_in_bytes=100MB`.
+
+**VALUE:** This two-column field shows the setting's actual value in the first
+column and the amount of memory that the setting consumes in the second
+column. The first column gives hints on what kind of values the
+setting can be set to via the command line. For example,
+`skia_atlas_texture_dimensions` accepts texture sizes on the command line, such
+as: `--skia_atlas_texture_dimensions=2048x4096x2`
+
+**TYPE:** This specifies whether the setting consumes GPU or CPU memory.
+For example, the Image Cache will decode images to buffers in GPU memory
+and therefore it is classified as the GPU memory type.
+
+**SOURCE:** This specifies where the memory setting came from. It will either
+be set from a specific place or automatically generated from Cobalt.
+  * Values for **SOURCE**:
+    * `Starboard API`
+      * The value used was reported by the result of a Starboard API function call.
+      * Example: `SbSystemGetUsedCPUMemory()`
+    * `Build`
+      * Specified by the platform specific `*.gyp(i)` build file.
+      * For example: see `image_cache_size_in_bytes` in [`build/config/base.gypi`](../build/config/base.gypi)
+    * `CmdLine`
+      * Read the memory setting value from the command line.
+      * For example: `cobalt --image_cache_size_in_bytes=24MB`.
+    * `AutoSet`
+      * No value was specified and therefore Cobalt calculated the default value
+        automatically based on system parameters. For example, many caches
+        will be chosen proportionally to the size of the UI resolution.
+    * `AutoSet (Constrained)`
+      * This value was AutoSet to a default value, but then was reduced in
+        response to `max_cobalt_cpu_usage` or `max_cobalt_gpu_usage` being set too low.
+        This will also trigger in response to `reduce_cpu_memory_by` or
+        `reduce_gpu_memory_by` being set. See "Memory Scaling" section below.
+
+### Maximum Memory Table ###
+
+This second table is also printed at startup and details the sum of memory and
+maximum memory limits as reported by cobalt.
+
+~~~
+ MEMORY                 SOURCE          TOTAL      SETTINGS CONSUME
+ ____________________________________________________________________
+|                      |               |          |                  |
+| max_cobalt_cpu_usage | Starboard API | 256.0 MB |         131.0 MB |
+|______________________|_______________|__________|__________________|
+|                      |               |          |                  |
+| max_cobalt_gpu_usage | Starboard API | 768.0 MB |         124.0 MB |
+|______________________|_______________|__________|__________________|
+~~~
+
+This table shows the limits for CPU and GPU memory consumption and also how
+much memory is being consumed for each memory type.
+
+**MEMORY**: This is the name of the memory limit. If you want to change this
+setting manually then use the name on the command line. For example
+`--max_cobalt_cpu_usage=150MB` will set Cobalt to a 150MB limit for CPU
+memory. If the sum of CPU memory exceeds this limit then memory settings of the
+same type will reduce their memory usage.
+
+**SOURCE**: This value indicates where the value came from.
+ * `Starboard API`
+   * `max_cobalt_cpu_usage`: This value was found from SbSystemGetTotalCPUMemory().
+   * `max_cobalt_gpu_usage`: This value was found from SbSystemGetTotalGPUMemory().
+ * `CmdLine`
+   * `max_cobalt_cpu_usage`: --max_cobalt_cpu_usage was used as a command argument.
+   * `max_cobalt_gpu_usage`: --max_cobalt_gpu_usage was used as a command argument.
+ * `Build`
+   * `max_cobalt_cpu_usage`: max_cobalt_cpu_usage was specified in a platform gyp file.
+   * `max_cobalt_gpu_usage`: max_cobalt_gpu_usage was specified in a platform gyp file.
+
+**TOTAL**: Represents the maximum available memory for settings. This value
+came from **SOURCE**.
+
+**SETTINGS CONSUME**: This value indicates the consumption of memory for the
+current memory type.
+
+When the **SOURCE** value is `Starboard API`, the limit was obtained from a
+Starboard function call: `max_cobalt_cpu_usage` came from
+`SbSystemGetTotalCPUMemory()` and `max_cobalt_gpu_usage` came from
+`SbSystemGetTotalGPUMemory()`.
+
+If the available memory for Cobalt is less than the amount of memory
+consumed by the settings, then any settings that are AutoSet AND adjustable
+will reduce their memory consumption. When this happens, look for the string
+*`AutoSet (Constrained)`* in the first table.
+
+## Setting Maximum Memory Values ##
+
+The max cpu and gpu memory of the system can be set either by command line or
+by modifying the gyp build file.
+
+Command Line:
+  * `--max_cobalt_cpu_usage=160MB`
+  * `--max_cobalt_gpu_usage=160MB`
+
+Build settings:
+  * `starboard/<PLATFORM>/gyp_configuration.gypi`
+    * `max_cobalt_cpu_usage`
+    * `max_cobalt_gpu_usage`
+
+Command Line settings will override build settings.
+
+### Memory Scaling ###
+
+There are two primary ways in which the memory consumption settings will scale down.
+One is by setting `--max_cobalt_cpu_usage` (or `--max_cobalt_gpu_usage`) to a
+particular value (e.g. `--max_cobalt_cpu_usage=160MB`).
+
+`--max_cobalt_cpu_usage` (and `--max_cobalt_gpu_usage`) will trigger the memory
+settings to scale down whenever their memory consumption exceeds the maximum
+**TOTAL** value. The memory settings will be scaled down until their consumption is
+less than or equal to the maximum allowed value **TOTAL**. See also **SETTINGS CONSUME**.
+
+Another way to scale down the memory size is by passing the flags
+`--reduce_cpu_memory_by=XX` and `--reduce_gpu_memory_by=XX` which will:
+1) Ignore the `--max_cobalt_cpu_usage` and `--max_cobalt_gpu_usage`.
+2) Use the current memory consumption of the settings and then reduce that by
+   the amount.
+
+For example, if cobalt uses 160MB of CPU memory then passing in
+`--reduce_cpu_memory_by=10MB` to the command line will attempt to reduce the
+footprint of cobalt by 10MB to 150MB. Note that this reduction is an attempt,
+and it's possible this attempt will fail if the memory reduction is too aggressive
+or if memory settings have been explicitly set via the build or command line.
+
+*Forcing a Memory Setting to be flexible*
+
+If a memory setting is set via a build setting, then it's possible to make it
+flexible via the command line by setting the value to "autoset". For example,
+ `--image_cache_size_in_bytes=auto` will allow `image_cache_size_in_bytes` to be
+flexible by disabling the value being set by a build setting.
+
+### Memory Warnings ###
+
+Cobalt will periodically check to see if the memory consumed by the application
+is less than the `--max_cobalt_cpu_usage` and `--max_cobalt_gpu_usage` amount.
+If the cpu/gpu exceeds this maximum value then an error message will be logged
+once to stdout for cpu and/or gpu memory systems.
+
+
+### Example 1 - Configuring for a memory restricted platform ###
+
+Let's say that we are configuring a platform called "XXX":
+
+We will configure XXX such that:
+  * `image_cache_size_in_bytes` will be set to 32MB in the build settings.
+  * `skia_atlas_texture_dimensions` will be set to `2048x2048x2` in the build settings.
+  * `max_cobalt_cpu_usage` will be set to 160MB on the command line.
+
+**Configuring `image_cache_size_in_bytes` to be 32MB:**
+  * in `starboard/<PLATFORM>/gyp_configuration.gypi`
+    * add `'image_cache_size_in_bytes': 32 * 1024 * 1024,`
+
+**Configuring `skia_atlas_texture_dimensions` to be 2048x2048x2:**
+
+  * in `src/starboard/XXX/gyp_configuration.gypi`
+    * add `'skia_glyph_atlas_width': '2048'`
+    * add `'skia_glyph_atlas_height': '2048'`
+    * (note that the third dimension is assumed)
+
+**Configuring `max_cobalt_cpu_usage` to be 160MB:**
+
+  * `cobalt --max_cobalt_cpu_usage=160MB`
+
+### Example 2 - Configuring for a memory-plentiful platform ###
+
+The following command line will give a lot of memory to image cache and give
+500MB to `max_cobalt_cpu_usage` and `max_cobalt_gpu_usage`.
+
+~~~
+cobalt --max_cobalt_cpu_usage=500MB --max_cobalt_gpu_usage=500MB
+--image_cache_size_in_bytes=80MB
+~~~
+
+## API Reference ##
+
+#### Memory System API ####
+
+  * `max_cobalt_cpu_usage`
+    * This setting will set the maximum cpu memory that the app will consume.
+      CPU Memory settings will scale down their consumption in order to stay under
+      the `max_cobalt_cpu_usage`. If memory consumption exceeds this value during
+      runtime then a memory warning will be printed to stdout.
+    * Set via command line or else build system or else starboard.
+      * starboard value will bind to `SbSystemGetTotalCPUMemory()`.
+  * `max_cobalt_gpu_usage`
+    * This setting will set the maximum gpu memory that the app will consume.
+      GPU Memory settings will scale down their consumption in order to stay under
+      the `max_cobalt_gpu_usage`. If memory consumption exceeds this value during
+      runtime then a memory warning will be printed to stdout.
+    * Set via command line or else build system or else starboard.
+      * starboard value will bind to `SbSystemGetTotalGPUMemory()`.
+    * Note that `SbSystemGetTotalGPUMemory()` is optional. If no value exists
+      for `max_cobalt_gpu_usage` in build/commandline/starboard settings then no
+      GPU memory checking is performed.
+  * `reduce_cpu_memory_by`
+    * This setting will trigger CPU memory consumption to be reduced by the amount
+      specified. *This overrides the memory scaling behavior of `max_cobalt_cpu_usage`*.
+      But this will not affect memory checking of `max_cobalt_cpu_usage` otherwise.
+    * Set via command line or else the platform gyp build file.
+  * `reduce_gpu_memory_by`
+    * This setting will trigger GPU memory consumption to be reduced by the amount
+      specified. *This overrides the memory scaling behavior of `max_cobalt_gpu_usage`*.
+      But this will not affect memory checking of `max_cobalt_gpu_usage` otherwise.
+    * Set via command line or else the platform gyp build file.
+
+#### Memory Setting API ####
+
+  * `image_cache_size_in_bytes`
+    * See documentation *Image cache capacity* in `performance_tuning.md` for what
+      this setting does.
+    * Set via command line, or else build system, or else automatically by Cobalt.
+  * `javascript_gc_threshold_in_bytes`
+    * See documentation *Garbage collection trigger threshold* in `performance_tuning.md`
+      for what this setting does.
+    * Set via command line, or else build system, or else automatically by Cobalt.
+  * `remote_typeface_cache_size_in_bytes`
+    * Determines the capacity of the remote typefaces cache which manages all typefaces
+      downloaded from a web page.
+    * Set via command line, or else build system, or else automatically by Cobalt.
+  * `skia_atlas_texture_dimensions`
+    * Determines the size in pixels of the glyph atlas where rendered glyphs are
+      cached. The resulting memory usage is 2 bytes of GPU memory per pixel.
+      When a value is used that is too small, thrashing may occur that will
+      result in visible stutter. Such thrashing is more likely to occur when CJK
+      language glyphs are rendered and when the size of the glyphs in pixels is
+      larger, such as for higher resolution displays.
+      Negative default values indicate to Cobalt that these settings
+      should be automatically set.
+    * Set via command line, or else build system, or else automatically by Cobalt.
+    * Note that in the gyp build system, this setting is represented as two values:
+      * `skia_glyph_atlas_width` and
+      * `skia_glyph_atlas_height`
+  * `skia_cache_size_in_bytes`
+    * See documentation *Glyph atlas size* in `performance_tuning.md` for what this
+      setting does.
+    * Set via command line, or else build system or else automatically by Cobalt.
+  * `software_surface_cache_size_in_bytes`
+    * See documentation *Scratch Surface cache capacity* in `performance_tuning.md`
+      for what this setting does.
+    * Set via command line, or else build system, or else automatically by Cobalt.
+
+#### Units for Command Line Settings ####
+
+Memory values passed into Cobalt via command line arguments support units such
+as kb, mb, and gb for kilobytes, megabytes, and gigabytes. These units are case insensitive.
+
+For example, these are all equivalent on the command line:
+
+`--image_cache_size_in_bytes=67108864`
+`--image_cache_size_in_bytes=65536kb`
+`--image_cache_size_in_bytes=64mb`
+`--image_cache_size_in_bytes=.0625gb`
diff --git a/src/cobalt/fetch/embedded_scripts/fetch.js b/src/cobalt/fetch/embedded_scripts/fetch.js
index 415739b..48db9cd 100644
--- a/src/cobalt/fetch/embedded_scripts/fetch.js
+++ b/src/cobalt/fetch/embedded_scripts/fetch.js
@@ -1,14 +1,14 @@
-'use strict';(function(f){function n(a){"string"!==typeof a&&(a=String(a));if(/[^a-z0-9\-#$%&'*+.\^_`|~]/i.test(a))throw new h("Invalid character in header field name");return a.toLowerCase()}function w(a){"string"!==typeof a&&(a=String(a));var b;var c=0;for(b=a.length;c<b;c++){var d=a.charCodeAt(c);if(9!==d&&10!==d&&13!==d&&32!==d)break}for(b=a.length-1;b>c&&(d=a.charCodeAt(b),9===d||10===d||13===d||32===d);b--);a=a.substring(c,b+1);c=0;for(b=a.length;c<b;c++)if(d=a.charCodeAt(c),256<=d||0===d||
-10===d||13===d)throw new h("Invalid character in header field value");return a}function e(a){this.map=new q;if(void 0!==a){if(null===a||"object"!==typeof a)throw new h("Constructing Headers with invalid parameters");a instanceof e?a.forEach(function(a,c){this.append(c,a)},this):m.isArray(a)?a.forEach(function(a){if(2!==a.length)throw new h("Constructing Headers with invalid parameters");this.append(a[0],a[1])},this):Object.getOwnPropertyNames(a).forEach(function(b){this.append(b,a[b])},this)}}function r(a){if(a.bodyUsed)return t(new h("Body already read"));
-if(null===a.body)return A(new k(0));if(B(a.body))return t(new h("ReadableStream already locked"));var b=a.body.getReader(),c=[],d=0;return b.read().then(function C(a){if(a.done){if(0===c.length)a=new k(0);else if(1===c.length)a=new k(c[0]);else{a=new k(d);for(var f=0,e=c.length,g=0;f<e;f++)a.set(c[f],g),g+=c[f].length}return a}return a.value instanceof k?(d+=a.value.length,c.push(a.value),b.read().then(C)):t(new h("Invalid stream read value type"))})}function D(a){a=unescape(encodeURIComponent(a));
-for(var b=new k(a.length),c=0,d=a.length;c<d;c++)b[c]=a.charCodeAt(c);return b}function x(){this._initBody=function(a){this._bodyUsed=!1;this.body=null===a||void 0===a?null:a instanceof u?a:new u({start:function(b){if(a)if("string"===typeof a)b.enqueue(D(a));else if(y.prototype.isPrototypeOf(a))b.enqueue(new k(a));else if(E(a))b.enqueue(new k(a.buffer));else throw new h("Unsupported BodyInit type");b.close()}});this.headers.get("content-type")||"string"===typeof a&&this.headers.set("content-type",
-"text/plain;charset=UTF-8")};Object.defineProperty(this,"bodyUsed",{get:function(){return this._bodyUsed?!0:this.body?!!F(this.body):!1}});this.arrayBuffer=function(){return r(this).then(function(a){return a.buffer})};this.text=function(){return r(this).then(function(a){return 0===a.length?"":decodeURIComponent(escape(String.fromCharCode.apply(null,a)))})};this.json=function(){return this.text().then(JSON.parse)};return this}function p(a,b){b=b||{};var c=b.body;if(a instanceof p){if(a.bodyUsed)throw new h("Request body already read");
-this.url=a.url;this.credentials=a.credentials;b.headers||(this.headers=new e(a.headers));this.method=a.method;this.mode=a.mode;c||null===a.body||(c=a.body,a._bodyUsed=!0)}else this.url=String(a);this.credentials=b.credentials||this.credentials||"omit";if(b.headers||!this.headers)this.headers=new e(b.headers);a=b.method||this.method||"GET";var d=a.toUpperCase();this.method=-1<G.indexOf(d)?d:a;this.mode=b.mode||this.mode||null;this.referrer=null;if(("GET"===this.method||"HEAD"===this.method)&&c)throw new h("Body not allowed for GET or HEAD requests");
-this._initBody(c)}function H(a){var b=new e;a.replace(/\r?\n[\t ]+/g," ").split(/\r?\n/).forEach(function(a){var c=a.split(":");if(a=c.shift().trim())c=c.join(":").trim(),b.append(a,c)});return b}function l(a,b){b||(b={});this.type="default";this.status="status"in b?b.status:200;this.ok=200<=this.status&&300>this.status;this.statusText="statusText"in b?b.statusText:"OK";this.headers=new e(b.headers);this.url=b.url||"";this._initBody(a)}if(!f.fetch){var m=f.Array,y=f.ArrayBuffer,I=f.Symbol.iterator,
-q=f.Map,J=f.RangeError,h=f.TypeError,k=f.Uint8Array,v=f.Promise,t=v.reject,A=v.resolve,u=f.ReadableStream,z=f.ReadableStreamTee,F=f.IsReadableStreamDisturbed,B=f.IsReadableStreamLocked,K="[object Int8Array];[object Uint8Array];[object Uint8ClampedArray];[object Int16Array];[object Uint16Array];[object Int32Array];[object Uint32Array];[object Float32Array];[object Float64Array]".split(";"),E=y.isView||function(a){return a&&-1<K.indexOf(Object.prototype.toString.call(a))};e.prototype.append=function(a,
-b){if(2!==arguments.length)throw h("Invalid parameters to append");a=n(a);b=w(b);this.map.has(a)?this.map.set(a,this.map.get(a)+", "+b):this.map.set(a,b)};e.prototype["delete"]=function(a){if(1!==arguments.length)throw h("Invalid parameters to delete");this.map.delete(n(a))};e.prototype.get=function(a){if(1!==arguments.length)throw h("Invalid parameters to get");a=n(a);var b=this.map.get(a);return void 0!==b?b:null};e.prototype.has=function(a){if(1!==arguments.length)throw h("Invalid parameters to has");
-return this.map.has(n(a))};e.prototype.set=function(a,b){if(2!==arguments.length)throw h("Invalid parameters to set");this.map.set(n(a),w(b))};e.prototype.forEach=function(a,b){var c=this;m.from(this.map.entries()).sort().forEach(function(d){a.call(b,d[1],d[0],c)})};e.prototype.keys=function(){return(new q(m.from(this.map.entries()).sort())).keys()};e.prototype.values=function(){return(new q(m.from(this.map.entries()).sort())).values()};e.prototype.entries=function(){return(new q(m.from(this.map.entries()).sort())).entries()};
-e.prototype[I]=e.prototype.entries;var G="DELETE GET HEAD OPTIONS POST PUT".split(" ");p.prototype.clone=function(){var a=null;null!==this.body&&(a=z(this.body,!0),this.body=a[0],a=a[1]);return new p(this,{body:a})};x.call(p.prototype);x.call(l.prototype);l.prototype.clone=function(){var a=null;null!==this.body&&(a=z(this.body,!0),this.body=a[0],a=a[1]);return new l(a,{status:this.status,statusText:this.statusText,headers:new e(this.headers),url:this.url})};l.error=function(){var a=new l(null,{status:0,
-statusText:""});a.type="error";return a};var L=[301,302,303,307,308];l.redirect=function(a,b){if(-1===L.indexOf(b))throw new J("Invalid status code");return new l(null,{status:b,headers:{location:a}})};f.Headers=e;f.Request=p;f.Response=l;f.fetch=function(a,b){return new v(function(c,d){var f=!1,e=new p(a,b),g=new XMLHttpRequest,k=null,n=new u({start:function(a){k=a},cancel:function(a){f=!0;g.abort()}});g.onload=function(){k.close()};g.onreadystatechange=function(){if(g.readyState===g.HEADERS_RECEIVED){var a=
-{status:g.status,statusText:g.statusText,headers:H(g.getAllResponseHeaders()||"")};a.url="responseURL"in g?g.responseURL:a.headers.get("X-Request-URL");c(new l(n,a))}};g.onerror=function(){k.error(new h("Network request failed"));d(new h("Network request failed"))};g.ontimeout=function(){k.error(new h("Network request failed"));d(new h("Network request failed"))};g.open(e.method,e.url,!0);"include"===e.credentials&&(g.withCredentials=!0);e.headers.forEach(function(a,b){g.setRequestHeader(b,a)});var m=
-function(a){f||k.enqueue(a)};null===e.body?g.fetch(m,null):r(e).then(function(a){g.fetch(m,a)})})};f.fetch.polyfill=!0}})(this);
\ No newline at end of file
+'use strict';(function(e){function n(a){"string"!==typeof a&&(a=String(a));if(/[^a-z0-9\-#$%&'*+.\^_`|~]/i.test(a))throw new g("Invalid character in header field name");return a.toLowerCase()}function x(a){"string"!==typeof a&&(a=String(a));var b;var c=0;for(b=a.length;c<b;c++){var d=a.charCodeAt(c);if(9!==d&&10!==d&&13!==d&&32!==d)break}for(b=a.length-1;b>c&&(d=a.charCodeAt(b),9===d||10===d||13===d||32===d);b--);a=a.substring(c,b+1);c=0;for(b=a.length;c<b;c++)if(d=a.charCodeAt(c),256<=d||0===d||
+10===d||13===d)throw new g("Invalid character in header field value");return a}function f(a){this.map=new r;if(void 0!==a){if(null===a||"object"!==typeof a)throw new g("Constructing Headers with invalid parameters");a instanceof f?a.forEach(function(a,c){this.append(c,a)},this):m.isArray(a)?a.forEach(function(a){if(2!==a.length)throw new g("Constructing Headers with invalid parameters");this.append(a[0],a[1])},this):Object.getOwnPropertyNames(a).forEach(function(b){this.append(b,a[b])},this)}}function t(a){if(a.bodyUsed)return u(new g("Body already read"));
+if(null===a.body)return C(new k(0));if(D(a.body))return u(new g("ReadableStream already locked"));var b=a.body.getReader(),c=[],d=0;return b.read().then(function p(a){if(a.done){if(0===c.length)a=new k(0);else if(1===c.length)a=new k(c[0]);else{a=new k(d);for(var e=0,f=c.length,h=0;e<f;e++)a.set(c[e],h),h+=c[e].length}return a}return a.value instanceof k?(d+=a.value.length,c.push(a.value),b.read().then(p)):u(new g("Invalid stream read value type"))})}function E(a){a=unescape(encodeURIComponent(a));
+for(var b=new k(a.length),c=0,d=a.length;c<d;c++)b[c]=a.charCodeAt(c);return b}function y(){this._initBody=function(a){this._bodyUsed=!1;this.body=null===a||void 0===a?null:a instanceof v?a:new v({start:function(b){if(a)if("string"===typeof a)b.enqueue(E(a));else if(z.prototype.isPrototypeOf(a))b.enqueue(new k(a));else if(F(a))b.enqueue(new k(a.buffer));else throw new g("Unsupported BodyInit type");b.close()}});this.headers.get("content-type")||"string"===typeof a&&this.headers.set("content-type",
+"text/plain;charset=UTF-8")};Object.defineProperty(this,"bodyUsed",{get:function(){return this._bodyUsed?!0:this.body?!!G(this.body):!1}});this.arrayBuffer=function(){return t(this).then(function(a){return a.buffer})};this.text=function(){return t(this).then(function(a){return 0===a.length?"":decodeURIComponent(escape(String.fromCharCode.apply(null,a)))})};this.json=function(){return this.text().then(JSON.parse)};return this}function q(a,b){b=b||{};var c=b.body;if(a instanceof q){if(a.bodyUsed)throw new g("Request body already read");
+this.url=a.url;this.credentials=a.credentials;b.headers||(this.headers=new f(a.headers));this.method=a.method;this.mode=a.mode;c||null===a.body||(c=a.body,a._bodyUsed=!0)}else this.url=String(a);this.credentials=b.credentials||this.credentials||"omit";if(b.headers||!this.headers)this.headers=new f(b.headers);a=b.method||this.method||"GET";var d=a.toUpperCase();this.method=-1<H.indexOf(d)?d:a;this.mode=b.mode||this.mode||null;this.referrer=null;if(("GET"===this.method||"HEAD"===this.method)&&c)throw new g("Body not allowed for GET or HEAD requests");
+this._initBody(c)}function I(a){var b=new f;a.replace(/\r?\n[\t ]+/g," ").split(/\r?\n/).forEach(function(a){var c=a.split(":");if(a=c.shift().trim())c=c.join(":").trim(),b.append(a,c)});return b}function l(a,b){b||(b={});this.type="default";this.status="status"in b?b.status:200;if(200>this.status||599<this.status)throw new A("Invalid response status");this.ok=200<=this.status&&300>this.status;if("statusText"in b){var c=b.statusText;for(var d=0,e=c.length,p;d<e;d++)if(p=c.charCodeAt(d),9!==p&&(32>
+p||255<p||127===p))throw g("Invalid status text");}else c="OK";this.statusText=c;this.headers=new f(b.headers);this.url=b.url||"";if(a&&-1<J.indexOf(this.status))throw new g("Body not allowed with a null body status");this._initBody(a)}if(!e.fetch){var m=e.Array,z=e.ArrayBuffer,K=e.Symbol.iterator,r=e.Map,A=e.RangeError,g=e.TypeError,k=e.Uint8Array,w=e.Promise,u=w.reject,C=w.resolve,v=e.ReadableStream,B=e.ReadableStreamTee,G=e.IsReadableStreamDisturbed,D=e.IsReadableStreamLocked,L="[object Int8Array];[object Uint8Array];[object Uint8ClampedArray];[object Int16Array];[object Uint16Array];[object Int32Array];[object Uint32Array];[object Float32Array];[object Float64Array]".split(";"),
+F=z.isView||function(a){return a&&-1<L.indexOf(Object.prototype.toString.call(a))};f.prototype.append=function(a,b){if(2!==arguments.length)throw g("Invalid parameters to append");a=n(a);b=x(b);this.map.has(a)?this.map.set(a,this.map.get(a)+", "+b):this.map.set(a,b)};f.prototype["delete"]=function(a){if(1!==arguments.length)throw g("Invalid parameters to delete");this.map.delete(n(a))};f.prototype.get=function(a){if(1!==arguments.length)throw g("Invalid parameters to get");a=n(a);var b=this.map.get(a);
+return void 0!==b?b:null};f.prototype.has=function(a){if(1!==arguments.length)throw g("Invalid parameters to has");return this.map.has(n(a))};f.prototype.set=function(a,b){if(2!==arguments.length)throw g("Invalid parameters to set");this.map.set(n(a),x(b))};f.prototype.forEach=function(a,b){var c=this;m.from(this.map.entries()).sort().forEach(function(d){a.call(b,d[1],d[0],c)})};f.prototype.keys=function(){return(new r(m.from(this.map.entries()).sort())).keys()};f.prototype.values=function(){return(new r(m.from(this.map.entries()).sort())).values()};
+f.prototype.entries=function(){return(new r(m.from(this.map.entries()).sort())).entries()};f.prototype[K]=f.prototype.entries;var H="DELETE GET HEAD OPTIONS POST PUT".split(" ");q.prototype.clone=function(){var a=null;null!==this.body&&(a=B(this.body,!0),this.body=a[0],a=a[1]);return new q(this,{body:a})};y.call(q.prototype);var J=[101,204,205,304];y.call(l.prototype);l.prototype.clone=function(){var a=null;null!==this.body&&(a=B(this.body,!0),this.body=a[0],a=a[1]);return new l(a,{status:this.status,
+statusText:this.statusText,headers:new f(this.headers),url:this.url})};l.error=function(){var a=new l(null);a.type="error";a.status=0;a.statusText="";return a};var M=[301,302,303,307,308];l.redirect=function(a,b){if(!FetchInternal.IsUrlValid(a))throw new g("Invalid URL");void 0===b&&(b=302);if(-1===M.indexOf(b))throw new A("Invalid status code");return new l(null,{status:b,headers:{location:a}})};e.Headers=f;e.Request=q;e.Response=l;e.fetch=function(a,b){return new w(function(c,d){var e=!1,f=new q(a,
+b),h=new XMLHttpRequest,k=null,n=new v({start:function(a){k=a},cancel:function(a){e=!0;h.abort()}});h.onload=function(){k.close()};h.onreadystatechange=function(){if(h.readyState===h.HEADERS_RECEIVED){var a={status:h.status,statusText:h.statusText,headers:I(h.getAllResponseHeaders()||"")};a.url="responseURL"in h?h.responseURL:a.headers.get("X-Request-URL");c(new l(n,a))}};h.onerror=function(){k.error(new g("Network request failed"));d(new g("Network request failed"))};h.ontimeout=function(){k.error(new g("Network request failed"));
+d(new g("Network request failed"))};h.open(f.method,f.url,!0);"include"===f.credentials&&(h.withCredentials=!0);f.headers.forEach(function(a,b){h.setRequestHeader(b,a)});var m=function(a){e||k.enqueue(a)};null===f.body?h.fetch(m,null):t(f).then(function(a){h.fetch(m,a)})})};e.fetch.polyfill=!0}})(this);
\ No newline at end of file
diff --git a/src/cobalt/fetch/fetch.gyp b/src/cobalt/fetch/fetch.gyp
new file mode 100644
index 0000000..263b4be
--- /dev/null
+++ b/src/cobalt/fetch/fetch.gyp
@@ -0,0 +1,31 @@
+# Copyright 2017 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+{
+  'targets': [
+    {
+      'target_name': 'fetch',
+      'type': 'static_library',
+      'sources': [
+        'fetch_internal.cc',
+        'fetch_internal.h',
+      ],
+      'dependencies': [
+        '<(DEPTH)/cobalt/script/engine.gyp:engine',
+        '<(DEPTH)/cobalt/script/script.gyp:script',
+        '<(DEPTH)/googleurl/googleurl.gyp:googleurl',
+      ],
+    },
+  ],
+}
diff --git a/src/cobalt/fetch/fetch.js b/src/cobalt/fetch/fetch.js
index 9201ff8..6248427 100644
--- a/src/cobalt/fetch/fetch.js
+++ b/src/cobalt/fetch/fetch.js
@@ -35,28 +35,28 @@
     return
   }
 
-  var Array = self.Array
-  var ArrayBuffer = self.ArrayBuffer
-  var Error = self.Error
-  var Symbol_iterator = self.Symbol.iterator
-  var Map = self.Map
-  var RangeError = self.RangeError
-  var TypeError = self.TypeError
-  var Uint8Array = self.Uint8Array
+  const Array = self.Array
+  const ArrayBuffer = self.ArrayBuffer
+  const Error = self.Error
+  const Symbol_iterator = self.Symbol.iterator
+  const Map = self.Map
+  const RangeError = self.RangeError
+  const TypeError = self.TypeError
+  const Uint8Array = self.Uint8Array
 
-  var Promise = self.Promise
-  var Promise_reject = Promise.reject
-  var Promise_resolve = Promise.resolve
+  const Promise = self.Promise
+  const Promise_reject = Promise.reject
+  const Promise_resolve = Promise.resolve
 
-  var ReadableStream = self.ReadableStream
-  var ReadableStreamTee = self.ReadableStreamTee
-  var IsReadableStreamDisturbed = self.IsReadableStreamDisturbed
-  var IsReadableStreamLocked = self.IsReadableStreamLocked
+  const ReadableStream = self.ReadableStream
+  const ReadableStreamTee = self.ReadableStreamTee
+  const IsReadableStreamDisturbed = self.IsReadableStreamDisturbed
+  const IsReadableStreamLocked = self.IsReadableStreamLocked
 
-  var err_InvalidHeadersInit = 'Constructing Headers with invalid parameters'
-  var err_NetworkRequestFailed = 'Network request failed'
+  const err_InvalidHeadersInit = 'Constructing Headers with invalid parameters'
+  const err_NetworkRequestFailed = 'Network request failed'
 
-  var viewClasses = [
+  const viewClasses = [
     '[object Int8Array]',
     '[object Uint8Array]',
     '[object Uint8ClampedArray]',
@@ -348,7 +348,7 @@
   }
 
   // HTTP methods whose capitalization should be normalized
-  var methods = ['DELETE', 'GET', 'HEAD', 'OPTIONS', 'POST', 'PUT']
+  const methods = ['DELETE', 'GET', 'HEAD', 'OPTIONS', 'POST', 'PUT']
 
   function normalizeMethod(method) {
     var upcased = method.toUpperCase()
@@ -422,6 +422,21 @@
 
   Body.call(Request.prototype)
 
+  // Status text must be a reason-phrase token.
+  // https://tools.ietf.org/html/rfc7230#section-3.1.2
+  function parseStatusText(text) {
+    for (var i = 0, len = text.length, c; i < len; i++) {
+      c = text.charCodeAt(i)
+      if (c !== 9 && (c < 32 || c > 255 || c === 127)) {
+        throw TypeError('Invalid status text')
+      }
+    }
+    return text
+  }
+
+  // Body is not allowed in responses with a null body status.
+  const nullBodyStatuses = [ 101, 204, 205, 304 ]
+
   // https://fetch.spec.whatwg.org/#response-class
   function Response(body, init) {
     if (!init) {
@@ -430,10 +445,16 @@
 
     this.type = 'default'
     this.status = 'status' in init ? init.status : 200
+    if (this.status < 200 || this.status > 599) {
+      throw new RangeError('Invalid response status')
+    }
     this.ok = this.status >= 200 && this.status < 300
-    this.statusText = 'statusText' in init ? init.statusText : 'OK'
+    this.statusText = 'statusText' in init ? parseStatusText(init.statusText) : 'OK'
     this.headers = new Headers(init.headers)
     this.url = init.url || ''
+    if (body && nullBodyStatuses.indexOf(this.status) > -1) {
+      throw new TypeError('Body not allowed with a null body status')
+    }
     this._initBody(body)
   }
 
@@ -455,14 +476,22 @@
   }
 
   Response.error = function() {
-    var response = new Response(null, {status: 0, statusText: ''})
+    var response = new Response(null)
     response.type = 'error'
+    response.status = 0
+    response.statusText = ''
     return response
   }
 
   var redirectStatuses = [301, 302, 303, 307, 308]
 
   Response.redirect = function(url, status) {
+    if (!FetchInternal.IsUrlValid(url)) {
+      throw new TypeError('Invalid URL')
+    }
+    if (status === undefined) {
+      status = 302
+    }
     if (redirectStatuses.indexOf(status) === -1) {
       throw new RangeError('Invalid status code')
     }
diff --git a/src/cobalt/fetch/fetch_internal.cc b/src/cobalt/fetch/fetch_internal.cc
new file mode 100644
index 0000000..ce79f8d
--- /dev/null
+++ b/src/cobalt/fetch/fetch_internal.cc
@@ -0,0 +1,29 @@
+// Copyright 2017 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "cobalt/fetch/fetch_internal.h"
+
+#include "googleurl/src/gurl.h"
+
+namespace cobalt {
+namespace fetch {
+
+// static
+bool FetchInternal::IsUrlValid(const std::string& url) {
+  GURL gurl(url);
+  return gurl.is_valid() || gurl.is_empty();
+}
+
+}  // namespace fetch
+}  // namespace cobalt
diff --git a/src/cobalt/fetch/fetch_internal.h b/src/cobalt/fetch/fetch_internal.h
new file mode 100644
index 0000000..f5b3add
--- /dev/null
+++ b/src/cobalt/fetch/fetch_internal.h
@@ -0,0 +1,40 @@
+// Copyright 2017 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef COBALT_FETCH_FETCH_INTERNAL_H_
+#define COBALT_FETCH_FETCH_INTERNAL_H_
+
+#include <string>
+
+#include "cobalt/script/wrappable.h"
+
+namespace cobalt {
+namespace fetch {
+
+// Wrapper for utility functions for use with the fetch polyfill. This is
+// specific to the fetch polyfill and may change as the implementation changes.
+// This is not meant to be public and should not be used outside of the fetch
+// implementation.
+class FetchInternal : public script::Wrappable {
+ public:
+  // Return whether the given URL is valid.
+  static bool IsUrlValid(const std::string& url);
+
+  DEFINE_WRAPPABLE_TYPE(FetchInternal);
+};
+
+}  // namespace fetch
+}  // namespace cobalt
+
+#endif  // COBALT_FETCH_FETCH_INTERNAL_H_
diff --git a/src/cobalt/fetch/fetch_internal.idl b/src/cobalt/fetch/fetch_internal.idl
new file mode 100644
index 0000000..bd1153b
--- /dev/null
+++ b/src/cobalt/fetch/fetch_internal.idl
@@ -0,0 +1,21 @@
+// Copyright 2017 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Wrapper for utility functions for use with the fetch polyfill. This is
+// specific to the fetch polyfill and may change as the implementation changes.
+// This is not meant to be public and should not be used outside of the fetch
+// implementation.
+interface FetchInternal {
+  static boolean IsUrlValid(DOMString url);
+};
diff --git a/src/cobalt/layout_tests/testdata/web-platform-tests/fetch/web_platform_tests.txt b/src/cobalt/layout_tests/testdata/web-platform-tests/fetch/web_platform_tests.txt
index f32ceb8..7532609 100644
--- a/src/cobalt/layout_tests/testdata/web-platform-tests/fetch/web_platform_tests.txt
+++ b/src/cobalt/layout_tests/testdata/web-platform-tests/fetch/web_platform_tests.txt
@@ -1,7 +1,13 @@
 # Fetch API tests
+#
+# Cobalt doesn't support Workers, so disable related tests.
+# Cobalt fetch implementation only supports the Body mixin types text(),
+#   json(), and arrayBuffer(). Blob, URLSearchParams, and FormData are not
+#   well-supported by the javascript engine. Certain tests may be substituted
+#   with *.cobalt.html versions to accommodate this limitation.
 api/basic/block-mime-as-script.html,DISABLE
 api/basic/conditional-get.html,DISABLE
-api/basic/error-after-response.html,DISABLE
+api/basic/error-after-response.html,PASS
 api/basic/integrity.html,DISABLE
 api/basic/integrity-sharedworker.html,DISABLE
 api/basic/integrity-worker.html,DISABLE
@@ -89,7 +95,7 @@
 api/request/request-keepalive-quota.html,DISABLE
 api/request/request-structure.html,DISABLE
 api/response/multi-globals/url-parsing.html,DISABLE
-# "*.cobalt.html" tests only check Body text(), arraybuffer(), json()
+# Fails because Blob is not fully supported.
 api/response/response-cancel-stream.html,DISABLE
 api/response/response-clone.html,PASS
 api/response/response-consume.cobalt.html,PASS
@@ -98,15 +104,17 @@
 api/response/response-consume-empty.html,DISABLE
 api/response/response-consume-stream.cobalt.html,PASS
 api/response/response-consume-stream.html,DISABLE
-api/response/response-error.html,DISABLE
+api/response/response-error.html,PASS
+# Fails because implementation is a polyfill.
 api/response/response-idl.html,DISABLE
 api/response/response-init-001.html,PASS
 api/response/response-init-002.html,PASS
 api/response/response-static-error.html,PASS
-api/response/response-static-redirect.html,DISABLE
+api/response/response-static-redirect.html,PASS
 api/response/response-stream-disturbed-1.html,PASS
 api/response/response-stream-disturbed-2.html,PASS
 api/response/response-stream-disturbed-3.html,PASS
 api/response/response-stream-disturbed-4.html,PASS
 api/response/response-stream-disturbed-5.html,PASS
+# Not implemented.
 api/response/response-trailer.html,DISABLE
diff --git a/src/cobalt/renderer/animations_test.cc b/src/cobalt/renderer/animations_test.cc
index 1cdda44..26a3fc5 100644
--- a/src/cobalt/renderer/animations_test.cc
+++ b/src/cobalt/renderer/animations_test.cc
@@ -141,7 +141,7 @@
 // in this case, but it really should be working, as the media engine
 // relies on this mechanism to deliver responsive video with minimal frame
 // drops.
-TEST(AnimationsTest, DISABLED_FreshlyCreatedImagesCanBeUsedInAnimations) {
+TEST(AnimationsTest, FreshlyCreatedImagesCanBeUsedInAnimations) {
   scoped_ptr<backend::GraphicsSystem> graphics_system =
       backend::CreateDefaultGraphicsSystem();
   scoped_ptr<backend::GraphicsContext> graphics_context =
@@ -157,7 +157,8 @@
   // a frame with the graphics context.
   const math::Size kDummySurfaceDimensions(1, 1);
   scoped_refptr<backend::RenderTarget> dummy_output_surface =
-      graphics_context->CreateOffscreenRenderTarget(kDummySurfaceDimensions);
+      graphics_context->CreateDownloadableOffscreenRenderTarget(
+          kDummySurfaceDimensions);
 
   const int kNumTestTrials = 5;
   for (int i = 0; i < kNumTestTrials; ++i) {
diff --git a/src/media/webm/webm_stream_parser.cc b/src/media/webm/webm_stream_parser.cc
index b8e044c..b41633a 100644
--- a/src/media/webm/webm_stream_parser.cc
+++ b/src/media/webm/webm_stream_parser.cc
@@ -140,6 +140,7 @@
     case kWebMIdVoid:
     case kWebMIdCRC32:
     case kWebMIdCues:
+    case kWebMIdTags:
       if (cur_size < (result + element_size)) {
         // We don't have the whole element yet. Signal we need more data.
         return 0;
diff --git a/src/starboard/tools/raspi/run_test.py b/src/starboard/tools/raspi/run_test.py
index b837307..b5420bc 100755
--- a/src/starboard/tools/raspi/run_test.py
+++ b/src/starboard/tools/raspi/run_test.py
@@ -34,6 +34,10 @@
 _RASPI_USERNAME = 'pi'
 _RASPI_PASSWORD = 'raspberry'
 
+# Timeouts are in seconds
+_PEXPECT_DEFAULT_TIMEOUT = 300
+_PEXPECT_EXPECT_TIMEOUT = 60
+
 
 # pylint: disable=unused-argument
 def _SigIntOrSigTermHandler(process, signum, frame):
@@ -87,14 +91,14 @@
   source = test_dir_path
   destination = raspi_user_hostname + ':~/'
   rsync_command = 'rsync ' + options + ' ' + source + ' ' + destination
-  rsync_process = pexpect.spawn(rsync_command, timeout=120)
+  rsync_process = pexpect.spawn(rsync_command, timeout=_PEXPECT_DEFAULT_TIMEOUT)
 
   signal.signal(signal.SIGINT,
                 functools.partial(_SigIntOrSigTermHandler, rsync_process))
   signal.signal(signal.SIGTERM,
                 functools.partial(_SigIntOrSigTermHandler, rsync_process))
 
-  rsync_process.expect(r'\S+ password:')
+  rsync_process.expect(r'\S+ password:', timeout=_PEXPECT_EXPECT_TIMEOUT)
   rsync_process.sendline(_RASPI_PASSWORD)
 
   while True:
@@ -107,14 +111,14 @@
 
   # ssh into the raspi and run the test
   ssh_command = 'ssh ' + raspi_user_hostname
-  ssh_process = pexpect.spawn(ssh_command, timeout=120)
+  ssh_process = pexpect.spawn(ssh_command, timeout=_PEXPECT_DEFAULT_TIMEOUT)
 
   signal.signal(signal.SIGINT,
                 functools.partial(_SigIntOrSigTermHandler, ssh_process))
   signal.signal(signal.SIGTERM,
                 functools.partial(_SigIntOrSigTermHandler, ssh_process))
 
-  ssh_process.expect(r'\S+ password:')
+  ssh_process.expect(r'\S+ password:', timeout=_PEXPECT_EXPECT_TIMEOUT)
   ssh_process.sendline(_RASPI_PASSWORD)
 
   test_command = raspi_test_path + ' ' + flags
diff --git a/src/starboard/win/lib/atomic_public.h b/src/starboard/win/lib/atomic_public.h
new file mode 100644
index 0000000..be4e805
--- /dev/null
+++ b/src/starboard/win/lib/atomic_public.h
@@ -0,0 +1,20 @@
+// Copyright 2017 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef STARBOARD_WIN_LIB_ATOMIC_PUBLIC_H_
+#define STARBOARD_WIN_LIB_ATOMIC_PUBLIC_H_
+
+#include "starboard/shared/win32/atomic_public.h"
+
+#endif  // STARBOARD_WIN_LIB_ATOMIC_PUBLIC_H_
diff --git a/src/starboard/win/lib/configuration_public.h b/src/starboard/win/lib/configuration_public.h
new file mode 100644
index 0000000..800aeb1
--- /dev/null
+++ b/src/starboard/win/lib/configuration_public.h
@@ -0,0 +1,23 @@
+// Copyright 2017 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Other source files should never include this header directly, but should
+// include the generic "starboard/configuration.h" instead.
+
+#ifndef STARBOARD_WIN_LIB_CONFIGURATION_PUBLIC_H_
+#define STARBOARD_WIN_LIB_CONFIGURATION_PUBLIC_H_
+
+#include "starboard/win/shared/configuration_public.h"
+
+#endif  // STARBOARD_WIN_LIB_CONFIGURATION_PUBLIC_H_
diff --git a/src/starboard/win/lib/gyp_configuration.gypi b/src/starboard/win/lib/gyp_configuration.gypi
new file mode 100644
index 0000000..0dec384
--- /dev/null
+++ b/src/starboard/win/lib/gyp_configuration.gypi
@@ -0,0 +1,48 @@
+# Copyright 2017 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+{
+  'variables': {
+    'javascript_engine': 'mozjs',
+    'cobalt_enable_jit': 0,
+  },
+  'includes': [
+    '../shared/gyp_configuration.gypi',
+  ],
+  'target_defaults': {
+    'default_configuration': 'win-lib_debug',
+    'configurations': {
+     'lib_base': {
+       'abstract': 1,
+       'msvs_settings': {
+         'VCLinkerTool': {
+           'SubSystem': '2', # WINDOWS
+         }
+       }
+      },
+      'win-lib_debug': {
+        'inherit_from': ['msvs_debug', 'lib_base'],
+      },
+      'win-lib_devel': {
+       'inherit_from': ['msvs_devel', 'lib_base'],
+      },
+      'win-lib_qa': {
+        'inherit_from': ['msvs_qa', 'lib_base'],
+      },
+      'win-lib_gold': {
+        'inherit_from': ['msvs_gold', 'lib_base'],
+      },
+    },  # end of configurations
+  },
+}
diff --git a/src/starboard/win/lib/gyp_configuration.py b/src/starboard/win/lib/gyp_configuration.py
new file mode 100644
index 0000000..e136082
--- /dev/null
+++ b/src/starboard/win/lib/gyp_configuration.py
@@ -0,0 +1,33 @@
+# Copyright 2016 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+import os
+import sys
+
+# Import the shared win platform configuration.
+sys.path.append(
+    os.path.realpath(
+        os.path.join(
+            os.path.dirname(__file__), os.pardir,
+            os.pardir, 'shared', 'win32')))
+import gyp_configuration
+
+
+def CreatePlatformConfig():
+  try:
+    return gyp_configuration.PlatformConfig('win-lib')
+  except RuntimeError as e:
+    logging.critical(e)
+    return None
diff --git a/src/starboard/win/lib/main.cc b/src/starboard/win/lib/main.cc
new file mode 100644
index 0000000..2ab7084
--- /dev/null
+++ b/src/starboard/win/lib/main.cc
@@ -0,0 +1,70 @@
+// Copyright 2017 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <windows.h>
+
+#include <WinSock2.h>
+
+#include <string>
+#include <vector>
+
+#include "starboard/configuration.h"
+#include "starboard/shared/uwp/application_uwp.h"
+#include "starboard/shared/win32/thread_private.h"
+#include "starboard/shared/win32/wchar_utils.h"
+
+using starboard::shared::win32::wchar_tToUTF8;
+
+// TODO: Share more of this logic with xb1 & win-console?
+int main(Platform::Array<Platform::String ^> ^ args) {
+  if (!IsDebuggerPresent()) {
+    _CrtSetReportMode(_CRT_ASSERT, _CRTDBG_MODE_FILE | _CRTDBG_MODE_DEBUG);
+    _CrtSetReportMode(_CRT_ERROR, _CRTDBG_MODE_FILE | _CRTDBG_MODE_DEBUG);
+    _CrtSetReportMode(_CRT_WARN, _CRTDBG_MODE_FILE | _CRTDBG_MODE_DEBUG);
+    _CrtSetReportFile(_CRT_ASSERT, _CRTDBG_FILE_STDERR);
+  }
+  // Request WinSock 2.2; MAKEWORD packs (low byte = major, high byte = minor).
+  const int kWinSockVersionMajor = 2;
+  const int kWinSockVersionMinor = 2;
+  WSAData wsaData;
+  int init_result = WSAStartup(
+      MAKEWORD(kWinSockVersionMajor, kWinSockVersionMinor), &wsaData);
+
+  SB_CHECK(init_result == 0);
+  // WSAStartup returns the highest version that is supported up to the version
+  // we request.
+  SB_CHECK(LOBYTE(wsaData.wVersion) == kWinSockVersionMajor &&
+           HIBYTE(wsaData.wVersion) == kWinSockVersionMinor);
+
+  starboard::shared::win32::RegisterMainThread();
+  // Convert each platform argument string to UTF-8 via wchar_tToUTF8.
+  std::vector<std::string> string_args;
+  for (auto it = args->begin(); it != args->end(); ++it) {
+    Platform::String ^ s = *it;
+    string_args.push_back(wchar_tToUTF8(s->Data(), s->Length()));
+  }
+  // Build argv-style pointers into |string_args|, which must outlive them.
+  std::vector<const char*> utf8_args;
+  for (auto it = string_args.begin(); it != string_args.end(); ++it) {
+    utf8_args.push_back(it->data());
+  }
+
+  starboard::shared::uwp::ApplicationUwp application;
+  int return_value = application.Run(static_cast<int>(utf8_args.size()),
+                                     const_cast<char**>(utf8_args.data()));
+  // Balance the WSAStartup() call made above.
+  WSACleanup();
+
+  return return_value;
+}
diff --git a/src/starboard/win/lib/starboard_platform.gyp b/src/starboard/win/lib/starboard_platform.gyp
new file mode 100644
index 0000000..f6dfb74
--- /dev/null
+++ b/src/starboard/win/lib/starboard_platform.gyp
@@ -0,0 +1,32 @@
+# Copyright 2017 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+{
+  'includes': [ '../shared/starboard_platform.gypi' ],
+  'sources': [
+    'atomic_public.h',
+    'configuration_public.h',
+    'thread_types_public.h',
+  ],
+  'variables': {
+    'starboard_platform_dependent_sources': [
+      'atomic_public.h',
+      'configuration_public.h',
+      'thread_types_public.h',
+      'main.cc',
+      '../shared/system_get_path.cc',
+      '<(DEPTH)/starboard/shared/uwp/application_uwp.cc',
+      '<(DEPTH)/starboard/shared/uwp/application_uwp.h',
+    ],
+  },
+}
diff --git a/src/starboard/win/lib/starboard_platform_tests.gyp b/src/starboard/win/lib/starboard_platform_tests.gyp
new file mode 100644
index 0000000..93f812b
--- /dev/null
+++ b/src/starboard/win/lib/starboard_platform_tests.gyp
@@ -0,0 +1,41 @@
+# Copyright 2017 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+{
+  'targets': [
+    {
+      'target_name': 'starboard_platform_tests',
+      'type': '<(gtest_target_type)',
+      'sources': [
+        '<(DEPTH)/starboard/common/test_main.cc',
+        '<(DEPTH)/starboard/shared/starboard/media/mime_type_test.cc',
+      ],
+      'dependencies': [
+        '<(DEPTH)/starboard/starboard.gyp:starboard',
+        '<(DEPTH)/testing/gmock.gyp:gmock',
+        '<(DEPTH)/testing/gtest.gyp:gtest',
+      ],
+    },
+    {
+      'target_name': 'starboard_platform_tests_deploy',
+      'type': 'none',
+      'dependencies': [
+        '<(DEPTH)/<(starboard_path)/starboard_platform_tests.gyp:starboard_platform_tests',
+      ],
+      'variables': {
+        'executable_name': 'starboard_platform_tests',
+      },
+      'includes': [ '../../build/deploy.gypi' ],
+    },
+  ],
+}
diff --git a/src/starboard/win/lib/thread_types_public.h b/src/starboard/win/lib/thread_types_public.h
new file mode 100644
index 0000000..0f8f791
--- /dev/null
+++ b/src/starboard/win/lib/thread_types_public.h
@@ -0,0 +1,22 @@
+// Copyright 2017 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Includes threading primitive types and initializers.
+
+#ifndef STARBOARD_WIN_LIB_THREAD_TYPES_PUBLIC_H_
+#define STARBOARD_WIN_LIB_THREAD_TYPES_PUBLIC_H_
+
+#include "starboard/shared/win32/thread_types_public.h"
+
+#endif  // STARBOARD_WIN_LIB_THREAD_TYPES_PUBLIC_H_
diff --git a/src/starboard/win/shared/starboard_platform.gypi b/src/starboard/win/shared/starboard_platform.gypi
index 3df57c9..6d28bf6 100644
--- a/src/starboard/win/shared/starboard_platform.gypi
+++ b/src/starboard/win/shared/starboard_platform.gypi
@@ -19,13 +19,12 @@
       'msvs_settings': {
         'VCCLCompilerTool': {
           'AdditionalOptions': [
-            '/ZW',  # Windows Runtime (Needed for UWP)
-            '/EHsx', # C++ exceptions (required with /ZW)
-            # For 'platform.winmd'. Note that replacing x86 with
-            # x64 here does not work.
-            '/AI"<(visual_studio_install_path)/lib/x86/store/references"',
-            # For 'Windows.winmd'
-            '/AI"<(windows_sdk_path)/UnionMetadata/<(windows_sdk_version)"',
+            '/ZW',           # Windows Runtime
+            '/ZW:nostdlib',  # Windows Runtime, no default #using
+            '/EHsx',         # C++ exceptions (required with /ZW)
+            '/FU"<(visual_studio_install_path)/lib/x86/store/references/platform.winmd"',
+            '/FU"<(windows_sdk_path)/References/<(windows_sdk_version)/Windows.Foundation.FoundationContract/3.0.0.0/Windows.Foundation.FoundationContract.winmd"',
+            '/FU"<(windows_sdk_path)/References/<(windows_sdk_version)/Windows.Foundation.UniversalApiContract/4.0.0.0/Windows.Foundation.UniversalApiContract.winmd"',
           ]
         }
       },
@@ -290,6 +289,10 @@
         # This must be defined when building Starboard, and must not when
         # building Starboard client code.
         'STARBOARD_IMPLEMENTATION',
+        # VS2017 always defines this for UWP apps
+        'WINAPI_FAMILY=WINAPI_FAMILY_APP',
+        # VS2017 always defines this for UWP apps
+        '__WRL_NO_DEFAULT_LIB__',
       ],
     },
   ],
diff --git a/src/third_party/angle/angle.gyp b/src/third_party/angle/angle.gyp
index 3aea864..1bf5056 100644
--- a/src/third_party/angle/angle.gyp
+++ b/src/third_party/angle/angle.gyp
@@ -5,29 +5,22 @@
 {
     'variables':
     {
+        'angle_build_winrt': 1,
         'angle_code': 1,
         'angle_gen_path': '<(SHARED_INTERMEDIATE_DIR)/angle',
         'angle_use_commit_id%': 0,
         'angle_enable_d3d9%': 0,
-        'angle_enable_d3d11%': 0,
+        'angle_enable_d3d11%': 1,
         'angle_enable_gl%': 0,
         'angle_enable_vulkan%': 0,
         'angle_enable_essl%': 1, # Enable this for all configs by default
         'angle_enable_glsl%': 1, # Enable this for all configs by default
-        'angle_enable_hlsl%': 0,
+        'angle_enable_hlsl%': 1,
         'angle_link_glx%': 0,
         'angle_gl_library_type%': 'static_library',
         'dcheck_always_on%': 0,
         'conditions':
         [
-            ['OS=="win"',
-            {
-                'angle_enable_gl%': 1,
-                'angle_enable_d3d9%': 1,
-                'angle_enable_d3d11%': 1,
-                'angle_enable_hlsl%': 1,
-                'angle_enable_vulkan%': 1,
-            }],
             ['OS=="linux" and use_x11==1 and chromeos==0',
             {
                 'angle_enable_gl%': 1,
@@ -87,7 +80,7 @@
                             },
                         },
                     }],
-                    ['OS=="win"',
+                    ['target_os=="win"',
                     {
                         'configurations':
                         {
@@ -117,7 +110,7 @@
                         },
                     },
                 }],
-                ['OS=="win"',
+                ['target_os=="win"',
                 {
                     'configurations':
                     {
@@ -213,14 +206,14 @@
             },
             'conditions':
             [
-                ['OS=="win"',
+                ['target_os=="win"',
                 {
                     'sources':
                     [
                         '<@(libangle_gpu_info_util_win_sources)',
                     ],
                 }],
-                ['OS=="win" and angle_build_winrt==0',
+                ['target_os=="win" and angle_build_winrt==0',
                 {
                     'link_settings':
                     {
@@ -333,7 +326,7 @@
             [
                 {
                     'destination': '<(angle_gen_path)',
-                    'files': [ 'copy_compiler_dll.bat' ],
+                    'files': [ './src/copy_compiler_dll.bat' ],
                 },
             ],
             'conditions':
@@ -347,7 +340,7 @@
     ],
     'conditions':
     [
-        ['OS=="win"',
+        ['target_os=="win"',
         {
             'targets':
             [
@@ -366,12 +359,12 @@
                                     'action_name': 'copy_dll',
                                     'message': 'Copying D3D Compiler DLL...',
                                     'msvs_cygwin_shell': 0,
-                                    'inputs': [ 'copy_compiler_dll.bat' ],
+                                    'inputs': [ './src/copy_compiler_dll.bat' ],
                                     'outputs': [ '<(PRODUCT_DIR)/d3dcompiler_47.dll' ],
                                     'action':
                                     [
-                                        "<(angle_gen_path)/copy_compiler_dll.bat",
-                                        "$(PlatformName)",
+                                        "<(DEPTH)/third_party/angle/src/copy_compiler_dll.bat",
+                                        "x64",
                                         "<(windows_sdk_path)",
                                         "<(PRODUCT_DIR)"
                                     ],
diff --git a/src/third_party/angle/src/angle.gyp b/src/third_party/angle/src/angle.gyp
index e2b0f9b..36623f6 100644
--- a/src/third_party/angle/src/angle.gyp
+++ b/src/third_party/angle/src/angle.gyp
@@ -24,7 +24,7 @@
         'dcheck_always_on%': 0,
         'conditions':
         [
-            ['OS=="win"',
+            ['target_os=="win"',
             {
                 'angle_enable_gl%': 1,
                 'angle_enable_d3d9%': 1,
@@ -80,9 +80,9 @@
             {
                 'include_dirs':
                 [
-                    '<(angle_path)/include',
-                    '<(angle_path)/src',
-                    '<(angle_path)/src/common/third_party/numerics',
+                    '<(DEPTH)/include',
+                    '<(DEPTH)/src',
+                    '<(DEPTH)/src/common/third_party/numerics',
                 ],
                 'conditions':
                 [
@@ -99,7 +99,7 @@
                             },
                         },
                     }],
-                    ['OS=="win"',
+                    ['target_os=="win"',
                     {
                         'configurations':
                         {
@@ -129,7 +129,7 @@
                         },
                     },
                 }],
-                ['OS=="win"',
+                ['target_os=="win"',
                 {
                     'configurations':
                     {
@@ -192,8 +192,8 @@
             {
                 'include_dirs':
                 [
-                    '<(angle_path)/include',
-                    '<(angle_path)/src',
+                    '<(DEPTH)/include',
+                    '<(DEPTH)/src',
                 ],
             },
         },
@@ -219,20 +219,20 @@
             {
                 'include_dirs':
                 [
-                    '<(angle_path)/include',
-                    '<(angle_path)/src',
+                    '<(DEPTH)/include',
+                    '<(DEPTH)/src',
                 ],
             },
             'conditions':
             [
-                ['OS=="win"',
+                ['target_os=="win"',
                 {
                     'sources':
                     [
                         '<@(libangle_gpu_info_util_win_sources)',
                     ],
                 }],
-                ['OS=="win" and angle_build_winrt==0',
+                ['target_os=="win" and angle_build_winrt==0',
                 {
                     'link_settings':
                     {
@@ -290,7 +290,7 @@
                     ],
                     'dependencies':
                     [
-                        '<(angle_path)/src/third_party/libXNVCtrl/libXNVCtrl.gyp:libXNVCtrl',
+                        '<(DEPTH)/src/third_party/libXNVCtrl/libXNVCtrl.gyp:libXNVCtrl',
                     ],
                     'link_settings':
                     {
@@ -375,12 +375,12 @@
                             'action_name': 'Generate ANGLE Commit ID Header',
                             'message': 'Generating ANGLE Commit ID',
                             # reference the git index as an input, so we rebuild on changes to the index
-                            'inputs': [ '<(angle_id_script)', '<(angle_path)/.git/index' ],
+                            'inputs': [ '<(angle_id_script)', '<(DEPTH)/.git/index' ],
                             'outputs': [ '<(angle_id_header)' ],
                             'msvs_cygwin_shell': 0,
                             'action':
                             [
-                                'python', '<(angle_id_script)', 'gen', '<(angle_path)', '<(angle_id_header)'
+                                'python', '<(angle_id_script)', 'gen', '<(DEPTH)', '<(angle_id_header)'
                             ],
                         },
                     ],
@@ -433,7 +433,7 @@
                 }
             ]
         }],
-        ['OS=="win"',
+        ['target_os=="win"',
         {
             'targets':
             [
diff --git a/src/third_party/angle/src/gpu_info_util/SystemInfo_win.cpp b/src/third_party/angle/src/gpu_info_util/SystemInfo_win.cpp
index 3073822..a4c4dac 100644
--- a/src/third_party/angle/src/gpu_info_util/SystemInfo_win.cpp
+++ b/src/third_party/angle/src/gpu_info_util/SystemInfo_win.cpp
@@ -17,6 +17,8 @@
 #elif defined(GPU_INFO_USE_DXGI)
 #include <dxgi.h>
 #include <d3d10.h>
+#define __uuidof(NAME) IID_##NAME
+
 #else
 #error "SystemInfo_win needs at least GPU_INFO_USE_SETUPAPI or GPU_INFO_USE_DXGI defined"
 #endif
@@ -120,7 +122,9 @@
 bool GetDevicesFromDXGI(std::vector<GPUDeviceInfo> *devices)
 {
     IDXGIFactory *factory;
-    if (!SUCCEEDED(CreateDXGIFactory(__uuidof(IDXGIFactory), reinterpret_cast<void **>(&factory))))
+    // The CreateDXGIFactory function does not exist for Windows Store apps.
+    // Instead, Windows Store apps use the CreateDXGIFactory1 function.
+    if (!SUCCEEDED(CreateDXGIFactory1(__uuidof(IDXGIFactory), reinterpret_cast<void **>(&factory))))
     {
         return false;
     }
@@ -195,8 +199,14 @@
     FindPrimaryGPU(info);
 
     // nvd3d9wrap.dll is loaded into all processes when Optimus is enabled.
+    // GetModuleHandleW is only available to desktop apps.
+#if defined(WINAPI_FAMILY) && WINAPI_FAMILY==WINAPI_FAMILY_APP
+    info->isOptimus = false;
+#else
     HMODULE nvd3d9wrap = GetModuleHandleW(L"nvd3d9wrap.dll");
     info->isOptimus    = nvd3d9wrap != nullptr;
+#endif
+
 
     return true;
 }
diff --git a/src/third_party/angle/src/libANGLE/renderer/d3d/ProgramD3D.cpp b/src/third_party/angle/src/libANGLE/renderer/d3d/ProgramD3D.cpp
index b8df596..ce19a29 100644
--- a/src/third_party/angle/src/libANGLE/renderer/d3d/ProgramD3D.cpp
+++ b/src/third_party/angle/src/libANGLE/renderer/d3d/ProgramD3D.cpp
@@ -10,6 +10,7 @@
 
 #include "common/bitset_utils.h"
 #include "common/utilities.h"
+#include "compiler/translator/blocklayoutHLSL.h"
 #include "libANGLE/Framebuffer.h"
 #include "libANGLE/FramebufferAttachment.h"
 #include "libANGLE/Program.h"
diff --git a/src/third_party/angle/src/libANGLE/renderer/d3d/d3d11/Renderer11.cpp b/src/third_party/angle/src/libANGLE/renderer/d3d/d3d11/Renderer11.cpp
index 4d7e56a..4c544ce 100644
--- a/src/third_party/angle/src/libANGLE/renderer/d3d/d3d11/Renderer11.cpp
+++ b/src/third_party/angle/src/libANGLE/renderer/d3d/d3d11/Renderer11.cpp
@@ -446,6 +446,10 @@
         EGLint requestedMinorVersion = static_cast<EGLint>(
             attributes.get(EGL_PLATFORM_ANGLE_MAX_VERSION_MINOR_ANGLE, EGL_DONT_CARE));
 
+// Only allow D3D feature level 10_0 on Starboard.
+#if defined(STARBOARD)
+        mAvailableFeatureLevels.push_back(D3D_FEATURE_LEVEL_10_0);
+#else
         if (requestedMajorVersion == EGL_DONT_CARE || requestedMajorVersion >= 11)
         {
             if (requestedMinorVersion == EGL_DONT_CARE || requestedMinorVersion >= 0)
@@ -470,6 +474,7 @@
         {
             mAvailableFeatureLevels.push_back(D3D_FEATURE_LEVEL_9_3);
         }
+#endif  // STARBOARD
 
         EGLint requestedDeviceType = static_cast<EGLint>(attributes.get(
             EGL_PLATFORM_ANGLE_DEVICE_TYPE_ANGLE, EGL_PLATFORM_ANGLE_DEVICE_TYPE_HARDWARE_ANGLE));
diff --git a/src/third_party/angle/src/libANGLE/renderer/d3d/d3d11/winrt/CoreWindowNativeWindow.cpp b/src/third_party/angle/src/libANGLE/renderer/d3d/d3d11/winrt/CoreWindowNativeWindow.cpp
index dd37ace..a65923a 100644
--- a/src/third_party/angle/src/libANGLE/renderer/d3d/d3d11/winrt/CoreWindowNativeWindow.cpp
+++ b/src/third_party/angle/src/libANGLE/renderer/d3d/d3d11/winrt/CoreWindowNativeWindow.cpp
@@ -22,7 +22,7 @@
 bool CoreWindowNativeWindow::initialize(EGLNativeWindowType window, IPropertySet *propertySet)
 {
     ComPtr<IPropertySet> props = propertySet;
-    ComPtr<IInspectable> win = window;
+    ComPtr<IInspectable> win   = static_cast<IInspectable *>(window);
     SIZE swapChainSize = {};
     HRESULT result = S_OK;
 
diff --git a/src/third_party/angle/src/libANGLE/renderer/d3d/d3d11/winrt/CoreWindowNativeWindow.h b/src/third_party/angle/src/libANGLE/renderer/d3d/d3d11/winrt/CoreWindowNativeWindow.h
index d43bf0b..5e90a40 100644
--- a/src/third_party/angle/src/libANGLE/renderer/d3d/d3d11/winrt/CoreWindowNativeWindow.h
+++ b/src/third_party/angle/src/libANGLE/renderer/d3d/d3d11/winrt/CoreWindowNativeWindow.h
@@ -46,9 +46,13 @@
     ComPtr<IMap<HSTRING, IInspectable*>> mPropertyMap;
 };
 
+#if !defined(STARBOARD)
 [uuid(7F924F66-EBAE-40E5-A10B-B8F35E245190)]
-class CoreWindowSizeChangedHandler :
-    public Microsoft::WRL::RuntimeClass<Microsoft::WRL::RuntimeClassFlags<Microsoft::WRL::ClassicCom>, IWindowSizeChangedEventHandler>
+#endif
+class CoreWindowSizeChangedHandler
+    : public Microsoft::WRL::RuntimeClass<
+          Microsoft::WRL::RuntimeClassFlags<Microsoft::WRL::ClassicCom>,
+          IWindowSizeChangedEventHandler>
 {
   public:
     CoreWindowSizeChangedHandler() { }
diff --git a/src/third_party/angle/src/libANGLE/renderer/d3d/d3d11/winrt/InspectableNativeWindow.cpp b/src/third_party/angle/src/libANGLE/renderer/d3d/d3d11/winrt/InspectableNativeWindow.cpp
index cc81521..60b6d73 100644
--- a/src/third_party/angle/src/libANGLE/renderer/d3d/d3d11/winrt/InspectableNativeWindow.cpp
+++ b/src/third_party/angle/src/libANGLE/renderer/d3d/d3d11/winrt/InspectableNativeWindow.cpp
@@ -19,7 +19,7 @@
         return false;
     }
 
-    ComPtr<IInspectable> win = window;
+    ComPtr<IInspectable> win = static_cast<IInspectable *>(window);
     ComPtr<ABI::Windows::UI::Core::ICoreWindow> coreWin;
     if (SUCCEEDED(win.As(&coreWin)))
     {
@@ -40,7 +40,7 @@
         return false;
     }
 
-    ComPtr<IInspectable> win = window;
+    ComPtr<IInspectable> win = static_cast<IInspectable *>(window);
     ComPtr<ABI::Windows::UI::Xaml::Controls::ISwapChainPanel> panel;
     if (SUCCEEDED(win.As(&panel)))
     {
@@ -61,7 +61,7 @@
         return false;
     }
 
-    ComPtr<IInspectable> props = window;
+    ComPtr<IInspectable> props = static_cast<IInspectable *>(window);
     ComPtr<IPropertySet> propSet;
     ComPtr<IInspectable> nativeWindow;
     ComPtr<ABI::Windows::Foundation::Collections::IMap<HSTRING, IInspectable*>> propMap;
diff --git a/src/third_party/angle/src/libANGLE/renderer/d3d/d3d11/winrt/NativeWindow11WinRT.cpp b/src/third_party/angle/src/libANGLE/renderer/d3d/d3d11/winrt/NativeWindow11WinRT.cpp
index 2ef2235..655b23b 100644
--- a/src/third_party/angle/src/libANGLE/renderer/d3d/d3d11/winrt/NativeWindow11WinRT.cpp
+++ b/src/third_party/angle/src/libANGLE/renderer/d3d/d3d11/winrt/NativeWindow11WinRT.cpp
@@ -89,6 +89,7 @@
                                              DXGI_FORMAT format,
                                              UINT width,
                                              UINT height,
+                                             UINT samples,
                                              IDXGISwapChain **swapChain)
 {
     if (mImpl)
diff --git a/src/third_party/angle/src/libANGLE/renderer/d3d/d3d11/winrt/NativeWindow11WinRT.h b/src/third_party/angle/src/libANGLE/renderer/d3d/d3d11/winrt/NativeWindow11WinRT.h
index 996fd3a..42a9e76 100644
--- a/src/third_party/angle/src/libANGLE/renderer/d3d/d3d11/winrt/NativeWindow11WinRT.h
+++ b/src/third_party/angle/src/libANGLE/renderer/d3d/d3d11/winrt/NativeWindow11WinRT.h
@@ -29,11 +29,14 @@
     bool getClientRect(LPRECT rect) const override;
     bool isIconic() const override;
 
+    // Note that the UINT samples argument was added here so that this actually
+    // overrides the createSwapChain method in the base class NativeWindow11.
     HRESULT createSwapChain(ID3D11Device *device,
                             IDXGIFactory *factory,
                             DXGI_FORMAT format,
                             UINT width,
                             UINT height,
+                            UINT samples,
                             IDXGISwapChain **swapChain) override;
 
     void commitChange() override;
diff --git a/src/third_party/angle/src/libANGLE/renderer/d3d/d3d11/winrt/SwapChainPanelNativeWindow.cpp b/src/third_party/angle/src/libANGLE/renderer/d3d/d3d11/winrt/SwapChainPanelNativeWindow.cpp
index c6d07fc..a18482a 100644
--- a/src/third_party/angle/src/libANGLE/renderer/d3d/d3d11/winrt/SwapChainPanelNativeWindow.cpp
+++ b/src/third_party/angle/src/libANGLE/renderer/d3d/d3d11/winrt/SwapChainPanelNativeWindow.cpp
@@ -92,7 +92,7 @@
 bool SwapChainPanelNativeWindow::initialize(EGLNativeWindowType window, IPropertySet *propertySet)
 {
     ComPtr<IPropertySet> props = propertySet;
-    ComPtr<IInspectable> win = window;
+    ComPtr<IInspectable> win   = static_cast<IInspectable *>(window);
     SIZE swapChainSize = {};
     HRESULT result = S_OK;
 
diff --git a/src/third_party/angle/src/libANGLE/renderer/d3d/d3d11/winrt/SwapChainPanelNativeWindow.h b/src/third_party/angle/src/libANGLE/renderer/d3d/d3d11/winrt/SwapChainPanelNativeWindow.h
index f9a2fc0..8264201 100644
--- a/src/third_party/angle/src/libANGLE/renderer/d3d/d3d11/winrt/SwapChainPanelNativeWindow.h
+++ b/src/third_party/angle/src/libANGLE/renderer/d3d/d3d11/winrt/SwapChainPanelNativeWindow.h
@@ -42,9 +42,13 @@
     ComPtr<IDXGISwapChain1> mSwapChain;
 };
 
+#if !defined(STARBOARD)
 [uuid(8ACBD974-8187-4508-AD80-AEC77F93CF36)]
-class SwapChainPanelSizeChangedHandler :
-    public Microsoft::WRL::RuntimeClass<Microsoft::WRL::RuntimeClassFlags<Microsoft::WRL::ClassicCom>, ABI::Windows::UI::Xaml::ISizeChangedEventHandler>
+#endif
+class SwapChainPanelSizeChangedHandler
+    : public Microsoft::WRL::RuntimeClass<
+          Microsoft::WRL::RuntimeClassFlags<Microsoft::WRL::ClassicCom>,
+          ABI::Windows::UI::Xaml::ISizeChangedEventHandler>
 {
   public:
     SwapChainPanelSizeChangedHandler() { }
diff --git a/src/third_party/angle/src/libEGL.gypi b/src/third_party/angle/src/libEGL.gypi
index fdce4ba..8e9469e 100644
--- a/src/third_party/angle/src/libEGL.gypi
+++ b/src/third_party/angle/src/libEGL.gypi
@@ -33,7 +33,7 @@
                 {
                     'msvs_requires_importlibrary' : 'true',
                 }],
-                ['OS=="win"', {
+                ['target_os=="win"', {
                     'defines':
                     [
                         'EGLAPI=',
diff --git a/src/third_party/angle/src/libGLESv2.gypi b/src/third_party/angle/src/libGLESv2.gypi
index f32c83d..818c152 100644
--- a/src/third_party/angle/src/libGLESv2.gypi
+++ b/src/third_party/angle/src/libGLESv2.gypi
@@ -916,7 +916,7 @@
                 ],
                 'conditions':
                 [
-                    ['OS=="win"', {
+                    ['target_os=="win"', {
                         'defines':
                         [
                             'GL_APICALL=',
@@ -1009,7 +1009,7 @@
                     ],
                     'conditions':
                     [
-                        ['OS=="win"',
+                        ['target_os=="win"',
                         {
                             'sources':
                             [
@@ -1107,7 +1107,7 @@
                     ],
                     'conditions':
                     [
-                        ['OS=="win"',
+                        ['target_os=="win"',
                         {
                             'sources':
                             [
@@ -1138,7 +1138,7 @@
                         '<@(libangle_null_sources)',
                     ],
                 }],
-                ['angle_build_winrt==0 and OS=="win"',
+                ['angle_build_winrt==0 and target_os=="win"',
                 {
                     'dependencies':
                     [
diff --git a/src/third_party/angle/src/tests/angle_end2end_tests.gypi b/src/third_party/angle/src/tests/angle_end2end_tests.gypi
index fe0a604..0c616a5 100644
--- a/src/third_party/angle/src/tests/angle_end2end_tests.gypi
+++ b/src/third_party/angle/src/tests/angle_end2end_tests.gypi
@@ -144,7 +144,7 @@
     ],
     'conditions':
     [
-        ['OS=="win"',
+        ['target_os=="win"',
         {
             'sources':
             [
diff --git a/src/third_party/angle/src/tests/angle_perftests.gypi b/src/third_party/angle/src/tests/angle_perftests.gypi
index 760f7c8..aedfd82 100644
--- a/src/third_party/angle/src/tests/angle_perftests.gypi
+++ b/src/third_party/angle/src/tests/angle_perftests.gypi
@@ -70,7 +70,7 @@
     ],
     'conditions':
     [
-        ['OS=="win"',
+        ['target_os=="win"',
         {
             'sources':
             [
diff --git a/src/third_party/angle/src/tests/angle_unittests.gypi b/src/third_party/angle/src/tests/angle_unittests.gypi
index 0c0199c..48dd7b4 100644
--- a/src/third_party/angle/src/tests/angle_unittests.gypi
+++ b/src/third_party/angle/src/tests/angle_unittests.gypi
@@ -155,7 +155,7 @@
                 },
             },
         }],
-        ['OS=="win"',
+        ['target_os=="win"',
         {
             # TODO(cwallez): make this angle_enable_hlsl instead (requires gyp file refactoring)
             'defines':
diff --git a/src/third_party/angle/src/tests/angle_white_box_tests.gypi b/src/third_party/angle/src/tests/angle_white_box_tests.gypi
index 28c7a45..70fb227 100644
--- a/src/third_party/angle/src/tests/angle_white_box_tests.gypi
+++ b/src/third_party/angle/src/tests/angle_white_box_tests.gypi
@@ -50,7 +50,7 @@
     ],
     'conditions':
     [
-        ['OS=="win"',
+        ['target_os=="win"',
         {
             'sources':
             [
diff --git a/src/third_party/angle/src/tests/deqp.gypi b/src/third_party/angle/src/tests/deqp.gypi
index fcfb004..7f611cb 100644
--- a/src/third_party/angle/src/tests/deqp.gypi
+++ b/src/third_party/angle/src/tests/deqp.gypi
@@ -1196,18 +1196,18 @@
         ],
         'conditions':
         [
-            ['(OS=="win" or OS=="linux" or OS=="mac")',
+            ['(target_os=="win" or OS=="linux" or OS=="mac")',
             {
                 # Build the dEQP libraries for all Windows/Linux builds
                 'angle_build_deqp_libraries%': 1,
             }],
-            ['((OS=="win" or OS=="linux" or OS=="mac") and angle_build_winrt==0)',
+            ['((target_os=="win" or OS=="linux" or OS=="mac") and angle_build_winrt==0)',
             {
                 # Build the dEQP GoogleTest support helpers for all Windows/Linux builds except WinRT
                 # GoogleTest doesn't support WinRT
                 'angle_build_deqp_gtest_support%': 1,
             }],
-            ['((OS=="win" or OS=="linux" or OS=="mac") and angle_build_winrt==0)',
+            ['((target_os=="win" or OS=="linux" or OS=="mac") and angle_build_winrt==0)',
             {
                 # Build the dEQP executables for all standalone Windows/Linux builds except WinRT
                 # GYP doesn't support generating standalone WinRT executables
@@ -1482,7 +1482,7 @@
                                     '-Wno-delete-non-virtual-dtor',
                                 ],
                             }],
-                            ['OS=="win"',
+                            ['target_os=="win"',
                             {
                                 'cflags': ['<@(deqp_win_cflags)'],
                                 'cflags_cc': ['<@(deqp_win_cflags)'],
@@ -1586,7 +1586,7 @@
                                 },
                             },
                         }],
-                        ['OS=="win"',
+                        ['target_os=="win"',
                         {
                             'sources': [ '<@(deqp_libtester_sources_win)', ],
                         }],
diff --git a/src/third_party/angle/src/tests/tests.gyp b/src/third_party/angle/src/tests/tests.gyp
index 854ba8e..702539f 100644
--- a/src/third_party/angle/src/tests/tests.gyp
+++ b/src/third_party/angle/src/tests/tests.gyp
@@ -206,7 +206,7 @@
                 },
             ],
         }],
-        ['OS=="win"',
+        ['target_os=="win"',
         {
             'conditions':
             [
diff --git a/src/third_party/angle/src/vulkan_support/vulkan.gypi b/src/third_party/angle/src/vulkan_support/vulkan.gypi
index 57d77a3..779d0da 100644
--- a/src/third_party/angle/src/vulkan_support/vulkan.gypi
+++ b/src/third_party/angle/src/vulkan_support/vulkan.gypi
@@ -373,7 +373,7 @@
                     },
                     'conditions':
                     [
-                        ['OS=="win"',
+                        ['target_os=="win"',
                         {
                             'sources':
                             [
@@ -540,7 +540,7 @@
                     },
                     'conditions':
                     [
-                        ['OS=="win"',
+                        ['target_os=="win"',
                         {
                             'defines':
                             [
@@ -594,7 +594,7 @@
                         },
                         'conditions':
                         [
-                            ['OS=="win"',
+                            ['target_os=="win"',
                             {
                                 'defines':
                                 [
@@ -863,7 +863,7 @@
                             ],
                             'conditions':
                             [
-                                ['OS=="win"',
+                                ['target_os=="win"',
                                 {
                                     'inputs':
                                     [
@@ -946,7 +946,7 @@
                         },
                         'conditions':
                         [
-                            ['OS=="win"',
+                            ['target_os=="win"',
                             {
                                 'defines':
                                 [
@@ -966,7 +966,7 @@
                     },
                     'conditions':
                     [
-                        ['OS=="win"',
+                        ['target_os=="win"',
                         {
                             'sources':
                             [
@@ -1059,7 +1059,7 @@
                     ],
                     'conditions':
                     [
-                        ['OS=="win"',
+                        ['target_os=="win"',
                         {
                             'sources':
                             [
@@ -1097,7 +1097,7 @@
                     ],
                     'conditions':
                     [
-                        ['OS=="win"',
+                        ['target_os=="win"',
                         {
                             'sources':
                             [
@@ -1135,7 +1135,7 @@
                     ],
                     'conditions':
                     [
-                        ['OS=="win"',
+                        ['target_os=="win"',
                         {
                             'sources':
                             [
@@ -1158,7 +1158,7 @@
                     ],
                     'conditions':
                     [
-                        ['OS=="win"',
+                        ['target_os=="win"',
                         {
                             'sources':
                             [
@@ -1218,7 +1218,7 @@
                     ],
                     'conditions':
                     [
-                        ['OS=="win"',
+                        ['target_os=="win"',
                         {
                             'sources':
                             [
@@ -1278,7 +1278,7 @@
                     ],
                     'conditions':
                     [
-                        ['OS=="win"',
+                        ['target_os=="win"',
                         {
                             'sources':
                             [
diff --git a/src/third_party/angle/util/util.gyp b/src/third_party/angle/util/util.gyp
index bda8a05..8b11c9a 100644
--- a/src/third_party/angle/util/util.gyp
+++ b/src/third_party/angle/util/util.gyp
@@ -97,8 +97,8 @@
             {
                 'include_dirs':
                 [
-                    '<(angle_path)/include',
-                    '<(angle_path)/util',
+                    '<(DEPTH)/include',
+                    '<(DEPTH)/util',
                 ],
                 'sources':
                 [
@@ -112,14 +112,14 @@
                 ],
                 'conditions':
                 [
-                    ['OS=="win" and angle_build_winrt==0',
+                    ['target_os=="win" and angle_build_winrt==0',
                     {
                         'sources':
                         [
                             '<@(util_win32_sources)',
                         ],
                     }],
-                    ['OS=="win" and angle_build_winrt==1',
+                    ['target_os=="win" and angle_build_winrt==1',
                     {
                         'sources':
                         [
@@ -195,20 +195,20 @@
             'dependencies':
             [
                 'angle_util_config',
-                '<(angle_path)/src/angle.gyp:angle_common',
-                '<(angle_path)/src/angle.gyp:libEGL',
-                '<(angle_path)/src/angle.gyp:libGLESv2',
+                '<(DEPTH)/src/angle.gyp:angle_common',
+                '<(DEPTH)/src/angle.gyp:libEGL',
+                '<(DEPTH)/src/angle.gyp:libGLESv2',
             ],
             'export_dependent_settings':
             [
-                '<(angle_path)/src/angle.gyp:angle_common',
+                '<(DEPTH)/src/angle.gyp:angle_common',
             ],
             'direct_dependent_settings':
             {
                 'include_dirs':
                 [
-                    '<(angle_path)/include',
-                    '<(angle_path)/util',
+                    '<(DEPTH)/include',
+                    '<(DEPTH)/util',
                 ],
                 'defines':
                 [
@@ -225,20 +225,20 @@
             'dependencies':
             [
                 'angle_util_config',
-                '<(angle_path)/src/angle.gyp:angle_common',
-                '<(angle_path)/src/angle.gyp:libEGL_static',
-                '<(angle_path)/src/angle.gyp:libGLESv2_static',
+                '<(DEPTH)/src/angle.gyp:angle_common',
+                '<(DEPTH)/src/angle.gyp:libEGL_static',
+                '<(DEPTH)/src/angle.gyp:libGLESv2_static',
             ],
             'export_dependent_settings':
             [
-                '<(angle_path)/src/angle.gyp:angle_common',
+                '<(DEPTH)/src/angle.gyp:angle_common',
             ],
             'direct_dependent_settings':
             {
                 'include_dirs':
                 [
-                    '<(angle_path)/include',
-                    '<(angle_path)/util',
+                    '<(DEPTH)/include',
+                    '<(DEPTH)/util',
                 ],
                 'defines':
                 [
diff --git a/src/third_party/blink/Source/bindings/scripts/utilities.py b/src/third_party/blink/Source/bindings/scripts/utilities.py
index 16ca414..4d43f6f 100644
--- a/src/third_party/blink/Source/bindings/scripts/utilities.py
+++ b/src/third_party/blink/Source/bindings/scripts/utilities.py
@@ -24,6 +24,7 @@
         'cssom',
         'debug',
         'dom',
+        'fetch',
         'h5vcc',
         'media_session',
         'speech',
diff --git a/src/third_party/openssl/openssl/crypto/bn/asm/pa-risc2.S b/src/third_party/openssl/openssl/crypto/bn/asm/pa-risc2.S
new file mode 100644
index 0000000..f3b1629
--- /dev/null
+++ b/src/third_party/openssl/openssl/crypto/bn/asm/pa-risc2.S
@@ -0,0 +1,1618 @@
+;
+; PA-RISC 2.0 implementation of bn_asm code, based on the
+; 64-bit version of the code.  This code is effectively the
+; same as the 64-bit version except the register model is
+; slightly different given all values must be 32-bit between
+; function calls.  Thus the 64-bit return values are returned
+; in %ret0 and %ret1 vs just %ret0 as is done in 64-bit
+;
+;
+; This code is approximately 2x faster than the C version
+; for RSA/DSA.
+;
+; See http://devresource.hp.com/  for more details on the PA-RISC
+; architecture.  Also see the book "PA-RISC 2.0 Architecture"
+; by Gerry Kane for information on the instruction set architecture.
+;
+; Code written by Chris Ruemmler (with some help from the HP C
+; compiler).
+;
+; The code compiles with HP's assembler
+;
+
+	.level	2.0N
+	.space	$TEXT$
+	.subspa	$CODE$,QUAD=0,ALIGN=8,ACCESS=0x2c,CODE_ONLY
+
+;
+; Global Register definitions used for the routines.
+;
+; Some information about HP's runtime architecture for 32-bits.
+;
+; "Caller save" means the calling function must save the register
+; if it wants the register to be preserved.
+; "Callee save" means if a function uses the register, it must save
+; the value before using it.
+;
+; For the floating point registers 
+;
+;    "caller save" registers: fr4-fr11, fr22-fr31
+;    "callee save" registers: fr12-fr21
+;    "special" registers: fr0-fr3 (status and exception registers)
+;
+; For the integer registers
+;     value zero             :  r0
+;     "caller save" registers: r1,r19-r26
+;     "callee save" registers: r3-r18
+;     return register        :  r2  (rp)
+;     return values          :  r28,r29  (ret0,ret1)
+;     Stack pointer          :  r30  (sp)
+;     millicode return ptr   ; r31  (also a caller save register)
+
+
+;
+; Arguments to the routines
+;
+r_ptr       .reg %r26   ; arg0: result pointer (rp)
+a_ptr       .reg %r25   ; arg1: source pointer (ap)
+b_ptr       .reg %r24   ; arg2: second source pointer (bp) in bn_add_words/bn_sub_words
+num         .reg %r24   ; arg2: word count in the mul/sqr routines (same register as b_ptr)
+n           .reg %r23   ; arg3: word count in bn_add_words/bn_sub_words
+
+;
+; Note that the "w" argument for bn_mul_add_words and bn_mul_words
+; is passed on the stack at a delta of -56 from the top of stack
+; as the routine is entered.
+;
+
+;
+; Globals used in some routines
+;
+
+top_overflow .reg %r23   ; set to 1 << 32 by bn_mul_add_words/bn_mul_words (same register as n)
+high_mask    .reg %r22    ; value 0xffffffff80000000L
+
+
+;------------------------------------------------------------------------------
+;
+; bn_mul_add_words
+;
+;BN_ULONG bn_mul_add_words(BN_ULONG *r_ptr, BN_ULONG *a_ptr, 
+;								int num, BN_ULONG w)
+;
+; arg0 = r_ptr
+; arg1 = a_ptr
+; arg2 = num
+; -56(sp) =  w
+;
+; Local register definitions
+;
+
+fm1          .reg %fr22   ; m1 = ht*fw_l for word 0 (mul routines)
+fm           .reg %fr23   ; m  = lt*fw_h for word 0 (mul routines); ht*lt in bn_sqr_words
+ht_temp      .reg %fr24   ; high product for word 0
+ht_temp_1    .reg %fr25   ; high product for word 1
+lt_temp      .reg %fr26   ; low product for word 0
+lt_temp_1    .reg %fr27   ; low product for word 1
+fm1_1        .reg %fr28   ; m1 for word 1 (mul routines)
+fm_1         .reg %fr29   ; m for word 1
+
+fw_h         .reg %fr7L   ; upper 32 bits of w
+fw_l         .reg %fr7R   ; lower 32 bits of w
+fw           .reg %fr7    ; 64-bit multiplier w, loaded from the stack
+
+fht_0        .reg %fr8L   ; upper 32 bits of the word loaded into t_float_0
+flt_0        .reg %fr8R   ; lower 32 bits of the word loaded into t_float_0
+t_float_0    .reg %fr8    ; a[0] of the current pair
+
+fht_1        .reg %fr9L   ; upper 32 bits of the word loaded into t_float_1
+flt_1        .reg %fr9R   ; lower 32 bits of the word loaded into t_float_1
+t_float_1    .reg %fr9    ; a[1] of the current pair
+
+tmp_0        .reg %r31   ; scratch (word 0)
+tmp_1        .reg %r21   ; scratch (word 1)
+m_0          .reg %r20   ; integer copy of m for word 0
+m_1          .reg %r19   ; integer copy of m for word 1
+ht_0         .reg %r1    ; integer copy of ht for word 0
+ht_1         .reg %r3    ; callee save (r3-r18): saved by each routine that uses it
+lt_0         .reg %r4    ; callee save
+lt_1         .reg %r5    ; callee save
+m1_0         .reg %r6    ; callee save
+m1_1         .reg %r7    ; callee save
+rp_val       .reg %r8    ; callee save; rp[0] in bn_mul_add_words
+rp_val_1     .reg %r9    ; callee save; rp[1] in bn_mul_add_words
+
+bn_mul_add_words
+	.export	bn_mul_add_words,entry,NO_RELOCATION,LONG_RETURN
+	.proc
+	.callinfo frame=128
+    .entry
+	.align 64
+
+    STD     %r3,0(%sp)          ; save r3 (callee save registers r3-r9 are used below)
+    STD     %r4,8(%sp)          ; save r4
+	NOP                         ; Needed to make the loop 16-byte aligned
+	NOP                         ; needed to make the loop 16-byte aligned
+
+    STD     %r5,16(%sp)         ; save r5
+	NOP
+    STD     %r6,24(%sp)         ; save r6
+    STD     %r7,32(%sp)         ; save r7
+
+    STD     %r8,40(%sp)         ; save r8
+    STD     %r9,48(%sp)         ; save r9
+    COPY    %r0,%ret1           ; carry = 0; this is also the default return value
+    DEPDI,Z 1,31,1,top_overflow ; top_overflow = 1 << 32
+
+    CMPIB,>= 0,num,bn_mul_add_words_exit  ; if (num <= 0) then exit
+	LDO     128(%sp),%sp        ; bump stack (delay slot; the exit path pops it again)
+
+	;
+	; The loop is unrolled twice, so if there is only 1 number
+    ; then go straight to the cleanup code.
+	;
+	CMPIB,= 1,num,bn_mul_add_words_single_top
+	FLDD    -184(%sp),fw        ; (-56-128) load up w into fw (fw_h/fw_l)
+
+	;
+	; This loop is unrolled 2 times (64-byte aligned as well)
+	;
+	; PA-RISC 2.0 chips have two fully pipelined multipliers, thus
+    ; two 32-bit multiplies can be issued per cycle.
+    ;
+bn_mul_add_words_unroll2
+
+    FLDD    0(a_ptr),t_float_0       ; load up 64-bit value (fr8) ht(L)/lt(R)
+    FLDD    8(a_ptr),t_float_1       ; load up 64-bit value (fr9) ht(L)/lt(R)
+    LDD     0(r_ptr),rp_val          ; rp[0]
+    LDD     8(r_ptr),rp_val_1        ; rp[1]
+
+    XMPYU   fht_0,fw_l,fm1           ; m1[0] = fht_0*fw_l
+    XMPYU   fht_1,fw_l,fm1_1         ; m1[1] = fht_1*fw_l
+    FSTD    fm1,-16(%sp)             ; -16(sp) = m1[0]
+    FSTD    fm1_1,-48(%sp)           ; -48(sp) = m1[1]
+
+    XMPYU   flt_0,fw_h,fm            ; m[0] = flt_0*fw_h
+    XMPYU   flt_1,fw_h,fm_1          ; m[1] = flt_1*fw_h
+    FSTD    fm,-8(%sp)               ; -8(sp) = m[0]
+    FSTD    fm_1,-40(%sp)            ; -40(sp) = m[1]
+
+    XMPYU   fht_0,fw_h,ht_temp       ; ht_temp   = fht_0*fw_h
+    XMPYU   fht_1,fw_h,ht_temp_1     ; ht_temp_1 = fht_1*fw_h
+    FSTD    ht_temp,-24(%sp)         ; -24(sp)   = ht_temp
+    FSTD    ht_temp_1,-56(%sp)       ; -56(sp)   = ht_temp_1
+
+    XMPYU   flt_0,fw_l,lt_temp       ; lt_temp   = flt_0*fw_l
+    XMPYU   flt_1,fw_l,lt_temp_1     ; lt_temp_1 = flt_1*fw_l
+    FSTD    lt_temp,-32(%sp)         ; -32(sp) = lt_temp
+    FSTD    lt_temp_1,-64(%sp)       ; -64(sp) = lt_temp_1
+
+    LDD     -8(%sp),m_0              ; m[0]
+    LDD     -40(%sp),m_1             ; m[1]
+    LDD     -16(%sp),m1_0            ; m1[0]
+    LDD     -48(%sp),m1_1            ; m1[1]
+
+    LDD     -24(%sp),ht_0            ; ht[0]
+    LDD     -56(%sp),ht_1            ; ht[1]
+    ADD,L   m1_0,m_0,tmp_0           ; tmp_0 = m[0] + m1[0];
+    ADD,L   m1_1,m_1,tmp_1           ; tmp_1 = m[1] + m1[1];
+
+    LDD     -32(%sp),lt_0            ; lt[0]
+    LDD     -64(%sp),lt_1            ; lt[1]
+    CMPCLR,*>>= tmp_0,m1_0, %r0      ; if (tmp_0 < m1[0]), i.e. the sum wrapped
+    ADD,L   ht_0,top_overflow,ht_0   ; ht[0] += (1<<32)
+
+    CMPCLR,*>>= tmp_1,m1_1,%r0       ; if (tmp_1 < m1[1]), i.e. the sum wrapped
+    ADD,L   ht_1,top_overflow,ht_1   ; ht[1] += (1<<32)
+    EXTRD,U tmp_0,31,32,m_0          ; m[0]>>32
+    DEPD,Z  tmp_0,31,32,m1_0         ; m1[0] = m[0]<<32
+
+    EXTRD,U tmp_1,31,32,m_1          ; m[1]>>32
+    DEPD,Z  tmp_1,31,32,m1_1         ; m1[1] = m[1]<<32
+    ADD,L   ht_0,m_0,ht_0            ; ht[0]+= (m[0]>>32)
+    ADD,L   ht_1,m_1,ht_1            ; ht[1]+= (m[1]>>32)
+
+    ADD     lt_0,m1_0,lt_0           ; lt[0] = lt[0]+m1[0];
+	ADD,DC  ht_0,%r0,ht_0            ; ht[0] += carry
+    ADD     lt_1,m1_1,lt_1           ; lt[1] = lt[1]+m1[1];
+    ADD,DC  ht_1,%r0,ht_1            ; ht[1] += carry
+
+    ADD    %ret1,lt_0,lt_0           ; lt[0] = lt[0] + c;
+	ADD,DC  ht_0,%r0,ht_0            ; ht[0] += carry
+    ADD     lt_0,rp_val,lt_0         ; lt[0] = lt[0]+rp[0]
+    ADD,DC  ht_0,%r0,ht_0            ; ht[0] += carry
+
+	LDO    -2(num),num               ; num = num - 2;
+    ADD     ht_0,lt_1,lt_1           ; lt[1] = lt[1] + ht_0 (c);
+    ADD,DC  ht_1,%r0,ht_1            ; ht[1] += carry
+    STD     lt_0,0(r_ptr)            ; rp[0] = lt[0]
+
+    ADD     lt_1,rp_val_1,lt_1       ; lt[1] = lt[1]+rp[1]
+    ADD,DC  ht_1,%r0,%ret1           ; c = ht[1] + carry
+    LDO     16(a_ptr),a_ptr          ; a_ptr += 2
+
+    STD     lt_1,8(r_ptr)            ; rp[1] = lt[1]
+	CMPIB,<= 2,num,bn_mul_add_words_unroll2 ; go again if more to do
+    LDO     16(r_ptr),r_ptr          ; r_ptr += 2 (delay slot)
+
+    CMPIB,=,N 0,num,bn_mul_add_words_exit ; are we done, or cleanup last one
+
+	;
+	; Handle a single remaining word (also the entry point when num == 1)
+	;
+bn_mul_add_words_single_top
+    FLDD    0(a_ptr),t_float_0        ; load up 64-bit value (fr8) ht(L)/lt(R)
+    LDD     0(r_ptr),rp_val           ; rp[0]
+    LDO     8(a_ptr),a_ptr            ; a_ptr++
+    XMPYU   fht_0,fw_l,fm1            ; m1 = ht*fw_l
+    FSTD    fm1,-16(%sp)              ; -16(sp) = m1
+    XMPYU   flt_0,fw_h,fm             ; m = lt*fw_h
+    FSTD    fm,-8(%sp)                ; -8(sp) = m
+    XMPYU   fht_0,fw_h,ht_temp        ; ht_temp = ht*fw_h
+    FSTD    ht_temp,-24(%sp)          ; -24(sp) = ht
+    XMPYU   flt_0,fw_l,lt_temp        ; lt_temp = lt*fw_l
+    FSTD    lt_temp,-32(%sp)          ; -32(sp) = lt
+
+    LDD     -8(%sp),m_0               ; m
+    LDD    -16(%sp),m1_0              ; m1 = temp1
+    ADD,L   m_0,m1_0,tmp_0            ; tmp_0 = m + m1;
+    LDD     -24(%sp),ht_0             ; ht
+    LDD     -32(%sp),lt_0             ; lt
+
+    CMPCLR,*>>= tmp_0,m1_0,%r0        ; if (tmp_0 < m1), i.e. the sum wrapped
+    ADD,L   ht_0,top_overflow,ht_0    ; ht += (1<<32)
+
+    EXTRD,U tmp_0,31,32,m_0           ; m>>32
+    DEPD,Z  tmp_0,31,32,m1_0          ; m1 = m<<32
+
+    ADD,L   ht_0,m_0,ht_0             ; ht+= (m>>32)
+    ADD     lt_0,m1_0,tmp_0           ; tmp_0 = lt+m1;
+    ADD,DC  ht_0,%r0,ht_0             ; ht += carry
+    ADD     %ret1,tmp_0,lt_0          ; lt = lt + c;
+    ADD,DC  ht_0,%r0,ht_0             ; ht += carry
+    ADD     lt_0,rp_val,lt_0          ; lt = lt+rp[0]
+    ADD,DC  ht_0,%r0,%ret1            ; c = ht + carry
+    STD     lt_0,0(r_ptr)             ; rp[0] = lt
+
+bn_mul_add_words_exit
+    .EXIT
+	
+    EXTRD,U %ret1,31,32,%ret0         ; for 32-bit, return in ret0/ret1
+    LDD     -80(%sp),%r9              ; restore r9
+    LDD     -88(%sp),%r8              ; restore r8
+    LDD     -96(%sp),%r7              ; restore r7
+    LDD     -104(%sp),%r6             ; restore r6
+    LDD     -112(%sp),%r5             ; restore r5
+    LDD     -120(%sp),%r4             ; restore r4
+    BVE     (%rp)
+    LDD,MB  -128(%sp),%r3             ; restore r3 and pop the 128-byte frame
+	.PROCEND	;in=23,24,25,26,29;out=28;
+
+;----------------------------------------------------------------------------
+;
+;BN_ULONG bn_mul_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w)
+;
+; arg0 = rp
+; arg1 = ap
+; arg2 = num
+; w on stack at -56(sp)
+
+bn_mul_words
+	.proc
+	.callinfo frame=128
+    .entry
+	.EXPORT	bn_mul_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
+	.align 64
+
+    STD     %r3,0(%sp)          ; save r3 (callee save registers r3-r7 are used below)
+    STD     %r4,8(%sp)          ; save r4
+	NOP
+    STD     %r5,16(%sp)         ; save r5
+
+    STD     %r6,24(%sp)         ; save r6
+    STD     %r7,32(%sp)         ; save r7
+    COPY    %r0,%ret1           ; carry = 0; this is also the default return value
+    DEPDI,Z 1,31,1,top_overflow ; top_overflow = 1 << 32
+
+    CMPIB,>= 0,num,bn_mul_words_exit  ; if (num <= 0) then exit
+	LDO     128(%sp),%sp    ; bump stack (delay slot; the exit path pops it again)
+
+	;
+	; See if only 1 word to do, thus just do cleanup
+	;
+	CMPIB,= 1,num,bn_mul_words_single_top
+	FLDD    -184(%sp),fw        ; (-56-128) load up w into fw (fw_h/fw_l)
+
+	;
+	; This loop is unrolled 2 times (64-byte aligned as well)
+	;
+	; PA-RISC 2.0 chips have two fully pipelined multipliers, thus
+    ; two 32-bit multiplies can be issued per cycle.
+    ;
+bn_mul_words_unroll2
+
+    FLDD    0(a_ptr),t_float_0        ; load up 64-bit value (fr8) ht(L)/lt(R)
+    FLDD    8(a_ptr),t_float_1        ; load up 64-bit value (fr9) ht(L)/lt(R)
+    XMPYU   fht_0,fw_l,fm1            ; m1[0] = fht_0*fw_l
+    XMPYU   fht_1,fw_l,fm1_1          ; m1[1] = fht_1*fw_l
+
+    FSTD    fm1,-16(%sp)              ; -16(sp) = m1[0]
+    FSTD    fm1_1,-48(%sp)            ; -48(sp) = m1[1]
+    XMPYU   flt_0,fw_h,fm             ; m[0] = flt_0*fw_h
+    XMPYU   flt_1,fw_h,fm_1           ; m[1] = flt_1*fw_h
+
+    FSTD    fm,-8(%sp)                ; -8(sp) = m[0]
+    FSTD    fm_1,-40(%sp)             ; -40(sp) = m[1]
+    XMPYU   fht_0,fw_h,ht_temp        ; ht_temp   = fht_0*fw_h
+    XMPYU   fht_1,fw_h,ht_temp_1      ; ht_temp_1 = fht_1*fw_h
+
+    FSTD    ht_temp,-24(%sp)          ; -24(sp) = ht[0]
+    FSTD    ht_temp_1,-56(%sp)        ; -56(sp) = ht[1]
+    XMPYU   flt_0,fw_l,lt_temp        ; lt_temp   = flt_0*fw_l
+    XMPYU   flt_1,fw_l,lt_temp_1      ; lt_temp_1 = flt_1*fw_l
+
+    FSTD    lt_temp,-32(%sp)          ; -32(sp) = lt[0]
+    FSTD    lt_temp_1,-64(%sp)        ; -64(sp) = lt[1]
+    LDD     -8(%sp),m_0               ; m[0]
+    LDD     -40(%sp),m_1              ; m[1]
+
+    LDD    -16(%sp),m1_0              ; m1[0]
+    LDD    -48(%sp),m1_1              ; m1[1]
+    LDD     -24(%sp),ht_0             ; ht[0]
+    LDD     -56(%sp),ht_1             ; ht[1]
+
+    ADD,L   m1_0,m_0,tmp_0            ; tmp_0 = m + m1;
+    ADD,L   m1_1,m_1,tmp_1            ; tmp_1 = m + m1;
+    LDD     -32(%sp),lt_0             ; lt[0]
+    LDD     -64(%sp),lt_1             ; lt[1]
+
+    CMPCLR,*>>= tmp_0,m1_0, %r0       ; if (tmp_0 < m1[0]), i.e. the sum wrapped
+    ADD,L   ht_0,top_overflow,ht_0    ; ht += (1<<32)
+    CMPCLR,*>>= tmp_1,m1_1,%r0        ; if (tmp_1 < m1[1]), i.e. the sum wrapped
+    ADD,L   ht_1,top_overflow,ht_1    ; ht += (1<<32)
+
+    EXTRD,U tmp_0,31,32,m_0           ; m>>32
+    DEPD,Z  tmp_0,31,32,m1_0          ; m1 = m<<32
+    EXTRD,U tmp_1,31,32,m_1           ; m>>32
+    DEPD,Z  tmp_1,31,32,m1_1          ; m1 = m<<32
+
+    ADD,L   ht_0,m_0,ht_0             ; ht+= (m>>32)
+    ADD,L   ht_1,m_1,ht_1             ; ht+= (m>>32)
+    ADD     lt_0,m1_0,lt_0            ; lt = lt+m1;
+	ADD,DC  ht_0,%r0,ht_0             ; ht += carry
+
+    ADD     lt_1,m1_1,lt_1            ; lt = lt+m1;
+    ADD,DC  ht_1,%r0,ht_1             ; ht += carry
+    ADD    %ret1,lt_0,lt_0            ; lt = lt + c (ret1);
+	ADD,DC  ht_0,%r0,ht_0             ; ht += carry
+
+    ADD     ht_0,lt_1,lt_1            ; lt = lt + c (ht_0)
+    ADD,DC  ht_1,%r0,ht_1             ; ht += carry
+    STD     lt_0,0(r_ptr)             ; rp[0] = lt
+    STD     lt_1,8(r_ptr)             ; rp[1] = lt
+
+	COPY    ht_1,%ret1                ; carry = ht
+	LDO    -2(num),num                ; num = num - 2;
+    LDO     16(a_ptr),a_ptr           ; ap += 2
+	CMPIB,<= 2,num,bn_mul_words_unroll2 ; go again if more to do
+    LDO     16(r_ptr),r_ptr           ; rp += 2 (delay slot)
+
+    CMPIB,=,N 0,num,bn_mul_words_exit ; are we done?
+
+	;
+	; Handle a single remaining word (also the entry point when num == 1)
+	;
+bn_mul_words_single_top
+    FLDD    0(a_ptr),t_float_0        ; load up 64-bit value (fr8) ht(L)/lt(R)
+
+    XMPYU   fht_0,fw_l,fm1            ; m1 = ht*fw_l
+    FSTD    fm1,-16(%sp)              ; -16(sp) = m1
+    XMPYU   flt_0,fw_h,fm             ; m = lt*fw_h
+    FSTD    fm,-8(%sp)                ; -8(sp) = m
+    XMPYU   fht_0,fw_h,ht_temp        ; ht_temp = ht*fw_h
+    FSTD    ht_temp,-24(%sp)          ; -24(sp) = ht
+    XMPYU   flt_0,fw_l,lt_temp        ; lt_temp = lt*fw_l
+    FSTD    lt_temp,-32(%sp)          ; -32(sp) = lt
+
+    LDD     -8(%sp),m_0               ; m
+    LDD    -16(%sp),m1_0              ; m1
+    ADD,L   m_0,m1_0,tmp_0            ; tmp_0 = m + m1;
+    LDD     -24(%sp),ht_0             ; ht
+    LDD     -32(%sp),lt_0             ; lt
+
+    CMPCLR,*>>= tmp_0,m1_0,%r0        ; if (tmp_0 < m1), i.e. the sum wrapped
+    ADD,L   ht_0,top_overflow,ht_0    ; ht += (1<<32)
+
+    EXTRD,U tmp_0,31,32,m_0           ; m>>32
+    DEPD,Z  tmp_0,31,32,m1_0          ; m1 = m<<32
+
+    ADD,L   ht_0,m_0,ht_0             ; ht+= (m>>32)
+    ADD     lt_0,m1_0,lt_0            ; lt= lt+m1;
+    ADD,DC  ht_0,%r0,ht_0             ; ht += carry
+
+    ADD     %ret1,lt_0,lt_0           ; lt = lt + c;
+    ADD,DC  ht_0,%r0,ht_0             ; ht += carry
+
+    COPY    ht_0,%ret1                ; copy carry
+    STD     lt_0,0(r_ptr)             ; rp[0] = lt
+
+bn_mul_words_exit
+    .EXIT
+    EXTRD,U %ret1,31,32,%ret0           ; for 32-bit, return in ret0/ret1
+    LDD     -96(%sp),%r7              ; restore r7
+    LDD     -104(%sp),%r6             ; restore r6
+    LDD     -112(%sp),%r5             ; restore r5
+    LDD     -120(%sp),%r4             ; restore r4
+    BVE     (%rp)
+    LDD,MB  -128(%sp),%r3             ; restore r3 and pop the 128-byte frame
+	.PROCEND	
+
+;----------------------------------------------------------------------------
+;
+;void bn_sqr_words(BN_ULONG *rp, BN_ULONG *ap, int num)
+;
+; arg0 = rp
+; arg1 = ap
+; arg2 = num
+;
+
+bn_sqr_words
+	.proc
+	.callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE
+	.EXPORT	bn_sqr_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
+    .entry
+	.align 64
+
+    STD     %r3,0(%sp)          ; save r3 (callee save registers r3-r5 are used below)
+    STD     %r4,8(%sp)          ; save r4
+	NOP
+    STD     %r5,16(%sp)         ; save r5
+
+    CMPIB,>= 0,num,bn_sqr_words_exit  ; if (num <= 0) then exit
+	LDO     128(%sp),%sp       ; bump stack (delay slot; the exit path pops it again)
+
+	;
+	; If only 1, then go straight to cleanup
+	;
+	CMPIB,= 1,num,bn_sqr_words_single_top
+    DEPDI,Z -1,32,33,high_mask   ; Create Mask 0xffffffff80000000L
+
+	;
+	; This loop is unrolled 2 times (64-byte aligned as well)
+	;
+
+bn_sqr_words_unroll2
+    FLDD    0(a_ptr),t_float_0        ; a[0]
+    FLDD    8(a_ptr),t_float_1        ; a[1]
+    XMPYU   fht_0,flt_0,fm            ; m[0] = ht*lt of a[0]
+    XMPYU   fht_1,flt_1,fm_1          ; m[1] = ht*lt of a[1]
+
+    FSTD    fm,-24(%sp)               ; store m[0]
+    FSTD    fm_1,-56(%sp)             ; store m[1]
+    XMPYU   flt_0,flt_0,lt_temp       ; lt[0] = lt*lt
+    XMPYU   flt_1,flt_1,lt_temp_1     ; lt[1] = lt*lt
+
+    FSTD    lt_temp,-16(%sp)          ; store lt[0]
+    FSTD    lt_temp_1,-48(%sp)        ; store lt[1]
+    XMPYU   fht_0,fht_0,ht_temp       ; ht[0] = ht*ht
+    XMPYU   fht_1,fht_1,ht_temp_1     ; ht[1] = ht*ht
+
+    FSTD    ht_temp,-8(%sp)           ; store ht[0]
+    FSTD    ht_temp_1,-40(%sp)        ; store ht[1]
+    LDD     -24(%sp),m_0              ; load m[0]
+    LDD     -56(%sp),m_1              ; load m[1]
+
+    AND     m_0,high_mask,tmp_0       ; m[0] & Mask
+    AND     m_1,high_mask,tmp_1       ; m[1] & Mask
+    DEPD,Z  m_0,30,31,m_0             ; m[0] << 32+1
+    DEPD,Z  m_1,30,31,m_1             ; m[1] << 32+1
+
+    LDD     -16(%sp),lt_0             ; load lt[0]
+    LDD     -48(%sp),lt_1             ; load lt[1]
+    EXTRD,U tmp_0,32,33,tmp_0         ; tmp_0 = m[0]&Mask >> 32-1
+    EXTRD,U tmp_1,32,33,tmp_1         ; tmp_1 = m[1]&Mask >> 32-1
+
+    LDD     -8(%sp),ht_0              ; load ht[0]
+    LDD     -40(%sp),ht_1             ; load ht[1]
+    ADD,L   ht_0,tmp_0,ht_0           ; ht[0] += tmp_0
+    ADD,L   ht_1,tmp_1,ht_1           ; ht[1] += tmp_1
+
+    ADD     lt_0,m_0,lt_0             ; lt = lt+m
+    ADD,DC  ht_0,%r0,ht_0             ; ht[0] += carry
+    STD     lt_0,0(r_ptr)             ; rp[0] = lt[0]
+    STD     ht_0,8(r_ptr)             ; rp[1] = ht[0]
+
+    ADD     lt_1,m_1,lt_1             ; lt = lt+m
+    ADD,DC  ht_1,%r0,ht_1             ; ht[1] += carry
+    STD     lt_1,16(r_ptr)            ; rp[2] = lt[1]
+    STD     ht_1,24(r_ptr)            ; rp[3] = ht[1]
+
+	LDO    -2(num),num                ; num = num - 2;
+    LDO     16(a_ptr),a_ptr           ; ap += 2
+	CMPIB,<= 2,num,bn_sqr_words_unroll2 ; go again if more to do
+    LDO     32(r_ptr),r_ptr           ; rp += 4 (delay slot)
+
+    CMPIB,=,N 0,num,bn_sqr_words_exit ; are we done?
+
+	;
+	; Handle a single remaining word (also the entry point when num == 1)
+	;
+bn_sqr_words_single_top
+    FLDD    0(a_ptr),t_float_0        ; load up 64-bit value (fr8) ht(L)/lt(R)
+
+    XMPYU   fht_0,flt_0,fm            ; m = ht*lt
+    FSTD    fm,-24(%sp)               ; store m
+
+    XMPYU   flt_0,flt_0,lt_temp       ; lt = lt*lt
+    FSTD    lt_temp,-16(%sp)          ; store lt
+
+    XMPYU   fht_0,fht_0,ht_temp       ; ht = ht*ht
+    FSTD    ht_temp,-8(%sp)           ; store ht
+
+    LDD     -24(%sp),m_0              ; load m
+    AND     m_0,high_mask,tmp_0       ; m & Mask
+    DEPD,Z  m_0,30,31,m_0             ; m << 32+1
+    LDD     -16(%sp),lt_0             ; lt
+
+    LDD     -8(%sp),ht_0              ; ht
+    EXTRD,U tmp_0,32,33,tmp_0         ; tmp_0 = m&Mask >> 32-1
+    ADD     m_0,lt_0,lt_0             ; lt = lt+m
+    ADD,L   ht_0,tmp_0,ht_0           ; ht += tmp_0
+    ADD,DC  ht_0,%r0,ht_0             ; ht += carry from lt+m above
+
+    STD     lt_0,0(r_ptr)             ; rp[0] = lt
+    STD     ht_0,8(r_ptr)             ; rp[1] = ht
+
+bn_sqr_words_exit
+    .EXIT
+    LDD     -112(%sp),%r5       ; restore r5
+    LDD     -120(%sp),%r4       ; restore r4
+    BVE     (%rp)
+    LDD,MB  -128(%sp),%r3       ; restore r3 and pop the 128-byte frame
+	.PROCEND	;in=23,24,25,26,29;out=28;
+
+
+;----------------------------------------------------------------------------
+;
+;BN_ULONG bn_add_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
+;
+; arg0 = rp 
+; arg1 = ap
+; arg2 = bp 
+; arg3 = n
+
+t  .reg %r22   ; word loaded from a_ptr, then a word plus carry-in
+b  .reg %r21   ; word loaded from b_ptr
+l  .reg %r20   ; sum word stored through r_ptr
+
+bn_add_words
+	.proc
+    .entry
+	.callinfo
+	.EXPORT	bn_add_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
+	.align 64
+
+    CMPIB,>= 0,n,bn_add_words_exit  ; if (n <= 0) then exit
+    COPY    %r0,%ret1           ; c = 0; return 0 by default (delay slot)
+
+	;
+	; If 2 or more numbers do the loop, otherwise go to the single-word code
+	;
+	CMPIB,= 1,n,bn_add_words_single_top
+	NOP
+
+	;
+	; This loop is unrolled 2 times (64-byte aligned as well)
+	;
+bn_add_words_unroll2
+	LDD     0(a_ptr),t                   ; t = a[0]
+	LDD     0(b_ptr),b                   ; b = b[0]
+	ADD     t,%ret1,t                    ; t = t+c;
+	ADD,DC  %r0,%r0,%ret1                ; set c to carry
+	ADD     t,b,l                        ; l = t + b[0]
+	ADD,DC  %ret1,%r0,%ret1              ; c+= carry
+	STD     l,0(r_ptr)                   ; r[0] = l
+
+	LDD     8(a_ptr),t                    ; t = a[1]
+	LDD     8(b_ptr),b                    ; b = b[1]
+	ADD     t,%ret1,t                     ; t = t+c;
+	ADD,DC  %r0,%r0,%ret1                 ; set c to carry
+	ADD     t,b,l                         ; l = t + b[1]
+	ADD,DC  %ret1,%r0,%ret1               ; c+= carry
+	STD     l,8(r_ptr)                    ; r[1] = l
+
+	LDO     -2(n),n                       ; n = n - 2
+	LDO     16(a_ptr),a_ptr               ; a += 2
+	LDO     16(b_ptr),b_ptr               ; b += 2
+
+	CMPIB,<= 2,n,bn_add_words_unroll2     ; go again if more to do
+	LDO     16(r_ptr),r_ptr               ; r += 2 (delay slot)
+
+    CMPIB,=,N 0,n,bn_add_words_exit ; are we done?
+
+bn_add_words_single_top
+	LDD     0(a_ptr),t                ; t = a[0]
+	LDD     0(b_ptr),b                ; b = b[0]
+
+	ADD     t,%ret1,t                 ; t = t+c;
+	ADD,DC  %r0,%r0,%ret1             ; set c to carry (could use CMPCLR??)
+	ADD     t,b,l                     ; l = t + b[0]
+	ADD,DC  %ret1,%r0,%ret1           ; c+= carry
+	STD     l,0(r_ptr)                ; r[0] = l
+
+bn_add_words_exit
+    .EXIT
+    BVE     (%rp)
+    EXTRD,U %ret1,31,32,%ret0           ; for 32-bit, return in ret0/ret1
+	.PROCEND	;in=23,24,25,26,29;out=28;
+
+;----------------------------------------------------------------------------
+;
+;BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
+;
+; arg0 = rp 
+; arg1 = ap
+; arg2 = bp 
+; arg3 = n
+
+t1       .reg %r22   ; word loaded from a_ptr
+t2       .reg %r21   ; word loaded from b_ptr
+sub_tmp1 .reg %r20   ; t1 - t2 - c, stored through r_ptr
+sub_tmp2 .reg %r19   ; candidate borrow: 0 if t1 > t2, else 1
+
+
+bn_sub_words
+	.proc
+	.callinfo 
+	.EXPORT	bn_sub_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
+    .entry
+	.align 64
+
+    CMPIB,>=  0,n,bn_sub_words_exit  ; if (n <= 0) then exit
+    COPY    %r0,%ret1           ; c = 0; return 0 by default (delay slot)
+
+	;
+	; If 2 or more numbers do the loop, otherwise go to the single-word code
+	;
+	CMPIB,= 1,n,bn_sub_words_single_top
+	NOP
+
+	;
+	; This loop is unrolled 2 times (64-byte aligned as well)
+	;
+bn_sub_words_unroll2
+	LDD     0(a_ptr),t1              ; t1 = a[0]
+	LDD     0(b_ptr),t2              ; t2 = b[0]
+	SUB     t1,t2,sub_tmp1           ; t3 = t1-t2;
+	SUB     sub_tmp1,%ret1,sub_tmp1  ; t3 = t3- c;
+
+	CMPCLR,*>> t1,t2,sub_tmp2        ; sub_tmp2 = 0; skip the next insn if t1 > t2
+	LDO      1(%r0),sub_tmp2         ; otherwise sub_tmp2 = 1
+	
+	CMPCLR,*= t1,t2,%r0              ; if t1 == t2 keep the incoming borrow c
+	COPY    sub_tmp2,%ret1           ; otherwise c = sub_tmp2
+	STD     sub_tmp1,0(r_ptr)        ; r[0] = t3
+
+	LDD     8(a_ptr),t1               ; t1 = a[1]
+	LDD     8(b_ptr),t2               ; t2 = b[1]
+	SUB     t1,t2,sub_tmp1            ; t3 = t1-t2;
+	SUB     sub_tmp1,%ret1,sub_tmp1   ; t3 = t3- c;
+	CMPCLR,*>> t1,t2,sub_tmp2         ; sub_tmp2 = 0; skip the next insn if t1 > t2
+	LDO      1(%r0),sub_tmp2          ; otherwise sub_tmp2 = 1
+	
+	CMPCLR,*= t1,t2,%r0               ; if t1 == t2 keep the incoming borrow c
+	COPY    sub_tmp2,%ret1            ; otherwise c = sub_tmp2
+	STD     sub_tmp1,8(r_ptr)         ; r[1] = t3
+
+	LDO     -2(n),n                   ; n = n - 2
+	LDO     16(a_ptr),a_ptr           ; a += 2
+	LDO     16(b_ptr),b_ptr           ; b += 2
+
+	CMPIB,<= 2,n,bn_sub_words_unroll2 ; go again if more to do
+	LDO     16(r_ptr),r_ptr           ; r += 2 (delay slot)
+
+    CMPIB,=,N 0,n,bn_sub_words_exit ; are we done?
+
+bn_sub_words_single_top
+	LDD     0(a_ptr),t1               ; t1 = a[0]
+	LDD     0(b_ptr),t2               ; t2 = b[0]
+	SUB     t1,t2,sub_tmp1            ; t3 = t1-t2;
+	SUB     sub_tmp1,%ret1,sub_tmp1   ; t3 = t3- c;
+	CMPCLR,*>> t1,t2,sub_tmp2         ; sub_tmp2 = 0; skip the next insn if t1 > t2
+	LDO      1(%r0),sub_tmp2          ; otherwise sub_tmp2 = 1
+	
+	CMPCLR,*= t1,t2,%r0               ; if t1 == t2 keep the incoming borrow c
+	COPY    sub_tmp2,%ret1            ; otherwise c = sub_tmp2
+
+	STD     sub_tmp1,0(r_ptr)         ; r[0] = t3
+
+bn_sub_words_exit
+    .EXIT
+    BVE     (%rp)
+    EXTRD,U %ret1,31,32,%ret0           ; for 32-bit, return in ret0/ret1
+	.PROCEND	;in=23,24,25,26,29;out=28;
+
+;------------------------------------------------------------------------------
+;
+; unsigned long bn_div_words(unsigned long h, unsigned long l, unsigned long d)
+;
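+; arg0 = h  (64-bit; the code below assembles it from %r26/%r25)
+; arg1 = l  (64-bit; the code below assembles it from %r24/%r23)
+; arg2 = d  (64-bit; the code below loads it from -56(sp) as seen at entry)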
+;
+; This is mainly just output from the HP C compiler.  
+;
+;------------------------------------------------------------------------------
+bn_div_words
+	.PROC
+	.EXPORT	bn_div_words,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR,RTNVAL=GR,LONG_RETURN
+	.IMPORT	BN_num_bits_word,CODE
+	;--- not PIC	.IMPORT	__iob,DATA
+	;--- not PIC	.IMPORT	fprintf,CODE
+	.IMPORT	abort,CODE
+	.IMPORT	$$div2U,MILLICODE
+	.CALLINFO CALLER,FRAME=144,ENTRY_GR=%r9,SAVE_RP,ARGS_SAVED,ORDERING_AWARE
+        .ENTRY
+        STW     %r2,-20(%r30)   ;offset 0x8ec -- save the return pointer (r2)
+        STW,MA  %r3,192(%r30)   ;offset 0x8f0 -- save r3 and advance sp by 192
+        STW     %r4,-188(%r30)  ;offset 0x8f4 -- save r4
+        DEPD    %r5,31,32,%r6   ;offset 0x8f8 -- pack r5 with r6 ...
+        STD     %r6,-184(%r30)  ;offset 0x8fc -- ... and save both as one doubleword
+        DEPD    %r7,31,32,%r8   ;offset 0x900 -- pack r7 with r8 ...
+        STD     %r8,-176(%r30)  ;offset 0x904 -- ... and save both as one doubleword
+        STW     %r9,-168(%r30)  ;offset 0x908 -- save r9
+        LDD     -248(%r30),%r3  ;offset 0x90c -- presumably d: -248+192 = -56 from the entry sp
+        COPY    %r26,%r4        ;offset 0x910 -- r4 is built from r26/r25 (presumably h)
+        COPY    %r24,%r5        ;offset 0x914 -- r5 is built from r24/r23 (presumably l)
+        DEPD    %r25,31,32,%r4  ;offset 0x918
+        CMPB,*<>        %r3,%r0,$0006000C       ;offset 0x91c -- nonzero r3: go do the division
+        DEPD    %r23,31,32,%r5  ;offset 0x920
+        MOVIB,TR        -1,%r29,$00060002       ;offset 0x924 -- r3 == 0: result is -1
+        EXTRD,U %r29,31,32,%r28 ;offset 0x928
+$0006002A
+        LDO     -1(%r29),%r29   ;offset 0x92c
+        SUB     %r23,%r7,%r23   ;offset 0x930
+$00060024
+        SUB     %r4,%r31,%r25   ;offset 0x934
+        AND     %r25,%r19,%r26  ;offset 0x938
+        CMPB,*<>,N      %r0,%r26,$00060046      ;offset 0x93c
+        DEPD,Z  %r25,31,32,%r20 ;offset 0x940
+        OR      %r20,%r24,%r21  ;offset 0x944
+        CMPB,*<<,N      %r21,%r23,$0006002A     ;offset 0x948
+        SUB     %r31,%r2,%r31   ;offset 0x94c
+$00060046
+$0006002E
+        DEPD,Z  %r23,31,32,%r25 ;offset 0x950
+        EXTRD,U %r23,31,32,%r26 ;offset 0x954
+        AND     %r25,%r19,%r24  ;offset 0x958
+        ADD,L   %r31,%r26,%r31  ;offset 0x95c
+        CMPCLR,*>>=     %r5,%r24,%r0    ;offset 0x960
+        LDO     1(%r31),%r31    ;offset 0x964
+$00060032
+        CMPB,*<<=,N     %r31,%r4,$00060036      ;offset 0x968
+        LDO     -1(%r29),%r29   ;offset 0x96c
+        ADD,L   %r4,%r3,%r4     ;offset 0x970
+$00060036
+        ADDIB,=,N       -1,%r8,$D0      ;offset 0x974 -- decrement the pass counter in r8; leave when it hits 0
+        SUB     %r5,%r24,%r28   ;offset 0x978
+$0006003A
+        SUB     %r4,%r31,%r24   ;offset 0x97c
+        SHRPD   %r24,%r28,32,%r4        ;offset 0x980
+        DEPD,Z  %r29,31,32,%r9  ;offset 0x984
+        DEPD,Z  %r28,31,32,%r5  ;offset 0x988
+$0006001C
+        EXTRD,U %r4,31,32,%r31  ;offset 0x98c
+        CMPB,*<>,N      %r31,%r2,$00060020      ;offset 0x990
+        MOVB,TR %r6,%r29,$D1    ;offset 0x994
+        STD     %r29,-152(%r30) ;offset 0x998
+$0006000C
+        EXTRD,U %r3,31,32,%r25  ;offset 0x99c
+        COPY    %r3,%r26        ;offset 0x9a0
+        EXTRD,U %r3,31,32,%r9   ;offset 0x9a4
+        EXTRD,U %r4,31,32,%r8   ;offset 0x9a8
+        .CALL   ARGW0=GR,ARGW1=GR,RTNVAL=GR     ;in=25,26;out=28;
+        B,L     BN_num_bits_word,%r2    ;offset 0x9ac -- call BN_num_bits_word on the value in r3
+        EXTRD,U %r5,31,32,%r7   ;offset 0x9b0
+        LDI     64,%r20 ;offset 0x9b4
+        DEPD    %r7,31,32,%r5   ;offset 0x9b8
+        DEPD    %r8,31,32,%r4   ;offset 0x9bc
+        DEPD    %r9,31,32,%r3   ;offset 0x9c0
+        CMPB,=  %r28,%r20,$00060012     ;offset 0x9c4 -- bit count == 64: skip the range check
+        COPY    %r28,%r24       ;offset 0x9c8
+        MTSARCM %r24    ;offset 0x9cc
+        DEPDI,Z -1,%sar,1,%r19  ;offset 0x9d0
+        CMPB,*>>,N      %r4,%r19,$D2    ;offset 0x9d4 -- r4 too large: go to the abort path
+$00060012
+        SUBI    64,%r24,%r31    ;offset 0x9d8 -- r31 = 64 - bit count
+        CMPCLR,*<<      %r4,%r3,%r0     ;offset 0x9dc
+        SUB     %r4,%r3,%r4     ;offset 0x9e0
+$00060016
+        CMPB,=  %r31,%r0,$0006001A      ;offset 0x9e4
+        COPY    %r0,%r9 ;offset 0x9e8
+        MTSARCM %r31    ;offset 0x9ec
+        DEPD,Z  %r3,%sar,64,%r3 ;offset 0x9f0
+        SUBI    64,%r31,%r26    ;offset 0x9f4
+        MTSAR   %r26    ;offset 0x9f8
+        SHRPD   %r4,%r5,%sar,%r4        ;offset 0x9fc
+        MTSARCM %r31    ;offset 0xa00
+        DEPD,Z  %r5,%sar,64,%r5 ;offset 0xa04
+$0006001A
+        DEPDI,Z -1,31,32,%r19   ;offset 0xa08
+        AND     %r3,%r19,%r29   ;offset 0xa0c
+        EXTRD,U %r29,31,32,%r2  ;offset 0xa10
+        DEPDI,Z -1,63,32,%r6    ;offset 0xa14
+        MOVIB,TR        2,%r8,$0006001C ;offset 0xa18 -- pass counter r8 = 2
+        EXTRD,U %r3,63,32,%r7   ;offset 0xa1c
+$D2
+        ;--- not PIC	ADDIL   LR'__iob-$global$,%r27,%r1      ;offset 0xa20
+        ;--- not PIC	LDIL    LR'C$7,%r21     ;offset 0xa24
+        ;--- not PIC	LDO     RR'__iob-$global$+32(%r1),%r26  ;offset 0xa28
+        ;--- not PIC	.CALL   ARGW0=GR,ARGW1=GR,ARGW2=GR,RTNVAL=GR    ;in=24,25,26;out=28;
+        ;--- not PIC	B,L     fprintf,%r2     ;offset 0xa2c
+        ;--- not PIC	LDO     RR'C$7(%r21),%r25       ;offset 0xa30
+        .CALL           ;
+        B,L     abort,%r2       ;offset 0xa34 -- call abort()
+        NOP             ;offset 0xa38
+        B       $D3     ;offset 0xa3c
+        LDW     -212(%r30),%r2  ;offset 0xa40 -- reload the saved return pointer
+$00060020
+        COPY    %r4,%r26        ;offset 0xa44
+        EXTRD,U %r4,31,32,%r25  ;offset 0xa48
+        COPY    %r2,%r24        ;offset 0xa4c
+        .CALL   ;in=23,24,25,26;out=20,21,22,28,29; (MILLICALL)
+        B,L     $$div2U,%r31    ;offset 0xa50 -- call the $$div2U millicode routine
+        EXTRD,U %r2,31,32,%r23  ;offset 0xa54
+        DEPD    %r28,31,32,%r29 ;offset 0xa58
+$00060022
+        STD     %r29,-152(%r30) ;offset 0xa5c
+$D1
+        AND     %r5,%r19,%r24   ;offset 0xa60
+        EXTRD,U %r24,31,32,%r24 ;offset 0xa64
+        STW     %r2,-160(%r30)  ;offset 0xa68
+        STW     %r7,-128(%r30)  ;offset 0xa6c
+        FLDD    -152(%r30),%fr4 ;offset 0xa70
+        FLDD    -152(%r30),%fr7 ;offset 0xa74
+        FLDW    -160(%r30),%fr8L        ;offset 0xa78
+        FLDW    -128(%r30),%fr5L        ;offset 0xa7c
+        XMPYU   %fr8L,%fr7L,%fr10       ;offset 0xa80
+        FSTD    %fr10,-136(%r30)        ;offset 0xa84
+        XMPYU   %fr8L,%fr7R,%fr22       ;offset 0xa88
+        FSTD    %fr22,-144(%r30)        ;offset 0xa8c
+        XMPYU   %fr5L,%fr4L,%fr11       ;offset 0xa90
+        XMPYU   %fr5L,%fr4R,%fr23       ;offset 0xa94
+        FSTD    %fr11,-112(%r30)        ;offset 0xa98
+        FSTD    %fr23,-120(%r30)        ;offset 0xa9c
+        LDD     -136(%r30),%r28 ;offset 0xaa0
+        DEPD,Z  %r28,31,32,%r31 ;offset 0xaa4
+        LDD     -144(%r30),%r20 ;offset 0xaa8
+        ADD,L   %r20,%r31,%r31  ;offset 0xaac
+        LDD     -112(%r30),%r22 ;offset 0xab0
+        DEPD,Z  %r22,31,32,%r22 ;offset 0xab4
+        LDD     -120(%r30),%r21 ;offset 0xab8
+        B       $00060024       ;offset 0xabc
+        ADD,L   %r21,%r22,%r23  ;offset 0xac0
+$D0
+        OR      %r9,%r29,%r29   ;offset 0xac4
+$00060040
+        EXTRD,U %r29,31,32,%r28 ;offset 0xac8 -- high half of the result goes to r28
+$00060002
+$L2
+        LDW     -212(%r30),%r2  ;offset 0xacc -- reload the return pointer (-212+192 = -20)
+$D3
+        LDW     -168(%r30),%r9  ;offset 0xad0 -- restore r9
+        LDD     -176(%r30),%r8  ;offset 0xad4 -- restore r8 ...
+        EXTRD,U %r8,31,32,%r7   ;offset 0xad8 -- ... and unpack r7 from it
+        LDD     -184(%r30),%r6  ;offset 0xadc -- restore r6 ...
+        EXTRD,U %r6,31,32,%r5   ;offset 0xae0 -- ... and unpack r5 from it
+        LDW     -188(%r30),%r4  ;offset 0xae4 -- restore r4
+        BVE     (%r2)   ;offset 0xae8
+        .EXIT
+        LDW,MB  -192(%r30),%r3  ;offset 0xaec -- restore r3 and pop the 192-byte frame
+	.PROCEND	;in=23,25;out=28,29;fpin=105,107;
+
+
+
+
+;----------------------------------------------------------------------------
+;
+; Registers to hold 64-bit values to manipulate.  The "L" part
+; of the register corresponds to the upper 32-bits, while the "R"
+; part corresponds to the lower 32-bits
+; 
+; Note, that when using b6 and b7, the code must save these before
+; using them because they are callee save registers 
+; 
+;
+; Floating point registers to use to save values that
+; are manipulated.  These don't collide with ftemp1-6 and
+; are all caller save registers
+;
+a0        .reg %fr22
+a0L       .reg %fr22L
+a0R       .reg %fr22R
+
+a1        .reg %fr23
+a1L       .reg %fr23L
+a1R       .reg %fr23R
+
+a2        .reg %fr24
+a2L       .reg %fr24L
+a2R       .reg %fr24R
+
+a3        .reg %fr25
+a3L       .reg %fr25L
+a3R       .reg %fr25R
+
+a4        .reg %fr26
+a4L       .reg %fr26L
+a4R       .reg %fr26R
+
+a5        .reg %fr27
+a5L       .reg %fr27L
+a5R       .reg %fr27R
+
+a6        .reg %fr28
+a6L       .reg %fr28L
+a6R       .reg %fr28R
+
+a7        .reg %fr29
+a7L       .reg %fr29L
+a7R       .reg %fr29R
+
+b0        .reg %fr30
+b0L       .reg %fr30L
+b0R       .reg %fr30R
+
+b1        .reg %fr31
+b1L       .reg %fr31L
+b1R       .reg %fr31R
+
+;
+; Temporary floating point variables, these are all caller save
+; registers
+;
+ftemp1    .reg %fr4
+ftemp2    .reg %fr5
+ftemp3    .reg %fr6
+ftemp4    .reg %fr7
+
+;
+; The B set of registers when used.
+;
+
+b2        .reg %fr8
+b2L       .reg %fr8L
+b2R       .reg %fr8R
+
+b3        .reg %fr9
+b3L       .reg %fr9L
+b3R       .reg %fr9R
+
+b4        .reg %fr10
+b4L       .reg %fr10L
+b4R       .reg %fr10R
+
+b5        .reg %fr11
+b5L       .reg %fr11L
+b5R       .reg %fr11R
+
+b6        .reg %fr12   ; callee save (fr12-fr21): must be saved before use
+b6L       .reg %fr12L
+b6R       .reg %fr12R
+
+b7        .reg %fr13   ; callee save (fr12-fr21): must be saved before use
+b7L       .reg %fr13L
+b7R       .reg %fr13R
+
+c1           .reg %r21   ; only reg
+temp1        .reg %r20   ; only reg
+temp2        .reg %r19   ; only reg
+temp3        .reg %r31   ; only reg
+
+m1           .reg %r28   ; r28 is also ret0
+c2           .reg %r23   ; same register as n and top_overflow
+high_one     .reg %r1
+ht           .reg %r6    ; callee save (r3-r18)
+lt           .reg %r5    ; callee save (r3-r18)
+m            .reg %r4    ; callee save (r3-r18)
+c3           .reg %r3    ; callee save (r3-r18)
+
+SQR_ADD_C  .macro  A0L,A0R,C1,C2,C3
+    XMPYU   A0L,A0R,ftemp1       ; m  = A.hi * A.lo (cross product)
+    FSTD    ftemp1,-24(%sp)      ; store m (reloaded into a GR below)
+
+    XMPYU   A0R,A0R,ftemp2       ; lt = A.lo * A.lo
+    FSTD    ftemp2,-16(%sp)      ; store lt
+
+    XMPYU   A0L,A0L,ftemp3       ; ht = A.hi * A.hi
+    FSTD    ftemp3,-8(%sp)       ; store ht
+
+    LDD     -24(%sp),m           ; load m
+    AND     m,high_mask,temp2    ; temp2 = m & high_mask
+    DEPD,Z  m,30,31,temp3        ; temp3 = m << (32+1)
+    LDD     -16(%sp),lt          ; load lt
+
+    LDD     -8(%sp),ht           ; load ht
+    EXTRD,U temp2,32,33,temp1    ; temp1 = (m & high_mask) >> (32-1)
+    ADD     temp3,lt,lt          ; lt += temp3 (sets the carry used below)
+    ADD,L   ht,temp1,ht          ; ht += temp1 (logical add: carry untouched)
+    ADD,DC  ht,%r0,ht            ; ht += carry from lt + temp3
+
+    ADD     C1,lt,C1             ; C1 += lt
+    ADD,DC  ht,%r0,ht            ; ht += carry
+
+    ADD     C2,ht,C2             ; C2 += ht
+    ADD,DC  C3,%r0,C3            ; C3 += carry
+.endm
+
+SQR_ADD_C2 .macro  A0L,A0R,A1L,A1R,C1,C2,C3
+    XMPYU   A0L,A1R,ftemp1          ; m1 = A0.hi * A1.lo
+    FSTD    ftemp1,-16(%sp)         ; store m1
+    XMPYU   A0R,A1L,ftemp2          ; m  = A0.lo * A1.hi
+    FSTD    ftemp2,-8(%sp)          ; store m
+    XMPYU   A0R,A1R,ftemp3          ; lt = A0.lo * A1.lo
+    FSTD    ftemp3,-32(%sp)         ; store lt
+    XMPYU   A0L,A1L,ftemp4          ; ht = A0.hi * A1.hi
+    FSTD    ftemp4,-24(%sp)         ; store ht
+
+    LDD     -8(%sp),m               ; load m
+    LDD     -16(%sp),m1             ; load m1
+    ADD,L   m,m1,m                  ; m = m + m1 (logical add, no carry set)
+
+    DEPD,Z  m,31,32,temp3           ; temp3 = (m+m1) << 32
+    LDD     -24(%sp),ht             ; load ht
+
+    CMPCLR,*>>= m,m1,%r0            ; if (m < m1), i.e. m+m1 wrapped
+    ADD,L   ht,high_one,ht          ; ht += high_one (1 << 32)
+
+    EXTRD,U m,31,32,temp1           ; temp1 = m >> 32
+    LDD     -32(%sp),lt             ; load lt
+    ADD,L   ht,temp1,ht             ; ht += m >> 32
+    ADD     lt,temp3,lt             ; lt += (m << 32)
+    ADD,DC  ht,%r0,ht               ; ht += carry
+
+    ADD     ht,ht,ht                ; ht = 2*ht
+    ADD,DC  C3,%r0,C3               ; C3 += carry out of the doubling
+
+    ADD     lt,lt,lt                ; lt = 2*lt
+    ADD,DC  ht,%r0,ht               ; ht += carry out of the doubling
+
+    ADD     C1,lt,C1                ; C1 += lt
+    ADD,DC,*NUV ht,%r0,ht           ; ht += carry; nullify next if no overflow
+    LDO     1(C3),C3              ; bump C3 if overflow,nullify otherwise
+
+    ADD     C2,ht,C2                ; C2 += ht
+    ADD,DC  C3,%r0,C3             ; C3 += carry
+.endm
+
+;
+;void bn_sqr_comba8(BN_ULONG *r, BN_ULONG *a)
+; arg0 = r_ptr
+; arg1 = a_ptr
+;
+
+bn_sqr_comba8
+	.PROC
+	.CALLINFO FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE
+	.EXPORT	bn_sqr_comba8,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
+    .ENTRY
+	.align 64
+
+    STD     %r3,0(%sp)          ; save r3 (used as c3)
+    STD     %r4,8(%sp)          ; save r4 (used as m)
+    STD     %r5,16(%sp)         ; save r5 (used as lt)
+    STD     %r6,24(%sp)         ; save r6 (used as ht)
+
+	;
+	; Zero out carries
+	;
+	COPY     %r0,c1
+	COPY     %r0,c2
+	COPY     %r0,c3
+
+	LDO      128(%sp),%sp       ; bump stack past the 128-byte save area
+    DEPDI,Z -1,32,33,high_mask   ; Create Mask 0xffffffff80000000L
+    DEPDI,Z  1,31,1,high_one     ; Create Value  1 << 32
+
+	;
+	; Load up all of the values we are going to use
+	;
+    FLDD     0(a_ptr),a0       ; a0 = a[0]
+    FLDD     8(a_ptr),a1       ; a1 = a[1]
+    FLDD    16(a_ptr),a2       ; a2 = a[2]
+    FLDD    24(a_ptr),a3       ; a3 = a[3]
+    FLDD    32(a_ptr),a4       ; a4 = a[4]
+    FLDD    40(a_ptr),a5       ; a5 = a[5]
+    FLDD    48(a_ptr),a6       ; a6 = a[6]
+    FLDD    56(a_ptr),a7       ; a7 = a[7]
+
+	SQR_ADD_C a0L,a0R,c1,c2,c3
+	STD     c1,0(r_ptr)          ; r[0] = c1;
+	COPY    %r0,c1
+
+	SQR_ADD_C2 a1L,a1R,a0L,a0R,c2,c3,c1
+	STD     c2,8(r_ptr)          ; r[1] = c2;
+	COPY    %r0,c2
+
+	SQR_ADD_C a1L,a1R,c3,c1,c2
+	SQR_ADD_C2 a2L,a2R,a0L,a0R,c3,c1,c2
+	STD     c3,16(r_ptr)            ; r[2] = c3;
+	COPY    %r0,c3
+
+	SQR_ADD_C2 a3L,a3R,a0L,a0R,c1,c2,c3
+	SQR_ADD_C2 a2L,a2R,a1L,a1R,c1,c2,c3
+	STD     c1,24(r_ptr)           ; r[3] = c1;
+	COPY    %r0,c1
+
+	SQR_ADD_C a2L,a2R,c2,c3,c1
+	SQR_ADD_C2 a3L,a3R,a1L,a1R,c2,c3,c1
+	SQR_ADD_C2 a4L,a4R,a0L,a0R,c2,c3,c1
+	STD     c2,32(r_ptr)          ; r[4] = c2;
+	COPY    %r0,c2
+
+	SQR_ADD_C2 a5L,a5R,a0L,a0R,c3,c1,c2
+	SQR_ADD_C2 a4L,a4R,a1L,a1R,c3,c1,c2
+	SQR_ADD_C2 a3L,a3R,a2L,a2R,c3,c1,c2
+	STD     c3,40(r_ptr)          ; r[5] = c3;
+	COPY    %r0,c3
+
+	SQR_ADD_C a3L,a3R,c1,c2,c3
+	SQR_ADD_C2 a4L,a4R,a2L,a2R,c1,c2,c3
+	SQR_ADD_C2 a5L,a5R,a1L,a1R,c1,c2,c3
+	SQR_ADD_C2 a6L,a6R,a0L,a0R,c1,c2,c3
+	STD     c1,48(r_ptr)          ; r[6] = c1;
+	COPY    %r0,c1
+
+	SQR_ADD_C2 a7L,a7R,a0L,a0R,c2,c3,c1
+	SQR_ADD_C2 a6L,a6R,a1L,a1R,c2,c3,c1
+	SQR_ADD_C2 a5L,a5R,a2L,a2R,c2,c3,c1
+	SQR_ADD_C2 a4L,a4R,a3L,a3R,c2,c3,c1
+	STD     c2,56(r_ptr)          ; r[7] = c2;
+	COPY    %r0,c2
+
+	SQR_ADD_C a4L,a4R,c3,c1,c2
+	SQR_ADD_C2 a5L,a5R,a3L,a3R,c3,c1,c2
+	SQR_ADD_C2 a6L,a6R,a2L,a2R,c3,c1,c2
+	SQR_ADD_C2 a7L,a7R,a1L,a1R,c3,c1,c2
+	STD     c3,64(r_ptr)          ; r[8] = c3;
+	COPY    %r0,c3
+
+	SQR_ADD_C2 a7L,a7R,a2L,a2R,c1,c2,c3
+	SQR_ADD_C2 a6L,a6R,a3L,a3R,c1,c2,c3
+	SQR_ADD_C2 a5L,a5R,a4L,a4R,c1,c2,c3
+	STD     c1,72(r_ptr)          ; r[9] = c1;
+	COPY    %r0,c1
+
+	SQR_ADD_C a5L,a5R,c2,c3,c1
+	SQR_ADD_C2 a6L,a6R,a4L,a4R,c2,c3,c1
+	SQR_ADD_C2 a7L,a7R,a3L,a3R,c2,c3,c1
+	STD     c2,80(r_ptr)          ; r[10] = c2;
+	COPY    %r0,c2
+
+	SQR_ADD_C2 a7L,a7R,a4L,a4R,c3,c1,c2
+	SQR_ADD_C2 a6L,a6R,a5L,a5R,c3,c1,c2
+	STD     c3,88(r_ptr)          ; r[11] = c3;
+	COPY    %r0,c3
+	
+	SQR_ADD_C a6L,a6R,c1,c2,c3
+	SQR_ADD_C2 a7L,a7R,a5L,a5R,c1,c2,c3
+	STD     c1,96(r_ptr)          ; r[12] = c1;
+	COPY    %r0,c1
+
+	SQR_ADD_C2 a7L,a7R,a6L,a6R,c2,c3,c1
+	STD     c2,104(r_ptr)         ; r[13] = c2;
+	COPY    %r0,c2
+
+	SQR_ADD_C a7L,a7R,c3,c1,c2
+	STD     c3, 112(r_ptr)       ; r[14] = c3
+	STD     c1, 120(r_ptr)       ; r[15] = c1
+
+    .EXIT
+    LDD     -104(%sp),%r6        ; restore r6
+    LDD     -112(%sp),%r5        ; restore r5
+    LDD     -120(%sp),%r4        ; restore r4
+    BVE     (%rp)                ; return to the caller
+    LDD,MB  -128(%sp),%r3        ; restore r3, sp -= 128 (branch delay slot)
+
+	.PROCEND	
+
+;-----------------------------------------------------------------------------
+;
+;void bn_sqr_comba4(BN_ULONG *r, BN_ULONG *a)
+; arg0 = r_ptr
+; arg1 = a_ptr
+;
+
+bn_sqr_comba4
+	.proc
+	.callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE
+	.EXPORT	bn_sqr_comba4,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
+    .entry
+	.align 64
+    STD     %r3,0(%sp)          ; save r3 (used as c3)
+    STD     %r4,8(%sp)          ; save r4 (used as m)
+    STD     %r5,16(%sp)         ; save r5 (used as lt)
+    STD     %r6,24(%sp)         ; save r6 (used as ht)
+
+	;
+	; Zero out carries
+	;
+	COPY     %r0,c1
+	COPY     %r0,c2
+	COPY     %r0,c3
+
+	LDO      128(%sp),%sp       ; bump stack past the 128-byte save area
+    DEPDI,Z -1,32,33,high_mask   ; Create Mask 0xffffffff80000000L
+    DEPDI,Z  1,31,1,high_one     ; Create Value  1 << 32
+
+	;
+	; Load up all of the values we are going to use
+	;
+    FLDD     0(a_ptr),a0       ; a0 = a[0]
+    FLDD     8(a_ptr),a1       ; a1 = a[1]
+    FLDD    16(a_ptr),a2       ; a2 = a[2]
+    FLDD    24(a_ptr),a3       ; a3 = a[3]
+    ; Only a[0..3] exist for the 4-word square.  a4-a7 must not be
+    ; loaded here: offsets 32..56 from a_ptr lie past the end of the
+    ; input, and nothing below references a4-a7.
+    ;
+
+	SQR_ADD_C a0L,a0R,c1,c2,c3
+
+	STD     c1,0(r_ptr)          ; r[0] = c1;
+	COPY    %r0,c1
+
+	SQR_ADD_C2 a1L,a1R,a0L,a0R,c2,c3,c1
+
+	STD     c2,8(r_ptr)          ; r[1] = c2;
+	COPY    %r0,c2
+
+	SQR_ADD_C a1L,a1R,c3,c1,c2
+	SQR_ADD_C2 a2L,a2R,a0L,a0R,c3,c1,c2
+
+	STD     c3,16(r_ptr)            ; r[2] = c3;
+	COPY    %r0,c3
+
+	SQR_ADD_C2 a3L,a3R,a0L,a0R,c1,c2,c3
+	SQR_ADD_C2 a2L,a2R,a1L,a1R,c1,c2,c3
+
+	STD     c1,24(r_ptr)           ; r[3] = c1;
+	COPY    %r0,c1
+
+	SQR_ADD_C a2L,a2R,c2,c3,c1
+	SQR_ADD_C2 a3L,a3R,a1L,a1R,c2,c3,c1
+
+	STD     c2,32(r_ptr)           ; r[4] = c2;
+	COPY    %r0,c2
+
+	SQR_ADD_C2 a3L,a3R,a2L,a2R,c3,c1,c2
+	STD     c3,40(r_ptr)           ; r[5] = c3;
+	COPY    %r0,c3
+
+	SQR_ADD_C a3L,a3R,c1,c2,c3
+	STD     c1,48(r_ptr)           ; r[6] = c1;
+	STD     c2,56(r_ptr)           ; r[7] = c2;
+
+    .EXIT
+    LDD     -104(%sp),%r6        ; restore r6
+    LDD     -112(%sp),%r5        ; restore r5
+    LDD     -120(%sp),%r4        ; restore r4
+    BVE     (%rp)                ; return to the caller
+    LDD,MB  -128(%sp),%r3        ; restore r3, sp -= 128 (branch delay slot)
+
+	.PROCEND	
+
+
+;---------------------------------------------------------------------------
+
+MUL_ADD_C  .macro  A0L,A0R,B0L,B0R,C1,C2,C3
+    XMPYU   A0L,B0R,ftemp1        ; m1 = A.hi * B.lo
+    FSTD    ftemp1,-16(%sp)       ; store m1
+    XMPYU   A0R,B0L,ftemp2        ; m  = A.lo * B.hi
+    FSTD    ftemp2,-8(%sp)        ; store m
+    XMPYU   A0R,B0R,ftemp3        ; lt = A.lo * B.lo
+    FSTD    ftemp3,-32(%sp)       ; store lt
+    XMPYU   A0L,B0L,ftemp4        ; ht = A.hi * B.hi
+    FSTD    ftemp4,-24(%sp)       ; store ht
+
+    LDD     -8(%sp),m             ; load m
+    LDD     -16(%sp),m1           ; load m1
+    ADD,L   m,m1,m                ; m = m + m1 (logical add, no carry set)
+
+    DEPD,Z  m,31,32,temp3         ; temp3 = (m+m1) << 32
+    LDD     -24(%sp),ht           ; load ht
+
+    CMPCLR,*>>= m,m1,%r0          ; if (m < m1), i.e. m+m1 wrapped
+    ADD,L   ht,high_one,ht        ; ht += high_one (1 << 32)
+
+    EXTRD,U m,31,32,temp1         ; temp1 = m >> 32
+    LDD     -32(%sp),lt           ; load lt
+    ADD,L   ht,temp1,ht           ; ht += m >> 32
+    ADD     lt,temp3,lt           ; lt += (m << 32)
+    ADD,DC  ht,%r0,ht             ; ht += carry
+
+    ADD     C1,lt,C1              ; C1 += lt
+    ADD,DC  ht,%r0,ht             ; ht += carry
+
+    ADD     C2,ht,C2              ; C2 += ht
+    ADD,DC  C3,%r0,C3             ; C3 += carry
+.endm
+
+
+;
+;void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
+; arg0 = r_ptr
+; arg1 = a_ptr
+; arg2 = b_ptr
+;
+
+bn_mul_comba8
+	.proc
+	.callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE
+	.EXPORT	bn_mul_comba8,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
+    .entry
+	.align 64
+
+    STD     %r3,0(%sp)          ; save r3
+    STD     %r4,8(%sp)          ; save r4
+    STD     %r5,16(%sp)         ; save r5
+    STD     %r6,24(%sp)         ; save r6
+    FSTD    %fr12,32(%sp)       ; save fr12 (b6)
+    FSTD    %fr13,40(%sp)       ; save fr13 (b7)
+
+	;
+	; Zero out carries
+	;
+	COPY     %r0,c1
+	COPY     %r0,c2
+	COPY     %r0,c3
+
+	LDO      128(%sp),%sp       ; bump stack past the 128-byte save area
+    DEPDI,Z  1,31,1,high_one     ; Create Value  1 << 32
+
+	;
+	; Load up all of the values we are going to use
+	;
+    FLDD      0(a_ptr),a0       ; a0 = a[0]
+    FLDD      8(a_ptr),a1       ; a1 = a[1]
+    FLDD     16(a_ptr),a2       ; a2 = a[2]
+    FLDD     24(a_ptr),a3       ; a3 = a[3]
+    FLDD     32(a_ptr),a4       ; a4 = a[4]
+    FLDD     40(a_ptr),a5       ; a5 = a[5]
+    FLDD     48(a_ptr),a6       ; a6 = a[6]
+    FLDD     56(a_ptr),a7       ; a7 = a[7]
+
+    FLDD      0(b_ptr),b0       ; b0 = b[0]
+    FLDD      8(b_ptr),b1       ; b1 = b[1]
+    FLDD     16(b_ptr),b2       ; b2 = b[2]
+    FLDD     24(b_ptr),b3       ; b3 = b[3]
+    FLDD     32(b_ptr),b4       ; b4 = b[4]
+    FLDD     40(b_ptr),b5       ; b5 = b[5]
+    FLDD     48(b_ptr),b6       ; b6 = b[6]
+    FLDD     56(b_ptr),b7       ; b7 = b[7]
+
+	MUL_ADD_C a0L,a0R,b0L,b0R,c1,c2,c3
+	STD       c1,0(r_ptr)          ; r[0] = c1
+	COPY      %r0,c1
+
+	MUL_ADD_C a0L,a0R,b1L,b1R,c2,c3,c1
+	MUL_ADD_C a1L,a1R,b0L,b0R,c2,c3,c1
+	STD       c2,8(r_ptr)          ; r[1] = c2
+	COPY      %r0,c2
+
+	MUL_ADD_C a2L,a2R,b0L,b0R,c3,c1,c2
+	MUL_ADD_C a1L,a1R,b1L,b1R,c3,c1,c2
+	MUL_ADD_C a0L,a0R,b2L,b2R,c3,c1,c2
+	STD       c3,16(r_ptr)         ; r[2] = c3
+	COPY      %r0,c3
+
+	MUL_ADD_C a0L,a0R,b3L,b3R,c1,c2,c3
+	MUL_ADD_C a1L,a1R,b2L,b2R,c1,c2,c3
+	MUL_ADD_C a2L,a2R,b1L,b1R,c1,c2,c3
+	MUL_ADD_C a3L,a3R,b0L,b0R,c1,c2,c3
+	STD       c1,24(r_ptr)         ; r[3] = c1
+	COPY      %r0,c1
+
+	MUL_ADD_C a4L,a4R,b0L,b0R,c2,c3,c1
+	MUL_ADD_C a3L,a3R,b1L,b1R,c2,c3,c1
+	MUL_ADD_C a2L,a2R,b2L,b2R,c2,c3,c1
+	MUL_ADD_C a1L,a1R,b3L,b3R,c2,c3,c1
+	MUL_ADD_C a0L,a0R,b4L,b4R,c2,c3,c1
+	STD       c2,32(r_ptr)         ; r[4] = c2
+	COPY      %r0,c2
+
+	MUL_ADD_C a0L,a0R,b5L,b5R,c3,c1,c2
+	MUL_ADD_C a1L,a1R,b4L,b4R,c3,c1,c2
+	MUL_ADD_C a2L,a2R,b3L,b3R,c3,c1,c2
+	MUL_ADD_C a3L,a3R,b2L,b2R,c3,c1,c2
+	MUL_ADD_C a4L,a4R,b1L,b1R,c3,c1,c2
+	MUL_ADD_C a5L,a5R,b0L,b0R,c3,c1,c2
+	STD       c3,40(r_ptr)         ; r[5] = c3
+	COPY      %r0,c3
+
+	MUL_ADD_C a6L,a6R,b0L,b0R,c1,c2,c3
+	MUL_ADD_C a5L,a5R,b1L,b1R,c1,c2,c3
+	MUL_ADD_C a4L,a4R,b2L,b2R,c1,c2,c3
+	MUL_ADD_C a3L,a3R,b3L,b3R,c1,c2,c3
+	MUL_ADD_C a2L,a2R,b4L,b4R,c1,c2,c3
+	MUL_ADD_C a1L,a1R,b5L,b5R,c1,c2,c3
+	MUL_ADD_C a0L,a0R,b6L,b6R,c1,c2,c3
+	STD       c1,48(r_ptr)         ; r[6] = c1
+	COPY      %r0,c1
+	
+	MUL_ADD_C a0L,a0R,b7L,b7R,c2,c3,c1
+	MUL_ADD_C a1L,a1R,b6L,b6R,c2,c3,c1
+	MUL_ADD_C a2L,a2R,b5L,b5R,c2,c3,c1
+	MUL_ADD_C a3L,a3R,b4L,b4R,c2,c3,c1
+	MUL_ADD_C a4L,a4R,b3L,b3R,c2,c3,c1
+	MUL_ADD_C a5L,a5R,b2L,b2R,c2,c3,c1
+	MUL_ADD_C a6L,a6R,b1L,b1R,c2,c3,c1
+	MUL_ADD_C a7L,a7R,b0L,b0R,c2,c3,c1
+	STD       c2,56(r_ptr)         ; r[7] = c2
+	COPY      %r0,c2
+
+	MUL_ADD_C a7L,a7R,b1L,b1R,c3,c1,c2
+	MUL_ADD_C a6L,a6R,b2L,b2R,c3,c1,c2
+	MUL_ADD_C a5L,a5R,b3L,b3R,c3,c1,c2
+	MUL_ADD_C a4L,a4R,b4L,b4R,c3,c1,c2
+	MUL_ADD_C a3L,a3R,b5L,b5R,c3,c1,c2
+	MUL_ADD_C a2L,a2R,b6L,b6R,c3,c1,c2
+	MUL_ADD_C a1L,a1R,b7L,b7R,c3,c1,c2
+	STD       c3,64(r_ptr)         ; r[8] = c3
+	COPY      %r0,c3
+
+	MUL_ADD_C a2L,a2R,b7L,b7R,c1,c2,c3
+	MUL_ADD_C a3L,a3R,b6L,b6R,c1,c2,c3
+	MUL_ADD_C a4L,a4R,b5L,b5R,c1,c2,c3
+	MUL_ADD_C a5L,a5R,b4L,b4R,c1,c2,c3
+	MUL_ADD_C a6L,a6R,b3L,b3R,c1,c2,c3
+	MUL_ADD_C a7L,a7R,b2L,b2R,c1,c2,c3
+	STD       c1,72(r_ptr)         ; r[9] = c1
+	COPY      %r0,c1
+
+	MUL_ADD_C a7L,a7R,b3L,b3R,c2,c3,c1
+	MUL_ADD_C a6L,a6R,b4L,b4R,c2,c3,c1
+	MUL_ADD_C a5L,a5R,b5L,b5R,c2,c3,c1
+	MUL_ADD_C a4L,a4R,b6L,b6R,c2,c3,c1
+	MUL_ADD_C a3L,a3R,b7L,b7R,c2,c3,c1
+	STD       c2,80(r_ptr)         ; r[10] = c2
+	COPY      %r0,c2
+
+	MUL_ADD_C a4L,a4R,b7L,b7R,c3,c1,c2
+	MUL_ADD_C a5L,a5R,b6L,b6R,c3,c1,c2
+	MUL_ADD_C a6L,a6R,b5L,b5R,c3,c1,c2
+	MUL_ADD_C a7L,a7R,b4L,b4R,c3,c1,c2
+	STD       c3,88(r_ptr)         ; r[11] = c3
+	COPY      %r0,c3
+
+	MUL_ADD_C a7L,a7R,b5L,b5R,c1,c2,c3
+	MUL_ADD_C a6L,a6R,b6L,b6R,c1,c2,c3
+	MUL_ADD_C a5L,a5R,b7L,b7R,c1,c2,c3
+	STD       c1,96(r_ptr)         ; r[12] = c1
+	COPY      %r0,c1
+
+	MUL_ADD_C a6L,a6R,b7L,b7R,c2,c3,c1
+	MUL_ADD_C a7L,a7R,b6L,b6R,c2,c3,c1
+	STD       c2,104(r_ptr)        ; r[13] = c2
+	COPY      %r0,c2
+
+	MUL_ADD_C a7L,a7R,b7L,b7R,c3,c1,c2
+	STD       c3,112(r_ptr)        ; r[14] = c3
+	STD       c1,120(r_ptr)        ; r[15] = c1
+
+    .EXIT
+    FLDD    -88(%sp),%fr13       ; restore fr13 (b7)
+    FLDD    -96(%sp),%fr12       ; restore fr12 (b6)
+    LDD     -104(%sp),%r6        ; restore r6
+    LDD     -112(%sp),%r5        ; restore r5
+    LDD     -120(%sp),%r4        ; restore r4
+    BVE     (%rp)                ; return to the caller
+    LDD,MB  -128(%sp),%r3        ; restore r3, sp -= 128 (branch delay slot)
+
+	.PROCEND	
+
+;-----------------------------------------------------------------------------
+;
+;void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
+; arg0 = r_ptr
+; arg1 = a_ptr
+; arg2 = b_ptr
+;
+
+bn_mul_comba4
+	.proc
+	.callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE
+	.EXPORT	bn_mul_comba4,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
+    .entry
+	.align 64
+
+    STD     %r3,0(%sp)          ; save r3
+    STD     %r4,8(%sp)          ; save r4
+    STD     %r5,16(%sp)         ; save r5
+    STD     %r6,24(%sp)         ; save r6
+    FSTD    %fr12,32(%sp)       ; save fr12
+    FSTD    %fr13,40(%sp)       ; save fr13
+
+	;
+	; Zero out carries
+	;
+	COPY     %r0,c1
+	COPY     %r0,c2
+	COPY     %r0,c3
+
+	LDO      128(%sp),%sp       ; bump stack past the 128-byte save area
+    DEPDI,Z  1,31,1,high_one     ; Create Value  1 << 32
+
+	;
+	; Load up all of the values we are going to use
+	;
+    FLDD      0(a_ptr),a0       ; a0 = a[0]
+    FLDD      8(a_ptr),a1       ; a1 = a[1]
+    FLDD     16(a_ptr),a2       ; a2 = a[2]
+    FLDD     24(a_ptr),a3       ; a3 = a[3]
+
+    FLDD      0(b_ptr),b0       ; b0 = b[0]
+    FLDD      8(b_ptr),b1       ; b1 = b[1]
+    FLDD     16(b_ptr),b2       ; b2 = b[2]
+    FLDD     24(b_ptr),b3       ; b3 = b[3]
+
+	MUL_ADD_C a0L,a0R,b0L,b0R,c1,c2,c3
+	STD       c1,0(r_ptr)          ; r[0] = c1
+	COPY      %r0,c1
+
+	MUL_ADD_C a0L,a0R,b1L,b1R,c2,c3,c1
+	MUL_ADD_C a1L,a1R,b0L,b0R,c2,c3,c1
+	STD       c2,8(r_ptr)          ; r[1] = c2
+	COPY      %r0,c2
+
+	MUL_ADD_C a2L,a2R,b0L,b0R,c3,c1,c2
+	MUL_ADD_C a1L,a1R,b1L,b1R,c3,c1,c2
+	MUL_ADD_C a0L,a0R,b2L,b2R,c3,c1,c2
+	STD       c3,16(r_ptr)         ; r[2] = c3
+	COPY      %r0,c3
+
+	MUL_ADD_C a0L,a0R,b3L,b3R,c1,c2,c3
+	MUL_ADD_C a1L,a1R,b2L,b2R,c1,c2,c3
+	MUL_ADD_C a2L,a2R,b1L,b1R,c1,c2,c3
+	MUL_ADD_C a3L,a3R,b0L,b0R,c1,c2,c3
+	STD       c1,24(r_ptr)         ; r[3] = c1
+	COPY      %r0,c1
+
+	MUL_ADD_C a3L,a3R,b1L,b1R,c2,c3,c1
+	MUL_ADD_C a2L,a2R,b2L,b2R,c2,c3,c1
+	MUL_ADD_C a1L,a1R,b3L,b3R,c2,c3,c1
+	STD       c2,32(r_ptr)         ; r[4] = c2
+	COPY      %r0,c2
+
+	MUL_ADD_C a2L,a2R,b3L,b3R,c3,c1,c2
+	MUL_ADD_C a3L,a3R,b2L,b2R,c3,c1,c2
+	STD       c3,40(r_ptr)         ; r[5] = c3
+	COPY      %r0,c3
+
+	MUL_ADD_C a3L,a3R,b3L,b3R,c1,c2,c3
+	STD       c1,48(r_ptr)         ; r[6] = c1
+	STD       c2,56(r_ptr)         ; r[7] = c2
+
+    .EXIT
+    FLDD    -88(%sp),%fr13       ; restore fr13
+    FLDD    -96(%sp),%fr12       ; restore fr12
+    LDD     -104(%sp),%r6        ; restore r6
+    LDD     -112(%sp),%r5        ; restore r5
+    LDD     -120(%sp),%r4        ; restore r4
+    BVE     (%rp)                ; return to the caller
+    LDD,MB  -128(%sp),%r3        ; restore r3, sp -= 128 (branch delay slot)
+
+	.PROCEND	
+
+
+;--- not PIC	.SPACE	$TEXT$
+;--- not PIC	.SUBSPA	$CODE$
+;--- not PIC	.SPACE	$PRIVATE$,SORT=16
+;--- not PIC	.IMPORT	$global$,DATA
+;--- not PIC	.SPACE	$TEXT$
+;--- not PIC	.SUBSPA	$CODE$
+;--- not PIC	.SUBSPA	$LIT$,ACCESS=0x2c
+;--- not PIC	C$7
+;--- not PIC	.ALIGN	8
+;--- not PIC	.STRINGZ	"Division would overflow (%d)\n"
+	.END
diff --git a/src/third_party/openssl/openssl/crypto/bn/asm/pa-risc2W.S b/src/third_party/openssl/openssl/crypto/bn/asm/pa-risc2W.S
new file mode 100644
index 0000000..a995457
--- /dev/null
+++ b/src/third_party/openssl/openssl/crypto/bn/asm/pa-risc2W.S
@@ -0,0 +1,1605 @@
+;
+; PA-RISC 64-bit implementation of bn_asm code
+;
+; This code is approximately 2x faster than the C version
+; for RSA/DSA.
+;
+; See http://devresource.hp.com/  for more details on the PA-RISC
+; architecture.  Also see the book "PA-RISC 2.0 Architecture"
+; by Gerry Kane for information on the instruction set architecture.
+;
+; Code written by Chris Ruemmler (with some help from the HP C
+; compiler).
+;
+; The code compiles with HP's assembler
+;
+
+	.level	2.0W
+	.space	$TEXT$
+	.subspa	$CODE$,QUAD=0,ALIGN=8,ACCESS=0x2c,CODE_ONLY
+
+;
+; Global Register definitions used for the routines.
+;
+; Some information about HP's runtime architecture for 64-bits.
+;
+; "Caller save" means the calling function must save the register
+; if it wants the register to be preserved.
+; "Callee save" means if a function uses the register, it must save
+; the value before using it.
+;
+; For the floating point registers 
+;
+;    "caller save" registers: fr4-fr11, fr22-fr31
+;    "callee save" registers: fr12-fr21
+;    "special" registers: fr0-fr3 (status and exception registers)
+;
+; For the integer registers
+;     value zero             :  r0
+;     "caller save" registers: r1,r19-r26
+;     "callee save" registers: r3-r18
+;     return register        :  r2  (rp)
+;     return values          : r28  (ret0,ret1)
+;     stack pointer          : r30  (sp)
+;     global data pointer    : r27  (dp)
+;     argument pointer       : r29  (ap)
+;     millicode return ptr   : r31  (also a caller save register)
+
+
+;
+; Arguments to the routines
+;
+r_ptr       .reg %r26
+a_ptr       .reg %r25
+b_ptr       .reg %r24
+num         .reg %r24
+w           .reg %r23
+n           .reg %r23
+
+
+;
+; Globals used in some routines
+;
+
+top_overflow .reg %r29
+high_mask    .reg %r22    ; value 0xffffffff80000000L
+
+
+;------------------------------------------------------------------------------
+;
+; bn_mul_add_words
+;
+;BN_ULONG bn_mul_add_words(BN_ULONG *r_ptr, BN_ULONG *a_ptr, 
+;								int num, BN_ULONG w)
+;
+; arg0 = r_ptr
+; arg1 = a_ptr
+; arg2 = num
+; arg3 = w
+;
+; Local register definitions
+;
+
+fm1          .reg %fr22
+fm           .reg %fr23
+ht_temp      .reg %fr24
+ht_temp_1    .reg %fr25
+lt_temp      .reg %fr26
+lt_temp_1    .reg %fr27
+fm1_1        .reg %fr28
+fm_1         .reg %fr29
+
+fw_h         .reg %fr7L
+fw_l         .reg %fr7R
+fw           .reg %fr7
+
+fht_0        .reg %fr8L
+flt_0        .reg %fr8R
+t_float_0    .reg %fr8
+
+fht_1        .reg %fr9L
+flt_1        .reg %fr9R
+t_float_1    .reg %fr9
+
+tmp_0        .reg %r31
+tmp_1        .reg %r21
+m_0          .reg %r20 
+m_1          .reg %r19 
+ht_0         .reg %r1  
+ht_1         .reg %r3
+lt_0         .reg %r4
+lt_1         .reg %r5
+m1_0         .reg %r6 
+m1_1         .reg %r7 
+rp_val       .reg %r8
+rp_val_1     .reg %r9
+
+bn_mul_add_words
+	.export	bn_mul_add_words,entry,NO_RELOCATION,LONG_RETURN
+	.proc
+	.callinfo frame=128
+    .entry
+	.align 64
+
+    STD     %r3,0(%sp)          ; save r3
+    STD     %r4,8(%sp)          ; save r4
+	NOP                         ; Needed to make the loop 16-byte aligned
+	NOP                         ; Needed to make the loop 16-byte aligned
+
+    STD     %r5,16(%sp)         ; save r5
+    STD     %r6,24(%sp)         ; save r6
+    STD     %r7,32(%sp)         ; save r7
+    STD     %r8,40(%sp)         ; save r8
+
+    STD     %r9,48(%sp)         ; save r9
+    COPY    %r0,%ret0           ; return 0 by default
+    DEPDI,Z 1,31,1,top_overflow ; top_overflow = 1 << 32
+	STD     w,56(%sp)           ; store w on stack (reloaded into fw below)
+
+    CMPIB,>= 0,num,bn_mul_add_words_exit  ; if (num <= 0) then exit
+	LDO     128(%sp),%sp       ; bump stack (branch delay slot)
+
+	;
+	; The loop is unrolled twice, so if there is only 1 number
+    ; then go straight to the cleanup code.
+	;
+	CMPIB,= 1,num,bn_mul_add_words_single_top
+	FLDD    -72(%sp),fw     ; load up w into fp register fw (fw_h/fw_l)
+
+	;
+	; This loop is unrolled 2 times (64-byte aligned as well)
+	;
+	; PA-RISC 2.0 chips have two fully pipelined multipliers, thus
+    ; two 32-bit multiplies can be issued per cycle.
+    ;
+bn_mul_add_words_unroll2
+
+    FLDD    0(a_ptr),t_float_0       ; load up 64-bit value (fr8) ht(L)/lt(R)
+    FLDD    8(a_ptr),t_float_1       ; load up 64-bit value (fr9) ht(L)/lt(R)
+    LDD     0(r_ptr),rp_val          ; rp[0]
+    LDD     8(r_ptr),rp_val_1        ; rp[1]
+
+    XMPYU   fht_0,fw_l,fm1           ; m1[0] = fht_0*fw_l
+    XMPYU   fht_1,fw_l,fm1_1         ; m1[1] = fht_1*fw_l
+    FSTD    fm1,-16(%sp)             ; -16(sp) = m1[0]
+    FSTD    fm1_1,-48(%sp)           ; -48(sp) = m1[1]
+
+    XMPYU   flt_0,fw_h,fm            ; m[0] = flt_0*fw_h
+    XMPYU   flt_1,fw_h,fm_1          ; m[1] = flt_1*fw_h
+    FSTD    fm,-8(%sp)               ; -8(sp) = m[0]
+    FSTD    fm_1,-40(%sp)            ; -40(sp) = m[1]
+
+    XMPYU   fht_0,fw_h,ht_temp       ; ht_temp   = fht_0*fw_h
+    XMPYU   fht_1,fw_h,ht_temp_1     ; ht_temp_1 = fht_1*fw_h
+    FSTD    ht_temp,-24(%sp)         ; -24(sp)   = ht_temp
+    FSTD    ht_temp_1,-56(%sp)       ; -56(sp)   = ht_temp_1
+
+    XMPYU   flt_0,fw_l,lt_temp       ; lt_temp   = flt_0*fw_l
+    XMPYU   flt_1,fw_l,lt_temp_1     ; lt_temp_1 = flt_1*fw_l
+    FSTD    lt_temp,-32(%sp)         ; -32(sp) = lt_temp
+    FSTD    lt_temp_1,-64(%sp)       ; -64(sp) = lt_temp_1
+
+    LDD     -8(%sp),m_0              ; m[0]
+    LDD     -40(%sp),m_1             ; m[1]
+    LDD     -16(%sp),m1_0            ; m1[0]
+    LDD     -48(%sp),m1_1            ; m1[1]
+
+    LDD     -24(%sp),ht_0            ; ht[0]
+    LDD     -56(%sp),ht_1            ; ht[1]
+    ADD,L   m1_0,m_0,tmp_0           ; tmp_0 = m[0] + m1[0];
+    ADD,L   m1_1,m_1,tmp_1           ; tmp_1 = m[1] + m1[1];
+
+    LDD     -32(%sp),lt_0            ; lt[0]
+    LDD     -64(%sp),lt_1            ; lt[1]
+    CMPCLR,*>>= tmp_0,m1_0, %r0      ; if (tmp_0 < m1[0]), i.e. the sum wrapped
+    ADD,L   ht_0,top_overflow,ht_0   ; ht[0] += (1<<32)
+
+    CMPCLR,*>>= tmp_1,m1_1,%r0       ; if (tmp_1 < m1[1]), i.e. the sum wrapped
+    ADD,L   ht_1,top_overflow,ht_1   ; ht[1] += (1<<32)
+    EXTRD,U tmp_0,31,32,m_0          ; m[0]>>32
+    DEPD,Z  tmp_0,31,32,m1_0         ; m1[0] = m[0]<<32
+
+    EXTRD,U tmp_1,31,32,m_1          ; m[1]>>32
+    DEPD,Z  tmp_1,31,32,m1_1         ; m1[1] = m[1]<<32
+    ADD,L   ht_0,m_0,ht_0            ; ht[0]+= (m[0]>>32)
+    ADD,L   ht_1,m_1,ht_1            ; ht[1]+= (m[1]>>32)
+
+    ADD     lt_0,m1_0,lt_0           ; lt[0] = lt[0]+m1[0];
+	ADD,DC  ht_0,%r0,ht_0            ; ht[0] += carry
+    ADD     lt_1,m1_1,lt_1           ; lt[1] = lt[1]+m1[1];
+    ADD,DC  ht_1,%r0,ht_1            ; ht[1] += carry
+
+    ADD    %ret0,lt_0,lt_0           ; lt[0] = lt[0] + c;
+	ADD,DC  ht_0,%r0,ht_0            ; ht[0] += carry
+    ADD     lt_0,rp_val,lt_0         ; lt[0] = lt[0]+rp[0]
+    ADD,DC  ht_0,%r0,ht_0            ; ht[0] += carry
+
+	LDO    -2(num),num               ; num = num - 2;
+    ADD     ht_0,lt_1,lt_1           ; lt[1] = lt[1] + ht_0 (c);
+    ADD,DC  ht_1,%r0,ht_1            ; ht[1] += carry
+    STD     lt_0,0(r_ptr)            ; rp[0] = lt[0]
+
+    ADD     lt_1,rp_val_1,lt_1       ; lt[1] = lt[1]+rp[1]
+    ADD,DC  ht_1,%r0,%ret0           ; c (ret0) = ht[1] + carry
+    LDO     16(a_ptr),a_ptr          ; a_ptr += 2
+
+    STD     lt_1,8(r_ptr)            ; rp[1] = lt[1]
+	CMPIB,<= 2,num,bn_mul_add_words_unroll2 ; go again if more to do
+    LDO     16(r_ptr),r_ptr          ; r_ptr += 2
+
+    CMPIB,=,N 0,num,bn_mul_add_words_exit ; are we done, or cleanup last one
+
+	;
+	; Top of loop aligned on 64-byte boundary
+	;
+bn_mul_add_words_single_top
+    FLDD    0(a_ptr),t_float_0        ; load up 64-bit value (fr8) ht(L)/lt(R)
+    LDD     0(r_ptr),rp_val           ; rp[0]
+    LDO     8(a_ptr),a_ptr            ; a_ptr++
+    XMPYU   fht_0,fw_l,fm1            ; m1 = ht*fw_l
+    FSTD    fm1,-16(%sp)              ; -16(sp) = m1
+    XMPYU   flt_0,fw_h,fm             ; m = lt*fw_h
+    FSTD    fm,-8(%sp)                ; -8(sp) = m
+    XMPYU   fht_0,fw_h,ht_temp        ; ht_temp = ht*fw_h
+    FSTD    ht_temp,-24(%sp)          ; -24(sp) = ht
+    XMPYU   flt_0,fw_l,lt_temp        ; lt_temp = lt*fw_l
+    FSTD    lt_temp,-32(%sp)          ; -32(sp) = lt
+
+    LDD     -8(%sp),m_0               ; m
+    LDD    -16(%sp),m1_0              ; m1
+    ADD,L   m_0,m1_0,tmp_0            ; tmp_0 = m + m1;
+    LDD     -24(%sp),ht_0             ; ht
+    LDD     -32(%sp),lt_0             ; lt
+
+    CMPCLR,*>>= tmp_0,m1_0,%r0        ; if (tmp_0 < m1), i.e. the sum wrapped
+    ADD,L   ht_0,top_overflow,ht_0    ; ht += (1<<32)
+
+    EXTRD,U tmp_0,31,32,m_0           ; m>>32
+    DEPD,Z  tmp_0,31,32,m1_0          ; m1 = m<<32
+
+    ADD,L   ht_0,m_0,ht_0             ; ht+= (m>>32)
+    ADD     lt_0,m1_0,tmp_0           ; tmp_0 = lt+m1;
+    ADD,DC  ht_0,%r0,ht_0             ; ht += carry
+    ADD     %ret0,tmp_0,lt_0          ; lt = lt + c;
+    ADD,DC  ht_0,%r0,ht_0             ; ht += carry
+    ADD     lt_0,rp_val,lt_0          ; lt = lt+rp[0]
+    ADD,DC  ht_0,%r0,%ret0            ; c (ret0) = ht + carry
+    STD     lt_0,0(r_ptr)             ; rp[0] = lt
+
+bn_mul_add_words_exit
+    .EXIT
+    LDD     -80(%sp),%r9              ; restore r9
+    LDD     -88(%sp),%r8              ; restore r8
+    LDD     -96(%sp),%r7              ; restore r7
+    LDD     -104(%sp),%r6             ; restore r6
+    LDD     -112(%sp),%r5             ; restore r5
+    LDD     -120(%sp),%r4             ; restore r4
+    BVE     (%rp)                     ; return to the caller
+    LDD,MB  -128(%sp),%r3             ; restore r3, sp -= 128 (delay slot)
+	.PROCEND	;in=23,24,25,26,29;out=28;
+
+;----------------------------------------------------------------------------
+;
+;BN_ULONG bn_mul_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w)
+;
+; arg0 = rp
+; arg1 = ap
+; arg2 = num
+; arg3 = w
+
+bn_mul_words
+	.proc
+	.callinfo frame=128
+    .entry
+	.EXPORT	bn_mul_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
+	.align 64
+
+    STD     %r3,0(%sp)          ; save r3
+    STD     %r4,8(%sp)          ; save r4
+    STD     %r5,16(%sp)         ; save r5
+    STD     %r6,24(%sp)         ; save r6
+
+    STD     %r7,32(%sp)         ; save r7
+    COPY    %r0,%ret0           ; return 0 by default
+    DEPDI,Z 1,31,1,top_overflow ; top_overflow = 1 << 32
+	STD     w,56(%sp)           ; w on stack (reloaded into fw below)
+
+    CMPIB,>= 0,num,bn_mul_words_exit  ; if (num <= 0) then exit
+	LDO     128(%sp),%sp       ; bump stack (branch delay slot)
+
+	;
+	; See if only 1 word to do, thus just do cleanup
+	;
+	CMPIB,= 1,num,bn_mul_words_single_top
+	FLDD    -72(%sp),fw     ; load up w into fp register fw (fw_h/fw_l)
+
+	;
+	; This loop is unrolled 2 times (64-byte aligned as well)
+	;
+	; PA-RISC 2.0 chips have two fully pipelined multipliers, thus
+    ; two 32-bit multiplies can be issued per cycle.
+    ;
+bn_mul_words_unroll2
+
+    FLDD    0(a_ptr),t_float_0        ; load up 64-bit value (fr8) ht(L)/lt(R)
+    FLDD    8(a_ptr),t_float_1        ; load up 64-bit value (fr9) ht(L)/lt(R)
+    XMPYU   fht_0,fw_l,fm1            ; m1[0] = fht_0*fw_l
+    XMPYU   fht_1,fw_l,fm1_1          ; m1[1] = fht_1*fw_l
+
+    FSTD    fm1,-16(%sp)              ; -16(sp) = m1[0]
+    FSTD    fm1_1,-48(%sp)            ; -48(sp) = m1[1]
+    XMPYU   flt_0,fw_h,fm             ; m[0] = flt_0*fw_h
+    XMPYU   flt_1,fw_h,fm_1           ; m[1] = flt_1*fw_h
+
+    FSTD    fm,-8(%sp)                ; -8(sp) = m[0]
+    FSTD    fm_1,-40(%sp)             ; -40(sp) = m[1]
+    XMPYU   fht_0,fw_h,ht_temp        ; ht_temp   = fht_0*fw_h
+    XMPYU   fht_1,fw_h,ht_temp_1      ; ht_temp_1 = fht_1*fw_h
+
+    FSTD    ht_temp,-24(%sp)          ; -24(sp) = ht_temp
+    FSTD    ht_temp_1,-56(%sp)        ; -56(sp) = ht_temp_1
+    XMPYU   flt_0,fw_l,lt_temp        ; lt_temp   = flt_0*fw_l
+    XMPYU   flt_1,fw_l,lt_temp_1      ; lt_temp_1 = flt_1*fw_l
+
+    FSTD    lt_temp,-32(%sp)          ; -32(sp) = lt_temp
+    FSTD    lt_temp_1,-64(%sp)        ; -64(sp) = lt_temp_1
+    LDD     -8(%sp),m_0               ; m[0]
+    LDD     -40(%sp),m_1              ; m[1]
+
+    LDD    -16(%sp),m1_0              ; m1[0]
+    LDD    -48(%sp),m1_1              ; m1[1]
+    LDD     -24(%sp),ht_0             ; ht[0]
+    LDD     -56(%sp),ht_1             ; ht[1]
+
+    ADD,L   m1_0,m_0,tmp_0            ; tmp_0 = m[0] + m1[0];
+    ADD,L   m1_1,m_1,tmp_1            ; tmp_1 = m[1] + m1[1];
+    LDD     -32(%sp),lt_0             ; lt[0]
+    LDD     -64(%sp),lt_1             ; lt[1]
+
+    CMPCLR,*>>= tmp_0,m1_0, %r0       ; if (tmp_0 < m1[0]), i.e. the sum wrapped
+    ADD,L   ht_0,top_overflow,ht_0    ; ht[0] += (1<<32)
+    CMPCLR,*>>= tmp_1,m1_1,%r0        ; if (tmp_1 < m1[1]), i.e. the sum wrapped
+    ADD,L   ht_1,top_overflow,ht_1    ; ht[1] += (1<<32)
+
+    EXTRD,U tmp_0,31,32,m_0           ; m[0]>>32
+    DEPD,Z  tmp_0,31,32,m1_0          ; m1[0] = m[0]<<32
+    EXTRD,U tmp_1,31,32,m_1           ; m[1]>>32
+    DEPD,Z  tmp_1,31,32,m1_1          ; m1[1] = m[1]<<32
+
+    ADD,L   ht_0,m_0,ht_0             ; ht[0] += (m[0]>>32)
+    ADD,L   ht_1,m_1,ht_1             ; ht[1] += (m[1]>>32)
+    ADD     lt_0,m1_0,lt_0            ; lt[0] = lt[0]+m1[0];
+	ADD,DC  ht_0,%r0,ht_0             ; ht[0] += carry
+
+    ADD     lt_1,m1_1,lt_1            ; lt[1] = lt[1]+m1[1];
+    ADD,DC  ht_1,%r0,ht_1             ; ht[1] += carry
+    ADD    %ret0,lt_0,lt_0            ; lt[0] = lt[0] + c (ret0);
+	ADD,DC  ht_0,%r0,ht_0             ; ht[0] += carry
+
+    ADD     ht_0,lt_1,lt_1            ; lt[1] = lt[1] + c (ht_0)
+    ADD,DC  ht_1,%r0,ht_1             ; ht[1] += carry
+    STD     lt_0,0(r_ptr)             ; rp[0] = lt[0]
+    STD     lt_1,8(r_ptr)             ; rp[1] = lt[1]
+
+	COPY    ht_1,%ret0                ; carry = ht[1]
+	LDO    -2(num),num                ; num = num - 2;
+    LDO     16(a_ptr),a_ptr           ; ap += 2
+	CMPIB,<= 2,num,bn_mul_words_unroll2 ; go again if more to do
+    LDO     16(r_ptr),r_ptr           ; rp += 2
+
+    CMPIB,=,N 0,num,bn_mul_words_exit ; are we done?
+
+	;
+	; Top of loop aligned on 64-byte boundary
+	;
+bn_mul_words_single_top
+    FLDD    0(a_ptr),t_float_0        ; load up 64-bit value (fr8) ht(L)/lt(R)
+
+    XMPYU   fht_0,fw_l,fm1            ; m1 = ht*fw_l
+    FSTD    fm1,-16(%sp)              ; -16(sp) = m1
+    XMPYU   flt_0,fw_h,fm             ; m = lt*fw_h
+    FSTD    fm,-8(%sp)                ; -8(sp) = m
+    XMPYU   fht_0,fw_h,ht_temp        ; ht_temp = ht*fw_h
+    FSTD    ht_temp,-24(%sp)          ; -24(sp) = ht
+    XMPYU   flt_0,fw_l,lt_temp        ; lt_temp = lt*fw_l
+    FSTD    lt_temp,-32(%sp)          ; -32(sp) = lt
+
+    LDD     -8(%sp),m_0               ; m
+    LDD    -16(%sp),m1_0              ; m1
+    ADD,L   m_0,m1_0,tmp_0            ; tmp_0 = m + m1;
+    LDD     -24(%sp),ht_0             ; ht
+    LDD     -32(%sp),lt_0             ; lt
+
+    CMPCLR,*>>= tmp_0,m1_0,%r0        ; if (tmp_0 < m1), i.e. the sum wrapped
+    ADD,L   ht_0,top_overflow,ht_0    ; ht += (1<<32)
+
+    EXTRD,U tmp_0,31,32,m_0           ; m>>32
+    DEPD,Z  tmp_0,31,32,m1_0          ; m1 = m<<32
+
+    ADD,L   ht_0,m_0,ht_0             ; ht+= (m>>32)
+    ADD     lt_0,m1_0,lt_0            ; lt= lt+m1;
+    ADD,DC  ht_0,%r0,ht_0             ; ht += carry
+
+    ADD     %ret0,lt_0,lt_0           ; lt = lt + c;
+    ADD,DC  ht_0,%r0,ht_0             ; ht += carry
+
+    COPY    ht_0,%ret0                ; copy carry
+    STD     lt_0,0(r_ptr)             ; rp[0] = lt
+
+bn_mul_words_exit
+    .EXIT
+    LDD     -96(%sp),%r7              ; restore r7
+    LDD     -104(%sp),%r6             ; restore r6
+    LDD     -112(%sp),%r5             ; restore r5
+    LDD     -120(%sp),%r4             ; restore r4
+    BVE     (%rp)                     ; return to the caller
+    LDD,MB  -128(%sp),%r3             ; restore r3, sp -= 128 (delay slot)
+	.PROCEND	;in=23,24,25,26,29;out=28;
+
+;----------------------------------------------------------------------------
+; Squares each input word: for i in [0,num), (rp[2i+1]:rp[2i]) = ap[i]^2.
+;void bn_sqr_words(BN_ULONG *rp, BN_ULONG *ap, int num)
+;
+; arg0 = rp   (output; two words are stored per input word)
+; arg1 = ap   (input words)
+; arg2 = num  (number of input words; nothing is stored when num <= 0)
+; Each square is lt + (2*m << 32) + (ht << 64) from three 32x32 XMPYU products.
+
+bn_sqr_words
+	.proc
+	.callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE
+	.EXPORT	bn_sqr_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
+    .entry
+	.align 64
+
+    STD     %r3,0(%sp)          ; save r3  
+    STD     %r4,8(%sp)          ; save r4  
+	NOP                        ; presumably scheduling/alignment padding - confirm
+    STD     %r5,16(%sp)         ; save r5  
+
+    CMPIB,>= 0,num,bn_sqr_words_exit  ; nothing to do when num <= 0
+	LDO     128(%sp),%sp       ; bump stack (delay slot: runs on both paths)
+
+	;
+	; If only 1, then go straight to the single-word path
+	;
+	CMPIB,= 1,num,bn_sqr_words_single_top
+    DEPDI,Z -1,32,33,high_mask   ; Create Mask 0xffffffff80000000L (delay slot)
+
+	;
+	; This loop is unrolled 2 times (64-byte aligned as well)
+	;
+
+bn_sqr_words_unroll2
+    FLDD    0(a_ptr),t_float_0        ; a[0]
+    FLDD    8(a_ptr),t_float_1        ; a[1]
+    XMPYU   fht_0,flt_0,fm            ; m[0] = hi(a[0]) * lo(a[0])
+    XMPYU   fht_1,flt_1,fm_1          ; m[1] = hi(a[1]) * lo(a[1])
+
+    FSTD    fm,-24(%sp)               ; store m[0]
+    FSTD    fm_1,-56(%sp)             ; store m[1]
+    XMPYU   flt_0,flt_0,lt_temp       ; lt[0] = lo(a[0])^2
+    XMPYU   flt_1,flt_1,lt_temp_1     ; lt[1] = lo(a[1])^2
+
+    FSTD    lt_temp,-16(%sp)          ; store lt[0]
+    FSTD    lt_temp_1,-48(%sp)        ; store lt[1]
+    XMPYU   fht_0,fht_0,ht_temp       ; ht[0] = hi(a[0])^2
+    XMPYU   fht_1,fht_1,ht_temp_1     ; ht[1] = hi(a[1])^2
+
+    FSTD    ht_temp,-8(%sp)           ; store ht[0]
+    FSTD    ht_temp_1,-40(%sp)        ; store ht[1]
+    LDD     -24(%sp),m_0              ; m[0] back into a general register
+    LDD     -56(%sp),m_1              ; m[1] back into a general register
+
+    AND     m_0,high_mask,tmp_0       ; m[0] & Mask
+    AND     m_1,high_mask,tmp_1       ; m[1] & Mask
+    DEPD,Z  m_0,30,31,m_0             ; m[0] << 32+1
+    DEPD,Z  m_1,30,31,m_1             ; m[1] << 32+1
+
+    LDD     -16(%sp),lt_0        
+    LDD     -48(%sp),lt_1        
+    EXTRD,U tmp_0,32,33,tmp_0         ; tmp_0 = m[0]&Mask >> 32-1
+    EXTRD,U tmp_1,32,33,tmp_1         ; tmp_1 = m[1]&Mask >> 32-1
+
+    LDD     -8(%sp),ht_0            
+    LDD     -40(%sp),ht_1           
+    ADD,L   ht_0,tmp_0,ht_0           ; ht[0] += tmp_0
+    ADD,L   ht_1,tmp_1,ht_1           ; ht[1] += tmp_1
+
+    ADD     lt_0,m_0,lt_0             ; lt[0] = lt[0]+m[0]
+    ADD,DC  ht_0,%r0,ht_0             ; ht[0] += carry out of lt[0]
+    STD     lt_0,0(r_ptr)             ; rp[0] = lt[0]
+    STD     ht_0,8(r_ptr)             ; rp[1] = ht[0]
+
+    ADD     lt_1,m_1,lt_1             ; lt[1] = lt[1]+m[1]
+    ADD,DC  ht_1,%r0,ht_1             ; ht[1] += carry out of lt[1]
+    STD     lt_1,16(r_ptr)            ; rp[2] = lt[1]
+    STD     ht_1,24(r_ptr)            ; rp[3] = ht[1]
+
+	LDO    -2(num),num                ; num = num - 2;
+    LDO     16(a_ptr),a_ptr           ; ap += 2
+	CMPIB,<= 2,num,bn_sqr_words_unroll2
+    LDO     32(r_ptr),r_ptr           ; rp += 4 (delay slot)
+
+    CMPIB,=,N 0,num,bn_sqr_words_exit ; are we done?
+
+	;
+	; Single-word path: handles num == 1 and the odd word left by the loop
+	;
+bn_sqr_words_single_top
+    FLDD    0(a_ptr),t_float_0        ; load up 64-bit value (fr8L) ht(L)/lt(R)
+
+    XMPYU   fht_0,flt_0,fm            ; m
+    FSTD    fm,-24(%sp)               ; store m
+
+    XMPYU   flt_0,flt_0,lt_temp       ; lt
+    FSTD    lt_temp,-16(%sp)          ; store lt
+
+    XMPYU   fht_0,fht_0,ht_temp       ; ht
+    FSTD    ht_temp,-8(%sp)           ; store ht
+
+    LDD     -24(%sp),m_0              ; load m
+    AND     m_0,high_mask,tmp_0       ; m & Mask
+    DEPD,Z  m_0,30,31,m_0             ; m << 32+1
+    LDD     -16(%sp),lt_0             ; lt
+
+    LDD     -8(%sp),ht_0              ; ht
+    EXTRD,U tmp_0,32,33,tmp_0         ; tmp_0 = m&Mask >> 32-1
+    ADD     m_0,lt_0,lt_0             ; lt = lt+m
+    ADD,L   ht_0,tmp_0,ht_0           ; ht += tmp_0 (ADD,L keeps the carry above)
+    ADD,DC  ht_0,%r0,ht_0             ; ht += carry out of lt
+
+    STD     lt_0,0(r_ptr)             ; rp[0] = lt
+    STD     ht_0,8(r_ptr)             ; rp[1] = ht
+
+bn_sqr_words_exit
+    .EXIT
+    LDD     -112(%sp),%r5       ; restore r5  
+    LDD     -120(%sp),%r4       ; restore r4  
+    BVE     (%rp)
+    LDD,MB  -128(%sp),%r3       ; pop the 128-byte frame, restore r3 (delay slot)
+	.PROCEND	;in=23,24,25,26,29;out=28;
+
+
+;----------------------------------------------------------------------------
+; Word-wise r[i] = a[i] + b[i] with carry propagation; returns the final carry.
+;BN_ULONG bn_add_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
+;
+; arg0 = rp 
+; arg1 = ap
+; arg2 = bp 
+; arg3 = n    (when n <= 0 nothing is stored and 0 is returned)
+
+t  .reg %r22                          ; a[i], then a[i] + incoming carry
+b  .reg %r21                          ; b[i]
+l  .reg %r20                          ; sum word stored to r[i]
+
+bn_add_words
+	.proc
+    .entry
+	.callinfo
+	.EXPORT	bn_add_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
+	.align 64
+
+    CMPIB,>= 0,n,bn_add_words_exit  ; leave early when n <= 0
+    COPY    %r0,%ret0           ; return 0 by default (delay slot: c = 0)
+
+	;
+	; If 2 or more numbers do the loop
+	;
+	CMPIB,= 1,n,bn_add_words_single_top
+	NOP                              ; delay slot
+
+	;
+	; This loop is unrolled 2 times (64-byte aligned as well)
+	;
+bn_add_words_unroll2
+	LDD     0(a_ptr),t
+	LDD     0(b_ptr),b
+	ADD     t,%ret0,t                    ; t = a[0]+c;
+	ADD,DC  %r0,%r0,%ret0                ; set c to carry
+	ADD     t,b,l                        ; l = t + b[0]
+	ADD,DC  %ret0,%r0,%ret0              ; c+= carry
+	STD     l,0(r_ptr)                   ; r[0] = l
+
+	LDD     8(a_ptr),t
+	LDD     8(b_ptr),b
+	ADD     t,%ret0,t                     ; t = a[1]+c;
+	ADD,DC  %r0,%r0,%ret0                 ; set c to carry
+	ADD     t,b,l                         ; l = t + b[1]
+	ADD,DC  %ret0,%r0,%ret0               ; c+= carry
+	STD     l,8(r_ptr)                    ; r[1] = l
+
+	LDO     -2(n),n
+	LDO     16(a_ptr),a_ptr
+	LDO     16(b_ptr),b_ptr
+
+	CMPIB,<= 2,n,bn_add_words_unroll2
+	LDO     16(r_ptr),r_ptr               ; rp += 2 (delay slot)
+
+    CMPIB,=,N 0,n,bn_add_words_exit ; are we done?
+
+bn_add_words_single_top
+	LDD     0(a_ptr),t
+	LDD     0(b_ptr),b
+
+	ADD     t,%ret0,t                 ; t = t+c;
+	ADD,DC  %r0,%r0,%ret0             ; set c to carry (could use CMPCLR??)
+	ADD     t,b,l                     ; l = t + b[0]
+	ADD,DC  %ret0,%r0,%ret0           ; c+= carry
+	STD     l,0(r_ptr)                ; r[0] = l
+
+bn_add_words_exit
+    .EXIT
+    BVE     (%rp)                     ; return, carry in %ret0
+	NOP                               ; delay slot
+	.PROCEND	;in=23,24,25,26,29;out=28;
+
+;----------------------------------------------------------------------------
+; Word-wise r[i] = a[i] - b[i] - borrow; returns the final borrow in %ret0.
+;BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
+;
+; arg0 = rp 
+; arg1 = ap
+; arg2 = bp 
+; arg3 = n    (when n <= 0 nothing is stored and 0 is returned)
+
+t1       .reg %r22                  ; a[i]
+t2       .reg %r21                  ; b[i]
+sub_tmp1 .reg %r20                  ; difference word stored to r[i]
+sub_tmp2 .reg %r19                  ; candidate borrow: 0 if t1 > t2, else 1
+
+
+bn_sub_words
+	.proc
+	.callinfo 
+	.EXPORT	bn_sub_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
+    .entry
+	.align 64
+
+    CMPIB,>=  0,n,bn_sub_words_exit  ; leave early when n <= 0
+    COPY    %r0,%ret0           ; return 0 by default (delay slot: c = 0)
+
+	;
+	; If 2 or more numbers do the loop
+	;
+	CMPIB,= 1,n,bn_sub_words_single_top
+	NOP                              ; delay slot
+
+	;
+	; This loop is unrolled 2 times (64-byte aligned as well)
+	;
+bn_sub_words_unroll2
+	LDD     0(a_ptr),t1
+	LDD     0(b_ptr),t2
+	SUB     t1,t2,sub_tmp1           ; t3 = t1-t2; 
+	SUB     sub_tmp1,%ret0,sub_tmp1  ; t3 = t3- c; 
+
+	CMPCLR,*>> t1,t2,sub_tmp2        ; clear if t1 > t2
+	LDO      1(%r0),sub_tmp2         ; otherwise sub_tmp2 = 1
+	
+	CMPCLR,*= t1,t2,%r0              ; t1 == t2: skip the copy, keep old c
+	COPY    sub_tmp2,%ret0           ; t1 != t2: c = (t1 < t2)
+	STD     sub_tmp1,0(r_ptr)        ; r[0] = t3
+
+	LDD     8(a_ptr),t1
+	LDD     8(b_ptr),t2
+	SUB     t1,t2,sub_tmp1            ; t3 = t1-t2; 
+	SUB     sub_tmp1,%ret0,sub_tmp1   ; t3 = t3- c; 
+	CMPCLR,*>> t1,t2,sub_tmp2         ; clear if t1 > t2
+	LDO      1(%r0),sub_tmp2          ; otherwise sub_tmp2 = 1
+	
+	CMPCLR,*= t1,t2,%r0               ; t1 == t2: skip the copy, keep old c
+	COPY    sub_tmp2,%ret0            ; t1 != t2: c = (t1 < t2)
+	STD     sub_tmp1,8(r_ptr)         ; r[1] = t3
+
+	LDO     -2(n),n
+	LDO     16(a_ptr),a_ptr
+	LDO     16(b_ptr),b_ptr
+
+	CMPIB,<= 2,n,bn_sub_words_unroll2
+	LDO     16(r_ptr),r_ptr           ; rp += 2 (delay slot)
+
+    CMPIB,=,N 0,n,bn_sub_words_exit ; are we done?
+
+bn_sub_words_single_top
+	LDD     0(a_ptr),t1
+	LDD     0(b_ptr),t2
+	SUB     t1,t2,sub_tmp1            ; t3 = t1-t2; 
+	SUB     sub_tmp1,%ret0,sub_tmp1   ; t3 = t3- c; 
+	CMPCLR,*>> t1,t2,sub_tmp2         ; clear if t1 > t2
+	LDO      1(%r0),sub_tmp2          ; otherwise sub_tmp2 = 1
+	
+	CMPCLR,*= t1,t2,%r0               ; t1 == t2: skip the copy, keep old c
+	COPY    sub_tmp2,%ret0            ; t1 != t2: c = (t1 < t2)
+
+	STD     sub_tmp1,0(r_ptr)         ; r[0] = t3
+
+bn_sub_words_exit
+    .EXIT
+    BVE     (%rp)                     ; return, borrow in %ret0
+	NOP                               ; delay slot
+	.PROCEND	;in=23,24,25,26,29;out=28;
+
+;------------------------------------------------------------------------------
+; Divides the two-word value h:l by d and returns the quotient word.
+; unsigned long bn_div_words(unsigned long h, unsigned long l, unsigned long d)
+;
+; arg0 = h
+; arg1 = l
+; arg2 = d
+; Returns -1 when d == 0.  With i = BN_num_bits_word(d), i != 64 and h > 1<<i
+; This is mainly just modified assembly from the compiler, thus the
+; lack of variable names.
+; takes bn_div_err_case: fprintf of "Division would overflow (%d)", then abort.
+;------------------------------------------------------------------------------
+bn_div_words
+	.proc
+	.callinfo CALLER,FRAME=272,ENTRY_GR=%r10,SAVE_RP,ARGS_SAVED,ORDERING_AWARE
+	.EXPORT	bn_div_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
+	.IMPORT	BN_num_bits_word,CODE,NO_RELOCATION
+	.IMPORT	__iob,DATA
+	.IMPORT	fprintf,CODE,NO_RELOCATION
+	.IMPORT	abort,CODE,NO_RELOCATION
+	.IMPORT	$$div2U,MILLICODE
+    .entry
+    STD     %r2,-16(%r30)               ; save return pointer
+    STD,MA  %r3,352(%r30)               ; save r3, then sp += 352
+    STD     %r4,-344(%r30)              ; save r4
+    STD     %r5,-336(%r30)              ; save r5
+    STD     %r6,-328(%r30)              ; save r6
+    STD     %r7,-320(%r30)              ; save r7
+    STD     %r8,-312(%r30)              ; save r8
+    STD     %r9,-304(%r30)              ; save r9
+    STD     %r10,-296(%r30)             ; save r10
+
+    STD     %r27,-288(%r30)             ; save gp
+
+    COPY    %r24,%r3           ; save d 
+    COPY    %r26,%r4           ; save h (high 64-bits)
+    LDO      -1(%r0),%ret0     ; return -1 by default	
+
+    CMPB,*=  %r0,%arg2,$D3     ; if (d == 0) return the -1 set above
+    COPY    %r25,%r5           ; save l (low 64-bits) (delay slot)
+
+    LDO     -48(%r30),%r29     ; create ap 
+    .CALL   ;in=26,29;out=28;
+    B,L     BN_num_bits_word,%r2   ; i = BN_num_bits_word(d)
+    COPY    %r3,%r26               ; arg0 = d (delay slot)
+    LDD     -288(%r30),%r27    ; restore gp 
+    LDI     64,%r21 
+
+    CMPB,=  %r21,%ret0,$00000012   ;if (i == 64) (forward) 
+    COPY    %ret0,%r24             ; i   
+    MTSARCM %r24    
+    DEPDI,Z -1,%sar,1,%r29         ; r29 = 1<<i, per the compare below
+    CMPB,*<<,N %r29,%r4,bn_div_err_case ; if (h > 1<<i) (forward) 
+
+$00000012
+    SUBI    64,%r24,%r31                       ; i = 64 - i;
+    CMPCLR,*<< %r4,%r3,%r0                     ; if (h >= d)
+    SUB     %r4,%r3,%r4                        ; h -= d
+    CMPB,=  %r31,%r0,$0000001A                 ; if (i)
+    COPY    %r0,%r10                           ; ret = 0
+    MTSARCM %r31                               ; i to shift
+    DEPD,Z  %r3,%sar,64,%r3                    ; d <<= i;
+    SUBI    64,%r31,%r19                       ; 64 - i; redundant
+    MTSAR   %r19                               ; (64 -i) to shift
+    SHRPD   %r4,%r5,%sar,%r4                   ; h = (h<<i) | (l >> (64-i))
+    MTSARCM %r31                               ; i to shift
+    DEPD,Z  %r5,%sar,64,%r5                    ; l <<= i;
+
+$0000001A
+    DEPDI,Z -1,31,32,%r19                      ; r19 = 0xffffffff00000000
+    EXTRD,U %r3,31,32,%r6                      ; dh = d >> 32
+    EXTRD,U %r3,63,32,%r8                      ; dl = d & 0xffffffff
+    LDO     2(%r0),%r9                         ; count = 2 passes
+    STD    %r3,-280(%r30)                      ; "d" to stack
+
+$0000001C
+    DEPDI,Z -1,63,32,%r29                      ; q = 0x00000000ffffffff
+    EXTRD,U %r4,31,32,%r31                     ; h >> 32
+    CMPB,*=,N  %r31,%r6,$D2     	       ; if ((h>>32) != dh)(forward) div
+    COPY    %r4,%r26       
+    EXTRD,U %r4,31,32,%r25 
+    COPY    %r6,%r24      
+    .CALL   ;in=23,24,25,26;out=20,21,22,28,29; (MILLICALL)
+    B,L     $$div2U,%r2     
+    EXTRD,U %r6,31,32,%r23  
+    DEPD    %r28,31,32,%r29        ; NOTE(review): looks like it merges the two result halves into q - confirm
+$D2
+    STD     %r29,-272(%r30)                   ; q
+    AND     %r5,%r19,%r24                   ; t & 0xffffffff00000000;
+    EXTRD,U %r24,31,32,%r24                 ; r24 = upper 32 bits of l
+    FLDD    -272(%r30),%fr7                 ; q
+    FLDD    -280(%r30),%fr8                 ; d
+    XMPYU   %fr8L,%fr7L,%fr10  
+    FSTD    %fr10,-256(%r30)   
+    XMPYU   %fr8L,%fr7R,%fr22  
+    FSTD    %fr22,-264(%r30)   
+    XMPYU   %fr8R,%fr7L,%fr11 
+    XMPYU   %fr8R,%fr7R,%fr23
+    FSTD    %fr11,-232(%r30)
+    FSTD    %fr23,-240(%r30)
+    LDD     -256(%r30),%r28
+    DEPD,Z  %r28,31,32,%r2 
+    LDD     -264(%r30),%r20
+    ADD,L   %r20,%r2,%r31   
+    LDD     -232(%r30),%r22 
+    DEPD,Z  %r22,31,32,%r22 
+    LDD     -240(%r30),%r21 
+    B       $00000024       ; enter loop  
+    ADD,L   %r21,%r22,%r23  ; (delay slot)
+
+$0000002A
+    LDO     -1(%r29),%r29   ; q--
+    SUB     %r23,%r8,%r23   ; r23 -= dl
+$00000024
+    SUB     %r4,%r31,%r25   
+    AND     %r25,%r19,%r26  
+    CMPB,*<>,N      %r0,%r26,$00000046  ; (forward)
+    DEPD,Z  %r25,31,32,%r20 
+    OR      %r20,%r24,%r21  
+    CMPB,*<<,N  %r21,%r23,$0000002A ;(backward) 
+    SUB     %r31,%r6,%r31   ; r31 -= dh (delay slot)
+;-------------Break path---------------------
+
+$00000046
+    DEPD,Z  %r23,31,32,%r25              ;tl
+    EXTRD,U %r23,31,32,%r26              ;t
+    AND     %r25,%r19,%r24               ;tl = (tl<<32)&0xffffffff00000000
+    ADD,L   %r31,%r26,%r31               ;th += t; 
+    CMPCLR,*>>=     %r5,%r24,%r0         ;if (l<tl)
+    LDO     1(%r31),%r31                 ; th++;
+    CMPB,*<<=,N     %r31,%r4,$00000036   ;if (n < th) (forward)
+    LDO     -1(%r29),%r29                ;q--; 
+    ADD,L   %r4,%r3,%r4                  ;h += d;
+$00000036
+    ADDIB,=,N       -1,%r9,$D1 ;if (--count == 0) break (forward) 
+    SUB     %r5,%r24,%r28                ; l -= tl;
+    SUB     %r4,%r31,%r24                ; h -= th;
+    SHRPD   %r24,%r28,32,%r4             ; h = ((h<<32)|(l>>32));
+    DEPD,Z  %r29,31,32,%r10              ; ret = q<<32
+    b      $0000001C
+    DEPD,Z  %r28,31,32,%r5               ; l = l << 32 
+
+$D1
+    OR      %r10,%r29,%r28           ; ret |= q
+$D3
+    LDD     -368(%r30),%r2           ; reload the return pointer saved at entry
+$D0
+    LDD     -296(%r30),%r10          ; restore r10
+    LDD     -304(%r30),%r9           ; restore r9
+    LDD     -312(%r30),%r8           ; restore r8
+    LDD     -320(%r30),%r7           ; restore r7
+    LDD     -328(%r30),%r6           ; restore r6
+    LDD     -336(%r30),%r5           ; restore r5
+    LDD     -344(%r30),%r4           ; restore r4
+    BVE     (%r2)   
+        .EXIT
+    LDD,MB  -352(%r30),%r3           ; sp -= 352, restore r3 (delay slot)
+
+bn_div_err_case
+    MFIA    %r6     
+    ADDIL   L'bn_div_words-bn_div_err_case,%r6,%r1 
+    LDO     R'bn_div_words-bn_div_err_case(%r1),%r6  
+    ADDIL   LT'__iob,%r27,%r1       
+    LDD     RT'__iob(%r1),%r26      
+    ADDIL   L'C$4-bn_div_words,%r6,%r1    
+    LDO     R'C$4-bn_div_words(%r1),%r25  ; r25 = address of the C$4 message
+    LDO     64(%r26),%r26   ; presumably selects stderr within __iob - confirm
+    .CALL           ;in=24,25,26,29;out=28;
+    B,L     fprintf,%r2    
+    LDO     -48(%r30),%r29 
+    LDD     -288(%r30),%r27 ; restore gp
+    .CALL           ;in=29;
+    B,L     abort,%r2      
+    LDO     -48(%r30),%r29 
+    LDD     -288(%r30),%r27 ; restore gp
+    B       $D0         
+    LDD     -368(%r30),%r2  ; reload the return pointer (delay slot)
+	.PROCEND	;in=24,25,26,29;out=28;
+
+;----------------------------------------------------------------------------
+;
+; Registers to hold 64-bit values to manipulate.  The "L" part
+; of the register corresponds to the upper 32-bits, while the "R"
+; part corresponds to the lower 32-bits
+; 
+; Note that when using b6 and b7, the code must save these before
+; using them because they are callee-save registers
+; (bn_mul_comba8 and bn_mul_comba4 spill %fr12/%fr13 at entry).
+;
+; Floating point registers to use to save values that
+; are manipulated.  These don't collide with ftemp1-6 and
+; are all caller save registers
+;
+a0        .reg %fr22
+a0L       .reg %fr22L
+a0R       .reg %fr22R
+
+a1        .reg %fr23
+a1L       .reg %fr23L
+a1R       .reg %fr23R
+
+a2        .reg %fr24
+a2L       .reg %fr24L
+a2R       .reg %fr24R
+
+a3        .reg %fr25
+a3L       .reg %fr25L
+a3R       .reg %fr25R
+
+a4        .reg %fr26
+a4L       .reg %fr26L
+a4R       .reg %fr26R
+
+a5        .reg %fr27
+a5L       .reg %fr27L
+a5R       .reg %fr27R
+
+a6        .reg %fr28
+a6L       .reg %fr28L
+a6R       .reg %fr28R
+
+a7        .reg %fr29
+a7L       .reg %fr29L
+a7R       .reg %fr29R
+
+b0        .reg %fr30
+b0L       .reg %fr30L
+b0R       .reg %fr30R
+
+b1        .reg %fr31
+b1L       .reg %fr31L
+b1R       .reg %fr31R
+
+;
+; Temporary floating point variables, these are all caller save
+; registers
+;
+ftemp1    .reg %fr4
+ftemp2    .reg %fr5
+ftemp3    .reg %fr6
+ftemp4    .reg %fr7
+
+;
+; The B set of registers when used.
+;
+
+b2        .reg %fr8
+b2L       .reg %fr8L
+b2R       .reg %fr8R
+
+b3        .reg %fr9
+b3L       .reg %fr9L
+b3R       .reg %fr9R
+
+b4        .reg %fr10
+b4L       .reg %fr10L
+b4R       .reg %fr10R
+
+b5        .reg %fr11
+b5L       .reg %fr11L
+b5R       .reg %fr11R
+
+b6        .reg %fr12
+b6L       .reg %fr12L
+b6R       .reg %fr12R
+
+b7        .reg %fr13
+b7L       .reg %fr13L
+b7R       .reg %fr13R
+
+c1           .reg %r21   ; only reg
+temp1        .reg %r20   ; only reg
+temp2        .reg %r19   ; only reg
+temp3        .reg %r31   ; only reg
+
+m1           .reg %r28   ; second cross product in SQR_ADD_C2 / MUL_ADD_C
+c2           .reg %r23   ; accumulator word (rotates with c1 and c3)
+high_one     .reg %r1    ; 1 << 32, created at entry of the comba routines
+ht           .reg %r6    ; high product word; r6 is saved by the comba routines
+lt           .reg %r5    ; low product word; r5 is saved by the comba routines
+m            .reg %r4    ; cross product; r4 is saved by the comba routines
+c3           .reg %r3    ; accumulator word; r3 is saved by the comba routines
+; SQR_ADD_C: (C3:C2:C1) += (A0L:A0R)^2.  Uses high_mask, m, lt, ht, temp1-3.
+SQR_ADD_C  .macro  A0L,A0R,C1,C2,C3
+    XMPYU   A0L,A0R,ftemp1       ; m = hi*lo
+    FSTD    ftemp1,-24(%sp)      ; store m
+
+    XMPYU   A0R,A0R,ftemp2       ; lt = lo*lo
+    FSTD    ftemp2,-16(%sp)      ; store lt
+
+    XMPYU   A0L,A0L,ftemp3       ; ht = hi*hi
+    FSTD    ftemp3,-8(%sp)       ; store ht
+
+    LDD     -24(%sp),m           ; load m
+    AND     m,high_mask,temp2    ; m & Mask
+    DEPD,Z  m,30,31,temp3        ; m << 32+1
+    LDD     -16(%sp),lt          ; lt
+
+    LDD     -8(%sp),ht           ; ht
+    EXTRD,U temp2,32,33,temp1    ; temp1 = m&Mask >> 32-1
+    ADD     temp3,lt,lt          ; lt = lt+m
+    ADD,L   ht,temp1,ht          ; ht += temp1 (ADD,L keeps the carry above)
+    ADD,DC  ht,%r0,ht            ; ht += carry out of lt
+
+    ADD     C1,lt,C1             ; c1=c1+lt
+    ADD,DC  ht,%r0,ht            ; ht += carry out of c1
+
+    ADD     C2,ht,C2             ; c2=c2+ht
+    ADD,DC  C3,%r0,C3            ; c3 += carry out of c2
+.endm
+; SQR_ADD_C2: (C3:C2:C1) += 2 * (A0L:A0R) * (A1L:A1R).  Uses high_one (1<<32).
+SQR_ADD_C2 .macro  A0L,A0R,A1L,A1R,C1,C2,C3
+    XMPYU   A0L,A1R,ftemp1          ; m1 = hi(a0)*lo(a1)
+    FSTD    ftemp1,-16(%sp)         ;
+    XMPYU   A0R,A1L,ftemp2          ; m = lo(a0)*hi(a1)
+    FSTD    ftemp2,-8(%sp)          ;
+    XMPYU   A0R,A1R,ftemp3          ; lt = lo(a0)*lo(a1)
+    FSTD    ftemp3,-32(%sp)
+    XMPYU   A0L,A1L,ftemp4          ; ht = hi(a0)*hi(a1)
+    FSTD    ftemp4,-24(%sp)         ;
+
+    LDD     -8(%sp),m               ; m
+    LDD     -16(%sp),m1             ; m1
+    ADD,L   m,m1,m                  ; m+m1
+
+    DEPD,Z  m,31,32,temp3           ; (m+m1<<32)
+    LDD     -24(%sp),ht             ; ht
+
+    CMPCLR,*>>= m,m1,%r0            ; if (m < m1)
+    ADD,L   ht,high_one,ht          ; ht+=high_one
+
+    EXTRD,U m,31,32,temp1           ; m >> 32
+    LDD     -32(%sp),lt             ; lt
+    ADD,L   ht,temp1,ht             ; ht+= m>>32
+    ADD     lt,temp3,lt             ; lt = lt + (m<<32)
+    ADD,DC  ht,%r0,ht               ; ht += carry out of lt
+
+    ADD     ht,ht,ht                ; ht=ht+ht;
+    ADD,DC  C3,%r0,C3               ; add in carry (c3++)
+
+    ADD     lt,lt,lt                ; lt=lt+lt;
+    ADD,DC  ht,%r0,ht               ; add in carry (ht++)
+
+    ADD     C1,lt,C1                ; c1=c1+lt
+    ADD,DC,*NUV ht,%r0,ht           ; add in carry (ht++)
+    LDO     1(C3),C3              ; bump c3 if overflow,nullify otherwise
+
+    ADD     C2,ht,C2                ; c2 = c2 + ht
+    ADD,DC  C3,%r0,C3             ; add in carry (c3++)
+.endm
+
+; r[0..15] = a[0..7]^2, accumulated one result word (column) at a time.
+;void bn_sqr_comba8(BN_ULONG *r, BN_ULONG *a)
+; arg0 = r_ptr
+; arg1 = a_ptr
+; c1/c2/c3 rotate as the three-word accumulator; the lowest is stored per column.
+
+bn_sqr_comba8
+	.PROC
+	.CALLINFO FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE
+	.EXPORT	bn_sqr_comba8,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
+    .ENTRY
+	.align 64
+
+    STD     %r3,0(%sp)          ; save r3
+    STD     %r4,8(%sp)          ; save r4
+    STD     %r5,16(%sp)         ; save r5
+    STD     %r6,24(%sp)         ; save r6
+
+	;
+	; Zero out carries
+	;
+	COPY     %r0,c1
+	COPY     %r0,c2
+	COPY     %r0,c3
+
+	LDO      128(%sp),%sp       ; bump stack
+    DEPDI,Z -1,32,33,high_mask   ; Create Mask 0xffffffff80000000L
+    DEPDI,Z  1,31,1,high_one     ; Create Value  1 << 32
+
+	;
+	; Load up all of the values we are going to use
+	;
+    FLDD     0(a_ptr),a0       
+    FLDD     8(a_ptr),a1       
+    FLDD    16(a_ptr),a2       
+    FLDD    24(a_ptr),a3       
+    FLDD    32(a_ptr),a4       
+    FLDD    40(a_ptr),a5       
+    FLDD    48(a_ptr),a6       
+    FLDD    56(a_ptr),a7       
+
+	SQR_ADD_C a0L,a0R,c1,c2,c3
+	STD     c1,0(r_ptr)          ; r[0] = c1;
+	COPY    %r0,c1               ; c1 restarts as the top accumulator word
+
+	SQR_ADD_C2 a1L,a1R,a0L,a0R,c2,c3,c1
+	STD     c2,8(r_ptr)          ; r[1] = c2;
+	COPY    %r0,c2
+
+	SQR_ADD_C a1L,a1R,c3,c1,c2
+	SQR_ADD_C2 a2L,a2R,a0L,a0R,c3,c1,c2
+	STD     c3,16(r_ptr)            ; r[2] = c3;
+	COPY    %r0,c3
+
+	SQR_ADD_C2 a3L,a3R,a0L,a0R,c1,c2,c3
+	SQR_ADD_C2 a2L,a2R,a1L,a1R,c1,c2,c3
+	STD     c1,24(r_ptr)           ; r[3] = c1;
+	COPY    %r0,c1
+
+	SQR_ADD_C a2L,a2R,c2,c3,c1
+	SQR_ADD_C2 a3L,a3R,a1L,a1R,c2,c3,c1
+	SQR_ADD_C2 a4L,a4R,a0L,a0R,c2,c3,c1
+	STD     c2,32(r_ptr)          ; r[4] = c2;
+	COPY    %r0,c2
+
+	SQR_ADD_C2 a5L,a5R,a0L,a0R,c3,c1,c2
+	SQR_ADD_C2 a4L,a4R,a1L,a1R,c3,c1,c2
+	SQR_ADD_C2 a3L,a3R,a2L,a2R,c3,c1,c2
+	STD     c3,40(r_ptr)          ; r[5] = c3;
+	COPY    %r0,c3
+
+	SQR_ADD_C a3L,a3R,c1,c2,c3
+	SQR_ADD_C2 a4L,a4R,a2L,a2R,c1,c2,c3
+	SQR_ADD_C2 a5L,a5R,a1L,a1R,c1,c2,c3
+	SQR_ADD_C2 a6L,a6R,a0L,a0R,c1,c2,c3
+	STD     c1,48(r_ptr)          ; r[6] = c1;
+	COPY    %r0,c1
+
+	SQR_ADD_C2 a7L,a7R,a0L,a0R,c2,c3,c1
+	SQR_ADD_C2 a6L,a6R,a1L,a1R,c2,c3,c1
+	SQR_ADD_C2 a5L,a5R,a2L,a2R,c2,c3,c1
+	SQR_ADD_C2 a4L,a4R,a3L,a3R,c2,c3,c1
+	STD     c2,56(r_ptr)          ; r[7] = c2;
+	COPY    %r0,c2
+
+	SQR_ADD_C a4L,a4R,c3,c1,c2
+	SQR_ADD_C2 a5L,a5R,a3L,a3R,c3,c1,c2
+	SQR_ADD_C2 a6L,a6R,a2L,a2R,c3,c1,c2
+	SQR_ADD_C2 a7L,a7R,a1L,a1R,c3,c1,c2
+	STD     c3,64(r_ptr)          ; r[8] = c3;
+	COPY    %r0,c3
+
+	SQR_ADD_C2 a7L,a7R,a2L,a2R,c1,c2,c3
+	SQR_ADD_C2 a6L,a6R,a3L,a3R,c1,c2,c3
+	SQR_ADD_C2 a5L,a5R,a4L,a4R,c1,c2,c3
+	STD     c1,72(r_ptr)          ; r[9] = c1;
+	COPY    %r0,c1
+
+	SQR_ADD_C a5L,a5R,c2,c3,c1
+	SQR_ADD_C2 a6L,a6R,a4L,a4R,c2,c3,c1
+	SQR_ADD_C2 a7L,a7R,a3L,a3R,c2,c3,c1
+	STD     c2,80(r_ptr)          ; r[10] = c2;
+	COPY    %r0,c2
+
+	SQR_ADD_C2 a7L,a7R,a4L,a4R,c3,c1,c2
+	SQR_ADD_C2 a6L,a6R,a5L,a5R,c3,c1,c2
+	STD     c3,88(r_ptr)          ; r[11] = c3;
+	COPY    %r0,c3
+	
+	SQR_ADD_C a6L,a6R,c1,c2,c3
+	SQR_ADD_C2 a7L,a7R,a5L,a5R,c1,c2,c3
+	STD     c1,96(r_ptr)          ; r[12] = c1;
+	COPY    %r0,c1
+
+	SQR_ADD_C2 a7L,a7R,a6L,a6R,c2,c3,c1
+	STD     c2,104(r_ptr)         ; r[13] = c2;
+	COPY    %r0,c2
+
+	SQR_ADD_C a7L,a7R,c3,c1,c2
+	STD     c3, 112(r_ptr)       ; r[14] = c3
+	STD     c1, 120(r_ptr)       ; r[15] = c1
+
+    .EXIT
+    LDD     -104(%sp),%r6        ; restore r6
+    LDD     -112(%sp),%r5        ; restore r5
+    LDD     -120(%sp),%r4        ; restore r4
+    BVE     (%rp)
+    LDD,MB  -128(%sp),%r3        ; pop the 128-byte frame, restore r3 (delay slot)
+
+	.PROCEND	
+
+;-----------------------------------------------------------------------------
+; r[0..7] = a[0..3]^2, accumulated one result word (column) at a time.
+;void bn_sqr_comba4(BN_ULONG *r, BN_ULONG *a)
+; arg0 = r_ptr
+; arg1 = a_ptr
+; c1/c2/c3 rotate as the three-word accumulator; the lowest is stored per column.
+
+bn_sqr_comba4
+	.proc
+	.callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE
+	.EXPORT	bn_sqr_comba4,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
+    .entry
+	.align 64
+    STD     %r3,0(%sp)          ; save r3
+    STD     %r4,8(%sp)          ; save r4
+    STD     %r5,16(%sp)         ; save r5
+    STD     %r6,24(%sp)         ; save r6
+
+	;
+	; Zero out carries
+	;
+	COPY     %r0,c1
+	COPY     %r0,c2
+	COPY     %r0,c3
+
+	LDO      128(%sp),%sp       ; bump stack
+    DEPDI,Z -1,32,33,high_mask   ; Create Mask 0xffffffff80000000L
+    DEPDI,Z  1,31,1,high_one     ; Create Value  1 << 32
+
+	;
+	; Load up all of the values we are going to use
+	;
+    FLDD     0(a_ptr),a0       
+    FLDD     8(a_ptr),a1       
+    FLDD    16(a_ptr),a2       
+    FLDD    24(a_ptr),a3       
+	; Only a0-a3 are consumed below.  a4-a7 are deliberately NOT loaded:
+	; reading 32(a_ptr) through 56(a_ptr) would fetch 32 bytes past the
+	; end of a four-word input, which is an out-of-bounds read whenever
+	; the caller's array is exactly four words long.
+
+	SQR_ADD_C a0L,a0R,c1,c2,c3
+
+	STD     c1,0(r_ptr)          ; r[0] = c1;
+	COPY    %r0,c1               ; c1 restarts as the top accumulator word
+
+	SQR_ADD_C2 a1L,a1R,a0L,a0R,c2,c3,c1
+
+	STD     c2,8(r_ptr)          ; r[1] = c2;
+	COPY    %r0,c2
+
+	SQR_ADD_C a1L,a1R,c3,c1,c2
+	SQR_ADD_C2 a2L,a2R,a0L,a0R,c3,c1,c2
+
+	STD     c3,16(r_ptr)            ; r[2] = c3;
+	COPY    %r0,c3
+
+	SQR_ADD_C2 a3L,a3R,a0L,a0R,c1,c2,c3
+	SQR_ADD_C2 a2L,a2R,a1L,a1R,c1,c2,c3
+
+	STD     c1,24(r_ptr)           ; r[3] = c1;
+	COPY    %r0,c1
+
+	SQR_ADD_C a2L,a2R,c2,c3,c1
+	SQR_ADD_C2 a3L,a3R,a1L,a1R,c2,c3,c1
+
+	STD     c2,32(r_ptr)           ; r[4] = c2;
+	COPY    %r0,c2
+
+	SQR_ADD_C2 a3L,a3R,a2L,a2R,c3,c1,c2
+	STD     c3,40(r_ptr)           ; r[5] = c3;
+	COPY    %r0,c3
+
+	SQR_ADD_C a3L,a3R,c1,c2,c3
+	STD     c1,48(r_ptr)           ; r[6] = c1;
+	STD     c2,56(r_ptr)           ; r[7] = c2;
+
+    .EXIT
+    LDD     -104(%sp),%r6        ; restore r6
+    LDD     -112(%sp),%r5        ; restore r5
+    LDD     -120(%sp),%r4        ; restore r4
+    BVE     (%rp)
+    LDD,MB  -128(%sp),%r3        ; pop the 128-byte frame, restore r3 (delay slot)
+
+	.PROCEND	
+
+
+;---------------------------------------------------------------------------
+; MUL_ADD_C: (C3:C2:C1) += (A0L:A0R) * (B0L:B0R).  Uses high_one (1<<32).
+MUL_ADD_C  .macro  A0L,A0R,B0L,B0R,C1,C2,C3
+    XMPYU   A0L,B0R,ftemp1        ; m1 = hi(a)*lo(b)
+    FSTD    ftemp1,-16(%sp)       ;
+    XMPYU   A0R,B0L,ftemp2        ; m = lo(a)*hi(b)
+    FSTD    ftemp2,-8(%sp)        ;
+    XMPYU   A0R,B0R,ftemp3        ; lt = lo(a)*lo(b)
+    FSTD    ftemp3,-32(%sp)
+    XMPYU   A0L,B0L,ftemp4        ; ht = hi(a)*hi(b)
+    FSTD    ftemp4,-24(%sp)       ;
+
+    LDD     -8(%sp),m             ; m
+    LDD     -16(%sp),m1           ; m1
+    ADD,L   m,m1,m                ; m+m1
+
+    DEPD,Z  m,31,32,temp3         ; (m+m1<<32)
+    LDD     -24(%sp),ht           ; ht
+
+    CMPCLR,*>>= m,m1,%r0          ; if (m < m1)
+    ADD,L   ht,high_one,ht        ; ht+=high_one
+
+    EXTRD,U m,31,32,temp1         ; m >> 32
+    LDD     -32(%sp),lt           ; lt
+    ADD,L   ht,temp1,ht           ; ht+= m>>32
+    ADD     lt,temp3,lt           ; lt = lt + (m<<32)
+    ADD,DC  ht,%r0,ht             ; ht += carry out of lt
+
+    ADD     C1,lt,C1              ; c1=c1+lt
+    ADD,DC  ht,%r0,ht             ; add in carry (ht++)
+
+    ADD     C2,ht,C2              ; c2 = c2 + ht
+    ADD,DC  C3,%r0,C3             ; add in carry (c3++)
+.endm
+
+
+; r[0..15] = a[0..7] * b[0..7], accumulated one result word (column) at a time.
+;void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
+; arg0 = r_ptr
+; arg1 = a_ptr
+; arg2 = b_ptr
+; c1/c2/c3 rotate as the three-word accumulator; the lowest is stored per column.
+
+bn_mul_comba8
+	.proc
+	.callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE
+	.EXPORT	bn_mul_comba8,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
+    .entry
+	.align 64
+
+    STD     %r3,0(%sp)          ; save r3
+    STD     %r4,8(%sp)          ; save r4
+    STD     %r5,16(%sp)         ; save r5
+    STD     %r6,24(%sp)         ; save r6
+    FSTD    %fr12,32(%sp)       ; save fr12 (aliased as b6 below)
+    FSTD    %fr13,40(%sp)       ; save fr13 (aliased as b7 below)
+
+	;
+	; Zero out carries
+	;
+	COPY     %r0,c1
+	COPY     %r0,c2
+	COPY     %r0,c3
+
+	LDO      128(%sp),%sp       ; bump stack
+    DEPDI,Z  1,31,1,high_one     ; Create Value  1 << 32
+
+	;
+	; Load up all of the values we are going to use
+	;
+    FLDD      0(a_ptr),a0       
+    FLDD      8(a_ptr),a1       
+    FLDD     16(a_ptr),a2       
+    FLDD     24(a_ptr),a3       
+    FLDD     32(a_ptr),a4       
+    FLDD     40(a_ptr),a5       
+    FLDD     48(a_ptr),a6       
+    FLDD     56(a_ptr),a7       
+
+    FLDD      0(b_ptr),b0       
+    FLDD      8(b_ptr),b1       
+    FLDD     16(b_ptr),b2       
+    FLDD     24(b_ptr),b3       
+    FLDD     32(b_ptr),b4       
+    FLDD     40(b_ptr),b5       
+    FLDD     48(b_ptr),b6       
+    FLDD     56(b_ptr),b7       
+
+	MUL_ADD_C a0L,a0R,b0L,b0R,c1,c2,c3
+	STD       c1,0(r_ptr)            ; r[0] = c1
+	COPY      %r0,c1                 ; c1 restarts as the top accumulator word
+
+	MUL_ADD_C a0L,a0R,b1L,b1R,c2,c3,c1
+	MUL_ADD_C a1L,a1R,b0L,b0R,c2,c3,c1
+	STD       c2,8(r_ptr)            ; r[1] = c2
+	COPY      %r0,c2
+
+	MUL_ADD_C a2L,a2R,b0L,b0R,c3,c1,c2
+	MUL_ADD_C a1L,a1R,b1L,b1R,c3,c1,c2
+	MUL_ADD_C a0L,a0R,b2L,b2R,c3,c1,c2
+	STD       c3,16(r_ptr)           ; r[2] = c3
+	COPY      %r0,c3
+
+	MUL_ADD_C a0L,a0R,b3L,b3R,c1,c2,c3
+	MUL_ADD_C a1L,a1R,b2L,b2R,c1,c2,c3
+	MUL_ADD_C a2L,a2R,b1L,b1R,c1,c2,c3
+	MUL_ADD_C a3L,a3R,b0L,b0R,c1,c2,c3
+	STD       c1,24(r_ptr)           ; r[3] = c1
+	COPY      %r0,c1
+
+	MUL_ADD_C a4L,a4R,b0L,b0R,c2,c3,c1
+	MUL_ADD_C a3L,a3R,b1L,b1R,c2,c3,c1
+	MUL_ADD_C a2L,a2R,b2L,b2R,c2,c3,c1
+	MUL_ADD_C a1L,a1R,b3L,b3R,c2,c3,c1
+	MUL_ADD_C a0L,a0R,b4L,b4R,c2,c3,c1
+	STD       c2,32(r_ptr)           ; r[4] = c2
+	COPY      %r0,c2
+
+	MUL_ADD_C a0L,a0R,b5L,b5R,c3,c1,c2
+	MUL_ADD_C a1L,a1R,b4L,b4R,c3,c1,c2
+	MUL_ADD_C a2L,a2R,b3L,b3R,c3,c1,c2
+	MUL_ADD_C a3L,a3R,b2L,b2R,c3,c1,c2
+	MUL_ADD_C a4L,a4R,b1L,b1R,c3,c1,c2
+	MUL_ADD_C a5L,a5R,b0L,b0R,c3,c1,c2
+	STD       c3,40(r_ptr)           ; r[5] = c3
+	COPY      %r0,c3
+
+	MUL_ADD_C a6L,a6R,b0L,b0R,c1,c2,c3
+	MUL_ADD_C a5L,a5R,b1L,b1R,c1,c2,c3
+	MUL_ADD_C a4L,a4R,b2L,b2R,c1,c2,c3
+	MUL_ADD_C a3L,a3R,b3L,b3R,c1,c2,c3
+	MUL_ADD_C a2L,a2R,b4L,b4R,c1,c2,c3
+	MUL_ADD_C a1L,a1R,b5L,b5R,c1,c2,c3
+	MUL_ADD_C a0L,a0R,b6L,b6R,c1,c2,c3
+	STD       c1,48(r_ptr)           ; r[6] = c1
+	COPY      %r0,c1
+	
+	MUL_ADD_C a0L,a0R,b7L,b7R,c2,c3,c1
+	MUL_ADD_C a1L,a1R,b6L,b6R,c2,c3,c1
+	MUL_ADD_C a2L,a2R,b5L,b5R,c2,c3,c1
+	MUL_ADD_C a3L,a3R,b4L,b4R,c2,c3,c1
+	MUL_ADD_C a4L,a4R,b3L,b3R,c2,c3,c1
+	MUL_ADD_C a5L,a5R,b2L,b2R,c2,c3,c1
+	MUL_ADD_C a6L,a6R,b1L,b1R,c2,c3,c1
+	MUL_ADD_C a7L,a7R,b0L,b0R,c2,c3,c1
+	STD       c2,56(r_ptr)           ; r[7] = c2
+	COPY      %r0,c2
+
+	MUL_ADD_C a7L,a7R,b1L,b1R,c3,c1,c2
+	MUL_ADD_C a6L,a6R,b2L,b2R,c3,c1,c2
+	MUL_ADD_C a5L,a5R,b3L,b3R,c3,c1,c2
+	MUL_ADD_C a4L,a4R,b4L,b4R,c3,c1,c2
+	MUL_ADD_C a3L,a3R,b5L,b5R,c3,c1,c2
+	MUL_ADD_C a2L,a2R,b6L,b6R,c3,c1,c2
+	MUL_ADD_C a1L,a1R,b7L,b7R,c3,c1,c2
+	STD       c3,64(r_ptr)           ; r[8] = c3
+	COPY      %r0,c3
+
+	MUL_ADD_C a2L,a2R,b7L,b7R,c1,c2,c3
+	MUL_ADD_C a3L,a3R,b6L,b6R,c1,c2,c3
+	MUL_ADD_C a4L,a4R,b5L,b5R,c1,c2,c3
+	MUL_ADD_C a5L,a5R,b4L,b4R,c1,c2,c3
+	MUL_ADD_C a6L,a6R,b3L,b3R,c1,c2,c3
+	MUL_ADD_C a7L,a7R,b2L,b2R,c1,c2,c3
+	STD       c1,72(r_ptr)           ; r[9] = c1
+	COPY      %r0,c1
+
+	MUL_ADD_C a7L,a7R,b3L,b3R,c2,c3,c1
+	MUL_ADD_C a6L,a6R,b4L,b4R,c2,c3,c1
+	MUL_ADD_C a5L,a5R,b5L,b5R,c2,c3,c1
+	MUL_ADD_C a4L,a4R,b6L,b6R,c2,c3,c1
+	MUL_ADD_C a3L,a3R,b7L,b7R,c2,c3,c1
+	STD       c2,80(r_ptr)           ; r[10] = c2
+	COPY      %r0,c2
+
+	MUL_ADD_C a4L,a4R,b7L,b7R,c3,c1,c2
+	MUL_ADD_C a5L,a5R,b6L,b6R,c3,c1,c2
+	MUL_ADD_C a6L,a6R,b5L,b5R,c3,c1,c2
+	MUL_ADD_C a7L,a7R,b4L,b4R,c3,c1,c2
+	STD       c3,88(r_ptr)           ; r[11] = c3
+	COPY      %r0,c3
+
+	MUL_ADD_C a7L,a7R,b5L,b5R,c1,c2,c3
+	MUL_ADD_C a6L,a6R,b6L,b6R,c1,c2,c3
+	MUL_ADD_C a5L,a5R,b7L,b7R,c1,c2,c3
+	STD       c1,96(r_ptr)           ; r[12] = c1
+	COPY      %r0,c1
+
+	MUL_ADD_C a6L,a6R,b7L,b7R,c2,c3,c1
+	MUL_ADD_C a7L,a7R,b6L,b6R,c2,c3,c1
+	STD       c2,104(r_ptr)          ; r[13] = c2
+	COPY      %r0,c2
+
+	MUL_ADD_C a7L,a7R,b7L,b7R,c3,c1,c2
+	STD       c3,112(r_ptr)          ; r[14] = c3
+	STD       c1,120(r_ptr)          ; r[15] = c1
+
+    .EXIT
+    FLDD    -88(%sp),%fr13       ; restore fr13 (stored at 40(%sp) before the bump)
+    FLDD    -96(%sp),%fr12       ; restore fr12 (stored at 32(%sp) before the bump)
+    LDD     -104(%sp),%r6        ; restore r6
+    LDD     -112(%sp),%r5        ; restore r5
+    LDD     -120(%sp),%r4        ; restore r4
+    BVE     (%rp)
+    LDD,MB  -128(%sp),%r3        ; pop the 128-byte frame, restore r3 (delay slot)
+
+	.PROCEND	
+
+;-----------------------------------------------------------------------------
+; r[0..7] = a[0..3] * b[0..3], accumulated one result word (column) at a time.
+;void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
+; arg0 = r_ptr
+; arg1 = a_ptr
+; arg2 = b_ptr
+; c1/c2/c3 rotate as the three-word accumulator; the lowest is stored per column.
+
+bn_mul_comba4
+	.proc
+	.callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE
+	.EXPORT	bn_mul_comba4,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN
+    .entry
+	.align 64
+
+    STD     %r3,0(%sp)          ; save r3
+    STD     %r4,8(%sp)          ; save r4
+    STD     %r5,16(%sp)         ; save r5
+    STD     %r6,24(%sp)         ; save r6
+    FSTD    %fr12,32(%sp)       ; save fr12; NOTE(review): b6 is never used here
+    FSTD    %fr13,40(%sp)       ; save fr13; NOTE(review): b7 is never used here
+
+	;
+	; Zero out carries
+	;
+	COPY     %r0,c1
+	COPY     %r0,c2
+	COPY     %r0,c3
+
+	LDO      128(%sp),%sp       ; bump stack
+    DEPDI,Z  1,31,1,high_one     ; Create Value  1 << 32
+
+	;
+	; Load up all of the values we are going to use
+	;
+    FLDD      0(a_ptr),a0       
+    FLDD      8(a_ptr),a1       
+    FLDD     16(a_ptr),a2       
+    FLDD     24(a_ptr),a3       
+
+    FLDD      0(b_ptr),b0       
+    FLDD      8(b_ptr),b1       
+    FLDD     16(b_ptr),b2       
+    FLDD     24(b_ptr),b3       
+
+	MUL_ADD_C a0L,a0R,b0L,b0R,c1,c2,c3
+	STD       c1,0(r_ptr)            ; r[0] = c1
+	COPY      %r0,c1                 ; c1 restarts as the top accumulator word
+
+	MUL_ADD_C a0L,a0R,b1L,b1R,c2,c3,c1
+	MUL_ADD_C a1L,a1R,b0L,b0R,c2,c3,c1
+	STD       c2,8(r_ptr)            ; r[1] = c2
+	COPY      %r0,c2
+
+	MUL_ADD_C a2L,a2R,b0L,b0R,c3,c1,c2
+	MUL_ADD_C a1L,a1R,b1L,b1R,c3,c1,c2
+	MUL_ADD_C a0L,a0R,b2L,b2R,c3,c1,c2
+	STD       c3,16(r_ptr)           ; r[2] = c3
+	COPY      %r0,c3
+
+	MUL_ADD_C a0L,a0R,b3L,b3R,c1,c2,c3
+	MUL_ADD_C a1L,a1R,b2L,b2R,c1,c2,c3
+	MUL_ADD_C a2L,a2R,b1L,b1R,c1,c2,c3
+	MUL_ADD_C a3L,a3R,b0L,b0R,c1,c2,c3
+	STD       c1,24(r_ptr)           ; r[3] = c1
+	COPY      %r0,c1
+
+	MUL_ADD_C a3L,a3R,b1L,b1R,c2,c3,c1
+	MUL_ADD_C a2L,a2R,b2L,b2R,c2,c3,c1
+	MUL_ADD_C a1L,a1R,b3L,b3R,c2,c3,c1
+	STD       c2,32(r_ptr)           ; r[4] = c2
+	COPY      %r0,c2
+
+	MUL_ADD_C a2L,a2R,b3L,b3R,c3,c1,c2
+	MUL_ADD_C a3L,a3R,b2L,b2R,c3,c1,c2
+	STD       c3,40(r_ptr)           ; r[5] = c3
+	COPY      %r0,c3
+
+	MUL_ADD_C a3L,a3R,b3L,b3R,c1,c2,c3
+	STD       c1,48(r_ptr)           ; r[6] = c1
+	STD       c2,56(r_ptr)           ; r[7] = c2
+
+    .EXIT
+    FLDD    -88(%sp),%fr13       ; restore fr13 (stored at 40(%sp) before the bump)
+    FLDD    -96(%sp),%fr12       ; restore fr12 (stored at 32(%sp) before the bump)
+    LDD     -104(%sp),%r6        ; restore r6
+    LDD     -112(%sp),%r5        ; restore r5
+    LDD     -120(%sp),%r4        ; restore r4
+    BVE     (%rp)
+    LDD,MB  -128(%sp),%r3        ; pop the 128-byte frame, restore r3 (delay slot)
+
+	.PROCEND	
+
+; Literal data: C$4 is the format string bn_div_err_case hands to fprintf.
+	.SPACE	$TEXT$
+	.SUBSPA	$CODE$
+	.SPACE	$PRIVATE$,SORT=16
+	.IMPORT	$global$,DATA
+	.SPACE	$TEXT$
+	.SUBSPA	$CODE$
+	.SUBSPA	$LIT$,ACCESS=0x2c
+C$4
+	.ALIGN	8
+	.STRINGZ	"Division would overflow (%d)\n"
+	.END
diff --git a/src/tools/gyp/pylib/gyp/input.py b/src/tools/gyp/pylib/gyp/input.py
index 9257dfe..5697aef 100755
--- a/src/tools/gyp/pylib/gyp/input.py
+++ b/src/tools/gyp/pylib/gyp/input.py
@@ -256,6 +256,8 @@
     includes_list.extend(includes)
   if 'includes' in subdict:
     for include in subdict['includes']:
+      # expand environment & command line variables in include path.
+      include = ExpandVariables(include, False, variables, subdict_path)
       # "include" is specified relative to subdict_path, so compute the real
       # path to include by appending the provided "include" to the directory
       # in which subdict_path resides.