Merge with viewer-development (mesh)

2011-05-17 21:01:36 +00:00 · 2011-05-17 21:01:36 +00:00 · 0a86e39d90
parent ae8ed3fc2d 79459bc9ba
commit 0a86e39d90
521 changed files with 38803 additions and 6387 deletions
--- a/.hgignore
+++ b/.hgignore
@ -9,6 +9,7 @@ syntax: glob
 .*.swp
 #OSX image cache file
 *.DS_Store
+*.orig
 LICENSES
 indra/.distcc
 build-linux-*
--- a/.hgtags
+++ b/.hgtags
@ -8,6 +8,7 @@ bb38ff1a763738609e1b3cada6d15fa61e5e84b9 2.1.1-release
 1e2b517adc2ecb342cd3c865f2a6ccf82a3cf8d7 2-1-beta-3
 3469d90a115b900f8f250e137bbd9b684130f5d2 beta-4
 3e4b947f79d88c385e8218cbc0731cef0e42cfc4 2-1-beta-1
+434973a76ab2755f98ab55e1afc193e16692d5c5 2-1-1-beta-2
 46002088d9a4489e323b8d56131c680eaa21258c viewer-2-1-0-start
 4f777ffb99fefdc6497c61385c22688ff149c659 viewer-2-0-0
 52d96ad3d39be29147c5b2181b3bb46af6164f0e alpha-3
@ -16,6 +17,7 @@ bb38ff1a763738609e1b3cada6d15fa61e5e84b9 2.1.1-release
 7f16e79826d377f5f9f5b33dc721ab56d0d7dc8f alpha-4
 7f16e79826d377f5f9f5b33dc721ab56d0d7dc8f fork to viewer-20qa
 80bc6cff515118a36108967af49d3f8105c95bc9 viewer-2-0-2-start
+87bfaf8c76f9b22d9c65d4b315358861be87c863 2-1-1-release
 b03065d018b8a2e28b7de85b293a4c992cb4c12d 2-1-release
 b8419565906e4feb434426d8d9b17dd1458e24b2 alpha-6
 bb38ff1a763738609e1b3cada6d15fa61e5e84b9 2-1-1-release
--- a/32
+++ b/32
@ -11,8 +11,11 @@ Linux.symbolfiles = "newview/secondlife-symbols-linux.tar.bz2"
 # Use Public Upload Locations
 public_build = true

+# Skip the Windows (CYGWIN) debug build until a fix is available.
+build_CYGWIN_Debug = false
+
 # Update Public Inworld Build Status Indicators
-email_status_this_is_os = true
+email_status_this_is_os = false

 # Limit extent of codeticket updates to revisions after...
 codeticket_since = 2.2.0-release
@ -89,6 +92,33 @@ brad-parabuild.email = brad@lindenlab.com
 brad-parabuild.build_server = false
 brad-parabuild.build_server_tests = false

+# ========================================
+# mesh-development
+# ========================================
+mesh-development.viewer_channel = "Project Viewer - Mesh"
+mesh-development.login_channel = "Project Viewer - Mesh"
+mesh-development.viewer_grid = aditi
+mesh-development.build_debug_release_separately = true
+mesh-development.build_CYGWIN_Debug = false
+mesh-development.build_viewer_update_version_manager = false
+
+# ========================================
+# viewer-mesh
+# ========================================
+
+viewer-mesh.build_viewer = true
+viewer-mesh.build_server = false
+viewer-mesh.build_Linux = true
+viewer-mesh.build_hg_bundle = true
+viewer-mesh.build_viewer_update_version_manager = false
+viewer-mesh.build_Debug = false
+viewer-mesh.build_RelWithDebInfo = false
+viewer-mesh.viewer_channel = "Project Viewer - Mesh"
+viewer-mesh.login_channel = "Project Viewer - Mesh"
+viewer-mesh.viewer_grid = aditi
+viewer-mesh.email = shining@lists.lindenlab.com
+
+
 # ========================================
 # CG
 # ========================================
--- a/autobuild.xml
+++ b/autobuild.xml
@ -3,14 +3,14 @@
 <map>
    <key>installables</key>
    <map>
-      <key>GL</key>
+      <key>GLOD</key>
      <map>
        <key>license</key>
-        <string>GL</string>
+        <string>GLOD</string>
        <key>license_file</key>
-        <string>LICENSES/GL.txt</string>
+        <string>LICENSES/glod.txt</string>
        <key>name</key>
-        <string>GL</string>
+        <string>GLOD</string>
        <key>platforms</key>
        <map>
          <key>darwin</key>
@ -18,9 +18,9 @@
            <key>archive</key>
            <map>
              <key>hash</key>
-              <string>0b7c1d43dc2b39301fef6c05948fb826</string>
+              <string>7c546f54f6ed654f713c778af3925dd4</string>
              <key>url</key>
-              <string>http://s3.amazonaws.com/viewer-source-downloads/install_pkgs/GL-darwin-20101004.tar.bz2</string>
+              <string>http://automated-builds-secondlife-com.s3.amazonaws.com/hg/repo/3p-glod/rev/226566/arch/Darwin/installer/glod-1.0pre4-darwin-20110413.tar.bz2</string>
            </map>
            <key>name</key>
            <string>darwin</string>
@ -30,9 +30,9 @@
            <key>archive</key>
            <map>
              <key>hash</key>
-              <string>d85d88088360aee5b67105ad93b5ad16</string>
+              <string>18c708163d2a669bc3c030b05b4ebe61</string>
              <key>url</key>
-              <string>http://s3.amazonaws.com/viewer-source-downloads/install_pkgs/GL-linux-20100929.tar.bz2</string>
+              <string>http://automated-builds-secondlife-com.s3.amazonaws.com/hg/repo/3p-glod/rev/226657/arch/Linux/installer/glod-1.0pre4-linux-20110414.tar.bz2</string>
            </map>
            <key>name</key>
            <string>linux</string>
@ -42,9 +42,9 @@
            <key>archive</key>
            <map>
              <key>hash</key>
-              <string>f8d98cbe78d5aafbc7aaabf840325aaf</string>
+              <string>c4ae6cddc04e0b2908a301726a53922c</string>
              <key>url</key>
-              <string>http://s3.amazonaws.com/viewer-source-downloads/install_pkgs/GL-0.0.0-windows-20110303.tar.bz2</string>
+              <string>http://automated-builds-secondlife-com.s3.amazonaws.com/hg/repo/3p-glod/rev/226194/arch/CYGWIN/installer/glod-1.0pre4-windows-20110408.tar.bz2</string>
            </map>
            <key>name</key>
            <string>windows</string>
@ -219,6 +219,54 @@
          </map>
        </map>
      </map>
+      <key>colladadom</key>
+      <map>
+        <key>license</key>
+        <string>scea</string>
+        <key>license_file</key>
+        <string>LICENSES/collada.txt</string>
+        <key>name</key>
+        <string>colladadom</string>
+        <key>platforms</key>
+        <map>
+          <key>darwin</key>
+          <map>
+            <key>archive</key>
+            <map>
+              <key>hash</key>
+              <string>480b27a0cb39a4adfcdeabef895de3e1</string>
+              <key>url</key>
+              <string>http://automated-builds-secondlife-com.s3.amazonaws.com/hg/repo/3p-colladadom/rev/227230/arch/Darwin/installer/colladadom-2.2-darwin-20110420.tar.bz2</string>
+            </map>
+            <key>name</key>
+            <string>darwin</string>
+          </map>
+          <key>linux</key>
+          <map>
+            <key>archive</key>
+            <map>
+              <key>hash</key>
+              <string>d05be8fc196e9ce7b6636b931cf13dff</string>
+              <key>url</key>
+              <string>http://automated-builds-secondlife-com.s3.amazonaws.com/hg/repo/3p-colladadom/rev/226716/arch/Linux/installer/colladadom-2.2-linux-20110415.tar.bz2</string>
+            </map>
+            <key>name</key>
+            <string>linux</string>
+          </map>
+          <key>windows</key>
+          <map>
+            <key>archive</key>
+            <map>
+              <key>hash</key>
+              <string>a9f548eb6f9aaf292508a8b09c7f2f73</string>
+              <key>url</key>
+              <string>http://automated-builds-secondlife-com.s3.amazonaws.com/hg/repo/3p-colladadom/rev/226584/arch/CYGWIN/installer/colladadom-2.2-windows-20110413.tar.bz2</string>
+            </map>
+            <key>name</key>
+            <string>windows</string>
+          </map>
+        </map>
+      </map>
      <key>curl</key>
      <map>
        <key>license</key>
@ -531,6 +579,42 @@
          </map>
        </map>
      </map>
+      <key>glext</key>
+      <map>
+        <key>license</key>
+        <string>glext</string>
+        <key>license_file</key>
+        <string>LICENSES/glext.txt</string>
+        <key>name</key>
+        <string>glext</string>
+        <key>platforms</key>
+        <map>
+          <key>linux</key>
+          <map>
+            <key>archive</key>
+            <map>
+              <key>hash</key>
+              <string>5de58ca0fe19abf68b25956762ee0d29</string>
+              <key>url</key>
+              <string>http://s3.amazonaws.com/viewer-source-downloads/install_pkgs/glext-68-windows-20110406.tar.bz2</string>
+            </map>
+            <key>name</key>
+            <string>linux</string>
+          </map>
+          <key>windows</key>
+          <map>
+            <key>archive</key>
+            <map>
+              <key>hash</key>
+              <string>5de58ca0fe19abf68b25956762ee0d29</string>
+              <key>url</key>
+              <string>http://s3.amazonaws.com/viewer-source-downloads/install_pkgs/glext-68-windows-20110406.tar.bz2</string>
+            </map>
+            <key>name</key>
+            <string>windows</string>
+          </map>
+        </map>
+      </map>
      <key>glh_linear</key>
      <map>
        <key>license</key>
@ -615,42 +699,6 @@
          </map>
        </map>
      </map>
-      <key>google-perftools</key>
-      <map>
-        <key>license</key>
-        <string>bsd</string>
-        <key>license_file</key>
-        <string>LICENSES/google-perftools.txt</string>
-        <key>name</key>
-        <string>google-perftools</string>
-        <key>platforms</key>
-        <map>
-          <key>linux</key>
-          <map>
-            <key>archive</key>
-            <map>
-              <key>hash</key>
-              <string>cf513fc2eec4a414cc804cf408932a45</string>
-              <key>url</key>
-              <string>http://s3.amazonaws.com/viewer-source-downloads/install_pkgs/google_perftools-1.7-linux-20110315.tar.bz2</string>
-            </map>
-            <key>name</key>
-            <string>linux</string>
-          </map>
-          <key>windows</key>
-          <map>
-            <key>archive</key>
-            <map>
-              <key>hash</key>
-              <string>8108bffe1c814be9d035b47dac3d4541</string>
-              <key>url</key>
-              <string>http://s3.amazonaws.com/viewer-source-downloads/install_pkgs/google-perftools-1.0-windows-20101001b.tar.bz2</string>
-            </map>
-            <key>name</key>
-            <string>windows</string>
-          </map>
-        </map>
-      </map>
      <key>google_breakpad</key>
      <map>
        <key>license</key>
@ -704,7 +752,7 @@
        <key>license</key>
        <string>bsd</string>
        <key>license_file</key>
-        <string>LICENSES/gmock.txt</string>
+        <string>LICENSES/googlemock.txt</string>
        <key>name</key>
        <string>googlemock</string>
        <key>platforms</key>
@ -776,7 +824,7 @@
        <key>license</key>
        <string>lgpl</string>
        <key>license_file</key>
-        <string>LICENSES/gtk.txt</string>
+        <string>LICENSES/gtk-atk-pango-glib.txt</string>
        <key>name</key>
        <string>gtk-atk-pango-glib</string>
        <key>platforms</key>
@ -990,9 +1038,9 @@
            <key>archive</key>
            <map>
              <key>hash</key>
-              <string>735a955e6442733e2342ab12c1087488</string>
+              <string>f194ba857ca8dd86483a3ef24535d0db</string>
              <key>url</key>
-              <string>http://s3.amazonaws.com/viewer-source-downloads/install_pkgs/libpng-1.5.1-windows-20110221.tar.bz2</string>
+              <string>http://automated-builds-secondlife-com.s3.amazonaws.com/hg/repo/3p-libpng/rev/226532/arch/CYGWIN/installer/libpng-1.5.1-windows-20110413.tar.bz2</string>
            </map>
            <key>name</key>
            <string>windows</string>
@ -1047,6 +1095,102 @@
          </map>
        </map>
      </map>
+      <key>llconvexdecomposition</key>
+      <map>
+        <key>license</key>
+        <string>havok</string>
+        <key>license_file</key>
+        <string>on_file</string>
+        <key>name</key>
+        <string>llconvexdecomposition</string>
+        <key>platforms</key>
+        <map>
+          <key>darwin</key>
+          <map>
+            <key>archive</key>
+            <map>
+              <key>hash</key>
+              <string>6e45ad68506cd1ba49fd35a3201f0478</string>
+              <key>url</key>
+              <string>http://s3-proxy.lindenlab.com/private-builds-secondlife-com/hg/repo/3p-llconvexdecomposition/rev/228821/arch/Darwin/installer/llconvexdecomposition-0.1-darwin-20110504.tar.bz2</string>
+            </map>
+            <key>name</key>
+            <string>darwin</string>
+          </map>
+          <key>linux</key>
+          <map>
+            <key>archive</key>
+            <map>
+              <key>hash</key>
+              <string>00ff5144612c2e261a0811a4503ce3ba</string>
+              <key>url</key>
+              <string>http://s3-proxy.lindenlab.com/private-builds-secondlife-com/hg/repo/3p-llconvexdecomposition/rev/228821/arch/Linux/installer/llconvexdecomposition-0.1-linux-20110504.tar.bz2</string>
+            </map>
+            <key>name</key>
+            <string>linux</string>
+          </map>
+          <key>windows</key>
+          <map>
+            <key>archive</key>
+            <map>
+              <key>hash</key>
+              <string>a4635dcbbe0915ce023dd41d3b848d4c</string>
+              <key>url</key>
+              <string>http://s3-proxy.lindenlab.com/private-builds-secondlife-com/hg/repo/3p-llconvexdecomposition/rev/228821/arch/CYGWIN/installer/llconvexdecomposition-0.1-windows-20110504.tar.bz2</string>
+            </map>
+            <key>name</key>
+            <string>windows</string>
+          </map>
+        </map>
+      </map>
+      <key>llconvexdecompositionstub</key>
+      <map>
+        <key>license</key>
+        <string>lgpl</string>
+        <key>license_file</key>
+        <string>LICENSES/lgpl.txt</string>
+        <key>name</key>
+        <string>llconvexdecompositionstub</string>
+        <key>platforms</key>
+        <map>
+          <key>darwin</key>
+          <map>
+            <key>archive</key>
+            <map>
+              <key>hash</key>
+              <string>bc1388fc28dbb3bba1fe7cb8d09f49b4</string>
+              <key>url</key>
+              <string>http://automated-builds-secondlife-com.s3.amazonaws.com/hg/repo/3p-llconvexdecompositionstub/rev/227399/arch/Darwin/installer/llconvexdecompositionstub-0.3-darwin-20110421.tar.bz2</string>
+            </map>
+            <key>name</key>
+            <string>darwin</string>
+          </map>
+          <key>linux</key>
+          <map>
+            <key>archive</key>
+            <map>
+              <key>hash</key>
+              <string>3295bd4a0514b7c15dda9044f40c175e</string>
+              <key>url</key>
+              <string>http://automated-builds-secondlife-com.s3.amazonaws.com/hg/repo/3p-llconvexdecompositionstub/rev/227399/arch/Linux/installer/llconvexdecompositionstub-0.3-linux-20110422.tar.bz2</string>
+            </map>
+            <key>name</key>
+            <string>linux</string>
+          </map>
+          <key>windows</key>
+          <map>
+            <key>archive</key>
+            <map>
+              <key>hash</key>
+              <string>92f1dff3249024c1534b55343ed79ea3</string>
+              <key>url</key>
+              <string>http://automated-builds-secondlife-com.s3.amazonaws.com/hg/repo/3p-llconvexdecompositionstub/rev/227399/arch/CYGWIN/installer/llconvexdecompositionstub-0.3-windows-20110421.tar.bz2</string>
+            </map>
+            <key>name</key>
+            <string>windows</string>
+          </map>
+        </map>
+      </map>
      <key>llqtwebkit</key>
      <map>
        <key>license</key>
@ -1351,6 +1495,42 @@
          </map>
        </map>
      </map>
+      <key>pcre</key>
+      <map>
+        <key>license</key>
+        <string>bsd</string>
+        <key>license_file</key>
+        <string>LICENSES/pcre-license.txt</string>
+        <key>name</key>
+        <string>pcre</string>
+        <key>platforms</key>
+        <map>
+          <key>darwin</key>
+          <map>
+            <key>archive</key>
+            <map>
+              <key>hash</key>
+              <string>a8e74694a0f4248228c13c845ed0a6f8</string>
+              <key>url</key>
+              <string>http://automated-builds-secondlife-com.s3.amazonaws.com/hg/repo/3p-pcre/rev/228822/arch/Darwin/installer/pcre-7.6-darwin-20110504.tar.bz2</string>
+            </map>
+            <key>name</key>
+            <string>darwin</string>
+          </map>
+          <key>linux</key>
+          <map>
+            <key>archive</key>
+            <map>
+              <key>hash</key>
+              <string>bb0abe962b3b8208ed2dab0424aab33d</string>
+              <key>url</key>
+              <string>http://automated-builds-secondlife-com.s3.amazonaws.com/hg/repo/3p-pcre/rev/228822/arch/Linux/installer/pcre-7.6-linux-20110504.tar.bz2</string>              
+            </map>
+            <key>name</key>
+            <string>linux</string>
+          </map>
+        </map>
+      </map>
      <key>quicktime</key>
      <map>
        <key>license</key>
@ -1423,6 +1603,42 @@
          </map>
        </map>
      </map>
+      <key>tcmalloc</key>
+      <map>
+        <key>license</key>
+        <string>bsd</string>
+        <key>license_file</key>
+        <string>LICENSES/google-perftools.txt</string>
+        <key>name</key>
+        <string>tcmalloc</string>
+        <key>platforms</key>
+        <map>
+          <key>linux</key>
+          <map>
+            <key>archive</key>
+            <map>
+              <key>hash</key>
+              <string>dde928cb24d22a267004a8c17669ba65</string>
+              <key>url</key>
+              <string>http://automated-builds-secondlife-com.s3.amazonaws.com/hg/repo/3p-google-perftools/rev/226426/arch/Linux/installer/google_perftools-1.7-linux-20110412.tar.bz2</string>
+            </map>
+            <key>name</key>
+            <string>linux</string>
+          </map>
+          <key>windows</key>
+          <map>
+            <key>archive</key>
+            <map>
+              <key>hash</key>
+              <string>8308f7bd68bb7083655753b7abe7225f</string>
+              <key>url</key>
+              <string>http://automated-builds-secondlife-com.s3.amazonaws.com/hg/repo/3p-google-perftools/rev/226287/arch/CYGWIN/installer/google_perftools-1.7-windows-20110411.tar.bz2</string>
+            </map>
+            <key>name</key>
+            <string>windows</string>
+          </map>
+        </map>
+      </map>
      <key>tut</key>
      <map>
        <key>license</key>
@ -1578,6 +1794,10 @@
            <map>
              <key>configure</key>
              <map>
+                <key>arguments</key>
+                <array>
+                  <string>../indra</string>
+                </array>
                <key>command</key>
                <string>cmake</string>
                <key>options</key>
@ -1587,10 +1807,6 @@
                  <string>-DROOT_PROJECT_NAME:STRING=SecondLife</string>
                  <string>-DINSTALL_PROPRIETARY=FALSE</string>
                </array>
-                <key>arguments</key>
-                <array>
-                  <string>../indra</string>
-                </array>
              </map>
              <key>name</key>
              <string>DebugOS</string>
@ -1619,6 +1835,10 @@
            <map>
              <key>configure</key>
              <map>
+                <key>arguments</key>
+                <array>
+                  <string>../indra</string>
+                </array>
                <key>command</key>
                <string>cmake</string>
                <key>options</key>
@ -1628,10 +1848,6 @@
                  <string>-DROOT_PROJECT_NAME:STRING=SecondLife</string>
                  <string>-DINSTALL_PROPRIETARY=FALSE</string>
                </array>
-                <key>arguments</key>
-                <array>
-                  <string>../indra</string>
-                </array>
              </map>
              <key>name</key>
              <string>RelWithDebInfoOS</string>
@ -1660,6 +1876,10 @@
            <map>
              <key>configure</key>
              <map>
+                <key>arguments</key>
+                <array>
+                  <string>../indra</string>
+                </array>
                <key>command</key>
                <string>cmake</string>
                <key>options</key>
@ -1669,10 +1889,6 @@
                  <string>-DROOT_PROJECT_NAME:STRING=SecondLife</string>
                  <string>-DINSTALL_PROPRIETARY=FALSE</string>
                </array>
-                <key>arguments</key>
-                <array>
-                  <string>../indra</string>
-                </array>
              </map>
              <key>name</key>
              <string>ReleaseOS</string>
--- a/build.sh
+++ b/build.sh
@ -42,9 +42,13 @@ installer_Linux()

 installer_CYGWIN()
 {
-  d=$(build_dir_CYGWIN ${last_built_variant:-Release})
-  p=$(sed 's:.*=::' "$d/newview/${last_built_variant:-Release}/touched.bat")
-  echo "$d/newview/${last_built_variant:-Release}/$p"
+  v=${last_built_variant:-Release}
+  d=$(build_dir_CYGWIN $v)
+  if [ -r "$d/newview/$v/touched.bat" ]
+  then
+    p=$(sed 's:.*=::' "$d/newview/$v/touched.bat")
+    echo "$d/newview/$v/$p"
+  fi
 }

 pre_build()
--- a/doc/contributions.txt
+++ b/doc/contributions.txt
@ -196,6 +196,7 @@ blino Nakamura
 Boroondas Gupte
 	OPEN-29
 	OPEN-39
+	OPEN-39
 	SNOW-278
 	SNOW-503
 	SNOW-510
@ -418,6 +419,7 @@ Jonathan Yap
 	VWR-17801
 	VWR-24347
 	STORM-975
+	STORM-990
 	STORM-1019
 	STORM-844
 	STORM-643
@ -816,7 +818,9 @@ Thraxis Epsilon
 tiamat bingyi
 	CT-246
 Tofu Buzzard
+	CTS-411
 	STORM-546
+	VWR-24509
 TraductoresAnonimos Alter
 	CT-324
 Tue Torok
--- a/etc/message.xml
+++ b/etc/message.xml
@ -596,6 +596,13 @@
 					<boolean>false</boolean>
 				</map>

+        <key>ObjectPhysicsProperties</key>
+        <map>
+          <key>flavor</key>
+          <string>llsd</string>
+          <key>trusted-sender</key>
+          <boolean>true</boolean>
+        </map>

 		  </map>
  	  	<key>capBans</key>
--- a/indra/cmake/00-Common.cmake
+++ b/indra/cmake/00-Common.cmake
@ -57,16 +57,18 @@ if (WINDOWS)

  add_definitions(
      /DLL_WINDOWS=1
+      /DDOM_DYNAMIC
      /DUNICODE
      /D_UNICODE 
      /GS
      /TP
-      /W3
+      /W2
      /c
      /Zc:forScope
      /nologo
      /Oy-
      /Zc:wchar_t-
+      /arch:SSE2
      )
     
  # Are we using the crummy Visual Studio KDU build workaround?
@ -141,6 +143,8 @@ if (LINUX)
      -fno-strict-aliasing
      -fsigned-char
      -g
+      -msse2
+      -mfpmath=sse
      -pthread
      )

@ -160,10 +164,6 @@ if (LINUX)
      link_directories(/usr/lib/mysql4/mysql)
    endif (EXISTS /usr/lib/mysql4/mysql)

-    add_definitions(
-        -msse2
-        -mfpmath=sse
-        )
  endif (SERVER)

  if (VIEWER)
@ -171,6 +171,8 @@ if (LINUX)
    add_definitions(-fvisibility=hidden)
    # don't catch SIGCHLD in our base application class for the viewer - some of our 3rd party libs may need their *own* SIGCHLD handler to work.  Sigh!  The viewer doesn't need to catch SIGCHLD anyway.
    add_definitions(-DLL_IGNORE_SIGCHLD)
+    add_definitions(-march=pentium4 -mfpmath=sse)
+    #add_definitions(-ftree-vectorize) # THIS CRASHES GCC 3.1-3.2
    if (NOT STANDALONE)
      # this stops us requiring a really recent glibc at runtime
      add_definitions(-fno-stack-protector)
@ -210,7 +212,7 @@ if (LINUX OR DARWIN)
    set(GCC_WARNINGS "${GCC_WARNINGS} -Werror")
  endif (NOT GCC_DISABLE_FATAL_WARNINGS)

-  set(GCC_CXX_WARNINGS "${GCC_WARNINGS} -Wno-reorder -Wno-non-virtual-dtor -Woverloaded-virtual")
+  set(GCC_CXX_WARNINGS "${GCC_WARNINGS} -Wno-reorder -Wno-non-virtual-dtor")

  set(CMAKE_C_FLAGS "${GCC_WARNINGS} ${CMAKE_C_FLAGS}")
  set(CMAKE_CXX_FLAGS "${GCC_CXX_WARNINGS} ${CMAKE_CXX_FLAGS}")
--- a/indra/cmake/CMakeLists.txt
+++ b/indra/cmake/CMakeLists.txt
@ -34,6 +34,7 @@ set(cmake_SOURCE_FILES
    FindZLIB.cmake
    FMOD.cmake
    FreeType.cmake
+    GLOD.cmake
    GStreamer010Plugin.cmake
    GooglePerfTools.cmake
    JPEG.cmake
@ -41,6 +42,7 @@ set(cmake_SOURCE_FILES
    LLAudio.cmake
    LLCharacter.cmake
    LLCommon.cmake
+    LLConvexDecomposition.cmake
    LLCrashLogger.cmake
    LLDatabase.cmake
    LLImage.cmake
--- a/indra/cmake/Copy3rdPartyLibs.cmake
+++ b/indra/cmake/Copy3rdPartyLibs.cmake
@ -39,6 +39,8 @@ if(WINDOWS)
        libapriconv-1.dll
        ssleay32.dll
        libeay32.dll
+        libcollada14dom22-d.dll
+        glod.dll	
        )

    set(release_src_dir "${ARCH_PREBUILT_DIRS_RELEASE}")
@ -49,6 +51,8 @@ if(WINDOWS)
        libapriconv-1.dll
        ssleay32.dll
        libeay32.dll
+        libcollada14dom22.dll
+        glod.dll
        )

    if(USE_GOOGLE_PERFTOOLS)
@ -207,9 +211,12 @@ elseif(DARWIN)
        libaprutil-1.dylib
        libexpat.1.5.2.dylib
        libexpat.dylib
-        libllqtwebkit.dylib
+        libGLOD.dylib
+	libllqtwebkit.dylib
+	libminizip.a
        libndofdev.dylib
        libexception_handler.dylib
+	libcollada14dom.dylib
       )

    # fmod is statically linked on darwin
@ -245,20 +252,23 @@ elseif(LINUX)
        libaprutil-1.so.0
        libatk-1.0.so
        libbreakpad_client.so.0
+       	libcollada14dom.so
        libcrypto.so.1.0.0
        libdb-5.1.so
        libexpat.so
        libexpat.so.1
+	libglod.so
        libgmock_main.so
        libgmock.so.0
        libgmodule-2.0.so
        libgobject-2.0.so
        libgtest_main.so
        libgtest.so.0
+	libminizip.so
        libopenal.so
        libopenjpeg.so
        libssl.so
-        libtcmalloc.so
+        libtcmalloc_minimal.so
        libuuid.so.16
        libuuid.so.16.0.22
        libssl.so.1.0.0
--- a/indra/cmake/FindTut.cmake
+++ b/indra/cmake/FindTut.cmake
@ -1,30 +0,0 @@
-# -*- cmake -*-
-
-# - Find Tut
-# Find the Tut unit test framework includes and library
-# This module defines
-#  TUT_INCLUDE_DIR, where to find tut/tut.hpp.
-#  TUT_FOUND, If false, do not try to use Tut.
-
-find_path(TUT_INCLUDE_DIR tut/tut.hpp
-    NO_SYSTEM_ENVIRONMENT_PATH
-    )
-
-if (TUT_INCLUDE_DIR)
-  set(TUT_FOUND "YES")
-else (TUT_INCLUDE_DIR)
-  set(TUT_FOUND "NO")
-endif (TUT_INCLUDE_DIR)
-
-if (TUT_FOUND)
-  if (NOT TUT_FIND_QUIETLY)
-    message(STATUS "Found Tut: ${TUT_INCLUDE_DIR}")
-    set(TUT_FIND_QUIETLY TRUE) # Only alert us the first time
-  endif (NOT TUT_FIND_QUIETLY)
-else (TUT_FOUND)
-  if (TUT_FIND_REQUIRED)
-    message(FATAL_ERROR "Could not find Tut")
-  endif (TUT_FIND_REQUIRED)
-endif (TUT_FOUND)
-
-mark_as_advanced(TUT_INCLUDE_DIR)
--- a/indra/cmake/GLOD.cmake
+++ b/indra/cmake/GLOD.cmake
@ -0,0 +1,9 @@
+# -*- cmake -*-
+include(Prebuilt)
+
+if (NOT STANDALONE)
+  use_prebuilt_binary(GLOD)
+endif (NOT STANDALONE)
+
+set(GLOD_INCLUDE_DIR ${LIBS_PREBUILT_DIR}/include)
+set(GLOD_LIBRARIES glod)
--- a/indra/cmake/GooglePerfTools.cmake
+++ b/indra/cmake/GooglePerfTools.cmake
@ -5,15 +5,16 @@ if (STANDALONE)
  include(FindGooglePerfTools)
 else (STANDALONE)
  if (WINDOWS)
-    use_prebuilt_binary(google-perftools)
+    use_prebuilt_binary(tcmalloc)
    set(TCMALLOC_LIBRARIES 
        debug libtcmalloc_minimal-debug
        optimized libtcmalloc_minimal)
    set(GOOGLE_PERFTOOLS_FOUND "YES")
  endif (WINDOWS)
  if (LINUX)
-    use_prebuilt_binary(google-perftools)
-    set(TCMALLOC_LIBRARIES tcmalloc)
+    use_prebuilt_binary(tcmalloc)
+    set(TCMALLOC_LIBRARIES 
+	tcmalloc)
    set(PROFILER_LIBRARIES profiler)
    set(GOOGLE_PERFTOOLS_INCLUDE_DIR
        ${LIBS_PREBUILT_DIR}/include)
@ -28,12 +29,11 @@ if (GOOGLE_PERFTOOLS_FOUND)
 endif (GOOGLE_PERFTOOLS_FOUND)

 if (WINDOWS)
-    # *TODO -reenable this once we get server usage sorted out
-    #set(USE_GOOGLE_PERFTOOLS ON)
+    set(USE_GOOGLE_PERFTOOLS ON)
 endif (WINDOWS)

 if (USE_GOOGLE_PERFTOOLS)
-  set(TCMALLOC_FLAG -DLL_USE_TCMALLOC=1)
+  set(TCMALLOC_FLAG -ULL_USE_TCMALLOC=1)
  include_directories(${GOOGLE_PERFTOOLS_INCLUDE_DIR})
  set(GOOGLE_PERFTOOLS_LIBRARIES ${TCMALLOC_LIBRARIES} ${STACKTRACE_LIBRARIES} ${PROFILER_LIBRARIES})
 else (USE_GOOGLE_PERFTOOLS)
--- a/indra/cmake/LLConvexDecomposition.cmake
+++ b/indra/cmake/LLConvexDecomposition.cmake
@ -0,0 +1,12 @@
+# -*- cmake -*-
+include(Prebuilt)
+
+set(LLCONVEXDECOMP_INCLUDE_DIRS ${LIBS_PREBUILT_DIR}/include)
+  
+if (INSTALL_PROPRIETARY AND NOT STANDALONE)
+  use_prebuilt_binary(llconvexdecomposition)
+  set(LLCONVEXDECOMP_LIBRARY llconvexdecomposition)
+else (INSTALL_PROPRIETARY AND NOT STANDALONE)
+  use_prebuilt_binary(llconvexdecompositionstub)
+  set(LLCONVEXDECOMP_LIBRARY llconvexdecompositionstub)
+endif (INSTALL_PROPRIETARY AND NOT STANDALONE)
--- a/indra/cmake/LLPrimitive.cmake
+++ b/indra/cmake/LLPrimitive.cmake
@ -1,7 +1,33 @@
 # -*- cmake -*-

+# TODO: move these prebuilt-binary declarations (colladadom, pcre, libxml) into their own cmake file.
+include(Prebuilt)
+use_prebuilt_binary(colladadom)
+use_prebuilt_binary(pcre)
+use_prebuilt_binary(libxml)
+
 set(LLPRIMITIVE_INCLUDE_DIRS
    ${LIBS_OPEN_DIR}/llprimitive
    )
+if (WINDOWS)
+	set(LLPRIMITIVE_LIBRARIES 
+        debug llprimitive
+        optimized llprimitive
+        debug libcollada14dom22-d
+        optimized libcollada14dom22
+        debug libboost_filesystem-vc100-mt-gd-1_45
+        optimized libboost_filesystem-vc100-mt-1_45
+        debug libboost_system-vc100-mt-gd-1_45
+        optimized libboost_system-vc100-mt-1_45
+        )
+else (WINDOWS)
+    set(LLPRIMITIVE_LIBRARIES 
+        llprimitive
+        collada14dom
+        minizip
+        xml2
+        pcrecpp
+        pcre
+        )
+endif (WINDOWS)

-set(LLPRIMITIVE_LIBRARIES llprimitive)
--- a/indra/cmake/LLTestCommand.cmake
+++ b/indra/cmake/LLTestCommand.cmake
@ -1,5 +1,4 @@
 include(Python)
-
 MACRO(LL_TEST_COMMAND OUTVAR LD_LIBRARY_PATH)
  # nat wonders how Kitware can use the term 'function' for a construct that
  # cannot return a value. And yet, variables you set inside a FUNCTION are
--- a/indra/cmake/OpenGL.cmake
+++ b/indra/cmake/OpenGL.cmake
@ -2,8 +2,7 @@
 include(Prebuilt)

 if (NOT STANDALONE)
-  use_prebuilt_binary(GL)
-  # possible glh_linear should have its own .cmake file instead
+  use_prebuilt_binary(glext)
  use_prebuilt_binary(glh_linear)
  set(GLEXT_INCLUDE_DIR ${LIBS_PREBUILT_DIR}/include)
 endif (NOT STANDALONE)
--- a/indra/cmake/Tut.cmake
+++ b/indra/cmake/Tut.cmake
@ -1,11 +1,6 @@
 # -*- cmake -*-
 include(Prebuilt)

-set(TUT_FIND_REQUIRED TRUE)
-set(TUT_FIND_QUIETLY TRUE)
-
-if (STANDALONE)
-  include(FindTut)
-else (STANDALONE)
+if (NOT STANDALONE)
  use_prebuilt_binary(tut)
-endif (STANDALONE)
+endif(NOT STANDALONE)
--- a/indra/cmake/Variables.cmake
+++ b/indra/cmake/Variables.cmake
@ -29,6 +29,7 @@ set(SERVER_PREFIX)
 set(VIEWER_PREFIX)
 set(INTEGRATION_TESTS_PREFIX)
 set(LL_TESTS ON CACHE BOOL "Build and run unit and integration tests (disable for build timing runs to reduce variation")
+set(INCREMENTAL_LINK OFF CACHE BOOL "Use incremental linking on win32 builds (enable for faster links on some machines)")

 if(LIBS_CLOSED_DIR)
  file(TO_CMAKE_PATH "${LIBS_CLOSED_DIR}" LIBS_CLOSED_DIR)
--- a/indra/cmake/WebKitLibPlugin.cmake
+++ b/indra/cmake/WebKitLibPlugin.cmake
@ -26,42 +26,48 @ if (STANDALONE)
  endforeach(qlibname)
  # qjpeg depends on libjpeg
  list(APPEND QT_PLUGIN_LIBRARIES jpeg)
-  set(WEBKITLIBPLUGIN OFF CACHE BOOL
-      "WEBKITLIBPLUGIN support for the llplugin/llmedia test apps.")
+    set(WEBKITLIBPLUGIN OFF CACHE BOOL
+        "WEBKITLIBPLUGIN support for the llplugin/llmedia test apps.")
 else (STANDALONE)
-  use_prebuilt_binary(llqtwebkit)
-  set(WEBKITLIBPLUGIN ON CACHE BOOL
-      "WEBKITLIBPLUGIN support for the llplugin/llmedia test apps.")
+    use_prebuilt_binary(llqtwebkit)
+    set(WEBKITLIBPLUGIN ON CACHE BOOL
+        "WEBKITLIBPLUGIN support for the llplugin/llmedia test apps.")
 endif (STANDALONE)

 if (WINDOWS)
-  set(WEBKIT_PLUGIN_LIBRARIES
-      debug llqtwebkitd
-      debug QtWebKitd4
-      debug QtOpenGLd4
-      debug QtNetworkd4
-      debug QtGuid4
-      debug QtCored4
-      debug qtmaind
-      optimized llqtwebkit
-      optimized QtWebKit4
-      optimized QtOpenGL4
-      optimized QtNetwork4
-      optimized QtGui4
-      optimized QtCore4
-      optimized qtmain
-      )
+    set(WEBKIT_PLUGIN_LIBRARIES 
+    debug llqtwebkitd
+    debug QtWebKitd4
+    debug QtOpenGLd4
+    debug QtNetworkd4
+    debug QtGuid4
+    debug QtCored4
+    debug qtmaind
+    optimized llqtwebkit
+    optimized QtWebKit4
+    optimized QtOpenGL4
+    optimized QtNetwork4
+    optimized QtGui4
+    optimized QtCore4
+    optimized qtmain
+    )
 elseif (DARWIN)
-  set(WEBKIT_PLUGIN_LIBRARIES
-      optimized ${ARCH_PREBUILT_DIRS_RELEASE}/libllqtwebkit.dylib
-      debug ${ARCH_PREBUILT_DIRS_RELEASE}/libllqtwebkit.dylib
-      )
+    set(WEBKIT_PLUGIN_LIBRARIES
+        optimized ${ARCH_PREBUILT_DIRS_RELEASE}/libllqtwebkit.dylib
+        debug ${ARCH_PREBUILT_DIRS_RELEASE}/libllqtwebkit.dylib
+        )
 elseif (LINUX)
+  # STANDALONE uses the variables populated in the STANDALONE branch above; otherwise the fixed list below.
  if (STANDALONE) 
    set(WEBKIT_PLUGIN_LIBRARIES ${LLQTWEBKIT_LIBRARY} ${QT_LIBRARIES} ${QT_PLUGIN_LIBRARIES})
  else (STANDALONE)
    set(WEBKIT_PLUGIN_LIBRARIES
        llqtwebkit
+#        qico
+#        qpng
+#        qtiff
+#        qsvg
+#        QtSvg
        QtWebKit
        QtOpenGL
        QtNetwork
@ -74,6 +80,10 @ elseif (LINUX)
        X11
        Xrender
        GL
+
+#        sqlite3
+#        Xi
+#        SM
        )
  endif (STANDALONE)
 endif (WINDOWS)
--- a/indra/llcharacter/CMakeLists.txt
+++ b/indra/llcharacter/CMakeLists.txt
@ -77,12 +77,13 @@ list(APPEND llcharacter_SOURCE_FILES ${llcharacter_HEADER_FILES})
 add_library (llcharacter ${llcharacter_SOURCE_FILES})


-if(LL_TESTS)
-  # Add tests
-  include(LLAddBuildTest)
-  # UNIT TESTS
-  SET(llcharacter_TEST_SOURCE_FILES
-      lljoint.cpp
-      )
-  LL_ADD_PROJECT_UNIT_TESTS(llcharacter "${llcharacter_TEST_SOURCE_FILES}")
-endif(LL_TESTS)
+# Add tests
+if (LL_TESTS)
+	include(LLAddBuildTest)
+	# UNIT TESTS
+	SET(llcharacter_TEST_SOURCE_FILES
+	  lljoint.cpp
+	  )
+	LL_ADD_PROJECT_UNIT_TESTS(llcharacter "${llcharacter_TEST_SOURCE_FILES}")
+endif (LL_TESTS)
+
--- a/indra/llcharacter/lljoint.cpp
+++ b/indra/llcharacter/lljoint.cpp
@ -50,6 +50,7 @@ LLJoint::LLJoint()
 	mUpdateXform = TRUE;
 	mJointNum = -1;
 	touch();
+	mResetAfterRestoreOldXform = false;
 }


@ -234,6 +235,42 @@ void LLJoint::setPosition( const LLVector3& pos )
 }


+//--------------------------------------------------------------------
+// setDefaultFromCurrentXform() -- record the current transform as the default
+//--------------------------------------------------------------------
+void LLJoint::setDefaultFromCurrentXform( void )
+{
+	mDefaultXform = mXform; // snapshot used later by restoreToDefaultXform()
+	touch(MATRIX_DIRTY | POSITION_DIRTY);
+	
+}
+
+//--------------------------------------------------------------------
+// storeCurrentXform() -- back up the current transform, then move the joint to pos
+//--------------------------------------------------------------------
+void LLJoint::storeCurrentXform( const LLVector3& pos )
+{
+	mOldXform = mXform; // backup consumed by restoreOldXform()
+	mResetAfterRestoreOldXform = true; // flag reported by doesJointNeedToBeReset()
+	setPosition( pos );
+}
+//--------------------------------------------------------------------
+// restoreOldXform() -- undo storeCurrentXform(): put back the saved transform and clear the reset flag
+//--------------------------------------------------------------------
+void LLJoint::restoreOldXform( void )
+{
+	mResetAfterRestoreOldXform = false;
+	mXform = mOldXform; // NOTE(review): no touch()/setPosition() here, unlike restoreToDefaultXform() -- confirm dirty flags need no update
+}
+//--------------------------------------------------------------------
+// restoreToDefaultXform() -- reinstate the transform saved by setDefaultFromCurrentXform()
+//--------------------------------------------------------------------
+void LLJoint::restoreToDefaultXform( void )
+{	
+	mXform = mDefaultXform;
+	setPosition( mXform.getPosition() );	// re-apply the restored position through the regular setter
+}
+
 //--------------------------------------------------------------------
 // getWorldPosition()
 //--------------------------------------------------------------------
@ -522,3 +559,4 @@ void LLJoint::clampRotation(LLQuaternion old_rot, LLQuaternion new_rot)
 }

 // End
+
--- a/indra/llcharacter/lljoint.h
+++ b/indra/llcharacter/lljoint.h
@ -80,11 +80,16 @@ protected:

 	// explicit transformation members
 	LLXformMatrix		mXform;
+	LLXformMatrix		mOldXform;
+	LLXformMatrix		mDefaultXform;

+	LLUUID				mId;
 public:
 	U32				mDirtyFlags;
 	BOOL			mUpdateXform;

+	BOOL			mResetAfterRestoreOldXform;
+
 	// describes the skin binding pose
 	LLVector3		mSkinOffset;

@ -130,7 +135,9 @@ public:
 	// get/set local position
 	const LLVector3& getPosition();
 	void setPosition( const LLVector3& pos );
-
+	
+	void setDefaultPosition( const LLVector3& pos );
+	
 	// get/set world position
 	LLVector3 getWorldPosition();
 	LLVector3 getLastWorldPosition();
@ -172,6 +179,21 @@ public:

 	S32 getJointNum() const { return mJointNum; }
 	void setJointNum(S32 joint_num) { mJointNum = joint_num; }
+	
+	void restoreOldXform( void );
+	void restoreToDefaultXform( void );
+	void setDefaultFromCurrentXform( void );
+	void storeCurrentXform( const LLVector3& pos );
+
+	//Accessor for the joint id
+	LLUUID getId( void ) { return mId; }
+	//Setter for the joints id
+	void setId( const LLUUID& id ) { mId = id;}
+
+	//If the old transform flag has been set, then the reset logic in avatar needs to be aware(test) of it
+	const BOOL doesJointNeedToBeReset( void ) const { return mResetAfterRestoreOldXform; }
+	//Setter for joint reset flag
+	void setJointToBeReset( BOOL val ) { mResetAfterRestoreOldXform = val; }
 };
 #endif // LL_LLJOINT_H

--- a/indra/llcharacter/llkeyframemotion.cpp
+++ b/indra/llcharacter/llkeyframemotion.cpp
@ -1145,7 +1145,7 @@ void LLKeyframeMotion::applyConstraint(JointConstraint* constraint, F32 time, U8
 			constraint->mPositions[joint_num] = new_pos;
 		}
 		constraint->mFixupDistanceRMS *= 1.f / (constraint->mTotalLength * (F32)(shared_data->mChainLength - 1));
-		constraint->mFixupDistanceRMS = fsqrtf(constraint->mFixupDistanceRMS);
+		constraint->mFixupDistanceRMS = (F32) sqrt(constraint->mFixupDistanceRMS);

 		//reset old joint rots
 		for (joint_num = 0; joint_num <= shared_data->mChainLength; joint_num++)
--- a/indra/llcommon/llassettype.cpp
+++ b/indra/llcommon/llassettype.cpp
@ -93,8 +93,9 @@ LLAssetDictionary::LLAssetDictionary()

 	addEntry(LLAssetType::AT_LINK, 				new AssetEntry("LINK",				"link",		"sym link",			false,		false,		true));
 	addEntry(LLAssetType::AT_LINK_FOLDER, 		new AssetEntry("FOLDER_LINK",		"link_f", 	"sym folder link",	false,		false,		true));
+	addEntry(LLAssetType::AT_MESH,              new AssetEntry("MESH",              "mesh",     "mesh",             false, false, false));
+	addEntry(LLAssetType::AT_NONE, 				new AssetEntry("NONE",				"-1",		NULL,		  		FALSE,		FALSE,		FALSE));

-	addEntry(LLAssetType::AT_NONE, 				new AssetEntry("NONE",				"-1",		NULL,		  		false,		false,		false));
 };

 // static
--- a/indra/llcommon/llassettype.h
+++ b/indra/llcommon/llassettype.h
@ -108,8 +108,10 @@ public:

 		AT_LINK_FOLDER = 25,
 			// Inventory folder link
-		
-		AT_COUNT = 26,
+		AT_MESH = 49,
+		    // Mesh data in our proprietary SLM format
+
+		AT_COUNT = 50,

 			// +*********************************************************+
 			// |  TO ADD AN ELEMENT TO THIS ENUM:                        |
--- a/indra/llcommon/lldefs.h
+++ b/indra/llcommon/lldefs.h
@ -236,5 +236,13 @@ inline LLDATATYPE llclampb(const LLDATATYPE& a)
 	return llmin(llmax(a, (LLDATATYPE)0), (LLDATATYPE)255);
 }

+template <class LLDATATYPE> // exchanges the values of lhs and rhs via one temporary copy
+inline void llswap(LLDATATYPE& lhs, LLDATATYPE& rhs)
+{
+	LLDATATYPE held_lhs(lhs);
+	lhs = rhs;
+	rhs = held_lhs;
+}
+
 #endif // LL_LLDEFS_H

--- a/indra/llcommon/llfasttimer.h
+++ b/indra/llcommon/llfasttimer.h
@ -27,140 +27,9 @@
 #ifndef LL_FASTTIMER_H
 #define LL_FASTTIMER_H

+// Implementations of getCPUClockCount32() and getCPUClockCount64() are now in llfasttimer_class.cpp.
+
 // pull in the actual class definition
 #include "llfasttimer_class.h"

-//
-// Important note: These implementations must be FAST!
-//
-
-#if LL_WINDOWS
-//
-// Windows implementation of CPU clock
-//
-
-//
-// NOTE: put back in when we aren't using platform sdk anymore
-//
-// because MS has different signatures for these functions in winnt.h
-// need to rename them to avoid conflicts
-//#define _interlockedbittestandset _renamed_interlockedbittestandset
-//#define _interlockedbittestandreset _renamed_interlockedbittestandreset
-//#include <intrin.h>
-//#undef _interlockedbittestandset
-//#undef _interlockedbittestandreset
-
-//inline U32 LLFastTimer::getCPUClockCount32()
-//{
-//	U64 time_stamp = __rdtsc();
-//	return (U32)(time_stamp >> 8);
-//}
-//
-//// return full timer value, *not* shifted by 8 bits
-//inline U64 LLFastTimer::getCPUClockCount64()
-//{
-//	return __rdtsc();
-//}
-
-// shift off lower 8 bits for lower resolution but longer term timing
-// on 1Ghz machine, a 32-bit word will hold ~1000 seconds of timing
-inline U32 LLFastTimer::getCPUClockCount32()
-{
-	U32 ret_val;
-	__asm
-	{
-        _emit   0x0f
-        _emit   0x31
-		shr eax,8
-		shl edx,24
-		or eax, edx
-		mov dword ptr [ret_val], eax
-	}
-    return ret_val;
-}
-
-// return full timer value, *not* shifted by 8 bits
-inline U64 LLFastTimer::getCPUClockCount64()
-{
-	U64 ret_val;
-	__asm
-	{
-        _emit   0x0f
-        _emit   0x31
-		mov eax,eax
-		mov edx,edx
-		mov dword ptr [ret_val+4], edx
-		mov dword ptr [ret_val], eax
-	}
-    return ret_val;
-}
-#endif
-
-
-#if (LL_LINUX || LL_SOLARIS) && !(defined(__i386__) || defined(__amd64__))
-//
-// Linux and Solaris implementation of CPU clock - non-x86.
-// This is accurate but SLOW!  Only use out of desperation.
-//
-// Try to use the MONOTONIC clock if available, this is a constant time counter
-// with nanosecond resolution (but not necessarily accuracy) and attempts are
-// made to synchronize this value between cores at kernel start. It should not
-// be affected by CPU frequency. If not available use the REALTIME clock, but
-// this may be affected by NTP adjustments or other user activity affecting
-// the system time.
-inline U64 LLFastTimer::getCPUClockCount64()
-{
-	struct timespec tp;
-	
-#ifdef CLOCK_MONOTONIC // MONOTONIC supported at build-time?
-	if (-1 == clock_gettime(CLOCK_MONOTONIC,&tp)) // if MONOTONIC isn't supported at runtime then ouch, try REALTIME
-#endif
-		clock_gettime(CLOCK_REALTIME,&tp);
-
-	return (tp.tv_sec*LLFastTimer::sClockResolution)+tp.tv_nsec;        
-}
-
-inline U32 LLFastTimer::getCPUClockCount32()
-{
-	return (U32)(LLFastTimer::getCPUClockCount64() >> 8);
-}
-#endif // (LL_LINUX || LL_SOLARIS) && !(defined(__i386__) || defined(__amd64__))
-
-
-#if (LL_LINUX || LL_SOLARIS || LL_DARWIN) && (defined(__i386__) || defined(__amd64__))
-//
-// Mac+Linux+Solaris FAST x86 implementation of CPU clock
-inline U32 LLFastTimer::getCPUClockCount32()
-{
-	U64 x;
-	__asm__ volatile (".byte 0x0f, 0x31": "=A"(x));
-	return (U32)(x >> 8);
-}
-
-inline U64 LLFastTimer::getCPUClockCount64()
-{
-	U64 x;
-	__asm__ volatile (".byte 0x0f, 0x31": "=A"(x));
-	return x;
-}
-#endif
-
-
-#if ( LL_DARWIN && !(defined(__i386__) || defined(__amd64__)))
-//
-// Mac PPC (deprecated) implementation of CPU clock
-//
-// Just use gettimeofday implementation for now
-
-inline U32 LLFastTimer::getCPUClockCount32()
-{
-	return (U32)(get_clock_count()>>8);
-}
-
-inline U64 LLFastTimer::getCPUClockCount64()
-{
-	return get_clock_count();
-}
-#endif
-
 #endif // LL_LLFASTTIMER_H
--- a/indra/llcommon/llfasttimer_class.cpp
+++ b/indra/llcommon/llfasttimer_class.cpp
@ -35,10 +35,13 @@

 #include <boost/bind.hpp>

+
 #if LL_WINDOWS
+#include "lltimer.h"
 #elif LL_LINUX || LL_SOLARIS
 #include <sys/time.h>
 #include <sched.h>
+#include "lltimer.h"
 #elif LL_DARWIN
 #include <sys/time.h>
 #include "lltimer.h"	// get_clock_count()
@ -61,6 +64,8 @@ BOOL LLFastTimer::sMetricLog = FALSE;
 LLMutex* LLFastTimer::sLogLock = NULL;
 std::queue<LLSD> LLFastTimer::sLogQueue;

+#define USE_RDTSC 0
+
 #if LL_LINUX || LL_SOLARIS
 U64 LLFastTimer::sClockResolution = 1000000000; // Nanosecond resolution
 #else
@ -234,10 +239,23 @@ U64 LLFastTimer::countsPerSecond() // counts per second for the *32-bit* timer
 #else // windows or x86-mac or x86-linux or x86-solaris
 U64 LLFastTimer::countsPerSecond() // counts per second for the *32-bit* timer
 {
+#if USE_RDTSC || !LL_WINDOWS
 	//getCPUFrequency returns MHz and sCPUClockFrequency wants to be in Hz
 	static U64 sCPUClockFrequency = U64(LLProcessorInfo().getCPUFrequency()*1000000.0);

 	// we drop the low-order byte in our timers, so report a lower frequency
+#else
+	// If we're not using RDTSC, each fasttimer tick is just a performance counter tick.
+	// Not redefining the clock frequency itself (in llprocessor.cpp/calculate_cpu_frequency())
+	// since that would change displayed MHz stats for CPUs
+	static bool firstcall = true;
+	static U64 sCPUClockFrequency;
+	if (firstcall)
+	{
+		QueryPerformanceFrequency((LARGE_INTEGER*)&sCPUClockFrequency);
+		firstcall = false;
+	}
+#endif
 	return sCPUClockFrequency >> 8;
 }
 #endif
@ -482,6 +500,19 @@ void LLFastTimer::NamedTimer::resetFrame()
 {
 	if (sLog)
 	{ //output current frame counts to performance log
+
+		static S32 call_count = 0;
+		if (call_count % 100 == 0)
+		{
+			llinfos << "countsPerSecond (32 bit): " << countsPerSecond() << llendl;
+			llinfos << "get_clock_count (64 bit): " << get_clock_count() << llendl;
+			llinfos << "LLProcessorInfo().getCPUFrequency() " << LLProcessorInfo().getCPUFrequency() << llendl;
+			llinfos << "getCPUClockCount32() " << getCPUClockCount32() << llendl;
+			llinfos << "getCPUClockCount64() " << getCPUClockCount64() << llendl;
+			llinfos << "elapsed sec " << ((F64)getCPUClockCount64())/((F64)LLProcessorInfo().getCPUFrequency()*1000000.0) << llendl;
+		}
+		call_count++;
+
 		F64 iclock_freq = 1000.0 / countsPerSecond(); // good place to calculate clock frequency

 		F64 total_time = 0;
@ -763,3 +794,144 @@ LLFastTimer::LLFastTimer(LLFastTimer::FrameState* state)


 //////////////////////////////////////////////////////////////////////////////
+//
+// Important note: These implementations must be FAST!
+//
+
+
+#if LL_WINDOWS
+//
+// Windows implementation of CPU clock
+//
+
+//
+// NOTE: put back in when we aren't using platform sdk anymore
+//
+// because MS has different signatures for these functions in winnt.h
+// need to rename them to avoid conflicts
+//#define _interlockedbittestandset _renamed_interlockedbittestandset
+//#define _interlockedbittestandreset _renamed_interlockedbittestandreset
+//#include <intrin.h>
+//#undef _interlockedbittestandset
+//#undef _interlockedbittestandreset
+
+//inline U32 LLFastTimer::getCPUClockCount32()
+//{
+//	U64 time_stamp = __rdtsc();
+//	return (U32)(time_stamp >> 8);
+//}
+//
+//// return full timer value, *not* shifted by 8 bits
+//inline U64 LLFastTimer::getCPUClockCount64()
+//{
+//	return __rdtsc();
+//}
+
+// shift off lower 8 bits for lower resolution but longer term timing
+// on 1Ghz machine, a 32-bit word will hold ~1000 seconds of timing
+#if USE_RDTSC
+U32 LLFastTimer::getCPUClockCount32()
+{
+	U32 ret_val;
+	__asm
+	{
+        _emit   0x0f
+        _emit   0x31
+		shr eax,8
+		shl edx,24
+		or eax, edx
+		mov dword ptr [ret_val], eax
+	}
+    return ret_val;
+}
+
+// return full timer value, *not* shifted by 8 bits
+U64 LLFastTimer::getCPUClockCount64()
+{
+	U64 ret_val;
+	__asm
+	{
+        _emit   0x0f
+        _emit   0x31
+		mov eax,eax
+		mov edx,edx
+		mov dword ptr [ret_val+4], edx
+		mov dword ptr [ret_val], eax
+	}
+    return ret_val;
+}
+
+std::string LLFastTimer::sClockType = "rdtsc";
+
+#else
+//LL_COMMON_API U64 get_clock_count(); // in lltimer.cpp
+// These use QueryPerformanceCounter, which is arguably fine and also works on amd architectures.
+U32 LLFastTimer::getCPUClockCount32()
+{
+	return (U32)(get_clock_count()>>8);
+}
+
+U64 LLFastTimer::getCPUClockCount64()
+{
+	return get_clock_count();
+}
+
+std::string LLFastTimer::sClockType = "QueryPerformanceCounter";
+#endif
+
+#endif
+
+
+#if (LL_LINUX || LL_SOLARIS) && !(defined(__i386__) || defined(__amd64__))
+//
+// Linux and Solaris implementation of CPU clock - non-x86.
+// This is accurate but SLOW!  Only use out of desperation.
+//
+// Try to use the MONOTONIC clock if available, this is a constant time counter
+// with nanosecond resolution (but not necessarily accuracy) and attempts are
+// made to synchronize this value between cores at kernel start. It should not
+// be affected by CPU frequency. If not available use the REALTIME clock, but
+// this may be affected by NTP adjustments or other user activity affecting
+// the system time.
+U64 LLFastTimer::getCPUClockCount64()
+{
+	struct timespec tp;
+	
+#ifdef CLOCK_MONOTONIC // MONOTONIC supported at build-time?
+	if (-1 == clock_gettime(CLOCK_MONOTONIC,&tp)) // if MONOTONIC isn't supported at runtime then ouch, try REALTIME
+#endif
+		clock_gettime(CLOCK_REALTIME,&tp);
+
+	return (tp.tv_sec*LLFastTimer::sClockResolution)+tp.tv_nsec;        
+}
+
+U32 LLFastTimer::getCPUClockCount32()
+{
+	return (U32)(LLFastTimer::getCPUClockCount64() >> 8);
+}
+
+std::string LLFastTimer::sClockType = "clock_gettime";
+
+#endif // (LL_LINUX || LL_SOLARIS) && !(defined(__i386__) || defined(__amd64__))
+
+
+#if (LL_LINUX || LL_SOLARIS || LL_DARWIN) && (defined(__i386__) || defined(__amd64__))
+//
+// Mac+Linux+Solaris FAST x86 implementation of CPU clock
+U32 LLFastTimer::getCPUClockCount32() // TSC shifted right by 8 bits, truncated to 32 bits
+{
+	U32 lo, hi; // read eax/edx separately: "=A" names the edx:eax pair only on i386, not on amd64
+	__asm__ volatile (".byte 0x0f, 0x31": "=a"(lo), "=d"(hi)); // 0x0f 0x31 is the rdtsc opcode
+	return (U32)((((U64)hi << 32) | lo) >> 8);
+}
+
+U64 LLFastTimer::getCPUClockCount64() // full 64-bit TSC value, *not* shifted
+{
+	U32 lo, hi; // read eax/edx separately: "=A" names the edx:eax pair only on i386, not on amd64
+	__asm__ volatile (".byte 0x0f, 0x31": "=a"(lo), "=d"(hi)); // 0x0f 0x31 is the rdtsc opcode
+	return ((U64)hi << 32) | lo;
+}
+
+std::string LLFastTimer::sClockType = "rdtsc";
+#endif
+
--- a/indra/llcommon/llfasttimer_class.h
+++ b/indra/llcommon/llfasttimer_class.h
@ -31,12 +31,15 @@

 #define FAST_TIMER_ON 1
 #define TIME_FAST_TIMERS 0
+#define DEBUG_FAST_TIMER_THREADS 1

 class LLMutex;

 #include <queue>
 #include "llsd.h"

+LL_COMMON_API void assert_main_thread();
+
 class LL_COMMON_API LLFastTimer
 {
 public:
@ -175,6 +178,11 @@ public:
 #if TIME_FAST_TIMERS
 		U64 timer_end = getCPUClockCount64();
 		sTimerCycles += timer_end - timer_start;
+#endif
+#if DEBUG_FAST_TIMER_THREADS
+#if !LL_RELEASE
+		assert_main_thread();
+#endif
 #endif
 	}

@ -245,6 +253,7 @@ public:
 		U32				mChildTime;
 	};
 	static CurTimerData		sCurTimerData;
+	static std::string sClockType;

 private:
 	static U32 getCPUClockCount32();
--- a/indra/llcommon/llfoldertype.cpp
+++ b/indra/llcommon/llfoldertype.cpp
@ -89,6 +89,9 @@ LLFolderDictionary::LLFolderDictionary()
 	addEntry(LLFolderType::FT_CURRENT_OUTFIT, 		new FolderEntry("current",	TRUE));
 	addEntry(LLFolderType::FT_OUTFIT, 				new FolderEntry("outfit",	FALSE));
 	addEntry(LLFolderType::FT_MY_OUTFITS, 			new FolderEntry("my_otfts",	TRUE));
+
+	addEntry(LLFolderType::FT_MESH, 				new FolderEntry("mesh",	TRUE));
+
 	addEntry(LLFolderType::FT_INBOX, 				new FolderEntry("inbox",	TRUE));
 		 
 	addEntry(LLFolderType::FT_NONE, 				new FolderEntry("-1",		FALSE));
--- a/indra/llcommon/llfoldertype.h
+++ b/indra/llcommon/llfoldertype.h
@ -80,9 +80,11 @@ public:
 		FT_OUTFIT = 47,
 		FT_MY_OUTFITS = 48,
 		
-		FT_INBOX = 49,
+		FT_MESH = 49,

-		FT_COUNT = 50,
+		FT_INBOX = 50,
+
+		FT_COUNT = 51,

 		FT_NONE = -1
 	};
--- a/indra/llcommon/llmd5.h
+++ b/indra/llcommon/llmd5.h
@ -103,7 +103,7 @@ public:
  void				raw_digest(unsigned char *array) const;	// provide 16-byte array for binary data
  void				hex_digest(char *string) const;			// provide 33-byte array for ascii-hex string

-  friend std::ostream&   operator<< (std::ostream&, LLMD5 context);
+  friend LL_COMMON_API std::ostream&   operator<< (std::ostream&, LLMD5 context);

 private:

--- a/indra/llcommon/llmemory.cpp
+++ b/indra/llcommon/llmemory.cpp
@ -71,25 +71,6 @@ void LLMemory::freeReserve()
 	reserveMem = NULL;
 }

-void* ll_allocate (size_t size)
-{
-	if (size == 0)
-	{
-		llwarns << "Null allocation" << llendl;
-	}
-	void *p = malloc(size);
-	if (p == NULL)
-	{
-		LLMemory::freeReserve();
-		llerrs << "Out of memory Error" << llendl;
-	}
-	return p;
-}
-
-void ll_release (void *p)
-{
-	free(p);
-}

 //----------------------------------------------------------------------------

--- a/indra/llcommon/llmemory.h
+++ b/indra/llcommon/llmemory.h
@ -1,25 +1,25 @@
-/** 
+/**
 * @file llmemory.h
 * @brief Memory allocation/deallocation header-stuff goes here.
 *
 * $LicenseInfo:firstyear=2002&license=viewerlgpl$
 * Second Life Viewer Source Code
 * Copyright (C) 2010, Linden Research, Inc.
- * 
+ *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation;
 * version 2.1 of the License only.
- * 
+ *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
- * 
+ *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
- * 
+ *
 * Linden Research, Inc., 945 Battery Street, San Francisco, CA  94111  USA
 * $/LicenseInfo$
 */
@ -28,12 +28,87 @

 #include "llmemtype.h"

-extern S32 gTotalDAlloc;
-extern S32 gTotalDAUse;
-extern S32 gDACount;
+#if LL_DEBUG
+// Allocate 'size' bytes aligned to 'align' (must be a power of two).
+// The raw malloc() pointer is stashed just below the returned address;
+// release with ll_aligned_free().  Returns NULL if malloc() fails.
+inline void* ll_aligned_malloc( size_t size, int align )
+{
+	// the adjustment below advances by 1..align bytes, so reserve a full 'align'
+	void* mem = malloc( size + align + sizeof(void*) );
+	if (mem == NULL) return NULL;
+	char* aligned = ((char*)mem) + sizeof(void*);
+	aligned += align - ((uintptr_t)aligned & (align - 1));

-extern void* ll_allocate (size_t size);
-extern void ll_release (void *p);
+	((void**)aligned)[-1] = mem;
+	return aligned;
+}
+
+inline void ll_aligned_free( void* ptr ) // releases a block whose raw malloc() pointer is stored at ptr[-1]
+{
+	if (ptr) free( ((void**)ptr)[-1] ); // NULL is a no-op, matching free() and the non-debug macro
+}
+
+inline void* ll_aligned_malloc_16(size_t size) // returned hunk MUST be freed with ll_aligned_free_16().
+{
+#if defined(LL_WINDOWS)
+	return _mm_malloc(size, 16);
+#elif defined(LL_DARWIN)
+	return malloc(size); // default osx malloc is 16 byte aligned.
+#else
+	void *rtn;
+	if (LL_LIKELY(0 == posix_memalign(&rtn, 16, size)))
+		return rtn;
+	else // bad alignment requested, or out of memory
+		return NULL;
+#endif
+}
+
+inline void ll_aligned_free_16(void *p)
+{
+#if defined(LL_WINDOWS)
+	_mm_free(p);
+#elif defined(LL_DARWIN)
+	return free(p);
+#else
+	free(p); // posix_memalign() is compatible with heap deallocator
+#endif
+}
+
+inline void* ll_aligned_malloc_32(size_t size) // returned hunk MUST be freed with ll_aligned_free_32().
+{
+#if defined(LL_WINDOWS)
+	return _mm_malloc(size, 32);
+#elif defined(LL_DARWIN)
+	return ll_aligned_malloc( size, 32 );
+#else
+	void *rtn;
+	if (LL_LIKELY(0 == posix_memalign(&rtn, 32, size)))
+		return rtn;
+	else // bad alignment requested, or out of memory
+		return NULL;
+#endif
+}
+
+inline void ll_aligned_free_32(void *p)
+{
+#if defined(LL_WINDOWS)
+	_mm_free(p);
+#elif defined(LL_DARWIN)
+	ll_aligned_free( p );
+#else
+	free(p); // posix_memalign() is compatible with heap deallocator
+#endif
+}
+#else // LL_DEBUG
+// ll_aligned_foo are noops now that we use tcmalloc everywhere (tcmalloc aligns automatically at appropriate intervals)
+#define ll_aligned_malloc( size, align ) malloc(size)
+#define ll_aligned_free( ptr ) free(ptr)
+#define ll_aligned_malloc_16 malloc
+#define ll_aligned_free_16 free
+#define ll_aligned_malloc_32 malloc
+#define ll_aligned_free_32 free
+#endif // LL_DEBUG

 class LL_COMMON_API LLMemory
 {
--- a/indra/llcommon/llrefcount.cpp
+++ b/indra/llcommon/llrefcount.cpp
@ -29,9 +29,25 @@

 #include "llerror.h"

+#if LL_REF_COUNT_DEBUG
+#include "llthread.h"
+#include "llapr.h"
+#endif
+
 LLRefCount::LLRefCount(const LLRefCount& other)
 :	mRef(0)
 {
+#if LL_REF_COUNT_DEBUG
+	if(gAPRPoolp)
+	{
+		mMutexp = new LLMutex(gAPRPoolp) ;
+	}
+	else
+	{
+		mMutexp = NULL ;
+	}
+	mCrashAtUnlock = FALSE ;
+#endif
 }

 LLRefCount& LLRefCount::operator=(const LLRefCount&)
@ -43,6 +59,17 @@ LLRefCount& LLRefCount::operator=(const LLRefCount&)
 LLRefCount::LLRefCount() :
 	mRef(0)
 {
+#if LL_REF_COUNT_DEBUG
+	if(gAPRPoolp)
+	{
+		mMutexp = new LLMutex(gAPRPoolp) ;
+	}
+	else
+	{
+		mMutexp = NULL ;
+	}
+	mCrashAtUnlock = FALSE ;
+#endif
 }

 LLRefCount::~LLRefCount()
@ -51,4 +78,87 @@ LLRefCount::~LLRefCount()
 	{
 		llerrs << "deleting non-zero reference" << llendl;
 	}
+
+#if LL_REF_COUNT_DEBUG
+	if(gAPRPoolp)
+	{
+		delete mMutexp ;
+	}
+#endif
 }
+
+#if LL_REF_COUNT_DEBUG
+void LLRefCount::ref() const // debug build: increments mRef, flagging overlapping ref()/unref() calls
+{ 
+	if(mMutexp)
+	{
+		if(mMutexp->isLocked()) 
+		{ // lock already held: presumably another thread is inside ref()/unref() on this object
+			mCrashAtUnlock = TRUE ;
+			llerrs << "the mutex is locked by the thread: " << mLockedThreadID 
+				<< " Current thread: " << LLThread::currentID() << llendl ;
+		}
+
+		mMutexp->lock() ;
+		mLockedThreadID = LLThread::currentID() ; // remembered for the error message above
+
+		mRef++; 
+
+		if(mCrashAtUnlock)
+		{
+			while(1); //NOTE(review): this spins forever, it does not crash -- presumably parks the thread for a debugger
+		}
+		mMutexp->unlock() ;
+	}
+	else
+	{ // no mutex was created (gAPRPoolp was NULL at construction): plain increment
+		mRef++; 
+	}
+} 
+
+S32 LLRefCount::unref() const // debug build: decrements mRef, deletes this at 0, returns the remaining count
+{
+	if(mMutexp)
+	{
+		if(mMutexp->isLocked()) 
+		{ // lock already held: presumably another thread is inside ref()/unref() on this object
+			mCrashAtUnlock = TRUE ;
+			llerrs << "the mutex is locked by the thread: " << mLockedThreadID 
+				<< " Current thread: " << LLThread::currentID() << llendl ;
+		}
+
+		mMutexp->lock() ;
+		mLockedThreadID = LLThread::currentID() ; // remembered for the error message above
+		
+		llassert(mRef >= 1);
+		if (0 == --mRef) 
+		{
+			if(mCrashAtUnlock)
+			{
+				while(1); //NOTE(review): this spins forever, it does not crash -- presumably parks the thread for a debugger
+			}
+			mMutexp->unlock() ; // must unlock before delete: the destructor frees the mutex
+
+			delete this; 
+			return 0;
+		}
+
+		if(mCrashAtUnlock)
+		{
+			while(1); //NOTE(review): this spins forever, it does not crash -- presumably parks the thread for a debugger
+		}
+		mMutexp->unlock() ;
+		return mRef;
+	}
+	else
+	{ // no mutex was created (gAPRPoolp was NULL at construction): same logic as the non-debug inline version
+		llassert(mRef >= 1);
+		if (0 == --mRef) 
+		{
+			delete this; 
+			return 0;
+		}
+		return mRef;
+	}
+}	
+#endif
--- a/indra/llcommon/llrefcount.h
+++ b/indra/llcommon/llrefcount.h
@ -28,6 +28,11 @@

 #include <boost/noncopyable.hpp>

+#define LL_REF_COUNT_DEBUG 0
+#if LL_REF_COUNT_DEBUG
+class LLMutex ;
+#endif
+
 //----------------------------------------------------------------------------
 // RefCount objects should generally only be accessed by way of LLPointer<>'s
 // see llthread.h for LLThreadSafeRefCount
@ -43,12 +48,16 @@ protected:
 public:
 	LLRefCount();

-	void ref() const
+#if LL_REF_COUNT_DEBUG
+	void ref() const ;
+	S32 unref() const ;
+#else
+	inline void ref() const
 	{ 
 		mRef++; 
 	} 

-	S32 unref() const
+	inline S32 unref() const
 	{
 		llassert(mRef >= 1);
 		if (0 == --mRef) 
@ -58,6 +67,7 @@ public:
 		}
 		return mRef;
 	}	
+#endif

 	//NOTE: when passing around a const LLRefCount object, this can return different results
 	// at different types, since mRef is mutable
@ -68,6 +78,12 @@ public:

 private: 
 	mutable S32	mRef; 
+
+#if LL_REF_COUNT_DEBUG
+	LLMutex*  mMutexp ;
+	mutable U32  mLockedThreadID ;
+	mutable BOOL mCrashAtUnlock ; 
+#endif
 };

 #endif
--- a/indra/llcommon/llsdserialize.cpp
+++ b/indra/llcommon/llsdserialize.cpp
@ -34,6 +34,12 @@
 #include <iostream>
 #include "apr_base64.h"

+#ifdef LL_STANDALONE
+# include <zlib.h>
+#else
+# include "zlib/zlib.h"  // for davep's dirty little zip functions
+#endif
+
 #if !LL_WINDOWS
 #include <netinet/in.h> // htonl & ntohl
 #endif
@ -1983,3 +1989,156 @ std::ostream& operator<<(std::ostream& s, const LLSD& llsd)
 	return s;
 }

+
+//dirty little zippers -- yell at davep if these are horrid
+
+//return a string containing zlib-deflated bytes of binary serialized LLSD
+// (empty string on failure)
+// inefficient -- the serialized block, its string copy and the compressed
+// result are all held in memory at once
+std::string zip_llsd(LLSD& data)
+{
+	std::stringstream llsd_strm;
+
+	LLSDSerialize::toBinary(data, llsd_strm);
+
+	const U32 CHUNK = 65536;
+
+	z_stream strm;
+	strm.zalloc = Z_NULL;
+	strm.zfree = Z_NULL;
+	strm.opaque = Z_NULL;
+
+	S32 ret = deflateInit(&strm, Z_BEST_COMPRESSION);
+	if (ret != Z_OK)
+	{
+		llwarns << "Failed to compress LLSD block." << llendl;
+		return std::string();
+	}
+
+	std::string source = llsd_strm.str();
+
+	U8 out[CHUNK];
+
+	strm.avail_in = source.size();
+	strm.next_in = (U8*) source.data();
+
+	//compressed bytes are appended here one CHUNK at a time
+	std::string result;
+
+	do
+	{
+		strm.avail_out = CHUNK;
+		strm.next_out = out;
+
+		ret = deflate(&strm, Z_FINISH);
+		if (ret != Z_OK && ret != Z_STREAM_END)
+		{ //release zlib's internal state before bailing out
+			deflateEnd(&strm);
+			llwarns << "Failed to compress LLSD block." << llendl;
+			return std::string();
+		}
+
+		if (strm.avail_out >= CHUNK)
+		{ //deflate reported success but produced no output
+			llerrs << "WTF?" << llendl;
+		}
+
+		result.append((const char*) out, CHUNK-strm.avail_out);
+	}
+	while (ret == Z_OK);
+
+	deflateEnd(&strm);
+
+#if 0 //verify results work with unzip_llsd
+	std::istringstream test(result);
+	LLSD test_sd;
+	if (!unzip_llsd(test_sd, test, result.size()))
+	{
+		llerrs << "Invalid compression result!" << llendl;
+	}
+#endif
+
+	return result;
+}
+
+//decompress a block of LLSD from provided istream
+// reads up to 'size' compressed bytes from 'is', inflates them and
+// deserializes the result into 'data' using LLSDSerialize
+// returns false if the input cannot be inflated or deserialized
+// not very efficient -- creates a copy of decompressed LLSD block in memory
+// NOTE: the decompressed size is not bounded; callers handling untrusted
+// input should be aware of that
+bool unzip_llsd(LLSD& data, std::istream& is, S32 size)
+{
+	if (size <= 0)
+	{ //nothing to inflate (and a negative size must never reach the allocator)
+		return false;
+	}
+
+	const U32 CHUNK = 65536;
+
+	std::string in(size, '\0');
+	is.read(&in[0], size);
+
+	z_stream strm;
+	strm.zalloc = Z_NULL;
+	strm.zfree = Z_NULL;
+	strm.opaque = Z_NULL;
+	//only hand zlib the bytes that were actually read from the stream
+	strm.avail_in = (U32) is.gcount();
+	strm.next_in = (U8*) &in[0];
+
+	S32 ret = inflateInit(&strm);
+	if (ret != Z_OK)
+	{
+		return false;
+	}
+
+	U8 out[CHUNK];
+
+	//decompressed LLSD block, built up one CHUNK at a time
+	std::string res_str;
+
+	do
+	{
+		strm.avail_out = CHUNK;
+		strm.next_out = out;
+
+		ret = inflate(&strm, Z_NO_FLUSH);
+		if (ret != Z_OK && ret != Z_STREAM_END)
+		{ //corrupt or truncated input, missing dictionary, out of memory...
+			inflateEnd(&strm);
+			return false;
+		}
+
+		res_str.append((const char*) out, CHUNK-strm.avail_out);
+	}
+	while (ret == Z_OK);
+
+	inflateEnd(&strm);
+
+	//res_str now holds the decompressed LLSD block
+	const std::string deprecated_header("<? LLSD/Binary ?>");
+
+	if (res_str.compare(0, deprecated_header.size(), deprecated_header) == 0)
+	{ //strip the header plus the single character following it;
+	  //erase() clamps the count, so a block that is only the header is safe
+		res_str.erase(0, deprecated_header.size()+1);
+	}
+
+	const U32 cur_size = res_str.size();
+
+	std::istringstream istr(res_str);
+
+	if (!LLSDSerialize::fromBinary(data, istr, cur_size))
+	{
+		llwarns << "Failed to unzip LLSD block" << llendl;
+		return false;
+	}
+
+	return true;
+}
+
+
+
--- a/indra/llcommon/llsdserialize.h
+++ b/indra/llcommon/llsdserialize.h
@ -790,4 +790,8 @@ public:
 	}
 };

+//dirty little zip functions -- yell at davep
+LL_COMMON_API std::string zip_llsd(LLSD& data);
+LL_COMMON_API bool unzip_llsd(LLSD& data, std::istream& is, S32 size);
+
 #endif // LL_LLSDSERIALIZE_H
--- a/indra/llcommon/llthread.cpp
+++ b/indra/llcommon/llthread.cpp
@ -56,6 +56,21 @@
 // 
 //----------------------------------------------------------------------------

+#if !LL_DARWIN
+U32 ll_thread_local sThreadID = 0;
+#endif 
+
+U32 LLThread::sIDIter = 0;
+
+LL_COMMON_API void assert_main_thread()
+{ //the first caller's thread ID is captured once and treated as the main thread
+	static const U32 main_thread_id = LLThread::currentID();
+	if (main_thread_id != LLThread::currentID())
+	{ //any other calling thread is reported through llerrs
+		llerrs << "Illegal execution outside main thread." << llendl;
+	}
+}
+
 //
 // Handed to the APR thread creation function
 //
@ -63,10 +78,14 @@ void *APR_THREAD_FUNC LLThread::staticRun(apr_thread_t *apr_threadp, void *datap
 {
 	LLThread *threadp = (LLThread *)datap;

+#if !LL_DARWIN
+	sThreadID = threadp->mID;
+#endif
+
 	// Run the user supplied function
 	threadp->run();

-	llinfos << "LLThread::staticRun() Exiting: " << threadp->mName << llendl;
+	//llinfos << "LLThread::staticRun() Exiting: " << threadp->mName << llendl;
 	
 	// We're done with the run function, this thread is done executing now.
 	threadp->mStatus = STOPPED;
@ -81,6 +100,8 @@ LLThread::LLThread(const std::string& name, apr_pool_t *poolp) :
 	mAPRThreadp(NULL),
 	mStatus(STOPPED)
 {
+	mID = ++sIDIter;
+
 	// Thread creation probably CAN be paranoid about APR being initialized, if necessary
 	if (poolp)
 	{
@ -121,7 +142,7 @@ void LLThread::shutdown()
 			// First, set the flag that indicates that we're ready to die
 			setQuitting();

-			llinfos << "LLThread::~LLThread() Killing thread " << mName << " Status: " << mStatus << llendl;
+			//llinfos << "LLThread::~LLThread() Killing thread " << mName << " Status: " << mStatus << llendl;
 			// Now wait a bit for the thread to exit
 			// It's unclear whether I should even bother doing this - this destructor
 			// should netver get called unless we're already stopped, really...
@ -143,7 +164,7 @@ void LLThread::shutdown()
 		if (!isStopped())
 		{
 			// This thread just wouldn't stop, even though we gave it time
-			llwarns << "LLThread::~LLThread() exiting thread before clean exit!" << llendl;
+			//llwarns << "LLThread::~LLThread() exiting thread before clean exit!" << llendl;
 			// Put a stake in its heart.
 			apr_thread_exit(mAPRThreadp, -1);
 			return;
@ -283,7 +304,7 @@ void LLThread::wakeLocked()
 //============================================================================

 LLMutex::LLMutex(apr_pool_t *poolp) :
-	mAPRMutexp(NULL)
+	mAPRMutexp(NULL), mCount(0), mLockingThread(NO_THREAD)
 {
 	//if (poolp)
 	//{
@ -315,7 +336,18 @@ LLMutex::~LLMutex()

 void LLMutex::lock()
 {
+#if LL_DARWIN
+	if (mLockingThread == LLThread::currentID())
+#else
+	if (mLockingThread == sThreadID)
+#endif
+	{ //redundant lock
+		mCount++;
+		return;
+	}
+	
 	apr_thread_mutex_lock(mAPRMutexp);
+	
 #if MUTEX_DEBUG
 	// Have to have the lock before we can access the debug info
 	U32 id = LLThread::currentID();
@ -323,10 +355,22 @@ void LLMutex::lock()
 		llerrs << "Already locked in Thread: " << id << llendl;
 	mIsLocked[id] = TRUE;
 #endif
+
+#if LL_DARWIN
+	mLockingThread = LLThread::currentID();
+#else
+	mLockingThread = sThreadID;
+#endif
 }

 void LLMutex::unlock()
 {
+	if (mCount > 0)
+	{ //not the root unlock
+		mCount--;
+		return;
+	}
+	
 #if MUTEX_DEBUG
 	// Access the debug info while we have the lock
 	U32 id = LLThread::currentID();
@ -334,6 +378,8 @@ void LLMutex::unlock()
 		llerrs << "Not locked in Thread: " << id << llendl;	
 	mIsLocked[id] = FALSE;
 #endif
+
+	mLockingThread = NO_THREAD;
 	apr_thread_mutex_unlock(mAPRMutexp);
 }

@ -351,6 +397,11 @@ bool LLMutex::isLocked()
 	}
 }

+U32 LLMutex::lockingThread() const
+{ //ID that lock() stored for the current holder, or NO_THREAD once unlock() has released the mutex
+	return mLockingThread;
+}
+
 //============================================================================

 LLCondition::LLCondition(apr_pool_t *poolp) :
@ -371,6 +422,15 @@ LLCondition::~LLCondition()

 void LLCondition::wait()
 {
+	if (!isLocked())
+	{ //mAPRMutexp MUST be locked before calling apr_thread_cond_wait
+		apr_thread_mutex_lock(mAPRMutexp);
+#if MUTEX_DEBUG
+		// avoid asserts on destruction in non-release builds
+		U32 id = LLThread::currentID();
+		mIsLocked[id] = TRUE;
+#endif
+	}
 	apr_thread_cond_wait(mAPRCondp, mAPRMutexp);
 }

--- a/indra/llcommon/llthread.h
+++ b/indra/llcommon/llthread.h
@ -35,8 +35,17 @@ class LLThread;
 class LLMutex;
 class LLCondition;

+#if LL_WINDOWS
+#define ll_thread_local __declspec(thread)
+#else
+#define ll_thread_local __thread
+#endif
+
 class LL_COMMON_API LLThread
 {
+private:
+	static U32 sIDIter;
+
 public:
 	typedef enum e_thread_status
 	{
@ -77,6 +86,8 @@ public:
 	apr_pool_t *getAPRPool() { return mAPRPoolp; }
 	LLVolatileAPRPool* getLocalAPRFilePool() { return mLocalAPRFilePoolp ; }

+	U32 getID() const { return mID; }
+
 private:
 	BOOL				mPaused;
 	
@ -91,6 +102,7 @@ protected:
 	apr_pool_t			*mAPRPoolp;
 	BOOL				mIsLocalPool;
 	EThreadStatus		mStatus;
+	U32					mID;

 	//a local apr_pool for APRFile operations in this thread. If it exists, LLAPRFile::sAPRFilePoolp should not be used.
 	//Note: this pool is used by APRFile ONLY, do NOT use it for any other purposes.
@ -128,17 +140,27 @@ protected:
 class LL_COMMON_API LLMutex
 {
 public:
+	typedef enum
+	{
+		NO_THREAD = 0xFFFFFFFF
+	} e_locking_thread;
+
 	LLMutex(apr_pool_t *apr_poolp); // NULL pool constructs a new pool for the mutex
 	virtual ~LLMutex();
 	
 	void lock();		// blocks
 	void unlock();
 	bool isLocked(); 	// non-blocking, but does do a lock/unlock so not free
+	U32 lockingThread() const; //get ID of locking thread
 	
 protected:
 	apr_thread_mutex_t *mAPRMutexp;
+	mutable U32			mCount;
+	mutable U32			mLockingThread;
+	
 	apr_pool_t			*mAPRPoolp;
 	BOOL				mIsLocalPool;
+	
 #if MUTEX_DEBUG
 	std::map<U32, BOOL> mIsLocked;
 #endif
--- a/indra/llcommon/stdenums.h
+++ b/indra/llcommon/stdenums.h
@ -49,7 +49,8 @@ enum EDragAndDropType
 	DAD_ANIMATION		= 12,
 	DAD_GESTURE			= 13,
 	DAD_LINK			= 14,
-	DAD_COUNT			= 15,   // number of types in this enum
+	DAD_MESH           		= 15,
+	DAD_COUNT			= 16,   // number of types in this enum
 };

 // Reasons for drags to be denied.
--- a/indra/llinventory/CMakeLists.txt
+++ b/indra/llinventory/CMakeLists.txt
@ -59,16 +59,17 @@ list(APPEND llinventory_SOURCE_FILES ${llinventory_HEADER_FILES})
 add_library (llinventory ${llinventory_SOURCE_FILES})


-if(LL_TESTS)
-  #add unit tests
-  INCLUDE(LLAddBuildTest)
-  SET(llinventory_TEST_SOURCE_FILES
-      # no real unit tests yet!
-      )
-  LL_ADD_PROJECT_UNIT_TESTS(llinventory "${llinventory_TEST_SOURCE_FILES}")

-  #set(TEST_DEBUG on)
-  set(test_libs llinventory ${LLMESSAGE_LIBRARIES} ${LLVFS_LIBRARIES} ${LLMATH_LIBRARIES} ${LLCOMMON_LIBRARIES} ${WINDOWS_LIBRARIES})
-  LL_ADD_INTEGRATION_TEST(inventorymisc "" "${test_libs}")
-  LL_ADD_INTEGRATION_TEST(llparcel "" "${test_libs}")
-endif(LL_TESTS)
+#add unit tests
+if (LL_TESTS)
+	INCLUDE(LLAddBuildTest)
+	SET(llinventory_TEST_SOURCE_FILES
+	  # no real unit tests yet!
+	  )
+	LL_ADD_PROJECT_UNIT_TESTS(llinventory "${llinventory_TEST_SOURCE_FILES}")
+
+	#set(TEST_DEBUG on)
+	set(test_libs llinventory ${LLMESSAGE_LIBRARIES} ${LLVFS_LIBRARIES} ${LLMATH_LIBRARIES} ${LLCOMMON_LIBRARIES} ${WINDOWS_LIBRARIES})
+	LL_ADD_INTEGRATION_TEST(inventorymisc "" "${test_libs}")
+	LL_ADD_INTEGRATION_TEST(llparcel "" "${test_libs}")
+endif (LL_TESTS)
--- a/indra/llinventory/llinventorytype.cpp
+++ b/indra/llinventory/llinventorytype.cpp
@ -83,6 +83,7 @@ LLInventoryDictionary::LLInventoryDictionary()
 	addEntry(LLInventoryType::IT_WEARABLE,            new InventoryEntry("wearable",  "wearable",      2, LLAssetType::AT_CLOTHING, LLAssetType::AT_BODYPART));
 	addEntry(LLInventoryType::IT_ANIMATION,           new InventoryEntry("animation", "animation",     1, LLAssetType::AT_ANIMATION));  
 	addEntry(LLInventoryType::IT_GESTURE,             new InventoryEntry("gesture",   "gesture",       1, LLAssetType::AT_GESTURE)); 
+	addEntry(LLInventoryType::IT_MESH,                new InventoryEntry("mesh",      "mesh",          1, LLAssetType::AT_MESH));
 }


@ -91,32 +92,58 @@ LLInventoryDictionary::LLInventoryDictionary()
 static const LLInventoryType::EType
 DEFAULT_ASSET_FOR_INV_TYPE[LLAssetType::AT_COUNT] =
 {
-	LLInventoryType::IT_TEXTURE,		// AT_TEXTURE
-	LLInventoryType::IT_SOUND,			// AT_SOUND
-	LLInventoryType::IT_CALLINGCARD,	// AT_CALLINGCARD
-	LLInventoryType::IT_LANDMARK,		// AT_LANDMARK
-	LLInventoryType::IT_LSL,			// AT_SCRIPT
-	LLInventoryType::IT_WEARABLE,		// AT_CLOTHING
-	LLInventoryType::IT_OBJECT,			// AT_OBJECT
-	LLInventoryType::IT_NOTECARD,		// AT_NOTECARD
-	LLInventoryType::IT_CATEGORY,		// AT_CATEGORY
-	LLInventoryType::IT_NONE,			// (null entry)
-	LLInventoryType::IT_LSL,			// AT_LSL_TEXT
-	LLInventoryType::IT_LSL,			// AT_LSL_BYTECODE
-	LLInventoryType::IT_TEXTURE,		// AT_TEXTURE_TGA
-	LLInventoryType::IT_WEARABLE,		// AT_BODYPART
-	LLInventoryType::IT_CATEGORY,		// AT_TRASH
-	LLInventoryType::IT_CATEGORY,		// AT_SNAPSHOT_CATEGORY
-	LLInventoryType::IT_CATEGORY,		// AT_LOST_AND_FOUND
-	LLInventoryType::IT_SOUND,			// AT_SOUND_WAV
-	LLInventoryType::IT_NONE,			// AT_IMAGE_TGA
-	LLInventoryType::IT_NONE,			// AT_IMAGE_JPEG
-	LLInventoryType::IT_ANIMATION,		// AT_ANIMATION
-	LLInventoryType::IT_GESTURE,		// AT_GESTURE
-	LLInventoryType::IT_NONE,			// AT_SIMSTATE
+	LLInventoryType::IT_TEXTURE,		// 0	AT_TEXTURE
+	LLInventoryType::IT_SOUND,			// 1	AT_SOUND
+	LLInventoryType::IT_CALLINGCARD,	// 2	AT_CALLINGCARD
+	LLInventoryType::IT_LANDMARK,		// 3	AT_LANDMARK
+	LLInventoryType::IT_LSL,			// 4	AT_SCRIPT
+	LLInventoryType::IT_WEARABLE,		// 5	AT_CLOTHING
+	LLInventoryType::IT_OBJECT,			// 6	AT_OBJECT
+	LLInventoryType::IT_NOTECARD,		// 7	AT_NOTECARD
+	LLInventoryType::IT_CATEGORY,		// 8	AT_CATEGORY
+	LLInventoryType::IT_NONE,			// 9	(null entry)
+	LLInventoryType::IT_LSL,			// 10	AT_LSL_TEXT
+	LLInventoryType::IT_LSL,			// 11	AT_LSL_BYTECODE
+	LLInventoryType::IT_TEXTURE,		// 12	AT_TEXTURE_TGA
+	LLInventoryType::IT_WEARABLE,		// 13	AT_BODYPART
+	LLInventoryType::IT_CATEGORY,		// 14	AT_TRASH
+	LLInventoryType::IT_CATEGORY,		// 15	AT_SNAPSHOT_CATEGORY
+	LLInventoryType::IT_CATEGORY,		// 16	AT_LOST_AND_FOUND
+	LLInventoryType::IT_SOUND,			// 17	AT_SOUND_WAV
+	LLInventoryType::IT_NONE,			// 18	AT_IMAGE_TGA
+	LLInventoryType::IT_NONE,			// 19	AT_IMAGE_JPEG
+	LLInventoryType::IT_ANIMATION,		// 20	AT_ANIMATION
+	LLInventoryType::IT_GESTURE,		// 21	AT_GESTURE
+	LLInventoryType::IT_NONE,			// 22	AT_SIMSTATE

-	LLInventoryType::IT_NONE,			// AT_LINK
-	LLInventoryType::IT_NONE,			// AT_LINK_FOLDER
+	LLInventoryType::IT_NONE,			// 23	AT_LINK
+	LLInventoryType::IT_NONE,			// 24	AT_LINK_FOLDER
+
+	LLInventoryType::IT_NONE,			// 25	AT_NONE
+	LLInventoryType::IT_NONE,			// 26	AT_NONE
+	LLInventoryType::IT_NONE,			// 27	AT_NONE
+	LLInventoryType::IT_NONE,			// 28	AT_NONE
+	LLInventoryType::IT_NONE,			// 29	AT_NONE
+	LLInventoryType::IT_NONE,			// 30	AT_NONE
+	LLInventoryType::IT_NONE,			// 31	AT_NONE
+	LLInventoryType::IT_NONE,			// 32	AT_NONE
+	LLInventoryType::IT_NONE,			// 33	AT_NONE
+	LLInventoryType::IT_NONE,			// 34	AT_NONE
+	LLInventoryType::IT_NONE,			// 35	AT_NONE
+	LLInventoryType::IT_NONE,			// 36	AT_NONE
+	LLInventoryType::IT_NONE,			// 37	AT_NONE
+	LLInventoryType::IT_NONE,			// 38	AT_NONE
+	LLInventoryType::IT_NONE,			// 39	AT_NONE
+	LLInventoryType::IT_NONE,			// 40	AT_NONE
+	LLInventoryType::IT_NONE,			// 41	AT_NONE
+	LLInventoryType::IT_NONE,			// 42	AT_NONE
+	LLInventoryType::IT_NONE,			// 43	AT_NONE
+	LLInventoryType::IT_NONE,			// 44	AT_NONE
+	LLInventoryType::IT_NONE,			// 45	AT_NONE
+	LLInventoryType::IT_NONE,			// 46	AT_NONE
+	LLInventoryType::IT_NONE,			// 47	AT_NONE
+	LLInventoryType::IT_NONE,			// 48	AT_NONE
+	LLInventoryType::IT_MESH            // 49	AT_MESH
 };

 // static
--- a/indra/llinventory/llinventorytype.h
+++ b/indra/llinventory/llinventorytype.h
@ -61,7 +61,8 @@ public:
 		IT_WEARABLE = 18,
 		IT_ANIMATION = 19,
 		IT_GESTURE = 20,
-		IT_COUNT = 21,
+		IT_MESH = 22,
+		IT_COUNT = 23,

 		IT_NONE = -1
 	};
--- a/indra/llmath/CMakeLists.txt
+++ b/indra/llmath/CMakeLists.txt
@ -15,13 +15,16 @@ set(llmath_SOURCE_FILES
    llcamera.cpp
    llcoordframe.cpp
    llline.cpp
+    llmatrix3a.cpp
    llmodularmath.cpp
    llperlin.cpp
    llquaternion.cpp
    llrect.cpp
    llsphere.cpp
+    llvector4a.cpp
    llvolume.cpp
    llvolumemgr.cpp
+    llvolumeoctree.cpp
    llsdutil_math.cpp
    m3math.cpp
    m4math.cpp
@ -49,21 +52,32 @@ set(llmath_HEADER_FILES
    llinterp.h
    llline.h
    llmath.h
+    llmatrix3a.h
+    llmatrix3a.inl
    llmodularmath.h
    lloctree.h
    llperlin.h
    llplane.h
    llquantize.h
    llquaternion.h
+    llquaternion2.h
+    llquaternion2.inl
    llrect.h
+    llsimdmath.h
+    llsimdtypes.h
+    llsimdtypes.inl
    llsphere.h
    lltreenode.h
+    llvector4a.h
+    llvector4a.inl
+    llvector4logical.h
    llv4math.h
    llv4matrix3.h
    llv4matrix4.h
    llv4vector3.h
    llvolume.h
    llvolumemgr.h
+    llvolumeoctree.h
    llsdutil_math.h
    m3math.h
    m4math.h
--- a/indra/llmath/llcamera.cpp
+++ b/indra/llmath/llcamera.cpp
@ -45,7 +45,6 @@ LLCamera::LLCamera() :
 	calculateFrustumPlanes();
 } 

-
 LLCamera::LLCamera(F32 vertical_fov_rads, F32 aspect_ratio, S32 view_height_in_pixels, F32 near_plane, F32 far_plane) :
 	LLCoordFrame(),
 	mViewHeightInPixels(view_height_in_pixels),
@ -61,6 +60,10 @@ LLCamera::LLCamera(F32 vertical_fov_rads, F32 aspect_ratio, S32 view_height_in_p
 	setView(vertical_fov_rads);
 } 

+LLCamera::~LLCamera()
+{
+	// no-op destructor body
+}

 // ---------------- LLCamera::getFoo() member functions ----------------

@ -82,11 +85,11 @@ F32 LLCamera::getMaxView() const

 // ---------------- LLCamera::setFoo() member functions ----------------

-void LLCamera::setUserClipPlane(LLPlane plane)
+void LLCamera::setUserClipPlane(LLPlane& plane)
 {
 	mPlaneCount = 7;
-	mAgentPlanes[6].p = plane;
-	mAgentPlanes[6].mask = calcPlaneMask(plane);
+	mAgentPlanes[6] = plane;
+	mPlaneMask[6] = plane.calcPlaneMask();
 }

 void LLCamera::disableUserClipPlane()
@ -158,166 +161,91 @@ size_t LLCamera::readFrustumFromBuffer(const char *buffer)

 // ---------------- test methods  ---------------- 

-S32 LLCamera::AABBInFrustum(const LLVector3 &center, const LLVector3& radius) 
+S32 LLCamera::AABBInFrustum(const LLVector4a &center, const LLVector4a& radius) 
 {
-	static const LLVector3 scaler[] = {
-		LLVector3(-1,-1,-1),
-		LLVector3( 1,-1,-1),
-		LLVector3(-1, 1,-1),
-		LLVector3( 1, 1,-1),
-		LLVector3(-1,-1, 1),
-		LLVector3( 1,-1, 1),
-		LLVector3(-1, 1, 1),
-		LLVector3( 1, 1, 1)
+	static const LLVector4a scaler[] = {
+		LLVector4a(-1,-1,-1),
+		LLVector4a( 1,-1,-1),
+		LLVector4a(-1, 1,-1),
+		LLVector4a( 1, 1,-1),
+		LLVector4a(-1,-1, 1),
+		LLVector4a( 1,-1, 1),
+		LLVector4a(-1, 1, 1),
+		LLVector4a( 1, 1, 1)
 	};

 	U8 mask = 0;
-	S32 result = 2;
-
-	/*if (mFrustumCornerDist > 0.f && radius.magVecSquared() > mFrustumCornerDist * mFrustumCornerDist)
-	{ //box is larger than frustum, check frustum quads against box planes
-
-		static const LLVector3 dir[] = 
-		{
-			LLVector3(1, 0, 0),
-			LLVector3(-1, 0, 0),
-			LLVector3(0, 1, 0),
-			LLVector3(0, -1, 0),
-			LLVector3(0, 0, 1),
-			LLVector3(0, 0, -1)
-		};
-
-		U32 quads[] = 
-		{
-			0, 1, 2, 3,
-			0, 1, 5, 4,
-			2, 3, 7, 6,
-			3, 0, 7, 4,
-			1, 2, 6, 4,
-			4, 5, 6, 7
-		};
-
-		result = 0;
-
-		BOOL total_inside = TRUE;
-		for (U32 i = 0; i < 6; i++)
-		{ 
-			LLVector3 p = center + radius.scaledVec(dir[i]);
-			F32 d = -p*dir[i];
-
-			for (U32 j = 0; j <	6; j++)
-			{ //for each quad
-				F32 dist = mAgentFrustum[quads[j*4+0]]*dir[i] + d;
-				if (dist > 0)
-				{ //at least one frustum point is outside the AABB
-					total_inside = FALSE;
-					for (U32 k = 1; k < 4; k++)
-					{ //for each other point on quad
-						if ( mAgentFrustum[quads[j*4+k]]*dir[i]+d  <= 0.f)
-						{ //quad is straddling some plane of AABB
-							return 1;
-						}
-					}
-				}
-				else
-				{
-					for (U32 k = 1; k < 4; k++)
-					{
-						if (mAgentFrustum[quads[j*4+k]]*dir[i]+d > 0.f)
-						{
-							return 1;
-						}
-					}
-				}
-			}
-		}
-
-		if (total_inside)
-		{
-			result = 1;
-		}
-	}
-	else*/
+	bool result = false;
+	LLVector4a rscale, maxp, minp;
+	LLSimdScalar d;
+	for (U32 i = 0; i < mPlaneCount; i++)
 	{
-		for (U32 i = 0; i < mPlaneCount; i++)
+		mask = mPlaneMask[i];
+		if (mask != 0xff)
 		{
-			mask = mAgentPlanes[i].mask;
-			if (mask == 0xff)
-			{
-				continue;
-			}
-			LLPlane p = mAgentPlanes[i].p;
-			LLVector3 n = LLVector3(p);
-			float d = p.mV[3];
-			LLVector3 rscale = radius.scaledVec(scaler[mask]);
-
-			LLVector3 minp = center - rscale;
-			LLVector3 maxp = center + rscale;
-
-			if (n * minp > -d) 
+			const LLPlane& p(mAgentPlanes[i]);
+			p.getAt<3>(d);
+			rscale.setMul(radius, scaler[mask]);
+			minp.setSub(center, rscale);
+			d = -d;
+			if (p.dot3(minp).getF32() > d) 
 			{
 				return 0;
 			}
-		
-			if (n * maxp > -d)
+			
+			if(!result)
 			{
-				result = 1;
+				maxp.setAdd(center, rscale);
+				result = (p.dot3(maxp).getF32() > d);
 			}
 		}
 	}

-	
-	return result;
+	return result?1:2;
 }

-S32 LLCamera::AABBInFrustumNoFarClip(const LLVector3 &center, const LLVector3& radius) 
+
+S32 LLCamera::AABBInFrustumNoFarClip(const LLVector4a& center, const LLVector4a& radius) 
 {
-	static const LLVector3 scaler[] = {
-		LLVector3(-1,-1,-1),
-		LLVector3( 1,-1,-1),
-		LLVector3(-1, 1,-1),
-		LLVector3( 1, 1,-1),
-		LLVector3(-1,-1, 1),
-		LLVector3( 1,-1, 1),
-		LLVector3(-1, 1, 1),
-		LLVector3( 1, 1, 1)
+	static const LLVector4a scaler[] = {
+		LLVector4a(-1,-1,-1),
+		LLVector4a( 1,-1,-1),
+		LLVector4a(-1, 1,-1),
+		LLVector4a( 1, 1,-1),
+		LLVector4a(-1,-1, 1),
+		LLVector4a( 1,-1, 1),
+		LLVector4a(-1, 1, 1),
+		LLVector4a( 1, 1, 1)
 	};

 	U8 mask = 0;
-	S32 result = 2;
-
+	bool result = false;
+	LLVector4a rscale, maxp, minp;
+	LLSimdScalar d;
 	for (U32 i = 0; i < mPlaneCount; i++)
 	{
-		if (i == 5)
+		mask = mPlaneMask[i];
+		if ((i != 5) && (mask != 0xff))
 		{
-			continue;
-		}
-
-		mask = mAgentPlanes[i].mask;
-		if (mask == 0xff)
-		{
-			continue;
-		}
-		LLPlane p = mAgentPlanes[i].p;
-		LLVector3 n = LLVector3(p);
-		float d = p.mV[3];
-		LLVector3 rscale = radius.scaledVec(scaler[mask]);
-
-		LLVector3 minp = center - rscale;
-		LLVector3 maxp = center + rscale;
-
-		if (n * minp > -d) 
-		{
-			return 0;
-		}
-	
-		if (n * maxp > -d)
-		{
-			result = 1;
+			const LLPlane& p(mAgentPlanes[i]);
+			p.getAt<3>(d);
+			rscale.setMul(radius, scaler[mask]);
+			minp.setSub(center, rscale);
+			d = -d;
+			if (p.dot3(minp).getF32() > d) 
+			{
+				return 0;
+			}
+			
+			if(!result)
+			{
+				maxp.setAdd(center, rscale);
+				result = (p.dot3(maxp).getF32() > d);
+			}
 		}
 	}

-	return result;
+	return result?1:2;
 }

 int LLCamera::sphereInFrustumQuick(const LLVector3 &sphere_center, const F32 radius) 
@ -438,28 +366,22 @@ int LLCamera::sphereInFrustumOld(const LLVector3 &sphere_center, const F32 radiu
 int LLCamera::sphereInFrustum(const LLVector3 &sphere_center, const F32 radius) const 
 {
 	// Returns 1 if sphere is in frustum, 0 if not.
-	int res = 2;
+	bool res = false;
 	for (int i = 0; i < 6; i++)
 	{
-		if (mAgentPlanes[i].mask == 0xff)
+		if (mPlaneMask[i] != 0xff)
 		{
-			continue;
-		}
+			float d = mAgentPlanes[i].dist(sphere_center);

-		float d = mAgentPlanes[i].p.dist(sphere_center);
-
-		if (d > radius) 
-		{
-			return 0;
-		}
-
-		if (d > -radius)
-		{
-			res = 1;
+			if (d > radius) 
+			{
+				return 0;
+			}
+			res = res || (d > -radius);
 		}
 	}

-	return res;
+	return res?1:2;
 }


@ -611,25 +533,6 @@ LLPlane planeFromPoints(LLVector3 p1, LLVector3 p2, LLVector3 p3)
 	return LLPlane(p1, n);
 }

-U8 LLCamera::calcPlaneMask(const LLPlane& plane)
-{
-	U8 mask = 0;
-	
-	if (plane.mV[0] >= 0)
-	{
-		mask |= 1;
-	}
-	if (plane.mV[1] >= 0)
-	{
-		mask |= 2;
-	}
-	if (plane.mV[2] >= 0)
-	{
-		mask |= 4;
-	}
-
-	return mask;
-}

 void LLCamera::ignoreAgentFrustumPlane(S32 idx)
 {
@ -638,12 +541,13 @@ void LLCamera::ignoreAgentFrustumPlane(S32 idx)
 		return;
 	}

-	mAgentPlanes[idx].mask = 0xff;
-	mAgentPlanes[idx].p.clearVec();
+	mPlaneMask[idx] = 0xff;
+	mAgentPlanes[idx].clear();
 }

 void LLCamera::calcAgentFrustumPlanes(LLVector3* frust)
 {
+	
 	for (int i = 0; i < 8; i++)
 	{
 		mAgentFrustum[i] = frust[i];
@ -656,27 +560,27 @@ void LLCamera::calcAgentFrustumPlanes(LLVector3* frust)
 	//order of planes is important, keep most likely to fail in the front of the list

 	//near - frust[0], frust[1], frust[2]
-	mAgentPlanes[2].p = planeFromPoints(frust[0], frust[1], frust[2]);
+	mAgentPlanes[2] = planeFromPoints(frust[0], frust[1], frust[2]);

 	//far  
-	mAgentPlanes[5].p = planeFromPoints(frust[5], frust[4], frust[6]);
+	mAgentPlanes[5] = planeFromPoints(frust[5], frust[4], frust[6]);

 	//left  
-	mAgentPlanes[0].p = planeFromPoints(frust[4], frust[0], frust[7]);
+	mAgentPlanes[0] = planeFromPoints(frust[4], frust[0], frust[7]);

 	//right  
-	mAgentPlanes[1].p = planeFromPoints(frust[1], frust[5], frust[6]);
+	mAgentPlanes[1] = planeFromPoints(frust[1], frust[5], frust[6]);

 	//top  
-	mAgentPlanes[4].p = planeFromPoints(frust[3], frust[2], frust[6]);
+	mAgentPlanes[4] = planeFromPoints(frust[3], frust[2], frust[6]);

 	//bottom  
-	mAgentPlanes[3].p = planeFromPoints(frust[1], frust[0], frust[4]);
+	mAgentPlanes[3] = planeFromPoints(frust[1], frust[0], frust[4]);

 	//cache plane octant facing mask for use in AABBInFrustum
 	for (U32 i = 0; i < mPlaneCount; i++)
 	{
-		mAgentPlanes[i].mask = calcPlaneMask(mAgentPlanes[i].p);
+		mPlaneMask[i] = mAgentPlanes[i].calcPlaneMask();
 	}
 }

@ -730,9 +634,10 @@ void LLCamera::calculateWorldFrustumPlanes()
 	F32 d;
 	LLVector3 center = mOrigin - mXAxis*mNearPlane;
 	mWorldPlanePos = center;
+	LLVector3 pnorm;	
 	for (int p=0; p<4; p++)
 	{
-		LLVector3 pnorm = LLVector3(mLocalPlanes[p]);
+		mLocalPlanes[p].getVector3(pnorm);
 		LLVector3 norm = rotateToAbsolute(pnorm);
 		norm.normVec();
 		d = -(center * norm);
@ -742,13 +647,15 @@ void LLCamera::calculateWorldFrustumPlanes()
 	LLVector3 zaxis(0, 0, 1.0f);
 	F32 yaw = getYaw();
 	{
-		LLVector3 tnorm = LLVector3(mLocalPlanes[PLANE_LEFT]);
+		LLVector3 tnorm;
+		mLocalPlanes[PLANE_LEFT].getVector3(tnorm);
 		tnorm.rotVec(yaw, zaxis);
 		d = -(mOrigin * tnorm);
 		mHorizPlanes[HORIZ_PLANE_LEFT] = LLPlane(tnorm, d);
 	}
 	{
-		LLVector3 tnorm = LLVector3(mLocalPlanes[PLANE_RIGHT]);
+		LLVector3 tnorm;
+		mLocalPlanes[PLANE_RIGHT].getVector3(tnorm);
 		tnorm.rotVec(yaw, zaxis);
 		d = -(mOrigin * tnorm);
 		mHorizPlanes[HORIZ_PLANE_RIGHT] = LLPlane(tnorm, d);
--- a/indra/llmath/llcamera.h
+++ b/indra/llmath/llcamera.h
@ -31,6 +31,7 @@
 #include "llmath.h"
 #include "llcoordframe.h"
 #include "llplane.h"
+#include "llvector4a.h"

 const F32 DEFAULT_FIELD_OF_VIEW 	= 60.f * DEG_TO_RAD;
 const F32 DEFAULT_ASPECT_RATIO 		= 640.f / 480.f;
@ -64,6 +65,12 @@ class LLCamera
 : 	public LLCoordFrame
 {
 public:
+	
+	LLCamera(const LLCamera& rhs)
+	{ //copy construction is implemented by delegating to operator=
+		*this = rhs;
+	}
+	
 	enum {
 		PLANE_LEFT = 0,
 		PLANE_RIGHT = 1,
@ -101,6 +108,9 @@ public:
 	};

 private:
+	LLPlane mAgentPlanes[7];  //frustum planes in agent space a la gluUnproject (I'm a bastard, I know) - DaveP
+	U8 mPlaneMask[8];         // 8 for alignment	
+	
 	F32 mView;					// angle between top and bottom frustum planes in radians.
 	F32 mAspect;				// width/height
 	S32 mViewHeightInPixels;	// for ViewHeightInPixels() only
@ -114,30 +124,22 @@ private:
 	LLPlane mWorldPlanes[PLANE_NUM];
 	LLPlane mHorizPlanes[HORIZ_PLANE_NUM];

-	struct frustum_plane
-	{
-		frustum_plane() : mask(0) {}
-		LLPlane p;
-		U8 mask;
-	};
-	frustum_plane mAgentPlanes[7];  //frustum planes in agent space a la gluUnproject (I'm a bastard, I know) - DaveP
-
 	U32 mPlaneCount;  //defaults to 6, if setUserClipPlane is called, uses user supplied clip plane in

 	LLVector3 mWorldPlanePos;		// Position of World Planes (may be offset from camera)
 public:
 	LLVector3 mAgentFrustum[8];  //8 corners of 6-plane frustum
 	F32	mFrustumCornerDist;		//distance to corner of frustum against far clip plane
-	LLPlane getAgentPlane(U32 idx) { return mAgentPlanes[idx].p; }
+	LLPlane& getAgentPlane(U32 idx) { return mAgentPlanes[idx]; }

 public:
 	LLCamera();
 	LLCamera(F32 vertical_fov_rads, F32 aspect_ratio, S32 view_height_in_pixels, F32 near_plane, F32 far_plane);
-	virtual ~LLCamera(){} // no-op virtual destructor
+	virtual ~LLCamera();
+	

-	void setUserClipPlane(LLPlane plane);
+	void setUserClipPlane(LLPlane& plane);
 	void disableUserClipPlane();
-	U8 calcPlaneMask(const LLPlane& plane);
 	virtual void setView(F32 vertical_fov_rads);
 	void setViewHeightInPixels(S32 height);
 	void setAspect(F32 new_aspect);
@ -184,8 +186,8 @@ public:
 	S32 sphereInFrustum(const LLVector3 &center, const F32 radius) const;
 	S32 pointInFrustum(const LLVector3 &point) const { return sphereInFrustum(point, 0.0f); }
 	S32 sphereInFrustumFull(const LLVector3 &center, const F32 radius) const { return sphereInFrustum(center, radius); }
-	S32 AABBInFrustum(const LLVector3 &center, const LLVector3& radius);
-	S32 AABBInFrustumNoFarClip(const LLVector3 &center, const LLVector3& radius);
+	S32 AABBInFrustum(const LLVector4a& center, const LLVector4a& radius);
+	S32 AABBInFrustumNoFarClip(const LLVector4a& center, const LLVector4a& radius);

 	//does a quick 'n dirty sphere-sphere check
 	S32 sphereInFrustumQuick(const LLVector3 &sphere_center, const F32 radius); 
--- a/indra/llmath/llmath.h
+++ b/indra/llmath/llmath.h
@ -29,7 +29,7 @@

 #include <cmath>
 #include <cstdlib>
-#include <complex>
+#include <vector>
 #include "lldefs.h"
 //#include "llstl.h" // *TODO: Remove when LLString is gone
 //#include "llstring.h" // *TODO: Remove when LLString is gone
@ -55,32 +55,11 @@
 #endif

 // Single Precision Floating Point Routines
-#ifndef sqrtf
-#define sqrtf(x)	((F32)sqrt((F64)(x)))
-#endif
-#ifndef fsqrtf
-#define fsqrtf(x)	sqrtf(x)
-#endif
-
-#ifndef cosf
-#define cosf(x)		((F32)cos((F64)(x)))
-#endif
-#ifndef sinf
-#define sinf(x)		((F32)sin((F64)(x)))
-#endif
-#ifndef tanf
+// (There used to be more defined here, but they appeared to be redundant and 
+// were breaking some other includes. Removed by Falcon, reviewed by Andrew, 11/25/09)
+/*#ifndef tanf
 #define tanf(x)		((F32)tan((F64)(x)))
-#endif
-#ifndef acosf
-#define acosf(x)	((F32)acos((F64)(x)))
-#endif
-
-#ifndef powf
-#define powf(x,y)	((F32)pow((F64)(x),(F64)(y)))
-#endif
-#ifndef expf
-#define expf(x)		((F32)exp((F64)(x)))
-#endif
+#endif*/

 const F32	GRAVITY			= -9.8f;

@ -200,7 +179,7 @@ inline S32 llfloor( F32 f )
 		}
 		return result;
 #else
-		return (S32)floorf(f);
+		return (S32)floor(f);
 #endif
 }

@ -378,11 +357,14 @@ inline F32 snap_to_sig_figs(F32 foo, S32 sig_figs)
 		bar *= 10.f;
 	}

-	foo = (F32)llround(foo * bar);
+	//F32 new_foo = (F32)llround(foo * bar);
+	// the llround() implementation sucks.  Don't use it.

-	// shift back
-	foo /= bar;
-	return foo;
+	F32 sign = (foo > 0.f) ? 1.f : -1.f;
+	F32 new_foo = F32( S64(foo * bar + sign * 0.5f));
+	new_foo /= bar;
+
+	return new_foo;
 }

 inline F32 lerp(F32 a, F32 b, F32 u) 
@ -516,4 +498,45 @@ inline F32 llgaussian(F32 x, F32 o)
 	return 1.f/(F_SQRT_TWO_PI*o)*powf(F_E, -(x*x)/(2*o*o));
 }

+//helper function for removing outliers: erases, in place, values outside [Q1 - k*(Q3-Q1), Q3 + k*(Q3-Q1)]
+template <class VEC_TYPE>
+inline void ll_remove_outliers(std::vector<VEC_TYPE>& data, F32 k)
+{
+	if (data.size() < 100)
+	{ //not enough samples
+		return;
+	}
+	// quartiles are picked by index, so data is treated as sorted ascending (it is not sorted here)
+	VEC_TYPE Q1 = data[data.size()/4];
+	VEC_TYPE Q3 = data[data.size()-data.size()/4-1];
+	// k scales the interquartile range that is added on either side of [Q1, Q3]
+	VEC_TYPE min = (VEC_TYPE) ((F32) Q1-k * (F32) (Q3-Q1));
+	VEC_TYPE max = (VEC_TYPE) ((F32) Q3+k * (F32) (Q3-Q1));
+	// i ends up as the index of the first value that is not below min
+	U32 i = 0;
+	while (i < data.size() && data[i] < min)
+	{
+		i++;
+	}
+	// j ends up as the index of the last value that is not above max
+	S32 j = data.size()-1;
+	while (j > 0 && data[j] > max)
+	{
+		j--;
+	}
+	// erase strictly after j: data[j] itself is in range and must be kept
+	if (j < data.size()-1)
+	{
+		data.erase(data.begin()+j+1, data.end());
+	}
+	// then drop the low outliers that sit in front of index i
+	if (i > 0)
+	{
+		data.erase(data.begin(), data.begin()+i);
+	}
+}
+
+// Include simd math header
+#include "llsimdmath.h"
+
 #endif
--- a/indra/llmath/llmatrix3a.cpp
+++ b/indra/llmath/llmatrix3a.cpp
@ -0,0 +1,134 @@
+/** 
+ * @file llmatrix3a.cpp
+ * @brief SIMD 3x3 matrix implementation
+ *
+ * $LicenseInfo:firstyear=2010&license=viewerlgpl$
+ * Second Life Viewer Source Code
+ * Copyright (C) 2010, Linden Research, Inc.
+ * 
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation;
+ * version 2.1 of the License only.
+ * 
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ * 
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+ * 
+ * Linden Research, Inc., 945 Battery Street, San Francisco, CA  94111  USA
+ * $/LicenseInfo$
+ */
+
+#include "llmath.h"
+
+static LL_ALIGN_16(const F32 M_IDENT_3A[12]) = 
+												{	1.f, 0.f, 0.f, 0.f, // Column 1
+													0.f, 1.f, 0.f, 0.f, // Column 2
+													0.f, 0.f, 1.f, 0.f }; // Column 3
+
+extern const LLMatrix3a LL_M3A_IDENTITY = *reinterpret_cast<const LLMatrix3a*> (M_IDENT_3A);
+
+void LLMatrix3a::setMul( const LLMatrix3a& lhs, const LLMatrix3a& rhs )
+{ // sets this matrix to the product lhs * rhs, building one result column per loop pass
+	const LLVector4a col0 = lhs.getColumn(0);
+	const LLVector4a col1 = lhs.getColumn(1);
+	const LLVector4a col2 = lhs.getColumn(2);
+	// the lhs columns are copied up front, before any column of this matrix is written
+	for ( int i = 0; i < 3; i++ )
+	{
+		LLVector4a xxxx = _mm_load_ss( rhs.mColumns[i].getF32ptr() );
+		xxxx.splat<0>( xxxx );
+		xxxx.mul( col0 );
+		// xxxx is now col0 scaled by element 0 of rhs column i (assuming splat<0> broadcasts lane 0 -- TODO confirm)
+		{
+			LLVector4a yyyy = _mm_load_ss( rhs.mColumns[i].getF32ptr() +  1 );
+			yyyy.splat<0>( yyyy );
+			yyyy.mul( col1 ); 
+			xxxx.add( yyyy );
+		}
+		// same pattern for element 2 of rhs column i and col2
+		{
+			LLVector4a zzzz = _mm_load_ss( rhs.mColumns[i].getF32ptr() +  2 );
+			zzzz.splat<0>( zzzz );
+			zzzz.mul( col2 );
+			xxxx.add( zzzz );
+		}
+		// store the accumulated sum as column i of this matrix
+		xxxx.store4a( mColumns[i].getF32ptr() );
+	}
+	
+}
+
+/*static */void LLMatrix3a::batchTransform( const LLMatrix3a& xform, const LLVector4a* src, int numVectors, LLVector4a* dst )
+{ // writes xform applied to each of the numVectors vectors at src into dst; only x, y, z of each source vector are read
+	const LLVector4a col0 = xform.getColumn(0);
+	const LLVector4a col1 = xform.getColumn(1);
+	const LLVector4a col2 = xform.getColumn(2);
+	const LLVector4a* maxAddr = src + numVectors;
+	// an odd count is evened out by transforming one vector here, so the loop below can consume pairs
+	if ( numVectors & 0x1 )
+	{
+		LLVector4a xxxx = _mm_load_ss( (const F32*)src );
+		LLVector4a yyyy = _mm_load_ss( (const F32*)src + 1 );
+		LLVector4a zzzz = _mm_load_ss( (const F32*)src + 2 );
+		xxxx.splat<0>( xxxx );
+		yyyy.splat<0>( yyyy );
+		zzzz.splat<0>( zzzz );
+		xxxx.mul( col0 );
+		yyyy.mul( col1 ); 
+		zzzz.mul( col2 );
+		xxxx.add( yyyy );
+		xxxx.add( zzzz );
+		xxxx.store4a( (F32*)dst );
+		src++;
+		dst++;
+	}
+
+	// NOTE(review): numVectors is halved but never read again; the loop below is bounded by maxAddr
+	numVectors >>= 1;
+	while ( src < maxAddr )
+	{ // each pass transforms two vectors: src[0] accumulates in xxxx, src[1] in xxxx1
+		_mm_prefetch( (const char*)(src + 32 ), _MM_HINT_NTA );
+		_mm_prefetch( (const char*)(dst + 32), _MM_HINT_NTA );
+		LLVector4a xxxx = _mm_load_ss( (const F32*)src );
+		LLVector4a xxxx1= _mm_load_ss( (const F32*)(src + 1) );
+
+		xxxx.splat<0>( xxxx );
+		xxxx1.splat<0>( xxxx1 );
+		xxxx.mul( col0 );
+		xxxx1.mul( col0 );
+
+		{
+			LLVector4a yyyy = _mm_load_ss( (const F32*)src + 1 );
+			LLVector4a yyyy1 = _mm_load_ss( (const F32*)(src + 1) + 1);
+			yyyy.splat<0>( yyyy );
+			yyyy1.splat<0>( yyyy1 );
+			yyyy.mul( col1 );
+			yyyy1.mul( col1 );
+			xxxx.add( yyyy );
+			xxxx1.add( yyyy1 );
+		}
+
+		{
+			LLVector4a zzzz = _mm_load_ss( (const F32*)(src) + 2 );
+			LLVector4a zzzz1 = _mm_load_ss( (const F32*)(++src) + 2 );
+			zzzz.splat<0>( zzzz );
+			zzzz1.splat<0>( zzzz1 );
+			zzzz.mul( col2 );
+			zzzz1.mul( col2 );
+			xxxx.add( zzzz );
+			xxxx1.add( zzzz1 );
+		}
+		// src has already moved on by one (the ++src above); the src++ below finishes stepping past the pair
+		xxxx.store4a(dst->getF32ptr());
+		src++;
+		dst++;
+
+		xxxx1.store4a((F32*)dst++);
+	}
+}
--- a/indra/llmath/llmatrix3a.h
+++ b/indra/llmath/llmatrix3a.h
@ -0,0 +1,128 @@
+/** 
+ * @file llmatrix3a.h
+ * @brief LLMatrix3a class header file - memory aligned and vectorized 3x3 matrix
+ *
+ * $LicenseInfo:firstyear=2010&license=viewerlgpl$
+ * Second Life Viewer Source Code
+ * Copyright (C) 2010, Linden Research, Inc.
+ * 
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation;
+ * version 2.1 of the License only.
+ * 
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ * 
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+ * 
+ * Linden Research, Inc., 945 Battery Street, San Francisco, CA  94111  USA
+ * $/LicenseInfo$
+ */
+
+#ifndef	LL_LLMATRIX3A_H
+#define	LL_LLMATRIX3A_H
+
+/////////////////////////////
+// LLMatrix3a, LLRotation
+/////////////////////////////
+// This class stores a 3x3 (technically 4x3) matrix in column-major order
+/////////////////////////////
+/////////////////////////////
+// These classes are intentionally minimal right now. If you need additional
+// functionality, please contact someone with SSE experience (e.g., Falcon or
+// Huseby).
+/////////////////////////////
+
+// LLMatrix3a is the base class for LLRotation, which should be used instead any time you're dealing with a 
+// rotation matrix.
+class LLMatrix3a
+{
+public:
+
+	// Utility function for quickly transforming an array of LLVector4a's (dst[i] = x*col0 + y*col1 + z*col2 of src[i])
+	// For transforming a single LLVector4a, see LLVector4a::setRotated
+	static void batchTransform( const LLMatrix3a& xform, const LLVector4a* src, int numVectors, LLVector4a* dst );
+
+	// Utility function to obtain the identity matrix (returns a reference to the shared LL_M3A_IDENTITY constant)
+	static inline const LLMatrix3a& getIdentity();
+
+	//////////////////////////
+	// Ctors
+	//////////////////////////
+	
+	// Default ctor (empty body; does not explicitly initialize mColumns)
+	LLMatrix3a() {}
+
+	// Ctor for setting by columns (c0, c1, c2 become columns 0, 1, 2)
+	inline LLMatrix3a( const LLVector4a& c0, const LLVector4a& c1, const LLVector4a& c2 );
+
+	//////////////////////////
+	// Get/Set
+	//////////////////////////
+
+	// Loads from an LLMatrix3: src.mMatrix[i] is loaded into column i via load3
+	inline void loadu(const LLMatrix3& src);
+	
+	// Set rows (stores r0-r2 as columns, then transposes in place)
+	inline void setRows(const LLVector4a& r0, const LLVector4a& r1, const LLVector4a& r2);
+	
+	// Set columns (plain copies of c0-c2)
+	inline void setColumns(const LLVector4a& c0, const LLVector4a& c1, const LLVector4a& c2);
+
+	// Get read-only access to a specified column. Valid columns are 0-2; the only
+	// bounds check is an llassert in the implementation. You've been warned.
+	inline const LLVector4a& getColumn(const U32 column) const;
+
+	/////////////////////////
+	// Matrix modification
+	/////////////////////////
+	
+	// Set this matrix to the product of lhs and rhs ( this = lhs * rhs )
+	void setMul( const LLMatrix3a& lhs, const LLMatrix3a& rhs );
+
+	// Set this matrix to the transpose of src (src may be this matrix; setRows relies on that)
+	inline void setTranspose(const LLMatrix3a& src);
+
+	// Set this matrix to a*w + b*(1-w) -- NOTE(review): LLMatrix4a::setLerp computes a + (b-a)*w; confirm which weighting LLVector4a::setLerp uses
+	inline void setLerp(const LLMatrix3a& a, const LLMatrix3a& b, F32 w);
+
+	/////////////////////////
+	// Matrix inspection
+	/////////////////////////
+
+	// Sets all 4 elements in 'dest' to the determinant of this matrix.
+	// If you will be using the determinant in subsequent ops with LLVector4a, use this version
+	inline void getDeterminant( LLVector4a& dest ) const;
+
+	// Returns the determinant as an LLSimdScalar. Use this if you will be using the determinant
+	// primarily for scalar operations.
+	inline LLSimdScalar getDeterminant() const;
+
+	// Returns nonzero if rows 0-2 and columns 0-2 contain no NaN or INF values. Row 3 is ignored
+	inline LLBool32 isFinite() const;
+
+	// Returns true if every column of this matrix equals the matching column of 'rhs' (equals3) up to 'tolerance'
+	inline bool isApproximatelyEqual( const LLMatrix3a& rhs, F32 tolerance = F_APPROXIMATELY_ZERO ) const;
+
+protected:
+
+	LLVector4a mColumns[3];	// column i of the matrix; each column is a full 4-float vector
+
+};
+
+class LLRotation : public LLMatrix3a
+{
+public:
+	
+	LLRotation() {}	// empty body, like the LLMatrix3a default ctor
+	
+	// Returns true if this * transpose(this) is approximately identity and |det - 1| < F_APPROXIMATELY_ZERO
+	inline bool isOkRotation() const;		
+};
+
+#endif
--- a/indra/llmath/llmatrix3a.inl
+++ b/indra/llmath/llmatrix3a.inl
@ -0,0 +1,119 @@
+/** 
+ * @file llmatrix3a.inl
+ * @brief LLMatrix3a inline definitions
+ *
+ * $LicenseInfo:firstyear=2010&license=viewerlgpl$
+ * Second Life Viewer Source Code
+ * Copyright (C) 2010, Linden Research, Inc.
+ * 
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation;
+ * version 2.1 of the License only.
+ * 
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ * 
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+ * 
+ * Linden Research, Inc., 945 Battery Street, San Francisco, CA  94111  USA
+ * $/LicenseInfo$
+ */
+
+#include "llmatrix3a.h"
+#include "m3math.h"
+
+// Column-wise constructor: c0, c1 and c2 are copied into columns 0, 1 and 2.
+inline LLMatrix3a::LLMatrix3a( const LLVector4a& c0, const LLVector4a& c1, const LLVector4a& c2 )
+{
+	mColumns[0] = c0;
+	mColumns[1] = c1;
+	mColumns[2] = c2;
+}
+
+// Loads src.mMatrix[0..2] into columns 0..2, one load3 call per column.
+inline void LLMatrix3a::loadu(const LLMatrix3& src)
+{
+	for (U32 i = 0; i < 3; ++i)
+	{
+		mColumns[i].load3(src.mMatrix[i]);
+	}
+}
+
+// Sets the matrix from three row vectors.
+inline void LLMatrix3a::setRows(const LLVector4a& r0, const LLVector4a& r1, const LLVector4a& r2)
+{
+	// Store the rows as if they were columns, then transpose in place.
+	setColumns( r0, r1, r2 );
+	setTranspose( *this );
+}
+
+inline void LLMatrix3a::setColumns(const LLVector4a& c0, const LLVector4a& c1, const LLVector4a& c2)
+{
+	mColumns[0] = c0;	// plain copies in index order; nothing is normalized or validated
+	mColumns[1] = c1;
+	mColumns[2] = c2;
+}
+
+inline void LLMatrix3a::setTranspose(const LLMatrix3a& src)
+{
+	const LLQuad srcCol0 = src.mColumns[0];	// columns 0 and 1 are copied before any write, so src may be *this
+	const LLQuad srcCol1 = src.mColumns[1];
+	const LLQuad unpacklo = _mm_unpacklo_ps( srcCol0, srcCol1 );	// (c0.x, c1.x, c0.y, c1.y)
+	mColumns[0] = _mm_movelh_ps( unpacklo, src.mColumns[2] );	// (c0.x, c1.x, c2.x, c2.y); 4th lane is a leftover
+	mColumns[1] = _mm_shuffle_ps( _mm_movehl_ps( srcCol0, unpacklo ), src.mColumns[2], _MM_SHUFFLE(0, 1, 1, 0) );	// (c0.y, c1.y, c2.y, c2.x)
+	mColumns[2] = _mm_shuffle_ps( _mm_unpackhi_ps( srcCol0, srcCol1 ), src.mColumns[2], _MM_SHUFFLE(0, 2, 1, 0) );	// (c0.z, c1.z, c2.z, c2.x); src column 2 is read before it is overwritten
+}
+
+inline const LLVector4a& LLMatrix3a::getColumn(const U32 column) const
+{
+	llassert( column < 3 );	// the only bounds check; mColumns has exactly 3 entries
+	return mColumns[column];
+}
+
+// Interpolates column by column; the weighting itself is done by LLVector4a::setLerp.
+inline void LLMatrix3a::setLerp(const LLMatrix3a& a, const LLMatrix3a& b, F32 w)
+{
+	for (U32 i = 0; i < 3; ++i)
+	{
+		mColumns[i].setLerp( a.mColumns[i], b.mColumns[i], w );
+	}
+}
+
+// Returns nonzero only if isFinite3() holds for all three columns; stops at the first failure.
+inline LLBool32 LLMatrix3a::isFinite() const
+{
+	for (U32 i = 0; i < 3; ++i)
+	{
+		if ( !mColumns[i].isFinite3() )
+		{
+			return false;
+		}
+	}
+	return true;
+}
+
+// Writes the determinant, computed as (col1 x col2) . col0, into 'dest' via setAllDot3.
+inline void LLMatrix3a::getDeterminant( LLVector4a& dest ) const
+{
+	LLVector4a cross;
+	cross.setCross3( mColumns[1], mColumns[2] );
+
+	dest.setAllDot3( cross, mColumns[0] );
+}
+
+// Returns the determinant, computed as (col1 x col2) . col0, as a scalar.
+inline LLSimdScalar LLMatrix3a::getDeterminant() const
+{
+	LLVector4a cross;
+	cross.setCross3( mColumns[1], mColumns[2] );
+
+	return cross.dot3( mColumns[0] );
+}
+
+// Compares the matrices column by column with equals3; stops at the first mismatch.
+inline bool LLMatrix3a::isApproximatelyEqual( const LLMatrix3a& rhs, F32 tolerance /*= F_APPROXIMATELY_ZERO*/ ) const
+{
+	for (U32 i = 0; i < 3; ++i)
+	{
+		if ( !rhs.getColumn(i).equals3(mColumns[i], tolerance) )
+		{
+			return false;
+		}
+	}
+	return true;
+}
+
+inline const LLMatrix3a& LLMatrix3a::getIdentity()
+{
+	extern const LLMatrix3a LL_M3A_IDENTITY;	// declared here only; the definition is not part of this file
+	return LL_M3A_IDENTITY;	// returned by reference, no copy is made
+}
+
+// A rotation is accepted when this * transpose(this) is approximately the
+// identity and the determinant is within F_APPROXIMATELY_ZERO of 1.
+inline bool LLRotation::isOkRotation() const
+{
+	LLMatrix3a transposed;
+	transposed.setTranspose( *this );
+
+	LLMatrix3a shouldBeIdentity;
+	shouldBeIdentity.setMul( *this, transposed );
+
+	LLSimdScalar detError = getDeterminant() - 1.f;
+
+	const bool isOrthonormal = shouldBeIdentity.isApproximatelyEqual( LLMatrix3a::getIdentity() );
+	return isOrthonormal && (detError.getAbs() < F_APPROXIMATELY_ZERO);
+}
+
--- a/indra/llmath/llmatrix4a.h
+++ b/indra/llmath/llmatrix4a.h
@ -0,0 +1,143 @@
+/** 
+ * @file llmatrix4a.h
+ * @brief LLMatrix4a class header file - memory aligned and vectorized 4x4 matrix
+ *
+ * $LicenseInfo:firstyear=2007&license=viewerlgpl$
+ * Second Life Viewer Source Code
+ * Copyright (C) 2010, Linden Research, Inc.
+ * 
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation;
+ * version 2.1 of the License only.
+ * 
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ * 
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+ * 
+ * Linden Research, Inc., 945 Battery Street, San Francisco, CA  94111  USA
+ * $/LicenseInfo$
+ */
+
+#ifndef	LL_LLMATRIX4A_H
+#define	LL_LLMATRIX4A_H
+
+#include "llvector4a.h"
+#include "m4math.h"
+#include "m3math.h"
+
+// Memory aligned, vectorized 4x4 matrix stored as four LLVector4a's.
+// rotate() and affineTransform() combine them as
+// x*mMatrix[0] + y*mMatrix[1] + z*mMatrix[2] (+ mMatrix[3]).
+class LLMatrix4a
+{
+public:
+	LLVector4a mMatrix[4];
+
+	// Calls clear() on each of the four vectors.
+	inline void clear()
+	{
+		mMatrix[0].clear();
+		mMatrix[1].clear();
+		mMatrix[2].clear();
+		mMatrix[3].clear();
+	}
+
+	// Loads all four vectors from an LLMatrix4 using unaligned loads.
+	inline void loadu(const LLMatrix4& src)
+	{
+		mMatrix[0] = _mm_loadu_ps(src.mMatrix[0]);
+		mMatrix[1] = _mm_loadu_ps(src.mMatrix[1]);
+		mMatrix[2] = _mm_loadu_ps(src.mMatrix[2]);
+		mMatrix[3] = _mm_loadu_ps(src.mMatrix[3]);
+		
+	}
+
+	// Loads the first three vectors from an LLMatrix3 (via load3) and sets
+	// the fourth to (0, 0, 0, 1).
+	inline void loadu(const LLMatrix3& src)
+	{
+		mMatrix[0].load3(src.mMatrix[0]);
+		mMatrix[1].load3(src.mMatrix[1]);
+		mMatrix[2].load3(src.mMatrix[2]);
+		mMatrix[3].set(0,0,0,1.f);
+	}
+
+	// Adds rhs to this matrix, vector by vector.
+	inline void add(const LLMatrix4a& rhs)
+	{
+		mMatrix[0].add(rhs.mMatrix[0]);
+		mMatrix[1].add(rhs.mMatrix[1]);
+		mMatrix[2].add(rhs.mMatrix[2]);
+		mMatrix[3].add(rhs.mMatrix[3]);
+	}
+
+	// Overwrites mMatrix[0..2] with r0..r2. mMatrix[3] is left untouched.
+	inline void setRows(const LLVector4a& r0, const LLVector4a& r1, const LLVector4a& r2)
+	{
+		mMatrix[0] = r0;
+		mMatrix[1] = r1;
+		mMatrix[2] = r2;
+	}
+
+	// Sets this matrix to m scaled by s (all four vectors).
+	inline void setMul(const LLMatrix4a& m, const F32 s)
+	{
+		mMatrix[0].setMul(m.mMatrix[0], s);
+		mMatrix[1].setMul(m.mMatrix[1], s);
+		mMatrix[2].setMul(m.mMatrix[2], s);
+		mMatrix[3].setMul(m.mMatrix[3], s);
+	}
+
+	// Sets this matrix to a + (b - a)*w, i.e. w = 0 yields a and w = 1 yields b.
+	inline void setLerp(const LLMatrix4a& a, const LLMatrix4a& b, F32 w)
+	{
+		LLVector4a d0,d1,d2,d3;
+		d0.setSub(b.mMatrix[0], a.mMatrix[0]);
+		d1.setSub(b.mMatrix[1], a.mMatrix[1]);
+		d2.setSub(b.mMatrix[2], a.mMatrix[2]);
+		d3.setSub(b.mMatrix[3], a.mMatrix[3]);
+
+		// this = a + d*w
+		
+		d0.mul(w);
+		d1.mul(w);
+		d2.mul(w);
+		d3.mul(w);
+
+		mMatrix[0].setAdd(a.mMatrix[0],d0);
+		mMatrix[1].setAdd(a.mMatrix[1],d1);
+		mMatrix[2].setAdd(a.mMatrix[2],d2);
+		mMatrix[3].setAdd(a.mMatrix[3],d3);
+	}
+
+	// res = v.x*mMatrix[0] + v.y*mMatrix[1] + v.z*mMatrix[2]
+	// mMatrix[3] is not applied. 'res' may be the same object as 'v'.
+	inline void rotate(const LLVector4a& v, LLVector4a& res) const
+	{
+		// Broadcast every component into a local before 'res' is written, so
+		// the result stays correct when 'res' aliases 'v'.
+		LLVector4a x,y,z;
+
+		x = _mm_shuffle_ps(v, v, _MM_SHUFFLE(0, 0, 0, 0));
+		y = _mm_shuffle_ps(v, v, _MM_SHUFFLE(1, 1, 1, 1));
+		z = _mm_shuffle_ps(v, v, _MM_SHUFFLE(2, 2, 2, 2));
+
+		x.mul(mMatrix[0]);
+		y.mul(mMatrix[1]);
+		z.mul(mMatrix[2]);
+
+		// Same summation order as before: (x + y) + z.
+		x.add(y);
+		res.setAdd(x,z);
+	}
+
+	// res = v.x*mMatrix[0] + v.y*mMatrix[1] + (v.z*mMatrix[2] + mMatrix[3])
+	// 'res' may be the same object as 'v'; it is only written at the end.
+	inline void affineTransform(const LLVector4a& v, LLVector4a& res) const
+	{
+		LLVector4a x,y,z;
+
+		x = _mm_shuffle_ps(v, v, _MM_SHUFFLE(0, 0, 0, 0));
+		y = _mm_shuffle_ps(v, v, _MM_SHUFFLE(1, 1, 1, 1));
+		z = _mm_shuffle_ps(v, v, _MM_SHUFFLE(2, 2, 2, 2));
+		
+		x.mul(mMatrix[0]);
+		y.mul(mMatrix[1]);
+		z.mul(mMatrix[2]);
+
+		x.add(y);
+		z.add(mMatrix[3]);
+		res.setAdd(x,z);
+	}
+};
+
+#endif
--- a/indra/llmath/lloctree.h
+++ b/indra/llmath/lloctree.h
@ -29,14 +29,11 @@

 #include "lltreenode.h"
 #include "v3math.h"
+#include "llvector4a.h"
 #include <vector>
 #include <set>

-#if LL_RELEASE_WITH_DEBUG_INFO || LL_DEBUG
-#define OCT_ERRS LL_ERRS("OctreeErrors")
-#else
 #define OCT_ERRS LL_WARNS("OctreeErrors")
-#endif

 #define LL_OCTREE_PARANOIA_CHECK 0
 #if LL_DARWIN
@ -66,6 +63,13 @@ public:
 	virtual void visit(const LLOctreeNode<T>* branch) = 0;
 };

+template <class T>
+class LLOctreeTravelerDepthFirst : public LLOctreeTraveler<T>
+{
+public:
+	virtual void traverse(const LLOctreeNode<T>* node);	// recurses into every child first, then has 'node' accept this traveler
+};
+
 template <class T>
 class LLOctreeNode : public LLTreeNode<T>
 {
@ -81,23 +85,30 @@ public:
 	typedef LLOctreeNode<T>		oct_node;
 	typedef LLOctreeListener<T>	oct_listener;

-	static const U8 OCTANT_POSITIVE_X = 0x01;
-	static const U8 OCTANT_POSITIVE_Y = 0x02;
-	static const U8 OCTANT_POSITIVE_Z = 0x04;
-		
-	LLOctreeNode(	LLVector3d center, 
-					LLVector3d size, 
+	/*void* operator new(size_t size)
+	{
+		return ll_aligned_malloc_16(size);
+	}
+
+	void operator delete(void* ptr)
+	{
+		ll_aligned_free_16(ptr);
+	}*/
+
+	LLOctreeNode(	const LLVector4a& center, 
+					const LLVector4a& size, 
 					BaseType* parent, 
 					U8 octant = 255)
 	:	mParent((oct_node*)parent), 
-		mCenter(center), 
-		mSize(size), 
 		mOctant(octant) 
 	{ 
+		mCenter = center;
+		mSize = size;
+
 		updateMinMax();
 		if ((mOctant == 255) && mParent)
 		{
-			mOctant = ((oct_node*) mParent)->getOctant(mCenter.mdV);
+			mOctant = ((oct_node*) mParent)->getOctant(mCenter);
 		}

 		clearChildren();
@ -114,40 +125,24 @@ public:
 	}

 	inline const BaseType* getParent()	const			{ return mParent; }
-	inline void setParent(BaseType* parent)			{ mParent = (oct_node*) parent; }
-	inline const LLVector3d& getCenter() const			{ return mCenter; }
-	inline const LLVector3d& getSize() const			{ return mSize; }
-	inline void setCenter(LLVector3d center)			{ mCenter = center; }
-	inline void setSize(LLVector3d size)				{ mSize = size; }
-    inline oct_node* getNodeAt(T* data)				{ return getNodeAt(data->getPositionGroup(), data->getBinRadius()); }
-	inline U8 getOctant() const						{ return mOctant; }
-	inline void setOctant(U8 octant)					{ mOctant = octant; }
+	inline void setParent(BaseType* parent)				{ mParent = (oct_node*) parent; }
+	inline const LLVector4a& getCenter() const			{ return mCenter; }
+	inline const LLVector4a& getSize() const			{ return mSize; }
+	inline void setCenter(const LLVector4a& center)		{ mCenter = center; }
+	inline void setSize(const LLVector4a& size)			{ mSize = size; }
+    inline oct_node* getNodeAt(T* data)					{ return getNodeAt(data->getPositionGroup(), data->getBinRadius()); }
+	inline U8 getOctant() const							{ return mOctant; }
 	inline const oct_node*	getOctParent() const		{ return (const oct_node*) getParent(); }
 	inline oct_node* getOctParent() 					{ return (oct_node*) getParent(); }
 	
-	U8 getOctant(const F64 pos[]) const	//get the octant pos is in
+	U8 getOctant(const LLVector4a& pos) const			//get the octant pos is in
 	{
-		U8 ret = 0;
-
-		if (pos[0] > mCenter.mdV[0])
-		{
-			ret |= OCTANT_POSITIVE_X;
-		}
-		if (pos[1] > mCenter.mdV[1])
-		{
-			ret |= OCTANT_POSITIVE_Y;
-		}
-		if (pos[2] > mCenter.mdV[2])
-		{
-			ret |= OCTANT_POSITIVE_Z;
-		}
-
-		return ret;
+		return (U8) (pos.greaterThan(mCenter).getGatheredBits() & 0x7);
 	}
 	
-	inline bool isInside(const LLVector3d& pos, const F64& rad) const
+	inline bool isInside(const LLVector4a& pos, const F32& rad) const
 	{
-		return rad <= mSize.mdV[0]*2.0 && isInside(pos); 
+		return rad <= mSize[0]*2.f && isInside(pos); 
 	}

 	inline bool isInside(T* data) const			
@ -155,29 +150,27 @@ public:
 		return isInside(data->getPositionGroup(), data->getBinRadius());
 	}

-	bool isInside(const LLVector3d& pos) const
+	bool isInside(const LLVector4a& pos) const
 	{
-		const F64& x = pos.mdV[0];
-		const F64& y = pos.mdV[1];
-		const F64& z = pos.mdV[2];
-			
-		if (x > mMax.mdV[0] || x <= mMin.mdV[0] ||
-			y > mMax.mdV[1] || y <= mMin.mdV[1] ||
-			z > mMax.mdV[2] || z <= mMin.mdV[2])
+		S32 gt = pos.greaterThan(mMax).getGatheredBits() & 0x7;
+		if (gt)
 		{
 			return false;
 		}
-		
+
+		S32 lt = pos.lessEqual(mMin).getGatheredBits() & 0x7;
+		if (lt)
+		{
+			return false;
+		}
+				
 		return true;
 	}
 	
 	void updateMinMax()
 	{
-		for (U32 i = 0; i < 3; i++)
-		{
-			mMax.mdV[i] = mCenter.mdV[i] + mSize.mdV[i];
-			mMin.mdV[i] = mCenter.mdV[i] - mSize.mdV[i];
-		}
+		mMax.setAdd(mCenter, mSize);
+		mMin.setSub(mCenter, mSize);
 	}

 	inline oct_listener* getOctListener(U32 index) 
@ -190,34 +183,34 @@ public:
 		return contains(xform->getBinRadius());
 	}

-	bool contains(F64 radius)
+	bool contains(F32 radius)
 	{
 		if (mParent == NULL)
 		{	//root node contains nothing
 			return false;
 		}

-		F64 size = mSize.mdV[0];
-		F64 p_size = size * 2.0;
+		F32 size = mSize[0];
+		F32 p_size = size * 2.f;

-		return (radius <= 0.001 && size <= 0.001) ||
+		return (radius <= 0.001f && size <= 0.001f) ||
 				(radius <= p_size && radius > size);
 	}

-	static void pushCenter(LLVector3d &center, const LLVector3d &size, const T* data)
+	static void pushCenter(LLVector4a &center, const LLVector4a &size, const T* data)
 	{
-		const LLVector3d& pos = data->getPositionGroup();
-		for (U32 i = 0; i < 3; i++)
-		{
-			if (pos.mdV[i] > center.mdV[i])
-			{
-				center.mdV[i] += size.mdV[i];
-			}
-			else 
-			{
-				center.mdV[i] -= size.mdV[i];
-			}
-		}
+		const LLVector4a& pos = data->getPositionGroup();
+
+		LLVector4Logical gt = pos.greaterThan(center);
+
+		LLVector4a up;
+		up = _mm_and_ps(size, gt);
+
+		LLVector4a down;
+		down = _mm_andnot_ps(gt, size);
+
+		center.add(up);
+		center.sub(down);
 	}

 	void accept(oct_traveler* visitor)				{ visitor->visit(this); }
@ -236,32 +229,49 @@ public:
 	void accept(tree_traveler* visitor) const		{ visitor->visit(this); }
 	void accept(oct_traveler* visitor) const		{ visitor->visit(this); }
 	
-	oct_node* getNodeAt(const LLVector3d& pos, const F64& rad)
+	// Consistency check of mChildMap: every octant slot that is not 255 must
+	// index a child whose stored octant is that slot and whose center falls in
+	// that octant of this node. Any violation is reported through llerrs.
+	void validateChildMap()
+	{
+		for (U32 octant = 0; octant < 8; ++octant)
+		{
+			const U8 slot = mChildMap[octant];
+			if (slot == 255)
+			{	// no child recorded for this octant
+				continue;
+			}
+
+			LLOctreeNode<T>* node = mChild[slot];
+
+			if (node->getOctant() != octant)
+			{
+				llerrs << "Invalid child map, bad octant data." << llendl;
+			}
+
+			if (getOctant(node->getCenter()) != node->getOctant())
+			{
+				llerrs << "Invalid child octant compared to position data." << llendl;
+			}
+		}
+	}
+
+
+	oct_node* getNodeAt(const LLVector4a& pos, const F32& rad)
 	{ 
 		LLOctreeNode<T>* node = this;

 		if (node->isInside(pos, rad))
 		{		
 			//do a quick search by octant
-			U8 octant = node->getOctant(pos.mdV);
-			BOOL keep_going = TRUE;
-
+			U8 octant = node->getOctant(pos);
+			
 			//traverse the tree until we find a node that has no node
 			//at the appropriate octant or is smaller than the object.  
 			//by definition, that node is the smallest node that contains 
 			// the data
-			while (keep_going && node->getSize().mdV[0] >= rad)
+			U8 next_node = node->mChildMap[octant];
+			
+			while (next_node != 255 && node->getSize()[0] >= rad)
 			{	
-				keep_going = FALSE;
-				for (U32 i = 0; i < node->getChildCount() && !keep_going; i++)
-				{
-					if (node->getChild(i)->getOctant() == octant)
-					{
-						node = node->getChild(i);
-						octant = node->getOctant(pos.mdV);
-						keep_going = TRUE;
-					}
-				}
+				node = node->getChild(next_node);
+				octant = node->getOctant(pos);
+				next_node = node->mChildMap[octant];
 			}
 		}
 		else if (!node->contains(rad) && node->getParent())
@ -276,7 +286,7 @@ public:
 	{
 		if (data == NULL)
 		{
-			//OCT_ERRS << "!!! INVALID ELEMENT ADDED TO OCTREE BRANCH !!!" << llendl;
+			OCT_ERRS << "!!! INVALID ELEMENT ADDED TO OCTREE BRANCH !!!" << llendl;
 			return false;
 		}
 		LLOctreeNode<T>* parent = getOctParent();
@ -284,10 +294,8 @@ public:
 		//is it here?
 		if (isInside(data->getPositionGroup()))
 		{
-			if (getElementCount() < LL_OCTREE_MAX_CAPACITY &&
-				(contains(data->getBinRadius()) ||
-				(data->getBinRadius() > getSize().mdV[0] &&
-				parent && parent->getElementCount() >= LL_OCTREE_MAX_CAPACITY))) 
+			if ((getElementCount() < LL_OCTREE_MAX_CAPACITY && contains(data->getBinRadius()) ||
+				(data->getBinRadius() > getSize()[0] &&	parent && parent->getElementCount() >= LL_OCTREE_MAX_CAPACITY))) 
 			{ //it belongs here
 #if LL_OCTREE_PARANOIA_CHECK
 				//if this is a redundant insertion, error out (should never happen)
@ -317,16 +325,21 @@ public:
 				}
 				
 				//it's here, but no kids are in the right place, make a new kid
-				LLVector3d center(getCenter());
-				LLVector3d size(getSize()*0.5);
+				LLVector4a center = getCenter();
+				LLVector4a size = getSize();
+				size.mul(0.5f);
 		        		
 				//push center in direction of data
 				LLOctreeNode<T>::pushCenter(center, size, data);

 				// handle case where floating point number gets too small
-				if( llabs(center.mdV[0] - getCenter().mdV[0]) < F_APPROXIMATELY_ZERO &&
-					llabs(center.mdV[1] - getCenter().mdV[1]) < F_APPROXIMATELY_ZERO &&
-					llabs(center.mdV[2] - getCenter().mdV[2]) < F_APPROXIMATELY_ZERO)
+				LLVector4a val;
+				val.setSub(center, getCenter());
+				val.setAbs(val);
+								
+				S32 lt = val.lessThan(LLVector4a::getEpsilon()).getGatheredBits() & 0x7;
+
+				if( lt == 0x7 )
 				{
 					mData.insert(data);
 					BaseType::insert(data);
@ -344,7 +357,7 @@ public:
 				//make sure no existing node matches this position
 				for (U32 i = 0; i < getChildCount(); i++)
 				{
-					if (mChild[i]->getCenter() == center)
+					if (mChild[i]->getCenter().equals3(center))
 					{
 						OCT_ERRS << "Octree detected duplicate child center and gave up." << llendl;
 						return false;
@ -362,7 +375,7 @@ public:
 		else 
 		{
 			//it's not in here, give it to the root
-			//OCT_ERRS << "Octree insertion failed, starting over from root!" << llendl;
+			OCT_ERRS << "Octree insertion failed, starting over from root!" << llendl;

 			oct_node* node = this;

@ -436,6 +449,9 @@ public:
 	void clearChildren()
 	{
+		// Empties the child list and resets the octant lookup so that no
+		// octant maps to a child.
 		mChild.clear();
+
+		// 255 is the "no child" marker tested by the lookups on mChildMap.
+		// Each byte is written directly instead of through a U32 pointer,
+		// which would alias the U8 array and assume its alignment.
+		for (U32 i = 0; i < 8; ++i)
+		{
+			mChildMap[i] = 255;
+		}
 	}

 	void validate()
@ -469,13 +485,19 @@ public:
 	void addChild(oct_node* child, BOOL silent = FALSE) 
 	{
 #if LL_OCTREE_PARANOIA_CHECK
+
+		if (child->getSize().equals3(getSize()))
+		{
+			OCT_ERRS << "Child size is same as parent size!" << llendl;
+		}
+
 		for (U32 i = 0; i < getChildCount(); i++)
 		{
-			if(mChild[i]->getSize() != child->getSize()) 
+			if(!mChild[i]->getSize().equals3(child->getSize())) 
 			{
 				OCT_ERRS <<"Invalid octree child size." << llendl;
 			}
-			if (mChild[i]->getCenter() == child->getCenter())
+			if (mChild[i]->getCenter().equals3(child->getCenter()))
 			{
 				OCT_ERRS <<"Duplicate octree child position." << llendl;
 			}
@ -487,6 +509,8 @@ public:
 		}
 #endif

+		mChildMap[child->getOctant()] = (U8) mChild.size();
+
 		mChild.push_back(child);
 		child->setParent(this);

@ -500,7 +524,7 @@ public:
 		}
 	}

-	void removeChild(U8 index, BOOL destroy = FALSE)
+	void removeChild(S32 index, BOOL destroy = FALSE)
 	{
 		for (U32 i = 0; i < this->getListenerCount(); i++)
 		{
@ -508,6 +532,8 @@ public:
 			listener->handleChildRemoval(this, getChild(index));
 		}

+		
+
 		if (destroy)
 		{
 			mChild[index]->destroy();
@ -515,6 +541,15 @@ public:
 		}
 		mChild.erase(mChild.begin() + index);

+		//rebuild child map
+		U32* foo = (U32*) mChildMap;
+		foo[0] = foo[1] = 0xFFFFFFFF;
+
+		for (U32 i = 0; i < mChild.size(); ++i)
+		{
+			mChildMap[mChild[i]->getOctant()] = i;
+		}
+
 		checkAlive();
 	}

@ -541,19 +576,32 @@ public:
 			}
 		}

-		//OCT_ERRS << "Octree failed to delete requested child." << llendl;
+		OCT_ERRS << "Octree failed to delete requested child." << llendl;
 	}

 protected:	
-	child_list mChild;
-	element_list mData;
+	typedef enum
+	{
+		CENTER = 0,
+		SIZE = 1,
+		MAX = 2,
+		MIN = 3
+	} eDName;
+
+	LLVector4a mCenter;
+	LLVector4a mSize;
+	LLVector4a mMax;
+	LLVector4a mMin;
+	
 	oct_node* mParent;
-	LLVector3d mCenter;
-	LLVector3d mSize;
-	LLVector3d mMax;
-	LLVector3d mMin;
 	U8 mOctant;
-};
+
+	child_list mChild;
+	U8 mChildMap[8];
+
+	element_list mData;
+		
+}; 

 //just like a regular node, except it might expand on insert and compress on balance
 template <class T>
@ -563,9 +611,9 @@ public:
 	typedef LLOctreeNode<T>	BaseType;
 	typedef LLOctreeNode<T>		oct_node;

-	LLOctreeRoot(	LLVector3d center, 
-					LLVector3d size, 
-					BaseType* parent)
+	LLOctreeRoot(const LLVector4a& center, 
+				 const LLVector4a& size, 
+				 BaseType* parent)
 	:	BaseType(center, size, parent)
 	{
 	}
@ -596,6 +644,8 @@ public:
 			//destroy child
 			child->clearChildren();
 			delete child;
+
+			return false;
 		}
 		
 		return true;
@ -606,28 +656,33 @@ public:
 	{
 		if (data == NULL) 
 		{
-			//OCT_ERRS << "!!! INVALID ELEMENT ADDED TO OCTREE ROOT !!!" << llendl;
+			OCT_ERRS << "!!! INVALID ELEMENT ADDED TO OCTREE ROOT !!!" << llendl;
 			return false;
 		}
 		
 		if (data->getBinRadius() > 4096.0)
 		{
-			//OCT_ERRS << "!!! ELEMENT EXCEEDS MAXIMUM SIZE IN OCTREE ROOT !!!" << llendl;
+			OCT_ERRS << "!!! ELEMENT EXCEEDS MAXIMUM SIZE IN OCTREE ROOT !!!" << llendl;
 			return false;
 		}
 		
-		const F64 MAX_MAG = 1024.0*1024.0;
+		LLVector4a MAX_MAG;
+		MAX_MAG.splat(1024.f*1024.f);

-		const LLVector3d& v = data->getPositionGroup();
-		if (!(fabs(v.mdV[0]-this->mCenter.mdV[0]) < MAX_MAG &&
-		      fabs(v.mdV[1]-this->mCenter.mdV[1]) < MAX_MAG &&
-		      fabs(v.mdV[2]-this->mCenter.mdV[2]) < MAX_MAG))
+		const LLVector4a& v = data->getPositionGroup();
+
+		LLVector4a val;
+		val.setSub(v, BaseType::mCenter);
+		val.setAbs(val);
+		S32 lt = val.lessThan(MAX_MAG).getGatheredBits() & 0x7;
+
+		if (lt != 0x7)
 		{
-			//OCT_ERRS << "!!! ELEMENT EXCEEDS RANGE OF SPATIAL PARTITION !!!" << llendl;
+			OCT_ERRS << "!!! ELEMENT EXCEEDS RANGE OF SPATIAL PARTITION !!!" << llendl;
 			return false;
 		}

-		if (this->getSize().mdV[0] > data->getBinRadius() && isInside(data->getPositionGroup()))
+		if (this->getSize()[0] > data->getBinRadius() && isInside(data->getPositionGroup()))
 		{
 			//we got it, just act like a branch
 			oct_node* node = getNodeAt(data);
@ -643,31 +698,34 @@ public:
 		else if (this->getChildCount() == 0)
 		{
 			//first object being added, just wrap it up
-			while (!(this->getSize().mdV[0] > data->getBinRadius() && isInside(data->getPositionGroup())))
+			while (!(this->getSize()[0] > data->getBinRadius() && isInside(data->getPositionGroup())))
 			{
-				LLVector3d center, size;
+				LLVector4a center, size;
 				center = this->getCenter();
 				size = this->getSize();
 				LLOctreeNode<T>::pushCenter(center, size, data);
 				this->setCenter(center);
-				this->setSize(size*2);
+				size.mul(2.f);
+				this->setSize(size);
 				this->updateMinMax();
 			}
 			LLOctreeNode<T>::insert(data);
 		}
 		else
 		{
-			while (!(this->getSize().mdV[0] > data->getBinRadius() && isInside(data->getPositionGroup())))
+			while (!(this->getSize()[0] > data->getBinRadius() && isInside(data->getPositionGroup())))
 			{
 				//the data is outside the root node, we need to grow
-				LLVector3d center(this->getCenter());
-				LLVector3d size(this->getSize());
+				LLVector4a center(this->getCenter());
+				LLVector4a size(this->getSize());

 				//expand this node
-				LLVector3d newcenter(center);
+				LLVector4a newcenter(center);
 				LLOctreeNode<T>::pushCenter(newcenter, size, data);
 				this->setCenter(newcenter);
-				this->setSize(size*2);
+				LLVector4a size2 = size;
+				size2.mul(2.f);
+				this->setSize(size2);
 				this->updateMinMax();

 				//copy our children to a new branch
@ -704,4 +762,15 @@ void LLOctreeTraveler<T>::traverse(const LLOctreeNode<T>* node)
 		traverse(node->getChild(i));
 	}
 }
+
+template <class T>
+void LLOctreeTravelerDepthFirst<T>::traverse(const LLOctreeNode<T>* node)
+{
+	for (U32 i = 0; i < node->getChildCount(); i++)	// children first, in child-list order
+	{
+		traverse(node->getChild(i));
+	}
+	node->accept(this);	// the node itself is handled only after all of its children
+}
+
 #endif
--- a/indra/llmath/llplane.h
+++ b/indra/llmath/llplane.h
@ -36,19 +36,23 @@
 // The plane normal = [A, B, C]
 // The closest approach = D / sqrt(A*A + B*B + C*C)

-class LLPlane : public LLVector4
+class LLPlane
 {
 public:
+	
+	// Constructors
 	LLPlane() {}; // no default constructor
 	LLPlane(const LLVector3 &p0, F32 d) { setVec(p0, d); }
 	LLPlane(const LLVector3 &p0, const LLVector3 &n) { setVec(p0, n); }
-	void setVec(const LLVector3 &p0, F32 d) { LLVector4::setVec(p0[0], p0[1], p0[2], d); }
-	void setVec(const LLVector3 &p0, const LLVector3 &n)
+	inline void setVec(const LLVector3 &p0, F32 d) { mV.set(p0[0], p0[1], p0[2], d); }
+	
+	// Set
+	inline void setVec(const LLVector3 &p0, const LLVector3 &n)
 	{
 		F32 d = -(p0 * n);
 		setVec(n, d);
 	}
-	void setVec(const LLVector3 &p0, const LLVector3 &p1, const LLVector3 &p2)
+	inline void setVec(const LLVector3 &p0, const LLVector3 &p1, const LLVector3 &p2)
 	{
 		LLVector3 u, v, w;
 		u = p1 - p0;
@ -58,8 +62,38 @@ public:
 		F32 d = -(w * p0);
 		setVec(w, d);
 	}
-	LLPlane& operator=(const LLVector4& v2) {  LLVector4::setVec(v2[0],v2[1],v2[2],v2[3]); return *this;}
+	
+	inline LLPlane& operator=(const LLVector4& v2) {  mV.set(v2[0],v2[1],v2[2],v2[3]); return *this;}
+	
+	inline LLPlane& operator=(const LLVector4a& v2) {  mV.set(v2[0],v2[1],v2[2],v2[3]); return *this;}	
+	
+	inline void set(const LLPlane& p2) { mV = p2.mV; }
+	
+	// Evaluates the plane equation A*x + B*y + C*z + D at the point v2
 	F32 dist(const LLVector3 &v2) const { return mV[0]*v2[0] + mV[1]*v2[1] + mV[2]*v2[2] + mV[3]; }
+	
+	inline LLSimdScalar dot3(const LLVector4a& b) const { return mV.dot3(b); }
+	
+	// Read-only access a single float in this vector. Do not use in proximity to any function call that manipulates
+	// the data at the whole vector level or you will incur a substantial penalty. Consider using the splat functions instead	
+	inline F32 operator[](const S32 idx) const { return mV[idx]; }
+	
+	// preferable when index is known at compile time
+	template <int N> LL_FORCE_INLINE void getAt(LLSimdScalar& v) const { v = mV.getScalarAt<N>(); } 
+	
+	// reset the vector to 0, 0, 0, 1
+	inline void clear() { mV.set(0, 0, 0, 1); }
+	
+	inline void getVector3(LLVector3& vec) const { vec.set(mV[0], mV[1], mV[2]); }
+	
+	// Retrieve the mask indicating which of the x, y and z components are greater than or equal to zero.
+	inline U8 calcPlaneMask() 
+	{ 
+		return mV.greaterEqual(LLVector4a::getZero()).getGatheredBits() & LLVector4Logical::MASK_XYZ;
+	}
+		
+private:
+	LLVector4a mV;
 };


--- a/indra/llmath/llquantize.h
+++ b/indra/llmath/llquantize.h
@ -29,10 +29,16 @@
 #define LL_LLQUANTIZE_H

 const U16 U16MAX = 65535;
+LL_ALIGN_16( const F32 F_U16MAX_4A[4] ) = { 65535.f, 65535.f, 65535.f, 65535.f };	// U16MAX as F32 in all four elements; LL_ALIGN_16 presumably forces 16-byte alignment (macro not visible here)
+
 const F32 OOU16MAX = 1.f/(F32)(U16MAX);
+LL_ALIGN_16( const F32 F_OOU16MAX_4A[4] ) = { OOU16MAX, OOU16MAX, OOU16MAX, OOU16MAX };	// OOU16MAX (1/U16MAX) in all four elements
 
 const U8 U8MAX = 255;
+LL_ALIGN_16( const F32 F_U8MAX_4A[4] ) = { 255.f, 255.f, 255.f, 255.f };	// U8MAX as F32 in all four elements
+
 const F32 OOU8MAX = 1.f/(F32)(U8MAX);
+LL_ALIGN_16( const F32 F_OOU8MAX_4A[4] ) = { OOU8MAX, OOU8MAX, OOU8MAX, OOU8MAX };	// OOU8MAX (1/U8MAX) in all four elements

 const U8 FIRSTVALIDCHAR = 54;
 const U8 MAXSTRINGVAL = U8MAX - FIRSTVALIDCHAR; //we don't allow newline or null 
--- a/indra/llmath/llquaternion.cpp
+++ b/indra/llmath/llquaternion.cpp
@ -26,9 +26,10 @@

 #include "linden_common.h"

+#include "llmath.h"	// for F_PI
+
 #include "llquaternion.h"

-#include "llmath.h"	// for F_PI
 //#include "vmath.h"
 #include "v3math.h"
 #include "v3dmath.h"
--- a/indra/llmath/llquaternion.h
+++ b/indra/llmath/llquaternion.h
@ -27,7 +27,11 @@
 #ifndef LLQUATERNION_H
 #define LLQUATERNION_H

-#include "llmath.h"
+#include <iostream>
+
+#ifndef LLMATH_H //enforce specific include order to avoid tangling inline dependencies
+#error "Please include llmath.h first."
+#endif

 class LLVector4;
 class LLVector3;
--- a/indra/llmath/llquaternion2.h
+++ b/indra/llmath/llquaternion2.h
@ -0,0 +1,105 @@
+/** 
+ * @file llquaternion2.h
+ * @brief LLQuaternion2 class header file - SIMD-enabled quaternion class
+ *
+ * $LicenseInfo:firstyear=2010&license=viewerlgpl$
+ * Second Life Viewer Source Code
+ * Copyright (C) 2010, Linden Research, Inc.
+ * 
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation;
+ * version 2.1 of the License only.
+ * 
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ * 
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+ * 
+ * Linden Research, Inc., 945 Battery Street, San Francisco, CA  94111  USA
+ * $/LicenseInfo$
+ */
+
+#ifndef	LL_QUATERNION2_H
+#define	LL_QUATERNION2_H
+
+/////////////////////////////
+// LLQuaternion2
+/////////////////////////////
+// This class stores a quaternion x*i + y*j + z*k + w in <x, y, z, w> order
+// (i.e., w in high order element of vector)
+/////////////////////////////
+/////////////////////////////
+// These classes are intentionally minimal right now. If you need additional
+// functionality, please contact someone with SSE experience (e.g., Falcon or
+// Huseby).
+/////////////////////////////
+#include "llquaternion.h"
+
+class LLQuaternion2
+{
+public:
+
+	//////////////////////////
+	// Ctors
+	//////////////////////////
+	
+	// Default ctor: the body is empty, so mQ is left uninitialized
+	LLQuaternion2() {}
+
+	// Ctor from LLQuaternion; explicit, so an LLQuaternion is never converted implicitly
+	explicit LLQuaternion2( const class LLQuaternion& quat );
+
+	//////////////////////////
+	// Get/Set
+	//////////////////////////
+
+	// Load from an LLQuaternion via an unaligned load (loadua) of quat.mQ. Returns void, so assignments cannot be chained
+	inline void operator=( const LLQuaternion& quat )
+	{
+		mQ.loadua( quat.mQ );
+	}
+
+	// Return the internal LLVector4a representation of the quaternion: read-only accessor first, read-write ("Rw") accessor second
+	inline const LLVector4a& getVector4a() const;
+	inline LLVector4a& getVector4aRw();
+
+	/////////////////////////
+	// Quaternion modification
+	/////////////////////////
+	
+	// Set this quaternion to the conjugate of src (x, y and z negated, w unchanged)
+	inline void setConjugate(const LLQuaternion2& src);
+
+	// Renormalizes the quaternion over all four components. Assumes it has nonzero length.
+	inline void normalize();
+
+	// Quantize this quaternion to 8 bit precision over the component range [-1, 1], then renormalize
+	inline void quantize8();
+
+	// Quantize this quaternion to 16 bit precision over the component range [-1, 1], then renormalize
+	inline void quantize16();
+
+	/////////////////////////
+	// Quaternion inspection
+	/////////////////////////
+
+	// Return true if this quaternion is equal to 'rhs' within 'tolerance' (four-component comparison). 
+	// Note! Quaternions exhibit "double-cover", so any rotation has two equally valid
+	// quaternion representations and they will NOT compare equal.
+	inline bool equals(const LLQuaternion2& rhs, F32 tolerance = F_APPROXIMATELY_ZERO ) const;
+
+	// Return true if all components are finite and the quaternion is normalized
+	inline bool isOkRotation() const;
+
+protected:
+
+	LLVector4a mQ; // quaternion components in <x, y, z, w> order (w in the high-order element)
+
+};
+
+#endif
--- a/indra/llmath/llquaternion2.inl
+++ b/indra/llmath/llquaternion2.inl
@ -0,0 +1,102 @@
+/** 
+ * @file llquaternion2.inl
+ * @brief LLQuaternion2 inline definitions
+ *
+ * $LicenseInfo:firstyear=2010&license=viewerlgpl$
+ * Second Life Viewer Source Code
+ * Copyright (C) 2010, Linden Research, Inc.
+ * 
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation;
+ * version 2.1 of the License only.
+ * 
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ * 
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+ * 
+ * Linden Research, Inc., 945 Battery Street, San Francisco, CA  94111  USA
+ * $/LicenseInfo$
+ */
+
+#include "llquaternion2.h"
+
+static const LLQuad LL_V4A_PLUS_ONE = {1.f, 1.f, 1.f, 1.f}; // +1 in every lane; passed as the 'high' bound by quantize8()/quantize16()
+static const LLQuad LL_V4A_MINUS_ONE = {-1.f, -1.f, -1.f, -1.f}; // -1 in every lane; passed as the 'low' bound by quantize8()/quantize16()
+
+// Construct from an LLQuaternion by copying its four components, read in
+// VX, VY, VZ, VW order, into the SIMD vector mQ.
+inline LLQuaternion2::LLQuaternion2( const LLQuaternion& quat )
+{
+	const F32 x = quat.mQ[VX];
+	const F32 y = quat.mQ[VY];
+	const F32 z = quat.mQ[VZ];
+	const F32 w = quat.mQ[VW];
+	mQ.set(x, y, z, w);
+}
+
+//////////////////////////
+// Get/Set
+//////////////////////////
+
+// Return the internal LLVector4a representation of the quaternion (read-only reference to mQ)
+inline const LLVector4a& LLQuaternion2::getVector4a() const
+{
+	return mQ;
+}
+
+inline LLVector4a& LLQuaternion2::getVector4aRw() // read-write counterpart of getVector4a(): callers can modify mQ through the returned reference
+{
+	return mQ;
+}
+
+/////////////////////////
+// Quaternion modification
+/////////////////////////
+
+// Set this quaternion to the conjugate of src: x, y and z are negated, w is copied unchanged
+inline void LLQuaternion2::setConjugate(const LLQuaternion2& src)
+{
+	static LL_ALIGN_16( const U32 F_QUAT_INV_MASK_4A[4] ) = { 0x80000000, 0x80000000, 0x80000000, 0x00000000 }; // float sign bit set for the x, y, z lanes; no bits set for w
+	mQ = _mm_xor_ps(src.mQ, *reinterpret_cast<const LLQuad*>(&F_QUAT_INV_MASK_4A));	// XOR with the mask flips exactly those sign bits
+}
+
+// Renormalizes the quaternion over all four components (delegates to LLVector4a::normalize4). Assumes it has nonzero length.
+inline void LLQuaternion2::normalize()
+{
+	mQ.normalize4();
+}
+
+// Quantize this quaternion to 8 bit precision: components are quantized over the range [-1, 1], then the result is renormalized
+inline void LLQuaternion2::quantize8()
+{
+	mQ.quantize8( LL_V4A_MINUS_ONE, LL_V4A_PLUS_ONE );
+	normalize();
+}
+
+// Quantize this quaternion to 16 bit precision: components are quantized over the range [-1, 1], then the result is renormalized
+inline void LLQuaternion2::quantize16()
+{
+	mQ.quantize16( LL_V4A_MINUS_ONE, LL_V4A_PLUS_ONE );
+	normalize();
+}
+
+
+/////////////////////////
+// Quaternion inspection
+/////////////////////////
+
+// Return true if this quaternion is equal to 'rhs' within 'tolerance' (delegates to LLVector4a::equals4 on the stored vectors). 
+// Note! Quaternions exhibit "double-cover", so any rotation has two equally valid
+// quaternion representations and they will NOT compare equal.
+inline bool LLQuaternion2::equals(const LLQuaternion2 &rhs, F32 tolerance/* = F_APPROXIMATELY_ZERO*/) const
+{
+	return mQ.equals4(rhs.mQ, tolerance);
+}
+
+// Report whether this quaternion is usable as a rotation: every component
+// must be finite, and the four-component vector must be normalized. The
+// normalization check is only evaluated when the finiteness check passes.
+inline bool LLQuaternion2::isOkRotation() const
+{
+	if ( !mQ.isFinite4() )
+	{
+		return false;
+	}
+	return mQ.isNormalized4();
+}
+
--- a/indra/llmath/llsimdmath.h
+++ b/indra/llmath/llsimdmath.h
@ -0,0 +1,93 @@
+/** 
+ * @file llsimdmath.h
+ * @brief Common header for SIMD-based math library (llvector4a, llmatrix3a, etc.)
+ *
+ * $LicenseInfo:firstyear=2010&license=viewerlgpl$
+ * Second Life Viewer Source Code
+ * Copyright (C) 2010, Linden Research, Inc.
+ * 
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation;
+ * version 2.1 of the License only.
+ * 
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ * 
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+ * 
+ * Linden Research, Inc., 945 Battery Street, San Francisco, CA  94111  USA
+ * $/LicenseInfo$
+ */
+
+#ifndef	LL_SIMD_MATH_H
+#define	LL_SIMD_MATH_H
+
+#ifndef LLMATH_H
+#error "Please include llmath.h before this file."
+#endif
+
+// SSE2 is mandatory for the SIMD math classes. GCC-style compilers define __SSE2__ when
+// it is enabled. MSVC defines _M_IX86_FP only for 32-bit x86 targets; on x64 targets the
+// macro is undefined (and would evaluate to 0 here) although SSE2 is always available,
+// so x64 builds are excluded from the _M_IX86_FP test instead of being rejected.
+#if ( ( LL_DARWIN || LL_LINUX ) && !(__SSE2__) ) || ( LL_WINDOWS && !defined(_M_X64) && ( _M_IX86_FP < 2 ) )
+#error SSE2 not enabled. LLVector4a and related class will not compile.
+#endif
+
+#if !LL_WINDOWS
+#include <stdint.h>
+#endif
+
+template <typename T> T* LL_NEXT_ALIGNED_ADDRESS(T* address) // returns 'address' rounded up to the next multiple of 16 bytes (unchanged if it already is one)
+{ 
+	return reinterpret_cast<T*>(
+		(reinterpret_cast<uintptr_t>(address) + 0xF) & ~0xF); // add 15, then clear the low four bits
+}
+
+template <typename T> T* LL_NEXT_ALIGNED_ADDRESS_64(T* address) // returns 'address' rounded up to the next multiple of 64 bytes (unchanged if it already is one)
+{ 
+	return reinterpret_cast<T*>(
+		(reinterpret_cast<uintptr_t>(address) + 0x3F) & ~0x3F); // add 63, then clear the low six bits
+}
+
+#if LL_LINUX || LL_DARWIN // alignment is requested with a trailing __attribute__((aligned(x)))
+
+#define			LL_ALIGN_PREFIX(x)
+#define			LL_ALIGN_POSTFIX(x)		__attribute__((aligned(x)))
+
+#elif LL_WINDOWS // alignment is requested with a leading __declspec(align(x))
+
+#define			LL_ALIGN_PREFIX(x)		__declspec(align(x))
+#define			LL_ALIGN_POSTFIX(x)
+
+#else // no alignment syntax is defined here for any other platform
+#error "LL_ALIGN_PREFIX and LL_ALIGN_POSTFIX undefined"
+#endif
+// LL_ALIGN_16(var): wraps the declaration 'var' with whichever prefix/postfix applies so it is 16-byte aligned
+#define LL_ALIGN_16(var) LL_ALIGN_PREFIX(16) var LL_ALIGN_POSTFIX(16)
+
+
+
+#include <xmmintrin.h>
+#include <emmintrin.h>
+
+#include "llsimdtypes.h"
+#include "llsimdtypes.inl"
+
+class LLMatrix3a;
+class LLRotation;
+class LLMatrix3;
+
+#include "llquaternion.h"
+
+#include "llvector4logical.h"
+#include "llvector4a.h"
+#include "llmatrix3a.h"
+#include "llquaternion2.h"
+#include "llvector4a.inl"
+#include "llmatrix3a.inl"
+#include "llquaternion2.inl"
+
+
+#endif //LL_SIMD_MATH_H
--- a/indra/llmath/llsimdtypes.h
+++ b/indra/llmath/llsimdtypes.h
@ -0,0 +1,124 @@
+/** 
+ * @file llsimdtypes.h
+ * @brief Declaration of basic SIMD math related types
+ *
+ * $LicenseInfo:firstyear=2010&license=viewerlgpl$
+ * Second Life Viewer Source Code
+ * Copyright (C) 2010, Linden Research, Inc.
+ * 
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation;
+ * version 2.1 of the License only.
+ * 
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ * 
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+ * 
+ * Linden Research, Inc., 945 Battery Street, San Francisco, CA  94111  USA
+ * $/LicenseInfo$
+ */
+
+#ifndef LL_SIMD_TYPES_H
+#define LL_SIMD_TYPES_H
+
+#ifndef LL_SIMD_MATH_H
+#error "Please include llmath.h before this file."
+#endif
+
+typedef __m128	LLQuad; // raw SSE register type (four packed 32-bit floats) wrapped by LLSimdScalar and LLVector4a
+
+
+#if LL_WINDOWS
+#pragma warning(push) // the matching pop follows the LLBool32 class below
+#pragma warning( disable : 4800 3 ) // Disable warning about casting int to bool for this class.
+#if defined(_MSC_VER) && (_MSC_VER < 1500)
+// VC++ 2005 (_MSC_VER < 1500) is missing these cast intrinsics, so replacements are provided
+// that reinterpret the register bits as the other type. __forceinline is MSVC specific and
+// attempts to override compiler inlining judgment. This is so even in debug builds this call is a NOP.
+__forceinline const __m128 _mm_castsi128_ps( const __m128i a ) { return reinterpret_cast<const __m128&>(a); }
+__forceinline const __m128i _mm_castps_si128( const __m128 a ) { return reinterpret_cast<const __m128i&>(a); }
+#endif // _MSC_VER
+
+#endif // LL_WINDOWS
+
+class LLBool32 // boolean stored in an int: constructible from int, unsigned int or bool, and convertible back to bool
+{
+public:
+	inline LLBool32() {} // m_bool is left uninitialized
+	inline LLBool32(int rhs) : m_bool(rhs) {} // stores rhs as-is (not normalized to 0/1)
+	inline LLBool32(unsigned int rhs) : m_bool(rhs) {}
+	inline LLBool32(bool rhs) { m_bool = static_cast<const int>(rhs); }
+	inline LLBool32& operator= (bool rhs) { m_bool = (int)rhs; return *this; }
+	inline bool operator== (bool rhs) const { return static_cast<const bool&>(m_bool) == rhs; } // m_bool is converted to bool (nonzero -> true) before comparing
+	inline bool operator!= (bool rhs) const { return !operator==(rhs); }
+	inline operator bool() const { return static_cast<const bool&>(m_bool); } // true for any nonzero m_bool
+
+private:
+	int m_bool; // raw stored value; any nonzero value reads back as true
+};
+
+#if LL_WINDOWS
+#pragma warning(pop)
+#endif
+
+class LLSimdScalar // wraps a single LLQuad; the F32 constructor and the *_ss-based members operate on its lowest lane
+{
+public:
+	inline LLSimdScalar() {} // mQ is left uninitialized
+	inline LLSimdScalar(LLQuad q) // non-explicit: lets a raw LLQuad (e.g. an intrinsic result) convert implicitly
+	{ 
+		mQ = q; 
+	}
+
+	inline LLSimdScalar(F32 f) // non-explicit: lets a plain float convert implicitly
+	{ 
+		mQ = _mm_set_ss(f); 
+	}
+
+	static inline const LLSimdScalar& getZero() // views the shared all-zero LLQuad F_ZERO_4A as an LLSimdScalar
+	{
+		extern const LLQuad F_ZERO_4A;
+		return reinterpret_cast<const LLSimdScalar&>(F_ZERO_4A); // relies on LLSimdScalar's only data member being an LLQuad
+	}
+
+	inline F32 getF32() const; // extract the value as a plain float
+
+	inline LLBool32 isApproximatelyEqual(const LLSimdScalar& rhs, F32 tolerance = F_APPROXIMATELY_ZERO) const; // true when |this - rhs| <= tolerance
+
+	inline LLSimdScalar getAbs() const; // absolute value, returned as a new scalar
+
+	inline void setMax( const LLSimdScalar& a, const LLSimdScalar& b ); // this = max(a, b)
+	
+	inline void setMin( const LLSimdScalar& a, const LLSimdScalar& b ); // this = min(a, b)
+
+	inline LLSimdScalar& operator=(F32 rhs);
+
+	inline LLSimdScalar& operator+=(const LLSimdScalar& rhs);
+
+	inline LLSimdScalar& operator-=(const LLSimdScalar& rhs);
+
+	inline LLSimdScalar& operator*=(const LLSimdScalar& rhs);
+
+	inline LLSimdScalar& operator/=(const LLSimdScalar& rhs);
+
+	inline operator LLQuad() const // implicit conversion: returns a copy of the wrapped register, so the scalar can be passed straight to intrinsics
+	{ 
+		return mQ; 
+	}
+	
+	inline const LLQuad& getQuad() const // explicit accessor: returns a reference to the wrapped register
+	{ 
+		return mQ; 
+	}
+
+private:
+	LLQuad mQ; // the wrapped SSE register
+};
+
+#endif //LL_SIMD_TYPES_H
--- a/indra/llmath/llsimdtypes.inl
+++ b/indra/llmath/llsimdtypes.inl
@ -0,0 +1,157 @@
+/** 
+ * @file llsimdtypes.inl
+ * @brief Inlined definitions of basic SIMD math related types
+ *
+ * $LicenseInfo:firstyear=2010&license=viewerlgpl$
+ * Second Life Viewer Source Code
+ * Copyright (C) 2010, Linden Research, Inc.
+ * 
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation;
+ * version 2.1 of the License only.
+ * 
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ * 
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+ * 
+ * Linden Research, Inc., 945 Battery Street, San Francisco, CA  94111  USA
+ * $/LicenseInfo$
+ */
+
+
+
+
+//////////////////
+// LLSimdScalar
+//////////////////
+
+// Binary addition: returns a + b, built on LLSimdScalar::operator+=.
+inline LLSimdScalar operator+(const LLSimdScalar& a, const LLSimdScalar& b)
+{
+	LLSimdScalar sum = a;
+	return sum += b;
+}
+
+// Binary subtraction: returns a - b, built on LLSimdScalar::operator-=.
+inline LLSimdScalar operator-(const LLSimdScalar& a, const LLSimdScalar& b)
+{
+	LLSimdScalar difference = a;
+	return difference -= b;
+}
+
+// Binary multiplication: returns a * b, built on LLSimdScalar::operator*=.
+inline LLSimdScalar operator*(const LLSimdScalar& a, const LLSimdScalar& b)
+{
+	LLSimdScalar product = a;
+	return product *= b;
+}
+
+// Binary division: returns a / b, built on LLSimdScalar::operator/=.
+inline LLSimdScalar operator/(const LLSimdScalar& a, const LLSimdScalar& b)
+{
+	LLSimdScalar quotient = a;
+	return quotient /= b;
+}
+
+inline LLSimdScalar operator-(const LLSimdScalar& a) // unary negation
+{
+	static LL_ALIGN_16(const U32 signMask[4]) = {0x80000000, 0x80000000, 0x80000000, 0x80000000 }; // float sign bit set in every lane
+	return _mm_xor_ps(*reinterpret_cast<const LLQuad*>(signMask), a); // XOR flips the sign bit of all four lanes of a
+}
+
+// Equality comparison of two SIMD scalars via _mm_comieq_ss, whose int
+// result is wrapped in an LLBool32.
+inline LLBool32 operator==(const LLSimdScalar& a, const LLSimdScalar& b)
+{
+	const int isEqual = _mm_comieq_ss(a.getQuad(), b.getQuad());
+	return LLBool32(isEqual);
+}
+
+// Inequality comparison of two SIMD scalars via _mm_comineq_ss, whose int
+// result is wrapped in an LLBool32.
+inline LLBool32 operator!=(const LLSimdScalar& a, const LLSimdScalar& b)
+{
+	const int isNotEqual = _mm_comineq_ss(a.getQuad(), b.getQuad());
+	return LLBool32(isNotEqual);
+}
+
+// Less-than comparison (a < b) of two SIMD scalars via _mm_comilt_ss, whose
+// int result is wrapped in an LLBool32.
+inline LLBool32 operator<(const LLSimdScalar& a, const LLSimdScalar& b)
+{
+	const int isLess = _mm_comilt_ss(a.getQuad(), b.getQuad());
+	return LLBool32(isLess);
+}
+
+// Less-or-equal comparison (a <= b) of two SIMD scalars via _mm_comile_ss,
+// whose int result is wrapped in an LLBool32.
+inline LLBool32 operator<=(const LLSimdScalar& a, const LLSimdScalar& b)
+{
+	const int isLessOrEqual = _mm_comile_ss(a.getQuad(), b.getQuad());
+	return LLBool32(isLessOrEqual);
+}
+
+// Greater-than comparison (a > b) of two SIMD scalars via _mm_comigt_ss,
+// whose int result is wrapped in an LLBool32.
+inline LLBool32 operator>(const LLSimdScalar& a, const LLSimdScalar& b)
+{
+	const int isGreater = _mm_comigt_ss(a.getQuad(), b.getQuad());
+	return LLBool32(isGreater);
+}
+
+// Greater-or-equal comparison (a >= b) of two SIMD scalars via
+// _mm_comige_ss, whose int result is wrapped in an LLBool32.
+inline LLBool32 operator>=(const LLSimdScalar& a, const LLSimdScalar& b)
+{
+	const int isGreaterOrEqual = _mm_comige_ss(a.getQuad(), b.getQuad());
+	return LLBool32(isGreaterOrEqual);
+}
+
+// Tolerance comparison: true when the absolute difference between this
+// scalar and rhs is less than or equal to 'tolerance'.
+inline LLBool32 LLSimdScalar::isApproximatelyEqual(const LLSimdScalar& rhs, F32 tolerance /* = F_APPROXIMATELY_ZERO */) const
+{
+	const LLSimdScalar delta = _mm_sub_ss( mQ, rhs.mQ );
+	const LLSimdScalar maxDelta( tolerance );
+	return delta.getAbs() <= maxDelta;
+}
+
+// Set this scalar to the result of _mm_max_ss applied to a and b.
+inline void LLSimdScalar::setMax( const LLSimdScalar& a, const LLSimdScalar& b )
+{
+	const LLQuad larger = _mm_max_ss( a.getQuad(), b.getQuad() );
+	mQ = larger;
+}
+
+// Set this scalar to the result of _mm_min_ss applied to a and b.
+inline void LLSimdScalar::setMin( const LLSimdScalar& a, const LLSimdScalar& b )
+{
+	const LLQuad smaller = _mm_min_ss( a.getQuad(), b.getQuad() );
+	mQ = smaller;
+}
+
+inline LLSimdScalar& LLSimdScalar::operator=(F32 rhs) // assign from a plain float
+{ 
+	mQ = _mm_set_ss(rhs); // same intrinsic as the F32 constructor
+	return *this; 
+}
+
+// In-place addition: this = this + rhs (via _mm_add_ss); returns *this.
+inline LLSimdScalar& LLSimdScalar::operator+=(const LLSimdScalar& rhs) 
+{
+	const LLQuad sum = _mm_add_ss( mQ, rhs.getQuad() );
+	mQ = sum;
+	return *this;
+}
+
+// In-place subtraction: this = this - rhs (via _mm_sub_ss); returns *this.
+inline LLSimdScalar& LLSimdScalar::operator-=(const LLSimdScalar& rhs)
+{
+	const LLQuad difference = _mm_sub_ss( mQ, rhs.getQuad() );
+	mQ = difference;
+	return *this;
+}
+
+// In-place multiplication: this = this * rhs (via _mm_mul_ss); returns *this.
+inline LLSimdScalar& LLSimdScalar::operator*=(const LLSimdScalar& rhs)
+{
+	const LLQuad product = _mm_mul_ss( mQ, rhs.getQuad() );
+	mQ = product;
+	return *this;
+}
+
+// In-place division: this = this / rhs (via _mm_div_ss); returns *this.
+inline LLSimdScalar& LLSimdScalar::operator/=(const LLSimdScalar& rhs)
+{
+	const LLQuad quotient = _mm_div_ss( mQ, rhs.getQuad() );
+	mQ = quotient;
+	return *this;
+}
+
+inline LLSimdScalar LLSimdScalar::getAbs() const // returns the absolute value as a new scalar; *this is not modified
+{
+	static const LL_ALIGN_16(U32 F_ABS_MASK_4A[4]) = { 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF }; // every bit except the float sign bit, in each lane
+	return _mm_and_ps( mQ, *reinterpret_cast<const LLQuad*>(F_ABS_MASK_4A)); // AND clears the sign bit of all four lanes
+}
+
+inline F32 LLSimdScalar::getF32() const // extract the scalar value as a plain F32
+{ 
+	F32 ret; 
+	_mm_store_ss(&ret, mQ); // stores the lowest lane of mQ into ret
+	return ret; 
+}
--- a/indra/llmath/lltreenode.h
+++ b/indra/llmath/lltreenode.h
@ -28,6 +28,9 @@

 #include "stdtypes.h"
 #include "xform.h"
+#include "llpointer.h"
+#include "llrefcount.h"
+
 #include <vector>

 template <class T> class LLTreeNode;
--- a/indra/llmath/llvector4a.cpp
+++ b/indra/llmath/llvector4a.cpp
@ -0,0 +1,222 @@
+/** 
+ * @file llvector4a.cpp
+ * @brief SIMD vector implementation
+ *
+ * $LicenseInfo:firstyear=2010&license=viewerlgpl$
+ * Second Life Viewer Source Code
+ * Copyright (C) 2010, Linden Research, Inc.
+ * 
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation;
+ * version 2.1 of the License only.
+ * 
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ * 
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+ * 
+ * Linden Research, Inc., 945 Battery Street, San Francisco, CA  94111  USA
+ * $/LicenseInfo$
+ */
+
+#include "llmath.h"
+#include "llquantize.h"
+
+extern const LLQuad F_ZERO_4A		= { 0, 0, 0, 0 }; // all-zero register; also referenced by LLSimdScalar::getZero()
+extern const LLQuad F_APPROXIMATELY_ZERO_4A = { 
+	F_APPROXIMATELY_ZERO,
+	F_APPROXIMATELY_ZERO,
+	F_APPROXIMATELY_ZERO,
+	F_APPROXIMATELY_ZERO
+}; // F_APPROXIMATELY_ZERO replicated into every lane
+
+extern const LLVector4a LL_V4A_ZERO = reinterpret_cast<const LLVector4a&> ( F_ZERO_4A ); // returned by LLVector4a::getZero()
+extern const LLVector4a LL_V4A_EPSILON = reinterpret_cast<const LLVector4a&> ( F_APPROXIMATELY_ZERO_4A ); // returned by LLVector4a::getEpsilon()
+
+// Copy 'bytes' bytes from src to dst in 16-byte (4 x F32) blocks using aligned
+// SSE loads and stores (copy4a).
+//   dst, src - non-NULL, 16-byte aligned buffers that must not overlap
+//   bytes    - number of bytes to copy; must be a non-zero multiple of 16
+// Every copy4a() call moves a full 16-byte block, so a size that is not a
+// multiple of 16 would read and write past the end of both buffers. The asserts
+// below reject such sizes, and misaligned pointers, in debug builds.
+/*static */void LLVector4a::memcpyNonAliased16(F32* __restrict dst, const F32* __restrict src, size_t bytes)
+{
+	assert(src != NULL);
+	assert(dst != NULL);
+	assert(bytes > 0);
+	assert((bytes % 16) == 0);
+	assert((reinterpret_cast<uintptr_t>(src) & 0xF) == 0);
+	assert((reinterpret_cast<uintptr_t>(dst) & 0xF) == 0);
+	
+	F32* end = dst + (bytes / sizeof(F32) );
+
+	if (bytes > 64)
+	{
+		// first 64-byte aligned destination address at or after dst
+		F32* begin_64 = LL_NEXT_ALIGNED_ADDRESS_64(dst);
+		
+		//at least 64 (16*4) bytes before the end of the destination, switch to 16 byte copies
+		F32* end_64 = end-16;
+		
+		_mm_prefetch((char*)begin_64, _MM_HINT_NTA);
+		_mm_prefetch((char*)begin_64 + 64, _MM_HINT_NTA);
+		_mm_prefetch((char*)begin_64 + 128, _MM_HINT_NTA);
+		_mm_prefetch((char*)begin_64 + 192, _MM_HINT_NTA);
+		
+		// copy single 16-byte blocks until the destination reaches the 64-byte boundary
+		while (dst < begin_64)
+		{
+			copy4a(dst, src);
+			dst += 4;
+			src += 4;
+		}
+		
+		// main loop: 64 bytes (four 16-byte blocks) per iteration, prefetching 512 bytes ahead
+		while (dst < end_64)
+		{
+			_mm_prefetch((char*)src + 512, _MM_HINT_NTA);
+			_mm_prefetch((char*)dst + 512, _MM_HINT_NTA);
+			copy4a(dst, src);
+			copy4a(dst+4, src+4);
+			copy4a(dst+8, src+8);
+			copy4a(dst+12, src+12);
+			
+			dst += 16;
+			src += 16;
+		}
+	}
+
+	// tail (or the whole copy when bytes <= 64): remaining 16-byte blocks
+	while (dst < end)
+	{
+		copy4a(dst, src);
+		dst += 4;
+		src += 4;
+	}
+}
+
+void LLVector4a::setRotated( const LLRotation& rot, const LLVector4a& vec ) // this = vec.x * column0 + vec.y * column1 + vec.z * column2 of rot; vec's w is never read
+{
+	const LLVector4a col0 = rot.getColumn(0);
+	const LLVector4a col1 = rot.getColumn(1);
+	const LLVector4a col2 = rot.getColumn(2);
+
+	LLVector4a result = _mm_load_ss( vec.getF32ptr() ); // load vec.x into the lowest lane
+	result.splat<0>( result ); // broadcast it to all four lanes
+	result.mul( col0 );
+
+	{
+		LLVector4a yyyy = _mm_load_ss( vec.getF32ptr() +  1 ); // vec.y, broadcast below
+		yyyy.splat<0>( yyyy );
+		yyyy.mul( col1 ); 
+		result.add( yyyy );
+	}
+
+	{
+		LLVector4a zzzz = _mm_load_ss( vec.getF32ptr() +  2 ); // vec.z, broadcast below
+		zzzz.splat<0>( zzzz );
+		zzzz.mul( col2 );
+		result.add( zzzz );
+	}
+
+	*this = result; // written last, after every read of vec
+}
+
+void LLVector4a::setRotated( const LLQuaternion2& quat, const LLVector4a& vec ) // this = vec + w * t + (q x t), where q is quat's vector, w its fourth element and t = 2 * (q x vec)
+{
+	const LLVector4a& quatVec = quat.getVector4a();
+	LLVector4a temp; temp.setCross3(quatVec, vec);
+	temp.add( temp ); // temp = 2 * (quatVec x vec)
+	
+	const LLVector4a realPart( quatVec.getScalarAt<3>() ); // element 3 (w) of the quaternion, broadcast to all lanes
+	LLVector4a tempTimesReal; tempTimesReal.setMul( temp, realPart );
+
+	mQ = vec;
+	add( tempTimesReal );
+	
+	LLVector4a imagCrossTemp; imagCrossTemp.setCross3( quatVec, temp );
+	add(imagCrossTemp);
+}
+
+void LLVector4a::quantize8( const LLVector4a& low, const LLVector4a& high ) // clamps each component to [low, high], rounds it to an integer level on a 0..255 scale, and stores the re-expanded value in this vector
+{
+	LLVector4a val(mQ);
+	LLVector4a delta; delta.setSub( high, low ); // width of the [low, high] range, per component
+
+	{
+		val.clamp(low, high);
+		val.sub(low);
+
+		// 8-bit quantization means we can do with just 12 bits of reciprocal accuracy
+		const LLVector4a oneOverDelta = _mm_rcp_ps(delta.mQ);
+// 		{
+// 			static LL_ALIGN_16( const F32 F_TWO_4A[4] ) = { 2.f, 2.f, 2.f, 2.f };
+// 			LLVector4a two; two.load4a( F_TWO_4A );
+// 
+// 			// Here we use _mm_rcp_ps plus one round of newton-raphson
+// 			// We wish to find 'x' such that x = 1/delta
+// 			// As a first approximation, we take x0 = _mm_rcp_ps(delta)
+// 			// Then x1 = 2 * x0 - a * x0^2 or x1 = x0 * ( 2 - a * x0 )
+// 			// See Intel AP-803 http://ompf.org/!/Intel_application_note_AP-803.pdf
+// 			const LLVector4a recipApprox = _mm_rcp_ps(delta.mQ);
+// 			oneOverDelta.setMul( delta, recipApprox );
+// 			oneOverDelta.setSub( two, oneOverDelta );
+// 			oneOverDelta.mul( recipApprox );
+// 		}
+
+		val.mul(oneOverDelta);
+		val.mul(*reinterpret_cast<const LLVector4a*>(F_U8MAX_4A)); // val is now scaled to (approximately) the range 0..255
+	}
+
+	val = _mm_cvtepi32_ps(_mm_cvtps_epi32( val.mQ )); // round to an integer by converting float -> int32 -> float
+
+	{
+		val.mul(*reinterpret_cast<const LLVector4a*>(F_OOU8MAX_4A)); // map the integer level back into [low, high]
+		val.mul(delta);
+		val.add(low);
+	}
+
+	{
+		LLVector4a maxError; maxError.setMul(delta, *reinterpret_cast<const LLVector4a*>(F_OOU8MAX_4A)); // size of one quantization step: delta / 255
+		LLVector4a absVal; absVal.setAbs( val );
+		setSelectWithMask( absVal.lessThan( maxError ), F_ZERO_4A, val ); // components smaller in magnitude than one step become exactly 0; the others take val
+	}	
+}
+
+void LLVector4a::quantize16( const LLVector4a& low, const LLVector4a& high ) // clamps each component to [low, high], rounds it to an integer level on a 0..65535 scale, and stores the re-expanded value in this vector
+{
+	LLVector4a val(mQ);
+	LLVector4a delta; delta.setSub( high, low ); // width of the [low, high] range, per component
+
+	{
+		val.clamp(low, high);
+		val.sub(low);
+
+		// 16-bit quantization means we need a round of Newton-Raphson
+		LLVector4a oneOverDelta;
+		{
+			static LL_ALIGN_16( const F32 F_TWO_4A[4] ) = { 2.f, 2.f, 2.f, 2.f };
+			LLVector4a two; two.load4a( F_TWO_4A );
+
+			// Here we use _mm_rcp_ps plus one round of newton-raphson
+			// We wish to find 'x' such that x = 1/delta
+			// As a first approximation, we take x0 = _mm_rcp_ps(delta)
+			// Then x1 = 2 * x0 - a * x0^2 or x1 = x0 * ( 2 - a * x0 )
+			// See Intel AP-803 http://ompf.org/!/Intel_application_note_AP-803.pdf
+			const LLVector4a recipApprox = _mm_rcp_ps(delta.mQ);
+			oneOverDelta.setMul( delta, recipApprox ); // a * x0
+			oneOverDelta.setSub( two, oneOverDelta ); // 2 - a * x0
+			oneOverDelta.mul( recipApprox ); // x0 * ( 2 - a * x0 )
+		}
+
+		val.mul(oneOverDelta);
+		val.mul(*reinterpret_cast<const LLVector4a*>(F_U16MAX_4A)); // val is now scaled to (approximately) the range 0..65535
+	}
+
+	val = _mm_cvtepi32_ps(_mm_cvtps_epi32( val.mQ )); // round to an integer by converting float -> int32 -> float
+
+	{
+		val.mul(*reinterpret_cast<const LLVector4a*>(F_OOU16MAX_4A)); // map the integer level back into [low, high]
+		val.mul(delta);
+		val.add(low);
+	}
+
+	{
+		LLVector4a maxError; maxError.setMul(delta, *reinterpret_cast<const LLVector4a*>(F_OOU16MAX_4A)); // size of one quantization step: delta / 65535
+		LLVector4a absVal; absVal.setAbs( val );
+		setSelectWithMask( absVal.lessThan( maxError ), F_ZERO_4A, val ); // components smaller in magnitude than one step become exactly 0; the others take val
+	}	
+}
--- a/indra/llmath/llvector4a.h
+++ b/indra/llmath/llvector4a.h
@ -0,0 +1,324 @@
+/** 
+ * @file llvector4a.h
+ * @brief LLVector4a class header file - memory aligned and vectorized 4 component vector
+ *
+ * $LicenseInfo:firstyear=2010&license=viewerlgpl$
+ * Second Life Viewer Source Code
+ * Copyright (C) 2010, Linden Research, Inc.
+ * 
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation;
+ * version 2.1 of the License only.
+ * 
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ * 
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+ * 
+ * Linden Research, Inc., 945 Battery Street, San Francisco, CA  94111  USA
+ * $/LicenseInfo$
+ */
+
+#ifndef	LL_LLVECTOR4A_H
+#define	LL_LLVECTOR4A_H
+
+
+class LLRotation;
+
+#include <assert.h>
+#include "llpreprocessor.h"
+
+///////////////////////////////////
+// FIRST TIME USERS PLEASE READ
+//////////////////////////////////
+// This is just the beginning of LLVector4a. There are many more useful functions
+// yet to be implemented. For example, setNeg to negate a vector, rotate() to apply
+// a matrix rotation, various functions to manipulate only the X, Y, and Z elements
+// and many others (including a whole variety of accessors). So if you don't see a 
+// function here that you need, please contact Falcon or someone else with SSE 
+// experience (Richard, I think, has some and davep has a little as of the time 
+// of this writing, July 08, 2010) about getting it implemented before you resort to
+// LLVector3/LLVector4. 
+/////////////////////////////////
+
+class LLVector4a
+{
+public:
+
+	///////////////////////////////////
+	// STATIC METHODS
+	///////////////////////////////////
+	
+	// Call initClass() at startup to avoid 15,000+ cycle penalties from denormalized numbers. NOTE(review): these macros set SSE control-register state, which is presumably per-thread - confirm whether other threads also need this call
+	static void initClass()
+	{
+		_MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON); // turn on flush-to-zero mode
+		_MM_SET_ROUNDING_MODE(_MM_ROUND_NEAREST); // select round-to-nearest
+	}
+
+	// Return a reference to a shared vector of all zeros (LL_V4A_ZERO, defined in llvector4a.cpp)
+	static inline const LLVector4a& getZero()
+	{
+		extern const LLVector4a LL_V4A_ZERO;
+		return LL_V4A_ZERO;
+	}
+	
+	// Return a reference to a shared vector of all epsilon (LL_V4A_EPSILON, every element F_APPROXIMATELY_ZERO), where epsilon is a small float suitable for approximate equality checks
+	static inline const LLVector4a& getEpsilon()
+	{
+		extern const LLVector4a LL_V4A_EPSILON;
+		return LL_V4A_EPSILON;
+	}
+
+	// Copy 16 bytes (four F32s) from src to dst with one aligned SSE load and one aligned store. Source and destination must be 16-byte aligned
+	static inline void copy4a(F32* dst, const F32* src)
+	{
+		_mm_store_ps(dst, _mm_load_ps(src));
+	}
+
+	// Copy 'bytes' bytes, in 16-byte blocks, from src to dst. Source and destination must not overlap. 
+	static void memcpyNonAliased16(F32* __restrict dst, const F32* __restrict src, size_t bytes);
+
+	////////////////////////////////////
+	// CONSTRUCTORS 
+	////////////////////////////////////
+	
+	LLVector4a()
+	{ //DO NOT INITIALIZE -- The overhead is completely unnecessary; mQ is indeterminate until it is loaded or set
+	}
+	
+	LLVector4a(F32 x, F32 y, F32 z, F32 w = 0.f) // component-wise construction; w defaults to 0
+	{
+		set(x,y,z,w);
+	}
+	
+	LLVector4a(F32 x) // broadcast: all four elements are set to x (non-explicit, so a float converts implicitly)
+	{
+		splat(x);
+	}
+	
+	LLVector4a(const LLSimdScalar& x) // broadcast: all four elements are set to the SIMD scalar x
+	{
+		splat(x);
+	}
+
+	LLVector4a(LLQuad q) // wraps a raw SSE register value as-is (non-explicit, so intrinsic results convert implicitly)
+	{
+		mQ = q;
+	}
+
+	////////////////////////////////////
+	// LOAD/STORE
+	////////////////////////////////////
+	
+	// Load from 16-byte aligned src array (preferred method of loading)
+	inline void load4a(const F32* src);
+	
+	// Load from unaligned src array (NB: Significantly slower than load4a)
+	inline void loadua(const F32* src);
+	
+	// Load only three floats beginning at address 'src'. Slowest method.
+	inline void load3(const F32* src);
+	
+	// Store to a 16-byte aligned memory address
+	inline void store4a(F32* dst) const;
+	
+	////////////////////////////////////
+	// BASIC GET/SET 
+	////////////////////////////////////
+	
+	// Return a "this" as an F32 pointer. Do not use unless you have a very good reason.  (Not sure? Ask Falcon)
+	inline F32* getF32ptr();
+	
+	// Return a "this" as a const F32 pointer. Do not use unless you have a very good reason.  (Not sure? Ask Falcon)
+	inline const F32* const getF32ptr() const;
+	
+	// Read-only access a single float in this vector. Do not use in proximity to any function call that manipulates
+	// the data at the whole vector level or you will incur a substantial penalty. Consider using the splat functions instead
+	inline F32 operator[](const S32 idx) const;
+
+	// Prefer this method for read-only access to a single element. Prefer the templated version if the elem is known at compile time.
+	inline LLSimdScalar getScalarAt(const S32 idx) const;
+
+	// Prefer this method for read-only access to a single element. Prefer the templated version if the elem is known at compile time.
+	template <int N> LL_FORCE_INLINE LLSimdScalar getScalarAt() const;
+
+	// Set to an x, y, z and optional w provided
+	inline void set(F32 x, F32 y, F32 z, F32 w = 0.f);
+	
+	// Set to all zeros. This is preferred to using ::getZero()
+	inline void clear();
+	
+	// Set all elements to 'x'
+	inline void splat(const F32 x);
+
+	// Set all elements to 'x'
+	inline void splat(const LLSimdScalar& x);
+	
+	// Set all 4 elements to element N of src, with N known at compile time
+	template <int N> void splat(const LLVector4a& src);
+	
+	// Set all 4 elements to element i of v, with i NOT known at compile time
+	inline void splat(const LLVector4a& v, U32 i);
+	
+	// Select bits from sourceIfTrue and sourceIfFalse according to bits in mask
+	inline void setSelectWithMask( const LLVector4Logical& mask, const LLVector4a& sourceIfTrue, const LLVector4a& sourceIfFalse );
+	
+	////////////////////////////////////
+	// ALGEBRAIC
+	////////////////////////////////////
+	
+	// Set this to the element-wise (a + b)
+	inline void setAdd(const LLVector4a& a, const LLVector4a& b);
+	
+	// Set this to element-wise (a - b)
+	inline void setSub(const LLVector4a& a, const LLVector4a& b);
+	
+	// Set this to element-wise multiply (a * b)
+	inline void setMul(const LLVector4a& a, const LLVector4a& b);
+	
+	// Set this to element-wise quotient (a / b)
+	inline void setDiv(const LLVector4a& a, const LLVector4a& b);
+	
+	// Set this to the element-wise absolute value of src
+	inline void setAbs(const LLVector4a& src);
+	
+	// Add to each component in this vector the corresponding component in rhs
+	inline void add(const LLVector4a& rhs);
+	
+	// Subtract from each component in this vector the corresponding component in rhs
+	inline void sub(const LLVector4a& rhs);
+	
+	// Multiply each component in this vector by the corresponding component in rhs
+	inline void mul(const LLVector4a& rhs);
+	
+	// Divide each component in this vector by the corresponding component in rhs
+	inline void div(const LLVector4a& rhs);
+	
+	// Multiply this vector by x in a scalar fashion
+	inline void mul(const F32 x);
+
+	// Set this to (a x b) (geometric cross-product)
+	inline void setCross3(const LLVector4a& a, const LLVector4a& b);
+	
+	// Set all elements to the dot product of the x, y, and z elements in a and b
+	inline void setAllDot3(const LLVector4a& a, const LLVector4a& b);
+
+	// Set all elements to the dot product of the x, y, z, and w elements in a and b
+	inline void setAllDot4(const LLVector4a& a, const LLVector4a& b);
+
+	// Return the 3D dot product of this vector and b
+	inline LLSimdScalar dot3(const LLVector4a& b) const;
+
+	// Return the 4D dot product of this vector and b
+	inline LLSimdScalar dot4(const LLVector4a& b) const;
+
+	// Normalize this vector with respect to the x, y, and z components only. Accurate to 22 bits of precision. W component is destroyed
+	// Note that this does not consider zero length vectors!
+	inline void normalize3();
+
+	// Same as normalize3() but with respect to all 4 components
+	inline void normalize4();
+
+	// Same as normalize3(), but returns length as a SIMD scalar
+	inline LLSimdScalar normalize3withLength();
+
+	// Normalize this vector with respect to the x, y, and z components only. Accurate only to 10-12 bits of precision. W component is destroyed
+	// Note that this does not consider zero length vectors!
+	inline void normalize3fast();
+
+	// Return true if this vector is normalized with respect to x,y,z up to tolerance
+	inline LLBool32 isNormalized3( F32 tolerance = 1e-3 ) const;
+
+	// Return true if this vector is normalized with respect to all components up to tolerance
+	inline LLBool32 isNormalized4( F32 tolerance = 1e-3 ) const;
+
+	// Set all elements to the length of vector 'v' 
+	inline void setAllLength3( const LLVector4a& v );
+
+	// Get this vector's length
+	inline LLSimdScalar getLength3() const;
+	
+	// Set the components of this vector to the minimum of the corresponding components of lhs and rhs
+	inline void setMin(const LLVector4a& lhs, const LLVector4a& rhs);
+	
+	// Set the components of this vector to the maximum of the corresponding components of lhs and rhs
+	inline void setMax(const LLVector4a& lhs, const LLVector4a& rhs);
+	
+	// Clamps this vector to be within the component-wise range low to high (inclusive)
+	inline void clamp( const LLVector4a& low, const LLVector4a& high );
+
+	// Set this to  (c * lhs) + rhs * ( 1 - c)
+	inline void setLerp(const LLVector4a& lhs, const LLVector4a& rhs, F32 c);
+	
+	// Return true (nonzero) if x, y, z (and w for Finite4) are all finite floats
+	inline LLBool32 isFinite3() const;	
+	inline LLBool32 isFinite4() const;
+
+	// Set this vector to 'vec' rotated by the LLRotation or LLQuaternion2 provided
+	void setRotated( const LLRotation& rot, const LLVector4a& vec );
+	void setRotated( const class LLQuaternion2& quat, const LLVector4a& vec );
+
+	// Set this vector to 'vec' rotated by the INVERSE of the LLRotation or LLQuaternion2 provided
+	inline void setRotatedInv( const LLRotation& rot, const LLVector4a& vec );
+	inline void setRotatedInv( const class LLQuaternion2& quat, const LLVector4a& vec );
+
+	// Quantize this vector to 8 or 16 bit precision
+	void quantize8( const LLVector4a& low, const LLVector4a& high );
+	void quantize16( const LLVector4a& low, const LLVector4a& high );
+
+	////////////////////////////////////
+	// LOGICAL
+	////////////////////////////////////	
+	// The functions in this section will compare the elements in this vector
+	// to those in rhs and return an LLVector4Logical with all bits set in elements
+	// where the comparison was true and all bits unset in elements where the comparison
+	// was false. See llvector4logical.h
+	////////////////////////////////////
+	// WARNING: Other than equals3 and equals4, these functions do NOT account
+	// for floating point tolerance. You should include the appropriate tolerance
+	// in the inputs.
+	////////////////////////////////////
+	
+	inline LLVector4Logical greaterThan(const LLVector4a& rhs) const;
+
+	inline LLVector4Logical lessThan(const LLVector4a& rhs) const;
+	
+	inline LLVector4Logical greaterEqual(const LLVector4a& rhs) const;
+
+	inline LLVector4Logical lessEqual(const LLVector4a& rhs) const;
+	
+	inline LLVector4Logical equal(const LLVector4a& rhs) const;
+
+	// Returns true if this and rhs are componentwise equal up to the specified absolute tolerance
+	inline bool equals4(const LLVector4a& rhs, F32 tolerance = F_APPROXIMATELY_ZERO ) const;
+
+	inline bool equals3(const LLVector4a& rhs, F32 tolerance = F_APPROXIMATELY_ZERO ) const;
+
+	////////////////////////////////////
+	// OPERATORS
+	////////////////////////////////////	
+	
+	// Do NOT add additional operators without consulting someone with SSE experience
+	inline const LLVector4a& operator= ( const LLVector4a& rhs );
+	
+	inline const LLVector4a& operator= ( const LLQuad& rhs );
+
+	inline operator LLQuad() const;	
+
+private:
+	LLQuad mQ;
+};
+
+// Fold point 'p' into a running pair of component-wise extremes:
+// 'min' becomes the element-wise minimum of (min, p) and 'max' the element-wise maximum of (max, p).
+inline void update_min_max(LLVector4a& min, LLVector4a& max, const LLVector4a& p)
+{
+	// Lower bound first, then upper bound; each call reads its own previous value.
+	min.setMin(min, p);
+	max.setMax(max, p);
+}
+
+#endif
--- a/indra/llmath/llvector4a.inl
+++ b/indra/llmath/llvector4a.inl
@ -0,0 +1,593 @@
+/** 
+ * @file llvector4a.inl
+ * @brief LLVector4a inline function implementations
+ *
+ * $LicenseInfo:firstyear=2010&license=viewerlgpl$
+ * Second Life Viewer Source Code
+ * Copyright (C) 2010, Linden Research, Inc.
+ * 
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation;
+ * version 2.1 of the License only.
+ * 
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ * 
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+ * 
+ * Linden Research, Inc., 945 Battery Street, San Francisco, CA  94111  USA
+ * $/LicenseInfo$
+ */
+
+////////////////////////////////////
+// LOAD/STORE
+////////////////////////////////////
+
+// Fill this vector from four consecutive floats at 'src' using the aligned load intrinsic.
+// 'src' must be 16-byte aligned (this is the preferred way to load); use loadua() otherwise.
+inline void LLVector4a::load4a(const F32* src)
+{
+	const LLQuad loaded = _mm_load_ps(src);
+	mQ = loaded;
+}
+
+// Fill this vector from four consecutive floats at 'src' with no alignment requirement.
+// NB: significantly slower than load4a(); prefer aligned data where possible.
+inline void LLVector4a::loadua(const F32* src)
+{
+	const LLQuad loaded = _mm_loadu_ps(src);
+	mQ = loaded;
+}
+
+// Load exactly three floats (src[0..2]) into X, Y, Z and force W to zero. Slowest load method.
+inline void LLVector4a::load3(const F32* src)
+{
+	// Arguments to _mm_setr_ps are given low element first: { X, Y, Z, W } = { src[0], src[1], src[2], 0 }
+	// NB: This differs from the convention of { Z, Y, X, W }
+	mQ = _mm_setr_ps(src[0], src[1], src[2], 0.f);
+}
+
+// Write all four elements of this vector to 'dst' using the aligned store intrinsic.
+// 'dst' must be 16-byte aligned.
+inline void LLVector4a::store4a(F32* dst) const
+{
+	const LLQuad value = mQ;
+	_mm_store_ps(dst, value);
+}
+
+////////////////////////////////////
+// BASIC GET/SET 
+////////////////////////////////////
+
+// Return "this" as an F32 pointer to the four packed floats.
+// Do not use unless you have a very good reason.  (Not sure? Ask Falcon)
+// Marked inline like every other definition in this file, so that including the
+// .inl from several translation units cannot produce duplicate definitions.
+inline F32* LLVector4a::getF32ptr()
+{
+	return reinterpret_cast<F32*>(&mQ);
+}
+
+// Return "this" as a const F32 pointer to the four packed floats.
+// Do not use unless you have a very good reason.  (Not sure? Ask Falcon)
+// Marked inline like every other definition in this file, so that including the
+// .inl from several translation units cannot produce duplicate definitions.
+inline const F32* const LLVector4a::getF32ptr() const
+{
+	return reinterpret_cast<const F32*>(&mQ);
+}
+
+// Read a single float out of this vector by index (no bounds check is performed).
+// Do not use in proximity to any function call that manipulates the data at the whole vector level
+// or you will incur a substantial penalty. Consider using the splat functions instead.
+inline F32 LLVector4a::operator[](const S32 idx) const
+{
+	// View the quad as an array of floats and index into it.
+	const F32* elems = reinterpret_cast<const F32*>(&mQ);
+	return elems[idx];
+}
+
+// Preferred read-only access to one element when the index is only known at run time.
+// Use the templated getScalarAt<N>() instead when the index is a compile-time constant.
+// Any idx other than 0, 1 or 2 yields element 3.
+inline LLSimdScalar LLVector4a::getScalarAt(const S32 idx) const
+{
+	// Each branch returns an LLQuad, which is converted to LLSimdScalar automatically (should be effectively a nop).
+	if (idx == 0)
+	{
+		// Element 0 is already in the lowest position, so no shuffle is needed.
+		return mQ;
+	}
+	if (idx == 1)
+	{
+		return _mm_shuffle_ps(mQ, mQ, _MM_SHUFFLE(1, 1, 1, 1));
+	}
+	if (idx == 2)
+	{
+		return _mm_shuffle_ps(mQ, mQ, _MM_SHUFFLE(2, 2, 2, 2));
+	}
+	return _mm_shuffle_ps(mQ, mQ, _MM_SHUFFLE(3, 3, 3, 3));
+}
+
+// Preferred read-only access to element N when N is known at compile time.
+// Broadcasts element N to every position of the returned quad.
+template <int N> LL_FORCE_INLINE LLSimdScalar LLVector4a::getScalarAt() const
+{
+	const LLQuad broadcast = _mm_shuffle_ps(mQ, mQ, _MM_SHUFFLE(N, N, N, N));
+	return broadcast;
+}
+
+// Specialization for element 0: it already occupies the lowest position, so the quad is returned without a shuffle.
+template<> LL_FORCE_INLINE LLSimdScalar LLVector4a::getScalarAt<0>() const
+{
+	const LLQuad& unshuffled = mQ;
+	return unshuffled;
+}
+
+// Assign the four elements from individual floats (w is optional per the declaration).
+inline void LLVector4a::set(F32 x, F32 y, F32 z, F32 w)
+{
+	// _mm_setr_ps takes its arguments lowest element first, i.e. in x, y, z, w order.
+	mQ = _mm_setr_ps(x, y, z, w);
+}
+
+// Reset every element by copying the quad of the shared zero vector.
+inline void LLVector4a::clear()
+{
+	const LLVector4a& zero = LLVector4a::getZero();
+	mQ = zero.mQ;
+}
+
+// Set all four elements to the float x.
+inline void LLVector4a::splat(const F32 x)
+{
+	const LLQuad replicated = _mm_set1_ps(x);
+	mQ = replicated;
+}
+
+// Set all four elements to the lowest element of the quad held by x.
+inline void LLVector4a::splat(const LLSimdScalar& x)
+{
+	const LLQuad scalarQuad = x.getQuad();
+	mQ = _mm_shuffle_ps( scalarQuad, scalarQuad, _MM_SHUFFLE(0,0,0,0) );
+}
+
+// Broadcast element N of src (N fixed at compile time) into all four elements of this vector.
+template <int N> void LLVector4a::splat(const LLVector4a& src)
+{
+	const LLQuad broadcast = _mm_shuffle_ps(src.mQ, src.mQ, _MM_SHUFFLE(N, N, N, N) );
+	mQ = broadcast;
+}
+
+// Broadcast element i of v into all four elements of this vector, with i only known at run time.
+// The shuffle selector must be a compile-time constant, hence one branch per index.
+// If i is not in 0..3 this vector is left unchanged.
+inline void LLVector4a::splat(const LLVector4a& v, U32 i)
+{
+	if (i == 0)
+	{
+		mQ = _mm_shuffle_ps(v.mQ, v.mQ, _MM_SHUFFLE(0, 0, 0, 0));
+	}
+	else if (i == 1)
+	{
+		mQ = _mm_shuffle_ps(v.mQ, v.mQ, _MM_SHUFFLE(1, 1, 1, 1));
+	}
+	else if (i == 2)
+	{
+		mQ = _mm_shuffle_ps(v.mQ, v.mQ, _MM_SHUFFLE(2, 2, 2, 2));
+	}
+	else if (i == 3)
+	{
+		mQ = _mm_shuffle_ps(v.mQ, v.mQ, _MM_SHUFFLE(3, 3, 3, 3));
+	}
+}
+
+// Bitwise select: for every bit set in mask take the bit from sourceIfTrue, otherwise from sourceIfFalse.
+inline void LLVector4a::setSelectWithMask( const LLVector4Logical& mask, const LLVector4a& sourceIfTrue, const LLVector4a& sourceIfFalse )
+{
+	// Computes ((( sourceIfTrue ^ sourceIfFalse ) & mask) ^ sourceIfFalse ) in three steps.
+	// Worked example: sourceIfFalse = 1010b, sourceIfTrue = 0101b, mask = 1100b
+	//   differing = 0101b ^ 1010b = 1111b
+	//   chosen    = 1111b & 1100b = 1100b
+	//   result    = 1100b ^ 1010b = 0110b  (01 from sourceIfTrue, 10 from sourceIfFalse)
+	// Courtesy of Mark++, http://markplusplus.wordpress.com/2007/03/14/fast-sse-select-operation/
+	const LLQuad differing = _mm_xor_ps( sourceIfTrue, sourceIfFalse );
+	const LLQuad chosen = _mm_and_ps( mask, differing );
+	mQ = _mm_xor_ps( sourceIfFalse, chosen );
+}
+
+////////////////////////////////////
+// ALGEBRAIC
+////////////////////////////////////
+
+// Store the element-wise sum (a + b) in this vector.
+inline void LLVector4a::setAdd(const LLVector4a& a, const LLVector4a& b)
+{
+	const LLQuad sum = _mm_add_ps(a.mQ, b.mQ);
+	mQ = sum;
+}
+
+// Store the element-wise difference (a - b) in this vector.
+inline void LLVector4a::setSub(const LLVector4a& a, const LLVector4a& b)
+{
+	const LLQuad difference = _mm_sub_ps(a.mQ, b.mQ);
+	mQ = difference;
+}
+
+// Store the element-wise product (a * b) in this vector.
+inline void LLVector4a::setMul(const LLVector4a& a, const LLVector4a& b)
+{
+	const LLQuad product = _mm_mul_ps(a.mQ, b.mQ);
+	mQ = product;
+}
+
+// Store the element-wise quotient (a / b) in this vector. No check is made for zero elements in b.
+inline void LLVector4a::setDiv(const LLVector4a& a, const LLVector4a& b)
+{
+	const LLQuad quotient = _mm_div_ps( a.mQ, b.mQ );
+	mQ = quotient;
+}
+
+// Store the element-wise absolute value of src in this vector.
+inline void LLVector4a::setAbs(const LLVector4a& src)
+{
+	// 0x7FFFFFFF keeps every bit except the top (sign) bit of each 32-bit element.
+	static const LL_ALIGN_16(U32 F_ABS_MASK_4A[4]) = { 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF };
+	const LLQuad signCleared = *reinterpret_cast<const LLQuad*>(F_ABS_MASK_4A);
+	mQ = _mm_and_ps(src.mQ, signCleared);
+}
+
+// In-place element-wise addition: this = this + rhs.
+inline void LLVector4a::add(const LLVector4a& rhs)
+{
+	setAdd(*this, rhs);
+}
+
+// In-place element-wise subtraction: this = this - rhs.
+inline void LLVector4a::sub(const LLVector4a& rhs)
+{
+	setSub(*this, rhs);
+}
+
+// In-place element-wise multiplication: this = this * rhs.
+inline void LLVector4a::mul(const LLVector4a& rhs)
+{
+	setMul(*this, rhs);
+}
+
+// In-place element-wise division: this = this / rhs. No check is made for zero elements in rhs.
+inline void LLVector4a::div(const LLVector4a& rhs)
+{
+	// TODO: Check accuracy, maybe add divFast
+	setDiv(*this, rhs);
+}
+
+// Scale every element of this vector by the scalar x.
+inline void LLVector4a::mul(const F32 x)
+{
+	// Replicate x into all four positions, then do one packed multiply.
+	const LLQuad factor = _mm_set1_ps(x);
+	mQ = _mm_mul_ps(mQ, factor);
+}
+
+// Set this to (a x b) (geometric cross-product) over the x, y and z elements.
+// The resulting W element is a[W]*b[W] - a[W]*b[W], i.e. 0 for finite inputs.
+// Safe to call with 'this' aliasing a and/or b: every read of a and b is completed
+// before mQ is written.
+inline void LLVector4a::setCross3(const LLVector4a& a, const LLVector4a& b)
+{
+	// Vectors are stored in memory in w, z, y, x order from high to low
+	// aYZX = { a[W], a[X], a[Z], a[Y] }
+	const LLQuad aYZX = _mm_shuffle_ps( a.mQ, a.mQ, _MM_SHUFFLE( 3, 0, 2, 1 ));
+	// bZXY = { b[W], b[Y], b[X], b[Z] }
+	const LLQuad bZXY = _mm_shuffle_ps( b.mQ, b.mQ, _MM_SHUFFLE( 3, 1, 0, 2 ));
+	// aZXY = { a[W], a[Y], a[X], a[Z] }
+	const LLQuad aZXY = _mm_shuffle_ps( a.mQ, a.mQ, _MM_SHUFFLE( 3, 1, 0, 2 ));
+	// bYZX = { b[W], b[X], b[Z], b[Y] }
+	const LLQuad bYZX = _mm_shuffle_ps( b.mQ, b.mQ, _MM_SHUFFLE( 3, 0, 2, 1 ));
+	// firstTerms = { a[W]*b[W], a[X]*b[Y], a[Z]*b[X], a[Y]*b[Z] }
+	const LLQuad firstTerms = _mm_mul_ps( aYZX, bZXY );
+	// secondTerms = { a[W]*b[W], a[Y]*b[X], a[X]*b[Z], a[Z]*b[Y] }
+	const LLQuad secondTerms = _mm_mul_ps( aZXY, bYZX );
+	// mQ = { 0, a[X]*b[Y] - a[Y]*b[X], a[Z]*b[X] - a[X]*b[Z], a[Y]*b[Z] - a[Z]*b[Y] }
+	mQ = _mm_sub_ps( firstTerms, secondTerms );
+}
+
+/* This function works, but may be slightly slower than the one below on older machines
+ inline void LLVector4a::setAllDot3(const LLVector4a& a, const LLVector4a& b)
+ {
+ // ab = { a[W]*b[W], a[Z]*b[Z], a[Y]*b[Y], a[X]*b[X] }
+ const LLQuad ab = _mm_mul_ps( a.mQ, b.mQ );
+ // yzxw = { a[W]*b[W], a[Z]*b[Z], a[X]*b[X], a[Y]*b[Y] }
+ const LLQuad wzxy = _mm_shuffle_ps( ab, ab, _MM_SHUFFLE(3, 2, 0, 1 ));
+ // xPlusY = { 2*a[W]*b[W], 2 * a[Z] * b[Z], a[Y]*b[Y] + a[X] * b[X], a[X] * b[X] + a[Y] * b[Y] }
+ const LLQuad xPlusY = _mm_add_ps(ab, wzxy);
+ // xPlusYSplat = { a[Y]*b[Y] + a[X] * b[X], a[X] * b[X] + a[Y] * b[Y], a[Y]*b[Y] + a[X] * b[X], a[X] * b[X] + a[Y] * b[Y] } 
+ const LLQuad xPlusYSplat = _mm_movelh_ps(xPlusY, xPlusY);
+ // zSplat = { a[Z]*b[Z], a[Z]*b[Z], a[Z]*b[Z], a[Z]*b[Z] }
+ const LLQuad zSplat = _mm_shuffle_ps( ab, ab, _MM_SHUFFLE( 2, 2, 2, 2 ));
+ // mQ = { a[Z] * b[Z] + a[Y] * b[Y] + a[X] * b[X], same, same, same }
+ mQ = _mm_add_ps(zSplat, xPlusYSplat);
+ }*/
+
+// Set all elements to the dot product of the x, y, and z elements in a and b.
+// The w products are computed but never reach the final sum.
+// mQ is written only by the last statement, after all reads of a and b.
+// Lane listings in the comments below run from the highest element to the lowest.
+inline void LLVector4a::setAllDot3(const LLVector4a& a, const LLVector4a& b)
+{
+	// ab = { a[W]*b[W], a[Z]*b[Z], a[Y]*b[Y], a[X]*b[X] }
+	const LLQuad ab = _mm_mul_ps( a.mQ, b.mQ );
+	// wzxy = { a[W]*b[W], a[Z]*b[Z], a[X]*b[X], a[Y]*b[Y] } (ab with its two lowest elements swapped)
+	const __m128i wzxy = _mm_shuffle_epi32(_mm_castps_si128(ab), _MM_SHUFFLE(3, 2, 0, 1 ));
+	// xPlusY = { 2*a[W]*b[W], 2 * a[Z] * b[Z], a[Y]*b[Y] + a[X] * b[X], a[X] * b[X] + a[Y] * b[Y] }
+	const LLQuad xPlusY = _mm_add_ps(ab, _mm_castsi128_ps(wzxy));
+	// xPlusYSplat = { a[Y]*b[Y] + a[X] * b[X], a[X] * b[X] + a[Y] * b[Y], a[Y]*b[Y] + a[X] * b[X], a[X] * b[X] + a[Y] * b[Y] } 
+	// (movelh copies the two lowest elements into the two highest)
+	const LLQuad xPlusYSplat = _mm_movelh_ps(xPlusY, xPlusY);
+	// zSplat = { a[Z]*b[Z], a[Z]*b[Z], a[Z]*b[Z], a[Z]*b[Z] }
+	const __m128i zSplat = _mm_shuffle_epi32(_mm_castps_si128(ab), _MM_SHUFFLE( 2, 2, 2, 2 ));
+	// mQ = { a[Z] * b[Z] + a[Y] * b[Y] + a[X] * b[X], same, same, same }
+	mQ = _mm_add_ps(_mm_castsi128_ps(zSplat), xPlusYSplat);
+}
+
+// Set all elements to the dot product of the x, y, z, and w elements in a and b.
+// mQ is written only by the last statement, after all reads of a and b.
+// Lane listings in the comments below run from the highest element to the lowest.
+inline void LLVector4a::setAllDot4(const LLVector4a& a, const LLVector4a& b)
+{
+	// ab = { a[W]*b[W], a[Z]*b[Z], a[Y]*b[Y], a[X]*b[X] }
+	const LLQuad ab = _mm_mul_ps( a.mQ, b.mQ );
+	// zwxy = { a[Z]*b[Z], a[W]*b[W], a[X]*b[X], a[Y]*b[Y] } (ab with each adjacent pair of elements swapped)
+	const __m128i zwxy = _mm_shuffle_epi32(_mm_castps_si128(ab), _MM_SHUFFLE(2, 3, 0, 1 ));
+	// zPlusWandXplusY = { a[W]*b[W] + a[Z]*b[Z], a[Z] * b[Z] + a[W]*b[W], a[Y]*b[Y] + a[X] * b[X], a[X] * b[X] + a[Y] * b[Y] }
+	const LLQuad zPlusWandXplusY = _mm_add_ps(ab, _mm_castsi128_ps(zwxy));
+	// xPlusYSplat = { a[Y]*b[Y] + a[X] * b[X], a[X] * b[X] + a[Y] * b[Y], a[Y]*b[Y] + a[X] * b[X], a[X] * b[X] + a[Y] * b[Y] } 
+	const LLQuad xPlusYSplat = _mm_movelh_ps(zPlusWandXplusY, zPlusWandXplusY);
+	// zPlusWSplat = { a[W]*b[W] + a[Z]*b[Z], same, same, same } (movehl copies the two highest elements into the two lowest)
+	const LLQuad zPlusWSplat = _mm_movehl_ps(zPlusWandXplusY, zPlusWandXplusY);
+
+	// mQ = { a[W]*b[W] + a[Z] * b[Z] + a[Y] * b[Y] + a[X] * b[X], same, same, same }
+	mQ = _mm_add_ps(xPlusYSplat, zPlusWSplat);
+}
+
+// Return the 3D dot product of this vector and b
+// Only the lowest element of the returned quad is x+y+z of the products; the other
+// elements hold partial sums (presumably LLSimdScalar only exposes the lowest element -- confirm).
+inline LLSimdScalar LLVector4a::dot3(const LLVector4a& b) const
+{
+	// ab = element-wise products { w, z, y, x } (high to low)
+	const LLQuad ab = _mm_mul_ps( mQ, b.mQ );
+	// splatY / splatZ = the y product and the z product broadcast to every element
+	const LLQuad splatY = _mm_castsi128_ps( _mm_shuffle_epi32( _mm_castps_si128(ab), _MM_SHUFFLE(1, 1, 1, 1) ) );
+	const LLQuad splatZ = _mm_castsi128_ps( _mm_shuffle_epi32( _mm_castps_si128(ab), _MM_SHUFFLE(2, 2, 2, 2) ) );
+	// Lowest element of xPlusY = x + y
+	const LLQuad xPlusY = _mm_add_ps( ab, splatY );
+	// Lowest element of the result = x + y + z
+	return _mm_add_ps( xPlusY, splatZ );	
+}
+
+// Return the 4D dot product of this vector and b
+// Only the lowest element of the returned quad is the full sum (the final add is _mm_add_ss).
+// Lane listings in the comments below run from the highest element to the lowest.
+inline LLSimdScalar LLVector4a::dot4(const LLVector4a& b) const
+{
+	// ab = { w, z, y, x } (element-wise products)
+ 	const LLQuad ab = _mm_mul_ps( mQ, b.mQ );
+ 	// upperProdsInLowerElems = { w, z, w, z } (movehl copies the two highest elements into the two lowest)
+	const LLQuad upperProdsInLowerElems = _mm_movehl_ps( ab, ab );
+	// sumOfPairs = { 2w, 2z, y+w, x+z }
+ 	const LLQuad sumOfPairs = _mm_add_ps( upperProdsInLowerElems, ab );
+	// shuffled = { y+w, y+w, y+w, y+w }
+	const LLQuad shuffled = _mm_castsi128_ps( _mm_shuffle_epi32( _mm_castps_si128( sumOfPairs ), _MM_SHUFFLE(1, 1, 1, 1) ) );
+	// Lowest element = (x+z) + (y+w); the three higher elements are passed through from sumOfPairs
+	return _mm_add_ss( sumOfPairs, shuffled );
+}
+
+// Normalize this vector with respect to the x, y, and z components only. Accurate to 22 bits of precision. W component is destroyed
+// (W is scaled by the same factor as x, y and z).
+// Note that this does not consider zero length vectors!
+inline void LLVector4a::normalize3()
+{
+	// lenSqrd = a dot a
+	LLVector4a lenSqrd; lenSqrd.setAllDot3( *this, *this );
+	// rsqrt = approximate reciprocal square root (i.e., { ~1/len(a), ~1/len(a), ~1/len(a), ~1/len(a) })
+	const LLQuad rsqrt = _mm_rsqrt_ps(lenSqrd.mQ);
+	static const LLQuad half = { 0.5f, 0.5f, 0.5f, 0.5f };
+	static const LLQuad three = {3.f, 3.f, 3.f, 3.f };
+	// Now we do one round of Newton-Raphson approximation to get full accuracy
+	// According to the Newton-Raphson method, given a first 'w' for the root of f(x) = 1/x^2 - a (i.e., x = 1/sqrt(a))
+	// the next better approximation w[i+1] = w - f(w)/f'(w) = w - (1/w^2 - a)/(-2*w^(-3))
+	// w[i+1] = w + 0.5 * (1/w^2 - a) * w^3 = w + 0.5 * (w - a*w^3) = 1.5 * w - 0.5 * a * w^3
+	// = 0.5 * w * (3 - a*w^2)
+	// Our first approx is w = rsqrt. We need out = a * w[i+1] (this is the input vector 'a', not the 'a' from the above formula
+	// which is actually lenSqrd). So out = a * [0.5*rsqrt * (3 - lenSqrd*rsqrt*rsqrt)]
+	const LLQuad AtimesRsqrt = _mm_mul_ps( lenSqrd.mQ, rsqrt );
+	const LLQuad AtimesRsqrtTimesRsqrt = _mm_mul_ps( AtimesRsqrt, rsqrt );
+	const LLQuad threeMinusAtimesRsqrtTimesRsqrt = _mm_sub_ps(three, AtimesRsqrtTimesRsqrt );
+	// nrApprox = refined estimate of 1/len(a), replicated in every element
+	const LLQuad nrApprox = _mm_mul_ps(half, _mm_mul_ps(rsqrt, threeMinusAtimesRsqrtTimesRsqrt));
+	mQ = _mm_mul_ps( mQ, nrApprox );
+}
+
+// Normalize this vector with respect to all components. Accurate to 22 bits of precision.
+// Note that this does not consider zero length vectors!
+inline void LLVector4a::normalize4()
+{
+	// lenSqrd = a dot a (all four components)
+	LLVector4a lenSqrd; lenSqrd.setAllDot4( *this, *this );
+	// rsqrt = approximate reciprocal square root (i.e., { ~1/len(a), ~1/len(a), ~1/len(a), ~1/len(a) })
+	const LLQuad rsqrt = _mm_rsqrt_ps(lenSqrd.mQ);
+	static const LLQuad half = { 0.5f, 0.5f, 0.5f, 0.5f };
+	static const LLQuad three = {3.f, 3.f, 3.f, 3.f };
+	// Now we do one round of Newton-Raphson approximation to get full accuracy
+	// According to the Newton-Raphson method, given a first 'w' for the root of f(x) = 1/x^2 - a (i.e., x = 1/sqrt(a))
+	// the next better approximation w[i+1] = w - f(w)/f'(w) = w - (1/w^2 - a)/(-2*w^(-3))
+	// w[i+1] = w + 0.5 * (1/w^2 - a) * w^3 = w + 0.5 * (w - a*w^3) = 1.5 * w - 0.5 * a * w^3
+	// = 0.5 * w * (3 - a*w^2)
+	// Our first approx is w = rsqrt. We need out = a * w[i+1] (this is the input vector 'a', not the 'a' from the above formula
+	// which is actually lenSqrd). So out = a * [0.5*rsqrt * (3 - lenSqrd*rsqrt*rsqrt)]
+	const LLQuad AtimesRsqrt = _mm_mul_ps( lenSqrd.mQ, rsqrt );
+	const LLQuad AtimesRsqrtTimesRsqrt = _mm_mul_ps( AtimesRsqrt, rsqrt );
+	const LLQuad threeMinusAtimesRsqrtTimesRsqrt = _mm_sub_ps(three, AtimesRsqrtTimesRsqrt );
+	// nrApprox = refined estimate of 1/len(a), replicated in every element
+	const LLQuad nrApprox = _mm_mul_ps(half, _mm_mul_ps(rsqrt, threeMinusAtimesRsqrtTimesRsqrt));
+	mQ = _mm_mul_ps( mQ, nrApprox );
+}
+
+// Normalize this vector with respect to the x, y, and z components only. Accurate to 22 bits of precision. W component is destroyed
+// (W is scaled by the same factor as x, y and z).
+// Returns the length the vector had before normalization, computed as the square root of the
+// lowest element of lenSqrd (_mm_sqrt_ss).
+// Note that this does not consider zero length vectors!
+inline LLSimdScalar LLVector4a::normalize3withLength()
+{
+	// lenSqrd = a dot a
+	LLVector4a lenSqrd; lenSqrd.setAllDot3( *this, *this );
+	// rsqrt = approximate reciprocal square root (i.e., { ~1/len(a), ~1/len(a), ~1/len(a), ~1/len(a) })
+	const LLQuad rsqrt = _mm_rsqrt_ps(lenSqrd.mQ);
+	static const LLQuad half = { 0.5f, 0.5f, 0.5f, 0.5f };
+	static const LLQuad three = {3.f, 3.f, 3.f, 3.f };
+	// Now we do one round of Newton-Raphson approximation to get full accuracy
+	// According to the Newton-Raphson method, given a first 'w' for the root of f(x) = 1/x^2 - a (i.e., x = 1/sqrt(a))
+	// the next better approximation w[i+1] = w - f(w)/f'(w) = w - (1/w^2 - a)/(-2*w^(-3))
+	// w[i+1] = w + 0.5 * (1/w^2 - a) * w^3 = w + 0.5 * (w - a*w^3) = 1.5 * w - 0.5 * a * w^3
+	// = 0.5 * w * (3 - a*w^2)
+	// Our first approx is w = rsqrt. We need out = a * w[i+1] (this is the input vector 'a', not the 'a' from the above formula
+	// which is actually lenSqrd). So out = a * [0.5*rsqrt * (3 - lenSqrd*rsqrt*rsqrt)]
+	const LLQuad AtimesRsqrt = _mm_mul_ps( lenSqrd.mQ, rsqrt );
+	const LLQuad AtimesRsqrtTimesRsqrt = _mm_mul_ps( AtimesRsqrt, rsqrt );
+	const LLQuad threeMinusAtimesRsqrtTimesRsqrt = _mm_sub_ps(three, AtimesRsqrtTimesRsqrt );
+	// nrApprox = refined estimate of 1/len(a), replicated in every element
+	const LLQuad nrApprox = _mm_mul_ps(half, _mm_mul_ps(rsqrt, threeMinusAtimesRsqrtTimesRsqrt));
+	mQ = _mm_mul_ps( mQ, nrApprox );
+	// The length comes from an exact square root of lenSqrd, not from the reciprocal estimate above
+	return _mm_sqrt_ss(lenSqrd);
+}
+
+// Fast, low-precision normalize over x, y and z: accurate only to 10-12 bits. W component is destroyed.
+// Note that this does not consider zero length vectors!
+inline void LLVector4a::normalize3fast()
+{
+	LLVector4a dotSelf;
+	dotSelf.setAllDot3( *this, *this );
+	// Use the hardware reciprocal-square-root estimate directly, with no refinement step.
+	const LLQuad invLenEstimate = _mm_rsqrt_ps(dotSelf.mQ);
+	mQ = _mm_mul_ps( mQ, invLenEstimate );
+}
+
+// Return nonzero if this vector is normalized with respect to x,y,z up to tolerance.
+// The test performed is |dot3(this, this) - 1| <= tolerance * tolerance, evaluated on the lowest element.
+inline LLBool32 LLVector4a::isNormalized3( F32 tolerance ) const
+{
+	// 0x3f800000 is the bit pattern of 1.0f
+	static LL_ALIGN_16(const U32 ones[4]) = { 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000 };
+	// deviation = | squared length - 1 |
+	LLVector4a deviation;
+	deviation.setAllDot3( *this, *this );
+	deviation.sub( *reinterpret_cast<const LLVector4a*>(ones) );
+	deviation.setAbs(deviation);
+	// tolSquared = tolerance^2 in the lowest element
+	LLSimdScalar tolSquared = _mm_load_ss( &tolerance );
+	tolSquared = _mm_mul_ss( tolSquared, tolSquared );
+	return _mm_comile_ss( deviation, tolSquared );
+}
+
+// Return nonzero if this vector is normalized with respect to all components up to tolerance.
+// The test performed is |dot4(this, this) - 1| <= tolerance * tolerance, evaluated on the lowest element.
+inline LLBool32 LLVector4a::isNormalized4( F32 tolerance ) const
+{
+	// 0x3f800000 is the bit pattern of 1.0f
+	static LL_ALIGN_16(const U32 ones[4]) = { 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000 };
+	// deviation = | squared length - 1 |
+	LLVector4a deviation;
+	deviation.setAllDot4( *this, *this );
+	deviation.sub( *reinterpret_cast<const LLVector4a*>(ones) );
+	deviation.setAbs(deviation);
+	// tolSquared = tolerance^2 in the lowest element
+	LLSimdScalar tolSquared = _mm_load_ss( &tolerance );
+	tolSquared = _mm_mul_ss( tolSquared, tolSquared );
+	return _mm_comile_ss( deviation, tolSquared );
+}
+
+// Set every element of this vector to the x,y,z length of 'v'.
+inline void LLVector4a::setAllLength3( const LLVector4a& v )
+{
+	// Squared length replicated in all elements, then a packed square root.
+	LLVector4a dotSelf;
+	dotSelf.setAllDot3(v, v);
+	const LLQuad length = _mm_sqrt_ps(dotSelf.mQ);
+	mQ = length;
+}
+
+// Return this vector's x,y,z length: the square root of its 3D dot product with itself.
+inline LLSimdScalar LLVector4a::getLength3() const
+{
+	return _mm_sqrt_ss( dot3( *this ) );
+}
+
+// Store the element-wise minimum of lhs and rhs in this vector.
+inline void LLVector4a::setMin(const LLVector4a& lhs, const LLVector4a& rhs)
+{
+	const LLQuad smaller = _mm_min_ps(lhs.mQ, rhs.mQ);
+	mQ = smaller;
+}
+
+// Store the element-wise maximum of lhs and rhs in this vector.
+inline void LLVector4a::setMax(const LLVector4a& lhs, const LLVector4a& rhs)
+{
+	const LLQuad larger = _mm_max_ps(lhs.mQ, rhs.mQ);
+	mQ = larger;
+}
+
+// Set this to (c * lhs) + rhs * (1 - c), i.e. c = 1 yields lhs and c = 0 yields rhs.
+inline void LLVector4a::setLerp(const LLVector4a& lhs, const LLVector4a& rhs, F32 c)
+{
+	// Replicate each weight across a quad, scale both inputs, then sum.
+	const LLQuad lhsWeight = _mm_set1_ps(c);
+	const LLQuad rhsWeight = _mm_set1_ps(1.f-c);
+	const LLQuad lhsPart = _mm_mul_ps(lhs.mQ, lhsWeight);
+	const LLQuad rhsPart = _mm_mul_ps(rhs.mQ, rhsWeight);
+	mQ = _mm_add_ps(lhsPart, rhsPart);
+}
+
+// Return true (nonzero) if x, y and z are all finite floats (neither NaN nor infinity). W is ignored.
+inline LLBool32 LLVector4a::isFinite3() const
+{
+	// 0x7f800000 selects the exponent field; an element whose exponent bits are all set is NaN or Inf.
+	static LL_ALIGN_16(const U32 nanOrInfMask[4]) = { 0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000 };
+	const __m128i exponentMask = *reinterpret_cast<const __m128i*> (nanOrInfMask);
+	const __m128i exponentBits = _mm_and_si128( _mm_castps_si128(mQ), exponentMask );
+	// Elements that compare equal to the mask are the non-finite ones.
+	const LLVector4Logical nonFinite = _mm_castsi128_ps(_mm_cmpeq_epi32( exponentBits, exponentMask ));
+	const LLBool32 anyNonFinite = nonFinite.areAnySet( LLVector4Logical::MASK_XYZ );
+	return !anyNonFinite;
+}
+	
+// Return true (nonzero) if x, y, z and w are all finite floats (neither NaN nor infinity).
+inline LLBool32 LLVector4a::isFinite4() const
+{
+	// 0x7f800000 selects the exponent field; an element whose exponent bits are all set is NaN or Inf.
+	static LL_ALIGN_16(const U32 nanOrInfMask[4]) = { 0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000 };
+	const __m128i exponentMask = *reinterpret_cast<const __m128i*> (nanOrInfMask);
+	const __m128i exponentBits = _mm_and_si128( _mm_castps_si128(mQ), exponentMask );
+	// Elements that compare equal to the mask are the non-finite ones.
+	const LLVector4Logical nonFinite = _mm_castsi128_ps(_mm_cmpeq_epi32( exponentBits, exponentMask ));
+	const LLBool32 anyNonFinite = nonFinite.areAnySet( LLVector4Logical::MASK_XYZW );
+	return !anyNonFinite;
+}
+
+// Set this vector to 'vec' rotated by the INVERSE of 'rot'.
+// The inverse is obtained as the transpose of 'rot', then applied via setRotated().
+inline void LLVector4a::setRotatedInv( const LLRotation& rot, const LLVector4a& vec )
+{
+	LLRotation transposed;
+	transposed.setTranspose( rot );
+	setRotated( transposed, vec );
+}
+
+// Set this vector to 'vec' rotated by the INVERSE of 'quat'.
+// The inverse is obtained as the conjugate of 'quat', then applied via setRotated().
+inline void LLVector4a::setRotatedInv( const LLQuaternion2& quat, const LLVector4a& vec )
+{
+	LLQuaternion2 conjugate;
+	conjugate.setConjugate( quat );
+	setRotated( conjugate, vec );
+}
+
+// Clamp each element of this vector to the range [low, high] (inclusive), element by element.
+inline void LLVector4a::clamp( const LLVector4a& low, const LLVector4a& high )
+{
+	// Both comparisons are taken against the unmodified vector before anything is replaced.
+	const LLVector4Logical aboveHigh = greaterThan( high );
+	const LLVector4Logical belowLow = lessThan( low );
+
+	// Replace out-of-range elements with the bound they crossed; keep the rest.
+	setSelectWithMask( aboveHigh, high, *this );
+	setSelectWithMask( belowLow, low, *this );
+}
+
+
+////////////////////////////////////
+// LOGICAL
+////////////////////////////////////	
+// The functions in this section will compare the elements in this vector
+// to those in rhs and return an LLVector4Logical with all bits set in elements
+// where the comparison was true and all bits unset in elements where the comparison
+// was false. See llvector4logical.h
+////////////////////////////////////
+// WARNING: Other than equals3 and equals4, these functions do NOT account
+// for floating point tolerance. You should include the appropriate tolerance
+// in the inputs.
+////////////////////////////////////
+
+// Element-wise (this > rhs); each element of the result is all ones where true, all zeros where false.
+inline LLVector4Logical LLVector4a::greaterThan(const LLVector4a& rhs) const
+{
+	const LLQuad comparison = _mm_cmpgt_ps(mQ, rhs.mQ);
+	return comparison;
+}
+
+// Element-wise (this < rhs); each element of the result is all ones where true, all zeros where false.
+inline LLVector4Logical LLVector4a::lessThan(const LLVector4a& rhs) const
+{
+	const LLQuad comparison = _mm_cmplt_ps(mQ, rhs.mQ);
+	return comparison;
+}
+
+// Element-wise (this >= rhs); each element of the result is all ones where true, all zeros where false.
+inline LLVector4Logical LLVector4a::greaterEqual(const LLVector4a& rhs) const
+{
+	const LLQuad comparison = _mm_cmpge_ps(mQ, rhs.mQ);
+	return comparison;
+}
+
+// Element-wise (this <= rhs); each element of the result is all ones where true, all zeros where false.
+inline LLVector4Logical LLVector4a::lessEqual(const LLVector4a& rhs) const
+{
+	const LLQuad comparison = _mm_cmple_ps(mQ, rhs.mQ);
+	return comparison;
+}
+
+// Element-wise exact (this == rhs), with no tolerance; each element of the result is all ones where true, all zeros where false.
+inline LLVector4Logical LLVector4a::equal(const LLVector4a& rhs) const
+{
+	const LLQuad comparison = _mm_cmpeq_ps(mQ, rhs.mQ);
+	return comparison;
+}
+
+// Returns true if this and rhs are componentwise equal up to the specified absolute tolerance,
+// considering all four elements. The comparison is strict: |this - rhs| < tolerance.
+inline bool LLVector4a::equals4(const LLVector4a& rhs, F32 tolerance ) const
+{
+	// delta = | this - rhs |, element by element
+	LLVector4a delta;
+	delta.setSub( *this, rhs );
+	delta.setAbs( delta );
+	const LLQuad limit = _mm_set1_ps( tolerance );
+	const LLQuad within = _mm_cmplt_ps( delta, limit );
+	// One bit per element; every bit in the XYZW mask has to be set.
+	const int passedBits = _mm_movemask_ps( within );
+	return (passedBits & LLVector4Logical::MASK_XYZW) == LLVector4Logical::MASK_XYZW;
+}
+
+// Returns true if the x, y and z elements of this and rhs are equal up to the specified absolute
+// tolerance (w is ignored). The comparison is strict: |this - rhs| < tolerance.
+inline bool LLVector4a::equals3(const LLVector4a& rhs, F32 tolerance ) const
+{
+	// delta = | this - rhs |, element by element
+	LLVector4a delta;
+	delta.setSub( *this, rhs );
+	delta.setAbs( delta );
+	const LLQuad limit = _mm_set1_ps( tolerance );
+	const LLQuad within = _mm_cmplt_ps( delta, limit );
+	// One bit per element; only the bits in the XYZ mask have to be set.
+	const int passedBits = _mm_movemask_ps( within );
+	return (passedBits & LLVector4Logical::MASK_XYZ) == LLVector4Logical::MASK_XYZ;
+}
+
+////////////////////////////////////
+// OPERATORS
+////////////////////////////////////	
+
+// Do NOT add additional operators without consulting someone with SSE experience
+// Copy-assign: copies the quad of rhs into this vector and returns a const reference to this.
+inline const LLVector4a& LLVector4a::operator= ( const LLVector4a& rhs )
+{
+	mQ = rhs.mQ;
+	return *this;
+}
+
+// Assign directly from a raw quad and return a const reference to this.
+inline const LLVector4a& LLVector4a::operator= ( const LLQuad& rhs )
+{
+	mQ = rhs;
+	return *this;
+}
+
+// Implicit conversion to the underlying quad (returned by value), which lets an LLVector4a
+// be passed directly to SSE intrinsics.
+inline LLVector4a::operator LLQuad() const
+{
+	return mQ;
+}
--- a/indra/llmath/llvector4logical.h
+++ b/indra/llmath/llvector4logical.h
@ -0,0 +1,124 @@
+/** 
+ * @file llvector4logical.h
+ * @brief LLVector4Logical class header file - Companion class to LLVector4a for logical and bit-twiddling operations
+ *
+ * $LicenseInfo:firstyear=2010&license=viewerlgpl$
+ * Second Life Viewer Source Code
+ * Copyright (C) 2010, Linden Research, Inc.
+ * 
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation;
+ * version 2.1 of the License only.
+ * 
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ * 
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+ * 
+ * Linden Research, Inc., 945 Battery Street, San Francisco, CA  94111  USA
+ * $/LicenseInfo$
+ */
+
+#ifndef	LL_VECTOR4LOGICAL_H
+#define	LL_VECTOR4LOGICAL_H
+
+
+////////////////////////////
+// LLVector4Logical
+////////////////////////////
+// This class is incomplete. If you need additional functionality,
+// for example setting/unsetting particular elements or performing
+// other boolean operations, feel free to implement. If you need
+// assistance in determining the most optimal implementation,
+// contact someone with SSE experience (Falcon, Richard, Davep, e.g.)
+////////////////////////////
+
+// Four 16-byte rows of element masks: row N has all bits set in element N and all bits clear elsewhere.
+// LLVector4Logical::setElement<N>() reads row N (offset 4*N) as an LLQuad.
+static LL_ALIGN_16(const U32 S_V4LOGICAL_MASK_TABLE[4*4]) =
+{
+	0xFFFFFFFF, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0xFFFFFFFF, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0xFFFFFFFF, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0xFFFFFFFF
+};
+
+// Holds a quad of per-element boolean results (each element expected to be all ones or all zeros),
+// as produced by the LLVector4a comparison functions.
+class LLVector4Logical
+{
+public:
+
+	// Bit positions used by getGatheredBits(): bit 0 = X, bit 1 = Y, bit 2 = Z, bit 3 = W
+	enum {
+		MASK_X = 1,
+		MASK_Y = 1 << 1,
+		MASK_Z = 1 << 2,
+		MASK_W = 1 << 3,
+		MASK_XYZ = MASK_X | MASK_Y | MASK_Z,
+		MASK_XYZW = MASK_XYZ | MASK_W
+	};
+
+	// Empty default ctor (mQ is left uninitialized)
+	LLVector4Logical() {}
+
+	// Wrap an existing quad; intentionally implicit so comparison intrinsics can be returned directly
+	LLVector4Logical( const LLQuad& quad )
+	{
+		mQ = quad;
+	}
+
+	// Create and return a 4-bit mask gathered from the most significant (sign) bit of each element.
+	// For an element that is all ones or all zeros, bit N therefore tells whether element N is set.
+	inline U32 getGatheredBits() const
+	{
+		return _mm_movemask_ps(mQ);
+	}
+
+	// Invert this mask in place (bitwise NOT of every element) and return a reference to this
+	inline LLVector4Logical& invert()
+	{
+		static const LL_ALIGN_16(U32 allOnes[4]) = { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF };
+		// andnot computes (~mQ) & allOnes
+		mQ = _mm_andnot_ps( mQ, *reinterpret_cast<const LLQuad*>(allOnes) );
+		return *this;
+	}
+
+	// Nonzero if every element selected by 'mask' (a combination of MASK_* bits) is set
+	inline LLBool32 areAllSet( U32 mask ) const
+	{
+		return ( getGatheredBits() & mask) == mask;
+	}
+
+	// Nonzero if all four elements are set
+	inline LLBool32 areAllSet() const
+	{
+		return areAllSet( MASK_XYZW );
+	}
+
+	// Nonzero if at least one element selected by 'mask' (a combination of MASK_* bits) is set.
+	// The value returned is the matching bit pattern, not necessarily 1.
+	inline LLBool32 areAnySet( U32 mask ) const
+	{
+		return getGatheredBits() & mask;
+	}
+
+	// Nonzero if at least one of the four elements is set
+	inline LLBool32 areAnySet() const
+	{
+		return areAnySet( MASK_XYZW );
+	}
+
+	// Implicit conversion to the underlying quad, so the mask can be passed to SSE intrinsics
+	inline operator LLQuad() const
+	{
+		return mQ;
+	}
+
+	// Unset every element
+	inline void clear() 
+	{
+		mQ = _mm_setzero_ps();
+	}
+
+	// Set all bits of element N, leaving the other elements untouched.
+	// N must be in 0..3: it indexes row N of S_V4LOGICAL_MASK_TABLE with no range check.
+	template<int N> void setElement()
+	{
+		mQ = _mm_or_ps( mQ, *reinterpret_cast<const LLQuad*>(S_V4LOGICAL_MASK_TABLE + 4*N) );
+	}
+
+private:
+
+	LLQuad mQ;
+};
+
+#endif //LL_VECTOR4LOGICAL_H
--- a/indra/llmath/llvolume.cpp
+++ b/indra/llmath/llvolume.cpp
--- a/indra/llmath/llvolume.h
+++ b/indra/llmath/llvolume.h
@ -34,8 +34,13 @@ class LLPathParams;
 class LLVolumeParams;
 class LLProfile;
 class LLPath;
+
+template <class T> class LLOctreeNode;
+
+class LLVector4a;
 class LLVolumeFace;
 class LLVolume;
+class LLVolumeTriangle;

 #include "lldarray.h"
 #include "lluuid.h"
@ -43,6 +48,8 @@ class LLVolume;
 //#include "vmath.h"
 #include "v2math.h"
 #include "v3math.h"
+#include "v3dmath.h"
+#include "v4math.h"
 #include "llquaternion.h"
 #include "llstrider.h"
 #include "v4coloru.h"
@ -177,12 +184,14 @@ const U8 LL_SCULPT_TYPE_SPHERE    = 1;
 const U8 LL_SCULPT_TYPE_TORUS     = 2;
 const U8 LL_SCULPT_TYPE_PLANE     = 3;
 const U8 LL_SCULPT_TYPE_CYLINDER  = 4;
-
-const U8 LL_SCULPT_TYPE_MASK      = LL_SCULPT_TYPE_SPHERE | LL_SCULPT_TYPE_TORUS | LL_SCULPT_TYPE_PLANE | LL_SCULPT_TYPE_CYLINDER;
+const U8 LL_SCULPT_TYPE_MESH      = 5;
+const U8 LL_SCULPT_TYPE_MASK      = LL_SCULPT_TYPE_SPHERE | LL_SCULPT_TYPE_TORUS | LL_SCULPT_TYPE_PLANE |
+	LL_SCULPT_TYPE_CYLINDER | LL_SCULPT_TYPE_MESH;

 const U8 LL_SCULPT_FLAG_INVERT    = 64;
 const U8 LL_SCULPT_FLAG_MIRROR    = 128;

+const S32 LL_SCULPT_MESH_MAX_FACES = 8;

 class LLProfileParams
 {
@ -569,6 +578,9 @@ public:
 	BOOL importLegacyStream(std::istream& input_stream);
 	BOOL exportLegacyStream(std::ostream& output_stream) const;

+	LLSD sculptAsLLSD() const;
+	bool sculptFromLLSD(LLSD& sd);
+	
 	LLSD asLLSD() const;
 	operator LLSD() const { return asLLSD(); }
 	bool fromLLSD(LLSD& sd);
@ -628,7 +640,8 @@ public:
 	const F32&  getSkew() const			{ return mPathParams.getSkew();			}
 	const LLUUID& getSculptID() const	{ return mSculptID;						}
 	const U8& getSculptType() const     { return mSculptType;                   }
-
+	bool isSculpt() const;
+	bool isMeshSculpt() const;
 	BOOL isConvex() const;

 	// 'begin' and 'end' should be in range [0, 1] (they will be clamped)
@ -779,30 +792,88 @@ public:
 class LLVolumeFace
 {
 public:
-	LLVolumeFace() : 
-		mID(0),
-		mTypeMask(0),
-		mHasBinormals(FALSE),
-		mBeginS(0),
-		mBeginT(0),
-		mNumS(0),
-		mNumT(0)
+	class VertexData
 	{
-	}
+		enum 
+		{
+			POSITION = 0,
+			NORMAL = 1
+		};
+
+	private:
+		void init();
+	public:
+		VertexData();
+		VertexData(const VertexData& rhs);
+		const VertexData& operator=(const VertexData& rhs);
+
+		~VertexData();
+		LLVector4a& getPosition();
+		LLVector4a& getNormal();
+		const LLVector4a& getPosition() const;
+		const LLVector4a& getNormal() const;
+		void setPosition(const LLVector4a& pos);
+		void setNormal(const LLVector4a& norm);
+		
+
+		LLVector2 mTexCoord;
+
+		bool operator<(const VertexData& rhs) const;
+		bool operator==(const VertexData& rhs) const;
+		bool compareNormal(const VertexData& rhs, F32 angle_cutoff) const;
+
+	private:
+		LLVector4a* mData;
+	};
+
+	LLVolumeFace();
+	LLVolumeFace(const LLVolumeFace& src);
+	LLVolumeFace& operator=(const LLVolumeFace& rhs);
+
+	~LLVolumeFace();
+private:
+	void freeData();
+public:

 	BOOL create(LLVolume* volume, BOOL partial_build = FALSE);
 	void createBinormals();
-	void makeTriStrip();
 	
-	class VertexData
+	void appendFace(const LLVolumeFace& face, LLMatrix4& transform, LLMatrix4& normal_tranform);
+
+	void resizeVertices(S32 num_verts);
+	void allocateBinormals(S32 num_verts);
+	void allocateWeights(S32 num_verts);
+	void resizeIndices(S32 num_indices);
+	void fillFromLegacyData(std::vector<LLVolumeFace::VertexData>& v, std::vector<U16>& idx);
+
+	void pushVertex(const VertexData& cv);
+	void pushVertex(const LLVector4a& pos, const LLVector4a& norm, const LLVector2& tc);
+	void pushIndex(const U16& idx);
+
+	void swapData(LLVolumeFace& rhs);
+
+	void getVertexData(U16 indx, LLVolumeFace::VertexData& cv);
+
+	class VertexMapData : public LLVolumeFace::VertexData
 	{
 	public:
-		LLVector3 mPosition;
-		LLVector3 mNormal;
-		LLVector3 mBinormal;
-		LLVector2 mTexCoord;
+		U16 mIndex;
+
+		bool operator==(const LLVolumeFace::VertexData& rhs) const;
+
+		struct ComparePosition
+		{
+			bool operator()(const LLVector3& a, const LLVector3& b) const;
+		};
+
+		typedef std::map<LLVector3, std::vector<VertexMapData>, VertexMapData::ComparePosition > PointMap;
 	};

+	void optimize(F32 angle_cutoff = 2.f);
+	void cacheOptimize();
+
+	void createOctree(F32 scaler = 0.25f, const LLVector4a& center = LLVector4a(0,0,0), const LLVector4a& size = LLVector4a(0.5f,0.5f,0.5f));
+
 	enum
 	{
 		SINGLE_MASK =	0x0001,
@ -821,23 +892,35 @@ public:
 public:
 	S32 mID;
 	U32 mTypeMask;
-	LLVector3 mCenter;
-	BOOL mHasBinormals;
-
+	
 	// Only used for INNER/OUTER faces
 	S32 mBeginS;
 	S32 mBeginT;
 	S32 mNumS;
 	S32 mNumT;

-	LLVector3 mExtents[2]; //minimum and maximum point of face
-	LLVector2 mTexCoordExtents[2]; //minimum and maximum of texture coordinates of the face.
+	LLVector4a* mExtents; //minimum and maximum point of face
+	LLVector4a* mCenter;
+	LLVector2   mTexCoordExtents[2]; //minimum and maximum of texture coordinates of the face.
+
+	S32 mNumVertices;
+	S32 mNumIndices;
+
+	LLVector4a* mPositions;
+	LLVector4a* mNormals;
+	LLVector4a* mBinormals;
+	LLVector2*  mTexCoords;
+	U16* mIndices;

-	std::vector<VertexData> mVertices;
-	std::vector<U16>	mIndices;
-	std::vector<U16>	mTriStrip;
 	std::vector<S32>	mEdge;

+	//list of skin weights for rigged volumes
+	// format is mWeights[vertex_index].mV[influence] = <joint_index>.<weight>
+	// mWeights.size() should be empty or match mVertices.size()  
+	LLVector4a* mWeights;
+
+	LLOctreeNode<LLVolumeTriangle>* mOctree;
+
 private:
 	BOOL createUnCutCubeCap(LLVolume* volume, BOOL partial_build = FALSE);
 	BOOL createCap(LLVolume* volume, BOOL partial_build = FALSE);
@ -848,8 +931,7 @@ class LLVolume : public LLRefCount
 {
 	friend class LLVolumeLODGroup;

-private:
-	LLVolume(const LLVolume&);  // Don't implement
+protected:
 	~LLVolume(); // use unref

 public:
@ -871,7 +953,7 @@ public:
 	
 	U8 getProfileType()	const								{ return mParams.getProfileParams().getCurveType(); }
 	U8 getPathType() const									{ return mParams.getPathParams().getCurveType(); }
-	S32	getNumFaces() const									{ return (S32)mProfilep->mFaces.size(); }
+	S32	getNumFaces() const;
 	S32 getNumVolumeFaces() const							{ return mVolumeFaces.size(); }
 	F32 getDetail() const									{ return mDetail; }
 	const LLVolumeParams& getParams() const					{ return mParams; }
@ -893,15 +975,17 @@ public:
 	BOOL isUnique() const									{ return mUnique; }

 	S32 getSculptLevel() const                              { return mSculptLevel; }
-	
+	void setSculptLevel(S32 level)							{ mSculptLevel = level; }
+
 	S32 *getTriangleIndices(U32 &num_indices) const;

 	// returns number of triangle indeces required for path/profile mesh
 	S32 getNumTriangleIndices() const;

+	S32 getNumTriangles() const;
+
 	void generateSilhouetteVertices(std::vector<LLVector3> &vertices, 
 									std::vector<LLVector3> &normals, 
-									std::vector<S32> &segments, 
 									const LLVector3& view_vec,
 									const LLMatrix4& mat,
 									const LLMatrix3& norm_mat,
@ -917,6 +1001,13 @@ public:
 							 LLVector3* normal = NULL,               // return the surface normal at the intersection point
 							 LLVector3* bi_normal = NULL             // return the surface bi-normal at the intersection point
 		);
+
+	S32 lineSegmentIntersect(const LLVector4a& start, const LLVector4a& end, 
+								   S32 face = 1,
+								   LLVector3* intersection = NULL,
+								   LLVector2* tex_coord = NULL,
+								   LLVector3* normal = NULL,
+								   LLVector3* bi_normal = NULL);
 	
 	// The following cleans up vertices and triangles,
 	// getting rid of degenerate triangles and duplicate vertices,
@ -938,11 +1029,14 @@ public:
 	friend std::ostream& operator<<(std::ostream &s, const LLVolume *volumep);		// HACK to bypass Windoze confusion over 
 																				// conversion if *(LLVolume*) to LLVolume&
 	const LLVolumeFace &getVolumeFace(const S32 f) const {return mVolumeFaces[f];} // DO NOT DELETE VOLUME WHILE USING THIS REFERENCE, OR HOLD A POINTER TO THIS VOLUMEFACE
-
+	
 	U32					mFaceMask;			// bit array of which faces exist in this volume
 	LLVector3			mLODScaleBias;		// vector for biasing LOD based on scale
 	
 	void sculpt(U16 sculpt_width, U16 sculpt_height, S8 sculpt_components, const U8* sculpt_data, S32 sculpt_level);
+	void copyVolumeFaces(const LLVolume* volume);
+	void cacheOptimize();
+
 private:
 	void sculptGenerateMapVertices(U16 sculpt_width, U16 sculpt_height, S8 sculpt_components, const U8* sculpt_data, U8 sculpt_type);
 	F32 sculptGetSurfaceArea();
@ -953,35 +1047,56 @@ private:
 protected:
 	BOOL generate();
 	void createVolumeFaces();
+public:
+	virtual bool unpackVolumeFaces(std::istream& is, S32 size);
+
+	virtual void makeTetrahedron();
+	virtual BOOL isTetrahedron();

 protected:
 	BOOL mUnique;
 	F32 mDetail;
 	S32 mSculptLevel;
+	BOOL mIsTetrahedron;
 	
 	LLVolumeParams mParams;
 	LLPath *mPathp;
 	LLProfile *mProfilep;
 	std::vector<Point> mMesh;
-
+	
 	BOOL mGenerateSingleFace;
 	typedef std::vector<LLVolumeFace> face_list_t;
 	face_list_t mVolumeFaces;
+
+public:
+	LLVector4a* mHullPoints;
+	U16* mHullIndices;
+	S32 mNumHullPoints;
+	S32 mNumHullIndices;
 };

 std::ostream& operator<<(std::ostream &s, const LLVolumeParams &volume_params);

-LLVector3 calc_binormal_from_triangle(
-		const LLVector3& pos0,
+void calc_binormal_from_triangle(
+		LLVector4a& binormal,
+		const LLVector4a& pos0,
 		const LLVector2& tex0,
-		const LLVector3& pos1,
+		const LLVector4a& pos1,
 		const LLVector2& tex1,
-		const LLVector3& pos2,
+		const LLVector4a& pos2,
 		const LLVector2& tex2);

+BOOL LLLineSegmentBoxIntersect(const F32* start, const F32* end, const F32* center, const F32* size);
 BOOL LLLineSegmentBoxIntersect(const LLVector3& start, const LLVector3& end, const LLVector3& center, const LLVector3& size);
+BOOL LLLineSegmentBoxIntersect(const LLVector4a& start, const LLVector4a& end, const LLVector4a& center, const LLVector4a& size);
+
 BOOL LLTriangleRayIntersect(const LLVector3& vert0, const LLVector3& vert1, const LLVector3& vert2, const LLVector3& orig, const LLVector3& dir,
-							F32* intersection_a, F32* intersection_b, F32* intersection_t, BOOL two_sided);
+							F32& intersection_a, F32& intersection_b, F32& intersection_t, BOOL two_sided);
+
+BOOL LLTriangleRayIntersect(const LLVector4a& vert0, const LLVector4a& vert1, const LLVector4a& vert2, const LLVector4a& orig, const LLVector4a& dir,
+							F32& intersection_a, F32& intersection_b, F32& intersection_t);
+BOOL LLTriangleRayIntersectTwoSided(const LLVector4a& vert0, const LLVector4a& vert1, const LLVector4a& vert2, const LLVector4a& orig, const LLVector4a& dir,
+							F32& intersection_a, F32& intersection_b, F32& intersection_t);
 	
 	

--- a/indra/llmath/llvolumemgr.cpp
+++ b/indra/llmath/llvolumemgr.cpp
@ -314,7 +314,7 @@ BOOL LLVolumeLODGroup::derefLOD(LLVolume *volumep)
 		{
 			llassert_always(mLODRefs[i] > 0);
 			mLODRefs[i]--;
-#if 1 // SJB: Possible opt: keep other lods around
+#if 0 // SJB: Possible opt: keep other lods around
 			if (!mLODRefs[i])
 			{
 				mVolumeLODs[i] = NULL;
@ -369,6 +369,19 @@ F32 LLVolumeLODGroup::getVolumeScaleFromDetail(const S32 detail)
 	return mDetailScales[detail];
 }

+// Map a volume scale back to a detail index.
+// Scans mDetailScales from index 1 upward and returns the index just below the
+// first entry that is greater than 'scale'; returns 3 when no entry exceeds it.
+S32 LLVolumeLODGroup::getVolumeDetailFromScale(const F32 scale)
+{
+	for (S32 i = 1; i < 4; i++)
+	{
+		if (mDetailScales[i] > scale)
+		{
+			return i-1;
+		}
+	}
+
+	return 3;
+}
+
 F32 LLVolumeLODGroup::dump()
 {
 	F32 usage = 0.f;
--- a/indra/llmath/llvolumemgr.h
+++ b/indra/llmath/llvolumemgr.h
@ -53,6 +53,7 @@ public:
 	static S32 getDetailFromTan(const F32 tan_angle);
 	static void getDetailProximity(const F32 tan_angle, F32 &to_lower, F32& to_higher);
 	static F32 getVolumeScaleFromDetail(const S32 detail);
+	static S32 getVolumeDetailFromScale(F32 scale);

 	LLVolume* refLOD(const S32 detail);
 	BOOL derefLOD(LLVolume *volumep);
--- a/indra/llmath/llvolumeoctree.cpp
+++ b/indra/llmath/llvolumeoctree.cpp
@ -0,0 +1,256 @@
+/** 
+
+ * @file llvolumeoctree.cpp
+ *
+ * $LicenseInfo:firstyear=2002&license=viewerlgpl$
+ * Second Life Viewer Source Code
+ * Copyright (C) 2010, Linden Research, Inc.
+ * 
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation;
+ * version 2.1 of the License only.
+ * 
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ * 
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+ * 
+ * Linden Research, Inc., 945 Battery Street, San Francisco, CA  94111  USA
+ * $/LicenseInfo$
+ */
+
+#include "llvolumeoctree.h"
+#include "llvector4a.h"
+
+// Overlap test between the segment [start, end] and the axis-aligned box described
+// by 'center' and 'size' ('size' is added to / compared against offsets from the
+// center, i.e. it is used as a per-axis half extent). Only the x, y and z lanes
+// take part in the decision.
+BOOL LLLineSegmentBoxIntersect(const LLVector4a& start, const LLVector4a& end, const LLVector4a& center, const LLVector4a& size)
+{
+	// half of the segment vector
+	LLVector4a half_dir;
+	half_dir.setSub(end, start);
+	half_dir.mul(0.5f);
+
+	// segment midpoint, expressed relative to the box center
+	LLVector4a mid_offset;
+	mid_offset.setAdd(end,start);
+	mid_offset.mul(0.5f);
+	mid_offset.sub(center);
+
+	LLVector4a abs_half_dir;
+	abs_half_dir.setAbs(half_dir);
+
+	// first test: the three box axes
+	LLVector4a reach;
+	reach.setAdd(size, abs_half_dir);
+
+	LLVector4a abs_offset;
+	abs_offset.setAbs(mid_offset);
+
+	if (abs_offset.greaterThan(reach).areAnySet(LLVector4Logical::MASK_XYZ))
+	{
+		return false;
+	}
+
+	// second test: the three cross-product axes
+	LLVector4a cross;
+	cross.setCross3(half_dir, mid_offset);
+	cross.setAbs(cross);
+
+	LLVector4a swizzled_size, swizzled_dir;
+
+	// (size.y, size.x, size.x) * (|d|.z, |d|.z, |d|.y)
+	swizzled_size = _mm_shuffle_ps(size, size,_MM_SHUFFLE(3,0,0,1));
+	swizzled_dir = _mm_shuffle_ps(abs_half_dir, abs_half_dir, _MM_SHUFFLE(3,1,2,2));
+	LLVector4a first_term;
+	first_term.setMul(swizzled_size, swizzled_dir);
+
+	// (size.z, size.z, size.y) * (|d|.y, |d|.x, |d|.x)
+	swizzled_size = _mm_shuffle_ps(size, size, _MM_SHUFFLE(3,1,2,2));
+	swizzled_dir = _mm_shuffle_ps(abs_half_dir, abs_half_dir, _MM_SHUFFLE(3,0,0,1));
+	LLVector4a limit;
+	limit.setMul(swizzled_size, swizzled_dir);
+	limit.add(first_term);
+
+	return cross.greaterThan(limit).areAnySet(LLVector4Logical::MASK_XYZ) ? false : true;
+}
+
+
+// Construct a listener and immediately register it with 'node'.
+// mBounds and mExtents are not initialized here.
+LLVolumeOctreeListener::LLVolumeOctreeListener(LLOctreeNode<LLVolumeTriangle>* node)
+{
+	node->addListener(this);
+}
+
+// Nothing to release: the destructor body is intentionally empty.
+LLVolumeOctreeListener::~LLVolumeOctreeListener()
+{
+}
+	
+// Listener callback for a child being added under 'parent' ('parent' is unused).
+// The new listener registers itself with 'child' from its constructor, which is
+// why the pointer returned by new is not stored here.
+// NOTE(review): presumably the node then owns the listener - confirm in lloctree.h.
+void LLVolumeOctreeListener::handleChildAddition(const LLOctreeNode<LLVolumeTriangle>* parent, 
+	LLOctreeNode<LLVolumeTriangle>* child)
+{
+	new LLVolumeOctreeListener(child);
+}
+
+
+// Prepare a segment query against the triangles of 'face'.
+//   start, dir   - segment origin and full-length direction (end = start + dir)
+//   closest_t    - in/out: smallest hit parameter found so far; visit() only
+//                  records hits whose t is strictly below the stored value
+//   intersection, tex_coord, normal, bi_normal
+//                - optional outputs, each may be NULL to skip that result
+LLOctreeTriangleRayIntersect::LLOctreeTriangleRayIntersect(const LLVector4a& start, const LLVector4a& dir, 
+							   const LLVolumeFace* face, F32* closest_t,
+							   LLVector3* intersection,LLVector2* tex_coord, LLVector3* normal, LLVector3* bi_normal)
+	: mFace(face)
+	, mStart(start)
+	, mDir(dir)
+	, mIntersection(intersection)
+	, mTexCoord(tex_coord)
+	, mNormal(normal)
+	, mBinormal(bi_normal)
+	, mClosestT(closest_t)
+	, mHitFace(false)
+{
+	// the segment end point is derived once, up front
+	mEnd.setAdd(mStart, mDir);
+}
+
+// Walk the octree depth first, descending only into nodes whose bounding box
+// (listener 0's center/size pair) is touched by the segment.
+void LLOctreeTriangleRayIntersect::traverse(const LLOctreeNode<LLVolumeTriangle>* node)
+{
+	LLVolumeOctreeListener* listener = (LLVolumeOctreeListener*) node->getListener(0);
+
+	// The F32-pointer overload is used here; an LLVector4a overload taking
+	// (mStart, mEnd, mBounds[0], mBounds[1]) exists as an alternative.
+	const BOOL touches_node = LLLineSegmentBoxIntersect(mStart.getF32ptr(), mEnd.getF32ptr(), listener->mBounds[0].getF32ptr(), listener->mBounds[1].getF32ptr());
+	if (!touches_node)
+	{
+		// segment misses this node's box: skip the node and everything below it
+		return;
+	}
+
+	// test this node's own triangles first, then recurse into the children
+	node->accept(this);
+	for (S32 i = 0; i < node->getChildCount(); ++i)
+	{
+		traverse(node->getChild(i));
+	}
+}
+
+// Test every triangle stored directly in 'node' against the segment. Whenever a
+// hit lies within the segment and is closer than *mClosestT, record it and fill
+// in whichever optional outputs were supplied, interpolating per-vertex data
+// with the weights (1 - a - b, a, b) returned by the triangle test.
+void LLOctreeTriangleRayIntersect::visit(const LLOctreeNode<LLVolumeTriangle>* node)
+{
+	typedef LLOctreeNode<LLVolumeTriangle>::const_element_iter tri_iter_t;
+
+	for (tri_iter_t it = node->getData().begin(); it != node->getData().end(); ++it)
+	{
+		const LLVolumeTriangle* tri = *it;
+
+		F32 a, b, t;
+
+		if (!LLTriangleRayIntersect(*tri->mV[0], *tri->mV[1], *tri->mV[2],
+				mStart, mDir, a, b, t))
+		{
+			continue;
+		}
+
+		// kept in positive form so a NaN 't' is rejected
+		const bool closer_hit = (t >= 0.f) &&      // if hit is after start
+								(t <= 1.f) &&      // and before end
+								(t < *mClosestT);  // and this hit is closer
+		if (!closer_hit)
+		{
+			continue;
+		}
+
+		*mClosestT = t;
+		mHitFace = true;
+
+		const U16 i0 = tri->mIndex[0];
+		const U16 i1 = tri->mIndex[1];
+		const U16 i2 = tri->mIndex[2];
+		const F32 w0 = 1.f - a - b;
+
+		if (mIntersection != NULL)
+		{
+			// hit point = start + dir * t
+			LLVector4a hit_point = mDir;
+			hit_point.mul(*mClosestT);
+			hit_point.add(mStart);
+			mIntersection->set(hit_point.getF32ptr());
+		}
+
+		if (mTexCoord != NULL)
+		{
+			LLVector2* tc = (LLVector2*) mFace->mTexCoords;
+			*mTexCoord = (w0 * tc[i0] +
+				a  * tc[i1] +
+				b  * tc[i2]);
+		}
+
+		if (mNormal != NULL)
+		{
+			LLVector4* norm = (LLVector4*) mFace->mNormals;
+
+			*mNormal = (w0 * LLVector3(norm[i0]) + 
+				a  * LLVector3(norm[i1]) +
+				b  * LLVector3(norm[i2]));
+		}
+
+		if (mBinormal != NULL)
+		{
+			LLVector4* binormal = (LLVector4*) mFace->mBinormals;
+			*mBinormal = (w0 * LLVector3(binormal[i0]) + 
+				a  * LLVector3(binormal[i1]) +
+				b  * LLVector3(binormal[i2]));
+		}
+	}
+}
+
+// Accessor for mPositionGroup; returns a reference to the stored member.
+const LLVector4a& LLVolumeTriangle::getPositionGroup() const
+{
+	return mPositionGroup;
+}
+
+// Accessor for mRadius; returns a reference to the stored member.
+const F32& LLVolumeTriangle::getBinRadius() const
+{
+	return mRadius;
+}
+
+
+//TEST CODE
+
+// Consistency check for one octree node. Reports through llerrs when
+//   - the node's center/size box disagrees with its min/max extents,
+//   - a direct child's extents stick out of the node, or
+//   - a vertex of a triangle stored in the node sticks out of the node.
+// All comparisons use a 0.001 tolerance and only the x, y and z lanes.
+void LLVolumeOctreeValidate::visit(const LLOctreeNode<LLVolumeTriangle>* branch)
+{
+	LLVolumeOctreeListener* node = (LLVolumeOctreeListener*) branch->getListener(0);
+
+	const LLVector4a& ext_min = node->mExtents[0];
+	const LLVector4a& ext_max = node->mExtents[1];
+	const LLVector4a& box_center = node->mBounds[0];
+	const LLVector4a& box_size = node->mBounds[1];
+
+	//make sure bounds matches extents
+	LLVector4a test_min;
+	test_min.setSub(box_center, box_size);
+
+	LLVector4a test_max;
+	test_max.setAdd(box_center, box_size);
+
+	const bool bounds_match = test_min.equals3(ext_min, 0.001f) &&
+							  test_max.equals3(ext_max, 0.001f);
+	if (!bounds_match)
+	{
+		llerrs << "Bad bounding box data found." << llendl;
+	}
+
+	// widen the box by the tolerance before the containment checks
+	test_min.sub(LLVector4a(0.001f));
+	test_max.add(LLVector4a(0.001f));
+
+	//make sure all children fit inside this node
+	for (U32 child_idx = 0; child_idx < branch->getChildCount(); ++child_idx)
+	{
+		LLVolumeOctreeListener* child = (LLVolumeOctreeListener*) branch->getChild(child_idx)->getListener(0);
+
+		if (child->mExtents[0].lessThan(test_min).areAnySet(LLVector4Logical::MASK_XYZ) ||
+			child->mExtents[1].greaterThan(test_max).areAnySet(LLVector4Logical::MASK_XYZ))
+		{
+			llerrs << "Child protrudes from bounding box." << llendl;
+		}
+	}
+
+	//children fit, check data
+	typedef LLOctreeNode<LLVolumeTriangle>::const_element_iter tri_iter_t;
+	for (tri_iter_t it = branch->getData().begin(); it != branch->getData().end(); ++it)
+	{
+		const LLVolumeTriangle* tri = *it;
+
+		//validate triangle
+		for (U32 corner = 0; corner < 3; corner++)
+		{
+			const LLVector4a* vert = tri->mV[corner];
+			if (vert->greaterThan(test_max).areAnySet(LLVector4Logical::MASK_XYZ) ||
+				vert->lessThan(test_min).areAnySet(LLVector4Logical::MASK_XYZ))
+			{
+				llerrs << "Triangle protrudes from node." << llendl;
+			}
+		}
+	}
+}
+
+
--- a/indra/llmath/llvolumeoctree.h
+++ b/indra/llmath/llvolumeoctree.h
@ -0,0 +1,134 @@
+/** 
+ * @file llvolumeoctree.h
+ * @brief LLVolume octree classes.
+ *
+ * $LicenseInfo:firstyear=2002&license=viewerlgpl$
+ * Second Life Viewer Source Code
+ * Copyright (C) 2010, Linden Research, Inc.
+ * 
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation;
+ * version 2.1 of the License only.
+ * 
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ * 
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+ * 
+ * Linden Research, Inc., 945 Battery Street, San Francisco, CA  94111  USA
+ * $/LicenseInfo$
+ */
+
+#ifndef LL_LLVOLUME_OCTREE_H
+#define LL_LLVOLUME_OCTREE_H
+
+#include "linden_common.h"
+#include "llmemory.h"
+
+#include "lloctree.h"
+#include "llvolume.h"
+#include "llvector4a.h"
+
+// One triangle of a volume face, used as the element type of
+// LLOctreeNode<LLVolumeTriangle>. Copying is not supported: both the copy
+// constructor and the assignment operator end up in llerrs.
+class LLVolumeTriangle : public LLRefCount
+{
+public:
+	// All members are left uninitialized
+	LLVolumeTriangle()
+	{
+	}
+
+	// Copying is illegal; forwards to operator= which raises the error
+	LLVolumeTriangle(const LLVolumeTriangle& rhs)
+	{
+		operator=(rhs);
+	}
+
+	// Assignment is illegal and reports through llerrs
+	const LLVolumeTriangle& operator=(const LLVolumeTriangle& rhs)
+	{
+		llerrs << "Illegal operation!" << llendl;
+		return *this;
+	}
+
+	~LLVolumeTriangle()
+	{
+	}
+
+	// value handed out by getPositionGroup()
+	LLVector4a mPositionGroup;
+
+	// pointers to the three corner positions (not owned by the triangle)
+	const LLVector4a* mV[3];
+	// indices of the three corners; used to look up per-vertex face data
+	U16 mIndex[3];
+
+	// value handed out by getBinRadius()
+	F32 mRadius;
+
+	virtual const LLVector4a& getPositionGroup() const;
+	virtual const F32& getBinRadius() const;
+};
+
+// Per-node listener that carries the bounding data of an octree node holding
+// LLVolumeTriangle elements. Only child addition is acted upon; every other
+// listener callback is an empty stub. Copying is not supported.
+class LLVolumeOctreeListener : public LLOctreeListener<LLVolumeTriangle>
+{
+public:
+	
+	// Registers the new listener with 'node'
+	LLVolumeOctreeListener(LLOctreeNode<LLVolumeTriangle>* node);
+	~LLVolumeOctreeListener();
+	
+	// Copying is illegal; forwards to operator= which raises the error
+	LLVolumeOctreeListener(const LLVolumeOctreeListener& rhs)
+	{
+		operator=(rhs);
+	}
+
+	// Assignment is illegal and reports through llerrs
+	const LLVolumeOctreeListener& operator=(const LLVolumeOctreeListener& rhs)
+	{
+		llerrs << "Illegal operation!" << llendl;
+		return *this;
+	}
+
+	 //LISTENER FUNCTIONS
+	// creates a listener for the newly added child
+	virtual void handleChildAddition(const LLOctreeNode<LLVolumeTriangle>* parent, 
+		LLOctreeNode<LLVolumeTriangle>* child);
+	// the remaining callbacks intentionally do nothing
+	virtual void handleStateChange(const LLTreeNode<LLVolumeTriangle>* node) { }
+	virtual void handleChildRemoval(const LLOctreeNode<LLVolumeTriangle>* parent, 
+			const LLOctreeNode<LLVolumeTriangle>* child) {	}
+	virtual void handleInsertion(const LLTreeNode<LLVolumeTriangle>* node, LLVolumeTriangle* tri) { }
+	virtual void handleRemoval(const LLTreeNode<LLVolumeTriangle>* node, LLVolumeTriangle* tri) { }
+	virtual void handleDestruction(const LLTreeNode<LLVolumeTriangle>* node) { }
+	
+
+public:
+	// bounding box of this node and all its children as (center, size); tight fit to objects
+	LLVector4a mBounds[2];
+	// the same region expressed as (min, max) extents
+	LLVector4a mExtents[2];
+};
+
+// Octree traveler that intersects a line segment with the triangles of one
+// volume face and keeps track of the closest hit.
+class LLOctreeTriangleRayIntersect : public LLOctreeTraveler<LLVolumeTriangle>
+{
+public:
+	// face whose per-vertex data is interpolated at the hit
+	const LLVolumeFace* mFace;
+	// segment start, full-length direction, and end (start + dir)
+	LLVector4a mStart;
+	LLVector4a mDir;
+	LLVector4a mEnd;
+	// optional outputs; NULL means "not requested"
+	LLVector3* mIntersection;
+	LLVector2* mTexCoord;
+	LLVector3* mNormal;
+	LLVector3* mBinormal;
+	// in/out: smallest hit parameter seen so far
+	F32* mClosestT;
+	// set to true once any triangle produced an accepted hit
+	bool mHitFace;
+
+	LLOctreeTriangleRayIntersect(const LLVector4a& start, const LLVector4a& dir, 
+								   const LLVolumeFace* face, F32* closest_t,
+								   LLVector3* intersection,LLVector2* tex_coord, LLVector3* normal, LLVector3* bi_normal);
+
+	// recursive descent that prunes nodes whose bounds the segment misses
+	void traverse(const LLOctreeNode<LLVolumeTriangle>* node);
+
+	// tests the triangles stored directly in 'node'
+	virtual void visit(const LLOctreeNode<LLVolumeTriangle>* node);
+};
+
+// Test-only traveler: visit() checks that a node's bounds, its children and its
+// triangles are mutually consistent, reporting problems through llerrs.
+class LLVolumeOctreeValidate : public LLOctreeTraveler<LLVolumeTriangle>
+{
+	// validates a single node; declared without an access specifier, so private
+	virtual void visit(const LLOctreeNode<LLVolumeTriangle>* branch);
+};
+
+#endif
--- a/indra/llmath/m4math.cpp
+++ b/indra/llmath/m4math.cpp
@ -215,8 +215,33 @@ const LLMatrix4&	LLMatrix4::transpose()

 F32 LLMatrix4::determinant() const
 {
-	llerrs << "Not implemented!" << llendl;
-	return 0.f;
+	F32 value = // full 4x4 determinant written out term by term: 24 signed products, each taking one entry from every row and every column
+	    mMatrix[0][3] * mMatrix[1][2] * mMatrix[2][1] * mMatrix[3][0] -
+	    mMatrix[0][2] * mMatrix[1][3] * mMatrix[2][1] * mMatrix[3][0] -
+	    mMatrix[0][3] * mMatrix[1][1] * mMatrix[2][2] * mMatrix[3][0] +
+	    mMatrix[0][1] * mMatrix[1][3] * mMatrix[2][2] * mMatrix[3][0] +
+	    mMatrix[0][2] * mMatrix[1][1] * mMatrix[2][3] * mMatrix[3][0] -
+	    mMatrix[0][1] * mMatrix[1][2] * mMatrix[2][3] * mMatrix[3][0] -
+	    mMatrix[0][3] * mMatrix[1][2] * mMatrix[2][0] * mMatrix[3][1] +
+	    mMatrix[0][2] * mMatrix[1][3] * mMatrix[2][0] * mMatrix[3][1] +
+	    mMatrix[0][3] * mMatrix[1][0] * mMatrix[2][2] * mMatrix[3][1] -
+	    mMatrix[0][0] * mMatrix[1][3] * mMatrix[2][2] * mMatrix[3][1] -
+	    mMatrix[0][2] * mMatrix[1][0] * mMatrix[2][3] * mMatrix[3][1] +
+	    mMatrix[0][0] * mMatrix[1][2] * mMatrix[2][3] * mMatrix[3][1] +
+	    mMatrix[0][3] * mMatrix[1][1] * mMatrix[2][0] * mMatrix[3][2] -
+	    mMatrix[0][1] * mMatrix[1][3] * mMatrix[2][0] * mMatrix[3][2] -
+	    mMatrix[0][3] * mMatrix[1][0] * mMatrix[2][1] * mMatrix[3][2] +
+	    mMatrix[0][0] * mMatrix[1][3] * mMatrix[2][1] * mMatrix[3][2] +
+	    mMatrix[0][1] * mMatrix[1][0] * mMatrix[2][3] * mMatrix[3][2] -
+	    mMatrix[0][0] * mMatrix[1][1] * mMatrix[2][3] * mMatrix[3][2] -
+	    mMatrix[0][2] * mMatrix[1][1] * mMatrix[2][0] * mMatrix[3][3] +
+	    mMatrix[0][1] * mMatrix[1][2] * mMatrix[2][0] * mMatrix[3][3] +
+	    mMatrix[0][2] * mMatrix[1][0] * mMatrix[2][1] * mMatrix[3][3] -
+	    mMatrix[0][0] * mMatrix[1][2] * mMatrix[2][1] * mMatrix[3][3] -
+	    mMatrix[0][1] * mMatrix[1][0] * mMatrix[2][2] * mMatrix[3][3] +
+		mMatrix[0][0] * mMatrix[1][1] * mMatrix[2][2] * mMatrix[3][3];
+
+	return value;
 }

 // Only works for pure orthonormal, homogeneous transform matrices.
@ -422,6 +447,17 @@ const LLMatrix4&  	LLMatrix4::initRotTrans(const LLQuaternion &q, const LLVector
 	return (*this);
 }

+// Turn this matrix into a pure scale: identity everywhere except the X, Y and Z
+// diagonal entries, which receive the matching components of 'scale'.
+// Returns *this.
+const LLMatrix4& LLMatrix4::initScale(const LLVector3 &scale)
+{
+	// start from identity so every other entry is already correct
+	setIdentity();
+
+	const F32* factors = scale.mV;
+	mMatrix[VX][VX] = factors[VX];
+	mMatrix[VY][VY] = factors[VY];
+	mMatrix[VZ][VZ] = factors[VZ];
+
+	return *this;
+}
+
 const LLMatrix4& LLMatrix4::initAll(const LLVector3 &scale, const LLQuaternion &q, const LLVector3 &pos)
 {
 	F32		sx, sy, sz;
@ -642,37 +678,6 @@ const LLMatrix4&  	LLMatrix4::initMatrix(const LLMatrix3 &mat, const LLVector4 &

 // LLMatrix4 Operators

-
-/* Not implemented to help enforce code consistency with the syntax of
-   row-major notation.  This is a Good Thing.
-LLVector4 operator*(const LLMatrix4 &a, const LLVector4 &b)
-{
-	// Operate "to the right" on column-vector b
-	LLVector4	vec;
-	vec.mV[VX] = a.mMatrix[VX][VX] * b.mV[VX] + 
-				 a.mMatrix[VY][VX] * b.mV[VY] + 
- 				 a.mMatrix[VZ][VX] * b.mV[VZ] +
-				 a.mMatrix[VW][VX] * b.mV[VW];
-
-	vec.mV[VY] = a.mMatrix[VX][VY] * b.mV[VX] + 
-				 a.mMatrix[VY][VY] * b.mV[VY] + 
-				 a.mMatrix[VZ][VY] * b.mV[VZ] +
-				 a.mMatrix[VW][VY] * b.mV[VW];
-
-	vec.mV[VZ] = a.mMatrix[VX][VZ] * b.mV[VX] + 
-			  	 a.mMatrix[VY][VZ] * b.mV[VY] + 
-				 a.mMatrix[VZ][VZ] * b.mV[VZ] +
-				 a.mMatrix[VW][VZ] * b.mV[VW];
-
-	vec.mV[VW] = a.mMatrix[VX][VW] * b.mV[VX] + 
-				 a.mMatrix[VY][VW] * b.mV[VY] + 
-				 a.mMatrix[VZ][VW] * b.mV[VZ] +
-				 a.mMatrix[VW][VW] * b.mV[VW];
-	return vec;
-}
-*/
-
-
 LLVector4 operator*(const LLVector4 &a, const LLMatrix4 &b)
 {
 	// Operate "to the left" on row-vector a
@ -768,6 +773,23 @@ bool operator!=(const LLMatrix4 &a, const LLMatrix4 &b)
 	return FALSE;
 }

+// Lexicographic ordering over the matrix entries in row-major order: the first
+// pair of entries that compare unequal decides the result; matrices with no
+// such pair are not less than each other.
+bool operator<(const LLMatrix4& a, const LLMatrix4 &b)
+{
+	for (U32 row = 0; row < NUM_VALUES_IN_MAT4; row++)
+	{
+		for (U32 col = 0; col < NUM_VALUES_IN_MAT4; col++)
+		{
+			const F32 lhs = a.mMatrix[row][col];
+			const F32 rhs = b.mMatrix[row][col];
+
+			if (lhs != rhs)
+			{
+				return lhs < rhs;
+			}
+		}
+	}
+
+	return false;
+}
+
 const LLMatrix4& operator*=(LLMatrix4 &a, F32 k)
 {
 	U32		i, j;
@ -807,4 +829,54 @@ std::ostream& operator<<(std::ostream& s, const LLMatrix4 &a)
 	return s;
 }

+// Serialize the matrix as an LLSD array of 16 entries in row-major order:
+// entry (row * 4 + col) holds mMatrix[row][col].
+LLSD LLMatrix4::getValue() const
+{
+	LLSD ret;
+
+	for (U32 row = 0; row < NUM_VALUES_IN_MAT4; ++row)
+	{
+		for (U32 col = 0; col < NUM_VALUES_IN_MAT4; ++col)
+		{
+			const S32 slot = (S32)(row * NUM_VALUES_IN_MAT4 + col);
+			ret[slot] = mMatrix[row][col];
+		}
+	}
+
+	return ret;
+}
+
+// Load the matrix from an LLSD array laid out as written by getValue():
+// mMatrix[row][col] is read from entry (row * 4 + col) via asReal().
+void LLMatrix4::setValue(const LLSD& data) 
+{
+	for (U32 row = 0; row < NUM_VALUES_IN_MAT4; ++row)
+	{
+		for (U32 col = 0; col < NUM_VALUES_IN_MAT4; ++col)
+		{
+			const S32 slot = (S32)(row * NUM_VALUES_IN_MAT4 + col);
+			mMatrix[row][col] = data[slot].asReal();
+		}
+	}
+}
+

--- a/indra/llmath/m4math.h
+++ b/indra/llmath/m4math.h
@ -119,6 +119,8 @@ public:

 	~LLMatrix4(void);										// Destructor

+	LLSD getValue() const;
+	void setValue(const LLSD&);

 	//////////////////////////////
 	//
@ -132,6 +134,7 @@ public:

 	// various useful matrix functions
 	const LLMatrix4& setIdentity();					// Load identity matrix
+	bool isIdentity() const;
 	const LLMatrix4& setZero();						// Clears matrix to all zeros.

 	const LLMatrix4& initRotation(const F32 angle, const F32 x, const F32 y, const F32 z);	// Calculate rotation matrix by rotating angle radians about (x, y, z)
@ -153,6 +156,7 @@ public:
 	const LLMatrix4& initRotTrans(const F32 roll, const F32 pitch, const F32 yaw, const LLVector4 &pos); // Rotation from Euler + translation
 	const LLMatrix4& initRotTrans(const LLQuaternion &q, const LLVector4 &pos);	// Set with Quaternion and position

+	const LLMatrix4& initScale(const LLVector3 &scale);

 	// Set all
 	const LLMatrix4& initAll(const LLVector3 &scale, const LLQuaternion &q, const LLVector3 &pos);	
@ -219,10 +223,7 @@ public:
 	// Operators
 	//

-// Not implemented to enforce code that agrees with symbolic syntax
-//		friend LLVector4 operator*(const LLMatrix4 &a, const LLVector4 &b);		// Apply rotation a to vector b
-
-//	friend inline LLMatrix4 operator*(const LLMatrix4 &a, const LLMatrix4 &b);		// Return a * b
+	//	friend inline LLMatrix4 operator*(const LLMatrix4 &a, const LLMatrix4 &b);		// Return a * b
 	friend LLVector4 operator*(const LLVector4 &a, const LLMatrix4 &b);		// Return transform of vector a by matrix b
 	friend const LLVector3 operator*(const LLVector3 &a, const LLMatrix4 &b);		// Return full transform of a by matrix b
 	friend LLVector4 rotate_vector(const LLVector4 &a, const LLMatrix4 &b);	// Rotates a but does not translate
@ -230,6 +231,7 @@ public:

 	friend bool operator==(const LLMatrix4 &a, const LLMatrix4 &b);			// Return a == b
 	friend bool operator!=(const LLMatrix4 &a, const LLMatrix4 &b);			// Return a != b
+	friend bool operator<(const LLMatrix4 &a, const LLMatrix4& b);			// Return a < b

 	friend const LLMatrix4& operator+=(LLMatrix4 &a, const LLMatrix4 &b);	// Return a + b
 	friend const LLMatrix4& operator-=(LLMatrix4 &a, const LLMatrix4 &b);	// Return a - b
@ -263,6 +265,30 @@ inline const LLMatrix4&	LLMatrix4::setIdentity()
 	return (*this);
 }

+// True only when every entry matches the identity matrix exactly (1 on the
+// diagonal, 0 elsewhere); no tolerance is applied.
+inline bool LLMatrix4::isIdentity() const
+{
+	for (U32 row = 0; row < NUM_VALUES_IN_MAT4; ++row)
+	{
+		for (U32 col = 0; col < NUM_VALUES_IN_MAT4; ++col)
+		{
+			const F32 expected = (row == col) ? 1.f : 0.f;
+			if (mMatrix[row][col] != expected)
+			{
+				return false;
+			}
+		}
+	}
+
+	return true;
+}
+

 /*
 inline LLMatrix4 operator*(const LLMatrix4 &a, const LLMatrix4 &b)
--- a/indra/llmath/tests/llquaternion_test.cpp
+++ b/indra/llmath/tests/llquaternion_test.cpp
@ -29,12 +29,12 @@
 #include "linden_common.h"
 #include "../test/lltut.h"

-#include "../llquaternion.h"
 #include "../v4math.h"
 #include "../v3math.h"
 #include "../v3dmath.h"
 #include "../m4math.h"
 #include "../m3math.h"
+#include "../llquaternion.h"

 namespace tut
 {
--- a/indra/llmath/tests/v2math_test.cpp
+++ b/indra/llmath/tests/v2math_test.cpp
@ -85,7 +85,7 @@ namespace tut
 		F32 x = 2.2345f, y = 3.5678f ;
 		LLVector2 vec2(x,y);
 		ensure("magVecSquared:Fail ", is_approx_equal(vec2.magVecSquared(), (x*x + y*y)));
-		ensure("magVec:Fail ", is_approx_equal(vec2.magVec(), fsqrtf(x*x + y*y)));
+		ensure("magVec:Fail ", is_approx_equal(vec2.magVec(), (F32) sqrt(x*x + y*y)));
 	}

 	template<> template<>
@ -407,7 +407,7 @@ namespace tut
 		ensure_equals("dist_vec_squared values are not equal",val2, val1);

 		val1 = 	dist_vec(vec2, vec3);
-		val2 = fsqrtf((x1 - x2)*(x1 - x2) + (y1 - y2)* (y1 - y2));
+		val2 = (F32) sqrt((x1 - x2)*(x1 - x2) + (y1 - y2)* (y1 - y2));
 		ensure_equals("dist_vec values are not equal",val2, val1);
 	}

@ -431,7 +431,7 @@ namespace tut
 		LLVector2 vec2(x1, y1);

 		F32 vecMag = vec2.normVec();
-		F32 mag = fsqrtf(x1*x1 + y1*y1);
+		F32 mag = (F32) sqrt(x1*x1 + y1*y1);

 		F32 oomag = 1.f / mag;
 		val1 = x1 * oomag;
--- a/indra/llmath/tests/v3color_test.cpp
+++ b/indra/llmath/tests/v3color_test.cpp
@ -93,7 +93,7 @@ namespace tut
 		F32 r = 2.3436212f, g = 1231.f, b = 4.7849321232f;
 		LLColor3 llcolor3(r,g,b);
 		ensure("magVecSquared:Fail ", is_approx_equal(llcolor3.magVecSquared(), (r*r + g*g + b*b)));
-		ensure("magVec:Fail ", is_approx_equal(llcolor3.magVec(), fsqrtf(r*r + g*g + b*b)));
+		ensure("magVec:Fail ", is_approx_equal(llcolor3.magVec(), (F32) sqrt(r*r + g*g + b*b)));
 	}

 	template<> template<>
@ -103,7 +103,7 @@ namespace tut
 		F32 val1, val2,val3;
 		LLColor3 llcolor3(r,g,b);
 		F32 vecMag = llcolor3.normVec();
-		F32 mag = fsqrtf(r*r + g*g + b*b);
+		F32 mag = (F32) sqrt(r*r + g*g + b*b);
 		F32 oomag = 1.f / mag;
 		val1 = r * oomag;
 		val2 = g * oomag;
@ -286,7 +286,7 @@ namespace tut
 		F32 r1 =1.f, g1 = 2.f,b1 = 1.2f, r2 = -2.3f, g2 = 1.11f, b2 = 1234.234f;
 		LLColor3 llcolor3(r1,g1,b1),llcolor3a(r2,g2,b2);
 		F32 val = distVec(llcolor3,llcolor3a);
-		ensure("distVec failed ", is_approx_equal(fsqrtf((r1-r2)*(r1-r2) + (g1-g2)*(g1-g2) + (b1-b2)*(b1-b2)) ,val));
+		ensure("distVec failed ", is_approx_equal((F32) sqrt((r1-r2)*(r1-r2) + (g1-g2)*(g1-g2) + (b1-b2)*(b1-b2)) ,val));
 		
 		F32 val1 = distVec_squared(llcolor3,llcolor3a);
 		ensure("distVec_squared failed ", is_approx_equal(((r1-r2)*(r1-r2) + (g1-g2)*(g1-g2) + (b1-b2)*(b1-b2)) ,val1));
--- a/indra/llmath/tests/v3dmath_test.cpp
+++ b/indra/llmath/tests/v3dmath_test.cpp
@ -30,11 +30,11 @@
 #include "llsd.h"
 #include "../test/lltut.h"

-#include "../llquaternion.h"
 #include "../m3math.h"
 #include "../v4math.h"
 #include "../v3dmath.h"
 #include "../v3dmath.h"
+#include "../llquaternion.h"

 namespace tut
 {
@ -403,7 +403,7 @@ namespace tut
 		LLVector3d vec3D(x,y,z);
 		F64 res = (x*x + y*y + z*z) - vec3D.magVecSquared();
 		ensure("1:magVecSquared:Fail ", ((-F_APPROXIMATELY_ZERO <= res)&& (res <=F_APPROXIMATELY_ZERO)));
-		res = fsqrtf(x*x + y*y + z*z) - vec3D.magVec();
+		res = (F32) sqrt(x*x + y*y + z*z) - vec3D.magVec();
 		ensure("2:magVec: Fail ", ((-F_APPROXIMATELY_ZERO <= res)&& (res <=F_APPROXIMATELY_ZERO)));	
 	}

--- a/indra/llmath/tests/v3math_test.cpp
+++ b/indra/llmath/tests/v3math_test.cpp
@ -30,12 +30,12 @@
 #include "../test/lltut.h"
 #include "llsd.h"

-#include "../llquaternion.h"
-#include "../llquantize.h"
 #include "../v3dmath.h"
 #include "../m3math.h"
 #include "../v4math.h"
 #include "../v3math.h"
+#include "../llquaternion.h"
+#include "../llquantize.h"


 namespace tut
@ -149,7 +149,7 @@ namespace tut
 		F32 x = 2.32f, y = 1.212f, z = -.12f;
 		LLVector3 vec3(x,y,z);		
 		ensure("1:magVecSquared:Fail ", is_approx_equal(vec3.magVecSquared(), (x*x + y*y + z*z)));
-		ensure("2:magVec:Fail ", is_approx_equal(vec3.magVec(), fsqrtf(x*x + y*y + z*z)));
+		ensure("2:magVec:Fail ", is_approx_equal(vec3.magVec(), (F32) sqrt(x*x + y*y + z*z)));
 	}

 	template<> template<>
@ -509,7 +509,7 @@ namespace tut
 		F32 val1,val2;
 		LLVector3 vec3(x1,y1,z1),vec3a(x2,y2,z2);
 		val1 = dist_vec(vec3,vec3a);
-		val2 = fsqrtf((x1 - x2)*(x1 - x2) + (y1 - y2)* (y1 - y2) + (z1 - z2)* (z1 -z2));
+		val2 = (F32) sqrt((x1 - x2)*(x1 - x2) + (y1 - y2)* (y1 - y2) + (z1 - z2)* (z1 -z2));
 		ensure_equals("1:dist_vec: Fail ",val2, val1);
 		val1 = dist_vec_squared(vec3,vec3a);
 		val2 =((x1 - x2)*(x1 - x2) + (y1 - y2)* (y1 - y2) + (z1 - z2)* (z1 -z2));
--- a/indra/llmath/tests/v4color_test.cpp
+++ b/indra/llmath/tests/v4color_test.cpp
@ -155,7 +155,7 @@ namespace tut
 		F32 r = 0x20, g = 0xFFFF, b = 0xFF;
 		LLColor4 llcolor4(r,g,b);
 		ensure("magVecSquared:Fail ", is_approx_equal(llcolor4.magVecSquared(), (r*r + g*g + b*b)));
-		ensure("magVec:Fail ", is_approx_equal(llcolor4.magVec(), fsqrtf(r*r + g*g + b*b)));
+		ensure("magVec:Fail ", is_approx_equal(llcolor4.magVec(), (F32) sqrt(r*r + g*g + b*b)));
 	}

 	template<> template<>
@ -164,7 +164,7 @@ namespace tut
 		F32 r = 0x20, g = 0xFFFF, b = 0xFF;
 		LLColor4 llcolor4(r,g,b);
 		F32 vecMag = llcolor4.normVec();
-		F32 mag = fsqrtf(r*r + g*g + b*b);
+		F32 mag = (F32) sqrt(r*r + g*g + b*b);
 		F32 oomag = 1.f / mag;
 		F32 val1 = r * oomag, val2 = g * oomag,	val3 = b * oomag;
 		ensure("1:normVec failed ", (is_approx_equal(val1, llcolor4.mV[0]) && is_approx_equal(val2, llcolor4.mV[1]) && is_approx_equal(val3, llcolor4.mV[2]) && is_approx_equal(vecMag, mag)));
--- a/indra/llmath/tests/v4coloru_test.cpp
+++ b/indra/llmath/tests/v4coloru_test.cpp
@ -135,7 +135,7 @@ namespace tut
 		U8 r = 0x12, g = 0xFF, b = 0xAF;
 		LLColor4U llcolor4u(r,g,b);
 		ensure("magVecSquared:Fail ", is_approx_equal(llcolor4u.magVecSquared(), (F32)(r*r + g*g + b*b)));
-		ensure("magVec:Fail ", is_approx_equal(llcolor4u.magVec(), fsqrtf(r*r + g*g + b*b)));
+		ensure("magVec:Fail ", is_approx_equal(llcolor4u.magVec(), (F32) sqrt((F32) (r*r + g*g + b*b))));
 	}

 	template<> template<>
--- a/indra/llmath/tests/v4math_test.cpp
+++ b/indra/llmath/tests/v4math_test.cpp
@ -30,9 +30,9 @@
 #include "../test/lltut.h"
 #include "llsd.h"

-#include "../llquaternion.h"
 #include "../m4math.h"
 #include "../v4math.h"
+#include "../llquaternion.h"

 namespace tut
 {
@ -96,7 +96,7 @@ namespace tut
 	{
 		F32 x = 10.f, y = -2.3f, z = -.023f;
 		LLVector4 vec4(x,y,z);
-		ensure("magVec:Fail ", is_approx_equal(vec4.magVec(), fsqrtf(x*x + y*y + z*z)));
+		ensure("magVec:Fail ", is_approx_equal(vec4.magVec(), (F32) sqrt(x*x + y*y + z*z)));
 		ensure("magVecSquared:Fail ", is_approx_equal(vec4.magVecSquared(), (x*x + y*y + z*z)));
 	}

@ -337,7 +337,7 @@ namespace tut
 		F32 val1,val2;
 		LLVector4 vec4(x1,y1,z1),vec4a(x2,y2,z2);
 		val1 = dist_vec(vec4,vec4a);
-		val2 = fsqrtf((x1 - x2)*(x1 - x2) + (y1 - y2)* (y1 - y2) + (z1 - z2)* (z1 -z2));
+		val2 = (F32) sqrt((x1 - x2)*(x1 - x2) + (y1 - y2)* (y1 - y2) + (z1 - z2)* (z1 -z2));
 		ensure_equals("dist_vec: Fail ",val2, val1);
 		val1 = dist_vec_squared(vec4,vec4a);
 		val2 =((x1 - x2)*(x1 - x2) + (y1 - y2)* (y1 - y2) + (z1 - z2)* (z1 -z2));
--- a/indra/llmath/v2math.cpp
+++ b/indra/llmath/v2math.cpp
@ -86,7 +86,7 @@ F32	dist_vec(const LLVector2 &a, const LLVector2 &b)
 {
 	F32 x = a.mV[0] - b.mV[0];
 	F32 y = a.mV[1] - b.mV[1];
-	return fsqrtf( x*x + y*y );
+	return (F32) sqrt( x*x + y*y );
 }

 F32	dist_vec_squared(const LLVector2 &a, const LLVector2 &b)
@ -109,3 +109,18 @@ LLVector2 lerp(const LLVector2 &a, const LLVector2 &b, F32 u)
 		a.mV[VX] + (b.mV[VX] - a.mV[VX]) * u,
 		a.mV[VY] + (b.mV[VY] - a.mV[VY]) * u );
 }
+
+// Packs the vector into an LLSD value: mV[0] is stored at index 0 and mV[1]
+// at index 1.
+LLSD LLVector2::getValue() const
+{
+	LLSD packed;
+	for (S32 i = 0; i < 2; ++i)
+	{
+		packed[i] = mV[i];
+	}
+	return packed;
+}
+
+// Loads both components from sd: sd[0] into mV[0] and sd[1] into mV[1],
+// narrowing each value returned by asReal() to F32.
+void LLVector2::setValue(LLSD& sd)
+{
+	for (S32 i = 0; i < 2; ++i)
+	{
+		mV[i] = (F32) sd[i].asReal();
+	}
+}
+
--- a/indra/llmath/v2math.h
+++ b/indra/llmath/v2math.h
@ -60,6 +60,9 @@ class LLVector2
 		void	set(const LLVector2 &vec);	// Sets LLVector2 to vec
 		void	set(const F32 *vec);			// Sets LLVector2 to vec

+		LLSD	getValue() const;
+		void	setValue(LLSD& sd);
+
 		void	setVec(F32 x, F32 y);	        // deprecated
 		void	setVec(const LLVector2 &vec);	// deprecated
 		void	setVec(const F32 *vec);			// deprecated
@ -216,7 +219,7 @@ inline void	LLVector2::setVec(const F32 *vec)

 inline F32 LLVector2::length(void) const
 {
-	return fsqrtf(mV[0]*mV[0] + mV[1]*mV[1]);
+	return (F32) sqrt(mV[0]*mV[0] + mV[1]*mV[1]);
 }

 inline F32 LLVector2::lengthSquared(void) const
@ -226,7 +229,7 @@ inline F32 LLVector2::lengthSquared(void) const

 inline F32		LLVector2::normalize(void)
 {
-	F32 mag = fsqrtf(mV[0]*mV[0] + mV[1]*mV[1]);
+	F32 mag = (F32) sqrt(mV[0]*mV[0] + mV[1]*mV[1]);
 	F32 oomag;

 	if (mag > FP_MAG_THRESHOLD)
@ -253,7 +256,7 @@ inline bool LLVector2::isFinite() const
 // deprecated
 inline F32		LLVector2::magVec(void) const
 {
-	return fsqrtf(mV[0]*mV[0] + mV[1]*mV[1]);
+	return (F32) sqrt(mV[0]*mV[0] + mV[1]*mV[1]);
 }

 // deprecated
@ -265,7 +268,7 @@ inline F32		LLVector2::magVecSquared(void) const
 // deprecated
 inline F32		LLVector2::normVec(void)
 {
-	F32 mag = fsqrtf(mV[0]*mV[0] + mV[1]*mV[1]);
+	F32 mag = (F32) sqrt(mV[0]*mV[0] + mV[1]*mV[1]);
 	F32 oomag;

 	if (mag > FP_MAG_THRESHOLD)
--- a/indra/llmath/v3color.h
+++ b/indra/llmath/v3color.h
@ -278,7 +278,7 @@ inline F32		LLColor3::brightness(void) const

 inline F32		LLColor3::length(void) const
 {
-	return fsqrtf(mV[0]*mV[0] + mV[1]*mV[1] + mV[2]*mV[2]);
+	return (F32) sqrt(mV[0]*mV[0] + mV[1]*mV[1] + mV[2]*mV[2]);
 }

 inline F32		LLColor3::lengthSquared(void) const
@ -288,7 +288,7 @@ inline F32		LLColor3::lengthSquared(void) const

 inline F32		LLColor3::normalize(void)
 {
-	F32 mag = fsqrtf(mV[0]*mV[0] + mV[1]*mV[1] + mV[2]*mV[2]);
+	F32 mag = (F32) sqrt(mV[0]*mV[0] + mV[1]*mV[1] + mV[2]*mV[2]);
 	F32 oomag;

 	if (mag)
@ -304,7 +304,7 @@ inline F32		LLColor3::normalize(void)
 // deprecated
 inline F32		LLColor3::magVec(void) const
 {
-	return fsqrtf(mV[0]*mV[0] + mV[1]*mV[1] + mV[2]*mV[2]);
+	return (F32) sqrt(mV[0]*mV[0] + mV[1]*mV[1] + mV[2]*mV[2]);
 }

 // deprecated
@ -316,7 +316,7 @@ inline F32		LLColor3::magVecSquared(void) const
 // deprecated
 inline F32		LLColor3::normVec(void)
 {
-	F32 mag = fsqrtf(mV[0]*mV[0] + mV[1]*mV[1] + mV[2]*mV[2]);
+	F32 mag = (F32) sqrt(mV[0]*mV[0] + mV[1]*mV[1] + mV[2]*mV[2]);
 	F32 oomag;

 	if (mag)
@ -438,7 +438,7 @@ inline F32		distVec(const LLColor3 &a, const LLColor3 &b)
 	F32 x = a.mV[0] - b.mV[0];
 	F32 y = a.mV[1] - b.mV[1];
 	F32 z = a.mV[2] - b.mV[2];
-	return fsqrtf( x*x + y*y + z*z );
+	return (F32) sqrt( x*x + y*y + z*z );
 }

 inline F32		distVec_squared(const LLColor3 &a, const LLColor3 &b)
--- a/indra/llmath/v3dmath.h
+++ b/indra/llmath/v3dmath.h
@ -234,7 +234,7 @@ inline const LLVector3d&	LLVector3d::setVec(const F64 *vec)

 inline F64 LLVector3d::normVec(void)
 {
-	F64 mag = fsqrtf(mdV[0]*mdV[0] + mdV[1]*mdV[1] + mdV[2]*mdV[2]);
+	F64 mag = (F32) sqrt(mdV[0]*mdV[0] + mdV[1]*mdV[1] + mdV[2]*mdV[2]);
 	F64 oomag;

 	if (mag > FP_MAG_THRESHOLD)
@ -256,7 +256,7 @@ inline F64 LLVector3d::normVec(void)

 inline F64 LLVector3d::normalize(void)
 {
-	F64 mag = fsqrtf(mdV[0]*mdV[0] + mdV[1]*mdV[1] + mdV[2]*mdV[2]);
+	F64 mag = (F32) sqrt(mdV[0]*mdV[0] + mdV[1]*mdV[1] + mdV[2]*mdV[2]);
 	F64 oomag;

 	if (mag > FP_MAG_THRESHOLD)
@ -280,7 +280,7 @@ inline F64 LLVector3d::normalize(void)

 inline F64	LLVector3d::magVec(void) const
 {
-	return fsqrtf(mdV[0]*mdV[0] + mdV[1]*mdV[1] + mdV[2]*mdV[2]);
+	return (F32) sqrt(mdV[0]*mdV[0] + mdV[1]*mdV[1] + mdV[2]*mdV[2]);
 }

 inline F64	LLVector3d::magVecSquared(void) const
@ -290,7 +290,7 @@ inline F64	LLVector3d::magVecSquared(void) const

 inline F64	LLVector3d::length(void) const
 {
-	return fsqrtf(mdV[0]*mdV[0] + mdV[1]*mdV[1] + mdV[2]*mdV[2]);
+	return (F32) sqrt(mdV[0]*mdV[0] + mdV[1]*mdV[1] + mdV[2]*mdV[2]);
 }

 inline F64	LLVector3d::lengthSquared(void) const
@ -400,7 +400,7 @@ inline F64	dist_vec(const LLVector3d &a, const LLVector3d &b)
 	F64 x = a.mdV[0] - b.mdV[0];
 	F64 y = a.mdV[1] - b.mdV[1];
 	F64 z = a.mdV[2] - b.mdV[2];
-	return fsqrtf( x*x + y*y + z*z );
+	return (F32) sqrt( x*x + y*y + z*z );
 }

 inline F64	dist_vec_squared(const LLVector3d &a, const LLVector3d &b)
--- a/indra/llmath/v3math.cpp
+++ b/indra/llmath/v3math.cpp
@ -206,6 +206,28 @@ const LLVector3&	LLVector3::rotVec(const LLQuaternion &q)
 	return *this;
 }

+// Replaces this vector with mat * v: each output component is the vector
+// multiplied against the first three entries of the matching matrix row, plus
+// that row's VW entry. Returns a reference to this vector.
+const LLVector3& LLVector3::transVec(const LLMatrix4& mat)
+{
+	// All three results are computed from the original components before any
+	// of them is written back.
+	const F32 new_x =
+		mV[VX] * mat.mMatrix[VX][VX] +
+		mV[VY] * mat.mMatrix[VX][VY] +
+		mV[VZ] * mat.mMatrix[VX][VZ] +
+		mat.mMatrix[VX][VW];
+
+	const F32 new_y =
+		mV[VX] * mat.mMatrix[VY][VX] +
+		mV[VY] * mat.mMatrix[VY][VY] +
+		mV[VZ] * mat.mMatrix[VY][VZ] +
+		mat.mMatrix[VY][VW];
+
+	const F32 new_z =
+		mV[VX] * mat.mMatrix[VZ][VX] +
+		mV[VY] * mat.mMatrix[VZ][VY] +
+		mV[VZ] * mat.mMatrix[VZ][VZ] +
+		mat.mMatrix[VZ][VW];
+
+	setVec(new_x, new_y, new_z);
+	return *this;
+}
+
+
 const LLVector3&	LLVector3::rotVec(F32 angle, const LLVector3 &vec)
 {
 	if ( !vec.isExactlyZero() && angle )
--- a/indra/llmath/v3math.h
+++ b/indra/llmath/v3math.h
@ -34,6 +34,7 @@
 class LLVector2;
 class LLVector4;
 class LLMatrix3;
+class LLMatrix4;
 class LLVector3d;
 class LLQuaternion;

@ -110,6 +111,7 @@ class LLVector3
 		const LLVector3&	rotVec(F32 angle, F32 x, F32 y, F32 z);		// Rotates about x,y,z by angle radians
 		const LLVector3&	rotVec(const LLMatrix3 &mat);				// Rotates by LLMatrix4 mat
 		const LLVector3&	rotVec(const LLQuaternion &q);				// Rotates by LLQuaternion q
+		const LLVector3&	transVec(const LLMatrix4& mat);				// Transforms by LLMatrix4 mat (mat * v)

 		const LLVector3&	scaleVec(const LLVector3& vec);				// scales per component by vec
 		LLVector3			scaledVec(const LLVector3& vec) const;			// get a copy of this vector scaled by vec
@ -277,7 +279,7 @@ inline void	LLVector3::setVec(const F32 *vec)

 inline F32 LLVector3::normalize(void)
 {
-	F32 mag = fsqrtf(mV[0]*mV[0] + mV[1]*mV[1] + mV[2]*mV[2]);
+	F32 mag = (F32) sqrt(mV[0]*mV[0] + mV[1]*mV[1] + mV[2]*mV[2]);
 	F32 oomag;

 	if (mag > FP_MAG_THRESHOLD)
@ -300,7 +302,7 @@ inline F32 LLVector3::normalize(void)
 // deprecated
 inline F32 LLVector3::normVec(void)
 {
-	F32 mag = fsqrtf(mV[0]*mV[0] + mV[1]*mV[1] + mV[2]*mV[2]);
+	F32 mag = (F32) sqrt(mV[0]*mV[0] + mV[1]*mV[1] + mV[2]*mV[2]);
 	F32 oomag;

 	if (mag > FP_MAG_THRESHOLD)
@ -324,7 +326,7 @@ inline F32 LLVector3::normVec(void)

 inline F32	LLVector3::length(void) const
 {
-	return fsqrtf(mV[0]*mV[0] + mV[1]*mV[1] + mV[2]*mV[2]);
+	return (F32) sqrt(mV[0]*mV[0] + mV[1]*mV[1] + mV[2]*mV[2]);
 }

 inline F32	LLVector3::lengthSquared(void) const
@ -334,7 +336,7 @@ inline F32	LLVector3::lengthSquared(void) const

 inline F32	LLVector3::magVec(void) const
 {
-	return fsqrtf(mV[0]*mV[0] + mV[1]*mV[1] + mV[2]*mV[2]);
+	return (F32) sqrt(mV[0]*mV[0] + mV[1]*mV[1] + mV[2]*mV[2]);
 }

 inline F32	LLVector3::magVecSquared(void) const
@ -468,7 +470,7 @@ inline F32	dist_vec(const LLVector3 &a, const LLVector3 &b)
 	F32 x = a.mV[0] - b.mV[0];
 	F32 y = a.mV[1] - b.mV[1];
 	F32 z = a.mV[2] - b.mV[2];
-	return fsqrtf( x*x + y*y + z*z );
+	return (F32) sqrt( x*x + y*y + z*z );
 }

 inline F32	dist_vec_squared(const LLVector3 &a, const LLVector3 &b)
@ -537,6 +539,21 @@ inline void update_min_max(LLVector3& min, LLVector3& max, const LLVector3& pos)
 	}
 }

+// Grows the bounds [min, max] per axis so they include the point held in the
+// first three floats at pos.
+inline void update_min_max(LLVector3& min, LLVector3& max, const F32* pos)
+{
+	for (U32 axis = 0; axis < 3; ++axis)
+	{
+		// Lower the minimum if the point lies below it on this axis.
+		if (pos[axis] < min.mV[axis])
+		{
+			min.mV[axis] = pos[axis];
+		}
+		// Raise the maximum if the point lies above it on this axis.
+		if (pos[axis] > max.mV[axis])
+		{
+			max.mV[axis] = pos[axis];
+		}
+	}
+}
+
 inline F32 angle_between(const LLVector3& a, const LLVector3& b)
 {
 	LLVector3 an = a;
--- a/indra/llmath/v4color.h
+++ b/indra/llmath/v4color.h
@ -108,6 +108,7 @@ class LLColor4
 	
 	    const LLColor4& operator=(const LLColor3 &a);	// Assigns vec3 to vec4 and returns vec4
 		
+		bool operator<(const LLColor4& rhs) const;
 		friend std::ostream&	 operator<<(std::ostream& s, const LLColor4 &a);		// Print a
 		friend LLColor4 operator+(const LLColor4 &a, const LLColor4 &b);	// Return vector a + b
 		friend LLColor4 operator-(const LLColor4 &a, const LLColor4 &b);	// Return vector a minus b
@ -385,7 +386,7 @@ inline const LLColor4&	LLColor4::setAlpha(F32 a)

 inline F32		LLColor4::length(void) const
 {
-	return fsqrtf(mV[VX]*mV[VX] + mV[VY]*mV[VY] + mV[VZ]*mV[VZ]);
+	return (F32) sqrt(mV[VX]*mV[VX] + mV[VY]*mV[VY] + mV[VZ]*mV[VZ]);
 }

 inline F32		LLColor4::lengthSquared(void) const
@ -395,7 +396,7 @@ inline F32		LLColor4::lengthSquared(void) const

 inline F32		LLColor4::normalize(void)
 {
-	F32 mag = fsqrtf(mV[VX]*mV[VX] + mV[VY]*mV[VY] + mV[VZ]*mV[VZ]);
+	F32 mag = (F32) sqrt(mV[VX]*mV[VX] + mV[VY]*mV[VY] + mV[VZ]*mV[VZ]);
 	F32 oomag;

 	if (mag)
@ -411,7 +412,7 @@ inline F32		LLColor4::normalize(void)
 // deprecated
 inline F32		LLColor4::magVec(void) const
 {
-	return fsqrtf(mV[VX]*mV[VX] + mV[VY]*mV[VY] + mV[VZ]*mV[VZ]);
+	return (F32) sqrt(mV[VX]*mV[VX] + mV[VY]*mV[VY] + mV[VZ]*mV[VZ]);
 }

 // deprecated
@ -423,7 +424,7 @@ inline F32		LLColor4::magVecSquared(void) const
 // deprecated
 inline F32		LLColor4::normVec(void)
 {
-	F32 mag = fsqrtf(mV[VX]*mV[VX] + mV[VY]*mV[VY] + mV[VZ]*mV[VZ]);
+	F32 mag = (F32) sqrt(mV[VX]*mV[VX] + mV[VY]*mV[VY] + mV[VZ]*mV[VZ]);
 	F32 oomag;

 	if (mag)
@ -589,6 +590,23 @@ inline LLColor4 lerp(const LLColor4 &a, const LLColor4 &b, F32 u)
 		a.mV[VW] + (b.mV[VW] - a.mV[VW]) * u);
 }

+// Orders two colors by comparing components lexicographically, from mV[0]
+// through mV[3].
+inline bool LLColor4::operator<(const LLColor4& rhs) const
+{
+	// The first of the leading three components that differs decides the result.
+	for (int i = 0; i < 3; ++i)
+	{
+		if (mV[i] != rhs.mV[i])
+		{
+			return mV[i] < rhs.mV[i];
+		}
+	}
+
+	// The leading three match, so the last component breaks the tie.
+	return mV[3] < rhs.mV[3];
+}

 void LLColor4::clamp()
 {
--- a/indra/llmath/v4coloru.h
+++ b/indra/llmath/v4coloru.h
@ -294,7 +294,7 @@ inline const LLColor4U&	LLColor4U::setAlpha(U8 a)

 inline F32		LLColor4U::length(void) const
 {
-	return fsqrtf( ((F32)mV[VX]) * mV[VX] + ((F32)mV[VY]) * mV[VY] + ((F32)mV[VZ]) * mV[VZ] );
+	return (F32) sqrt( ((F32)mV[VX]) * mV[VX] + ((F32)mV[VY]) * mV[VY] + ((F32)mV[VZ]) * mV[VZ] );
 }

 inline F32		LLColor4U::lengthSquared(void) const
@ -305,7 +305,7 @@ inline F32		LLColor4U::lengthSquared(void) const
 // deprecated
 inline F32		LLColor4U::magVec(void) const
 {
-	return fsqrtf( ((F32)mV[VX]) * mV[VX] + ((F32)mV[VY]) * mV[VY] + ((F32)mV[VZ]) * mV[VZ] );
+	return (F32) sqrt( ((F32)mV[VX]) * mV[VX] + ((F32)mV[VY]) * mV[VY] + ((F32)mV[VZ]) * mV[VZ] );
 }

 // deprecated
--- a/indra/llmath/v4math.h
+++ b/indra/llmath/v4math.h
@ -315,7 +315,7 @@ inline void	LLVector4::setVec(const F32 *vec)

 inline F32		LLVector4::length(void) const
 {
-	return fsqrtf(mV[VX]*mV[VX] + mV[VY]*mV[VY] + mV[VZ]*mV[VZ]);
+	return (F32) sqrt(mV[VX]*mV[VX] + mV[VY]*mV[VY] + mV[VZ]*mV[VZ]);
 }

 inline F32		LLVector4::lengthSquared(void) const
@ -325,7 +325,7 @@ inline F32		LLVector4::lengthSquared(void) const

 inline F32		LLVector4::magVec(void) const
 {
-	return fsqrtf(mV[VX]*mV[VX] + mV[VY]*mV[VY] + mV[VZ]*mV[VZ]);
+	return (F32) sqrt(mV[VX]*mV[VX] + mV[VY]*mV[VY] + mV[VZ]*mV[VZ]);
 }

 inline F32		LLVector4::magVecSquared(void) const
@ -457,7 +457,7 @@ inline LLVector4 lerp(const LLVector4 &a, const LLVector4 &b, F32 u)

 inline F32		LLVector4::normalize(void)
 {
-	F32 mag = fsqrtf(mV[VX]*mV[VX] + mV[VY]*mV[VY] + mV[VZ]*mV[VZ]);
+	F32 mag = (F32) sqrt(mV[VX]*mV[VX] + mV[VY]*mV[VY] + mV[VZ]*mV[VZ]);
 	F32 oomag;

 	if (mag > FP_MAG_THRESHOLD)
@ -480,7 +480,7 @@ inline F32		LLVector4::normalize(void)
 // deprecated
 inline F32		LLVector4::normVec(void)
 {
-	F32 mag = fsqrtf(mV[VX]*mV[VX] + mV[VY]*mV[VY] + mV[VZ]*mV[VZ]);
+	F32 mag = (F32) sqrt(mV[VX]*mV[VX] + mV[VY]*mV[VY] + mV[VZ]*mV[VZ]);
 	F32 oomag;

 	if (mag > FP_MAG_THRESHOLD)
--- a/indra/llmath/xform.h
+++ b/indra/llmath/xform.h
@ -32,11 +32,11 @@

 const F32 MAX_OBJECT_Z 		= 4096.f; // should match REGION_HEIGHT_METERS, Pre-havok4: 768.f
 const F32 MIN_OBJECT_Z 		= -256.f;
-const F32 DEFAULT_MAX_PRIM_SCALE = 10.f;
+const F32 DEFAULT_MAX_PRIM_SCALE = 64.f;
+const F32 DEFAULT_MAX_PRIM_SCALE_NO_MESH = 10.f;
 const F32 MIN_PRIM_SCALE = 0.01f;
 const F32 MAX_PRIM_SCALE = 65536.f;	// something very high but not near FLT_MAX

-
 class LLXform
 {
 protected:
--- a/indra/llmessage/llassetstorage.cpp
+++ b/indra/llmessage/llassetstorage.cpp
@ -561,7 +561,7 @@ void LLAssetStorage::_queueDataRequest(const LLUUID& uuid, LLAssetType::EType at
 			tpvf.setAsset(uuid, atype);
 			tpvf.setCallback(downloadCompleteCallback, req);

-			llinfos << "Starting transfer for " << uuid << llendl;
+			//llinfos << "Starting transfer for " << uuid << llendl;
 			LLTransferTargetChannel *ttcp = gTransferManager.getTargetChannel(mUpstreamHost, LLTCT_ASSET);
 			ttcp->requestTransfer(spa, tpvf, 100.f + (is_priority ? 1.f : 0.f));
 		}
--- a/indra/llmessage/llcurl.cpp
+++ b/indra/llmessage/llcurl.cpp
@ -49,6 +49,7 @@
 #include "llstl.h"
 #include "llsdserialize.h"
 #include "llthread.h"
+#include "lltimer.h"

 //////////////////////////////////////////////////////////////////////////////
 /*
@ -84,6 +85,26 @@ std::vector<LLMutex*> LLCurl::sSSLMutex;
 std::string LLCurl::sCAPath;
 std::string LLCurl::sCAFile;

+// Writes the libcurl error text for code to llinfos when code is not
+// CURLE_OK; does nothing for CURLE_OK.
+void check_curl_code(CURLcode code)
+{
+	if (code == CURLE_OK)
+	{
+		return;
+	}
+
+	// Only logged, not treated as a failure: linux appears to throw a curl
+	// error once per session for a bad initialization at a pretty random
+	// time (when enabling cookies).
+	llinfos << "curl error detected: " << curl_easy_strerror(code) << llendl;
+}
+
+// Writes the libcurl multi-interface error text for code to llinfos when code
+// is not CURLM_OK; does nothing for CURLM_OK.
+void check_curl_multi_code(CURLMcode code) 
+{
+	if (code == CURLM_OK)
+	{
+		return;
+	}
+
+	// Only logged, not treated as a failure: linux appears to throw a curl
+	// error once per session for a bad initialization at a pretty random
+	// time (when enabling cookies).
+	llinfos << "curl multi error detected: " << curl_multi_strerror(code) << llendl;
+}
+
 //static
 void LLCurl::setCAPath(const std::string& path)
 {
@ -234,7 +255,12 @@ public:
 	
 	void resetState();

+	static CURL* allocEasyHandle();
+	static void releaseEasyHandle(CURL* handle);
+
 private:	
+	friend class LLCurl;
+
 	CURL*				mCurlEasyHandle;
 	struct curl_slist*	mHeaders;
 	
@ -249,8 +275,62 @@ private:
 	std::vector<char*>	mStrings;
 	
 	ResponderPtr		mResponder;
+
+	static std::set<CURL*> sFreeHandles;
+	static std::set<CURL*> sActiveHandles;
+	static LLMutex* sHandleMutex;
 };

+std::set<CURL*> LLCurl::Easy::sFreeHandles;
+std::set<CURL*> LLCurl::Easy::sActiveHandles;
+LLMutex* LLCurl::Easy::sHandleMutex = NULL;
+
+
+//static
+// Hands out a curl easy handle while holding sHandleMutex: a handle from
+// sFreeHandles is reset and reused when one is available, otherwise a new one
+// is created with curl_easy_init(). A non-NULL result is recorded in
+// sActiveHandles. Returns NULL when curl_easy_init() fails.
+CURL* LLCurl::Easy::allocEasyHandle()
+{
+	LLMutexLock lock(sHandleMutex);
+
+	CURL* handle = NULL;
+	if (!sFreeHandles.empty())
+	{
+		// Take the first pooled handle and clear its previous options.
+		handle = *(sFreeHandles.begin());
+		sFreeHandles.erase(handle);
+		curl_easy_reset(handle);
+	}
+	else
+	{
+		handle = curl_easy_init();
+	}
+
+	if (handle)
+	{
+		sActiveHandles.insert(handle);
+	}
+
+	return handle;
+}
+
+//static
+// Moves a handle obtained from allocEasyHandle() out of sActiveHandles and
+// into sFreeHandles so a later allocation can reuse it.
+// A NULL handle is ignored: when allocation fails, getEasy() deletes the Easy
+// it just created, and ~Easy() passes that NULL handle here. Raising llerrs
+// for NULL would turn that warn-and-return-NULL path into a hard error.
+// A non-NULL handle that is not in sActiveHandles is still reported via llerrs.
+void LLCurl::Easy::releaseEasyHandle(CURL* handle)
+{
+	if (!handle)
+	{
+		// Nothing was allocated, so there is nothing to return to the pool.
+		return;
+	}
+
+	LLMutexLock lock(sHandleMutex);
+	
+	if (sActiveHandles.find(handle) != sActiveHandles.end())
+	{
+		sActiveHandles.erase(handle);
+		sFreeHandles.insert(handle);
+	}
+	else
+	{
+		llerrs << "Invalid handle." << llendl;
+	}
+}
+
 LLCurl::Easy::Easy()
 	: mHeaders(NULL),
 	  mCurlEasyHandle(NULL)
@ -261,25 +341,28 @@ LLCurl::Easy::Easy()
 LLCurl::Easy* LLCurl::Easy::getEasy()
 {
 	Easy* easy = new Easy();
-	easy->mCurlEasyHandle = curl_easy_init();
+	easy->mCurlEasyHandle = allocEasyHandle(); 
+	
 	if (!easy->mCurlEasyHandle)
 	{
 		// this can happen if we have too many open files (fails in c-ares/ares_init.c)
-		llwarns << "curl_multi_init() returned NULL! Easy handles: " << gCurlEasyCount << " Multi handles: " << gCurlMultiCount << llendl;
+		llwarns << "allocEasyHandle() returned NULL! Easy handles: " << gCurlEasyCount << " Multi handles: " << gCurlMultiCount << llendl;
 		delete easy;
 		return NULL;
 	}
 	
-	// set no DMS caching as default for all easy handles. This prevents them adopting a
+	// set no DNS caching as default for all easy handles. This prevents them adopting a
 	// multi handles cache if they are added to one.
-	curl_easy_setopt(easy->mCurlEasyHandle, CURLOPT_DNS_CACHE_TIMEOUT, 0);
+	CURLcode result = curl_easy_setopt(easy->mCurlEasyHandle, CURLOPT_DNS_CACHE_TIMEOUT, 0);
+	check_curl_code(result);
+	
 	++gCurlEasyCount;
 	return easy;
 }

 LLCurl::Easy::~Easy()
 {
-	curl_easy_cleanup(mCurlEasyHandle);
+	releaseEasyHandle(mCurlEasyHandle);
 	--gCurlEasyCount;
 	curl_slist_free_all(mHeaders);
 	for_each(mStrings.begin(), mStrings.end(), DeletePointerArray());
@ -338,9 +421,9 @@ void LLCurl::Easy::setHeaders()

 void LLCurl::Easy::getTransferInfo(LLCurl::TransferInfo* info)
 {
-	curl_easy_getinfo(mCurlEasyHandle, CURLINFO_SIZE_DOWNLOAD, &info->mSizeDownload);
-	curl_easy_getinfo(mCurlEasyHandle, CURLINFO_TOTAL_TIME, &info->mTotalTime);
-	curl_easy_getinfo(mCurlEasyHandle, CURLINFO_SPEED_DOWNLOAD, &info->mSpeedDownload);
+	check_curl_code(curl_easy_getinfo(mCurlEasyHandle, CURLINFO_SIZE_DOWNLOAD, &info->mSizeDownload));
+	check_curl_code(curl_easy_getinfo(mCurlEasyHandle, CURLINFO_TOTAL_TIME, &info->mTotalTime));
+	check_curl_code(curl_easy_getinfo(mCurlEasyHandle, CURLINFO_SPEED_DOWNLOAD, &info->mSpeedDownload));
 }

 U32 LLCurl::Easy::report(CURLcode code)
@ -350,13 +433,14 @@ U32 LLCurl::Easy::report(CURLcode code)
 	
 	if (code == CURLE_OK)
 	{
-		curl_easy_getinfo(mCurlEasyHandle, CURLINFO_RESPONSE_CODE, &responseCode);
+		check_curl_code(curl_easy_getinfo(mCurlEasyHandle, CURLINFO_RESPONSE_CODE, &responseCode));
 		//*TODO: get reason from first line of mHeaderOutput
 	}
 	else
 	{
 		responseCode = 499;
 		responseReason = strerror(code) + " : " + mErrorBuffer;
+		setopt(CURLOPT_FRESH_CONNECT, TRUE);
 	}

 	if (mResponder)
@ -372,17 +456,20 @@ U32 LLCurl::Easy::report(CURLcode code)
 // Note: these all assume the caller tracks the value (i.e. keeps it persistant)
 void LLCurl::Easy::setopt(CURLoption option, S32 value)
 {
-	curl_easy_setopt(mCurlEasyHandle, option, value);
+	CURLcode result = curl_easy_setopt(mCurlEasyHandle, option, value);
+	check_curl_code(result);
 }

 void LLCurl::Easy::setopt(CURLoption option, void* value)
 {
-	curl_easy_setopt(mCurlEasyHandle, option, value);
+	CURLcode result = curl_easy_setopt(mCurlEasyHandle, option, value);
+	check_curl_code(result);
 }

 void LLCurl::Easy::setopt(CURLoption option, char* value)
 {
-	curl_easy_setopt(mCurlEasyHandle, option, value);
+	CURLcode result = curl_easy_setopt(mCurlEasyHandle, option, value);
+	check_curl_code(result);
 }

 // Note: this copies the string so that the caller does not have to keep it around
@ -391,7 +478,8 @@ void LLCurl::Easy::setoptString(CURLoption option, const std::string& value)
 	char* tstring = new char[value.length()+1];
 	strcpy(tstring, value.c_str());
 	mStrings.push_back(tstring);
-	curl_easy_setopt(mCurlEasyHandle, option, tstring);
+	CURLcode result = curl_easy_setopt(mCurlEasyHandle, option, tstring);
+	check_curl_code(result);
 }

 void LLCurl::Easy::slist_append(const char* str)
@ -443,7 +531,7 @@ void LLCurl::Easy::prepRequest(const std::string& url,
 	
 	if (post) setoptString(CURLOPT_ENCODING, "");

-//	setopt(CURLOPT_VERBOSE, 1); // usefull for debugging
+	//setopt(CURLOPT_VERBOSE, 1); // useful for debugging
 	setopt(CURLOPT_NOSIGNAL, 1);

 	mOutput.reset(new LLBufferArray);
@ -467,6 +555,9 @@ void LLCurl::Easy::prepRequest(const std::string& url,
 	setCA();

 	setopt(CURLOPT_SSL_VERIFYPEER, true);
+	
+	//don't verify host name so urls with scrubbed host names will work (improves DNS performance)
+	setopt(CURLOPT_SSL_VERIFYHOST, 0);
 	setopt(CURLOPT_TIMEOUT, CURL_REQUEST_TIMEOUT);

 	setoptString(CURLOPT_URL, url);
@ -532,6 +623,7 @@ LLCurl::Multi::Multi()
 		llwarns << "curl_multi_init() returned NULL! Easy handles: " << gCurlEasyCount << " Multi handles: " << gCurlMultiCount << llendl;
 		mCurlMultiHandle = curl_multi_init();
 	}
+	
 	llassert_always(mCurlMultiHandle);
 	++gCurlMultiCount;
 }
@ -543,7 +635,7 @@ LLCurl::Multi::~Multi()
 		iter != mEasyActiveList.end(); ++iter)
 	{
 		Easy* easy = *iter;
-		curl_multi_remove_handle(mCurlMultiHandle, easy->getCurlHandle());
+		check_curl_multi_code(curl_multi_remove_handle(mCurlMultiHandle, easy->getCurlHandle()));
 		delete easy;
 	}
 	mEasyActiveList.clear();
@ -553,7 +645,7 @@ LLCurl::Multi::~Multi()
 	for_each(mEasyFreeList.begin(), mEasyFreeList.end(), DeletePointer());	
 	mEasyFreeList.clear();

-	curl_multi_cleanup(mCurlMultiHandle);
+	check_curl_multi_code(curl_multi_cleanup(mCurlMultiHandle));
 	--gCurlMultiCount;
 }

@ -574,8 +666,10 @@ S32 LLCurl::Multi::perform()
 		CURLMcode code = curl_multi_perform(mCurlMultiHandle, &q);
 		if (CURLM_CALL_MULTI_PERFORM != code || q == 0)
 		{
+			check_curl_multi_code(code);
 			break;
 		}
+	
 	}
 	mQueued = q;
 	return q;
@ -642,11 +736,12 @@ LLCurl::Easy* LLCurl::Multi::allocEasy()
+// Adds easy's curl handle to this multi handle via curl_multi_add_handle().
+// NOTE(review): with the CURLM_OK check commented out below, this returns true
+// even when curl_multi_add_handle() fails; the failure is only logged by
+// check_curl_multi_code(). Confirm callers no longer depend on a false return.
 bool LLCurl::Multi::addEasy(Easy* easy)
 {
 	CURLMcode mcode = curl_multi_add_handle(mCurlMultiHandle, easy->getCurlHandle());
-	if (mcode != CURLM_OK)
-	{
-		llwarns << "Curl Error: " << curl_multi_strerror(mcode) << llendl;
-		return false;
-	}
+	check_curl_multi_code(mcode);
+	//if (mcode != CURLM_OK)
+	//{
+	//	llwarns << "Curl Error: " << curl_multi_strerror(mcode) << llendl;
+	//	return false;
+	//}
 	return true;
 }

@ -667,7 +762,7 @@ void LLCurl::Multi::easyFree(Easy* easy)

 void LLCurl::Multi::removeEasy(Easy* easy)
 {
-	curl_multi_remove_handle(mCurlMultiHandle, easy->getCurlHandle());
+	check_curl_multi_code(curl_multi_remove_handle(mCurlMultiHandle, easy->getCurlHandle()));
 	easyFree(easy);
 }

@ -686,6 +781,7 @@ LLCurlRequest::LLCurlRequest() :
 	mActiveRequestCount(0)
 {
 	mThreadID = LLThread::currentID();
+	mProcessing = FALSE;
 }

 LLCurlRequest::~LLCurlRequest()
@ -720,6 +816,11 @@ LLCurl::Easy* LLCurlRequest::allocEasy()
 bool LLCurlRequest::addEasy(LLCurl::Easy* easy)
 {
 	llassert_always(mActiveMulti);
+	
+	if (mProcessing)
+	{
+		llerrs << "Posting to a LLCurlRequest instance from within a responder is not allowed (causes DNS timeouts)." << llendl;
+	}
 	bool res = mActiveMulti->addEasy(easy);
 	return res;
 }
@ -777,12 +878,41 @@ bool LLCurlRequest::post(const std::string& url,
 	bool res = addEasy(easy);
 	return res;
 }
+
+// Queues an HTTP POST of the raw bytes in data to url, sent with
+// Content-Type: application/octet-stream.
+// Returns false when no Easy could be allocated; otherwise returns the result
+// of addEasy().
+bool LLCurlRequest::post(const std::string& url,
+						 const headers_t& headers,
+						 const std::string& data,
+						 LLCurl::ResponderPtr responder)
+{
+	LLCurl::Easy* easy = allocEasy();
+	if (!easy)
+	{
+		return false;
+	}
+	easy->prepRequest(url, headers, responder);
+
+	// Copy the payload into the Easy's input stream, then size the POST from
+	// the stream's full contents.
+	easy->getInput().write(data.data(), data.size());
+	S32 bytes = easy->getInput().str().length();
 	
+	easy->setopt(CURLOPT_POST, 1);
+	// NOTE(review): POSTFIELDS is NULL, presumably so libcurl reads the body
+	// through a read callback set up in prepRequest() -- confirm.
+	easy->setopt(CURLOPT_POSTFIELDS, (void*)NULL);
+	easy->setopt(CURLOPT_POSTFIELDSIZE, bytes);
+
+	easy->slist_append("Content-Type: application/octet-stream");
+	easy->setHeaders();
+
+	lldebugs << "POSTING: " << bytes << " bytes." << llendl;
+	bool res = addEasy(easy);
+	return res;
+}
+
 // Note: call once per frame
 S32 LLCurlRequest::process()
 {
 	llassert_always(mThreadID == LLThread::currentID());
 	S32 res = 0;
+
+	mProcessing = TRUE;
 	for (curlmulti_set_t::iterator iter = mMultiSet.begin();
 		 iter != mMultiSet.end(); )
 	{
@ -796,6 +926,7 @@ S32 LLCurlRequest::process()
 			delete multi;
 		}
 	}
+	mProcessing = FALSE;
 	return res;
 }

@ -1025,8 +1156,12 @@ void LLCurl::initClass()
 	// Do not change this "unless you are familiar with and mean to control 
 	// internal operations of libcurl"
 	// - http://curl.haxx.se/libcurl/c/curl_global_init.html
-	curl_global_init(CURL_GLOBAL_ALL);
+	CURLcode code = curl_global_init(CURL_GLOBAL_ALL);
+
+	check_curl_code(code);
 	
+	Easy::sHandleMutex = new LLMutex(NULL);
+
 #if SAFE_SSL
 	S32 mutex_count = CRYPTO_num_locks();
 	for (S32 i=0; i<mutex_count; i++)
@ -1044,7 +1179,19 @@ void LLCurl::cleanupClass()
 	CRYPTO_set_locking_callback(NULL);
 	for_each(sSSLMutex.begin(), sSSLMutex.end(), DeletePointer());
 #endif
-	curl_global_cleanup();
+
+	delete Easy::sHandleMutex;
+	Easy::sHandleMutex = NULL;
+
+	for (std::set<CURL*>::iterator iter = Easy::sFreeHandles.begin(); iter != Easy::sFreeHandles.end(); ++iter)
+	{
+		CURL* curl = *iter;
+		curl_easy_cleanup(curl);
+	}
+
+	Easy::sFreeHandles.clear();
+
+	llassert(Easy::sActiveHandles.empty());
 }

 const unsigned int LLCurl::MAX_REDIRECTS = 5;
--- a/indra/llmessage/llcurl.h
+++ b/indra/llmessage/llcurl.h
@ -202,6 +202,8 @@ public:
 	void get(const std::string& url, LLCurl::ResponderPtr responder);
 	bool getByteRange(const std::string& url, const headers_t& headers, S32 offset, S32 length, LLCurl::ResponderPtr responder);
 	bool post(const std::string& url, const headers_t& headers, const LLSD& data, LLCurl::ResponderPtr responder);
+	bool post(const std::string& url, const headers_t& headers, const std::string& data, LLCurl::ResponderPtr responder);
+	
 	S32  process();
 	S32  getQueued();

@ -215,6 +217,7 @@ private:
 	curlmulti_set_t mMultiSet;
 	LLCurl::Multi* mActiveMulti;
 	S32 mActiveRequestCount;
+	BOOL mProcessing;
 	U32 mThreadID; // debug
 };

--- a/indra/llmessage/llhttpassetstorage.cpp
+++ b/indra/llmessage/llhttpassetstorage.cpp
@ -174,8 +174,8 @@ LLSD LLHTTPAssetRequest::getFullDetails() const
 		double curl_total_time = -1.0f;
 		double curl_size_upload = -1.0f;
 		double curl_size_download = -1.0f;
-		long curl_content_length_upload = -1;
-		long curl_content_length_download = -1;
+		double curl_content_length_upload = -1.0f;
+		double curl_content_length_download = -1.0f;
 		long curl_request_size = -1;
 		const char* curl_content_type = NULL;

@ -194,8 +194,8 @@ LLSD LLHTTPAssetRequest::getFullDetails() const
 		sd["curl_total_time"] = curl_total_time;
 		sd["curl_size_upload"]   = curl_size_upload;
 		sd["curl_size_download"] = curl_size_download;
-		sd["curl_content_length_upload"]   = (int) curl_content_length_upload;
-		sd["curl_content_length_download"] = (int) curl_content_length_download;
+		sd["curl_content_length_upload"]   =  curl_content_length_upload;
+		sd["curl_content_length_download"] =  curl_content_length_download;
 		sd["curl_request_size"] = (int) curl_request_size;
 		if (curl_content_type)
 		{
--- a/indra/llmessage/llsdmessagebuilder.cpp
+++ b/indra/llmessage/llsdmessagebuilder.cpp
@ -29,6 +29,7 @@
 #include "llsdmessagebuilder.h"

 #include "llmessagetemplate.h"
+#include "llmath.h"
 #include "llquaternion.h"
 #include "llsdutil.h"
 #include "llsdutil_math.h"
--- a/indra/llmessage/lltemplatemessagebuilder.cpp
+++ b/indra/llmessage/lltemplatemessagebuilder.cpp
@ -29,6 +29,7 @@
 #include "lltemplatemessagebuilder.h"

 #include "llmessagetemplate.h"
+#include "llmath.h"
 #include "llquaternion.h"
 #include "u64.h"
 #include "v3dmath.h"
--- a/Show More
+++ b/Show More