diff --git a/build2/kram.xcconfig b/build2/kram.xcconfig
index 9f1ee9a..ca8e642 100644
--- a/build2/kram.xcconfig
+++ b/build2/kram.xcconfig
@@ -8,10 +8,10 @@
 // Also turn on -ftime-trace to review build times in kram-profile.
 
 KRAM_FLAGS_X64 =
-KRAM_FLAGS_X64[arch=x86_64] = -mf16c -mfma
+KRAM_FLAGS_X64[sdk=*][arch=x86_64] = -mf16c -mfma
 
 KRAM_FLAGS_RELEASE =
-KRAM_FLAGS_RELEASE[config=Release] = -DNDEBUG=1
+KRAM_FLAGS_RELEASE[sdk=*][config=Release] = -DNDEBUG=1
 
 KRAM_FLAGS = -ftime-trace
 KRAM_FLAGS = $(KRAM_FLAGS) -DUSE_SIMDLIB=1 -DUSE_SIMDLIBMODULE=1
diff --git a/kramv/KramRenderer.mm b/kramv/KramRenderer.mm
index 68e80ed..6f5d757 100644
--- a/kramv/KramRenderer.mm
+++ b/kramv/KramRenderer.mm
@@ -1492,7 +1492,7 @@ - (void)_updateGameState
 
     // This is per object
     uniforms.modelMatrix = _data->_modelMatrix;
-    uniforms.modelMatrixInvScale2 = _data->_modelMatrixInvScale2;
+    // uniforms.modelMatrixInvScale2 = _data->_modelMatrixInvScale2; // disabled: field is commented out of Uniforms
 
     //_rotation += .01;
 }
diff --git a/kramv/KramViewerBase.cpp b/kramv/KramViewerBase.cpp
index 47daab4..057447c 100644
--- a/kramv/KramViewerBase.cpp
+++ b/kramv/KramViewerBase.cpp
@@ -3052,8 +3052,8 @@ void Data::updateTransforms()
         inverse(_viewMatrix).columns[3].xyz; // this is all ortho
 
     // obj specific
-    _modelMatrixInvScale2 = inverseScaleSquared(_modelMatrix);
-    _showSettings->isInverted = _modelMatrixInvScale2.w < 0.0f;
+    float4 modelMatrixInvScale2 = inverseScaleSquared(_modelMatrix);
+    _showSettings->isInverted = modelMatrixInvScale2.w < 0.0f;
 }
 
 float4x4 Data::computeImageTransform(float panX, float panY, float zoom)
diff --git a/kramv/KramViewerBase.h b/kramv/KramViewerBase.h
index 6af1f76..92f9395 100644
--- a/kramv/KramViewerBase.h
+++ b/kramv/KramViewerBase.h
@@ -521,7 +521,7 @@ struct Data {
 
     // object specific
     float4x4 _modelMatrix;
-    float4 _modelMatrixInvScale2;
+    //float4 _modelMatrixInvScale2;
     float4x4 _modelMatrix2D;
     float4x4 _modelMatrix3D;
 
diff --git a/kramv/Shaders/KramShaders.h b/kramv/Shaders/KramShaders.h
index e269a36..dbd63e6 100644
--- a/kramv/Shaders/KramShaders.h
+++ b/kramv/Shaders/KramShaders.h
@@ -114,7 +114,7 @@ typedef NS_ENUM(int32_t, ShaderLightingMode) {
 struct Uniforms {
     SIMD_NAMESPACE::float4x4 projectionViewMatrix;
     SIMD_NAMESPACE::float4x4 modelMatrix;
-    SIMD_NAMESPACE::float4 modelMatrixInvScale2;  // to supply inverse, w is determinant
+    //SIMD_NAMESPACE::float4 modelMatrixInvScale2;  // to supply inverse, w is determinant
     SIMD_NAMESPACE::float3 cameraPosition;        // world-space
     float uvPreview;
     float uvToShapeRatio;
diff --git a/kramv/Shaders/KramShaders.metal b/kramv/Shaders/KramShaders.metal
index bc435a4..5671a78 100644
--- a/kramv/Shaders/KramShaders.metal
+++ b/kramv/Shaders/KramShaders.metal
@@ -519,24 +519,28 @@ inline float3x3 toFloat3x3(float4x4 m)
     return float3x3(m[0].xyz, m[1].xyz, m[2].xyz);
 }
 
+// Columns are cross products of m's columns; used to transform normals.  Works even with a scale of 0 and is preferred over using invT.
+// The normal will be normalized anyway.  Also saves sending down another tfm.
+inline float3x3 adjoint(float3x3 m)
+{
+    return float3x3(cross(m[1], m[2]),
+                    cross(m[2], m[0]),
+                    cross(m[0], m[1]));
+}
+
+
 // this is for vertex shader if tangent supplied
 void transformBasis(thread float3& normal, thread float3& tangent,
-                    float4x4 modelToWorldTfm, float3 invScale2, bool useTangent)
+                    float4x4 modelToWorldTfm, bool useTangent)
 {
     
     float3x3 m = toFloat3x3(modelToWorldTfm);
     
-    // note this is RinvT * n = (Rt)t = R, this is for simple inverse, inv scale handled below
-    // but uniform scale already handled by normalize
-    normal = m * normal;
-    normal *= invScale2;
+    normal = adjoint(m) * normal;
     normal = normalize(normal);
    
-    // question here of whether tangent is transformed by m or mInvT
-    // most apps assume m, but after averaging it can be just as off the surface as the normal
     if (useTangent) {
         tangent = m * tangent;
-        tangent *= invScale2;
         tangent = normalize(tangent);
     }
     
@@ -622,7 +626,7 @@ ColorInOut DrawImageFunc(
     
     if (needsWorldBasis) {
         float3 t = tangent.xyz;
-        transformBasis(normal, t, uniforms.modelMatrix, uniforms.modelMatrixInvScale2.xyz, uniforms.useTangent);
+        transformBasis(normal, t, uniforms.modelMatrix, uniforms.useTangent);
         tangent.xyz = t;
         
         out.normal = toHalf(normal);
diff --git a/libkram/vectormath/double234.h b/libkram/vectormath/double234.h
index ffde920..4ac2432 100644
--- a/libkram/vectormath/double234.h
+++ b/libkram/vectormath/double234.h
@@ -106,9 +106,6 @@ SIMD_CALL double4 zeroext(double3 x)
 
 #if SIMD_NEON
 
-// TODO: expose double2 ops on Neon.
-// think I have to, so that 4 can call 2x2 with hi/lo
-
 SIMD_CALL double reduce_min(double2 x)
 {
     return vminvq_f64(x);
diff --git a/libkram/vectormath/float234.h b/libkram/vectormath/float234.h
index b246ad4..62e9e4f 100644
--- a/libkram/vectormath/float234.h
+++ b/libkram/vectormath/float234.h
@@ -929,9 +929,13 @@ SIMD_CALL quatf operator-(quatf q)
 SIMD_CALL float3 operator*(quatf q, float3 v)
 {
     // see https://fgiesen.wordpress.com/2019/02/09/rotating-a-single-vector-using-a-quaternion/
+    //float4 qv = q.v;
+    //float3 t = 2.0f * cross(qv.xyz, v);
+    //return v + qv.w * t + cross(qv.xyz, t);
+
+    // simplified form of above with the factor of 2 pulled out; literal stays float so no double enters the float math
     float4 qv = q.v;
-    float3 t = 2.0f * cross(qv.xyz, v);
-    return v + qv.w * t + cross(qv.xyz, t);
+    return v + 2.0f * cross(qv.xyz, cross(qv.xyz, v) + qv.w * v);
 }
 
 SIMD_CALL bool equal(quatf x, quatf y)