Merge pull request #305 from ocheron/p256-point-mul

Better P256.pointMul performance
2020-01-12 18:20:17 +01:00 · 2020-01-12 18:20:17 +01:00 · 7f1c2980e2
commit 7f1c2980e2
parent 1f6ed5711c 7ac3060873
2 changed files with 28 additions and 13 deletions
--- a/Crypto/PubKey/ECC/P256.hs
+++ b/Crypto/PubKey/ECC/P256.hs
@ -124,16 +124,16 @@ pointNegate a = withNewPoint $ \dx dy ->
 -- warning: variable time
 pointMul :: Scalar -> Point -> Point
 pointMul scalar p = withNewPoint $ \dx dy ->
-    withScalar scalar $ \n -> withPoint p $ \px py -> withScalarZero $ \nzero ->
-        ccryptonite_p256_points_mul_vartime nzero n px py dx dy
+    withScalar scalar $ \n -> withPoint p $ \px py ->
+        ccryptonite_p256e_point_mul n px py dx dy

 -- | Similar to 'pointMul', serializing the x coordinate as binary.
 -- When the scalar is a multiple of the point order, the result is all zero.
 pointDh :: ByteArray binary => Scalar -> Point -> binary
 pointDh scalar p =
    B.unsafeCreate scalarSize $ \dst -> withTempPoint $ \dx dy -> do
-        withScalar scalar $ \n -> withPoint p $ \px py -> withScalarZero $ \nzero ->
-            ccryptonite_p256_points_mul_vartime nzero n px py dx dy
+        withScalar scalar $ \n -> withPoint p $ \px py ->
+            ccryptonite_p256e_point_mul n px py dx dy
        ccryptonite_p256_to_bin (castPtr dx) dst

 -- | multiply the point @p@ with @n2@ and add a lifted to curve value @n1@
@ -338,18 +338,9 @@ withNewScalarFreeze f = Scalar $ B.allocAndFreeze scalarSize f
 withTempPoint :: (Ptr P256X -> Ptr P256Y -> IO a) -> IO a
 withTempPoint f = allocTempScrubbed pointSize (\p -> let px = castPtr p in f px (pxToPy px))

-withTempScalar :: (Ptr P256Scalar -> IO a) -> IO a
-withTempScalar f = allocTempScrubbed scalarSize (f . castPtr)
-
 withScalar :: Scalar -> (Ptr P256Scalar -> IO a) -> IO a
 withScalar (Scalar d) f = B.withByteArray d f

-withScalarZero :: (Ptr P256Scalar -> IO a) -> IO a
-withScalarZero f =
-    withTempScalar $ \d -> do
-        ccryptonite_p256_init d
-        f d
-
 allocTemp :: Int -> (Ptr Word8 -> IO a) -> IO a
 allocTemp n f = ignoreSnd <$> B.allocRet n f
  where
@ -412,6 +403,13 @@ foreign import ccall "cryptonite_p256e_point_negate"
                                   -> Ptr P256X -> Ptr P256Y
                                   -> IO ()

+-- compute (out_x,out_y) = n * (in_x,in_y)
+foreign import ccall "cryptonite_p256e_point_mul"
+    ccryptonite_p256e_point_mul :: Ptr P256Scalar -- n
+                                -> Ptr P256X -> Ptr P256Y -- in_{x,y}
+                                -> Ptr P256X -> Ptr P256Y -- out_{x,y}
+                                -> IO ()
+
 -- compute (out_x,out_y) = n1 * G + n2 * (in_x,in_y)
 foreign import ccall "cryptonite_p256_points_mul_vartime"
    ccryptonite_p256_points_mul_vartime :: Ptr P256Scalar -- n1
--- a/cbits/p256/p256_ec.c
+++ b/cbits/p256/p256_ec.c
@ -1311,3 +1311,20 @@ void cryptonite_p256e_point_negate(
    memcpy(out_x, in_x, P256_NBYTES);
    cryptonite_p256_sub(&cryptonite_SECP256r1_p, in_y, out_y);
 }
+
+/* Not part of the original source. cryptonite_p256e_point_mul sets
+   {out_x,out_y} = n*{in_x,in_y}, where n is < the order of the group.
+   Works on felem temporaries via to_montgomery / from_montgomery.
+ */
+void cryptonite_p256e_point_mul(const cryptonite_p256_int* n,
+    const cryptonite_p256_int* in_x, const cryptonite_p256_int* in_y,
+    cryptonite_p256_int* out_x, cryptonite_p256_int* out_y) {
+  felem x, y, z, px, py;  /* x,y,z: scalar_mult result; px,py: input point, later reused for the affine result */
+
+  to_montgomery(px, in_x);  /* bring the input coordinates into felem form */
+  to_montgomery(py, in_y);
+  scalar_mult(x, y, z, px, py, n);  /* (x,y,z) = n * (px,py); presumably Jacobian coordinates -- confirm against scalar_mult */
+  point_to_affine(px, py, x, y, z);  /* overwrites px,py with the affine form of (x,y,z) */
+  from_montgomery(out_x, px);  /* convert back to cryptonite_p256_int for the caller */
+  from_montgomery(out_y, py);
+}