libtommath.c 52 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112211321142115211621172118211921202121212221232124212521262127212821292130213121322133213421352136213721382139214021412142214321442145214621472148214921502151215221532154215521562157215821592160216121622163216421652166216721682169217021712172217321742175217621772178217921802181218221832184218521862187218821892190219121922193219421952196219721982199220022012202220322042205220622072208220922102211221222132214221522162217221822192220222122222223222422252226222722282229223022312232223322342235223622372238223922402241224222432244224522462247224822492250225122522253225422552256225722582259226022612262226322642265226622672268226922702271227222732274227522762277227822792280228122822283228422852286228722882289229022912292229322942295229622972298229923002301230223032304230523062307230823092310231123122313231423152316231723182319232023212322232323242325232623272328232923302331233223332334233523362337233823392340234123422343234423452346234723482349235023512352235323542355235623572358235923602361236223632364236523662367236823692370
  1. /*
  2. * Minimal code for RSA support from LibTomMath 0.3.9
  3. * http://math.libtomcrypt.com/
  4. * http://math.libtomcrypt.com/files/ltm-0.39.tar.bz2
  5. * This library was released in public domain by Tom St Denis.
  6. *
  7. * The combination in this file is not using many of the optimized algorithms
  8. * (e.g., Montgomery reduction) and is considerable slower than the LibTomMath
  9. * with its default of SC_RSA_1 settins. The main purpose of having this
  10. * version here is to make it easier to build bignum.c wrapper without having
  11. * to install and build an external library. However, it is likely worth the
  12. * effort to use the full library with SC_RSA_1 instead of this minimized copy.
  13. * Including the optimized algorithms may increase the size requirements by
  14. * 15 kB or so (measured with x86 build).
  15. *
  16. * If CONFIG_INTERNAL_LIBTOMMATH is defined, bignum.c includes this
  17. * libtommath.c file instead of using the external LibTomMath library.
  18. */
  19. #ifndef CHAR_BIT
  20. #define CHAR_BIT 8
  21. #endif
  22. #define BN_MP_INVMOD_C
  23. #define BN_S_MP_EXPTMOD_C /* Note: #undef in tommath_superclass.h; this would
  24. * require BN_MP_EXPTMOD_FAST_C instead */
  25. #define BN_S_MP_MUL_DIGS_C
  26. #define BN_MP_INVMOD_SLOW_C
  27. #define BN_S_MP_SQR_C
  28. #define BN_S_MP_MUL_HIGH_DIGS_C /* Note: #undef in tommath_superclass.h; this
  29. * would require other than mp_reduce */
  30. /* from tommath.h */
  31. #ifndef MIN
  32. #define MIN(x,y) ((x)<(y)?(x):(y))
  33. #endif
  34. #ifndef MAX
  35. #define MAX(x,y) ((x)>(y)?(x):(y))
  36. #endif
  37. #define OPT_CAST(x)
  38. typedef unsigned long mp_digit;
  39. typedef u64 mp_word;
  40. #define DIGIT_BIT 28
  41. #define MP_28BIT
  42. #define XMALLOC os_malloc
  43. #define XFREE os_free
  44. #define XREALLOC os_realloc
  45. #define MP_MASK ((((mp_digit)1)<<((mp_digit)DIGIT_BIT))-((mp_digit)1))
  46. #define MP_LT -1 /* less than */
  47. #define MP_EQ 0 /* equal to */
  48. #define MP_GT 1 /* greater than */
  49. #define MP_ZPOS 0 /* positive integer */
  50. #define MP_NEG 1 /* negative */
  51. #define MP_OKAY 0 /* ok result */
  52. #define MP_MEM -2 /* out of mem */
  53. #define MP_VAL -3 /* invalid input */
  54. #define MP_YES 1 /* yes response */
  55. #define MP_NO 0 /* no response */
  56. typedef int mp_err;
  57. /* define this to use lower memory usage routines (exptmods mostly) */
  58. #define MP_LOW_MEM
  59. /* default precision */
  60. #ifndef MP_PREC
  61. #ifndef MP_LOW_MEM
  62. #define MP_PREC 32 /* default digits of precision */
  63. #else
  64. #define MP_PREC 8 /* default digits of precision */
  65. #endif
  66. #endif
  67. /* size of comba arrays, should be at least 2 * 2**(BITS_PER_WORD - BITS_PER_DIGIT*2) */
  68. #define MP_WARRAY (1 << (sizeof(mp_word) * CHAR_BIT - 2 * DIGIT_BIT + 1))
  69. /* the infamous mp_int structure */
  70. typedef struct {
  71. int used, alloc, sign;
  72. mp_digit *dp;
  73. } mp_int;
  74. /* ---> Basic Manipulations <--- */
  75. #define mp_iszero(a) (((a)->used == 0) ? MP_YES : MP_NO)
  76. #define mp_iseven(a) (((a)->used > 0 && (((a)->dp[0] & 1) == 0)) ? MP_YES : MP_NO)
  77. #define mp_isodd(a) (((a)->used > 0 && (((a)->dp[0] & 1) == 1)) ? MP_YES : MP_NO)
  78. /* prototypes for copied functions */
  79. #define s_mp_mul(a, b, c) s_mp_mul_digs(a, b, c, (a)->used + (b)->used + 1)
  80. static int s_mp_exptmod(mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode);
  81. static int s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs);
  82. static int s_mp_sqr(mp_int * a, mp_int * b);
  83. static int s_mp_mul_high_digs(mp_int * a, mp_int * b, mp_int * c, int digs);
  84. static int fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs);
  85. static int mp_init_multi(mp_int *mp, ...);
  86. static void mp_clear_multi(mp_int *mp, ...);
  87. static int mp_lshd(mp_int * a, int b);
  88. static void mp_set(mp_int * a, mp_digit b);
  89. static void mp_clamp(mp_int * a);
  90. static void mp_exch(mp_int * a, mp_int * b);
  91. static void mp_rshd(mp_int * a, int b);
  92. static void mp_zero(mp_int * a);
  93. static int mp_mod_2d(mp_int * a, int b, mp_int * c);
  94. static int mp_div_2d(mp_int * a, int b, mp_int * c, mp_int * d);
  95. static int mp_init_copy(mp_int * a, mp_int * b);
  96. static int mp_mul_2d(mp_int * a, int b, mp_int * c);
  97. static int mp_div_2(mp_int * a, mp_int * b);
  98. static int mp_copy(mp_int * a, mp_int * b);
  99. static int mp_count_bits(mp_int * a);
  100. static int mp_div(mp_int * a, mp_int * b, mp_int * c, mp_int * d);
  101. static int mp_mod(mp_int * a, mp_int * b, mp_int * c);
  102. static int mp_grow(mp_int * a, int size);
  103. static int mp_cmp_mag(mp_int * a, mp_int * b);
  104. static int mp_invmod(mp_int * a, mp_int * b, mp_int * c);
  105. static int mp_abs(mp_int * a, mp_int * b);
  106. static int mp_invmod_slow(mp_int * a, mp_int * b, mp_int * c);
  107. static int mp_sqr(mp_int * a, mp_int * b);
  108. static int mp_reduce_2k_l(mp_int *a, mp_int *n, mp_int *d);
  109. static int mp_reduce_2k_setup_l(mp_int *a, mp_int *d);
  110. static int mp_2expt(mp_int * a, int b);
  111. static int mp_reduce_setup(mp_int * a, mp_int * b);
  112. static int mp_reduce(mp_int * x, mp_int * m, mp_int * mu);
  113. static int mp_init_size(mp_int * a, int size);
  114. /* functions from bn_<func name>.c */
  115. /* reverse an array, used for radix code */
  116. static void bn_reverse (unsigned char *s, int len)
  117. {
  118. int ix, iy;
  119. unsigned char t;
  120. ix = 0;
  121. iy = len - 1;
  122. while (ix < iy) {
  123. t = s[ix];
  124. s[ix] = s[iy];
  125. s[iy] = t;
  126. ++ix;
  127. --iy;
  128. }
  129. }
  130. /* low level addition, based on HAC pp.594, Algorithm 14.7 */
  131. static int s_mp_add (mp_int * a, mp_int * b, mp_int * c)
  132. {
  133. mp_int *x;
  134. int olduse, res, min, max;
  135. /* find sizes, we let |a| <= |b| which means we have to sort
  136. * them. "x" will point to the input with the most digits
  137. */
  138. if (a->used > b->used) {
  139. min = b->used;
  140. max = a->used;
  141. x = a;
  142. } else {
  143. min = a->used;
  144. max = b->used;
  145. x = b;
  146. }
  147. /* init result */
  148. if (c->alloc < max + 1) {
  149. if ((res = mp_grow (c, max + 1)) != MP_OKAY) {
  150. return res;
  151. }
  152. }
  153. /* get old used digit count and set new one */
  154. olduse = c->used;
  155. c->used = max + 1;
  156. {
  157. register mp_digit u, *tmpa, *tmpb, *tmpc;
  158. register int i;
  159. /* alias for digit pointers */
  160. /* first input */
  161. tmpa = a->dp;
  162. /* second input */
  163. tmpb = b->dp;
  164. /* destination */
  165. tmpc = c->dp;
  166. /* zero the carry */
  167. u = 0;
  168. for (i = 0; i < min; i++) {
  169. /* Compute the sum at one digit, T[i] = A[i] + B[i] + U */
  170. *tmpc = *tmpa++ + *tmpb++ + u;
  171. /* U = carry bit of T[i] */
  172. u = *tmpc >> ((mp_digit)DIGIT_BIT);
  173. /* take away carry bit from T[i] */
  174. *tmpc++ &= MP_MASK;
  175. }
  176. /* now copy higher words if any, that is in A+B
  177. * if A or B has more digits add those in
  178. */
  179. if (min != max) {
  180. for (; i < max; i++) {
  181. /* T[i] = X[i] + U */
  182. *tmpc = x->dp[i] + u;
  183. /* U = carry bit of T[i] */
  184. u = *tmpc >> ((mp_digit)DIGIT_BIT);
  185. /* take away carry bit from T[i] */
  186. *tmpc++ &= MP_MASK;
  187. }
  188. }
  189. /* add carry */
  190. *tmpc++ = u;
  191. /* clear digits above oldused */
  192. for (i = c->used; i < olduse; i++) {
  193. *tmpc++ = 0;
  194. }
  195. }
  196. mp_clamp (c);
  197. return MP_OKAY;
  198. }
  199. /* low level subtraction (assumes |a| > |b|), HAC pp.595 Algorithm 14.9 */
  200. static int s_mp_sub (mp_int * a, mp_int * b, mp_int * c)
  201. {
  202. int olduse, res, min, max;
  203. /* find sizes */
  204. min = b->used;
  205. max = a->used;
  206. /* init result */
  207. if (c->alloc < max) {
  208. if ((res = mp_grow (c, max)) != MP_OKAY) {
  209. return res;
  210. }
  211. }
  212. olduse = c->used;
  213. c->used = max;
  214. {
  215. register mp_digit u, *tmpa, *tmpb, *tmpc;
  216. register int i;
  217. /* alias for digit pointers */
  218. tmpa = a->dp;
  219. tmpb = b->dp;
  220. tmpc = c->dp;
  221. /* set carry to zero */
  222. u = 0;
  223. for (i = 0; i < min; i++) {
  224. /* T[i] = A[i] - B[i] - U */
  225. *tmpc = *tmpa++ - *tmpb++ - u;
  226. /* U = carry bit of T[i]
  227. * Note this saves performing an AND operation since
  228. * if a carry does occur it will propagate all the way to the
  229. * MSB. As a result a single shift is enough to get the carry
  230. */
  231. u = *tmpc >> ((mp_digit)(CHAR_BIT * sizeof (mp_digit) - 1));
  232. /* Clear carry from T[i] */
  233. *tmpc++ &= MP_MASK;
  234. }
  235. /* now copy higher words if any, e.g. if A has more digits than B */
  236. for (; i < max; i++) {
  237. /* T[i] = A[i] - U */
  238. *tmpc = *tmpa++ - u;
  239. /* U = carry bit of T[i] */
  240. u = *tmpc >> ((mp_digit)(CHAR_BIT * sizeof (mp_digit) - 1));
  241. /* Clear carry from T[i] */
  242. *tmpc++ &= MP_MASK;
  243. }
  244. /* clear digits above used (since we may not have grown result above) */
  245. for (i = c->used; i < olduse; i++) {
  246. *tmpc++ = 0;
  247. }
  248. }
  249. mp_clamp (c);
  250. return MP_OKAY;
  251. }
  252. /* init a new mp_int */
  253. static int mp_init (mp_int * a)
  254. {
  255. int i;
  256. /* allocate memory required and clear it */
  257. a->dp = OPT_CAST(mp_digit) XMALLOC (sizeof (mp_digit) * MP_PREC);
  258. if (a->dp == NULL) {
  259. return MP_MEM;
  260. }
  261. /* set the digits to zero */
  262. for (i = 0; i < MP_PREC; i++) {
  263. a->dp[i] = 0;
  264. }
  265. /* set the used to zero, allocated digits to the default precision
  266. * and sign to positive */
  267. a->used = 0;
  268. a->alloc = MP_PREC;
  269. a->sign = MP_ZPOS;
  270. return MP_OKAY;
  271. }
  272. /* clear one (frees) */
  273. static void mp_clear (mp_int * a)
  274. {
  275. int i;
  276. /* only do anything if a hasn't been freed previously */
  277. if (a->dp != NULL) {
  278. /* first zero the digits */
  279. for (i = 0; i < a->used; i++) {
  280. a->dp[i] = 0;
  281. }
  282. /* free ram */
  283. XFREE(a->dp);
  284. /* reset members to make debugging easier */
  285. a->dp = NULL;
  286. a->alloc = a->used = 0;
  287. a->sign = MP_ZPOS;
  288. }
  289. }
  290. /* high level addition (handles signs) */
  291. static int mp_add (mp_int * a, mp_int * b, mp_int * c)
  292. {
  293. int sa, sb, res;
  294. /* get sign of both inputs */
  295. sa = a->sign;
  296. sb = b->sign;
  297. /* handle two cases, not four */
  298. if (sa == sb) {
  299. /* both positive or both negative */
  300. /* add their magnitudes, copy the sign */
  301. c->sign = sa;
  302. res = s_mp_add (a, b, c);
  303. } else {
  304. /* one positive, the other negative */
  305. /* subtract the one with the greater magnitude from */
  306. /* the one of the lesser magnitude. The result gets */
  307. /* the sign of the one with the greater magnitude. */
  308. if (mp_cmp_mag (a, b) == MP_LT) {
  309. c->sign = sb;
  310. res = s_mp_sub (b, a, c);
  311. } else {
  312. c->sign = sa;
  313. res = s_mp_sub (a, b, c);
  314. }
  315. }
  316. return res;
  317. }
  318. /* high level subtraction (handles signs) */
  319. static int mp_sub (mp_int * a, mp_int * b, mp_int * c)
  320. {
  321. int sa, sb, res;
  322. sa = a->sign;
  323. sb = b->sign;
  324. if (sa != sb) {
  325. /* subtract a negative from a positive, OR */
  326. /* subtract a positive from a negative. */
  327. /* In either case, ADD their magnitudes, */
  328. /* and use the sign of the first number. */
  329. c->sign = sa;
  330. res = s_mp_add (a, b, c);
  331. } else {
  332. /* subtract a positive from a positive, OR */
  333. /* subtract a negative from a negative. */
  334. /* First, take the difference between their */
  335. /* magnitudes, then... */
  336. if (mp_cmp_mag (a, b) != MP_LT) {
  337. /* Copy the sign from the first */
  338. c->sign = sa;
  339. /* The first has a larger or equal magnitude */
  340. res = s_mp_sub (a, b, c);
  341. } else {
  342. /* The result has the *opposite* sign from */
  343. /* the first number. */
  344. c->sign = (sa == MP_ZPOS) ? MP_NEG : MP_ZPOS;
  345. /* The second has a larger magnitude */
  346. res = s_mp_sub (b, a, c);
  347. }
  348. }
  349. return res;
  350. }
  351. /* high level multiplication (handles sign) */
  352. static int mp_mul (mp_int * a, mp_int * b, mp_int * c)
  353. {
  354. int res, neg;
  355. neg = (a->sign == b->sign) ? MP_ZPOS : MP_NEG;
  356. /* use Toom-Cook? */
  357. #ifdef BN_MP_TOOM_MUL_C
  358. if (MIN (a->used, b->used) >= TOOM_MUL_CUTOFF) {
  359. res = mp_toom_mul(a, b, c);
  360. } else
  361. #endif
  362. #ifdef BN_MP_KARATSUBA_MUL_C
  363. /* use Karatsuba? */
  364. if (MIN (a->used, b->used) >= KARATSUBA_MUL_CUTOFF) {
  365. res = mp_karatsuba_mul (a, b, c);
  366. } else
  367. #endif
  368. {
  369. /* can we use the fast multiplier?
  370. *
  371. * The fast multiplier can be used if the output will
  372. * have less than MP_WARRAY digits and the number of
  373. * digits won't affect carry propagation
  374. */
  375. #ifdef BN_FAST_S_MP_MUL_DIGS_C
  376. int digs = a->used + b->used + 1;
  377. if ((digs < MP_WARRAY) &&
  378. MIN(a->used, b->used) <=
  379. (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
  380. res = fast_s_mp_mul_digs (a, b, c, digs);
  381. } else
  382. #endif
  383. #ifdef BN_S_MP_MUL_DIGS_C
  384. res = s_mp_mul (a, b, c); /* uses s_mp_mul_digs */
  385. #else
  386. #error mp_mul could fail
  387. res = MP_VAL;
  388. #endif
  389. }
  390. c->sign = (c->used > 0) ? neg : MP_ZPOS;
  391. return res;
  392. }
  393. /* d = a * b (mod c) */
  394. static int mp_mulmod (mp_int * a, mp_int * b, mp_int * c, mp_int * d)
  395. {
  396. int res;
  397. mp_int t;
  398. if ((res = mp_init (&t)) != MP_OKAY) {
  399. return res;
  400. }
  401. if ((res = mp_mul (a, b, &t)) != MP_OKAY) {
  402. mp_clear (&t);
  403. return res;
  404. }
  405. res = mp_mod (&t, c, d);
  406. mp_clear (&t);
  407. return res;
  408. }
  409. /* c = a mod b, 0 <= c < b */
  410. static int mp_mod (mp_int * a, mp_int * b, mp_int * c)
  411. {
  412. mp_int t;
  413. int res;
  414. if ((res = mp_init (&t)) != MP_OKAY) {
  415. return res;
  416. }
  417. if ((res = mp_div (a, b, NULL, &t)) != MP_OKAY) {
  418. mp_clear (&t);
  419. return res;
  420. }
  421. if (t.sign != b->sign) {
  422. res = mp_add (b, &t, c);
  423. } else {
  424. res = MP_OKAY;
  425. mp_exch (&t, c);
  426. }
  427. mp_clear (&t);
  428. return res;
  429. }
  430. /* this is a shell function that calls either the normal or Montgomery
  431. * exptmod functions. Originally the call to the montgomery code was
  432. * embedded in the normal function but that wasted alot of stack space
  433. * for nothing (since 99% of the time the Montgomery code would be called)
  434. */
  435. static int mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y)
  436. {
  437. int dr;
  438. /* modulus P must be positive */
  439. if (P->sign == MP_NEG) {
  440. return MP_VAL;
  441. }
  442. /* if exponent X is negative we have to recurse */
  443. if (X->sign == MP_NEG) {
  444. #ifdef BN_MP_INVMOD_C
  445. mp_int tmpG, tmpX;
  446. int err;
  447. /* first compute 1/G mod P */
  448. if ((err = mp_init(&tmpG)) != MP_OKAY) {
  449. return err;
  450. }
  451. if ((err = mp_invmod(G, P, &tmpG)) != MP_OKAY) {
  452. mp_clear(&tmpG);
  453. return err;
  454. }
  455. /* now get |X| */
  456. if ((err = mp_init(&tmpX)) != MP_OKAY) {
  457. mp_clear(&tmpG);
  458. return err;
  459. }
  460. if ((err = mp_abs(X, &tmpX)) != MP_OKAY) {
  461. mp_clear_multi(&tmpG, &tmpX, NULL);
  462. return err;
  463. }
  464. /* and now compute (1/G)**|X| instead of G**X [X < 0] */
  465. err = mp_exptmod(&tmpG, &tmpX, P, Y);
  466. mp_clear_multi(&tmpG, &tmpX, NULL);
  467. return err;
  468. #else
  469. #error mp_exptmod would always fail
  470. /* no invmod */
  471. return MP_VAL;
  472. #endif
  473. }
  474. /* modified diminished radix reduction */
  475. #if defined(BN_MP_REDUCE_IS_2K_L_C) && defined(BN_MP_REDUCE_2K_L_C) && defined(BN_S_MP_EXPTMOD_C)
  476. if (mp_reduce_is_2k_l(P) == MP_YES) {
  477. return s_mp_exptmod(G, X, P, Y, 1);
  478. }
  479. #endif
  480. #ifdef BN_MP_DR_IS_MODULUS_C
  481. /* is it a DR modulus? */
  482. dr = mp_dr_is_modulus(P);
  483. #else
  484. /* default to no */
  485. dr = 0;
  486. #endif
  487. #ifdef BN_MP_REDUCE_IS_2K_C
  488. /* if not, is it a unrestricted DR modulus? */
  489. if (dr == 0) {
  490. dr = mp_reduce_is_2k(P) << 1;
  491. }
  492. #endif
  493. /* if the modulus is odd or dr != 0 use the montgomery method */
  494. #ifdef BN_MP_EXPTMOD_FAST_C
  495. if (mp_isodd (P) == 1 || dr != 0) {
  496. return mp_exptmod_fast (G, X, P, Y, dr);
  497. } else {
  498. #endif
  499. #ifdef BN_S_MP_EXPTMOD_C
  500. /* otherwise use the generic Barrett reduction technique */
  501. return s_mp_exptmod (G, X, P, Y, 0);
  502. #else
  503. #error mp_exptmod could fail
  504. /* no exptmod for evens */
  505. return MP_VAL;
  506. #endif
  507. #ifdef BN_MP_EXPTMOD_FAST_C
  508. }
  509. #endif
  510. }
  511. /* compare two ints (signed)*/
  512. static int mp_cmp (mp_int * a, mp_int * b)
  513. {
  514. /* compare based on sign */
  515. if (a->sign != b->sign) {
  516. if (a->sign == MP_NEG) {
  517. return MP_LT;
  518. } else {
  519. return MP_GT;
  520. }
  521. }
  522. /* compare digits */
  523. if (a->sign == MP_NEG) {
  524. /* if negative compare opposite direction */
  525. return mp_cmp_mag(b, a);
  526. } else {
  527. return mp_cmp_mag(a, b);
  528. }
  529. }
  530. /* compare a digit */
  531. static int mp_cmp_d(mp_int * a, mp_digit b)
  532. {
  533. /* compare based on sign */
  534. if (a->sign == MP_NEG) {
  535. return MP_LT;
  536. }
  537. /* compare based on magnitude */
  538. if (a->used > 1) {
  539. return MP_GT;
  540. }
  541. /* compare the only digit of a to b */
  542. if (a->dp[0] > b) {
  543. return MP_GT;
  544. } else if (a->dp[0] < b) {
  545. return MP_LT;
  546. } else {
  547. return MP_EQ;
  548. }
  549. }
  550. /* hac 14.61, pp608 */
  551. static int mp_invmod (mp_int * a, mp_int * b, mp_int * c)
  552. {
  553. /* b cannot be negative */
  554. if (b->sign == MP_NEG || mp_iszero(b) == 1) {
  555. return MP_VAL;
  556. }
  557. #ifdef BN_FAST_MP_INVMOD_C
  558. /* if the modulus is odd we can use a faster routine instead */
  559. if (mp_isodd (b) == 1) {
  560. return fast_mp_invmod (a, b, c);
  561. }
  562. #endif
  563. #ifdef BN_MP_INVMOD_SLOW_C
  564. return mp_invmod_slow(a, b, c);
  565. #endif
  566. #ifndef BN_FAST_MP_INVMOD_C
  567. #ifndef BN_MP_INVMOD_SLOW_C
  568. #error mp_invmod would always fail
  569. #endif
  570. #endif
  571. return MP_VAL;
  572. }
  573. /* get the size for an unsigned equivalent */
  574. static int mp_unsigned_bin_size (mp_int * a)
  575. {
  576. int size = mp_count_bits (a);
  577. return (size / 8 + ((size & 7) != 0 ? 1 : 0));
  578. }
  579. /* hac 14.61, pp608 */
  580. static int mp_invmod_slow (mp_int * a, mp_int * b, mp_int * c)
  581. {
  582. mp_int x, y, u, v, A, B, C, D;
  583. int res;
  584. /* b cannot be negative */
  585. if (b->sign == MP_NEG || mp_iszero(b) == 1) {
  586. return MP_VAL;
  587. }
  588. /* init temps */
  589. if ((res = mp_init_multi(&x, &y, &u, &v,
  590. &A, &B, &C, &D, NULL)) != MP_OKAY) {
  591. return res;
  592. }
  593. /* x = a, y = b */
  594. if ((res = mp_mod(a, b, &x)) != MP_OKAY) {
  595. goto LBL_ERR;
  596. }
  597. if ((res = mp_copy (b, &y)) != MP_OKAY) {
  598. goto LBL_ERR;
  599. }
  600. /* 2. [modified] if x,y are both even then return an error! */
  601. if (mp_iseven (&x) == 1 && mp_iseven (&y) == 1) {
  602. res = MP_VAL;
  603. goto LBL_ERR;
  604. }
  605. /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
  606. if ((res = mp_copy (&x, &u)) != MP_OKAY) {
  607. goto LBL_ERR;
  608. }
  609. if ((res = mp_copy (&y, &v)) != MP_OKAY) {
  610. goto LBL_ERR;
  611. }
  612. mp_set (&A, 1);
  613. mp_set (&D, 1);
  614. top:
  615. /* 4. while u is even do */
  616. while (mp_iseven (&u) == 1) {
  617. /* 4.1 u = u/2 */
  618. if ((res = mp_div_2 (&u, &u)) != MP_OKAY) {
  619. goto LBL_ERR;
  620. }
  621. /* 4.2 if A or B is odd then */
  622. if (mp_isodd (&A) == 1 || mp_isodd (&B) == 1) {
  623. /* A = (A+y)/2, B = (B-x)/2 */
  624. if ((res = mp_add (&A, &y, &A)) != MP_OKAY) {
  625. goto LBL_ERR;
  626. }
  627. if ((res = mp_sub (&B, &x, &B)) != MP_OKAY) {
  628. goto LBL_ERR;
  629. }
  630. }
  631. /* A = A/2, B = B/2 */
  632. if ((res = mp_div_2 (&A, &A)) != MP_OKAY) {
  633. goto LBL_ERR;
  634. }
  635. if ((res = mp_div_2 (&B, &B)) != MP_OKAY) {
  636. goto LBL_ERR;
  637. }
  638. }
  639. /* 5. while v is even do */
  640. while (mp_iseven (&v) == 1) {
  641. /* 5.1 v = v/2 */
  642. if ((res = mp_div_2 (&v, &v)) != MP_OKAY) {
  643. goto LBL_ERR;
  644. }
  645. /* 5.2 if C or D is odd then */
  646. if (mp_isodd (&C) == 1 || mp_isodd (&D) == 1) {
  647. /* C = (C+y)/2, D = (D-x)/2 */
  648. if ((res = mp_add (&C, &y, &C)) != MP_OKAY) {
  649. goto LBL_ERR;
  650. }
  651. if ((res = mp_sub (&D, &x, &D)) != MP_OKAY) {
  652. goto LBL_ERR;
  653. }
  654. }
  655. /* C = C/2, D = D/2 */
  656. if ((res = mp_div_2 (&C, &C)) != MP_OKAY) {
  657. goto LBL_ERR;
  658. }
  659. if ((res = mp_div_2 (&D, &D)) != MP_OKAY) {
  660. goto LBL_ERR;
  661. }
  662. }
  663. /* 6. if u >= v then */
  664. if (mp_cmp (&u, &v) != MP_LT) {
  665. /* u = u - v, A = A - C, B = B - D */
  666. if ((res = mp_sub (&u, &v, &u)) != MP_OKAY) {
  667. goto LBL_ERR;
  668. }
  669. if ((res = mp_sub (&A, &C, &A)) != MP_OKAY) {
  670. goto LBL_ERR;
  671. }
  672. if ((res = mp_sub (&B, &D, &B)) != MP_OKAY) {
  673. goto LBL_ERR;
  674. }
  675. } else {
  676. /* v - v - u, C = C - A, D = D - B */
  677. if ((res = mp_sub (&v, &u, &v)) != MP_OKAY) {
  678. goto LBL_ERR;
  679. }
  680. if ((res = mp_sub (&C, &A, &C)) != MP_OKAY) {
  681. goto LBL_ERR;
  682. }
  683. if ((res = mp_sub (&D, &B, &D)) != MP_OKAY) {
  684. goto LBL_ERR;
  685. }
  686. }
  687. /* if not zero goto step 4 */
  688. if (mp_iszero (&u) == 0)
  689. goto top;
  690. /* now a = C, b = D, gcd == g*v */
  691. /* if v != 1 then there is no inverse */
  692. if (mp_cmp_d (&v, 1) != MP_EQ) {
  693. res = MP_VAL;
  694. goto LBL_ERR;
  695. }
  696. /* if its too low */
  697. while (mp_cmp_d(&C, 0) == MP_LT) {
  698. if ((res = mp_add(&C, b, &C)) != MP_OKAY) {
  699. goto LBL_ERR;
  700. }
  701. }
  702. /* too big */
  703. while (mp_cmp_mag(&C, b) != MP_LT) {
  704. if ((res = mp_sub(&C, b, &C)) != MP_OKAY) {
  705. goto LBL_ERR;
  706. }
  707. }
  708. /* C is now the inverse */
  709. mp_exch (&C, c);
  710. res = MP_OKAY;
  711. LBL_ERR:mp_clear_multi (&x, &y, &u, &v, &A, &B, &C, &D, NULL);
  712. return res;
  713. }
  714. /* compare maginitude of two ints (unsigned) */
  715. static int mp_cmp_mag (mp_int * a, mp_int * b)
  716. {
  717. int n;
  718. mp_digit *tmpa, *tmpb;
  719. /* compare based on # of non-zero digits */
  720. if (a->used > b->used) {
  721. return MP_GT;
  722. }
  723. if (a->used < b->used) {
  724. return MP_LT;
  725. }
  726. /* alias for a */
  727. tmpa = a->dp + (a->used - 1);
  728. /* alias for b */
  729. tmpb = b->dp + (a->used - 1);
  730. /* compare based on digits */
  731. for (n = 0; n < a->used; ++n, --tmpa, --tmpb) {
  732. if (*tmpa > *tmpb) {
  733. return MP_GT;
  734. }
  735. if (*tmpa < *tmpb) {
  736. return MP_LT;
  737. }
  738. }
  739. return MP_EQ;
  740. }
  741. /* reads a unsigned char array, assumes the msb is stored first [big endian] */
  742. static int mp_read_unsigned_bin (mp_int * a, const unsigned char *b, int c)
  743. {
  744. int res;
  745. /* make sure there are at least two digits */
  746. if (a->alloc < 2) {
  747. if ((res = mp_grow(a, 2)) != MP_OKAY) {
  748. return res;
  749. }
  750. }
  751. /* zero the int */
  752. mp_zero (a);
  753. /* read the bytes in */
  754. while (c-- > 0) {
  755. if ((res = mp_mul_2d (a, 8, a)) != MP_OKAY) {
  756. return res;
  757. }
  758. #ifndef MP_8BIT
  759. a->dp[0] |= *b++;
  760. a->used += 1;
  761. #else
  762. a->dp[0] = (*b & MP_MASK);
  763. a->dp[1] |= ((*b++ >> 7U) & 1);
  764. a->used += 2;
  765. #endif
  766. }
  767. mp_clamp (a);
  768. return MP_OKAY;
  769. }
  770. /* store in unsigned [big endian] format */
  771. static int mp_to_unsigned_bin (mp_int * a, unsigned char *b)
  772. {
  773. int x, res;
  774. mp_int t;
  775. if ((res = mp_init_copy (&t, a)) != MP_OKAY) {
  776. return res;
  777. }
  778. x = 0;
  779. while (mp_iszero (&t) == 0) {
  780. #ifndef MP_8BIT
  781. b[x++] = (unsigned char) (t.dp[0] & 255);
  782. #else
  783. b[x++] = (unsigned char) (t.dp[0] | ((t.dp[1] & 0x01) << 7));
  784. #endif
  785. if ((res = mp_div_2d (&t, 8, &t, NULL)) != MP_OKAY) {
  786. mp_clear (&t);
  787. return res;
  788. }
  789. }
  790. bn_reverse (b, x);
  791. mp_clear (&t);
  792. return MP_OKAY;
  793. }
  794. /* shift right by a certain bit count (store quotient in c, optional remainder in d) */
  795. static int mp_div_2d (mp_int * a, int b, mp_int * c, mp_int * d)
  796. {
  797. mp_digit D, r, rr;
  798. int x, res;
  799. mp_int t;
  800. /* if the shift count is <= 0 then we do no work */
  801. if (b <= 0) {
  802. res = mp_copy (a, c);
  803. if (d != NULL) {
  804. mp_zero (d);
  805. }
  806. return res;
  807. }
  808. if ((res = mp_init (&t)) != MP_OKAY) {
  809. return res;
  810. }
  811. /* get the remainder */
  812. if (d != NULL) {
  813. if ((res = mp_mod_2d (a, b, &t)) != MP_OKAY) {
  814. mp_clear (&t);
  815. return res;
  816. }
  817. }
  818. /* copy */
  819. if ((res = mp_copy (a, c)) != MP_OKAY) {
  820. mp_clear (&t);
  821. return res;
  822. }
  823. /* shift by as many digits in the bit count */
  824. if (b >= (int)DIGIT_BIT) {
  825. mp_rshd (c, b / DIGIT_BIT);
  826. }
  827. /* shift any bit count < DIGIT_BIT */
  828. D = (mp_digit) (b % DIGIT_BIT);
  829. if (D != 0) {
  830. register mp_digit *tmpc, mask, shift;
  831. /* mask */
  832. mask = (((mp_digit)1) << D) - 1;
  833. /* shift for lsb */
  834. shift = DIGIT_BIT - D;
  835. /* alias */
  836. tmpc = c->dp + (c->used - 1);
  837. /* carry */
  838. r = 0;
  839. for (x = c->used - 1; x >= 0; x--) {
  840. /* get the lower bits of this word in a temp */
  841. rr = *tmpc & mask;
  842. /* shift the current word and mix in the carry bits from the previous word */
  843. *tmpc = (*tmpc >> D) | (r << shift);
  844. --tmpc;
  845. /* set the carry to the carry bits of the current word found above */
  846. r = rr;
  847. }
  848. }
  849. mp_clamp (c);
  850. if (d != NULL) {
  851. mp_exch (&t, d);
  852. }
  853. mp_clear (&t);
  854. return MP_OKAY;
  855. }
  856. static int mp_init_copy (mp_int * a, mp_int * b)
  857. {
  858. int res;
  859. if ((res = mp_init (a)) != MP_OKAY) {
  860. return res;
  861. }
  862. return mp_copy (b, a);
  863. }
  864. /* set to zero */
  865. static void mp_zero (mp_int * a)
  866. {
  867. int n;
  868. mp_digit *tmp;
  869. a->sign = MP_ZPOS;
  870. a->used = 0;
  871. tmp = a->dp;
  872. for (n = 0; n < a->alloc; n++) {
  873. *tmp++ = 0;
  874. }
  875. }
  876. /* copy, b = a */
  877. static int mp_copy (mp_int * a, mp_int * b)
  878. {
  879. int res, n;
  880. /* if dst == src do nothing */
  881. if (a == b) {
  882. return MP_OKAY;
  883. }
  884. /* grow dest */
  885. if (b->alloc < a->used) {
  886. if ((res = mp_grow (b, a->used)) != MP_OKAY) {
  887. return res;
  888. }
  889. }
  890. /* zero b and copy the parameters over */
  891. {
  892. register mp_digit *tmpa, *tmpb;
  893. /* pointer aliases */
  894. /* source */
  895. tmpa = a->dp;
  896. /* destination */
  897. tmpb = b->dp;
  898. /* copy all the digits */
  899. for (n = 0; n < a->used; n++) {
  900. *tmpb++ = *tmpa++;
  901. }
  902. /* clear high digits */
  903. for (; n < b->used; n++) {
  904. *tmpb++ = 0;
  905. }
  906. }
  907. /* copy used count and sign */
  908. b->used = a->used;
  909. b->sign = a->sign;
  910. return MP_OKAY;
  911. }
  912. /* shift right a certain amount of digits */
  913. static void mp_rshd (mp_int * a, int b)
  914. {
  915. int x;
  916. /* if b <= 0 then ignore it */
  917. if (b <= 0) {
  918. return;
  919. }
  920. /* if b > used then simply zero it and return */
  921. if (a->used <= b) {
  922. mp_zero (a);
  923. return;
  924. }
  925. {
  926. register mp_digit *bottom, *top;
  927. /* shift the digits down */
  928. /* bottom */
  929. bottom = a->dp;
  930. /* top [offset into digits] */
  931. top = a->dp + b;
  932. /* this is implemented as a sliding window where
  933. * the window is b-digits long and digits from
  934. * the top of the window are copied to the bottom
  935. *
  936. * e.g.
  937. b-2 | b-1 | b0 | b1 | b2 | ... | bb | ---->
  938. /\ | ---->
  939. \-------------------/ ---->
  940. */
  941. for (x = 0; x < (a->used - b); x++) {
  942. *bottom++ = *top++;
  943. }
  944. /* zero the top digits */
  945. for (; x < a->used; x++) {
  946. *bottom++ = 0;
  947. }
  948. }
  949. /* remove excess digits */
  950. a->used -= b;
  951. }
  952. /* swap the elements of two integers, for cases where you can't simply swap the
  953. * mp_int pointers around
  954. */
  955. static void mp_exch (mp_int * a, mp_int * b)
  956. {
  957. mp_int t;
  958. t = *a;
  959. *a = *b;
  960. *b = t;
  961. }
  962. /* trim unused digits
  963. *
  964. * This is used to ensure that leading zero digits are
  965. * trimed and the leading "used" digit will be non-zero
  966. * Typically very fast. Also fixes the sign if there
  967. * are no more leading digits
  968. */
  969. static void mp_clamp (mp_int * a)
  970. {
  971. /* decrease used while the most significant digit is
  972. * zero.
  973. */
  974. while (a->used > 0 && a->dp[a->used - 1] == 0) {
  975. --(a->used);
  976. }
  977. /* reset the sign flag if used == 0 */
  978. if (a->used == 0) {
  979. a->sign = MP_ZPOS;
  980. }
  981. }
  982. /* grow as required */
  983. static int mp_grow (mp_int * a, int size)
  984. {
  985. int i;
  986. mp_digit *tmp;
  987. /* if the alloc size is smaller alloc more ram */
  988. if (a->alloc < size) {
  989. /* ensure there are always at least MP_PREC digits extra on top */
  990. size += (MP_PREC * 2) - (size % MP_PREC);
  991. /* reallocate the array a->dp
  992. *
  993. * We store the return in a temporary variable
  994. * in case the operation failed we don't want
  995. * to overwrite the dp member of a.
  996. */
  997. tmp = OPT_CAST(mp_digit) XREALLOC (a->dp, sizeof (mp_digit) * size);
  998. if (tmp == NULL) {
  999. /* reallocation failed but "a" is still valid [can be freed] */
  1000. return MP_MEM;
  1001. }
  1002. /* reallocation succeeded so set a->dp */
  1003. a->dp = tmp;
  1004. /* zero excess digits */
  1005. i = a->alloc;
  1006. a->alloc = size;
  1007. for (; i < a->alloc; i++) {
  1008. a->dp[i] = 0;
  1009. }
  1010. }
  1011. return MP_OKAY;
  1012. }
  1013. /* b = |a|
  1014. *
  1015. * Simple function copies the input and fixes the sign to positive
  1016. */
  1017. static int mp_abs (mp_int * a, mp_int * b)
  1018. {
  1019. int res;
  1020. /* copy a to b */
  1021. if (a != b) {
  1022. if ((res = mp_copy (a, b)) != MP_OKAY) {
  1023. return res;
  1024. }
  1025. }
  1026. /* force the sign of b to positive */
  1027. b->sign = MP_ZPOS;
  1028. return MP_OKAY;
  1029. }
  1030. /* set to a digit */
  1031. static void mp_set (mp_int * a, mp_digit b)
  1032. {
  1033. mp_zero (a);
  1034. a->dp[0] = b & MP_MASK;
  1035. a->used = (a->dp[0] != 0) ? 1 : 0;
  1036. }
  1037. /* b = a/2 */
  1038. static int mp_div_2(mp_int * a, mp_int * b)
  1039. {
  1040. int x, res, oldused;
  1041. /* copy */
  1042. if (b->alloc < a->used) {
  1043. if ((res = mp_grow (b, a->used)) != MP_OKAY) {
  1044. return res;
  1045. }
  1046. }
  1047. oldused = b->used;
  1048. b->used = a->used;
  1049. {
  1050. register mp_digit r, rr, *tmpa, *tmpb;
  1051. /* source alias */
  1052. tmpa = a->dp + b->used - 1;
  1053. /* dest alias */
  1054. tmpb = b->dp + b->used - 1;
  1055. /* carry */
  1056. r = 0;
  1057. for (x = b->used - 1; x >= 0; x--) {
  1058. /* get the carry for the next iteration */
  1059. rr = *tmpa & 1;
  1060. /* shift the current digit, add in carry and store */
  1061. *tmpb-- = (*tmpa-- >> 1) | (r << (DIGIT_BIT - 1));
  1062. /* forward carry to next iteration */
  1063. r = rr;
  1064. }
  1065. /* zero excess digits */
  1066. tmpb = b->dp + b->used;
  1067. for (x = b->used; x < oldused; x++) {
  1068. *tmpb++ = 0;
  1069. }
  1070. }
  1071. b->sign = a->sign;
  1072. mp_clamp (b);
  1073. return MP_OKAY;
  1074. }
  1075. /* shift left by a certain bit count */
  1076. static int mp_mul_2d (mp_int * a, int b, mp_int * c)
  1077. {
  1078. mp_digit d;
  1079. int res;
  1080. /* copy */
  1081. if (a != c) {
  1082. if ((res = mp_copy (a, c)) != MP_OKAY) {
  1083. return res;
  1084. }
  1085. }
  1086. if (c->alloc < (int)(c->used + b/DIGIT_BIT + 1)) {
  1087. if ((res = mp_grow (c, c->used + b / DIGIT_BIT + 1)) != MP_OKAY) {
  1088. return res;
  1089. }
  1090. }
  1091. /* shift by as many digits in the bit count */
  1092. if (b >= (int)DIGIT_BIT) {
  1093. if ((res = mp_lshd (c, b / DIGIT_BIT)) != MP_OKAY) {
  1094. return res;
  1095. }
  1096. }
  1097. /* shift any bit count < DIGIT_BIT */
  1098. d = (mp_digit) (b % DIGIT_BIT);
  1099. if (d != 0) {
  1100. register mp_digit *tmpc, shift, mask, r, rr;
  1101. register int x;
  1102. /* bitmask for carries */
  1103. mask = (((mp_digit)1) << d) - 1;
  1104. /* shift for msbs */
  1105. shift = DIGIT_BIT - d;
  1106. /* alias */
  1107. tmpc = c->dp;
  1108. /* carry */
  1109. r = 0;
  1110. for (x = 0; x < c->used; x++) {
  1111. /* get the higher bits of the current word */
  1112. rr = (*tmpc >> shift) & mask;
  1113. /* shift the current word and OR in the carry */
  1114. *tmpc = ((*tmpc << d) | r) & MP_MASK;
  1115. ++tmpc;
  1116. /* set the carry to the carry bits of the current word */
  1117. r = rr;
  1118. }
  1119. /* set final carry */
  1120. if (r != 0) {
  1121. c->dp[(c->used)++] = r;
  1122. }
  1123. }
  1124. mp_clamp (c);
  1125. return MP_OKAY;
  1126. }
  1127. static int mp_init_multi(mp_int *mp, ...)
  1128. {
  1129. mp_err res = MP_OKAY; /* Assume ok until proven otherwise */
  1130. int n = 0; /* Number of ok inits */
  1131. mp_int* cur_arg = mp;
  1132. va_list args;
  1133. va_start(args, mp); /* init args to next argument from caller */
  1134. while (cur_arg != NULL) {
  1135. if (mp_init(cur_arg) != MP_OKAY) {
  1136. /* Oops - error! Back-track and mp_clear what we already
  1137. succeeded in init-ing, then return error.
  1138. */
  1139. va_list clean_args;
  1140. /* end the current list */
  1141. va_end(args);
  1142. /* now start cleaning up */
  1143. cur_arg = mp;
  1144. va_start(clean_args, mp);
  1145. while (n--) {
  1146. mp_clear(cur_arg);
  1147. cur_arg = va_arg(clean_args, mp_int*);
  1148. }
  1149. va_end(clean_args);
  1150. res = MP_MEM;
  1151. break;
  1152. }
  1153. n++;
  1154. cur_arg = va_arg(args, mp_int*);
  1155. }
  1156. va_end(args);
  1157. return res; /* Assumed ok, if error flagged above. */
  1158. }
  1159. static void mp_clear_multi(mp_int *mp, ...)
  1160. {
  1161. mp_int* next_mp = mp;
  1162. va_list args;
  1163. va_start(args, mp);
  1164. while (next_mp != NULL) {
  1165. mp_clear(next_mp);
  1166. next_mp = va_arg(args, mp_int*);
  1167. }
  1168. va_end(args);
  1169. }
  1170. /* shift left a certain amount of digits */
  1171. static int mp_lshd (mp_int * a, int b)
  1172. {
  1173. int x, res;
  1174. /* if its less than zero return */
  1175. if (b <= 0) {
  1176. return MP_OKAY;
  1177. }
  1178. /* grow to fit the new digits */
  1179. if (a->alloc < a->used + b) {
  1180. if ((res = mp_grow (a, a->used + b)) != MP_OKAY) {
  1181. return res;
  1182. }
  1183. }
  1184. {
  1185. register mp_digit *top, *bottom;
  1186. /* increment the used by the shift amount then copy upwards */
  1187. a->used += b;
  1188. /* top */
  1189. top = a->dp + a->used - 1;
  1190. /* base */
  1191. bottom = a->dp + a->used - 1 - b;
  1192. /* much like mp_rshd this is implemented using a sliding window
  1193. * except the window goes the otherway around. Copying from
  1194. * the bottom to the top. see bn_mp_rshd.c for more info.
  1195. */
  1196. for (x = a->used - 1; x >= b; x--) {
  1197. *top-- = *bottom--;
  1198. }
  1199. /* zero the lower digits */
  1200. top = a->dp;
  1201. for (x = 0; x < b; x++) {
  1202. *top++ = 0;
  1203. }
  1204. }
  1205. return MP_OKAY;
  1206. }
  1207. /* returns the number of bits in an int */
  1208. static int mp_count_bits (mp_int * a)
  1209. {
  1210. int r;
  1211. mp_digit q;
  1212. /* shortcut */
  1213. if (a->used == 0) {
  1214. return 0;
  1215. }
  1216. /* get number of digits and add that */
  1217. r = (a->used - 1) * DIGIT_BIT;
  1218. /* take the last digit and count the bits in it */
  1219. q = a->dp[a->used - 1];
  1220. while (q > ((mp_digit) 0)) {
  1221. ++r;
  1222. q >>= ((mp_digit) 1);
  1223. }
  1224. return r;
  1225. }
  1226. /* calc a value mod 2**b */
  1227. static int mp_mod_2d (mp_int * a, int b, mp_int * c)
  1228. {
  1229. int x, res;
  1230. /* if b is <= 0 then zero the int */
  1231. if (b <= 0) {
  1232. mp_zero (c);
  1233. return MP_OKAY;
  1234. }
  1235. /* if the modulus is larger than the value than return */
  1236. if (b >= (int) (a->used * DIGIT_BIT)) {
  1237. res = mp_copy (a, c);
  1238. return res;
  1239. }
  1240. /* copy */
  1241. if ((res = mp_copy (a, c)) != MP_OKAY) {
  1242. return res;
  1243. }
  1244. /* zero digits above the last digit of the modulus */
  1245. for (x = (b / DIGIT_BIT) + ((b % DIGIT_BIT) == 0 ? 0 : 1); x < c->used; x++) {
  1246. c->dp[x] = 0;
  1247. }
  1248. /* clear the digit that is not completely outside/inside the modulus */
  1249. c->dp[b / DIGIT_BIT] &=
  1250. (mp_digit) ((((mp_digit) 1) << (((mp_digit) b) % DIGIT_BIT)) - ((mp_digit) 1));
  1251. mp_clamp (c);
  1252. return MP_OKAY;
  1253. }
  1254. /* slower bit-bang division... also smaller */
  1255. static int mp_div(mp_int * a, mp_int * b, mp_int * c, mp_int * d)
  1256. {
  1257. mp_int ta, tb, tq, q;
  1258. int res, n, n2;
  1259. /* is divisor zero ? */
  1260. if (mp_iszero (b) == 1) {
  1261. return MP_VAL;
  1262. }
  1263. /* if a < b then q=0, r = a */
  1264. if (mp_cmp_mag (a, b) == MP_LT) {
  1265. if (d != NULL) {
  1266. res = mp_copy (a, d);
  1267. } else {
  1268. res = MP_OKAY;
  1269. }
  1270. if (c != NULL) {
  1271. mp_zero (c);
  1272. }
  1273. return res;
  1274. }
  1275. /* init our temps */
  1276. if ((res = mp_init_multi(&ta, &tb, &tq, &q, NULL) != MP_OKAY)) {
  1277. return res;
  1278. }
  1279. mp_set(&tq, 1);
  1280. n = mp_count_bits(a) - mp_count_bits(b);
  1281. if (((res = mp_abs(a, &ta)) != MP_OKAY) ||
  1282. ((res = mp_abs(b, &tb)) != MP_OKAY) ||
  1283. ((res = mp_mul_2d(&tb, n, &tb)) != MP_OKAY) ||
  1284. ((res = mp_mul_2d(&tq, n, &tq)) != MP_OKAY)) {
  1285. goto LBL_ERR;
  1286. }
  1287. while (n-- >= 0) {
  1288. if (mp_cmp(&tb, &ta) != MP_GT) {
  1289. if (((res = mp_sub(&ta, &tb, &ta)) != MP_OKAY) ||
  1290. ((res = mp_add(&q, &tq, &q)) != MP_OKAY)) {
  1291. goto LBL_ERR;
  1292. }
  1293. }
  1294. if (((res = mp_div_2d(&tb, 1, &tb, NULL)) != MP_OKAY) ||
  1295. ((res = mp_div_2d(&tq, 1, &tq, NULL)) != MP_OKAY)) {
  1296. goto LBL_ERR;
  1297. }
  1298. }
  1299. /* now q == quotient and ta == remainder */
  1300. n = a->sign;
  1301. n2 = (a->sign == b->sign ? MP_ZPOS : MP_NEG);
  1302. if (c != NULL) {
  1303. mp_exch(c, &q);
  1304. c->sign = (mp_iszero(c) == MP_YES) ? MP_ZPOS : n2;
  1305. }
  1306. if (d != NULL) {
  1307. mp_exch(d, &ta);
  1308. d->sign = (mp_iszero(d) == MP_YES) ? MP_ZPOS : n;
  1309. }
  1310. LBL_ERR:
  1311. mp_clear_multi(&ta, &tb, &tq, &q, NULL);
  1312. return res;
  1313. }
  1314. #ifdef MP_LOW_MEM
  1315. #define TAB_SIZE 32
  1316. #else
  1317. #define TAB_SIZE 256
  1318. #endif
  1319. static int s_mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode)
  1320. {
  1321. mp_int M[TAB_SIZE], res, mu;
  1322. mp_digit buf;
  1323. int err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
  1324. int (*redux)(mp_int*,mp_int*,mp_int*);
  1325. /* find window size */
  1326. x = mp_count_bits (X);
  1327. if (x <= 7) {
  1328. winsize = 2;
  1329. } else if (x <= 36) {
  1330. winsize = 3;
  1331. } else if (x <= 140) {
  1332. winsize = 4;
  1333. } else if (x <= 450) {
  1334. winsize = 5;
  1335. } else if (x <= 1303) {
  1336. winsize = 6;
  1337. } else if (x <= 3529) {
  1338. winsize = 7;
  1339. } else {
  1340. winsize = 8;
  1341. }
  1342. #ifdef MP_LOW_MEM
  1343. if (winsize > 5) {
  1344. winsize = 5;
  1345. }
  1346. #endif
  1347. /* init M array */
  1348. /* init first cell */
  1349. if ((err = mp_init(&M[1])) != MP_OKAY) {
  1350. return err;
  1351. }
  1352. /* now init the second half of the array */
  1353. for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
  1354. if ((err = mp_init(&M[x])) != MP_OKAY) {
  1355. for (y = 1<<(winsize-1); y < x; y++) {
  1356. mp_clear (&M[y]);
  1357. }
  1358. mp_clear(&M[1]);
  1359. return err;
  1360. }
  1361. }
  1362. /* create mu, used for Barrett reduction */
  1363. if ((err = mp_init (&mu)) != MP_OKAY) {
  1364. goto LBL_M;
  1365. }
  1366. if (redmode == 0) {
  1367. if ((err = mp_reduce_setup (&mu, P)) != MP_OKAY) {
  1368. goto LBL_MU;
  1369. }
  1370. redux = mp_reduce;
  1371. } else {
  1372. if ((err = mp_reduce_2k_setup_l (P, &mu)) != MP_OKAY) {
  1373. goto LBL_MU;
  1374. }
  1375. redux = mp_reduce_2k_l;
  1376. }
  1377. /* create M table
  1378. *
  1379. * The M table contains powers of the base,
  1380. * e.g. M[x] = G**x mod P
  1381. *
  1382. * The first half of the table is not
  1383. * computed though accept for M[0] and M[1]
  1384. */
  1385. if ((err = mp_mod (G, P, &M[1])) != MP_OKAY) {
  1386. goto LBL_MU;
  1387. }
  1388. /* compute the value at M[1<<(winsize-1)] by squaring
  1389. * M[1] (winsize-1) times
  1390. */
  1391. if ((err = mp_copy (&M[1], &M[1 << (winsize - 1)])) != MP_OKAY) {
  1392. goto LBL_MU;
  1393. }
  1394. for (x = 0; x < (winsize - 1); x++) {
  1395. /* square it */
  1396. if ((err = mp_sqr (&M[1 << (winsize - 1)],
  1397. &M[1 << (winsize - 1)])) != MP_OKAY) {
  1398. goto LBL_MU;
  1399. }
  1400. /* reduce modulo P */
  1401. if ((err = redux (&M[1 << (winsize - 1)], P, &mu)) != MP_OKAY) {
  1402. goto LBL_MU;
  1403. }
  1404. }
  1405. /* create upper table, that is M[x] = M[x-1] * M[1] (mod P)
  1406. * for x = (2**(winsize - 1) + 1) to (2**winsize - 1)
  1407. */
  1408. for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {
  1409. if ((err = mp_mul (&M[x - 1], &M[1], &M[x])) != MP_OKAY) {
  1410. goto LBL_MU;
  1411. }
  1412. if ((err = redux (&M[x], P, &mu)) != MP_OKAY) {
  1413. goto LBL_MU;
  1414. }
  1415. }
  1416. /* setup result */
  1417. if ((err = mp_init (&res)) != MP_OKAY) {
  1418. goto LBL_MU;
  1419. }
  1420. mp_set (&res, 1);
  1421. /* set initial mode and bit cnt */
  1422. mode = 0;
  1423. bitcnt = 1;
  1424. buf = 0;
  1425. digidx = X->used - 1;
  1426. bitcpy = 0;
  1427. bitbuf = 0;
  1428. for (;;) {
  1429. /* grab next digit as required */
  1430. if (--bitcnt == 0) {
  1431. /* if digidx == -1 we are out of digits */
  1432. if (digidx == -1) {
  1433. break;
  1434. }
  1435. /* read next digit and reset the bitcnt */
  1436. buf = X->dp[digidx--];
  1437. bitcnt = (int) DIGIT_BIT;
  1438. }
  1439. /* grab the next msb from the exponent */
  1440. y = (buf >> (mp_digit)(DIGIT_BIT - 1)) & 1;
  1441. buf <<= (mp_digit)1;
  1442. /* if the bit is zero and mode == 0 then we ignore it
  1443. * These represent the leading zero bits before the first 1 bit
  1444. * in the exponent. Technically this opt is not required but it
  1445. * does lower the # of trivial squaring/reductions used
  1446. */
  1447. if (mode == 0 && y == 0) {
  1448. continue;
  1449. }
  1450. /* if the bit is zero and mode == 1 then we square */
  1451. if (mode == 1 && y == 0) {
  1452. if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
  1453. goto LBL_RES;
  1454. }
  1455. if ((err = redux (&res, P, &mu)) != MP_OKAY) {
  1456. goto LBL_RES;
  1457. }
  1458. continue;
  1459. }
  1460. /* else we add it to the window */
  1461. bitbuf |= (y << (winsize - ++bitcpy));
  1462. mode = 2;
  1463. if (bitcpy == winsize) {
  1464. /* ok window is filled so square as required and multiply */
  1465. /* square first */
  1466. for (x = 0; x < winsize; x++) {
  1467. if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
  1468. goto LBL_RES;
  1469. }
  1470. if ((err = redux (&res, P, &mu)) != MP_OKAY) {
  1471. goto LBL_RES;
  1472. }
  1473. }
  1474. /* then multiply */
  1475. if ((err = mp_mul (&res, &M[bitbuf], &res)) != MP_OKAY) {
  1476. goto LBL_RES;
  1477. }
  1478. if ((err = redux (&res, P, &mu)) != MP_OKAY) {
  1479. goto LBL_RES;
  1480. }
  1481. /* empty window and reset */
  1482. bitcpy = 0;
  1483. bitbuf = 0;
  1484. mode = 1;
  1485. }
  1486. }
  1487. /* if bits remain then square/multiply */
  1488. if (mode == 2 && bitcpy > 0) {
  1489. /* square then multiply if the bit is set */
  1490. for (x = 0; x < bitcpy; x++) {
  1491. if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
  1492. goto LBL_RES;
  1493. }
  1494. if ((err = redux (&res, P, &mu)) != MP_OKAY) {
  1495. goto LBL_RES;
  1496. }
  1497. bitbuf <<= 1;
  1498. if ((bitbuf & (1 << winsize)) != 0) {
  1499. /* then multiply */
  1500. if ((err = mp_mul (&res, &M[1], &res)) != MP_OKAY) {
  1501. goto LBL_RES;
  1502. }
  1503. if ((err = redux (&res, P, &mu)) != MP_OKAY) {
  1504. goto LBL_RES;
  1505. }
  1506. }
  1507. }
  1508. }
  1509. mp_exch (&res, Y);
  1510. err = MP_OKAY;
  1511. LBL_RES:mp_clear (&res);
  1512. LBL_MU:mp_clear (&mu);
  1513. LBL_M:
  1514. mp_clear(&M[1]);
  1515. for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
  1516. mp_clear (&M[x]);
  1517. }
  1518. return err;
  1519. }
  1520. /* computes b = a*a */
  1521. static int mp_sqr (mp_int * a, mp_int * b)
  1522. {
  1523. int res;
  1524. #ifdef BN_MP_TOOM_SQR_C
  1525. /* use Toom-Cook? */
  1526. if (a->used >= TOOM_SQR_CUTOFF) {
  1527. res = mp_toom_sqr(a, b);
  1528. /* Karatsuba? */
  1529. } else
  1530. #endif
  1531. #ifdef BN_MP_KARATSUBA_SQR_C
  1532. if (a->used >= KARATSUBA_SQR_CUTOFF) {
  1533. res = mp_karatsuba_sqr (a, b);
  1534. } else
  1535. #endif
  1536. {
  1537. #ifdef BN_FAST_S_MP_SQR_C
  1538. /* can we use the fast comba multiplier? */
  1539. if ((a->used * 2 + 1) < MP_WARRAY &&
  1540. a->used <
  1541. (1 << (sizeof(mp_word) * CHAR_BIT - 2*DIGIT_BIT - 1))) {
  1542. res = fast_s_mp_sqr (a, b);
  1543. } else
  1544. #endif
  1545. #ifdef BN_S_MP_SQR_C
  1546. res = s_mp_sqr (a, b);
  1547. #else
  1548. #error mp_sqr could fail
  1549. res = MP_VAL;
  1550. #endif
  1551. }
  1552. b->sign = MP_ZPOS;
  1553. return res;
  1554. }
  1555. /* reduces a modulo n where n is of the form 2**p - d
  1556. This differs from reduce_2k since "d" can be larger
  1557. than a single digit.
  1558. */
  1559. static int mp_reduce_2k_l(mp_int *a, mp_int *n, mp_int *d)
  1560. {
  1561. mp_int q;
  1562. int p, res;
  1563. if ((res = mp_init(&q)) != MP_OKAY) {
  1564. return res;
  1565. }
  1566. p = mp_count_bits(n);
  1567. top:
  1568. /* q = a/2**p, a = a mod 2**p */
  1569. if ((res = mp_div_2d(a, p, &q, a)) != MP_OKAY) {
  1570. goto ERR;
  1571. }
  1572. /* q = q * d */
  1573. if ((res = mp_mul(&q, d, &q)) != MP_OKAY) {
  1574. goto ERR;
  1575. }
  1576. /* a = a + q */
  1577. if ((res = s_mp_add(a, &q, a)) != MP_OKAY) {
  1578. goto ERR;
  1579. }
  1580. if (mp_cmp_mag(a, n) != MP_LT) {
  1581. s_mp_sub(a, n, a);
  1582. goto top;
  1583. }
  1584. ERR:
  1585. mp_clear(&q);
  1586. return res;
  1587. }
  1588. /* determines the setup value */
  1589. static int mp_reduce_2k_setup_l(mp_int *a, mp_int *d)
  1590. {
  1591. int res;
  1592. mp_int tmp;
  1593. if ((res = mp_init(&tmp)) != MP_OKAY) {
  1594. return res;
  1595. }
  1596. if ((res = mp_2expt(&tmp, mp_count_bits(a))) != MP_OKAY) {
  1597. goto ERR;
  1598. }
  1599. if ((res = s_mp_sub(&tmp, a, d)) != MP_OKAY) {
  1600. goto ERR;
  1601. }
  1602. ERR:
  1603. mp_clear(&tmp);
  1604. return res;
  1605. }
  1606. /* computes a = 2**b
  1607. *
  1608. * Simple algorithm which zeroes the int, grows it then just sets one bit
  1609. * as required.
  1610. */
  1611. static int mp_2expt (mp_int * a, int b)
  1612. {
  1613. int res;
  1614. /* zero a as per default */
  1615. mp_zero (a);
  1616. /* grow a to accomodate the single bit */
  1617. if ((res = mp_grow (a, b / DIGIT_BIT + 1)) != MP_OKAY) {
  1618. return res;
  1619. }
  1620. /* set the used count of where the bit will go */
  1621. a->used = b / DIGIT_BIT + 1;
  1622. /* put the single bit in its place */
  1623. a->dp[b / DIGIT_BIT] = ((mp_digit)1) << (b % DIGIT_BIT);
  1624. return MP_OKAY;
  1625. }
  1626. /* pre-calculate the value required for Barrett reduction
  1627. * For a given modulus "b" it calulates the value required in "a"
  1628. */
  1629. static int mp_reduce_setup (mp_int * a, mp_int * b)
  1630. {
  1631. int res;
  1632. if ((res = mp_2expt (a, b->used * 2 * DIGIT_BIT)) != MP_OKAY) {
  1633. return res;
  1634. }
  1635. return mp_div (a, b, a, NULL);
  1636. }
  1637. /* reduces x mod m, assumes 0 < x < m**2, mu is
  1638. * precomputed via mp_reduce_setup.
  1639. * From HAC pp.604 Algorithm 14.42
  1640. */
  1641. static int mp_reduce (mp_int * x, mp_int * m, mp_int * mu)
  1642. {
  1643. mp_int q;
  1644. int res, um = m->used;
  1645. /* q = x */
  1646. if ((res = mp_init_copy (&q, x)) != MP_OKAY) {
  1647. return res;
  1648. }
  1649. /* q1 = x / b**(k-1) */
  1650. mp_rshd (&q, um - 1);
  1651. /* according to HAC this optimization is ok */
  1652. if (((unsigned long) um) > (((mp_digit)1) << (DIGIT_BIT - 1))) {
  1653. if ((res = mp_mul (&q, mu, &q)) != MP_OKAY) {
  1654. goto CLEANUP;
  1655. }
  1656. } else {
  1657. #ifdef BN_S_MP_MUL_HIGH_DIGS_C
  1658. if ((res = s_mp_mul_high_digs (&q, mu, &q, um)) != MP_OKAY) {
  1659. goto CLEANUP;
  1660. }
  1661. #elif defined(BN_FAST_S_MP_MUL_HIGH_DIGS_C)
  1662. if ((res = fast_s_mp_mul_high_digs (&q, mu, &q, um)) != MP_OKAY) {
  1663. goto CLEANUP;
  1664. }
  1665. #else
  1666. {
  1667. #error mp_reduce would always fail
  1668. res = MP_VAL;
  1669. goto CLEANUP;
  1670. }
  1671. #endif
  1672. }
  1673. /* q3 = q2 / b**(k+1) */
  1674. mp_rshd (&q, um + 1);
  1675. /* x = x mod b**(k+1), quick (no division) */
  1676. if ((res = mp_mod_2d (x, DIGIT_BIT * (um + 1), x)) != MP_OKAY) {
  1677. goto CLEANUP;
  1678. }
  1679. /* q = q * m mod b**(k+1), quick (no division) */
  1680. if ((res = s_mp_mul_digs (&q, m, &q, um + 1)) != MP_OKAY) {
  1681. goto CLEANUP;
  1682. }
  1683. /* x = x - q */
  1684. if ((res = mp_sub (x, &q, x)) != MP_OKAY) {
  1685. goto CLEANUP;
  1686. }
  1687. /* If x < 0, add b**(k+1) to it */
  1688. if (mp_cmp_d (x, 0) == MP_LT) {
  1689. mp_set (&q, 1);
  1690. if ((res = mp_lshd (&q, um + 1)) != MP_OKAY) {
  1691. goto CLEANUP;
  1692. }
  1693. if ((res = mp_add (x, &q, x)) != MP_OKAY) {
  1694. goto CLEANUP;
  1695. }
  1696. }
  1697. /* Back off if it's too big */
  1698. while (mp_cmp (x, m) != MP_LT) {
  1699. if ((res = s_mp_sub (x, m, x)) != MP_OKAY) {
  1700. goto CLEANUP;
  1701. }
  1702. }
  1703. CLEANUP:
  1704. mp_clear (&q);
  1705. return res;
  1706. }
  1707. /* multiplies |a| * |b| and only computes upto digs digits of result
  1708. * HAC pp. 595, Algorithm 14.12 Modified so you can control how
  1709. * many digits of output are created.
  1710. */
  1711. static int s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
  1712. {
  1713. mp_int t;
  1714. int res, pa, pb, ix, iy;
  1715. mp_digit u;
  1716. mp_word r;
  1717. mp_digit tmpx, *tmpt, *tmpy;
  1718. /* can we use the fast multiplier? */
  1719. if (((digs) < MP_WARRAY) &&
  1720. MIN (a->used, b->used) <
  1721. (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
  1722. return fast_s_mp_mul_digs (a, b, c, digs);
  1723. }
  1724. if ((res = mp_init_size (&t, digs)) != MP_OKAY) {
  1725. return res;
  1726. }
  1727. t.used = digs;
  1728. /* compute the digits of the product directly */
  1729. pa = a->used;
  1730. for (ix = 0; ix < pa; ix++) {
  1731. /* set the carry to zero */
  1732. u = 0;
  1733. /* limit ourselves to making digs digits of output */
  1734. pb = MIN (b->used, digs - ix);
  1735. /* setup some aliases */
  1736. /* copy of the digit from a used within the nested loop */
  1737. tmpx = a->dp[ix];
  1738. /* an alias for the destination shifted ix places */
  1739. tmpt = t.dp + ix;
  1740. /* an alias for the digits of b */
  1741. tmpy = b->dp;
  1742. /* compute the columns of the output and propagate the carry */
  1743. for (iy = 0; iy < pb; iy++) {
  1744. /* compute the column as a mp_word */
  1745. r = ((mp_word)*tmpt) +
  1746. ((mp_word)tmpx) * ((mp_word)*tmpy++) +
  1747. ((mp_word) u);
  1748. /* the new column is the lower part of the result */
  1749. *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
  1750. /* get the carry word from the result */
  1751. u = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
  1752. }
  1753. /* set carry if it is placed below digs */
  1754. if (ix + iy < digs) {
  1755. *tmpt = u;
  1756. }
  1757. }
  1758. mp_clamp (&t);
  1759. mp_exch (&t, c);
  1760. mp_clear (&t);
  1761. return MP_OKAY;
  1762. }
  1763. /* Fast (comba) multiplier
  1764. *
  1765. * This is the fast column-array [comba] multiplier. It is
  1766. * designed to compute the columns of the product first
  1767. * then handle the carries afterwards. This has the effect
  1768. * of making the nested loops that compute the columns very
  1769. * simple and schedulable on super-scalar processors.
  1770. *
  1771. * This has been modified to produce a variable number of
  1772. * digits of output so if say only a half-product is required
  1773. * you don't have to compute the upper half (a feature
  1774. * required for fast Barrett reduction).
  1775. *
  1776. * Based on Algorithm 14.12 on pp.595 of HAC.
  1777. *
  1778. */
  1779. static int fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
  1780. {
  1781. int olduse, res, pa, ix, iz;
  1782. mp_digit W[MP_WARRAY];
  1783. register mp_word _W;
  1784. /* grow the destination as required */
  1785. if (c->alloc < digs) {
  1786. if ((res = mp_grow (c, digs)) != MP_OKAY) {
  1787. return res;
  1788. }
  1789. }
  1790. /* number of output digits to produce */
  1791. pa = MIN(digs, a->used + b->used);
  1792. /* clear the carry */
  1793. _W = 0;
  1794. for (ix = 0; ix < pa; ix++) {
  1795. int tx, ty;
  1796. int iy;
  1797. mp_digit *tmpx, *tmpy;
  1798. /* get offsets into the two bignums */
  1799. ty = MIN(b->used-1, ix);
  1800. tx = ix - ty;
  1801. /* setup temp aliases */
  1802. tmpx = a->dp + tx;
  1803. tmpy = b->dp + ty;
  1804. /* this is the number of times the loop will iterrate, essentially
  1805. while (tx++ < a->used && ty-- >= 0) { ... }
  1806. */
  1807. iy = MIN(a->used-tx, ty+1);
  1808. /* execute loop */
  1809. for (iz = 0; iz < iy; ++iz) {
  1810. _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--);
  1811. }
  1812. /* store term */
  1813. W[ix] = ((mp_digit)_W) & MP_MASK;
  1814. /* make next carry */
  1815. _W = _W >> ((mp_word)DIGIT_BIT);
  1816. }
  1817. /* setup dest */
  1818. olduse = c->used;
  1819. c->used = pa;
  1820. {
  1821. register mp_digit *tmpc;
  1822. tmpc = c->dp;
  1823. for (ix = 0; ix < pa+1; ix++) {
  1824. /* now extract the previous digit [below the carry] */
  1825. *tmpc++ = W[ix];
  1826. }
  1827. /* clear unused digits [that existed in the old copy of c] */
  1828. for (; ix < olduse; ix++) {
  1829. *tmpc++ = 0;
  1830. }
  1831. }
  1832. mp_clamp (c);
  1833. return MP_OKAY;
  1834. }
  1835. /* init an mp_init for a given size */
  1836. static int mp_init_size (mp_int * a, int size)
  1837. {
  1838. int x;
  1839. /* pad size so there are always extra digits */
  1840. size += (MP_PREC * 2) - (size % MP_PREC);
  1841. /* alloc mem */
  1842. a->dp = OPT_CAST(mp_digit) XMALLOC (sizeof (mp_digit) * size);
  1843. if (a->dp == NULL) {
  1844. return MP_MEM;
  1845. }
  1846. /* set the members */
  1847. a->used = 0;
  1848. a->alloc = size;
  1849. a->sign = MP_ZPOS;
  1850. /* zero the digits */
  1851. for (x = 0; x < size; x++) {
  1852. a->dp[x] = 0;
  1853. }
  1854. return MP_OKAY;
  1855. }
  1856. /* low level squaring, b = a*a, HAC pp.596-597, Algorithm 14.16 */
  1857. static int s_mp_sqr (mp_int * a, mp_int * b)
  1858. {
  1859. mp_int t;
  1860. int res, ix, iy, pa;
  1861. mp_word r;
  1862. mp_digit u, tmpx, *tmpt;
  1863. pa = a->used;
  1864. if ((res = mp_init_size (&t, 2*pa + 1)) != MP_OKAY) {
  1865. return res;
  1866. }
  1867. /* default used is maximum possible size */
  1868. t.used = 2*pa + 1;
  1869. for (ix = 0; ix < pa; ix++) {
  1870. /* first calculate the digit at 2*ix */
  1871. /* calculate double precision result */
  1872. r = ((mp_word) t.dp[2*ix]) +
  1873. ((mp_word)a->dp[ix])*((mp_word)a->dp[ix]);
  1874. /* store lower part in result */
  1875. t.dp[ix+ix] = (mp_digit) (r & ((mp_word) MP_MASK));
  1876. /* get the carry */
  1877. u = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
  1878. /* left hand side of A[ix] * A[iy] */
  1879. tmpx = a->dp[ix];
  1880. /* alias for where to store the results */
  1881. tmpt = t.dp + (2*ix + 1);
  1882. for (iy = ix + 1; iy < pa; iy++) {
  1883. /* first calculate the product */
  1884. r = ((mp_word)tmpx) * ((mp_word)a->dp[iy]);
  1885. /* now calculate the double precision result, note we use
  1886. * addition instead of *2 since it's easier to optimize
  1887. */
  1888. r = ((mp_word) *tmpt) + r + r + ((mp_word) u);
  1889. /* store lower part */
  1890. *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
  1891. /* get carry */
  1892. u = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
  1893. }
  1894. /* propagate upwards */
  1895. while (u != ((mp_digit) 0)) {
  1896. r = ((mp_word) *tmpt) + ((mp_word) u);
  1897. *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
  1898. u = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
  1899. }
  1900. }
  1901. mp_clamp (&t);
  1902. mp_exch (&t, b);
  1903. mp_clear (&t);
  1904. return MP_OKAY;
  1905. }
  1906. /* multiplies |a| * |b| and does not compute the lower digs digits
  1907. * [meant to get the higher part of the product]
  1908. */
  1909. static int s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
  1910. {
  1911. mp_int t;
  1912. int res, pa, pb, ix, iy;
  1913. mp_digit u;
  1914. mp_word r;
  1915. mp_digit tmpx, *tmpt, *tmpy;
  1916. /* can we use the fast multiplier? */
  1917. #ifdef BN_FAST_S_MP_MUL_HIGH_DIGS_C
  1918. if (((a->used + b->used + 1) < MP_WARRAY)
  1919. && MIN (a->used, b->used) < (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
  1920. return fast_s_mp_mul_high_digs (a, b, c, digs);
  1921. }
  1922. #endif
  1923. if ((res = mp_init_size (&t, a->used + b->used + 1)) != MP_OKAY) {
  1924. return res;
  1925. }
  1926. t.used = a->used + b->used + 1;
  1927. pa = a->used;
  1928. pb = b->used;
  1929. for (ix = 0; ix < pa; ix++) {
  1930. /* clear the carry */
  1931. u = 0;
  1932. /* left hand side of A[ix] * B[iy] */
  1933. tmpx = a->dp[ix];
  1934. /* alias to the address of where the digits will be stored */
  1935. tmpt = &(t.dp[digs]);
  1936. /* alias for where to read the right hand side from */
  1937. tmpy = b->dp + (digs - ix);
  1938. for (iy = digs - ix; iy < pb; iy++) {
  1939. /* calculate the double precision result */
  1940. r = ((mp_word)*tmpt) +
  1941. ((mp_word)tmpx) * ((mp_word)*tmpy++) +
  1942. ((mp_word) u);
  1943. /* get the lower part */
  1944. *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
  1945. /* carry the carry */
  1946. u = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
  1947. }
  1948. *tmpt = u;
  1949. }
  1950. mp_clamp (&t);
  1951. mp_exch (&t, c);
  1952. mp_clear (&t);
  1953. return MP_OKAY;
  1954. }