From 319c7264b0f5b6851f1582f8af9619145cd2f0b2 Mon Sep 17 00:00:00 2001 From: "Dr. Stephen Henson" Date: Mon, 24 Oct 2011 13:24:28 +0000 Subject: [PATCH 001/120] typo --- crypto/armcap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crypto/armcap.c b/crypto/armcap.c index 8dbd741087..5258d2fbdd 100644 --- a/crypto/armcap.c +++ b/crypto/armcap.c @@ -30,7 +30,7 @@ unsigned int OPENSSL_rdtsc(void) } #if defined(__GNUC__) && __GNUC__>=2 -void OPENSSL_cpuid_setup(void) __attribute__((constructor)) +void OPENSSL_cpuid_setup(void) __attribute__((constructor)); #endif void OPENSSL_cpuid_setup(void) { From 51035e733c1f62140a4dd282d981d5884ee00e0a Mon Sep 17 00:00:00 2001 From: "Dr. Stephen Henson" Date: Mon, 24 Oct 2011 16:53:59 +0000 Subject: [PATCH 002/120] prepare for RC1 --- README.FIPS | 2 +- fips/fips_locl.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/README.FIPS b/README.FIPS index c41bab9930..cbd598c943 100644 --- a/README.FIPS +++ b/README.FIPS @@ -1,4 +1,4 @@ -Preliminary status and build information for FIPS module v2.0 +Preliminary status and build information for FIPS module v2.0 rc1 NB: if you are cross compiling you now need to use the latest "incore" script this can be found at util/incore in the tarballs. diff --git a/fips/fips_locl.h b/fips/fips_locl.h index df3863f91e..5af6c1f367 100644 --- a/fips/fips_locl.h +++ b/fips/fips_locl.h @@ -67,8 +67,8 @@ int fips_post_failed(int id, int subid, void *ex); int fips_post_corrupt(int id, int subid, void *ex); int fips_post_status(void); -#define FIPS_MODULE_VERSION_NUMBER 0x20000000L -#define FIPS_MODULE_VERSION_TEXT "FIPS 2.0-dev unvalidated test module xx XXX xxxx" +#define FIPS_MODULE_VERSION_NUMBER 0x20000001L +#define FIPS_MODULE_VERSION_TEXT "FIPS 2.0-rc1 unvalidated test module xx XXX xxxx" #ifdef __cplusplus } From 45e5f551acf65e1b2c36161e356637045cc5f793 Mon Sep 17 00:00:00 2001 From: "Dr. 
Stephen Henson" Date: Mon, 24 Oct 2011 16:58:49 +0000 Subject: [PATCH 003/120] Prepare for RC2. --- README.FIPS | 2 +- fips/fips_locl.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/README.FIPS b/README.FIPS index cbd598c943..87253f6bfb 100644 --- a/README.FIPS +++ b/README.FIPS @@ -1,4 +1,4 @@ -Preliminary status and build information for FIPS module v2.0 rc1 +Preliminary status and build information for FIPS module v2.0 NB: if you are cross compiling you now need to use the latest "incore" script this can be found at util/incore in the tarballs. diff --git a/fips/fips_locl.h b/fips/fips_locl.h index 5af6c1f367..24743be6ab 100644 --- a/fips/fips_locl.h +++ b/fips/fips_locl.h @@ -67,8 +67,8 @@ int fips_post_failed(int id, int subid, void *ex); int fips_post_corrupt(int id, int subid, void *ex); int fips_post_status(void); -#define FIPS_MODULE_VERSION_NUMBER 0x20000001L -#define FIPS_MODULE_VERSION_TEXT "FIPS 2.0-rc1 unvalidated test module xx XXX xxxx" +#define FIPS_MODULE_VERSION_NUMBER 0x20000002L +#define FIPS_MODULE_VERSION_TEXT "FIPS 2.0-rc2-dev unvalidated test module xx XXX xxxx" #ifdef __cplusplus } From 9ab6d6813ec544a15f6aea3d4af83629fa1eb4e5 Mon Sep 17 00:00:00 2001 From: "Dr. Stephen Henson" Date: Wed, 26 Oct 2011 16:46:20 +0000 Subject: [PATCH 004/120] PR: 2632 Submitted by: emmanuel.azencot@bull.net Reviewed by: steve Return -1 immediately if not affine coordinates as BN_CTX has not been set up. 
--- crypto/ec/ec2_smpl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crypto/ec/ec2_smpl.c b/crypto/ec/ec2_smpl.c index f37347b5e1..9a9476f0c1 100644 --- a/crypto/ec/ec2_smpl.c +++ b/crypto/ec/ec2_smpl.c @@ -556,7 +556,7 @@ int ec_GF2m_simple_is_on_curve(const EC_GROUP *group, const EC_POINT *point, BN_ field_sqr = group->meth->field_sqr; /* only support affine coordinates */ - if (!point->Z_is_one) goto err; + if (!point->Z_is_one) return -1; if (ctx == NULL) { From 8b8096d082292211cc977af195e9b12c88c26de3 Mon Sep 17 00:00:00 2001 From: "Dr. Stephen Henson" Date: Tue, 1 Nov 2011 13:45:30 +0000 Subject: [PATCH 005/120] Add support for multicall fips_algvs utility combining functionality of all fips test utilities in a single binary and some minimal script parsing for platforms lacking a suitable shell. In order to keep changes to the build system to a minimum it #includes all the utilities C source files (yuck). --- CHANGES | 4 + Makefile.fips | 2 + fips/aes/fips_aesavs.c | 9 +- fips/aes/fips_gcmtest.c | 6 +- fips/cmac/fips_cmactest.c | 4 + fips/des/fips_desmovs.c | 52 ++++--- fips/dh/fips_dhvs.c | 8 +- fips/dsa/fips_dssvs.c | 6 +- fips/ecdh/fips_ecdhvs.c | 10 +- fips/ecdsa/fips_ecdsavs.c | 15 +- fips/fips_test_suite.c | 6 +- fips/fips_utl.h | 5 + fips/hmac/fips_hmactest.c | 4 + fips/rand/fips_drbgvs.c | 12 +- fips/rand/fips_rngvs.c | 6 +- fips/rsa/fips_rsagtest.c | 4 + fips/rsa/fips_rsastest.c | 4 + fips/rsa/fips_rsavtest.c | 10 +- fips/sha/fips_shatest.c | 4 + test/Makefile | 10 +- test/fips_algvs.c | 312 ++++++++++++++++++++++++++++++++++++++ 21 files changed, 442 insertions(+), 51 deletions(-) create mode 100644 test/fips_algvs.c diff --git a/CHANGES b/CHANGES index 0d70e034da..4159394ef9 100644 --- a/CHANGES +++ b/CHANGES @@ -4,6 +4,10 @@ Changes between 1.0.1 and 1.1.0 [xx XXX xxxx] + *) Add fips_algvs: a multicall fips utility incorporating all the algorithm + test programs and fips_test_suite.
Includes functionality to parse + the minimal script output of fipsalgest.pl directly. + *) Add authorisation parameter to FIPS_module_mode_set(). [Steve Henson] diff --git a/Makefile.fips b/Makefile.fips index 703c9f9228..36e9a7d65b 100644 --- a/Makefile.fips +++ b/Makefile.fips @@ -387,6 +387,8 @@ build_apps: @dir=apps; target=all; $(BUILD_ONE_CMD) build_tests: @dir=test; target=fipsexe; $(BUILD_ONE_CMD) +build_algvs: + @dir=test; target=fipsalgvs; $(BUILD_ONE_CMD) build_tools: @dir=tools; target=all; $(BUILD_ONE_CMD) diff --git a/fips/aes/fips_aesavs.c b/fips/aes/fips_aesavs.c index 84bcbac32a..9a09964221 100644 --- a/fips/aes/fips_aesavs.c +++ b/fips/aes/fips_aesavs.c @@ -780,7 +780,7 @@ static int proc_file(char *rqfile, char *rspfile) if(do_mct(amode, akeysz, aKey, iVec, dir, (unsigned char*)plaintext, len, rfp) < 0) - EXIT(1); + err = 1; } else { @@ -862,7 +862,11 @@ static int proc_file(char *rqfile, char *rspfile) aes_test -d xxxxx.xxx The default is: -d req.txt --------------------------------------------------*/ +#ifdef FIPS_ALGVS +int fips_aesavs_main(int argc, char **argv) +#else int main(int argc, char **argv) +#endif { char *rqlist = "req.txt", *rspfile = NULL; FILE *fp = NULL; @@ -915,7 +919,7 @@ int main(int argc, char **argv) if (proc_file(rfn, rspfile)) { printf(">>> Processing failed for: %s <<<\n", rfn); - EXIT(1); + return 1; } } fclose(fp); @@ -929,7 +933,6 @@ int main(int argc, char **argv) printf(">>> Processing failed for: %s <<<\n", fn); } } - EXIT(0); return 0; } diff --git a/fips/aes/fips_gcmtest.c b/fips/aes/fips_gcmtest.c index 3839de8f8a..a7c787368a 100644 --- a/fips/aes/fips_gcmtest.c +++ b/fips/aes/fips_gcmtest.c @@ -496,7 +496,11 @@ static void ccmtest(FILE *in, FILE *out) FIPS_cipher_ctx_cleanup(&ctx); } -int main(int argc,char **argv) +#ifdef FIPS_ALGVS +int fips_gcmtest_main(int argc, char **argv) +#else +int main(int argc, char **argv) +#endif { int encrypt; int xts = 0, ccm = 0; diff --git a/fips/cmac/fips_cmactest.c 
b/fips/cmac/fips_cmactest.c index 6d799f2d5f..2c8c7664e9 100644 --- a/fips/cmac/fips_cmactest.c +++ b/fips/cmac/fips_cmactest.c @@ -92,7 +92,11 @@ static int print_cmac_ver(const EVP_CIPHER *cipher, FILE *out, unsigned char *Mac, int Maclen, int Tlen); +#ifdef FIPS_ALGVS +int fips_cmactest_main(int argc, char **argv) +#else int main(int argc, char **argv) +#endif { FILE *in = NULL, *out = NULL; int mode = 0; /* 0 => Generate, 1 => Verify */ diff --git a/fips/des/fips_desmovs.c b/fips/des/fips_desmovs.c index e8766561ce..29035f08c7 100644 --- a/fips/des/fips_desmovs.c +++ b/fips/des/fips_desmovs.c @@ -102,7 +102,7 @@ static int DESTest(EVP_CIPHER_CTX *ctx, if (akeysz != 192) { printf("Invalid key size: %d\n", akeysz); - EXIT(1); + return 0; } if (fips_strcasecmp(amode, "CBC") == 0) @@ -120,7 +120,7 @@ static int DESTest(EVP_CIPHER_CTX *ctx, else { printf("Unknown mode: %s\n", amode); - EXIT(1); + return 0; } if (FIPS_cipherinit(ctx, cipher, aKey, iVec, dir) <= 0) @@ -155,12 +155,12 @@ static void shiftin(unsigned char *dst,unsigned char *src,int nbits) } /*-----------------------------------------------*/ -char *t_tag[2] = {"PLAINTEXT", "CIPHERTEXT"}; -char *t_mode[6] = {"CBC","ECB","OFB","CFB1","CFB8","CFB64"}; -enum Mode {CBC, ECB, OFB, CFB1, CFB8, CFB64}; +char *tdes_t_tag[2] = {"PLAINTEXT", "CIPHERTEXT"}; +char *tdes_t_mode[6] = {"CBC","ECB","OFB","CFB1","CFB8","CFB64"}; +enum tdes_Mode {TCBC, TECB, TOFB, TCFB1, TCFB8, TCFB64}; int Sizes[6]={64,64,64,1,8,64}; -static void do_mct(char *amode, +static int do_tmct(char *amode, int akeysz, int numkeys, unsigned char *akey,unsigned char *ivec, int dir, unsigned char *text, int len, FILE *rfp) @@ -170,12 +170,12 @@ static void do_mct(char *amode, unsigned char text0[8]; for (imode=0 ; imode < 6 ; ++imode) - if(!strcmp(amode,t_mode[imode])) + if(!strcmp(amode,tdes_t_mode[imode])) break; if (imode == 6) { printf("Unrecognized mode: %s\n", amode); - EXIT(1); + return 0; } for(i=0 ; i < 400 ; ++i) { @@ -196,12 +196,12 @@ 
static void do_mct(char *amode, OutputValue("",akey+n*8,8,rfp,0); } - if(imode != ECB) + if(imode != TECB) OutputValue("IV",ivec,8,rfp,0); - OutputValue(t_tag[dir^1],text,len,rfp,imode == CFB1); + OutputValue(tdes_t_tag[dir^1],text,len,rfp,imode == TCFB1); #if 0 /* compensate for endianness */ - if(imode == CFB1) + if(imode == TCFB1) text[0]<<=7; #endif memcpy(text0,text,8); @@ -223,18 +223,18 @@ static void do_mct(char *amode, } if(j == 9999) { - OutputValue(t_tag[dir],text,len,rfp,imode == CFB1); + OutputValue(tdes_t_tag[dir],text,len,rfp,imode == TCFB1); /* memcpy(ivec,text,8); */ } /* DebugValue("iv",ctx.iv,8); */ /* accumulate material for the next key */ shiftin(nk,text,Sizes[imode]); /* DebugValue("nk",nk,24);*/ - if((dir && (imode == CFB1 || imode == CFB8 || imode == CFB64 - || imode == CBC)) || imode == OFB) + if((dir && (imode == TCFB1 || imode == TCFB8 + || imode == TCFB64 || imode == TCBC)) || imode == TOFB) memcpy(text,old_iv,8); - if(!dir && (imode == CFB1 || imode == CFB8 || imode == CFB64)) + if(!dir && (imode == TCFB1 || imode == TCFB8 || imode == TCFB64)) { /* the test specifies using the output of the raw DES operation which we don't have, so reconstruct it... */ @@ -260,13 +260,14 @@ static void do_mct(char *amode, /* pointless exercise - the final text doesn't depend on the initial text in OFB mode, so who cares what it is? (Who designed these tests?) 
*/ - if(imode == OFB) + if(imode == TOFB) for(n=0 ; n < 8 ; ++n) text[n]=text0[n]^old_iv[n]; } + return 1; } -static int proc_file(char *rqfile, char *rspfile) +static int tproc_file(char *rqfile, char *rspfile) { char afn[256], rfn[256]; FILE *afp = NULL, *rfp = NULL; @@ -546,7 +547,9 @@ static int proc_file(char *rqfile, char *rspfile) PrintValue("PLAINTEXT", (unsigned char*)plaintext, len); if (strcmp(atest, "Monte") == 0) /* Monte Carlo Test */ { - do_mct(amode,akeysz,numkeys,aKey,iVec,dir,plaintext,len,rfp); + if (!do_tmct(amode,akeysz,numkeys,aKey,iVec, + dir,plaintext,len,rfp)) + return -1; } else { @@ -585,7 +588,7 @@ static int proc_file(char *rqfile, char *rspfile) PrintValue("CIPHERTEXT", ciphertext, len); if (strcmp(atest, "Monte") == 0) /* Monte Carlo Test */ { - do_mct(amode, akeysz, numkeys, aKey, iVec, + do_tmct(amode, akeysz, numkeys, aKey, iVec, dir, ciphertext, len, rfp); } else @@ -631,7 +634,11 @@ static int proc_file(char *rqfile, char *rspfile) aes_test -d xxxxx.xxx The default is: -d req.txt --------------------------------------------------*/ +#ifdef FIPS_ALGVS +int fips_desmovs_main(int argc, char **argv) +#else int main(int argc, char **argv) +#endif { char *rqlist = "req.txt", *rspfile = NULL; FILE *fp = NULL; @@ -680,10 +687,10 @@ int main(int argc, char **argv) strtok(fn, "\r\n"); strcpy(rfn, fn); printf("Processing: %s\n", rfn); - if (proc_file(rfn, rspfile)) + if (tproc_file(rfn, rspfile)) { printf(">>> Processing failed for: %s <<<\n", rfn); - EXIT(1); + return -1; } } fclose(fp); @@ -692,12 +699,11 @@ int main(int argc, char **argv) { if (VERBOSE) printf("Processing: %s\n", fn); - if (proc_file(fn, rspfile)) + if (tproc_file(fn, rspfile)) { printf(">>> Processing failed for: %s <<<\n", fn); } } - EXIT(0); return 0; } diff --git a/fips/dh/fips_dhvs.c b/fips/dh/fips_dhvs.c index ad760c8aaa..3ba1977862 100644 --- a/fips/dh/fips_dhvs.c +++ b/fips/dh/fips_dhvs.c @@ -145,8 +145,12 @@ static void output_Zhash(FILE *out, int exout, 
OPENSSL_cleanse(Z, Zlen); OPENSSL_free(Z); } - -int main(int argc,char **argv) + +#ifdef FIPS_ALGVS +int fips_dhvs_main(int argc, char **argv) +#else +int main(int argc, char **argv) +#endif { char **args = argv + 1; int argn = argc - 1; diff --git a/fips/dsa/fips_dssvs.c b/fips/dsa/fips_dssvs.c index 45bca7c155..706babf050 100644 --- a/fips/dsa/fips_dssvs.c +++ b/fips/dsa/fips_dssvs.c @@ -717,7 +717,11 @@ static void sigver(FILE *in, FILE *out) } } -int main(int argc,char **argv) +#ifdef FIPS_ALGVS +int fips_dssvs_main(int argc, char **argv) +#else +int main(int argc, char **argv) +#endif { FILE *in, *out; if (argc == 4) diff --git a/fips/ecdh/fips_ecdhvs.c b/fips/ecdh/fips_ecdhvs.c index 72ebe815dd..821821a96d 100644 --- a/fips/ecdh/fips_ecdhvs.c +++ b/fips/ecdh/fips_ecdhvs.c @@ -76,7 +76,7 @@ int main(int argc, char **argv) #include "fips_utl.h" -static const EVP_MD *parse_md(char *line) +static const EVP_MD *eparse_md(char *line) { char *p; if (line[0] != '[' || line[1] != 'E') @@ -301,7 +301,11 @@ static void ec_output_Zhash(FILE *out, int exout, EC_GROUP *group, EC_POINT_free(peerkey); } -int main(int argc,char **argv) +#ifdef FIPS_ALGVS +int fips_ecdhvs_main(int argc, char **argv) +#else +int main(int argc, char **argv) +#endif { char **args = argv + 1; int argn = argc - 1; @@ -412,7 +416,7 @@ int main(int argc,char **argv) if (strlen(buf) > 6 && !strncmp(buf, "[E", 2)) { - md = parse_md(buf); + md = eparse_md(buf); if (md == NULL) goto parse_error; continue; diff --git a/fips/ecdsa/fips_ecdsavs.c b/fips/ecdsa/fips_ecdsavs.c index 898951a2c8..50b1b7ca5d 100644 --- a/fips/ecdsa/fips_ecdsavs.c +++ b/fips/ecdsa/fips_ecdsavs.c @@ -75,7 +75,7 @@ int main(int argc, char **argv) #include -static int lookup_curve(char *in, char *curve_name, const EVP_MD **pmd) +static int elookup_curve(char *in, char *curve_name, const EVP_MD **pmd) { char *cname, *p; /* Copy buffer as we will change it */ @@ -200,7 +200,7 @@ static int KeyPair(FILE *in, FILE *out) if (*buf == '[' 
&& buf[2] == '-') { if (buf[2] == '-') - curve_nid = lookup_curve(buf, lbuf, NULL); + curve_nid = elookup_curve(buf, lbuf, NULL); fputs(buf, out); continue; } @@ -260,7 +260,7 @@ static int PKV(FILE *in, FILE *out) fputs(buf, out); if (*buf == '[' && buf[2] == '-') { - curve_nid = lookup_curve(buf, lbuf, NULL); + curve_nid = elookup_curve(buf, lbuf, NULL); if (curve_nid == NID_undef) return 0; @@ -314,7 +314,7 @@ static int SigGen(FILE *in, FILE *out) fputs(buf, out); if (*buf == '[') { - curve_nid = lookup_curve(buf, lbuf, &digest); + curve_nid = elookup_curve(buf, lbuf, &digest); if (curve_nid == NID_undef) return 0; } @@ -390,7 +390,7 @@ static int SigVer(FILE *in, FILE *out) fputs(buf, out); if (*buf == '[') { - curve_nid = lookup_curve(buf, lbuf, &digest); + curve_nid = elookup_curve(buf, lbuf, &digest); if (curve_nid == NID_undef) return 0; } @@ -459,8 +459,11 @@ static int SigVer(FILE *in, FILE *out) } return 1; } - +#ifdef FIPS_ALGVS +int fips_ecdsavs_main(int argc, char **argv) +#else int main(int argc, char **argv) +#endif { FILE *in = NULL, *out = NULL; const char *cmd = argv[1]; diff --git a/fips/fips_test_suite.c b/fips/fips_test_suite.c index b7aea4e9cd..20e9094068 100644 --- a/fips/fips_test_suite.c +++ b/fips/fips_test_suite.c @@ -995,7 +995,11 @@ static int post_cb(int op, int id, int subid, void *ex) return 1; } -int main(int argc,char **argv) +#ifdef FIPS_ALGVS +int fips_test_suite_main(int argc, char **argv) +#else +int main(int argc, char **argv) +#endif { int bad_rsa = 0, bad_dsa = 0; int do_rng_stick = 0; diff --git a/fips/fips_utl.h b/fips/fips_utl.h index 1ed133c5c9..491bc2ace9 100644 --- a/fips/fips_utl.h +++ b/fips/fips_utl.h @@ -47,6 +47,9 @@ * */ +#ifndef FIPS_UTL_H +#define FIPS_UTL_H + #define OPENSSL_FIPSAPI #include @@ -487,3 +490,5 @@ int fips_strcasecmp(const char *str1, const char *str2) return fips_strncasecmp(str1, str2, (size_t)-1); } + +#endif diff --git a/fips/hmac/fips_hmactest.c b/fips/hmac/fips_hmactest.c index 
07c18bfdfa..da9c8d7926 100644 --- a/fips/hmac/fips_hmactest.c +++ b/fips/hmac/fips_hmactest.c @@ -85,7 +85,11 @@ static int print_hmac(const EVP_MD *md, FILE *out, unsigned char *Key, int Klen, unsigned char *Msg, int Msglen, int Tlen); +#ifdef FIPS_ALGVS +int fips_hmactest_main(int argc, char **argv) +#else int main(int argc, char **argv) +#endif { FILE *in = NULL, *out = NULL; diff --git a/fips/rand/fips_drbgvs.c b/fips/rand/fips_drbgvs.c index 4d3f0cfee0..bcdfa6dac3 100644 --- a/fips/rand/fips_drbgvs.c +++ b/fips/rand/fips_drbgvs.c @@ -76,7 +76,7 @@ int main(int argc, char **argv) #include "fips_utl.h" -static int parse_md(char *str) +static int dparse_md(char *str) { switch(atoi(str + 5)) { @@ -115,7 +115,7 @@ static int parse_ec(char *str) curve_nid = NID_secp521r1; else return NID_undef; - md_nid = parse_md(md); + md_nid = dparse_md(md); if (md_nid == NID_undef) return NID_undef; return (curve_nid << 16) | md_nid; @@ -170,9 +170,11 @@ static size_t test_nonce(DRBG_CTX *dctx, unsigned char **pout, return t->noncelen; } - - +#ifdef FIPS_ALGVS +int fips_drbgvs_main(int argc,char **argv) +#else int main(int argc,char **argv) +#endif { FILE *in, *out; DRBG_CTX *dctx = NULL; @@ -240,7 +242,7 @@ int main(int argc,char **argv) } if (strlen(buf) > 4 && !strncmp(buf, "[SHA-", 5)) { - nid = parse_md(buf); + nid = dparse_md(buf); if (nid == NID_undef) exit(1); if (drbg_type == DRBG_HMAC) diff --git a/fips/rand/fips_rngvs.c b/fips/rand/fips_rngvs.c index ac0a526573..9e1f070602 100644 --- a/fips/rand/fips_rngvs.c +++ b/fips/rand/fips_rngvs.c @@ -198,7 +198,11 @@ static void mct(FILE *in, FILE *out) } } -int main(int argc,char **argv) +#ifdef FIPS_ALGVS +int fips_rngvs_main(int argc, char **argv) +#else +int main(int argc, char **argv) +#endif { FILE *in, *out; if (argc == 4) diff --git a/fips/rsa/fips_rsagtest.c b/fips/rsa/fips_rsagtest.c index 78b4531398..8342f615fb 100644 --- a/fips/rsa/fips_rsagtest.c +++ b/fips/rsa/fips_rsagtest.c @@ -88,7 +88,11 @@ static int 
rsa_printkey1(FILE *out, RSA *rsa, static int rsa_printkey2(FILE *out, RSA *rsa, BIGNUM *Xq1, BIGNUM *Xq2, BIGNUM *Xq); +#ifdef FIPS_ALGVS +int fips_rsagtest_main(int argc, char **argv) +#else int main(int argc, char **argv) +#endif { FILE *in = NULL, *out = NULL; diff --git a/fips/rsa/fips_rsastest.c b/fips/rsa/fips_rsastest.c index e0dbe2a0d7..d11b06316b 100644 --- a/fips/rsa/fips_rsastest.c +++ b/fips/rsa/fips_rsastest.c @@ -85,7 +85,11 @@ static int rsa_stest(FILE *out, FILE *in, int Saltlen); static int rsa_printsig(FILE *out, RSA *rsa, const EVP_MD *dgst, unsigned char *Msg, long Msglen, int Saltlen); +#ifdef FIPS_ALGVS +int fips_rsastest_main(int argc, char **argv) +#else int main(int argc, char **argv) +#endif { FILE *in = NULL, *out = NULL; diff --git a/fips/rsa/fips_rsavtest.c b/fips/rsa/fips_rsavtest.c index df33842691..7685c74ef0 100644 --- a/fips/rsa/fips_rsavtest.c +++ b/fips/rsa/fips_rsavtest.c @@ -82,14 +82,18 @@ int main(int argc, char *argv[]) #include "fips_utl.h" -int rsa_test(FILE *out, FILE *in, int saltlen); +int rsa_vtest(FILE *out, FILE *in, int saltlen); static int rsa_printver(FILE *out, BIGNUM *n, BIGNUM *e, const EVP_MD *dgst, unsigned char *Msg, long Msglen, unsigned char *S, long Slen, int Saltlen); +#ifdef FIPS_ALGVS +int fips_rsavtest_main(int argc, char **argv) +#else int main(int argc, char **argv) +#endif { FILE *in = NULL, *out = NULL; @@ -138,7 +142,7 @@ int main(int argc, char **argv) goto end; } - if (!rsa_test(out, in, Saltlen)) + if (!rsa_vtest(out, in, Saltlen)) { fprintf(stderr, "FATAL RSAVTEST file processing error\n"); goto end; @@ -159,7 +163,7 @@ int main(int argc, char **argv) #define RSA_TEST_MAXLINELEN 10240 -int rsa_test(FILE *out, FILE *in, int Saltlen) +int rsa_vtest(FILE *out, FILE *in, int Saltlen) { char *linebuf, *olinebuf, *p, *q; char *keyword, *value; diff --git a/fips/sha/fips_shatest.c b/fips/sha/fips_shatest.c index c14df16601..3954777a64 100644 --- a/fips/sha/fips_shatest.c +++ 
b/fips/sha/fips_shatest.c @@ -86,7 +86,11 @@ static int print_dgst(const EVP_MD *md, FILE *out, static int print_monte(const EVP_MD *md, FILE *out, unsigned char *Seed, int SeedLen); +#ifdef FIPS_ALGVS +int fips_shatest_main(int argc, char **argv) +#else int main(int argc, char **argv) +#endif { FILE *in = NULL, *out = NULL; diff --git a/test/Makefile b/test/Makefile index 2577d245b9..2fcc78d46a 100644 --- a/test/Makefile +++ b/test/Makefile @@ -81,6 +81,7 @@ FIPS_ECDHVS= fips_ecdhvs FIPS_ECDSAVS= fips_ecdsavs FIPS_TEST_SUITE=fips_test_suite FIPS_CMACTEST= fips_cmactest +FIPS_ALGVS= fips_algvs TESTS= alltests @@ -119,7 +120,7 @@ OBJ= $(BNTEST).o $(ECTEST).o $(ECDSATEST).o $(ECDHTEST).o $(IDEATEST).o \ $(FIPS_RSASTEST).o $(FIPS_RSAGTEST).o $(FIPS_GCMTEST).o \ $(FIPS_DSSVS).o $(FIPS_DSATEST).o $(FIPS_RNGVS).o $(FIPS_DRBGVS).o \ $(FIPS_TEST_SUITE).o $(FIPS_DHVS).o $(FIPS_ECDSAVS).o \ - $(FIPS_ECDHVS).o $(FIPS_CMACTEST).o \ + $(FIPS_ECDHVS).o $(FIPS_CMACTEST).o $(FIPS_ALGVS).o \ $(EVPTEST).o $(IGETEST).o $(JPAKETEST).o SRC= $(BNTEST).c $(ECTEST).c $(ECDSATEST).c $(ECDHTEST).c $(IDEATEST).c \ $(MD2TEST).c $(MD4TEST).c $(MD5TEST).c \ @@ -133,7 +134,7 @@ SRC= $(BNTEST).c $(ECTEST).c $(ECDSATEST).c $(ECDHTEST).c $(IDEATEST).c \ $(FIPS_RSASTEST).c $(FIPS_RSAGTEST).c $(FIPS_GCMTEST).c \ $(FIPS_DSSVS).c $(FIPS_DSATEST).c $(FIPS_RNGVS).c $(FIPS_DRBGVS).c \ $(FIPS_TEST_SUITE).c $(FIPS_DHVS).c $(FIPS_ECDSAVS).c \ - $(FIPS_ECDHVS).c $(FIPS_CMACTEST).c \ + $(FIPS_ECDHVS).c $(FIPS_CMACTEST).c $(FIPS_ALGVS).c \ $(EVPTEST).c $(IGETEST).c $(JPAKETEST).c EXHEADER= @@ -150,6 +151,8 @@ exe: $(EXE) $(FIPSEXE) dummytest$(EXE_EXT) fipsexe: $(FIPSEXE) +fipsalgvs: $(FIPS_ALGVS) + files: $(PERL) $(TOP)/util/files.pl Makefile >> $(TOP)/MINFO @@ -504,6 +507,9 @@ $(FIPS_TEST_SUITE)$(EXE_EXT): $(FIPS_TEST_SUITE).o $(DLIBCRYPTO) $(FIPS_CMACTEST)$(EXE_EXT): $(FIPS_CMACTEST).o $(DLIBCRYPTO) @target=$(FIPS_CMACTEST); $(FIPS_BUILD_CMD) +$(FIPS_ALGVS)$(EXE_EXT): $(FIPS_ALGVS).o $(DLIBCRYPTO) + 
@target=$(FIPS_ALGVS); $(FIPS_BUILD_CMD) + $(RMDTEST)$(EXE_EXT): $(RMDTEST).o $(DLIBCRYPTO) @target=$(RMDTEST); $(BUILD_CMD) diff --git a/test/fips_algvs.c b/test/fips_algvs.c new file mode 100644 index 0000000000..a662d01df4 --- /dev/null +++ b/test/fips_algvs.c @@ -0,0 +1,312 @@ +/* test/fips_algvs.c */ +/* Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL + * project 2011 + */ +/* ==================================================================== + * Copyright (c) 2011 The OpenSSL Project. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" + * + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to + * endorse or promote products derived from this software without + * prior written permission. For written permission, please contact + * licensing@OpenSSL.org. + * + * 5. Products derived from this software may not be called "OpenSSL" + * nor may "OpenSSL" appear in their names without prior written + * permission of the OpenSSL Project. + * + * 6. 
Redistributions of any form whatsoever must retain the following + * acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" + * + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * ==================================================================== + * + * This product includes cryptographic software written by Eric Young + * (eay@cryptsoft.com). This product includes software written by Tim + * Hudson (tjh@cryptsoft.com). 
+ * + */ + +#include +#include +#include + +#ifndef OPENSSL_FIPS +#include + +int main(int argc, char **argv) +{ + printf("No FIPS ALGVS support\n"); + return 0; +} +#else + +#define FIPS_ALGVS + +extern int fips_aesavs_main(int argc, char **argv); +extern int fips_cmactest_main(int argc, char **argv); +extern int fips_desmovs_main(int argc, char **argv); +extern int fips_dhvs_main(int argc, char **argv); +extern int fips_drbgvs_main(int argc,char **argv); +extern int fips_dssvs_main(int argc, char **argv); +extern int fips_ecdhvs_main(int argc, char **argv); +extern int fips_ecdsavs_main(int argc, char **argv); +extern int fips_gcmtest_main(int argc, char **argv); +extern int fips_hmactest_main(int argc, char **argv); +extern int fips_rngvs_main(int argc, char **argv); +extern int fips_rsagtest_main(int argc, char **argv); +extern int fips_rsastest_main(int argc, char **argv); +extern int fips_rsavtest_main(int argc, char **argv); +extern int fips_shatest_main(int argc, char **argv); +extern int fips_test_suite_main(int argc, char **argv); + +#include "fips_aesavs.c" +#include "fips_cmactest.c" +#include "fips_desmovs.c" +#include "fips_dhvs.c" +#include "fips_drbgvs.c" +#include "fips_dssvs.c" +#include "fips_ecdhvs.c" +#include "fips_ecdsavs.c" +#include "fips_gcmtest.c" +#include "fips_hmactest.c" +#include "fips_rngvs.c" +#include "fips_rsagtest.c" +#include "fips_rsastest.c" +#include "fips_rsavtest.c" +#include "fips_shatest.c" +#include "fips_test_suite.c" + +typedef struct + { + const char *name; + int (*func)(int argc, char **argv); + } ALGVS_FUNCTION; + +static ALGVS_FUNCTION algvs[] = { + {"fips_aesavs", fips_aesavs_main}, + {"fips_cmactest", fips_cmactest_main}, + {"fips_desmovs", fips_desmovs_main}, + {"fips_dhvs", fips_dhvs_main}, + {"fips_drbgvs", fips_drbgvs_main}, + {"fips_dssvs", fips_dssvs_main}, + {"fips_ecdhvs", fips_ecdhvs_main}, + {"fips_ecdsavs", fips_ecdsavs_main}, + {"fips_gcmtest", fips_gcmtest_main}, + {"fips_hmactest", 
fips_hmactest_main}, + {"fips_rngvs", fips_rngvs_main}, + {"fips_rsagtest", fips_rsagtest_main}, + {"fips_rsastest", fips_rsastest_main}, + {"fips_rsavtest", fips_rsavtest_main}, + {"fips_shatest", fips_shatest_main}, + {"fips_test_suite", fips_test_suite_main}, + {NULL, 0} + }; + +/* Argument parsing taken from apps/apps.c */ + +typedef struct args_st + { + char **data; + int count; + } ARGS; + +static int chopup_args(ARGS *arg, char *buf, int *argc, char **argv[]) + { + int num,i; + char *p; + + *argc=0; + *argv=NULL; + + i=0; + if (arg->count == 0) + { + arg->count=20; + arg->data=(char **)OPENSSL_malloc(sizeof(char *)*arg->count); + } + for (i=0; icount; i++) + arg->data[i]=NULL; + + num=0; + p=buf; + for (;;) + { + /* first scan over white space */ + if (!*p) break; + while (*p && ((*p == ' ') || (*p == '\t') || (*p == '\n'))) + p++; + if (!*p) break; + + /* The start of something good :-) */ + if (num >= arg->count) + { + fprintf(stderr, "Too many arguments!!\n"); + return 0; + } + arg->data[num++]=p; + + /* now look for the end of this */ + if ((*p == '\'') || (*p == '\"')) /* scan for closing quote */ + { + i= *(p++); + arg->data[num-1]++; /* jump over quote */ + while (*p && (*p != i)) + p++; + *p='\0'; + } + else + { + while (*p && ((*p != ' ') && + (*p != '\t') && (*p != '\n'))) + p++; + + if (*p == '\0') + p--; + else + *p='\0'; + } + p++; + } + *argc=num; + *argv=arg->data; + return(1); + } + +static int run_prg(int argc, char **argv) + { + ALGVS_FUNCTION *t; + const char *prg_name; + prg_name = strrchr(argv[0], '/'); + if (prg_name) + prg_name++; + else + prg_name = argv[0]; + for (t = algvs; t->name; t++) + { + if (!strcmp(prg_name, t->name)) + return t->func(argc, argv); + } + return -100; + } + +int main(int argc, char **argv) + { + char buf[1024]; + char **args = argv + 1; + const char *sname = "fipstests.sh"; + ARGS arg; + int xargc; + char **xargv; + int lineno = 0, badarg = 0; + int nerr = 0, quiet = 0, verbose = 0; + FILE *in = NULL; + if 
(*args && *args[0] != '-') + return run_prg(argc - 1, args); + while (!badarg && *args && *args[0] == '-') + { + if (!strcmp(*args, "-script")) + { + if (args[1]) + { + args++; + sname = *args; + } + else + badarg = 1; + } + else if (!strcmp(*args, "-quiet")) + quiet = 1; + else if (!strcmp(*args, "-verbose")) + verbose = 1; + else + badarg = 1; + args++; + } + + if (badarg) + { + fprintf(stderr, "Error processing arguments\n"); + return 1; + } + + in = fopen(sname, "r"); + if (!in) + { + fprintf(stderr, "Error opening script file \"%s\"\n", sname); + return 1; + } + + arg.data = NULL; + arg.count = 0; + + while (fgets(buf, sizeof(buf), in)) + { + lineno++; + if (!chopup_args(&arg, buf, &xargc, &xargv)) + fprintf(stderr, "Error processing line %d\n", lineno); + else + { + int rv; + if (!quiet) + { + int i; + int narg = verbose ? xargc : xargc - 2; + printf("Running command line:"); + for (i = 0; i < narg; i++) + printf(" %s", xargv[i]); + printf("\n"); + } + rv = run_prg(xargc, xargv); + if (FIPS_module_mode()) + FIPS_module_mode_set(0, NULL); + if (rv != 0) + nerr++; + if (rv == -100) + fprintf(stderr, "ERROR: Command not found\n"); + else if (rv != 0) + fprintf(stderr, "ERROR: returned %d\n", rv); + else if (verbose) + printf("\tCommand run successfully\n"); + } + } + + if (!quiet) + printf("Completed with %d errors\n", nerr); + + fclose(in); + if (nerr == 0) + return 0; + return 1; + } + +#endif From d5939062d7fa9059b086e88ca14bf3f3ad19256b Mon Sep 17 00:00:00 2001 From: "Dr. 
Stephen Henson" Date: Wed, 2 Nov 2011 00:07:15 +0000 Subject: [PATCH 006/120] Replace exit calls with return in fips_test_suite --- fips/fips_test_suite.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fips/fips_test_suite.c b/fips/fips_test_suite.c index 20e9094068..1344b1108d 100644 --- a/fips/fips_test_suite.c +++ b/fips/fips_test_suite.c @@ -1092,16 +1092,16 @@ int main(int argc, char **argv) pass = ""; } else { printf("Bad argument \"%s\"\n", argv[1]); - exit(1); + return 1; } if (!no_exit) { fips_algtest_init_nofips(); if (!FIPS_module_mode_set(1, pass)) { printf("Power-up self test failed\n"); - exit(1); + return 1; } printf("Power-up self test successful\n"); - exit(0); + return 0; } } @@ -1120,7 +1120,7 @@ int main(int argc, char **argv) ERR_clear_error(); test_msg("2. Automatic power-up self test", FIPS_module_mode_set(1, pass)); if (!FIPS_module_mode()) - exit(1); + return 1; if (do_drbg_stick) FIPS_drbg_stick(); if (do_rng_stick) From cb47a7107f26bfcfba680cf9dfd450ceea9d5ead Mon Sep 17 00:00:00 2001 From: "Dr. Stephen Henson" Date: Wed, 2 Nov 2011 00:43:45 +0000 Subject: [PATCH 007/120] Print out an error for "make test" in FIPS builds. --- Makefile.fips | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/Makefile.fips b/Makefile.fips index 36e9a7d65b..b3811dff22 100644 --- a/Makefile.fips +++ b/Makefile.fips @@ -538,9 +538,7 @@ dclean: test: tests tests: - @(cd test && echo "testing..." && \ - $(CLEARENV) && $(MAKE) -e $(BUILDENV) TOP=.. TESTS='$(TESTS)' OPENSSL_DEBUG_MEMORY=on OPENSSL_CONF=../apps/openssl.cnf tests ); - OPENSSL_CONF=apps/openssl.cnf util/opensslwrap.sh version -a + @echo "Not implemented in FIPS build" ; false report: @$(PERL) util/selftest.pl From 8ab0d50c4369b95fea3e806c3a07540e6781889f Mon Sep 17 00:00:00 2001 From: "Dr. Stephen Henson" Date: Wed, 2 Nov 2011 16:35:24 +0000 Subject: [PATCH 008/120] Remove duplicate test from health check. 
Fix memory leaks by uninstantiating DRBG before reinitialising it. --- fips/rand/fips_drbg_selftest.c | 29 +++++++++++------------------ 1 file changed, 11 insertions(+), 18 deletions(-) diff --git a/fips/rand/fips_drbg_selftest.c b/fips/rand/fips_drbg_selftest.c index ee0561bcbe..a787323d6d 100644 --- a/fips/rand/fips_drbg_selftest.c +++ b/fips/rand/fips_drbg_selftest.c @@ -582,7 +582,6 @@ static int fips_drbg_error_check(DRBG_CTX *dctx, DRBG_SELFTEST_DATA *td) } dctx->iflags &= ~DRBG_FLAG_NOERR; - if (!FIPS_drbg_uninstantiate(dctx)) { FIPSerr(FIPS_F_FIPS_DRBG_ERROR_CHECK, FIPS_R_UNINSTANTIATE_ERROR); @@ -617,28 +616,20 @@ static int fips_drbg_error_check(DRBG_CTX *dctx, DRBG_SELFTEST_DATA *td) goto err; } - /* Explicit reseed tests */ - - /* Test explicit reseed with too large additional input */ - if (!do_drbg_init(dctx, td, &t)) - goto err; - - dctx->iflags |= DRBG_FLAG_NOERR; - - if (FIPS_drbg_reseed(dctx, td->adin, dctx->max_adin + 1) > 0) + dctx->iflags &= ~DRBG_FLAG_NOERR; + if (!FIPS_drbg_uninstantiate(dctx)) { - FIPSerr(FIPS_F_FIPS_DRBG_ERROR_CHECK, FIPS_R_ADDITIONAL_INPUT_ERROR_UNDETECTED); + FIPSerr(FIPS_F_FIPS_DRBG_ERROR_CHECK, FIPS_R_UNINSTANTIATE_ERROR); goto err; } - /* Test explicit reseed with entropy source failure */ - /* Check prediction resistance request fails if entropy source * failure. 
*/ t.entlen = 0; + dctx->iflags |= DRBG_FLAG_NOERR; if (FIPS_drbg_generate(dctx, randout, td->katlen, 1, td->adin, td->adinlen)) { @@ -680,6 +671,13 @@ static int fips_drbg_error_check(DRBG_CTX *dctx, DRBG_SELFTEST_DATA *td) goto err; } + dctx->iflags &= ~DRBG_FLAG_NOERR; + if (!FIPS_drbg_uninstantiate(dctx)) + { + FIPSerr(FIPS_F_FIPS_DRBG_ERROR_CHECK, FIPS_R_UNINSTANTIATE_ERROR); + goto err; + } + /* Explicit reseed tests */ /* Test explicit reseed with too large additional input */ @@ -696,11 +694,6 @@ static int fips_drbg_error_check(DRBG_CTX *dctx, DRBG_SELFTEST_DATA *td) /* Test explicit reseed with entropy source failure */ - if (!do_drbg_init(dctx, td, &t)) - goto err; - - dctx->iflags |= DRBG_FLAG_NOERR; - t.entlen = 0; if (FIPS_drbg_reseed(dctx, td->adin, td->adinlen) > 0) From b7de76b74d84c4c45d86cca6fae2e9879f281695 Mon Sep 17 00:00:00 2001 From: "Dr. Stephen Henson" Date: Wed, 2 Nov 2011 19:16:43 +0000 Subject: [PATCH 009/120] Add support for memory leak checking in fips_algvs. Fix many memory leaks in algorithm test utilities. 
--- fips/aes/fips_aesavs.c | 3 ++- fips/aes/fips_gcmtest.c | 6 ++++++ fips/des/fips_desmovs.c | 2 ++ fips/dsa/fips_dssvs.c | 29 +++++++++++++++++++++++------ fips/ecdh/fips_ecdhvs.c | 20 +++++++++++++++++--- fips/ecdsa/fips_ecdsavs.c | 17 ++++++++++++++++- fips/fips_test_suite.c | 4 +++- fips/rsa/fips_rsastest.c | 3 +++ test/fips_algvs.c | 24 +++++++++++++++++++++--- 9 files changed, 93 insertions(+), 15 deletions(-) diff --git a/fips/aes/fips_aesavs.c b/fips/aes/fips_aesavs.c index 9a09964221..bbdb93aab9 100644 --- a/fips/aes/fips_aesavs.c +++ b/fips/aes/fips_aesavs.c @@ -535,7 +535,7 @@ static int do_mct(char *amode, } } } - + FIPS_cipher_ctx_cleanup(&ctx); return ret; } @@ -850,6 +850,7 @@ static int proc_file(char *rqfile, char *rspfile) fclose(rfp); if (afp) fclose(afp); + FIPS_cipher_ctx_cleanup(&ctx); return err; } diff --git a/fips/aes/fips_gcmtest.c b/fips/aes/fips_gcmtest.c index a7c787368a..02849bc2ab 100644 --- a/fips/aes/fips_gcmtest.c +++ b/fips/aes/fips_gcmtest.c @@ -261,6 +261,7 @@ static void gcmtest(FILE *in, FILE *out, int encrypt) iv = aad = ct = pt = key = tag = NULL; } } + FIPS_cipher_ctx_cleanup(&ctx); } static void xtstest(FILE *in, FILE *out) @@ -335,6 +336,7 @@ static void xtstest(FILE *in, FILE *out) iv = key = inbuf = outbuf = NULL; } } + FIPS_cipher_ctx_cleanup(&ctx); } static void ccmtest(FILE *in, FILE *out) @@ -428,6 +430,8 @@ static void ccmtest(FILE *in, FILE *out) } else if (!strcmp(keyword,"Adata")) { + if (Adata) + OPENSSL_free(Adata); Adata = hex2bin_m(value, &l); if (Alen && l != Alen) { @@ -493,6 +497,8 @@ static void ccmtest(FILE *in, FILE *out) OPENSSL_free(Key); if (Nonce) OPENSSL_free(Nonce); + if (Adata) + OPENSSL_free(Adata); FIPS_cipher_ctx_cleanup(&ctx); } diff --git a/fips/des/fips_desmovs.c b/fips/des/fips_desmovs.c index 29035f08c7..79900aeae2 100644 --- a/fips/des/fips_desmovs.c +++ b/fips/des/fips_desmovs.c @@ -263,6 +263,7 @@ static int do_tmct(char *amode, if(imode == TOFB) for(n=0 ; n < 8 ; ++n) 
text[n]=text0[n]^old_iv[n]; + FIPS_cipher_ctx_cleanup(&ctx); } return 1; } @@ -622,6 +623,7 @@ static int tproc_file(char *rqfile, char *rspfile) fclose(rfp); if (afp) fclose(afp); + FIPS_cipher_ctx_cleanup(&ctx); return err; } diff --git a/fips/dsa/fips_dssvs.c b/fips/dsa/fips_dssvs.c index 706babf050..45eefb7c63 100644 --- a/fips/dsa/fips_dssvs.c +++ b/fips/dsa/fips_dssvs.c @@ -199,6 +199,7 @@ static void pqg(FILE *in, FILE *out) { fprintf(out, "counter = %d" RESP_EOL RESP_EOL, counter); } + FIPS_dsa_free(dsa); } } else if(!strcmp(keyword,"P")) @@ -519,6 +520,8 @@ static void keyver(FILE *in, FILE *out) BN_free(g); if (Y2) BN_free(Y2); + if (ctx) + BN_CTX_free(ctx); } static void keypair(FILE *in, FILE *out) @@ -575,6 +578,8 @@ static void keypair(FILE *in, FILE *out) do_bn_print_name(out, "Y",dsa->pub_key); fputs(RESP_EOL, out); } + if (dsa) + FIPS_dsa_free(dsa); } } } @@ -648,8 +653,8 @@ static void siggen(FILE *in, FILE *out) FIPS_md_ctx_cleanup(&mctx); } } - if (dsa) - FIPS_dsa_free(dsa); + if (dsa) + FIPS_dsa_free(dsa); } static void sigver(FILE *in, FILE *out) @@ -687,15 +692,15 @@ static void sigver(FILE *in, FILE *out) dsa = FIPS_dsa_new(); } else if(!strcmp(keyword,"P")) - dsa->p=hex2bn(value); + do_hex2bn(&dsa->p, value); else if(!strcmp(keyword,"Q")) - dsa->q=hex2bn(value); + do_hex2bn(&dsa->q, value); else if(!strcmp(keyword,"G")) - dsa->g=hex2bn(value); + do_hex2bn(&dsa->g, value); else if(!strcmp(keyword,"Msg")) n=hex2bin(value,msg); else if(!strcmp(keyword,"Y")) - dsa->pub_key=hex2bn(value); + do_hex2bn(&dsa->pub_key, value); else if(!strcmp(keyword,"R")) sig->r=hex2bn(value); else if(!strcmp(keyword,"S")) @@ -711,10 +716,22 @@ static void sigver(FILE *in, FILE *out) r = FIPS_dsa_verify_ctx(dsa, &mctx, sig); no_err = 0; FIPS_md_ctx_cleanup(&mctx); + if (sig->s) + { + BN_free(sig->s); + sig->s = NULL; + } + if (sig->r) + { + BN_free(sig->r); + sig->r = NULL; + } fprintf(out, "Result = %c" RESP_EOL RESP_EOL, r == 1 ? 
'P' : 'F'); } } + if (dsa) + FIPS_dsa_free(dsa); } #ifdef FIPS_ALGVS diff --git a/fips/ecdh/fips_ecdhvs.c b/fips/ecdh/fips_ecdhvs.c index 821821a96d..a30e335e2b 100644 --- a/fips/ecdh/fips_ecdhvs.c +++ b/fips/ecdh/fips_ecdhvs.c @@ -319,6 +319,7 @@ int main(int argc, char **argv) EC_GROUP *group = NULL; char *keyword = NULL, *value = NULL; int do_verify = -1, exout = 0; + int rv = 1; int curve_nids[5] = {0,0,0,0,0}; int param_set = -1; @@ -463,10 +464,23 @@ int main(int argc, char **argv) md, rhash, rhashlen); } } - return 0; + rv = 0; parse_error: - fprintf(stderr, "Error Parsing request file\n"); - exit(1); + if (id) + BN_free(id); + if (ix) + BN_free(ix); + if (iy) + BN_free(iy); + if (cx) + BN_free(cx); + if (cy) + BN_free(cy); + if (group) + EC_GROUP_free(group); + if (rv) + fprintf(stderr, "Error Parsing request file\n"); + return rv; } #endif diff --git a/fips/ecdsa/fips_ecdsavs.c b/fips/ecdsa/fips_ecdsavs.c index 50b1b7ca5d..35ff251060 100644 --- a/fips/ecdsa/fips_ecdsavs.c +++ b/fips/ecdsa/fips_ecdsavs.c @@ -287,10 +287,13 @@ static int PKV(FILE *in, FILE *out) no_err = 1; rv = EC_KEY_set_public_key_affine_coordinates(key, Qx, Qy); no_err = 0; + EC_KEY_free(key); fprintf(out, "Result = %s" RESP_EOL, rv ? "P":"F"); } } + BN_free(Qx); + BN_free(Qy); return 1; } @@ -358,7 +361,7 @@ static int SigGen(FILE *in, FILE *out) do_bn_print_name(out, "S", sig->s); EC_KEY_free(key); - + OPENSSL_free(msg); FIPS_ecdsa_sig_free(sig); } @@ -451,12 +454,24 @@ static int SigVer(FILE *in, FILE *out) FIPS_digestupdate(&mctx, msg, mlen); no_err = 1; rv = FIPS_ecdsa_verify_ctx(key, &mctx, sig); + EC_KEY_free(key); + if (msg) + OPENSSL_free(msg); no_err = 0; fprintf(out, "Result = %s" RESP_EOL, rv ? 
"P":"F"); } } + if (sig->r) + BN_free(sig->r); + if (sig->s) + BN_free(sig->s); + if (Qx) + BN_free(Qx); + if (Qy) + BN_free(Qy); + EVP_MD_CTX_cleanup(&mctx); return 1; } #ifdef FIPS_ALGVS diff --git a/fips/fips_test_suite.c b/fips/fips_test_suite.c index 1344b1108d..db0f18a16b 100644 --- a/fips/fips_test_suite.c +++ b/fips/fips_test_suite.c @@ -651,6 +651,8 @@ static int Zeroize() for(i = 0; i < sizeof(userkey); i++) printf("%02x", userkey[i]); printf("\n"); + FIPS_rsa_free(key); + return 1; } @@ -696,7 +698,7 @@ static int do_drbg_test(int type, int flags) } rv = 1; err: - FIPS_drbg_uninstantiate(dctx); + FIPS_drbg_free(dctx); return rv; } diff --git a/fips/rsa/fips_rsastest.c b/fips/rsa/fips_rsastest.c index d11b06316b..72e75a3cc6 100644 --- a/fips/rsa/fips_rsastest.c +++ b/fips/rsa/fips_rsastest.c @@ -362,6 +362,9 @@ static int rsa_printsig(FILE *out, RSA *rsa, const EVP_MD *dgst, error: + if (sigbuf) + OPENSSL_free(sigbuf); + return ret; } #endif diff --git a/test/fips_algvs.c b/test/fips_algvs.c index a662d01df4..36d7fb3338 100644 --- a/test/fips_algvs.c +++ b/test/fips_algvs.c @@ -229,9 +229,23 @@ int main(int argc, char **argv) char **xargv; int lineno = 0, badarg = 0; int nerr = 0, quiet = 0, verbose = 0; + int rv; FILE *in = NULL; +#ifdef FIPS_ALGVS_MEMCHECK + CRYPTO_malloc_debug_init(); + OPENSSL_init(); + CRYPTO_set_mem_debug_options(V_CRYPTO_MDEBUG_ALL); + CRYPTO_mem_ctrl(CRYPTO_MEM_CHECK_ON); +#endif + if (*args && *args[0] != '-') - return run_prg(argc - 1, args); + { + rv = run_prg(argc - 1, args); +#ifdef FIPS_ALGVS_MEMCHECK + CRYPTO_mem_leaks_fp(stderr); +#endif + return rv; + } while (!badarg && *args && *args[0] == '-') { if (!strcmp(*args, "-script")) @@ -276,7 +290,6 @@ int main(int argc, char **argv) fprintf(stderr, "Error processing line %d\n", lineno); else { - int rv; if (!quiet) { int i; @@ -303,10 +316,15 @@ int main(int argc, char **argv) if (!quiet) printf("Completed with %d errors\n", nerr); + if (arg.data) + OPENSSL_free(arg.data); + 
fclose(in); +#ifdef FIPS_ALGVS_MEMCHECK + CRYPTO_mem_leaks_fp(stderr); +#endif if (nerr == 0) return 0; return 1; } - #endif From 485ef852acc702df6847a30c583844f05a80e47f Mon Sep 17 00:00:00 2001 From: "Dr. Stephen Henson" Date: Sat, 5 Nov 2011 01:32:52 +0000 Subject: [PATCH 010/120] Add single call public key sign and verify functions. --- CHANGES | 5 +++++ crypto/dsa/dsa.h | 5 +++++ crypto/ecdsa/ecdsa.h | 5 +++++ fips/dsa/fips_dsa_sign.c | 24 ++++++++++++++++++++++++ fips/dsa/fips_dsatest.c | 15 ++------------- fips/dsa/fips_dssvs.c | 14 ++------------ fips/ecdsa/fips_ecdsa_sign.c | 25 +++++++++++++++++++++++++ fips/ecdsa/fips_ecdsavs.c | 14 ++------------ fips/fips.h | 10 ++++++++++ fips/fips_test_suite.c | 32 ++++++-------------------------- fips/rsa/fips_rsa_sign.c | 29 +++++++++++++++++++++++++++++ fips/rsa/fips_rsastest.c | 11 +---------- fips/rsa/fips_rsavtest.c | 13 +------------ 13 files changed, 117 insertions(+), 85 deletions(-) diff --git a/CHANGES b/CHANGES index 4159394ef9..d3883be28f 100644 --- a/CHANGES +++ b/CHANGES @@ -4,9 +4,14 @@ Changes between 1.0.1 and 1.1.0 [xx XXX xxxx] + *) Add FIPS_{rsa,dsa,ecdsa}_{sign,verify} functions which digest and + sign or verify all in one operation. + [Steve Henson] + *) Add fips_algvs: a multicall fips utility incorporaing all the algorithm test programs and fips_test_suite. Includes functionality to parse the minimal script output of fipsalgest.pl directly. + [Steve Henson] *) Add authorisation parameter to FIPS_module_mode_set(). 
[Steve Henson] diff --git a/crypto/dsa/dsa.h b/crypto/dsa/dsa.h index 86766dacb4..408ee11b72 100644 --- a/crypto/dsa/dsa.h +++ b/crypto/dsa/dsa.h @@ -215,6 +215,11 @@ DSA_SIG * FIPS_dsa_sign_ctx(DSA *dsa, EVP_MD_CTX *ctx); int FIPS_dsa_verify_digest(DSA *dsa, const unsigned char *dig, int dlen, DSA_SIG *s); int FIPS_dsa_verify_ctx(DSA *dsa, EVP_MD_CTX *ctx, DSA_SIG *s); +int FIPS_dsa_verify(DSA *dsa, const unsigned char *msg, size_t msglen, + const EVP_MD *mhash, DSA_SIG *s); +DSA_SIG * FIPS_dsa_sign(DSA *dsa, const unsigned char *msg, size_t msglen, + const EVP_MD *mhash); + #endif DSA * DSA_new(void); diff --git a/crypto/ecdsa/ecdsa.h b/crypto/ecdsa/ecdsa.h index c3275b0839..cd6d19ccde 100644 --- a/crypto/ecdsa/ecdsa.h +++ b/crypto/ecdsa/ecdsa.h @@ -236,6 +236,11 @@ ECDSA_SIG * FIPS_ecdsa_sign_ctx(EC_KEY *key, EVP_MD_CTX *ctx); int FIPS_ecdsa_verify_digest(EC_KEY *key, const unsigned char *dig, int dlen, ECDSA_SIG *s); int FIPS_ecdsa_verify_ctx(EC_KEY *key, EVP_MD_CTX *ctx, ECDSA_SIG *s); +int FIPS_ecdsa_verify(EC_KEY *key, const unsigned char *msg, size_t msglen, + const EVP_MD *mhash, ECDSA_SIG *s); +ECDSA_SIG * FIPS_ecdsa_sign(EC_KEY *key, + const unsigned char *msg, size_t msglen, + const EVP_MD *mhash); #endif diff --git a/fips/dsa/fips_dsa_sign.c b/fips/dsa/fips_dsa_sign.c index ea1bd87303..274bcd9016 100644 --- a/fips/dsa/fips_dsa_sign.c +++ b/fips/dsa/fips_dsa_sign.c @@ -114,4 +114,28 @@ int FIPS_dsa_verify_digest(DSA *dsa, return dsa->meth->dsa_do_verify(dig,dlen,s,dsa); } +int FIPS_dsa_verify(DSA *dsa, const unsigned char *msg, size_t msglen, + const EVP_MD *mhash, DSA_SIG *s) + { + int ret=-1; + unsigned char dig[EVP_MAX_MD_SIZE]; + unsigned int dlen; + FIPS_digest(msg, msglen, dig, &dlen, mhash); + ret=FIPS_dsa_verify_digest(dsa, dig, dlen, s); + OPENSSL_cleanse(dig, dlen); + return ret; + } + +DSA_SIG * FIPS_dsa_sign(DSA *dsa, const unsigned char *msg, size_t msglen, + const EVP_MD *mhash) + { + DSA_SIG *s; + unsigned char dig[EVP_MAX_MD_SIZE]; + 
unsigned int dlen; + FIPS_digest(msg, msglen, dig, &dlen, mhash); + s = FIPS_dsa_sign_digest(dsa, dig, dlen); + OPENSSL_cleanse(dig, dlen); + return s; + } + #endif diff --git a/fips/dsa/fips_dsatest.c b/fips/dsa/fips_dsatest.c index 64d52258eb..3c95d176b8 100644 --- a/fips/dsa/fips_dsatest.c +++ b/fips/dsa/fips_dsatest.c @@ -154,9 +154,7 @@ int main(int argc, char **argv) unsigned char buf[256]; unsigned long h; BN_GENCB cb; - EVP_MD_CTX mctx; BN_GENCB_set(&cb, dsa_cb, stderr); - FIPS_md_ctx_init(&mctx); fips_algtest_init(); @@ -210,19 +208,11 @@ int main(int argc, char **argv) } DSA_generate_key(dsa); - if (!FIPS_digestinit(&mctx, EVP_sha1())) - goto end; - if (!FIPS_digestupdate(&mctx, str1, 20)) - goto end; - sig = FIPS_dsa_sign_ctx(dsa, &mctx); + sig = FIPS_dsa_sign(dsa, str1, 20, EVP_sha1()); if (!sig) goto end; - if (!FIPS_digestinit(&mctx, EVP_sha1())) - goto end; - if (!FIPS_digestupdate(&mctx, str1, 20)) - goto end; - if (FIPS_dsa_verify_ctx(dsa, &mctx, sig) != 1) + if (FIPS_dsa_verify(dsa, str1, 20, EVP_sha1(), sig) != 1) goto end; ret = 1; @@ -231,7 +221,6 @@ end: if (sig) FIPS_dsa_sig_free(sig); if (dsa != NULL) FIPS_dsa_free(dsa); - FIPS_md_ctx_cleanup(&mctx); #if 0 CRYPTO_mem_leaks(bio_err); #endif diff --git a/fips/dsa/fips_dssvs.c b/fips/dsa/fips_dssvs.c index 45eefb7c63..e2f2297f00 100644 --- a/fips/dsa/fips_dssvs.c +++ b/fips/dsa/fips_dssvs.c @@ -632,9 +632,7 @@ static void siggen(FILE *in, FILE *out) { unsigned char msg[1024]; int n; - EVP_MD_CTX mctx; DSA_SIG *sig; - FIPS_md_ctx_init(&mctx); n=hex2bin(value,msg); @@ -642,15 +640,12 @@ static void siggen(FILE *in, FILE *out) exit(1); do_bn_print_name(out, "Y",dsa->pub_key); - FIPS_digestinit(&mctx, md); - FIPS_digestupdate(&mctx, msg, n); - sig = FIPS_dsa_sign_ctx(dsa, &mctx); + sig = FIPS_dsa_sign(dsa, msg, n, md); do_bn_print_name(out, "R",sig->r); do_bn_print_name(out, "S",sig->s); fputs(RESP_EOL, out); FIPS_dsa_sig_free(sig); - FIPS_md_ctx_cleanup(&mctx); } } if (dsa) @@ -705,17 +700,12 @@ 
static void sigver(FILE *in, FILE *out) sig->r=hex2bn(value); else if(!strcmp(keyword,"S")) { - EVP_MD_CTX mctx; int r; - FIPS_md_ctx_init(&mctx); sig->s=hex2bn(value); - FIPS_digestinit(&mctx, md); - FIPS_digestupdate(&mctx, msg, n); no_err = 1; - r = FIPS_dsa_verify_ctx(dsa, &mctx, sig); + r = FIPS_dsa_verify(dsa, msg, n, md, sig); no_err = 0; - FIPS_md_ctx_cleanup(&mctx); if (sig->s) { BN_free(sig->s); diff --git a/fips/ecdsa/fips_ecdsa_sign.c b/fips/ecdsa/fips_ecdsa_sign.c index 0e86a50ef4..847d44e784 100644 --- a/fips/ecdsa/fips_ecdsa_sign.c +++ b/fips/ecdsa/fips_ecdsa_sign.c @@ -87,3 +87,28 @@ int FIPS_ecdsa_verify_ctx(EC_KEY *key, EVP_MD_CTX *ctx, ECDSA_SIG *s) return ret; } +int FIPS_ecdsa_verify(EC_KEY *key, const unsigned char *msg, size_t msglen, + const EVP_MD *mhash, ECDSA_SIG *s) + { + int ret=-1; + unsigned char dig[EVP_MAX_MD_SIZE]; + unsigned int dlen; + FIPS_digest(msg, msglen, dig, &dlen, mhash); + ret=FIPS_ecdsa_verify_digest(key, dig, dlen, s); + OPENSSL_cleanse(dig, dlen); + return ret; + } + +ECDSA_SIG * FIPS_ecdsa_sign(EC_KEY *key, + const unsigned char *msg, size_t msglen, + const EVP_MD *mhash) + { + ECDSA_SIG *s; + unsigned char dig[EVP_MAX_MD_SIZE]; + unsigned int dlen; + FIPS_digest(msg, msglen, dig, &dlen, mhash); + s = FIPS_dsa_sign_digest(key, dig, dlen); + OPENSSL_cleanse(dig, dlen); + return s; + } + diff --git a/fips/ecdsa/fips_ecdsavs.c b/fips/ecdsa/fips_ecdsavs.c index 35ff251060..5745a6d37a 100644 --- a/fips/ecdsa/fips_ecdsavs.c +++ b/fips/ecdsa/fips_ecdsavs.c @@ -308,8 +308,6 @@ static int SigGen(FILE *in, FILE *out) EC_KEY *key = NULL; ECDSA_SIG *sig = NULL; const EVP_MD *digest = NULL; - EVP_MD_CTX mctx; - EVP_MD_CTX_init(&mctx); Qx = BN_new(); Qy = BN_new(); while(fgets(buf, sizeof buf, in) != NULL) @@ -345,9 +343,7 @@ static int SigGen(FILE *in, FILE *out) return 0; } - FIPS_digestinit(&mctx, digest); - FIPS_digestupdate(&mctx, msg, mlen); - sig = FIPS_ecdsa_sign_ctx(key, &mctx); + sig = FIPS_ecdsa_sign(key, msg, mlen, 
digest); if (!sig) { @@ -369,7 +365,6 @@ static int SigGen(FILE *in, FILE *out) } BN_free(Qx); BN_free(Qy); - FIPS_md_ctx_cleanup(&mctx); return 1; } @@ -384,8 +379,6 @@ static int SigVer(FILE *in, FILE *out) EC_KEY *key = NULL; ECDSA_SIG sg, *sig = &sg; const EVP_MD *digest = NULL; - EVP_MD_CTX mctx; - EVP_MD_CTX_init(&mctx); sig->r = NULL; sig->s = NULL; while(fgets(buf, sizeof buf, in) != NULL) @@ -450,10 +443,8 @@ static int SigVer(FILE *in, FILE *out) return 0; } - FIPS_digestinit(&mctx, digest); - FIPS_digestupdate(&mctx, msg, mlen); no_err = 1; - rv = FIPS_ecdsa_verify_ctx(key, &mctx, sig); + rv = FIPS_ecdsa_verify(key, msg, mlen, digest, sig); EC_KEY_free(key); if (msg) OPENSSL_free(msg); @@ -471,7 +462,6 @@ static int SigVer(FILE *in, FILE *out) BN_free(Qx); if (Qy) BN_free(Qy); - EVP_MD_CTX_cleanup(&mctx); return 1; } #ifdef FIPS_ALGVS diff --git a/fips/fips.h b/fips/fips.h index 4cadbd26fd..8833dd2e9e 100644 --- a/fips/fips.h +++ b/fips/fips.h @@ -224,6 +224,16 @@ int FIPS_rsa_verify_digest(struct rsa_st *rsa, const struct env_md_st *mgf1Hash, const unsigned char *sigbuf, unsigned int siglen); +int FIPS_rsa_sign(struct rsa_st *rsa, const unsigned char *msg, int msglen, + const struct env_md_st *mhash, int rsa_pad_mode, + int saltlen, const struct env_md_st *mgf1Hash, + unsigned char *sigret, unsigned int *siglen); + +int FIPS_rsa_verify(struct rsa_st *rsa, const unsigned char *msg, int msglen, + const struct env_md_st *mhash, int rsa_pad_mode, + int saltlen, const struct env_md_st *mgf1Hash, + const unsigned char *sigbuf, unsigned int siglen); + #ifdef OPENSSL_FIPSCAPABLE int FIPS_digestinit(EVP_MD_CTX *ctx, const EVP_MD *type); diff --git a/fips/fips_test_suite.c b/fips/fips_test_suite.c index db0f18a16b..ee706d8a96 100644 --- a/fips/fips_test_suite.c +++ b/fips/fips_test_suite.c @@ -144,11 +144,9 @@ static int FIPS_dsa_test(int bad) DSA *dsa = NULL; unsigned char dgst[] = "etaonrishdlc"; int r = 0; - EVP_MD_CTX mctx; DSA_SIG *sig = NULL; 
ERR_clear_error(); - FIPS_md_ctx_init(&mctx); dsa = FIPS_dsa_new(); if (!dsa) goto end; @@ -159,23 +157,14 @@ static int FIPS_dsa_test(int bad) if (bad) BN_add_word(dsa->pub_key, 1); - if (!FIPS_digestinit(&mctx, EVP_sha256())) - goto end; - if (!FIPS_digestupdate(&mctx, dgst, sizeof(dgst) - 1)) - goto end; - sig = FIPS_dsa_sign_ctx(dsa, &mctx); + sig = FIPS_dsa_sign(dsa, dgst, sizeof(dgst) -1, EVP_sha256()); if (!sig) goto end; - if (!FIPS_digestinit(&mctx, EVP_sha256())) - goto end; - if (!FIPS_digestupdate(&mctx, dgst, sizeof(dgst) - 1)) - goto end; - r = FIPS_dsa_verify_ctx(dsa, &mctx, sig); + r = FIPS_dsa_verify(dsa, dgst, sizeof(dgst) -1, EVP_sha256(), sig); end: if (sig) FIPS_dsa_sig_free(sig); - FIPS_md_ctx_cleanup(&mctx); if (dsa) FIPS_dsa_free(dsa); if (r != 1) @@ -193,11 +182,9 @@ static int FIPS_rsa_test(int bad) unsigned char buf[256]; unsigned int slen; BIGNUM *bn; - EVP_MD_CTX mctx; int r = 0; ERR_clear_error(); - FIPS_md_ctx_init(&mctx); key = FIPS_rsa_new(); bn = BN_new(); if (!key || !bn) @@ -209,20 +196,13 @@ static int FIPS_rsa_test(int bad) if (bad) BN_add_word(key->n, 1); - if (!FIPS_digestinit(&mctx, EVP_sha256())) - goto end; - if (!FIPS_digestupdate(&mctx, input_ptext, sizeof(input_ptext) - 1)) - goto end; - if (!FIPS_rsa_sign_ctx(key, &mctx, RSA_PKCS1_PADDING, 0, NULL, buf, &slen)) + if (!FIPS_rsa_sign(key, input_ptext, sizeof(input_ptext) - 1, EVP_sha256(), + RSA_PKCS1_PADDING, 0, NULL, buf, &slen)) goto end; - if (!FIPS_digestinit(&mctx, EVP_sha256())) - goto end; - if (!FIPS_digestupdate(&mctx, input_ptext, sizeof(input_ptext) - 1)) - goto end; - r = FIPS_rsa_verify_ctx(key, &mctx, RSA_PKCS1_PADDING, 0, NULL, buf, slen); + r = FIPS_rsa_verify(key, input_ptext, sizeof(input_ptext) - 1, EVP_sha256(), + RSA_PKCS1_PADDING, 0, NULL, buf, slen); end: - FIPS_md_ctx_cleanup(&mctx); if (key) FIPS_rsa_free(key); if (r != 1) diff --git a/fips/rsa/fips_rsa_sign.c b/fips/rsa/fips_rsa_sign.c index 013333e0b4..a4e03e7417 100644 --- 
a/fips/rsa/fips_rsa_sign.c +++ b/fips/rsa/fips_rsa_sign.c @@ -442,4 +442,33 @@ err: return(ret); } +int FIPS_rsa_sign(RSA *rsa, const unsigned char *msg, int msglen, + const EVP_MD *mhash, int rsa_pad_mode, int saltlen, + const EVP_MD *mgf1Hash, + unsigned char *sigret, unsigned int *siglen) + { + unsigned int md_len, rv; + unsigned char md[EVP_MAX_MD_SIZE]; + FIPS_digest(msg, msglen, md, &md_len, mhash); + rv = FIPS_rsa_sign_digest(rsa, md, md_len, mhash, rsa_pad_mode, + saltlen, mgf1Hash, sigret, siglen); + OPENSSL_cleanse(md, md_len); + return rv; + } + + +int FIPS_rsa_verify(RSA *rsa, const unsigned char *msg, int msglen, + const EVP_MD *mhash, int rsa_pad_mode, int saltlen, + const EVP_MD *mgf1Hash, + const unsigned char *sigbuf, unsigned int siglen) + { + unsigned int md_len, rv; + unsigned char md[EVP_MAX_MD_SIZE]; + FIPS_digest(msg, msglen, md, &md_len, mhash); + rv = FIPS_rsa_verify_digest(rsa, md, md_len, mhash, rsa_pad_mode, + saltlen, mgf1Hash, sigbuf, siglen); + OPENSSL_cleanse(md, md_len); + return rv; + } + #endif diff --git a/fips/rsa/fips_rsastest.c b/fips/rsa/fips_rsastest.c index 72e75a3cc6..a96f277e6a 100644 --- a/fips/rsa/fips_rsastest.c +++ b/fips/rsa/fips_rsastest.c @@ -325,15 +325,12 @@ static int rsa_printsig(FILE *out, RSA *rsa, const EVP_MD *dgst, unsigned char *sigbuf = NULL; int i, siglen, pad_mode; /* EVP_PKEY structure */ - EVP_MD_CTX ctx; siglen = RSA_size(rsa); sigbuf = OPENSSL_malloc(siglen); if (!sigbuf) goto error; - FIPS_md_ctx_init(&ctx); - if (Saltlen >= 0) pad_mode = RSA_PKCS1_PSS_PADDING; else if (Saltlen == -2) @@ -341,16 +338,10 @@ static int rsa_printsig(FILE *out, RSA *rsa, const EVP_MD *dgst, else pad_mode = RSA_PKCS1_PADDING; - if (!FIPS_digestinit(&ctx, dgst)) - goto error; - if (!FIPS_digestupdate(&ctx, Msg, Msglen)) - goto error; - if (!FIPS_rsa_sign_ctx(rsa, &ctx, pad_mode, Saltlen, NULL, + if (!FIPS_rsa_sign(rsa, Msg, Msglen, dgst, pad_mode, Saltlen, NULL, sigbuf, (unsigned int *)&siglen)) goto error; - 
FIPS_md_ctx_cleanup(&ctx); - fputs("S = ", out); for (i = 0; i < siglen; i++) diff --git a/fips/rsa/fips_rsavtest.c b/fips/rsa/fips_rsavtest.c index 7685c74ef0..9bfc5e688b 100644 --- a/fips/rsa/fips_rsavtest.c +++ b/fips/rsa/fips_rsavtest.c @@ -323,7 +323,6 @@ static int rsa_printver(FILE *out, int ret = 0, r, pad_mode; /* Setup RSA and EVP_PKEY structures */ RSA *rsa_pubkey = NULL; - EVP_MD_CTX ctx; unsigned char *buf = NULL; rsa_pubkey = FIPS_rsa_new(); if (!rsa_pubkey) @@ -333,8 +332,6 @@ static int rsa_printver(FILE *out, if (!rsa_pubkey->n || !rsa_pubkey->e) goto error; - FIPS_md_ctx_init(&ctx); - if (Saltlen >= 0) pad_mode = RSA_PKCS1_PSS_PADDING; else if (Saltlen == -2) @@ -342,19 +339,11 @@ static int rsa_printver(FILE *out, else pad_mode = RSA_PKCS1_PADDING; - if (!FIPS_digestinit(&ctx, dgst)) - goto error; - if (!FIPS_digestupdate(&ctx, Msg, Msglen)) - goto error; - no_err = 1; - r = FIPS_rsa_verify_ctx(rsa_pubkey, &ctx, + r = FIPS_rsa_verify(rsa_pubkey, Msg, Msglen, dgst, pad_mode, Saltlen, NULL, S, Slen); no_err = 0; - - FIPS_md_ctx_cleanup(&ctx); - if (r < 0) goto error; From f2b0cf9178450fe58d0546cbb1e2f25b48e30f24 Mon Sep 17 00:00:00 2001 From: Andy Polyakov Date: Sat, 5 Nov 2011 13:55:20 +0000 Subject: [PATCH 011/120] ppc.pl: fix bug in bn_mul_comba4 [from HEAD]. PR: 2636 Submitted by: Charles Bryant --- crypto/bn/asm/ppc.pl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crypto/bn/asm/ppc.pl b/crypto/bn/asm/ppc.pl index aaf669a5b3..1249ce2299 100644 --- a/crypto/bn/asm/ppc.pl +++ b/crypto/bn/asm/ppc.pl @@ -952,7 +952,7 @@ $data=< Date: Sat, 5 Nov 2011 13:56:10 +0000 Subject: [PATCH 012/120] x86cpuid.pl: don't punish "last-year" OSes on "this-year" CPUs [from HEAD]. 
PR: 2633 --- crypto/x86cpuid.pl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crypto/x86cpuid.pl b/crypto/x86cpuid.pl index 6595ff35fc..168e4fa0a9 100644 --- a/crypto/x86cpuid.pl +++ b/crypto/x86cpuid.pl @@ -122,7 +122,7 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); } &bt ("ecx",26); # check XSAVE bit &jnc (&label("done")); &bt ("ecx",27); # check OSXSAVE bit - &jnc (&label("clear_xmm")); + &jnc (&label("clear_avx")); &xor ("ecx","ecx"); &data_byte(0x0f,0x01,0xd0); # xgetbv &and ("eax",6); From 04c80626369227c139eff7d1c4bfb4597659c775 Mon Sep 17 00:00:00 2001 From: Andy Polyakov Date: Sat, 5 Nov 2011 13:57:02 +0000 Subject: [PATCH 013/120] armv4cpuid.S, armv4-gf2m.pl: make newest code compilable by older assembler [from HEAD]. --- crypto/armv4cpuid.S | 12 ++++++------ crypto/bn/asm/armv4-gf2m.pl | 34 +++++++++++++++++----------------- 2 files changed, 23 insertions(+), 23 deletions(-) diff --git a/crypto/armv4cpuid.S b/crypto/armv4cpuid.S index c9102ca2a5..2d618deaa4 100644 --- a/crypto/armv4cpuid.S +++ b/crypto/armv4cpuid.S @@ -44,7 +44,7 @@ OPENSSL_atomic_add: bne .Lspin ldr r2,[r4] - add r2,r5 + add r2,r2,r5 str r2,[r4] str r0,[r6] @ release spinlock ldmia sp!,{r4-r6,lr} @@ -59,26 +59,26 @@ OPENSSL_atomic_add: OPENSSL_cleanse: eor ip,ip,ip cmp r1,#7 - subhs r1,#4 + subhs r1,r1,#4 bhs .Lot cmp r1,#0 beq .Lcleanse_done .Little: strb ip,[r0],#1 - subs r1,#1 + subs r1,r1,#1 bhi .Little b .Lcleanse_done .Lot: tst r0,#3 beq .Laligned strb ip,[r0],#1 - sub r1,#1 + sub r1,r1,#1 b .Lot .Laligned: str ip,[r0],#4 - subs r1,#4 + subs r1,r1,#4 bhs .Laligned - adds r1,#4 + adds r1,r1,#4 bne .Little .Lcleanse_done: tst lr,#1 diff --git a/crypto/bn/asm/armv4-gf2m.pl b/crypto/bn/asm/armv4-gf2m.pl index 9928dae872..c52e0b75b5 100644 --- a/crypto/bn/asm/armv4-gf2m.pl +++ b/crypto/bn/asm/armv4-gf2m.pl @@ -218,38 +218,38 @@ $code.=<<___; mov $b,r3 @ $b=b1 ldr r3,[sp,#32] @ load b0 mov $mask,#7<<2 - sub sp,#32 @ allocate tab[8] + sub sp,sp,#32 @ allocate 
tab[8] bl mul_1x1_ialu @ a1·b1 str $lo,[$ret,#8] str $hi,[$ret,#12] - eor $b,r3 @ flip b0 and b1 - eor $a,r2 @ flip a0 and a1 - eor r3,$b - eor r2,$a - eor $b,r3 - eor $a,r2 + eor $b,$b,r3 @ flip b0 and b1 + eor $a,$a,r2 @ flip a0 and a1 + eor r3,r3,$b + eor r2,r2,$a + eor $b,$b,r3 + eor $a,$a,r2 bl mul_1x1_ialu @ a0·b0 str $lo,[$ret] str $hi,[$ret,#4] - eor $a,r2 - eor $b,r3 + eor $a,$a,r2 + eor $b,$b,r3 bl mul_1x1_ialu @ (a1+a0)·(b1+b0) ___ @r=map("r$_",(6..9)); $code.=<<___; ldmia $ret,{@r[0]-@r[3]} - eor $lo,$hi - eor $hi,@r[1] - eor $lo,@r[0] - eor $hi,@r[2] - eor $lo,@r[3] - eor $hi,@r[3] + eor $lo,$lo,$hi + eor $hi,$hi,@r[1] + eor $lo,$lo,@r[0] + eor $hi,$hi,@r[2] + eor $lo,$lo,@r[3] + eor $hi,$hi,@r[3] str $hi,[$ret,#8] - eor $lo,$hi - add sp,#32 @ destroy tab[8] + eor $lo,$lo,$hi + add sp,sp,#32 @ destroy tab[8] str $lo,[$ret,#4] #if __ARM_ARCH__>=5 From 01fc2c1598b6f54c462ccc1b708aa54cdd291f16 Mon Sep 17 00:00:00 2001 From: "Dr. Stephen Henson" Date: Sat, 5 Nov 2011 18:04:50 +0000 Subject: [PATCH 014/120] fix set but unused warnings --- fips/aes/fips_aesavs.c | 10 ++++------ fips/aes/fips_gcmtest.c | 3 +-- fips/des/fips_desmovs.c | 10 ++++------ 3 files changed, 9 insertions(+), 14 deletions(-) diff --git a/fips/aes/fips_aesavs.c b/fips/aes/fips_aesavs.c index bbdb93aab9..fecaf990c6 100644 --- a/fips/aes/fips_aesavs.c +++ b/fips/aes/fips_aesavs.c @@ -554,7 +554,7 @@ static int proc_file(char *rqfile, char *rspfile) FILE *afp = NULL, *rfp = NULL; char ibuf[2048]; char tbuf[2048]; - int ilen, len, ret = 0; + int len; char algo[8] = ""; char amode[8] = ""; char atest[8] = ""; @@ -605,7 +605,6 @@ static int proc_file(char *rqfile, char *rspfile) while (!err && (fgets(ibuf, sizeof(ibuf), afp)) != NULL) { tidy_line(tbuf, ibuf); - ilen = strlen(ibuf); /* printf("step=%d ibuf=%s",step,ibuf); */ switch (step) { @@ -784,7 +783,7 @@ static int proc_file(char *rqfile, char *rspfile) } else { - ret = AESTest(&ctx, amode, akeysz, aKey, iVec, + AESTest(&ctx, amode, 
akeysz, aKey, iVec, dir, /* 0 = decrypt, 1 = encrypt */ plaintext, ciphertext, len); OutputValue("CIPHERTEXT",ciphertext,len,rfp, @@ -822,7 +821,7 @@ static int proc_file(char *rqfile, char *rspfile) } else { - ret = AESTest(&ctx, amode, akeysz, aKey, iVec, + AESTest(&ctx, amode, akeysz, aKey, iVec, dir, /* 0 = decrypt, 1 = encrypt */ plaintext, ciphertext, len); OutputValue("PLAINTEXT",(unsigned char *)plaintext,len,rfp, @@ -872,7 +871,7 @@ int main(int argc, char **argv) char *rqlist = "req.txt", *rspfile = NULL; FILE *fp = NULL; char fn[250] = "", rfn[256] = ""; - int f_opt = 0, d_opt = 1; + int d_opt = 1; fips_algtest_init(); if (argc > 1) @@ -883,7 +882,6 @@ int main(int argc, char **argv) } else if (strcasecmp(argv[1], "-f") == 0) { - f_opt = 1; d_opt = 0; } else diff --git a/fips/aes/fips_gcmtest.c b/fips/aes/fips_gcmtest.c index 02849bc2ab..9f50857fb9 100644 --- a/fips/aes/fips_gcmtest.c +++ b/fips/aes/fips_gcmtest.c @@ -271,7 +271,6 @@ static void xtstest(FILE *in, FILE *out) char *keyword, *value; int inlen = 0; int encrypt = 0; - int rv; long l; unsigned char *key = NULL, *iv = NULL; unsigned char *inbuf = NULL, *outbuf = NULL; @@ -327,7 +326,7 @@ static void xtstest(FILE *in, FILE *out) { FIPS_cipherinit(&ctx, xts, key, iv, encrypt); outbuf = OPENSSL_malloc(inlen); - rv = FIPS_cipher(&ctx, outbuf, inbuf, inlen); + FIPS_cipher(&ctx, outbuf, inbuf, inlen); OutputValue(encrypt ? 
"CT":"PT", outbuf, inlen, out, 0); OPENSSL_free(inbuf); OPENSSL_free(outbuf); diff --git a/fips/des/fips_desmovs.c b/fips/des/fips_desmovs.c index 79900aeae2..2bbeb53459 100644 --- a/fips/des/fips_desmovs.c +++ b/fips/des/fips_desmovs.c @@ -273,7 +273,7 @@ static int tproc_file(char *rqfile, char *rspfile) char afn[256], rfn[256]; FILE *afp = NULL, *rfp = NULL; char ibuf[2048], tbuf[2048]; - int ilen, len, ret = 0; + int len; char amode[8] = ""; char atest[100] = ""; int akeysz=0; @@ -324,7 +324,6 @@ static int tproc_file(char *rqfile, char *rspfile) while (!err && (fgets(ibuf, sizeof(ibuf), afp)) != NULL) { tidy_line(tbuf, ibuf); - ilen = strlen(ibuf); /* printf("step=%d ibuf=%s",step,ibuf);*/ if(step == 3 && !strcmp(amode,"ECB")) { @@ -555,7 +554,7 @@ static int tproc_file(char *rqfile, char *rspfile) else { assert(dir == 1); - ret = DESTest(&ctx, amode, akeysz, aKey, iVec, + DESTest(&ctx, amode, akeysz, aKey, iVec, dir, /* 0 = decrypt, 1 = encrypt */ ciphertext, plaintext, len); OutputValue("CIPHERTEXT",ciphertext,len,rfp, @@ -595,7 +594,7 @@ static int tproc_file(char *rqfile, char *rspfile) else { assert(dir == 0); - ret = DESTest(&ctx, amode, akeysz, aKey, iVec, + DESTest(&ctx, amode, akeysz, aKey, iVec, dir, /* 0 = decrypt, 1 = encrypt */ plaintext, ciphertext, len); OutputValue("PLAINTEXT",(unsigned char *)plaintext,len,rfp, @@ -645,7 +644,7 @@ int main(int argc, char **argv) char *rqlist = "req.txt", *rspfile = NULL; FILE *fp = NULL; char fn[250] = "", rfn[256] = ""; - int f_opt = 0, d_opt = 1; + int d_opt = 1; fips_algtest_init(); if (argc > 1) @@ -656,7 +655,6 @@ int main(int argc, char **argv) } else if (fips_strcasecmp(argv[1], "-f") == 0) { - f_opt = 1; d_opt = 0; } else From 21a5cb26965843b263044ff2b7220ea1f030cf4e Mon Sep 17 00:00:00 2001 From: "Dr. 
Stephen Henson" Date: Sat, 5 Nov 2011 18:11:16 +0000 Subject: [PATCH 015/120] typo: use key for POST callback --- fips/fips_post.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fips/fips_post.c b/fips/fips_post.c index e55ec08407..8cd2334362 100644 --- a/fips/fips_post.c +++ b/fips/fips_post.c @@ -207,7 +207,6 @@ int fips_pkey_signature_test(int id, EVP_PKEY *pkey, const char *fail_str) { int subid; - void *ex = NULL; int ret = 0; unsigned char *sig = NULL; unsigned int siglen; @@ -335,7 +334,7 @@ int fips_pkey_signature_test(int id, EVP_PKEY *pkey, FIPSerr(FIPS_F_FIPS_PKEY_SIGNATURE_TEST,FIPS_R_TEST_FAILURE); if (fail_str) FIPS_add_error_data(2, "Type=", fail_str); - fips_post_failed(id, subid, ex); + fips_post_failed(id, subid, pkey); return 0; } return fips_post_success(id, subid, pkey); From df64f34e843c19bb28102278ffc5a687ebee62f2 Mon Sep 17 00:00:00 2001 From: "Dr. Stephen Henson" Date: Sat, 5 Nov 2011 18:15:01 +0000 Subject: [PATCH 016/120] make post failure simulation reversible in all cases --- fips/fips.h | 5 ++--- fips/fips_test_suite.c | 4 ++-- fips/rand/fips_drbg_lib.c | 4 ++-- fips/rand/fips_rand.c | 4 ++-- fips/rand/fips_rand_selftest.c | 7 ++++--- 5 files changed, 12 insertions(+), 12 deletions(-) diff --git a/fips/fips.h b/fips/fips.h index 8833dd2e9e..b6263575c3 100644 --- a/fips/fips.h +++ b/fips/fips.h @@ -97,9 +97,8 @@ int FIPS_selftest_rsa(void); int FIPS_selftest_dsa(void); int FIPS_selftest_ecdsa(void); int FIPS_selftest_ecdh(void); -void FIPS_corrupt_drbg(void); -void FIPS_x931_stick(void); -void FIPS_drbg_stick(void); +void FIPS_x931_stick(int onoff); +void FIPS_drbg_stick(int onoff); int FIPS_selftest_x931(void); int FIPS_selftest_hmac(void); int FIPS_selftest_drbg(void); diff --git a/fips/fips_test_suite.c b/fips/fips_test_suite.c index ee706d8a96..57a1b8a6a4 100644 --- a/fips/fips_test_suite.c +++ b/fips/fips_test_suite.c @@ -1104,9 +1104,9 @@ int main(int argc, char **argv) if (!FIPS_module_mode()) return 1; if 
(do_drbg_stick) - FIPS_drbg_stick(); + FIPS_drbg_stick(1); if (do_rng_stick) - FIPS_x931_stick(); + FIPS_x931_stick(1); /* AES encryption/decryption */ diff --git a/fips/rand/fips_drbg_lib.c b/fips/rand/fips_drbg_lib.c index 1596977fd5..e0e1d75091 100644 --- a/fips/rand/fips_drbg_lib.c +++ b/fips/rand/fips_drbg_lib.c @@ -544,9 +544,9 @@ void FIPS_drbg_set_reseed_interval(DRBG_CTX *dctx, int interval) static int drbg_stick = 0; -void FIPS_drbg_stick(void) +void FIPS_drbg_stick(int onoff) { - drbg_stick = 1; + drbg_stick = onoff; } /* Continuous DRBG utility function */ diff --git a/fips/rand/fips_rand.c b/fips/rand/fips_rand.c index cb9184e1f7..f80c005758 100644 --- a/fips/rand/fips_rand.c +++ b/fips/rand/fips_rand.c @@ -114,9 +114,9 @@ static FIPS_PRNG_CTX sctx; static int fips_prng_fail = 0; -void FIPS_x931_stick(void) +void FIPS_x931_stick(int onoff) { - fips_prng_fail = 1; + fips_prng_fail = onoff; } static void fips_rand_prng_reset(FIPS_PRNG_CTX *ctx) diff --git a/fips/rand/fips_rand_selftest.c b/fips/rand/fips_rand_selftest.c index bafce719ca..ec949cbdbb 100644 --- a/fips/rand/fips_rand_selftest.c +++ b/fips/rand/fips_rand_selftest.c @@ -129,15 +129,16 @@ static AES_PRNG_TV aes_256_tv = static int do_x931_test(unsigned char *key, int keylen, AES_PRNG_TV *tv) { - unsigned char R[16]; + unsigned char R[16], V[16]; int rv = 1; + memcpy(V, tv->V, sizeof(V)); if (!FIPS_x931_set_key(key, keylen)) return 0; if (!fips_post_started(FIPS_TEST_X931, keylen, NULL)) return 1; if (!fips_post_corrupt(FIPS_TEST_X931, keylen, NULL)) - tv->V[0]++; - FIPS_x931_seed(tv->V, 16); + V[0]++; + FIPS_x931_seed(V, 16); FIPS_x931_set_dt(tv->DT); FIPS_x931_bytes(R, 16); if (memcmp(R, tv->R, 16)) From 03eae35352210426398c554b570f315414469d78 Mon Sep 17 00:00:00 2001 From: "Dr. 
Stephen Henson" Date: Sat, 5 Nov 2011 18:25:16 +0000 Subject: [PATCH 017/120] typo --- fips/ecdsa/fips_ecdsa_sign.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fips/ecdsa/fips_ecdsa_sign.c b/fips/ecdsa/fips_ecdsa_sign.c index 847d44e784..a7839ee592 100644 --- a/fips/ecdsa/fips_ecdsa_sign.c +++ b/fips/ecdsa/fips_ecdsa_sign.c @@ -107,7 +107,7 @@ ECDSA_SIG * FIPS_ecdsa_sign(EC_KEY *key, unsigned char dig[EVP_MAX_MD_SIZE]; unsigned int dlen; FIPS_digest(msg, msglen, dig, &dlen, mhash); - s = FIPS_dsa_sign_digest(key, dig, dlen); + s = FIPS_ecdsa_sign_digest(key, dig, dlen); OPENSSL_cleanse(dig, dlen); return s; } From 8a794abd9d427fb58519c03e2dd8fa6da08ee6f1 Mon Sep 17 00:00:00 2001 From: "Dr. Stephen Henson" Date: Sun, 6 Nov 2011 12:52:27 +0000 Subject: [PATCH 018/120] Update fips_test_suite to take multiple command line options and an induced error checking function. --- CHANGES | 5 + fips/fips_test_suite.c | 371 ++++++++++++++++++++++++++++++++++++----- 2 files changed, 338 insertions(+), 38 deletions(-) diff --git a/CHANGES b/CHANGES index d3883be28f..795ada567e 100644 --- a/CHANGES +++ b/CHANGES @@ -4,6 +4,11 @@ Changes between 1.0.1 and 1.1.0 [xx XXX xxxx] + *) Update fips_test_suite to support multiple command line options. New + test to induce all self test errors in sequence and check expected + failures. + [Steve Henson] + *) Add FIPS_{rsa,dsa,ecdsa}_{sign,verify} functions which digest and sign or verify all in one operation. 
[Steve Henson] diff --git a/fips/fips_test_suite.c b/fips/fips_test_suite.c index 57a1b8a6a4..2d0a4bba8b 100644 --- a/fips/fips_test_suite.c +++ b/fips/fips_test_suite.c @@ -804,11 +804,14 @@ static int fail_id = -1; static int fail_sub = -1; static int fail_key = -1; +static int st_err, post_quiet = 0; + static int post_cb(int op, int id, int subid, void *ex) { const char *idstr, *exstr = ""; char asctmp[20]; int keytype = -1; + int exp_fail = 0; #ifdef FIPS_POST_TIME static struct timespec start, end, tstart, tend; #endif @@ -920,6 +923,11 @@ static int post_cb(int op, int id, int subid, void *ex) } + if (fail_id == id + && (fail_key == -1 || fail_key == keytype) + && (fail_sub == -1 || fail_sub == subid)) + exp_fail = 1; + switch(op) { case FIPS_POST_BEGIN: @@ -943,14 +951,22 @@ static int post_cb(int op, int id, int subid, void *ex) break; case FIPS_POST_STARTED: - printf("\t\t%s %s test started\n", idstr, exstr); + if (!post_quiet && !exp_fail) + printf("\t\t%s %s test started\n", idstr, exstr); #ifdef FIPS_POST_TIME clock_gettime(CLOCK_REALTIME, &start); #endif break; case FIPS_POST_SUCCESS: - printf("\t\t%s %s test OK\n", idstr, exstr); + if (exp_fail) + { + printf("\t\t%s %s test OK but should've failed\n", + idstr, exstr); + st_err++; + } + else if (!post_quiet) + printf("\t\t%s %s test OK\n", idstr, exstr); #ifdef FIPS_POST_TIME clock_gettime(CLOCK_REALTIME, &end); printf("\t\t\tTook %f seconds\n", @@ -960,13 +976,21 @@ static int post_cb(int op, int id, int subid, void *ex) break; case FIPS_POST_FAIL: - printf("\t\t%s %s test FAILED!!\n", idstr, exstr); + if (exp_fail) + { + printf("\t\t%s %s test failed as expected\n", + idstr, exstr); + } + else + { + printf("\t\t%s %s test Failed Incorrectly!!\n", + idstr, exstr); + st_err++; + } break; case FIPS_POST_CORRUPT: - if (fail_id == id - && (fail_key == -1 || fail_key == keytype) - && (fail_sub == -1 || fail_sub == subid)) + if (exp_fail) { printf("\t\t%s %s test failure induced\n", idstr, exstr); return 0; 
@@ -977,18 +1001,272 @@ static int post_cb(int op, int id, int subid, void *ex) return 1; } +/* Test POST induced failures */ + +typedef struct + { + const char *name; + int id, subid, keyid; + } fail_list; + +static fail_list flist[] = + { + {"Integrity", FIPS_TEST_INTEGRITY, -1, -1}, + {"AES", FIPS_TEST_CIPHER, NID_aes_128_ecb, -1}, + {"DES3", FIPS_TEST_CIPHER, NID_des_ede3_ecb, -1}, + {"AES-GCM", FIPS_TEST_GCM, -1, -1}, + {"AES-CCM", FIPS_TEST_CCM, -1, -1}, + {"AES-XTS", FIPS_TEST_XTS, -1, -1}, + {"Digest", FIPS_TEST_DIGEST, -1, -1}, + {"HMAC", FIPS_TEST_HMAC, -1, -1}, + {"CMAC", FIPS_TEST_CMAC, -1, -1}, + {"DRBG", FIPS_TEST_DRBG, -1, -1}, + {"X9.31 PRNG", FIPS_TEST_X931, -1, -1}, + {"RSA", FIPS_TEST_SIGNATURE, -1, EVP_PKEY_RSA}, + {"DSA", FIPS_TEST_SIGNATURE, -1, EVP_PKEY_DSA}, + {"ECDSA", FIPS_TEST_SIGNATURE, -1, EVP_PKEY_EC}, + {"ECDH", FIPS_TEST_ECDH, -1, -1}, + {NULL, -1, -1, -1} + }; + +static int do_fail_all(int fullpost, int fullerr) + { + fail_list *ftmp; + int rv; + size_t i; + RSA *rsa = NULL; + DSA *dsa = NULL; + DRBG_CTX *dctx = NULL; + EC_KEY *ec = NULL; + BIGNUM *bn = NULL; + unsigned char out[10]; + if (!fullpost) + post_quiet = 1; + if (!fullerr) + no_err = 1; + FIPS_module_mode_set(0, NULL); + for (ftmp = flist; ftmp->name; ftmp++) + { + printf(" Testing induced failure of %s test\n", ftmp->name); + fail_id = ftmp->id; + fail_sub = ftmp->subid; + fail_key = ftmp->keyid; + rv = FIPS_module_mode_set(1, FIPS_AUTH_USER_PASS); + if (rv) + { + printf("\tFIPS mode incorrectly successful!!\n"); + st_err++; + } + } + printf(" Testing induced failure of RSA keygen test\n"); + /* NB POST will succeed with a pairwise test failures as + * it is not used during POST. 
+ */ + fail_id = FIPS_TEST_PAIRWISE; + fail_key = EVP_PKEY_RSA; + /* Now enter FIPS mode successfully */ + if (!FIPS_module_mode_set(1, FIPS_AUTH_USER_PASS)) + { + printf("\tError entering FIPS mode\n"); + st_err++; + } + + rsa = FIPS_rsa_new(); + bn = BN_new(); + if (!rsa || !bn) + return 0; + BN_set_word(bn, 65537); + if (RSA_generate_key_ex(rsa, 2048,bn,NULL)) + { + printf("\tRSA key generated OK incorrectly!!\n"); + st_err++; + } + else + printf("\tRSA key generation failed as expected.\n"); + + /* Leave FIPS mode to clear error */ + FIPS_module_mode_set(0, NULL); + + printf(" Testing induced failure of DSA keygen test\n"); + fail_key = EVP_PKEY_DSA; + /* Enter FIPS mode successfully */ + if (!FIPS_module_mode_set(1, FIPS_AUTH_USER_PASS)) + { + printf("\tError entering FIPS mode\n"); + st_err++; + } + dsa = FIPS_dsa_new(); + if (!dsa) + return 0; + if (!DSA_generate_parameters_ex(dsa, 1024,NULL,0,NULL,NULL,NULL)) + return 0; + if (DSA_generate_key(dsa)) + { + printf("\tDSA key generated OK incorrectly!!\n"); + st_err++; + } + else + printf("\tDSA key generation failed as expected.\n"); + + /* Leave FIPS mode to clear error */ + FIPS_module_mode_set(0, NULL); + /* Enter FIPS mode successfully */ + if (!FIPS_module_mode_set(1, FIPS_AUTH_USER_PASS)) + { + printf("\tError entering FIPS mode\n"); + st_err++; + } + + printf(" Testing induced failure of ECDSA keygen test\n"); + fail_key = EVP_PKEY_EC; + + ec = EC_KEY_new_by_curve_name(NID_X9_62_prime256v1); + + if (!ec) + return 0; + + if (EC_KEY_generate_key(ec)) + { + printf("\tECDSA key generated OK incorrectly!!\n"); + st_err++; + } + else + printf("\tECDSA key generation failed as expected.\n"); + + fail_id = -1; + fail_sub = -1; + fail_key = -1; + /* Leave FIPS mode to clear error */ + FIPS_module_mode_set(0, NULL); + /* Enter FIPS mode successfully */ + if (!FIPS_module_mode_set(1, FIPS_AUTH_USER_PASS)) + { + printf("\tError entering FIPS mode\n"); + st_err++; + } + /* Induce continuous PRNG failure for DRBG */ 
+ printf(" Testing induced failure of DRBG CPRNG test\n"); + FIPS_drbg_stick(1); + + /* Initialise a DRBG context */ + dctx = FIPS_drbg_new(NID_sha1, 0); + if (!dctx) + return 0; + for (i = 0; i < sizeof(dummy_drbg_entropy); i++) + { + dummy_drbg_entropy[i] = i & 0xff; + } + FIPS_drbg_set_callbacks(dctx, drbg_test_cb, 0, 0x10, drbg_test_cb, 0); + if (!FIPS_drbg_instantiate(dctx, dummy_drbg_entropy, 10)) + { + printf("\tDRBG instantiate error!!\n"); + st_err++; + } + if (FIPS_drbg_generate(dctx, out, sizeof(out), 0, NULL, 0)) + { + printf("\tDRBG continuous PRNG OK incorrectly!!\n"); + st_err++; + } + else + printf("\tDRBG continuous PRNG failed as expected\n"); + FIPS_drbg_stick(0); + + /* Leave FIPS mode to clear error */ + FIPS_module_mode_set(0, NULL); + /* Enter FIPS mode successfully */ + if (!FIPS_module_mode_set(1, FIPS_AUTH_USER_PASS)) + { + printf("\tError entering FIPS mode\n"); + st_err++; + } + + FIPS_drbg_free(dctx); + + /* Induce continuous PRNG failure for DRBG entropy source*/ + printf(" Testing induced failure of DRBG entropy CPRNG test\n"); + + /* Initialise a DRBG context */ + dctx = FIPS_drbg_new(NID_sha1, 0); + if (!dctx) + return 0; + for (i = 0; i < sizeof(dummy_drbg_entropy); i++) + { + dummy_drbg_entropy[i] = i & 0xf; + } + FIPS_drbg_set_callbacks(dctx, drbg_test_cb, 0, 0x10, drbg_test_cb, 0); + if (FIPS_drbg_instantiate(dctx, dummy_drbg_entropy, 10)) + { + printf("\tDRBG continuous PRNG entropy OK incorrectly!!\n"); + st_err++; + } + else + printf("\tDRBG continuous PRNG entropy failed as expected\n"); + /* Leave FIPS mode to clear error */ + FIPS_module_mode_set(0, NULL); + /* Enter FIPS mode successfully */ + if (!FIPS_module_mode_set(1, FIPS_AUTH_USER_PASS)) + { + printf("\tError entering FIPS mode\n"); + st_err++; + } + FIPS_drbg_free(dctx); + + /* Leave FIPS mode to clear error */ + FIPS_module_mode_set(0, NULL); + /* Enter FIPS mode successfully */ + if (!FIPS_module_mode_set(1, FIPS_AUTH_USER_PASS)) + { + printf("\tError entering 
FIPS mode\n"); + st_err++; + } + + printf(" Testing induced failure of X9.31 CPRNG test\n"); + FIPS_x931_stick(1); + if (!FIPS_x931_set_key(dummy_drbg_entropy, 32)) + { + printf("\tError initialiasing X9.31 PRNG\n"); + st_err++; + } + if (!FIPS_x931_seed(dummy_drbg_entropy + 32, 16)) + { + printf("\tError seeding X9.31 PRNG\n"); + st_err++; + } + if (FIPS_x931_bytes(out, 10) > 0) + { + printf("\tX9.31 continuous PRNG failure OK incorrectly!!\n"); + st_err++; + } + else + printf("\tX9.31 continuous PRNG failed as expected\n"); + FIPS_x931_stick(0); + + printf(" Induced failure test completed with %d errors\n", st_err); + post_quiet = 0; + no_err = 0; + BN_free(bn); + FIPS_rsa_free(rsa); + FIPS_dsa_free(dsa); + FIPS_ec_key_free(ec); + if (st_err) + return 0; + return 1; + } + #ifdef FIPS_ALGVS int fips_test_suite_main(int argc, char **argv) #else int main(int argc, char **argv) #endif { + char **args = argv + 1; int bad_rsa = 0, bad_dsa = 0; int do_rng_stick = 0; int do_drbg_stick = 0; int no_exit = 0; - int no_dh = 0; + int no_dh = 0, no_drbg = 0; char *pass = FIPS_AUTH_USER_PASS; + int fullpost = 0, fullerr = 0; FIPS_post_set_callback(post_cb); @@ -996,87 +1274,99 @@ int main(int argc, char **argv) printf("\t%s\n\n", FIPS_module_version_text()); - if (argv[1]) { + while(*args) { /* Corrupted KAT tests */ - if (!strcmp(argv[1], "integrity")) { + if (!strcmp(*args, "integrity")) { fail_id = FIPS_TEST_INTEGRITY; - } else if (!strcmp(argv[1], "aes")) { + } else if (!strcmp(*args, "aes")) { fail_id = FIPS_TEST_CIPHER; fail_sub = NID_aes_128_ecb; - } else if (!strcmp(argv[1], "aes-ccm")) { + } else if (!strcmp(*args, "aes-ccm")) { fail_id = FIPS_TEST_CCM; - } else if (!strcmp(argv[1], "aes-gcm")) { + } else if (!strcmp(*args, "aes-gcm")) { fail_id = FIPS_TEST_GCM; - } else if (!strcmp(argv[1], "aes-xts")) { + } else if (!strcmp(*args, "aes-xts")) { fail_id = FIPS_TEST_XTS; - } else if (!strcmp(argv[1], "des")) { + } else if (!strcmp(*args, "des")) { fail_id = 
FIPS_TEST_CIPHER; fail_sub = NID_des_ede3_ecb; - } else if (!strcmp(argv[1], "dsa")) { + } else if (!strcmp(*args, "dsa")) { fail_id = FIPS_TEST_SIGNATURE; fail_key = EVP_PKEY_DSA; } else if (!strcmp(argv[1], "ecdh")) { fail_id = FIPS_TEST_ECDH; - } else if (!strcmp(argv[1], "ecdsa")) { + } else if (!strcmp(*args, "ecdsa")) { fail_id = FIPS_TEST_SIGNATURE; fail_key = EVP_PKEY_EC; - } else if (!strcmp(argv[1], "rsa")) { + } else if (!strcmp(*args, "rsa")) { fail_id = FIPS_TEST_SIGNATURE; fail_key = EVP_PKEY_RSA; - } else if (!strcmp(argv[1], "rsakey")) { + } else if (!strcmp(*args, "rsakey")) { printf("RSA key generation and signature validation with corrupted key...\n"); bad_rsa = 1; no_exit = 1; - } else if (!strcmp(argv[1], "rsakeygen")) { + } else if (!strcmp(*args, "rsakeygen")) { fail_id = FIPS_TEST_PAIRWISE; fail_key = EVP_PKEY_RSA; no_exit = 1; - } else if (!strcmp(argv[1], "dsakey")) { + } else if (!strcmp(*args, "dsakey")) { printf("DSA key generation and signature validation with corrupted key...\n"); bad_dsa = 1; no_exit = 1; - } else if (!strcmp(argv[1], "dsakeygen")) { + } else if (!strcmp(*args, "dsakeygen")) { fail_id = FIPS_TEST_PAIRWISE; fail_key = EVP_PKEY_DSA; no_exit = 1; - } else if (!strcmp(argv[1], "sha1")) { + } else if (!strcmp(*args, "sha1")) { fail_id = FIPS_TEST_DIGEST; - } else if (!strcmp(argv[1], "hmac")) { + } else if (!strcmp(*args, "hmac")) { fail_id = FIPS_TEST_HMAC; - } else if (!strcmp(argv[1], "cmac")) { + } else if (!strcmp(*args, "cmac")) { fail_id = FIPS_TEST_CMAC; - } else if (!strcmp(argv[1], "drbg")) { + } else if (!strcmp(*args, "drbg")) { fail_id = FIPS_TEST_DRBG; } else if (!strcmp(argv[1], "rng")) { fail_id = FIPS_TEST_X931; - } else if (!strcmp(argv[1], "nodh")) { + } else if (!strcmp(*args, "nodrbg")) { + no_drbg = 1; + no_exit = 1; + } else if (!strcmp(*args, "nodh")) { no_dh = 1; no_exit = 1; - } else if (!strcmp(argv[1], "post")) { + } else if (!strcmp(*args, "post")) { fail_id = -1; - } else if (!strcmp(argv[1], 
"rngstick")) { + } else if (!strcmp(*args, "rngstick")) { do_rng_stick = 1; no_exit = 1; printf("RNG test with stuck continuous test...\n"); - } else if (!strcmp(argv[1], "drbgentstick")) { + } else if (!strcmp(*args, "drbgentstick")) { do_entropy_stick(); - } else if (!strcmp(argv[1], "drbgstick")) { + } else if (!strcmp(*args, "drbgstick")) { do_drbg_stick = 1; no_exit = 1; printf("DRBG test with stuck continuous test...\n"); - } else if (!strcmp(argv[1], "user")) { + } else if (!strcmp(*args, "user")) { pass = FIPS_AUTH_USER_PASS; - } else if (!strcmp(argv[1], "officer")) { + } else if (!strcmp(*args, "officer")) { pass = FIPS_AUTH_OFFICER_PASS; - } else if (!strcmp(argv[1], "badpass")) { + } else if (!strcmp(*args, "badpass")) { pass = "bad invalid password"; - } else if (!strcmp(argv[1], "nopass")) { + } else if (!strcmp(*args, "nopass")) { pass = ""; + } else if (!strcmp(*args, "fullpost")) { + fullpost = 1; + no_exit = 1; + } else if (!strcmp(*args, "fullerr")) { + fullerr = 1; + no_exit = 1; } else { - printf("Bad argument \"%s\"\n", argv[1]); + printf("Bad argument \"%s\"\n", *args); return 1; } - if (!no_exit) { + args++; + } + + if ((argc != 1) && !no_exit) { fips_algtest_init_nofips(); if (!FIPS_module_mode_set(1, pass)) { printf("Power-up self test failed\n"); @@ -1084,7 +1374,6 @@ int main(int argc, char **argv) } printf("Power-up self test successful\n"); return 0; - } } fips_algtest_init_nofips(); @@ -1202,9 +1491,15 @@ int main(int argc, char **argv) : Fail("failed INCORRECTLY!") ); printf("12. DRBG generation check...\n"); - printf("\t%s\n", do_drbg_all() ? "successful as expected" + if (no_drbg) + printf("\tskipped\n"); + else + printf("\t%s\n", do_drbg_all() ? "successful as expected" : Fail("failed INCORRECTLY!") ); + printf("13. Induced test failure check...\n"); + printf("\t%s\n", do_fail_all(fullpost, fullerr) ? "successful as expected" + : Fail("failed INCORRECTLY!") ); printf("\nAll tests completed with %d errors\n", Error); return Error ? 
1 : 0; } From 79f2c9d1cddd9ac7d68c4e3e39c228f01ac79b7e Mon Sep 17 00:00:00 2001 From: "Dr. Stephen Henson" Date: Sun, 6 Nov 2011 13:08:54 +0000 Subject: [PATCH 019/120] check for unset entropy and nonce callbacks --- fips/rand/fips_drbg_lib.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fips/rand/fips_drbg_lib.c b/fips/rand/fips_drbg_lib.c index e0e1d75091..ee162d05eb 100644 --- a/fips/rand/fips_drbg_lib.c +++ b/fips/rand/fips_drbg_lib.c @@ -154,6 +154,8 @@ static size_t fips_get_entropy(DRBG_CTX *dctx, unsigned char **pout, { unsigned char *tout, *p; size_t bl = dctx->entropy_blocklen, rv; + if (!dctx->get_entropy) + return 0; if (dctx->xflags & DRBG_FLAG_TEST || !bl) return dctx->get_entropy(dctx, pout, entropy, min_len, max_len); rv = dctx->get_entropy(dctx, &tout, entropy + bl, @@ -241,7 +243,7 @@ int FIPS_drbg_instantiate(DRBG_CTX *dctx, goto end; } - if (dctx->max_nonce > 0) + if (dctx->max_nonce > 0 && dctx->get_nonce) { noncelen = dctx->get_nonce(dctx, &nonce, dctx->strength / 2, From 68b2f55b90698090a1cdc5c7178f6383891c8e02 Mon Sep 17 00:00:00 2001 From: Andy Polyakov Date: Sun, 6 Nov 2011 19:49:58 +0000 Subject: [PATCH 020/120] e_aes.c: fold aesni_xts_cipher and [most importantly] fix aes_xts_cipher's return value after custom flag was rightly reverted [from HEAD]. 
--- crypto/evp/e_aes.c | 63 +++++++++++++++++++++++----------------------- 1 file changed, 31 insertions(+), 32 deletions(-) diff --git a/crypto/evp/e_aes.c b/crypto/evp/e_aes.c index 429255d215..71f9e037d5 100644 --- a/crypto/evp/e_aes.c +++ b/crypto/evp/e_aes.c @@ -89,6 +89,10 @@ typedef struct { AES_KEY ks1, ks2; /* AES key schedules to use */ XTS128_CONTEXT xts; + void (*stream)(const unsigned char *in, + unsigned char *out, size_t length, + const AES_KEY *key1, const AES_KEY *key2, + const unsigned char iv[16]); } EVP_AES_XTS_CTX; typedef struct @@ -123,6 +127,9 @@ void vpaes_cbc_encrypt(const unsigned char *in, unsigned char *ivec, int enc); #endif #ifdef BSAES_ASM +void bsaes_cbc_encrypt(const unsigned char *in, unsigned char *out, + size_t length, const AES_KEY *key, + unsigned char ivec[16], int enc); void bsaes_ctr32_encrypt_blocks(const unsigned char *in, unsigned char *out, size_t len, const AES_KEY *key, const unsigned char ivec[16]); @@ -337,11 +344,13 @@ static int aesni_xts_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, { aesni_set_encrypt_key(key, ctx->key_len * 4, &xctx->ks1); xctx->xts.block1 = (block128_f)aesni_encrypt; + xctx->stream = aesni_xts_encrypt; } else { aesni_set_decrypt_key(key, ctx->key_len * 4, &xctx->ks1); xctx->xts.block1 = (block128_f)aesni_decrypt; + xctx->stream = aesni_xts_decrypt; } aesni_set_encrypt_key(key + ctx->key_len/2, @@ -360,32 +369,9 @@ static int aesni_xts_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, return 1; } +#define aesni_xts_cipher aes_xts_cipher static int aesni_xts_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, - const unsigned char *in, size_t len) - { - EVP_AES_XTS_CTX *xctx = ctx->cipher_data; - if (!xctx->xts.key1 || !xctx->xts.key2) - return -1; - if (!out || !in) - return -1; -#ifdef OPENSSL_FIPS - /* Requirement of SP800-38E */ - if (FIPS_module_mode() && !(ctx->flags & EVP_CIPH_FLAG_NON_FIPS_ALLOW) && - (len > (1L<<20)*16)) - { - EVPerr(EVP_F_AESNI_XTS_CIPHER, EVP_R_TOO_LARGE); 
- return -1; - } -#endif - if (ctx->encrypt) - aesni_xts_encrypt(in, out, len, - xctx->xts.key1, xctx->xts.key2, ctx->iv); - else - aesni_xts_decrypt(in, out, len, - xctx->xts.key1, xctx->xts.key2, ctx->iv); - - return len; - } + const unsigned char *in, size_t len); static int aesni_ccm_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, const unsigned char *iv, int enc) @@ -503,6 +489,15 @@ static int aes_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, mode = ctx->cipher->flags & EVP_CIPH_MODE; if ((mode == EVP_CIPH_ECB_MODE || mode == EVP_CIPH_CBC_MODE) && !enc) +#ifdef BSAES_CAPABLE + if (BSAES_CAPABLE && mode==EVP_CIPH_CBC_MODE) + { + ret = AES_set_decrypt_key(key,ctx->key_len*8,&dat->ks); + dat->block = (block128_f)AES_decrypt; + dat->stream.cbc = (cbc128_f)bsaes_cbc_encrypt; + } + else +#endif #ifdef VPAES_CAPABLE if (VPAES_CAPABLE) { @@ -1050,6 +1045,7 @@ static int aes_xts_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, if (key) do { + xctx->stream = NULL; /* key_len is two AES keys */ #ifdef VPAES_CAPABLE if (VPAES_CAPABLE) @@ -1105,22 +1101,25 @@ static int aes_xts_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, { EVP_AES_XTS_CTX *xctx = ctx->cipher_data; if (!xctx->xts.key1 || !xctx->xts.key2) - return -1; + return 0; if (!out || !in) - return -1; + return 0; #ifdef OPENSSL_FIPS /* Requirement of SP800-38E */ if (FIPS_module_mode() && !(ctx->flags & EVP_CIPH_FLAG_NON_FIPS_ALLOW) && - (len > (1L<<20)*16)) + (len > (1UL<<20)*16)) { EVPerr(EVP_F_AES_XTS_CIPHER, EVP_R_TOO_LARGE); - return -1; + return 0; } #endif - if (CRYPTO_xts128_encrypt(&xctx->xts, ctx->iv, in, out, len, + if (xctx->stream) + (*xctx->stream)(in, out, len, + xctx->xts.key1, xctx->xts.key2, ctx->iv); + else if (CRYPTO_xts128_encrypt(&xctx->xts, ctx->iv, in, out, len, ctx->encrypt)) - return -1; - return len; + return 0; + return 1; } #define aes_xts_cleanup NULL From 1562ce17cba9225ddb200859c2a17fcc0768860a Mon Sep 17 00:00:00 2001 From: Andy Polyakov Date: Mon, 7 Nov 
2011 00:22:59 +0000 Subject: [PATCH 021/120] fipsld, incore: switch to new cross-compile support [from HEAD]. --- fips/fipsld | 28 +++++++++++++++++----------- util/incore | 3 ++- 2 files changed, 19 insertions(+), 12 deletions(-) diff --git a/fips/fipsld b/fips/fipsld index 6184e2064e..62565fd032 100755 --- a/fips/fipsld +++ b/fips/fipsld @@ -1,6 +1,6 @@ #!/bin/sh -e # -# Copyright (c) 2005-2007 The OpenSSL Project. +# Copyright (c) 2005-2011 The OpenSSL Project. # # Depending on output file name, the script either embeds fingerprint # into libcrypto.so or static application. "Static" refers to static @@ -127,12 +127,15 @@ lib*|*.dll) # must be linking a shared lib... "${PREMAIN_C}" \ ${_WL_PREMAIN} "$@" - # generate signature... - if [ -z "${FIPS_SIG}" ]; then - SIG=`"${PREMAIN_DSO}" "${TARGET}"` - else - SIG=`"${FIPS_SIG}" -dso "${TARGET}"` + if [ "x${FIPS_SIG}" != "x" ]; then + # embed signature + "${FIPS_SIG}" "${TARGET}" + [ $? -ne 42 ] && exit $? fi + + # generate signature... + SIG=`"${PREMAIN_DSO}" "${TARGET}"` + /bin/rm -f "${TARGET}" if [ -z "${SIG}" ]; then echo "unable to collect signature"; exit 1 @@ -172,12 +175,15 @@ lib*|*.dll) # must be linking a shared lib... "${PREMAIN_C}" \ ${_WL_PREMAIN} "$@" - # generate signature... - if [ -z "${FIPS_SIG}" ]; then - SIG=`"${TARGET}"` - else - SIG=`"${FIPS_SIG}" -exe "${TARGET}"` + if [ "x${FIPS_SIG}" != "x" ]; then + # embed signature + "${FIPS_SIG}" "${TARGET}" + [ $? -ne 42 ] && exit $? fi + + # generate signature... 
+ SIG=`"${TARGET}"` + /bin/rm -f "${TARGET}" if [ -z "${SIG}" ]; then echo "unable to collect signature"; exit 1 diff --git a/util/incore b/util/incore index 883f63ff56..e6e6ecfd89 100755 --- a/util/incore +++ b/util/incore @@ -34,6 +34,7 @@ @e_ident{magic,class,data,version,osabi,abiver,pad}= unpack("a4C*",$elf); + $!=42; # signal fipsld to revert to two-step link die "not ELF file" if ($e_ident{magic} ne chr(0177)."ELF"); my $elf_bits = $e_ident{class}*32; # 32 or 64 @@ -377,7 +378,7 @@ $FIPS_text_endX = $exe->Lookup("FIPS_text_endX"); if (!$legacy_mode) { if (!$FIPS_text_startX || !$FIPS_text_endX) { print STDERR "@ARGV[$#ARGV] is not cross-compiler aware.\n"; - exit(1); + exit(42); # signal fipsld to revert to two-step link } $FINGERPRINT_ascii_value From bb25a72881db6d3a67d9cdd46bb0d1bd059e322b Mon Sep 17 00:00:00 2001 From: "Dr. Stephen Henson" Date: Mon, 7 Nov 2011 13:16:55 +0000 Subject: [PATCH 022/120] MacOS and iOS support --- Configure | 7 +++++++ config | 6 ++++++ 2 files changed, 13 insertions(+) diff --git a/Configure b/Configure index 10ef8cd115..594e7bedc6 100755 --- a/Configure +++ b/Configure @@ -577,8 +577,15 @@ my %table=( "darwin-i386-cc","cc:-arch i386 -O3 -fomit-frame-pointer -DL_ENDIAN::-D_REENTRANT:MACOSX:-Wl,-search_paths_first%:BN_LLONG RC4_INT RC4_CHUNK DES_UNROLL BF_PTR:${x86_asm}:macosx:dlfcn:darwin-shared:-fPIC -fno-common:-arch i386 -dynamiclib:.\$(SHLIB_MAJOR).\$(SHLIB_MINOR).dylib", "debug-darwin-i386-cc","cc:-arch i386 -g3 -DL_ENDIAN::-D_REENTRANT:MACOSX:-Wl,-search_paths_first%:BN_LLONG RC4_INT RC4_CHUNK DES_UNROLL BF_PTR:${x86_asm}:macosx:dlfcn:darwin-shared:-fPIC -fno-common:-arch i386 -dynamiclib:.\$(SHLIB_MAJOR).\$(SHLIB_MINOR).dylib", "darwin64-x86_64-cc","cc:-arch x86_64 -O3 -DL_ENDIAN -Wall::-D_REENTRANT:MACOSX:-Wl,-search_paths_first%:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL:${x86_64_asm}:macosx:dlfcn:darwin-shared:-fPIC -fno-common:-arch x86_64 -dynamiclib:.\$(SHLIB_MAJOR).\$(SHLIB_MINOR).dylib", 
+"darwin-x86_64-cc","cc:-arch x86_64 -O3 -DL_ENDIAN -Wall::-D_REENTRANT:MACOSX:-Wl,-search_paths_first%:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL:${x86_64_asm}:macosx:dlfcn:darwin-shared:-fPIC -fno-common:-arch x86_64 -dynamiclib:.\$(SHLIB_MAJOR).\$(SHLIB_MINOR).dylib", "debug-darwin-ppc-cc","cc:-DBN_DEBUG -DREF_CHECK -DCONF_DEBUG -DCRYPTO_MDEBUG -DB_ENDIAN -g -Wall -O::-D_REENTRANT:MACOSX::BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR:${ppc32_asm}:osx32:dlfcn:darwin-shared:-fPIC:-dynamiclib:.\$(SHLIB_MAJOR).\$(SHLIB_MINOR).dylib", +##### iOS setup +"darwin-iphoneos-armv7-cross","llvm-gcc:-O3 -arch armv7 -isysroot \$(CROSS_TOP)/SDKs/\$(CROSS_SDK) -miphoneos-version-min=4.0 -fomit-frame-pointer -fno-common::-D_REENTRANT:iOS:-Wl,-search_paths_first%:BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR:${no_asm}:dlfcn:darwin-shared:-fPIC -fno-common:-dynamiclib:.\$(SHLIB_MAJOR).\$(SHLIB_MINOR).dylib", +"darwin-iphoneos-armv6-cross","llvm-gcc:-O3 -arch armv6 -isysroot \$(CROSS_TOP)/SDKs/\$(CROSS_SDK) -miphoneos-version-min=4.0 -fomit-frame-pointer -fno-common::-D_REENTRANT:iOS:-Wl,-search_paths_first%:BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR:${no_asm}:dlfcn:darwin-shared:-fPIC -fno-common:-dynamiclib:.\$(SHLIB_MAJOR).\$(SHLIB_MINOR).dylib", +"darwin-iphoneos-cross","llvm-gcc:-O3 -arch armv7 -arch armv6 -isysroot \$(CROSS_TOP)/SDKs/\$(CROSS_SDK) -miphoneos-version-min=4.0 -fomit-frame-pointer -fno-common::-D_REENTRANT:iOS:-Wl,-search_paths_first%:BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR:${no_asm}:dlfcn:darwin-shared:-fPIC -fno-common:-dynamiclib:.\$(SHLIB_MAJOR).\$(SHLIB_MINOR).dylib", +"darwin-iphonesimulator-cross","llvm-gcc:-O3 -arch i386 -isysroot \$(CROSS_TOP)/SDKs/\$(CROSS_SDK) -miphoneos-version-min=4.0 -fomit-frame-pointer -fno-common::-D_REENTRANT:iOS:-Wl,-search_paths_first%:BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR:${no_asm}:dlfcn:darwin-shared:-fPIC -fno-common:-dynamiclib:.\$(SHLIB_MAJOR).\$(SHLIB_MINOR).dylib", + ##### A/UX 
"aux3-gcc","gcc:-O2 -DTERMIO::(unknown):AUX:-lbsd:RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR:::", diff --git a/config b/config index d2b155aa44..08d83735ec 100755 --- a/config +++ b/config @@ -277,6 +277,9 @@ case "${SYSTEM}:${RELEASE}:${VERSION}:${MACHINE}" in Power*) echo "ppc-apple-darwin${VERSION}" ;; + x86_64) + echo "x86_64-apple-darwin${VERSION}" + ;; *) echo "i686-apple-darwin${VERSION}" ;; @@ -547,6 +550,8 @@ case "$GUESSOS" in fi fi OUT="darwin-ppc-cc" ;; + x86_64-*-darwin*) OUT="darwin-x86_64-cc" ;; + i386-whatever-darwin) OUT="darwin-i386-cc" ;; i?86-apple-darwin*) ISA64=`(sysctl -n hw.optional.x86_64) 2>/dev/null` if [ "$ISA64" = "1" ]; then @@ -825,6 +830,7 @@ case "$GUESSOS" in *-*-qnx6) OUT="QNX6" ;; x86-*-android|i?86-*-android) OUT="android-x86" ;; armv[7-9]*-*-android) OUT="android-armv7" ;; + darwin-whatever-*) OUT=`echo $GUESSOS | sed -e 's/-whatever//'`;; *) OUT=`echo $GUESSOS | awk -F- '{print $3}'`;; esac From be6dc7e56b91a25d47e0e0f73d97210bf2bf1140 Mon Sep 17 00:00:00 2001 From: "Dr. Stephen Henson" Date: Mon, 7 Nov 2011 13:18:12 +0000 Subject: [PATCH 023/120] Prepare for RC2 --- fips/fips_locl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fips/fips_locl.h b/fips/fips_locl.h index 24743be6ab..63695ceb00 100644 --- a/fips/fips_locl.h +++ b/fips/fips_locl.h @@ -68,7 +68,7 @@ int fips_post_corrupt(int id, int subid, void *ex); int fips_post_status(void); #define FIPS_MODULE_VERSION_NUMBER 0x20000002L -#define FIPS_MODULE_VERSION_TEXT "FIPS 2.0-rc2-dev unvalidated test module xx XXX xxxx" +#define FIPS_MODULE_VERSION_TEXT "FIPS 2.0-rc2 unvalidated test module xx XXX xxxx" #ifdef __cplusplus } From cbed6cfcaa877acf0a3d15828baf4e03fd905570 Mon Sep 17 00:00:00 2001 From: "Dr. 
Stephen Henson" Date: Mon, 7 Nov 2011 13:54:30 +0000 Subject: [PATCH 024/120] add fips_algvs.c to restricted tarball --- util/fipsdist.pl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/util/fipsdist.pl b/util/fipsdist.pl index b191fbe41e..e10a2fe8cf 100644 --- a/util/fipsdist.pl +++ b/util/fipsdist.pl @@ -76,7 +76,7 @@ while () } if (/^test\//) { - next unless /Makefile/ || /dummytest.c/; + next unless /Makefile/ || /dummytest.c/ || /fips_algvs.c/ ; } print "$_\n"; } From ffa76736fa7c0f46414db029b7d2a3f2df0e546c Mon Sep 17 00:00:00 2001 From: Andy Polyakov Date: Tue, 8 Nov 2011 14:44:55 +0000 Subject: [PATCH 025/120] Platform update from HEAD. --- Configure | 10 +++------- TABLE | 33 +++++++++++++++++++++++++++++++++ config | 35 +++++++++++++++++++++++------------ fips/fips_canister.c | 1 + fips/fips_premain.c | 2 +- fips/fips_premain.c.sha1 | 2 +- 6 files changed, 62 insertions(+), 21 deletions(-) diff --git a/Configure b/Configure index 594e7bedc6..f93a9ee280 100755 --- a/Configure +++ b/Configure @@ -577,14 +577,9 @@ my %table=( "darwin-i386-cc","cc:-arch i386 -O3 -fomit-frame-pointer -DL_ENDIAN::-D_REENTRANT:MACOSX:-Wl,-search_paths_first%:BN_LLONG RC4_INT RC4_CHUNK DES_UNROLL BF_PTR:${x86_asm}:macosx:dlfcn:darwin-shared:-fPIC -fno-common:-arch i386 -dynamiclib:.\$(SHLIB_MAJOR).\$(SHLIB_MINOR).dylib", "debug-darwin-i386-cc","cc:-arch i386 -g3 -DL_ENDIAN::-D_REENTRANT:MACOSX:-Wl,-search_paths_first%:BN_LLONG RC4_INT RC4_CHUNK DES_UNROLL BF_PTR:${x86_asm}:macosx:dlfcn:darwin-shared:-fPIC -fno-common:-arch i386 -dynamiclib:.\$(SHLIB_MAJOR).\$(SHLIB_MINOR).dylib", "darwin64-x86_64-cc","cc:-arch x86_64 -O3 -DL_ENDIAN -Wall::-D_REENTRANT:MACOSX:-Wl,-search_paths_first%:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL:${x86_64_asm}:macosx:dlfcn:darwin-shared:-fPIC -fno-common:-arch x86_64 -dynamiclib:.\$(SHLIB_MAJOR).\$(SHLIB_MINOR).dylib", -"darwin-x86_64-cc","cc:-arch x86_64 -O3 -DL_ENDIAN 
-Wall::-D_REENTRANT:MACOSX:-Wl,-search_paths_first%:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL:${x86_64_asm}:macosx:dlfcn:darwin-shared:-fPIC -fno-common:-arch x86_64 -dynamiclib:.\$(SHLIB_MAJOR).\$(SHLIB_MINOR).dylib", "debug-darwin-ppc-cc","cc:-DBN_DEBUG -DREF_CHECK -DCONF_DEBUG -DCRYPTO_MDEBUG -DB_ENDIAN -g -Wall -O::-D_REENTRANT:MACOSX::BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR:${ppc32_asm}:osx32:dlfcn:darwin-shared:-fPIC:-dynamiclib:.\$(SHLIB_MAJOR).\$(SHLIB_MINOR).dylib", - -##### iOS setup -"darwin-iphoneos-armv7-cross","llvm-gcc:-O3 -arch armv7 -isysroot \$(CROSS_TOP)/SDKs/\$(CROSS_SDK) -miphoneos-version-min=4.0 -fomit-frame-pointer -fno-common::-D_REENTRANT:iOS:-Wl,-search_paths_first%:BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR:${no_asm}:dlfcn:darwin-shared:-fPIC -fno-common:-dynamiclib:.\$(SHLIB_MAJOR).\$(SHLIB_MINOR).dylib", -"darwin-iphoneos-armv6-cross","llvm-gcc:-O3 -arch armv6 -isysroot \$(CROSS_TOP)/SDKs/\$(CROSS_SDK) -miphoneos-version-min=4.0 -fomit-frame-pointer -fno-common::-D_REENTRANT:iOS:-Wl,-search_paths_first%:BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR:${no_asm}:dlfcn:darwin-shared:-fPIC -fno-common:-dynamiclib:.\$(SHLIB_MAJOR).\$(SHLIB_MINOR).dylib", -"darwin-iphoneos-cross","llvm-gcc:-O3 -arch armv7 -arch armv6 -isysroot \$(CROSS_TOP)/SDKs/\$(CROSS_SDK) -miphoneos-version-min=4.0 -fomit-frame-pointer -fno-common::-D_REENTRANT:iOS:-Wl,-search_paths_first%:BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR:${no_asm}:dlfcn:darwin-shared:-fPIC -fno-common:-dynamiclib:.\$(SHLIB_MAJOR).\$(SHLIB_MINOR).dylib", -"darwin-iphonesimulator-cross","llvm-gcc:-O3 -arch i386 -isysroot \$(CROSS_TOP)/SDKs/\$(CROSS_SDK) -miphoneos-version-min=4.0 -fomit-frame-pointer -fno-common::-D_REENTRANT:iOS:-Wl,-search_paths_first%:BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR:${no_asm}:dlfcn:darwin-shared:-fPIC -fno-common:-dynamiclib:.\$(SHLIB_MAJOR).\$(SHLIB_MINOR).dylib", +# iPhoneOS/iOS +"iphoneos-cross","llvm-gcc:-O3 -isysroot 
\$(CROSS_TOP)/SDKs/\$(CROSS_SDK) -fomit-frame-pointer -fno-common::-D_REENTRANT:iOS:-Wl,-search_paths_first%:BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR:${no_asm}:dlfcn:darwin-shared:-fPIC -fno-common:-dynamiclib:.\$(SHLIB_MAJOR).\$(SHLIB_MINOR).dylib", ##### A/UX "aux3-gcc","gcc:-O2 -DTERMIO::(unknown):AUX:-lbsd:RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR:::", @@ -913,6 +908,7 @@ EOF } elsif (/^-[^-]/ or /^\+/) { + $_ =~ s/%([0-9a-f]{1,2})/chr(hex($1))/gei; $flags.=$_." "; } elsif (/^--prefix=(.*)$/) diff --git a/TABLE b/TABLE index c15ac01cb4..bf974b56f4 100644 --- a/TABLE +++ b/TABLE @@ -3465,6 +3465,39 @@ $ranlib = $arflags = $multilib = +*** iphoneos-cross +$cc = llvm-gcc +$cflags = -O3 -isysroot $(CROSS_TOP)/SDKs/$(CROSS_SDK) -fomit-frame-pointer -fno-common +$unistd = +$thread_cflag = -D_REENTRANT +$sys_id = iOS +$lflags = -Wl,-search_paths_first% +$bn_ops = BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR +$cpuid_obj = +$bn_obj = +$des_obj = +$aes_obj = +$bf_obj = +$md5_obj = +$sha1_obj = +$cast_obj = +$rc4_obj = +$rmd160_obj = +$rc5_obj = +$wp_obj = +$cmll_obj = +$modes_obj = +$engines_obj = +$perlasm_scheme = void +$dso_scheme = dlfcn +$shared_target= darwin-shared +$shared_cflag = -fPIC -fno-common +$shared_ldflag = -dynamiclib +$shared_extension = .$(SHLIB_MAJOR).$(SHLIB_MINOR).dylib +$ranlib = +$arflags = +$multilib = + *** irix-cc $cc = cc $cflags = -O2 -use_readonly_const -DTERMIOS -DB_ENDIAN diff --git a/config b/config index 08d83735ec..851a161136 100755 --- a/config +++ b/config @@ -277,9 +277,6 @@ case "${SYSTEM}:${RELEASE}:${VERSION}:${MACHINE}" in Power*) echo "ppc-apple-darwin${VERSION}" ;; - x86_64) - echo "x86_64-apple-darwin${VERSION}" - ;; *) echo "i686-apple-darwin${VERSION}" ;; @@ -541,7 +538,7 @@ case "$GUESSOS" in ppc-apple-rhapsody) OUT="rhapsody-ppc-cc" ;; ppc-apple-darwin*) ISA64=`(sysctl -n hw.optional.64bitops) 2>/dev/null` - if [ "$ISA64" = "1" ]; then + if [ "$ISA64" = "1" -a -z "$KERNEL_BITS" ]; then echo "WARNING! 
If you wish to build 64-bit library, then you have to" echo " invoke './Configure darwin64-ppc-cc' *manually*." if [ "$TEST" = "false" -a -t 1 ]; then @@ -549,12 +546,14 @@ case "$GUESSOS" in (trap "stty `stty -g`" 2 0; stty -icanon min 0 time 50; read waste) <&1 fi fi - OUT="darwin-ppc-cc" ;; - x86_64-*-darwin*) OUT="darwin-x86_64-cc" ;; - i386-whatever-darwin) OUT="darwin-i386-cc" ;; + if [ "$ISA64" = "1" -a "$KERNEL_BITS" = "64" ]; then + OUT="darwin64-ppc-cc" + else + OUT="darwin-ppc-cc" + fi ;; i?86-apple-darwin*) ISA64=`(sysctl -n hw.optional.x86_64) 2>/dev/null` - if [ "$ISA64" = "1" ]; then + if [ "$ISA64" = "1" -a -z "$KERNEL_BITS" ]; then echo "WARNING! If you wish to build 64-bit library, then you have to" echo " invoke './Configure darwin64-x86_64-cc' *manually*." if [ "$TEST" = "false" -a -t 1 ]; then @@ -562,7 +561,17 @@ case "$GUESSOS" in (trap "stty `stty -g`" 2 0; stty -icanon min 0 time 50; read waste) <&1 fi fi - OUT="darwin-i386-cc" ;; + if [ "$ISA64" = "1" -a "$KERNEL_BITS" = "64" ]; then + OUT="darwin64-x86_64-cc" + else + OUT="darwin-i386-cc" + fi ;; + armv6+7-*-iphoneos) + options="$options -arch%20armv6 -arch%20armv7" + OUT="iphoneos-cross" ;; + *-*-iphoneos) + options="$options -arch%20${MACHINE}" + OUT="iphoneos-cross" ;; alpha-*-linux2) ISA=`awk '/cpu model/{print$4;exit(0);}' /proc/cpuinfo` case ${ISA:-generic} in @@ -669,7 +678,7 @@ case "$GUESSOS" in sun4[uv]*-*-solaris2) OUT="solaris-sparcv9-$CC" ISA64=`(isalist) 2>/dev/null | grep sparcv9` - if [ "$ISA64" != "" ]; then + if [ "$ISA64" != "" -a "$KERNEL_BITS" = "" ]; then if [ "$CC" = "cc" -a $CCVER -ge 50 ]; then echo "WARNING! If you wish to build 64-bit library, then you have to" echo " invoke './Configure solaris64-sparcv9-cc' *manually*." 
@@ -699,13 +708,16 @@ case "$GUESSOS" in fi fi fi + if [ "$ISA64" != "" -a "$KERNEL_BITS" = "64" ]; then + OUT="solaris64-sparcv9-$CC" + fi ;; sun4m-*-solaris2) OUT="solaris-sparcv8-$CC" ;; sun4d-*-solaris2) OUT="solaris-sparcv8-$CC" ;; sun4*-*-solaris2) OUT="solaris-sparcv7-$CC" ;; *86*-*-solaris2) ISA64=`(isalist) 2>/dev/null | grep amd64` - if [ "$ISA64" != "" ]; then + if [ "$ISA64" != "" -a ${KERNEL_BITS:-64} -eq 64 ]; then OUT="solaris64-x86_64-$CC" else OUT="solaris-x86-$CC" @@ -830,7 +842,6 @@ case "$GUESSOS" in *-*-qnx6) OUT="QNX6" ;; x86-*-android|i?86-*-android) OUT="android-x86" ;; armv[7-9]*-*-android) OUT="android-armv7" ;; - darwin-whatever-*) OUT=`echo $GUESSOS | sed -e 's/-whatever//'`;; *) OUT=`echo $GUESSOS | awk -F- '{print $3}'`;; esac diff --git a/fips/fips_canister.c b/fips/fips_canister.c index 7d67d32d6c..7be48426d9 100644 --- a/fips/fips_canister.c +++ b/fips/fips_canister.c @@ -34,6 +34,7 @@ const void *FIPS_text_end(void); defined(__mips__)|| defined(__mips))) || \ (defined(__linux) && ((defined(__PPC__) && !defined(__PPC64__)) || \ defined(__arm__) || defined(__arm))) || \ + (defined(__APPLE__) /* verified on all MacOS X & iOS flavors */)|| \ (defined(_WIN32) && defined(_MSC_VER)) # define FIPS_REF_POINT_IS_CROSS_COMPILER_AWARE # endif diff --git a/fips/fips_premain.c b/fips/fips_premain.c index a7c8b78f8f..7dc5246006 100644 --- a/fips/fips_premain.c +++ b/fips/fips_premain.c @@ -7,7 +7,7 @@ #include #include #include -#if defined(__unix) || defined(__unix__) || defined(__vxworks) || defined(__ANDROID__) +#if defined(__unix) || defined(__unix__) || defined(__vxworks) || defined(__ANDROID__) || defined(__APPLE__) #include #endif diff --git a/fips/fips_premain.c.sha1 b/fips/fips_premain.c.sha1 index e0332e8afd..b9fb5dfc1d 100644 --- a/fips/fips_premain.c.sha1 +++ b/fips/fips_premain.c.sha1 @@ -1 +1 @@ -HMAC-SHA1(fips_premain.c)= a401afd9c2b57f0f11d2b34b6d0c9815b1fe6a66 +HMAC-SHA1(fips_premain.c)= 1eaf66f76187877ff403708a2948d240f92736a0 
From 7437036cdf3aefa51be93b3bc6e04c4c21e2cf5d Mon Sep 17 00:00:00 2001 From: "Dr. Stephen Henson" Date: Tue, 8 Nov 2011 19:08:40 +0000 Subject: [PATCH 026/120] Prepare for RC3 (which may never happen). --- fips/fips_locl.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fips/fips_locl.h b/fips/fips_locl.h index 63695ceb00..9a5e99d73c 100644 --- a/fips/fips_locl.h +++ b/fips/fips_locl.h @@ -67,8 +67,8 @@ int fips_post_failed(int id, int subid, void *ex); int fips_post_corrupt(int id, int subid, void *ex); int fips_post_status(void); -#define FIPS_MODULE_VERSION_NUMBER 0x20000002L -#define FIPS_MODULE_VERSION_TEXT "FIPS 2.0-rc2 unvalidated test module xx XXX xxxx" +#define FIPS_MODULE_VERSION_NUMBER 0x20000003L +#define FIPS_MODULE_VERSION_TEXT "FIPS 2.0-rc3-dev unvalidated test module xx XXX xxxx" #ifdef __cplusplus } From 3b4fb53221cb13d3affd63477fc17a81978838f6 Mon Sep 17 00:00:00 2001 From: "Dr. Stephen Henson" Date: Wed, 9 Nov 2011 14:23:17 +0000 Subject: [PATCH 027/120] fclose streams in fips_drbgvs.c Produced error message for unsupported curves in fips_ecdhvs.c --- fips/ecdh/fips_ecdhvs.c | 5 +++++ fips/rand/fips_drbgvs.c | 6 +++++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/fips/ecdh/fips_ecdhvs.c b/fips/ecdh/fips_ecdhvs.c index a30e335e2b..099285aac3 100644 --- a/fips/ecdh/fips_ecdhvs.c +++ b/fips/ecdh/fips_ecdhvs.c @@ -413,6 +413,11 @@ int main(int argc, char **argv) if (group) EC_GROUP_free(group); group = EC_GROUP_new_by_curve_name(nid); + if (!group) + { + fprintf(stderr, "ERROR: unsupported curve %s\n", buf + 1); + return 1; + } } if (strlen(buf) > 6 && !strncmp(buf, "[E", 2)) diff --git a/fips/rand/fips_drbgvs.c b/fips/rand/fips_drbgvs.c index bcdfa6dac3..9aae88c3e1 100644 --- a/fips/rand/fips_drbgvs.c +++ b/fips/rand/fips_drbgvs.c @@ -176,7 +176,7 @@ int fips_drbgvs_main(int argc,char **argv) int main(int argc,char **argv) #endif { - FILE *in, *out; + FILE *in = NULL, *out = NULL; DRBG_CTX *dctx = NULL;
TEST_ENT t; int r, nid = 0; @@ -406,6 +406,10 @@ int main(int argc,char **argv) } } + if (in && in != stdin) + fclose(in); + if (out && out != stdout) + fclose(out); return 0; } From 9eca2399f1f5ab45527f33147ee5180df4c035a2 Mon Sep 17 00:00:00 2001 From: "Dr. Stephen Henson" Date: Fri, 11 Nov 2011 19:01:11 +0000 Subject: [PATCH 028/120] portability fix for some perl versions --- fips/fipsalgtest.pl | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/fips/fipsalgtest.pl b/fips/fipsalgtest.pl index cd6ba8c116..30cd9c0ebd 100644 --- a/fips/fipsalgtest.pl +++ b/fips/fipsalgtest.pl @@ -513,29 +513,29 @@ my $mkcmd = "mkdir"; my $cmpall = 0; my %fips_enabled = ( - dsa => 1, - dsa2 => 2, + "dsa" => 1, + "dsa2" => 2, "dsa-pqgver" => 2, - ecdsa => 2, - rsa => 1, + "ecdsa" => 2, + "rsa" => 1, "rsa-pss0" => 2, "rsa-pss62" => 1, - sha => 1, - hmac => 1, - cmac => 2, + "sha" => 1, + "hmac" => 1, + "cmac" => 2, "rand-aes" => 1, "rand-des2" => 0, - aes => 1, + "aes" => 1, "aes-cfb1" => 2, - des3 => 1, + "des3" => 1, "des3-cfb1" => 2, - drbg => 2, + "drbg" => 2, "aes-ccm" => 2, "aes-xts" => 2, "aes-gcm" => 2, - dh => 0, - ecdh => 2, - v2 => 1, + "dh" => 0, + "ecdh" => 2, + "v2" => 1, ); foreach (@ARGV) { From 901b9b5c367ed81f2ce7879082a6ec600cb818d9 Mon Sep 17 00:00:00 2001 From: "Dr. Stephen Henson" Date: Wed, 16 Nov 2011 13:28:11 +0000 Subject: [PATCH 029/120] In EC_KEY_set_public_key_affine_coordinates include explicit check to see passed components do not exceed field order --- crypto/ec/ec_key.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/crypto/ec/ec_key.c b/crypto/ec/ec_key.c index f3331e1ce5..24ae707560 100644 --- a/crypto/ec/ec_key.c +++ b/crypto/ec/ec_key.c @@ -511,10 +511,12 @@ int EC_KEY_set_public_key_affine_coordinates(EC_KEY *key, BIGNUM *x, BIGNUM *y) tx, ty, ctx)) goto err; } - /* Check if retrieved coordinates match originals: if not values - * are out of range. 
+ /* Check if retrieved coordinates match originals and are less than + * field order: if not values are out of range. */ - if (BN_cmp(x, tx) || BN_cmp(y, ty)) + if (BN_cmp(x, tx) || BN_cmp(y, ty) + || (BN_cmp(x, &key->group->field) >= 0) + || (BN_cmp(y, &key->group->field) >= 0)) { ECerr(EC_F_EC_KEY_SET_PUBLIC_KEY_AFFINE_COORDINATES, EC_R_COORDINATES_OUT_OF_RANGE); From c08128acc2dc5c4c8c1083f0f2bded2e1f097a0d Mon Sep 17 00:00:00 2001 From: "Dr. Stephen Henson" Date: Fri, 18 Nov 2011 18:50:57 +0000 Subject: [PATCH 030/120] prepare for RC3 --- fips/fips_locl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fips/fips_locl.h b/fips/fips_locl.h index 9a5e99d73c..c46caf639c 100644 --- a/fips/fips_locl.h +++ b/fips/fips_locl.h @@ -68,7 +68,7 @@ int fips_post_corrupt(int id, int subid, void *ex); int fips_post_status(void); #define FIPS_MODULE_VERSION_NUMBER 0x20000003L -#define FIPS_MODULE_VERSION_TEXT "FIPS 2.0-rc3-dev unvalidated test module xx XXX xxxx" +#define FIPS_MODULE_VERSION_TEXT "FIPS 2.0-rc3 unvalidated test module xx XXX xxxx" #ifdef __cplusplus } From 52876c3100f4cd38f02b9b4c2b08e2496e96314e Mon Sep 17 00:00:00 2001 From: "Dr. 
Stephen Henson" Date: Fri, 18 Nov 2011 21:59:36 +0000 Subject: [PATCH 031/120] bump version to rc4-dev --- fips/fips_locl.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fips/fips_locl.h b/fips/fips_locl.h index c46caf639c..7b7fc61917 100644 --- a/fips/fips_locl.h +++ b/fips/fips_locl.h @@ -67,8 +67,8 @@ int fips_post_failed(int id, int subid, void *ex); int fips_post_corrupt(int id, int subid, void *ex); int fips_post_status(void); -#define FIPS_MODULE_VERSION_NUMBER 0x20000003L -#define FIPS_MODULE_VERSION_TEXT "FIPS 2.0-rc3 unvalidated test module xx XXX xxxx" +#define FIPS_MODULE_VERSION_NUMBER 0x20000004L +#define FIPS_MODULE_VERSION_TEXT "FIPS 2.0-rc4-dev unvalidated test module xx XXX xxxx" #ifdef __cplusplus } From f6385248f6dbca423aa7b9a1e1dadadd15cb4f9b Mon Sep 17 00:00:00 2001 From: "Dr. Stephen Henson" Date: Sat, 19 Nov 2011 17:03:44 +0000 Subject: [PATCH 032/120] Add flag to support cofactor ECDH --- CHANGES | 3 +++ crypto/ecdh/ecdh.h | 2 ++ crypto/ecdh/ech_ossl.c | 12 ++++++++++++ fips/ecdh/fips_ecdh_selftest.c | 2 ++ fips/ecdh/fips_ecdhvs.c | 1 + 5 files changed, 20 insertions(+) diff --git a/CHANGES b/CHANGES index 795ada567e..0e9afd12ed 100644 --- a/CHANGES +++ b/CHANGES @@ -4,6 +4,9 @@ Changes between 1.0.1 and 1.1.0 [xx XXX xxxx] + *) Add flag to EC_KEY to use cofactor ECDH if set. + [Steve Henson] + *) Update fips_test_suite to support multiple command line options. New test to induce all self test errors in sequence and check expected failures. 
diff --git a/crypto/ecdh/ecdh.h b/crypto/ecdh/ecdh.h index b4b58ee65b..8ac82b8cbd 100644 --- a/crypto/ecdh/ecdh.h +++ b/crypto/ecdh/ecdh.h @@ -85,6 +85,8 @@ extern "C" { #endif +#define EC_FLAG_COFACTOR_ECDH 0x1000 + const ECDH_METHOD *ECDH_OpenSSL(void); void ECDH_set_default_method(const ECDH_METHOD *); diff --git a/crypto/ecdh/ech_ossl.c b/crypto/ecdh/ech_ossl.c index 94a8f4b696..2656797449 100644 --- a/crypto/ecdh/ech_ossl.c +++ b/crypto/ecdh/ech_ossl.c @@ -146,6 +146,18 @@ static int ecdh_compute_key(void *out, size_t outlen, const EC_POINT *pub_key, } group = EC_KEY_get0_group(ecdh); + + if (EC_KEY_get_flags(ecdh) & EC_FLAG_COFACTOR_ECDH) + { + if (!EC_GROUP_get_cofactor(group, x, ctx) || + !BN_mul(x, x, priv_key, ctx)) + { + ECDHerr(ECDH_F_ECDH_COMPUTE_KEY, ERR_R_MALLOC_FAILURE); + goto err; + } + priv_key = x; + } + if ((tmp=EC_POINT_new(group)) == NULL) { ECDHerr(ECDH_F_ECDH_COMPUTE_KEY,ERR_R_MALLOC_FAILURE); diff --git a/fips/ecdh/fips_ecdh_selftest.c b/fips/ecdh/fips_ecdh_selftest.c index 2b21ceaf48..0b16c57aae 100644 --- a/fips/ecdh/fips_ecdh_selftest.c +++ b/fips/ecdh/fips_ecdh_selftest.c @@ -166,6 +166,7 @@ int FIPS_selftest_ecdh(void) rv = -1; goto err; } + EC_KEY_set_flags(ec1, EC_FLAG_COFACTOR_ECDH); if (!EC_KEY_set_public_key_affine_coordinates(ec1, x, y)) { @@ -194,6 +195,7 @@ int FIPS_selftest_ecdh(void) rv = -1; goto err; } + EC_KEY_set_flags(ec1, EC_FLAG_COFACTOR_ECDH); if (!EC_KEY_set_public_key_affine_coordinates(ec2, x, y)) { diff --git a/fips/ecdh/fips_ecdhvs.c b/fips/ecdh/fips_ecdhvs.c index 099285aac3..61d216d1b7 100644 --- a/fips/ecdh/fips_ecdhvs.c +++ b/fips/ecdh/fips_ecdhvs.c @@ -261,6 +261,7 @@ static void ec_output_Zhash(FILE *out, int exout, EC_GROUP *group, unsigned char chash[EVP_MAX_MD_SIZE]; int Zlen; ec = EC_KEY_new(); + EC_KEY_set_flags(ec, EC_FLAG_COFACTOR_ECDH); EC_KEY_set_group(ec, group); peerkey = make_peer(group, cx, cy); if (rhash == NULL) From 0e508c12e0233bcfa308cfd6481fdcfe6a19f6d1 Mon Sep 17 00:00:00 2001 From: 
"Dr. Stephen Henson" Date: Sat, 19 Nov 2011 17:04:28 +0000 Subject: [PATCH 033/120] prepare for rc4 --- fips/fips_locl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fips/fips_locl.h b/fips/fips_locl.h index 7b7fc61917..46dc1a4611 100644 --- a/fips/fips_locl.h +++ b/fips/fips_locl.h @@ -68,7 +68,7 @@ int fips_post_corrupt(int id, int subid, void *ex); int fips_post_status(void); #define FIPS_MODULE_VERSION_NUMBER 0x20000004L -#define FIPS_MODULE_VERSION_TEXT "FIPS 2.0-rc4-dev unvalidated test module xx XXX xxxx" +#define FIPS_MODULE_VERSION_TEXT "FIPS 2.0-rc4 unvalidated test module xx XXX xxxx" #ifdef __cplusplus } From 6ecd287acc7720f89b7b11bb609988e6c67439cb Mon Sep 17 00:00:00 2001 From: "Dr. Stephen Henson" Date: Mon, 21 Nov 2011 00:05:15 +0000 Subject: [PATCH 034/120] bump version for rc5-dev: hopefully will never be needed... --- fips/fips_locl.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fips/fips_locl.h b/fips/fips_locl.h index 46dc1a4611..0100428258 100644 --- a/fips/fips_locl.h +++ b/fips/fips_locl.h @@ -67,8 +67,8 @@ int fips_post_failed(int id, int subid, void *ex); int fips_post_corrupt(int id, int subid, void *ex); int fips_post_status(void); -#define FIPS_MODULE_VERSION_NUMBER 0x20000004L -#define FIPS_MODULE_VERSION_TEXT "FIPS 2.0-rc4 unvalidated test module xx XXX xxxx" +#define FIPS_MODULE_VERSION_NUMBER 0x20000005L +#define FIPS_MODULE_VERSION_TEXT "FIPS 2.0-rc5-dev unvalidated test module xx XXX xxxx" #ifdef __cplusplus } From 7dcdc0d94d692cd6539bdf3950b3e4be61a0f441 Mon Sep 17 00:00:00 2001 From: "Dr. 
Stephen Henson" Date: Fri, 25 Nov 2011 15:00:20 +0000 Subject: [PATCH 035/120] check counter value against 4 * L, not 4096 --- crypto/dsa/dsa_gen.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crypto/dsa/dsa_gen.c b/crypto/dsa/dsa_gen.c index d5f4debc92..3b49420c76 100644 --- a/crypto/dsa/dsa_gen.c +++ b/crypto/dsa/dsa_gen.c @@ -666,7 +666,7 @@ int dsa_builtin_paramgen2(DSA *ret, size_t L, size_t N, /* "offset = offset + n + 1" */ /* step 14 */ - if (counter >= 4096) break; + if (counter >= (int)(4 * L)) break; } } end: From 31bf5f13e0250890b5711478c23ba6def7966f5e Mon Sep 17 00:00:00 2001 From: "Dr. Stephen Henson" Date: Fri, 25 Nov 2011 16:03:27 +0000 Subject: [PATCH 036/120] return error if counter exceeds limit and seed value supplied --- crypto/dsa/dsa_gen.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/crypto/dsa/dsa_gen.c b/crypto/dsa/dsa_gen.c index 3b49420c76..9e3e57a828 100644 --- a/crypto/dsa/dsa_gen.c +++ b/crypto/dsa/dsa_gen.c @@ -668,6 +668,12 @@ int dsa_builtin_paramgen2(DSA *ret, size_t L, size_t N, /* step 14 */ if (counter >= (int)(4 * L)) break; } + if (seed_in) + { + ok = 0; + DSAerr(DSA_F_DSA_BUILTIN_PARAMGEN2, DSA_R_INVALID_PARAMETERS); + goto err; + } } end: if(!BN_GENCB_call(cb, 2, 1)) From 9bd2dde42f59d60dfec149a28f8c91b6fb2cf717 Mon Sep 17 00:00:00 2001 From: "Dr. 
Stephen Henson" Date: Fri, 25 Nov 2011 16:27:19 +0000 Subject: [PATCH 037/120] prepare for rc5 --- fips/fips_locl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fips/fips_locl.h b/fips/fips_locl.h index 0100428258..de8ee04086 100644 --- a/fips/fips_locl.h +++ b/fips/fips_locl.h @@ -68,7 +68,7 @@ int fips_post_corrupt(int id, int subid, void *ex); int fips_post_status(void); #define FIPS_MODULE_VERSION_NUMBER 0x20000005L -#define FIPS_MODULE_VERSION_TEXT "FIPS 2.0-rc5-dev unvalidated test module xx XXX xxxx" +#define FIPS_MODULE_VERSION_TEXT "FIPS 2.0-rc5 unvalidated test module xx XXX xxxx" #ifdef __cplusplus } From 44cb365eaf1c200758f8eca5343133e8befc14cb Mon Sep 17 00:00:00 2001 From: "Dr. Stephen Henson" Date: Sat, 3 Dec 2011 18:26:26 +0000 Subject: [PATCH 038/120] bn/asm/mips.pl: fix typos [from HEAD], original by Andy --- crypto/bn/asm/mips.pl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crypto/bn/asm/mips.pl b/crypto/bn/asm/mips.pl index acfd35968e..c162a3ec23 100644 --- a/crypto/bn/asm/mips.pl +++ b/crypto/bn/asm/mips.pl @@ -267,7 +267,7 @@ ___ $code.=<<___; jr $ra move $a0,$v0 -.end bn_mul_add_words +.end bn_mul_add_words_internal .align 5 .globl bn_mul_words @@ -778,7 +778,7 @@ ___ $code.=<<___; jr $ra move $a0,$v0 -.end bn_sub_words +.end bn_sub_words_internal .align 5 .globl bn_div_3_words From 75b250a4ed2fcc0d98e2a499843cd00490f01ba0 Mon Sep 17 00:00:00 2001 From: "Dr. 
Stephen Henson" Date: Sat, 3 Dec 2011 18:27:31 +0000 Subject: [PATCH 039/120] remove unused functions from module --- crypto/cryptlib.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/crypto/cryptlib.c b/crypto/cryptlib.c index 524daf037d..a5999bde3b 100644 --- a/crypto/cryptlib.c +++ b/crypto/cryptlib.c @@ -219,6 +219,8 @@ BOOL WINAPI DllMain(HINSTANCE hinstDLL, DWORD fdwReason, } #endif +#ifndef OPENSSL_FIPSCANISTER + #if defined(_WIN32) && !defined(__CYGWIN__) #include #include @@ -382,3 +384,5 @@ void OpenSSLDie(const char *file,int line,const char *assertion) #ifndef OPENSSL_FIPSCANISTER void *OPENSSL_stderr(void) { return stderr; } #endif + +#endif From 5e900f3cef549e21094ada742cb01c58b8b2e429 Mon Sep 17 00:00:00 2001 From: "Dr. Stephen Henson" Date: Sat, 3 Dec 2011 19:19:34 +0000 Subject: [PATCH 040/120] functions aren't unused: revert --- crypto/cryptlib.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/crypto/cryptlib.c b/crypto/cryptlib.c index a5999bde3b..524daf037d 100644 --- a/crypto/cryptlib.c +++ b/crypto/cryptlib.c @@ -219,8 +219,6 @@ BOOL WINAPI DllMain(HINSTANCE hinstDLL, DWORD fdwReason, } #endif -#ifndef OPENSSL_FIPSCANISTER - #if defined(_WIN32) && !defined(__CYGWIN__) #include #include @@ -384,5 +382,3 @@ void OpenSSLDie(const char *file,int line,const char *assertion) #ifndef OPENSSL_FIPSCANISTER void *OPENSSL_stderr(void) { return stderr; } #endif - -#endif From 476e7e49722aa8759943628c8b6c7aa1aa6988b3 Mon Sep 17 00:00:00 2001 From: "Dr. Stephen Henson" Date: Sat, 3 Dec 2011 19:41:28 +0000 Subject: [PATCH 041/120] Add tests to ensure ECDSA key gen and DSA signing fails if DRBG entropy source fails. 
--- fips/fips_test_suite.c | 69 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 68 insertions(+), 1 deletion(-) diff --git a/fips/fips_test_suite.c b/fips/fips_test_suite.c index 2d0a4bba8b..cf8f085e95 100644 --- a/fips/fips_test_suite.c +++ b/fips/fips_test_suite.c @@ -650,6 +650,13 @@ static size_t drbg_test_cb(DRBG_CTX *ctx, unsigned char **pout, return (min_len + 0xf) & ~0xf; } +/* Callback which returns 0 to indicate entropy source failure */ +static size_t drbg_fail_cb(DRBG_CTX *ctx, unsigned char **pout, + int entropy, size_t min_len, size_t max_len) + { + return 0; + } + /* DRBG test: just generate lots of data and trigger health checks */ static int do_drbg_test(int type, int flags) @@ -1036,7 +1043,7 @@ static int do_fail_all(int fullpost, int fullerr) size_t i; RSA *rsa = NULL; DSA *dsa = NULL; - DRBG_CTX *dctx = NULL; + DRBG_CTX *dctx = NULL, *defctx = NULL; EC_KEY *ec = NULL; BIGNUM *bn = NULL; unsigned char out[10]; @@ -1133,6 +1140,9 @@ static int do_fail_all(int fullpost, int fullerr) else printf("\tECDSA key generation failed as expected.\n"); + FIPS_ec_key_free(ec); + ec = NULL; + fail_id = -1; fail_sub = -1; fail_key = -1; @@ -1241,6 +1251,63 @@ static int do_fail_all(int fullpost, int fullerr) printf("\tX9.31 continuous PRNG failed as expected\n"); FIPS_x931_stick(0); + /* Leave FIPS mode to clear error */ + FIPS_module_mode_set(0, NULL); + /* Enter FIPS mode successfully */ + if (!FIPS_module_mode_set(1, FIPS_AUTH_USER_PASS)) + { + printf("\tError entering FIPS mode\n"); + st_err++; + } + + printf(" Testing operation failure with DRBG entropy failure\n"); + + /* Generate DSA key for later use */ + if (DSA_generate_key(dsa)) + printf("\tDSA key generated OK as expected.\n"); + else + { + printf("\tDSA key generation FAILED!!\n"); + st_err++; + } + + /* Initialise default DRBG context */ + defctx = FIPS_get_default_drbg(); + if (!defctx) + return 0; + if (!FIPS_drbg_init(defctx, NID_sha512, 0)) + return 0; + /* Set entropy failure 
callback */ + FIPS_drbg_set_callbacks(defctx, drbg_fail_cb, 0, 0x10, drbg_test_cb, 0); + if (FIPS_drbg_instantiate(defctx, dummy_drbg_entropy, 10)) + { + printf("\tDRBG entropy fail OK incorrectly!!\n"); + st_err++; + } + else + printf("\tDRBG entropy fail failed as expected\n"); + + if (FIPS_dsa_sign(dsa, dummy_drbg_entropy, 5, EVP_sha256())) + { + printf("\tDSA signing OK incorrectly!!\n"); + st_err++; + } + else + printf("\tDSA signing failed as expected\n"); + + ec = EC_KEY_new_by_curve_name(NID_X9_62_prime256v1); + + if (!ec) + return 0; + + if (EC_KEY_generate_key(ec)) + { + printf("\tECDSA key generated OK incorrectly!!\n"); + st_err++; + } + else + printf("\tECDSA key generation failed as expected.\n"); + printf(" Induced failure test completed with %d errors\n", st_err); post_quiet = 0; no_err = 0; From fcd3e8e97be2801b2cf1875dbc3c8d949651a291 Mon Sep 17 00:00:00 2001 From: "Dr. Stephen Henson" Date: Sat, 3 Dec 2011 19:51:52 +0000 Subject: [PATCH 042/120] Prepare for RC6. --- fips/fips_locl.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fips/fips_locl.h b/fips/fips_locl.h index de8ee04086..0285c56269 100644 --- a/fips/fips_locl.h +++ b/fips/fips_locl.h @@ -67,8 +67,8 @@ int fips_post_failed(int id, int subid, void *ex); int fips_post_corrupt(int id, int subid, void *ex); int fips_post_status(void); -#define FIPS_MODULE_VERSION_NUMBER 0x20000005L -#define FIPS_MODULE_VERSION_TEXT "FIPS 2.0-rc5 unvalidated test module xx XXX xxxx" +#define FIPS_MODULE_VERSION_NUMBER 0x20000006L +#define FIPS_MODULE_VERSION_TEXT "FIPS 2.0-rc6-dev unvalidated test module xx XXX xxxx" #ifdef __cplusplus } From dd4eefdb7bfe0898d7debc061b199f0cc2fdd8ce Mon Sep 17 00:00:00 2001 From: "Dr. Stephen Henson" Date: Sat, 3 Dec 2011 21:44:01 +0000 Subject: [PATCH 043/120] Change EVP_MAXCHUNK so it doesn't wraparound to 0 on some platforms (IP32L64). 
--- crypto/evp/evp_locl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crypto/evp/evp_locl.h b/crypto/evp/evp_locl.h index 94162d6419..6d1753522a 100644 --- a/crypto/evp/evp_locl.h +++ b/crypto/evp/evp_locl.h @@ -75,7 +75,7 @@ static int cname##_ecb_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, const uns return 1;\ } -#define EVP_MAXCHUNK ((size_t)1<<(sizeof(long)*8-2)) +#define EVP_MAXCHUNK ((size_t)1<<(sizeof(int)*8-2)) #define BLOCK_CIPHER_func_ofb(cname, cprefix, cbits, kstruct, ksched) \ static int cname##_ofb_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, const unsigned char *in, size_t inl) \ From efd031abca58553db78fd2c7ca036812273580b3 Mon Sep 17 00:00:00 2001 From: "Dr. Stephen Henson" Date: Sat, 3 Dec 2011 21:47:48 +0000 Subject: [PATCH 044/120] Fix x86cpuid so it doesn't fail for some (currently theoretical) virtual machines. --- crypto/x86cpuid.pl | 2 -- 1 file changed, 2 deletions(-) diff --git a/crypto/x86cpuid.pl b/crypto/x86cpuid.pl index 168e4fa0a9..e8eaef7582 100644 --- a/crypto/x86cpuid.pl +++ b/crypto/x86cpuid.pl @@ -119,8 +119,6 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); } &mov ("esi","edx"); &or ("ebp","ecx"); # merge AMD XOP flag - &bt ("ecx",26); # check XSAVE bit - &jnc (&label("done")); &bt ("ecx",27); # check OSXSAVE bit &jnc (&label("clear_avx")); &xor ("ecx","ecx"); From 32b56fe4d252ad574743936439e8977953c6f1b0 Mon Sep 17 00:00:00 2001 From: "Dr. 
Stephen Henson" Date: Sun, 4 Dec 2011 15:04:20 +0000 Subject: [PATCH 045/120] avoid use of symlinks on Windows: it causes problems on some build environments --- util/mklink.pl | 1 + util/point.sh | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/util/mklink.pl b/util/mklink.pl index 61db12c68f..72a562ecaf 100755 --- a/util/mklink.pl +++ b/util/mklink.pl @@ -52,6 +52,7 @@ my $to = join('/', @to_path); my $file; $symlink_exists=eval {symlink("",""); 1}; if ($^O eq "msys") { $symlink_exists=0 }; +if ($^O eq "MSWin32") { $symlink_exists=0 }; foreach $file (@files) { my $err = ""; if ($symlink_exists) { diff --git a/util/point.sh b/util/point.sh index da39899cb1..22daf0e8c5 100755 --- a/util/point.sh +++ b/util/point.sh @@ -1,7 +1,7 @@ #!/bin/sh rm -f "$2" -if test "$OSTYPE" = msdosdjgpp || test "x$PLATFORM" = xmingw ; then +if test "$OSTYPE" = msdosdjgpp || test "x$PLATFORM" = xmingw || test "x$OS" = xWindows_NT ; then cp "$1" "$2" else ln -s "$1" "$2" From 61c3085d476c5d53f4f4f3c5007d89bf4afbb099 Mon Sep 17 00:00:00 2001 From: "Dr. Stephen Henson" Date: Sun, 4 Dec 2011 15:11:44 +0000 Subject: [PATCH 046/120] Workaround for VxWorks --- crypto/cryptlib.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/crypto/cryptlib.c b/crypto/cryptlib.c index 524daf037d..87768d94e7 100644 --- a/crypto/cryptlib.c +++ b/crypto/cryptlib.c @@ -359,7 +359,15 @@ void OPENSSL_showfatal (const char *fmta,...) { va_list ap; va_start (ap,fmta); +#if defined(OPENSSL_SYS_VXWORKS) + { + char buf[256]; + vsnprintf(buf,sizeof(buf),fmta,ap); + printf("%s",buf); + } +#else vfprintf (stderr,fmta,ap); +#endif va_end (ap); } int OPENSSL_isservice (void) { return 0; } From 58886fdefc4b4fcfacebc495bffadadd9fd97076 Mon Sep 17 00:00:00 2001 From: "Dr. 
Stephen Henson" Date: Sun, 4 Dec 2011 15:14:13 +0000 Subject: [PATCH 047/120] use BUILD_ONE_CMD for fips specific links otherwise we effectively do 'make links' twice --- Makefile.fips | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile.fips b/Makefile.fips index b3811dff22..4c7add6f1d 100644 --- a/Makefile.fips +++ b/Makefile.fips @@ -524,7 +524,7 @@ files: links: @$(PERL) $(TOP)/util/mkdir-p.pl include/openssl @$(PERL) $(TOP)/util/mklink.pl include/openssl $(EXHEADER) - @set -e; dir=fips target=links; $(RECURSIVE_BUILD_CMD) + @set -e; dir=fips target=links; $(BUILD_ONE_CMD) @(cd crypto ; SDIRS='$(LINKDIRS)' $(MAKE) -e links) gentests: From 1d235039d6f2da55476052154cffe8117edc4fb0 Mon Sep 17 00:00:00 2001 From: "Dr. Stephen Henson" Date: Sun, 4 Dec 2011 15:26:26 +0000 Subject: [PATCH 048/120] For FIPS builds we don't use the normal test files (and in the restricted tarball some don't exist) so set TEST='' to avoid linking to them. This also avoids problems on platforms that copy instead of symlink. --- Makefile.fips | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile.fips b/Makefile.fips index 4c7add6f1d..157e901099 100644 --- a/Makefile.fips +++ b/Makefile.fips @@ -525,7 +525,7 @@ links: @$(PERL) $(TOP)/util/mkdir-p.pl include/openssl @$(PERL) $(TOP)/util/mklink.pl include/openssl $(EXHEADER) @set -e; dir=fips target=links; $(BUILD_ONE_CMD) - @(cd crypto ; SDIRS='$(LINKDIRS)' $(MAKE) -e links) + @(cd crypto ; TEST='' SDIRS='$(LINKDIRS)' $(MAKE) -e links) gentests: @(cd test && echo "generating dummy tests (if needed)..." && \ From 81fc8cd029e2915b1d3d7dfaaaf4d246daa3a954 Mon Sep 17 00:00:00 2001 From: "Dr. 
Stephen Henson" Date: Sun, 4 Dec 2011 21:29:08 +0000 Subject: [PATCH 049/120] prepare for RC6 --- fips/fips_locl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fips/fips_locl.h b/fips/fips_locl.h index 0285c56269..10c593e3a4 100644 --- a/fips/fips_locl.h +++ b/fips/fips_locl.h @@ -68,7 +68,7 @@ int fips_post_corrupt(int id, int subid, void *ex); int fips_post_status(void); #define FIPS_MODULE_VERSION_NUMBER 0x20000006L -#define FIPS_MODULE_VERSION_TEXT "FIPS 2.0-rc6-dev unvalidated test module xx XXX xxxx" +#define FIPS_MODULE_VERSION_TEXT "FIPS 2.0-rc6 unvalidated test module xx XXX xxxx" #ifdef __cplusplus } From 7c0d30038f19ce927ac121bd2114e41b8a17ed8e Mon Sep 17 00:00:00 2001 From: "Dr. Stephen Henson" Date: Thu, 8 Dec 2011 15:14:38 +0000 Subject: [PATCH 050/120] Close file streams in FIPS algorithm test utilities. --- fips/dh/fips_dhvs.c | 4 ++++ fips/ecdh/fips_ecdhvs.c | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/fips/dh/fips_dhvs.c b/fips/dh/fips_dhvs.c index 3ba1977862..0fb52f79a4 100644 --- a/fips/dh/fips_dhvs.c +++ b/fips/dh/fips_dhvs.c @@ -279,6 +279,10 @@ int main(int argc, char **argv) rhash, rhashlen); } } + if (in && in != stdin) + fclose(in); + if (out && out != stdout) + fclose(out); return 0; parse_error: fprintf(stderr, "Error Parsing request file\n"); diff --git a/fips/ecdh/fips_ecdhvs.c b/fips/ecdh/fips_ecdhvs.c index 61d216d1b7..a1422868b3 100644 --- a/fips/ecdh/fips_ecdhvs.c +++ b/fips/ecdh/fips_ecdhvs.c @@ -484,6 +484,10 @@ int main(int argc, char **argv) BN_free(cy); if (group) EC_GROUP_free(group); + if (in && in != stdin) + fclose(in); + if (out && out != stdout) + fclose(out); if (rv) fprintf(stderr, "Error Parsing request file\n"); return rv; From 0e480d55537be586c806c8d0175c16a3262daf5c Mon Sep 17 00:00:00 2001 From: "Dr. 
Stephen Henson" Date: Sat, 10 Dec 2011 13:29:23 +0000 Subject: [PATCH 051/120] use different names for asm temp files to avoid problems on some platforms --- util/fipsas.pl | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/util/fipsas.pl b/util/fipsas.pl index fc2a759308..1694c59a41 100644 --- a/util/fipsas.pl +++ b/util/fipsas.pl @@ -8,6 +8,9 @@ my @ARGS = @ARGV; my $top = shift @ARGS; my $target = shift @ARGS; +my $tmptarg = $target; + +$tmptarg =~ s/\.[^\\\/\.]+$/.tmp/; my $runasm = 1; @@ -48,11 +51,15 @@ while () my ($from, $to); +#delete any temp file lying around + +unlink $tmptarg; + #rename target temporarily -rename($target, "tmptarg.s") || die "Can't rename $target"; +rename($target, $tmptarg) || die "Can't rename $target"; #edit target -open(IN,"tmptarg.s") || die "Can't open temporary file"; +open(IN,$tmptarg) || die "Can't open temporary file"; open(OUT, ">$target") || die "Can't open output file $target"; while () @@ -75,16 +82,12 @@ if ($runasm) # restore target unlink $target; - rename "tmptarg.s", $target; + rename $tmptarg, $target; die "Error executing assembler!" if $rv != 0; } else { # Don't care about target - unlink "tmptarg.s"; + unlink $tmptarg; } - - - - From df0884ffb7cd8042ae901793934de281674aa569 Mon Sep 17 00:00:00 2001 From: "Dr. Stephen Henson" Date: Sat, 10 Dec 2011 18:06:55 +0000 Subject: [PATCH 052/120] Retry rename operation with a slight delay to workaround problems on some versions of Windows. --- util/fipsas.pl | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/util/fipsas.pl b/util/fipsas.pl index 1694c59a41..2734a20181 100644 --- a/util/fipsas.pl +++ b/util/fipsas.pl @@ -56,7 +56,14 @@ my ($from, $to); unlink $tmptarg; #rename target temporarily -rename($target, $tmptarg) || die "Can't rename $target"; +my $rencnt = 0; +# On windows the previous file doesn't always close straight away +# so retry the rename operation a few times if it fails. 
+while (!rename($target, $tmptarg)) + { + sleep 2; + die "Can't rename $target" if ($rencnt++ > 10); + } #edit target open(IN,$tmptarg) || die "Can't open temporary file"; From 49dbcbaa4b844dae21c368e2e13fef07b143b085 Mon Sep 17 00:00:00 2001 From: "Dr. Stephen Henson" Date: Mon, 12 Dec 2011 13:44:05 +0000 Subject: [PATCH 053/120] Prepare for RC7. --- fips/fips_locl.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fips/fips_locl.h b/fips/fips_locl.h index 10c593e3a4..98d95f699d 100644 --- a/fips/fips_locl.h +++ b/fips/fips_locl.h @@ -67,8 +67,8 @@ int fips_post_failed(int id, int subid, void *ex); int fips_post_corrupt(int id, int subid, void *ex); int fips_post_status(void); -#define FIPS_MODULE_VERSION_NUMBER 0x20000006L -#define FIPS_MODULE_VERSION_TEXT "FIPS 2.0-rc6 unvalidated test module xx XXX xxxx" +#define FIPS_MODULE_VERSION_NUMBER 0x20000007L +#define FIPS_MODULE_VERSION_TEXT "FIPS 2.0-rc7 unvalidated test module xx XXX xxxx" #ifdef __cplusplus } From c567812fa6d240e9ef5661d43326c26c7b85343e Mon Sep 17 00:00:00 2001 From: "Dr. Stephen Henson" Date: Mon, 12 Dec 2011 14:02:57 +0000 Subject: [PATCH 054/120] set version to rc8-dev --- fips/fips_locl.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fips/fips_locl.h b/fips/fips_locl.h index 98d95f699d..c7f3e3ac1f 100644 --- a/fips/fips_locl.h +++ b/fips/fips_locl.h @@ -67,8 +67,8 @@ int fips_post_failed(int id, int subid, void *ex); int fips_post_corrupt(int id, int subid, void *ex); int fips_post_status(void); -#define FIPS_MODULE_VERSION_NUMBER 0x20000007L -#define FIPS_MODULE_VERSION_TEXT "FIPS 2.0-rc7 unvalidated test module xx XXX xxxx" +#define FIPS_MODULE_VERSION_NUMBER 0x20000008L +#define FIPS_MODULE_VERSION_TEXT "FIPS 2.0-rc8-dev unvalidated test module xx XXX xxxx" #ifdef __cplusplus } From 421de62232cb19339199b20f3f092e90b5ad8a22 Mon Sep 17 00:00:00 2001 From: "Dr. 
Stephen Henson" Date: Tue, 3 Jan 2012 14:22:45 +0000 Subject: [PATCH 055/120] unlink target and retry to avoid intermittent Win32 failures --- util/fipslink.pl | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/util/fipslink.pl b/util/fipslink.pl index 8b6fbad7d8..262d691d1b 100644 --- a/util/fipslink.pl +++ b/util/fipslink.pl @@ -57,6 +57,15 @@ print "$fips_cc -DHMAC_SHA1_SIG=\\\"$fips_hash\\\" $fips_cc_args $fips_libdir/fi system "$fips_cc -DHMAC_SHA1_SIG=\\\"$fips_hash\\\" $fips_cc_args $fips_libdir/fips_premain.c"; die "Second stage Compile failure" if $? != 0; +my $delcnt = 0; +# On windows the previous file doesn't always close straight away +# so retry an unlink operation a few times if it fails. +while (!unlink($target)) + { + sleep 2; + die "Can't delete $target" if ($delcnt++ > 10); + } + print "$fips_link @ARGV\n"; system "$fips_link @ARGV"; From 409abd2fecba4431486de57afc7b5d6b662ab8e0 Mon Sep 17 00:00:00 2001 From: "Dr. Stephen Henson" Date: Tue, 3 Jan 2012 14:23:54 +0000 Subject: [PATCH 056/120] Prepare RC8 --- fips/fips_locl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fips/fips_locl.h b/fips/fips_locl.h index c7f3e3ac1f..b3618a57de 100644 --- a/fips/fips_locl.h +++ b/fips/fips_locl.h @@ -68,7 +68,7 @@ int fips_post_corrupt(int id, int subid, void *ex); int fips_post_status(void); #define FIPS_MODULE_VERSION_NUMBER 0x20000008L -#define FIPS_MODULE_VERSION_TEXT "FIPS 2.0-rc8-dev unvalidated test module xx XXX xxxx" +#define FIPS_MODULE_VERSION_TEXT "FIPS 2.0-rc8 unvalidated test module xx XXX xxxx" #ifdef __cplusplus } From 24fadf2a20f50d5623f61f2cc08f95c22b5ba75c Mon Sep 17 00:00:00 2001 From: "Dr. 
Stephen Henson" Date: Tue, 3 Jan 2012 19:43:06 +0000 Subject: [PATCH 057/120] typo --- util/fipslink.pl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/util/fipslink.pl b/util/fipslink.pl index 262d691d1b..fd8b52ea0d 100644 --- a/util/fipslink.pl +++ b/util/fipslink.pl @@ -60,10 +60,10 @@ die "Second stage Compile failure" if $? != 0; my $delcnt = 0; # On windows the previous file doesn't always close straight away # so retry an unlink operation a few times if it fails. -while (!unlink($target)) +while (!unlink($fips_target)) { sleep 2; - die "Can't delete $target" if ($delcnt++ > 10); + die "Can't delete $fips_target" if ($delcnt++ > 10); } From ac381944ac3e8970f40e7eb3fc528e58dcca9d4d Mon Sep 17 00:00:00 2001 From: "Dr. Stephen Henson" Date: Wed, 18 Jan 2012 14:54:20 +0000 Subject: [PATCH 058/120] give a hand old assemblers assembling loop instruction. (original by Andy) --- crypto/perlasm/x86gas.pl | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/crypto/perlasm/x86gas.pl b/crypto/perlasm/x86gas.pl index d0b7ae27ae..263182b985 100644 --- a/crypto/perlasm/x86gas.pl +++ b/crypto/perlasm/x86gas.pl @@ -45,10 +45,8 @@ sub ::generic undef $suffix if ($dst =~ m/^%[xm]/o || $src =~ m/^%[xm]/o); if ($#_==0) { &::emit($opcode); } - elsif ($opcode =~ m/^j/o && $#_==1) { &::emit($opcode,@arg); } - elsif ($opcode eq "call" && $#_==1) { &::emit($opcode,@arg); } - elsif ($opcode eq "clflush" && $#_==1){ &::emit($opcode,@arg); } - elsif ($opcode =~ m/^set/&& $#_==1) { &::emit($opcode,@arg); } + elsif ($#_==1 && $opcode =~ m/^(call|clflush|j|loop|set)/o) + { &::emit($opcode,@arg); } else { &::emit($opcode.$suffix,@arg);} 1; From 1de6a6222241850c8e3acc81d3c5f2e39b838713 Mon Sep 17 00:00:00 2001 From: "Dr. 
Stephen Henson" Date: Wed, 18 Jan 2012 15:07:11 +0000 Subject: [PATCH 059/120] revert fipslink.pl unlink retry change --- fips/fips_locl.h | 4 ++-- util/fipslink.pl | 10 ---------- 2 files changed, 2 insertions(+), 12 deletions(-) diff --git a/fips/fips_locl.h b/fips/fips_locl.h index b3618a57de..6efa93194e 100644 --- a/fips/fips_locl.h +++ b/fips/fips_locl.h @@ -67,8 +67,8 @@ int fips_post_failed(int id, int subid, void *ex); int fips_post_corrupt(int id, int subid, void *ex); int fips_post_status(void); -#define FIPS_MODULE_VERSION_NUMBER 0x20000008L -#define FIPS_MODULE_VERSION_TEXT "FIPS 2.0-rc8 unvalidated test module xx XXX xxxx" +#define FIPS_MODULE_VERSION_NUMBER 0x20000009L +#define FIPS_MODULE_VERSION_TEXT "FIPS 2.0-rc9 unvalidated test module xx XXX xxxx" #ifdef __cplusplus } diff --git a/util/fipslink.pl b/util/fipslink.pl index fd8b52ea0d..4f47efa39c 100644 --- a/util/fipslink.pl +++ b/util/fipslink.pl @@ -57,16 +57,6 @@ print "$fips_cc -DHMAC_SHA1_SIG=\\\"$fips_hash\\\" $fips_cc_args $fips_libdir/fi system "$fips_cc -DHMAC_SHA1_SIG=\\\"$fips_hash\\\" $fips_cc_args $fips_libdir/fips_premain.c"; die "Second stage Compile failure" if $? != 0; -my $delcnt = 0; -# On windows the previous file doesn't always close straight away -# so retry an unlink operation a few times if it fails. -while (!unlink($fips_target)) - { - sleep 2; - die "Can't delete $fips_target" if ($delcnt++ > 10); - } - - print "$fips_link @ARGV\n"; system "$fips_link @ARGV"; die "Second stage Link failure" if $? != 0; From 455ecb3a0630dbe185aeaa0dd18686f5943aaee8 Mon Sep 17 00:00:00 2001 From: "Dr. 
Stephen Henson" Date: Wed, 16 May 2012 18:56:33 +0000 Subject: [PATCH 061/120] initial mk1mf cross compilation support --- CHANGES | 10 ++++++++++ util/fipslink.pl | 13 +++++++++++++ util/mk1mf.pl | 19 ++++++++++++++----- 3 files changed, 37 insertions(+), 5 deletions(-) diff --git a/CHANGES b/CHANGES index 0e9afd12ed..3dd573b709 100644 --- a/CHANGES +++ b/CHANGES @@ -4,6 +4,16 @@ Changes between 1.0.1 and 1.1.0 [xx XXX xxxx] + *) Add initial cross compilation support for Windows build. The following + environment variables should be set: + + FIPS_SHA1_PATH: path to fips_standalone_sha1 exectutable which will + be used explicitly and not built. + FIPS_SIG: similar to other builds: path to a "get signature" script + which is used to obtain the signature of the target instead of + executing it on the host. + [Steve Henson] + *) Add flag to EC_KEY to use cofactor ECDH if set. [Steve Henson] diff --git a/util/fipslink.pl b/util/fipslink.pl index 4f47efa39c..331c456878 100644 --- a/util/fipslink.pl +++ b/util/fipslink.pl @@ -27,6 +27,19 @@ if (exists $ENV{"PREMAIN_DSO_EXE"}) $fips_premain_dso = ""; } +my $fips_sig = $ENV{"FIPS_SIG"}; +if (defined $fips_sig) + { + if ($fips_premain_dso ne "") + { + $fips_premain_dso = "$fips_sig -dso"; + } + else + { + $fips_premain_dso = "$fips_sig -exe"; + } + } + check_hash($sha1_exe, "fips_premain.c"); check_hash($sha1_exe, "fipscanister.lib"); diff --git a/util/mk1mf.pl b/util/mk1mf.pl index af039c78ac..05ecd34e01 100755 --- a/util/mk1mf.pl +++ b/util/mk1mf.pl @@ -23,6 +23,7 @@ local $fips_canister_path = ""; my $fips_premain_dso_exe_path = ""; my $fips_premain_c_path = ""; my $fips_sha1_exe_path = ""; +my $fips_sha1_exe_build = 1; local $fipscanisterbuild = 0; @@ -500,8 +501,16 @@ if ($fips) { if ($fips_sha1_exe_path eq "") { - $fips_sha1_exe_path = - "\$(BIN_D)${o}fips_standalone_sha1$exep"; + $fips_sha1_exe_path = $ENV{"FIPS_SHA1_PATH"}; + if (defined $fips_sha1_exe_path) + { + $fips_sha1_exe_build = 0; + } + else + { + 
$fips_sha1_exe_path = + "\$(BIN_D)${o}fips_standalone_sha1$exep"; + } } } else @@ -959,16 +968,16 @@ if ($fips) # FIXME $rules.=&do_link_rule("\$(FIPS_SHA1_EXE)", "\$(OBJ_D)${o}fips_standalone_sha1$obj \$(OBJ_D)${o}sha1dgst$obj $sha1_asm_obj", - "","\$(EX_LIBS)", 1); + "","\$(EX_LIBS)", 1) if $fips_sha1_exe_build; } else { $rules.=&do_link_rule("\$(FIPS_SHA1_EXE)", "\$(OBJ_D)${o}fips_standalone_sha1$obj \$(O_FIPSCANISTER)", - "","", 1); + "","", 1) if $fips_sha1_exe_build; } - $rules.=&do_link_rule("\$(PREMAIN_DSO_EXE)","\$(OBJ_D)${o}\$(E_PREMAIN_DSO)$obj \$(CRYPTOOBJ) \$(O_FIPSCANISTER)","","\$(EX_LIBS)", 1); + $rules.=&do_link_rule("\$(PREMAIN_DSO_EXE)","\$(OBJ_D)${o}\$(E_PREMAIN_DSO)$obj \$(CRYPTOOBJ) \$(O_FIPSCANISTER)","","\$(EX_LIBS)", 1) unless defined $ENV{"FIPS_SIG"}; } From 05703abd9f5f90fb40731a520e0274bf27ba7fdb Mon Sep 17 00:00:00 2001 From: "Dr. Stephen Henson" Date: Thu, 17 May 2012 11:09:46 +0000 Subject: [PATCH 062/120] Make Windows FIPS build work more like other builds. Add build_tests target to build FIPS test utilities and build_algvs target to build the multicall fips_algvs utility. --- CHANGES | 5 +++++ util/mk1mf.pl | 15 +++++++++++++-- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/CHANGES b/CHANGES index 3dd573b709..fe19d3a80c 100644 --- a/CHANGES +++ b/CHANGES @@ -4,6 +4,11 @@ Changes between 1.0.1 and 1.1.0 [xx XXX xxxx] + *) Update Windows build system for FIPS. Don't compile algorithm test + utilties by default: the target build_tests is needed for that. Add + support for building fips_algvs with the build_algvs target. + [Steve Henson] + *) Add initial cross compilation support for Windows build. 
The following environment variables should be set: diff --git a/util/mk1mf.pl b/util/mk1mf.pl index 05ecd34e01..70b176c872 100755 --- a/util/mk1mf.pl +++ b/util/mk1mf.pl @@ -554,7 +554,7 @@ if ($fips) if ($fipscanisteronly) { - $build_targets = "\$(O_FIPSCANISTER) \$(T_EXE)"; + $build_targets = "\$(O_FIPSCANISTER)"; $libs_dep = ""; } @@ -753,7 +753,11 @@ headers: \$(HEADER) \$(EXHEADER) lib: \$(LIBS_DEP) \$(E_SHLIB) -exe: \$(T_EXE) \$(BIN_D)$o\$(E_EXE)$exep +exe: \$(BIN_D)$o\$(E_EXE)$exep + +build_tests: \$(T_EXE) + +build_algvs: \$(T_SRC) \$(BIN_D)${o}fips_algvs$exep install: all \$(MKDIR) \"\$(INSTALLTOP)\" @@ -855,6 +859,9 @@ if ($fips) $rules.=&cc_compile_target("\$(OBJ_D)${o}\$(E_PREMAIN_DSO)$obj", "fips${o}fips_premain.c", "-DFINGERPRINT_PREMAIN_DSO_LOAD \$(SHLIB_CFLAGS)"); + $rules.=&cc_compile_target("\$(OBJ_D)${o}fips_algvs$obj", + "test${o}fips_algvs.c", + "\$(SHLIB_CFLAGS)"); } foreach (values %lib_nam) @@ -887,6 +894,7 @@ EOF } $defs.=&do_defs("T_EXE",$test,"\$(TEST_D)",$exep); +$defs.=&do_defs("T_SRC",$test,"\$(TMP_D)",".c"); foreach (split(/\s+/,$test)) { my $t_libs; @@ -908,8 +916,11 @@ foreach (split(/\s+/,$test)) $tt="\$(OBJ_D)${o}$t${obj}"; $rules.=&do_link_rule("\$(TEST_D)$o$t$exep",$tt,"\$(LIBS_DEP)","$t_libs \$(EX_LIBS)", $ltype); + $rules.=&do_copy_rule("\$(TMP_D)",$_,".c"); } + $rules.=&do_link_rule("\$(TEST_D)${o}fips_algvs$exep","\$(OBJ_D)${o}fips_algvs$obj","\$(LIBS_DEP)","\$(O_FIPSCANISTER) \$(EX_LIBS)", 2) if $fips; + $defs.=&do_defs("E_SHLIB",$engines . $otherlibs,"\$(ENG_D)",$shlibp); foreach (split(/\s+/,$engines)) From fcb81a191d3647fd07811ab64447b8061dd01e3b Mon Sep 17 00:00:00 2001 From: "Dr. 
Stephen Henson" Date: Fri, 18 May 2012 15:05:33 +0000 Subject: [PATCH 063/120] don't insert time in response files: it breaks some systems and we shouldn't be doing this anyway --- CHANGES | 5 +++++ fips/aes/fips_aesavs.c | 6 ++---- fips/des/fips_desmovs.c | 6 ++---- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/CHANGES b/CHANGES index fe19d3a80c..e8464c7ae3 100644 --- a/CHANGES +++ b/CHANGES @@ -4,6 +4,11 @@ Changes between 1.0.1 and 1.1.0 [xx XXX xxxx] + *) Don't attempt to insert current time into AES/3DES tests, we should + be just copying input line across and this breaks some systems lacking + ctime. + [Steve Henson] + *) Update Windows build system for FIPS. Don't compile algorithm test utilties by default: the target build_tests is needed for that. Add support for building fips_algvs with the build_algvs target. diff --git a/fips/aes/fips_aesavs.c b/fips/aes/fips_aesavs.c index fecaf990c6..ce07cac992 100644 --- a/fips/aes/fips_aesavs.c +++ b/fips/aes/fips_aesavs.c @@ -635,10 +635,8 @@ static int proc_file(char *rqfile, char *rspfile) char *xp, *pp = ibuf+2; int n; if (akeysz) - { /* insert current time & date */ - time_t rtim = time(0); - fputs("# ", rfp); - copy_line(ctime(&rtim), rfp); + { + copy_line(ibuf, rfp); } else { diff --git a/fips/des/fips_desmovs.c b/fips/des/fips_desmovs.c index 2bbeb53459..0ffab89e2f 100644 --- a/fips/des/fips_desmovs.c +++ b/fips/des/fips_desmovs.c @@ -356,10 +356,8 @@ static int tproc_file(char *rqfile, char *rspfile) char *xp, *pp = ibuf+2; int n; if(*amode) - { /* insert current time & date */ - time_t rtim = time(0); - fputs("# ", rfp); - copy_line(ctime(&rtim), rfp); + { + copy_line(ibuf, rfp); } else { From 76f4af202ef76c9a2bff141f6635da875c571436 Mon Sep 17 00:00:00 2001 From: "Dr. 
Stephen Henson" Date: Fri, 18 May 2012 15:55:40 +0000 Subject: [PATCH 064/120] cross compile target support for do_fips script --- ms/do_fips.bat | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/ms/do_fips.bat b/ms/do_fips.bat index 73b0a3e8e4..18a3578a26 100644 --- a/ms/do_fips.bat +++ b/ms/do_fips.bat @@ -1,5 +1,13 @@ @echo off +if X%CROSS_TARGET% == X goto detect + +echo Cross compiling for %CROSS_TARGET% +SET TARGET=%CROSS_TARGET% +SET ASM=%CROSS_ASM% +goto compile + +:detect SET ASM=%1 SET EXARG= From b440c25d36bd04944ca6be4ab9b53314bb5f3aa7 Mon Sep 17 00:00:00 2001 From: "Dr. Stephen Henson" Date: Tue, 22 May 2012 23:57:22 +0000 Subject: [PATCH 065/120] Changes needed to support WinCE compilation: Don't try to raise SIGABRT if not defined. Return from fips_dhvs.c main instead of calling exit. Workaround for lack of GetSystemTimeAsFileTime. Disable optimisation for part of bn_nist.c to avoid compiler bug. Remove /WX flag so we don't exit on warnings. --- crypto/bn/bn_nist.c | 8 ++++++++ crypto/cryptlib.c | 2 ++ fips/dh/fips_dhvs.c | 2 +- fips/rand/fips_rand.c | 8 ++++++++ util/pl/VC-32.pl | 2 +- 5 files changed, 20 insertions(+), 2 deletions(-) diff --git a/crypto/bn/bn_nist.c b/crypto/bn/bn_nist.c index 86bb0429f3..99cfc3f355 100644 --- a/crypto/bn/bn_nist.c +++ b/crypto/bn/bn_nist.c @@ -1047,6 +1047,10 @@ int BN_nist_mod_384(BIGNUM *r, const BIGNUM *a, const BIGNUM *field, return 1; } +#ifdef _W32_WCE +#pragma optimize( "", off ) +#endif + #define BN_NIST_521_RSHIFT (521%BN_BITS2) #define BN_NIST_521_LSHIFT (BN_BITS2-BN_NIST_521_RSHIFT) #define BN_NIST_521_TOP_MASK ((BN_ULONG)BN_MASK2>>BN_NIST_521_LSHIFT) @@ -1113,6 +1117,10 @@ int BN_nist_mod_521(BIGNUM *r, const BIGNUM *a, const BIGNUM *field, return 1; } +#ifdef _W32_WCE +#pragma optimize( "", on ) +#endif + int (*BN_nist_mod_func(const BIGNUM *p))(BIGNUM *r, const BIGNUM *a, const BIGNUM *field, BN_CTX *ctx) { if (BN_ucmp(&_bignum_nist_p_192, p) == 0) diff --git a/crypto/cryptlib.c
b/crypto/cryptlib.c index 87768d94e7..118fca1ee9 100644 --- a/crypto/cryptlib.c +++ b/crypto/cryptlib.c @@ -382,7 +382,9 @@ void OpenSSLDie(const char *file,int line,const char *assertion) abort(); #else /* Win32 abort() customarily shows a dialog, but we just did that... */ +#ifdef SIGABRT raise(SIGABRT); +#endif _exit(3); #endif } diff --git a/fips/dh/fips_dhvs.c b/fips/dh/fips_dhvs.c index 0fb52f79a4..a925e13c7d 100644 --- a/fips/dh/fips_dhvs.c +++ b/fips/dh/fips_dhvs.c @@ -286,7 +286,7 @@ int main(int argc, char **argv) return 0; parse_error: fprintf(stderr, "Error Parsing request file\n"); - exit(1); + return 1; } #endif diff --git a/fips/rand/fips_rand.c b/fips/rand/fips_rand.c index f80c005758..5fa052746d 100644 --- a/fips/rand/fips_rand.c +++ b/fips/rand/fips_rand.c @@ -232,6 +232,9 @@ void FIPS_get_timevec(unsigned char *buf, unsigned long *pctr) { #ifdef OPENSSL_SYS_WIN32 FILETIME ft; +#ifdef _WIN32_WCE + SYSTEMTIME t; +#endif #elif defined(OPENSSL_SYS_VXWORKS) struct timespec ts; #else @@ -243,7 +246,12 @@ void FIPS_get_timevec(unsigned char *buf, unsigned long *pctr) #endif #ifdef OPENSSL_SYS_WIN32 +#ifdef _WIN32_WCE + GetSystemTime(&t); + SystemTimeToFileTime(&t, &ft); +#else GetSystemTimeAsFileTime(&ft); +#endif buf[0] = (unsigned char) (ft.dwHighDateTime & 0xff); buf[1] = (unsigned char) ((ft.dwHighDateTime >> 8) & 0xff); buf[2] = (unsigned char) ((ft.dwHighDateTime >> 16) & 0xff); diff --git a/util/pl/VC-32.pl b/util/pl/VC-32.pl index e98eb1e1b9..24b8172ffc 100644 --- a/util/pl/VC-32.pl +++ b/util/pl/VC-32.pl @@ -123,7 +123,7 @@ elsif ($FLAVOR =~ /CE/) } $cc='$(CC)'; - $base_cflags=' /W3 /WX /GF /Gy /nologo -DUNICODE -D_UNICODE -DOPENSSL_SYSNAME_WINCE -DWIN32_LEAN_AND_MEAN -DL_ENDIAN -DDSO_WIN32 -DNO_CHMOD -DOPENSSL_SMALL_FOOTPRINT'; + $base_cflags=' /W3 /GF /Gy /nologo -DUNICODE -D_UNICODE -DOPENSSL_SYSNAME_WINCE -DWIN32_LEAN_AND_MEAN -DL_ENDIAN -DDSO_WIN32 -DNO_CHMOD -DOPENSSL_SMALL_FOOTPRINT'; $base_cflags.=" $wcecdefs"; $base_cflags.=' 
-I$(WCECOMPAT)/include' if (defined($ENV{'WCECOMPAT'})); $base_cflags.=' -I$(PORTSDK_LIBPATH)/../../include' if (defined($ENV{'PORTSDK_LIBPATH'})); From b75ff26d7b34c2ed335e2feb47e8f5ed4a4840c4 Mon Sep 17 00:00:00 2001 From: "Dr. Stephen Henson" Date: Wed, 23 May 2012 00:39:01 +0000 Subject: [PATCH 066/120] Don't install fips_standalone_sha1.exe if it isn't being built --- util/mk1mf.pl | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/util/mk1mf.pl b/util/mk1mf.pl index 70b176c872..e2e9ffd086 100755 --- a/util/mk1mf.pl +++ b/util/mk1mf.pl @@ -576,9 +576,14 @@ if ($fipscanisteronly) \$(CP) \"fips${o}fips_premain.c.sha1\" \"\$(INSTALLTOP)${o}lib\" \$(CP) \"\$(INCO_D)${o}fips.h\" \"\$(INSTALLTOP)${o}include${o}openssl\" \$(CP) \"\$(INCO_D)${o}fips_rand.h\" \"\$(INSTALLTOP)${o}include${o}openssl\" - \$(CP) "\$(BIN_D)${o}fips_standalone_sha1$exep" \"\$(INSTALLTOP)${o}bin\" \$(CP) \"util${o}fipslink.pl\" \"\$(INSTALLTOP)${o}bin\" EOF + if ($fips_sha1_exe_build) + { + $extra_install .= <<"EOF"; + \$(CP) "\$(BIN_D)${o}fips_standalone_sha1$exep" \"\$(INSTALLTOP)${o}bin\" +EOF + } } elsif ($shlib) { From 35b412322f230fb19c83f08d3b9828c53adac266 Mon Sep 17 00:00:00 2001 From: "Dr. Stephen Henson" Date: Wed, 23 May 2012 11:45:21 +0000 Subject: [PATCH 067/120] fix typo --- crypto/bn/bn_nist.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crypto/bn/bn_nist.c b/crypto/bn/bn_nist.c index 99cfc3f355..117c2881fa 100644 --- a/crypto/bn/bn_nist.c +++ b/crypto/bn/bn_nist.c @@ -1047,7 +1047,7 @@ int BN_nist_mod_384(BIGNUM *r, const BIGNUM *a, const BIGNUM *field, return 1; } -#ifdef _W32_WCE +#ifdef _WIN32_WCE #pragma optimize( "", off ) #endif @@ -1117,7 +1117,7 @@ int BN_nist_mod_521(BIGNUM *r, const BIGNUM *a, const BIGNUM *field, return 1; } -#ifdef _W32_WCE +#ifdef _WIN32_WCE #pragma optimize( "", on ) #endif From 4972d50da0da83be2fc2027eab5b224dfb02ee61 Mon Sep 17 00:00:00 2001 From: "Dr. 
Stephen Henson" Date: Wed, 23 May 2012 11:47:01 +0000 Subject: [PATCH 068/120] add comment --- crypto/bn/bn_nist.c | 1 + 1 file changed, 1 insertion(+) diff --git a/crypto/bn/bn_nist.c b/crypto/bn/bn_nist.c index 117c2881fa..ce860b1d75 100644 --- a/crypto/bn/bn_nist.c +++ b/crypto/bn/bn_nist.c @@ -1048,6 +1048,7 @@ int BN_nist_mod_384(BIGNUM *r, const BIGNUM *a, const BIGNUM *field, } #ifdef _WIN32_WCE +/* Workaround for compiler bug under CE */ #pragma optimize( "", off ) #endif From 4feb7ef3947ad0514b6642343c4d317b8b9fa374 Mon Sep 17 00:00:00 2001 From: "Dr. Stephen Henson" Date: Wed, 23 May 2012 12:44:48 +0000 Subject: [PATCH 069/120] Add --script-sfprefix option to insert a prefix before the request and response filenames in output script. --- fips/fipsalgtest.pl | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fips/fipsalgtest.pl b/fips/fipsalgtest.pl index 30cd9c0ebd..d95a8c324c 100644 --- a/fips/fipsalgtest.pl +++ b/fips/fipsalgtest.pl @@ -495,6 +495,7 @@ my $onedir = 0; my $filter = ""; my $tvdir; my $tprefix; +my $sfprefix = ""; my $debug = 0; my $quiet = 0; my $notest = 0; @@ -615,6 +616,9 @@ foreach (@ARGV) { elsif (/--script-tprefix=(.*)$/) { $stprefix = $1; } + elsif (/--script-fprefix=(.*)$/) { + $sfprefix = $1; + } elsif (/--mkdir=(.*)$/) { $mkcmd = $1; } @@ -1044,7 +1048,7 @@ END mkdir($outdir) || die "Can't create directory $outdir"; } } - my $cmd = "$tcmd \"$req\" \"$out\""; + my $cmd = "$tcmd \"$sfprefix$req\" \"$sfprefix$out\""; print STDERR "DEBUG: running test $tname\n" if ( $debug && !$verify ); if ($outfile ne "") { if ($minimal_script) { From 83db979256f44d6ff5f7b1420ad77782ccec31a9 Mon Sep 17 00:00:00 2001 From: "Dr. Stephen Henson" Date: Wed, 23 May 2012 17:07:23 +0000 Subject: [PATCH 070/120] Version of "incore" for Windows executables. Original by Andy. 
--- CHANGES | 4 + util/hmac_sha1.pl | 196 ++++++++++++++++++++++++++++++++++++++++++++++ util/msincore | 169 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 369 insertions(+) create mode 100755 util/hmac_sha1.pl create mode 100755 util/msincore diff --git a/CHANGES b/CHANGES index e8464c7ae3..8c8a09ce2b 100644 --- a/CHANGES +++ b/CHANGES @@ -4,6 +4,10 @@ Changes between 1.0.1 and 1.1.0 [xx XXX xxxx] + *) Add perl scripts to calculate FIPS signatures for Windows + exectuables including WinCE. + [Andy Polyakov] + *) Don't attempt to insert current time into AES/3DES tests, we should be just copying input line across and this breaks some systems lacking ctime. diff --git a/util/hmac_sha1.pl b/util/hmac_sha1.pl new file mode 100755 index 0000000000..494f7e8569 --- /dev/null +++ b/util/hmac_sha1.pl @@ -0,0 +1,196 @@ +#!/usr/bin/env perl +# +# Copyright (c) 2011 The OpenSSL Project. +# +###################################################################### +# +# SHA1 and HMAC in Perl by . 
+# +{ package SHA1; + use integer; + + { + ################################### SHA1 block code generator + my @V = ('$A','$B','$C','$D','$E'); + my $i; + + sub XUpdate { + my $ret; + $ret="(\$T=\$W[($i-16)%16]^\$W[($i-14)%16]^\$W[($i-8)%16]^\$W[($i-3)%16],\n\t"; + if ((1<<31)<<1) { + $ret.=" \$W[$i%16]=((\$T<<1)|(\$T>>31))&0xffffffff)\n\t "; + } else { + $ret.=" \$W[$i%16]=(\$T<<1)|((\$T>>31)&1))\n\t "; + } + } + sub tail { + my ($a,$b,$c,$d,$e)=@V; + my $ret; + if ((1<<31)<<1) { + $ret.="(($a<<5)|($a>>27));\n\t"; + $ret.="$b=($b<<30)|($b>>2); $e&=0xffffffff; #$b&=0xffffffff;\n\t"; + } else { + $ret.="(($a<<5)|($a>>27)&0x1f);\n\t"; + $ret.="$b=($b<<30)|($b>>2)&0x3fffffff;\n\t"; + } + $ret; + } + sub BODY_00_15 { + my ($a,$b,$c,$d,$e)=@V; + "$e+=\$W[$i]+0x5a827999+((($c^$d)&$b)^$d)+".tail(); + } + sub BODY_16_19 { + my ($a,$b,$c,$d,$e)=@V; + "$e+=".XUpdate()."+0x5a827999+((($c^$d)&$b)^$d)+".tail(); + } + sub BODY_20_39 { + my ($a,$b,$c,$d,$e)=@V; + "$e+=".XUpdate()."+0x6ed9eba1+($b^$c^$d)+".tail(); + } + sub BODY_40_59 { + my ($a,$b,$c,$d,$e)=@V; + "$e+=".XUpdate()."+0x8f1bbcdc+(($b&$c)|(($b|$c)&$d))+".tail(); + } + sub BODY_60_79 { + my ($a,$b,$c,$d,$e)=@V; + "$e+=".XUpdate()."+0xca62c1d6+($b^$c^$d)+".tail(); + } + + my $sha1_impl = + 'sub block { + my $self = @_[0]; + my @W = unpack("N16",@_[1]); + my ($A,$B,$C,$D,$E,$T) = @{$self->{H}}; + '; + + $sha1_impl.=' + $A &= 0xffffffff; + $B &= 0xffffffff; + ' if ((1<<31)<<1); + + for($i=0;$i<16;$i++){ $sha1_impl.=BODY_00_15(); unshift(@V,pop(@V)); } + for(;$i<20;$i++) { $sha1_impl.=BODY_16_19(); unshift(@V,pop(@V)); } + for(;$i<40;$i++) { $sha1_impl.=BODY_20_39(); unshift(@V,pop(@V)); } + for(;$i<60;$i++) { $sha1_impl.=BODY_40_59(); unshift(@V,pop(@V)); } + for(;$i<80;$i++) { $sha1_impl.=BODY_60_79(); unshift(@V,pop(@V)); } + + $sha1_impl.=' + $self->{H}[0]+=$A; $self->{H}[1]+=$B; $self->{H}[2]+=$C; + $self->{H}[3]+=$D; $self->{H}[4]+=$E; }'; + + #print $sha1_impl,"\n"; + eval($sha1_impl); # generate code + } + + sub 
Init { + my $class = shift; # multiple instances... + my $self = {}; + + bless $self,$class; + $self->{H} = [0x67452301,0xefcdab89,0x98badcfe,0x10325476,0xc3d2e1f0]; + $self->{N} = 0; + return $self; + } + + sub Update { + my $self = shift; + my $msg; + + foreach $msg (@_) { + my $len = length($msg); + my $num = length($self->{buf}); + my $off = 0; + + $self->{N} += $len; + + if (($num+$len)<64) + { $self->{buf} .= $msg; next; } + elsif ($num) + { $self->{buf} .= substr($msg,0,($off=64-$num)); + $self->block($self->{buf}); + } + + while(($off+64) <= $len) + { $self->block(substr($msg,$off,64)); + $off += 64; + } + + $self->{buf} = substr($msg,$off); + } + return $self; + } + + sub Final { + my $self = shift; + my $num = length($self->{buf}); + + $self->{buf} .= chr(0x80); $num++; + if ($num>56) + { $self->{buf} .= chr(0)x(64-$num); + $self->block($self->{buf}); + $self->{buf}=undef; + $num=0; + } + $self->{buf} .= chr(0)x(56-$num); + $self->{buf} .= pack("N2",($self->{N}>>29)&0x7,$self->{N}<<3); + $self->block($self->{buf}); + + return pack("N*",@{$self->{H}}); + } + + sub Selftest { + my $hash; + + $hash=SHA1->Init()->Update('abc')->Final(); + die "SHA1 test#1" if (unpack("H*",$hash) ne 'a9993e364706816aba3e25717850c26c9cd0d89d'); + + $hash=SHA1->Init()->Update('abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq')->Final(); + die "SHA1 test#2" if (unpack("H*",$hash) ne '84983e441c3bd26ebaae4aa1f95129e5e54670f1'); + + #$hash=SHA1->Init()->Update('a'x1000000)->Final(); + #die "SHA1 test#3" if (unpack("H*",$hash) ne '34aa973cd4c4daa4f61eeb2bdbad27316534016f'); + } +} + +{ package HMAC; + + sub Init { + my $class = shift; + my $key = shift; + my $self = {}; + + bless $self,$class; + + if (length($key)>64) { + $key = SHA1->Init()->Update($key)->Final(); + } + $key .= chr(0x00)x(64-length($key)); + + my @ikey = map($_^=0x36,unpack("C*",$key)); + ($self->{hash} = SHA1->Init())->Update(pack("C*",@ikey)); + $self->{okey} = pack("C*",map($_^=0x36^0x5c,@ikey)); + + 
return $self; + } + + sub Update { + my $self = shift; + $self->{hash}->Update(@_); + return $self; + } + + sub Final { + my $self = shift; + my $ihash = $self->{hash}->Final(); + return SHA1->Init()->Update($self->{okey},$ihash)->Final(); + } + + sub Selftest { + my $hmac; + + $hmac = HMAC->Init('0123456789:;<=>?@ABC')->Update('Sample #2')->Final(); + die "HMAC test" if (unpack("H*",$hmac) ne '0922d3405faa3d194f82a45830737d5cc6c75d24'); + } +} + +1; diff --git a/util/msincore b/util/msincore new file mode 100755 index 0000000000..08f81be8d5 --- /dev/null +++ b/util/msincore @@ -0,0 +1,169 @@ +#!/usr/bin/env perl +# +# Copyright (c) 2012 The OpenSSL Project. +# +# The script embeds fingerprint into Microsoft PE-COFF executable object. + +$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; + +unshift(@INC,$dir); +require "hmac_sha1.pl"; + +###################################################################### +# +# PE-COFF segment table parser by . +# +{ package PECOFF; + use FileHandle; + + sub dup { my %copy=map {$_} @_; return \%copy; } + + sub Load { + my $class = shift; + my $self = {}; + my $FD = FileHandle->new(); # autoclose + my $file = shift; + + bless $self,$class; + + sysopen($FD,$file,0) or die "$!"; + binmode($FD); + + ################################################# + # read IMAGE_DOS_HEADER + # + read($FD,my $mz,64) or die "$!"; + my @dos_header=unpack("a2C58V",$mz); + + $!=42; # signal fipsld to revert to two-step link + die "$file is not PE-COFF image" if (@dos_header[0] ne "MZ"); + + my $e_lfanew=pop(@dos_header); + seek($FD,$e_lfanew,0) or die "$!"; + read($FD,my $magic,4) or die "$!"; + + $!=42; # signal fipsld to revert to two-step link + die "$file is not PE-COFF image" if (unpack("V",$magic)!=0x4550); + + ################################################# + # read and parse COFF header... 
+ # + read($FD,my $coff,20) or die "$!"; + + my %coff_header; + @coff_header{machine,nsects,date,syms_off,nsyms,opt,flags}= + unpack("v2V3v2",$coff); + + my $strings; + my $symsize; + + ################################################# + # load strings table + # + if ($coff_header{syms_off}) { + seek($FD,$coff_header{syms_off}+18*$coff_header{nsyms},0) or die "$!"; + read($FD,$strings,4) or die "$!"; + $symsize = unpack("V",$strings); + read($FD,$strings,$symsize,4) or die "$!"; + } + + ################################################# + # read sections + # + my $i; + + # seek to section headers + seek($FD,$e_lfanew+24+@coff_header{opt},0) or die "$!"; + + for ($i=0;$i<$coff_header{nsects};$i++) { + my %coff_shdr; + my $name; + + read($FD,my $section,40) or die "$!"; + + @coff_shdr{sh_name,sh_vsize,sh_vaddr, + sh_rawsize,sh_offset,sh_relocs,sh_lines, + sh_nrelocls,sh_nlines,sh_flags} = + unpack("a8V6v2V",$section); + + $name = $coff_shdr{sh_name}; + # see if sh_name is an offset in $strings + my ($hi,$lo) = unpack("V2",$name); + if ($hi==0 && $lo<$symsize) { + $name = substr($strings,$lo,64); + } + $name = (split(chr(0),$name))[0]; + $coff_shdr{sh_name} = $name; + + $self->{sections}{$name} = dup(%coff_shdr); + } + + return $self; + } + + sub Lookup { + my $self = shift; + my $name = shift; + return $self->{sections}{$name}; + } +} + +###################################################################### +# +# main() +# +my $legacy_mode; + +if ($#ARGV<0 || ($#ARGV>0 && !($legacy_mode=(@ARGV[0] =~ /^\-(dso|exe)$/)))) { + print STDERR "usage: $0 [-dso|-exe] pe-coff-binary\n"; + exit(1); +} + +$exe = PECOFF->Load(@ARGV[$#ARGV]); + +sysopen(FD,@ARGV[$#ARGV],$legacy_mode?0:2) or die "$!"; # 2 is read/write +binmode(FD); + +sub FIPS_incore_fingerprint { + my $ctx = HMAC->Init("etaonrishdlcupfm"); + my ($beg,$end); + my $sect; + + $sect = $exe->Lookup("fipstx") or die "no fipstx section"; + + seek(FD,$sect->{sh_offset},0) or die "$!"; + read(FD,$blob,$sect->{sh_vsize}) or 
die "$!"; + + ($beg = index($blob,"SPIFxet_ts_tXtra")) >= 0 + or die "no FIPS_text_startX"; + ($end = rindex($blob,"SPIFxet_ne_t][Xd")) >= 0 + or die "no FIPS_text_endX"; + + $ctx->Update(substr($blob,$beg,$end-$beg)); + + $sect = $exe->Lookup("fipsro") or die "no fipsro section"; + + seek(FD,$sect->{sh_offset},0) or die "$!"; + read(FD,$blob,$sect->{sh_vsize}) or die "$!"; + + ($beg = index($blob,"SPIFdor__atarats",40)) >= 0 + or die "no FIPS_rodata_start"; + ($end = rindex($blob,"SPIFdor__ata[dne")) >= 0 + or die "no FIPS_rodata_end"; + + $ctx->Update(substr($blob,$beg,$end-$beg)); + + return $ctx->Final(); +} + +$fingerprint = FIPS_incore_fingerprint(); + +if ($legacy_mode) { + print unpack("H*",$fingerprint); +} else { + my $sect = $exe->Lookup("fipsro"); + seek(FD,$sect->{sh_offset},0) or die "$!"; + print FD unpack("H*",$fingerprint) or die "$!"; +} + +close (FD); From 88e9264dd25dceb98cfc2cb347eaba1ad0b3d664 Mon Sep 17 00:00:00 2001 From: "Dr. Stephen Henson" Date: Thu, 7 Jun 2012 17:14:31 +0000 Subject: [PATCH 071/120] automatically make output directory is using minimal script --- fips/fipsalgtest.pl | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fips/fipsalgtest.pl b/fips/fipsalgtest.pl index d95a8c324c..2e31335ae9 100644 --- a/fips/fipsalgtest.pl +++ b/fips/fipsalgtest.pl @@ -1021,6 +1021,10 @@ END $out =~ s|/req/(\S+)\.req|/$rspdir/$1.rsp|; my $outdir = $out; $outdir =~ s|/[^/]*$||; + if ( !-d $outdir && ($outfile eq "" || $minimal_script)) { + print STDERR "DEBUG: Creating directory $outdir\n" if $debug; + mkdir($outdir) || die "Can't create directory $outdir"; + } if ($outfile ne "") { if ($win32) { $outdir =~ tr|/|\\|; @@ -1043,9 +1047,6 @@ END } $lastdir = $outdir; } - } elsif ( !-d $outdir ) { - print STDERR "DEBUG: Creating directory $outdir\n" if $debug; - mkdir($outdir) || die "Can't create directory $outdir"; } } my $cmd = "$tcmd \"$sfprefix$req\" \"$sfprefix$out\""; From ea11fc17cf6842eb0b2a95f3ea4839201e133f03 Mon 
Sep 17 00:00:00 2001 From: "Dr. Stephen Henson" Date: Thu, 4 Oct 2012 14:27:39 +0000 Subject: [PATCH 081/120] Add support for Windows CE and C64+ to FIPS module. (from fips2-stable) --- Configure | 4 +- Makefile.fips | 2 +- c6x/do_fips | 7 + c6x/env | 7 + c6x/fips_standalone_sha1 | 32 + c6x/fipscanister.cmd | 19 + c6x/hmac_sha1.pl | 196 ++++ c6x/incore6x | 241 +++++ c6x/run6x | 43 + crypto/aes/asm/aes-c64xplus.pl | 1329 ++++++++++++++++++++++++++++ crypto/bn/asm/bn-c64xplus.asm | 333 +++++++ crypto/bn/asm/c64xplus-gf2m.pl | 146 +++ crypto/bn/bn_nist.c | 4 + crypto/c64xpluscpuid.pl | 246 +++++ crypto/cmac/cmac.c | 3 +- crypto/des/spr.h | 3 + crypto/modes/asm/ghash-c64xplus.pl | 231 +++++ crypto/modes/gcm128.c | 6 + crypto/sha/asm/sha1-c64xplus.pl | 323 +++++++ crypto/sha/asm/sha256-c64xplus.pl | 292 ++++++ crypto/sha/asm/sha512-c64xplus.pl | 410 +++++++++ crypto/uid.c | 2 +- e_os.h | 2 +- fips/aes/fips_aesavs.c | 16 +- fips/aes/fips_gcmtest.c | 9 +- fips/dsa/fips_dsatest.c | 2 + fips/fips.c | 8 +- fips/fips_canister.c | 9 + fips/fips_premain.c | 6 + fips/fips_premain.c.sha1 | 2 +- fips/fipssyms.h | 1 + fips/rand/fips_rand.c | 14 +- ms/do_fips.bat | 29 +- test/fips_algvs.c | 29 +- util/fips_standalone_sha1 | 32 + util/fipsas.pl | 52 +- util/fipsdist.pl | 2 +- util/mk1mf.pl | 12 +- util/pl/TI_CGTOOLS.pl | 274 ++++++ util/pl/VC-32.pl | 9 +- 40 files changed, 4304 insertions(+), 83 deletions(-) create mode 100644 c6x/do_fips create mode 100644 c6x/env create mode 100644 c6x/fips_standalone_sha1 create mode 100644 c6x/fipscanister.cmd create mode 100644 c6x/hmac_sha1.pl create mode 100644 c6x/incore6x create mode 100644 c6x/run6x create mode 100644 crypto/aes/asm/aes-c64xplus.pl create mode 100644 crypto/bn/asm/bn-c64xplus.asm create mode 100644 crypto/bn/asm/c64xplus-gf2m.pl create mode 100644 crypto/c64xpluscpuid.pl create mode 100644 crypto/modes/asm/ghash-c64xplus.pl create mode 100644 crypto/sha/asm/sha1-c64xplus.pl create mode 100644 
crypto/sha/asm/sha256-c64xplus.pl create mode 100644 crypto/sha/asm/sha512-c64xplus.pl create mode 100644 util/fips_standalone_sha1 create mode 100644 util/pl/TI_CGTOOLS.pl diff --git a/Configure b/Configure index f93a9ee280..90b108baaa 100755 --- a/Configure +++ b/Configure @@ -610,12 +610,14 @@ my %table=( "uClinux-dist","$ENV{'CC'}:\$(CFLAGS)::-D_REENTRANT::\$(LDFLAGS) \$(LDLIBS):BN_LLONG:${no_asm}:$ENV{'LIBSSL_dlfcn'}:linux-shared:-fPIC:-shared:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):$ENV{'RANLIB'}::", "uClinux-dist64","$ENV{'CC'}:\$(CFLAGS)::-D_REENTRANT::\$(LDFLAGS) \$(LDLIBS):SIXTY_FOUR_BIT_LONG:${no_asm}:$ENV{'LIBSSL_dlfcn'}:linux-shared:-fPIC:-shared:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):$ENV{'RANLIB'}::", +"c64xplus","cl6x:-mv6400+ -o2 -ox -ms -pden -DNO_SYS_TYPES_H -DGETPID_IS_MEANINGLESS -DMD32_REG_T=int -DOPENSSL_SMALL_FOOTPRINT:::DSPBIOS::BN_LLONG:c64xpluscpuid.o:bn-c64xplus.o c64xplus-gf2m.o::aes-c64xplus.o aes_cbc.o aes_ctr.o:::sha1-c64xplus.o sha256-c64xplus.o sha512-c64xplus.o:::::::ghash-c64xplus.o::void:", + ); my @MK1MF_Builds=qw(VC-WIN64I VC-WIN64A debug-VC-WIN64I debug-VC-WIN64A VC-NT VC-CE VC-WIN32 debug-VC-WIN32 - BC-32 + BC-32 c64xplus netware-clib netware-clib-bsdsock netware-libc netware-libc-bsdsock); diff --git a/Makefile.fips b/Makefile.fips index 157e901099..74db06574b 100644 --- a/Makefile.fips +++ b/Makefile.fips @@ -186,7 +186,7 @@ SHARED_LDFLAGS= GENERAL= Makefile BASENAME= openssl NAME= $(BASENAME)-$(VERSION) -TARFILE= openssl-fips-2.0-test.tar +TARFILE= openssl-fips-2.0.tar WTARFILE= $(NAME)-win.tar EXHEADER= e_os2.h HEADER= e_os.h diff --git a/c6x/do_fips b/c6x/do_fips new file mode 100644 index 0000000000..c1c29fcf83 --- /dev/null +++ b/c6x/do_fips @@ -0,0 +1,7 @@ +#!/bin/sh + +perl Configure c64xplus fipscanisteronly no-engine +perl util/mkfiles.pl > MINFO +perl util/mk1mf.pl auto > c6x/fips.mak +make -f c6x/fips.mak +make -f c6x/fips_algvs.mak diff --git a/c6x/env b/c6x/env new file mode 100644 index 0000000000..543d33081e --- 
/dev/null +++ b/c6x/env @@ -0,0 +1,7 @@ +# MSYS-style PATH +export PATH=/c/CCStudio_v3.3/c6000/cgtools/bin:/c/Program\ Files/ActivePerl58/bin:$PATH + +# Windows-style variables +export C6X_C_DIR='C:\CCStudio_v3.3\c6000\cgtools\include;C:\CCStudio_v3.3\c6000\cgtools\lib' + +export PERL5LIB=C:/CCStudio_v3.3/bin/utilities/ccs_scripting diff --git a/c6x/fips_standalone_sha1 b/c6x/fips_standalone_sha1 new file mode 100644 index 0000000000..ea2268cb4e --- /dev/null +++ b/c6x/fips_standalone_sha1 @@ -0,0 +1,32 @@ +#!/usr/bin/env perl +# +$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; + +unshift(@INC,$dir); +require "hmac_sha1.pl"; + +(!@ARV[0] && -f @ARGV[$#ARGV]) || die "usage: $0 [-verify] file"; + +$verify=shift if (@ARGV[0] eq "-verify"); + +sysopen(FD,@ARGV[0],0) || die "$!"; +binmode(FD); + +my $ctx = HMAC->Init("etaonrishdlcupfm"); + +while (read(FD,$blob,4*1024)) { $ctx->Update($blob); } + +close(FD); + +my $signature = unpack("H*",$ctx->Final()); + +print "HMAC-SHA1(@ARGV[0])= $signature\n"; + +if ($verify) { + open(FD,"<@ARGV[0].sha1") || die "$!"; + $line = <FD>; + close(FD); + exit(0) if ($line =~ /HMAC\-SHA1\([^\)]*\)=\s*([0-9a-f]+)/i && + $1 eq $signature); + die "signature mismatch"; +} diff --git a/c6x/fipscanister.cmd b/c6x/fipscanister.cmd new file mode 100644 index 0000000000..a06ee15cb3 --- /dev/null +++ b/c6x/fipscanister.cmd @@ -0,0 +1,19 @@ +SECTIONS +{ + .text: + { + *(.fips_text:start) + *(.text) + *(.const:aes_asm) + *(.const:sha_asm) + *(.const:des_sptrans) + *(.switch) + *(.fips_text:end) + } + .const: + { + *(.fips_const:start) + *(.const) + *(.fips_const:end) + } +} diff --git a/c6x/hmac_sha1.pl b/c6x/hmac_sha1.pl new file mode 100644 index 0000000000..494f7e8569 --- /dev/null +++ b/c6x/hmac_sha1.pl @@ -0,0 +1,196 @@ +#!/usr/bin/env perl +# +# Copyright (c) 2011 The OpenSSL Project. +# +###################################################################### +# +# SHA1 and HMAC in Perl by . 
+# +{ package SHA1; + use integer; + + { + ################################### SHA1 block code generator + my @V = ('$A','$B','$C','$D','$E'); + my $i; + + sub XUpdate { + my $ret; + $ret="(\$T=\$W[($i-16)%16]^\$W[($i-14)%16]^\$W[($i-8)%16]^\$W[($i-3)%16],\n\t"; + if ((1<<31)<<1) { + $ret.=" \$W[$i%16]=((\$T<<1)|(\$T>>31))&0xffffffff)\n\t "; + } else { + $ret.=" \$W[$i%16]=(\$T<<1)|((\$T>>31)&1))\n\t "; + } + } + sub tail { + my ($a,$b,$c,$d,$e)=@V; + my $ret; + if ((1<<31)<<1) { + $ret.="(($a<<5)|($a>>27));\n\t"; + $ret.="$b=($b<<30)|($b>>2); $e&=0xffffffff; #$b&=0xffffffff;\n\t"; + } else { + $ret.="(($a<<5)|($a>>27)&0x1f);\n\t"; + $ret.="$b=($b<<30)|($b>>2)&0x3fffffff;\n\t"; + } + $ret; + } + sub BODY_00_15 { + my ($a,$b,$c,$d,$e)=@V; + "$e+=\$W[$i]+0x5a827999+((($c^$d)&$b)^$d)+".tail(); + } + sub BODY_16_19 { + my ($a,$b,$c,$d,$e)=@V; + "$e+=".XUpdate()."+0x5a827999+((($c^$d)&$b)^$d)+".tail(); + } + sub BODY_20_39 { + my ($a,$b,$c,$d,$e)=@V; + "$e+=".XUpdate()."+0x6ed9eba1+($b^$c^$d)+".tail(); + } + sub BODY_40_59 { + my ($a,$b,$c,$d,$e)=@V; + "$e+=".XUpdate()."+0x8f1bbcdc+(($b&$c)|(($b|$c)&$d))+".tail(); + } + sub BODY_60_79 { + my ($a,$b,$c,$d,$e)=@V; + "$e+=".XUpdate()."+0xca62c1d6+($b^$c^$d)+".tail(); + } + + my $sha1_impl = + 'sub block { + my $self = @_[0]; + my @W = unpack("N16",@_[1]); + my ($A,$B,$C,$D,$E,$T) = @{$self->{H}}; + '; + + $sha1_impl.=' + $A &= 0xffffffff; + $B &= 0xffffffff; + ' if ((1<<31)<<1); + + for($i=0;$i<16;$i++){ $sha1_impl.=BODY_00_15(); unshift(@V,pop(@V)); } + for(;$i<20;$i++) { $sha1_impl.=BODY_16_19(); unshift(@V,pop(@V)); } + for(;$i<40;$i++) { $sha1_impl.=BODY_20_39(); unshift(@V,pop(@V)); } + for(;$i<60;$i++) { $sha1_impl.=BODY_40_59(); unshift(@V,pop(@V)); } + for(;$i<80;$i++) { $sha1_impl.=BODY_60_79(); unshift(@V,pop(@V)); } + + $sha1_impl.=' + $self->{H}[0]+=$A; $self->{H}[1]+=$B; $self->{H}[2]+=$C; + $self->{H}[3]+=$D; $self->{H}[4]+=$E; }'; + + #print $sha1_impl,"\n"; + eval($sha1_impl); # generate code + } + + sub 
Init { + my $class = shift; # multiple instances... + my $self = {}; + + bless $self,$class; + $self->{H} = [0x67452301,0xefcdab89,0x98badcfe,0x10325476,0xc3d2e1f0]; + $self->{N} = 0; + return $self; + } + + sub Update { + my $self = shift; + my $msg; + + foreach $msg (@_) { + my $len = length($msg); + my $num = length($self->{buf}); + my $off = 0; + + $self->{N} += $len; + + if (($num+$len)<64) + { $self->{buf} .= $msg; next; } + elsif ($num) + { $self->{buf} .= substr($msg,0,($off=64-$num)); + $self->block($self->{buf}); + } + + while(($off+64) <= $len) + { $self->block(substr($msg,$off,64)); + $off += 64; + } + + $self->{buf} = substr($msg,$off); + } + return $self; + } + + sub Final { + my $self = shift; + my $num = length($self->{buf}); + + $self->{buf} .= chr(0x80); $num++; + if ($num>56) + { $self->{buf} .= chr(0)x(64-$num); + $self->block($self->{buf}); + $self->{buf}=undef; + $num=0; + } + $self->{buf} .= chr(0)x(56-$num); + $self->{buf} .= pack("N2",($self->{N}>>29)&0x7,$self->{N}<<3); + $self->block($self->{buf}); + + return pack("N*",@{$self->{H}}); + } + + sub Selftest { + my $hash; + + $hash=SHA1->Init()->Update('abc')->Final(); + die "SHA1 test#1" if (unpack("H*",$hash) ne 'a9993e364706816aba3e25717850c26c9cd0d89d'); + + $hash=SHA1->Init()->Update('abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq')->Final(); + die "SHA1 test#2" if (unpack("H*",$hash) ne '84983e441c3bd26ebaae4aa1f95129e5e54670f1'); + + #$hash=SHA1->Init()->Update('a'x1000000)->Final(); + #die "SHA1 test#3" if (unpack("H*",$hash) ne '34aa973cd4c4daa4f61eeb2bdbad27316534016f'); + } +} + +{ package HMAC; + + sub Init { + my $class = shift; + my $key = shift; + my $self = {}; + + bless $self,$class; + + if (length($key)>64) { + $key = SHA1->Init()->Update($key)->Final(); + } + $key .= chr(0x00)x(64-length($key)); + + my @ikey = map($_^=0x36,unpack("C*",$key)); + ($self->{hash} = SHA1->Init())->Update(pack("C*",@ikey)); + $self->{okey} = pack("C*",map($_^=0x36^0x5c,@ikey)); + + 
return $self; + } + + sub Update { + my $self = shift; + $self->{hash}->Update(@_); + return $self; + } + + sub Final { + my $self = shift; + my $ihash = $self->{hash}->Final(); + return SHA1->Init()->Update($self->{okey},$ihash)->Final(); + } + + sub Selftest { + my $hmac; + + $hmac = HMAC->Init('0123456789:;<=>?@ABC')->Update('Sample #2')->Final(); + die "HMAC test" if (unpack("H*",$hmac) ne '0922d3405faa3d194f82a45830737d5cc6c75d24'); + } +} + +1; diff --git a/c6x/incore6x b/c6x/incore6x new file mode 100644 index 0000000000..be73aca2d9 --- /dev/null +++ b/c6x/incore6x @@ -0,0 +1,241 @@ +#!/usr/bin/env perl +# +# Copyright (c) 2011 The OpenSSL Project. +# +# The script embeds fingerprint into TI-COFF executable object. + +$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; + +unshift(@INC,$dir); +require "hmac_sha1.pl"; + +###################################################################### +# +# COFF symbol table parser by . The table entries +# are extended with offset within executable file... +# +{ package COFF; + use FileHandle; + + sub dup { my %copy=map {$_} @_; return \%copy; } + + sub Load { + my $class = shift; + my $self = {}; + my $FD = FileHandle->new(); # autoclose + + bless $self,$class; + + sysopen($FD,shift,0) or die "$!"; + binmode($FD); + + ################################################# + # read and parse COFF header... 
+ # + read($FD,my $coff,22) or die "$!"; + + my %coff_header; + @coff_header{version,nsects,date,syms_off,nsyms,opt,flags,magic}= + unpack("v2V3v3",$coff); + + $!=42; # signal fipsld to revert to two-step link + die "not TI-COFF file" if ($coff_header{version} != 0xC2); + + my $big_endian = ($coff_header{flags}>>9)&1; # 0 or 1 + + my $strings; + my $symsize; + + ################################################# + # load strings table + # + seek($FD,$coff_header{syms_off}+18*$coff_header{nsyms},0) or die "$!"; + read($FD,$strings,4) or die "$!"; + $symsize = unpack("V",$strings); + read($FD,$strings,$symsize,4) or die "$!"; + + ################################################# + # read sections + # + my $i; + my @sections; + + # seek to section headers + seek($FD,22+@coff_header{opt},0) or die "$!"; + for ($i=0;$i<$coff_header{nsects};$i++) { + my %coff_shdr; + my $name; + + read($FD,my $section,48) or die "$!"; + + @coff_shdr{sh_name,sh_phaddr,sh_vaddr, + sh_size,sh_offset,sh_relocs,sh_reserved, + sh_relocoff,sh_lines,sh_flags} = + unpack("a8V9",$section); + + $name = $coff_shdr{sh_name}; + # see if sh_name is a an offset in $strings + my ($hi,$lo) = unpack("V2",$name); + if ($hi==0 && $lo<$symsize) { + $name = substr($strings,$lo,64); + } + $coff_shdr{sh_name} = (split(chr(0),$name))[0]; + + push(@sections,dup(%coff_shdr)); + } + + ################################################# + # load symbols table + # + seek($FD,$coff_header{syms_off},0) or die "$!"; + for ($i=0;$i<$coff_header{nsyms};$i++) { + my %coff_sym; + my $name; + + read($FD,my $blob,18) or die "$!"; + + @coff_sym{st_name,st_value,st_shndx,reserved,class,aux} = + unpack("a8Vv2C2",$blob); + + # skip aux entries + if ($coff_sym{aux}) { + seek($FD,18*$coff_sym{aux},1) or die "$!"; + $i+=$coff_sym{aux}; + } + + $name = $coff_sym{st_name}; + # see if st_name is a an offset in $strings + my ($hi,$lo) = unpack("V2",$name); + if ($hi==0 && $lo<$symsize) { + $name = substr($strings,$lo,64); + } + 
$coff_sym{st_name} = $name = (split(chr(0),$name))[0]; + + my $st_secn = $coff_sym{st_shndx}-1; + if ($st_secn>=0 && $st_secn<=$#sections + && @sections[$st_secn]->{sh_offset} + && $name =~ m/^_[a-z]+/i) { + # synthesize st_offset, ... + $coff_sym{st_offset} = $coff_sym{st_value} + - @sections[$st_secn]->{sh_vaddr} + + @sections[$st_secn]->{sh_offset}; + $coff_sym{st_section} = @sections[$st_secn]->{sh_name}; + # ... and add to lookup table + $self->{symbols}{$name} = dup(%coff_sym); + } + } + + return $self; + } + + sub Lookup { + my $self = shift; + my $name = shift; + return $self->{symbols}{"_$name"}; + } + + sub Traverse { + my $self = shift; + my $code = shift; + + if (ref($code) eq 'CODE') { + for (keys(%{$self->{symbols}})) { &$code($self->{symbols}{$_}); } + } + } +} + +###################################################################### +# +# main() +# +my $legacy_mode; + +if ($#ARGV<0 || ($#ARGV>0 && !($legacy_mode=(@ARGV[0] =~ /^\-(dso|exe)$/)))) { + print STDERR "usage: $0 [-dso|-exe] ti-coff-binary\n"; + exit(1); +} + +$exe = COFF->Load(@ARGV[$#ARGV]); + +$FIPS_text_start = $exe->Lookup("FIPS_text_start") or die; +$FIPS_text_end = $exe->Lookup("FIPS_text_end") or die; +$FIPS_rodata_start = $exe->Lookup("FIPS_rodata_start") or die; +$FIPS_rodata_end = $exe->Lookup("FIPS_rodata_end") or die; +$FIPS_signature = $exe->Lookup("FIPS_signature") or die; + +# new cross-compile support +$FIPS_text_startX = $exe->Lookup("FIPS_text_startX"); +$FIPS_text_endX = $exe->Lookup("FIPS_text_endX"); + +if (!$legacy_mode) { + if (!$FIPS_text_startX || !$FIPS_text_endX) { + print STDERR "@ARGV[$#ARGV] is not cross-compiler aware.\n"; + exit(42); # signal fipsld to revert to two-step link + } + + $FINGERPRINT_ascii_value + = $exe->Lookup("FINGERPRINT_ascii_value"); +} +if ($FIPS_text_startX && $FIPS_text_endX) { + $FIPS_text_start = $FIPS_text_startX; + $FIPS_text_end = $FIPS_text_endX; +} + +sysopen(FD,@ARGV[$#ARGV],$legacy_mode?0:2) or die "$!"; # 2 is read/write 
+binmode(FD); + +sub HMAC_Update { + my ($hmac,$off,$len) = @_; + my $blob; + + seek(FD,$off,0) or die "$!"; + read(FD,$blob,$len) or die "$!"; + $$hmac->Update($blob); +} + +# fips/fips.c:FIPS_incore_fingerprint's Perl twin +# +sub FIPS_incore_fingerprint { + my $p1 = $FIPS_text_start->{st_offset}; + my $p2 = $FIPS_text_end->{st_offset}; + my $p3 = $FIPS_rodata_start->{st_offset}; + my $p4 = $FIPS_rodata_end->{st_offset}; + my $sig = $FIPS_signature->{st_offset}; + my $ctx = HMAC->Init("etaonrishdlcupfm"); + + # detect overlapping regions + if ($p1<=$p3 && $p2>=$p3) { + $p3 = $p1; $p4 = $p2>$p4?$p2:$p4; $p1 = 0; $p2 = 0; + } elsif ($p3<=$p1 && $p4>=$p1) { + $p3 = $p3; $p4 = $p2>$p4?$p2:$p4; $p1 = 0; $p2 = 0; + } + + if ($p1) { + HMAC_Update (\$ctx,$p1,$p2-$p1); + } + + if ($sig>=$p3 && $sig<$p4) { + # "punch" hole + HMAC_Update(\$ctx,$p3,$sig-$p3); + $p3 = $sig+20; + HMAC_Update(\$ctx,$p3,$p4-$p3); + } else { + HMAC_Update(\$ctx,$p3,$p4-$p3); + } + + return $ctx->Final(); +} + +$fingerprint = FIPS_incore_fingerprint(); + +if ($legacy_mode) { + print unpack("H*",$fingerprint); +} elsif ($FINGERPRINT_ascii_value) { + seek(FD,$FINGERPRINT_ascii_value->{st_offset},0) or die "$!"; + print FD unpack("H*",$fingerprint) or die "$!"; +} else { + seek(FD,$FIPS_signature->{st_offset},0) or die "$!"; + print FD $fingerprint or die "$!"; +} + +close (FD); diff --git a/c6x/run6x b/c6x/run6x new file mode 100644 index 0000000000..aecfabeb04 --- /dev/null +++ b/c6x/run6x @@ -0,0 +1,43 @@ +#!/usr/bin/env perl + +$exe = @ARGV[0]; +$exe .= ".out" if (! -f $exe); +die if (! 
-f $exe); + +use CCS_SCRIPTING_PERL; + +my $studio=new CCS_SCRIPTING_PERL::CCS_Scripting(); + +$studio->CCSOpenNamed("*","*",1); # connect to board +$studio->TargetReset(); + +print "loading $exe\n"; +$studio->ProgramLoad($exe); + +sub write_string { + my ($studio,$addr,$str) = @_; + my $len = length($str); + my $i; + + for ($i=0; $i<$len; $i++) { + $studio->MemoryWrite($CCS_SCRIPTING_PERL::PAGE_DATA,$addr+$i,8,vec($str,$i,8)); + } + $studio->MemoryWrite($CCS_SCRIPTING_PERL::PAGE_DATA,$addr+$i,8,0); + + return $i+1; +} + +$addr= $studio->SymbolGetAddress("__c_args"); +printf "setting up __c_args at 0x%X\n",$addr;#\n"; + +$studio->MemoryWrite($CCS_SCRIPTING_PERL::PAGE_DATA,$addr,32,$#ARGV+1); + +for ($i=0,$strings=$addr+($#ARGV+3)*4; $i<=$#ARGV; $i++) { + $off = write_string($studio,$strings,@ARGV[$i]); + $studio->MemoryWrite($CCS_SCRIPTING_PERL::PAGE_DATA,$addr+4*($i+1),32,$strings); + $strings += $off; +} +$studio->MemoryWrite($CCS_SCRIPTING_PERL::PAGE_DATA,$addr+4*($i+1),32,0); + +print "running...\n"; +$studio->TargetRun(); diff --git a/crypto/aes/asm/aes-c64xplus.pl b/crypto/aes/asm/aes-c64xplus.pl new file mode 100644 index 0000000000..206d7dce88 --- /dev/null +++ b/crypto/aes/asm/aes-c64xplus.pl @@ -0,0 +1,1329 @@ +#!/usr/bin/env perl +# +# ==================================================================== +# Written by Andy Polyakov for the OpenSSL +# project. The module is, however, dual licensed under OpenSSL and +# CRYPTOGAMS licenses depending on where you obtain it. For further +# details see http://www.openssl.org/~appro/cryptogams/. +# ==================================================================== +# +# [Endian-neutral] AES for C64x+. +# +# Even though SPLOOPs are scheduled for 13 cycles, and thus expected +# performance is ~8.5 cycles per byte processed with 128-bit key, +# measured performance turned to be ~10 cycles per byte.
Discrepancy +# must be caused by limitations of L1D memory banking(*), see SPRU871 +# TI publication for further details. If any consolation it's still +# ~20% faster than TI's linear assembly module anyway... Compared to +# aes_core.c compiled with cl6x 6.0 with -mv6400+ -o2 options this +# code is 3.75x faster and almost 3x smaller (tables included). +# +# (*) This means that there might be subtle correlation between data +# and timing and one can wonder if it can be ... attacked:-( +# On the other hand this also means that *if* one chooses to +# implement *4* T-tables variant [instead of 1 T-table as in +# this implementation, or in addition to], then one ought to +# *interleave* them. Even though it complicates addressing, +# references to interleaved tables would be guaranteed not to +# clash. I reckon that it should be possible to break 8 cycles +# per byte "barrier," i.e. improve by ~20%, naturally at the +# cost of 8x increased pressure on L1D. 8x because you'd have +# to interleave both Te and Td tables... 
+ +while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {} +open STDOUT,">$output"; + +($TEA,$TEB)=("A5","B5"); +($KPA,$KPB)=("A3","B1"); +@K=("A6","B6","A7","B7"); +@s=("A8","B8","A9","B9"); +@Te0=@Td0=("A16","B16","A17","B17"); +@Te1=@Td1=("A18","B18","A19","B19"); +@Te2=@Td2=("A20","B20","A21","B21"); +@Te3=@Td3=("A22","B22","A23","B23"); + +$code=<<___; + .text + + .asg B3,RA + .asg A4,INP + .asg B4,OUT + .asg A6,KEY + .asg A4,RET + .asg B15,SP + + .eval 24,EXT0 + .eval 16,EXT1 + .eval 8,EXT2 + .eval 0,EXT3 + .eval 8,TBL1 + .eval 16,TBL2 + .eval 24,TBL3 + + .if .BIG_ENDIAN + .eval 24-EXT0,EXT0 + .eval 24-EXT1,EXT1 + .eval 24-EXT2,EXT2 + .eval 24-EXT3,EXT3 + .eval 32-TBL1,TBL1 + .eval 32-TBL2,TBL2 + .eval 32-TBL3,TBL3 + .endif + + .global _AES_encrypt +_AES_encrypt: + .asmfunc + MVK 1,B2 +__encrypt: + [B2] LDNDW *INP++,A9:A8 ; load input +|| MVKL (AES_Te-_AES_encrypt),$TEA +|| ADDKPC _AES_encrypt,B0 + [B2] LDNDW *INP++,B9:B8 +|| MVKH (AES_Te-_AES_encrypt),$TEA +|| ADD 0,KEY,$KPA +|| ADD 4,KEY,$KPB + LDW *$KPA++[2],$Te0[0] ; zero round key +|| LDW *$KPB++[2],$Te0[1] +|| MVK 60,A0 +|| ADD B0,$TEA,$TEA ; AES_Te + LDW *KEY[A0],B0 ; rounds +|| MVK 1024,A0 ; sizeof(AES_Te) + LDW *$KPA++[2],$Te0[2] +|| LDW *$KPB++[2],$Te0[3] +|| MV $TEA,$TEB + NOP + .if .BIG_ENDIAN + MV A9,$s[0] +|| MV A8,$s[1] +|| MV B9,$s[2] +|| MV B8,$s[3] + .else + MV A8,$s[0] +|| MV A9,$s[1] +|| MV B8,$s[2] +|| MV B9,$s[3] + .endif + XOR $Te0[0],$s[0],$s[0] +|| XOR $Te0[1],$s[1],$s[1] +|| LDW *$KPA++[2],$K[0] ; 1st round key +|| LDW *$KPB++[2],$K[1] + SUB B0,2,B0 + + SPLOOPD 13 +|| MVC B0,ILC +|| LDW *$KPA++[2],$K[2] +|| LDW *$KPB++[2],$K[3] +;;==================================================================== + EXTU $s[1],EXT1,24,$Te1[1] +|| EXTU $s[0],EXT3,24,$Te3[0] + LDW *${TEB}[$Te1[1]],$Te1[1] ; Te1[s1>>8], t0 +|| LDW *${TEA}[$Te3[0]],$Te3[0] ; Te3[s0>>24], t1 +|| XOR $s[2],$Te0[2],$s[2] ; modulo-scheduled +|| XOR $s[3],$Te0[3],$s[3] ; modulo-scheduled +|| EXTU $s[1],EXT3,24,$Te3[1] +|| 
EXTU $s[0],EXT1,24,$Te1[0] + LDW *${TEB}[$Te3[1]],$Te3[1] ; Te3[s1>>24], t2 +|| LDW *${TEA}[$Te1[0]],$Te1[0] ; Te1[s0>>8], t3 +|| EXTU $s[2],EXT2,24,$Te2[2] +|| EXTU $s[3],EXT2,24,$Te2[3] + LDW *${TEA}[$Te2[2]],$Te2[2] ; Te2[s2>>16], t0 +|| LDW *${TEB}[$Te2[3]],$Te2[3] ; Te2[s3>>16], t1 +|| EXTU $s[3],EXT3,24,$Te3[3] +|| EXTU $s[2],EXT1,24,$Te1[2] + LDW *${TEB}[$Te3[3]],$Te3[3] ; Te3[s3>>24], t0 +|| LDW *${TEA}[$Te1[2]],$Te1[2] ; Te1[s2>>8], t1 +|| EXTU $s[0],EXT2,24,$Te2[0] +|| EXTU $s[1],EXT2,24,$Te2[1] + LDW *${TEA}[$Te2[0]],$Te2[0] ; Te2[s0>>16], t2 +|| LDW *${TEB}[$Te2[1]],$Te2[1] ; Te2[s1>>16], t3 +|| EXTU $s[3],EXT1,24,$Te1[3] +|| EXTU $s[2],EXT3,24,$Te3[2] + LDW *${TEB}[$Te1[3]],$Te1[3] ; Te1[s3>>8], t2 +|| LDW *${TEA}[$Te3[2]],$Te3[2] ; Te3[s2>>24], t3 +|| ROTL $Te1[1],TBL1,$Te3[0] ; t0 +|| ROTL $Te3[0],TBL3,$Te1[1] ; t1 +|| EXTU $s[0],EXT0,24,$Te0[0] +|| EXTU $s[1],EXT0,24,$Te0[1] + LDW *${TEA}[$Te0[0]],$Te0[0] ; Te0[s0], t0 +|| LDW *${TEB}[$Te0[1]],$Te0[1] ; Te0[s1], t1 +|| ROTL $Te3[1],TBL3,$Te1[0] ; t2 +|| ROTL $Te1[0],TBL1,$Te3[1] ; t3 +|| EXTU $s[2],EXT0,24,$Te0[2] +|| EXTU $s[3],EXT0,24,$Te0[3] + LDW *${TEA}[$Te0[2]],$Te0[2] ; Te0[s2], t2 +|| LDW *${TEB}[$Te0[3]],$Te0[3] ; Te0[s3], t3 +|| ROTL $Te2[2],TBL2,$Te2[2] ; t0 +|| ROTL $Te2[3],TBL2,$Te2[3] ; t1 +|| XOR $K[0],$Te3[0],$s[0] +|| XOR $K[1],$Te1[1],$s[1] + ROTL $Te3[3],TBL3,$Te1[2] ; t0 +|| ROTL $Te1[2],TBL1,$Te3[3] ; t1 +|| XOR $K[2],$Te1[0],$s[2] +|| XOR $K[3],$Te3[1],$s[3] +|| LDW *$KPA++[2],$K[0] ; next round key +|| LDW *$KPB++[2],$K[1] + ROTL $Te2[0],TBL2,$Te2[0] ; t2 +|| ROTL $Te2[1],TBL2,$Te2[1] ; t3 +|| XOR $s[0],$Te2[2],$s[0] +|| XOR $s[1],$Te2[3],$s[1] +|| LDW *$KPA++[2],$K[2] +|| LDW *$KPB++[2],$K[3] + ROTL $Te1[3],TBL1,$Te3[2] ; t2 +|| ROTL $Te3[2],TBL3,$Te1[3] ; t3 +|| XOR $s[0],$Te1[2],$s[0] +|| XOR $s[1],$Te3[3],$s[1] + XOR $s[2],$Te2[0],$s[2] +|| XOR $s[3],$Te2[1],$s[3] +|| XOR $s[0],$Te0[0],$s[0] +|| XOR $s[1],$Te0[1],$s[1] + SPKERNEL +|| XOR.L $s[2],$Te3[2],$s[2] +|| XOR.L 
$s[3],$Te1[3],$s[3] +;;==================================================================== + ADD.D ${TEA},A0,${TEA} ; point to Te4 +|| ADD.D ${TEB},A0,${TEB} +|| EXTU $s[1],EXT1,24,$Te1[1] +|| EXTU $s[0],EXT3,24,$Te3[0] + LDBU *${TEB}[$Te1[1]],$Te1[1] ; Te1[s1>>8], t0 +|| LDBU *${TEA}[$Te3[0]],$Te3[0] ; Te3[s0>>24], t1 +|| XOR $s[2],$Te0[2],$s[2] ; modulo-scheduled +|| XOR $s[3],$Te0[3],$s[3] ; modulo-scheduled +|| EXTU $s[0],EXT0,24,$Te0[0] +|| EXTU $s[1],EXT0,24,$Te0[1] + LDBU *${TEA}[$Te0[0]],$Te0[0] ; Te0[s0], t0 +|| LDBU *${TEB}[$Te0[1]],$Te0[1] ; Te0[s1], t1 +|| EXTU $s[3],EXT3,24,$Te3[3] +|| EXTU $s[2],EXT1,24,$Te1[2] + LDBU *${TEB}[$Te3[3]],$Te3[3] ; Te3[s3>>24], t0 +|| LDBU *${TEA}[$Te1[2]],$Te1[2] ; Te1[s2>>8], t1 +|| EXTU $s[2],EXT2,24,$Te2[2] +|| EXTU $s[3],EXT2,24,$Te2[3] + LDBU *${TEA}[$Te2[2]],$Te2[2] ; Te2[s2>>16], t0 +|| LDBU *${TEB}[$Te2[3]],$Te2[3] ; Te2[s3>>16], t1 +|| EXTU $s[1],EXT3,24,$Te3[1] +|| EXTU $s[0],EXT1,24,$Te1[0] + LDBU *${TEB}[$Te3[1]],$Te3[1] ; Te3[s1>>24], t2 +|| LDBU *${TEA}[$Te1[0]],$Te1[0] ; Te1[s0>>8], t3 +|| EXTU $s[3],EXT1,24,$Te1[3] +|| EXTU $s[2],EXT3,24,$Te3[2] + LDBU *${TEB}[$Te1[3]],$Te1[3] ; Te1[s3>>8], t2 +|| LDBU *${TEA}[$Te3[2]],$Te3[2] ; Te3[s2>>24], t3 +|| EXTU $s[2],EXT0,24,$Te0[2] +|| EXTU $s[3],EXT0,24,$Te0[3] + LDBU *${TEA}[$Te0[2]],$Te0[2] ; Te0[s2], t2 +|| LDBU *${TEB}[$Te0[3]],$Te0[3] ; Te0[s3], t3 +|| EXTU $s[0],EXT2,24,$Te2[0] +|| EXTU $s[1],EXT2,24,$Te2[1] + LDBU *${TEA}[$Te2[0]],$Te2[0] ; Te2[s0>>16], t2 +|| LDBU *${TEB}[$Te2[1]],$Te2[1] ; Te2[s1>>16], t3 + + .if .BIG_ENDIAN + PACK2 $Te0[0],$Te1[1],$Te0[0] +|| PACK2 $Te0[1],$Te1[2],$Te0[1] + PACK2 $Te2[2],$Te3[3],$Te2[2] +|| PACK2 $Te2[3],$Te3[0],$Te2[3] + PACKL4 $Te0[0],$Te2[2],$Te0[0] +|| PACKL4 $Te0[1],$Te2[3],$Te0[1] + XOR $K[0],$Te0[0],$Te0[0] ; s[0] +|| XOR $K[1],$Te0[1],$Te0[1] ; s[1] + + PACK2 $Te0[2],$Te1[3],$Te0[2] +|| PACK2 $Te0[3],$Te1[0],$Te0[3] + PACK2 $Te2[0],$Te3[1],$Te2[0] +|| PACK2 $Te2[1],$Te3[2],$Te2[1] +|| BNOP RA + PACKL4 
$Te0[2],$Te2[0],$Te0[2] +|| PACKL4 $Te0[3],$Te2[1],$Te0[3] + XOR $K[2],$Te0[2],$Te0[2] ; s[2] +|| XOR $K[3],$Te0[3],$Te0[3] ; s[3] + + MV $Te0[0],A9 +|| MV $Te0[1],A8 + MV $Te0[2],B9 +|| MV $Te0[3],B8 +|| [B2] STNDW A9:A8,*OUT++ + [B2] STNDW B9:B8,*OUT++ + .else + PACK2 $Te1[1],$Te0[0],$Te1[1] +|| PACK2 $Te1[2],$Te0[1],$Te1[2] + PACK2 $Te3[3],$Te2[2],$Te3[3] +|| PACK2 $Te3[0],$Te2[3],$Te3[0] + PACKL4 $Te3[3],$Te1[1],$Te1[1] +|| PACKL4 $Te3[0],$Te1[2],$Te1[2] + XOR $K[0],$Te1[1],$Te1[1] ; s[0] +|| XOR $K[1],$Te1[2],$Te1[2] ; s[1] + + PACK2 $Te1[3],$Te0[2],$Te1[3] +|| PACK2 $Te1[0],$Te0[3],$Te1[0] + PACK2 $Te3[1],$Te2[0],$Te3[1] +|| PACK2 $Te3[2],$Te2[1],$Te3[2] +|| BNOP RA + PACKL4 $Te3[1],$Te1[3],$Te1[3] +|| PACKL4 $Te3[2],$Te1[0],$Te1[0] + XOR $K[2],$Te1[3],$Te1[3] ; s[2] +|| XOR $K[3],$Te1[0],$Te1[0] ; s[3] + + MV $Te1[1],A8 +|| MV $Te1[2],A9 + MV $Te1[3],B8 +|| MV $Te1[0],B9 +|| [B2] STNDW A9:A8,*OUT++ + [B2] STNDW B9:B8,*OUT++ + .endif + .endasmfunc + + .global _AES_decrypt +_AES_decrypt: + .asmfunc + MVK 1,B2 +__decrypt: + [B2] LDNDW *INP++,A9:A8 ; load input +|| MVKL (AES_Td-_AES_decrypt),$TEA +|| ADDKPC _AES_decrypt,B0 + [B2] LDNDW *INP++,B9:B8 +|| MVKH (AES_Td-_AES_decrypt),$TEA +|| ADD 0,KEY,$KPA +|| ADD 4,KEY,$KPB + LDW *$KPA++[2],$Td0[0] ; zero round key +|| LDW *$KPB++[2],$Td0[1] +|| MVK 60,A0 +|| ADD B0,$TEA,$TEA ; AES_Td + LDW *KEY[A0],B0 ; rounds +|| MVK 1024,A0 ; sizeof(AES_Td) + LDW *$KPA++[2],$Td0[2] +|| LDW *$KPB++[2],$Td0[3] +|| MV $TEA,$TEB + NOP + .if .BIG_ENDIAN + MV A9,$s[0] +|| MV A8,$s[1] +|| MV B9,$s[2] +|| MV B8,$s[3] + .else + MV A8,$s[0] +|| MV A9,$s[1] +|| MV B8,$s[2] +|| MV B9,$s[3] + .endif + XOR $Td0[0],$s[0],$s[0] +|| XOR $Td0[1],$s[1],$s[1] +|| LDW *$KPA++[2],$K[0] ; 1st round key +|| LDW *$KPB++[2],$K[1] + SUB B0,2,B0 + + SPLOOPD 13 +|| MVC B0,ILC +|| LDW *$KPA++[2],$K[2] +|| LDW *$KPB++[2],$K[3] +;;==================================================================== + EXTU $s[1],EXT3,24,$Td3[1] +|| EXTU $s[0],EXT1,24,$Td1[0] + 
LDW *${TEB}[$Td3[1]],$Td3[1] ; Td3[s1>>24], t0 +|| LDW *${TEA}[$Td1[0]],$Td1[0] ; Td1[s0>>8], t1 +|| XOR $s[2],$Td0[2],$s[2] ; modulo-scheduled +|| XOR $s[3],$Td0[3],$s[3] ; modulo-scheduled +|| EXTU $s[1],EXT1,24,$Td1[1] +|| EXTU $s[0],EXT3,24,$Td3[0] + LDW *${TEB}[$Td1[1]],$Td1[1] ; Td1[s1>>8], t2 +|| LDW *${TEA}[$Td3[0]],$Td3[0] ; Td3[s0>>24], t3 +|| EXTU $s[2],EXT2,24,$Td2[2] +|| EXTU $s[3],EXT2,24,$Td2[3] + LDW *${TEA}[$Td2[2]],$Td2[2] ; Td2[s2>>16], t0 +|| LDW *${TEB}[$Td2[3]],$Td2[3] ; Td2[s3>>16], t1 +|| EXTU $s[3],EXT1,24,$Td1[3] +|| EXTU $s[2],EXT3,24,$Td3[2] + LDW *${TEB}[$Td1[3]],$Td1[3] ; Td1[s3>>8], t0 +|| LDW *${TEA}[$Td3[2]],$Td3[2] ; Td3[s2>>24], t1 +|| EXTU $s[0],EXT2,24,$Td2[0] +|| EXTU $s[1],EXT2,24,$Td2[1] + LDW *${TEA}[$Td2[0]],$Td2[0] ; Td2[s0>>16], t2 +|| LDW *${TEB}[$Td2[1]],$Td2[1] ; Td2[s1>>16], t3 +|| EXTU $s[3],EXT3,24,$Td3[3] +|| EXTU $s[2],EXT1,24,$Td1[2] + LDW *${TEB}[$Td3[3]],$Td3[3] ; Td3[s3>>24], t2 +|| LDW *${TEA}[$Td1[2]],$Td1[2] ; Td1[s2>>8], t3 +|| ROTL $Td3[1],TBL3,$Td1[0] ; t0 +|| ROTL $Td1[0],TBL1,$Td3[1] ; t1 +|| EXTU $s[0],EXT0,24,$Td0[0] +|| EXTU $s[1],EXT0,24,$Td0[1] + LDW *${TEA}[$Td0[0]],$Td0[0] ; Td0[s0], t0 +|| LDW *${TEB}[$Td0[1]],$Td0[1] ; Td0[s1], t1 +|| ROTL $Td1[1],TBL1,$Td3[0] ; t2 +|| ROTL $Td3[0],TBL3,$Td1[1] ; t3 +|| EXTU $s[2],EXT0,24,$Td0[2] +|| EXTU $s[3],EXT0,24,$Td0[3] + LDW *${TEA}[$Td0[2]],$Td0[2] ; Td0[s2], t2 +|| LDW *${TEB}[$Td0[3]],$Td0[3] ; Td0[s3], t3 +|| ROTL $Td2[2],TBL2,$Td2[2] ; t0 +|| ROTL $Td2[3],TBL2,$Td2[3] ; t1 +|| XOR $K[0],$Td1[0],$s[0] +|| XOR $K[1],$Td3[1],$s[1] + ROTL $Td1[3],TBL1,$Td3[2] ; t0 +|| ROTL $Td3[2],TBL3,$Td1[3] ; t1 +|| XOR $K[2],$Td3[0],$s[2] +|| XOR $K[3],$Td1[1],$s[3] +|| LDW *$KPA++[2],$K[0] ; next round key +|| LDW *$KPB++[2],$K[1] + ROTL $Td2[0],TBL2,$Td2[0] ; t2 +|| ROTL $Td2[1],TBL2,$Td2[1] ; t3 +|| XOR $s[0],$Td2[2],$s[0] +|| XOR $s[1],$Td2[3],$s[1] +|| LDW *$KPA++[2],$K[2] +|| LDW *$KPB++[2],$K[3] + ROTL $Td3[3],TBL3,$Td1[2] ; t2 +|| ROTL $Td1[2],TBL1,$Td3[3] 
; t3 +|| XOR $s[0],$Td3[2],$s[0] +|| XOR $s[1],$Td1[3],$s[1] + XOR $s[2],$Td2[0],$s[2] +|| XOR $s[3],$Td2[1],$s[3] +|| XOR $s[0],$Td0[0],$s[0] +|| XOR $s[1],$Td0[1],$s[1] + SPKERNEL +|| XOR.L $s[2],$Td1[2],$s[2] +|| XOR.L $s[3],$Td3[3],$s[3] +;;==================================================================== + ADD.D ${TEA},A0,${TEA} ; point to Td4 +|| ADD.D ${TEB},A0,${TEB} +|| EXTU $s[1],EXT3,24,$Td3[1] +|| EXTU $s[0],EXT1,24,$Td1[0] + LDBU *${TEB}[$Td3[1]],$Td3[1] ; Td3[s1>>24], t0 +|| LDBU *${TEA}[$Td1[0]],$Td1[0] ; Td1[s0>>8], t1 +|| XOR $s[2],$Td0[2],$s[2] ; modulo-scheduled +|| XOR $s[3],$Td0[3],$s[3] ; modulo-scheduled +|| EXTU $s[0],EXT0,24,$Td0[0] +|| EXTU $s[1],EXT0,24,$Td0[1] + LDBU *${TEA}[$Td0[0]],$Td0[0] ; Td0[s0], t0 +|| LDBU *${TEB}[$Td0[1]],$Td0[1] ; Td0[s1], t1 +|| EXTU $s[2],EXT2,24,$Td2[2] +|| EXTU $s[3],EXT2,24,$Td2[3] + LDBU *${TEA}[$Td2[2]],$Td2[2] ; Td2[s2>>16], t0 +|| LDBU *${TEB}[$Td2[3]],$Td2[3] ; Td2[s3>>16], t1 +|| EXTU $s[3],EXT1,24,$Td1[3] +|| EXTU $s[2],EXT3,24,$Td3[2] + LDBU *${TEB}[$Td1[3]],$Td1[3] ; Td1[s3>>8], t0 +|| LDBU *${TEA}[$Td3[2]],$Td3[2] ; Td3[s2>>24], t1 +|| EXTU $s[1],EXT1,24,$Td1[1] +|| EXTU $s[0],EXT3,24,$Td3[0] + LDBU *${TEB}[$Td1[1]],$Td1[1] ; Td1[s1>>8], t2 +|| LDBU *${TEA}[$Td3[0]],$Td3[0] ; Td3[s0>>24], t3 +|| EXTU $s[0],EXT2,24,$Td2[0] +|| EXTU $s[1],EXT2,24,$Td2[1] + LDBU *${TEA}[$Td2[0]],$Td2[0] ; Td2[s0>>16], t2 +|| LDBU *${TEB}[$Td2[1]],$Td2[1] ; Td2[s1>>16], t3 +|| EXTU $s[3],EXT3,24,$Td3[3] +|| EXTU $s[2],EXT1,24,$Td1[2] + LDBU *${TEB}[$Td3[3]],$Td3[3] ; Td3[s3>>24], t2 +|| LDBU *${TEA}[$Td1[2]],$Td1[2] ; Td1[s2>>8], t3 +|| EXTU $s[2],EXT0,24,$Td0[2] +|| EXTU $s[3],EXT0,24,$Td0[3] + LDBU *${TEA}[$Td0[2]],$Td0[2] ; Td0[s2], t2 +|| LDBU *${TEB}[$Td0[3]],$Td0[3] ; Td0[s3], t3 + + .if .BIG_ENDIAN + PACK2 $Td0[0],$Td1[3],$Td0[0] +|| PACK2 $Td0[1],$Td1[0],$Td0[1] + PACK2 $Td2[2],$Td3[1],$Td2[2] +|| PACK2 $Td2[3],$Td3[2],$Td2[3] + PACKL4 $Td0[0],$Td2[2],$Td0[0] +|| PACKL4 $Td0[1],$Td2[3],$Td0[1] + XOR 
$K[0],$Td0[0],$Td0[0] ; s[0] +|| XOR $K[1],$Td0[1],$Td0[1] ; s[1] + + PACK2 $Td0[2],$Td1[1],$Td0[2] +|| PACK2 $Td0[3],$Td1[2],$Td0[3] + PACK2 $Td2[0],$Td3[3],$Td2[0] +|| PACK2 $Td2[1],$Td3[0],$Td2[1] +|| BNOP RA + PACKL4 $Td0[2],$Td2[0],$Td0[2] +|| PACKL4 $Td0[3],$Td2[1],$Td0[3] + XOR $K[2],$Td0[2],$Td0[2] ; s[2] +|| XOR $K[3],$Td0[3],$Td0[3] ; s[3] + + MV $Td0[0],A9 +|| MV $Td0[1],A8 + MV $Td0[2],B9 +|| MV $Td0[3],B8 +|| [B2] STNDW A9:A8,*OUT++ + [B2] STNDW B9:B8,*OUT++ + .else + PACK2 $Td1[3],$Td0[0],$Td1[3] +|| PACK2 $Td1[0],$Td0[1],$Td1[0] + PACK2 $Td3[1],$Td2[2],$Td3[1] +|| PACK2 $Td3[2],$Td2[3],$Td3[2] + PACKL4 $Td3[1],$Td1[3],$Td1[3] +|| PACKL4 $Td3[2],$Td1[0],$Td1[0] + XOR $K[0],$Td1[3],$Td1[3] ; s[0] +|| XOR $K[1],$Td1[0],$Td1[0] ; s[1] + + PACK2 $Td1[1],$Td0[2],$Td1[1] +|| PACK2 $Td1[2],$Td0[3],$Td1[2] + PACK2 $Td3[3],$Td2[0],$Td3[3] +|| PACK2 $Td3[0],$Td2[1],$Td3[0] +|| BNOP RA + PACKL4 $Td3[3],$Td1[1],$Td1[1] +|| PACKL4 $Td3[0],$Td1[2],$Td1[2] + XOR $K[2],$Td1[1],$Td1[1] ; s[2] +|| XOR $K[3],$Td1[2],$Td1[2] ; s[3] + + MV $Td1[3],A8 +|| MV $Td1[0],A9 + MV $Td1[1],B8 +|| MV $Td1[2],B9 +|| [B2] STNDW A9:A8,*OUT++ + [B2] STNDW B9:B8,*OUT++ + .endif + .endasmfunc +___ +{ +my @K=(@K,@s); # extended key +my @Te4=map("B$_",(16..19)); + +my @Kx9=@Te0; # used in AES_set_decrypt_key +my @KxB=@Te1; +my @KxD=@Te2; +my @KxE=@Te3; + +$code.=<<___; + .asg OUT,BITS + + .global _AES_set_encrypt_key +_AES_set_encrypt_key: +__set_encrypt_key: + .asmfunc + MV INP,A0 +|| SHRU BITS,5,BITS ; 128-192-256 -> 4-6-8 +|| MV KEY,A1 + [!A0] B RA +||[!A0] MVK -1,RET +||[!A0] MVK 1,A1 ; only one B RA + [!A1] B RA +||[!A1] MVK -1,RET +||[!A1] MVK 0,A0 +|| MVK 0,B0 +|| MVK 0,A1 + [A0] LDNDW *INP++,A9:A8 +|| [A0] CMPEQ 4,BITS,B0 +|| [A0] CMPLT 3,BITS,A1 + [B0] B key128? +|| [A1] LDNDW *INP++,B9:B8 +|| [A0] CMPEQ 6,BITS,B0 +|| [A0] CMPLT 5,BITS,A1 + [B0] B key192? +|| [A1] LDNDW *INP++,B17:B16 +|| [A0] CMPEQ 8,BITS,B0 +|| [A0] CMPLT 7,BITS,A1 + [B0] B key256? 
+|| [A1] LDNDW *INP++,B19:B18 + + [A0] ADD 0,KEY,$KPA +|| [A0] ADD 4,KEY,$KPB +|| [A0] MVKL (AES_Te4-_AES_set_encrypt_key),$TEA +|| [A0] ADDKPC _AES_set_encrypt_key,B6 + [A0] MVKH (AES_Te4-_AES_set_encrypt_key),$TEA + [A0] ADD B6,$TEA,$TEA ; AES_Te4 + NOP + NOP + + BNOP RA,5 +|| MVK -2,RET ; unknown bit length +|| MVK 0,B0 ; redundant +;;==================================================================== +;;==================================================================== +key128?: + .if .BIG_ENDIAN + MV A9,$K[0] +|| MV A8,$K[1] +|| MV B9,$Te4[2] +|| MV B8,$K[3] + .else + MV A8,$K[0] +|| MV A9,$K[1] +|| MV B8,$Te4[2] +|| MV B9,$K[3] + .endif + + MVK 256,A0 +|| MVK 9,B0 + + SPLOOPD 14 +|| MVC B0,ILC +|| MV $TEA,$TEB +|| ADD $TEA,A0,A30 ; rcon +;;==================================================================== + LDW *A30++[1],A31 ; rcon[i] +|| MV $Te4[2],$K[2] +|| EXTU $K[3],EXT1,24,$Te4[0] + LDBU *${TEB}[$Te4[0]],$Te4[0] +|| MV $K[3],A0 +|| EXTU $K[3],EXT2,24,$Te4[1] + LDBU *${TEB}[$Te4[1]],$Te4[1] +|| EXTU A0,EXT3,24,A0 +|| EXTU $K[3],EXT0,24,$Te4[3] + .if .BIG_ENDIAN + LDBU *${TEA}[A0],$Te4[3] +|| LDBU *${TEB}[$Te4[3]],A0 + .else + LDBU *${TEA}[A0],A0 +|| LDBU *${TEB}[$Te4[3]],$Te4[3] + .endif + + STW $K[0],*$KPA++[2] +|| STW $K[1],*$KPB++[2] + STW $K[2],*$KPA++[2] +|| STW $K[3],*$KPB++[2] + + XOR A31,$K[0],$K[0] ; ^=rcon[i] + .if .BIG_ENDIAN + PACK2 $Te4[0],$Te4[1],$Te4[1] + PACK2 $Te4[3],A0,$Te4[3] + PACKL4 $Te4[1],$Te4[3],$Te4[3] + .else + PACK2 $Te4[1],$Te4[0],$Te4[1] + PACK2 $Te4[3],A0,$Te4[3] + PACKL4 $Te4[3],$Te4[1],$Te4[3] + .endif + XOR $Te4[3],$K[0],$Te4[0] ; K[0] + XOR $Te4[0],$K[1],$K[1] ; K[1] + MV $Te4[0],$K[0] +|| XOR $K[1],$K[2],$Te4[2] ; K[2] + XOR $Te4[2],$K[3],$K[3] ; K[3] + SPKERNEL +;;==================================================================== + BNOP RA + MV $Te4[2],$K[2] +|| STW $K[0],*$KPA++[2] +|| STW $K[1],*$KPB++[2] + STW $K[2],*$KPA++[2] +|| STW $K[3],*$KPB++[2] + MVK 10,B0 ; rounds + STW B0,*++${KPB}[15] + MVK 0,RET
+;;==================================================================== +;;==================================================================== +key192?: + .if .BIG_ENDIAN + MV A9,$K[0] +|| MV A8,$K[1] +|| MV B9,$K[2] +|| MV B8,$K[3] + MV B17,$Te4[2] +|| MV B16,$K[5] + .else + MV A8,$K[0] +|| MV A9,$K[1] +|| MV B8,$K[2] +|| MV B9,$K[3] + MV B16,$Te4[2] +|| MV B17,$K[5] + .endif + + MVK 256,A0 +|| MVK 6,B0 + MV $TEA,$TEB +|| ADD $TEA,A0,A30 ; rcon +;;==================================================================== +loop192?: + LDW *A30++[1],A31 ; rcon[i] +|| MV $Te4[2],$K[4] +|| EXTU $K[5],EXT1,24,$Te4[0] + LDBU *${TEB}[$Te4[0]],$Te4[0] +|| MV $K[5],A0 +|| EXTU $K[5],EXT2,24,$Te4[1] + LDBU *${TEB}[$Te4[1]],$Te4[1] +|| EXTU A0,EXT3,24,A0 +|| EXTU $K[5],EXT0,24,$Te4[3] + .if .BIG_ENDIAN + LDBU *${TEA}[A0],$Te4[3] +|| LDBU *${TEB}[$Te4[3]],A0 + .else + LDBU *${TEA}[A0],A0 +|| LDBU *${TEB}[$Te4[3]],$Te4[3] + .endif + + STW $K[0],*$KPA++[2] +|| STW $K[1],*$KPB++[2] + STW $K[2],*$KPA++[2] +|| STW $K[3],*$KPB++[2] + STW $K[4],*$KPA++[2] +|| STW $K[5],*$KPB++[2] + + XOR A31,$K[0],$K[0] ; ^=rcon[i] + .if .BIG_ENDIAN + PACK2 $Te4[0],$Te4[1],$Te4[1] +|| PACK2 $Te4[3],A0,$Te4[3] + PACKL4 $Te4[1],$Te4[3],$Te4[3] + .else + PACK2 $Te4[1],$Te4[0],$Te4[1] +|| PACK2 $Te4[3],A0,$Te4[3] + PACKL4 $Te4[3],$Te4[1],$Te4[3] + .endif + BDEC loop192?,B0 +|| XOR $Te4[3],$K[0],$Te4[0] ; K[0] + XOR $Te4[0],$K[1],$K[1] ; K[1] + MV $Te4[0],$K[0] +|| XOR $K[1],$K[2],$Te4[2] ; K[2] + XOR $Te4[2],$K[3],$K[3] ; K[3] + MV $Te4[2],$K[2] +|| XOR $K[3],$K[4],$Te4[2] ; K[4] + XOR $Te4[2],$K[5],$K[5] ; K[5] +;;==================================================================== + BNOP RA + STW $K[0],*$KPA++[2] +|| STW $K[1],*$KPB++[2] + STW $K[2],*$KPA++[2] +|| STW $K[3],*$KPB++[2] + MVK 12,B0 ; rounds + STW B0,*++${KPB}[7] + MVK 0,RET +;;==================================================================== +;;==================================================================== +key256?: + .if 
.BIG_ENDIAN + MV A9,$K[0] +|| MV A8,$K[1] +|| MV B9,$K[2] +|| MV B8,$K[3] + MV B17,$K[4] +|| MV B16,$K[5] +|| MV B19,$Te4[2] +|| MV B18,$K[7] + .else + MV A8,$K[0] +|| MV A9,$K[1] +|| MV B8,$K[2] +|| MV B9,$K[3] + MV B16,$K[4] +|| MV B17,$K[5] +|| MV B18,$Te4[2] +|| MV B19,$K[7] + .endif + + MVK 256,A0 +|| MVK 6,B0 + MV $TEA,$TEB +|| ADD $TEA,A0,A30 ; rcon +;;==================================================================== +loop256?: + LDW *A30++[1],A31 ; rcon[i] +|| MV $Te4[2],$K[6] +|| EXTU $K[7],EXT1,24,$Te4[0] + LDBU *${TEB}[$Te4[0]],$Te4[0] +|| MV $K[7],A0 +|| EXTU $K[7],EXT2,24,$Te4[1] + LDBU *${TEB}[$Te4[1]],$Te4[1] +|| EXTU A0,EXT3,24,A0 +|| EXTU $K[7],EXT0,24,$Te4[3] + .if .BIG_ENDIAN + LDBU *${TEA}[A0],$Te4[3] +|| LDBU *${TEB}[$Te4[3]],A0 + .else + LDBU *${TEA}[A0],A0 +|| LDBU *${TEB}[$Te4[3]],$Te4[3] + .endif + + STW $K[0],*$KPA++[2] +|| STW $K[1],*$KPB++[2] + STW $K[2],*$KPA++[2] +|| STW $K[3],*$KPB++[2] + STW $K[4],*$KPA++[2] +|| STW $K[5],*$KPB++[2] + STW $K[6],*$KPA++[2] +|| STW $K[7],*$KPB++[2] +|| XOR A31,$K[0],$K[0] ; ^=rcon[i] + .if .BIG_ENDIAN + PACK2 $Te4[0],$Te4[1],$Te4[1] +|| PACK2 $Te4[3],A0,$Te4[3] + PACKL4 $Te4[1],$Te4[3],$Te4[3] +||[!B0] B done256? + .else + PACK2 $Te4[1],$Te4[0],$Te4[1] +|| PACK2 $Te4[3],A0,$Te4[3] + PACKL4 $Te4[3],$Te4[1],$Te4[3] +||[!B0] B done256? + .endif + XOR $Te4[3],$K[0],$Te4[0] ; K[0] + XOR $Te4[0],$K[1],$K[1] ; K[1] + MV $Te4[0],$K[0] +|| XOR $K[1],$K[2],$Te4[2] ; K[2] + XOR $Te4[2],$K[3],$K[3] ; K[3] + + MV $Te4[2],$K[2] +|| [B0] EXTU $K[3],EXT0,24,$Te4[0] +|| [B0] SUB B0,1,B0 + LDBU *${TEB}[$Te4[0]],$Te4[0] +|| MV $K[3],A0 +|| EXTU $K[3],EXT1,24,$Te4[1] + LDBU *${TEB}[$Te4[1]],$Te4[1] +|| EXTU A0,EXT2,24,A0 +|| EXTU $K[3],EXT3,24,$Te4[3] + + .if .BIG_ENDIAN + LDBU *${TEA}[A0],$Te4[3] +|| LDBU *${TEB}[$Te4[3]],A0 + NOP 3 + PACK2 $Te4[0],$Te4[1],$Te4[1] + PACK2 $Te4[3],A0,$Te4[3] +|| B loop256? 
+ PACKL4 $Te4[1],$Te4[3],$Te4[3] + .else + LDBU *${TEA}[A0],A0 +|| LDBU *${TEB}[$Te4[3]],$Te4[3] + NOP 3 + PACK2 $Te4[1],$Te4[0],$Te4[1] + PACK2 $Te4[3],A0,$Te4[3] +|| B loop256? + PACKL4 $Te4[3],$Te4[1],$Te4[3] + .endif + + XOR $Te4[3],$K[4],$Te4[0] ; K[4] + XOR $Te4[0],$K[5],$K[5] ; K[5] + MV $Te4[0],$K[4] +|| XOR $K[5],$K[6],$Te4[2] ; K[6] + XOR $Te4[2],$K[7],$K[7] ; K[7] +;;==================================================================== +done256?: + BNOP RA + STW $K[0],*$KPA++[2] +|| STW $K[1],*$KPB++[2] + STW $K[2],*$KPA++[2] +|| STW $K[3],*$KPB++[2] + MVK 14,B0 ; rounds + STW B0,*--${KPB}[1] + MVK 0,RET + .endasmfunc + + .global _AES_set_decrypt_key +_AES_set_decrypt_key: + .asmfunc + B __set_encrypt_key ; guarantee local call + MV KEY,B30 ; B30 is not modified + MV RA, B31 ; B31 is not modified + ADDKPC ret?,RA,2 +ret?: ; B0 holds rounds or zero + [!B0] BNOP B31 ; return if zero + [B0] SHL B0,4,A0 ; offset to last round key + [B0] SHRU B0,1,B1 + [B0] SUB B1,1,B1 + [B0] MVK 0x0000001B,B3 ; AES polynomial + [B0] MVKH 0x07000000,B3 + + SPLOOPD 9 ; flip round keys +|| MVC B1,ILC +|| MV B30,$KPA +|| ADD B30,A0,$KPB +|| MVK 16,A0 ; sizeof(round key) +;;==================================================================== + LDW *${KPA}[0],A16 +|| LDW *${KPB}[0],B16 + LDW *${KPA}[1],A17 +|| LDW *${KPB}[1],B17 + LDW *${KPA}[2],A18 +|| LDW *${KPB}[2],B18 + LDW *${KPA}[3],A19 +|| ADD $KPA,A0,$KPA +|| LDW *${KPB}[3],B19 +|| SUB $KPB,A0,$KPB + NOP + STW B16,*${KPA}[-4] +|| STW A16,*${KPB}[4] + STW B17,*${KPA}[-3] +|| STW A17,*${KPB}[5] + STW B18,*${KPA}[-2] +|| STW A18,*${KPB}[6] + STW B19,*${KPA}[-1] +|| STW A19,*${KPB}[7] + SPKERNEL +;;==================================================================== + SUB B0,1,B0 ; skip last round +|| ADD B30,A0,$KPA ; skip first round +|| ADD B30,A0,$KPB +|| MVC GFPGFR,B30 ; save GFPGFR + LDW *${KPA}[0],$K[0] +|| LDW *${KPB}[1],$K[1] +|| MVC B3,GFPGFR + LDW *${KPA}[2],$K[2] +|| LDW *${KPB}[3],$K[3] + MVK 0x00000909,A24 +|| MVK 
0x00000B0B,B24 + MVKH 0x09090000,A24 +|| MVKH 0x0B0B0000,B24 + MVC B0,ILC +|| SUB B0,1,B0 + + GMPY4 $K[0],A24,$Kx9[0] ; ·0x09 +|| GMPY4 $K[1],A24,$Kx9[1] +|| MVK 0x00000D0D,A25 +|| MVK 0x00000E0E,B25 + GMPY4 $K[2],A24,$Kx9[2] +|| GMPY4 $K[3],A24,$Kx9[3] +|| MVKH 0x0D0D0000,A25 +|| MVKH 0x0E0E0000,B25 + + GMPY4 $K[0],B24,$KxB[0] ; ·0x0B +|| GMPY4 $K[1],B24,$KxB[1] + GMPY4 $K[2],B24,$KxB[2] +|| GMPY4 $K[3],B24,$KxB[3] + + SPLOOP 11 ; InvMixColumns +;;==================================================================== + GMPY4 $K[0],A25,$KxD[0] ; ·0x0D +|| GMPY4 $K[1],A25,$KxD[1] +|| SWAP2 $Kx9[0],$Kx9[0] ; rotate by 16 +|| SWAP2 $Kx9[1],$Kx9[1] +|| MV $K[0],$s[0] ; this or DINT +|| MV $K[1],$s[1] +|| [B0] LDW *${KPA}[4],$K[0] +|| [B0] LDW *${KPB}[5],$K[1] + GMPY4 $K[2],A25,$KxD[2] +|| GMPY4 $K[3],A25,$KxD[3] +|| SWAP2 $Kx9[2],$Kx9[2] +|| SWAP2 $Kx9[3],$Kx9[3] +|| MV $K[2],$s[2] +|| MV $K[3],$s[3] +|| [B0] LDW *${KPA}[6],$K[2] +|| [B0] LDW *${KPB}[7],$K[3] + + GMPY4 $s[0],B25,$KxE[0] ; ·0x0E +|| GMPY4 $s[1],B25,$KxE[1] +|| XOR $Kx9[0],$KxB[0],$KxB[0] +|| XOR $Kx9[1],$KxB[1],$KxB[1] + GMPY4 $s[2],B25,$KxE[2] +|| GMPY4 $s[3],B25,$KxE[3] +|| XOR $Kx9[2],$KxB[2],$KxB[2] +|| XOR $Kx9[3],$KxB[3],$KxB[3] + + ROTL $KxB[0],TBL3,$KxB[0] +|| ROTL $KxB[1],TBL3,$KxB[1] +|| SWAP2 $KxD[0],$KxD[0] ; rotate by 16 +|| SWAP2 $KxD[1],$KxD[1] + ROTL $KxB[2],TBL3,$KxB[2] +|| ROTL $KxB[3],TBL3,$KxB[3] +|| SWAP2 $KxD[2],$KxD[2] +|| SWAP2 $KxD[3],$KxD[3] + + XOR $KxE[0],$KxD[0],$KxE[0] +|| XOR $KxE[1],$KxD[1],$KxE[1] +|| [B0] GMPY4 $K[0],A24,$Kx9[0] ; ·0x09 +|| [B0] GMPY4 $K[1],A24,$Kx9[1] +|| ADDAW $KPA,4,$KPA + XOR $KxE[2],$KxD[2],$KxE[2] +|| XOR $KxE[3],$KxD[3],$KxE[3] +|| [B0] GMPY4 $K[2],A24,$Kx9[2] +|| [B0] GMPY4 $K[3],A24,$Kx9[3] +|| ADDAW $KPB,4,$KPB + + XOR $KxB[0],$KxE[0],$KxE[0] +|| XOR $KxB[1],$KxE[1],$KxE[1] +|| [B0] GMPY4 $K[0],B24,$KxB[0] ; ·0x0B +|| [B0] GMPY4 $K[1],B24,$KxB[1] + XOR $KxB[2],$KxE[2],$KxE[2] +|| XOR $KxB[3],$KxE[3],$KxE[3] +|| [B0] GMPY4 $K[2],B24,$KxB[2] +|| 
[B0] GMPY4 $K[3],B24,$KxB[3] +|| STW $KxE[0],*${KPA}[-4] +|| STW $KxE[1],*${KPB}[-3] + STW $KxE[2],*${KPA}[-2] +|| STW $KxE[3],*${KPB}[-1] +|| [B0] SUB B0,1,B0 + SPKERNEL +;;==================================================================== + BNOP B31,3 + MVC B30,GFPGFR ; restore GFPGFR(*) + MVK 0,RET + .endasmfunc +___ +# (*) Even though ABI doesn't specify GFPGFR as non-volatile, there +# are code samples out there that *assume* its default value. +} +{ +my ($inp,$out,$blocks,$key,$ivp)=("A4","B4","A6","B6","A8"); +$code.=<<___; + .global _AES_ctr32_encrypt +_AES_ctr32_encrypt: + .asmfunc + LDNDW *${ivp}[0],A31:A30 ; load counter value +|| MV $blocks,A2 ; reassign $blocks +|| DMV RA,$key,B27:B26 ; reassign RA and $key + LDNDW *${ivp}[1],B31:B30 +|| MVK 0,B2 ; don't let __encrypt load input +|| MVK 0,A1 ; and postpone writing output + .if .BIG_ENDIAN + NOP + .else + NOP 4 + SWAP2 B31,B31 ; keep least significant 32 bits + SWAP4 B31,B31 ; in host byte order + .endif +ctr32_loop?: + [A2] BNOP __encrypt +|| [A1] XOR A29,A9,A9 ; input^Ek(counter) +|| [A1] XOR A28,A8,A8 +|| [A2] LDNDW *INP++,A29:A28 ; load input + [!A2] BNOP B27 ; return +|| [A1] XOR B29,B9,B9 +|| [A1] XOR B28,B8,B8 +|| [A2] LDNDW *INP++,B29:B28 + .if .BIG_ENDIAN + [A1] STNDW A9:A8,*OUT++ ; save output +|| [A2] DMV A31,A30,A9:A8 ; pass counter value to __encrypt + [A1] STNDW B9:B8,*OUT++ +|| [A2] DMV B31,B30,B9:B8 +|| [A2] ADD B30,1,B30 ; counter++ + .else + [A1] STNDW A9:A8,*OUT++ ; save output +|| [A2] DMV A31,A30,A9:A8 +|| [A2] SWAP2 B31,B0 +|| [A2] ADD B31,1,B31 ; counter++ + [A1] STNDW B9:B8,*OUT++ +|| [A2] MV B30,B8 +|| [A2] SWAP4 B0,B9 + .endif + [A2] ADDKPC ctr32_loop?,RA ; return to ctr32_loop? 
+|| [A2] MV B26,KEY ; pass $key +|| [A2] SUB A2,1,A2 ; $blocks-- +||[!A1] MVK 1,A1 + NOP + NOP + .endasmfunc +___ +} +# Tables are kept in endian-neutral manner +$code.=<<___; + .sect ".const:aes_asm" + .align 128 +AES_Te: + .byte 0xc6,0x63,0x63,0xa5, 0xf8,0x7c,0x7c,0x84 + .byte 0xee,0x77,0x77,0x99, 0xf6,0x7b,0x7b,0x8d + .byte 0xff,0xf2,0xf2,0x0d, 0xd6,0x6b,0x6b,0xbd + .byte 0xde,0x6f,0x6f,0xb1, 0x91,0xc5,0xc5,0x54 + .byte 0x60,0x30,0x30,0x50, 0x02,0x01,0x01,0x03 + .byte 0xce,0x67,0x67,0xa9, 0x56,0x2b,0x2b,0x7d + .byte 0xe7,0xfe,0xfe,0x19, 0xb5,0xd7,0xd7,0x62 + .byte 0x4d,0xab,0xab,0xe6, 0xec,0x76,0x76,0x9a + .byte 0x8f,0xca,0xca,0x45, 0x1f,0x82,0x82,0x9d + .byte 0x89,0xc9,0xc9,0x40, 0xfa,0x7d,0x7d,0x87 + .byte 0xef,0xfa,0xfa,0x15, 0xb2,0x59,0x59,0xeb + .byte 0x8e,0x47,0x47,0xc9, 0xfb,0xf0,0xf0,0x0b + .byte 0x41,0xad,0xad,0xec, 0xb3,0xd4,0xd4,0x67 + .byte 0x5f,0xa2,0xa2,0xfd, 0x45,0xaf,0xaf,0xea + .byte 0x23,0x9c,0x9c,0xbf, 0x53,0xa4,0xa4,0xf7 + .byte 0xe4,0x72,0x72,0x96, 0x9b,0xc0,0xc0,0x5b + .byte 0x75,0xb7,0xb7,0xc2, 0xe1,0xfd,0xfd,0x1c + .byte 0x3d,0x93,0x93,0xae, 0x4c,0x26,0x26,0x6a + .byte 0x6c,0x36,0x36,0x5a, 0x7e,0x3f,0x3f,0x41 + .byte 0xf5,0xf7,0xf7,0x02, 0x83,0xcc,0xcc,0x4f + .byte 0x68,0x34,0x34,0x5c, 0x51,0xa5,0xa5,0xf4 + .byte 0xd1,0xe5,0xe5,0x34, 0xf9,0xf1,0xf1,0x08 + .byte 0xe2,0x71,0x71,0x93, 0xab,0xd8,0xd8,0x73 + .byte 0x62,0x31,0x31,0x53, 0x2a,0x15,0x15,0x3f + .byte 0x08,0x04,0x04,0x0c, 0x95,0xc7,0xc7,0x52 + .byte 0x46,0x23,0x23,0x65, 0x9d,0xc3,0xc3,0x5e + .byte 0x30,0x18,0x18,0x28, 0x37,0x96,0x96,0xa1 + .byte 0x0a,0x05,0x05,0x0f, 0x2f,0x9a,0x9a,0xb5 + .byte 0x0e,0x07,0x07,0x09, 0x24,0x12,0x12,0x36 + .byte 0x1b,0x80,0x80,0x9b, 0xdf,0xe2,0xe2,0x3d + .byte 0xcd,0xeb,0xeb,0x26, 0x4e,0x27,0x27,0x69 + .byte 0x7f,0xb2,0xb2,0xcd, 0xea,0x75,0x75,0x9f + .byte 0x12,0x09,0x09,0x1b, 0x1d,0x83,0x83,0x9e + .byte 0x58,0x2c,0x2c,0x74, 0x34,0x1a,0x1a,0x2e + .byte 0x36,0x1b,0x1b,0x2d, 0xdc,0x6e,0x6e,0xb2 + .byte 0xb4,0x5a,0x5a,0xee, 0x5b,0xa0,0xa0,0xfb + .byte 
0xa4,0x52,0x52,0xf6, 0x76,0x3b,0x3b,0x4d + .byte 0xb7,0xd6,0xd6,0x61, 0x7d,0xb3,0xb3,0xce + .byte 0x52,0x29,0x29,0x7b, 0xdd,0xe3,0xe3,0x3e + .byte 0x5e,0x2f,0x2f,0x71, 0x13,0x84,0x84,0x97 + .byte 0xa6,0x53,0x53,0xf5, 0xb9,0xd1,0xd1,0x68 + .byte 0x00,0x00,0x00,0x00, 0xc1,0xed,0xed,0x2c + .byte 0x40,0x20,0x20,0x60, 0xe3,0xfc,0xfc,0x1f + .byte 0x79,0xb1,0xb1,0xc8, 0xb6,0x5b,0x5b,0xed + .byte 0xd4,0x6a,0x6a,0xbe, 0x8d,0xcb,0xcb,0x46 + .byte 0x67,0xbe,0xbe,0xd9, 0x72,0x39,0x39,0x4b + .byte 0x94,0x4a,0x4a,0xde, 0x98,0x4c,0x4c,0xd4 + .byte 0xb0,0x58,0x58,0xe8, 0x85,0xcf,0xcf,0x4a + .byte 0xbb,0xd0,0xd0,0x6b, 0xc5,0xef,0xef,0x2a + .byte 0x4f,0xaa,0xaa,0xe5, 0xed,0xfb,0xfb,0x16 + .byte 0x86,0x43,0x43,0xc5, 0x9a,0x4d,0x4d,0xd7 + .byte 0x66,0x33,0x33,0x55, 0x11,0x85,0x85,0x94 + .byte 0x8a,0x45,0x45,0xcf, 0xe9,0xf9,0xf9,0x10 + .byte 0x04,0x02,0x02,0x06, 0xfe,0x7f,0x7f,0x81 + .byte 0xa0,0x50,0x50,0xf0, 0x78,0x3c,0x3c,0x44 + .byte 0x25,0x9f,0x9f,0xba, 0x4b,0xa8,0xa8,0xe3 + .byte 0xa2,0x51,0x51,0xf3, 0x5d,0xa3,0xa3,0xfe + .byte 0x80,0x40,0x40,0xc0, 0x05,0x8f,0x8f,0x8a + .byte 0x3f,0x92,0x92,0xad, 0x21,0x9d,0x9d,0xbc + .byte 0x70,0x38,0x38,0x48, 0xf1,0xf5,0xf5,0x04 + .byte 0x63,0xbc,0xbc,0xdf, 0x77,0xb6,0xb6,0xc1 + .byte 0xaf,0xda,0xda,0x75, 0x42,0x21,0x21,0x63 + .byte 0x20,0x10,0x10,0x30, 0xe5,0xff,0xff,0x1a + .byte 0xfd,0xf3,0xf3,0x0e, 0xbf,0xd2,0xd2,0x6d + .byte 0x81,0xcd,0xcd,0x4c, 0x18,0x0c,0x0c,0x14 + .byte 0x26,0x13,0x13,0x35, 0xc3,0xec,0xec,0x2f + .byte 0xbe,0x5f,0x5f,0xe1, 0x35,0x97,0x97,0xa2 + .byte 0x88,0x44,0x44,0xcc, 0x2e,0x17,0x17,0x39 + .byte 0x93,0xc4,0xc4,0x57, 0x55,0xa7,0xa7,0xf2 + .byte 0xfc,0x7e,0x7e,0x82, 0x7a,0x3d,0x3d,0x47 + .byte 0xc8,0x64,0x64,0xac, 0xba,0x5d,0x5d,0xe7 + .byte 0x32,0x19,0x19,0x2b, 0xe6,0x73,0x73,0x95 + .byte 0xc0,0x60,0x60,0xa0, 0x19,0x81,0x81,0x98 + .byte 0x9e,0x4f,0x4f,0xd1, 0xa3,0xdc,0xdc,0x7f + .byte 0x44,0x22,0x22,0x66, 0x54,0x2a,0x2a,0x7e + .byte 0x3b,0x90,0x90,0xab, 0x0b,0x88,0x88,0x83 + .byte 0x8c,0x46,0x46,0xca, 
0xc7,0xee,0xee,0x29 + .byte 0x6b,0xb8,0xb8,0xd3, 0x28,0x14,0x14,0x3c + .byte 0xa7,0xde,0xde,0x79, 0xbc,0x5e,0x5e,0xe2 + .byte 0x16,0x0b,0x0b,0x1d, 0xad,0xdb,0xdb,0x76 + .byte 0xdb,0xe0,0xe0,0x3b, 0x64,0x32,0x32,0x56 + .byte 0x74,0x3a,0x3a,0x4e, 0x14,0x0a,0x0a,0x1e + .byte 0x92,0x49,0x49,0xdb, 0x0c,0x06,0x06,0x0a + .byte 0x48,0x24,0x24,0x6c, 0xb8,0x5c,0x5c,0xe4 + .byte 0x9f,0xc2,0xc2,0x5d, 0xbd,0xd3,0xd3,0x6e + .byte 0x43,0xac,0xac,0xef, 0xc4,0x62,0x62,0xa6 + .byte 0x39,0x91,0x91,0xa8, 0x31,0x95,0x95,0xa4 + .byte 0xd3,0xe4,0xe4,0x37, 0xf2,0x79,0x79,0x8b + .byte 0xd5,0xe7,0xe7,0x32, 0x8b,0xc8,0xc8,0x43 + .byte 0x6e,0x37,0x37,0x59, 0xda,0x6d,0x6d,0xb7 + .byte 0x01,0x8d,0x8d,0x8c, 0xb1,0xd5,0xd5,0x64 + .byte 0x9c,0x4e,0x4e,0xd2, 0x49,0xa9,0xa9,0xe0 + .byte 0xd8,0x6c,0x6c,0xb4, 0xac,0x56,0x56,0xfa + .byte 0xf3,0xf4,0xf4,0x07, 0xcf,0xea,0xea,0x25 + .byte 0xca,0x65,0x65,0xaf, 0xf4,0x7a,0x7a,0x8e + .byte 0x47,0xae,0xae,0xe9, 0x10,0x08,0x08,0x18 + .byte 0x6f,0xba,0xba,0xd5, 0xf0,0x78,0x78,0x88 + .byte 0x4a,0x25,0x25,0x6f, 0x5c,0x2e,0x2e,0x72 + .byte 0x38,0x1c,0x1c,0x24, 0x57,0xa6,0xa6,0xf1 + .byte 0x73,0xb4,0xb4,0xc7, 0x97,0xc6,0xc6,0x51 + .byte 0xcb,0xe8,0xe8,0x23, 0xa1,0xdd,0xdd,0x7c + .byte 0xe8,0x74,0x74,0x9c, 0x3e,0x1f,0x1f,0x21 + .byte 0x96,0x4b,0x4b,0xdd, 0x61,0xbd,0xbd,0xdc + .byte 0x0d,0x8b,0x8b,0x86, 0x0f,0x8a,0x8a,0x85 + .byte 0xe0,0x70,0x70,0x90, 0x7c,0x3e,0x3e,0x42 + .byte 0x71,0xb5,0xb5,0xc4, 0xcc,0x66,0x66,0xaa + .byte 0x90,0x48,0x48,0xd8, 0x06,0x03,0x03,0x05 + .byte 0xf7,0xf6,0xf6,0x01, 0x1c,0x0e,0x0e,0x12 + .byte 0xc2,0x61,0x61,0xa3, 0x6a,0x35,0x35,0x5f + .byte 0xae,0x57,0x57,0xf9, 0x69,0xb9,0xb9,0xd0 + .byte 0x17,0x86,0x86,0x91, 0x99,0xc1,0xc1,0x58 + .byte 0x3a,0x1d,0x1d,0x27, 0x27,0x9e,0x9e,0xb9 + .byte 0xd9,0xe1,0xe1,0x38, 0xeb,0xf8,0xf8,0x13 + .byte 0x2b,0x98,0x98,0xb3, 0x22,0x11,0x11,0x33 + .byte 0xd2,0x69,0x69,0xbb, 0xa9,0xd9,0xd9,0x70 + .byte 0x07,0x8e,0x8e,0x89, 0x33,0x94,0x94,0xa7 + .byte 0x2d,0x9b,0x9b,0xb6, 0x3c,0x1e,0x1e,0x22 + .byte 
0x15,0x87,0x87,0x92, 0xc9,0xe9,0xe9,0x20 + .byte 0x87,0xce,0xce,0x49, 0xaa,0x55,0x55,0xff + .byte 0x50,0x28,0x28,0x78, 0xa5,0xdf,0xdf,0x7a + .byte 0x03,0x8c,0x8c,0x8f, 0x59,0xa1,0xa1,0xf8 + .byte 0x09,0x89,0x89,0x80, 0x1a,0x0d,0x0d,0x17 + .byte 0x65,0xbf,0xbf,0xda, 0xd7,0xe6,0xe6,0x31 + .byte 0x84,0x42,0x42,0xc6, 0xd0,0x68,0x68,0xb8 + .byte 0x82,0x41,0x41,0xc3, 0x29,0x99,0x99,0xb0 + .byte 0x5a,0x2d,0x2d,0x77, 0x1e,0x0f,0x0f,0x11 + .byte 0x7b,0xb0,0xb0,0xcb, 0xa8,0x54,0x54,0xfc + .byte 0x6d,0xbb,0xbb,0xd6, 0x2c,0x16,0x16,0x3a +AES_Te4: + .byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5 + .byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76 + .byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0 + .byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0 + .byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc + .byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15 + .byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a + .byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75 + .byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0 + .byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84 + .byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b + .byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf + .byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85 + .byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8 + .byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5 + .byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2 + .byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17 + .byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73 + .byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88 + .byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb + .byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c + .byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79 + .byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9 + .byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08 + .byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6 + .byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a + .byte 0x70, 0x3e, 0xb5, 
0x66, 0x48, 0x03, 0xf6, 0x0e + .byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e + .byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94 + .byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf + .byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68 + .byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16 +rcon: + .byte 0x01,0x00,0x00,0x00, 0x02,0x00,0x00,0x00 + .byte 0x04,0x00,0x00,0x00, 0x08,0x00,0x00,0x00 + .byte 0x10,0x00,0x00,0x00, 0x20,0x00,0x00,0x00 + .byte 0x40,0x00,0x00,0x00, 0x80,0x00,0x00,0x00 + .byte 0x1B,0x00,0x00,0x00, 0x36,0x00,0x00,0x00 + .align 128 +AES_Td: + .byte 0x51,0xf4,0xa7,0x50, 0x7e,0x41,0x65,0x53 + .byte 0x1a,0x17,0xa4,0xc3, 0x3a,0x27,0x5e,0x96 + .byte 0x3b,0xab,0x6b,0xcb, 0x1f,0x9d,0x45,0xf1 + .byte 0xac,0xfa,0x58,0xab, 0x4b,0xe3,0x03,0x93 + .byte 0x20,0x30,0xfa,0x55, 0xad,0x76,0x6d,0xf6 + .byte 0x88,0xcc,0x76,0x91, 0xf5,0x02,0x4c,0x25 + .byte 0x4f,0xe5,0xd7,0xfc, 0xc5,0x2a,0xcb,0xd7 + .byte 0x26,0x35,0x44,0x80, 0xb5,0x62,0xa3,0x8f + .byte 0xde,0xb1,0x5a,0x49, 0x25,0xba,0x1b,0x67 + .byte 0x45,0xea,0x0e,0x98, 0x5d,0xfe,0xc0,0xe1 + .byte 0xc3,0x2f,0x75,0x02, 0x81,0x4c,0xf0,0x12 + .byte 0x8d,0x46,0x97,0xa3, 0x6b,0xd3,0xf9,0xc6 + .byte 0x03,0x8f,0x5f,0xe7, 0x15,0x92,0x9c,0x95 + .byte 0xbf,0x6d,0x7a,0xeb, 0x95,0x52,0x59,0xda + .byte 0xd4,0xbe,0x83,0x2d, 0x58,0x74,0x21,0xd3 + .byte 0x49,0xe0,0x69,0x29, 0x8e,0xc9,0xc8,0x44 + .byte 0x75,0xc2,0x89,0x6a, 0xf4,0x8e,0x79,0x78 + .byte 0x99,0x58,0x3e,0x6b, 0x27,0xb9,0x71,0xdd + .byte 0xbe,0xe1,0x4f,0xb6, 0xf0,0x88,0xad,0x17 + .byte 0xc9,0x20,0xac,0x66, 0x7d,0xce,0x3a,0xb4 + .byte 0x63,0xdf,0x4a,0x18, 0xe5,0x1a,0x31,0x82 + .byte 0x97,0x51,0x33,0x60, 0x62,0x53,0x7f,0x45 + .byte 0xb1,0x64,0x77,0xe0, 0xbb,0x6b,0xae,0x84 + .byte 0xfe,0x81,0xa0,0x1c, 0xf9,0x08,0x2b,0x94 + .byte 0x70,0x48,0x68,0x58, 0x8f,0x45,0xfd,0x19 + .byte 0x94,0xde,0x6c,0x87, 0x52,0x7b,0xf8,0xb7 + .byte 0xab,0x73,0xd3,0x23, 0x72,0x4b,0x02,0xe2 + .byte 0xe3,0x1f,0x8f,0x57, 0x66,0x55,0xab,0x2a + .byte 0xb2,0xeb,0x28,0x07, 0x2f,0xb5,0xc2,0x03 
+ .byte 0x86,0xc5,0x7b,0x9a, 0xd3,0x37,0x08,0xa5 + .byte 0x30,0x28,0x87,0xf2, 0x23,0xbf,0xa5,0xb2 + .byte 0x02,0x03,0x6a,0xba, 0xed,0x16,0x82,0x5c + .byte 0x8a,0xcf,0x1c,0x2b, 0xa7,0x79,0xb4,0x92 + .byte 0xf3,0x07,0xf2,0xf0, 0x4e,0x69,0xe2,0xa1 + .byte 0x65,0xda,0xf4,0xcd, 0x06,0x05,0xbe,0xd5 + .byte 0xd1,0x34,0x62,0x1f, 0xc4,0xa6,0xfe,0x8a + .byte 0x34,0x2e,0x53,0x9d, 0xa2,0xf3,0x55,0xa0 + .byte 0x05,0x8a,0xe1,0x32, 0xa4,0xf6,0xeb,0x75 + .byte 0x0b,0x83,0xec,0x39, 0x40,0x60,0xef,0xaa + .byte 0x5e,0x71,0x9f,0x06, 0xbd,0x6e,0x10,0x51 + .byte 0x3e,0x21,0x8a,0xf9, 0x96,0xdd,0x06,0x3d + .byte 0xdd,0x3e,0x05,0xae, 0x4d,0xe6,0xbd,0x46 + .byte 0x91,0x54,0x8d,0xb5, 0x71,0xc4,0x5d,0x05 + .byte 0x04,0x06,0xd4,0x6f, 0x60,0x50,0x15,0xff + .byte 0x19,0x98,0xfb,0x24, 0xd6,0xbd,0xe9,0x97 + .byte 0x89,0x40,0x43,0xcc, 0x67,0xd9,0x9e,0x77 + .byte 0xb0,0xe8,0x42,0xbd, 0x07,0x89,0x8b,0x88 + .byte 0xe7,0x19,0x5b,0x38, 0x79,0xc8,0xee,0xdb + .byte 0xa1,0x7c,0x0a,0x47, 0x7c,0x42,0x0f,0xe9 + .byte 0xf8,0x84,0x1e,0xc9, 0x00,0x00,0x00,0x00 + .byte 0x09,0x80,0x86,0x83, 0x32,0x2b,0xed,0x48 + .byte 0x1e,0x11,0x70,0xac, 0x6c,0x5a,0x72,0x4e + .byte 0xfd,0x0e,0xff,0xfb, 0x0f,0x85,0x38,0x56 + .byte 0x3d,0xae,0xd5,0x1e, 0x36,0x2d,0x39,0x27 + .byte 0x0a,0x0f,0xd9,0x64, 0x68,0x5c,0xa6,0x21 + .byte 0x9b,0x5b,0x54,0xd1, 0x24,0x36,0x2e,0x3a + .byte 0x0c,0x0a,0x67,0xb1, 0x93,0x57,0xe7,0x0f + .byte 0xb4,0xee,0x96,0xd2, 0x1b,0x9b,0x91,0x9e + .byte 0x80,0xc0,0xc5,0x4f, 0x61,0xdc,0x20,0xa2 + .byte 0x5a,0x77,0x4b,0x69, 0x1c,0x12,0x1a,0x16 + .byte 0xe2,0x93,0xba,0x0a, 0xc0,0xa0,0x2a,0xe5 + .byte 0x3c,0x22,0xe0,0x43, 0x12,0x1b,0x17,0x1d + .byte 0x0e,0x09,0x0d,0x0b, 0xf2,0x8b,0xc7,0xad + .byte 0x2d,0xb6,0xa8,0xb9, 0x14,0x1e,0xa9,0xc8 + .byte 0x57,0xf1,0x19,0x85, 0xaf,0x75,0x07,0x4c + .byte 0xee,0x99,0xdd,0xbb, 0xa3,0x7f,0x60,0xfd + .byte 0xf7,0x01,0x26,0x9f, 0x5c,0x72,0xf5,0xbc + .byte 0x44,0x66,0x3b,0xc5, 0x5b,0xfb,0x7e,0x34 + .byte 0x8b,0x43,0x29,0x76, 0xcb,0x23,0xc6,0xdc + .byte 0xb6,0xed,0xfc,0x68, 
0xb8,0xe4,0xf1,0x63 + .byte 0xd7,0x31,0xdc,0xca, 0x42,0x63,0x85,0x10 + .byte 0x13,0x97,0x22,0x40, 0x84,0xc6,0x11,0x20 + .byte 0x85,0x4a,0x24,0x7d, 0xd2,0xbb,0x3d,0xf8 + .byte 0xae,0xf9,0x32,0x11, 0xc7,0x29,0xa1,0x6d + .byte 0x1d,0x9e,0x2f,0x4b, 0xdc,0xb2,0x30,0xf3 + .byte 0x0d,0x86,0x52,0xec, 0x77,0xc1,0xe3,0xd0 + .byte 0x2b,0xb3,0x16,0x6c, 0xa9,0x70,0xb9,0x99 + .byte 0x11,0x94,0x48,0xfa, 0x47,0xe9,0x64,0x22 + .byte 0xa8,0xfc,0x8c,0xc4, 0xa0,0xf0,0x3f,0x1a + .byte 0x56,0x7d,0x2c,0xd8, 0x22,0x33,0x90,0xef + .byte 0x87,0x49,0x4e,0xc7, 0xd9,0x38,0xd1,0xc1 + .byte 0x8c,0xca,0xa2,0xfe, 0x98,0xd4,0x0b,0x36 + .byte 0xa6,0xf5,0x81,0xcf, 0xa5,0x7a,0xde,0x28 + .byte 0xda,0xb7,0x8e,0x26, 0x3f,0xad,0xbf,0xa4 + .byte 0x2c,0x3a,0x9d,0xe4, 0x50,0x78,0x92,0x0d + .byte 0x6a,0x5f,0xcc,0x9b, 0x54,0x7e,0x46,0x62 + .byte 0xf6,0x8d,0x13,0xc2, 0x90,0xd8,0xb8,0xe8 + .byte 0x2e,0x39,0xf7,0x5e, 0x82,0xc3,0xaf,0xf5 + .byte 0x9f,0x5d,0x80,0xbe, 0x69,0xd0,0x93,0x7c + .byte 0x6f,0xd5,0x2d,0xa9, 0xcf,0x25,0x12,0xb3 + .byte 0xc8,0xac,0x99,0x3b, 0x10,0x18,0x7d,0xa7 + .byte 0xe8,0x9c,0x63,0x6e, 0xdb,0x3b,0xbb,0x7b + .byte 0xcd,0x26,0x78,0x09, 0x6e,0x59,0x18,0xf4 + .byte 0xec,0x9a,0xb7,0x01, 0x83,0x4f,0x9a,0xa8 + .byte 0xe6,0x95,0x6e,0x65, 0xaa,0xff,0xe6,0x7e + .byte 0x21,0xbc,0xcf,0x08, 0xef,0x15,0xe8,0xe6 + .byte 0xba,0xe7,0x9b,0xd9, 0x4a,0x6f,0x36,0xce + .byte 0xea,0x9f,0x09,0xd4, 0x29,0xb0,0x7c,0xd6 + .byte 0x31,0xa4,0xb2,0xaf, 0x2a,0x3f,0x23,0x31 + .byte 0xc6,0xa5,0x94,0x30, 0x35,0xa2,0x66,0xc0 + .byte 0x74,0x4e,0xbc,0x37, 0xfc,0x82,0xca,0xa6 + .byte 0xe0,0x90,0xd0,0xb0, 0x33,0xa7,0xd8,0x15 + .byte 0xf1,0x04,0x98,0x4a, 0x41,0xec,0xda,0xf7 + .byte 0x7f,0xcd,0x50,0x0e, 0x17,0x91,0xf6,0x2f + .byte 0x76,0x4d,0xd6,0x8d, 0x43,0xef,0xb0,0x4d + .byte 0xcc,0xaa,0x4d,0x54, 0xe4,0x96,0x04,0xdf + .byte 0x9e,0xd1,0xb5,0xe3, 0x4c,0x6a,0x88,0x1b + .byte 0xc1,0x2c,0x1f,0xb8, 0x46,0x65,0x51,0x7f + .byte 0x9d,0x5e,0xea,0x04, 0x01,0x8c,0x35,0x5d + .byte 0xfa,0x87,0x74,0x73, 0xfb,0x0b,0x41,0x2e + .byte 
0xb3,0x67,0x1d,0x5a, 0x92,0xdb,0xd2,0x52 + .byte 0xe9,0x10,0x56,0x33, 0x6d,0xd6,0x47,0x13 + .byte 0x9a,0xd7,0x61,0x8c, 0x37,0xa1,0x0c,0x7a + .byte 0x59,0xf8,0x14,0x8e, 0xeb,0x13,0x3c,0x89 + .byte 0xce,0xa9,0x27,0xee, 0xb7,0x61,0xc9,0x35 + .byte 0xe1,0x1c,0xe5,0xed, 0x7a,0x47,0xb1,0x3c + .byte 0x9c,0xd2,0xdf,0x59, 0x55,0xf2,0x73,0x3f + .byte 0x18,0x14,0xce,0x79, 0x73,0xc7,0x37,0xbf + .byte 0x53,0xf7,0xcd,0xea, 0x5f,0xfd,0xaa,0x5b + .byte 0xdf,0x3d,0x6f,0x14, 0x78,0x44,0xdb,0x86 + .byte 0xca,0xaf,0xf3,0x81, 0xb9,0x68,0xc4,0x3e + .byte 0x38,0x24,0x34,0x2c, 0xc2,0xa3,0x40,0x5f + .byte 0x16,0x1d,0xc3,0x72, 0xbc,0xe2,0x25,0x0c + .byte 0x28,0x3c,0x49,0x8b, 0xff,0x0d,0x95,0x41 + .byte 0x39,0xa8,0x01,0x71, 0x08,0x0c,0xb3,0xde + .byte 0xd8,0xb4,0xe4,0x9c, 0x64,0x56,0xc1,0x90 + .byte 0x7b,0xcb,0x84,0x61, 0xd5,0x32,0xb6,0x70 + .byte 0x48,0x6c,0x5c,0x74, 0xd0,0xb8,0x57,0x42 +AES_Td4: + .byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38 + .byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb + .byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87 + .byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb + .byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d + .byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e + .byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2 + .byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25 + .byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16 + .byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92 + .byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda + .byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84 + .byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a + .byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06 + .byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02 + .byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b + .byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea + .byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73 + .byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85 + .byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e + .byte 0x47, 
0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89 + .byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b + .byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20 + .byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4 + .byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31 + .byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f + .byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d + .byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef + .byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0 + .byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61 + .byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26 + .byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d + .cstring "AES for C64x+, CRYPTOGAMS by " + .align 4 +___ + +print $code; diff --git a/crypto/bn/asm/bn-c64xplus.asm b/crypto/bn/asm/bn-c64xplus.asm new file mode 100644 index 0000000000..161547c3b0 --- /dev/null +++ b/crypto/bn/asm/bn-c64xplus.asm @@ -0,0 +1,333 @@ +;;==================================================================== +;; Written by Andy Polyakov for the OpenSSL +;; project. +;; +;; Rights for redistribution and usage in source and binary forms are +;; granted according to the OpenSSL license. Warranty of any kind is +;; disclaimed. +;;==================================================================== +;; Compiler-generated multiply-n-add SPLOOP runs at 12*n cycles, n +;; being the number of 32-bit words, addition - 8*n. Corresponding 4x +;; unrolled SPLOOP-free loops - at ~8*n and ~5*n. Below assembler +;; SPLOOPs spin at ... 2*n cycles [plus epilogue]. 
+;;==================================================================== + .text + + .asg B3,RA + .asg A4,ARG0 + .asg B4,ARG1 + .asg A6,ARG2 + .asg B6,ARG3 + .asg A8,ARG4 + .asg B8,ARG5 + .asg A4,RET + .asg A15,FP + .asg B14,DP + .asg B15,SP + + .global _bn_mul_add_words +_bn_mul_add_words: + .asmfunc + MV ARG2,B0 + [!B0] BNOP RA +||[!B0] MVK 0,RET + [B0] MVC B0,ILC + [B0] ZERO A19 ; high part of accumulator +|| [B0] MV ARG0,A2 +|| [B0] MV ARG3,A3 + NOP 3 + + SPLOOP 2 ; 2*n+10 +;;==================================================================== + LDW *ARG1++,B7 ; ap[i] + NOP 3 + LDW *ARG0++,A7 ; rp[i] + MPY32U B7,A3,A17:A16 + NOP 3 ; [2,0] in epilogue + ADDU A16,A7,A21:A20 + ADDU A19,A21:A20,A19:A18 +|| MV.S A17,A23 + SPKERNEL 2,1 ; leave slot for "return value" +|| STW A18,*A2++ ; rp[i] +|| ADD A19,A23,A19 +;;==================================================================== + BNOP RA,4 + MV A19,RET ; return value + .endasmfunc + + .global _bn_mul_words +_bn_mul_words: + .asmfunc + MV ARG2,B0 + [!B0] BNOP RA +||[!B0] MVK 0,RET + [B0] MVC B0,ILC + [B0] ZERO A19 ; high part of accumulator + NOP 3 + + SPLOOP 2 ; 2*n+10 +;;==================================================================== + LDW *ARG1++,A7 ; ap[i] + NOP 4 + MPY32U A7,ARG3,A17:A16 + NOP 4 ; [2,0] in epiloque + ADDU A19,A16,A19:A18 +|| MV.S A17,A21 + SPKERNEL 2,1 ; leave slot for "return value" +|| STW A18,*ARG0++ ; rp[i] +|| ADD.L A19,A21,A19 +;;==================================================================== + BNOP RA,4 + MV A19,RET ; return value + .endasmfunc + + .global _bn_sqr_words +_bn_sqr_words: + .asmfunc + MV ARG2,B0 + [!B0] BNOP RA +||[!B0] MVK 0,RET + [B0] MVC B0,ILC + [B0] MV ARG0,B2 +|| [B0] ADD 4,ARG0,ARG0 + NOP 3 + + SPLOOP 2 ; 2*n+10 +;;==================================================================== + LDW *ARG1++,B7 ; ap[i] + NOP 4 + MPY32U B7,B7,B1:B0 + NOP 3 ; [2,0] in epilogue + STW B0,*B2++(8) ; rp[2*i] + MV B1,A1 + SPKERNEL 2,0 ; fully overlap BNOP RA,5 +|| STW 
A1,*ARG0++(8) ; rp[2*i+1] +;;==================================================================== + BNOP RA,5 + .endasmfunc + + .global _bn_add_words +_bn_add_words: + .asmfunc + MV ARG3,B0 + [!B0] BNOP RA +||[!B0] MVK 0,RET + [B0] MVC B0,ILC + [B0] ZERO A1 ; carry flag +|| [B0] MV ARG0,A3 + NOP 3 + + SPLOOP 2 ; 2*n+6 +;;==================================================================== + LDW *ARG2++,A7 ; bp[i] +|| LDW *ARG1++,B7 ; ap[i] + NOP 4 + ADDU A7,B7,A9:A8 + ADDU A1,A9:A8,A1:A0 + SPKERNEL 0,0 ; fully overlap BNOP RA,5 +|| STW A0,*A3++ ; write result +|| MV A1,RET ; keep carry flag in RET +;;==================================================================== + BNOP RA,5 + .endasmfunc + + .global _bn_sub_words +_bn_sub_words: + .asmfunc + MV ARG3,B0 + [!B0] BNOP RA +||[!B0] MVK 0,RET + [B0] MVC B0,ILC + [B0] ZERO A2 ; borrow flag +|| [B0] MV ARG0,A3 + NOP 3 + + SPLOOP 2 ; 2*n+6 +;;==================================================================== + LDW *ARG2++,A7 ; bp[i] +|| LDW *ARG1++,B7 ; ap[i] + NOP 4 + SUBU B7,A7,A1:A0 + [A2] SUB A1:A0,1,A1:A0 + SPKERNEL 0,1 ; leave slot for "return borrow flag" +|| STW A0,*A3++ ; write result +|| AND 1,A1,A2 ; pass on borrow flag +;;==================================================================== + BNOP RA,4 + AND 1,A1,RET ; return borrow flag + .endasmfunc + + .global _bn_div_words + .global __divull +_bn_div_words: + .asmfunc + CALLP __divull,A3 ; jump to rts64plus.lib +|| MV ARG0,A5 +|| MV ARG1,ARG0 +|| MV ARG2,ARG1 +|| ZERO B5 + .endasmfunc + +;;==================================================================== +;; Not really Comba algorithm, just straightforward NxM... Dedicated +;; fully unrolled real Comba implementations are asymptotically 2x +;; faster, but naturally larger undertaking. Purpose of this exercise +;; was rather to learn to master nested SPLOOPs... 
+;;==================================================================== + .global _bn_sqr_comba8 + .global _bn_mul_comba8 +_bn_sqr_comba8: + MV ARG1,ARG2 +_bn_mul_comba8: + .asmfunc + MVK 8,B0 ; N, RILC +|| MVK 8,A0 ; M, outer loop counter +|| MV ARG1,A5 ; copy ap +|| MV ARG0,B4 ; copy rp +|| ZERO B19 ; high part of accumulator + MVC B0,RILC +|| SUB B0,2,B1 ; N-2, initial ILC +|| SUB B0,1,B2 ; const B2=N-1 +|| LDW *A5++,B6 ; ap[0] +|| MV A0,A3 ; const A3=M +sploopNxM?: ; for best performance arrange M<=N + [A0] SPLOOPD 2 ; 2*n+10 +|| MVC B1,ILC +|| ADDAW B4,B0,B5 +|| ZERO B7 +|| LDW *A5++,A9 ; pre-fetch ap[1] +|| ZERO A1 +|| SUB A0,1,A0 +;;==================================================================== +;; SPLOOP from bn_mul_add_words, but with flipped A<>B register files. +;; This is because of Advisory 15 from TI publication SPRZ247I. + LDW *ARG2++,A7 ; bp[i] + NOP 3 + [A1] LDW *B5++,B7 ; rp[i] + MPY32U A7,B6,B17:B16 + NOP 3 + ADDU B16,B7,B21:B20 + ADDU B19,B21:B20,B19:B18 +|| MV.S B17,B23 + SPKERNEL +|| STW B18,*B4++ ; rp[i] +|| ADD.S B19,B23,B19 +;;==================================================================== +outer?: ; m*2*(n+1)+10 + SUBAW ARG2,A3,ARG2 ; rewind bp to bp[0] + SPMASKR +|| CMPGT A0,1,A2 ; done pre-fetching ap[i+1]? + MVD A9,B6 ; move through .M unit(*) + [A2] LDW *A5++,A9 ; pre-fetch ap[i+1] + SUBAW B5,B2,B5 ; rewind rp to rp[1] + MVK 1,A1 + [A0] BNOP.S1 outer?,4 +|| [A0] SUB.L A0,1,A0 + STW B19,*B4--[B2] ; rewind rp to rp[1] +|| ZERO.S B19 ; high part of accumulator +;; end of outer? + BNOP RA,5 ; return + .endasmfunc +;; (*) It should be noted that B6 is used as input to MPY32U in +;; chronologically next cycle in *preceding* SPLOOP iteration. +;; Normally such arrangement would require DINT, but at this +;; point SPLOOP is draining and interrupts are disabled +;; implicitly. 
+ + .global _bn_sqr_comba4 + .global _bn_mul_comba4 +_bn_sqr_comba4: + MV ARG1,ARG2 +_bn_mul_comba4: + .asmfunc + .if 0 + BNOP sploopNxM?,3 + ;; Above mentioned m*2*(n+1)+10 does not apply in n=m=4 case, + ;; because of read-after-write penalties, it's rather + ;; n*2*(n+3)+10, or 66 cycles [plus various overheads]... + MVK 4,B0 ; N, RILC +|| MVK 4,A0 ; M, outer loop counter +|| MV ARG1,A5 ; copy ap +|| MV ARG0,B4 ; copy rp +|| ZERO B19 ; high part of accumulator + MVC B0,RILC +|| SUB B0,2,B1 ; first ILC +|| SUB B0,1,B2 ; const B2=N-1 +|| LDW *A5++,B6 ; ap[0] +|| MV A0,A3 ; const A3=M + .else + ;; This alternative is exercise in fully unrolled Comba + ;; algorithm implementation that operates at n*(n+1)+12, or + ;; as little as 32 cycles... + LDW *ARG1[0],B16 ; a[0] +|| LDW *ARG2[0],A16 ; b[0] + LDW *ARG1[1],B17 ; a[1] +|| LDW *ARG2[1],A17 ; b[1] + LDW *ARG1[2],B18 ; a[2] +|| LDW *ARG2[2],A18 ; b[2] + LDW *ARG1[3],B19 ; a[3] +|| LDW *ARG2[3],A19 ; b[3] + NOP + MPY32U A16,B16,A1:A0 ; a[0]*b[0] + MPY32U A17,B16,A23:A22 ; a[0]*b[1] + MPY32U A16,B17,A25:A24 ; a[1]*b[0] + MPY32U A16,B18,A27:A26 ; a[2]*b[0] + STW A0,*ARG0[0] +|| MPY32U A17,B17,A29:A28 ; a[1]*b[1] + MPY32U A18,B16,A31:A30 ; a[0]*b[2] +|| ADDU A22,A1,A1:A0 + MV A23,B0 +|| MPY32U A19,B16,A21:A20 ; a[3]*b[0] +|| ADDU A24,A1:A0,A1:A0 + ADDU A25,B0,B1:B0 +|| STW A0,*ARG0[1] +|| MPY32U A18,B17,A23:A22 ; a[2]*b[1] +|| ADDU A26,A1,A9:A8 + ADDU A27,B1,B9:B8 +|| MPY32U A17,B18,A25:A24 ; a[1]*b[2] +|| ADDU A28,A9:A8,A9:A8 + ADDU A29,B9:B8,B9:B8 +|| MPY32U A16,B19,A27:A26 ; a[0]*b[3] +|| ADDU A30,A9:A8,A9:A8 + ADDU A31,B9:B8,B9:B8 +|| ADDU B0,A9:A8,A9:A8 + STW A8,*ARG0[2] +|| ADDU A20,A9,A1:A0 + ADDU A21,B9,B1:B0 +|| MPY32U A19,B17,A21:A20 ; a[3]*b[1] +|| ADDU A22,A1:A0,A1:A0 + ADDU A23,B1:B0,B1:B0 +|| MPY32U A18,B18,A23:A22 ; a[2]*b[2] +|| ADDU A24,A1:A0,A1:A0 + ADDU A25,B1:B0,B1:B0 +|| MPY32U A17,B19,A25:A24 ; a[1]*b[3] +|| ADDU A26,A1:A0,A1:A0 + ADDU A27,B1:B0,B1:B0 +|| ADDU B8,A1:A0,A1:A0 + STW A0,*ARG0[3] +|| 
MPY32U A19,B18,A27:A26 ; a[3]*b[2] +|| ADDU A20,A1,A9:A8 + ADDU A21,B1,B9:B8 +|| MPY32U A18,B19,A29:A28 ; a[2]*b[3] +|| ADDU A22,A9:A8,A9:A8 + ADDU A23,B9:B8,B9:B8 +|| MPY32U A19,B19,A31:A30 ; a[3]*b[3] +|| ADDU A24,A9:A8,A9:A8 + ADDU A25,B9:B8,B9:B8 +|| ADDU B0,A9:A8,A9:A8 + STW A8,*ARG0[4] +|| ADDU A26,A9,A1:A0 + ADDU A27,B9,B1:B0 +|| ADDU A28,A1:A0,A1:A0 + ADDU A29,B1:B0,B1:B0 +|| BNOP RA +|| ADDU B8,A1:A0,A1:A0 + STW A0,*ARG0[5] +|| ADDU A30,A1,A9:A8 + ADD A31,B1,B8 + ADDU B0,A9:A8,A9:A8 ; removed || to avoid cross-path stall below + ADD B8,A9,A9 +|| STW A8,*ARG0[6] + STW A9,*ARG0[7] + .endif + .endasmfunc diff --git a/crypto/bn/asm/c64xplus-gf2m.pl b/crypto/bn/asm/c64xplus-gf2m.pl new file mode 100644 index 0000000000..cef83942c9 --- /dev/null +++ b/crypto/bn/asm/c64xplus-gf2m.pl @@ -0,0 +1,146 @@ +#!/usr/bin/env perl +# +# ==================================================================== +# Written by Andy Polyakov for the OpenSSL +# project. The module is, however, dual licensed under OpenSSL and +# CRYPTOGAMS licenses depending on where you obtain it. For further +# details see http://www.openssl.org/~appro/cryptogams/. +# ==================================================================== +# +# February 2012 +# +# The module implements bn_GF2m_mul_2x2 polynomial multiplication +# used in bn_gf2m.c. It's kind of low-hanging mechanical port from +# C for the time being... The subroutine runs in 37 cycles, which is +# 4.5x faster than compiler-generated code. Though comparison is +# totally unfair, because this module utilizes Galois Field Multiply +# instruction. 
+ +while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {} +open STDOUT,">$output"; + +($rp,$a1,$a0,$b1,$b0)=("A4","B4","A6","B6","A8"); # argument vector + +($Alo,$Alox0,$Alox1,$Alox2,$Alox3)=map("A$_",(16..20)); +($Ahi,$Ahix0,$Ahix1,$Ahix2,$Ahix3)=map("B$_",(16..20)); +($B_0,$B_1,$B_2,$B_3)=("B5","A5","A7","B7"); +($A,$B)=($Alo,$B_1); +$xFF="B1"; + +sub mul_1x1_upper { +my ($A,$B)=@_; +$code.=<<___; + EXTU $B,8,24,$B_2 ; smash $B to 4 bytes +|| AND $B,$xFF,$B_0 +|| SHRU $B,24,$B_3 + SHRU $A,16, $Ahi ; smash $A to two halfwords +|| EXTU $A,16,16,$Alo + + XORMPY $Alo,$B_2,$Alox2 ; 16x8 bits multiplication +|| XORMPY $Ahi,$B_2,$Ahix2 +|| EXTU $B,16,24,$B_1 + XORMPY $Alo,$B_0,$Alox0 +|| XORMPY $Ahi,$B_0,$Ahix0 + XORMPY $Alo,$B_3,$Alox3 +|| XORMPY $Ahi,$B_3,$Ahix3 + XORMPY $Alo,$B_1,$Alox1 +|| XORMPY $Ahi,$B_1,$Ahix1 +___ +} +sub mul_1x1_merged { +my ($OUTlo,$OUThi,$A,$B)=@_; +$code.=<<___; + EXTU $B,8,24,$B_2 ; smash $B to 4 bytes +|| AND $B,$xFF,$B_0 +|| SHRU $B,24,$B_3 + SHRU $A,16, $Ahi ; smash $A to two halfwords +|| EXTU $A,16,16,$Alo + + XOR $Ahix0,$Alox2,$Ahix0 +|| MV $Ahix2,$OUThi +|| XORMPY $Alo,$B_2,$Alox2 + XORMPY $Ahi,$B_2,$Ahix2 +|| EXTU $B,16,24,$B_1 +|| XORMPY $Alo,$B_0,A1 ; $Alox0 + XOR $Ahix1,$Alox3,$Ahix1 +|| SHL $Ahix0,16,$OUTlo +|| SHRU $Ahix0,16,$Ahix0 + XOR $Alox0,$OUTlo,$OUTlo +|| XOR $Ahix0,$OUThi,$OUThi +|| XORMPY $Ahi,$B_0,$Ahix0 +|| XORMPY $Alo,$B_3,$Alox3 +|| SHL $Alox1,8,$Alox1 +|| SHL $Ahix3,8,$Ahix3 + XOR $Alox1,$OUTlo,$OUTlo +|| XOR $Ahix3,$OUThi,$OUThi +|| XORMPY $Ahi,$B_3,$Ahix3 +|| SHL $Ahix1,24,$Alox1 +|| SHRU $Ahix1,8, $Ahix1 + XOR $Alox1,$OUTlo,$OUTlo +|| XOR $Ahix1,$OUThi,$OUThi +|| XORMPY $Alo,$B_1,$Alox1 +|| XORMPY $Ahi,$B_1,$Ahix1 +|| MV A1,$Alox0 +___ +} +sub mul_1x1_lower { +my ($OUTlo,$OUThi)=@_; +$code.=<<___; + ;NOP + XOR $Ahix0,$Alox2,$Ahix0 +|| MV $Ahix2,$OUThi + NOP + XOR $Ahix1,$Alox3,$Ahix1 +|| SHL $Ahix0,16,$OUTlo +|| SHRU $Ahix0,16,$Ahix0 + XOR $Alox0,$OUTlo,$OUTlo +|| XOR $Ahix0,$OUThi,$OUThi +|| SHL 
$Alox1,8,$Alox1 +|| SHL $Ahix3,8,$Ahix3 + XOR $Alox1,$OUTlo,$OUTlo +|| XOR $Ahix3,$OUThi,$OUThi +|| SHL $Ahix1,24,$Alox1 +|| SHRU $Ahix1,8, $Ahix1 + XOR $Alox1,$OUTlo,$OUTlo +|| XOR $Ahix1,$OUThi,$OUThi +___ +} +$code.=<<___; + .text + + .global _bn_GF2m_mul_2x2 +_bn_GF2m_mul_2x2: + .asmfunc + MVK 0xFF,$xFF +___ + &mul_1x1_upper($a0,$b0); # a0·b0 +$code.=<<___; +|| MV $b1,$B + MV $a1,$A +___ + &mul_1x1_merged("A28","B28",$A,$B); # a0·b0/a1·b1 +$code.=<<___; +|| XOR $b0,$b1,$B + XOR $a0,$a1,$A +___ + &mul_1x1_merged("A31","B31",$A,$B); # a1·b1/(a0+a1)·(b0+b1) +$code.=<<___; + XOR A28,A31,A29 +|| XOR B28,B31,B29 ; a0·b0+a1·b1 +___ + &mul_1x1_lower("A30","B30"); # (a0+a1)·(b0+b1) +$code.=<<___; +|| BNOP B3 + XOR A29,A30,A30 +|| XOR B29,B30,B30 ; (a0+a1)·(b0+b1)-a0·b0-a1·b1 + XOR B28,A30,A30 +|| STW A28,*${rp}[0] + XOR B30,A31,A31 +|| STW A30,*${rp}[1] + STW A31,*${rp}[2] + STW B31,*${rp}[3] + .endasmfunc +___ + +print $code; +close STDOUT; diff --git a/crypto/bn/bn_nist.c b/crypto/bn/bn_nist.c index ce860b1d75..ad98188e81 100644 --- a/crypto/bn/bn_nist.c +++ b/crypto/bn/bn_nist.c @@ -366,6 +366,10 @@ static void nist_cp_bn(BN_ULONG *buf, BN_ULONG *a, int top) # endif #endif /* BN_BITS2 != 64 */ +#if defined(_TMS320C6X) && defined(NIST_INT64) +# undef NIST_INT64 /* compiler bug */ +# pragma diag_suppress 177 +#endif #define nist_set_192(to, from, a1, a2, a3) \ { \ diff --git a/crypto/c64xpluscpuid.pl b/crypto/c64xpluscpuid.pl new file mode 100644 index 0000000000..067b693d5c --- /dev/null +++ b/crypto/c64xpluscpuid.pl @@ -0,0 +1,246 @@ +#!/usr/bin/env perl +# + +while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {} +open STDOUT,">$output"; + +$code.=<<___; + .text + + .asg B3,RA + + .global _OPENSSL_rdtsc +_OPENSSL_rdtsc: + .asmfunc + B RA + MVC TSCL,B0 + MVC TSCH,B1 + [!B0] MVC B0,TSCL ; start TSC + MV B0,A4 + MV B1,A5 + .endasmfunc + + .global _OPENSSL_cleanse +_OPENSSL_cleanse: + .asmfunc + ZERO A3:A2 +|| ZERO B2 +|| SHRU B4,3,B0 ; is length >= 8 +|| ADD 
1,A4,B6 + [!B0] BNOP RA +|| ZERO A1 +|| ZERO B1 + [B0] MVC B0,ILC +||[!B0] CMPLT 0,B4,A1 +||[!B0] CMPLT 1,B4,B1 + [A1] STB A2,*A4++[2] +|| [B1] STB B2,*B6++[2] +||[!B0] CMPLT 2,B4,A1 +||[!B0] CMPLT 3,B4,B1 + [A1] STB A2,*A4++[2] +|| [B1] STB B2,*B6++[2] +||[!B0] CMPLT 4,B4,A1 +||[!B0] CMPLT 5,B4,B1 + [A1] STB A2,*A4++[2] +|| [B1] STB B2,*B6++[2] +||[!B0] CMPLT 6,B4,A1 + [A1] STB A2,*A4++[2] + + SPLOOP 1 + STNDW A3:A2,*A4++ +|| SUB B4,8,B4 + SPKERNEL + + MV B4,B0 ; remaining bytes +|| ADD 1,A4,B6 +|| BNOP RA + [B0] CMPLT 0,B0,A1 +|| [B0] CMPLT 1,B0,B1 + [A1] STB A2,*A4++[2] +|| [B1] STB B2,*B6++[2] +|| [B0] CMPLT 2,B0,A1 +|| [B0] CMPLT 3,B0,B1 + [A1] STB A2,*A4++[2] +|| [B1] STB B2,*B6++[2] +|| [B0] CMPLT 4,B0,A1 +|| [B0] CMPLT 5,B0,B1 + [A1] STB A2,*A4++[2] +|| [B1] STB B2,*B6++[2] +|| [B0] CMPLT 6,B0,A1 + [A1] STB A2,*A4++[2] + .endasmfunc + + .global _OPENSSL_atomic_add +_OPENSSL_atomic_add: + .asmfunc + MV A4,B0 +atomic_add?: + LL *B0,B5 + NOP 4 + ADD B4,B5,B5 + SL B5,*B0 + CMTL *B0,B1 + NOP 4 + [!B1] B atomic_add? + [B1] BNOP RA,4 + MV B5,A4 + .endasmfunc + + .global _OPENSSL_wipe_cpu +_OPENSSL_wipe_cpu: + .asmfunc + ZERO A0 +|| ZERO B0 +|| ZERO A1 +|| ZERO B1 + ZERO A3:A2 +|| MVD B0,B2 +|| ZERO A4 +|| ZERO B4 +|| ZERO A5 +|| ZERO B5 +|| BNOP RA + ZERO A7:A6 +|| ZERO B7:B6 +|| ZERO A8 +|| ZERO B8 +|| ZERO A9 +|| ZERO B9 + ZERO A17:A16 +|| ZERO B17:B16 +|| ZERO A18 +|| ZERO B18 +|| ZERO A19 +|| ZERO B19 + ZERO A21:A20 +|| ZERO B21:B20 +|| ZERO A22 +|| ZERO B22 +|| ZERO A23 +|| ZERO B23 + ZERO A25:A24 +|| ZERO B25:B24 +|| ZERO A26 +|| ZERO B26 +|| ZERO A27 +|| ZERO B27 + ZERO A29:A28 +|| ZERO B29:B28 +|| ZERO A30 +|| ZERO B30 +|| ZERO A31 +|| ZERO B31 + .endasmfunc + +CLFLUSH .macro CONTROL,ADDR,LEN + B passthrough? 
+|| STW ADDR,*CONTROL[0] + STW LEN,*CONTROL[1] +spinlock?: + LDW *CONTROL[1],A0 + NOP 3 +passthrough?: + NOP + [A0] BNOP spinlock?,5 + .endm + + .global _OPENSSL_instrument_bus +_OPENSSL_instrument_bus: + .asmfunc + MV B4,B0 ; reassign sizeof(output) +|| MV A4,B4 ; reassign output +|| MVK 0x00004030,A3 + MV B0,A4 ; return value +|| MVK 1,A1 +|| MVKH 0x01840000,A3 ; L1DWIBAR + MVC TSCL,B8 ; collect 1st tick +|| MVK 0x00004010,A5 + MV B8,B9 ; lasttick = tick +|| MVK 0,B7 ; lastdiff = 0 +|| MVKH 0x01840000,A5 ; L2WIBAR + CLFLUSH A3,B4,A1 ; write-back and invalidate L1D line + CLFLUSH A5,B4,A1 ; write-back and invalidate L2 line + LL *B4,B5 + NOP 4 + ADD B7,B5,B5 + SL B5,*B4 + CMTL *B4,B1 + NOP 4 + STW B5,*B4 +bus_loop1?: + MVC TSCL,B8 +|| [B0] SUB B0,1,B0 + SUB B8,B9,B7 ; lastdiff = tick - lasttick +|| MV B8,B9 ; lasttick = tick + CLFLUSH A3,B4,A1 ; write-back and invalidate L1D line + CLFLUSH A5,B4,A1 ; write-back and invalidate L2 line + LL *B4,B5 + NOP 4 + ADD B7,B5,B5 + SL B5,*B4 + CMTL *B4,B1 + STW B5,*B4 ; [!B1] is removed to flatten samples +|| ADDK 4,B4 +|| [B0] BNOP bus_loop1?,5 + + BNOP RA,5 + .endasmfunc + + .global _OPENSSL_instrument_bus2 +_OPENSSL_instrument_bus2: + .asmfunc + MV A6,B0 ; reassign max +|| MV B4,A6 ; reassign sizeof(output) +|| MVK 0x00004030,A3 + MV A4,B4 ; reassign output +|| MVK 0,A4 ; return value +|| MVK 1,A1 +|| MVKH 0x01840000,A3 ; L1DWIBAR + + MVC TSCL,B8 ; collect 1st tick +|| MVK 0x00004010,A5 + MV B8,B9 ; lasttick = tick +|| MVK 0,B7 ; lastdiff = 0 +|| MVKH 0x01840000,A5 ; L2WIBAR + CLFLUSH A3,B4,A1 ; write-back and invalidate L1D line + CLFLUSH A5,B4,A1 ; write-back and invalidate L2 line + LL *B4,B5 + NOP 4 + ADD B7,B5,B5 + SL B5,*B4 + CMTL *B4,B1 + NOP 4 + STW B5,*B4 + + MVC TSCL,B8 ; collect 1st diff + SUB B8,B9,B7 ; lastdiff = tick - lasttick +|| MV B8,B9 ; lasttick = tick +|| SUB B0,1,B0 +bus_loop2?: + CLFLUSH A3,B4,A1 ; write-back and invalidate L1D line + CLFLUSH A5,B4,A1 ; write-back and invalidate L2 line + LL *B4,B5 + 
NOP 4 + ADD B7,B5,B5 + SL B5,*B4 + CMTL *B4,B1 + STW B5,*B4 ; [!B1] is removed to flatten samples +||[!B0] BNOP bus_loop2_done?,2 +|| SUB B0,1,B0 + MVC TSCL,B8 + SUB B8,B9,B8 +|| MV B8,B9 + CMPEQ B8,B7,B2 +|| MV B8,B7 + [!B2] ADDAW B4,1,B4 +||[!B2] ADDK 1,A4 + CMPEQ A4,A6,A2 + [!A2] BNOP bus_loop2?,5 + +bus_loop2_done?: + BNOP RA,5 + .endasmfunc +___ + +print $code; +close STDOUT; diff --git a/crypto/cmac/cmac.c b/crypto/cmac/cmac.c index 5807e30ddd..e6cade6120 100644 --- a/crypto/cmac/cmac.c +++ b/crypto/cmac/cmac.c @@ -143,7 +143,8 @@ int CMAC_CTX_copy(CMAC_CTX *out, const CMAC_CTX *in) int CMAC_Init(CMAC_CTX *ctx, const void *key, size_t keylen, const EVP_CIPHER *cipher, ENGINE *impl) { - static unsigned char zero_iv[EVP_MAX_BLOCK_LENGTH]; + __fips_constseg + static const unsigned char zero_iv[EVP_MAX_BLOCK_LENGTH] = {0}; /* All zeros means restart */ if (!key && !cipher && !impl && keylen == 0) { diff --git a/crypto/des/spr.h b/crypto/des/spr.h index 9be0dce9f6..35e71a5118 100644 --- a/crypto/des/spr.h +++ b/crypto/des/spr.h @@ -56,6 +56,9 @@ * [including the GNU Public Licence.] */ +#ifdef _TMS320C6X +# pragma DATA_SECTION(DES_SPtrans,".const:des_sptrans") +#endif __fips_constseg OPENSSL_GLOBAL const DES_LONG DES_SPtrans[8][64]={ { diff --git a/crypto/modes/asm/ghash-c64xplus.pl b/crypto/modes/asm/ghash-c64xplus.pl new file mode 100644 index 0000000000..1ac4d927d0 --- /dev/null +++ b/crypto/modes/asm/ghash-c64xplus.pl @@ -0,0 +1,231 @@ +#!/usr/bin/env perl +# +# ==================================================================== +# Written by Andy Polyakov for the OpenSSL +# project. The module is, however, dual licensed under OpenSSL and +# CRYPTOGAMS licenses depending on where you obtain it. For further +# details see http://www.openssl.org/~appro/cryptogams/. 
+# ==================================================================== +# +# December 2011 +# +# The module implements GCM GHASH function and underlying single +# multiplication operation in GF(2^128). Even though subroutines +# have _4bit suffix, they are not using any tables, but rely on +# hardware Galois Field Multiply support. Streamed GHASH processes +# byte in ~7 cycles, which is >6x faster than "4-bit" table-driven +# code compiled with TI's cl6x 6.0 with -mv6400+ -o2 flags. We are +# comparing apples vs. oranges, but compiler surely could have done +# better, because theoretical [though not necessarily achievable] +# estimate for "4-bit" table-driven implementation is ~12 cycles. + +while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {} +open STDOUT,">$output"; + +($Xip,$Htable,$inp,$len)=("A4","B4","A6","B6"); # arguments + +($Z0,$Z1,$Z2,$Z3, $H0, $H1, $H2, $H3, + $H0x,$H1x,$H2x,$H3x)=map("A$_",(16..27)); +($H01u,$H01y,$H2u,$H3u, $H0y,$H1y,$H2y,$H3y, + $H0z,$H1z,$H2z,$H3z)=map("B$_",(16..27)); +($FF000000,$E10000)=("B30","B31"); +($xip,$x0,$x1,$xib)=map("B$_",(6..9)); # $xip zaps $len + $xia="A9"; +($rem,$res)=("B4","B5"); # $rem zaps $Htable + +$code.=<<___; + .text + + .asg B3,RA + + .if 0 + .global _gcm_gmult_1bit +_gcm_gmult_1bit: + ADDAD $Htable,2,$Htable + .endif + .global _gcm_gmult_4bit +_gcm_gmult_4bit: + .asmfunc + LDDW *${Htable}[-1],$H1:$H0 ; H.lo + LDDW *${Htable}[-2],$H3:$H2 ; H.hi +|| MV $Xip,${xip} ; reassign Xi +|| MVK 15,B1 ; SPLOOPD constant + + MVK 0xE1,$E10000 +|| LDBU *++${xip}[15],$x1 ; Xi[15] + MVK 0xFF,$FF000000 +|| LDBU *--${xip},$x0 ; Xi[14] + SHL $E10000,16,$E10000 ; [pre-shifted] reduction polynomial + SHL $FF000000,24,$FF000000 ; upper byte mask +|| BNOP ghash_loop? 
+|| MVK 1,B0 ; take a single spin + + PACKH2 $H0,$H1,$xia ; pack H0' and H1's upper bytes + AND $H2,$FF000000,$H2u ; H2's upper byte + AND $H3,$FF000000,$H3u ; H3's upper byte +|| SHRU $H2u,8,$H2u + SHRU $H3u,8,$H3u +|| ZERO $Z1:$Z0 + SHRU2 $xia,8,$H01u +|| ZERO $Z3:$Z2 + .endasmfunc + + .global _gcm_ghash_4bit +_gcm_ghash_4bit: + .asmfunc + LDDW *${Htable}[-1],$H1:$H0 ; H.lo +|| SHRU $len,4,B0 ; reassign len + LDDW *${Htable}[-2],$H3:$H2 ; H.hi +|| MV $Xip,${xip} ; reassign Xi +|| MVK 15,B1 ; SPLOOPD constant + + MVK 0xE1,$E10000 +|| [B0] LDNDW *${inp}[1],$H1x:$H0x + MVK 0xFF,$FF000000 +|| [B0] LDNDW *${inp}++[2],$H3x:$H2x + SHL $E10000,16,$E10000 ; [pre-shifted] reduction polynomial +|| LDDW *${xip}[1],$Z1:$Z0 + SHL $FF000000,24,$FF000000 ; upper byte mask +|| LDDW *${xip}[0],$Z3:$Z2 + + PACKH2 $H0,$H1,$xia ; pack H0' and H1's upper bytes + AND $H2,$FF000000,$H2u ; H2's upper byte + AND $H3,$FF000000,$H3u ; H3's upper byte +|| SHRU $H2u,8,$H2u + SHRU $H3u,8,$H3u + SHRU2 $xia,8,$H01u + +|| [B0] XOR $H0x,$Z0,$Z0 ; Xi^=inp +|| [B0] XOR $H1x,$Z1,$Z1 + .if .LITTLE_ENDIAN + [B0] XOR $H2x,$Z2,$Z2 +|| [B0] XOR $H3x,$Z3,$Z3 +|| [B0] SHRU $Z1,24,$xia ; Xi[15], avoid cross-path stall + STDW $Z1:$Z0,*${xip}[1] +|| [B0] SHRU $Z1,16,$x0 ; Xi[14] +|| [B0] ZERO $Z1:$Z0 + .else + [B0] XOR $H2x,$Z2,$Z2 +|| [B0] XOR $H3x,$Z3,$Z3 +|| [B0] MV $Z0,$xia ; Xi[15], avoid cross-path stall + STDW $Z1:$Z0,*${xip}[1] +|| [B0] SHRU $Z0,8,$x0 ; Xi[14] +|| [B0] ZERO $Z1:$Z0 + .endif + STDW $Z3:$Z2,*${xip}[0] +|| [B0] ZERO $Z3:$Z2 +|| [B0] MV $xia,$x1 + [B0] ADDK 14,${xip} + +ghash_loop?: + SPLOOPD 6 ; 6*16+7 +|| MVC B1,ILC +|| [B0] SUB B0,1,B0 +|| ZERO A0 +|| ADD $x1,$x1,$xib ; SHL $x1,1,$xib +|| SHL $x1,1,$xia +___ + +########____________________________ +# 0 D2. M1 M2 | +# 1 M1 | +# 2 M1 M2 | +# 3 D1. M1 M2 | +# 4 S1. L1 | +# 5 S2 S1x L1 D2 L2 |____________________________ +# 6/0 L1 S1 L2 S2x |D2. M1 M2 | +# 7/1 L1 S1 D1x S2 M2 | M1 | +# 8/2 S1 L1x S2 | M1 M2 | +# 9/3 S1 L1x | D1. 
M1 M2 | +# 10/4 D1x | S1. L1 | +# 11/5 |S2 S1x L1 D2 L2 |____________ +# 12/6/0 D1x __| L1 S1 L2 S2x |D2. .... +# 7/1 L1 S1 D1x S2 M2 | .... +# 8/2 S1 L1x S2 | .... +#####... ................|............ +$code.=<<___; + XORMPY $H0,$xia,$H0x ; 0 ; H·Xi[i] +|| XORMPY $H01u,$xib,$H01y +|| [A0] LDBU *--${xip},$x0 + XORMPY $H1,$xia,$H1x ; 1 + XORMPY $H2,$xia,$H2x ; 2 +|| XORMPY $H2u,$xib,$H2y + XORMPY $H3,$xia,$H3x ; 3 +|| XORMPY $H3u,$xib,$H3y +||[!A0] MVK.D 15,A0 ; *--${xip} counter + XOR.L $H0x,$Z0,$Z0 ; 4 ; Z^=H·Xi[i] +|| [A0] SUB.S A0,1,A0 + XOR.L $H1x,$Z1,$Z1 ; 5 +|| AND.D $H01y,$FF000000,$H0z +|| SWAP2.L $H01y,$H1y ; ; SHL $H01y,16,$H1y +|| SHL $x0,1,$xib +|| SHL $x0,1,$xia + + XOR.L $H2x,$Z2,$Z2 ; 6/0 ; [0,0] in epilogue +|| SHL $Z0,1,$rem ; ; rem=Z<<1 +|| SHRMB.S $Z1,$Z0,$Z0 ; ; Z>>=8 +|| AND.L $H1y,$FF000000,$H1z + XOR.L $H3x,$Z3,$Z3 ; 7/1 +|| SHRMB.S $Z2,$Z1,$Z1 +|| XOR.D $H0z,$Z0,$Z0 ; merge upper byte products +|| AND.S $H2y,$FF000000,$H2z +|| XORMPY $E10000,$rem,$res ; ; implicit rem&0x1FE + XOR.L $H1z,$Z1,$Z1 ; 8/2 +|| SHRMB.S $Z3,$Z2,$Z2 +|| AND.S $H3y,$FF000000,$H3z + XOR.L $H2z,$Z2,$Z2 ; 9/3 +|| SHRU $Z3,8,$Z3 + XOR.D $H3z,$Z3,$Z3 ; 10/4 + NOP ; 11/5 + + SPKERNEL 0,2 +|| XOR.D $res,$Z3,$Z3 ; 12/6/0; Z^=res + + ; input pre-fetch is possible where D1 slot is available... + [B0] LDNDW *${inp}[1],$H1x:$H0x ; 8/- + [B0] LDNDW *${inp}++[2],$H3x:$H2x ; 9/- + NOP ; 10/- + .if .LITTLE_ENDIAN + SWAP2 $Z0,$Z1 ; 11/- +|| SWAP4 $Z1,$Z0 + SWAP4 $Z1,$Z1 ; 12/- +|| SWAP2 $Z0,$Z0 + SWAP2 $Z2,$Z3 +|| SWAP4 $Z3,$Z2 +||[!B0] BNOP RA + SWAP4 $Z3,$Z3 +|| SWAP2 $Z2,$Z2 +|| [B0] BNOP ghash_loop? + [B0] XOR $H0x,$Z0,$Z0 ; Xi^=inp +|| [B0] XOR $H1x,$Z1,$Z1 + [B0] XOR $H2x,$Z2,$Z2 +|| [B0] XOR $H3x,$Z3,$Z3 +|| [B0] SHRU $Z1,24,$xia ; Xi[15], avoid cross-path stall + STDW $Z1:$Z0,*${xip}[1] +|| [B0] SHRU $Z1,16,$x0 ; Xi[14] +|| [B0] ZERO $Z1:$Z0 + .else + [!B0] BNOP RA ; 11/- + [B0] BNOP ghash_loop? 
; 12/- + [B0] XOR $H0x,$Z0,$Z0 ; Xi^=inp +|| [B0] XOR $H1x,$Z1,$Z1 + [B0] XOR $H2x,$Z2,$Z2 +|| [B0] XOR $H3x,$Z3,$Z3 +|| [B0] MV $Z0,$xia ; Xi[15], avoid cross-path stall + STDW $Z1:$Z0,*${xip}[1] +|| [B0] SHRU $Z0,8,$x0 ; Xi[14] +|| [B0] ZERO $Z1:$Z0 + .endif + STDW $Z3:$Z2,*${xip}[0] +|| [B0] ZERO $Z3:$Z2 +|| [B0] MV $xia,$x1 + [B0] ADDK 14,${xip} + .endasmfunc + + .sect .const + .cstring "GHASH for C64x+, CRYPTOGAMS by " + .align 4 +___ + +print $code; +close STDOUT; diff --git a/crypto/modes/gcm128.c b/crypto/modes/gcm128.c index e638e42be8..8dfeae5ed5 100644 --- a/crypto/modes/gcm128.c +++ b/crypto/modes/gcm128.c @@ -674,6 +674,8 @@ void gcm_ghash_4bit_x86(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len void gcm_gmult_neon(u64 Xi[2],const u128 Htable[16]); void gcm_ghash_neon(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len); # endif +# elif defined(_TMS320C6400_PLUS) +# define GHASH_ASM_C64Xplus # endif #endif @@ -746,6 +748,10 @@ void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx,void *key,block128_f block) ctx->gmult = gcm_gmult_4bit; ctx->ghash = gcm_ghash_4bit; } +# elif defined(GHASH_ASM_C64Xplus) + /* C64x+ assembler doesn't use tables, skip gcm_init_4bit. + * This is likely to trigger "function never referenced" + * warning and code being eliminated. */ # else gcm_init_4bit(ctx->Htable,ctx->H.u); # endif diff --git a/crypto/sha/asm/sha1-c64xplus.pl b/crypto/sha/asm/sha1-c64xplus.pl new file mode 100644 index 0000000000..87000d1e8f --- /dev/null +++ b/crypto/sha/asm/sha1-c64xplus.pl @@ -0,0 +1,323 @@ +#!/usr/bin/env perl +# +# ==================================================================== +# Written by Andy Polyakov for the OpenSSL +# project. The module is, however, dual licensed under OpenSSL and +# CRYPTOGAMS licenses depending on where you obtain it. For further +# details see http://www.openssl.org/~appro/cryptogams/. +# ==================================================================== +# +# SHA1 for C64x+. 
+# +# November 2011 +# +# If compared to compiler-generated code with similar characteristics, +# i.e. compiled with OPENSSL_SMALL_FOOTPRINT and utilizing SPLOOPs, +# this implementation is 25% smaller and >2x faster. In absolute terms +# performance is (quite impressive) ~6.5 cycles per processed byte. +# Fully unrolled assembler would be ~5x larger and is likely to be +# ~15% faster. It would be free from references to intermediate ring +# buffer, but put more pressure on L1P [both because the code would be +# larger and won't be using SPLOOP buffer]. There are no plans to +# realize fully unrolled variant though... +# +# !!! Note that this module uses AMR, which means that all interrupt +# service routines are expected to preserve it and for own well-being +# zero it upon entry. + +while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {} +open STDOUT,">$output"; + +($CTX,$INP,$NUM) = ("A4","B4","A6"); # arguments + +($A,$B,$C,$D,$E, $Arot,$F,$F0,$T,$K) = map("A$_",(16..20, 21..25)); +($X0,$X2,$X8,$X13) = ("A26","B26","A27","B27"); +($TX0,$TX1,$TX2,$TX3) = map("B$_",(28..31)); +($XPA,$XPB) = ("A5","B5"); # X circular buffer +($Actx,$Bctx,$Cctx,$Dctx,$Ectx) = map("A$_",(3,6..9)); # zaps $NUM + +$code=<<___; + .text + + .asg B3,RA + .asg A15,FP + .asg B15,SP + + .if .BIG_ENDIAN + .asg MV,SWAP2 + .asg MV,SWAP4 + .endif + + .global _sha1_block_data_order +_sha1_block_data_order: + .asmfunc stack_usage(64) + MV $NUM,A0 ; reassign $NUM +|| MVK -64,B0 + [!A0] BNOP RA ; if ($NUM==0) return; +|| [A0] STW FP,*SP--[16] ; save frame pointer and alloca(64) +|| [A0] MV SP,FP + [A0] LDW *${CTX}[0],$A ; load A-E... 
+|| [A0] AND B0,SP,SP ; align stack at 64 bytes + [A0] LDW *${CTX}[1],$B +|| [A0] SUBAW SP,2,SP ; reserve two words above buffer + [A0] LDW *${CTX}[2],$C +|| [A0] MVK 0x00404,B0 + [A0] LDW *${CTX}[3],$D +|| [A0] MVKH 0x50000,B0 ; 0x050404, 64 bytes for $XP[AB] + [A0] LDW *${CTX}[4],$E +|| [A0] MVC B0,AMR ; setup circular addressing + LDNW *${INP}++,$TX1 ; pre-fetch input + NOP 1 + +loop?: + MVK 0x00007999,$K +|| ADDAW SP,2,$XPA +|| SUB A0,1,A0 +|| MVK 13,B0 + MVKH 0x5a820000,$K ; K_00_19 +|| ADDAW SP,2,$XPB +|| MV $A,$Actx +|| MV $B,$Bctx +;;================================================== + SPLOOPD 5 ; BODY_00_13 +|| MV $C,$Cctx +|| MV $D,$Dctx +|| MV $E,$Ectx +|| MVC B0,ILC + + ROTL $A,5,$Arot +|| AND $C,$B,$F +|| ANDN $D,$B,$F0 +|| ADD $K,$E,$T ; T=E+K + + XOR $F0,$F,$F ; F_00_19(B,C,D) +|| MV $D,$E ; E=D +|| MV $C,$D ; D=C +|| SWAP2 $TX1,$TX2 +|| LDNW *${INP}++,$TX1 + + ADD $F,$T,$T ; T+=F_00_19(B,C,D) +|| ROTL $B,30,$C ; C=ROL(B,30) +|| SWAP4 $TX2,$TX3 ; byte swap + + ADD $Arot,$T,$T ; T+=ROL(A,5) +|| MV $A,$B ; B=A + + ADD $TX3,$T,$A ; A=T+Xi +|| STW $TX3,*${XPB}++ + SPKERNEL +;;================================================== + ROTL $A,5,$Arot ; BODY_14 +|| AND $C,$B,$F +|| ANDN $D,$B,$F0 +|| ADD $K,$E,$T ; T=E+K + + XOR $F0,$F,$F ; F_00_19(B,C,D) +|| MV $D,$E ; E=D +|| MV $C,$D ; D=C +|| SWAP2 $TX1,$TX2 +|| LDNW *${INP}++,$TX1 + + ADD $F,$T,$T ; T+=F_00_19(B,C,D) +|| ROTL $B,30,$C ; C=ROL(B,30) +|| SWAP4 $TX2,$TX2 ; byte swap +|| LDW *${XPA}++,$X0 ; fetches from X ring buffer are +|| LDW *${XPB}[4],$X2 ; 2 iterations ahead + + ADD $Arot,$T,$T ; T+=ROL(A,5) +|| MV $A,$B ; B=A +|| LDW *${XPA}[7],$X8 +|| MV $TX3,$X13 ; || LDW *${XPB}[15],$X13 +|| MV $TX2,$TX3 + + ADD $TX2,$T,$A ; A=T+Xi +|| STW $TX2,*${XPB}++ +;;================================================== + ROTL $A,5,$Arot ; BODY_15 +|| AND $C,$B,$F +|| ANDN $D,$B,$F0 +|| ADD $K,$E,$T ; T=E+K + + XOR $F0,$F,$F ; F_00_19(B,C,D) +|| MV $D,$E ; E=D +|| MV $C,$D ; D=C +|| SWAP2 $TX1,$TX2 + + ADD 
$F,$T,$T ; T+=F_00_19(B,C,D) +|| ROTL $B,30,$C ; C=ROL(B,30) +|| SWAP4 $TX2,$TX2 ; byte swap +|| XOR $X0,$X2,$TX0 ; Xupdate XORs are 1 iteration ahead +|| LDW *${XPA}++,$X0 +|| LDW *${XPB}[4],$X2 + + ADD $Arot,$T,$T ; T+=ROL(A,5) +|| MV $A,$B ; B=A +|| XOR $X8,$X13,$TX1 +|| LDW *${XPA}[7],$X8 +|| MV $TX3,$X13 ; || LDW *${XPB}[15],$X13 +|| MV $TX2,$TX3 + + ADD $TX2,$T,$A ; A=T+Xi +|| STW $TX2,*${XPB}++ +|| XOR $TX0,$TX1,$TX1 +|| MVK 3,B0 +;;================================================== + SPLOOPD 5 ; BODY_16_19 +|| MVC B0,ILC + + ROTL $A,5,$Arot +|| AND $C,$B,$F +|| ANDN $D,$B,$F0 +|| ADD $K,$E,$T ; T=E+K +|| ROTL $TX1,1,$TX2 ; Xupdate output + + XOR $F0,$F,$F ; F_00_19(B,C,D) +|| MV $D,$E ; E=D +|| MV $C,$D ; D=C + + ADD $F,$T,$T ; T+=F_00_19(B,C,D) +|| ROTL $B,30,$C ; C=ROL(B,30) +|| XOR $X0,$X2,$TX0 +|| LDW *${XPA}++,$X0 +|| LDW *${XPB}[4],$X2 + + ADD $Arot,$T,$T ; T+=ROL(A,5) +|| MV $A,$B ; B=A +|| XOR $X8,$X13,$TX1 +|| LDW *${XPA}[7],$X8 +|| MV $TX3,$X13 ; || LDW *${XPB}[15],$X13 +|| MV $TX2,$TX3 + + ADD $TX2,$T,$A ; A=T+Xi +|| STW $TX2,*${XPB}++ +|| XOR $TX0,$TX1,$TX1 + SPKERNEL + + MVK 0xffffeba1,$K +|| MVK 19,B0 + MVKH 0x6ed90000,$K ; K_20_39 +___ +sub BODY_20_39 { +$code.=<<___; +;;================================================== + SPLOOPD 5 ; BODY_20_39 +|| MVC B0,ILC + + ROTL $A,5,$Arot +|| XOR $B,$C,$F +|| ADD $K,$E,$T ; T=E+K +|| ROTL $TX1,1,$TX2 ; Xupdate output + + XOR $D,$F,$F ; F_20_39(B,C,D) +|| MV $D,$E ; E=D +|| MV $C,$D ; D=C + + ADD $F,$T,$T ; T+=F_20_39(B,C,D) +|| ROTL $B,30,$C ; C=ROL(B,30) +|| XOR $X0,$X2,$TX0 +|| LDW *${XPA}++,$X0 +|| LDW *${XPB}[4],$X2 + + ADD $Arot,$T,$T ; T+=ROL(A,5) +|| MV $A,$B ; B=A +|| XOR $X8,$X13,$TX1 +|| LDW *${XPA}[7],$X8 +|| MV $TX3,$X13 ; || LDW *${XPB}[15],$X13 +|| MV $TX2,$TX3 + + ADD $TX2,$T,$A ; A=T+Xi +|| STW $TX2,*${XPB}++ ; last one is redundant +|| XOR $TX0,$TX1,$TX1 + SPKERNEL +___ +$code.=<<___ if (!shift); + MVK 0xffffbcdc,$K + MVKH 0x8f1b0000,$K ; K_40_59 +___ +} &BODY_20_39(); +$code.=<<___; 
+;;================================================== + SPLOOPD 5 ; BODY_40_59 +|| MVC B0,ILC +|| AND $B,$C,$F +|| AND $B,$D,$F0 + + ROTL $A,5,$Arot +|| XOR $F0,$F,$F +|| AND $C,$D,$F0 +|| ADD $K,$E,$T ; T=E+K +|| ROTL $TX1,1,$TX2 ; Xupdate output + + XOR $F0,$F,$F ; F_40_59(B,C,D) +|| MV $D,$E ; E=D +|| MV $C,$D ; D=C + + ADD $F,$T,$T ; T+=F_40_59(B,C,D) +|| ROTL $B,30,$C ; C=ROL(B,30) +|| XOR $X0,$X2,$TX0 +|| LDW *${XPA}++,$X0 +|| LDW *${XPB}[4],$X2 + + ADD $Arot,$T,$T ; T+=ROL(A,5) +|| MV $A,$B ; B=A +|| XOR $X8,$X13,$TX1 +|| LDW *${XPA}[7],$X8 +|| MV $TX3,$X13 ; || LDW *${XPB}[15],$X13 +|| MV $TX2,$TX3 + + ADD $TX2,$T,$A ; A=T+Xi +|| STW $TX2,*${XPB}++ +|| XOR $TX0,$TX1,$TX1 +|| AND $B,$C,$F +|| AND $B,$D,$F0 + SPKERNEL + + MVK 0xffffc1d6,$K +|| MVK 18,B0 + MVKH 0xca620000,$K ; K_60_79 +___ + &BODY_20_39(-1); # BODY_60_78 +$code.=<<___; +;;================================================== + [A0] B loop? +|| ROTL $A,5,$Arot ; BODY_79 +|| XOR $B,$C,$F +|| ROTL $TX1,1,$TX2 ; Xupdate output + + [A0] LDNW *${INP}++,$TX1 ; pre-fetch input +|| ADD $K,$E,$T ; T=E+K +|| XOR $D,$F,$F ; F_20_39(B,C,D) + + ADD $F,$T,$T ; T+=F_20_39(B,C,D) +|| ADD $Ectx,$D,$E ; E=D,E+=Ectx +|| ADD $Dctx,$C,$D ; D=C,D+=Dctx +|| ROTL $B,30,$C ; C=ROL(B,30) + + ADD $Arot,$T,$T ; T+=ROL(A,5) +|| ADD $Bctx,$A,$B ; B=A,B+=Bctx + + ADD $TX2,$T,$A ; A=T+Xi + + ADD $Actx,$A,$A ; A+=Actx +|| ADD $Cctx,$C,$C ; C+=Cctx +;; end of loop? + + BNOP RA ; return +|| MV FP,SP ; restore stack pointer +|| LDW *FP[0],FP ; restore frame pointer + STW $A,*${CTX}[0] ; emit A-E... 
+|| MVK 0,B0 + STW $B,*${CTX}[1] +|| MVC B0,AMR ; clear AMR + STW $C,*${CTX}[2] + STW $D,*${CTX}[3] + STW $E,*${CTX}[4] + .endasmfunc + + .sect .const + .cstring "SHA1 block transform for C64x+, CRYPTOGAMS by " + .align 4 +___ + +print $code; +close STDOUT; diff --git a/crypto/sha/asm/sha256-c64xplus.pl b/crypto/sha/asm/sha256-c64xplus.pl new file mode 100644 index 0000000000..8b92c84555 --- /dev/null +++ b/crypto/sha/asm/sha256-c64xplus.pl @@ -0,0 +1,292 @@ +#!/usr/bin/env perl +# +# ==================================================================== +# Written by Andy Polyakov for the OpenSSL +# project. The module is, however, dual licensed under OpenSSL and +# CRYPTOGAMS licenses depending on where you obtain it. For further +# details see http://www.openssl.org/~appro/cryptogams/. +# ==================================================================== +# +# SHA256 for C64x+. +# +# January 2012 +# +# Performance is just below 10 cycles per processed byte, which is +# almost 40% faster than compiler-generated code. Unroll is unlikely +# to give more than ~8% improvement... +# +# !!! Note that this module uses AMR, which means that all interrupt +# service routines are expected to preserve it and for own well-being +# zero it upon entry. 
+ +while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {} +open STDOUT,">$output"; + +($CTXA,$INP,$NUM) = ("A4","B4","A6"); # arguments + $K256="A3"; + +($A,$Actx,$B,$Bctx,$C,$Cctx,$D,$Dctx,$T2,$S0,$s1,$t0a,$t1a,$t2a,$X9,$X14) + =map("A$_",(16..31)); +($E,$Ectx,$F,$Fctx,$G,$Gctx,$H,$Hctx,$T1,$S1,$s0,$t0e,$t1e,$t2e,$X1,$X15) + =map("B$_",(16..31)); + +($Xia,$Xib)=("A5","B5"); # circular/ring buffer + $CTXB=$t2e; + +($Xn,$X0,$K)=("B7","B8","B9"); +($Maj,$Ch)=($T2,"B6"); + +$code.=<<___; + .text + + .asg B3,RA + .asg A15,FP + .asg B15,SP + + .if .BIG_ENDIAN + .asg SWAP2,MV + .asg SWAP4,MV + .endif + + .global _sha256_block_data_order +_sha256_block_data_order: + .asmfunc stack_usage(64) + MV $NUM,A0 ; reassign $NUM +|| MVK -64,B0 + [!A0] BNOP RA ; if ($NUM==0) return; +|| [A0] STW FP,*SP--[16] ; save frame pointer and alloca(64) +|| [A0] MV SP,FP + [A0] ADDKPC _sha256_block_data_order,B2 +|| [A0] AND B0,SP,SP ; align stack at 64 bytes + [A0] MVK 0x00404,B1 +|| [A0] MVKL (K256-_sha256_block_data_order),$K256 + [A0] MVKH 0x50000,B1 +|| [A0] MVKH (K256-_sha256_block_data_order),$K256 + [A0] MVC B1,AMR ; setup circular addressing +|| [A0] MV SP,$Xia + [A0] MV SP,$Xib +|| [A0] ADD B2,$K256,$K256 +|| [A0] MV $CTXA,$CTXB +|| [A0] SUBAW SP,2,SP ; reserve two words above buffer + LDW *${CTXA}[0],$A ; load ctx +|| LDW *${CTXB}[4],$E + LDW *${CTXA}[1],$B +|| LDW *${CTXB}[5],$F + LDW *${CTXA}[2],$C +|| LDW *${CTXB}[6],$G + LDW *${CTXA}[3],$D +|| LDW *${CTXB}[7],$H + + LDNW *$INP++,$Xn ; pre-fetch input + LDW *$K256++,$K ; pre-fetch K256[0] + MVK 14,B0 ; loop counters + MVK 47,B1 +|| ADDAW $Xia,9,$Xia +outerloop?: + SUB A0,1,A0 +|| MV $A,$Actx +|| MV $E,$Ectx +|| MVD $B,$Bctx +|| MVD $F,$Fctx + MV $C,$Cctx +|| MV $G,$Gctx +|| MVD $D,$Dctx +|| MVD $H,$Hctx +|| SWAP4 $Xn,$X0 + + SPLOOPD 8 ; BODY_00_14 +|| MVC B0,ILC +|| SWAP2 $X0,$X0 + + LDNW *$INP++,$Xn +|| ROTL $A,30,$S0 +|| OR $A,$B,$Maj +|| AND $A,$B,$t2a +|| ROTL $E,26,$S1 +|| AND $F,$E,$Ch +|| ANDN $G,$E,$t2e + ROTL 
$A,19,$t0a +|| AND $C,$Maj,$Maj +|| ROTL $E,21,$t0e +|| XOR $t2e,$Ch,$Ch ; Ch(e,f,g) = (e&f)^(~e&g) + ROTL $A,10,$t1a +|| OR $t2a,$Maj,$Maj ; Maj(a,b,c) = ((a|b)&c)|(a&b) +|| ROTL $E,7,$t1e +|| ADD $K,$H,$T1 ; T1 = h + K256[i] + ADD $X0,$T1,$T1 ; T1 += X[i]; +|| STW $X0,*$Xib++ +|| XOR $t0a,$S0,$S0 +|| XOR $t0e,$S1,$S1 + XOR $t1a,$S0,$S0 ; Sigma0(a) +|| XOR $t1e,$S1,$S1 ; Sigma1(e) +|| LDW *$K256++,$K ; pre-fetch K256[i+1] +|| ADD $Ch,$T1,$T1 ; T1 += Ch(e,f,g) + ADD $S1,$T1,$T1 ; T1 += Sigma1(e) +|| ADD $S0,$Maj,$T2 ; T2 = Sigma0(a) + Maj(a,b,c) +|| ROTL $G,0,$H ; h = g +|| MV $F,$G ; g = f +|| MV $X0,$X14 +|| SWAP4 $Xn,$X0 + SWAP2 $X0,$X0 +|| MV $E,$F ; f = e +|| ADD $D,$T1,$E ; e = d + T1 +|| MV $C,$D ; d = c + MV $B,$C ; c = b +|| MV $A,$B ; b = a +|| ADD $T1,$T2,$A ; a = T1 + T2 + SPKERNEL + + ROTL $A,30,$S0 ; BODY_15 +|| OR $A,$B,$Maj +|| AND $A,$B,$t2a +|| ROTL $E,26,$S1 +|| AND $F,$E,$Ch +|| ANDN $G,$E,$t2e +|| LDW *${Xib}[1],$Xn ; modulo-scheduled + ROTL $A,19,$t0a +|| AND $C,$Maj,$Maj +|| ROTL $E,21,$t0e +|| XOR $t2e,$Ch,$Ch ; Ch(e,f,g) = (e&f)^(~e&g) +|| LDW *${Xib}[2],$X1 ; modulo-scheduled + ROTL $A,10,$t1a +|| OR $t2a,$Maj,$Maj ; Maj(a,b,c) = ((a|b)&c)|(a&b) +|| ROTL $E,7,$t1e +|| ADD $K,$H,$T1 ; T1 = h + K256[i] + ADD $X0,$T1,$T1 ; T1 += X[i]; +|| STW $X0,*$Xib++ +|| XOR $t0a,$S0,$S0 +|| XOR $t0e,$S1,$S1 + XOR $t1a,$S0,$S0 ; Sigma0(a) +|| XOR $t1e,$S1,$S1 ; Sigma1(e) +|| LDW *$K256++,$K ; pre-fetch K256[i+1] +|| ADD $Ch,$T1,$T1 ; T1 += Ch(e,f,g) + ADD $S1,$T1,$T1 ; T1 += Sigma1(e) +|| ADD $S0,$Maj,$T2 ; T2 = Sigma0(a) + Maj(a,b,c) +|| ROTL $G,0,$H ; h = g +|| MV $F,$G ; g = f +|| MV $X0,$X15 + MV $E,$F ; f = e +|| ADD $D,$T1,$E ; e = d + T1 +|| MV $C,$D ; d = c +|| MV $Xn,$X0 ; modulo-scheduled +|| LDW *$Xia,$X9 ; modulo-scheduled +|| ROTL $X1,25,$t0e ; modulo-scheduled +|| ROTL $X14,15,$t0a ; modulo-scheduled + SHRU $X1,3,$s0 ; modulo-scheduled +|| SHRU $X14,10,$s1 ; modulo-scheduled +|| ROTL $B,0,$C ; c = b +|| MV $A,$B ; b = a +|| ADD $T1,$T2,$A ; 
a = T1 + T2 + + SPLOOPD 10 ; BODY_16_63 +|| MVC B1,ILC +|| ROTL $X1,14,$t1e ; modulo-scheduled +|| ROTL $X14,13,$t1a ; modulo-scheduled + + XOR $t0e,$s0,$s0 +|| XOR $t0a,$s1,$s1 +|| MV $X15,$X14 +|| MV $X1,$Xn + XOR $t1e,$s0,$s0 ; sigma0(X[i+1]) +|| XOR $t1a,$s1,$s1 ; sigma1(X[i+14]) +|| LDW *${Xib}[2],$X1 ; modulo-scheduled + ROTL $A,30,$S0 +|| OR $A,$B,$Maj +|| AND $A,$B,$t2a +|| ROTL $E,26,$S1 +|| AND $F,$E,$Ch +|| ANDN $G,$E,$t2e +|| ADD $X9,$X0,$X0 ; X[i] += X[i+9] + ROTL $A,19,$t0a +|| AND $C,$Maj,$Maj +|| ROTL $E,21,$t0e +|| XOR $t2e,$Ch,$Ch ; Ch(e,f,g) = (e&f)^(~e&g) +|| ADD $s0,$X0,$X0 ; X[i] += sigma0(X[i+1]) + ROTL $A,10,$t1a +|| OR $t2a,$Maj,$Maj ; Maj(a,b,c) = ((a|b)&c)|(a&b) +|| ROTL $E,7,$t1e +|| ADD $H,$K,$T1 ; T1 = h + K256[i] +|| ADD $s1,$X0,$X0 ; X[i] += sigma1(X[i+14]) + XOR $t0a,$S0,$S0 +|| XOR $t0e,$S1,$S1 +|| ADD $X0,$T1,$T1 ; T1 += X[i] +|| STW $X0,*$Xib++ + XOR $t1a,$S0,$S0 ; Sigma0(a) +|| XOR $t1e,$S1,$S1 ; Sigma1(e) +|| ADD $Ch,$T1,$T1 ; T1 += Ch(e,f,g) +|| MV $X0,$X15 +|| ROTL $G,0,$H ; h = g +|| LDW *$K256++,$K ; pre-fetch K256[i+1] + ADD $S1,$T1,$T1 ; T1 += Sigma1(e) +|| ADD $S0,$Maj,$T2 ; T2 = Sigma0(a) + Maj(a,b,c) +|| MV $F,$G ; g = f +|| MV $Xn,$X0 ; modulo-scheduled +|| LDW *++$Xia,$X9 ; modulo-scheduled +|| ROTL $X1,25,$t0e ; modulo-scheduled +|| ROTL $X14,15,$t0a ; modulo-scheduled + ROTL $X1,14,$t1e ; modulo-scheduled +|| ROTL $X14,13,$t1a ; modulo-scheduled +|| MV $E,$F ; f = e +|| ADD $D,$T1,$E ; e = d + T1 +|| MV $C,$D ; d = c +|| MV $B,$C ; c = b + MV $A,$B ; b = a +|| ADD $T1,$T2,$A ; a = T1 + T2 +|| SHRU $X1,3,$s0 ; modulo-scheduled +|| SHRU $X14,10,$s1 ; modulo-scheduled + SPKERNEL + + [A0] B outerloop? 
+|| [A0] LDNW *$INP++,$Xn ; pre-fetch input +|| [A0] ADDK -260,$K256 ; rewind K256 +|| ADD $Actx,$A,$A ; accumulate ctx +|| ADD $Ectx,$E,$E +|| ADD $Bctx,$B,$B + ADD $Fctx,$F,$F +|| ADD $Cctx,$C,$C +|| ADD $Gctx,$G,$G +|| ADD $Dctx,$D,$D +|| ADD $Hctx,$H,$H +|| [A0] LDW *$K256++,$K ; pre-fetch K256[0] + + [!A0] BNOP RA +||[!A0] MV $CTXA,$CTXB + [!A0] MV FP,SP ; restore stack pointer +||[!A0] LDW *FP[0],FP ; restore frame pointer + [!A0] STW $A,*${CTXA}[0] ; save ctx +||[!A0] STW $E,*${CTXB}[4] +||[!A0] MVK 0,B0 + [!A0] STW $B,*${CTXA}[1] +||[!A0] STW $F,*${CTXB}[5] +||[!A0] MVC B0,AMR ; clear AMR + STW $C,*${CTXA}[2] +|| STW $G,*${CTXB}[6] + STW $D,*${CTXA}[3] +|| STW $H,*${CTXB}[7] + .endasmfunc + + .sect ".const:sha_asm" + .align 128 +K256: + .uword 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5 + .uword 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5 + .uword 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3 + .uword 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174 + .uword 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc + .uword 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da + .uword 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7 + .uword 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967 + .uword 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13 + .uword 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85 + .uword 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3 + .uword 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070 + .uword 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5 + .uword 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3 + .uword 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208 + .uword 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 + .cstring "SHA256 block transform for C64x+, CRYPTOGAMS by " + .align 4 + +___ + +print $code; diff --git a/crypto/sha/asm/sha512-c64xplus.pl b/crypto/sha/asm/sha512-c64xplus.pl new file mode 100644 index 0000000000..56c8583bf3 --- /dev/null +++ b/crypto/sha/asm/sha512-c64xplus.pl @@ -0,0 +1,410 @@ +#!/usr/bin/env perl +# +# 
==================================================================== +# Written by Andy Polyakov for the OpenSSL +# project. The module is, however, dual licensed under OpenSSL and +# CRYPTOGAMS licenses depending on where you obtain it. For further +# details see http://www.openssl.org/~appro/cryptogams/. +# ==================================================================== +# +# SHA512 for C64x+. +# +# January 2012 +# +# Performance is 19 cycles per processed byte. Compared to block +# transform function from sha512.c compiled with cl6x with -mv6400+ +# -o2 -DOPENSSL_SMALL_FOOTPRINT it's almost 7x faster and 2x smaller. +# Loop unroll won't make it, this implementation, any faster, because +# it's effectively dominated by SHRU||SHL pairs and you can't schedule +# more of them. +# +# !!! Note that this module uses AMR, which means that all interrupt +# service routines are expected to preserve it and for own well-being +# zero it upon entry. + +while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {} +open STDOUT,">$output"; + +($CTXA,$INP,$NUM) = ("A4","B4","A6"); # arguments + $K512="A3"; + +($Ahi,$Actxhi,$Bhi,$Bctxhi,$Chi,$Cctxhi,$Dhi,$Dctxhi, + $Ehi,$Ectxhi,$Fhi,$Fctxhi,$Ghi,$Gctxhi,$Hhi,$Hctxhi)=map("A$_",(16..31)); +($Alo,$Actxlo,$Blo,$Bctxlo,$Clo,$Cctxlo,$Dlo,$Dctxlo, + $Elo,$Ectxlo,$Flo,$Fctxlo,$Glo,$Gctxlo,$Hlo,$Hctxlo)=map("B$_",(16..31)); + +($S1hi,$CHhi,$S0hi,$t0hi)=map("A$_",(10..13)); +($S1lo,$CHlo,$S0lo,$t0lo)=map("B$_",(10..13)); +($T1hi, $T2hi)= ("A6","A7"); +($T1lo,$T1carry,$T2lo,$T2carry)=("B6","B7","B8","B9"); +($Khi,$Klo)=("A9","A8"); +($MAJhi,$MAJlo)=($T2hi,$T2lo); +($t1hi,$t1lo)=($Khi,"B2"); + $CTXB=$t1lo; + +($Xihi,$Xilo)=("A5","B5"); # circular/ring buffer + +$code.=<<___; + .text + + .asg B3,RA + .asg A15,FP + .asg B15,SP + + .if .BIG_ENDIAN + .asg $Khi,KHI + .asg $Klo,KLO + .else + .asg $Khi,KLO + .asg $Klo,KHI + .endif + + .global _sha512_block_data_order +_sha512_block_data_order: + .asmfunc stack_usage(40+128) + MV $NUM,A0 ; 
reassign $NUM +|| MVK -128,B0 + [!A0] BNOP RA ; if ($NUM==0) return; +|| [A0] STW FP,*SP--(40) ; save frame pointer +|| [A0] MV SP,FP + [A0] STDW B13:B12,*SP[4] +|| [A0] MVK 0x00404,B1 + [A0] STDW B11:B10,*SP[3] +|| [A0] STDW A13:A12,*FP[-3] +|| [A0] MVKH 0x60000,B1 + [A0] STDW A11:A10,*SP[1] +|| [A0] MVC B1,AMR ; setup circular addressing +|| [A0] ADD B0,SP,SP ; alloca(128) + [A0] AND B0,SP,SP ; align stack at 128 bytes +|| [A0] ADDKPC _sha512_block_data_order,B1 +|| [A0] MVKL (K512-_sha512_block_data_order),$K512 + [A0] MVKH (K512-_sha512_block_data_order),$K512 +|| [A0] SUBAW SP,2,SP ; reserve two words above buffer + ADDAW SP,3,$Xilo + ADDAW SP,2,$Xihi + +|| MV $CTXA,$CTXB + LDW *${CTXA}[0^.LITTLE_ENDIAN],$Ahi ; load ctx +|| LDW *${CTXB}[1^.LITTLE_ENDIAN],$Alo +|| ADD B1,$K512,$K512 + LDW *${CTXA}[2^.LITTLE_ENDIAN],$Bhi +|| LDW *${CTXB}[3^.LITTLE_ENDIAN],$Blo + LDW *${CTXA}[4^.LITTLE_ENDIAN],$Chi +|| LDW *${CTXB}[5^.LITTLE_ENDIAN],$Clo + LDW *${CTXA}[6^.LITTLE_ENDIAN],$Dhi +|| LDW *${CTXB}[7^.LITTLE_ENDIAN],$Dlo + LDW *${CTXA}[8^.LITTLE_ENDIAN],$Ehi +|| LDW *${CTXB}[9^.LITTLE_ENDIAN],$Elo + LDW *${CTXA}[10^.LITTLE_ENDIAN],$Fhi +|| LDW *${CTXB}[11^.LITTLE_ENDIAN],$Flo + LDW *${CTXA}[12^.LITTLE_ENDIAN],$Ghi +|| LDW *${CTXB}[13^.LITTLE_ENDIAN],$Glo + LDW *${CTXA}[14^.LITTLE_ENDIAN],$Hhi +|| LDW *${CTXB}[15^.LITTLE_ENDIAN],$Hlo + + LDNDW *$INP++,B11:B10 ; pre-fetch input + LDDW *$K512++,$Khi:$Klo ; pre-fetch K512[0] +outerloop?: + MVK 15,B0 ; loop counters +|| MVK 64,B1 +|| SUB A0,1,A0 + MV $Ahi,$Actxhi +|| MV $Alo,$Actxlo +|| MV $Bhi,$Bctxhi +|| MV $Blo,$Bctxlo +|| MV $Chi,$Cctxhi +|| MV $Clo,$Cctxlo +|| MVD $Dhi,$Dctxhi +|| MVD $Dlo,$Dctxlo + MV $Ehi,$Ectxhi +|| MV $Elo,$Ectxlo +|| MV $Fhi,$Fctxhi +|| MV $Flo,$Fctxlo +|| MV $Ghi,$Gctxhi +|| MV $Glo,$Gctxlo +|| MVD $Hhi,$Hctxhi +|| MVD $Hlo,$Hctxlo +loop0_15?: + .if .BIG_ENDIAN + MV B11,$T1hi +|| MV B10,$T1lo + .else + SWAP4 B10,$T1hi +|| SWAP4 B11,$T1lo + SWAP2 $T1hi,$T1hi +|| SWAP2 $T1lo,$T1lo + .endif 
+loop16_79?: + STW $T1hi,*$Xihi++[2] +|| STW $T1lo,*$Xilo++[2] ; X[i] = T1 +|| ADD $Hhi,$T1hi,$T1hi +|| ADDU $Hlo,$T1lo,$T1carry:$T1lo ; T1 += h +|| SHRU $Ehi,14,$S1hi +|| SHL $Ehi,32-14,$S1lo + XOR $Fhi,$Ghi,$CHhi +|| XOR $Flo,$Glo,$CHlo +|| ADD KHI,$T1hi,$T1hi +|| ADDU KLO,$T1carry:$T1lo,$T1carry:$T1lo ; T1 += K512[i] +|| SHRU $Elo,14,$t0lo +|| SHL $Elo,32-14,$t0hi + XOR $t0hi,$S1hi,$S1hi +|| XOR $t0lo,$S1lo,$S1lo +|| AND $Ehi,$CHhi,$CHhi +|| AND $Elo,$CHlo,$CHlo +|| ROTL $Ghi,0,$Hhi +|| ROTL $Glo,0,$Hlo ; h = g +|| SHRU $Ehi,18,$t0hi +|| SHL $Ehi,32-18,$t0lo + XOR $t0hi,$S1hi,$S1hi +|| XOR $t0lo,$S1lo,$S1lo +|| XOR $Ghi,$CHhi,$CHhi +|| XOR $Glo,$CHlo,$CHlo ; Ch(e,f,g) = ((f^g)&e)^g +|| ROTL $Fhi,0,$Ghi +|| ROTL $Flo,0,$Glo ; g = f +|| SHRU $Elo,18,$t0lo +|| SHL $Elo,32-18,$t0hi + XOR $t0hi,$S1hi,$S1hi +|| XOR $t0lo,$S1lo,$S1lo +|| OR $Ahi,$Bhi,$MAJhi +|| OR $Alo,$Blo,$MAJlo +|| ROTL $Ehi,0,$Fhi +|| ROTL $Elo,0,$Flo ; f = e +|| SHRU $Ehi,41-32,$t0lo +|| SHL $Ehi,64-41,$t0hi + XOR $t0hi,$S1hi,$S1hi +|| XOR $t0lo,$S1lo,$S1lo +|| AND $Chi,$MAJhi,$MAJhi +|| AND $Clo,$MAJlo,$MAJlo +|| ROTL $Dhi,0,$Ehi +|| ROTL $Dlo,0,$Elo ; e = d +|| SHRU $Elo,41-32,$t0hi +|| SHL $Elo,64-41,$t0lo + XOR $t0hi,$S1hi,$S1hi +|| XOR $t0lo,$S1lo,$S1lo ; Sigma1(e) +|| AND $Ahi,$Bhi,$t1hi +|| AND $Alo,$Blo,$t1lo +|| ROTL $Chi,0,$Dhi +|| ROTL $Clo,0,$Dlo ; d = c +|| SHRU $Ahi,28,$S0hi +|| SHL $Ahi,32-28,$S0lo + OR $t1hi,$MAJhi,$MAJhi +|| OR $t1lo,$MAJlo,$MAJlo ; Maj(a,b,c) = ((a|b)&c)|(a&b) +|| ADD $CHhi,$T1hi,$T1hi +|| ADDU $CHlo,$T1carry:$T1lo,$T1carry:$T1lo ; T1 += Ch(e,f,g) +|| ROTL $Bhi,0,$Chi +|| ROTL $Blo,0,$Clo ; c = b +|| SHRU $Alo,28,$t0lo +|| SHL $Alo,32-28,$t0hi + XOR $t0hi,$S0hi,$S0hi +|| XOR $t0lo,$S0lo,$S0lo +|| ADD $S1hi,$T1hi,$T1hi +|| ADDU $S1lo,$T1carry:$T1lo,$T1carry:$T1lo ; T1 += Sigma1(e) +|| ROTL $Ahi,0,$Bhi +|| ROTL $Alo,0,$Blo ; b = a +|| SHRU $Ahi,34-32,$t0lo +|| SHL $Ahi,64-34,$t0hi + XOR $t0hi,$S0hi,$S0hi +|| XOR $t0lo,$S0lo,$S0lo +|| ADD $MAJhi,$T1hi,$T2hi +|| ADDU 
$MAJlo,$T1carry:$T1lo,$T2carry:$T2lo ; T2 = T1+Maj(a,b,c) +|| SHRU $Alo,34-32,$t0hi +|| SHL $Alo,64-34,$t0lo + XOR $t0hi,$S0hi,$S0hi +|| XOR $t0lo,$S0lo,$S0lo +|| ADD $Ehi,$T1hi,$T1hi +|| ADDU $Elo,$T1carry:$T1lo,$T1carry:$T1lo ; T1 += e +|| [B0] BNOP loop0_15? +|| SHRU $Ahi,39-32,$t0lo +|| SHL $Ahi,64-39,$t0hi + XOR $t0hi,$S0hi,$S0hi +|| XOR $t0lo,$S0lo,$S0lo +|| [B0] LDNDW *$INP++,B11:B10 ; pre-fetch input +||[!B1] BNOP break? +|| SHRU $Alo,39-32,$t0hi +|| SHL $Alo,64-39,$t0lo + XOR $t0hi,$S0hi,$S0hi +|| XOR $t0lo,$S0lo,$S0lo ; Sigma0(a) +|| ADD $T1carry,$T1hi,$Ehi +|| MV $T1lo,$Elo ; e = T1 +||[!B0] LDW *${Xihi}[28],$T1hi +||[!B0] LDW *${Xilo}[28],$T1lo ; X[i+14] + ADD $S0hi,$T2hi,$T2hi +|| ADDU $S0lo,$T2carry:$T2lo,$T2carry:$T2lo ; T2 += Sigma0(a) +|| [B1] LDDW *$K512++,$Khi:$Klo ; pre-fetch K512[i] + NOP ; avoid cross-path stall + ADD $T2carry,$T2hi,$Ahi +|| MV $T2lo,$Alo ; a = T2 +|| [B0] SUB B0,1,B0 +;;===== branch to loop0_15? is taken here + NOP +;;===== branch to break? is taken here + LDW *${Xihi}[2],$T2hi +|| LDW *${Xilo}[2],$T2lo ; X[i+1] +|| SHRU $T1hi,19,$S1hi +|| SHL $T1hi,32-19,$S1lo + SHRU $T1lo,19,$t0lo +|| SHL $T1lo,32-19,$t0hi + XOR $t0hi,$S1hi,$S1hi +|| XOR $t0lo,$S1lo,$S1lo +|| SHRU $T1hi,61-32,$t0lo +|| SHL $T1hi,64-61,$t0hi + XOR $t0hi,$S1hi,$S1hi +|| XOR $t0lo,$S1lo,$S1lo +|| SHRU $T1lo,61-32,$t0hi +|| SHL $T1lo,64-61,$t0lo + XOR $t0hi,$S1hi,$S1hi +|| XOR $t0lo,$S1lo,$S1lo +|| SHRU $T1hi,6,$t0hi +|| SHL $T1hi,32-6,$t0lo + XOR $t0hi,$S1hi,$S1hi +|| XOR $t0lo,$S1lo,$S1lo +|| SHRU $T1lo,6,$t0lo +|| LDW *${Xihi}[18],$T1hi +|| LDW *${Xilo}[18],$T1lo ; X[i+9] + XOR $t0lo,$S1lo,$S1lo ; sigma1(X[i+14]) + +|| LDW *${Xihi}[0],$CHhi +|| LDW *${Xilo}[0],$CHlo ; X[i] +|| SHRU $T2hi,1,$S0hi +|| SHL $T2hi,32-1,$S0lo + SHRU $T2lo,1,$t0lo +|| SHL $T2lo,32-1,$t0hi + XOR $t0hi,$S0hi,$S0hi +|| XOR $t0lo,$S0lo,$S0lo +|| SHRU $T2hi,8,$t0hi +|| SHL $T2hi,32-8,$t0lo + XOR $t0hi,$S0hi,$S0hi +|| XOR $t0lo,$S0lo,$S0lo +|| SHRU $T2lo,8,$t0lo +|| SHL 
$T2lo,32-8,$t0hi + XOR $t0hi,$S0hi,$S0hi +|| XOR $t0lo,$S0lo,$S0lo +|| ADD $S1hi,$T1hi,$T1hi +|| ADDU $S1lo,$T1lo,$T1carry:$T1lo ; T1 = X[i+9]+sigma1() +|| [B1] BNOP loop16_79? +|| SHRU $T2hi,7,$t0hi +|| SHL $T2hi,32-7,$t0lo + XOR $t0hi,$S0hi,$S0hi +|| XOR $t0lo,$S0lo,$S0lo +|| ADD $CHhi,$T1hi,$T1hi +|| ADDU $CHlo,$T1carry:$T1lo,$T1carry:$T1lo ; T1 += X[i] +|| SHRU $T2lo,7,$t0lo + XOR $t0lo,$S0lo,$S0lo ; sigma0(X[i+1]) + + ADD $S0hi,$T1hi,$T1hi +|| ADDU $S0lo,$T1carry:$T1lo,$T1carry:$T1lo ; T1 += sigma0() +|| [B1] SUB B1,1,B1 + NOP ; avoid cross-path stall + ADD $T1carry,$T1hi,$T1hi +;;===== branch to loop16_79? is taken here + +break?: + ADD $Ahi,$Actxhi,$Ahi ; accumulate ctx +|| ADDU $Alo,$Actxlo,$Actxlo:$Alo +|| [A0] LDNDW *$INP++,B11:B10 ; pre-fetch input +|| [A0] ADDK -640,$K512 ; rewind pointer to K512 + ADD $Bhi,$Bctxhi,$Bhi +|| ADDU $Blo,$Bctxlo,$Bctxlo:$Blo +|| [A0] LDDW *$K512++,$Khi:$Klo ; pre-fetch K512[0] + ADD $Chi,$Cctxhi,$Chi +|| ADDU $Clo,$Cctxlo,$Cctxlo:$Clo +|| ADD $Actxlo,$Ahi,$Ahi +||[!A0] MV $CTXA,$CTXB + ADD $Dhi,$Dctxhi,$Dhi +|| ADDU $Dlo,$Dctxlo,$Dctxlo:$Dlo +|| ADD $Bctxlo,$Bhi,$Bhi +||[!A0] STW $Ahi,*${CTXA}[0^.LITTLE_ENDIAN] ; save ctx +||[!A0] STW $Alo,*${CTXB}[1^.LITTLE_ENDIAN] + ADD $Ehi,$Ectxhi,$Ehi +|| ADDU $Elo,$Ectxlo,$Ectxlo:$Elo +|| ADD $Cctxlo,$Chi,$Chi +|| [A0] BNOP outerloop? 
+||[!A0] STW $Bhi,*${CTXA}[2^.LITTLE_ENDIAN] +||[!A0] STW $Blo,*${CTXB}[3^.LITTLE_ENDIAN] + ADD $Fhi,$Fctxhi,$Fhi +|| ADDU $Flo,$Fctxlo,$Fctxlo:$Flo +|| ADD $Dctxlo,$Dhi,$Dhi +||[!A0] STW $Chi,*${CTXA}[4^.LITTLE_ENDIAN] +||[!A0] STW $Clo,*${CTXB}[5^.LITTLE_ENDIAN] + ADD $Ghi,$Gctxhi,$Ghi +|| ADDU $Glo,$Gctxlo,$Gctxlo:$Glo +|| ADD $Ectxlo,$Ehi,$Ehi +||[!A0] STW $Dhi,*${CTXA}[6^.LITTLE_ENDIAN] +||[!A0] STW $Dlo,*${CTXB}[7^.LITTLE_ENDIAN] + ADD $Hhi,$Hctxhi,$Hhi +|| ADDU $Hlo,$Hctxlo,$Hctxlo:$Hlo +|| ADD $Fctxlo,$Fhi,$Fhi +||[!A0] STW $Ehi,*${CTXA}[8^.LITTLE_ENDIAN] +||[!A0] STW $Elo,*${CTXB}[9^.LITTLE_ENDIAN] + ADD $Gctxlo,$Ghi,$Ghi +||[!A0] STW $Fhi,*${CTXA}[10^.LITTLE_ENDIAN] +||[!A0] STW $Flo,*${CTXB}[11^.LITTLE_ENDIAN] + ADD $Hctxlo,$Hhi,$Hhi +||[!A0] STW $Ghi,*${CTXA}[12^.LITTLE_ENDIAN] +||[!A0] STW $Glo,*${CTXB}[13^.LITTLE_ENDIAN] +;;===== branch to outerloop? is taken here + + STW $Hhi,*${CTXA}[14^.LITTLE_ENDIAN] +|| STW $Hlo,*${CTXB}[15^.LITTLE_ENDIAN] +|| MVK -40,B0 + ADD FP,B0,SP ; destroy circular buffer +|| LDDW *FP[-4],A11:A10 + LDDW *SP[2],A13:A12 +|| LDDW *FP[-2],B11:B10 + LDDW *SP[4],B13:B12 +|| BNOP RA + LDW *++SP(40),FP ; restore frame pointer + MVK 0,B0 + MVC B0,AMR ; clear AMR + NOP 2 ; wait till FP is committed + .endasmfunc + + .sect ".const:sha_asm" + .align 128 +K512: + .uword 0x428a2f98,0xd728ae22, 0x71374491,0x23ef65cd + .uword 0xb5c0fbcf,0xec4d3b2f, 0xe9b5dba5,0x8189dbbc + .uword 0x3956c25b,0xf348b538, 0x59f111f1,0xb605d019 + .uword 0x923f82a4,0xaf194f9b, 0xab1c5ed5,0xda6d8118 + .uword 0xd807aa98,0xa3030242, 0x12835b01,0x45706fbe + .uword 0x243185be,0x4ee4b28c, 0x550c7dc3,0xd5ffb4e2 + .uword 0x72be5d74,0xf27b896f, 0x80deb1fe,0x3b1696b1 + .uword 0x9bdc06a7,0x25c71235, 0xc19bf174,0xcf692694 + .uword 0xe49b69c1,0x9ef14ad2, 0xefbe4786,0x384f25e3 + .uword 0x0fc19dc6,0x8b8cd5b5, 0x240ca1cc,0x77ac9c65 + .uword 0x2de92c6f,0x592b0275, 0x4a7484aa,0x6ea6e483 + .uword 0x5cb0a9dc,0xbd41fbd4, 0x76f988da,0x831153b5 + .uword 0x983e5152,0xee66dfab, 
0xa831c66d,0x2db43210 + .uword 0xb00327c8,0x98fb213f, 0xbf597fc7,0xbeef0ee4 + .uword 0xc6e00bf3,0x3da88fc2, 0xd5a79147,0x930aa725 + .uword 0x06ca6351,0xe003826f, 0x14292967,0x0a0e6e70 + .uword 0x27b70a85,0x46d22ffc, 0x2e1b2138,0x5c26c926 + .uword 0x4d2c6dfc,0x5ac42aed, 0x53380d13,0x9d95b3df + .uword 0x650a7354,0x8baf63de, 0x766a0abb,0x3c77b2a8 + .uword 0x81c2c92e,0x47edaee6, 0x92722c85,0x1482353b + .uword 0xa2bfe8a1,0x4cf10364, 0xa81a664b,0xbc423001 + .uword 0xc24b8b70,0xd0f89791, 0xc76c51a3,0x0654be30 + .uword 0xd192e819,0xd6ef5218, 0xd6990624,0x5565a910 + .uword 0xf40e3585,0x5771202a, 0x106aa070,0x32bbd1b8 + .uword 0x19a4c116,0xb8d2d0c8, 0x1e376c08,0x5141ab53 + .uword 0x2748774c,0xdf8eeb99, 0x34b0bcb5,0xe19b48a8 + .uword 0x391c0cb3,0xc5c95a63, 0x4ed8aa4a,0xe3418acb + .uword 0x5b9cca4f,0x7763e373, 0x682e6ff3,0xd6b2b8a3 + .uword 0x748f82ee,0x5defb2fc, 0x78a5636f,0x43172f60 + .uword 0x84c87814,0xa1f0ab72, 0x8cc70208,0x1a6439ec + .uword 0x90befffa,0x23631e28, 0xa4506ceb,0xde82bde9 + .uword 0xbef9a3f7,0xb2c67915, 0xc67178f2,0xe372532b + .uword 0xca273ece,0xea26619c, 0xd186b8c7,0x21c0c207 + .uword 0xeada7dd6,0xcde0eb1e, 0xf57d4f7f,0xee6ed178 + .uword 0x06f067aa,0x72176fba, 0x0a637dc5,0xa2c898a6 + .uword 0x113f9804,0xbef90dae, 0x1b710b35,0x131c471b + .uword 0x28db77f5,0x23047d84, 0x32caab7b,0x40c72493 + .uword 0x3c9ebe0a,0x15c9bebc, 0x431d67c4,0x9c100d4c + .uword 0x4cc5d4be,0xcb3e42b6, 0x597f299c,0xfc657e2a + .uword 0x5fcb6fab,0x3ad6faec, 0x6c44198c,0x4a475817 + .cstring "SHA512 block transform for C64x+, CRYPTOGAMS by " + .align 4 +___ + +print $code; +close STDOUT; diff --git a/crypto/uid.c b/crypto/uid.c index b1fd52bada..a1261731c9 100644 --- a/crypto/uid.c +++ b/crypto/uid.c @@ -65,7 +65,7 @@ int OPENSSL_issetugid(void) return issetugid(); } -#elif defined(OPENSSL_SYS_WIN32) || defined(OPENSSL_SYS_VXWORKS) || defined(OPENSSL_SYS_NETWARE) +#elif defined(OPENSSL_SYS_WIN32) || defined(OPENSSL_SYS_VXWORKS) || defined(OPENSSL_SYS_NETWARE) || defined(_TMS320C6X) int 
OPENSSL_issetugid(void) { diff --git a/e_os.h b/e_os.h index 79c1392573..6fec78d5ee 100644 --- a/e_os.h +++ b/e_os.h @@ -668,7 +668,7 @@ extern char *sys_errlist[]; extern int sys_nerr; #if defined(OPENSSL_SYS_WINDOWS) # define strcasecmp _stricmp # define strncasecmp _strnicmp -#elif defined(OPENSSL_SYS_VMS) +#elif defined(OPENSSL_SYS_VMS) || defined(OPENSSL_SYS_DSPBIOS) /* VMS below version 7.0 doesn't have strcasecmp() */ # include "o_str.h" # define strcasecmp OPENSSL_strcasecmp diff --git a/fips/aes/fips_aesavs.c b/fips/aes/fips_aesavs.c index ce07cac992..cc3ed6afb1 100644 --- a/fips/aes/fips_aesavs.c +++ b/fips/aes/fips_aesavs.c @@ -99,7 +99,7 @@ static int AESTest(EVP_CIPHER_CTX *ctx, { const EVP_CIPHER *cipher = NULL; - if (strcasecmp(amode, "CBC") == 0) + if (fips_strcasecmp(amode, "CBC") == 0) { switch (akeysz) { @@ -117,7 +117,7 @@ static int AESTest(EVP_CIPHER_CTX *ctx, } } - else if (strcasecmp(amode, "ECB") == 0) + else if (fips_strcasecmp(amode, "ECB") == 0) { switch (akeysz) { @@ -134,7 +134,7 @@ static int AESTest(EVP_CIPHER_CTX *ctx, break; } } - else if (strcasecmp(amode, "CFB128") == 0) + else if (fips_strcasecmp(amode, "CFB128") == 0) { switch (akeysz) { @@ -169,7 +169,7 @@ static int AESTest(EVP_CIPHER_CTX *ctx, break; } } - else if(!strcasecmp(amode,"CFB1")) + else if(!fips_strcasecmp(amode,"CFB1")) { switch (akeysz) { @@ -186,7 +186,7 @@ static int AESTest(EVP_CIPHER_CTX *ctx, break; } } - else if(!strcasecmp(amode,"CFB8")) + else if(!fips_strcasecmp(amode,"CFB8")) { switch (akeysz) { @@ -215,7 +215,7 @@ static int AESTest(EVP_CIPHER_CTX *ctx, } if (FIPS_cipherinit(ctx, cipher, aKey, iVec, dir) <= 0) return 0; - if(!strcasecmp(amode,"CFB1")) + if(!fips_strcasecmp(amode,"CFB1")) M_EVP_CIPHER_CTX_set_flags(ctx, EVP_CIPH_FLAG_LENGTH_BITS); if (dir) FIPS_cipher(ctx, ciphertext, plaintext, len); @@ -874,11 +874,11 @@ int main(int argc, char **argv) if (argc > 1) { - if (strcasecmp(argv[1], "-d") == 0) + if (fips_strcasecmp(argv[1], "-d") == 0) { 
d_opt = 1; } - else if (strcasecmp(argv[1], "-f") == 0) + else if (fips_strcasecmp(argv[1], "-f") == 0) { d_opt = 0; } diff --git a/fips/aes/fips_gcmtest.c b/fips/aes/fips_gcmtest.c index 9f50857fb9..30e4bcc0f4 100644 --- a/fips/aes/fips_gcmtest.c +++ b/fips/aes/fips_gcmtest.c @@ -75,10 +75,11 @@ int main(int argc, char **argv) #include "fips_utl.h" +static char buf[204800]; +static char lbuf[204800]; + static void gcmtest(FILE *in, FILE *out, int encrypt) { - char buf[2048]; - char lbuf[2048]; char *keyword, *value; int keylen = -1, ivlen = -1, aadlen = -1, taglen = -1, ptlen = -1; int rv; @@ -266,8 +267,6 @@ static void gcmtest(FILE *in, FILE *out, int encrypt) static void xtstest(FILE *in, FILE *out) { - char buf[204800]; - char lbuf[204800]; char *keyword, *value; int inlen = 0; int encrypt = 0; @@ -340,8 +339,6 @@ static void xtstest(FILE *in, FILE *out) static void ccmtest(FILE *in, FILE *out) { - char buf[200048]; - char lbuf[200048]; char *keyword, *value; long l; unsigned char *Key = NULL, *Nonce = NULL; diff --git a/fips/dsa/fips_dsatest.c b/fips/dsa/fips_dsatest.c index 3c95d176b8..3ea600e4ab 100644 --- a/fips/dsa/fips_dsatest.c +++ b/fips/dsa/fips_dsatest.c @@ -62,8 +62,10 @@ #include #include #include +#ifndef NO_SYS_TYPES_H #include #include +#endif #include "e_os.h" diff --git a/fips/fips.c b/fips/fips.c index 36ac8d1b0c..8c9e187d7b 100644 --- a/fips/fips.c +++ b/fips/fips.c @@ -81,7 +81,7 @@ static int fips_started = 0; static int fips_is_owning_thread(void); static int fips_set_owning_thread(void); static int fips_clear_owning_thread(void); -static unsigned char *fips_signature_witness(void); +static const unsigned char *fips_signature_witness(void); #define fips_w_lock() CRYPTO_w_lock(CRYPTO_LOCK_FIPS) #define fips_w_unlock() CRYPTO_w_unlock(CRYPTO_LOCK_FIPS) @@ -148,6 +148,9 @@ void fips_set_selftest_fail(void) extern const void *FIPS_text_start(), *FIPS_text_end(); extern const unsigned char FIPS_rodata_start[], FIPS_rodata_end[]; +#ifdef 
_TMS320C6X +const +#endif unsigned char FIPS_signature [20] = { 0 }; __fips_constseg static const char FIPS_hmac_key[]="etaonrishdlcupfm"; @@ -413,9 +416,8 @@ int fips_clear_owning_thread(void) return ret; } -unsigned char *fips_signature_witness(void) +const unsigned char *fips_signature_witness(void) { - extern unsigned char FIPS_signature[]; return FIPS_signature; } diff --git a/fips/fips_canister.c b/fips/fips_canister.c index 7be48426d9..afe55a1bc7 100644 --- a/fips/fips_canister.c +++ b/fips/fips_canister.c @@ -35,6 +35,7 @@ const void *FIPS_text_end(void); (defined(__linux) && ((defined(__PPC__) && !defined(__PPC64__)) || \ defined(__arm__) || defined(__arm))) || \ (defined(__APPLE__) /* verified on all MacOS X & iOS flavors */)|| \ + (defined(_TMS320C6X)) || \ (defined(_WIN32) && defined(_MSC_VER)) # define FIPS_REF_POINT_IS_CROSS_COMPILER_AWARE # endif @@ -70,6 +71,10 @@ const unsigned int FIPS_text_startX[]= # pragma const_seg("fipsro$a") # pragma const_seg() __declspec(allocate("fipsro$a")) +# elif defined(_TMS320C6X) +# pragma CODE_SECTION(instruction_pointer,".fips_text:start") +# pragma CODE_SECTION(FIPS_ref_point,".fips_text:start") +# pragma DATA_SECTION(FIPS_rodata_start,".fips_const:start") # endif const unsigned int FIPS_rodata_start[]= { 0x46495053, 0x5f726f64, 0x6174615f, 0x73746172 }; @@ -87,6 +92,10 @@ const unsigned int FIPS_text_endX[]= # pragma const_seg("fipsro$z") # pragma const_seg() __declspec(allocate("fipsro$z")) +# elif defined(_TMS320C6X) +# pragma CODE_SECTION(instruction_pointer,".fips_text:end") +# pragma CODE_SECTION(FIPS_ref_point,".fips_text:end") +# pragma DATA_SECTION(FIPS_rodata_end,".fips_const:end") # endif const unsigned int FIPS_rodata_end[]= { 0x46495053, 0x5f726f64, 0x6174615f, 0x656e645b }; diff --git a/fips/fips_premain.c b/fips/fips_premain.c index 7dc5246006..c68b464e31 100644 --- a/fips/fips_premain.c +++ b/fips/fips_premain.c @@ -53,6 +53,12 @@ int lib$initialize(); globaldef int (*lib_init_ref)() = 
lib$initialize; # pragma __standard +#elif defined(_TMS320C6X) +# if defined(__TI_EABI__) + asm("\t.sect \".init_array\"\n\t.align 4\n\t.field FINGERPRINT_premain,32"); +# else + asm("\t.sect \".pinit\"\n\t.align 4\n\t.field _FINGERPRINT_premain,32"); +# endif #elif 0 The rest has to be taken care of through command line: diff --git a/fips/fips_premain.c.sha1 b/fips/fips_premain.c.sha1 index b9fb5dfc1d..4dbfbeae69 100644 --- a/fips/fips_premain.c.sha1 +++ b/fips/fips_premain.c.sha1 @@ -1 +1 @@ -HMAC-SHA1(fips_premain.c)= 1eaf66f76187877ff403708a2948d240f92736a0 +HMAC-SHA1(fips_premain.c)= 65b20c3cec235cec85af848e1cd2dfdfa101804a diff --git a/fips/fipssyms.h b/fips/fipssyms.h index 5b1e188785..5719aeac2a 100644 --- a/fips/fipssyms.h +++ b/fips/fipssyms.h @@ -589,6 +589,7 @@ #define AES_encrypt fips_aes_encrypt #define AES_set_decrypt_key fips_aes_set_decrypt_key #define AES_set_encrypt_key fips_aes_set_encrypt_key +#define AES_ctr32_encrypt fips_aes_ctr32_encrypt #define BN_from_montgomery fips_bn_from_montgomery #define BN_num_bits_word FIPS_bn_num_bits_word #define DES_SPtrans fips_des_sptrans diff --git a/fips/rand/fips_rand.c b/fips/rand/fips_rand.c index 5fa052746d..9904d8aa6f 100644 --- a/fips/rand/fips_rand.c +++ b/fips/rand/fips_rand.c @@ -66,7 +66,7 @@ #include #include #include -#if !(defined(OPENSSL_SYS_WIN32) || defined(OPENSSL_SYS_VXWORKS)) +#if !(defined(OPENSSL_SYS_WIN32) || defined(OPENSSL_SYS_VXWORKS) || defined(OPENSSL_SYSNAME_DSPBIOS)) # include #endif #if defined(OPENSSL_SYS_VXWORKS) @@ -237,6 +237,8 @@ void FIPS_get_timevec(unsigned char *buf, unsigned long *pctr) #endif #elif defined(OPENSSL_SYS_VXWORKS) struct timespec ts; +#elif defined(OPENSSL_SYSNAME_DSPBIOS) + unsigned long long TSC, OPENSSL_rdtsc(); #else struct timeval tv; #endif @@ -270,6 +272,16 @@ void FIPS_get_timevec(unsigned char *buf, unsigned long *pctr) buf[5] = (unsigned char) ((ts.tv_nsec >> 8) & 0xff); buf[6] = (unsigned char) ((ts.tv_nsec >> 16) & 0xff); buf[7] = (unsigned 
char) ((ts.tv_nsec >> 24) & 0xff); +#elif defined(OPENSSL_SYSNAME_DSPBIOS) + TSC = OPENSSL_rdtsc(); + buf[0] = (unsigned char) (TSC & 0xff); + buf[1] = (unsigned char) ((TSC >> 8) & 0xff); + buf[2] = (unsigned char) ((TSC >> 16) & 0xff); + buf[3] = (unsigned char) ((TSC >> 24) & 0xff); + buf[4] = (unsigned char) ((TSC >> 32) & 0xff); + buf[5] = (unsigned char) ((TSC >> 40) & 0xff); + buf[6] = (unsigned char) ((TSC >> 48) & 0xff); + buf[7] = (unsigned char) ((TSC >> 56) & 0xff); #else gettimeofday(&tv,NULL); buf[0] = (unsigned char) (tv.tv_sec & 0xff); diff --git a/ms/do_fips.bat b/ms/do_fips.bat index 18a3578a26..357f8fc76e 100644 --- a/ms/do_fips.bat +++ b/ms/do_fips.bat @@ -1,15 +1,10 @@ -@echo off +rem @echo off -if X%CROSS_TARGET% == X goto detect - -echo Cross compiling for %CROSS_TARGET% -SET TARGET=%CROSS_TARGET% -SET ASM=%CROSS_ASM% -goto compile - -:detect SET ASM=%1 SET EXARG= +SET MFILE=ntdll.mak + +if NOT X%OSVERSION% == X goto wince if NOT X%PROCESSOR_ARCHITECTURE% == X goto defined @@ -50,6 +45,14 @@ SET TARGET=VC-WIN64A if x%ASM% == xno-asm goto compile SET ASM=nasm +goto compile + +:wince + +echo Auto Configuring for WinCE +SET TARGET=VC-CE +SET MFILE=cedll.mak + :compile if x%ASM% == xno-asm SET EXARG=no-asm @@ -60,13 +63,13 @@ echo on perl util\mkfiles.pl >MINFO @if ERRORLEVEL 1 goto error -perl util\mk1mf.pl dll %ASM% %TARGET% >ms\ntdll.mak +perl util\mk1mf.pl dll %ASM% %TARGET% >ms\%MFILE% @if ERRORLEVEL 1 goto error -nmake -f ms\ntdll.mak clean -nmake -f ms\ntdll.mak +nmake -f ms\%MFILE% clean +nmake -f ms\%MFILE% @if ERRORLEVEL 1 goto error -nmake -f ms\ntdll.mak install +nmake -f ms\%MFILE% install @if ERRORLEVEL 1 goto error @echo. 
diff --git a/test/fips_algvs.c b/test/fips_algvs.c index 36d7fb3338..ed0350720a 100644 --- a/test/fips_algvs.c +++ b/test/fips_algvs.c @@ -89,6 +89,7 @@ extern int fips_rsavtest_main(int argc, char **argv); extern int fips_shatest_main(int argc, char **argv); extern int fips_test_suite_main(int argc, char **argv); +#if !defined(_TMS320C6400_PLUS) #include "fips_aesavs.c" #include "fips_cmactest.c" #include "fips_desmovs.c" @@ -106,6 +107,28 @@ extern int fips_test_suite_main(int argc, char **argv); #include "fips_shatest.c" #include "fips_test_suite.c" +#else +#include "aes/fips_aesavs.c" +#include "cmac/fips_cmactest.c" +#include "des/fips_desmovs.c" +#include "dh/fips_dhvs.c" +#include "rand/fips_drbgvs.c" +#include "dsa/fips_dssvs.c" +#include "ecdh/fips_ecdhvs.c" +#include "ecdsa/fips_ecdsavs.c" +#include "aes/fips_gcmtest.c" +#include "hmac/fips_hmactest.c" +#include "rand/fips_rngvs.c" +#include "rsa/fips_rsagtest.c" +#include "rsa/fips_rsastest.c" +#include "rsa/fips_rsavtest.c" +#include "sha/fips_shatest.c" +#include "fips_test_suite.c" + +#pragma DATA_SECTION(aucCmBootDspLoad, "BootDspSection"); +volatile unsigned char aucCmBootDspLoad[8*1024]; +#endif + typedef struct { const char *name; @@ -221,7 +244,7 @@ static int run_prg(int argc, char **argv) int main(int argc, char **argv) { - char buf[1024]; + static char buf[1024]; char **args = argv + 1; const char *sname = "fipstests.sh"; ARGS arg; @@ -238,6 +261,10 @@ int main(int argc, char **argv) CRYPTO_mem_ctrl(CRYPTO_MEM_CHECK_ON); #endif +#if defined(_TMS320C6400_PLUS) + SysInit(); +#endif + if (*args && *args[0] != '-') { rv = run_prg(argc - 1, args); diff --git a/util/fips_standalone_sha1 b/util/fips_standalone_sha1 new file mode 100644 index 0000000000..ea2268cb4e --- /dev/null +++ b/util/fips_standalone_sha1 @@ -0,0 +1,32 @@ +#!/usr/bin/env perl +# +$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; + +unshift(@INC,$dir); +require "hmac_sha1.pl"; + +(!@ARV[0] && -f @ARGV[$#ARGV]) || die "usage: $0 [-verify] 
file"; + +$verify=shift if (@ARGV[0] eq "-verify"); + +sysopen(FD,@ARGV[0],0) || die "$!"; +binmode(FD); + +my $ctx = HMAC->Init("etaonrishdlcupfm"); + +while (read(FD,$blob,4*1024)) { $ctx->Update($blob); } + +close(FD); + +my $signature = unpack("H*",$ctx->Final()); + +print "HMAC-SHA1(@ARGV[0])= $signature\n"; + +if ($verify) { + open(FD,"<@ARGV[0].sha1") || die "$!"; + $line = <FD>; + close(FD); + exit(0) if ($line =~ /HMAC\-SHA1\([^\)]*\)=\s*([0-9a-f]+)/i && + $1 eq $signature); + die "signature mismatch"; +} diff --git a/util/fipsas.pl b/util/fipsas.pl index 2734a20181..9dfe0d895c 100644 --- a/util/fipsas.pl +++ b/util/fipsas.pl @@ -8,9 +8,6 @@ my @ARGS = @ARGV; my $top = shift @ARGS; my $target = shift @ARGS; -my $tmptarg = $target; - -$tmptarg =~ s/\.[^\\\/\.]+$/.tmp/; my $runasm = 1; @@ -40,43 +37,31 @@ while () last if (/assembler/) } -# Store all renames. +# Store all renames [noting minimal length]. +my $minlen=0x10000; while () { - if (/^#define\s+(\w+)\s+(\w+)\b/) + if (/^#define\s+_?(\w+)\s+_?(\w+)\b/) { $edits{$1} = $2; + my $len = length($1); + $minlen = $len if ($len<$minlen); } } -my ($from, $to); +open(IN,"$target") || die "Can't open $target for reading"; -#delete any temp file lying around +@code = <IN>; # suck in whole file -unlink $tmptarg; +close IN; -#rename target temporarily -my $rencnt = 0; -# On windows the previous file doesn't always close straight away -# so retry the rename operation a few times if it fails. 
-while (!rename($target, $tmptarg)) - { - sleep 2; - die "Can't rename $target" if ($rencnt++ > 10); - } +open(OUT,">$target") || die "Can't open $target for writing"; -#edit target -open(IN,$tmptarg) || die "Can't open temporary file"; -open(OUT, ">$target") || die "Can't open output file $target"; - -while (<IN>) -{ - while (($from, $to) = each %edits) - { - s/(\b_*)$from(\b)/$1$to$2/g; - } - print OUT $_; -} +foreach $line (@code) + { + $line =~ s/\b(_?)(\w{$minlen,})\b/$1.($edits{$2} or $2)/geo; + print OUT $line; + } close OUT; @@ -87,14 +72,5 @@ if ($runasm) my $rv = $?; - # restore target - unlink $target; - rename $tmptarg, $target; - die "Error executing assembler!" if $rv != 0; } -else - { - # Don't care about target - unlink $tmptarg; - } diff --git a/util/fipsdist.pl b/util/fipsdist.pl index e10a2fe8cf..53f9d3e18a 100644 --- a/util/fipsdist.pl +++ b/util/fipsdist.pl @@ -58,7 +58,7 @@ while () } else { - next unless (/^(fips\/|crypto|util|test|include|ms)/); + next unless (/^(fips\/|crypto|util|test|include|ms|c6x)/); } if (/^crypto\/([^\/]+)/) { diff --git a/util/mk1mf.pl b/util/mk1mf.pl index e2e9ffd086..2325607e8f 100755 --- a/util/mk1mf.pl +++ b/util/mk1mf.pl @@ -249,6 +249,10 @@ elsif (($platform eq "netware-clib") || ($platform eq "netware-libc") || $BSDSOCK=1 if ($platform eq "netware-libc-bsdsock") || ($platform eq "netware-clib-bsdsock"); require 'netware.pl'; } +elsif ($platform eq "c64xplus") + { + require "TI_CGTOOLS.pl"; + } else { require "unix.pl"; @@ -730,7 +734,7 @@ LIBS_DEP=$libs_dep EOF $rules=<<"EOF"; -all: banner \$(TMP_D) \$(BIN_D) \$(TEST_D) \$(LIB_D) \$(INCO_D) headers \$(FIPS_SHA1_EXE) $build_targets +all: banner \$(TMP_D) \$(BIN_D) \$(TEST_D) \$(LIB_D) \$(INCO_D) headers $build_targets banner: $banner @@ -980,7 +984,7 @@ if ($fips) "\$(OBJ_D)${o}fips_start$obj", "\$(FIPSOBJ)", "\$(OBJ_D)${o}fips_end$obj", - "\$(FIPS_SHA1_EXE)", ""); + ""); # FIXME $rules.=&do_link_rule("\$(FIPS_SHA1_EXE)", "\$(OBJ_D)${o}fips_standalone_sha1$obj 
\$(OBJ_D)${o}sha1dgst$obj $sha1_asm_obj", @@ -1217,6 +1221,10 @@ sub do_compile_rule { $ret.=&Sasm_compile_target("$to${o}$n$obj",$s,$n); } + elsif (-f ($s="${d}${o}asm${o}${n}.asm")) + { + $ret.=&cc_compile_target("$to${o}$n$obj","$s",$ex); + } else { die "no rule for $_"; } } return($ret); diff --git a/util/pl/TI_CGTOOLS.pl b/util/pl/TI_CGTOOLS.pl new file mode 100644 index 0000000000..d12d318062 --- /dev/null +++ b/util/pl/TI_CGTOOLS.pl @@ -0,0 +1,274 @@ +#!/usr/local/bin/perl +# +# TI_CGTOOLS.pl, Texas Instruments CGTOOLS under Unix or MSYS. +# + +$ssl= "ssl"; +$crypto="crypto"; + +if ($fips && !$shlib) + { + $crypto="fips"; + $crypto_compat = "cryptocompat.lib"; + } +else + { + $crypto="crypto"; + } + +if ($fipscanisterbuild) + { + $fips_canister_path = "\$(LIB_D)/fipscanister.obj"; + } + +$o='/'; +$cp='cp'; +$cp2='$(PERL) util/copy.pl -stripcr'; +$mkdir='$(PERL) util/mkdir-p.pl'; +$rm='rm -f'; + +$zlib_lib="zlib1.lib"; + +# Santize -L options for ms link +$l_flags =~ s/-L("\[^"]+")/\/libpath:$1/g; +$l_flags =~ s/-L(\S+)/\/libpath:$1/g; + +# C compiler stuff +$cc='cl6x'; +$base_cflags= " $mf_cflag"; +my $f; +$opt_cflags=''; +$dbg_cflags=$f.' -g -DDEBUG -D_DEBUG'; +$lflags=''; + +*::cc_compile_target = sub { + my ($target,$source,$ex_flags)=@_; + my $ret; + + $ex_flags.=" -DMK1MF_BUILD" if ($source =~/cversion/); + $ret ="$target: \$(SRC_D)$o$source\n\t"; + if ($fipscanisterbuild && $source=~/\.asm$/) { + $ret.="\$(PERL) util${o}fipsas.pl . 
\$< norunasm \$(CFLAG)\n\t"; + } + $ret.="\$(CC) --obj_directory=\$(OBJ_D) $ex_flags -c \$(SRC_D)$o$source\n"; + $target =~ s/.*${o}([^${o}]+)/$1/; + $source =~ s/.*${o}([^${o}\.]+)\..*/$1${obj}/; + $ret.="\tmv \$(OBJ_D)${o}$source \$(OBJ_D)${o}$target\n" if ($target ne $source); + $ret.="\n"; + return($ret); +}; +*::perlasm_compile_target = sub { + my ($target,$source,$bname)=@_; + my $ret; + + $bname =~ s/(.*)\.[^\.]$/$1/; + $ret=<<___; +\$(TMP_D)$o$bname.asm: $source + \$(PERL) $source \$\@ +___ + $ret .= "\t\$(PERL) util${o}fipsas.pl . \$@ norunasm \$(CFLAG)\n" if $fipscanisterbuild; + + $ret.=<<___; + +$target: \$(TMP_D)$o$bname.asm + \$(ASM) --obj_directory=\$(OBJ_D) \$(TMP_D)$o$bname.asm + +___ +}; + +$mlflags=''; + +$out_def ="c6x"; +$tmp_def ="$out_def/tmp"; +$inc_def="$out_def/inc"; + +if ($debug) + { + $cflags=$dbg_cflags.$base_cflags; + } +else + { + $cflags=$opt_cflags.$base_cflags; + } + +$obj='.obj'; +$asm_suffix='.asm'; +$ofile=""; + +# EXE linking stuff +$link='$(CC) -z'; +$efile="-o "; +$exep='.out'; +$ex_libs=''; + +# static library stuff +$mklib='ar6x'; +$ranlib=''; +$plib=""; +$libp=".lib"; +$shlibp=($shlib)?".dll":".lib"; +$lfile='-o '; + +$shlib_ex_obj=""; +$asm='$(CC) $(CFLAG) -c'; + +$bn_asm_obj=''; +$bn_asm_src=''; +$des_enc_obj=''; +$des_enc_src=''; +$bf_enc_obj=''; +$bf_enc_src=''; + +if (!$no_asm) + { + import_asm($mf_bn_asm, "bn", \$bn_asm_obj, \$bn_asm_src); + import_asm($mf_aes_asm, "aes", \$aes_asm_obj, \$aes_asm_src); + import_asm($mf_des_asm, "des", \$des_enc_obj, \$des_enc_src); + import_asm($mf_bf_asm, "bf", \$bf_enc_obj, \$bf_enc_src); + import_asm($mf_cast_asm, "cast", \$cast_enc_obj, \$cast_enc_src); + import_asm($mf_rc4_asm, "rc4", \$rc4_enc_obj, \$rc4_enc_src); + import_asm($mf_rc5_asm, "rc5", \$rc5_enc_obj, \$rc5_enc_src); + import_asm($mf_md5_asm, "md5", \$md5_asm_obj, \$md5_asm_src); + import_asm($mf_sha_asm, "sha", \$sha1_asm_obj, \$sha1_asm_src); + import_asm($mf_rmd_asm, "ripemd", \$rmd160_asm_obj, \$rmd160_asm_src); 
+ import_asm($mf_wp_asm, "whrlpool", \$whirlpool_asm_obj, \$whirlpool_asm_src); + import_asm($mf_modes_asm, "modes", \$modes_asm_obj, \$modes_asm_src); + import_asm($mf_cpuid_asm, "", \$cpuid_asm_obj, \$cpuid_asm_src); + $perl_asm = 1; + } + +sub do_lib_rule + { + my($objs,$target,$name,$shlib,$ign,$base_addr) = @_; + local($ret); + + $taget =~ s/\//$o/g if $o ne '/'; + my $base_arg; + if ($base_addr ne "") + { + $base_arg= " /base:$base_addr"; + } + else + { + $base_arg = ""; + } + if ($name ne "") + { + $name =~ tr/a-z/A-Z/; + $name = "/def:ms/${name}.def"; + } + +# $target="\$(LIB_D)$o$target"; +# $ret.="$target: $objs\n"; + if (!$shlib) + { +# $ret.="\t\$(RM) \$(O_$Name)\n"; + $ret.="$target: $objs\n"; + $ret.="\t\$(MKLIB) $lfile$target $objs\n"; + } + else + { + local($ex)=($target =~ /O_CRYPTO/)?'':' $(L_CRYPTO)'; + $ex.=" $zlib_lib" if $zlib_opt == 1 && $target =~ /O_CRYPTO/; + + if ($fips && $target =~ /O_CRYPTO/) + { + $ret.="$target: $objs \$(PREMAIN_DSO_EXE)"; + $ret.="\n\tFIPS_LINK=\"\$(LINK)\" \\\n"; + $ret.="\tFIPS_CC=\$(CC)\\\n"; + $ret.="\tFIPS_CC_ARGS=/Fo\$(OBJ_D)${o}fips_premain.obj \$(SHLIB_CFLAGS) -c\\\n"; + $ret.="\tPREMAIN_DSO_EXE=\$(PREMAIN_DSO_EXE)\\\n"; + $ret.="\tFIPS_SHA1_EXE=\$(FIPS_SHA1_EXE)\\\n"; + $ret.="\tFIPS_TARGET=$target\\\n"; + $ret.="\tFIPSLIB_D=\$(FIPSLIB_D)\\\n"; + $ret.="\t\$(FIPSLINK) \$(MLFLAGS) /map $base_arg $efile$target "; + $ret.="$name \$(SHLIB_EX_OBJ) $objs \$(EX_LIBS) "; + $ret.="\$(OBJ_D)${o}fips_premain.obj $ex\n"; + } + else + { + $ret.="$target: $objs"; + $ret.="\n\t\$(LINK) \$(MLFLAGS) $efile$target $name \$(SHLIB_EX_OBJ) $objs $ex \$(EX_LIBS)\n"; + } + + $ret.="\tIF EXIST \$@.manifest mt -nologo -manifest \$@.manifest -outputresource:\$@;2\n\n"; + } + $ret.="\n"; + return($ret); + } + +sub do_link_rule + { + my($target,$files,$dep_libs,$libs,$standalone)=@_; + local($ret,$_); + $file =~ s/\//$o/g if $o ne '/'; + $n=&bname($targer); + $ret.="$target: $files $dep_libs\n"; + if ($standalone == 1) + { + $ret.=" 
\$(LINK) \$(LFLAGS) $efile$target "; + $ret.= "\$(EX_LIBS) " if ($files =~ /O_FIPSCANISTER/ && !$fipscanisterbuild); + $ret.="$files $libs\n"; + } + elsif ($standalone == 2) + { + $ret.="\t\$(LINK) \$(LFLAGS) $efile$target $files \$(O_FIPSCANISTER) $out_def/application.cmd\n"; + $ret.="\t$out_def/incore6x $target\n\n"; + } + else + { + $ret.="\t\$(LINK) \$(LFLAGS) $efile$target "; + $ret.="\t\$(APP_EX_OBJ) $files $libs\n"; + } + return($ret); + } + +sub do_rlink_rule + { + local($target,$rl_start, $rl_mid, $rl_end,$dep_libs,$libs)=@_; + local($ret,$_); + my $files = "$rl_start $rl_mid $rl_end"; + + $file =~ s/\//$o/g if $o ne '/'; + $n=&bname($target); + $ret.="$target: $files $dep_libs\n"; + $ret.="\t\$(LINK) -r $lfile$target $files $out_def/fipscanister.cmd\n"; + $ret.="\t\$(PERL) $out_def${o}fips_standalone_sha1 $target > ${target}.sha1\n"; + $ret.="\t\$(PERL) util${o}copy.pl -stripcr fips${o}fips_premain.c \$(LIB_D)${o}fips_premain.c\n"; + $ret.="\t\$(CP) fips${o}fips_premain.c.sha1 \$(LIB_D)${o}fips_premain.c.sha1\n"; + $ret.="\n"; + return($ret); + } + +sub import_asm + { + my ($mf_var, $asm_name, $oref, $sref) = @_; + my $asm_dir; + if ($asm_name eq "") + { + $asm_dir = "crypto$o"; + } + else + { + $asm_dir = "crypto$o$asm_name$oasm$o"; + } + + $$oref = ""; + $$sref = ""; + $mf_var =~ s/\.o//g; + + foreach (split(/ /, $mf_var)) + { + $$sref .= $asm_dir . $_ . ".asm "; + } + foreach (split(/ /, $mf_var)) + { + $$oref .= "\$(TMP_D)\\" . $_ . 
".obj "; + } + $$oref =~ s/ $//; + $$sref =~ s/ $//; + + } + + +1; diff --git a/util/pl/VC-32.pl b/util/pl/VC-32.pl index 24b8172ffc..aef3de23ea 100644 --- a/util/pl/VC-32.pl +++ b/util/pl/VC-32.pl @@ -174,12 +174,12 @@ $rsc="rc"; $efile="/out:"; $exep='.exe'; if ($no_sock) { $ex_libs=''; } -elsif ($FLAVOR =~ /CE/) { $ex_libs='winsock.lib'; } +elsif ($FLAVOR =~ /CE/) { $ex_libs='ws2.lib'; } else { $ex_libs='ws2_32.lib'; } if ($FLAVOR =~ /CE/) { - $ex_libs.=' $(WCECOMPAT)/lib/wcecompatex.lib' if (defined($ENV{'WCECOMPAT'})); + $ex_libs.=' $(WCECOMPAT)/lib/wcecompatex.lib crypt32.lib coredll.lib corelibc.lib' if (defined($ENV{'WCECOMPAT'})); $ex_libs.=' $(PORTSDK_LIBPATH)/portlib.lib' if (defined($ENV{'PORTSDK_LIBPATH'})); $ex_libs.=' /nodefaultlib:oldnames.lib coredll.lib corelibc.lib' if ($ENV{'TARGETCPU'} eq "X86"); } @@ -389,8 +389,9 @@ sub do_rlink_rule $file =~ s/\//$o/g if $o ne '/'; $n=&bname($targer); - $ret.="$target: $files $dep_libs \$(FIPS_SHA1_EXE)\n"; - $ret.="\t\$(PERL) ms\\segrenam.pl \$\$a $rl_start\n"; + $ret.="$target: $files $dep_libs"; + $ret.=" \$(FIPS_SHA1_EXE)" unless defined $ENV{"FIPS_SHA1_PATH"}; + $ret.="\n\t\$(PERL) ms\\segrenam.pl \$\$a $rl_start\n"; $ret.="\t\$(PERL) ms\\segrenam.pl \$\$b $rl_mid\n"; $ret.="\t\$(PERL) ms\\segrenam.pl \$\$c $rl_end\n"; $ret.="\t\$(MKLIB) $lfile$target @<<\n\t$files\n<<\n"; From 3e1beaf43e741b47c4bd03e864881d7294cebea7 Mon Sep 17 00:00:00 2001 From: "Dr. 
Stephen Henson" Date: Sun, 14 Oct 2012 12:03:34 +0000 Subject: [PATCH 082/120] Add BSD-ppc85xx support and avoid copying overlapping buffers in fips_dssvs.c --- Configure | 2 ++ config | 34 ++++++++++++++++++++++------------ fips/aes/fips_gcmtest.c | 4 ++++ fips/dsa/fips_dssvs.c | 3 ++- fips/fips_canister.c | 1 + 5 files changed, 31 insertions(+), 13 deletions(-) diff --git a/Configure b/Configure index 90b108baaa..99b386b7c0 100755 --- a/Configure +++ b/Configure @@ -409,6 +409,8 @@ my %table=( "BSD-x86-elf", "gcc:-DL_ENDIAN -DTERMIOS -O3 -fomit-frame-pointer -Wall::${BSDthreads}:::BN_LLONG ${x86_gcc_des} ${x86_gcc_opts}:${x86_elf_asm}:dlfcn:bsd-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", "debug-BSD-x86-elf", "gcc:-DL_ENDIAN -DTERMIOS -O3 -Wall -g::${BSDthreads}:::BN_LLONG ${x86_gcc_des} ${x86_gcc_opts}:${x86_elf_asm}:dlfcn:bsd-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", "BSD-sparcv8", "gcc:-DB_ENDIAN -DTERMIOS -O3 -mv8 -Wall::${BSDthreads}:::BN_LLONG RC2_CHAR RC4_INDEX DES_INT DES_UNROLL:${sparcv8_asm}:dlfcn:bsd-gcc-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", +"BSD-ppc85xx","gcc:-DTERMIOS -O3 -fomit-frame-pointer -msoft-float -Wall::${BSDthreads}:::BN_LLONG RC2_CHAR RC4_INDEX DES_INT DES_UNROLL:${no_asm}:dlfcn:bsd-gcc-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", +"debug-BSD-ppc85xx","gcc:-DTERMIOS -O0 -fomit-frame-pointer -msoft-float -Wall -g::${BSDthreads}:::BN_LLONG RC2_CHAR RC4_INDEX DES_INT DES_UNROLL:${no_asm}:dlfcn:bsd-gcc-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", "BSD-generic64","gcc:-DTERMIOS -O3 -Wall::${BSDthreads}:::SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_INT DES_UNROLL:${no_asm}:dlfcn:bsd-gcc-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", # -DMD32_REG_T=int doesn't actually belong in sparc64 target, it diff --git a/config b/config index 851a161136..93dde30168 100755 --- a/config +++ b/config @@ -219,7 +219,11 @@ case "${SYSTEM}:${RELEASE}:${VERSION}:${MACHINE}" in ;; NetBSD:*:*:*386*) - echo "`(/usr/sbin/sysctl 
-n hw.model || /sbin/sysctl -n hw.model) | sed 's,.*\(.\)86-class.*,i\186,'`-whatever-netbsd"; exit 0 + if [ -z ${CROSS_COMPILE} ]; then + echo "`(/usr/sbin/sysctl -n hw.model || /sbin/sysctl -n hw.model) | sed 's,.*\(.\)86-class.*,i\186,'`-whatever-netbsd"; exit 0 + else + echo "${MACHINE}-whatever-netbsd"; exit 0 + fi ;; NetBSD:*) @@ -734,17 +738,23 @@ case "$GUESSOS" in sparc64-*-*bsd*) OUT="BSD-sparc64" ;; ia64-*-*bsd*) OUT="BSD-ia64" ;; amd64-*-*bsd*) OUT="BSD-x86_64" ;; - *86*-*-*bsd*) # mimic ld behaviour when it's looking for libc... - if [ -L /usr/lib/libc.so ]; then # [Free|Net]BSD - libc=/usr/lib/libc.so - else # OpenBSD - # ld searches for highest libc.so.* and so do we - libc=`(ls /usr/lib/libc.so.* | tail -1) 2>/dev/null` - fi - case "`(file -L $libc) 2>/dev/null`" in - *ELF*) OUT="BSD-x86-elf" ;; - *) OUT="BSD-x86"; options="$options no-sse2" ;; - esac ;; + *86*-*-*bsd*) if [ -z ${CROSS_COMPILE} ]; then + # mimic ld behaviour when it's looking for libc... + if [ -L /usr/lib/libc.so ]; then # [Free|Net]BSD + libc=/usr/lib/libc.so + else # OpenBSD + # ld searches for highest libc.so.* and so do we + libc=`(ls /usr/lib/libc.so.* | tail -1) 2>/dev/null` + fi + echo "libc = $libc" + case "`(file -L $libc) 2>/dev/null`" in + *ELF*) OUT="BSD-x86-elf" ;; + *) OUT="BSD-x86"; options="$options no-sse2" ;; + esac + else + OUT="BSD-x86-elf" + fi;; + ppc85xx-*-*bsd*) OUT="BSD-ppc85xx" ;; # MPC85XX has no hardware FP accelerator *-*-*bsd*) OUT="BSD-generic32" ;; *-*-osf) OUT="osf1-alpha-cc" ;; diff --git a/fips/aes/fips_gcmtest.c b/fips/aes/fips_gcmtest.c index 30e4bcc0f4..4000e0763f 100644 --- a/fips/aes/fips_gcmtest.c +++ b/fips/aes/fips_gcmtest.c @@ -208,6 +208,8 @@ static void gcmtest(FILE *in, FILE *out, int encrypt) ct = OPENSSL_malloc(ptlen); rv = FIPS_cipher(&ctx, ct, pt, ptlen); } + else + FIPS_cipher(&ctx, iv, iv, 0); FIPS_cipher(&ctx, NULL, NULL, 0); FIPS_cipher_ctx_ctrl(&ctx, EVP_CTRL_GCM_GET_TAG, taglen, tag); @@ -242,6 +244,8 @@ static void 
gcmtest(FILE *in, FILE *out, int encrypt) pt = OPENSSL_malloc(ptlen); rv = FIPS_cipher(&ctx, pt, ct, ptlen); } + else + FIPS_cipher(&ctx, iv, iv, 0); rv = FIPS_cipher(&ctx, NULL, NULL, 0); if (rv < 0) fprintf(out, "FAIL" RESP_EOL); diff --git a/fips/dsa/fips_dssvs.c b/fips/dsa/fips_dssvs.c index e2f2297f00..cee5fb398e 100644 --- a/fips/dsa/fips_dssvs.c +++ b/fips/dsa/fips_dssvs.c @@ -46,7 +46,8 @@ static int parse_mod(char *line, int *pdsa2, int *pL, int *pN, if (strcmp(keyword, "L")) return 0; *pL = atoi(value); - strcpy(line, p + 1); + strcpy(lbuf, p + 1); + strcpy(line, lbuf); if (pmd) p = strchr(line, ','); else diff --git a/fips/fips_canister.c b/fips/fips_canister.c index afe55a1bc7..016d94c976 100644 --- a/fips/fips_canister.c +++ b/fips/fips_canister.c @@ -32,6 +32,7 @@ const void *FIPS_text_end(void); defined(__i386__)|| defined(__i386))) || \ (defined(__vxworks) && (defined(__ppc__) || defined(__ppc) || \ defined(__mips__)|| defined(__mips))) || \ + (defined(__NetBSD__) && (defined(__powerpc__) || defined(__i386))) || \ (defined(__linux) && ((defined(__PPC__) && !defined(__PPC64__)) || \ defined(__arm__) || defined(__arm))) || \ (defined(__APPLE__) /* verified on all MacOS X & iOS flavors */)|| \ From fd9d2eaf160a48b39ee76a4fc3ab0b8f45cd0483 Mon Sep 17 00:00:00 2001 From: "Dr. Stephen Henson" Date: Sun, 14 Oct 2012 12:24:24 +0000 Subject: [PATCH 083/120] CMAC reset fix (from HEAD) --- crypto/cmac/cmac.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/crypto/cmac/cmac.c b/crypto/cmac/cmac.c index e6cade6120..ccc7e7a3bd 100644 --- a/crypto/cmac/cmac.c +++ b/crypto/cmac/cmac.c @@ -153,6 +153,8 @@ int CMAC_Init(CMAC_CTX *ctx, const void *key, size_t keylen, return 0; if (!M_EVP_EncryptInit_ex(&ctx->cctx, NULL, NULL, NULL, zero_iv)) return 0; + memset(ctx->tbl, 0, M_EVP_CIPHER_CTX_block_size(&ctx->cctx)); + ctx->nlast_block = 0; return 1; } /* Initialiase context */ From 82607b291f2e1ebf31fde8956b9d6cfbee060d30 Mon Sep 17 00:00:00 2001 From: "Dr. 
Stephen Henson" Date: Sun, 14 Oct 2012 12:26:02 +0000 Subject: [PATCH 084/120] optimize make_kn (from HEAD, by Andy) --- crypto/cmac/cmac.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/crypto/cmac/cmac.c b/crypto/cmac/cmac.c index ccc7e7a3bd..5ff0fa7028 100644 --- a/crypto/cmac/cmac.c +++ b/crypto/cmac/cmac.c @@ -77,19 +77,17 @@ struct CMAC_CTX_st /* Make temporary keys K1 and K2 */ -static void make_kn(unsigned char *k1, unsigned char *l, int bl) +static void make_kn(unsigned char *k1, const unsigned char *l, int bl) { int i; + unsigned char c = l[0], carry = c>>7, cnext; + /* Shift block to left, including carry */ - for (i = 0; i < bl; i++) - { - k1[i] = l[i] << 1; - if (i < bl - 1 && l[i + 1] & 0x80) - k1[i] |= 1; - } + for (i = 0; i < bl-1; i++, c = cnext) + k1[i] = (c << 1) | ((cnext=l[i+1]) >> 7); + /* If MSB set fixup with R */ - if (l[0] & 0x80) - k1[bl - 1] ^= bl == 16 ? 0x87 : 0x1b; + k1[i] = (c << 1) ^ ((0-carry)&(bl==16?0x87:0x1b)); } CMAC_CTX *CMAC_CTX_new(void) From 799602e489b92fdc0bdf4bc8f0920ef3707bf6f3 Mon Sep 17 00:00:00 2001 From: "Dr. Stephen Henson" Date: Sun, 14 Oct 2012 12:29:25 +0000 Subject: [PATCH 085/120] gcm128.c: fix AAD-only case with AAD length not divisible by 16. PR: 2859 Submitted by: John Foley (backport from HEAD) --- crypto/modes/gcm128.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crypto/modes/gcm128.c b/crypto/modes/gcm128.c index 8dfeae5ed5..a52ffb1d22 100644 --- a/crypto/modes/gcm128.c +++ b/crypto/modes/gcm128.c @@ -1403,7 +1403,7 @@ int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx,const unsigned char *tag, void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult; #endif - if (ctx->mres) + if (ctx->mres || ctx->ares) GCM_MUL(ctx,Xi); if (is_endian.little) { From aaf8b56fc8be572739104d79324e95adb30049f3 Mon Sep 17 00:00:00 2001 From: "Dr. Stephen Henson" Date: Sun, 14 Oct 2012 12:30:12 +0000 Subject: [PATCH 086/120] sha1-armv4-large.pl: comply with ABI. 
(backport from HEAD) --- crypto/sha/asm/sha1-armv4-large.pl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crypto/sha/asm/sha1-armv4-large.pl b/crypto/sha/asm/sha1-armv4-large.pl index fe8207f77f..33da3e0e3c 100644 --- a/crypto/sha/asm/sha1-armv4-large.pl +++ b/crypto/sha/asm/sha1-armv4-large.pl @@ -177,6 +177,7 @@ for($i=0;$i<5;$i++) { $code.=<<___; teq $Xi,sp bne .L_00_15 @ [((11+4)*5+2)*3] + sub sp,sp,#25*4 ___ &BODY_00_15(@V); unshift(@V,pop(@V)); &BODY_16_19(@V); unshift(@V,pop(@V)); @@ -186,7 +187,6 @@ ___ $code.=<<___; ldr $K,.LK_20_39 @ [+15+16*4] - sub sp,sp,#25*4 cmn sp,#0 @ [+3], clear carry to denote 20_39 .L_20_39_or_60_79: ___ From 933c9d00dad666cc1391560ff362e806070a37af Mon Sep 17 00:00:00 2001 From: "Dr. Stephen Henson" Date: Sun, 14 Oct 2012 12:38:58 +0000 Subject: [PATCH 087/120] reset ctx->num for CTR mode for FIPS EVP --- fips/utl/fips_enc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fips/utl/fips_enc.c b/fips/utl/fips_enc.c index 1358b1f4a4..13ac4ac9f5 100644 --- a/fips/utl/fips_enc.c +++ b/fips/utl/fips_enc.c @@ -208,6 +208,7 @@ int FIPS_cipherinit(EVP_CIPHER_CTX *ctx, const EVP_CIPHER *cipher, break; case EVP_CIPH_CTR_MODE: + ctx->num = 0; /* Don't reuse IV for CTR mode */ if(iv) memcpy(ctx->iv, iv, M_EVP_CIPHER_CTX_iv_length(ctx)); From b6c1d4b7f0e1ef546c28e1f8138957618a08c4e7 Mon Sep 17 00:00:00 2001 From: "Dr. Stephen Henson" Date: Mon, 15 Oct 2012 11:28:59 +0000 Subject: [PATCH 088/120] e_aes.c: uninitialized variable in aes_ccm_init_key. 
PR: 2874 Submitted by: Tomas Mraz (backport from HEAD) --- crypto/evp/e_aes.c | 1 + 1 file changed, 1 insertion(+) diff --git a/crypto/evp/e_aes.c b/crypto/evp/e_aes.c index 71f9e037d5..d1357f7e38 100644 --- a/crypto/evp/e_aes.c +++ b/crypto/evp/e_aes.c @@ -1195,6 +1195,7 @@ static int aes_ccm_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, vpaes_set_encrypt_key(key, ctx->key_len*8, &cctx->ks); CRYPTO_ccm128_init(&cctx->ccm, cctx->M, cctx->L, &cctx->ks, (block128_f)vpaes_encrypt); + cctx->str = NULL; cctx->key_set = 1; break; } From add13802cf06e7b18ddb5889d755a8380b6fdce4 Mon Sep 17 00:00:00 2001 From: "Dr. Stephen Henson" Date: Tue, 16 Oct 2012 22:47:00 +0000 Subject: [PATCH 089/120] Don't require tag before ciphertext in AESGCM mode --- crypto/evp/e_aes.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crypto/evp/e_aes.c b/crypto/evp/e_aes.c index d1357f7e38..4066a00523 100644 --- a/crypto/evp/e_aes.c +++ b/crypto/evp/e_aes.c @@ -956,8 +956,6 @@ static int aes_gcm_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, if (!gctx->iv_set) return -1; - if (!ctx->encrypt && gctx->taglen < 0) - return -1; if (in) { if (out == NULL) @@ -999,6 +997,8 @@ static int aes_gcm_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, { if (!ctx->encrypt) { + if (gctx->taglen < 0) + return -1; if (CRYPTO_gcm128_finish(&gctx->gcm, ctx->buf, gctx->taglen) != 0) return -1; From 986b927fb393e19de3d3697d6f58e4d1f7a2b692 Mon Sep 17 00:00:00 2001 From: "Dr. Stephen Henson" Date: Tue, 16 Oct 2012 22:47:44 +0000 Subject: [PATCH 090/120] aix[64]-cc: get MT support right (gcc targets are not affected). 
(backport from HEAD) --- Configure | 4 ++-- TABLE | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Configure b/Configure index 99b386b7c0..57e5899718 100755 --- a/Configure +++ b/Configure @@ -463,8 +463,8 @@ my %table=( "aix64-gcc","gcc:-maix64 -O -DB_ENDIAN::-pthread:AIX::SIXTY_FOUR_BIT_LONG RC4_CHAR:${ppc64_asm}:aix64:dlfcn:aix-shared::-maix64 -shared -Wl,-G:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)::-X64", # Below targets assume AIX 5. Idea is to effectively disregard $OBJECT_MODE # at build time. $OBJECT_MODE is respected at ./config stage! -"aix-cc", "cc:-q32 -O -DB_ENDIAN -qmaxmem=16384 -qro -qroconst::-qthreaded:AIX::BN_LLONG RC4_CHAR:${ppc32_asm}:aix32:dlfcn:aix-shared::-q32 -G:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)::-X 32", -"aix64-cc", "cc:-q64 -O -DB_ENDIAN -qmaxmem=16384 -qro -qroconst::-qthreaded:AIX::SIXTY_FOUR_BIT_LONG RC4_CHAR:${ppc64_asm}:aix64:dlfcn:aix-shared::-q64 -G:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)::-X 64", +"aix-cc", "cc:-q32 -O -DB_ENDIAN -qmaxmem=16384 -qro -qroconst::-qthreaded -D_THREAD_SAFE:AIX::BN_LLONG RC4_CHAR:${ppc32_asm}:aix32:dlfcn:aix-shared::-q32 -G:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)::-X 32", +"aix64-cc", "cc:-q64 -O -DB_ENDIAN -qmaxmem=16384 -qro -qroconst::-qthreaded -D_THREAD_SAFE:AIX::SIXTY_FOUR_BIT_LONG RC4_CHAR:${ppc64_asm}:aix64:dlfcn:aix-shared::-q64 -G:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)::-X 64", # # Cray T90 and similar (SDSC) diff --git a/TABLE b/TABLE index bf974b56f4..cdc0bf1c98 100644 --- a/TABLE +++ b/TABLE @@ -862,7 +862,7 @@ $multilib = $cc = cc $cflags = -q32 -O -DB_ENDIAN -qmaxmem=16384 -qro -qroconst $unistd = -$thread_cflag = -qthreaded +$thread_cflag = -qthreaded -D_THREAD_SAFE $sys_id = AIX $lflags = $bn_ops = BN_LLONG RC4_CHAR @@ -961,7 +961,7 @@ $multilib = $cc = cc $cflags = -q64 -O -DB_ENDIAN -qmaxmem=16384 -qro -qroconst $unistd = -$thread_cflag = -qthreaded +$thread_cflag = -qthreaded -D_THREAD_SAFE $sys_id = AIX $lflags = $bn_ops = SIXTY_FOUR_BIT_LONG RC4_CHAR From 
3b4f1f302dd1d3a8b71d82bf3900802945a0b431 Mon Sep 17 00:00:00 2001 From: "Dr. Stephen Henson" Date: Fri, 19 Oct 2012 20:53:35 +0000 Subject: [PATCH 091/120] update DRBG to handle new file format --- fips/rand/fips_drbgvs.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/fips/rand/fips_drbgvs.c b/fips/rand/fips_drbgvs.c index 9aae88c3e1..214e3c340a 100644 --- a/fips/rand/fips_drbgvs.c +++ b/fips/rand/fips_drbgvs.c @@ -182,7 +182,7 @@ int main(int argc,char **argv) int r, nid = 0; int pr = 0; char buf[2048], lbuf[2048]; - unsigned char randout[2048]; + unsigned char *randout = NULL; char *keyword = NULL, *value = NULL; unsigned char *ent = NULL, *nonce = NULL, *pers = NULL, *adin = NULL; @@ -298,6 +298,8 @@ int main(int argc,char **argv) else exit(1); } + if (!strcmp(keyword, "[ReturnedBitsLen")) + randoutlen = atoi(value) / 8; if (!strcmp(keyword, "EntropyInput")) { @@ -327,7 +329,11 @@ int main(int argc,char **argv) FIPS_drbg_set_callbacks(dctx, test_entropy, 0, 0, test_nonce, 0); FIPS_drbg_set_app_data(dctx, &t); - randoutlen = (int)FIPS_drbg_get_blocklength(dctx); + if (randoutlen == 0) + randoutlen = (int)FIPS_drbg_get_blocklength(dctx); + if (randout) + OPENSSL_free(randout); + randout = OPENSSL_malloc(randoutlen); r = FIPS_drbg_instantiate(dctx, pers, perslen); if (!r) { @@ -406,6 +412,8 @@ int main(int argc,char **argv) } } + if (randout) + OPENSSL_free(randout); if (in && in != stdin) fclose(in); if (out && out != stdout) From 9abbb6aa89f16192bb1a10fd1216ee480e79ba76 Mon Sep 17 00:00:00 2001 From: Andy Polyakov Date: Mon, 29 Oct 2012 22:26:27 +0000 Subject: [PATCH 092/120] Cumulative updates from HEAD. 
--- e_os.h | 8 +++++--- fips/sha/Makefile | 3 ++- test/Makefile | 1 + util/fipslink.pl | 45 +++++++++++++++++++++++++++++---------------- util/mk1mf.pl | 6 +++--- util/pl/VC-32.pl | 19 ++++++++++++------- 6 files changed, 52 insertions(+), 30 deletions(-) diff --git a/e_os.h b/e_os.h index 6fec78d5ee..efe58fb97e 100644 --- a/e_os.h +++ b/e_os.h @@ -306,7 +306,7 @@ static unsigned int _strlen31(const char *str) # undef isupper # undef isxdigit # endif -# if defined(_MSC_VER) && !defined(_DLL) && defined(stdin) +# if defined(_MSC_VER) && !defined(_WIN32_WCE) && !defined(_DLL) && defined(stdin) # if _MSC_VER>=1300 # undef stdin # undef stdout @@ -332,8 +332,10 @@ static unsigned int _strlen31(const char *str) # endif # endif # endif -# include <io.h> -# include <fcntl.h> +# if !defined(OPENSSL_FIPSCANISTER) +# include <io.h> +# include <fcntl.h> +# endif # ifdef OPENSSL_SYS_WINCE # define OPENSSL_NO_POSIX_IO diff --git a/fips/sha/Makefile b/fips/sha/Makefile index 9bc598301f..0878e7bf64 100644 --- a/fips/sha/Makefile +++ b/fips/sha/Makefile @@ -30,7 +30,8 @@ LIB=$(TOP)/libcrypto.a LIBSRC=fips_sha1_selftest.c LIBOBJ=fips_sha1_selftest.o -SRC= $(LIBSRC) fips_standalone_sha1.c +SRC= $(LIBSRC) +PROGS= fips_standalone_sha1.c EXHEADER= HEADER= diff --git a/test/Makefile b/test/Makefile index 2fcc78d46a..3f9770663b 100644 --- a/test/Makefile +++ b/test/Makefile @@ -12,6 +12,7 @@ PERL= perl # KRB5 stuff KRB5_INCLUDES= LIBKRB5= +TEST= fips_algvs.c PEX_LIBS= EX_LIBS= #-lnsl -lsocket diff --git a/util/fipslink.pl b/util/fipslink.pl index 331c456878..0f87f7dbc9 100644 --- a/util/fipslink.pl +++ b/util/fipslink.pl @@ -27,33 +27,30 @@ if (exists $ENV{"PREMAIN_DSO_EXE"}) $fips_premain_dso = ""; } -my $fips_sig = $ENV{"FIPS_SIG"}; -if (defined $fips_sig) - { - if ($fips_premain_dso ne "") - { - $fips_premain_dso = "$fips_sig -dso"; - } - else - { - $fips_premain_dso = "$fips_sig -exe"; - } - } - check_hash($sha1_exe, "fips_premain.c"); check_hash($sha1_exe, "fipscanister.lib"); print "Integrity check OK\n"; -print 
"$fips_cc $fips_cc_args $fips_libdir/fips_premain.c\n"; -system "$fips_cc $fips_cc_args $fips_libdir/fips_premain.c"; -die "First stage Compile failure" if $? != 0; +if (is_premain_linked(@ARGV)) { + print "$fips_cc $fips_cc_args $fips_libdir/fips_premain.c\n"; + system "$fips_cc $fips_cc_args $fips_libdir/fips_premain.c"; + die "First stage Compile failure" if $? != 0; +} elsif (!defined($ENV{FIPS_SIG})) { + die "no fips_premain.obj linked"; +} print "$fips_link @ARGV\n"; system "$fips_link @ARGV"; die "First stage Link failure" if $? != 0; +if (defined($ENV{FIPS_SIG})) { + print "$ENV{FIPS_SIG} $fips_target\n"; + system "$ENV{FIPS_SIG} $fips_target"; + die "$ENV{FIPS_SIG} $fips_target failed" if $? != 0; + exit; +} print "$fips_premain_dso $fips_target\n"; system("$fips_premain_dso $fips_target >$fips_target.sha1"); @@ -74,6 +71,22 @@ print "$fips_link @ARGV\n"; system "$fips_link @ARGV"; die "Second stage Link failure" if $? != 0; +sub is_premain_linked + { + return 1 if (grep /fips_premain\.obj/,@_); + foreach (@_) + { + if (/^@(.*)/ && -f $1) + { + open FD,$1 or die "can't open $1"; + my $ret = (grep /fips_premain\.obj/,<FD>)?1:0; + close FD; + return $ret; + } + } + return 0; + } + sub check_hash { my ($sha1_exe, $filename) = @_; diff --git a/util/mk1mf.pl b/util/mk1mf.pl index 2325607e8f..8934ababa1 100755 --- a/util/mk1mf.pl +++ b/util/mk1mf.pl @@ -864,13 +864,13 @@ if ($fips) } $rules.=&cc_compile_target("\$(OBJ_D)${o}fips_standalone_sha1$obj", "fips${o}sha${o}fips_standalone_sha1.c", - "\$(SHLIB_CFLAGS)"); + "\$(APP_CFLAGS)"); $rules.=&cc_compile_target("\$(OBJ_D)${o}\$(E_PREMAIN_DSO)$obj", "fips${o}fips_premain.c", - "-DFINGERPRINT_PREMAIN_DSO_LOAD \$(SHLIB_CFLAGS)"); + "-DFINGERPRINT_PREMAIN_DSO_LOAD \$(APP_CFLAGS)"); $rules.=&cc_compile_target("\$(OBJ_D)${o}fips_algvs$obj", "test${o}fips_algvs.c", - "\$(SHLIB_CFLAGS)"); + "\$(APP_CFLAGS)"); } foreach (values %lib_nam) diff --git a/util/pl/VC-32.pl b/util/pl/VC-32.pl index aef3de23ea..85299cccbc 100644 --- 
a/util/pl/VC-32.pl +++ b/util/pl/VC-32.pl @@ -49,8 +49,7 @@ if ($FLAVOR =~ /WIN64/) # considered safe to ignore. # $base_cflags= " $mf_cflag"; - my $f = $shlib?' /MD':' /MT'; - $lib_cflag='/Zl' if (!$shlib); # remove /DEFAULTLIBs from static lib + my $f = ($shlib and !$fipscanisterbuild)?' /MD':' /MT'; $opt_cflags=$f.' /Ox'; $dbg_cflags=$f.'d /Od -DDEBUG -D_DEBUG'; $lflags="/nologo /subsystem:console /opt:ref"; @@ -127,19 +126,24 @@ elsif ($FLAVOR =~ /CE/) $base_cflags.=" $wcecdefs"; $base_cflags.=' -I$(WCECOMPAT)/include' if (defined($ENV{'WCECOMPAT'})); $base_cflags.=' -I$(PORTSDK_LIBPATH)/../../include' if (defined($ENV{'PORTSDK_LIBPATH'})); - $opt_cflags=' /MC /O1i'; # optimize for space, but with intrinsics... - $dbg_clfags=' /MC /Od -DDEBUG -D_DEBUG'; + if (`cl 2>&1` =~ /Version 1[4-9]\./) { + $base_cflags.=($shlib and !$fipscanisterbuild)?' /MD':' /MT'; + } else { + $base_cflags.=' /MC'; + } + $opt_cflags=' /O1i'; # optimize for space, but with intrinsics... + $dbg_clfags=' /Od -DDEBUG -D_DEBUG'; $lflags="/nologo /opt:ref $wcelflag"; } else # Win32 { $base_cflags= " $mf_cflag"; - my $f = $shlib?' /MD':' /MT'; - $lib_cflag='/Zl' if (!$shlib); # remove /DEFAULTLIBs from static lib + my $f = ($shlib and !$fipscanisterbuild)?' /MD':' /MT'; $opt_cflags=$f.' /Ox /O2 /Ob2'; $dbg_cflags=$f.'d /Od -DDEBUG -D_DEBUG'; $lflags="/nologo /subsystem:console /opt:ref"; } +$lib_cflag='/Zl' if (!$shlib or $fipscanisterbuild); # remove /DEFAULTLIBs $mlflags=''; $out_def ="out32"; $out_def.="dll" if ($shlib); @@ -284,7 +288,8 @@ elsif ($shlib && $FLAVOR =~ /CE/) { $mlflags.=" $lflags /dll"; $lflags.=' /entry:mainCRTstartup' if(defined($ENV{'PORTSDK_LIBPATH'})); - $lib_cflag.=" -D_WINDLL -D_DLL"; + $lib_cflag.=" -D_WINDLL"; + $lib_cflag.=" -D_DLL" if (!$fipscanisterbuild); } sub do_lib_rule From 9f3f7ce9e8d58f56d254ff8a44fa56b5c4738de9 Mon Sep 17 00:00:00 2001 From: Andy Polyakov Date: Sun, 16 Dec 2012 19:42:44 +0000 Subject: [PATCH 093/120] VC-32.pl: fix typo [from HEAD]. 
Submitted by: Pierre Delaage --- util/pl/VC-32.pl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/util/pl/VC-32.pl b/util/pl/VC-32.pl index 85299cccbc..db9113fcbb 100644 --- a/util/pl/VC-32.pl +++ b/util/pl/VC-32.pl @@ -132,7 +132,7 @@ elsif ($FLAVOR =~ /CE/) $base_cflags.=' /MC'; } $opt_cflags=' /O1i'; # optimize for space, but with intrinsics... - $dbg_clfags=' /Od -DDEBUG -D_DEBUG'; + $dbg_cflags=' /Od -DDEBUG -D_DEBUG'; $lflags="/nologo /opt:ref $wcelflag"; } else # Win32 From b1adc971b446ce041a5aa0296bfca3759cdb9d3b Mon Sep 17 00:00:00 2001 From: "Dr. Stephen Henson" Date: Fri, 28 Dec 2012 20:19:10 +0000 Subject: [PATCH 094/120] Make DES3 and ECDSA self tests continue with remaining cases on failure. Make fips_test_suite induced failure work on every possible subtest instead of just categories of subtest. --- fips/des/fips_des_selftest.c | 6 +-- fips/ecdsa/fips_ecdsa_selftest.c | 8 ++-- fips/fips_test_suite.c | 82 ++++++++++++++------------------ 3 files changed, 42 insertions(+), 54 deletions(-) diff --git a/fips/des/fips_des_selftest.c b/fips/des/fips_des_selftest.c index a014f6f33f..fdf1eb6945 100644 --- a/fips/des/fips_des_selftest.c +++ b/fips/des/fips_des_selftest.c @@ -83,7 +83,7 @@ static const struct int FIPS_selftest_des() { - int n, ret = 0; + int n, ret = 1; EVP_CIPHER_CTX ctx; FIPS_cipher_ctx_init(&ctx); @@ -93,10 +93,8 @@ int FIPS_selftest_des() if (!fips_cipher_test(FIPS_TEST_CIPHER, &ctx, EVP_des_ede3_ecb(), tests3[n].key, NULL, tests3[n].plaintext, tests3[n].ciphertext, 8)) - goto err; + ret = 0; } - ret = 1; - err: FIPS_cipher_ctx_cleanup(&ctx); if (ret == 0) FIPSerr(FIPS_F_FIPS_SELFTEST_DES,FIPS_R_SELFTEST_FAILED); diff --git a/fips/ecdsa/fips_ecdsa_selftest.c b/fips/ecdsa/fips_ecdsa_selftest.c index 7d1007e19d..6ceb1c37b8 100644 --- a/fips/ecdsa/fips_ecdsa_selftest.c +++ b/fips/ecdsa/fips_ecdsa_selftest.c @@ -143,7 +143,7 @@ int FIPS_selftest_ecdsa() EC_KEY *ec = NULL; BIGNUM *x = NULL, *y = NULL, *d = NULL; EVP_PKEY pk; - 
int rv = 0; + int rv = 0, test_err = 0; size_t i; for (i = 0; i < sizeof(test_ec_data)/sizeof(EC_SELFTEST_DATA); i++) @@ -173,12 +173,12 @@ int FIPS_selftest_ecdsa() if (!fips_pkey_signature_test(FIPS_TEST_SIGNATURE, &pk, NULL, 0, NULL, 0, EVP_sha512(), 0, ecd->name)) - goto err; + test_err = 1; EC_KEY_free(ec); ec = NULL; } - - rv = 1; + if (test_err == 0) + rv = 1; err: diff --git a/fips/fips_test_suite.c b/fips/fips_test_suite.c index cf8f085e95..0eccc777fe 100644 --- a/fips/fips_test_suite.c +++ b/fips/fips_test_suite.c @@ -810,13 +810,15 @@ static const char *lookup_id(int id) static int fail_id = -1; static int fail_sub = -1; static int fail_key = -1; +static int sub_num = -1, sub_count = -1; +static int sub_fail_num = -1; static int st_err, post_quiet = 0; static int post_cb(int op, int id, int subid, void *ex) { const char *idstr, *exstr = ""; - char asctmp[20]; + char asctmp[20], teststr[80]; int keytype = -1; int exp_fail = 0; #ifdef FIPS_POST_TIME @@ -935,6 +937,16 @@ static int post_cb(int op, int id, int subid, void *ex) && (fail_sub == -1 || fail_sub == subid)) exp_fail = 1; + if (sub_num > 0) + { + if (sub_fail_num == sub_num) + exp_fail = 1; + sprintf(teststr, "\t\t%s %s (POST subtest #%d) test", + idstr, exstr, sub_num); + } + else + sprintf(teststr, "\t\t%s %s test", idstr, exstr); + switch(op) { case FIPS_POST_BEGIN: @@ -945,9 +957,16 @@ static int post_cb(int op, int id, int subid, void *ex) clock_gettime(CLOCK_REALTIME, &tstart); #endif printf("\tPOST started\n"); + sub_num = 1; break; case FIPS_POST_END: + if (sub_count == -1) + sub_count = sub_num; + else if (sub_num != sub_count) + printf("Inconsistent POST count %d != %d\n", + sub_num, sub_count); + sub_num = -1; printf("\tPOST %s\n", id ? 
"Success" : "Failed"); #ifdef FIPS_POST_TIME clock_gettime(CLOCK_REALTIME, &tend); @@ -959,21 +978,22 @@ static int post_cb(int op, int id, int subid, void *ex) case FIPS_POST_STARTED: if (!post_quiet && !exp_fail) - printf("\t\t%s %s test started\n", idstr, exstr); + printf("%s started\n", teststr); #ifdef FIPS_POST_TIME clock_gettime(CLOCK_REALTIME, &start); #endif break; case FIPS_POST_SUCCESS: + if (sub_num > 0) + sub_num++; if (exp_fail) { - printf("\t\t%s %s test OK but should've failed\n", - idstr, exstr); + printf("%s OK but should've failed\n", teststr); st_err++; } else if (!post_quiet) - printf("\t\t%s %s test OK\n", idstr, exstr); + printf("%s OK\n", teststr); #ifdef FIPS_POST_TIME clock_gettime(CLOCK_REALTIME, &end); printf("\t\t\tTook %f seconds\n", @@ -983,15 +1003,13 @@ static int post_cb(int op, int id, int subid, void *ex) break; case FIPS_POST_FAIL: + if (sub_num > 0) + sub_num++; if (exp_fail) - { - printf("\t\t%s %s test failed as expected\n", - idstr, exstr); - } + printf("%s failed as expected\n", teststr); else { - printf("\t\t%s %s test Failed Incorrectly!!\n", - idstr, exstr); + printf("%s Failed Incorrectly!!\n", teststr); st_err++; } break; @@ -999,7 +1017,7 @@ static int post_cb(int op, int id, int subid, void *ex) case FIPS_POST_CORRUPT: if (exp_fail) { - printf("\t\t%s %s test failure induced\n", idstr, exstr); + printf("%s failure induced\n", teststr); return 0; } break; @@ -1008,39 +1026,11 @@ static int post_cb(int op, int id, int subid, void *ex) return 1; } -/* Test POST induced failures */ - -typedef struct - { - const char *name; - int id, subid, keyid; - } fail_list; - -static fail_list flist[] = - { - {"Integrity", FIPS_TEST_INTEGRITY, -1, -1}, - {"AES", FIPS_TEST_CIPHER, NID_aes_128_ecb, -1}, - {"DES3", FIPS_TEST_CIPHER, NID_des_ede3_ecb, -1}, - {"AES-GCM", FIPS_TEST_GCM, -1, -1}, - {"AES-CCM", FIPS_TEST_CCM, -1, -1}, - {"AES-XTS", FIPS_TEST_XTS, -1, -1}, - {"Digest", FIPS_TEST_DIGEST, -1, -1}, - {"HMAC", FIPS_TEST_HMAC, -1, 
-1}, - {"CMAC", FIPS_TEST_CMAC, -1, -1}, - {"DRBG", FIPS_TEST_DRBG, -1, -1}, - {"X9.31 PRNG", FIPS_TEST_X931, -1, -1}, - {"RSA", FIPS_TEST_SIGNATURE, -1, EVP_PKEY_RSA}, - {"DSA", FIPS_TEST_SIGNATURE, -1, EVP_PKEY_DSA}, - {"ECDSA", FIPS_TEST_SIGNATURE, -1, EVP_PKEY_EC}, - {"ECDH", FIPS_TEST_ECDH, -1, -1}, - {NULL, -1, -1, -1} - }; - static int do_fail_all(int fullpost, int fullerr) { - fail_list *ftmp; int rv; size_t i; + int sub_fail; RSA *rsa = NULL; DSA *dsa = NULL; DRBG_CTX *dctx = NULL, *defctx = NULL; @@ -1052,12 +1042,11 @@ static int do_fail_all(int fullpost, int fullerr) if (!fullerr) no_err = 1; FIPS_module_mode_set(0, NULL); - for (ftmp = flist; ftmp->name; ftmp++) + for (sub_fail = 1; sub_fail < sub_count; sub_fail++) { - printf(" Testing induced failure of %s test\n", ftmp->name); - fail_id = ftmp->id; - fail_sub = ftmp->subid; - fail_key = ftmp->keyid; + sub_fail_num = sub_fail; + printf(" Testing induced failure of POST subtest %d\n", + sub_fail); rv = FIPS_module_mode_set(1, FIPS_AUTH_USER_PASS); if (rv) { @@ -1065,6 +1054,7 @@ static int do_fail_all(int fullpost, int fullerr) st_err++; } } + sub_fail_num = -1; printf(" Testing induced failure of RSA keygen test\n"); /* NB POST will succeed with a pairwise test failures as * it is not used during POST. From 043c341366e8289dae143ad7a78d8f5d7fc18688 Mon Sep 17 00:00:00 2001 From: "Dr. 
Stephen Henson" Date: Thu, 10 Jan 2013 23:29:59 +0000 Subject: [PATCH 095/120] Add .gitignore --- .gitignore | 70 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000000..ad8728ec2a --- /dev/null +++ b/.gitignore @@ -0,0 +1,70 @@ +# Object files +*.o + +# Top level excludes +/Makefile.bak +/Makefile +/*.a +/include +/*.pc +/rehash.time + +# Most *.c files under test/ are symlinks +/test/*.c +# Apart from these +!/test/asn1test.c +!/test/methtest.c +!/test/dummytest.c +!/test/igetest.c +!/test/r160test.c +!/test/fips_algvs.c + +# Certificate symbolic links +*.0 + +# Links under apps +/apps/CA.pl +/apps/md4.c + + +# Auto generated headers +/crypto/buildinf.h +/crypto/opensslconf.h + +# Auto generated assembly language source files +*.s +!/crypto/bn/asm/pa-risc2.s +!/crypto/bn/asm/pa-risc2W.s + +# Executables +/apps/openssl +/test/sha256t +/test/sha512t +/test/*test +/test/fips_aesavs +/test/fips_desmovs +/test/fips_dhvs +/test/fips_drbgvs +/test/fips_dssvs +/test/fips_ecdhvs +/test/fips_ecdsavs +/test/fips_rngvs +/test/fips_test_suite +*.so* +*.dylib* +*.dll* +# Exceptions +!/test/bctest +!/crypto/des/times/486-50.sol + +# Misc auto generated files +/tools/c_rehash +/test/evptests.txt +lib +Makefile.save +*.bak +# FIPS module specific files. +/fips/fips_auth.h +/fips/fips_standalone_sha1 +/fips/fipscanister.o.sha1 + From 950e2889e1133134021fc76c779919372e1ba3a9 Mon Sep 17 00:00:00 2001 From: "Dr. Stephen Henson" Date: Wed, 16 Jan 2013 14:20:01 +0000 Subject: [PATCH 096/120] Now GMAC is fixed remove workaround. 
--- fips/aes/fips_gcmtest.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/fips/aes/fips_gcmtest.c b/fips/aes/fips_gcmtest.c index 4000e0763f..30e4bcc0f4 100644 --- a/fips/aes/fips_gcmtest.c +++ b/fips/aes/fips_gcmtest.c @@ -208,8 +208,6 @@ static void gcmtest(FILE *in, FILE *out, int encrypt) ct = OPENSSL_malloc(ptlen); rv = FIPS_cipher(&ctx, ct, pt, ptlen); } - else - FIPS_cipher(&ctx, iv, iv, 0); FIPS_cipher(&ctx, NULL, NULL, 0); FIPS_cipher_ctx_ctrl(&ctx, EVP_CTRL_GCM_GET_TAG, taglen, tag); @@ -244,8 +242,6 @@ static void gcmtest(FILE *in, FILE *out, int encrypt) pt = OPENSSL_malloc(ptlen); rv = FIPS_cipher(&ctx, pt, ct, ptlen); } - else - FIPS_cipher(&ctx, iv, iv, 0); rv = FIPS_cipher(&ctx, NULL, NULL, 0); if (rv < 0) fprintf(out, "FAIL" RESP_EOL); From 6fb0806b01d189301b6be8a8ab315019dc51541c Mon Sep 17 00:00:00 2001 From: "Dr. Stephen Henson" Date: Wed, 23 Jan 2013 02:57:36 +0000 Subject: [PATCH 097/120] Add verbose option to fips_test_suite to give additional details of all operations. Add ecdsa test. Test crypto operations are inhibited on test failures. Test on demand POST. --- fips/fips_test_suite.c | 420 ++++++++++++++++++++++++++++++++++------- 1 file changed, 356 insertions(+), 64 deletions(-) diff --git a/fips/fips_test_suite.c b/fips/fips_test_suite.c index 0eccc777fe..c7054db742 100644 --- a/fips/fips_test_suite.c +++ b/fips/fips_test_suite.c @@ -40,12 +40,46 @@ int main(int argc, char *argv[]) #include #include +#include #include #include #include #include "fips_utl.h" +static int verbose = 0; + +static int fips_module_mode_set_verbose(int mode, const char *pass) + { + int rv; + if (verbose) + printf("Attempting to %s FIPS mode\n", mode ? 
"Enter" : "Leave"); + rv = FIPS_module_mode_set(mode, pass); + if (verbose) + printf("FIPS_module_mode() returned %d\n", FIPS_module_mode()); + return rv; + } + +static void do_print_rsa_key(RSA *rsa) + { + if (!verbose) + return; + do_bn_print_name(stdout, "RSA key modulus value", rsa->e); + do_bn_print_name(stdout, "RSA key publicExponent value", rsa->n); + do_bn_print_name(stdout, "RSA key pricateExponent value", rsa->d); + do_bn_print_name(stdout, "RSA key prime1 value", rsa->p); + do_bn_print_name(stdout, "RSA key prime2 value", rsa->q); + do_bn_print_name(stdout, "RSA key exponent1 value", rsa->dmp1); + do_bn_print_name(stdout, "RSA key exponent2 value", rsa->dmq1); + do_bn_print_name(stdout, "RSA key coefficient value", rsa->iqmp); + } + +static void do_print_buf(char *name, unsigned char *buf, int buflen) + { + if (verbose) + OutputValue(name, buf, buflen, stdout, 0); + } + /* AES: encrypt and decrypt known plaintext, verify result matches original plaintext */ static int FIPS_aes_test(void) @@ -57,14 +91,30 @@ static int FIPS_aes_test(void) unsigned char plaintext[16] = "etaonrishdlcu"; EVP_CIPHER_CTX ctx; FIPS_cipher_ctx_init(&ctx); + if (verbose) + { + do_print_buf("Key", key, sizeof(key)); + do_print_buf("Plaintext", plaintext, sizeof(plaintext)); + } if (FIPS_cipherinit(&ctx, EVP_aes_128_ecb(), key, NULL, 1) <= 0) goto err; FIPS_cipher(&ctx, citmp, plaintext, 16); + if (verbose) + { + do_print_buf("Ciphertext", citmp, sizeof(plaintext)); + printf("AES 128 bit ECB mode decryption started\n"); + } if (FIPS_cipherinit(&ctx, EVP_aes_128_ecb(), key, NULL, 0) <= 0) goto err; FIPS_cipher(&ctx, pltmp, citmp, 16); + do_print_buf("Recovered Plaintext", pltmp, sizeof(plaintext)); if (memcmp(pltmp, plaintext, 16)) + { + printf("Comparison failure!!\n"); goto err; + } + if (verbose) + printf("Comparison success.\n"); ret = 1; err: FIPS_cipher_ctx_cleanup(&ctx); @@ -83,6 +133,13 @@ static int FIPS_aes_gcm_test(void) unsigned char plaintext[16] = "etaonrishdlcu"; 
EVP_CIPHER_CTX ctx; FIPS_cipher_ctx_init(&ctx); + if (verbose) + { + do_print_buf("Key", key, sizeof(key)); + do_print_buf("IV", key, sizeof(iv)); + do_print_buf("Plaintext", plaintext, sizeof(plaintext)); + do_print_buf("AAD", aad, sizeof(aad)); + } if (FIPS_cipherinit(&ctx, EVP_aes_128_gcm(), key, iv, 1) <= 0) goto err; FIPS_cipher(&ctx, NULL, aad, sizeof(aad)); @@ -91,6 +148,12 @@ static int FIPS_aes_gcm_test(void) if (!FIPS_cipher_ctx_ctrl(&ctx, EVP_CTRL_GCM_GET_TAG, 16, tagtmp)) goto err; + if (verbose) + { + do_print_buf("Ciphertext", citmp, sizeof(citmp)); + do_print_buf("Tag", tagtmp, sizeof(tagtmp)); + } + if (FIPS_cipherinit(&ctx, EVP_aes_128_gcm(), key, iv, 0) <= 0) goto err; if (!FIPS_cipher_ctx_ctrl(&ctx, EVP_CTRL_GCM_SET_TAG, 16, tagtmp)) @@ -103,8 +166,17 @@ static int FIPS_aes_gcm_test(void) if (FIPS_cipher(&ctx, NULL, NULL, 0) < 0) goto err; + if (verbose) + do_print_buf("Recovered Plaintext", pltmp, sizeof(plaintext)); + if (memcmp(pltmp, plaintext, 16)) + { + if (verbose) + printf("Comparison failure!!\n"); goto err; + } + + printf("Comparison sucess.\n"); ret = 1; err: @@ -122,20 +194,110 @@ static int FIPS_des3_test(void) unsigned char plaintext[] = { 'e', 't', 'a', 'o', 'n', 'r', 'i', 's' }; EVP_CIPHER_CTX ctx; FIPS_cipher_ctx_init(&ctx); + if (verbose) + { + do_print_buf("Key", key, sizeof(key)); + do_print_buf("Plaintext", plaintext, sizeof(plaintext)); + } if (FIPS_cipherinit(&ctx, EVP_des_ede3_ecb(), key, NULL, 1) <= 0) goto err; FIPS_cipher(&ctx, citmp, plaintext, 8); + if (verbose) + { + do_print_buf("Ciphertext", citmp, sizeof(plaintext)); + printf("DES3 ECB mode decryption\n"); + } if (FIPS_cipherinit(&ctx, EVP_des_ede3_ecb(), key, NULL, 0) <= 0) goto err; FIPS_cipher(&ctx, pltmp, citmp, 8); + if (verbose) + do_print_buf("Recovered Plaintext", pltmp, sizeof(plaintext)); if (memcmp(pltmp, plaintext, 8)) + { + if (verbose) + printf("Comparison failure!!\n"); + goto err; + } + if (verbose) + printf("Comparison success\n"); ret = 1; err: 
FIPS_cipher_ctx_cleanup(&ctx); return ret; } +/* + * ECDSA: generate keys and sign, verify input plaintext. + */ +static int FIPS_ecdsa_test(void) + { + EC_KEY *ec = NULL; + unsigned char dgst[] = "etaonrishdlc"; + int r = 0; + ECDSA_SIG *sig = NULL; + + ERR_clear_error(); + ec = FIPS_ec_key_new_by_curve_name(NID_X9_62_prime256v1); + if (!ec) + goto end; + if (!FIPS_ec_key_generate_key(ec)) + goto end; + + if (verbose) + { + BIGNUM *Qx, *Qy; + BN_CTX *ctx; + const EC_GROUP *grp; + const EC_POINT *pt; + const BIGNUM *priv; + Qx = BN_new(); + Qy = BN_new(); + ctx = BN_CTX_new(); + grp = EC_KEY_get0_group(ec); + pt = EC_KEY_get0_public_key(ec); + priv = EC_KEY_get0_private_key(ec); + printf("EC Key using P-256\n"); + if (!EC_POINT_get_affine_coordinates_GFp(grp, pt, Qx, Qy, ctx)) + goto end; + + do_bn_print_name(stdout, "ECDSA key x coordinate", Qx); + do_bn_print_name(stdout, "ECDSA key y coordinate", Qy); + do_bn_print_name(stdout, "ECDSA key private value", priv); + BN_free(Qx); + BN_free(Qy); + BN_CTX_free(ctx); + printf("Signing string \"%s\" using SHA256\n", dgst); + } + + sig = FIPS_ecdsa_sign(ec, dgst, sizeof(dgst) -1, EVP_sha256()); + if (!sig) + { + if (verbose) + printf("Signing Failed!!\n"); + goto end; + } + + if (verbose) + { + printf("Signing successful\n"); + do_bn_print_name(stdout, "ECDSA signature r value", sig->r); + do_bn_print_name(stdout, "ECDSA signature s value", sig->s); + } + + r = FIPS_ecdsa_verify(ec, dgst, sizeof(dgst) -1, EVP_sha256(), sig); + if (verbose) + printf("ECDSA verification %s\n", r ? "Successful." : "Failed!!"); + end: + if (sig) + FIPS_ecdsa_sig_free(sig); + if (ec) + FIPS_ec_key_free(ec); + if (r != 1) + return 0; + return 1; + } + /* * DSA: generate keys and sign, verify input plaintext. 
*/ @@ -157,11 +319,34 @@ static int FIPS_dsa_test(int bad) if (bad) BN_add_word(dsa->pub_key, 1); + if (verbose) + { + do_bn_print_name(stdout, "DSA key p value", dsa->p); + do_bn_print_name(stdout, "DSA key q value", dsa->q); + do_bn_print_name(stdout, "DSA key g value", dsa->g); + do_bn_print_name(stdout, "DSA key public_key value", dsa->pub_key); + do_bn_print_name(stdout, "DSA key private key value", dsa->priv_key); + printf("Signing string \"%s\" using SHA256\n", dgst); + } + sig = FIPS_dsa_sign(dsa, dgst, sizeof(dgst) -1, EVP_sha256()); if (!sig) + { + if (verbose) + printf("Signing Failed!!\n"); goto end; + } + + if (verbose) + { + printf("Signing successful\n"); + do_bn_print_name(stdout, "DSA signature r value", sig->r); + do_bn_print_name(stdout, "DSA signature s value", sig->s); + } r = FIPS_dsa_verify(dsa, dgst, sizeof(dgst) -1, EVP_sha256(), sig); + if (verbose) + printf("DSA verification %s\n", r ? "Successful." : "Failed!!"); end: if (sig) FIPS_dsa_sig_free(sig); @@ -196,12 +381,30 @@ static int FIPS_rsa_test(int bad) if (bad) BN_add_word(key->n, 1); + if (verbose) + { + do_print_rsa_key(key); + printf("Signing string \"%s\" using SHA256\n", input_ptext); + } + if (!FIPS_rsa_sign(key, input_ptext, sizeof(input_ptext) - 1, EVP_sha256(), RSA_PKCS1_PADDING, 0, NULL, buf, &slen)) + { + if (verbose) + printf("RSA Signing failed!!\n"); goto end; + } + + if (verbose) + { + printf("RSA signing successul\n"); + do_print_buf("RSA signature", buf, slen); + } r = FIPS_rsa_verify(key, input_ptext, sizeof(input_ptext) - 1, EVP_sha256(), RSA_PKCS1_PADDING, 0, NULL, buf, slen); + if (verbose) + printf("RSA Verification %s\n", r == 1 ? 
"Successful" : "Failed!!"); end: if (key) FIPS_rsa_free(key); @@ -223,6 +426,11 @@ static int FIPS_sha1_test() ERR_clear_error(); if (!FIPS_digest(str,sizeof(str) - 1,md, NULL, EVP_sha1())) return 0; + if (verbose) + { + printf("Digesting string %s\n", str); + do_print_buf("Digest value", md, sizeof(md)); + } if (memcmp(md,digest,sizeof(md))) return 0; return 1; @@ -242,6 +450,11 @@ static int FIPS_sha256_test() ERR_clear_error(); if (!FIPS_digest(str,sizeof(str) - 1,md, NULL, EVP_sha256())) return 0; + if (verbose) + { + printf("Digesting string %s\n", str); + do_print_buf("Digest value", md, sizeof(md)); + } if (memcmp(md,digest,sizeof(md))) return 0; return 1; @@ -263,6 +476,11 @@ static int FIPS_sha512_test() ERR_clear_error(); if (!FIPS_digest(str,sizeof(str) - 1,md, NULL, EVP_sha512())) return 0; + if (verbose) + { + printf("Digesting string %s\n", str); + do_print_buf("Digest value", md, sizeof(md)); + } if (memcmp(md,digest,sizeof(md))) return 0; return 1; @@ -284,8 +502,19 @@ static int FIPS_hmac_sha1_test() ERR_clear_error(); if (!HMAC(EVP_sha1(),key,sizeof(key)-1,iv,sizeof(iv)-1,out,&outlen)) return 0; + if (verbose) + { + do_print_buf("HMAC key", key, sizeof(key) -1); + do_print_buf("HMAC input", iv, sizeof(iv) -1); + do_print_buf("HMAC output", out, outlen); + } if (memcmp(out,kaval,outlen)) + { + if (verbose) + printf("HMAC comparison failed!!\n"); return 0; + } + printf("HMAC comparison successful.\n"); return 1; } @@ -305,6 +534,19 @@ static int FIPS_hmac_sha224_test() ERR_clear_error(); if (!HMAC(EVP_sha224(),key,sizeof(key)-1,iv,sizeof(iv)-1,out,&outlen)) return 0; + if (verbose) + { + do_print_buf("HMAC key", key, sizeof(key) -1); + do_print_buf("HMAC input", iv, sizeof(iv) -1); + do_print_buf("HMAC output", out, outlen); + } + if (memcmp(out,kaval,outlen)) + { + if (verbose) + printf("HMAC comparison failed!!\n"); + return 0; + } + printf("HMAC comparison successful.\n"); if (memcmp(out,kaval,outlen)) return 0; return 1; @@ -326,8 +568,19 @@ 
static int FIPS_hmac_sha256_test() ERR_clear_error(); if (!HMAC(EVP_sha256(),key,sizeof(key)-1,iv,sizeof(iv)-1,out,&outlen)) return 0; + if (verbose) + { + do_print_buf("HMAC key", key, sizeof(key) -1); + do_print_buf("HMAC input", iv, sizeof(iv) -1); + do_print_buf("HMAC output", out, outlen); + } if (memcmp(out,kaval,outlen)) + { + if (verbose) + printf("HMAC comparison failed!!\n"); return 0; + } + printf("HMAC comparison successful.\n"); return 1; } @@ -348,8 +601,19 @@ static int FIPS_hmac_sha384_test() ERR_clear_error(); if (!HMAC(EVP_sha384(),key,sizeof(key)-1,iv,sizeof(iv)-1,out,&outlen)) return 0; + if (verbose) + { + do_print_buf("HMAC key", key, sizeof(key) -1); + do_print_buf("HMAC input", iv, sizeof(iv) -1); + do_print_buf("HMAC output", out, outlen); + } if (memcmp(out,kaval,outlen)) + { + if (verbose) + printf("HMAC comparison failed!!\n"); return 0; + } + printf("HMAC comparison successful.\n"); return 1; } @@ -371,8 +635,19 @@ static int FIPS_hmac_sha512_test() ERR_clear_error(); if (!HMAC(EVP_sha512(),key,sizeof(key)-1,iv,sizeof(iv)-1,out,&outlen)) return 0; + if (verbose) + { + do_print_buf("HMAC key", key, sizeof(key) -1); + do_print_buf("HMAC input", iv, sizeof(iv) -1); + do_print_buf("HMAC output", out, outlen); + } if (memcmp(out,kaval,outlen)) + { + if (verbose) + printf("HMAC comparison failed!!\n"); return 0; + } + printf("HMAC comparison successful.\n"); return 1; } @@ -407,18 +682,15 @@ static int FIPS_cmac_aes128_test() out = OPENSSL_malloc(outlen); if (!CMAC_Final(ctx, out, &outlen)) goto end; -#if 0 - { - char *hexout = OPENSSL_malloc(outlen * 2 + 1); - bin2hex(out, outlen, hexout); - printf("CMAC-AES128: res = %s\n", hexout); - OPENSSL_free(hexout); - } - r = 1; -#else if (!memcmp(out,kaval,outlen)) r = 1; -#endif + if (verbose) + { + do_print_buf("CMAC key", key, sizeof(key)); + do_print_buf("CMAC input", data, sizeof(data) -1); + do_print_buf("CMAC output", out, outlen); + printf("CMAC comparison %s\n", r == 1 ? "successful." 
: "Failed!!"); + } end: CMAC_CTX_free(ctx); if (out) @@ -458,18 +730,15 @@ static int FIPS_cmac_aes192_test() out = OPENSSL_malloc(outlen); if (!CMAC_Final(ctx, out, &outlen)) goto end; -#if 0 - { - char *hexout = OPENSSL_malloc(outlen * 2 + 1); - bin2hex(out, outlen, hexout); - printf("CMAC-AES192: res = %s\n", hexout); - OPENSSL_free(hexout); - } - r = 1; -#else if (!memcmp(out,kaval,outlen)) r = 1; -#endif + if (verbose) + { + do_print_buf("CMAC key", key, sizeof(key)); + do_print_buf("CMAC input", data, sizeof(data) -1); + do_print_buf("CMAC output", out, outlen); + printf("CMAC comparison %s\n", r == 1 ? "successful." : "Failed!!"); + } end: CMAC_CTX_free(ctx); if (out) @@ -510,18 +779,15 @@ static int FIPS_cmac_aes256_test() out = OPENSSL_malloc(outlen); if (!CMAC_Final(ctx, out, &outlen)) goto end; -#if 0 - { - char *hexout = OPENSSL_malloc(outlen * 2 + 1); - bin2hex(out, outlen, hexout); - printf("CMAC-AES256: res = %s\n", hexout); - OPENSSL_free(hexout); - } - r = 1; -#else if (!memcmp(out,kaval,outlen)) r = 1; -#endif + if (verbose) + { + do_print_buf("CMAC key", key, sizeof(key)); + do_print_buf("CMAC input", data, sizeof(data) -1); + do_print_buf("CMAC output", out, outlen); + printf("CMAC comparison %s\n", r == 1 ? "successful." : "Failed!!"); + } end: CMAC_CTX_free(ctx); if (out) @@ -560,18 +826,15 @@ static int FIPS_cmac_tdea3_test() out = OPENSSL_malloc(outlen); if (!CMAC_Final(ctx, out, &outlen)) goto end; -#if 0 - { - char *hexout = OPENSSL_malloc(outlen * 2 + 1); - bin2hex(out, outlen, hexout); - printf("CMAC-TDEA3: res = %s\n", hexout); - OPENSSL_free(hexout); - } - r = 1; -#else if (!memcmp(out,kaval,outlen)) r = 1; -#endif + if (verbose) + { + do_print_buf("CMAC key", key, sizeof(key)); + do_print_buf("CMAC input", data, sizeof(data) -1); + do_print_buf("CMAC output", out, outlen); + printf("CMAC comparison %s\n", r == 1 ? "successful." 
: "Failed!!"); + } end: CMAC_CTX_free(ctx); if (out) @@ -627,7 +890,11 @@ static int Zeroize() for(i = 0; i < sizeof(userkey); i++) printf("%02x", userkey[i]); printf("\n"); RAND_bytes(userkey, sizeof userkey); - printf("\tchar buffer key after overwriting: \n\t\t"); + printf("\tchar buffer key after overwriting with random key: \n\t\t"); + for(i = 0; i < sizeof(userkey); i++) printf("%02x", userkey[i]); + printf("\n"); + OPENSSL_cleanse(userkey, sizeof(userkey)); + printf("\tchar buffer key after zeroization: \n\t\t"); for(i = 0; i < sizeof(userkey); i++) printf("%02x", userkey[i]); printf("\n"); @@ -747,9 +1014,13 @@ static const char * Fail(const char *msg) return msg; } -static void test_msg(const char *msg, int result) - { - printf("%s...%s\n", msg, result ? "successful" : Fail("Failed!")); +#define test_msg(msg, rtest) \ + { \ + int rv; \ + if (verbose) \ + printf("%s...started\n", msg); \ + rv = rtest; \ + printf("%s...%s\n", msg, rv ? "successful" : Fail("Failed!")); \ } /* Table of IDs for POST translating between NIDs and names */ @@ -1036,23 +1307,36 @@ static int do_fail_all(int fullpost, int fullerr) DRBG_CTX *dctx = NULL, *defctx = NULL; EC_KEY *ec = NULL; BIGNUM *bn = NULL; + unsigned char key[16] = {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16}; + EVP_CIPHER_CTX ctx; unsigned char out[10]; if (!fullpost) post_quiet = 1; if (!fullerr) no_err = 1; - FIPS_module_mode_set(0, NULL); + fips_module_mode_set_verbose(0, NULL); for (sub_fail = 1; sub_fail < sub_count; sub_fail++) { sub_fail_num = sub_fail; printf(" Testing induced failure of POST subtest %d\n", sub_fail); - rv = FIPS_module_mode_set(1, FIPS_AUTH_USER_PASS); + rv = fips_module_mode_set_verbose(1, FIPS_AUTH_USER_PASS); if (rv) { printf("\tFIPS mode incorrectly successful!!\n"); st_err++; } + printf("\tAttempting crypto operation after failed POST... 
"); + FIPS_cipher_ctx_init(&ctx); + rv = FIPS_cipherinit(&ctx, EVP_aes_128_ecb(), key, NULL, 1); + if (rv > 0) + { + printf("succeeded incorrectly!!\n"); + st_err++; + } + else + printf("failed as expected.\n"); + FIPS_cipher_ctx_cleanup(&ctx); } sub_fail_num = -1; printf(" Testing induced failure of RSA keygen test\n"); @@ -1062,7 +1346,7 @@ static int do_fail_all(int fullpost, int fullerr) fail_id = FIPS_TEST_PAIRWISE; fail_key = EVP_PKEY_RSA; /* Now enter FIPS mode successfully */ - if (!FIPS_module_mode_set(1, FIPS_AUTH_USER_PASS)) + if (!fips_module_mode_set_verbose(1, FIPS_AUTH_USER_PASS)) { printf("\tError entering FIPS mode\n"); st_err++; @@ -1082,12 +1366,12 @@ static int do_fail_all(int fullpost, int fullerr) printf("\tRSA key generation failed as expected.\n"); /* Leave FIPS mode to clear error */ - FIPS_module_mode_set(0, NULL); + fips_module_mode_set_verbose(0, NULL); printf(" Testing induced failure of DSA keygen test\n"); fail_key = EVP_PKEY_DSA; /* Enter FIPS mode successfully */ - if (!FIPS_module_mode_set(1, FIPS_AUTH_USER_PASS)) + if (!fips_module_mode_set_verbose(1, FIPS_AUTH_USER_PASS)) { printf("\tError entering FIPS mode\n"); st_err++; @@ -1106,9 +1390,9 @@ static int do_fail_all(int fullpost, int fullerr) printf("\tDSA key generation failed as expected.\n"); /* Leave FIPS mode to clear error */ - FIPS_module_mode_set(0, NULL); + fips_module_mode_set_verbose(0, NULL); /* Enter FIPS mode successfully */ - if (!FIPS_module_mode_set(1, FIPS_AUTH_USER_PASS)) + if (!fips_module_mode_set_verbose(1, FIPS_AUTH_USER_PASS)) { printf("\tError entering FIPS mode\n"); st_err++; @@ -1137,9 +1421,9 @@ static int do_fail_all(int fullpost, int fullerr) fail_sub = -1; fail_key = -1; /* Leave FIPS mode to clear error */ - FIPS_module_mode_set(0, NULL); + fips_module_mode_set_verbose(0, NULL); /* Enter FIPS mode successfully */ - if (!FIPS_module_mode_set(1, FIPS_AUTH_USER_PASS)) + if (!fips_module_mode_set_verbose(1, FIPS_AUTH_USER_PASS)) { printf("\tError 
entering FIPS mode\n"); st_err++; @@ -1172,9 +1456,9 @@ static int do_fail_all(int fullpost, int fullerr) FIPS_drbg_stick(0); /* Leave FIPS mode to clear error */ - FIPS_module_mode_set(0, NULL); + fips_module_mode_set_verbose(0, NULL); /* Enter FIPS mode successfully */ - if (!FIPS_module_mode_set(1, FIPS_AUTH_USER_PASS)) + if (!fips_module_mode_set_verbose(1, FIPS_AUTH_USER_PASS)) { printf("\tError entering FIPS mode\n"); st_err++; @@ -1202,9 +1486,9 @@ static int do_fail_all(int fullpost, int fullerr) else printf("\tDRBG continuous PRNG entropy failed as expected\n"); /* Leave FIPS mode to clear error */ - FIPS_module_mode_set(0, NULL); + fips_module_mode_set_verbose(0, NULL); /* Enter FIPS mode successfully */ - if (!FIPS_module_mode_set(1, FIPS_AUTH_USER_PASS)) + if (!fips_module_mode_set_verbose(1, FIPS_AUTH_USER_PASS)) { printf("\tError entering FIPS mode\n"); st_err++; @@ -1212,9 +1496,9 @@ static int do_fail_all(int fullpost, int fullerr) FIPS_drbg_free(dctx); /* Leave FIPS mode to clear error */ - FIPS_module_mode_set(0, NULL); + fips_module_mode_set_verbose(0, NULL); /* Enter FIPS mode successfully */ - if (!FIPS_module_mode_set(1, FIPS_AUTH_USER_PASS)) + if (!fips_module_mode_set_verbose(1, FIPS_AUTH_USER_PASS)) { printf("\tError entering FIPS mode\n"); st_err++; @@ -1242,9 +1526,9 @@ static int do_fail_all(int fullpost, int fullerr) FIPS_x931_stick(0); /* Leave FIPS mode to clear error */ - FIPS_module_mode_set(0, NULL); + fips_module_mode_set_verbose(0, NULL); /* Enter FIPS mode successfully */ - if (!FIPS_module_mode_set(1, FIPS_AUTH_USER_PASS)) + if (!fips_module_mode_set_verbose(1, FIPS_AUTH_USER_PASS)) { printf("\tError entering FIPS mode\n"); st_err++; @@ -1416,6 +1700,9 @@ int main(int argc, char **argv) } else if (!strcmp(*args, "fullerr")) { fullerr = 1; no_exit = 1; + } else if (!strcmp(*args, "verbose")) { + verbose = 1; + no_exit = 1; } else { printf("Bad argument \"%s\"\n", *args); return 1; @@ -1425,7 +1712,7 @@ int main(int argc, char 
**argv) if ((argc != 1) && !no_exit) { fips_algtest_init_nofips(); - if (!FIPS_module_mode_set(1, pass)) { + if (!fips_module_mode_set_verbose(1, pass)) { printf("Power-up self test failed\n"); return 1; } @@ -1446,7 +1733,7 @@ int main(int argc, char **argv) /* Power-up self test */ ERR_clear_error(); - test_msg("2. Automatic power-up self test", FIPS_module_mode_set(1, pass)); + test_msg("2a. Automatic power-up self test", fips_module_mode_set_verbose(1, pass)); if (!FIPS_module_mode()) return 1; if (do_drbg_stick) @@ -1454,6 +1741,8 @@ int main(int argc, char **argv) if (do_rng_stick) FIPS_x931_stick(1); + test_msg("2b. On demand self test", FIPS_selftest()); + /* AES encryption/decryption */ test_msg("3a. AES encryption/decryption", FIPS_aes_test()); @@ -1554,7 +1843,10 @@ int main(int argc, char **argv) printf("\t%s\n", do_drbg_all() ? "successful as expected" : Fail("failed INCORRECTLY!") ); - printf("13. Induced test failure check...\n"); + test_msg("13. ECDSA key generation and signature validation", + FIPS_ecdsa_test()); + + printf("14. Induced test failure check...\n"); printf("\t%s\n", do_fail_all(fullpost, fullerr) ? "successful as expected" : Fail("failed INCORRECTLY!") ); printf("\nAll tests completed with %d errors\n", Error); From 493119b1a81ccd447a88654cbc95aaab73289d85 Mon Sep 17 00:00:00 2001 From: Andy Polyakov Date: Wed, 2 Apr 2014 21:48:56 +0200 Subject: [PATCH 098/120] cryptlib.c: fix typo in WIN32 version of OPENSSL_showfatal. --- crypto/cryptlib.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crypto/cryptlib.c b/crypto/cryptlib.c index 118fca1ee9..cf96011cc5 100644 --- a/crypto/cryptlib.c +++ b/crypto/cryptlib.c @@ -297,7 +297,7 @@ void OPENSSL_showfatal (const char *fmta,...) 
DWORD out; va_start (ap,fmta); - len=_vsnprintf((char *)buf,sizeof(buf),fmt,ap); + len=_vsnprintf((char *)buf,sizeof(buf),fmta,ap); WriteFile(h,buf,len<0?sizeof(buf):(DWORD)len,&out,NULL); va_end (ap); return; From 114216bca07d6bc03530a8352993ae3f0f1e9b06 Mon Sep 17 00:00:00 2001 From: Alan Hryngle Date: Sat, 5 Jul 2014 22:24:03 +0100 Subject: [PATCH 099/120] Check return smaller of ret and f. PR#3418. (cherry picked from commit d4909f9a8dbbda9c5d140476b34a8f80b02b51f3) --- crypto/rsa/rsa_eay.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crypto/rsa/rsa_eay.c b/crypto/rsa/rsa_eay.c index 16f000ff48..64c23f7cdb 100644 --- a/crypto/rsa/rsa_eay.c +++ b/crypto/rsa/rsa_eay.c @@ -494,7 +494,7 @@ static int RSA_eay_private_encrypt(int flen, const unsigned char *from, if (padding == RSA_X931_PADDING) { BN_sub(f, rsa->n, ret); - if (BN_cmp(ret, f)) + if (BN_cmp(ret, f) > 0) res = f; else res = ret; From 6ea511211ce190cc336e94ccde7723fadf0b5a07 Mon Sep 17 00:00:00 2001 From: "Dr. Stephen Henson" Date: Sat, 5 Jul 2014 22:32:39 +0100 Subject: [PATCH 100/120] Only cleanse sbuf if it is not NULL. PR#2339 --- fips/rsa/fips_rsa_sign.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fips/rsa/fips_rsa_sign.c b/fips/rsa/fips_rsa_sign.c index a4e03e7417..4956971f04 100644 --- a/fips/rsa/fips_rsa_sign.c +++ b/fips/rsa/fips_rsa_sign.c @@ -288,8 +288,11 @@ int FIPS_rsa_sign_digest(RSA *rsa, const unsigned char *md, int md_len, *siglen=j; } psserr: - OPENSSL_cleanse(sbuf, i); - OPENSSL_free(sbuf); + if (sbuf) + { + OPENSSL_cleanse(sbuf, i); + OPENSSL_free(sbuf); + } return ret; } From 551ed53b2a67581f491fa729a5a5f21c1fa67323 Mon Sep 17 00:00:00 2001 From: "Dr. Stephen Henson" Date: Sun, 29 Jun 2014 22:01:28 +0100 Subject: [PATCH 101/120] Fix copy for CCM, GCM and XTS. Internal pointers in CCM, GCM and XTS contexts should either be NULL or set to point to the appropriate key schedule. This needs to be adjusted when copying contexts. 
Combination of 2 commits: 370bf1d708e6d7af42e1752fb078d0822c9bc73d c2fd5d79ffc4fc9d120a0faad579ce96473e6a2f --- crypto/evp/e_aes.c | 61 +++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 58 insertions(+), 3 deletions(-) diff --git a/crypto/evp/e_aes.c b/crypto/evp/e_aes.c index 4066a00523..56e5fffc7b 100644 --- a/crypto/evp/e_aes.c +++ b/crypto/evp/e_aes.c @@ -795,6 +795,28 @@ static int aes_gcm_ctrl(EVP_CIPHER_CTX *c, int type, int arg, void *ptr) /* Extra padding: tag appended to record */ return EVP_GCM_TLS_TAG_LEN; + case EVP_CTRL_COPY: + { + EVP_CIPHER_CTX *out = ptr; + EVP_AES_GCM_CTX *gctx_out = out->cipher_data; + if (gctx->gcm.key) + { + if (gctx->gcm.key != &gctx->ks) + return 0; + gctx_out->gcm.key = &gctx_out->ks; + } + if (gctx->iv == c->iv) + gctx_out->iv = out->iv; + else + { + gctx_out->iv = OPENSSL_malloc(gctx->ivlen); + if (!gctx_out->iv) + return 0; + memcpy(gctx_out->iv, gctx->iv, gctx->ivlen); + } + return 1; + } + default: return -1; @@ -1016,7 +1038,8 @@ static int aes_gcm_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, #define CUSTOM_FLAGS (EVP_CIPH_FLAG_DEFAULT_ASN1 \ | EVP_CIPH_CUSTOM_IV | EVP_CIPH_FLAG_CUSTOM_CIPHER \ - | EVP_CIPH_ALWAYS_CALL_INIT | EVP_CIPH_CTRL_INIT) + | EVP_CIPH_ALWAYS_CALL_INIT | EVP_CIPH_CTRL_INIT \ + | EVP_CIPH_CUSTOM_COPY) BLOCK_CIPHER_custom(NID_aes,128,1,12,gcm,GCM, EVP_CIPH_FLAG_FIPS|EVP_CIPH_FLAG_AEAD_CIPHER|CUSTOM_FLAGS) @@ -1028,7 +1051,25 @@ BLOCK_CIPHER_custom(NID_aes,256,1,12,gcm,GCM, static int aes_xts_ctrl(EVP_CIPHER_CTX *c, int type, int arg, void *ptr) { EVP_AES_XTS_CTX *xctx = c->cipher_data; - if (type != EVP_CTRL_INIT) + if (type == EVP_CTRL_COPY) + { + EVP_CIPHER_CTX *out = ptr; + EVP_AES_XTS_CTX *xctx_out = out->cipher_data; + if (xctx->xts.key1) + { + if (xctx->xts.key1 != &xctx->ks1) + return 0; + xctx_out->xts.key1 = &xctx_out->ks1; + } + if (xctx->xts.key2) + { + if (xctx->xts.key2 != &xctx->ks2) + return 0; + xctx_out->xts.key2 = &xctx_out->ks2; + } + return 1; + } + else if (type 
!= EVP_CTRL_INIT) return -1; /* key1 and key2 are used as an indicator both key and IV are set */ xctx->xts.key1 = NULL; @@ -1125,7 +1166,8 @@ static int aes_xts_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, #define aes_xts_cleanup NULL #define XTS_FLAGS (EVP_CIPH_FLAG_DEFAULT_ASN1 | EVP_CIPH_CUSTOM_IV \ - | EVP_CIPH_ALWAYS_CALL_INIT | EVP_CIPH_CTRL_INIT) + | EVP_CIPH_ALWAYS_CALL_INIT | EVP_CIPH_CTRL_INIT \ + | EVP_CIPH_CUSTOM_COPY) BLOCK_CIPHER_custom(NID_aes,128,1,16,xts,XTS,EVP_CIPH_FLAG_FIPS|XTS_FLAGS) BLOCK_CIPHER_custom(NID_aes,256,1,16,xts,XTS,EVP_CIPH_FLAG_FIPS|XTS_FLAGS) @@ -1175,6 +1217,19 @@ static int aes_ccm_ctrl(EVP_CIPHER_CTX *c, int type, int arg, void *ptr) cctx->len_set = 0; return 1; + case EVP_CTRL_COPY: + { + EVP_CIPHER_CTX *out = ptr; + EVP_AES_CCM_CTX *cctx_out = out->cipher_data; + if (cctx->ccm.key) + { + if (cctx->ccm.key != &cctx->ks) + return 0; + cctx_out->ccm.key = &cctx_out->ks; + } + return 1; + } + default: return -1; From 177118fc2b1aa04933d03187cd50ae3a4bfdf3d2 Mon Sep 17 00:00:00 2001 From: Rich Salz Date: Thu, 4 Sep 2014 12:55:31 -0400 Subject: [PATCH 102/120] RT2849: Redundant check of "dsa" variable. In the current code, the check isn't redundant. And in fact the REAL check was missing. This avoids a NULL-deref crash. Reviewed-by: Dr. 
Stephen Henson --- fips/dsa/fips_dssvs.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fips/dsa/fips_dssvs.c b/fips/dsa/fips_dssvs.c index cee5fb398e..bd7055d463 100644 --- a/fips/dsa/fips_dssvs.c +++ b/fips/dsa/fips_dssvs.c @@ -553,6 +553,11 @@ static void keypair(FILE *in, FILE *out) int n=atoi(value); dsa = FIPS_dsa_new(); + if (!dsa) + { + fprintf(stderr, "DSA allocation error\n"); + exit(1); + } if (!dsa2 && !dsa_builtin_paramgen(dsa, L, N, NULL, NULL, 0, NULL, NULL, NULL, NULL)) { @@ -579,8 +584,7 @@ static void keypair(FILE *in, FILE *out) do_bn_print_name(out, "Y",dsa->pub_key); fputs(RESP_EOL, out); } - if (dsa) - FIPS_dsa_free(dsa); + FIPS_dsa_free(dsa); } } } From cfcd27d35d3ce6e64c0790037489c204440cf3f1 Mon Sep 17 00:00:00 2001 From: Andy Polyakov Date: Sat, 4 Oct 2014 23:40:37 +0200 Subject: [PATCH 103/120] Add iOS-specific FIPS addendum code. Reviewed-by: Steve Marquess --- iOS/Makefile | 76 ++ iOS/fips_algvs.app/Entitlements.plist | 8 + iOS/fips_algvs.app/Info.plist | 24 + iOS/fips_algvs.app/ResourceRules.plist | 25 + iOS/fopen.m | 93 +++ iOS/incore_macho.c | 1016 ++++++++++++++++++++++++ 6 files changed, 1242 insertions(+) create mode 100644 iOS/Makefile create mode 100644 iOS/fips_algvs.app/Entitlements.plist create mode 100644 iOS/fips_algvs.app/Info.plist create mode 100644 iOS/fips_algvs.app/ResourceRules.plist create mode 100644 iOS/fopen.m create mode 100644 iOS/incore_macho.c diff --git a/iOS/Makefile b/iOS/Makefile new file mode 100644 index 0000000000..db26da6406 --- /dev/null +++ b/iOS/Makefile @@ -0,0 +1,76 @@ +# +# OpenSSL/iOS/Makefile +# + +DIR= iOS +TOP= .. 
+CC= cc +INCLUDES= -I$(TOP) -I$(TOP)/include +CFLAG= -g -static +MAKEFILE= Makefile +PERL= perl +RM= rm -f + +EXE=incore_macho + +CFLAGS= $(INCLUDES) $(CFLAG) + +top: + @$(MAKE) -f $(TOP)/Makefile reflect THIS=exe + +exe: fips_algvs.app/fips_algvs + +incore_macho: incore_macho.c $(TOP)/crypto/sha/sha1dgst.c + $(HOSTCC) $(HOSTCFLAGS) -I$(TOP)/include -I$(TOP)/crypto -o $@ incore_macho.c $(TOP)/crypto/sha/sha1dgst.c + +fips_algvs.app/fips_algvs: $(TOP)/test/fips_algvs.c $(TOP)/fips/fipscanister.o fopen.m incore_macho + FIPS_SIG=./incore_macho \ + $(TOP)/fips/fipsld $(CFLAGS) -I$(TOP)/fips -o $@ \ + $(TOP)/test/fips_algvs.c $(TOP)/fips/fipscanister.o \ + fopen.m -framework Foundation || rm $@ + codesign -f -s "iPhone Developer" --entitlements fips_algvs.app/Entitlements.plist fips_algvs.app || rm $@ + +install: + @[ -n "$(INSTALLTOP)" ] # should be set by top Makefile... + @set -e; for i in $(EXE); \ + do \ + (echo installing $$i; \ + cp $$i $(INSTALL_PREFIX)$(INSTALLTOP)/bin/$$i.new; \ + chmod 755 $(INSTALL_PREFIX)$(INSTALLTOP)/bin/$$i.new; \ + mv -f $(INSTALL_PREFIX)$(INSTALLTOP)/bin/$$i.new $(INSTALL_PREFIX)$(INSTALLTOP)/bin/$$i ); \ + done; + @set -e; for i in $(SCRIPTS); \ + do \ + (echo installing $$i; \ + cp $$i $(INSTALL_PREFIX)$(OPENSSLDIR)/misc/$$i.new; \ + chmod 755 $(INSTALL_PREFIX)$(OPENSSLDIR)/misc/$$i.new; \ + mv -f $(INSTALL_PREFIX)$(OPENSSLDIR)/misc/$$i.new $(INSTALL_PREFIX)$(OPENSSLDIR)/misc/$$i ); \ + done + +tags: + ctags $(SRC) + +tests: + +links: + +lint: + lint -DLINT $(INCLUDES) $(SRC)>fluff + +depend: + @if [ -z "$(THIS)" ]; then \ + $(MAKE) -f $(TOP)/Makefile reflect THIS=$@; \ + else \ + $(MAKEDEPEND) -- $(CFLAG) $(INCLUDES) $(DEPFLAG) -- $(PROGS) $(SRC); \ + fi + +dclean: + $(PERL) -pe 'if (/^# DO NOT DELETE THIS LINE/) {print; exit(0);}' $(MAKEFILE) >Makefile.new + mv -f Makefile.new $(MAKEFILE) + +clean: + rm -f *.o *.obj lib tags core .pure .nfs* *.old *.bak fluff $(EXE) + rm -f fips_algvs.app/fips_algvs + +# DO NOT DELETE THIS LINE -- 
make depend depends on it. + diff --git a/iOS/fips_algvs.app/Entitlements.plist b/iOS/fips_algvs.app/Entitlements.plist new file mode 100644 index 0000000000..929c4e96d2 --- /dev/null +++ b/iOS/fips_algvs.app/Entitlements.plist @@ -0,0 +1,8 @@ + + + + + get-task-allow + + + \ No newline at end of file diff --git a/iOS/fips_algvs.app/Info.plist b/iOS/fips_algvs.app/Info.plist new file mode 100644 index 0000000000..3fd8fb4290 --- /dev/null +++ b/iOS/fips_algvs.app/Info.plist @@ -0,0 +1,24 @@ + + + + + CFBundleName + fips_algvs + CFBundleSupportedPlatforms + + iPhoneOS + + CFBundleExecutable + fips_algvs + CFBundleIdentifier + fips_algvs + CFBundleResourceSpecification + ResourceRules.plist + LSRequiresIPhoneOS + + CFBundleDisplayName + fips_algvs + CFBundleVersion + 1.0 + + diff --git a/iOS/fips_algvs.app/ResourceRules.plist b/iOS/fips_algvs.app/ResourceRules.plist new file mode 100644 index 0000000000..e7ec329dcc --- /dev/null +++ b/iOS/fips_algvs.app/ResourceRules.plist @@ -0,0 +1,25 @@ + + + + + rules + + .* + + Info.plist + + omit + + weight + 10 + + ResourceRules.plist + + omit + + weight + 100 + + + + diff --git a/iOS/fopen.m b/iOS/fopen.m new file mode 100644 index 0000000000..8d2e790845 --- /dev/null +++ b/iOS/fopen.m @@ -0,0 +1,93 @@ +#include +#include +#include +#include +#include +#include + +static FILE *(*libc_fopen)(const char *, const char *) = NULL; + +__attribute__((constructor)) +static void pre_main(void) +{ + /* + * Pull reference to fopen(3) from libc. + */ + void *handle = dlopen("libSystem.B.dylib",RTLD_LAZY); + + if (handle) { + libc_fopen = dlsym(handle,"fopen"); + dlclose(handle); + } + + /* + * Change to Documents directory. 
+ */ + NSString *docs = [NSSearchPathForDirectoriesInDomains(NSDocumentDirectory, NSUserDomainMask, YES) lastObject]; + + NSFileManager *filemgr = [NSFileManager defaultManager]; + [filemgr changeCurrentDirectoryPath: docs]; + [filemgr release]; +} + +char *mkdirhier(char *path) +{ + char *slash; + struct stat buf; + + if (path[0]=='.' && path[1]=='/') path+=2; + + if ((slash = strrchr(path,'/'))) { + *slash = '\0'; + if (stat(path,&buf)==0) { + *slash = '/'; + return NULL; + } + (void)mkdirhier(path); + mkdir (path,0777); + *slash = '/'; + } + + return slash; +} +/* + * Replacement fopen(3) + */ +FILE *fopen(const char *filename, const char *mode) +{ + FILE *ret; + + if ((ret = (*libc_fopen)(filename,mode)) == NULL) { + /* + * If file is not present in Documents directory, try from Bundle. + */ + NSString *nsspath = [NSString stringWithFormat:@"%@/%s", + [[NSBundle mainBundle] bundlePath], + filename]; + + if ((ret = (*libc_fopen)([nsspath cStringUsingEncoding:NSUTF8StringEncoding],mode)) == NULL && + mode[0]=='w' && + ((filename[0]!='.' && filename[0]!='/') || + (filename[0]=='.' && filename[1]=='/')) ) { + /* + * If not present in Bundle, create directory in Documents + */ + char *path = strdup(filename), *slash; + static int once = 1; + + if ((slash = mkdirhier(path)) && once) { + /* + * For some reason iOS truncates first created file + * upon program exit, so we create one preemptively... + */ + once = 0; + strcpy(slash,"/.0"); + creat(path,0444); + } + free(path); + ret = (*libc_fopen)(filename,mode); + } + } + + return ret; +} diff --git a/iOS/incore_macho.c b/iOS/incore_macho.c new file mode 100644 index 0000000000..8842764cb0 --- /dev/null +++ b/iOS/incore_macho.c @@ -0,0 +1,1016 @@ +/* incore_macho.c */ +/* ==================================================================== + * Copyright (c) 2011 The OpenSSL Project. All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" + * + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to + * endorse or promote products derived from this software without + * prior written permission. For written permission, please contact + * openssl-core@openssl.org. + * + * 5. Products derived from this software may not be called "OpenSSL" + * nor may "OpenSSL" appear in their names without prior written + * permission of the OpenSSL Project. + * + * 6. Redistributions of any form whatsoever must retain the following + * acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit (http://www.openssl.org/)" + * + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE OpenSSL PROJECT OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +/* ==================================================================== + * Copyright 2011 Thursby Software Systems, Inc. All rights reserved. + * + * The portions of the attached software ("Contribution") is developed by + * Thursby Software Systems, Inc and is licensed pursuant to the OpenSSL + * open source license. + * + * The Contribution, originally written by Paul W. Nelson of + * Thursby Software Systems, Inc, consists of the fingerprint calculation + * required for the FIPS140 integrity check. + * + * No patent licenses or other rights except those expressly stated in + * the OpenSSL open source license shall be deemed granted or received + * expressly, by implication, estoppel, or otherwise. + * + * No assurances are provided by Thursby that the Contribution does not + * infringe the patent or other intellectual property rights of any third + * party or that the license provides you with all the necessary rights + * to make use of the Contribution. + * + * THE SOFTWARE IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY KIND. IN + * ADDITION TO THE DISCLAIMERS INCLUDED IN THE LICENSE, THURSBY + * SPECIFICALLY DISCLAIMS ANY LIABILITY FOR CLAIMS BROUGHT BY YOU OR ANY + * OTHER ENTITY BASED ON INFRINGEMENT OF INTELLECTUAL PROPERTY RIGHTS OR + * OTHERWISE. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifndef CPU_SUBRTPE_V7F +# define CPU_SUBRTPE_V7F ((cpu_subtype_t) 10) +#endif +/* iPhone 5 and iPad 4 (A6 Processors) */ +#ifndef CPU_SUBTYPE_ARM_V7S +# define CPU_SUBTYPE_ARM_V7S ((cpu_subtype_t) 11) +#endif +#ifndef CPU_SUBTYPE_ARM_V7K +# define CPU_SUBTYPE_ARM_V7K ((cpu_subtype_t) 12) +#endif +#ifndef CPU_SUBTYPE_ARM_V8 +# define CPU_SUBTYPE_ARM_V8 ((cpu_subtype_t) 13) +#endif + +#ifndef CPU_TYPE_ARM64 +# define CPU_TYPE_ARM64 (CPU_TYPE_ARM | CPU_ARCH_ABI64) +#endif + +static int gVerbosity = 0; + +static void hexdump(const unsigned char *buf,size_t len, + unsigned long address,FILE* fp) +{ + unsigned long addr; + int i; + + addr = 0; + while(addrflags; + memcpy( sec->sectname, pSec->sectname, 16 ); + memcpy( sec->segname, pSec->segname, 16 ); + sec->addr = pSec->addr; + sec->size = pSec->size; + sec->offset = pSec->offset; + sec->align = pSec->align; + sec->reloff = pSec->reloff; + sec->nreloc = pSec->nreloc; + sec->flags = pSec->flags; + rval = pCommand + sizeof(struct section_64); + } + else + { + struct section* pSec = (struct section*)pCommand; + flags = pSec->flags; + memcpy( sec->sectname, pSec->sectname, 16 ); + memcpy( sec->segname, pSec->segname, 16 ); + sec->addr = pSec->addr; + sec->size = pSec->size; + sec->offset = pSec->offset; + sec->align = pSec->align; + sec->reloff = pSec->reloff; + sec->nreloc = pSec->nreloc; + sec->flags = pSec->flags; + rval = pCommand + sizeof(struct section); + } + if( gVerbosity > 2 ) + fprintf(stderr, " flags=%x\n", flags); + sec->segment = segment; + sec->_next = NULL; + if( macho->sec_head ) + macho->sec_tail->_next = sec; + else + macho->sec_head = sec; + macho->sec_tail = sec; + return rval; +} + +static section_t *lookup_section(macho_file_t* macho, uint32_t nsect) +{ + section_t *rval = macho->sec_head; + + if(nsect == 0) 
return NULL; + + while( rval != NULL && --nsect > 0 ) + rval = rval->_next; + return rval; +} + +static void *add_segment( macho_file_t *macho, void *pCommand, uint8_t is64bit ) +{ + void *rval = 0; + segment_t *seg = (segment_t *)calloc(1, sizeof(segment_t)); + + if(!seg) + return 0; + if(is64bit) + { + struct segment_command_64 *pSeg = (struct segment_command_64*)pCommand; + + memcpy( seg->segname, pSeg->segname, 16 ); + seg->vmaddr = pSeg->vmaddr; + seg->vmsize = pSeg->vmsize; + seg->fileoff = pSeg->fileoff; + seg->filesize = pSeg->filesize; + seg->maxprot = pSeg->maxprot; + seg->initprot = pSeg->initprot; + seg->nsects = pSeg->nsects; + seg->flags = pSeg->flags; + rval = pCommand + sizeof(struct segment_command_64); + } else { + struct segment_command *pSeg = (struct segment_command*)pCommand; + + memcpy( seg->segname, pSeg->segname, 16 ); + seg->vmaddr = pSeg->vmaddr; + seg->vmsize = pSeg->vmsize; + seg->fileoff = pSeg->fileoff; + seg->filesize = pSeg->filesize; + seg->maxprot = pSeg->maxprot; + seg->initprot = pSeg->initprot; + seg->nsects = pSeg->nsects; + seg->flags = pSeg->flags; + rval = pCommand + sizeof(struct segment_command); + } + seg->_next = NULL; + seg->mapped = macho->mapped + seg->fileoff; + + if( macho->seg_head ) + macho->seg_tail->_next = seg; + else + macho->seg_head = seg; + macho->seg_tail = seg; + + if( gVerbosity > 2 ) + fprintf(stderr, "Segment %s: flags=%x\n", seg->segname, seg->flags ); + + unsigned int ii; + for( ii=0; iinsects; ii++ ) + { + rval = add_section(macho, rval, is64bit, seg); + } + return rval; +} + +static const char *type_str(uint8_t n_type) +{ + static char result[16] = {}; + int idx = 0; + uint8_t stab; + + memset(result, 0, sizeof(result)); + if( n_type & N_PEXT ) + result[idx++] = 'P'; + if( n_type & N_EXT ) + result[idx++] = 'E'; + if( idx > 0 ) + result[idx++] = ':'; + switch( n_type & N_TYPE ) + { + case N_UNDF: result[idx++] = 'U'; break; + case N_ABS: result[idx++] = 'A'; break; + case N_PBUD: result[idx++] = 
'P'; break; + case N_SECT: result[idx++] = 'S'; break; + case N_INDR: result[idx++] = 'I'; break; + default: result[idx++] = '*'; break; + } + stab = n_type & N_STAB; + if( stab ) + { + result[idx++] = ':'; + result[idx++] = '0'+(stab >> 5); + } + result[idx++] = 0; + return result; +} + +static symtab_entry_t *lookup_entry_by_name( macho_file_t *macho, + const char *name) +{ + symtab_entry_t *entry; + + for( entry = macho->sym_head; entry; entry = entry->_next ) + { + if(strcmp(entry->n_symbol,name)==0 && (entry->n_type & N_STAB)==0 ) + { + if( entry->section == NULL ) + { + entry->section = lookup_section( macho, entry->n_sect ); + if( entry->section ) + { + section_t* sec = entry->section; + segment_t* seg = sec->segment; + uint64_t offset = entry->n_value - seg->vmaddr; + + entry->mapped = seg->mapped+offset; + } + else + entry = 0; + } + break; + } + } + return entry; +} + +static void check_symtab(macho_file_t *macho,void *pCommand,uint8_t is64bit ) +{ + + struct symtab_command *pSym = (struct symtab_command *)pCommand; + void *pS = macho->mapped + pSym->symoff; + unsigned int ii = 0; + + /* collect symbols */ + for( ii=0; iinsyms; ii++ ) + { + struct nlist *pnlist=(struct nlist*)pS; + symtab_entry_t *entry=(symtab_entry_t*)calloc(1,sizeof(symtab_entry_t)); + + if(!entry) + { + fprintf(stderr, "out of memory!\n"); + _exit(1); + } + entry->n_strx = pnlist->n_un.n_strx; + entry->n_type = pnlist->n_type; + entry->n_sect = pnlist->n_sect; + entry->n_desc = pnlist->n_desc; + entry->section = NULL; + if(is64bit) + { + struct nlist_64 *pnlist64 = (struct nlist_64*)pS; + + entry->n_value = pnlist64->n_value; + pS += sizeof(struct nlist_64); + } + else + { + entry->n_value = pnlist->n_value; + pS += sizeof(struct nlist); + } + entry->n_symbol=(const char *)macho->mapped+pSym->stroff+entry->n_strx; + entry->_next = NULL; + if( macho->sym_head ) + macho->sym_tail->_next = entry; + else + macho->sym_head = entry; + macho->sym_tail = entry; + } + if( gVerbosity > 2 ) + { 
+ /* dump info */ + symtab_entry_t* entry; + + for( entry = macho->sym_head; entry; entry=entry->_next ) + { + /* only do non-debug symbols */ + if( (entry->n_type & N_STAB) == 0 ) + fprintf(stderr, "%32.32s %18llx type=%s, sect=%d\n", + entry->n_symbol, entry->n_value, + type_str(entry->n_type), entry->n_sect); + } + } +} + +static int load_architecture( macho_file_t* inFile ) +{ + /* check the header */ + unsigned int ii; + void * pCurrent = inFile->mapped; + struct mach_header* header = (struct mach_header*)pCurrent; + + if( header->magic != MH_MAGIC && header->magic != MH_MAGIC_64 ) + { + fprintf(stderr, "%s is not a mach-o file\n", inFile->filename); + return -1; + } + else if( header->filetype == MH_BUNDLE ) + { + fprintf(stderr, "%s is not a mach-o executable file (filetype MH_BUNDLE, should be MH_EXECUTE or MH_DYLIB)\n", inFile->filename); + return -1; + } + else if( header->filetype == MH_DYLINKER ) + { + fprintf(stderr, "%s is not a mach-o executable file (filetype MH_DYLINKER, should be MH_EXECUTE or MH_DYLIB)\n", inFile->filename); + return -1; + } + else if( !(header->filetype == MH_EXECUTE || header->filetype == MH_DYLIB) ) + { + fprintf(stderr, "%s is not a mach-o executable file (filetype %d, should be MH_EXECUTE or MH_DYLIB)\n", inFile->filename, header->filetype); + return -1; + } + + if( gVerbosity > 1 ) + fprintf(stderr, "loading %s(%s)\n", inFile->filename, cputype(header->cputype, header->cpusubtype)); + + inFile->cpu_type = header->cputype; + inFile->cpu_subtype = header->cpusubtype; + + if( header->magic == MH_MAGIC ) + pCurrent += sizeof( struct mach_header ); + else if( header->magic == MH_MAGIC_64 ) + pCurrent += sizeof( struct mach_header_64 ); + for( ii=0; iincmds; ii++ ) + { + struct load_command* command = (struct load_command*)pCurrent; + const char * lc_name; + + switch( command->cmd ) + { + case LC_SEGMENT: + { + lc_name = "LC_SEGMENT"; + add_segment(inFile, pCurrent, header->magic == MH_MAGIC_64); + break; + } + case LC_SYMTAB: + 
{ + lc_name = "LC_SYMTAB"; + check_symtab(inFile, pCurrent, header->magic == MH_MAGIC_64 ); + break; + } + case LC_SYMSEG: lc_name = "LC_SYMSEG"; break; + case LC_THREAD: lc_name = "LC_THREAD"; break; + case LC_UNIXTHREAD: lc_name = "LC_UNIXTHREAD"; break; + case LC_LOADFVMLIB: lc_name = "LC_LOADFVMLIB"; break; + case LC_IDFVMLIB: lc_name = "LC_IDFVMLIB"; break; + case LC_IDENT: lc_name = "LC_IDENT"; break; + case LC_FVMFILE: lc_name = "LC_FVMFILE"; break; + case LC_PREPAGE: lc_name = "LC_PREPAGE"; break; + case LC_DYSYMTAB: lc_name = "LC_DYSYMTAB"; break; + case LC_LOAD_DYLIB: lc_name = "LC_LOAD_DYLIB"; break; + case LC_ID_DYLIB: lc_name = "LC_ID_DYLIB"; break; + case LC_LOAD_DYLINKER: lc_name = "LC_LOAD_DYLINKER"; break; + case LC_ID_DYLINKER: lc_name = "LC_ID_DYLINKER"; break; + case LC_PREBOUND_DYLIB: lc_name = "LC_PREBOUND_DYLIB"; break; + case LC_ROUTINES: lc_name = "LC_ROUTINES"; break; + case LC_SUB_FRAMEWORK: lc_name = "LC_SUB_FRAMEWORK"; break; + case LC_SUB_UMBRELLA: lc_name = "LC_SUB_UMBRELLA"; break; + case LC_SUB_CLIENT: lc_name = "LC_SUB_CLIENT"; break; + case LC_SUB_LIBRARY: lc_name = "LC_SUB_LIBRARY"; break; + case LC_TWOLEVEL_HINTS: lc_name = "LC_TWOLEVEL_HINTS"; break; + case LC_PREBIND_CKSUM: lc_name = "LC_PREBIND_CKSUM"; break; + case LC_LOAD_WEAK_DYLIB: lc_name = "LC_LOAD_WEAK_DYLIB"; break; + case LC_SEGMENT_64: + { + lc_name = "LC_SEGMENT_64"; + add_segment(inFile, pCurrent, TRUE); + break; + } + case LC_ROUTINES_64: lc_name = "LC_ROUTINES_64"; break; + case LC_UUID: lc_name = "LC_UUID"; break; + case LC_RPATH: lc_name = "LC_RPATH"; break; + case LC_CODE_SIGNATURE: lc_name = "LC_CODE_SIGNATURE"; break; + case LC_SEGMENT_SPLIT_INFO: + lc_name = "LC_SEGMENT_SPLIT_INFO"; break; + case LC_REEXPORT_DYLIB: lc_name = "LC_REEXPORT_DYLIB"; break; + case LC_LAZY_LOAD_DYLIB: lc_name = "LC_LAZY_LOAD_DYLIB"; break; + case LC_ENCRYPTION_INFO: lc_name = "LC_ENCRYPTION_INFO"; break; + case LC_DYLD_INFO: lc_name = "LC_DYLD_INFO"; break; + case 
LC_DYLD_INFO_ONLY: lc_name = "LC_DYLD_INFO_ONLY"; break; + case LC_LOAD_UPWARD_DYLIB: lc_name = "LC_LOAD_UPWARD_DYLIB"; break; + case LC_VERSION_MIN_MACOSX: + lc_name = "LC_VERSION_MIN_MACOSX"; break; + case LC_VERSION_MIN_IPHONEOS: + lc_name = "LC_VERSION_MIN_IPHONEOS"; break; + case LC_FUNCTION_STARTS: lc_name = "LC_FUNCTION_STARTS"; break; + case LC_DYLD_ENVIRONMENT: lc_name = "LC_DYLD_ENVIRONMENT"; break; + default: lc_name=NULL; break; + } + if( gVerbosity > 1 ) + { + if(lc_name) + fprintf(stderr,"command %s: size=%d\n",lc_name, + command->cmdsize ); + else + fprintf(stderr,"command %x, size=%d\n",command->cmd, + command->cmdsize); + } + pCurrent += command->cmdsize; + } + return 0; +} + +#define HOSTORDER_VALUE(val) (isBigEndian ? OSSwapBigToHostInt32(val) : (val)) + +static macho_file_t *load_file(macho_file_t *inFile) +{ + macho_file_t *rval = NULL; + void *pCurrent = inFile->mapped; + struct fat_header *fat = (struct fat_header *)pCurrent; + + if( fat->magic==FAT_MAGIC || fat->magic==FAT_CIGAM ) + { + int isBigEndian = fat->magic == FAT_CIGAM; + unsigned int ii = 0; + struct fat_arch *pArch = NULL; + uint32_t nfat_arch = 0; + + pCurrent += sizeof(struct fat_header); + pArch = pCurrent; + nfat_arch = HOSTORDER_VALUE(fat->nfat_arch); + for( ii=0; iifilename = strdup(inFile->filename); + archfile->mapped = inFile->mapped + + HOSTORDER_VALUE(pArch->offset); + archfile->size = HOSTORDER_VALUE(pArch->size); + archfile->align = HOSTORDER_VALUE(pArch->align); + archfile->isBigEndian = isBigEndian; + archfile->cpu_type = HOSTORDER_VALUE(pArch->cputype); + archfile->cpu_subtype = HOSTORDER_VALUE(pArch->cpusubtype); + if( load_architecture(archfile) == 0 ) + { + archfile->next = rval; + rval = archfile; + } + } + else + return NULL; /* no memory */ + pArch++; + } + } + else + { + struct mach_header* header = (struct mach_header*)pCurrent; + + if( header->magic != MH_MAGIC && header->magic != MH_MAGIC_64 ) + { + fprintf(stderr, "%s is not a mach-o file\n", 
inFile->filename); + } + else if( header->filetype == MH_BUNDLE ) + { + fprintf(stderr, "%s is not a mach-o executable file " + "(filetype MH_BUNDLE, should be MH_EXECUTE or MH_DYLIB)\n", inFile->filename); + } + else if( header->filetype == MH_DYLINKER ) + { + fprintf(stderr, "%s is not a mach-o executable file " + "(filetype MH_DYLINKER, should be MH_EXECUTE or MH_DYLIB)\n", inFile->filename); + } + else if( !(header->filetype == MH_EXECUTE || header->filetype == MH_DYLIB) ) + { + fprintf(stderr, "%s is not a mach-o executable file " + "(filetype %d should be MH_EXECUTE or MH_DYLIB)\n", + inFile->filename, header->filetype ); + } + if( load_architecture(inFile) == 0 ) + { + inFile->next = 0; + rval = inFile; + } + } + return rval; +} + +#define FIPS_SIGNATURE_SIZE 20 +#define FIPS_FINGERPRINT_SIZE 40 + +static void debug_symbol( symtab_entry_t* sym ) +{ + if( gVerbosity > 1 ) + { + section_t* sec = sym->section; + segment_t* seg = sec->segment; + fprintf(stderr, "%-40.40s: %llx sect=%s, segment=%s prot=(%x->%x)\n", + sym->n_symbol, sym->n_value, sec->sectname, + seg->segname, seg->initprot, seg->maxprot ); + } +} + +/* + * Minimalistic HMAC from fips_standalone_sha1.c + */ +static void hmac_init(SHA_CTX *md_ctx,SHA_CTX *o_ctx, + const char *key) + { + size_t len=strlen(key); + int i; + unsigned char keymd[HMAC_MAX_MD_CBLOCK]; + unsigned char pad[HMAC_MAX_MD_CBLOCK]; + + if (len > SHA_CBLOCK) + { + SHA1_Init(md_ctx); + SHA1_Update(md_ctx,key,len); + SHA1_Final(keymd,md_ctx); + len=20; + } + else + memcpy(keymd,key,len); + memset(&keymd[len],'\0',HMAC_MAX_MD_CBLOCK-len); + + for(i=0 ; i < HMAC_MAX_MD_CBLOCK ; i++) + pad[i]=0x36^keymd[i]; + SHA1_Init(md_ctx); + SHA1_Update(md_ctx,pad,SHA_CBLOCK); + + for(i=0 ; i < HMAC_MAX_MD_CBLOCK ; i++) + pad[i]=0x5c^keymd[i]; + SHA1_Init(o_ctx); + SHA1_Update(o_ctx,pad,SHA_CBLOCK); + } + +static void hmac_final(unsigned char *md,SHA_CTX *md_ctx,SHA_CTX *o_ctx) + { + unsigned char buf[20]; + + SHA1_Final(buf,md_ctx); + 
SHA1_Update(o_ctx,buf,sizeof buf); + SHA1_Final(md,o_ctx); + } + +static int fingerprint(macho_file_t* inFile, int addFingerprint) +{ + int rval = 0; + unsigned char signature[FIPS_SIGNATURE_SIZE]; + char signature_string[FIPS_FINGERPRINT_SIZE+1]; + unsigned int len = sizeof(signature); + const char *fingerprint = NULL; + int ii = 0; + +#define LOOKUP_SYMBOL( symname, prot ) \ + symtab_entry_t *symname = \ + lookup_entry_by_name( inFile, "_" #symname ); \ + if( ! symname ) { \ + fprintf(stderr, "%s: Not a FIPS executable (" \ + #symname " not found)\n", inFile->filename ); \ + return -1;\ + } \ + if( (symname->section->segment->initprot & \ + (PROT_READ|PROT_WRITE|PROT_EXEC)) != (prot) ) { \ + fprintf(stderr, #symname \ + " segment has the wrong protection.\n"); \ + debug_symbol(symname);return -1;\ + } + + LOOKUP_SYMBOL( FIPS_rodata_start, PROT_READ | PROT_EXEC ); + LOOKUP_SYMBOL( FIPS_rodata_end, PROT_READ | PROT_EXEC ); + LOOKUP_SYMBOL( FIPS_text_startX, PROT_READ | PROT_EXEC ); + LOOKUP_SYMBOL( FIPS_text_endX, PROT_READ | PROT_EXEC ); + LOOKUP_SYMBOL( FIPS_signature, PROT_WRITE | PROT_READ ); + LOOKUP_SYMBOL( FINGERPRINT_ascii_value, PROT_READ | PROT_EXEC ); + + if( gVerbosity > 1 ) + { + debug_symbol( FIPS_rodata_start ); + debug_symbol( FIPS_rodata_end ); + debug_symbol( FIPS_text_startX ); + debug_symbol( FIPS_text_endX ); + debug_symbol( FIPS_signature ); + debug_symbol( FINGERPRINT_ascii_value ); + + fingerprint = (const char *)FINGERPRINT_ascii_value->mapped; + fprintf(stderr, "fingerprint: "); + for(ii=0; ii<40; ii++ ) + { + if( fingerprint[ii] == 0 ) + break; + putc(fingerprint[ii], stderr); + } + putc('\n', stderr); + } + + /* check for the prefix ? 
character */ + { + const unsigned char * p1 = FIPS_text_startX->mapped; + const unsigned char * p2 = FIPS_text_endX->mapped; + const unsigned char * p3 = FIPS_rodata_start->mapped; + const unsigned char * p4 = FIPS_rodata_end->mapped; + static const char FIPS_hmac_key[]="etaonrishdlcupfm"; + SHA_CTX md_ctx,o_ctx; + + hmac_init(&md_ctx,&o_ctx,FIPS_hmac_key); + + if (p1<=p3 && p2>=p3) + p3=p1, p4=p2>p4?p2:p4, p1=NULL, p2=NULL; + else if (p3<=p1 && p4>=p1) + p3=p3, p4=p2>p4?p2:p4, p1=NULL, p2=NULL; + + if (p1) { + + SHA1_Update(&md_ctx,p1,(size_t)p2-(size_t)p1); + } + if (FIPS_signature->mapped>=p3 && FIPS_signature->mappedmapped+FIPS_SIGNATURE_SIZE; + if (p3mapped; + inFile->fingerprint_original = strndup(fingerprint,FIPS_FINGERPRINT_SIZE); + inFile->fingerprint_computed = strdup(signature_string); + + if( addFingerprint ) + { + void *fp_page = NULL; + void *fp_end = NULL; + + if(strcmp(fingerprint,"?have to make sure this string is unique")!=0) + { + if (memcmp((char*)fingerprint, signature_string, FIPS_FINGERPRINT_SIZE)!=0) + { + fprintf(stderr, + "%s(%s) original fingerprint incorrect: %s\n", + inFile->filename, + cputype(inFile->cpu_type, inFile->cpu_subtype), + fingerprint); + } + } + + fp_page = (void*)((uintptr_t)fingerprint & ~PAGE_MASK); + fp_end = (void*)((uintptr_t)(fingerprint+(PAGE_SIZE*2)) & ~PAGE_MASK); + if( mprotect( fp_page, fp_end-fp_page, PROT_READ|PROT_WRITE ) ) + { + perror("Can't write the fingerprint - mprotect failed"); + fprintf(stderr, "fp_page=%p, fp_end=%p, len=%ld\n", + fp_page, fp_end, (size_t)(fp_end-fp_page)); + rval = 1; + } + else + { + memcpy((char*)fingerprint, signature_string, FIPS_FINGERPRINT_SIZE); + if( msync(fp_page, (fp_end-fp_page), 0) ) + perror("msync failed"); + } + if( gVerbosity > 0 ) + fprintf(stderr, "%s(%s) fingerprint: %s\n", inFile->filename, + cputype(inFile->cpu_type,inFile->cpu_subtype), + signature_string); + } + if( *fingerprint == '?' 
) + { + printf("%s(%s) has no fingerprint.\n", inFile->filename, + cputype(inFile->cpu_type, inFile->cpu_subtype)); + rval = 2; + } + else if( strncmp( fingerprint, signature_string, FIPS_FINGERPRINT_SIZE) == 0 ) + { + if( ! addFingerprint ) + printf("%s(%s) fingerprint is correct: %s\n", inFile->filename, + cputype(inFile->cpu_type, inFile->cpu_subtype), + signature_string); + } + else + { + printf("%s(%s) fingerprint %.40s is not correct\n", inFile->filename, + cputype(inFile->cpu_type,inFile->cpu_subtype), fingerprint); + printf("calculated: %s\n", signature_string); + rval = -1; + } + return rval; +} + +static int make_fingerprint( const char * inApp, int addFingerprint ) +{ + int rval = 1; + int appfd = -1; + if( addFingerprint ) + appfd = open( inApp, O_RDWR ); + if( appfd < 0 ) + { + if( addFingerprint ) + fprintf(stderr, "Can't modify %s. Verifying only.\n", inApp); + addFingerprint = 0; + appfd = open( inApp, O_RDONLY ); + } + if( appfd >= 0 ) + { + struct stat stbuf; + fstat(appfd, &stbuf); + void * pApp = mmap(0, (size_t)stbuf.st_size, PROT_READ, + MAP_SHARED, appfd, (off_t)0); + if( pApp == MAP_FAILED ) + { + perror(inApp); + } + else + { + macho_file_t theFile; + macho_file_t* architectures; + macho_file_t* pArchitecture; + + memset( &theFile, 0, sizeof(theFile) ); + theFile.filename = inApp; + theFile.mapped = pApp; + architectures = load_file(&theFile); + for( pArchitecture = architectures; pArchitecture; + pArchitecture = pArchitecture->next ) + { + rval = fingerprint(pArchitecture, addFingerprint); + if( rval && addFingerprint ) + { + printf("Failure\n"); + break; + } + } + if((rval==0) && addFingerprint) + { + printf("Fingerprint Stored\n"); + } + munmap(pApp, (size_t)stbuf.st_size); + } + close(appfd); + } + else + { + fprintf(stderr, "Can't open %s\n", inApp ); + } + return rval; +} + +static void print_usage(const char * prog) +{ + fprintf(stderr, "usage:\n\t%s [--debug] [--quiet] [-exe|-dso|-dylib] executable\n", prog); + _exit(1); +} + +int 
main (int argc, const char * argv[]) +{ /* Entry point: parse the options, then hand the named file to make_fingerprint(); --verify clears addFingerprint so the fingerprint is only checked, not written. */ + const char * pname = argv[0]; + const char * filename = NULL; + int addFingerprint = 1; + const char * verbose_env = getenv("FIPS_SIG_VERBOSE"); + + if( verbose_env ) + gVerbosity = atoi(verbose_env); + + if( gVerbosity < 0 ) + gVerbosity = 1; + + while( --argc ) + { + ++argv; + if( strcmp(*argv,"-exe")==0 || strcmp(*argv,"--exe")==0 || + strcmp(*argv,"-dso")==0 || strcmp(*argv,"--dso")==0 || + strcmp(*argv,"-dylib")==0 || strcmp(*argv,"--dylib")==0 || + strcmp(*argv,"--verify")==0 ) + { + if(strcmp(*argv,"--verify")==0) + addFingerprint=0; + + if( argc > 1 ) /* argc still counts the current option, so an operand follows only when argc > 1; with "> 0" a trailing option consumed argv[argc] (NULL) and drove argc to -1, sending the loop past the end of argv */ + { + filename = *++argv; + argc--; + } + } + else if(strcmp(*argv,"-d")==0 || strcmp(*argv,"-debug")==0 || strcmp(*argv,"--debug")==0) + { + if( gVerbosity < 2 ) + gVerbosity = 2; + else + gVerbosity++; + } + else if(strcmp(*argv,"-q")==0 || strcmp(*argv,"-quiet")==0 || strcmp(*argv,"--quiet")==0) + gVerbosity = 0; + else if(strncmp(*argv,"-",1)!=0) { + filename = *argv; + } + } + + if( !filename ) + { + print_usage(pname); + return 1; + } + + if( access(filename, R_OK) ) + { + fprintf(stderr, "Can't access %s\n", filename); + return 1; + } + + return make_fingerprint( filename, addFingerprint ); +} + From 788715cecf22fac32fa87b812609b6e1ad227a3a Mon Sep 17 00:00:00 2001 From: Andy Polyakov Date: Thu, 23 Oct 2014 16:04:01 +0200 Subject: [PATCH 104/120] Configure: add ios64 target.
Reviewed-by: Steve Marquess (cherry picked from commit b06f7d9ac0752083e7443dddc9e5ac3e198063d4) --- Configure | 17 ++++++++++++++++- TABLE | 36 +++++++++++++++++++++++++++++++++++- config | 2 ++ 3 files changed, 53 insertions(+), 2 deletions(-) diff --git a/Configure b/Configure index 57e5899718..34856e2d9c 100755 --- a/Configure +++ b/Configure @@ -581,7 +581,22 @@ my %table=( "darwin64-x86_64-cc","cc:-arch x86_64 -O3 -DL_ENDIAN -Wall::-D_REENTRANT:MACOSX:-Wl,-search_paths_first%:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL:${x86_64_asm}:macosx:dlfcn:darwin-shared:-fPIC -fno-common:-arch x86_64 -dynamiclib:.\$(SHLIB_MAJOR).\$(SHLIB_MINOR).dylib", "debug-darwin-ppc-cc","cc:-DBN_DEBUG -DREF_CHECK -DCONF_DEBUG -DCRYPTO_MDEBUG -DB_ENDIAN -g -Wall -O::-D_REENTRANT:MACOSX::BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR:${ppc32_asm}:osx32:dlfcn:darwin-shared:-fPIC:-dynamiclib:.\$(SHLIB_MAJOR).\$(SHLIB_MINOR).dylib", # iPhoneOS/iOS -"iphoneos-cross","llvm-gcc:-O3 -isysroot \$(CROSS_TOP)/SDKs/\$(CROSS_SDK) -fomit-frame-pointer -fno-common::-D_REENTRANT:iOS:-Wl,-search_paths_first%:BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR:${no_asm}:dlfcn:darwin-shared:-fPIC -fno-common:-dynamiclib:.\$(SHLIB_MAJOR).\$(SHLIB_MINOR).dylib", +# +# It takes three prior-set environment variables to make it work: +# +# CROSS_COMPILE=/where/toolchain/is/usr/bin/ [note ending slash] +# CROSS_TOP=/where/SDKs/are +# CROSS_SDK=iPhoneOSx.y.sdk +# +# Exact paths vary with Xcode releases, but for couple of last ones +# they would look like this: +# +# CROSS_COMPILE=`xcode-select --print-path`/Toolchains/XcodeDefault.xctoolchain/usr/bin/ +# CROSS_TOP=`xcode-select --print-path`/Platforms/iPhoneOS.platform/Developer +# CROSS_SDK=iPhoneOS7.0.sdk +# +"iphoneos-cross","cc:-O3 -isysroot \$(CROSS_TOP)/SDKs/\$(CROSS_SDK) -fomit-frame-pointer -fno-common::-D_REENTRANT:iOS:-Wl,-search_paths_first%:BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR:${no_asm}:dlfcn:darwin-shared:-fPIC 
-fno-common:-dynamiclib:.\$(SHLIB_MAJOR).\$(SHLIB_MINOR).dylib", +"ios64-cross","cc:-O3 -arch arm64 -mios-version-min=7.0.0 -isysroot \$(CROSS_TOP)/SDKs/\$(CROSS_SDK) -fno-common::-D_REENTRANT:iOS:-Wl,-search_paths_first%:SIXTY_FOUR_BIT_LONG RC4_CHAR -RC4_CHUNK DES_INT DES_UNROLL -BF_PTR:${no_asm}:dlfcn:darwin-shared:-fPIC -fno-common:-dynamiclib:.\$(SHLIB_MAJOR).\$(SHLIB_MINOR).dylib", ##### A/UX "aux3-gcc","gcc:-O2 -DTERMIO::(unknown):AUX:-lbsd:RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR:::", diff --git a/TABLE b/TABLE index cdc0bf1c98..8bdf72045d 100644 --- a/TABLE +++ b/TABLE @@ -3465,8 +3465,42 @@ $ranlib = $arflags = $multilib = +*** ios64-cross +$cc = cc +$cflags = -O3 -arch arm64 -mios-version-min=7.0.0 -isysroot $(CROSS_TOP)/SDKs/$(CROSS_SDK) -fno-common +$unistd = +$thread_cflag = -D_REENTRANT +$sys_id = iOS +$lflags = -Wl,-search_paths_first% +$bn_ops = SIXTY_FOUR_BIT_LONG RC4_CHAR -RC4_CHUNK DES_INT DES_UNROLL -BF_PTR +$cpuid_obj = +$bn_obj = +$ec_obj = +$des_obj = +$aes_obj = +$bf_obj = +$md5_obj = +$sha1_obj = +$cast_obj = +$rc4_obj = +$rmd160_obj = +$rc5_obj = +$wp_obj = +$cmll_obj = +$modes_obj = +$engines_obj = +$perlasm_scheme = void +$dso_scheme = dlfcn +$shared_target= darwin-shared +$shared_cflag = -fPIC -fno-common +$shared_ldflag = -dynamiclib +$shared_extension = .$(SHLIB_MAJOR).$(SHLIB_MINOR).dylib +$ranlib = +$arflags = +$multilib = + *** iphoneos-cross -$cc = llvm-gcc +$cc = cc $cflags = -O3 -isysroot $(CROSS_TOP)/SDKs/$(CROSS_SDK) -fomit-frame-pointer -fno-common $unistd = $thread_cflag = -D_REENTRANT diff --git a/config b/config index 93dde30168..fc78a30685 100755 --- a/config +++ b/config @@ -576,6 +576,8 @@ case "$GUESSOS" in *-*-iphoneos) options="$options -arch%20${MACHINE}" OUT="iphoneos-cross" ;; + arm64-*-iphoneos|*-*-ios64) + OUT="ios64-cross" ;; alpha-*-linux2) ISA=`awk '/cpu model/{print$4;exit(0);}' /proc/cpuinfo` case ${ISA:-generic} in From 9b5db104eca33372b3acb7c7029c211c16b68b5e Mon Sep 17 00:00:00 2001 From: Andy Polyakov Date: 
Mon, 11 May 2015 11:18:04 +0200 Subject: [PATCH 105/120] Add ARMv8 assembly pack. Reviewed-by: Dr. Stephen Henson (cherry picked from commit b84813ec017cb03b8dd0b85bce2bb3e021c45685) --- crypto/Makefile | 1 + crypto/aes/Makefile | 4 + crypto/aes/asm/aesv8-armx.pl | 968 +++++++++++++++++++++++++++++++ crypto/arm64cpuid.pl | 68 +++ crypto/modes/Makefile | 3 + crypto/modes/asm/ghashv8-armx.pl | 376 ++++++++++++ crypto/perlasm/arm-xlate.pl | 165 ++++++ crypto/sha/Makefile | 3 + crypto/sha/asm/sha1-armv8.pl | 343 +++++++++++ crypto/sha/asm/sha512-armv8.pl | 428 ++++++++++++++ 10 files changed, 2359 insertions(+) create mode 100644 crypto/aes/asm/aesv8-armx.pl create mode 100644 crypto/arm64cpuid.pl create mode 100644 crypto/modes/asm/ghashv8-armx.pl create mode 100644 crypto/perlasm/arm-xlate.pl create mode 100644 crypto/sha/asm/sha1-armv8.pl create mode 100644 crypto/sha/asm/sha512-armv8.pl diff --git a/crypto/Makefile b/crypto/Makefile index 22cb2a5013..7304684f76 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -87,6 +87,7 @@ ppccpuid.s: ppccpuid.pl; $(PERL) ppccpuid.pl $(PERLASM_SCHEME) $@ pariscid.s: pariscid.pl; $(PERL) pariscid.pl $(PERLASM_SCHEME) $@ alphacpuid.s: alphacpuid.pl $(PERL) $< | $(CC) -E - | tee $@ > /dev/null +arm64cpuid.S: arm64cpuid.pl; $(PERL) arm64cpuid.pl $(PERLASM_SCHEME) > $@ subdirs: @target=all; $(RECURSIVE_MAKE) diff --git a/crypto/aes/Makefile b/crypto/aes/Makefile index 8edd358bd3..1d9e82aad6 100644 --- a/crypto/aes/Makefile +++ b/crypto/aes/Makefile @@ -78,6 +78,10 @@ aes-parisc.s: asm/aes-parisc.pl aes-mips.S: asm/aes-mips.pl $(PERL) asm/aes-mips.pl $(PERLASM_SCHEME) $@ +aesv8-armx.S: asm/aesv8-armx.pl + $(PERL) asm/aesv8-armx.pl $(PERLASM_SCHEME) $@ +aesv8-armx.o: aesv8-armx.S + # GNU make "catch all" aes-%.S: asm/aes-%.pl; $(PERL) $< $(PERLASM_SCHEME) $@ aes-armv4.o: aes-armv4.S diff --git a/crypto/aes/asm/aesv8-armx.pl b/crypto/aes/asm/aesv8-armx.pl new file mode 100644 index 0000000000..104f417c85 --- /dev/null +++ 
b/crypto/aes/asm/aesv8-armx.pl @@ -0,0 +1,968 @@ +#!/usr/bin/env perl +# +# ==================================================================== +# Written by Andy Polyakov for the OpenSSL +# project. The module is, however, dual licensed under OpenSSL and +# CRYPTOGAMS licenses depending on where you obtain it. For further +# details see http://www.openssl.org/~appro/cryptogams/. +# ==================================================================== +# +# This module implements support for ARMv8 AES instructions. The +# module is endian-agnostic in sense that it supports both big- and +# little-endian cases. As does it support both 32- and 64-bit modes +# of operation. Latter is achieved by limiting amount of utilized +# registers to 16, which implies additional NEON load and integer +# instructions. This has no effect on mighty Apple A7, where results +# are literally equal to the theoretical estimates based on AES +# instruction latencies and issue rates. On Cortex-A53, an in-order +# execution core, this costs up to 10-15%, which is partially +# compensated by implementing dedicated code path for 128-bit +# CBC encrypt case. On Cortex-A57 parallelizable mode performance +# seems to be limited by sheer amount of NEON instructions... 
+# +# Performance in cycles per byte processed with 128-bit key: +# +# CBC enc CBC dec CTR +# Apple A7 2.39 1.20 1.20 +# Cortex-A53 2.45 1.87 1.94 +# Cortex-A57 3.64 1.34 1.32 + +$flavour = shift; +$output = shift; + +$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; +( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or +( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or +die "can't locate arm-xlate.pl"; + +open OUT,"| \"$^X\" $xlate $flavour $output"; +*STDOUT=*OUT; + +$prefix="aes_v8"; + +$code=<<___; +#include "arm_arch.h" + +#if __ARM_ARCH__>=7 +.text +___ +$code.=".arch armv8-a+crypto\n" if ($flavour =~ /64/); +$code.=".fpu neon\n.code 32\n" if ($flavour !~ /64/); + +# Assembler mnemonics are an eclectic mix of 32- and 64-bit syntax, +# NEON is mostly 32-bit mnemonics, integer - mostly 64. Goal is to +# maintain both 32- and 64-bit codes within single module and +# transliterate common code to either flavour with regex vodoo. +# +{{{ +my ($inp,$bits,$out,$ptr,$rounds)=("x0","w1","x2","x3","w12"); +my ($zero,$rcon,$mask,$in0,$in1,$tmp,$key)= + $flavour=~/64/? map("q$_",(0..6)) : map("q$_",(0..3,8..10)); + + +$code.=<<___; +.align 5 +.Lrcon: +.long 0x01,0x01,0x01,0x01 +.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d // rotate-n-splat +.long 0x1b,0x1b,0x1b,0x1b + +.globl ${prefix}_set_encrypt_key +.type ${prefix}_set_encrypt_key,%function +.align 5 +${prefix}_set_encrypt_key: +.Lenc_key: +___ +$code.=<<___ if ($flavour =~ /64/); + stp x29,x30,[sp,#-16]! 
+ add x29,sp,#0 +___ +$code.=<<___; + mov $ptr,#-1 + cmp $inp,#0 + b.eq .Lenc_key_abort + cmp $out,#0 + b.eq .Lenc_key_abort + mov $ptr,#-2 + cmp $bits,#128 + b.lt .Lenc_key_abort + cmp $bits,#256 + b.gt .Lenc_key_abort + tst $bits,#0x3f + b.ne .Lenc_key_abort + + adr $ptr,.Lrcon + cmp $bits,#192 + + veor $zero,$zero,$zero + vld1.8 {$in0},[$inp],#16 + mov $bits,#8 // reuse $bits + vld1.32 {$rcon,$mask},[$ptr],#32 + + b.lt .Loop128 + b.eq .L192 + b .L256 + +.align 4 +.Loop128: + vtbl.8 $key,{$in0},$mask + vext.8 $tmp,$zero,$in0,#12 + vst1.32 {$in0},[$out],#16 + aese $key,$zero + subs $bits,$bits,#1 + + veor $in0,$in0,$tmp + vext.8 $tmp,$zero,$tmp,#12 + veor $in0,$in0,$tmp + vext.8 $tmp,$zero,$tmp,#12 + veor $key,$key,$rcon + veor $in0,$in0,$tmp + vshl.u8 $rcon,$rcon,#1 + veor $in0,$in0,$key + b.ne .Loop128 + + vld1.32 {$rcon},[$ptr] + + vtbl.8 $key,{$in0},$mask + vext.8 $tmp,$zero,$in0,#12 + vst1.32 {$in0},[$out],#16 + aese $key,$zero + + veor $in0,$in0,$tmp + vext.8 $tmp,$zero,$tmp,#12 + veor $in0,$in0,$tmp + vext.8 $tmp,$zero,$tmp,#12 + veor $key,$key,$rcon + veor $in0,$in0,$tmp + vshl.u8 $rcon,$rcon,#1 + veor $in0,$in0,$key + + vtbl.8 $key,{$in0},$mask + vext.8 $tmp,$zero,$in0,#12 + vst1.32 {$in0},[$out],#16 + aese $key,$zero + + veor $in0,$in0,$tmp + vext.8 $tmp,$zero,$tmp,#12 + veor $in0,$in0,$tmp + vext.8 $tmp,$zero,$tmp,#12 + veor $key,$key,$rcon + veor $in0,$in0,$tmp + veor $in0,$in0,$key + vst1.32 {$in0},[$out] + add $out,$out,#0x50 + + mov $rounds,#10 + b .Ldone + +.align 4 +.L192: + vld1.8 {$in1},[$inp],#8 + vmov.i8 $key,#8 // borrow $key + vst1.32 {$in0},[$out],#16 + vsub.i8 $mask,$mask,$key // adjust the mask + +.Loop192: + vtbl.8 $key,{$in1},$mask + vext.8 $tmp,$zero,$in0,#12 + vst1.32 {$in1},[$out],#8 + aese $key,$zero + subs $bits,$bits,#1 + + veor $in0,$in0,$tmp + vext.8 $tmp,$zero,$tmp,#12 + veor $in0,$in0,$tmp + vext.8 $tmp,$zero,$tmp,#12 + veor $in0,$in0,$tmp + + vdup.32 $tmp,${in0}[3] + veor $tmp,$tmp,$in1 + veor $key,$key,$rcon + vext.8 
$in1,$zero,$in1,#12 + vshl.u8 $rcon,$rcon,#1 + veor $in1,$in1,$tmp + veor $in0,$in0,$key + veor $in1,$in1,$key + vst1.32 {$in0},[$out],#16 + b.ne .Loop192 + + mov $rounds,#12 + add $out,$out,#0x20 + b .Ldone + +.align 4 +.L256: + vld1.8 {$in1},[$inp] + mov $bits,#7 + mov $rounds,#14 + vst1.32 {$in0},[$out],#16 + +.Loop256: + vtbl.8 $key,{$in1},$mask + vext.8 $tmp,$zero,$in0,#12 + vst1.32 {$in1},[$out],#16 + aese $key,$zero + subs $bits,$bits,#1 + + veor $in0,$in0,$tmp + vext.8 $tmp,$zero,$tmp,#12 + veor $in0,$in0,$tmp + vext.8 $tmp,$zero,$tmp,#12 + veor $key,$key,$rcon + veor $in0,$in0,$tmp + vshl.u8 $rcon,$rcon,#1 + veor $in0,$in0,$key + vst1.32 {$in0},[$out],#16 + b.eq .Ldone + + vdup.32 $key,${in0}[3] // just splat + vext.8 $tmp,$zero,$in1,#12 + aese $key,$zero + + veor $in1,$in1,$tmp + vext.8 $tmp,$zero,$tmp,#12 + veor $in1,$in1,$tmp + vext.8 $tmp,$zero,$tmp,#12 + veor $in1,$in1,$tmp + + veor $in1,$in1,$key + b .Loop256 + +.Ldone: + str $rounds,[$out] + mov $ptr,#0 + +.Lenc_key_abort: + mov x0,$ptr // return value + `"ldr x29,[sp],#16" if ($flavour =~ /64/)` + ret +.size ${prefix}_set_encrypt_key,.-${prefix}_set_encrypt_key + +.globl ${prefix}_set_decrypt_key +.type ${prefix}_set_decrypt_key,%function +.align 5 +${prefix}_set_decrypt_key: +___ +$code.=<<___ if ($flavour =~ /64/); + stp x29,x30,[sp,#-16]! 
+ add x29,sp,#0 +___ +$code.=<<___ if ($flavour !~ /64/); + stmdb sp!,{r4,lr} +___ +$code.=<<___; + bl .Lenc_key + + cmp x0,#0 + b.ne .Ldec_key_abort + + sub $out,$out,#240 // restore original $out + mov x4,#-16 + add $inp,$out,x12,lsl#4 // end of key schedule + + vld1.32 {v0.16b},[$out] + vld1.32 {v1.16b},[$inp] + vst1.32 {v0.16b},[$inp],x4 + vst1.32 {v1.16b},[$out],#16 + +.Loop_imc: + vld1.32 {v0.16b},[$out] + vld1.32 {v1.16b},[$inp] + aesimc v0.16b,v0.16b + aesimc v1.16b,v1.16b + vst1.32 {v0.16b},[$inp],x4 + vst1.32 {v1.16b},[$out],#16 + cmp $inp,$out + b.hi .Loop_imc + + vld1.32 {v0.16b},[$out] + aesimc v0.16b,v0.16b + vst1.32 {v0.16b},[$inp] + + eor x0,x0,x0 // return value +.Ldec_key_abort: +___ +$code.=<<___ if ($flavour !~ /64/); + ldmia sp!,{r4,pc} +___ +$code.=<<___ if ($flavour =~ /64/); + ldp x29,x30,[sp],#16 + ret +___ +$code.=<<___; +.size ${prefix}_set_decrypt_key,.-${prefix}_set_decrypt_key +___ +}}} +{{{ +sub gen_block () { +my $dir = shift; +my ($e,$mc) = $dir eq "en" ? 
("e","mc") : ("d","imc"); +my ($inp,$out,$key)=map("x$_",(0..2)); +my $rounds="w3"; +my ($rndkey0,$rndkey1,$inout)=map("q$_",(0..3)); + +$code.=<<___; +.globl ${prefix}_${dir}crypt +.type ${prefix}_${dir}crypt,%function +.align 5 +${prefix}_${dir}crypt: + ldr $rounds,[$key,#240] + vld1.32 {$rndkey0},[$key],#16 + vld1.8 {$inout},[$inp] + sub $rounds,$rounds,#2 + vld1.32 {$rndkey1},[$key],#16 + +.Loop_${dir}c: + aes$e $inout,$rndkey0 + vld1.32 {$rndkey0},[$key],#16 + aes$mc $inout,$inout + subs $rounds,$rounds,#2 + aes$e $inout,$rndkey1 + vld1.32 {$rndkey1},[$key],#16 + aes$mc $inout,$inout + b.gt .Loop_${dir}c + + aes$e $inout,$rndkey0 + vld1.32 {$rndkey0},[$key] + aes$mc $inout,$inout + aes$e $inout,$rndkey1 + veor $inout,$inout,$rndkey0 + + vst1.8 {$inout},[$out] + ret +.size ${prefix}_${dir}crypt,.-${prefix}_${dir}crypt +___ +} +&gen_block("en"); +&gen_block("de"); +}}} +{{{ +my ($inp,$out,$len,$key,$ivp)=map("x$_",(0..4)); my $enc="w5"; +my ($rounds,$cnt,$key_,$step,$step1)=($enc,"w6","x7","x8","x12"); +my ($dat0,$dat1,$in0,$in1,$tmp0,$tmp1,$ivec,$rndlast)=map("q$_",(0..7)); + +my ($dat,$tmp,$rndzero_n_last)=($dat0,$tmp0,$tmp1); + +### q8-q15 preloaded key schedule + +$code.=<<___; +.globl ${prefix}_cbc_encrypt +.type ${prefix}_cbc_encrypt,%function +.align 5 +${prefix}_cbc_encrypt: +___ +$code.=<<___ if ($flavour =~ /64/); + stp x29,x30,[sp,#-16]! + add x29,sp,#0 +___ +$code.=<<___ if ($flavour !~ /64/); + mov ip,sp + stmdb sp!,{r4-r8,lr} + vstmdb sp!,{d8-d15} @ ABI specification says so + ldmia ip,{r4-r5} @ load remaining args +___ +$code.=<<___; + subs $len,$len,#16 + mov $step,#16 + b.lo .Lcbc_abort + cclr $step,eq + + cmp $enc,#0 // en- or decrypting? + ldr $rounds,[$key,#240] + and $len,$len,#-16 + vld1.8 {$ivec},[$ivp] + vld1.8 {$dat},[$inp],$step + + vld1.32 {q8-q9},[$key] // load key schedule... 
+ sub $rounds,$rounds,#6 + add $key_,$key,x5,lsl#4 // pointer to last 7 round keys + sub $rounds,$rounds,#2 + vld1.32 {q10-q11},[$key_],#32 + vld1.32 {q12-q13},[$key_],#32 + vld1.32 {q14-q15},[$key_],#32 + vld1.32 {$rndlast},[$key_] + + add $key_,$key,#32 + mov $cnt,$rounds + b.eq .Lcbc_dec + + cmp $rounds,#2 + veor $dat,$dat,$ivec + veor $rndzero_n_last,q8,$rndlast + b.eq .Lcbc_enc128 + +.Loop_cbc_enc: + aese $dat,q8 + vld1.32 {q8},[$key_],#16 + aesmc $dat,$dat + subs $cnt,$cnt,#2 + aese $dat,q9 + vld1.32 {q9},[$key_],#16 + aesmc $dat,$dat + b.gt .Loop_cbc_enc + + aese $dat,q8 + aesmc $dat,$dat + subs $len,$len,#16 + aese $dat,q9 + aesmc $dat,$dat + cclr $step,eq + aese $dat,q10 + aesmc $dat,$dat + add $key_,$key,#16 + aese $dat,q11 + aesmc $dat,$dat + vld1.8 {q8},[$inp],$step + aese $dat,q12 + aesmc $dat,$dat + veor q8,q8,$rndzero_n_last + aese $dat,q13 + aesmc $dat,$dat + vld1.32 {q9},[$key_],#16 // re-pre-load rndkey[1] + aese $dat,q14 + aesmc $dat,$dat + aese $dat,q15 + + mov $cnt,$rounds + veor $ivec,$dat,$rndlast + vst1.8 {$ivec},[$out],#16 + b.hs .Loop_cbc_enc + + b .Lcbc_done + +.align 5 +.Lcbc_enc128: + vld1.32 {$in0-$in1},[$key_] + aese $dat,q8 + aesmc $dat,$dat + b .Lenter_cbc_enc128 +.Loop_cbc_enc128: + aese $dat,q8 + aesmc $dat,$dat + vst1.8 {$ivec},[$out],#16 +.Lenter_cbc_enc128: + aese $dat,q9 + aesmc $dat,$dat + subs $len,$len,#16 + aese $dat,$in0 + aesmc $dat,$dat + cclr $step,eq + aese $dat,$in1 + aesmc $dat,$dat + aese $dat,q10 + aesmc $dat,$dat + aese $dat,q11 + aesmc $dat,$dat + vld1.8 {q8},[$inp],$step + aese $dat,q12 + aesmc $dat,$dat + aese $dat,q13 + aesmc $dat,$dat + aese $dat,q14 + aesmc $dat,$dat + veor q8,q8,$rndzero_n_last + aese $dat,q15 + veor $ivec,$dat,$rndlast + b.hs .Loop_cbc_enc128 + + vst1.8 {$ivec},[$out],#16 + b .Lcbc_done +___ +{ +my ($dat2,$in2,$tmp2)=map("q$_",(10,11,9)); +$code.=<<___; +.align 5 +.Lcbc_dec: + vld1.8 {$dat2},[$inp],#16 + subs $len,$len,#32 // bias + add $cnt,$rounds,#2 + vorr $in1,$dat,$dat + vorr 
$dat1,$dat,$dat + vorr $in2,$dat2,$dat2 + b.lo .Lcbc_dec_tail + + vorr $dat1,$dat2,$dat2 + vld1.8 {$dat2},[$inp],#16 + vorr $in0,$dat,$dat + vorr $in1,$dat1,$dat1 + vorr $in2,$dat2,$dat2 + +.Loop3x_cbc_dec: + aesd $dat0,q8 + aesd $dat1,q8 + aesd $dat2,q8 + vld1.32 {q8},[$key_],#16 + aesimc $dat0,$dat0 + aesimc $dat1,$dat1 + aesimc $dat2,$dat2 + subs $cnt,$cnt,#2 + aesd $dat0,q9 + aesd $dat1,q9 + aesd $dat2,q9 + vld1.32 {q9},[$key_],#16 + aesimc $dat0,$dat0 + aesimc $dat1,$dat1 + aesimc $dat2,$dat2 + b.gt .Loop3x_cbc_dec + + aesd $dat0,q8 + aesd $dat1,q8 + aesd $dat2,q8 + veor $tmp0,$ivec,$rndlast + aesimc $dat0,$dat0 + aesimc $dat1,$dat1 + aesimc $dat2,$dat2 + veor $tmp1,$in0,$rndlast + aesd $dat0,q9 + aesd $dat1,q9 + aesd $dat2,q9 + veor $tmp2,$in1,$rndlast + subs $len,$len,#0x30 + aesimc $dat0,$dat0 + aesimc $dat1,$dat1 + aesimc $dat2,$dat2 + vorr $ivec,$in2,$in2 + mov.lo x6,$len // x6, $cnt, is zero at this point + aesd $dat0,q12 + aesd $dat1,q12 + aesd $dat2,q12 + add $inp,$inp,x6 // $inp is adjusted in such way that + // at exit from the loop $dat1-$dat2 + // are loaded with last "words" + aesimc $dat0,$dat0 + aesimc $dat1,$dat1 + aesimc $dat2,$dat2 + mov $key_,$key + aesd $dat0,q13 + aesd $dat1,q13 + aesd $dat2,q13 + vld1.8 {$in0},[$inp],#16 + aesimc $dat0,$dat0 + aesimc $dat1,$dat1 + aesimc $dat2,$dat2 + vld1.8 {$in1},[$inp],#16 + aesd $dat0,q14 + aesd $dat1,q14 + aesd $dat2,q14 + vld1.8 {$in2},[$inp],#16 + aesimc $dat0,$dat0 + aesimc $dat1,$dat1 + aesimc $dat2,$dat2 + vld1.32 {q8},[$key_],#16 // re-pre-load rndkey[0] + aesd $dat0,q15 + aesd $dat1,q15 + aesd $dat2,q15 + + add $cnt,$rounds,#2 + veor $tmp0,$tmp0,$dat0 + veor $tmp1,$tmp1,$dat1 + veor $dat2,$dat2,$tmp2 + vld1.32 {q9},[$key_],#16 // re-pre-load rndkey[1] + vorr $dat0,$in0,$in0 + vst1.8 {$tmp0},[$out],#16 + vorr $dat1,$in1,$in1 + vst1.8 {$tmp1},[$out],#16 + vst1.8 {$dat2},[$out],#16 + vorr $dat2,$in2,$in2 + b.hs .Loop3x_cbc_dec + + cmn $len,#0x30 + b.eq .Lcbc_done + nop + +.Lcbc_dec_tail: + aesd 
$dat1,q8 + aesd $dat2,q8 + vld1.32 {q8},[$key_],#16 + aesimc $dat1,$dat1 + aesimc $dat2,$dat2 + subs $cnt,$cnt,#2 + aesd $dat1,q9 + aesd $dat2,q9 + vld1.32 {q9},[$key_],#16 + aesimc $dat1,$dat1 + aesimc $dat2,$dat2 + b.gt .Lcbc_dec_tail + + aesd $dat1,q8 + aesd $dat2,q8 + aesimc $dat1,$dat1 + aesimc $dat2,$dat2 + aesd $dat1,q9 + aesd $dat2,q9 + aesimc $dat1,$dat1 + aesimc $dat2,$dat2 + aesd $dat1,q12 + aesd $dat2,q12 + aesimc $dat1,$dat1 + aesimc $dat2,$dat2 + cmn $len,#0x20 + aesd $dat1,q13 + aesd $dat2,q13 + aesimc $dat1,$dat1 + aesimc $dat2,$dat2 + veor $tmp1,$ivec,$rndlast + aesd $dat1,q14 + aesd $dat2,q14 + aesimc $dat1,$dat1 + aesimc $dat2,$dat2 + veor $tmp2,$in1,$rndlast + aesd $dat1,q15 + aesd $dat2,q15 + b.eq .Lcbc_dec_one + veor $tmp1,$tmp1,$dat1 + veor $tmp2,$tmp2,$dat2 + vorr $ivec,$in2,$in2 + vst1.8 {$tmp1},[$out],#16 + vst1.8 {$tmp2},[$out],#16 + b .Lcbc_done + +.Lcbc_dec_one: + veor $tmp1,$tmp1,$dat2 + vorr $ivec,$in2,$in2 + vst1.8 {$tmp1},[$out],#16 + +.Lcbc_done: + vst1.8 {$ivec},[$ivp] +.Lcbc_abort: +___ +} +$code.=<<___ if ($flavour !~ /64/); + vldmia sp!,{d8-d15} + ldmia sp!,{r4-r8,pc} +___ +$code.=<<___ if ($flavour =~ /64/); + ldr x29,[sp],#16 + ret +___ +$code.=<<___; +.size ${prefix}_cbc_encrypt,.-${prefix}_cbc_encrypt +___ +}}} +{{{ +my ($inp,$out,$len,$key,$ivp)=map("x$_",(0..4)); +my ($rounds,$cnt,$key_)=("w5","w6","x7"); +my ($ctr,$tctr0,$tctr1,$tctr2)=map("w$_",(8..10,12)); +my $step="x12"; # aliases with $tctr2 + +my ($dat0,$dat1,$in0,$in1,$tmp0,$tmp1,$ivec,$rndlast)=map("q$_",(0..7)); +my ($dat2,$in2,$tmp2)=map("q$_",(10,11,9)); + +my ($dat,$tmp)=($dat0,$tmp0); + +### q8-q15 preloaded key schedule + +$code.=<<___; +.globl ${prefix}_ctr32_encrypt_blocks +.type ${prefix}_ctr32_encrypt_blocks,%function +.align 5 +${prefix}_ctr32_encrypt_blocks: +___ +$code.=<<___ if ($flavour =~ /64/); + stp x29,x30,[sp,#-16]! 
+ add x29,sp,#0 +___ +$code.=<<___ if ($flavour !~ /64/); + mov ip,sp + stmdb sp!,{r4-r10,lr} + vstmdb sp!,{d8-d15} @ ABI specification says so + ldr r4, [ip] @ load remaining arg +___ +$code.=<<___; + ldr $rounds,[$key,#240] + + ldr $ctr, [$ivp, #12] + vld1.32 {$dat0},[$ivp] + + vld1.32 {q8-q9},[$key] // load key schedule... + sub $rounds,$rounds,#4 + mov $step,#16 + cmp $len,#2 + add $key_,$key,x5,lsl#4 // pointer to last 5 round keys + sub $rounds,$rounds,#2 + vld1.32 {q12-q13},[$key_],#32 + vld1.32 {q14-q15},[$key_],#32 + vld1.32 {$rndlast},[$key_] + add $key_,$key,#32 + mov $cnt,$rounds + cclr $step,lo +#ifndef __ARMEB__ + rev $ctr, $ctr +#endif + vorr $dat1,$dat0,$dat0 + add $tctr1, $ctr, #1 + vorr $dat2,$dat0,$dat0 + add $ctr, $ctr, #2 + vorr $ivec,$dat0,$dat0 + rev $tctr1, $tctr1 + vmov.32 ${dat1}[3],$tctr1 + b.ls .Lctr32_tail + rev $tctr2, $ctr + sub $len,$len,#3 // bias + vmov.32 ${dat2}[3],$tctr2 + b .Loop3x_ctr32 + +.align 4 +.Loop3x_ctr32: + aese $dat0,q8 + aese $dat1,q8 + aese $dat2,q8 + vld1.32 {q8},[$key_],#16 + aesmc $dat0,$dat0 + aesmc $dat1,$dat1 + aesmc $dat2,$dat2 + subs $cnt,$cnt,#2 + aese $dat0,q9 + aese $dat1,q9 + aese $dat2,q9 + vld1.32 {q9},[$key_],#16 + aesmc $dat0,$dat0 + aesmc $dat1,$dat1 + aesmc $dat2,$dat2 + b.gt .Loop3x_ctr32 + + aese $dat0,q8 + aese $dat1,q8 + aese $dat2,q8 + mov $key_,$key + aesmc $tmp0,$dat0 + vld1.8 {$in0},[$inp],#16 + aesmc $tmp1,$dat1 + aesmc $dat2,$dat2 + vorr $dat0,$ivec,$ivec + aese $tmp0,q9 + vld1.8 {$in1},[$inp],#16 + aese $tmp1,q9 + aese $dat2,q9 + vorr $dat1,$ivec,$ivec + aesmc $tmp0,$tmp0 + vld1.8 {$in2},[$inp],#16 + aesmc $tmp1,$tmp1 + aesmc $tmp2,$dat2 + vorr $dat2,$ivec,$ivec + add $tctr0,$ctr,#1 + aese $tmp0,q12 + aese $tmp1,q12 + aese $tmp2,q12 + veor $in0,$in0,$rndlast + add $tctr1,$ctr,#2 + aesmc $tmp0,$tmp0 + aesmc $tmp1,$tmp1 + aesmc $tmp2,$tmp2 + veor $in1,$in1,$rndlast + add $ctr,$ctr,#3 + aese $tmp0,q13 + aese $tmp1,q13 + aese $tmp2,q13 + veor $in2,$in2,$rndlast + rev $tctr0,$tctr0 + aesmc 
$tmp0,$tmp0 + vld1.32 {q8},[$key_],#16 // re-pre-load rndkey[0] + aesmc $tmp1,$tmp1 + aesmc $tmp2,$tmp2 + vmov.32 ${dat0}[3], $tctr0 + rev $tctr1,$tctr1 + aese $tmp0,q14 + aese $tmp1,q14 + aese $tmp2,q14 + vmov.32 ${dat1}[3], $tctr1 + rev $tctr2,$ctr + aesmc $tmp0,$tmp0 + aesmc $tmp1,$tmp1 + aesmc $tmp2,$tmp2 + vmov.32 ${dat2}[3], $tctr2 + subs $len,$len,#3 + aese $tmp0,q15 + aese $tmp1,q15 + aese $tmp2,q15 + + mov $cnt,$rounds + veor $in0,$in0,$tmp0 + veor $in1,$in1,$tmp1 + veor $in2,$in2,$tmp2 + vld1.32 {q9},[$key_],#16 // re-pre-load rndkey[1] + vst1.8 {$in0},[$out],#16 + vst1.8 {$in1},[$out],#16 + vst1.8 {$in2},[$out],#16 + b.hs .Loop3x_ctr32 + + adds $len,$len,#3 + b.eq .Lctr32_done + cmp $len,#1 + mov $step,#16 + cclr $step,eq + +.Lctr32_tail: + aese $dat0,q8 + aese $dat1,q8 + vld1.32 {q8},[$key_],#16 + aesmc $dat0,$dat0 + aesmc $dat1,$dat1 + subs $cnt,$cnt,#2 + aese $dat0,q9 + aese $dat1,q9 + vld1.32 {q9},[$key_],#16 + aesmc $dat0,$dat0 + aesmc $dat1,$dat1 + b.gt .Lctr32_tail + + aese $dat0,q8 + aese $dat1,q8 + aesmc $dat0,$dat0 + aesmc $dat1,$dat1 + aese $dat0,q9 + aese $dat1,q9 + aesmc $dat0,$dat0 + aesmc $dat1,$dat1 + vld1.8 {$in0},[$inp],$step + aese $dat0,q12 + aese $dat1,q12 + vld1.8 {$in1},[$inp] + aesmc $dat0,$dat0 + aesmc $dat1,$dat1 + aese $dat0,q13 + aese $dat1,q13 + aesmc $dat0,$dat0 + aesmc $dat1,$dat1 + aese $dat0,q14 + aese $dat1,q14 + veor $in0,$in0,$rndlast + aesmc $dat0,$dat0 + aesmc $dat1,$dat1 + veor $in1,$in1,$rndlast + aese $dat0,q15 + aese $dat1,q15 + + cmp $len,#1 + veor $in0,$in0,$dat0 + veor $in1,$in1,$dat1 + vst1.8 {$in0},[$out],#16 + b.eq .Lctr32_done + vst1.8 {$in1},[$out] + +.Lctr32_done: +___ +$code.=<<___ if ($flavour !~ /64/); + vldmia sp!,{d8-d15} + ldmia sp!,{r4-r10,pc} +___ +$code.=<<___ if ($flavour =~ /64/); + ldr x29,[sp],#16 + ret +___ +$code.=<<___; +.size ${prefix}_ctr32_encrypt_blocks,.-${prefix}_ctr32_encrypt_blocks +___ +}}} +$code.=<<___; +#endif +___ +######################################## +if ($flavour =~ 
/64/) { ######## 64-bit code + my %opcode = ( + "aesd" => 0x4e285800, "aese" => 0x4e284800, + "aesimc"=> 0x4e287800, "aesmc" => 0x4e286800 ); + + local *unaes = sub { + my ($mnemonic,$arg)=@_; + + $arg =~ m/[qv]([0-9]+)[^,]*,\s*[qv]([0-9]+)/o && + sprintf ".inst\t0x%08x\t//%s %s", + $opcode{$mnemonic}|$1|($2<<5), + $mnemonic,$arg; + }; + + foreach(split("\n",$code)) { + s/\`([^\`]*)\`/eval($1)/geo; + + s/\bq([0-9]+)\b/"v".($1<8?$1:$1+8).".16b"/geo; # old->new registers + s/@\s/\/\//o; # old->new style commentary + + #s/[v]?(aes\w+)\s+([qv].*)/unaes($1,$2)/geo or + s/cclr\s+([wx])([^,]+),\s*([a-z]+)/csel $1$2,$1zr,$1$2,$3/o or + s/mov\.([a-z]+)\s+([wx][0-9]+),\s*([wx][0-9]+)/csel $2,$3,$2,$1/o or + s/vmov\.i8/movi/o or # fix up legacy mnemonics + s/vext\.8/ext/o or + s/vrev32\.8/rev32/o or + s/vtst\.8/cmtst/o or + s/vshr/ushr/o or + s/^(\s+)v/$1/o or # strip off v prefix + s/\bbx\s+lr\b/ret/o; + + # fix up remainig legacy suffixes + s/\.[ui]?8//o; + m/\],#8/o and s/\.16b/\.8b/go; + s/\.[ui]?32//o and s/\.16b/\.4s/go; + s/\.[ui]?64//o and s/\.16b/\.2d/go; + s/\.[42]([sd])\[([0-3])\]/\.$1\[$2\]/o; + + print $_,"\n"; + } +} else { ######## 32-bit code + my %opcode = ( + "aesd" => 0xf3b00340, "aese" => 0xf3b00300, + "aesimc"=> 0xf3b003c0, "aesmc" => 0xf3b00380 ); + + local *unaes = sub { + my ($mnemonic,$arg)=@_; + + if ($arg =~ m/[qv]([0-9]+)[^,]*,\s*[qv]([0-9]+)/o) { + my $word = $opcode{$mnemonic}|(($1&7)<<13)|(($1&8)<<19) + |(($2&7)<<1) |(($2&8)<<2); + # since ARMv7 instructions are always encoded little-endian. + # correct solution is to use .inst directive, but older + # assemblers don't implement it:-( + sprintf ".byte\t0x%02x,0x%02x,0x%02x,0x%02x\t@ %s %s", + $word&0xff,($word>>8)&0xff, + ($word>>16)&0xff,($word>>24)&0xff, + $mnemonic,$arg; + } + }; + + sub unvtbl { + my $arg=shift; + + $arg =~ m/q([0-9]+),\s*\{q([0-9]+)\},\s*q([0-9]+)/o && + sprintf "vtbl.8 d%d,{q%d},d%d\n\t". 
+ "vtbl.8 d%d,{q%d},d%d", 2*$1,$2,2*$3, 2*$1+1,$2,2*$3+1; + } + + sub unvdup32 { + my $arg=shift; + + $arg =~ m/q([0-9]+),\s*q([0-9]+)\[([0-3])\]/o && + sprintf "vdup.32 q%d,d%d[%d]",$1,2*$2+($3>>1),$3&1; + } + + sub unvmov32 { + my $arg=shift; + + $arg =~ m/q([0-9]+)\[([0-3])\],(.*)/o && + sprintf "vmov.32 d%d[%d],%s",2*$1+($2>>1),$2&1,$3; + } + + foreach(split("\n",$code)) { + s/\`([^\`]*)\`/eval($1)/geo; + + s/\b[wx]([0-9]+)\b/r$1/go; # new->old registers + s/\bv([0-9])\.[12468]+[bsd]\b/q$1/go; # new->old registers + s/\/\/\s?/@ /o; # new->old style commentary + + # fix up remainig new-style suffixes + s/\{q([0-9]+)\},\s*\[(.+)\],#8/sprintf "{d%d},[$2]!",2*$1/eo or + s/\],#[0-9]+/]!/o; + + s/[v]?(aes\w+)\s+([qv].*)/unaes($1,$2)/geo or + s/cclr\s+([^,]+),\s*([a-z]+)/mov$2 $1,#0/o or + s/vtbl\.8\s+(.*)/unvtbl($1)/geo or + s/vdup\.32\s+(.*)/unvdup32($1)/geo or + s/vmov\.32\s+(.*)/unvmov32($1)/geo or + s/^(\s+)b\./$1b/o or + s/^(\s+)mov\./$1mov/o or + s/^(\s+)ret/$1bx\tlr/o; + + print $_,"\n"; + } +} + +close STDOUT; diff --git a/crypto/arm64cpuid.pl b/crypto/arm64cpuid.pl new file mode 100644 index 0000000000..bfec664198 --- /dev/null +++ b/crypto/arm64cpuid.pl @@ -0,0 +1,68 @@ +#!/usr/bin/env perl + +$flavour = shift; +$output = shift; + +$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; +( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or +( $xlate="${dir}perlasm/arm-xlate.pl" and -f $xlate) or +die "can't locate arm-xlate.pl"; + +open OUT,"| \"$^X\" $xlate $flavour $output"; +*STDOUT=*OUT; + +$code.=<<___; +#include "arm_arch.h" + +.text +.arch armv8-a+crypto + +.align 5 +.globl _armv7_neon_probe +.type _armv7_neon_probe,%function +_armv7_neon_probe: + orr v15.16b, v15.16b, v15.16b + ret +.size _armv7_neon_probe,.-_armv7_neon_probe + +.globl _armv7_tick +.type _armv7_tick,%function +_armv7_tick: +#ifdef __APPLE__ + mrs x0, CNTPCT_EL0 +#else + mrs x0, CNTVCT_EL0 +#endif + ret +.size _armv7_tick,.-_armv7_tick + +.globl _armv8_aes_probe +.type _armv8_aes_probe,%function 
+_armv8_aes_probe: + aese v0.16b, v0.16b + ret +.size _armv8_aes_probe,.-_armv8_aes_probe + +.globl _armv8_sha1_probe +.type _armv8_sha1_probe,%function +_armv8_sha1_probe: + sha1h s0, s0 + ret +.size _armv8_sha1_probe,.-_armv8_sha1_probe + +.globl _armv8_sha256_probe +.type _armv8_sha256_probe,%function +_armv8_sha256_probe: + sha256su0 v0.4s, v0.4s + ret +.size _armv8_sha256_probe,.-_armv8_sha256_probe +.globl _armv8_pmull_probe +.type _armv8_pmull_probe,%function +_armv8_pmull_probe: + pmull v0.1q, v0.1d, v0.1d + ret +.size _armv8_pmull_probe,.-_armv8_pmull_probe +___ + +print $code; +close STDOUT; diff --git a/crypto/modes/Makefile b/crypto/modes/Makefile index 811969304f..f4930c6bd8 100644 --- a/crypto/modes/Makefile +++ b/crypto/modes/Makefile @@ -56,11 +56,14 @@ ghash-alpha.s: asm/ghash-alpha.pl $(PERL) $< | $(CC) -E - | tee $@ > /dev/null ghash-parisc.s: asm/ghash-parisc.pl $(PERL) asm/ghash-parisc.pl $(PERLASM_SCHEME) $@ +ghashv8-armx.S: asm/ghashv8-armx.pl + $(PERL) asm/ghashv8-armx.pl $(PERLASM_SCHEME) $@ # GNU make "catch all" ghash-%.S: asm/ghash-%.pl; $(PERL) $< $(PERLASM_SCHEME) $@ ghash-armv4.o: ghash-armv4.S +ghashv8-armx.o: ghashv8-armx.S files: $(PERL) $(TOP)/util/files.pl Makefile >> $(TOP)/MINFO diff --git a/crypto/modes/asm/ghashv8-armx.pl b/crypto/modes/asm/ghashv8-armx.pl new file mode 100644 index 0000000000..300e8d56cb --- /dev/null +++ b/crypto/modes/asm/ghashv8-armx.pl @@ -0,0 +1,376 @@ +#!/usr/bin/env perl +# +# ==================================================================== +# Written by Andy Polyakov for the OpenSSL +# project. The module is, however, dual licensed under OpenSSL and +# CRYPTOGAMS licenses depending on where you obtain it. For further +# details see http://www.openssl.org/~appro/cryptogams/. +# ==================================================================== +# +# GHASH for ARMv8 Crypto Extension, 64-bit polynomial multiplication. 
+# +# June 2014 +# +# Initial version was developed in tight cooperation with Ard +# Biesheuvel from bits-n-pieces from +# other assembly modules. Just like aesv8-armx.pl this module +# supports both AArch32 and AArch64 execution modes. +# +# July 2014 +# +# Implement 2x aggregated reduction [see ghash-x86.pl for background +# information]. +# +# Current performance in cycles per processed byte: +# +# PMULL[2] 32-bit NEON(*) +# Apple A7 0.92 5.62 +# Cortex-A53 1.01 8.39 +# Cortex-A57 1.17 7.61 +# +# (*) presented for reference/comparison purposes; + +$flavour = shift; +$output = shift; + +$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; +( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or +( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or +die "can't locate arm-xlate.pl"; + +open OUT,"| \"$^X\" $xlate $flavour $output"; +*STDOUT=*OUT; + +$Xi="x0"; # argument block +$Htbl="x1"; +$inp="x2"; +$len="x3"; + +$inc="x12"; + +{ +my ($Xl,$Xm,$Xh,$IN)=map("q$_",(0..3)); +my ($t0,$t1,$t2,$xC2,$H,$Hhl,$H2)=map("q$_",(8..14)); + +$code=<<___; +#include "arm_arch.h" + +.text +___ +$code.=".arch armv8-a+crypto\n" if ($flavour =~ /64/); +$code.=".fpu neon\n.code 32\n" if ($flavour !~ /64/); + +$code.=<<___; +.global gcm_init_v8 +.type gcm_init_v8,%function +.align 4 +gcm_init_v8: + vld1.64 {$t1},[x1] @ load H + vmov.i8 $xC2,#0xe1 + vshl.i64 $xC2,$xC2,#57 @ 0xc2.0 + vext.8 $IN,$t1,$t1,#8 + vshr.u64 $t2,$xC2,#63 + vdup.32 $t1,${t1}[1] + vext.8 $t0,$t2,$xC2,#8 @ t0=0xc2....01 + vshr.u64 $t2,$IN,#63 + vshr.s32 $t1,$t1,#31 @ broadcast carry bit + vand $t2,$t2,$t0 + vshl.i64 $IN,$IN,#1 + vext.8 $t2,$t2,$t2,#8 + vand $t0,$t0,$t1 + vorr $IN,$IN,$t2 @ H<<<=1 + veor $H,$IN,$t0 @ twisted H + vst1.64 {$H},[x0],#16 + + @ calculate H^2 + vext.8 $t0,$H,$H,#8 @ Karatsuba pre-processing + vpmull.p64 $Xl,$H,$H + veor $t0,$t0,$H + vpmull2.p64 $Xh,$H,$H + vpmull.p64 $Xm,$t0,$t0 + + vext.8 $t1,$Xl,$Xh,#8 @ Karatsuba post-processing + veor $t2,$Xl,$Xh + veor $Xm,$Xm,$t1 + veor $Xm,$Xm,$t2 + vpmull.p64 
$t2,$Xl,$xC2 @ 1st phase + + vmov $Xh#lo,$Xm#hi @ Xh|Xm - 256-bit result + vmov $Xm#hi,$Xl#lo @ Xm is rotated Xl + veor $Xl,$Xm,$t2 + + vext.8 $t2,$Xl,$Xl,#8 @ 2nd phase + vpmull.p64 $Xl,$Xl,$xC2 + veor $t2,$t2,$Xh + veor $H2,$Xl,$t2 + + vext.8 $t1,$H2,$H2,#8 @ Karatsuba pre-processing + veor $t1,$t1,$H2 + vext.8 $Hhl,$t0,$t1,#8 @ pack Karatsuba pre-processed + vst1.64 {$Hhl-$H2},[x0] + + ret +.size gcm_init_v8,.-gcm_init_v8 + +.global gcm_gmult_v8 +.type gcm_gmult_v8,%function +.align 4 +gcm_gmult_v8: + vld1.64 {$t1},[$Xi] @ load Xi + vmov.i8 $xC2,#0xe1 + vld1.64 {$H-$Hhl},[$Htbl] @ load twisted H, ... + vshl.u64 $xC2,$xC2,#57 +#ifndef __ARMEB__ + vrev64.8 $t1,$t1 +#endif + vext.8 $IN,$t1,$t1,#8 + + vpmull.p64 $Xl,$H,$IN @ H.lo·Xi.lo + veor $t1,$t1,$IN @ Karatsuba pre-processing + vpmull2.p64 $Xh,$H,$IN @ H.hi·Xi.hi + vpmull.p64 $Xm,$Hhl,$t1 @ (H.lo+H.hi)·(Xi.lo+Xi.hi) + + vext.8 $t1,$Xl,$Xh,#8 @ Karatsuba post-processing + veor $t2,$Xl,$Xh + veor $Xm,$Xm,$t1 + veor $Xm,$Xm,$t2 + vpmull.p64 $t2,$Xl,$xC2 @ 1st phase + + vmov $Xh#lo,$Xm#hi @ Xh|Xm - 256-bit result + vmov $Xm#hi,$Xl#lo @ Xm is rotated Xl + veor $Xl,$Xm,$t2 + + vext.8 $t2,$Xl,$Xl,#8 @ 2nd phase + vpmull.p64 $Xl,$Xl,$xC2 + veor $t2,$t2,$Xh + veor $Xl,$Xl,$t2 + +#ifndef __ARMEB__ + vrev64.8 $Xl,$Xl +#endif + vext.8 $Xl,$Xl,$Xl,#8 + vst1.64 {$Xl},[$Xi] @ write out Xi + + ret +.size gcm_gmult_v8,.-gcm_gmult_v8 + +.global gcm_ghash_v8 +.type gcm_ghash_v8,%function +.align 4 +gcm_ghash_v8: +___ +$code.=<<___ if ($flavour !~ /64/); + vstmdb sp!,{d8-d15} +___ +$code.=<<___; + vld1.64 {$Xl},[$Xi] @ load [rotated] Xi + subs $len,$len,#32 + vmov.i8 $xC2,#0xe1 + mov $inc,#16 + vld1.64 {$H-$Hhl},[$Htbl],#32 @ load twisted H, ..., H^2 + vld1.64 {$H2},[$Htbl] + cclr $inc,eq + vext.8 $Xl,$Xl,$Xl,#8 + vld1.64 {$t0},[$inp],#16 @ load [rotated] I[0] + vshl.u64 $xC2,$xC2,#57 @ 0xc2.0 +#ifndef __ARMEB__ + vrev64.8 $t0,$t0 + vrev64.8 $Xl,$Xl +#endif + vext.8 $IN,$t0,$t0,#8 + b.lo .Lodd_tail_v8 +___ +{ my 
($Xln,$Xmn,$Xhn,$In) = map("q$_",(4..7)); + ####### + # Xi+2 =[H*(Ii+1 + Xi+1)] mod P = + # [(H*Ii+1) + (H*Xi+1)] mod P = + # [(H*Ii+1) + H^2*(Ii+Xi)] mod P + # +$code.=<<___; + vld1.64 {$t1},[$inp],$inc @ load [rotated] I[1] +#ifndef __ARMEB__ + vrev64.8 $t1,$t1 +#endif + vext.8 $In,$t1,$t1,#8 + veor $IN,$IN,$Xl @ I[i]^=Xi + vpmull.p64 $Xln,$H,$In @ H·Ii+1 + veor $t1,$t1,$In @ Karatsuba pre-processing + vpmull2.p64 $Xhn,$H,$In + b .Loop_mod2x_v8 + +.align 4 +.Loop_mod2x_v8: + vext.8 $t2,$IN,$IN,#8 + subs $len,$len,#32 + vpmull.p64 $Xl,$H2,$IN @ H^2.lo·Xi.lo + cclr $inc,lo + + vpmull.p64 $Xmn,$Hhl,$t1 + veor $t2,$t2,$IN @ Karatsuba pre-processing + vpmull2.p64 $Xh,$H2,$IN @ H^2.hi·Xi.hi + veor $Xl,$Xl,$Xln @ accumulate + vpmull2.p64 $Xm,$Hhl,$t2 @ (H^2.lo+H^2.hi)·(Xi.lo+Xi.hi) + vld1.64 {$t0},[$inp],$inc @ load [rotated] I[i] + + veor $Xh,$Xh,$Xhn + cclr $inc,eq + veor $Xm,$Xm,$Xmn + + vext.8 $t1,$Xl,$Xh,#8 @ Karatsuba post-processing + veor $t2,$Xl,$Xh + veor $Xm,$Xm,$t1 + vld1.64 {$t1},[$inp],$inc @ load [rotated] I[i+1] +#ifndef __ARMEB__ + vrev64.8 $t0,$t0 +#endif + veor $Xm,$Xm,$t2 + vpmull.p64 $t2,$Xl,$xC2 @ 1st phase + +#ifndef __ARMEB__ + vrev64.8 $t1,$t1 +#endif + vmov $Xh#lo,$Xm#hi @ Xh|Xm - 256-bit result + vmov $Xm#hi,$Xl#lo @ Xm is rotated Xl + vext.8 $In,$t1,$t1,#8 + vext.8 $IN,$t0,$t0,#8 + veor $Xl,$Xm,$t2 + vpmull.p64 $Xln,$H,$In @ H·Ii+1 + veor $IN,$IN,$Xh @ accumulate $IN early + + vext.8 $t2,$Xl,$Xl,#8 @ 2nd phase + vpmull.p64 $Xl,$Xl,$xC2 + veor $IN,$IN,$t2 + veor $t1,$t1,$In @ Karatsuba pre-processing + veor $IN,$IN,$Xl + vpmull2.p64 $Xhn,$H,$In + b.hs .Loop_mod2x_v8 + + veor $Xh,$Xh,$t2 + vext.8 $IN,$t0,$t0,#8 @ re-construct $IN + adds $len,$len,#32 + veor $Xl,$Xl,$Xh @ re-construct $Xl + b.eq .Ldone_v8 +___ +} +$code.=<<___; +.Lodd_tail_v8: + vext.8 $t2,$Xl,$Xl,#8 + veor $IN,$IN,$Xl @ inp^=Xi + veor $t1,$t0,$t2 @ $t1 is rotated inp^Xi + + vpmull.p64 $Xl,$H,$IN @ H.lo·Xi.lo + veor $t1,$t1,$IN @ Karatsuba pre-processing + vpmull2.p64 $Xh,$H,$IN 
@ H.hi·Xi.hi + vpmull.p64 $Xm,$Hhl,$t1 @ (H.lo+H.hi)·(Xi.lo+Xi.hi) + + vext.8 $t1,$Xl,$Xh,#8 @ Karatsuba post-processing + veor $t2,$Xl,$Xh + veor $Xm,$Xm,$t1 + veor $Xm,$Xm,$t2 + vpmull.p64 $t2,$Xl,$xC2 @ 1st phase + + vmov $Xh#lo,$Xm#hi @ Xh|Xm - 256-bit result + vmov $Xm#hi,$Xl#lo @ Xm is rotated Xl + veor $Xl,$Xm,$t2 + + vext.8 $t2,$Xl,$Xl,#8 @ 2nd phase + vpmull.p64 $Xl,$Xl,$xC2 + veor $t2,$t2,$Xh + veor $Xl,$Xl,$t2 + +.Ldone_v8: +#ifndef __ARMEB__ + vrev64.8 $Xl,$Xl +#endif + vext.8 $Xl,$Xl,$Xl,#8 + vst1.64 {$Xl},[$Xi] @ write out Xi + +___ +$code.=<<___ if ($flavour !~ /64/); + vldmia sp!,{d8-d15} +___ +$code.=<<___; + ret +.size gcm_ghash_v8,.-gcm_ghash_v8 +___ +} +$code.=<<___; +.asciz "GHASH for ARMv8, CRYPTOGAMS by " +.align 2 +___ + +if ($flavour =~ /64/) { ######## 64-bit code + sub unvmov { + my $arg=shift; + + $arg =~ m/q([0-9]+)#(lo|hi),\s*q([0-9]+)#(lo|hi)/o && + sprintf "ins v%d.d[%d],v%d.d[%d]",$1,($2 eq "lo")?0:1,$3,($4 eq "lo")?0:1; + } + foreach(split("\n",$code)) { + s/cclr\s+([wx])([^,]+),\s*([a-z]+)/csel $1$2,$1zr,$1$2,$3/o or + s/vmov\.i8/movi/o or # fix up legacy mnemonics + s/vmov\s+(.*)/unvmov($1)/geo or + s/vext\.8/ext/o or + s/vshr\.s/sshr\.s/o or + s/vshr/ushr/o or + s/^(\s+)v/$1/o or # strip off v prefix + s/\bbx\s+lr\b/ret/o; + + s/\bq([0-9]+)\b/"v".($1<8?$1:$1+8).".16b"/geo; # old->new registers + s/@\s/\/\//o; # old->new style commentary + + # fix up remainig legacy suffixes + s/\.[ui]?8(\s)/$1/o; + s/\.[uis]?32//o and s/\.16b/\.4s/go; + m/\.p64/o and s/\.16b/\.1q/o; # 1st pmull argument + m/l\.p64/o and s/\.16b/\.1d/go; # 2nd and 3rd pmull arguments + s/\.[uisp]?64//o and s/\.16b/\.2d/go; + s/\.[42]([sd])\[([0-3])\]/\.$1\[$2\]/o; + + print $_,"\n"; + } +} else { ######## 32-bit code + sub unvdup32 { + my $arg=shift; + + $arg =~ m/q([0-9]+),\s*q([0-9]+)\[([0-3])\]/o && + sprintf "vdup.32 q%d,d%d[%d]",$1,2*$2+($3>>1),$3&1; + } + sub unvpmullp64 { + my ($mnemonic,$arg)=@_; + + if ($arg =~ m/q([0-9]+),\s*q([0-9]+),\s*q([0-9]+)/o) { 
+ my $word = 0xf2a00e00|(($1&7)<<13)|(($1&8)<<19) + |(($2&7)<<17)|(($2&8)<<4) + |(($3&7)<<1) |(($3&8)<<2); + $word |= 0x00010001 if ($mnemonic =~ "2"); + # since ARMv7 instructions are always encoded little-endian. + # correct solution is to use .inst directive, but older + # assemblers don't implement it:-( + sprintf ".byte\t0x%02x,0x%02x,0x%02x,0x%02x\t@ %s %s", + $word&0xff,($word>>8)&0xff, + ($word>>16)&0xff,($word>>24)&0xff, + $mnemonic,$arg; + } + } + + foreach(split("\n",$code)) { + s/\b[wx]([0-9]+)\b/r$1/go; # new->old registers + s/\bv([0-9])\.[12468]+[bsd]\b/q$1/go; # new->old registers + s/\/\/\s?/@ /o; # new->old style commentary + + # fix up remainig new-style suffixes + s/\],#[0-9]+/]!/o; + + s/cclr\s+([^,]+),\s*([a-z]+)/mov$2 $1,#0/o or + s/vdup\.32\s+(.*)/unvdup32($1)/geo or + s/v?(pmull2?)\.p64\s+(.*)/unvpmullp64($1,$2)/geo or + s/\bq([0-9]+)#(lo|hi)/sprintf "d%d",2*$1+($2 eq "hi")/geo or + s/^(\s+)b\./$1b/o or + s/^(\s+)ret/$1bx\tlr/o; + + print $_,"\n"; + } +} + +close STDOUT; # enforce flush diff --git a/crypto/perlasm/arm-xlate.pl b/crypto/perlasm/arm-xlate.pl new file mode 100644 index 0000000000..22dc7e4ecc --- /dev/null +++ b/crypto/perlasm/arm-xlate.pl @@ -0,0 +1,165 @@ +#!/usr/bin/env perl + +# ARM assembler distiller by . 
+ +my $flavour = shift; +my $output = shift; +open STDOUT,">$output" or die "can't open $output: $!" if (defined($output) && $output ne ""); # 'or' (not '||', which binds to the string) so a failed open is fatal; with no output argument keep the inherited STDOUT + +$flavour = "linux32" if (!$flavour or $flavour eq "void"); + +my %GLOBALS; +my $dotinlocallabels=($flavour=~/linux/)?1:0; + +################################################################ +# directives which need special treatment on different platforms +################################################################ +my $arch = sub { + if ($flavour =~ /linux/) { ".arch\t".join(',',@_); } + else { ""; } +}; +my $fpu = sub { + if ($flavour =~ /linux/) { ".fpu\t".join(',',@_); } + else { ""; } +}; +my $hidden = sub { + if ($flavour =~ /ios/) { ".private_extern\t".join(',',@_); } + else { ".hidden\t".join(',',@_); } +}; +my $comm = sub { + my @args = split(/,\s*/,shift); + my $name = $args[0]; + my $global = \$GLOBALS{$name}; + my $ret; + + if ($flavour =~ /ios32/) { + $ret = ".comm\t_$name,$args[1]\n"; + $ret .= ".non_lazy_symbol_pointer\n"; + $ret .= "$name:\n"; + $ret .= ".indirect_symbol\t_$name\n"; + $ret .= ".long\t0"; + $name = "_$name"; + } else { $ret = ".comm\t".join(',',@args); } + + $$global = $name; + $ret; +}; +my $globl = sub { + my $name = shift; + my $global = \$GLOBALS{$name}; + my $ret; + + SWITCH: for ($flavour) { + /ios/ && do { $name = "_$name"; + last; + }; + } + + $ret = ".globl $name" if (!$ret); + $$global = $name; + $ret; +}; +my $global = $globl; +my $extern = sub { + &$globl(@_); + return; # return nothing +}; +my $type = sub { + if ($flavour =~ /linux/) { ".type\t".join(',',@_); } + else { ""; } +}; +my $size = sub { + if ($flavour =~ /linux/) { ".size\t".join(',',@_); } + else { ""; } +}; +my $inst = sub { + if ($flavour =~ /linux/) { ".inst\t".join(',',@_); } + else { ".long\t".join(',',@_); } +}; +my $asciz = sub { + my $line = join(",",@_); + if ($line =~ /^"(.*)"$/) + { ".byte " . join(",",unpack("C*",$1),0) . 
"\n.align 2"; } + else + { ""; } +}; + +sub range { + my ($r,$sfx,$start,$end) = @_; + + join(",",map("$r$_$sfx",($start..$end))); +} + +sub expand_line { + my $line = shift; + my @ret = (); + + pos($line)=0; + + while ($line =~ m/\G[^@\/\{\"]*/g) { + if ($line =~ m/\G(@|\/\/|$)/gc) { + last; + } + elsif ($line =~ m/\G\{/gc) { + my $saved_pos = pos($line); + $line =~ s/\G([rdqv])([0-9]+)([^\-]*)\-\1([0-9]+)\3/range($1,$3,$2,$4)/e; + pos($line) = $saved_pos; + $line =~ m/\G[^\}]*\}/g; + } + elsif ($line =~ m/\G\"/gc) { + $line =~ m/\G[^\"]*\"/g; + } + } + + $line =~ s/\b(\w+)/$GLOBALS{$1} or $1/ge; + + return $line; +} + +while($line=<>) { + + if ($line =~ m/^\s*(#|@|\/\/)/) { print $line; next; } + + $line =~ s|/\*.*\*/||; # get rid of C-style comments... + $line =~ s|^\s+||; # ... and skip white spaces in beginning... + $line =~ s|\s+$||; # ... and at the end + + { + $line =~ s|[\b\.]L(\w{2,})|L$1|g; # common denominator for Locallabel + $line =~ s|\bL(\w{2,})|\.L$1|g if ($dotinlocallabels); + } + + { + $line =~ s|(^[\.\w]+)\:\s*||; + my $label = $1; + if ($label) { + printf "%s:",($GLOBALS{$label} or $label); + } + } + + if ($line !~ m/^[#@]/) { + $line =~ s|^\s*(\.?)(\S+)\s*||; + my $c = $1; $c = "\t" if ($c eq ""); + my $mnemonic = $2; + my $opcode; + if ($mnemonic =~ m/([^\.]+)\.([^\.]+)/) { + $opcode = eval("\$$1_$2"); + } else { + $opcode = eval("\$$mnemonic"); + } + + my $arg=expand_line($line); + + if (ref($opcode) eq 'CODE') { + $line = &$opcode($arg); + } elsif ($mnemonic) { + $line = $c.$mnemonic; + $line.= "\t$arg" if ($arg); + } + } + + print $line if ($line); + print "\n"; +} + +close STDOUT; diff --git a/crypto/sha/Makefile b/crypto/sha/Makefile index b1582f2cff..63e11711d9 100644 --- a/crypto/sha/Makefile +++ b/crypto/sha/Makefile @@ -90,6 +90,9 @@ sha512-%.S: asm/sha512-%.pl; $(PERL) $< $(PERLASM_SCHEME) $@ sha1-armv4-large.o: sha1-armv4-large.S sha256-armv4.o: sha256-armv4.S sha512-armv4.o: sha512-armv4.S +sha1-armv8.o: sha1-armv8.S
+sha256-armv8.o: sha256-armv8.S +sha512-armv8.o: sha512-armv8.S files: $(PERL) $(TOP)/util/files.pl Makefile >> $(TOP)/MINFO diff --git a/crypto/sha/asm/sha1-armv8.pl b/crypto/sha/asm/sha1-armv8.pl new file mode 100644 index 0000000000..6be8624342 --- /dev/null +++ b/crypto/sha/asm/sha1-armv8.pl @@ -0,0 +1,343 @@ +#!/usr/bin/env perl +# +# ==================================================================== +# Written by Andy Polyakov for the OpenSSL +# project. The module is, however, dual licensed under OpenSSL and +# CRYPTOGAMS licenses depending on where you obtain it. For further +# details see http://www.openssl.org/~appro/cryptogams/. +# ==================================================================== +# +# SHA1 for ARMv8. +# +# Performance in cycles per processed byte and improvement coefficient +# over code generated with "default" compiler: +# +# hardware-assisted software(*) +# Apple A7 2.31 4.13 (+14%) +# Cortex-A53 2.19 8.73 (+108%) +# Cortex-A57 2.35 7.88 (+74%) +# +# (*) Software results are presented mostly for reference purposes. 
+ +$flavour = shift; +$output = shift; + +$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; +( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or +( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or +die "can't locate arm-xlate.pl"; + +open OUT,"| \"$^X\" $xlate $flavour $output"; +*STDOUT=*OUT; + +($ctx,$inp,$num)=("x0","x1","x2"); +@Xw=map("w$_",(3..17,19)); +@Xx=map("x$_",(3..17,19)); +@V=($A,$B,$C,$D,$E)=map("w$_",(20..24)); +($t0,$t1,$t2,$K)=map("w$_",(25..28)); + + +sub BODY_00_19 { +my ($i,$a,$b,$c,$d,$e)=@_; +my $j=($i+2)&15; + +$code.=<<___ if ($i<15 && !($i&1)); + lsr @Xx[$i+1],@Xx[$i],#32 +___ +$code.=<<___ if ($i<14 && !($i&1)); + ldr @Xx[$i+2],[$inp,#`($i+2)*4-64`] +___ +$code.=<<___ if ($i<14 && ($i&1)); +#ifdef __ARMEB__ + ror @Xx[$i+1],@Xx[$i+1],#32 +#else + rev32 @Xx[$i+1],@Xx[$i+1] +#endif +___ +$code.=<<___ if ($i<14); + bic $t0,$d,$b + and $t1,$c,$b + ror $t2,$a,#27 + add $d,$d,$K // future e+=K + orr $t0,$t0,$t1 + add $e,$e,$t2 // e+=rot(a,5) + ror $b,$b,#2 + add $d,$d,@Xw[($i+1)&15] // future e+=X[i] + add $e,$e,$t0 // e+=F(b,c,d) +___ +$code.=<<___ if ($i==19); + movz $K,#0xeba1 + movk $K,#0x6ed9,lsl#16 +___ +$code.=<<___ if ($i>=14); + eor @Xw[$j],@Xw[$j],@Xw[($j+2)&15] + bic $t0,$d,$b + and $t1,$c,$b + ror $t2,$a,#27 + eor @Xw[$j],@Xw[$j],@Xw[($j+8)&15] + add $d,$d,$K // future e+=K + orr $t0,$t0,$t1 + add $e,$e,$t2 // e+=rot(a,5) + eor @Xw[$j],@Xw[$j],@Xw[($j+13)&15] + ror $b,$b,#2 + add $d,$d,@Xw[($i+1)&15] // future e+=X[i] + add $e,$e,$t0 // e+=F(b,c,d) + ror @Xw[$j],@Xw[$j],#31 +___ +} + +sub BODY_40_59 { +my ($i,$a,$b,$c,$d,$e)=@_; +my $j=($i+2)&15; + +$code.=<<___ if ($i==59); + movz $K,#0xc1d6 + movk $K,#0xca62,lsl#16 +___ +$code.=<<___; + orr $t0,$b,$c + and $t1,$b,$c + eor @Xw[$j],@Xw[$j],@Xw[($j+2)&15] + ror $t2,$a,#27 + and $t0,$t0,$d + add $d,$d,$K // future e+=K + eor @Xw[$j],@Xw[$j],@Xw[($j+8)&15] + add $e,$e,$t2 // e+=rot(a,5) + orr $t0,$t0,$t1 + ror $b,$b,#2 + eor @Xw[$j],@Xw[$j],@Xw[($j+13)&15] + add $d,$d,@Xw[($i+1)&15] // 
future e+=X[i] + add $e,$e,$t0 // e+=F(b,c,d) + ror @Xw[$j],@Xw[$j],#31 +___ +} + +sub BODY_20_39 { +my ($i,$a,$b,$c,$d,$e)=@_; +my $j=($i+2)&15; + +$code.=<<___ if ($i==39); + movz $K,#0xbcdc + movk $K,#0x8f1b,lsl#16 +___ +$code.=<<___ if ($i<78); + eor @Xw[$j],@Xw[$j],@Xw[($j+2)&15] + eor $t0,$d,$b + ror $t2,$a,#27 + add $d,$d,$K // future e+=K + eor @Xw[$j],@Xw[$j],@Xw[($j+8)&15] + eor $t0,$t0,$c + add $e,$e,$t2 // e+=rot(a,5) + ror $b,$b,#2 + eor @Xw[$j],@Xw[$j],@Xw[($j+13)&15] + add $d,$d,@Xw[($i+1)&15] // future e+=X[i] + add $e,$e,$t0 // e+=F(b,c,d) + ror @Xw[$j],@Xw[$j],#31 +___ +$code.=<<___ if ($i==78); + ldp @Xw[1],@Xw[2],[$ctx] + eor $t0,$d,$b + ror $t2,$a,#27 + add $d,$d,$K // future e+=K + eor $t0,$t0,$c + add $e,$e,$t2 // e+=rot(a,5) + ror $b,$b,#2 + add $d,$d,@Xw[($i+1)&15] // future e+=X[i] + add $e,$e,$t0 // e+=F(b,c,d) +___ +$code.=<<___ if ($i==79); + ldp @Xw[3],@Xw[4],[$ctx,#8] + eor $t0,$d,$b + ror $t2,$a,#27 + eor $t0,$t0,$c + add $e,$e,$t2 // e+=rot(a,5) + ror $b,$b,#2 + ldr @Xw[5],[$ctx,#16] + add $e,$e,$t0 // e+=F(b,c,d) +___ +} + +$code.=<<___; +#include "arm_arch.h" + +.text + +.extern OPENSSL_armcap_P +.globl sha1_block_data_order +.type sha1_block_data_order,%function +.align 6 +sha1_block_data_order: + ldr x16,.LOPENSSL_armcap_P + adr x17,.LOPENSSL_armcap_P + add x16,x16,x17 + ldr w16,[x16] + tst w16,#ARMV8_SHA1 + b.ne .Lv8_entry + + stp x29,x30,[sp,#-96]! 
+ add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + + ldp $A,$B,[$ctx] + ldp $C,$D,[$ctx,#8] + ldr $E,[$ctx,#16] + +.Loop: + ldr @Xx[0],[$inp],#64 + movz $K,#0x7999 + sub $num,$num,#1 + movk $K,#0x5a82,lsl#16 +#ifdef __ARMEB__ + ror $Xx[0],@Xx[0],#32 +#else + rev32 @Xx[0],@Xx[0] +#endif + add $E,$E,$K // warm it up + add $E,$E,@Xw[0] +___ +for($i=0;$i<20;$i++) { &BODY_00_19($i,@V); unshift(@V,pop(@V)); } +for(;$i<40;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); } +for(;$i<60;$i++) { &BODY_40_59($i,@V); unshift(@V,pop(@V)); } +for(;$i<80;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); } +$code.=<<___; + add $B,$B,@Xw[2] + add $C,$C,@Xw[3] + add $A,$A,@Xw[1] + add $D,$D,@Xw[4] + add $E,$E,@Xw[5] + stp $A,$B,[$ctx] + stp $C,$D,[$ctx,#8] + str $E,[$ctx,#16] + cbnz $num,.Loop + + ldp x19,x20,[sp,#16] + ldp x21,x22,[sp,#32] + ldp x23,x24,[sp,#48] + ldp x25,x26,[sp,#64] + ldp x27,x28,[sp,#80] + ldr x29,[sp],#96 + ret +.size sha1_block_data_order,.-sha1_block_data_order +___ +{{{ +my ($ABCD,$E,$E0,$E1)=map("v$_.16b",(0..3)); +my @MSG=map("v$_.16b",(4..7)); +my @Kxx=map("v$_.4s",(16..19)); +my ($W0,$W1)=("v20.4s","v21.4s"); +my $ABCD_SAVE="v22.16b"; + +$code.=<<___; +.type sha1_block_armv8,%function +.align 6 +sha1_block_armv8: +.Lv8_entry: + stp x29,x30,[sp,#-16]! 
+ add x29,sp,#0 + + adr x4,.Lconst + eor $E,$E,$E + ld1.32 {$ABCD},[$ctx],#16 + ld1.32 {$E}[0],[$ctx] + sub $ctx,$ctx,#16 + ld1.32 {@Kxx[0]-@Kxx[3]},[x4] + +.Loop_hw: + ld1 {@MSG[0]-@MSG[3]},[$inp],#64 + sub $num,$num,#1 + rev32 @MSG[0],@MSG[0] + rev32 @MSG[1],@MSG[1] + + add.i32 $W0,@Kxx[0],@MSG[0] + rev32 @MSG[2],@MSG[2] + orr $ABCD_SAVE,$ABCD,$ABCD // offload + + add.i32 $W1,@Kxx[0],@MSG[1] + rev32 @MSG[3],@MSG[3] + sha1h $E1,$ABCD + sha1c $ABCD,$E,$W0 // 0 + add.i32 $W0,@Kxx[$j],@MSG[2] + sha1su0 @MSG[0],@MSG[1],@MSG[2] +___ +for ($j=0,$i=1;$i<20-3;$i++) { +my $f=("c","p","m","p")[$i/5]; +$code.=<<___; + sha1h $E0,$ABCD // $i + sha1$f $ABCD,$E1,$W1 + add.i32 $W1,@Kxx[$j],@MSG[3] + sha1su1 @MSG[0],@MSG[3] +___ +$code.=<<___ if ($i<20-4); + sha1su0 @MSG[1],@MSG[2],@MSG[3] +___ + ($E0,$E1)=($E1,$E0); ($W0,$W1)=($W1,$W0); + push(@MSG,shift(@MSG)); $j++ if ((($i+3)%5)==0); +} +$code.=<<___; + sha1h $E0,$ABCD // $i + sha1p $ABCD,$E1,$W1 + add.i32 $W1,@Kxx[$j],@MSG[3] + + sha1h $E1,$ABCD // 18 + sha1p $ABCD,$E0,$W0 + + sha1h $E0,$ABCD // 19 + sha1p $ABCD,$E1,$W1 + + add.i32 $E,$E,$E0 + add.i32 $ABCD,$ABCD,$ABCD_SAVE + + cbnz $num,.Loop_hw + + st1.32 {$ABCD},[$ctx],#16 + st1.32 {$E}[0],[$ctx] + + ldr x29,[sp],#16 + ret +.size sha1_block_armv8,.-sha1_block_armv8 +.align 6 +.Lconst: +.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 //K_00_19 +.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 //K_20_39 +.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc //K_40_59 +.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 //K_60_79 +.LOPENSSL_armcap_P: +.quad OPENSSL_armcap_P-. 
+.asciz "SHA1 block transform for ARMv8, CRYPTOGAMS by " +.align 2 +.comm OPENSSL_armcap_P,4,4 +___ +}}} + +{ my %opcode = ( + "sha1c" => 0x5e000000, "sha1p" => 0x5e001000, + "sha1m" => 0x5e002000, "sha1su0" => 0x5e003000, + "sha1h" => 0x5e280800, "sha1su1" => 0x5e281800 ); + + sub unsha1 { + my ($mnemonic,$arg)=@_; + + $arg =~ m/[qv]([0-9]+)[^,]*,\s*[qv]([0-9]+)[^,]*(?:,\s*[qv]([0-9]+))?/o + && + sprintf ".inst\t0x%08x\t//%s %s", + $opcode{$mnemonic}|$1|($2<<5)|($3<<16), + $mnemonic,$arg; + } +} + +foreach(split("\n",$code)) { + + s/\`([^\`]*)\`/eval($1)/geo; + + s/\b(sha1\w+)\s+([qv].*)/unsha1($1,$2)/geo; + + s/\.\w?32\b//o and s/\.16b/\.4s/go; + m/(ld|st)1[^\[]+\[0\]/o and s/\.4s/\.s/go; + + print $_,"\n"; +} + +close STDOUT; diff --git a/crypto/sha/asm/sha512-armv8.pl b/crypto/sha/asm/sha512-armv8.pl new file mode 100644 index 0000000000..45eb719fe5 --- /dev/null +++ b/crypto/sha/asm/sha512-armv8.pl @@ -0,0 +1,428 @@ +#!/usr/bin/env perl +# +# ==================================================================== +# Written by Andy Polyakov for the OpenSSL +# project. The module is, however, dual licensed under OpenSSL and +# CRYPTOGAMS licenses depending on where you obtain it. For further +# details see http://www.openssl.org/~appro/cryptogams/. +# ==================================================================== +# +# SHA256/512 for ARMv8. +# +# Performance in cycles per processed byte and improvement coefficient +# over code generated with "default" compiler: +# +# SHA256-hw SHA256(*) SHA512 +# Apple A7 1.97 10.5 (+33%) 6.73 (-1%(**)) +# Cortex-A53 2.38 15.6 (+110%) 10.1 (+190%(***)) +# Cortex-A57 2.31 11.6 (+86%) 7.51 (+260%(***)) +# +# (*) Software SHA256 results are of lesser relevance, presented +# mostly for informational purposes. +# (**) The result is a trade-off: it's possible to improve it by +# 10% (or by 1 cycle per round), but at the cost of 20% loss +# on Cortex-A53 (or by 4 cycles per round). 
+# (***) Super-impressive coefficients over gcc-generated code are +# indication of some compiler "pathology", most notably code +# generated with -mgeneral-regs-only is significanty faster +# and lags behind assembly only by 50-90%. + +$flavour=shift; +$output=shift; + +$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; +( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or +( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or +die "can't locate arm-xlate.pl"; + +open OUT,"| \"$^X\" $xlate $flavour $output"; +*STDOUT=*OUT; + +if ($output =~ /512/) { + $BITS=512; + $SZ=8; + @Sigma0=(28,34,39); + @Sigma1=(14,18,41); + @sigma0=(1, 8, 7); + @sigma1=(19,61, 6); + $rounds=80; + $reg_t="x"; +} else { + $BITS=256; + $SZ=4; + @Sigma0=( 2,13,22); + @Sigma1=( 6,11,25); + @sigma0=( 7,18, 3); + @sigma1=(17,19,10); + $rounds=64; + $reg_t="w"; +} + +$func="sha${BITS}_block_data_order"; + +($ctx,$inp,$num,$Ktbl)=map("x$_",(0..2,30)); + +@X=map("$reg_t$_",(3..15,0..2)); +@V=($A,$B,$C,$D,$E,$F,$G,$H)=map("$reg_t$_",(20..27)); +($t0,$t1,$t2,$t3)=map("$reg_t$_",(16,17,19,28)); + +sub BODY_00_xx { +my ($i,$a,$b,$c,$d,$e,$f,$g,$h)=@_; +my $j=($i+1)&15; +my ($T0,$T1,$T2)=(@X[($i-8)&15],@X[($i-9)&15],@X[($i-10)&15]); + $T0=@X[$i+3] if ($i<11); + +$code.=<<___ if ($i<16); +#ifndef __ARMEB__ + rev @X[$i],@X[$i] // $i +#endif +___ +$code.=<<___ if ($i<13 && ($i&1)); + ldp @X[$i+1],@X[$i+2],[$inp],#2*$SZ +___ +$code.=<<___ if ($i==13); + ldp @X[14],@X[15],[$inp] +___ +$code.=<<___ if ($i>=14); + ldr @X[($i-11)&15],[sp,#`$SZ*(($i-11)%4)`] +___ +$code.=<<___ if ($i>0 && $i<16); + add $a,$a,$t1 // h+=Sigma0(a) +___ +$code.=<<___ if ($i>=11); + str @X[($i-8)&15],[sp,#`$SZ*(($i-8)%4)`] +___ +# While ARMv8 specifies merged rotate-n-logical operation such as +# 'eor x,y,z,ror#n', it was found to negatively affect performance +# on Apple A7. The reason seems to be that it requires even 'y' to +# be available earlier. 
This means that such merged instruction is +# not necessarily best choice on critical path... On the other hand +# Cortex-A5x handles merged instructions much better than disjoint +# rotate and logical... See (**) footnote above. +$code.=<<___ if ($i<15); + ror $t0,$e,#$Sigma1[0] + add $h,$h,$t2 // h+=K[i] + eor $T0,$e,$e,ror#`$Sigma1[2]-$Sigma1[1]` + and $t1,$f,$e + bic $t2,$g,$e + add $h,$h,@X[$i&15] // h+=X[i] + orr $t1,$t1,$t2 // Ch(e,f,g) + eor $t2,$a,$b // a^b, b^c in next round + eor $t0,$t0,$T0,ror#$Sigma1[1] // Sigma1(e) + ror $T0,$a,#$Sigma0[0] + add $h,$h,$t1 // h+=Ch(e,f,g) + eor $t1,$a,$a,ror#`$Sigma0[2]-$Sigma0[1]` + add $h,$h,$t0 // h+=Sigma1(e) + and $t3,$t3,$t2 // (b^c)&=(a^b) + add $d,$d,$h // d+=h + eor $t3,$t3,$b // Maj(a,b,c) + eor $t1,$T0,$t1,ror#$Sigma0[1] // Sigma0(a) + add $h,$h,$t3 // h+=Maj(a,b,c) + ldr $t3,[$Ktbl],#$SZ // *K++, $t2 in next round + //add $h,$h,$t1 // h+=Sigma0(a) +___ +$code.=<<___ if ($i>=15); + ror $t0,$e,#$Sigma1[0] + add $h,$h,$t2 // h+=K[i] + ror $T1,@X[($j+1)&15],#$sigma0[0] + and $t1,$f,$e + ror $T2,@X[($j+14)&15],#$sigma1[0] + bic $t2,$g,$e + ror $T0,$a,#$Sigma0[0] + add $h,$h,@X[$i&15] // h+=X[i] + eor $t0,$t0,$e,ror#$Sigma1[1] + eor $T1,$T1,@X[($j+1)&15],ror#$sigma0[1] + orr $t1,$t1,$t2 // Ch(e,f,g) + eor $t2,$a,$b // a^b, b^c in next round + eor $t0,$t0,$e,ror#$Sigma1[2] // Sigma1(e) + eor $T0,$T0,$a,ror#$Sigma0[1] + add $h,$h,$t1 // h+=Ch(e,f,g) + and $t3,$t3,$t2 // (b^c)&=(a^b) + eor $T2,$T2,@X[($j+14)&15],ror#$sigma1[1] + eor $T1,$T1,@X[($j+1)&15],lsr#$sigma0[2] // sigma0(X[i+1]) + add $h,$h,$t0 // h+=Sigma1(e) + eor $t3,$t3,$b // Maj(a,b,c) + eor $t1,$T0,$a,ror#$Sigma0[2] // Sigma0(a) + eor $T2,$T2,@X[($j+14)&15],lsr#$sigma1[2] // sigma1(X[i+14]) + add @X[$j],@X[$j],@X[($j+9)&15] + add $d,$d,$h // d+=h + add $h,$h,$t3 // h+=Maj(a,b,c) + ldr $t3,[$Ktbl],#$SZ // *K++, $t2 in next round + add @X[$j],@X[$j],$T1 + add $h,$h,$t1 // h+=Sigma0(a) + add @X[$j],@X[$j],$T2 +___ + ($t2,$t3)=($t3,$t2); +} + 
+$code.=<<___; +#include "arm_arch.h" + +.text + +.extern OPENSSL_armcap_P +.globl $func +.type $func,%function +.align 6 +$func: +___ +$code.=<<___ if ($SZ==4); + ldr x16,.LOPENSSL_armcap_P + adr x17,.LOPENSSL_armcap_P + add x16,x16,x17 + ldr w16,[x16] + tst w16,#ARMV8_SHA256 + b.ne .Lv8_entry +___ +$code.=<<___; + stp x29,x30,[sp,#-128]! + add x29,sp,#0 + + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + sub sp,sp,#4*$SZ + + ldp $A,$B,[$ctx] // load context + ldp $C,$D,[$ctx,#2*$SZ] + ldp $E,$F,[$ctx,#4*$SZ] + add $num,$inp,$num,lsl#`log(16*$SZ)/log(2)` // end of input + ldp $G,$H,[$ctx,#6*$SZ] + adr $Ktbl,.LK$BITS + stp $ctx,$num,[x29,#96] + +.Loop: + ldp @X[0],@X[1],[$inp],#2*$SZ + ldr $t2,[$Ktbl],#$SZ // *K++ + eor $t3,$B,$C // magic seed + str $inp,[x29,#112] +___ +for ($i=0;$i<16;$i++) { &BODY_00_xx($i,@V); unshift(@V,pop(@V)); } +$code.=".Loop_16_xx:\n"; +for (;$i<32;$i++) { &BODY_00_xx($i,@V); unshift(@V,pop(@V)); } +$code.=<<___; + cbnz $t2,.Loop_16_xx + + ldp $ctx,$num,[x29,#96] + ldr $inp,[x29,#112] + sub $Ktbl,$Ktbl,#`$SZ*($rounds+1)` // rewind + + ldp @X[0],@X[1],[$ctx] + ldp @X[2],@X[3],[$ctx,#2*$SZ] + add $inp,$inp,#14*$SZ // advance input pointer + ldp @X[4],@X[5],[$ctx,#4*$SZ] + add $A,$A,@X[0] + ldp @X[6],@X[7],[$ctx,#6*$SZ] + add $B,$B,@X[1] + add $C,$C,@X[2] + add $D,$D,@X[3] + stp $A,$B,[$ctx] + add $E,$E,@X[4] + add $F,$F,@X[5] + stp $C,$D,[$ctx,#2*$SZ] + add $G,$G,@X[6] + add $H,$H,@X[7] + cmp $inp,$num + stp $E,$F,[$ctx,#4*$SZ] + stp $G,$H,[$ctx,#6*$SZ] + b.ne .Loop + + ldp x19,x20,[x29,#16] + add sp,sp,#4*$SZ + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + ldp x29,x30,[sp],#128 + ret +.size $func,.-$func + +.align 6 +.type .LK$BITS,%object +.LK$BITS: +___ +$code.=<<___ if ($SZ==8); + .quad 0x428a2f98d728ae22,0x7137449123ef65cd + .quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc + .quad 0x3956c25bf348b538,0x59f111f1b605d019 + .quad 
0x923f82a4af194f9b,0xab1c5ed5da6d8118 + .quad 0xd807aa98a3030242,0x12835b0145706fbe + .quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2 + .quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1 + .quad 0x9bdc06a725c71235,0xc19bf174cf692694 + .quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3 + .quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65 + .quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483 + .quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5 + .quad 0x983e5152ee66dfab,0xa831c66d2db43210 + .quad 0xb00327c898fb213f,0xbf597fc7beef0ee4 + .quad 0xc6e00bf33da88fc2,0xd5a79147930aa725 + .quad 0x06ca6351e003826f,0x142929670a0e6e70 + .quad 0x27b70a8546d22ffc,0x2e1b21385c26c926 + .quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df + .quad 0x650a73548baf63de,0x766a0abb3c77b2a8 + .quad 0x81c2c92e47edaee6,0x92722c851482353b + .quad 0xa2bfe8a14cf10364,0xa81a664bbc423001 + .quad 0xc24b8b70d0f89791,0xc76c51a30654be30 + .quad 0xd192e819d6ef5218,0xd69906245565a910 + .quad 0xf40e35855771202a,0x106aa07032bbd1b8 + .quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53 + .quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8 + .quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb + .quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3 + .quad 0x748f82ee5defb2fc,0x78a5636f43172f60 + .quad 0x84c87814a1f0ab72,0x8cc702081a6439ec + .quad 0x90befffa23631e28,0xa4506cebde82bde9 + .quad 0xbef9a3f7b2c67915,0xc67178f2e372532b + .quad 0xca273eceea26619c,0xd186b8c721c0c207 + .quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178 + .quad 0x06f067aa72176fba,0x0a637dc5a2c898a6 + .quad 0x113f9804bef90dae,0x1b710b35131c471b + .quad 0x28db77f523047d84,0x32caab7b40c72493 + .quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c + .quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a + .quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 + .quad 0 // terminator +___ +$code.=<<___ if ($SZ==4); + .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 + .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 + .long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 + .long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 + .long 
0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc + .long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da + .long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 + .long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 + .long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 + .long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 + .long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 + .long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 + .long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 + .long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 + .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 + .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 + .long 0 //terminator +___ +$code.=<<___; +.size .LK$BITS,.-.LK$BITS +.align 3 +.LOPENSSL_armcap_P: + .quad OPENSSL_armcap_P-. +.asciz "SHA$BITS block transform for ARMv8, CRYPTOGAMS by " +.align 2 +___ + +if ($SZ==4) { +my $Ktbl="x3"; + +my ($ABCD,$EFGH,$abcd)=map("v$_.16b",(0..2)); +my @MSG=map("v$_.16b",(4..7)); +my ($W0,$W1)=("v16.4s","v17.4s"); +my ($ABCD_SAVE,$EFGH_SAVE)=("v18.16b","v19.16b"); + +$code.=<<___; +.type sha256_block_armv8,%function +.align 6 +sha256_block_armv8: +.Lv8_entry: + stp x29,x30,[sp,#-16]! 
+ add x29,sp,#0 + + ld1.32 {$ABCD,$EFGH},[$ctx] + adr $Ktbl,.LK256 + +.Loop_hw: + ld1 {@MSG[0]-@MSG[3]},[$inp],#64 + sub $num,$num,#1 + ld1.32 {$W0},[$Ktbl],#16 + rev32 @MSG[0],@MSG[0] + rev32 @MSG[1],@MSG[1] + rev32 @MSG[2],@MSG[2] + rev32 @MSG[3],@MSG[3] + orr $ABCD_SAVE,$ABCD,$ABCD // offload + orr $EFGH_SAVE,$EFGH,$EFGH +___ +for($i=0;$i<12;$i++) { +$code.=<<___; + ld1.32 {$W1},[$Ktbl],#16 + add.i32 $W0,$W0,@MSG[0] + sha256su0 @MSG[0],@MSG[1] + orr $abcd,$ABCD,$ABCD + sha256h $ABCD,$EFGH,$W0 + sha256h2 $EFGH,$abcd,$W0 + sha256su1 @MSG[0],@MSG[2],@MSG[3] +___ + ($W0,$W1)=($W1,$W0); push(@MSG,shift(@MSG)); +} +$code.=<<___; + ld1.32 {$W1},[$Ktbl],#16 + add.i32 $W0,$W0,@MSG[0] + orr $abcd,$ABCD,$ABCD + sha256h $ABCD,$EFGH,$W0 + sha256h2 $EFGH,$abcd,$W0 + + ld1.32 {$W0},[$Ktbl],#16 + add.i32 $W1,$W1,@MSG[1] + orr $abcd,$ABCD,$ABCD + sha256h $ABCD,$EFGH,$W1 + sha256h2 $EFGH,$abcd,$W1 + + ld1.32 {$W1},[$Ktbl] + add.i32 $W0,$W0,@MSG[2] + sub $Ktbl,$Ktbl,#$rounds*$SZ-16 // rewind + orr $abcd,$ABCD,$ABCD + sha256h $ABCD,$EFGH,$W0 + sha256h2 $EFGH,$abcd,$W0 + + add.i32 $W1,$W1,@MSG[3] + orr $abcd,$ABCD,$ABCD + sha256h $ABCD,$EFGH,$W1 + sha256h2 $EFGH,$abcd,$W1 + + add.i32 $ABCD,$ABCD,$ABCD_SAVE + add.i32 $EFGH,$EFGH,$EFGH_SAVE + + cbnz $num,.Loop_hw + + st1.32 {$ABCD,$EFGH},[$ctx] + + ldr x29,[sp],#16 + ret +.size sha256_block_armv8,.-sha256_block_armv8 +___ +} + +$code.=<<___; +.comm OPENSSL_armcap_P,4,4 +___ + +{ my %opcode = ( + "sha256h" => 0x5e004000, "sha256h2" => 0x5e005000, + "sha256su0" => 0x5e282800, "sha256su1" => 0x5e006000 ); + + sub unsha256 { + my ($mnemonic,$arg)=@_; + + $arg =~ m/[qv]([0-9]+)[^,]*,\s*[qv]([0-9]+)[^,]*(?:,\s*[qv]([0-9]+))?/o + && + sprintf ".inst\t0x%08x\t//%s %s", + $opcode{$mnemonic}|$1|($2<<5)|($3<<16), + $mnemonic,$arg; + } +} + +foreach(split("\n",$code)) { + + s/\`([^\`]*)\`/eval($1)/geo; + + s/\b(sha256\w+)\s+([qv].*)/unsha256($1,$2)/geo; + + s/\.\w?32\b//o and s/\.16b/\.4s/go; + m/(ld|st)1[^\[]+\[0\]/o and s/\.4s/\.s/go; + + print 
$_,"\n"; +} + +close STDOUT; From 3b3114770a2ab3df863701b2b8518b5c01fa0cae Mon Sep 17 00:00:00 2001 From: Andy Polyakov Date: Mon, 11 May 2015 11:34:56 +0200 Subject: [PATCH 106/120] Engage ARMv8 assembly pack. Reviewed-by: Dr. Stephen Henson (cherry picked from commit 083ed53defb42ab4d3488bc7f80d9170d22293e7) --- crypto/arm_arch.h | 17 ++++++- crypto/armcap.c | 26 ++++++++++ crypto/evp/e_aes.c | 113 ++++++++++++++++++++++++++++++++++++++++++ crypto/modes/gcm128.c | 27 ++++++++-- 4 files changed, 177 insertions(+), 6 deletions(-) diff --git a/crypto/arm_arch.h b/crypto/arm_arch.h index a50c366976..7a377758eb 100644 --- a/crypto/arm_arch.h +++ b/crypto/arm_arch.h @@ -10,13 +10,22 @@ # define __ARMEL__ # endif # elif defined(__GNUC__) +# if defined(__aarch64__) +# define __ARM_ARCH__ 8 +# if __BYTE_ORDER__==__ORDER_BIG_ENDIAN__ +# define __ARMEB__ +# else +# define __ARMEL__ +# endif /* * Why doesn't gcc define __ARM_ARCH__? Instead it defines * bunch of below macros. See all_architectires[] table in * gcc/config/arm/arm.c. On a side note it defines * __ARMEL__/__ARMEB__ for little-/big-endian. 
*/ -# if defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || \ +# elif defined(__ARM_ARCH_8A__) +# define __ARM_ARCH__ 8 +# elif defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || \ defined(__ARM_ARCH_7R__)|| defined(__ARM_ARCH_7M__) || \ defined(__ARM_ARCH_7EM__) # define __ARM_ARCH__ 7 @@ -42,10 +51,14 @@ #if !__ASSEMBLER__ extern unsigned int OPENSSL_armcap_P; +#endif #define ARMV7_NEON (1<<0) #define ARMV7_TICK (1<<1) -#endif +#define ARMV8_AES (1<<2) +#define ARMV8_SHA1 (1<<3) +#define ARMV8_SHA256 (1<<4) +#define ARMV8_PMULL (1<<5) #endif #endif diff --git a/crypto/armcap.c b/crypto/armcap.c index 5258d2fbdd..2579389ffd 100644 --- a/crypto/armcap.c +++ b/crypto/armcap.c @@ -20,6 +20,10 @@ static void ill_handler (int sig) { siglongjmp(ill_jmp,sig); } */ void _armv7_neon_probe(void); unsigned int _armv7_tick(void); +void _armv8_aes_probe(void); +void _armv8_sha1_probe(void); +void _armv8_sha256_probe(void); +void _armv8_pmull_probe(void); unsigned int OPENSSL_rdtsc(void) { @@ -68,6 +72,28 @@ void OPENSSL_cpuid_setup(void) { _armv7_neon_probe(); OPENSSL_armcap_P |= ARMV7_NEON; +#ifdef __aarch64__ + if (sigsetjmp(ill_jmp,1) == 0) + { + _armv8_pmull_probe(); + OPENSSL_armcap_P |= ARMV8_PMULL|ARMV8_AES; + } + else if (sigsetjmp(ill_jmp,1) == 0) + { + _armv8_aes_probe(); + OPENSSL_armcap_P |= ARMV8_AES; + } + if (sigsetjmp(ill_jmp,1) == 0) + { + _armv8_sha1_probe(); + OPENSSL_armcap_P |= ARMV8_SHA1; + } + if (sigsetjmp(ill_jmp,1) == 0) + { + _armv8_sha256_probe(); + OPENSSL_armcap_P |= ARMV8_SHA256; + } +#endif } if (sigsetjmp(ill_jmp,1) == 0) { diff --git a/crypto/evp/e_aes.c b/crypto/evp/e_aes.c index 56e5fffc7b..9a2de166c5 100644 --- a/crypto/evp/e_aes.c +++ b/crypto/evp/e_aes.c @@ -471,6 +471,35 @@ const EVP_CIPHER *EVP_aes_##keylen##_##mode(void) \ { return &aes_##keylen##_##mode; } #endif +#if defined(OPENSSL_CPUID_OBJ) && defined(__aarch64__) +#include "arm_arch.h" +#if __ARM_ARCH__>=7 +# define HWAES_CAPABLE (OPENSSL_armcap_P & ARMV8_AES) +# define 
HWAES_set_encrypt_key aes_v8_set_encrypt_key +# define HWAES_set_decrypt_key aes_v8_set_decrypt_key +# define HWAES_encrypt aes_v8_encrypt +# define HWAES_decrypt aes_v8_decrypt +# define HWAES_cbc_encrypt aes_v8_cbc_encrypt +# define HWAES_ctr32_encrypt_blocks aes_v8_ctr32_encrypt_blocks +#endif +#endif + +#if defined(HWAES_CAPABLE) +int HWAES_set_encrypt_key(const unsigned char *userKey, const int bits, + AES_KEY *key); +int HWAES_set_decrypt_key(const unsigned char *userKey, const int bits, + AES_KEY *key); +void HWAES_encrypt(const unsigned char *in, unsigned char *out, + const AES_KEY *key); +void HWAES_decrypt(const unsigned char *in, unsigned char *out, + const AES_KEY *key); +void HWAES_cbc_encrypt(const unsigned char *in, unsigned char *out, + size_t length, const AES_KEY *key, + unsigned char *ivec, const int enc); +void HWAES_ctr32_encrypt_blocks(const unsigned char *in, unsigned char *out, + size_t len, const AES_KEY *key, const unsigned char ivec[16]); +#endif + #define BLOCK_CIPHER_generic_pack(nid,keylen,flags) \ BLOCK_CIPHER_generic(nid,keylen,16,16,cbc,cbc,CBC,flags|EVP_CIPH_FLAG_DEFAULT_ASN1) \ BLOCK_CIPHER_generic(nid,keylen,16,0,ecb,ecb,ECB,flags|EVP_CIPH_FLAG_DEFAULT_ASN1) \ @@ -489,6 +518,19 @@ static int aes_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, mode = ctx->cipher->flags & EVP_CIPH_MODE; if ((mode == EVP_CIPH_ECB_MODE || mode == EVP_CIPH_CBC_MODE) && !enc) +#ifdef HWAES_CAPABLE + if (HWAES_CAPABLE) + { + ret = HWAES_set_decrypt_key(key,ctx->key_len*8,&dat->ks); + dat->block = (block128_f)HWAES_decrypt; + dat->stream.cbc = NULL; +#ifdef HWAES_cbc_encrypt + if (mode==EVP_CIPH_CBC_MODE) + dat->stream.cbc = (cbc128_f)HWAES_cbc_encrypt; +#endif + } + else +#endif #ifdef BSAES_CAPABLE if (BSAES_CAPABLE && mode==EVP_CIPH_CBC_MODE) { @@ -517,6 +559,26 @@ static int aes_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, NULL; } else +#ifdef HWAES_CAPABLE + if (HWAES_CAPABLE) + { + ret = 
HWAES_set_encrypt_key(key,ctx->key_len*8,&dat->ks); + dat->block = (block128_f)HWAES_encrypt; + dat->stream.cbc = NULL; +#ifdef HWAES_cbc_encrypt + if (mode==EVP_CIPH_CBC_MODE) + dat->stream.cbc = (cbc128_f)HWAES_cbc_encrypt; + else +#endif +#ifdef HWAES_ctr32_encrypt_blocks + if (mode==EVP_CIPH_CTR_MODE) + dat->stream.ctr = (ctr128_f)HWAES_ctr32_encrypt_blocks; + else +#endif + (void)0; /* terminate potentially open 'else' */ + } + else +#endif #ifdef BSAES_CAPABLE if (BSAES_CAPABLE && mode==EVP_CIPH_CTR_MODE) { @@ -831,6 +893,21 @@ static int aes_gcm_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, return 1; if (key) { do { +#ifdef HWAES_CAPABLE + if (HWAES_CAPABLE) + { + HWAES_set_encrypt_key(key,ctx->key_len*8,&gctx->ks); + CRYPTO_gcm128_init(&gctx->gcm,&gctx->ks, + (block128_f)HWAES_encrypt); +#ifdef HWAES_ctr32_encrypt_blocks + gctx->ctr = (ctr128_f)HWAES_ctr32_encrypt_blocks; +#else + gctx->ctr = NULL; +#endif + break; + } + else +#endif #ifdef BSAES_CAPABLE if (BSAES_CAPABLE) { @@ -1088,6 +1165,29 @@ static int aes_xts_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, { xctx->stream = NULL; /* key_len is two AES keys */ +#ifdef HWAES_CAPABLE + if (HWAES_CAPABLE) + { + if (enc) + { + HWAES_set_encrypt_key(key, ctx->key_len * 4, &xctx->ks1); + xctx->xts.block1 = (block128_f)HWAES_encrypt; + } + else + { + HWAES_set_decrypt_key(key, ctx->key_len * 4, &xctx->ks1); + xctx->xts.block1 = (block128_f)HWAES_decrypt; + } + + HWAES_set_encrypt_key(key + ctx->key_len/2, + ctx->key_len * 4, &xctx->ks2); + xctx->xts.block2 = (block128_f)HWAES_encrypt; + + xctx->xts.key1 = &xctx->ks1; + break; + } + else +#endif #ifdef VPAES_CAPABLE if (VPAES_CAPABLE) { @@ -1244,6 +1344,19 @@ static int aes_ccm_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, return 1; if (key) do { +#ifdef HWAES_CAPABLE + if (HWAES_CAPABLE) + { + HWAES_set_encrypt_key(key,ctx->key_len*8,&cctx->ks); + + CRYPTO_ccm128_init(&cctx->ccm, cctx->M, cctx->L, + &cctx->ks, 
(block128_f)HWAES_encrypt); + cctx->str = NULL; + cctx->key_set = 1; + break; + } + else +#endif #ifdef VPAES_CAPABLE if (VPAES_CAPABLE) { diff --git a/crypto/modes/gcm128.c b/crypto/modes/gcm128.c index a52ffb1d22..2b4df392e7 100644 --- a/crypto/modes/gcm128.c +++ b/crypto/modes/gcm128.c @@ -645,7 +645,7 @@ static void gcm_gmult_1bit(u64 Xi[2],const u64 H[2]) #endif -#if TABLE_BITS==4 && defined(GHASH_ASM) +#if TABLE_BITS==4 && (defined(GHASH_ASM) || defined(OPENSSL_CPUID_OBJ)) # if !defined(I386_ONLY) && \ (defined(__i386) || defined(__i386__) || \ defined(__x86_64) || defined(__x86_64__) || \ @@ -666,13 +666,22 @@ void gcm_ghash_4bit_mmx(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len void gcm_gmult_4bit_x86(u64 Xi[2],const u128 Htable[16]); void gcm_ghash_4bit_x86(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len); # endif -# elif defined(__arm__) || defined(__arm) +# elif defined(__arm__) || defined(__arm) || defined(__aarch64__) # include "arm_arch.h" # if __ARM_ARCH__>=7 # define GHASH_ASM_ARM # define GCM_FUNCREF_4BIT +# if defined(__aarch64__) +# define PMULL_CAPABLE (OPENSSL_armcap_P & ARMV8_PMULL) +# endif +# if defined(__arm__) || defined(__arm) +# define NEON_CAPABLE (OPENSSL_armcap_P & ARMV7_NEON) +# endif void gcm_gmult_neon(u64 Xi[2],const u128 Htable[16]); void gcm_ghash_neon(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len); +void gcm_init_v8(u128 Htable[16],const u64 Xi[2]); +void gcm_gmult_v8(u64 Xi[2],const u128 Htable[16]); +void gcm_ghash_v8(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len); # endif # elif defined(_TMS320C6400_PLUS) # define GHASH_ASM_C64Xplus @@ -740,10 +749,20 @@ void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx,void *key,block128_f block) ctx->ghash = gcm_ghash_4bit; # endif # elif defined(GHASH_ASM_ARM) - if (OPENSSL_armcap_P & ARMV7_NEON) { +# ifdef PMULL_CAPABLE + if (PMULL_CAPABLE) { + gcm_init_v8(ctx->Htable,ctx->H.u); + ctx->gmult = gcm_gmult_v8; + ctx->ghash = gcm_ghash_v8; + } else +# 
endif +# ifdef NEON_CAPABLE + if (NEON_CAPABLE) { ctx->gmult = gcm_gmult_neon; ctx->ghash = gcm_ghash_neon; - } else { + } else +# endif + { gcm_init_4bit(ctx->Htable,ctx->H.u); ctx->gmult = gcm_gmult_4bit; ctx->ghash = gcm_ghash_4bit; From 728b53058ee6f89fa95c0ed3feaa410a85db7323 Mon Sep 17 00:00:00 2001 From: Andy Polyakov Date: Mon, 11 May 2015 11:36:48 +0200 Subject: [PATCH 107/120] Configure: engage ARMv8 assembly pack in ios64-cross target. Reviewed-by: Dr. Stephen Henson (cherry picked from commit c6d109051d1c2b9a453427a2a53ad3d40acc9276) Resolved Conflicts: Configure --- Configure | 7 ++++--- fips/fipssyms.h | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+), 3 deletions(-) diff --git a/Configure b/Configure index 34856e2d9c..2785677a62 100755 --- a/Configure +++ b/Configure @@ -136,6 +136,7 @@ my $mips32_asm=":bn-mips.o::aes_cbc.o aes-mips.o:::sha1-mips.o sha256-mips.o:::: my $mips64_asm=":bn-mips.o mips-mont.o::aes_cbc.o aes-mips.o:::sha1-mips.o sha256-mips.o sha512-mips.o::::::::"; my $s390x_asm="s390xcap.o s390xcpuid.o:bn-s390x.o s390x-mont.o s390x-gf2m.o::aes_ctr.o aes-s390x.o:::sha1-s390x.o sha256-s390x.o sha512-s390x.o::rc4-s390x.o:::::ghash-s390x.o:"; my $armv4_asm="armcap.o armv4cpuid.o:bn_asm.o armv4-mont.o armv4-gf2m.o::aes_cbc.o aes-armv4.o:::sha1-armv4-large.o sha256-armv4.o sha512-armv4.o:::::::ghash-armv4.o::void"; +my $aarch64_asm="armcap.o arm64cpuid.o mem_clr.o:::aes_core.o aes_cbc.o aesv8-armx.o:::sha1-armv8.o sha256-armv8.o sha512-armv8.o:::::::ghashv8-armx.o:"; my $parisc11_asm="pariscid.o:bn_asm.o parisc-mont.o::aes_core.o aes_cbc.o aes-parisc.o:::sha1-parisc.o sha256-parisc.o sha512-parisc.o::rc4-parisc.o:::::ghash-parisc.o::32"; my $parisc20_asm="pariscid.o:pa-risc2W.o parisc-mont.o::aes_core.o aes_cbc.o aes-parisc.o:::sha1-parisc.o sha256-parisc.o sha512-parisc.o::rc4-parisc.o:::::ghash-parisc.o::64"; my $ppc32_asm="ppccpuid.o ppccap.o:bn-ppc.o ppc-mont.o ppc64-mont.o::aes_core.o aes_cbc.o 
aes-ppc.o:::sha1-ppc.o sha256-ppc.o::::::::"; @@ -596,7 +597,7 @@ my %table=( # CROSS_SDK=iPhoneOS7.0.sdk # "iphoneos-cross","cc:-O3 -isysroot \$(CROSS_TOP)/SDKs/\$(CROSS_SDK) -fomit-frame-pointer -fno-common::-D_REENTRANT:iOS:-Wl,-search_paths_first%:BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR:${no_asm}:dlfcn:darwin-shared:-fPIC -fno-common:-dynamiclib:.\$(SHLIB_MAJOR).\$(SHLIB_MINOR).dylib", -"ios64-cross","cc:-O3 -arch arm64 -mios-version-min=7.0.0 -isysroot \$(CROSS_TOP)/SDKs/\$(CROSS_SDK) -fno-common::-D_REENTRANT:iOS:-Wl,-search_paths_first%:SIXTY_FOUR_BIT_LONG RC4_CHAR -RC4_CHUNK DES_INT DES_UNROLL -BF_PTR:${no_asm}:dlfcn:darwin-shared:-fPIC -fno-common:-dynamiclib:.\$(SHLIB_MAJOR).\$(SHLIB_MINOR).dylib", +"ios64-cross","cc:-O3 -arch arm64 -mios-version-min=7.0.0 -isysroot \$(CROSS_TOP)/SDKs/\$(CROSS_SDK) -fno-common::-D_REENTRANT:iOS:-Wl,-search_paths_first%:SIXTY_FOUR_BIT_LONG RC4_CHAR -RC4_CHUNK DES_INT DES_UNROLL -BF_PTR:${aarch64_asm}:ios64:dlfcn:darwin-shared:-fPIC -fno-common:-dynamiclib:.\$(SHLIB_MAJOR).\$(SHLIB_MINOR).dylib", ##### A/UX "aux3-gcc","gcc:-O2 -DTERMIO::(unknown):AUX:-lbsd:RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR:::", @@ -1575,7 +1576,7 @@ if ($rmd160_obj =~ /\.o$/) } if ($aes_obj =~ /\.o$/) { - $cflags.=" -DAES_ASM"; + $cflags.=" -DAES_ASM" if ($aes_obj =~ m/\baes\-/); # aes_ctr.o is not a real file, only indication that assembler # module implements AES_ctr32_encrypt... 
$cflags.=" -DAES_CTR_ASM" if ($aes_obj =~ s/\s*aes_ctr\.o//); @@ -1596,7 +1597,7 @@ else { $wp_obj="wp_block.o"; } $cmll_obj=$cmll_enc unless ($cmll_obj =~ /.o$/); -if ($modes_obj =~ /ghash/) +if ($modes_obj =~ /ghash\-/) { $cflags.=" -DGHASH_ASM"; } diff --git a/fips/fipssyms.h b/fips/fipssyms.h index 5719aeac2a..f5d04d073c 100644 --- a/fips/fipssyms.h +++ b/fips/fipssyms.h @@ -668,6 +668,40 @@ #define bn_mul_mont_gather5 fips_bn_mul_mont_gather5 #define bn_scatter5 fips_bn_scatter5 #define bn_gather5 fips_bn_gather5 +#define _armv8_aes_probe _fips_armv8_aes_probe +#define _armv8_pmull_probe _fips_armv8_pmull_probe +#define _armv8_sha1_probe _fips_armv8_sha1_probe +#define _armv8_sha256_probe _fips_armv8_sha256_probe +#define aes_v8_encrypt fips_aes_v8_encrypt +#define aes_v8_decrypt fips_aes_v8_decrypt +#define aes_v8_set_encrypt_key fips_aes_v8_set_encrypt_key +#define aes_v8_set_decrypt_key fips_aes_v8_set_decrypt_key +#define aes_v8_cbc_encrypt fips_aes_v8_cbc_encrypt +#define aes_v8_ctr32_encrypt_blocks fips_aes_v8_ctr32_encrypt_blocks +#define gcm_init_v8 fips_gcm_init_v8 +#define gcm_gmult_v8 fips_gcm_gmult_v8 +#define gcm_ghash_v8 fips_gcm_ghash_v8 +#if defined(__APPLE__) && __ASSEMBLER__ +#define _OPENSSL_armcap_P _fips_openssl_armcap_P +#define __armv7_neon_probe __fips_armv7_neon_probe +#define __armv7_tick __fips_armv7_tick +#define __armv8_aes_probe __fips_armv8_aes_probe +#define __armv8_pmull_probe __fips_armv8_pmull_probe +#define __armv8_sha1_probe __fips_armv8_sha1_probe +#define __armv8_sha256_probe __fips_armv8_sha256_probe +#define _aes_v8_encrypt _fips_aes_v8_encrypt +#define _aes_v8_decrypt _fips_aes_v8_decrypt +#define _aes_v8_set_encrypt_key _fips_aes_v8_set_encrypt_key +#define _aes_v8_set_decrypt_key _fips_aes_v8_set_decrypt_key +#define _aes_v8_cbc_encrypt _fips_aes_v8_cbc_encrypt +#define _aes_v8_ctr32_encrypt_blocks _fips_aes_v8_ctr32_encrypt_blocks +#define _gcm_init_v8 _fips_gcm_init_v8 +#define _gcm_gmult_v8 _fips_gcm_gmult_v8 
+#define _gcm_ghash_v8 _fips_gcm_ghash_v8 +#define _sha1_block_data_order _fips_sha1_block_data_order +#define _sha256_block_data_order _fips_sha256_block_data_order +#define _sha512_block_data_order _fips_sha512_block_data_order +#endif #if defined(_MSC_VER) # pragma const_seg("fipsro$b") From bb98f6bef66dc423a3736cc9c5e5602933f58c64 Mon Sep 17 00:00:00 2001 From: Andy Polyakov Date: Mon, 11 May 2015 11:43:55 +0200 Subject: [PATCH 108/120] Adapt ARM assembly pack for iOS. This is achieved by filtering perlasm output through arm-xlate.pl. But note that it's done only if "flavour" argument is not 'void'. As 'void' is default value for other ARM targets, perlasm output is not actually filtered on previously validated platforms. Reviewed-by: Dr. Stephen Henson (cherry picked from commit 874faf2ffb22187ad5483d9691a3a2eb7112f161) --- crypto/aes/asm/aes-armv4.pl | 31 ++++++++++++++++++++++++++-- crypto/bn/asm/armv4-gf2m.pl | 23 +++++++++++++++++++-- crypto/bn/asm/armv4-mont.pl | 16 +++++++++++++-- crypto/modes/asm/ghash-armv4.pl | 33 ++++++++++++++++++++++-------- crypto/sha/asm/sha1-armv4-large.pl | 16 +++++++++++++-- crypto/sha/asm/sha256-armv4.pl | 16 +++++++++++++-- crypto/sha/asm/sha512-armv4.pl | 22 +++++++++++++++++--- 7 files changed, 136 insertions(+), 21 deletions(-) diff --git a/crypto/aes/asm/aes-armv4.pl b/crypto/aes/asm/aes-armv4.pl index 55b6e04b67..ed5125827b 100644 --- a/crypto/aes/asm/aes-armv4.pl +++ b/crypto/aes/asm/aes-armv4.pl @@ -32,8 +32,20 @@ # Profiler-assisted and platform-specific optimization resulted in 16% # improvement on Cortex A8 core and ~21.5 cycles per byte.
-while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} -open STDOUT,">$output"; +$flavour = shift; +if ($flavour=~/^\w[\w\-]*\.\w+$/) { $output=$flavour; undef $flavour; } +else { while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} } + +if ($flavour && $flavour ne "void") { + $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; + ( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or + ( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or + die "can't locate arm-xlate.pl"; + + open STDOUT,"| \"$^X\" $xlate $flavour $output"; +} else { + open STDOUT,">$output"; +} $s0="r0"; $s1="r1"; @@ -171,7 +183,12 @@ AES_encrypt: stmdb sp!,{r1,r4-r12,lr} mov $rounds,r0 @ inp mov $key,r2 +#ifdef __APPLE__ + mov $tbl,#AES_encrypt-AES_Te + sub $tbl,r3,$tbl @ Te +#else sub $tbl,r3,#AES_encrypt-AES_Te @ Te +#endif #if __ARM_ARCH__<7 ldrb $s0,[$rounds,#3] @ load input data in endian-neutral ldrb $t1,[$rounds,#2] @ manner... @@ -425,7 +442,12 @@ AES_set_encrypt_key: bne .Labrt .Lok: stmdb sp!,{r4-r12,lr} +#ifdef __APPLE__ + mov $tbl,#AES_set_encrypt_key-AES_Te-1024 + sub $tbl,r3,$tbl @ Te4 +#else sub $tbl,r3,#AES_set_encrypt_key-AES_Te-1024 @ Te4 +#endif mov $rounds,r0 @ inp mov lr,r1 @ bits @@ -886,7 +908,12 @@ AES_decrypt: stmdb sp!,{r1,r4-r12,lr} mov $rounds,r0 @ inp mov $key,r2 +#ifdef __APPLE__ + mov $tbl,#AES_decrypt-AES_Td + sub $tbl,r3,$tbl @ Td +#else sub $tbl,r3,#AES_decrypt-AES_Td @ Td +#endif #if __ARM_ARCH__<7 ldrb $s0,[$rounds,#3] @ load input data in endian-neutral ldrb $t1,[$rounds,#2] @ manner... diff --git a/crypto/bn/asm/armv4-gf2m.pl b/crypto/bn/asm/armv4-gf2m.pl index c52e0b75b5..737659f0db 100644 --- a/crypto/bn/asm/armv4-gf2m.pl +++ b/crypto/bn/asm/armv4-gf2m.pl @@ -21,8 +21,20 @@ # runs in even less cycles, ~30, improvement is measurable only on # longer keys. One has to optimize code elsewhere to get NEON glow... 
-while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} -open STDOUT,">$output"; +$flavour = shift; +if ($flavour=~/^\w[\w\-]*\.\w+$/) { $output=$flavour; undef $flavour; } +else { while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} } + +if ($flavour && $flavour ne "void") { + $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; + ( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or + ( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or + die "can't locate arm-xlate.pl"; + + open STDOUT,"| \"$^X\" $xlate $flavour $output"; +} else { + open STDOUT,">$output"; +} sub Dlo() { shift=~m|q([1]?[0-9])|?"d".($1*2):""; } sub Dhi() { shift=~m|q([1]?[0-9])|?"d".($1*2+1):""; } @@ -170,11 +182,18 @@ bn_GF2m_mul_2x2: #if __ARM_ARCH__>=7 ldr r12,.LOPENSSL_armcap .Lpic: ldr r12,[pc,r12] +#ifdef __APPLE__ + ldr r12,[r12] +#endif tst r12,#1 beq .Lialu veor $A1,$A1 +#ifdef __APPLE__ + vmov $B1,r3,r3 @ two copies of b1 +#else vmov.32 $B1,r3,r3 @ two copies of b1 +#endif vmov.32 ${A1}[0],r1 @ a1 veor $A0,$A0 diff --git a/crypto/bn/asm/armv4-mont.pl b/crypto/bn/asm/armv4-mont.pl index f78a8b5f0f..aa00f38c2f 100644 --- a/crypto/bn/asm/armv4-mont.pl +++ b/crypto/bn/asm/armv4-mont.pl @@ -23,8 +23,20 @@ # than 1/2KB. Windows CE port would be trivial, as it's exclusively # about decorations, ABI and instruction syntax are identical. 
-while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} -open STDOUT,">$output"; +$flavour = shift; +if ($flavour=~/^\w[\w\-]*\.\w+$/) { $output=$flavour; undef $flavour; } +else { while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} } + +if ($flavour && $flavour ne "void") { + $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; + ( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or + ( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or + die "can't locate arm-xlate.pl"; + + open STDOUT,"| \"$^X\" $xlate $flavour $output"; +} else { + open STDOUT,">$output"; +} $num="r0"; # starts as num argument, but holds &tp[num-1] $ap="r1"; diff --git a/crypto/modes/asm/ghash-armv4.pl b/crypto/modes/asm/ghash-armv4.pl index d91586ee29..3799b2b559 100644 --- a/crypto/modes/asm/ghash-armv4.pl +++ b/crypto/modes/asm/ghash-armv4.pl @@ -57,8 +57,20 @@ # *native* byte order on current platform. See gcm128.c for working # example... -while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} -open STDOUT,">$output"; +$flavour = shift; +if ($flavour=~/^\w[\w\-]*\.\w+$/) { $output=$flavour; undef $flavour; } +else { while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} } + +if ($flavour && $flavour ne "void") { + $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; + ( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or + ( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or + die "can't locate arm-xlate.pl"; + + open STDOUT,"| \"$^X\" $xlate $flavour $output"; +} else { + open STDOUT,">$output"; +} $Xi="r0"; # argument block $Htbl="r1"; @@ -112,6 +124,11 @@ $code=<<___; .text .code 32 +#ifdef __APPLE__ +#define ldrplb ldrbpl +#define ldrneb ldrbne +#endif + .type rem_4bit,%object .align 5 rem_4bit: @@ -326,9 +343,9 @@ $code.=<<___; .align 4 gcm_gmult_neon: sub $Htbl,#16 @ point at H in GCM128_CTX - vld1.64 `&Dhi("$IN")`,[$Xi,:64]!@ load Xi + vld1.64 `&Dhi("$IN")`,[$Xi]! @ load Xi vmov.i32 $mod,#0xe1 @ our irreducible polynomial - vld1.64 `&Dlo("$IN")`,[$Xi,:64]! 
+ vld1.64 `&Dlo("$IN")`,[$Xi]! vshr.u64 $mod,#32 vldmia $Htbl,{$Hhi-$Hlo} @ load H veor $zero,$zero @@ -349,9 +366,9 @@ gcm_gmult_neon: .type gcm_ghash_neon,%function .align 4 gcm_ghash_neon: - vld1.64 `&Dhi("$Z")`,[$Xi,:64]! @ load Xi + vld1.64 `&Dhi("$Z")`,[$Xi]! @ load Xi vmov.i32 $mod,#0xe1 @ our irreducible polynomial - vld1.64 `&Dlo("$Z")`,[$Xi,:64]! + vld1.64 `&Dlo("$Z")`,[$Xi]! vshr.u64 $mod,#32 vldmia $Xi,{$Hhi-$Hlo} @ load H veor $zero,$zero @@ -410,8 +427,8 @@ gcm_ghash_neon: vrev64.8 $Z,$Z #endif sub $Xi,#16 - vst1.64 `&Dhi("$Z")`,[$Xi,:64]! @ write out Xi - vst1.64 `&Dlo("$Z")`,[$Xi,:64] + vst1.64 `&Dhi("$Z")`,[$Xi]! @ write out Xi + vst1.64 `&Dlo("$Z")`,[$Xi] bx lr .size gcm_ghash_neon,.-gcm_ghash_neon diff --git a/crypto/sha/asm/sha1-armv4-large.pl b/crypto/sha/asm/sha1-armv4-large.pl index 33da3e0e3c..6c0adb9911 100644 --- a/crypto/sha/asm/sha1-armv4-large.pl +++ b/crypto/sha/asm/sha1-armv4-large.pl @@ -52,8 +52,20 @@ # Profiler-assisted and platform-specific optimization resulted in 10% # improvement on Cortex A8 core and 12.2 cycles per byte. 
-while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} -open STDOUT,">$output"; +$flavour = shift; +if ($flavour=~/^\w[\w\-]*\.\w+$/) { $output=$flavour; undef $flavour; } +else { while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} } + +if ($flavour && $flavour ne "void") { + $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; + ( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or + ( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or + die "can't locate arm-xlate.pl"; + + open STDOUT,"| \"$^X\" $xlate $flavour $output"; +} else { + open STDOUT,">$output"; +} $ctx="r0"; $inp="r1"; diff --git a/crypto/sha/asm/sha256-armv4.pl b/crypto/sha/asm/sha256-armv4.pl index 9c84e8d93c..252a583d06 100644 --- a/crypto/sha/asm/sha256-armv4.pl +++ b/crypto/sha/asm/sha256-armv4.pl @@ -23,8 +23,20 @@ # Profiler-assisted and platform-specific optimization resulted in 16% # improvement on Cortex A8 core and ~17 cycles per processed byte. -while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} -open STDOUT,">$output"; +$flavour = shift; +if ($flavour=~/^\w[\w\-]*\.\w+$/) { $output=$flavour; undef $flavour; } +else { while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} } + +if ($flavour && $flavour ne "void") { + $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; + ( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or + ( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or + die "can't locate arm-xlate.pl"; + + open STDOUT,"| \"$^X\" $xlate $flavour $output"; +} else { + open STDOUT,">$output"; +} $ctx="r0"; $t0="r0"; $inp="r1"; $t3="r1"; diff --git a/crypto/sha/asm/sha512-armv4.pl b/crypto/sha/asm/sha512-armv4.pl index 7faf37b147..c032afdbca 100644 --- a/crypto/sha/asm/sha512-armv4.pl +++ b/crypto/sha/asm/sha512-armv4.pl @@ -38,8 +38,20 @@ $hi="HI"; $lo="LO"; # ==================================================================== -while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} -open STDOUT,">$output"; +$flavour = shift; +if ($flavour=~/^\w[\w\-]*\.\w+$/) { 
$output=$flavour; undef $flavour; } +else { while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} } + +if ($flavour && $flavour ne "void") { + $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; + ( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or + ( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or + die "can't locate arm-xlate.pl"; + + open STDOUT,"| \"$^X\" $xlate $flavour $output"; +} else { + open STDOUT,">$output"; +} $ctx="r0"; # parameter block $inp="r1"; @@ -221,17 +233,21 @@ WORD64(0x4cc5d4be,0xcb3e42b6, 0x597f299c,0xfc657e2a) WORD64(0x5fcb6fab,0x3ad6faec, 0x6c44198c,0x4a475817) .size K512,.-K512 .LOPENSSL_armcap: -.word OPENSSL_armcap_P-sha512_block_data_order +.word OPENSSL_armcap_P-.Lsha512_block_data_order .skip 32-4 .global sha512_block_data_order .type sha512_block_data_order,%function sha512_block_data_order: +.Lsha512_block_data_order: sub r3,pc,#8 @ sha512_block_data_order add $len,$inp,$len,lsl#7 @ len to point at the end of inp #if __ARM_ARCH__>=7 ldr r12,.LOPENSSL_armcap ldr r12,[r3,r12] @ OPENSSL_armcap_P +#ifdef __APPLE__ + ldr r12,[r12] +#endif tst r12,#1 bne .LNEON #endif From 80b1e89bbc901a8f4a5e11945e367e0e0def11ec Mon Sep 17 00:00:00 2001 From: Andy Polyakov Date: Mon, 11 May 2015 11:50:29 +0200 Subject: [PATCH 109/120] Add iOS-specific armv4cpuid.S module. Normally it would be generated from a perlasm module, but doing so would affect existing armv4cpuid.S, which in turn would formally void previously validated platforms. Hence separate module is generated. Reviewed-by: Dr.
Stephen Henson (cherry picked from commit 5837e90f08ffcf5ad84933793bc285630018ce26) --- crypto/armv4cpuid_ios.S | 210 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 210 insertions(+) create mode 100644 crypto/armv4cpuid_ios.S diff --git a/crypto/armv4cpuid_ios.S b/crypto/armv4cpuid_ios.S new file mode 100644 index 0000000000..cce9a7902b --- /dev/null +++ b/crypto/armv4cpuid_ios.S @@ -0,0 +1,210 @@ +#include "arm_arch.h" + +.text +.code 32 + +.align 5 +.globl _OPENSSL_atomic_add + +_OPENSSL_atomic_add: +#if __ARM_ARCH__>=6 +Ladd: ldrex r2,[r0] + add r3,r2,r1 + strex r2,r3,[r0] + cmp r2,#0 + bne Ladd + mov r0,r3 + bx lr +#else + stmdb sp!,{r4,r5,r6,lr} + ldr r2,Lspinlock + adr r3,Lspinlock + mov r4,r0 + mov r5,r1 + add r6,r3,r2 @ &spinlock + b .+8 +Lspin: bl sched_yield + mov r0,#-1 + swp r0,r0,[r6] + cmp r0,#0 + bne Lspin + + ldr r2,[r4] + add r2,r2,r5 + str r2,[r4] + str r0,[r6] @ release spinlock + ldmia sp!,{r4,r5,r6,lr} + tst lr,#1 + moveq pc,lr +.word 0xe12fff1e @ bx lr +#endif + + +.globl _OPENSSL_cleanse + +_OPENSSL_cleanse: + eor ip,ip,ip + cmp r1,#7 + subhs r1,r1,#4 + bhs Lot + cmp r1,#0 + beq Lcleanse_done +Little: + strb ip,[r0],#1 + subs r1,r1,#1 + bhi Little + b Lcleanse_done + +Lot: tst r0,#3 + beq Laligned + strb ip,[r0],#1 + sub r1,r1,#1 + b Lot +Laligned: + str ip,[r0],#4 + subs r1,r1,#4 + bhs Laligned + adds r1,r1,#4 + bne Little +Lcleanse_done: +#if __ARM_ARCH__>=5 + bx lr +#else + tst lr,#1 + moveq pc,lr +.word 0xe12fff1e @ bx lr +#endif + + + + +.align 5 +.globl __armv7_neon_probe + +__armv7_neon_probe: + vorr q0,q0,q0 + bx lr + + +.globl __armv7_tick + +__armv7_tick: +#ifdef __APPLE__ + mrrc p15,0,r0,r1,c14 @ CNTPCT +#else + mrrc p15,1,r0,r1,c14 @ CNTVCT +#endif + bx lr + + +.globl __armv8_aes_probe + +__armv8_aes_probe: +.byte 0x00,0x03,0xb0,0xf3 @ aese.8 q0,q0 + bx lr + + +.globl __armv8_sha1_probe + +__armv8_sha1_probe: +.byte 0x40,0x0c,0x00,0xf2 @ sha1c.32 q0,q0,q0 + bx lr + + +.globl __armv8_sha256_probe + +__armv8_sha256_probe: 
+.byte 0x40,0x0c,0x00,0xf3 @ sha256h.32 q0,q0,q0 + bx lr + +.globl __armv8_pmull_probe + +__armv8_pmull_probe: +.byte 0x00,0x0e,0xa0,0xf2 @ vmull.p64 q0,d0,d0 + bx lr + +.globl _OPENSSL_wipe_cpu + +_OPENSSL_wipe_cpu: + ldr r0,LOPENSSL_armcap + adr r1,LOPENSSL_armcap + ldr r0,[r1,r0] +#ifdef __APPLE__ + ldr r0,[r0] +#endif + eor r2,r2,r2 + eor r3,r3,r3 + eor ip,ip,ip + tst r0,#1 + beq Lwipe_done + veor q0, q0, q0 + veor q1, q1, q1 + veor q2, q2, q2 + veor q3, q3, q3 + veor q8, q8, q8 + veor q9, q9, q9 + veor q10, q10, q10 + veor q11, q11, q11 + veor q12, q12, q12 + veor q13, q13, q13 + veor q14, q14, q14 + veor q15, q15, q15 +Lwipe_done: + mov r0,sp +#if __ARM_ARCH__>=5 + bx lr +#else + tst lr,#1 + moveq pc,lr +.word 0xe12fff1e @ bx lr +#endif + + +.globl _OPENSSL_instrument_bus + +_OPENSSL_instrument_bus: + eor r0,r0,r0 +#if __ARM_ARCH__>=5 + bx lr +#else + tst lr,#1 + moveq pc,lr +.word 0xe12fff1e @ bx lr +#endif + + +.globl _OPENSSL_instrument_bus2 + +_OPENSSL_instrument_bus2: + eor r0,r0,r0 +#if __ARM_ARCH__>=5 + bx lr +#else + tst lr,#1 + moveq pc,lr +.word 0xe12fff1e @ bx lr +#endif + + +.align 5 +LOPENSSL_armcap: +.word OPENSSL_armcap_P-. +#if __ARM_ARCH__>=6 +.align 5 +#else +Lspinlock: +.word atomic_add_spinlock-Lspinlock +.align 5 + +.data +.align 2 +atomic_add_spinlock: +.word +#endif + +.comm _OPENSSL_armcap_P,4 +.non_lazy_symbol_pointer +OPENSSL_armcap_P: +.indirect_symbol _OPENSSL_armcap_P +.long 0 +.private_extern _OPENSSL_armcap_P From f447329da7bf1e95691c8019af3e846002ba554d Mon Sep 17 00:00:00 2001 From: Andy Polyakov Date: Mon, 11 May 2015 11:53:41 +0200 Subject: [PATCH 110/120] Configure: add ios-cross target with ARM assembly support. Reviewed-by: Dr. 
Stephen Henson (cherry picked from commit 97fbb0c88c2f601f98e25e57b9f6f9679d14f3a8) Resolved conflicts: Configure config --- Configure | 1 + config | 4 +++- fips/fips_test_suite.c | 6 ++++++ fips/fipssyms.h | 10 ++++++++++ 4 files changed, 20 insertions(+), 1 deletion(-) diff --git a/Configure b/Configure index 2785677a62..65e06f330a 100755 --- a/Configure +++ b/Configure @@ -597,6 +597,7 @@ my %table=( # CROSS_SDK=iPhoneOS7.0.sdk # "iphoneos-cross","cc:-O3 -isysroot \$(CROSS_TOP)/SDKs/\$(CROSS_SDK) -fomit-frame-pointer -fno-common::-D_REENTRANT:iOS:-Wl,-search_paths_first%:BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR:${no_asm}:dlfcn:darwin-shared:-fPIC -fno-common:-dynamiclib:.\$(SHLIB_MAJOR).\$(SHLIB_MINOR).dylib", +"ios-cross","cc:-O3 -arch armv7 -mios-version-min=7.0.0 -isysroot \$(CROSS_TOP)/SDKs/\$(CROSS_SDK) -fno-common::-D_REENTRANT:iOS:-Wl,-search_paths_first%:BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR:armcap.o armv4cpuid_ios.o:bn_asm.o armv4-mont.o armv4-gf2m.o::aes_cbc.o aes-armv4.o:::sha1-armv4-large.o sha256-armv4.o sha512-armv4.o:::::::ghash-armv4.o::ios32:dlfcn:darwin-shared:-fPIC -fno-common:-dynamiclib:.\$(SHLIB_MAJOR).\$(SHLIB_MINOR).dylib", "ios64-cross","cc:-O3 -arch arm64 -mios-version-min=7.0.0 -isysroot \$(CROSS_TOP)/SDKs/\$(CROSS_SDK) -fno-common::-D_REENTRANT:iOS:-Wl,-search_paths_first%:SIXTY_FOUR_BIT_LONG RC4_CHAR -RC4_CHUNK DES_INT DES_UNROLL -BF_PTR:${aarch64_asm}:ios64:dlfcn:darwin-shared:-fPIC -fno-common:-dynamiclib:.\$(SHLIB_MAJOR).\$(SHLIB_MINOR).dylib", ##### A/UX diff --git a/config b/config index fc78a30685..12f903048a 100755 --- a/config +++ b/config @@ -576,7 +576,9 @@ case "$GUESSOS" in *-*-iphoneos) options="$options -arch%20${MACHINE}" OUT="iphoneos-cross" ;; - arm64-*-iphoneos|*-*-ios64) + armv7-*-ios) + OUT="ios-cross" ;; + arm64-*-ios*) OUT="ios64-cross" ;; alpha-*-linux2) ISA=`awk '/cpu model/{print$4;exit(0);}' /proc/cpuinfo` diff --git a/fips/fips_test_suite.c b/fips/fips_test_suite.c index c7054db742..7813d25ee0 
100644 --- a/fips/fips_test_suite.c +++ b/fips/fips_test_suite.c @@ -1611,6 +1611,12 @@ int main(int argc, char **argv) FIPS_post_set_callback(post_cb); +#if defined(__APPLE__) && (defined(__arm__) || defined(__aarch64__)) + extern unsigned int OPENSSL_armcap_P; + if (0 == OPENSSL_armcap_P) + fprintf(stderr, "Optimizations disabled\n"); +#endif + printf("\tFIPS-mode test application\n"); printf("\t%s\n\n", FIPS_module_version_text()); diff --git a/fips/fipssyms.h b/fips/fipssyms.h index f5d04d073c..76db619cec 100644 --- a/fips/fipssyms.h +++ b/fips/fipssyms.h @@ -701,6 +701,16 @@ #define _sha1_block_data_order _fips_sha1_block_data_order #define _sha256_block_data_order _fips_sha256_block_data_order #define _sha512_block_data_order _fips_sha512_block_data_order +#define _AES_decrypt _fips_aes_decrypt +#define _AES_encrypt _fips_aes_encrypt +#define _AES_set_decrypt_key _fips_aes_set_decrypt_key +#define _AES_set_encrypt_key _fips_aes_set_encrypt_key +#define _gcm_gmult_4bit _fips_gcm_gmult_4bit +#define _gcm_ghash_4bit _fips_gcm_ghash_4bit +#define _gcm_gmult_neon _fips_gcm_gmult_neon +#define _gcm_ghash_neon _fips_gcm_ghash_neon +#define _bn_GF2m_mul_2x2 _fips_bn_GF2m_mul_2x2 +#define _OPENSSL_cleanse _FIPS_openssl_cleanse #endif #if defined(_MSC_VER) From 292c1f34ec27f95fdc3c9dfa5c2df93dde7861e0 Mon Sep 17 00:00:00 2001 From: Andy Polyakov Date: Mon, 11 May 2015 11:56:30 +0200 Subject: [PATCH 111/120] Additional vxWorks target. Reviewed-by: Dr. 
Stephen Henson (cherry picked from commit 50e2a0ea4615124aa159e8f43317dedcf0cfcaa2) --- Configure | 1 + config | 5 ++++ test/fips_algvs.c | 61 +++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 67 insertions(+) diff --git a/Configure b/Configure index 65e06f330a..38119577be 100755 --- a/Configure +++ b/Configure @@ -615,6 +615,7 @@ my %table=( ##### VxWorks for various targets "vxworks-ppc60x","ccppc:-D_REENTRANT -mrtp -mhard-float -mstrict-align -fno-implicit-fp -DPPC32_fp60x -O2 -fstrength-reduce -fno-builtin -fno-strict-aliasing -Wall -DCPU=PPC32 -DTOOL_FAMILY=gnu -DTOOL=gnu -I\$(WIND_BASE)/target/usr/h -I\$(WIND_BASE)/target/usr/h/wrn/coreip:::VXWORKS:-Wl,--defsym,__wrs_rtp_base=0xe0000000 -L \$(WIND_BASE)/target/usr/lib/ppc/PPC32/common:::::", "vxworks-ppcgen","ccppc:-D_REENTRANT -mrtp -msoft-float -mstrict-align -O1 -fno-builtin -fno-strict-aliasing -Wall -DCPU=PPC32 -DTOOL_FAMILY=gnu -DTOOL=gnu -I\$(WIND_BASE)/target/usr/h -I\$(WIND_BASE)/target/usr/h/wrn/coreip:::VXWORKS:-Wl,--defsym,__wrs_rtp_base=0xe0000000 -L \$(WIND_BASE)/target/usr/lib/ppc/PPC32/sfcommon:::::", +"vxworks-ppcgen-kernel","ccppc:-D_REENTRANT -msoft-float -mstrict-align -O1 -fno-builtin -fno-strict-aliasing -Wall -DCPU=PPC32 -DTOOL_FAMILY=gnu -DTOOL=gnu -I\$(WIND_BASE)/target/h -I\$(WIND_BASE)/target/h/wrn/coreip:::VXWORKS::::::", "vxworks-ppc405","ccppc:-g -msoft-float -mlongcall -DCPU=PPC405 -I\$(WIND_BASE)/target/h:::VXWORKS:-r:::::", "vxworks-ppc750","ccppc:-ansi -nostdinc -DPPC750 -D_REENTRANT -fvolatile -fno-builtin -fno-for-scope -fsigned-char -Wall -msoft-float -mlongcall -DCPU=PPC604 -I\$(WIND_BASE)/target/h \$(DEBUG_FLAG):::VXWORKS:-r:::::", "vxworks-ppc750-debug","ccppc:-ansi -nostdinc -DPPC750 -D_REENTRANT -fvolatile -fno-builtin -fno-for-scope -fsigned-char -Wall -msoft-float -mlongcall -DCPU=PPC604 -I\$(WIND_BASE)/target/h -DBN_DEBUG -DREF_CHECK -DCONF_DEBUG -DBN_CTX_DEBUG -DCRYPTO_MDEBUG -DPEDANTIC -DDEBUG_SAFESTACK -DDEBUG -g:::VXWORKS:-r:::::", diff --git 
a/config b/config index 12f903048a..5c8053946b 100755 --- a/config +++ b/config @@ -375,6 +375,10 @@ case "${SYSTEM}:${RELEASE}:${VERSION}:${MACHINE}" in echo "nsr-tandem-nsk"; exit 0; ;; + vxworks:kernel*) + echo "${MACHINE}-kernel-vxworks"; exit 0; + ;; + vxworks*) echo "${MACHINE}-whatever-vxworks"; exit 0; ;; @@ -605,6 +609,7 @@ case "$GUESSOS" in ;; ppc-*-linux2) OUT="linux-ppc" ;; ppc60x-*-vxworks*) OUT="vxworks-ppc60x" ;; + ppcgen-kernel-vxworks*) OUT="vxworks-ppcgen-kernel" ;; ppcgen-*-vxworks*) OUT="vxworks-ppcgen" ;; pentium-*-vxworks*) OUT="vxworks-pentium" ;; simlinux-*-vxworks*) OUT="vxworks-simlinux" ;; diff --git a/test/fips_algvs.c b/test/fips_algvs.c index ed0350720a..1b463ea039 100644 --- a/test/fips_algvs.c +++ b/test/fips_algvs.c @@ -70,6 +70,67 @@ int main(int argc, char **argv) } #else +#if defined(__vxworks) + +#include +#include + +int fips_algvs_main(int argc, char **argv); +#define main fips_algvs_main + +static int fips_algvs_argv(char *a0) +{ + char *argv[32] = { "fips_algvs" }; + int argc = 1; + int main_ret; + + if (a0) { + char *scan = a0, *arg = a0; + + while (*scan) { + if (*scan++ == ' ') { + scan[-1] = '\0'; + argv[argc++] = arg; + if (argc == (sizeof(argv)/sizeof(argv[0])-1)) + break; + + while (*scan == ' ') scan++; + arg = scan; + } + } + if (*scan == '\0') argv[argc++] = arg; + } + + argv[argc] = NULL; + + main_ret = fips_algvs_main(argc, argv); + + if (a0) free(a0); + + return main_ret; +} + +int fips_algvs(int a0) +{ + return taskSpawn("fips_algvs", 100, (VX_FP_TASK | VX_SPE_TASK), 100000, + (FUNCPTR)fips_algvs_argv, + a0 ? 
strdup(a0) : 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); +} + +static FILE *fips_fopen(const char *path, const char *mode) +{ + char fips_path [256]; + + if (path[0] != '/' && strlen(path) < (sizeof(fips_path)-8)) { + strcpy(fips_path,"/fips0/"); + strcat(fips_path,path); + return fopen(fips_path,mode); + } + return fopen(path,mode); +} +#define fopen fips_fopen +#endif + #define FIPS_ALGVS extern int fips_aesavs_main(int argc, char **argv); From 0ae16722876a9e424a1abc2c5285268476d99c35 Mon Sep 17 00:00:00 2001 From: Andy Polyakov Date: Mon, 11 May 2015 12:04:12 +0200 Subject: [PATCH 112/120] Add support for Android 5, both 32- and 64-bit cases. Special note about additional -pie flag in android-armv7. The initial reason for adding it is that Android 5 refuses to execute non-PIE binaries. But what about older systems and previously validated platforms? It should be noted that flag is not used when compiling object code, fipscanister.o in this context, only when linking applications, *supplementary* fips_algvs used during validation procedure. Reviewed-by: Dr. Stephen Henson (cherry picked from commit 6db8e3bdc9ef83d83b83f3eec9722c96daa91f82) Resolved conflicts: test/fips_algvs.c --- Configure | 3 ++- config | 1 + fips/fips_canister.c | 1 + fips/fips_test_suite.c | 2 +- test/fips_algvs.c | 10 ++++++++++ 5 files changed, 15 insertions(+), 2 deletions(-) diff --git a/Configure b/Configure index 38119577be..6bb8f10c2a 100755 --- a/Configure +++ b/Configure @@ -402,7 +402,8 @@ my %table=( # Android: linux-* but without -DTERMIO and pointers to headers and libs. 
"android","gcc:-mandroid -I\$(ANDROID_DEV)/include -B\$(ANDROID_DEV)/lib -O3 -fomit-frame-pointer -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${no_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", "android-x86","gcc:-mandroid -I\$(ANDROID_DEV)/include -B\$(ANDROID_DEV)/lib -O3 -fomit-frame-pointer -Wall::-D_REENTRANT::-ldl:BN_LLONG ${x86_gcc_des} ${x86_gcc_opts}:".eval{my $asm=${x86_elf_asm};$asm=~s/:elf/:android/;$asm}.":dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", -"android-armv7","gcc:-march=armv7-a -mandroid -I\$(ANDROID_DEV)/include -B\$(ANDROID_DEV)/lib -O3 -fomit-frame-pointer -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${armv4_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", +"android-armv7","gcc:-march=armv7-a -mandroid -I\$(ANDROID_DEV)/include -B\$(ANDROID_DEV)/lib -O3 -fomit-frame-pointer -Wall::-D_REENTRANT::-pie%-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${armv4_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", +"android64-aarch64","gcc:-mandroid -fPIC -I\$(ANDROID_DEV)/include -B\$(ANDROID_DEV)/lib -O3 -Wall::-D_REENTRANT::-pie%-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${aarch64_asm}:linux64:dlfcn:linux-shared:::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", #### *BSD [do see comment about ${BSDthreads} above!] 
"BSD-generic32","gcc:-DTERMIOS -O3 -fomit-frame-pointer -Wall::${BSDthreads}:::BN_LLONG RC2_CHAR RC4_INDEX DES_INT DES_UNROLL:${no_asm}:dlfcn:bsd-gcc-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", diff --git a/config b/config index 5c8053946b..4003e2fe52 100755 --- a/config +++ b/config @@ -861,6 +861,7 @@ case "$GUESSOS" in *-*-qnx6) OUT="QNX6" ;; x86-*-android|i?86-*-android) OUT="android-x86" ;; armv[7-9]*-*-android) OUT="android-armv7" ;; + aarch64-*-android) OUT="android64-aarch64" ;; *) OUT=`echo $GUESSOS | awk -F- '{print $3}'`;; esac diff --git a/fips/fips_canister.c b/fips/fips_canister.c index 016d94c976..daf53cb40d 100644 --- a/fips/fips_canister.c +++ b/fips/fips_canister.c @@ -29,6 +29,7 @@ const void *FIPS_text_end(void); #if !defined(FIPS_REF_POINT_IS_CROSS_COMPILER_AWARE) # if (defined(__ANDROID__) && (defined(__arm__) || defined(__arm) || \ + defined(__aarch64__) || \ defined(__i386__)|| defined(__i386))) || \ (defined(__vxworks) && (defined(__ppc__) || defined(__ppc) || \ defined(__mips__)|| defined(__mips))) || \ diff --git a/fips/fips_test_suite.c b/fips/fips_test_suite.c index 7813d25ee0..cd4aafbd12 100644 --- a/fips/fips_test_suite.c +++ b/fips/fips_test_suite.c @@ -1611,7 +1611,7 @@ int main(int argc, char **argv) FIPS_post_set_callback(post_cb); -#if defined(__APPLE__) && (defined(__arm__) || defined(__aarch64__)) +#if (defined(__arm__) || defined(__aarch64__)) extern unsigned int OPENSSL_armcap_P; if (0 == OPENSSL_armcap_P) fprintf(stderr, "Optimizations disabled\n"); diff --git a/test/fips_algvs.c b/test/fips_algvs.c index 1b463ea039..8ff75dcd2e 100644 --- a/test/fips_algvs.c +++ b/test/fips_algvs.c @@ -326,6 +326,16 @@ int main(int argc, char **argv) SysInit(); #endif +#if (defined(__arm__) || defined(__aarch64__)) + if (*args && !strcmp(*args, "-noaccel")) + { + extern unsigned int OPENSSL_armcap_P; + + OPENSSL_armcap_P=0; + args++; + argc--; + } +#endif if (*args && *args[0] != '-') { rv = run_prg(argc - 1, args); From 
8a09500d9cc1bafcbafb4d18c1bf2238bf354171 Mon Sep 17 00:00:00 2001 From: Andy Polyakov Date: Mon, 11 May 2015 12:16:01 +0200 Subject: [PATCH 113/120] util/incore update that allows FINGERPRINT_premain-free build. As for complementary fips.c modification. Goal is to ensure that FIPS_signature does not end up in .bss segment, one guaranteed to be zeroed upon program start-up. One would expect explicitly initialized values to end up in .data segment, but it turned out that values explicitly initialized with zeros can end up in .bss. The modification does not affect program flow, because first byte was the only one of significance [to FINGERPRINT_premain]. Reviewed-by: Dr. Stephen Henson (cherry picked from commit 34f39b062c76fbd3082521b26edee7f53afc061d) --- fips/fips.c | 2 +- util/incore | 7 +++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/fips/fips.c b/fips/fips.c index 8c9e187d7b..0269609a7e 100644 --- a/fips/fips.c +++ b/fips/fips.c @@ -151,7 +151,7 @@ extern const unsigned char FIPS_rodata_start[], FIPS_rodata_end[]; #ifdef _TMS320C6X const #endif -unsigned char FIPS_signature [20] = { 0 }; +unsigned char FIPS_signature [20] = { 0, 0xff }; __fips_constseg static const char FIPS_hmac_key[]="etaonrishdlcupfm"; diff --git a/util/incore b/util/incore index e6e6ecfd89..bb765b1966 100755 --- a/util/incore +++ b/util/incore @@ -382,7 +382,7 @@ if (!$legacy_mode) { } $FINGERPRINT_ascii_value - = $exe->Lookup("FINGERPRINT_ascii_value") or die; + = $exe->Lookup("FINGERPRINT_ascii_value"); } if ($FIPS_text_startX && $FIPS_text_endX) { @@ -439,9 +439,12 @@ $fingerprint = FIPS_incore_fingerprint(); if ($legacy_mode) { print unpack("H*",$fingerprint); -} else { +} elsif (defined($FINGERPRINT_ascii_value)) { seek(FD,$FINGERPRINT_ascii_value->{st_offset},0) or die "$!"; print FD unpack("H*",$fingerprint) or die "$!"; +} else { + seek(FD,$FIPS_signature->{st_offset},0) or die "$!"; + print FD $fingerprint or die "$!"; } close (FD); From 
d8a23532dd3a023b7fa43db7d1dc4433a42363cb Mon Sep 17 00:00:00 2001 From: Andy Polyakov Date: Sat, 13 Apr 2013 20:57:37 +0200 Subject: [PATCH 114/120] crypto/modes/modes_lcl.h: let STRICT_ALIGNMENT be on ARMv7. While ARMv7 in general is capable of unaligned access, not all instructions actually are. And trouble is that compiler doesn't seem to differentiate those capable and incapable of unaligned access. Side effect is that kernel goes into endless loop retrying same instruction triggering unaligned trap. Problem was observed in xts128.c and ccm128.c modules. It's possible to resolve it by using (volatile u32*) casts, but letting STRICT_ALIGNMENT be feels more appropriate. (cherry picked from commit 3bdd80521a81d50ade4214053cd9b293f920a77b) Reviewed-by: Dr. Stephen Henson Reviewed-by: Tim Hudson --- crypto/modes/modes_lcl.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/crypto/modes/modes_lcl.h b/crypto/modes/modes_lcl.h index 4dab6a67fe..fa5d3b02f6 100644 --- a/crypto/modes/modes_lcl.h +++ b/crypto/modes/modes_lcl.h @@ -29,10 +29,7 @@ typedef unsigned char u8; #if defined(__i386) || defined(__i386__) || \ defined(__x86_64) || defined(__x86_64__) || \ defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64) || \ - defined(__s390__) || defined(__s390x__) || \ - ( (defined(__arm__) || defined(__arm)) && \ - (defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || \ - defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__)) ) + defined(__s390__) || defined(__s390x__) # undef STRICT_ALIGNMENT #endif From 4577871ca393275ac0436b2b08f1a75661ced314 Mon Sep 17 00:00:00 2001 From: Andy Polyakov Date: Tue, 21 Jun 2016 23:05:16 +0200 Subject: [PATCH 115/120] PowerPC assembly pack: add POWER8 support. Reviewed-by: Dr. 
Stephen Henson --- Configure | 4 +- crypto/aes/Makefile | 2 + crypto/aes/asm/aes-ppc.pl | 4 +- crypto/aes/asm/aesp8-ppc.pl | 3726 +++++++++++++++++++++++++++++++ crypto/bn/asm/ppc-mont.pl | 10 +- crypto/bn/asm/ppc.pl | 8 +- crypto/bn/asm/ppc64-mont.pl | 12 +- crypto/evp/e_aes.c | 26 + crypto/modes/Makefile | 2 + crypto/modes/asm/ghashp8-ppc.pl | 663 ++++++ crypto/modes/gcm128.c | 18 + crypto/perlasm/ppc-xlate.pl | 101 +- crypto/ppccap.c | 75 +- crypto/ppccpuid.pl | 10 + crypto/sha/Makefile | 2 + crypto/sha/asm/sha1-ppc.pl | 6 +- crypto/sha/asm/sha512-ppc.pl | 10 +- crypto/sha/asm/sha512p8-ppc.pl | 431 ++++ fips/fips_premain.c | 3 + fips/fips_premain.c.sha1 | 2 +- fips/fipssyms.h | 17 + 21 files changed, 5096 insertions(+), 36 deletions(-) create mode 100755 crypto/aes/asm/aesp8-ppc.pl create mode 100755 crypto/modes/asm/ghashp8-ppc.pl create mode 100755 crypto/sha/asm/sha512p8-ppc.pl diff --git a/Configure b/Configure index 6bb8f10c2a..850948ef3b 100755 --- a/Configure +++ b/Configure @@ -139,8 +139,8 @@ my $armv4_asm="armcap.o armv4cpuid.o:bn_asm.o armv4-mont.o armv4-gf2m.o::aes_cbc my $aarch64_asm="armcap.o arm64cpuid.o mem_clr.o:::aes_core.o aes_cbc.o aesv8-armx.o:::sha1-armv8.o sha256-armv8.o sha512-armv8.o:::::::ghashv8-armx.o:"; my $parisc11_asm="pariscid.o:bn_asm.o parisc-mont.o::aes_core.o aes_cbc.o aes-parisc.o:::sha1-parisc.o sha256-parisc.o sha512-parisc.o::rc4-parisc.o:::::ghash-parisc.o::32"; my $parisc20_asm="pariscid.o:pa-risc2W.o parisc-mont.o::aes_core.o aes_cbc.o aes-parisc.o:::sha1-parisc.o sha256-parisc.o sha512-parisc.o::rc4-parisc.o:::::ghash-parisc.o::64"; -my $ppc32_asm="ppccpuid.o ppccap.o:bn-ppc.o ppc-mont.o ppc64-mont.o::aes_core.o aes_cbc.o aes-ppc.o:::sha1-ppc.o sha256-ppc.o::::::::"; -my $ppc64_asm="ppccpuid.o ppccap.o:bn-ppc.o ppc-mont.o ppc64-mont.o::aes_core.o aes_cbc.o aes-ppc.o:::sha1-ppc.o sha256-ppc.o sha512-ppc.o::::::::"; +my $ppc32_asm="ppccpuid.o ppccap.o:bn-ppc.o ppc-mont.o ppc64-mont.o::aes_core.o aes_cbc.o aes-ppc.o 
aesp8-ppc.o:::sha1-ppc.o sha256-ppc.o sha512-ppc.o sha256p8-ppc.o sha512p8-ppc.o:::::::ghashp8-ppc.o:"; +my $ppc64_asm="ppccpuid.o ppccap.o:bn-ppc.o ppc-mont.o ppc64-mont.o::aes_core.o aes_cbc.o aes-ppc.o aesp8-ppc.o:::sha1-ppc.o sha256-ppc.o sha512-ppc.o sha256p8-ppc.o sha512p8-ppc.o:::::::ghashp8-ppc.o:"; my $no_asm=":::::::::::::::void"; # As for $BSDthreads. Idea is to maintain "collective" set of flags, diff --git a/crypto/aes/Makefile b/crypto/aes/Makefile index 1d9e82aad6..34760d174a 100644 --- a/crypto/aes/Makefile +++ b/crypto/aes/Makefile @@ -71,6 +71,8 @@ aes-sparcv9.s: asm/aes-sparcv9.pl aes-ppc.s: asm/aes-ppc.pl $(PERL) asm/aes-ppc.pl $(PERLASM_SCHEME) $@ +aesp8-ppc.s: asm/aesp8-ppc.pl + $(PERL) asm/aesp8-ppc.pl $(PERLASM_SCHEME) $@ aes-parisc.s: asm/aes-parisc.pl $(PERL) asm/aes-parisc.pl $(PERLASM_SCHEME) $@ diff --git a/crypto/aes/asm/aes-ppc.pl b/crypto/aes/asm/aes-ppc.pl index 7c52cbe5f9..58a98232d1 100644 --- a/crypto/aes/asm/aes-ppc.pl +++ b/crypto/aes/asm/aes-ppc.pl @@ -548,7 +548,7 @@ Lenc_loop: xor $s2,$t2,$acc14 xor $s3,$t3,$acc15 addi $key,$key,16 - bdnz- Lenc_loop + bdnz Lenc_loop addi $Tbl2,$Tbl0,2048 nop @@ -982,7 +982,7 @@ Ldec_loop: xor $s2,$t2,$acc14 xor $s3,$t3,$acc15 addi $key,$key,16 - bdnz- Ldec_loop + bdnz Ldec_loop addi $Tbl2,$Tbl0,2048 nop diff --git a/crypto/aes/asm/aesp8-ppc.pl b/crypto/aes/asm/aesp8-ppc.pl new file mode 100755 index 0000000000..7ef189d249 --- /dev/null +++ b/crypto/aes/asm/aesp8-ppc.pl @@ -0,0 +1,3726 @@ +#!/usr/bin/env perl +# +# ==================================================================== +# Written by Andy Polyakov for the OpenSSL +# project. The module is, however, dual licensed under OpenSSL and +# CRYPTOGAMS licenses depending on where you obtain it. For further +# details see http://www.openssl.org/~appro/cryptogams/. 
+# ==================================================================== +# +# This module implements support for AES instructions as per PowerISA +# specification version 2.07, first implemented by POWER8 processor. +# The module is endian-agnostic in the sense that it supports both big- +# and little-endian cases. Data alignment in parallelizable modes is +# handled with VSX loads and stores, which implies MSR.VSX flag being +# set. It should also be noted that ISA specification doesn't prohibit +# alignment exceptions for these instructions on page boundaries. +# Initially alignment was handled in pure AltiVec/VMX way [when data +# is aligned programmatically, which in turn guarantees exception- +# free execution], but it turned out to hamper performance when vcipher +# instructions are interleaved. It's reckoned that eventual +# misalignment penalties at page boundaries are on average lower +# than additional overhead in pure AltiVec approach. +# +# May 2016 +# +# Add XTS subroutine, 9x on little- and 12x improvement on big-endian +# systems were measured. +# +###################################################################### +# Current large-block performance in cycles per byte processed with +# 128-bit key (less is better). +# +# CBC en-/decrypt CTR XTS +# POWER8[le] 3.96/0.72 0.74 1.1 +# POWER8[be] 3.75/0.65 0.66 1.0 + +$flavour = shift; + +if ($flavour =~ /64/) { + $SIZE_T =8; + $LRSAVE =2*$SIZE_T; + $STU ="stdu"; + $POP ="ld"; + $PUSH ="std"; + $UCMP ="cmpld"; + $SHL ="sldi"; +} elsif ($flavour =~ /32/) { + $SIZE_T =4; + $LRSAVE =$SIZE_T; + $STU ="stwu"; + $POP ="lwz"; + $PUSH ="stw"; + $UCMP ="cmplw"; + $SHL ="slwi"; +} else { die "nonsense $flavour"; } + +$LITTLE_ENDIAN = ($flavour=~/le$/) ?
$SIZE_T : 0; + +$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; +( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or +( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or +die "can't locate ppc-xlate.pl"; + +open STDOUT,"| $^X $xlate $flavour ".shift || die "can't call $xlate: $!"; + +$FRAME=8*$SIZE_T; +$prefix="aes_p8"; + +$sp="r1"; +$vrsave="r12"; + +######################################################################### +{{{ # Key setup procedures # +my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8)); +my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6)); +my ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11)); + +$code.=<<___; +.machine "any" + +.text + +.align 7 +rcon: +.long 0x01000000, 0x01000000, 0x01000000, 0x01000000 ?rev +.long 0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000 ?rev +.long 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c ?rev +.long 0,0,0,0 ?asis +Lconsts: + mflr r0 + bcl 20,31,\$+4 + mflr $ptr #vvvvv "distance between . and rcon + addi $ptr,$ptr,-0x48 + mtlr r0 + blr + .long 0 + .byte 0,12,0x14,0,0,0,0,0 +.asciz "AES for PowerISA 2.07, CRYPTOGAMS by " + +.globl .${prefix}_set_encrypt_key +.align 5 +.${prefix}_set_encrypt_key: +Lset_encrypt_key: + mflr r11 + $PUSH r11,$LRSAVE($sp) + + li $ptr,-1 + ${UCMP}i $inp,0 + beq- Lenc_key_abort # if ($inp==0) return -1; + ${UCMP}i $out,0 + beq- Lenc_key_abort # if ($out==0) return -1; + li $ptr,-2 + cmpwi $bits,128 + blt- Lenc_key_abort + cmpwi $bits,256 + bgt- Lenc_key_abort + andi. 
r0,$bits,0x3f + bne- Lenc_key_abort + + lis r0,0xfff0 + mfspr $vrsave,256 + mtspr 256,r0 + + bl Lconsts + mtlr r11 + + neg r9,$inp + lvx $in0,0,$inp + addi $inp,$inp,15 # 15 is not typo + lvsr $key,0,r9 # borrow $key + li r8,0x20 + cmpwi $bits,192 + lvx $in1,0,$inp + le?vspltisb $mask,0x0f # borrow $mask + lvx $rcon,0,$ptr + le?vxor $key,$key,$mask # adjust for byte swap + lvx $mask,r8,$ptr + addi $ptr,$ptr,0x10 + vperm $in0,$in0,$in1,$key # align [and byte swap in LE] + li $cnt,8 + vxor $zero,$zero,$zero + mtctr $cnt + + ?lvsr $outperm,0,$out + vspltisb $outmask,-1 + lvx $outhead,0,$out + ?vperm $outmask,$zero,$outmask,$outperm + + blt Loop128 + addi $inp,$inp,8 + beq L192 + addi $inp,$inp,8 + b L256 + +.align 4 +Loop128: + vperm $key,$in0,$in0,$mask # rotate-n-splat + vsldoi $tmp,$zero,$in0,12 # >>32 + vperm $outtail,$in0,$in0,$outperm # rotate + vsel $stage,$outhead,$outtail,$outmask + vmr $outhead,$outtail + vcipherlast $key,$key,$rcon + stvx $stage,0,$out + addi $out,$out,16 + + vxor $in0,$in0,$tmp + vsldoi $tmp,$zero,$tmp,12 # >>32 + vxor $in0,$in0,$tmp + vsldoi $tmp,$zero,$tmp,12 # >>32 + vxor $in0,$in0,$tmp + vadduwm $rcon,$rcon,$rcon + vxor $in0,$in0,$key + bdnz Loop128 + + lvx $rcon,0,$ptr # last two round keys + + vperm $key,$in0,$in0,$mask # rotate-n-splat + vsldoi $tmp,$zero,$in0,12 # >>32 + vperm $outtail,$in0,$in0,$outperm # rotate + vsel $stage,$outhead,$outtail,$outmask + vmr $outhead,$outtail + vcipherlast $key,$key,$rcon + stvx $stage,0,$out + addi $out,$out,16 + + vxor $in0,$in0,$tmp + vsldoi $tmp,$zero,$tmp,12 # >>32 + vxor $in0,$in0,$tmp + vsldoi $tmp,$zero,$tmp,12 # >>32 + vxor $in0,$in0,$tmp + vadduwm $rcon,$rcon,$rcon + vxor $in0,$in0,$key + + vperm $key,$in0,$in0,$mask # rotate-n-splat + vsldoi $tmp,$zero,$in0,12 # >>32 + vperm $outtail,$in0,$in0,$outperm # rotate + vsel $stage,$outhead,$outtail,$outmask + vmr $outhead,$outtail + vcipherlast $key,$key,$rcon + stvx $stage,0,$out + addi $out,$out,16 + + vxor $in0,$in0,$tmp + vsldoi 
$tmp,$zero,$tmp,12 # >>32 + vxor $in0,$in0,$tmp + vsldoi $tmp,$zero,$tmp,12 # >>32 + vxor $in0,$in0,$tmp + vxor $in0,$in0,$key + vperm $outtail,$in0,$in0,$outperm # rotate + vsel $stage,$outhead,$outtail,$outmask + vmr $outhead,$outtail + stvx $stage,0,$out + + addi $inp,$out,15 # 15 is not typo + addi $out,$out,0x50 + + li $rounds,10 + b Ldone + +.align 4 +L192: + lvx $tmp,0,$inp + li $cnt,4 + vperm $outtail,$in0,$in0,$outperm # rotate + vsel $stage,$outhead,$outtail,$outmask + vmr $outhead,$outtail + stvx $stage,0,$out + addi $out,$out,16 + vperm $in1,$in1,$tmp,$key # align [and byte swap in LE] + vspltisb $key,8 # borrow $key + mtctr $cnt + vsububm $mask,$mask,$key # adjust the mask + +Loop192: + vperm $key,$in1,$in1,$mask # rotate-n-splat + vsldoi $tmp,$zero,$in0,12 # >>32 + vcipherlast $key,$key,$rcon + + vxor $in0,$in0,$tmp + vsldoi $tmp,$zero,$tmp,12 # >>32 + vxor $in0,$in0,$tmp + vsldoi $tmp,$zero,$tmp,12 # >>32 + vxor $in0,$in0,$tmp + + vsldoi $stage,$zero,$in1,8 + vspltw $tmp,$in0,3 + vxor $tmp,$tmp,$in1 + vsldoi $in1,$zero,$in1,12 # >>32 + vadduwm $rcon,$rcon,$rcon + vxor $in1,$in1,$tmp + vxor $in0,$in0,$key + vxor $in1,$in1,$key + vsldoi $stage,$stage,$in0,8 + + vperm $key,$in1,$in1,$mask # rotate-n-splat + vsldoi $tmp,$zero,$in0,12 # >>32 + vperm $outtail,$stage,$stage,$outperm # rotate + vsel $stage,$outhead,$outtail,$outmask + vmr $outhead,$outtail + vcipherlast $key,$key,$rcon + stvx $stage,0,$out + addi $out,$out,16 + + vsldoi $stage,$in0,$in1,8 + vxor $in0,$in0,$tmp + vsldoi $tmp,$zero,$tmp,12 # >>32 + vperm $outtail,$stage,$stage,$outperm # rotate + vsel $stage,$outhead,$outtail,$outmask + vmr $outhead,$outtail + vxor $in0,$in0,$tmp + vsldoi $tmp,$zero,$tmp,12 # >>32 + vxor $in0,$in0,$tmp + stvx $stage,0,$out + addi $out,$out,16 + + vspltw $tmp,$in0,3 + vxor $tmp,$tmp,$in1 + vsldoi $in1,$zero,$in1,12 # >>32 + vadduwm $rcon,$rcon,$rcon + vxor $in1,$in1,$tmp + vxor $in0,$in0,$key + vxor $in1,$in1,$key + vperm $outtail,$in0,$in0,$outperm # rotate +
vsel $stage,$outhead,$outtail,$outmask + vmr $outhead,$outtail + stvx $stage,0,$out + addi $inp,$out,15 # 15 is not typo + addi $out,$out,16 + bdnz Loop192 + + li $rounds,12 + addi $out,$out,0x20 + b Ldone + +.align 4 +L256: + lvx $tmp,0,$inp + li $cnt,7 + li $rounds,14 + vperm $outtail,$in0,$in0,$outperm # rotate + vsel $stage,$outhead,$outtail,$outmask + vmr $outhead,$outtail + stvx $stage,0,$out + addi $out,$out,16 + vperm $in1,$in1,$tmp,$key # align [and byte swap in LE] + mtctr $cnt + +Loop256: + vperm $key,$in1,$in1,$mask # rotate-n-splat + vsldoi $tmp,$zero,$in0,12 # >>32 + vperm $outtail,$in1,$in1,$outperm # rotate + vsel $stage,$outhead,$outtail,$outmask + vmr $outhead,$outtail + vcipherlast $key,$key,$rcon + stvx $stage,0,$out + addi $out,$out,16 + + vxor $in0,$in0,$tmp + vsldoi $tmp,$zero,$tmp,12 # >>32 + vxor $in0,$in0,$tmp + vsldoi $tmp,$zero,$tmp,12 # >>32 + vxor $in0,$in0,$tmp + vadduwm $rcon,$rcon,$rcon + vxor $in0,$in0,$key + vperm $outtail,$in0,$in0,$outperm # rotate + vsel $stage,$outhead,$outtail,$outmask + vmr $outhead,$outtail + stvx $stage,0,$out + addi $inp,$out,15 # 15 is not typo + addi $out,$out,16 + bdz Ldone + + vspltw $key,$in0,3 # just splat + vsldoi $tmp,$zero,$in1,12 # >>32 + vsbox $key,$key + + vxor $in1,$in1,$tmp + vsldoi $tmp,$zero,$tmp,12 # >>32 + vxor $in1,$in1,$tmp + vsldoi $tmp,$zero,$tmp,12 # >>32 + vxor $in1,$in1,$tmp + + vxor $in1,$in1,$key + b Loop256 + +.align 4 +Ldone: + lvx $in1,0,$inp # redundant in aligned case + vsel $in1,$outhead,$in1,$outmask + stvx $in1,0,$inp + li $ptr,0 + mtspr 256,$vrsave + stw $rounds,0($out) + +Lenc_key_abort: + mr r3,$ptr + blr + .long 0 + .byte 0,12,0x14,1,0,0,3,0 + .long 0 +.size .${prefix}_set_encrypt_key,.-.${prefix}_set_encrypt_key + +.globl .${prefix}_set_decrypt_key +.align 5 +.${prefix}_set_decrypt_key: + $STU $sp,-$FRAME($sp) + mflr r10 + $PUSH r10,$FRAME+$LRSAVE($sp) + bl Lset_encrypt_key + mtlr r10 + + cmpwi r3,0 + bne- Ldec_key_abort + + slwi $cnt,$rounds,4 + subi $inp,$out,240 
# first round key + srwi $rounds,$rounds,1 + add $out,$inp,$cnt # last round key + mtctr $rounds + +Ldeckey: + lwz r0, 0($inp) + lwz r6, 4($inp) + lwz r7, 8($inp) + lwz r8, 12($inp) + addi $inp,$inp,16 + lwz r9, 0($out) + lwz r10,4($out) + lwz r11,8($out) + lwz r12,12($out) + stw r0, 0($out) + stw r6, 4($out) + stw r7, 8($out) + stw r8, 12($out) + subi $out,$out,16 + stw r9, -16($inp) + stw r10,-12($inp) + stw r11,-8($inp) + stw r12,-4($inp) + bdnz Ldeckey + + xor r3,r3,r3 # return value +Ldec_key_abort: + addi $sp,$sp,$FRAME + blr + .long 0 + .byte 0,12,4,1,0x80,0,3,0 + .long 0 +.size .${prefix}_set_decrypt_key,.-.${prefix}_set_decrypt_key +___ +}}} +######################################################################### +{{{ # Single block en- and decrypt procedures # +sub gen_block () { +my $dir = shift; +my $n = $dir eq "de" ? "n" : ""; +my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7)); + +$code.=<<___; +.globl .${prefix}_${dir}crypt +.align 5 +.${prefix}_${dir}crypt: + lwz $rounds,240($key) + lis r0,0xfc00 + mfspr $vrsave,256 + li $idx,15 # 15 is not typo + mtspr 256,r0 + + lvx v0,0,$inp + neg r11,$out + lvx v1,$idx,$inp + lvsl v2,0,$inp # inpperm + le?vspltisb v4,0x0f + ?lvsl v3,0,r11 # outperm + le?vxor v2,v2,v4 + li $idx,16 + vperm v0,v0,v1,v2 # align [and byte swap in LE] + lvx v1,0,$key + ?lvsl v5,0,$key # keyperm + srwi $rounds,$rounds,1 + lvx v2,$idx,$key + addi $idx,$idx,16 + subi $rounds,$rounds,1 + ?vperm v1,v1,v2,v5 # align round key + + vxor v0,v0,v1 + lvx v1,$idx,$key + addi $idx,$idx,16 + mtctr $rounds + +Loop_${dir}c: + ?vperm v2,v2,v1,v5 + v${n}cipher v0,v0,v2 + lvx v2,$idx,$key + addi $idx,$idx,16 + ?vperm v1,v1,v2,v5 + v${n}cipher v0,v0,v1 + lvx v1,$idx,$key + addi $idx,$idx,16 + bdnz Loop_${dir}c + + ?vperm v2,v2,v1,v5 + v${n}cipher v0,v0,v2 + lvx v2,$idx,$key + ?vperm v1,v1,v2,v5 + v${n}cipherlast v0,v0,v1 + + vspltisb v2,-1 + vxor v1,v1,v1 + li $idx,15 # 15 is not typo + ?vperm v2,v1,v2,v3 # outmask + le?vxor v3,v3,v4 + lvx v1,0,$out # 
outhead + vperm v0,v0,v0,v3 # rotate [and byte swap in LE] + vsel v1,v1,v0,v2 + lvx v4,$idx,$out + stvx v1,0,$out + vsel v0,v0,v4,v2 + stvx v0,$idx,$out + + mtspr 256,$vrsave + blr + .long 0 + .byte 0,12,0x14,0,0,0,3,0 + .long 0 +.size .${prefix}_${dir}crypt,.-.${prefix}_${dir}crypt +___ +} +&gen_block("en"); +&gen_block("de"); +}}} +######################################################################### +{{{ # CBC en- and decrypt procedures # +my ($inp,$out,$len,$key,$ivp,$enc,$rounds,$idx)=map("r$_",(3..10)); +my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3)); +my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm)= + map("v$_",(4..10)); +$code.=<<___; +.globl .${prefix}_cbc_encrypt +.align 5 +.${prefix}_cbc_encrypt: + ${UCMP}i $len,16 + bltlr- + + cmpwi $enc,0 # test direction + lis r0,0xffe0 + mfspr $vrsave,256 + mtspr 256,r0 + + li $idx,15 + vxor $rndkey0,$rndkey0,$rndkey0 + le?vspltisb $tmp,0x0f + + lvx $ivec,0,$ivp # load [unaligned] iv + lvsl $inpperm,0,$ivp + lvx $inptail,$idx,$ivp + le?vxor $inpperm,$inpperm,$tmp + vperm $ivec,$ivec,$inptail,$inpperm + + neg r11,$inp + ?lvsl $keyperm,0,$key # prepare for unaligned key + lwz $rounds,240($key) + + lvsr $inpperm,0,r11 # prepare for unaligned load + lvx $inptail,0,$inp + addi $inp,$inp,15 # 15 is not typo + le?vxor $inpperm,$inpperm,$tmp + + ?lvsr $outperm,0,$out # prepare for unaligned store + vspltisb $outmask,-1 + lvx $outhead,0,$out + ?vperm $outmask,$rndkey0,$outmask,$outperm + le?vxor $outperm,$outperm,$tmp + + srwi $rounds,$rounds,1 + li $idx,16 + subi $rounds,$rounds,1 + beq Lcbc_dec + +Lcbc_enc: + vmr $inout,$inptail + lvx $inptail,0,$inp + addi $inp,$inp,16 + mtctr $rounds + subi $len,$len,16 # len-=16 + + lvx $rndkey0,0,$key + vperm $inout,$inout,$inptail,$inpperm + lvx $rndkey1,$idx,$key + addi $idx,$idx,16 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vxor $inout,$inout,$rndkey0 + lvx $rndkey0,$idx,$key + addi $idx,$idx,16 + vxor $inout,$inout,$ivec + +Loop_cbc_enc: + ?vperm 
$rndkey1,$rndkey1,$rndkey0,$keyperm + vcipher $inout,$inout,$rndkey1 + lvx $rndkey1,$idx,$key + addi $idx,$idx,16 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vcipher $inout,$inout,$rndkey0 + lvx $rndkey0,$idx,$key + addi $idx,$idx,16 + bdnz Loop_cbc_enc + + ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm + vcipher $inout,$inout,$rndkey1 + lvx $rndkey1,$idx,$key + li $idx,16 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vcipherlast $ivec,$inout,$rndkey0 + ${UCMP}i $len,16 + + vperm $tmp,$ivec,$ivec,$outperm + vsel $inout,$outhead,$tmp,$outmask + vmr $outhead,$tmp + stvx $inout,0,$out + addi $out,$out,16 + bge Lcbc_enc + + b Lcbc_done + +.align 4 +Lcbc_dec: + ${UCMP}i $len,128 + bge _aesp8_cbc_decrypt8x + vmr $tmp,$inptail + lvx $inptail,0,$inp + addi $inp,$inp,16 + mtctr $rounds + subi $len,$len,16 # len-=16 + + lvx $rndkey0,0,$key + vperm $tmp,$tmp,$inptail,$inpperm + lvx $rndkey1,$idx,$key + addi $idx,$idx,16 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vxor $inout,$tmp,$rndkey0 + lvx $rndkey0,$idx,$key + addi $idx,$idx,16 + +Loop_cbc_dec: + ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm + vncipher $inout,$inout,$rndkey1 + lvx $rndkey1,$idx,$key + addi $idx,$idx,16 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vncipher $inout,$inout,$rndkey0 + lvx $rndkey0,$idx,$key + addi $idx,$idx,16 + bdnz Loop_cbc_dec + + ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm + vncipher $inout,$inout,$rndkey1 + lvx $rndkey1,$idx,$key + li $idx,16 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vncipherlast $inout,$inout,$rndkey0 + ${UCMP}i $len,16 + + vxor $inout,$inout,$ivec + vmr $ivec,$tmp + vperm $tmp,$inout,$inout,$outperm + vsel $inout,$outhead,$tmp,$outmask + vmr $outhead,$tmp + stvx $inout,0,$out + addi $out,$out,16 + bge Lcbc_dec + +Lcbc_done: + addi $out,$out,-1 + lvx $inout,0,$out # redundant in aligned case + vsel $inout,$outhead,$inout,$outmask + stvx $inout,0,$out + + neg $enc,$ivp # write [unaligned] iv + li $idx,15 # 15 is not typo + vxor $rndkey0,$rndkey0,$rndkey0 + vspltisb 
$outmask,-1 + le?vspltisb $tmp,0x0f + ?lvsl $outperm,0,$enc + ?vperm $outmask,$rndkey0,$outmask,$outperm + le?vxor $outperm,$outperm,$tmp + lvx $outhead,0,$ivp + vperm $ivec,$ivec,$ivec,$outperm + vsel $inout,$outhead,$ivec,$outmask + lvx $inptail,$idx,$ivp + stvx $inout,0,$ivp + vsel $inout,$ivec,$inptail,$outmask + stvx $inout,$idx,$ivp + + mtspr 256,$vrsave + blr + .long 0 + .byte 0,12,0x14,0,0,0,6,0 + .long 0 +___ +######################################################################### +{{ # Optimized CBC decrypt procedure # +my $key_="r11"; +my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31)); + $x00=0 if ($flavour =~ /osx/); +my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10..13)); +my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(14..21)); +my $rndkey0="v23"; # v24-v25 rotating buffer for first round keys + # v26-v31 last 6 round keys +my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment + +$code.=<<___; +.align 5 +_aesp8_cbc_decrypt8x: + $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp) + li r10,`$FRAME+8*16+15` + li r11,`$FRAME+8*16+31` + stvx v20,r10,$sp # ABI says so + addi r10,r10,32 + stvx v21,r11,$sp + addi r11,r11,32 + stvx v22,r10,$sp + addi r10,r10,32 + stvx v23,r11,$sp + addi r11,r11,32 + stvx v24,r10,$sp + addi r10,r10,32 + stvx v25,r11,$sp + addi r11,r11,32 + stvx v26,r10,$sp + addi r10,r10,32 + stvx v27,r11,$sp + addi r11,r11,32 + stvx v28,r10,$sp + addi r10,r10,32 + stvx v29,r11,$sp + addi r11,r11,32 + stvx v30,r10,$sp + stvx v31,r11,$sp + li r0,-1 + stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave + li $x10,0x10 + $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp) + li $x20,0x20 + $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp) + li $x30,0x30 + $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp) + li $x40,0x40 + $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp) + li $x50,0x50 + $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp) + li $x60,0x60 + $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp) + li $x70,0x70 + mtspr 256,r0 + + subi
$rounds,$rounds,3 # -4 in total + subi $len,$len,128 # bias + + lvx $rndkey0,$x00,$key # load key schedule + lvx v30,$x10,$key + addi $key,$key,0x20 + lvx v31,$x00,$key + ?vperm $rndkey0,$rndkey0,v30,$keyperm + addi $key_,$sp,$FRAME+15 + mtctr $rounds + +Load_cbc_dec_key: + ?vperm v24,v30,v31,$keyperm + lvx v30,$x10,$key + addi $key,$key,0x20 + stvx v24,$x00,$key_ # off-load round[1] + ?vperm v25,v31,v30,$keyperm + lvx v31,$x00,$key + stvx v25,$x10,$key_ # off-load round[2] + addi $key_,$key_,0x20 + bdnz Load_cbc_dec_key + + lvx v26,$x10,$key + ?vperm v24,v30,v31,$keyperm + lvx v27,$x20,$key + stvx v24,$x00,$key_ # off-load round[3] + ?vperm v25,v31,v26,$keyperm + lvx v28,$x30,$key + stvx v25,$x10,$key_ # off-load round[4] + addi $key_,$sp,$FRAME+15 # rewind $key_ + ?vperm v26,v26,v27,$keyperm + lvx v29,$x40,$key + ?vperm v27,v27,v28,$keyperm + lvx v30,$x50,$key + ?vperm v28,v28,v29,$keyperm + lvx v31,$x60,$key + ?vperm v29,v29,v30,$keyperm + lvx $out0,$x70,$key # borrow $out0 + ?vperm v30,v30,v31,$keyperm + lvx v24,$x00,$key_ # pre-load round[1] + ?vperm v31,v31,$out0,$keyperm + lvx v25,$x10,$key_ # pre-load round[2] + + #lvx $inptail,0,$inp # "caller" already did this + #addi $inp,$inp,15 # 15 is not typo + subi $inp,$inp,15 # undo "caller" + + le?li $idx,8 + lvx_u $in0,$x00,$inp # load first 8 "words" + le?lvsl $inpperm,0,$idx + le?vspltisb $tmp,0x0f + lvx_u $in1,$x10,$inp + le?vxor $inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u + lvx_u $in2,$x20,$inp + le?vperm $in0,$in0,$in0,$inpperm + lvx_u $in3,$x30,$inp + le?vperm $in1,$in1,$in1,$inpperm + lvx_u $in4,$x40,$inp + le?vperm $in2,$in2,$in2,$inpperm + vxor $out0,$in0,$rndkey0 + lvx_u $in5,$x50,$inp + le?vperm $in3,$in3,$in3,$inpperm + vxor $out1,$in1,$rndkey0 + lvx_u $in6,$x60,$inp + le?vperm $in4,$in4,$in4,$inpperm + vxor $out2,$in2,$rndkey0 + lvx_u $in7,$x70,$inp + addi $inp,$inp,0x80 + le?vperm $in5,$in5,$in5,$inpperm + vxor $out3,$in3,$rndkey0 + le?vperm $in6,$in6,$in6,$inpperm + vxor 
$out4,$in4,$rndkey0 + le?vperm $in7,$in7,$in7,$inpperm + vxor $out5,$in5,$rndkey0 + vxor $out6,$in6,$rndkey0 + vxor $out7,$in7,$rndkey0 + + mtctr $rounds + b Loop_cbc_dec8x +.align 5 +Loop_cbc_dec8x: + vncipher $out0,$out0,v24 + vncipher $out1,$out1,v24 + vncipher $out2,$out2,v24 + vncipher $out3,$out3,v24 + vncipher $out4,$out4,v24 + vncipher $out5,$out5,v24 + vncipher $out6,$out6,v24 + vncipher $out7,$out7,v24 + lvx v24,$x20,$key_ # round[3] + addi $key_,$key_,0x20 + + vncipher $out0,$out0,v25 + vncipher $out1,$out1,v25 + vncipher $out2,$out2,v25 + vncipher $out3,$out3,v25 + vncipher $out4,$out4,v25 + vncipher $out5,$out5,v25 + vncipher $out6,$out6,v25 + vncipher $out7,$out7,v25 + lvx v25,$x10,$key_ # round[4] + bdnz Loop_cbc_dec8x + + subic $len,$len,128 # $len-=128 + vncipher $out0,$out0,v24 + vncipher $out1,$out1,v24 + vncipher $out2,$out2,v24 + vncipher $out3,$out3,v24 + vncipher $out4,$out4,v24 + vncipher $out5,$out5,v24 + vncipher $out6,$out6,v24 + vncipher $out7,$out7,v24 + + subfe. 
r0,r0,r0 # borrow?-1:0 + vncipher $out0,$out0,v25 + vncipher $out1,$out1,v25 + vncipher $out2,$out2,v25 + vncipher $out3,$out3,v25 + vncipher $out4,$out4,v25 + vncipher $out5,$out5,v25 + vncipher $out6,$out6,v25 + vncipher $out7,$out7,v25 + + and r0,r0,$len + vncipher $out0,$out0,v26 + vncipher $out1,$out1,v26 + vncipher $out2,$out2,v26 + vncipher $out3,$out3,v26 + vncipher $out4,$out4,v26 + vncipher $out5,$out5,v26 + vncipher $out6,$out6,v26 + vncipher $out7,$out7,v26 + + add $inp,$inp,r0 # $inp is adjusted in such + # way that at exit from the + # loop inX-in7 are loaded + # with last "words" + vncipher $out0,$out0,v27 + vncipher $out1,$out1,v27 + vncipher $out2,$out2,v27 + vncipher $out3,$out3,v27 + vncipher $out4,$out4,v27 + vncipher $out5,$out5,v27 + vncipher $out6,$out6,v27 + vncipher $out7,$out7,v27 + + addi $key_,$sp,$FRAME+15 # rewind $key_ + vncipher $out0,$out0,v28 + vncipher $out1,$out1,v28 + vncipher $out2,$out2,v28 + vncipher $out3,$out3,v28 + vncipher $out4,$out4,v28 + vncipher $out5,$out5,v28 + vncipher $out6,$out6,v28 + vncipher $out7,$out7,v28 + lvx v24,$x00,$key_ # re-pre-load round[1] + + vncipher $out0,$out0,v29 + vncipher $out1,$out1,v29 + vncipher $out2,$out2,v29 + vncipher $out3,$out3,v29 + vncipher $out4,$out4,v29 + vncipher $out5,$out5,v29 + vncipher $out6,$out6,v29 + vncipher $out7,$out7,v29 + lvx v25,$x10,$key_ # re-pre-load round[2] + + vncipher $out0,$out0,v30 + vxor $ivec,$ivec,v31 # xor with last round key + vncipher $out1,$out1,v30 + vxor $in0,$in0,v31 + vncipher $out2,$out2,v30 + vxor $in1,$in1,v31 + vncipher $out3,$out3,v30 + vxor $in2,$in2,v31 + vncipher $out4,$out4,v30 + vxor $in3,$in3,v31 + vncipher $out5,$out5,v30 + vxor $in4,$in4,v31 + vncipher $out6,$out6,v30 + vxor $in5,$in5,v31 + vncipher $out7,$out7,v30 + vxor $in6,$in6,v31 + + vncipherlast $out0,$out0,$ivec + vncipherlast $out1,$out1,$in0 + lvx_u $in0,$x00,$inp # load next input block + vncipherlast $out2,$out2,$in1 + lvx_u $in1,$x10,$inp + vncipherlast $out3,$out3,$in2 
+ le?vperm $in0,$in0,$in0,$inpperm + lvx_u $in2,$x20,$inp + vncipherlast $out4,$out4,$in3 + le?vperm $in1,$in1,$in1,$inpperm + lvx_u $in3,$x30,$inp + vncipherlast $out5,$out5,$in4 + le?vperm $in2,$in2,$in2,$inpperm + lvx_u $in4,$x40,$inp + vncipherlast $out6,$out6,$in5 + le?vperm $in3,$in3,$in3,$inpperm + lvx_u $in5,$x50,$inp + vncipherlast $out7,$out7,$in6 + le?vperm $in4,$in4,$in4,$inpperm + lvx_u $in6,$x60,$inp + vmr $ivec,$in7 + le?vperm $in5,$in5,$in5,$inpperm + lvx_u $in7,$x70,$inp + addi $inp,$inp,0x80 + + le?vperm $out0,$out0,$out0,$inpperm + le?vperm $out1,$out1,$out1,$inpperm + stvx_u $out0,$x00,$out + le?vperm $in6,$in6,$in6,$inpperm + vxor $out0,$in0,$rndkey0 + le?vperm $out2,$out2,$out2,$inpperm + stvx_u $out1,$x10,$out + le?vperm $in7,$in7,$in7,$inpperm + vxor $out1,$in1,$rndkey0 + le?vperm $out3,$out3,$out3,$inpperm + stvx_u $out2,$x20,$out + vxor $out2,$in2,$rndkey0 + le?vperm $out4,$out4,$out4,$inpperm + stvx_u $out3,$x30,$out + vxor $out3,$in3,$rndkey0 + le?vperm $out5,$out5,$out5,$inpperm + stvx_u $out4,$x40,$out + vxor $out4,$in4,$rndkey0 + le?vperm $out6,$out6,$out6,$inpperm + stvx_u $out5,$x50,$out + vxor $out5,$in5,$rndkey0 + le?vperm $out7,$out7,$out7,$inpperm + stvx_u $out6,$x60,$out + vxor $out6,$in6,$rndkey0 + stvx_u $out7,$x70,$out + addi $out,$out,0x80 + vxor $out7,$in7,$rndkey0 + + mtctr $rounds + beq Loop_cbc_dec8x # did $len-=128 borrow? + + addic. $len,$len,128 + beq Lcbc_dec8x_done + nop + nop + +Loop_cbc_dec8x_tail: # up to 7 "words" tail... 
+ vncipher $out1,$out1,v24 + vncipher $out2,$out2,v24 + vncipher $out3,$out3,v24 + vncipher $out4,$out4,v24 + vncipher $out5,$out5,v24 + vncipher $out6,$out6,v24 + vncipher $out7,$out7,v24 + lvx v24,$x20,$key_ # round[3] + addi $key_,$key_,0x20 + + vncipher $out1,$out1,v25 + vncipher $out2,$out2,v25 + vncipher $out3,$out3,v25 + vncipher $out4,$out4,v25 + vncipher $out5,$out5,v25 + vncipher $out6,$out6,v25 + vncipher $out7,$out7,v25 + lvx v25,$x10,$key_ # round[4] + bdnz Loop_cbc_dec8x_tail + + vncipher $out1,$out1,v24 + vncipher $out2,$out2,v24 + vncipher $out3,$out3,v24 + vncipher $out4,$out4,v24 + vncipher $out5,$out5,v24 + vncipher $out6,$out6,v24 + vncipher $out7,$out7,v24 + + vncipher $out1,$out1,v25 + vncipher $out2,$out2,v25 + vncipher $out3,$out3,v25 + vncipher $out4,$out4,v25 + vncipher $out5,$out5,v25 + vncipher $out6,$out6,v25 + vncipher $out7,$out7,v25 + + vncipher $out1,$out1,v26 + vncipher $out2,$out2,v26 + vncipher $out3,$out3,v26 + vncipher $out4,$out4,v26 + vncipher $out5,$out5,v26 + vncipher $out6,$out6,v26 + vncipher $out7,$out7,v26 + + vncipher $out1,$out1,v27 + vncipher $out2,$out2,v27 + vncipher $out3,$out3,v27 + vncipher $out4,$out4,v27 + vncipher $out5,$out5,v27 + vncipher $out6,$out6,v27 + vncipher $out7,$out7,v27 + + vncipher $out1,$out1,v28 + vncipher $out2,$out2,v28 + vncipher $out3,$out3,v28 + vncipher $out4,$out4,v28 + vncipher $out5,$out5,v28 + vncipher $out6,$out6,v28 + vncipher $out7,$out7,v28 + + vncipher $out1,$out1,v29 + vncipher $out2,$out2,v29 + vncipher $out3,$out3,v29 + vncipher $out4,$out4,v29 + vncipher $out5,$out5,v29 + vncipher $out6,$out6,v29 + vncipher $out7,$out7,v29 + + vncipher $out1,$out1,v30 + vxor $ivec,$ivec,v31 # last round key + vncipher $out2,$out2,v30 + vxor $in1,$in1,v31 + vncipher $out3,$out3,v30 + vxor $in2,$in2,v31 + vncipher $out4,$out4,v30 + vxor $in3,$in3,v31 + vncipher $out5,$out5,v30 + vxor $in4,$in4,v31 + vncipher $out6,$out6,v30 + vxor $in5,$in5,v31 + vncipher $out7,$out7,v30 + vxor $in6,$in6,v31 + 
+ cmplwi $len,32 # switch($len) + blt Lcbc_dec8x_one + nop + beq Lcbc_dec8x_two + cmplwi $len,64 + blt Lcbc_dec8x_three + nop + beq Lcbc_dec8x_four + cmplwi $len,96 + blt Lcbc_dec8x_five + nop + beq Lcbc_dec8x_six + +Lcbc_dec8x_seven: + vncipherlast $out1,$out1,$ivec + vncipherlast $out2,$out2,$in1 + vncipherlast $out3,$out3,$in2 + vncipherlast $out4,$out4,$in3 + vncipherlast $out5,$out5,$in4 + vncipherlast $out6,$out6,$in5 + vncipherlast $out7,$out7,$in6 + vmr $ivec,$in7 + + le?vperm $out1,$out1,$out1,$inpperm + le?vperm $out2,$out2,$out2,$inpperm + stvx_u $out1,$x00,$out + le?vperm $out3,$out3,$out3,$inpperm + stvx_u $out2,$x10,$out + le?vperm $out4,$out4,$out4,$inpperm + stvx_u $out3,$x20,$out + le?vperm $out5,$out5,$out5,$inpperm + stvx_u $out4,$x30,$out + le?vperm $out6,$out6,$out6,$inpperm + stvx_u $out5,$x40,$out + le?vperm $out7,$out7,$out7,$inpperm + stvx_u $out6,$x50,$out + stvx_u $out7,$x60,$out + addi $out,$out,0x70 + b Lcbc_dec8x_done + +.align 5 +Lcbc_dec8x_six: + vncipherlast $out2,$out2,$ivec + vncipherlast $out3,$out3,$in2 + vncipherlast $out4,$out4,$in3 + vncipherlast $out5,$out5,$in4 + vncipherlast $out6,$out6,$in5 + vncipherlast $out7,$out7,$in6 + vmr $ivec,$in7 + + le?vperm $out2,$out2,$out2,$inpperm + le?vperm $out3,$out3,$out3,$inpperm + stvx_u $out2,$x00,$out + le?vperm $out4,$out4,$out4,$inpperm + stvx_u $out3,$x10,$out + le?vperm $out5,$out5,$out5,$inpperm + stvx_u $out4,$x20,$out + le?vperm $out6,$out6,$out6,$inpperm + stvx_u $out5,$x30,$out + le?vperm $out7,$out7,$out7,$inpperm + stvx_u $out6,$x40,$out + stvx_u $out7,$x50,$out + addi $out,$out,0x60 + b Lcbc_dec8x_done + +.align 5 +Lcbc_dec8x_five: + vncipherlast $out3,$out3,$ivec + vncipherlast $out4,$out4,$in3 + vncipherlast $out5,$out5,$in4 + vncipherlast $out6,$out6,$in5 + vncipherlast $out7,$out7,$in6 + vmr $ivec,$in7 + + le?vperm $out3,$out3,$out3,$inpperm + le?vperm $out4,$out4,$out4,$inpperm + stvx_u $out3,$x00,$out + le?vperm $out5,$out5,$out5,$inpperm + stvx_u $out4,$x10,$out + 
le?vperm $out6,$out6,$out6,$inpperm + stvx_u $out5,$x20,$out + le?vperm $out7,$out7,$out7,$inpperm + stvx_u $out6,$x30,$out + stvx_u $out7,$x40,$out + addi $out,$out,0x50 + b Lcbc_dec8x_done + +.align 5 +Lcbc_dec8x_four: + vncipherlast $out4,$out4,$ivec + vncipherlast $out5,$out5,$in4 + vncipherlast $out6,$out6,$in5 + vncipherlast $out7,$out7,$in6 + vmr $ivec,$in7 + + le?vperm $out4,$out4,$out4,$inpperm + le?vperm $out5,$out5,$out5,$inpperm + stvx_u $out4,$x00,$out + le?vperm $out6,$out6,$out6,$inpperm + stvx_u $out5,$x10,$out + le?vperm $out7,$out7,$out7,$inpperm + stvx_u $out6,$x20,$out + stvx_u $out7,$x30,$out + addi $out,$out,0x40 + b Lcbc_dec8x_done + +.align 5 +Lcbc_dec8x_three: + vncipherlast $out5,$out5,$ivec + vncipherlast $out6,$out6,$in5 + vncipherlast $out7,$out7,$in6 + vmr $ivec,$in7 + + le?vperm $out5,$out5,$out5,$inpperm + le?vperm $out6,$out6,$out6,$inpperm + stvx_u $out5,$x00,$out + le?vperm $out7,$out7,$out7,$inpperm + stvx_u $out6,$x10,$out + stvx_u $out7,$x20,$out + addi $out,$out,0x30 + b Lcbc_dec8x_done + +.align 5 +Lcbc_dec8x_two: + vncipherlast $out6,$out6,$ivec + vncipherlast $out7,$out7,$in6 + vmr $ivec,$in7 + + le?vperm $out6,$out6,$out6,$inpperm + le?vperm $out7,$out7,$out7,$inpperm + stvx_u $out6,$x00,$out + stvx_u $out7,$x10,$out + addi $out,$out,0x20 + b Lcbc_dec8x_done + +.align 5 +Lcbc_dec8x_one: + vncipherlast $out7,$out7,$ivec + vmr $ivec,$in7 + + le?vperm $out7,$out7,$out7,$inpperm + stvx_u $out7,0,$out + addi $out,$out,0x10 + +Lcbc_dec8x_done: + le?vperm $ivec,$ivec,$ivec,$inpperm + stvx_u $ivec,0,$ivp # write [unaligned] iv + + li r10,`$FRAME+15` + li r11,`$FRAME+31` + stvx $inpperm,r10,$sp # wipe copies of round keys + addi r10,r10,32 + stvx $inpperm,r11,$sp + addi r11,r11,32 + stvx $inpperm,r10,$sp + addi r10,r10,32 + stvx $inpperm,r11,$sp + addi r11,r11,32 + stvx $inpperm,r10,$sp + addi r10,r10,32 + stvx $inpperm,r11,$sp + addi r11,r11,32 + stvx $inpperm,r10,$sp + addi r10,r10,32 + stvx $inpperm,r11,$sp + addi r11,r11,32 + + 
mtspr 256,$vrsave + lvx v20,r10,$sp # ABI says so + addi r10,r10,32 + lvx v21,r11,$sp + addi r11,r11,32 + lvx v22,r10,$sp + addi r10,r10,32 + lvx v23,r11,$sp + addi r11,r11,32 + lvx v24,r10,$sp + addi r10,r10,32 + lvx v25,r11,$sp + addi r11,r11,32 + lvx v26,r10,$sp + addi r10,r10,32 + lvx v27,r11,$sp + addi r11,r11,32 + lvx v28,r10,$sp + addi r10,r10,32 + lvx v29,r11,$sp + addi r11,r11,32 + lvx v30,r10,$sp + lvx v31,r11,$sp + $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp) + $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp) + $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp) + $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp) + $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp) + $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp) + addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T` + blr + .long 0 + .byte 0,12,0x04,0,0x80,6,6,0 + .long 0 +.size .${prefix}_cbc_encrypt,.-.${prefix}_cbc_encrypt +___ +}} }}} + +######################################################################### +{{{ # CTR procedure[s] # +my ($inp,$out,$len,$key,$ivp,$x10,$rounds,$idx)=map("r$_",(3..10)); +my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3)); +my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm,$one)= + map("v$_",(4..11)); +my $dat=$tmp; + +$code.=<<___; +.globl .${prefix}_ctr32_encrypt_blocks +.align 5 +.${prefix}_ctr32_encrypt_blocks: + ${UCMP}i $len,1 + bltlr- + + lis r0,0xfff0 + mfspr $vrsave,256 + mtspr 256,r0 + + li $idx,15 + vxor $rndkey0,$rndkey0,$rndkey0 + le?vspltisb $tmp,0x0f + + lvx $ivec,0,$ivp # load [unaligned] iv + lvsl $inpperm,0,$ivp + lvx $inptail,$idx,$ivp + vspltisb $one,1 + le?vxor $inpperm,$inpperm,$tmp + vperm $ivec,$ivec,$inptail,$inpperm + vsldoi $one,$rndkey0,$one,1 + + neg r11,$inp + ?lvsl $keyperm,0,$key # prepare for unaligned key + lwz $rounds,240($key) + + lvsr $inpperm,0,r11 # prepare for unaligned load + lvx $inptail,0,$inp + addi $inp,$inp,15 # 15 is not typo + le?vxor $inpperm,$inpperm,$tmp + + srwi $rounds,$rounds,1 + li $idx,16 + subi $rounds,$rounds,1 + + ${UCMP}i $len,8 + bge _aesp8_ctr32_encrypt8x + + 
?lvsr $outperm,0,$out # prepare for unaligned store + vspltisb $outmask,-1 + lvx $outhead,0,$out + ?vperm $outmask,$rndkey0,$outmask,$outperm + le?vxor $outperm,$outperm,$tmp + + lvx $rndkey0,0,$key + mtctr $rounds + lvx $rndkey1,$idx,$key + addi $idx,$idx,16 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vxor $inout,$ivec,$rndkey0 + lvx $rndkey0,$idx,$key + addi $idx,$idx,16 + b Loop_ctr32_enc + +.align 5 +Loop_ctr32_enc: + ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm + vcipher $inout,$inout,$rndkey1 + lvx $rndkey1,$idx,$key + addi $idx,$idx,16 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vcipher $inout,$inout,$rndkey0 + lvx $rndkey0,$idx,$key + addi $idx,$idx,16 + bdnz Loop_ctr32_enc + + vadduwm $ivec,$ivec,$one + vmr $dat,$inptail + lvx $inptail,0,$inp + addi $inp,$inp,16 + subic. $len,$len,1 # blocks-- + + ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm + vcipher $inout,$inout,$rndkey1 + lvx $rndkey1,$idx,$key + vperm $dat,$dat,$inptail,$inpperm + li $idx,16 + ?vperm $rndkey1,$rndkey0,$rndkey1,$keyperm + lvx $rndkey0,0,$key + vxor $dat,$dat,$rndkey1 # last round key + vcipherlast $inout,$inout,$dat + + lvx $rndkey1,$idx,$key + addi $idx,$idx,16 + vperm $inout,$inout,$inout,$outperm + vsel $dat,$outhead,$inout,$outmask + mtctr $rounds + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vmr $outhead,$inout + vxor $inout,$ivec,$rndkey0 + lvx $rndkey0,$idx,$key + addi $idx,$idx,16 + stvx $dat,0,$out + addi $out,$out,16 + bne Loop_ctr32_enc + + addi $out,$out,-1 + lvx $inout,0,$out # redundant in aligned case + vsel $inout,$outhead,$inout,$outmask + stvx $inout,0,$out + + mtspr 256,$vrsave + blr + .long 0 + .byte 0,12,0x14,0,0,0,6,0 + .long 0 +___ +######################################################################### +{{ # Optimized CTR procedure # +my $key_="r11"; +my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31)); + $x00=0 if ($flavour =~ /osx/); +my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10,12..14)); +my 
($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(15..22)); +my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys + # v26-v31 last 6 round keys +my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment +my ($two,$three,$four)=($outhead,$outperm,$outmask); + +$code.=<<___; +.align 5 +_aesp8_ctr32_encrypt8x: + $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp) + li r10,`$FRAME+8*16+15` + li r11,`$FRAME+8*16+31` + stvx v20,r10,$sp # ABI says so + addi r10,r10,32 + stvx v21,r11,$sp + addi r11,r11,32 + stvx v22,r10,$sp + addi r10,r10,32 + stvx v23,r11,$sp + addi r11,r11,32 + stvx v24,r10,$sp + addi r10,r10,32 + stvx v25,r11,$sp + addi r11,r11,32 + stvx v26,r10,$sp + addi r10,r10,32 + stvx v27,r11,$sp + addi r11,r11,32 + stvx v28,r10,$sp + addi r10,r10,32 + stvx v29,r11,$sp + addi r11,r11,32 + stvx v30,r10,$sp + stvx v31,r11,$sp + li r0,-1 + stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave + li $x10,0x10 + $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp) + li $x20,0x20 + $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp) + li $x30,0x30 + $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp) + li $x40,0x40 + $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp) + li $x50,0x50 + $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp) + li $x60,0x60 + $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp) + li $x70,0x70 + mtspr 256,r0 + + subi $rounds,$rounds,3 # -4 in total + + lvx $rndkey0,$x00,$key # load key schedule + lvx v30,$x10,$key + addi $key,$key,0x20 + lvx v31,$x00,$key + ?vperm $rndkey0,$rndkey0,v30,$keyperm + addi $key_,$sp,$FRAME+15 + mtctr $rounds + +Load_ctr32_enc_key: + ?vperm v24,v30,v31,$keyperm + lvx v30,$x10,$key + addi $key,$key,0x20 + stvx v24,$x00,$key_ # off-load round[1] + ?vperm v25,v31,v30,$keyperm + lvx v31,$x00,$key + stvx v25,$x10,$key_ # off-load round[2] + addi $key_,$key_,0x20 + bdnz Load_ctr32_enc_key + + lvx v26,$x10,$key + ?vperm v24,v30,v31,$keyperm + lvx v27,$x20,$key + stvx v24,$x00,$key_ # off-load round[3] + ?vperm v25,v31,v26,$keyperm + lvx v28,$x30,$key + stvx 
v25,$x10,$key_ # off-load round[4] + addi $key_,$sp,$FRAME+15 # rewind $key_ + ?vperm v26,v26,v27,$keyperm + lvx v29,$x40,$key + ?vperm v27,v27,v28,$keyperm + lvx v30,$x50,$key + ?vperm v28,v28,v29,$keyperm + lvx v31,$x60,$key + ?vperm v29,v29,v30,$keyperm + lvx $out0,$x70,$key # borrow $out0 + ?vperm v30,v30,v31,$keyperm + lvx v24,$x00,$key_ # pre-load round[1] + ?vperm v31,v31,$out0,$keyperm + lvx v25,$x10,$key_ # pre-load round[2] + + vadduwm $two,$one,$one + subi $inp,$inp,15 # undo "caller" + $SHL $len,$len,4 + + vadduwm $out1,$ivec,$one # counter values ... + vadduwm $out2,$ivec,$two + vxor $out0,$ivec,$rndkey0 # ... xored with rndkey[0] + le?li $idx,8 + vadduwm $out3,$out1,$two + vxor $out1,$out1,$rndkey0 + le?lvsl $inpperm,0,$idx + vadduwm $out4,$out2,$two + vxor $out2,$out2,$rndkey0 + le?vspltisb $tmp,0x0f + vadduwm $out5,$out3,$two + vxor $out3,$out3,$rndkey0 + le?vxor $inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u + vadduwm $out6,$out4,$two + vxor $out4,$out4,$rndkey0 + vadduwm $out7,$out5,$two + vxor $out5,$out5,$rndkey0 + vadduwm $ivec,$out6,$two # next counter value + vxor $out6,$out6,$rndkey0 + vxor $out7,$out7,$rndkey0 + + mtctr $rounds + b Loop_ctr32_enc8x +.align 5 +Loop_ctr32_enc8x: + vcipher $out0,$out0,v24 + vcipher $out1,$out1,v24 + vcipher $out2,$out2,v24 + vcipher $out3,$out3,v24 + vcipher $out4,$out4,v24 + vcipher $out5,$out5,v24 + vcipher $out6,$out6,v24 + vcipher $out7,$out7,v24 +Loop_ctr32_enc8x_middle: + lvx v24,$x20,$key_ # round[3] + addi $key_,$key_,0x20 + + vcipher $out0,$out0,v25 + vcipher $out1,$out1,v25 + vcipher $out2,$out2,v25 + vcipher $out3,$out3,v25 + vcipher $out4,$out4,v25 + vcipher $out5,$out5,v25 + vcipher $out6,$out6,v25 + vcipher $out7,$out7,v25 + lvx v25,$x10,$key_ # round[4] + bdnz Loop_ctr32_enc8x + + subic r11,$len,256 # $len-256, borrow $key_ + vcipher $out0,$out0,v24 + vcipher $out1,$out1,v24 + vcipher $out2,$out2,v24 + vcipher $out3,$out3,v24 + vcipher $out4,$out4,v24 + vcipher $out5,$out5,v24 + vcipher 
$out6,$out6,v24 + vcipher $out7,$out7,v24 + + subfe r0,r0,r0 # borrow?-1:0 + vcipher $out0,$out0,v25 + vcipher $out1,$out1,v25 + vcipher $out2,$out2,v25 + vcipher $out3,$out3,v25 + vcipher $out4,$out4,v25 + vcipher $out5,$out5,v25 + vcipher $out6,$out6,v25 + vcipher $out7,$out7,v25 + + and r0,r0,r11 + addi $key_,$sp,$FRAME+15 # rewind $key_ + vcipher $out0,$out0,v26 + vcipher $out1,$out1,v26 + vcipher $out2,$out2,v26 + vcipher $out3,$out3,v26 + vcipher $out4,$out4,v26 + vcipher $out5,$out5,v26 + vcipher $out6,$out6,v26 + vcipher $out7,$out7,v26 + lvx v24,$x00,$key_ # re-pre-load round[1] + + subic $len,$len,129 # $len-=129 + vcipher $out0,$out0,v27 + addi $len,$len,1 # $len-=128 really + vcipher $out1,$out1,v27 + vcipher $out2,$out2,v27 + vcipher $out3,$out3,v27 + vcipher $out4,$out4,v27 + vcipher $out5,$out5,v27 + vcipher $out6,$out6,v27 + vcipher $out7,$out7,v27 + lvx v25,$x10,$key_ # re-pre-load round[2] + + vcipher $out0,$out0,v28 + lvx_u $in0,$x00,$inp # load input + vcipher $out1,$out1,v28 + lvx_u $in1,$x10,$inp + vcipher $out2,$out2,v28 + lvx_u $in2,$x20,$inp + vcipher $out3,$out3,v28 + lvx_u $in3,$x30,$inp + vcipher $out4,$out4,v28 + lvx_u $in4,$x40,$inp + vcipher $out5,$out5,v28 + lvx_u $in5,$x50,$inp + vcipher $out6,$out6,v28 + lvx_u $in6,$x60,$inp + vcipher $out7,$out7,v28 + lvx_u $in7,$x70,$inp + addi $inp,$inp,0x80 + + vcipher $out0,$out0,v29 + le?vperm $in0,$in0,$in0,$inpperm + vcipher $out1,$out1,v29 + le?vperm $in1,$in1,$in1,$inpperm + vcipher $out2,$out2,v29 + le?vperm $in2,$in2,$in2,$inpperm + vcipher $out3,$out3,v29 + le?vperm $in3,$in3,$in3,$inpperm + vcipher $out4,$out4,v29 + le?vperm $in4,$in4,$in4,$inpperm + vcipher $out5,$out5,v29 + le?vperm $in5,$in5,$in5,$inpperm + vcipher $out6,$out6,v29 + le?vperm $in6,$in6,$in6,$inpperm + vcipher $out7,$out7,v29 + le?vperm $in7,$in7,$in7,$inpperm + + add $inp,$inp,r0 # $inp is adjusted in such + # way that at exit from the + # loop inX-in7 are loaded + # with last "words" + subfe. 
r0,r0,r0 # borrow?-1:0 + vcipher $out0,$out0,v30 + vxor $in0,$in0,v31 # xor with last round key + vcipher $out1,$out1,v30 + vxor $in1,$in1,v31 + vcipher $out2,$out2,v30 + vxor $in2,$in2,v31 + vcipher $out3,$out3,v30 + vxor $in3,$in3,v31 + vcipher $out4,$out4,v30 + vxor $in4,$in4,v31 + vcipher $out5,$out5,v30 + vxor $in5,$in5,v31 + vcipher $out6,$out6,v30 + vxor $in6,$in6,v31 + vcipher $out7,$out7,v30 + vxor $in7,$in7,v31 + + bne Lctr32_enc8x_break # did $len-129 borrow? + + vcipherlast $in0,$out0,$in0 + vcipherlast $in1,$out1,$in1 + vadduwm $out1,$ivec,$one # counter values ... + vcipherlast $in2,$out2,$in2 + vadduwm $out2,$ivec,$two + vxor $out0,$ivec,$rndkey0 # ... xored with rndkey[0] + vcipherlast $in3,$out3,$in3 + vadduwm $out3,$out1,$two + vxor $out1,$out1,$rndkey0 + vcipherlast $in4,$out4,$in4 + vadduwm $out4,$out2,$two + vxor $out2,$out2,$rndkey0 + vcipherlast $in5,$out5,$in5 + vadduwm $out5,$out3,$two + vxor $out3,$out3,$rndkey0 + vcipherlast $in6,$out6,$in6 + vadduwm $out6,$out4,$two + vxor $out4,$out4,$rndkey0 + vcipherlast $in7,$out7,$in7 + vadduwm $out7,$out5,$two + vxor $out5,$out5,$rndkey0 + le?vperm $in0,$in0,$in0,$inpperm + vadduwm $ivec,$out6,$two # next counter value + vxor $out6,$out6,$rndkey0 + le?vperm $in1,$in1,$in1,$inpperm + vxor $out7,$out7,$rndkey0 + mtctr $rounds + + vcipher $out0,$out0,v24 + stvx_u $in0,$x00,$out + le?vperm $in2,$in2,$in2,$inpperm + vcipher $out1,$out1,v24 + stvx_u $in1,$x10,$out + le?vperm $in3,$in3,$in3,$inpperm + vcipher $out2,$out2,v24 + stvx_u $in2,$x20,$out + le?vperm $in4,$in4,$in4,$inpperm + vcipher $out3,$out3,v24 + stvx_u $in3,$x30,$out + le?vperm $in5,$in5,$in5,$inpperm + vcipher $out4,$out4,v24 + stvx_u $in4,$x40,$out + le?vperm $in6,$in6,$in6,$inpperm + vcipher $out5,$out5,v24 + stvx_u $in5,$x50,$out + le?vperm $in7,$in7,$in7,$inpperm + vcipher $out6,$out6,v24 + stvx_u $in6,$x60,$out + vcipher $out7,$out7,v24 + stvx_u $in7,$x70,$out + addi $out,$out,0x80 + + b Loop_ctr32_enc8x_middle + +.align 5 
+Lctr32_enc8x_break: + cmpwi $len,-0x60 + blt Lctr32_enc8x_one + nop + beq Lctr32_enc8x_two + cmpwi $len,-0x40 + blt Lctr32_enc8x_three + nop + beq Lctr32_enc8x_four + cmpwi $len,-0x20 + blt Lctr32_enc8x_five + nop + beq Lctr32_enc8x_six + cmpwi $len,0x00 + blt Lctr32_enc8x_seven + +Lctr32_enc8x_eight: + vcipherlast $out0,$out0,$in0 + vcipherlast $out1,$out1,$in1 + vcipherlast $out2,$out2,$in2 + vcipherlast $out3,$out3,$in3 + vcipherlast $out4,$out4,$in4 + vcipherlast $out5,$out5,$in5 + vcipherlast $out6,$out6,$in6 + vcipherlast $out7,$out7,$in7 + + le?vperm $out0,$out0,$out0,$inpperm + le?vperm $out1,$out1,$out1,$inpperm + stvx_u $out0,$x00,$out + le?vperm $out2,$out2,$out2,$inpperm + stvx_u $out1,$x10,$out + le?vperm $out3,$out3,$out3,$inpperm + stvx_u $out2,$x20,$out + le?vperm $out4,$out4,$out4,$inpperm + stvx_u $out3,$x30,$out + le?vperm $out5,$out5,$out5,$inpperm + stvx_u $out4,$x40,$out + le?vperm $out6,$out6,$out6,$inpperm + stvx_u $out5,$x50,$out + le?vperm $out7,$out7,$out7,$inpperm + stvx_u $out6,$x60,$out + stvx_u $out7,$x70,$out + addi $out,$out,0x80 + b Lctr32_enc8x_done + +.align 5 +Lctr32_enc8x_seven: + vcipherlast $out0,$out0,$in1 + vcipherlast $out1,$out1,$in2 + vcipherlast $out2,$out2,$in3 + vcipherlast $out3,$out3,$in4 + vcipherlast $out4,$out4,$in5 + vcipherlast $out5,$out5,$in6 + vcipherlast $out6,$out6,$in7 + + le?vperm $out0,$out0,$out0,$inpperm + le?vperm $out1,$out1,$out1,$inpperm + stvx_u $out0,$x00,$out + le?vperm $out2,$out2,$out2,$inpperm + stvx_u $out1,$x10,$out + le?vperm $out3,$out3,$out3,$inpperm + stvx_u $out2,$x20,$out + le?vperm $out4,$out4,$out4,$inpperm + stvx_u $out3,$x30,$out + le?vperm $out5,$out5,$out5,$inpperm + stvx_u $out4,$x40,$out + le?vperm $out6,$out6,$out6,$inpperm + stvx_u $out5,$x50,$out + stvx_u $out6,$x60,$out + addi $out,$out,0x70 + b Lctr32_enc8x_done + +.align 5 +Lctr32_enc8x_six: + vcipherlast $out0,$out0,$in2 + vcipherlast $out1,$out1,$in3 + vcipherlast $out2,$out2,$in4 + vcipherlast $out3,$out3,$in5 + 
vcipherlast $out4,$out4,$in6 + vcipherlast $out5,$out5,$in7 + + le?vperm $out0,$out0,$out0,$inpperm + le?vperm $out1,$out1,$out1,$inpperm + stvx_u $out0,$x00,$out + le?vperm $out2,$out2,$out2,$inpperm + stvx_u $out1,$x10,$out + le?vperm $out3,$out3,$out3,$inpperm + stvx_u $out2,$x20,$out + le?vperm $out4,$out4,$out4,$inpperm + stvx_u $out3,$x30,$out + le?vperm $out5,$out5,$out5,$inpperm + stvx_u $out4,$x40,$out + stvx_u $out5,$x50,$out + addi $out,$out,0x60 + b Lctr32_enc8x_done + +.align 5 +Lctr32_enc8x_five: + vcipherlast $out0,$out0,$in3 # five blocks left, input sits in in3-in7 + vcipherlast $out1,$out1,$in4 + vcipherlast $out2,$out2,$in5 + vcipherlast $out3,$out3,$in6 + vcipherlast $out4,$out4,$in7 + + le?vperm $out0,$out0,$out0,$inpperm + le?vperm $out1,$out1,$out1,$inpperm + stvx_u $out0,$x00,$out + le?vperm $out2,$out2,$out2,$inpperm + stvx_u $out1,$x10,$out + le?vperm $out3,$out3,$out3,$inpperm + stvx_u $out2,$x20,$out + le?vperm $out4,$out4,$out4,$inpperm + stvx_u $out3,$x30,$out + stvx_u $out4,$x40,$out + addi $out,$out,0x50 + b Lctr32_enc8x_done + +.align 5 +Lctr32_enc8x_four: + vcipherlast $out0,$out0,$in4 # four blocks left, input sits in in4-in7 + vcipherlast $out1,$out1,$in5 + vcipherlast $out2,$out2,$in6 + vcipherlast $out3,$out3,$in7 + + le?vperm $out0,$out0,$out0,$inpperm + le?vperm $out1,$out1,$out1,$inpperm + stvx_u $out0,$x00,$out + le?vperm $out2,$out2,$out2,$inpperm + stvx_u $out1,$x10,$out + le?vperm $out3,$out3,$out3,$inpperm + stvx_u $out2,$x20,$out + stvx_u $out3,$x30,$out + addi $out,$out,0x40 + b Lctr32_enc8x_done + +.align 5 +Lctr32_enc8x_three: + vcipherlast $out0,$out0,$in5 # three blocks left, input sits in in5-in7 + vcipherlast $out1,$out1,$in6 + vcipherlast $out2,$out2,$in7 + + le?vperm $out0,$out0,$out0,$inpperm + le?vperm $out1,$out1,$out1,$inpperm + stvx_u $out0,$x00,$out + le?vperm $out2,$out2,$out2,$inpperm + stvx_u $out1,$x10,$out + stvx_u $out2,$x20,$out + addi $out,$out,0x30 + b Lctr32_enc8x_done # CTR exit - the CBC exit would also store ivec through ivp + +.align 5 +Lctr32_enc8x_two: + vcipherlast $out0,$out0,$in6 # two blocks left, input sits in in6-in7 + vcipherlast $out1,$out1,$in7 + + le?vperm $out0,$out0,$out0,$inpperm + le?vperm 
$out1,$out1,$out1,$inpperm + stvx_u $out0,$x00,$out + stvx_u $out1,$x10,$out + addi $out,$out,0x20 + b Lctr32_enc8x_done # CTR exit - the CBC exit would also store ivec through ivp + +.align 5 +Lctr32_enc8x_one: + vcipherlast $out0,$out0,$in7 # single block left, input sits in in7 + + le?vperm $out0,$out0,$out0,$inpperm + stvx_u $out0,0,$out + addi $out,$out,0x10 # falls through to the CTR exit + +Lctr32_enc8x_done: + li r10,`$FRAME+15` + li r11,`$FRAME+31` + stvx $inpperm,r10,$sp # wipe copies of round keys + addi r10,r10,32 + stvx $inpperm,r11,$sp + addi r11,r11,32 + stvx $inpperm,r10,$sp + addi r10,r10,32 + stvx $inpperm,r11,$sp + addi r11,r11,32 + stvx $inpperm,r10,$sp + addi r10,r10,32 + stvx $inpperm,r11,$sp + addi r11,r11,32 + stvx $inpperm,r10,$sp + addi r10,r10,32 + stvx $inpperm,r11,$sp + addi r11,r11,32 + + mtspr 256,$vrsave + lvx v20,r10,$sp # ABI says so + addi r10,r10,32 + lvx v21,r11,$sp + addi r11,r11,32 + lvx v22,r10,$sp + addi r10,r10,32 + lvx v23,r11,$sp + addi r11,r11,32 + lvx v24,r10,$sp + addi r10,r10,32 + lvx v25,r11,$sp + addi r11,r11,32 + lvx v26,r10,$sp + addi r10,r10,32 + lvx v27,r11,$sp + addi r11,r11,32 + lvx v28,r10,$sp + addi r10,r10,32 + lvx v29,r11,$sp + addi r11,r11,32 + lvx v30,r10,$sp + lvx v31,r11,$sp + $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp) + $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp) + $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp) + $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp) + $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp) + $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp) + addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T` + blr + .long 0 + .byte 0,12,0x04,0,0x80,6,6,0 + .long 0 +.size .${prefix}_ctr32_encrypt_blocks,.-.${prefix}_ctr32_encrypt_blocks +___ +}} }}} + +######################################################################### +{{{ # XTS procedures # +my ($inp,$out,$len,$key1,$key2,$ivp,$rounds,$idx) = map("r$_",(3..10)); +my ($rndkey0,$rndkey1,$inout) = map("v$_",(0..2)); +my ($output,$inptail,$inpperm,$leperm,$keyperm) = map("v$_",(3..7)); +my ($tweak,$seven,$eighty7,$tmp,$tweak1) = map("v$_",(8..12)); +my $taillen = $key2; + + ($inp,$idx) = ($idx,$inp); # reassign + +$code.=<<___; 
+.globl .${prefix}_xts_encrypt +.align 5 +.${prefix}_xts_encrypt: # XTS encrypt entry - r3 returns 0 when done, -1 when len is under 16 + mr $inp,r3 # reassign + li r3,-1 # value returned by the early exit below + ${UCMP}i $len,16 + bltlr- + + lis r0,0xfff0 + mfspr r12,256 # save vrsave + li r11,0 + mtspr 256,r0 + + vspltisb $seven,0x07 # 0x070707..07 + le?lvsl $leperm,r11,r11 + le?vspltisb $tmp,0x0f + le?vxor $leperm,$leperm,$seven + + li $idx,15 + lvx $tweak,0,$ivp # load [unaligned] iv + lvsl $inpperm,0,$ivp + lvx $inptail,$idx,$ivp + le?vxor $inpperm,$inpperm,$tmp + vperm $tweak,$tweak,$inptail,$inpperm + + ?lvsl $keyperm,0,$key2 # prepare for unaligned key + lwz $rounds,240($key2) + srwi $rounds,$rounds,1 + subi $rounds,$rounds,1 + li $idx,16 + + neg r11,$inp + lvsr $inpperm,0,r11 # prepare for unaligned load + lvx $inout,0,$inp + addi $inp,$inp,15 # 15 is not typo + le?vxor $inpperm,$inpperm,$tmp + + lvx $rndkey0,0,$key2 + lvx $rndkey1,$idx,$key2 + addi $idx,$idx,16 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vxor $tweak,$tweak,$rndkey0 + lvx $rndkey0,$idx,$key2 + addi $idx,$idx,16 + mtctr $rounds + +Ltweak_xts_enc: + ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm + vcipher $tweak,$tweak,$rndkey1 + lvx $rndkey1,$idx,$key2 + addi $idx,$idx,16 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vcipher $tweak,$tweak,$rndkey0 + lvx $rndkey0,$idx,$key2 + addi $idx,$idx,16 + bdnz Ltweak_xts_enc + + ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm + vcipher $tweak,$tweak,$rndkey1 + lvx $rndkey1,$idx,$key2 + li $idx,16 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vcipherlast $tweak,$tweak,$rndkey0 # initial tweak is the iv encrypted with the key2 schedule + + lvx $inptail,0,$inp + addi $inp,$inp,16 + + ?lvsl $keyperm,0,$key1 # prepare for unaligned key + lwz $rounds,240($key1) + srwi $rounds,$rounds,1 + subi $rounds,$rounds,1 + li $idx,16 + + vslb $eighty7,$seven,$seven # 0x808080..80 + vor $eighty7,$eighty7,$seven # 0x878787..87 + vspltisb $tmp,1 # 0x010101..01 + vsldoi $eighty7,$eighty7,$tmp,15 # 0x870101..01 + + ${UCMP}i $len,96 + bge _aesp8_xts_encrypt6x # len of 96 or more is handed to the 6x routine + + andi. 
$taillen,$len,15 + subic r0,$len,32 + subi $taillen,$taillen,16 # taillen is now (len mod 16) minus 16 + subfe r0,r0,r0 + and r0,r0,$taillen + add $inp,$inp,r0 + + lvx $rndkey0,0,$key1 + lvx $rndkey1,$idx,$key1 + addi $idx,$idx,16 + vperm $inout,$inout,$inptail,$inpperm + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vxor $inout,$inout,$tweak + vxor $inout,$inout,$rndkey0 + lvx $rndkey0,$idx,$key1 + addi $idx,$idx,16 + mtctr $rounds + b Loop_xts_enc + +.align 5 +Loop_xts_enc: + ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm + vcipher $inout,$inout,$rndkey1 + lvx $rndkey1,$idx,$key1 + addi $idx,$idx,16 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vcipher $inout,$inout,$rndkey0 + lvx $rndkey0,$idx,$key1 + addi $idx,$idx,16 + bdnz Loop_xts_enc + + ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm + vcipher $inout,$inout,$rndkey1 + lvx $rndkey1,$idx,$key1 + li $idx,16 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vxor $rndkey0,$rndkey0,$tweak # fold the tweak into the last round key + vcipherlast $output,$inout,$rndkey0 + + le?vperm $tmp,$output,$output,$leperm + be?nop + le?stvx_u $tmp,0,$out + be?stvx_u $output,0,$out + addi $out,$out,16 + + subic. 
$len,$len,16 + beq Lxts_enc_done # nothing left to process + + vmr $inout,$inptail + lvx $inptail,0,$inp + addi $inp,$inp,16 + lvx $rndkey0,0,$key1 + lvx $rndkey1,$idx,$key1 + addi $idx,$idx,16 + + subic r0,$len,32 + subfe r0,r0,r0 + and r0,r0,$taillen + add $inp,$inp,r0 + + vsrab $tmp,$tweak,$seven # next tweak value + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + vand $tmp,$tmp,$eighty7 + vxor $tweak,$tweak,$tmp + + vperm $inout,$inout,$inptail,$inpperm + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vxor $inout,$inout,$tweak + vxor $output,$output,$rndkey0 # just in case $len<16 + vxor $inout,$inout,$rndkey0 + lvx $rndkey0,$idx,$key1 + addi $idx,$idx,16 + + mtctr $rounds + ${UCMP}i $len,16 + bge Loop_xts_enc + + vxor $output,$output,$tweak + lvsr $inpperm,0,$len # $inpperm is no longer needed + vxor $inptail,$inptail,$inptail # $inptail is no longer needed + vspltisb $tmp,-1 + vperm $inptail,$inptail,$tmp,$inpperm + vsel $inout,$inout,$output,$inptail + + subi r11,$out,17 + subi $out,$out,16 # step back to the block stored last + mtctr $len # one iteration per remaining tail byte + li $len,16 # the re-entered loop then exits after one more block +Loop_xts_enc_steal: + lbzu r0,1(r11) + stb r0,16(r11) # copy the byte 16 positions forward in the output + bdnz Loop_xts_enc_steal + + mtctr $rounds + b Loop_xts_enc # one more time... + +Lxts_enc_done: + mtspr 256,r12 # restore vrsave + li r3,0 # return 0 + blr + .long 0 + .byte 0,12,0x04,0,0x80,6,6,0 + .long 0 +.size .${prefix}_xts_encrypt,.-.${prefix}_xts_encrypt + +.globl .${prefix}_xts_decrypt +.align 5 +.${prefix}_xts_decrypt: + mr $inp,r3 # reassign + li r3,-1 + ${UCMP}i $len,16 + bltlr- + + lis r0,0xfff8 + mfspr r12,256 # save vrsave + li r11,0 + mtspr 256,r0 + + andi. r0,$len,15 + neg r0,r0 + andi. 
r0,r0,16 + sub $len,$len,r0 + + vspltisb $seven,0x07 # 0x070707..07 + le?lvsl $leperm,r11,r11 + le?vspltisb $tmp,0x0f + le?vxor $leperm,$leperm,$seven + + li $idx,15 + lvx $tweak,0,$ivp # load [unaligned] iv + lvsl $inpperm,0,$ivp + lvx $inptail,$idx,$ivp + le?vxor $inpperm,$inpperm,$tmp + vperm $tweak,$tweak,$inptail,$inpperm + + ?lvsl $keyperm,0,$key2 # prepare for unaligned key + lwz $rounds,240($key2) + srwi $rounds,$rounds,1 + subi $rounds,$rounds,1 + li $idx,16 + + neg r11,$inp + lvsr $inpperm,0,r11 # prepare for unaligned load + lvx $inout,0,$inp + addi $inp,$inp,15 # 15 is not typo + le?vxor $inpperm,$inpperm,$tmp + + lvx $rndkey0,0,$key2 + lvx $rndkey1,$idx,$key2 + addi $idx,$idx,16 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vxor $tweak,$tweak,$rndkey0 + lvx $rndkey0,$idx,$key2 + addi $idx,$idx,16 + mtctr $rounds + +Ltweak_xts_dec: + ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm + vcipher $tweak,$tweak,$rndkey1 + lvx $rndkey1,$idx,$key2 + addi $idx,$idx,16 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vcipher $tweak,$tweak,$rndkey0 + lvx $rndkey0,$idx,$key2 + addi $idx,$idx,16 + bdnz Ltweak_xts_dec + + ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm + vcipher $tweak,$tweak,$rndkey1 + lvx $rndkey1,$idx,$key2 + li $idx,16 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vcipherlast $tweak,$tweak,$rndkey0 + + lvx $inptail,0,$inp + addi $inp,$inp,16 + + ?lvsl $keyperm,0,$key1 # prepare for unaligned key + lwz $rounds,240($key1) + srwi $rounds,$rounds,1 + subi $rounds,$rounds,1 + li $idx,16 + + vslb $eighty7,$seven,$seven # 0x808080..80 + vor $eighty7,$eighty7,$seven # 0x878787..87 + vspltisb $tmp,1 # 0x010101..01 + vsldoi $eighty7,$eighty7,$tmp,15 # 0x870101..01 + + ${UCMP}i $len,96 + bge _aesp8_xts_decrypt6x + + lvx $rndkey0,0,$key1 + lvx $rndkey1,$idx,$key1 + addi $idx,$idx,16 + vperm $inout,$inout,$inptail,$inpperm + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vxor $inout,$inout,$tweak + vxor $inout,$inout,$rndkey0 + lvx $rndkey0,$idx,$key1 + addi $idx,$idx,16 
+ mtctr $rounds + + ${UCMP}i $len,16 + blt Ltail_xts_dec + be?b Loop_xts_dec + +.align 5 +Loop_xts_dec: + ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm + vncipher $inout,$inout,$rndkey1 + lvx $rndkey1,$idx,$key1 + addi $idx,$idx,16 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vncipher $inout,$inout,$rndkey0 + lvx $rndkey0,$idx,$key1 + addi $idx,$idx,16 + bdnz Loop_xts_dec + + ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm + vncipher $inout,$inout,$rndkey1 + lvx $rndkey1,$idx,$key1 + li $idx,16 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vxor $rndkey0,$rndkey0,$tweak + vncipherlast $output,$inout,$rndkey0 + + le?vperm $tmp,$output,$output,$leperm + be?nop + le?stvx_u $tmp,0,$out + be?stvx_u $output,0,$out + addi $out,$out,16 + + subic. $len,$len,16 + beq Lxts_dec_done + + vmr $inout,$inptail + lvx $inptail,0,$inp + addi $inp,$inp,16 + lvx $rndkey0,0,$key1 + lvx $rndkey1,$idx,$key1 + addi $idx,$idx,16 + + vsrab $tmp,$tweak,$seven # next tweak value + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + vand $tmp,$tmp,$eighty7 + vxor $tweak,$tweak,$tmp + + vperm $inout,$inout,$inptail,$inpperm + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vxor $inout,$inout,$tweak + vxor $inout,$inout,$rndkey0 + lvx $rndkey0,$idx,$key1 + addi $idx,$idx,16 + + mtctr $rounds + ${UCMP}i $len,16 + bge Loop_xts_dec + +Ltail_xts_dec: + vsrab $tmp,$tweak,$seven # next tweak value + vaddubm $tweak1,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + vand $tmp,$tmp,$eighty7 + vxor $tweak1,$tweak1,$tmp + + subi $inp,$inp,16 + add $inp,$inp,$len + + vxor $inout,$inout,$tweak # :-( + vxor $inout,$inout,$tweak1 # :-) + +Loop_xts_dec_short: + ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm + vncipher $inout,$inout,$rndkey1 + lvx $rndkey1,$idx,$key1 + addi $idx,$idx,16 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vncipher $inout,$inout,$rndkey0 + lvx $rndkey0,$idx,$key1 + addi $idx,$idx,16 + bdnz Loop_xts_dec_short + + ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm + vncipher $inout,$inout,$rndkey1 + lvx 
$rndkey1,$idx,$key1 + li $idx,16 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vxor $rndkey0,$rndkey0,$tweak1 + vncipherlast $output,$inout,$rndkey0 + + le?vperm $tmp,$output,$output,$leperm + be?nop + le?stvx_u $tmp,0,$out + be?stvx_u $output,0,$out + + vmr $inout,$inptail + lvx $inptail,0,$inp + #addi $inp,$inp,16 + lvx $rndkey0,0,$key1 + lvx $rndkey1,$idx,$key1 + addi $idx,$idx,16 + vperm $inout,$inout,$inptail,$inpperm + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + + lvsr $inpperm,0,$len # $inpperm is no longer needed + vxor $inptail,$inptail,$inptail # $inptail is no longer needed + vspltisb $tmp,-1 + vperm $inptail,$inptail,$tmp,$inpperm + vsel $inout,$inout,$output,$inptail + + vxor $rndkey0,$rndkey0,$tweak + vxor $inout,$inout,$rndkey0 + lvx $rndkey0,$idx,$key1 + addi $idx,$idx,16 + + subi r11,$out,1 + mtctr $len + li $len,16 +Loop_xts_dec_steal: + lbzu r0,1(r11) + stb r0,16(r11) + bdnz Loop_xts_dec_steal + + mtctr $rounds + b Loop_xts_dec # one more time... + +Lxts_dec_done: + mtspr 256,r12 # restore vrsave + li r3,0 + blr + .long 0 + .byte 0,12,0x04,0,0x80,6,6,0 + .long 0 +.size .${prefix}_xts_decrypt,.-.${prefix}_xts_decrypt +___ +######################################################################### +{{ # Optimized XTS procedures # +my $key_="r11"; +my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31)); + $x00=0 if ($flavour =~ /osx/); +my ($in0, $in1, $in2, $in3, $in4, $in5 )=map("v$_",(0..5)); +my ($out0, $out1, $out2, $out3, $out4, $out5)=map("v$_",(7,12..16)); +my ($twk0, $twk1, $twk2, $twk3, $twk4, $twk5)=map("v$_",(17..22)); +my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys + # v26-v31 last 6 round keys +my ($keyperm)=($out0); # aliases with "caller", redundant assignment +my $taillen=$x70; + +$code.=<<___; +.align 5 +_aesp8_xts_encrypt6x: + $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp) + mflr r0 + li r7,`$FRAME+8*16+15` + li r8,`$FRAME+8*16+31` + $PUSH r0,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp) + stvx v20,r7,$sp # 
ABI says so + addi r7,r7,32 + stvx v21,r8,$sp + addi r8,r8,32 + stvx v22,r7,$sp + addi r7,r7,32 + stvx v23,r8,$sp + addi r8,r8,32 + stvx v24,r7,$sp + addi r7,r7,32 + stvx v25,r8,$sp + addi r8,r8,32 + stvx v26,r7,$sp + addi r7,r7,32 + stvx v27,r8,$sp + addi r8,r8,32 + stvx v28,r7,$sp + addi r7,r7,32 + stvx v29,r8,$sp + addi r8,r8,32 + stvx v30,r7,$sp + stvx v31,r8,$sp + mr r7,r0 + li r0,-1 + stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave + li $x10,0x10 + $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp) + li $x20,0x20 + $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp) + li $x30,0x30 + $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp) + li $x40,0x40 + $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp) + li $x50,0x50 + $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp) + li $x60,0x60 + $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp) + li $x70,0x70 + mtspr 256,r0 # vrsave = -1, r0 was set by li r0,-1 above + + subi $rounds,$rounds,3 # -4 in total + + lvx $rndkey0,$x00,$key1 # load key schedule + lvx v30,$x10,$key1 + addi $key1,$key1,0x20 + lvx v31,$x00,$key1 + ?vperm $rndkey0,$rndkey0,v30,$keyperm + addi $key_,$sp,$FRAME+15 # $key_ = stack area that receives the round-key copies + mtctr $rounds + +Load_xts_enc_key: + ?vperm v24,v30,v31,$keyperm + lvx v30,$x10,$key1 + addi $key1,$key1,0x20 + stvx v24,$x00,$key_ # off-load round[1] + ?vperm v25,v31,v30,$keyperm + lvx v31,$x00,$key1 + stvx v25,$x10,$key_ # off-load round[2] + addi $key_,$key_,0x20 + bdnz Load_xts_enc_key # two round keys are off-loaded per pass + + lvx v26,$x10,$key1 + ?vperm v24,v30,v31,$keyperm + lvx v27,$x20,$key1 + stvx v24,$x00,$key_ # off-load round[3] + ?vperm v25,v31,v26,$keyperm + lvx v28,$x30,$key1 + stvx v25,$x10,$key_ # off-load round[4] + addi $key_,$sp,$FRAME+15 # rewind $key_ + ?vperm v26,v26,v27,$keyperm + lvx v29,$x40,$key1 + ?vperm v27,v27,v28,$keyperm + lvx v30,$x50,$key1 + ?vperm v28,v28,v29,$keyperm + lvx v31,$x60,$key1 + ?vperm v29,v29,v30,$keyperm + lvx $twk5,$x70,$key1 # borrow $twk5 + ?vperm v30,v30,v31,$keyperm + lvx v24,$x00,$key_ # pre-load round[1] + ?vperm v31,v31,$twk5,$keyperm + lvx v25,$x10,$key_ # pre-load round[2] + + vperm $in0,$inout,$inptail,$inpperm + subi
$inp,$inp,31 # undo "caller" + vxor $twk0,$tweak,$rndkey0 # $twk0 = tweak with round key 0 folded in + vsrab $tmp,$tweak,$seven # next tweak value + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + vand $tmp,$tmp,$eighty7 + vxor $out0,$in0,$twk0 + vxor $tweak,$tweak,$tmp + + lvx_u $in1,$x10,$inp + vxor $twk1,$tweak,$rndkey0 + vsrab $tmp,$tweak,$seven # next tweak value + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + le?vperm $in1,$in1,$in1,$leperm + vand $tmp,$tmp,$eighty7 + vxor $out1,$in1,$twk1 + vxor $tweak,$tweak,$tmp + + lvx_u $in2,$x20,$inp + andi. $taillen,$len,15 # $taillen = $len mod 16 + vxor $twk2,$tweak,$rndkey0 + vsrab $tmp,$tweak,$seven # next tweak value + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + le?vperm $in2,$in2,$in2,$leperm + vand $tmp,$tmp,$eighty7 + vxor $out2,$in2,$twk2 + vxor $tweak,$tweak,$tmp + + lvx_u $in3,$x30,$inp + sub $len,$len,$taillen # $len becomes a multiple of 16 + vxor $twk3,$tweak,$rndkey0 + vsrab $tmp,$tweak,$seven # next tweak value + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + le?vperm $in3,$in3,$in3,$leperm + vand $tmp,$tmp,$eighty7 + vxor $out3,$in3,$twk3 + vxor $tweak,$tweak,$tmp + + lvx_u $in4,$x40,$inp + subi $len,$len,0x60 # 0x60 = the six 16-byte blocks set up here + vxor $twk4,$tweak,$rndkey0 + vsrab $tmp,$tweak,$seven # next tweak value + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + le?vperm $in4,$in4,$in4,$leperm + vand $tmp,$tmp,$eighty7 + vxor $out4,$in4,$twk4 + vxor $tweak,$tweak,$tmp + + lvx_u $in5,$x50,$inp + addi $inp,$inp,0x60 + vxor $twk5,$tweak,$rndkey0 + vsrab $tmp,$tweak,$seven # next tweak value + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + le?vperm $in5,$in5,$in5,$leperm + vand $tmp,$tmp,$eighty7 + vxor $out5,$in5,$twk5 + vxor $tweak,$tweak,$tmp + + vxor v31,v31,$rndkey0 # cancels the $rndkey0 carried inside the tweaks + mtctr $rounds + b Loop_xts_enc6x + +.align 5 +Loop_xts_enc6x: + vcipher $out0,$out0,v24 + vcipher $out1,$out1,v24 + vcipher $out2,$out2,v24 + vcipher $out3,$out3,v24 + vcipher $out4,$out4,v24 + vcipher $out5,$out5,v24 + lvx v24,$x20,$key_ # round[3] + addi $key_,$key_,0x20 + + vcipher
$out0,$out0,v25 + vcipher $out1,$out1,v25 + vcipher $out2,$out2,v25 + vcipher $out3,$out3,v25 + vcipher $out4,$out4,v25 + vcipher $out5,$out5,v25 + lvx v25,$x10,$key_ # round[4] + bdnz Loop_xts_enc6x # two cipher rounds per pass + + subic $len,$len,96 # $len-=96 + vxor $in0,$twk0,v31 # xor with last round key + vcipher $out0,$out0,v24 + vcipher $out1,$out1,v24 + vsrab $tmp,$tweak,$seven # next tweak value + vxor $twk0,$tweak,$rndkey0 + vaddubm $tweak,$tweak,$tweak + vcipher $out2,$out2,v24 + vcipher $out3,$out3,v24 + vsldoi $tmp,$tmp,$tmp,15 + vcipher $out4,$out4,v24 + vcipher $out5,$out5,v24 + + subfe. r0,r0,r0 # borrow?-1:0 + vand $tmp,$tmp,$eighty7 + vcipher $out0,$out0,v25 + vcipher $out1,$out1,v25 + vxor $tweak,$tweak,$tmp + vcipher $out2,$out2,v25 + vcipher $out3,$out3,v25 + vxor $in1,$twk1,v31 + vsrab $tmp,$tweak,$seven # next tweak value + vxor $twk1,$tweak,$rndkey0 + vcipher $out4,$out4,v25 + vcipher $out5,$out5,v25 + + and r0,r0,$len # r0 = $len when the subic borrowed, else 0 + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + vcipher $out0,$out0,v26 + vcipher $out1,$out1,v26 + vand $tmp,$tmp,$eighty7 + vcipher $out2,$out2,v26 + vcipher $out3,$out3,v26 + vxor $tweak,$tweak,$tmp + vcipher $out4,$out4,v26 + vcipher $out5,$out5,v26 + + add $inp,$inp,r0 # $inp is adjusted in such + # way that at exit from the + # loop inX-in5 are loaded + # with last "words" + vxor $in2,$twk2,v31 + vsrab $tmp,$tweak,$seven # next tweak value + vxor $twk2,$tweak,$rndkey0 + vaddubm $tweak,$tweak,$tweak + vcipher $out0,$out0,v27 + vcipher $out1,$out1,v27 + vsldoi $tmp,$tmp,$tmp,15 + vcipher $out2,$out2,v27 + vcipher $out3,$out3,v27 + vand $tmp,$tmp,$eighty7 + vcipher $out4,$out4,v27 + vcipher $out5,$out5,v27 + + addi $key_,$sp,$FRAME+15 # rewind $key_ + vxor $tweak,$tweak,$tmp + vcipher $out0,$out0,v28 + vcipher $out1,$out1,v28 + vxor $in3,$twk3,v31 + vsrab $tmp,$tweak,$seven # next tweak value + vxor $twk3,$tweak,$rndkey0 + vcipher $out2,$out2,v28 + vcipher $out3,$out3,v28 + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + vcipher
$out4,$out4,v28 + vcipher $out5,$out5,v28 + lvx v24,$x00,$key_ # re-pre-load round[1] + vand $tmp,$tmp,$eighty7 + + vcipher $out0,$out0,v29 + vcipher $out1,$out1,v29 + vxor $tweak,$tweak,$tmp + vcipher $out2,$out2,v29 + vcipher $out3,$out3,v29 + vxor $in4,$twk4,v31 + vsrab $tmp,$tweak,$seven # next tweak value + vxor $twk4,$tweak,$rndkey0 + vcipher $out4,$out4,v29 + vcipher $out5,$out5,v29 + lvx v25,$x10,$key_ # re-pre-load round[2] + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + + vcipher $out0,$out0,v30 + vcipher $out1,$out1,v30 + vand $tmp,$tmp,$eighty7 + vcipher $out2,$out2,v30 + vcipher $out3,$out3,v30 + vxor $tweak,$tweak,$tmp + vcipher $out4,$out4,v30 + vcipher $out5,$out5,v30 + vxor $in5,$twk5,v31 + vsrab $tmp,$tweak,$seven # next tweak value + vxor $twk5,$tweak,$rndkey0 + + vcipherlast $out0,$out0,$in0 # $in0 holds the old $twk0 xored with v31 + lvx_u $in0,$x00,$inp # load next input block + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + vcipherlast $out1,$out1,$in1 + lvx_u $in1,$x10,$inp + vcipherlast $out2,$out2,$in2 + le?vperm $in0,$in0,$in0,$leperm + lvx_u $in2,$x20,$inp + vand $tmp,$tmp,$eighty7 + vcipherlast $out3,$out3,$in3 + le?vperm $in1,$in1,$in1,$leperm + lvx_u $in3,$x30,$inp + vcipherlast $out4,$out4,$in4 + le?vperm $in2,$in2,$in2,$leperm + lvx_u $in4,$x40,$inp + vxor $tweak,$tweak,$tmp + vcipherlast $tmp,$out5,$in5 # last block might be needed + # in stealing mode + le?vperm $in3,$in3,$in3,$leperm + lvx_u $in5,$x50,$inp + addi $inp,$inp,0x60 # step past the six blocks just loaded + le?vperm $in4,$in4,$in4,$leperm + le?vperm $in5,$in5,$in5,$leperm + + le?vperm $out0,$out0,$out0,$leperm + le?vperm $out1,$out1,$out1,$leperm + stvx_u $out0,$x00,$out # store output + vxor $out0,$in0,$twk0 # next batch: fresh input xored with the new $twk0 + le?vperm $out2,$out2,$out2,$leperm + stvx_u $out1,$x10,$out + vxor $out1,$in1,$twk1 + le?vperm $out3,$out3,$out3,$leperm + stvx_u $out2,$x20,$out + vxor $out2,$in2,$twk2 + le?vperm $out4,$out4,$out4,$leperm + stvx_u $out3,$x30,$out + vxor $out3,$in3,$twk3 + le?vperm $out5,$tmp,$tmp,$leperm + stvx_u $out4,$x40,$out +
vxor $out4,$in4,$twk4 + le?stvx_u $out5,$x50,$out + be?stvx_u $tmp, $x50,$out + vxor $out5,$in5,$twk5 + addi $out,$out,0x60 + + mtctr $rounds + beq Loop_xts_enc6x # did $len-=96 borrow? + + addic. $len,$len,0x60 # add back the 96 taken by the borrowing subic + beq Lxts_enc6x_zero # no whole blocks remain + cmpwi $len,0x20 + blt Lxts_enc6x_one # $len is below 0x20 + nop + beq Lxts_enc6x_two # $len equals 0x20 + cmpwi $len,0x40 + blt Lxts_enc6x_three # $len is below 0x40 + nop + beq Lxts_enc6x_four # $len equals 0x40, anything larger falls into five + +Lxts_enc6x_five: + vxor $out0,$in1,$twk0 + vxor $out1,$in2,$twk1 + vxor $out2,$in3,$twk2 + vxor $out3,$in4,$twk3 + vxor $out4,$in5,$twk4 + + bl _aesp8_xts_enc5x + + le?vperm $out0,$out0,$out0,$leperm + vmr $twk0,$twk5 # unused tweak + le?vperm $out1,$out1,$out1,$leperm + stvx_u $out0,$x00,$out # store output + le?vperm $out2,$out2,$out2,$leperm + stvx_u $out1,$x10,$out + le?vperm $out3,$out3,$out3,$leperm + stvx_u $out2,$x20,$out + vxor $tmp,$out4,$twk5 # last block prep for stealing + le?vperm $out4,$out4,$out4,$leperm + stvx_u $out3,$x30,$out + stvx_u $out4,$x40,$out + addi $out,$out,0x50 + bne Lxts_enc6x_steal # cr0 comes from cmpwi $taillen,0 inside _aesp8_xts_enc5x + b Lxts_enc6x_done + +.align 4 +Lxts_enc6x_four: + vxor $out0,$in2,$twk0 + vxor $out1,$in3,$twk1 + vxor $out2,$in4,$twk2 + vxor $out3,$in5,$twk3 + vxor $out4,$out4,$out4 # $out4 carries no data here, zero it + + bl _aesp8_xts_enc5x + + le?vperm $out0,$out0,$out0,$leperm + vmr $twk0,$twk4 # unused tweak + le?vperm $out1,$out1,$out1,$leperm + stvx_u $out0,$x00,$out # store output + le?vperm $out2,$out2,$out2,$leperm + stvx_u $out1,$x10,$out + vxor $tmp,$out3,$twk4 # last block prep for stealing + le?vperm $out3,$out3,$out3,$leperm + stvx_u $out2,$x20,$out + stvx_u $out3,$x30,$out + addi $out,$out,0x40 + bne Lxts_enc6x_steal + b Lxts_enc6x_done + +.align 4 +Lxts_enc6x_three: + vxor $out0,$in3,$twk0 + vxor $out1,$in4,$twk1 + vxor $out2,$in5,$twk2 + vxor $out3,$out3,$out3 + vxor $out4,$out4,$out4 + + bl _aesp8_xts_enc5x + + le?vperm $out0,$out0,$out0,$leperm + vmr $twk0,$twk3 # unused tweak + le?vperm $out1,$out1,$out1,$leperm + stvx_u $out0,$x00,$out # store output + vxor $tmp,$out2,$twk3 # last block prep for stealing + le?vperm
$out2,$out2,$out2,$leperm + stvx_u $out1,$x10,$out + stvx_u $out2,$x20,$out + addi $out,$out,0x30 + bne Lxts_enc6x_steal + b Lxts_enc6x_done + +.align 4 +Lxts_enc6x_two: + vxor $out0,$in4,$twk0 + vxor $out1,$in5,$twk1 + vxor $out2,$out2,$out2 + vxor $out3,$out3,$out3 + vxor $out4,$out4,$out4 + + bl _aesp8_xts_enc5x + + le?vperm $out0,$out0,$out0,$leperm + vmr $twk0,$twk2 # unused tweak + vxor $tmp,$out1,$twk2 # last block prep for stealing + le?vperm $out1,$out1,$out1,$leperm + stvx_u $out0,$x00,$out # store output + stvx_u $out1,$x10,$out + addi $out,$out,0x20 + bne Lxts_enc6x_steal + b Lxts_enc6x_done + +.align 4 +Lxts_enc6x_one: + vxor $out0,$in5,$twk0 + nop +Loop_xts_enc1x: + vcipher $out0,$out0,v24 + lvx v24,$x20,$key_ # round[3] + addi $key_,$key_,0x20 + + vcipher $out0,$out0,v25 + lvx v25,$x10,$key_ # round[4] + bdnz Loop_xts_enc1x + + add $inp,$inp,$taillen + cmpwi $taillen,0 # cr0 feeds the bne Lxts_enc6x_steal further down + vcipher $out0,$out0,v24 + + subi $inp,$inp,16 + vcipher $out0,$out0,v25 + + lvsr $inpperm,0,$taillen + vcipher $out0,$out0,v26 + + lvx_u $in0,0,$inp + vcipher $out0,$out0,v27 + + addi $key_,$sp,$FRAME+15 # rewind $key_ + vcipher $out0,$out0,v28 + lvx v24,$x00,$key_ # re-pre-load round[1] + + vcipher $out0,$out0,v29 + lvx v25,$x10,$key_ # re-pre-load round[2] + vxor $twk0,$twk0,v31 # operand for vcipherlast: $twk0 xored with v31 + + le?vperm $in0,$in0,$in0,$leperm + vcipher $out0,$out0,v30 + + vperm $in0,$in0,$in0,$inpperm + vcipherlast $out0,$out0,$twk0 + + vmr $twk0,$twk1 # unused tweak + vxor $tmp,$out0,$twk1 # last block prep for stealing + le?vperm $out0,$out0,$out0,$leperm + stvx_u $out0,$x00,$out # store output + addi $out,$out,0x10 + bne Lxts_enc6x_steal + b Lxts_enc6x_done + +.align 4 +Lxts_enc6x_zero: + cmpwi $taillen,0 + beq Lxts_enc6x_done # no partial tail either + + add $inp,$inp,$taillen + subi $inp,$inp,16 + lvx_u $in0,0,$inp + lvsr $inpperm,0,$taillen # $in5 is no more + le?vperm $in0,$in0,$in0,$leperm + vperm $in0,$in0,$in0,$inpperm + vxor $tmp,$tmp,$twk0 +Lxts_enc6x_steal: + vxor $in0,$in0,$twk0 + vxor $out0,$out0,$out0 + vspltisb
$out1,-1 + vperm $out0,$out0,$out1,$inpperm + vsel $out0,$in0,$tmp,$out0 # $tmp is last block, remember? + + subi r3,$out,17 + subi $out,$out,16 + mtctr $taillen # one copy-loop pass per tail byte + +Loop_xts_enc6x_steal: + lbzu r0,1(r3) + stb r0,16(r3) # copy the byte 16 positions forward + bdnz Loop_xts_enc6x_steal + + li $taillen,0 # the extra enc1x pass then skips the steal branch + mtctr $rounds + b Loop_xts_enc1x # one more time... + +.align 4 +Lxts_enc6x_done: + mtlr r7 # r7 holds the return address saved in the prologue + li r10,`$FRAME+15` + li r11,`$FRAME+31` + stvx $seven,r10,$sp # wipe copies of round keys + addi r10,r10,32 + stvx $seven,r11,$sp + addi r11,r11,32 + stvx $seven,r10,$sp + addi r10,r10,32 + stvx $seven,r11,$sp + addi r11,r11,32 + stvx $seven,r10,$sp + addi r10,r10,32 + stvx $seven,r11,$sp + addi r11,r11,32 + stvx $seven,r10,$sp + addi r10,r10,32 + stvx $seven,r11,$sp + addi r11,r11,32 + + mtspr 256,$vrsave # write $vrsave back to SPR 256 + lvx v20,r10,$sp # ABI says so + addi r10,r10,32 + lvx v21,r11,$sp + addi r11,r11,32 + lvx v22,r10,$sp + addi r10,r10,32 + lvx v23,r11,$sp + addi r11,r11,32 + lvx v24,r10,$sp + addi r10,r10,32 + lvx v25,r11,$sp + addi r11,r11,32 + lvx v26,r10,$sp + addi r10,r10,32 + lvx v27,r11,$sp + addi r11,r11,32 + lvx v28,r10,$sp + addi r10,r10,32 + lvx v29,r11,$sp + addi r11,r11,32 + lvx v30,r10,$sp + lvx v31,r11,$sp + $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp) + $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp) + $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp) + $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp) + $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp) + $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp) + addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T` # pop the frame allocated on entry + blr + .long 0 + .byte 0,12,0x04,1,0x80,6,6,0 + .long 0 + +.align 5 +_aesp8_xts_enc5x: + vcipher $out0,$out0,v24 + vcipher $out1,$out1,v24 + vcipher $out2,$out2,v24 + vcipher $out3,$out3,v24 + vcipher $out4,$out4,v24 + lvx v24,$x20,$key_ # round[3] + addi $key_,$key_,0x20 + + vcipher $out0,$out0,v25 + vcipher $out1,$out1,v25 + vcipher $out2,$out2,v25 + vcipher $out3,$out3,v25 + vcipher $out4,$out4,v25 + lvx v25,$x10,$key_ # round[4] + bdnz _aesp8_xts_enc5x + + add $inp,$inp,$taillen + cmpwi $taillen,0 # the callers test this result with bne after the return + vcipher $out0,$out0,v24
+ vcipher $out1,$out1,v24 + vcipher $out2,$out2,v24 + vcipher $out3,$out3,v24 + vcipher $out4,$out4,v24 + + subi $inp,$inp,16 + vcipher $out0,$out0,v25 + vcipher $out1,$out1,v25 + vcipher $out2,$out2,v25 + vcipher $out3,$out3,v25 + vcipher $out4,$out4,v25 + vxor $twk0,$twk0,v31 + + vcipher $out0,$out0,v26 + lvsr $inpperm,r0,$taillen # $in5 is no more + vcipher $out1,$out1,v26 + vcipher $out2,$out2,v26 + vcipher $out3,$out3,v26 + vcipher $out4,$out4,v26 + vxor $in1,$twk1,v31 # operand for the vcipherlast of $out1 below + + vcipher $out0,$out0,v27 + lvx_u $in0,0,$inp + vcipher $out1,$out1,v27 + vcipher $out2,$out2,v27 + vcipher $out3,$out3,v27 + vcipher $out4,$out4,v27 + vxor $in2,$twk2,v31 + + addi $key_,$sp,$FRAME+15 # rewind $key_ + vcipher $out0,$out0,v28 + vcipher $out1,$out1,v28 + vcipher $out2,$out2,v28 + vcipher $out3,$out3,v28 + vcipher $out4,$out4,v28 + lvx v24,$x00,$key_ # re-pre-load round[1] + vxor $in3,$twk3,v31 + + vcipher $out0,$out0,v29 + le?vperm $in0,$in0,$in0,$leperm + vcipher $out1,$out1,v29 + vcipher $out2,$out2,v29 + vcipher $out3,$out3,v29 + vcipher $out4,$out4,v29 + lvx v25,$x10,$key_ # re-pre-load round[2] + vxor $in4,$twk4,v31 + + vcipher $out0,$out0,v30 + vperm $in0,$in0,$in0,$inpperm + vcipher $out1,$out1,v30 + vcipher $out2,$out2,v30 + vcipher $out3,$out3,v30 + vcipher $out4,$out4,v30 + + vcipherlast $out0,$out0,$twk0 + vcipherlast $out1,$out1,$in1 + vcipherlast $out2,$out2,$in2 + vcipherlast $out3,$out3,$in3 + vcipherlast $out4,$out4,$in4 + blr + .long 0 + .byte 0,12,0x14,0,0,0,0,0 + +.align 5 +_aesp8_xts_decrypt6x: + $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp) + mflr r0 # r0 = link register + li r7,`$FRAME+8*16+15` + li r8,`$FRAME+8*16+31` + $PUSH r0,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp) + stvx v20,r7,$sp # ABI says so + addi r7,r7,32 + stvx v21,r8,$sp + addi r8,r8,32 + stvx v22,r7,$sp + addi r7,r7,32 + stvx v23,r8,$sp + addi r8,r8,32 + stvx v24,r7,$sp + addi r7,r7,32 + stvx v25,r8,$sp + addi r8,r8,32 + stvx v26,r7,$sp + addi r7,r7,32 + stvx v27,r8,$sp + addi r8,r8,32 + stvx v28,r7,$sp + addi r7,r7,32
+ stvx v29,r8,$sp + addi r8,r8,32 + stvx v30,r7,$sp + stvx v31,r8,$sp + mr r7,r0 + li r0,-1 + stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave + li $x10,0x10 + $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp) + li $x20,0x20 + $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp) + li $x30,0x30 + $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp) + li $x40,0x40 + $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp) + li $x50,0x50 + $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp) + li $x60,0x60 + $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp) + li $x70,0x70 + mtspr 256,r0 # vrsave = -1, r0 was set by li r0,-1 above + + subi $rounds,$rounds,3 # -4 in total + + lvx $rndkey0,$x00,$key1 # load key schedule + lvx v30,$x10,$key1 + addi $key1,$key1,0x20 + lvx v31,$x00,$key1 + ?vperm $rndkey0,$rndkey0,v30,$keyperm + addi $key_,$sp,$FRAME+15 # $key_ = stack area that receives the round-key copies + mtctr $rounds + +Load_xts_dec_key: + ?vperm v24,v30,v31,$keyperm + lvx v30,$x10,$key1 + addi $key1,$key1,0x20 + stvx v24,$x00,$key_ # off-load round[1] + ?vperm v25,v31,v30,$keyperm + lvx v31,$x00,$key1 + stvx v25,$x10,$key_ # off-load round[2] + addi $key_,$key_,0x20 + bdnz Load_xts_dec_key # two round keys are off-loaded per pass + + lvx v26,$x10,$key1 + ?vperm v24,v30,v31,$keyperm + lvx v27,$x20,$key1 + stvx v24,$x00,$key_ # off-load round[3] + ?vperm v25,v31,v26,$keyperm + lvx v28,$x30,$key1 + stvx v25,$x10,$key_ # off-load round[4] + addi $key_,$sp,$FRAME+15 # rewind $key_ + ?vperm v26,v26,v27,$keyperm + lvx v29,$x40,$key1 + ?vperm v27,v27,v28,$keyperm + lvx v30,$x50,$key1 + ?vperm v28,v28,v29,$keyperm + lvx v31,$x60,$key1 + ?vperm v29,v29,v30,$keyperm + lvx $twk5,$x70,$key1 # borrow $twk5 + ?vperm v30,v30,v31,$keyperm + lvx v24,$x00,$key_ # pre-load round[1] + ?vperm v31,v31,$twk5,$keyperm + lvx v25,$x10,$key_ # pre-load round[2] + + vperm $in0,$inout,$inptail,$inpperm + subi $inp,$inp,31 # undo "caller" + vxor $twk0,$tweak,$rndkey0 # $twk0 = tweak with round key 0 folded in + vsrab
$tmp,$tweak,$seven # next tweak value + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + vand $tmp,$tmp,$eighty7 + vxor $out0,$in0,$twk0 + vxor $tweak,$tweak,$tmp + + lvx_u $in1,$x10,$inp + vxor $twk1,$tweak,$rndkey0 + vsrab $tmp,$tweak,$seven # next tweak value + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + le?vperm $in1,$in1,$in1,$leperm + vand $tmp,$tmp,$eighty7 + vxor $out1,$in1,$twk1 + vxor $tweak,$tweak,$tmp + + lvx_u $in2,$x20,$inp + andi. $taillen,$len,15 # $taillen = $len mod 16 + vxor $twk2,$tweak,$rndkey0 + vsrab $tmp,$tweak,$seven # next tweak value + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + le?vperm $in2,$in2,$in2,$leperm + vand $tmp,$tmp,$eighty7 + vxor $out2,$in2,$twk2 + vxor $tweak,$tweak,$tmp + + lvx_u $in3,$x30,$inp + sub $len,$len,$taillen # $len becomes a multiple of 16 + vxor $twk3,$tweak,$rndkey0 + vsrab $tmp,$tweak,$seven # next tweak value + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + le?vperm $in3,$in3,$in3,$leperm + vand $tmp,$tmp,$eighty7 + vxor $out3,$in3,$twk3 + vxor $tweak,$tweak,$tmp + + lvx_u $in4,$x40,$inp + subi $len,$len,0x60 # 0x60 = the six 16-byte blocks set up here + vxor $twk4,$tweak,$rndkey0 + vsrab $tmp,$tweak,$seven # next tweak value + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + le?vperm $in4,$in4,$in4,$leperm + vand $tmp,$tmp,$eighty7 + vxor $out4,$in4,$twk4 + vxor $tweak,$tweak,$tmp + + lvx_u $in5,$x50,$inp + addi $inp,$inp,0x60 + vxor $twk5,$tweak,$rndkey0 + vsrab $tmp,$tweak,$seven # next tweak value + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + le?vperm $in5,$in5,$in5,$leperm + vand $tmp,$tmp,$eighty7 + vxor $out5,$in5,$twk5 + vxor $tweak,$tweak,$tmp + + vxor v31,v31,$rndkey0 # cancels the $rndkey0 carried inside the tweaks + mtctr $rounds + b Loop_xts_dec6x + +.align 5 +Loop_xts_dec6x: + vncipher $out0,$out0,v24 + vncipher $out1,$out1,v24 + vncipher $out2,$out2,v24 + vncipher $out3,$out3,v24 + vncipher $out4,$out4,v24 + vncipher $out5,$out5,v24 + lvx v24,$x20,$key_ # round[3] + addi $key_,$key_,0x20 + + vncipher $out0,$out0,v25 + vncipher $out1,$out1,v25 + vncipher $out2,$out2,v25 + vncipher $out3,$out3,v25 + vncipher $out4,$out4,v25 + vncipher $out5,$out5,v25 + lvx v25,$x10,$key_ # round[4] + bdnz Loop_xts_dec6x # two cipher rounds per pass + + subic $len,$len,96 # $len-=96 + vxor $in0,$twk0,v31 # xor with last round key + vncipher
$out0,$out0,v24 + vncipher $out1,$out1,v24 + vsrab $tmp,$tweak,$seven # next tweak value + vxor $twk0,$tweak,$rndkey0 + vaddubm $tweak,$tweak,$tweak + vncipher $out2,$out2,v24 + vncipher $out3,$out3,v24 + vsldoi $tmp,$tmp,$tmp,15 + vncipher $out4,$out4,v24 + vncipher $out5,$out5,v24 + + subfe. r0,r0,r0 # borrow?-1:0 + vand $tmp,$tmp,$eighty7 + vncipher $out0,$out0,v25 + vncipher $out1,$out1,v25 + vxor $tweak,$tweak,$tmp + vncipher $out2,$out2,v25 + vncipher $out3,$out3,v25 + vxor $in1,$twk1,v31 + vsrab $tmp,$tweak,$seven # next tweak value + vxor $twk1,$tweak,$rndkey0 + vncipher $out4,$out4,v25 + vncipher $out5,$out5,v25 + + and r0,r0,$len # r0 = $len when the subic borrowed, else 0 + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + vncipher $out0,$out0,v26 + vncipher $out1,$out1,v26 + vand $tmp,$tmp,$eighty7 + vncipher $out2,$out2,v26 + vncipher $out3,$out3,v26 + vxor $tweak,$tweak,$tmp + vncipher $out4,$out4,v26 + vncipher $out5,$out5,v26 + + add $inp,$inp,r0 # $inp is adjusted in such + # way that at exit from the + # loop inX-in5 are loaded + # with last "words" + vxor $in2,$twk2,v31 + vsrab $tmp,$tweak,$seven # next tweak value + vxor $twk2,$tweak,$rndkey0 + vaddubm $tweak,$tweak,$tweak + vncipher $out0,$out0,v27 + vncipher $out1,$out1,v27 + vsldoi $tmp,$tmp,$tmp,15 + vncipher $out2,$out2,v27 + vncipher $out3,$out3,v27 + vand $tmp,$tmp,$eighty7 + vncipher $out4,$out4,v27 + vncipher $out5,$out5,v27 + + addi $key_,$sp,$FRAME+15 # rewind $key_ + vxor $tweak,$tweak,$tmp + vncipher $out0,$out0,v28 + vncipher $out1,$out1,v28 + vxor $in3,$twk3,v31 + vsrab $tmp,$tweak,$seven # next tweak value + vxor $twk3,$tweak,$rndkey0 + vncipher $out2,$out2,v28 + vncipher $out3,$out3,v28 + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + vncipher $out4,$out4,v28 + vncipher $out5,$out5,v28 + lvx v24,$x00,$key_ # re-pre-load round[1] + vand $tmp,$tmp,$eighty7 + + vncipher $out0,$out0,v29 + vncipher $out1,$out1,v29 + vxor $tweak,$tweak,$tmp + vncipher $out2,$out2,v29 + vncipher $out3,$out3,v29 + vxor
$in4,$twk4,v31 + vsrab $tmp,$tweak,$seven # next tweak value + vxor $twk4,$tweak,$rndkey0 + vncipher $out4,$out4,v29 + vncipher $out5,$out5,v29 + lvx v25,$x10,$key_ # re-pre-load round[2] + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + + vncipher $out0,$out0,v30 + vncipher $out1,$out1,v30 + vand $tmp,$tmp,$eighty7 + vncipher $out2,$out2,v30 + vncipher $out3,$out3,v30 + vxor $tweak,$tweak,$tmp + vncipher $out4,$out4,v30 + vncipher $out5,$out5,v30 + vxor $in5,$twk5,v31 + vsrab $tmp,$tweak,$seven # next tweak value + vxor $twk5,$tweak,$rndkey0 + + vncipherlast $out0,$out0,$in0 # $in0 holds the old $twk0 xored with v31 + lvx_u $in0,$x00,$inp # load next input block + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + vncipherlast $out1,$out1,$in1 + lvx_u $in1,$x10,$inp + vncipherlast $out2,$out2,$in2 + le?vperm $in0,$in0,$in0,$leperm + lvx_u $in2,$x20,$inp + vand $tmp,$tmp,$eighty7 + vncipherlast $out3,$out3,$in3 + le?vperm $in1,$in1,$in1,$leperm + lvx_u $in3,$x30,$inp + vncipherlast $out4,$out4,$in4 + le?vperm $in2,$in2,$in2,$leperm + lvx_u $in4,$x40,$inp + vxor $tweak,$tweak,$tmp + vncipherlast $out5,$out5,$in5 + le?vperm $in3,$in3,$in3,$leperm + lvx_u $in5,$x50,$inp + addi $inp,$inp,0x60 # step past the six blocks just loaded + le?vperm $in4,$in4,$in4,$leperm + le?vperm $in5,$in5,$in5,$leperm + + le?vperm $out0,$out0,$out0,$leperm + le?vperm $out1,$out1,$out1,$leperm + stvx_u $out0,$x00,$out # store output + vxor $out0,$in0,$twk0 # next batch: fresh input xored with the new $twk0 + le?vperm $out2,$out2,$out2,$leperm + stvx_u $out1,$x10,$out + vxor $out1,$in1,$twk1 + le?vperm $out3,$out3,$out3,$leperm + stvx_u $out2,$x20,$out + vxor $out2,$in2,$twk2 + le?vperm $out4,$out4,$out4,$leperm + stvx_u $out3,$x30,$out + vxor $out3,$in3,$twk3 + le?vperm $out5,$out5,$out5,$leperm + stvx_u $out4,$x40,$out + vxor $out4,$in4,$twk4 + stvx_u $out5,$x50,$out + vxor $out5,$in5,$twk5 + addi $out,$out,0x60 + + mtctr $rounds + beq Loop_xts_dec6x # did $len-=96 borrow? + + addic.
$len,$len,0x60 + beq Lxts_dec6x_zero + cmpwi $len,0x20 + blt Lxts_dec6x_one + nop + beq Lxts_dec6x_two + cmpwi $len,0x40 + blt Lxts_dec6x_three + nop + beq Lxts_dec6x_four + +Lxts_dec6x_five: + vxor $out0,$in1,$twk0 + vxor $out1,$in2,$twk1 + vxor $out2,$in3,$twk2 + vxor $out3,$in4,$twk3 + vxor $out4,$in5,$twk4 + + bl _aesp8_xts_dec5x + + le?vperm $out0,$out0,$out0,$leperm + vmr $twk0,$twk5 # unused tweak + vxor $twk1,$tweak,$rndkey0 # $twk1 = the tweak after $twk5, with $rndkey0 folded in + le?vperm $out1,$out1,$out1,$leperm + stvx_u $out0,$x00,$out # store output + vxor $out0,$in0,$twk1 # $out0 is what Lxts_dec6x_steal deciphers first + le?vperm $out2,$out2,$out2,$leperm + stvx_u $out1,$x10,$out + le?vperm $out3,$out3,$out3,$leperm + stvx_u $out2,$x20,$out + le?vperm $out4,$out4,$out4,$leperm + stvx_u $out3,$x30,$out + stvx_u $out4,$x40,$out + addi $out,$out,0x50 + bne Lxts_dec6x_steal + b Lxts_dec6x_done + +.align 4 +Lxts_dec6x_four: + vxor $out0,$in2,$twk0 + vxor $out1,$in3,$twk1 + vxor $out2,$in4,$twk2 + vxor $out3,$in5,$twk3 + vxor $out4,$out4,$out4 # $out4 carries no data here, zero it + + bl _aesp8_xts_dec5x + + le?vperm $out0,$out0,$out0,$leperm + vmr $twk0,$twk4 # unused tweak + vmr $twk1,$twk5 # and the tweak after it + le?vperm $out1,$out1,$out1,$leperm + stvx_u $out0,$x00,$out # store output + vxor $out0,$in0,$twk5 + le?vperm $out2,$out2,$out2,$leperm + stvx_u $out1,$x10,$out + le?vperm $out3,$out3,$out3,$leperm + stvx_u $out2,$x20,$out + stvx_u $out3,$x30,$out + addi $out,$out,0x40 + bne Lxts_dec6x_steal + b Lxts_dec6x_done + +.align 4 +Lxts_dec6x_three: + vxor $out0,$in3,$twk0 + vxor $out1,$in4,$twk1 + vxor $out2,$in5,$twk2 + vxor $out3,$out3,$out3 + vxor $out4,$out4,$out4 + + bl _aesp8_xts_dec5x + + le?vperm $out0,$out0,$out0,$leperm + vmr $twk0,$twk3 # unused tweak + vmr $twk1,$twk4 + le?vperm $out1,$out1,$out1,$leperm + stvx_u $out0,$x00,$out # store output + vxor $out0,$in0,$twk4 + le?vperm $out2,$out2,$out2,$leperm + stvx_u $out1,$x10,$out + stvx_u $out2,$x20,$out + addi $out,$out,0x30 + bne Lxts_dec6x_steal + b Lxts_dec6x_done + +.align 4 +Lxts_dec6x_two: + vxor $out0,$in4,$twk0 + vxor $out1,$in5,$twk1 + vxor
$out2,$out2,$out2 + vxor $out3,$out3,$out3 + vxor $out4,$out4,$out4 + + bl _aesp8_xts_dec5x + + le?vperm $out0,$out0,$out0,$leperm + vmr $twk0,$twk2 # unused tweak + vmr $twk1,$twk3 # and the tweak after it + le?vperm $out1,$out1,$out1,$leperm + stvx_u $out0,$x00,$out # store output + vxor $out0,$in0,$twk3 + stvx_u $out1,$x10,$out + addi $out,$out,0x20 + bne Lxts_dec6x_steal + b Lxts_dec6x_done + +.align 4 +Lxts_dec6x_one: + vxor $out0,$in5,$twk0 + nop +Loop_xts_dec1x: + vncipher $out0,$out0,v24 + lvx v24,$x20,$key_ # round[3] + addi $key_,$key_,0x20 + + vncipher $out0,$out0,v25 + lvx v25,$x10,$key_ # round[4] + bdnz Loop_xts_dec1x + + subi r0,$taillen,1 + vncipher $out0,$out0,v24 + + andi. r0,r0,16 + cmpwi $taillen,0 # cr0 feeds the bne Lxts_dec6x_steal further down + vncipher $out0,$out0,v25 + + sub $inp,$inp,r0 # r0 is 16 when $taillen is 0 and 0 when it is 1..15 + vncipher $out0,$out0,v26 + + lvx_u $in0,0,$inp + vncipher $out0,$out0,v27 + + addi $key_,$sp,$FRAME+15 # rewind $key_ + vncipher $out0,$out0,v28 + lvx v24,$x00,$key_ # re-pre-load round[1] + + vncipher $out0,$out0,v29 + lvx v25,$x10,$key_ # re-pre-load round[2] + vxor $twk0,$twk0,v31 + + le?vperm $in0,$in0,$in0,$leperm + vncipher $out0,$out0,v30 + + mtctr $rounds # reload ctr ahead of the possible jump to Lxts_dec6x_steal + vncipherlast $out0,$out0,$twk0 + + vmr $twk0,$twk1 # unused tweak + vmr $twk1,$twk2 + le?vperm $out0,$out0,$out0,$leperm + stvx_u $out0,$x00,$out # store output + addi $out,$out,0x10 + vxor $out0,$in0,$twk2 + bne Lxts_dec6x_steal + b Lxts_dec6x_done + +.align 4 +Lxts_dec6x_zero: + cmpwi $taillen,0 + beq Lxts_dec6x_done # neither whole blocks nor a tail remain + + lvx_u $in0,0,$inp + le?vperm $in0,$in0,$in0,$leperm + vxor $out0,$in0,$twk1 +Lxts_dec6x_steal: + vncipher $out0,$out0,v24 + lvx v24,$x20,$key_ # round[3] + addi $key_,$key_,0x20 + + vncipher $out0,$out0,v25 + lvx v25,$x10,$key_ # round[4] + bdnz Lxts_dec6x_steal + + add $inp,$inp,$taillen + vncipher $out0,$out0,v24 + + cmpwi $taillen,0 + vncipher $out0,$out0,v25 + + lvx_u $in0,0,$inp + vncipher $out0,$out0,v26 + + lvsr $inpperm,0,$taillen # $in5 is no more + vncipher $out0,$out0,v27 + + addi $key_,$sp,$FRAME+15 # rewind $key_ + vncipher
$out0,$out0,v28 + lvx v24,$x00,$key_ # re-pre-load round[1] + + vncipher $out0,$out0,v29 + lvx v25,$x10,$key_ # re-pre-load round[2] + vxor $twk1,$twk1,v31 + + le?vperm $in0,$in0,$in0,$leperm + vncipher $out0,$out0,v30 + + vperm $in0,$in0,$in0,$inpperm + vncipherlast $tmp,$out0,$twk1 + + le?vperm $out0,$tmp,$tmp,$leperm + le?stvx_u $out0,0,$out + be?stvx_u $tmp,0,$out + + vxor $out0,$out0,$out0 + vspltisb $out1,-1 + vperm $out0,$out0,$out1,$inpperm + vsel $out0,$in0,$tmp,$out0 + vxor $out0,$out0,$twk0 # the merged block is whitened with $twk0 + + subi r3,$out,1 + mtctr $taillen # one copy-loop pass per tail byte + +Loop_xts_dec6x_steal: + lbzu r0,1(r3) + stb r0,16(r3) # copy the byte 16 positions forward + bdnz Loop_xts_dec6x_steal + + li $taillen,0 # the extra dec1x pass then skips the steal branch + mtctr $rounds + b Loop_xts_dec1x # one more time... + +.align 4 +Lxts_dec6x_done: + mtlr r7 # r7 holds the return address saved in the prologue + li r10,`$FRAME+15` + li r11,`$FRAME+31` + stvx $seven,r10,$sp # wipe copies of round keys + addi r10,r10,32 + stvx $seven,r11,$sp + addi r11,r11,32 + stvx $seven,r10,$sp + addi r10,r10,32 + stvx $seven,r11,$sp + addi r11,r11,32 + stvx $seven,r10,$sp + addi r10,r10,32 + stvx $seven,r11,$sp + addi r11,r11,32 + stvx $seven,r10,$sp + addi r10,r10,32 + stvx $seven,r11,$sp + addi r11,r11,32 + + mtspr 256,$vrsave # write $vrsave back to SPR 256 + lvx v20,r10,$sp # ABI says so + addi r10,r10,32 + lvx v21,r11,$sp + addi r11,r11,32 + lvx v22,r10,$sp + addi r10,r10,32 + lvx v23,r11,$sp + addi r11,r11,32 + lvx v24,r10,$sp + addi r10,r10,32 + lvx v25,r11,$sp + addi r11,r11,32 + lvx v26,r10,$sp + addi r10,r10,32 + lvx v27,r11,$sp + addi r11,r11,32 + lvx v28,r10,$sp + addi r10,r10,32 + lvx v29,r11,$sp + addi r11,r11,32 + lvx v30,r10,$sp + lvx v31,r11,$sp + $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp) + $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp) + $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp) + $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp) + $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp) + $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp) + addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T` # pop the frame allocated on entry + blr + .long 0 + .byte 0,12,0x04,1,0x80,6,6,0 + .long 0 + +.align 5 +_aesp8_xts_dec5x: + vncipher $out0,$out0,v24 + vncipher $out1,$out1,v24 + vncipher
$out2,$out2,v24 + vncipher $out3,$out3,v24 + vncipher $out4,$out4,v24 + lvx v24,$x20,$key_ # round[3] + addi $key_,$key_,0x20 + + vncipher $out0,$out0,v25 + vncipher $out1,$out1,v25 + vncipher $out2,$out2,v25 + vncipher $out3,$out3,v25 + vncipher $out4,$out4,v25 + lvx v25,$x10,$key_ # round[4] + bdnz _aesp8_xts_dec5x + + subi r0,$taillen,1 + vncipher $out0,$out0,v24 + vncipher $out1,$out1,v24 + vncipher $out2,$out2,v24 + vncipher $out3,$out3,v24 + vncipher $out4,$out4,v24 + + andi. r0,r0,16 + cmpwi $taillen,0 + vncipher $out0,$out0,v25 + vncipher $out1,$out1,v25 + vncipher $out2,$out2,v25 + vncipher $out3,$out3,v25 + vncipher $out4,$out4,v25 + vxor $twk0,$twk0,v31 + + sub $inp,$inp,r0 + vncipher $out0,$out0,v26 + vncipher $out1,$out1,v26 + vncipher $out2,$out2,v26 + vncipher $out3,$out3,v26 + vncipher $out4,$out4,v26 + vxor $in1,$twk1,v31 + + vncipher $out0,$out0,v27 + lvx_u $in0,0,$inp + vncipher $out1,$out1,v27 + vncipher $out2,$out2,v27 + vncipher $out3,$out3,v27 + vncipher $out4,$out4,v27 + vxor $in2,$twk2,v31 + + addi $key_,$sp,$FRAME+15 # rewind $key_ + vncipher $out0,$out0,v28 + vncipher $out1,$out1,v28 + vncipher $out2,$out2,v28 + vncipher $out3,$out3,v28 + vncipher $out4,$out4,v28 + lvx v24,$x00,$key_ # re-pre-load round[1] + vxor $in3,$twk3,v31 + + vncipher $out0,$out0,v29 + le?vperm $in0,$in0,$in0,$leperm + vncipher $out1,$out1,v29 + vncipher $out2,$out2,v29 + vncipher $out3,$out3,v29 + vncipher $out4,$out4,v29 + lvx v25,$x10,$key_ # re-pre-load round[2] + vxor $in4,$twk4,v31 + + vncipher $out0,$out0,v30 + vncipher $out1,$out1,v30 + vncipher $out2,$out2,v30 + vncipher $out3,$out3,v30 + vncipher $out4,$out4,v30 + + vncipherlast $out0,$out0,$twk0 + vncipherlast $out1,$out1,$in1 + vncipherlast $out2,$out2,$in2 + vncipherlast $out3,$out3,$in3 + vncipherlast $out4,$out4,$in4 + mtctr $rounds + blr + .long 0 + .byte 0,12,0x14,0,0,0,0,0 +___ +}} }}} + +my $consts=1; +foreach(split("\n",$code)) { + s/\`([^\`]*)\`/eval($1)/geo; + + # constants table 
endian-specific conversion + if ($consts && m/\.(long|byte)\s+(.+)\s+(\?[a-z]*)$/o) { + my $conv=$3; + my @bytes=(); + + # convert to endian-agnostic format + if ($1 eq "long") { + foreach (split(/,\s*/,$2)) { + my $l = /^0/?oct:int; + push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff; + } + } else { + @bytes = map(/^0/?oct:int,split(/,\s*/,$2)); + } + + # little-endian conversion + if ($flavour =~ /le$/o) { + SWITCH: for($conv) { + /\?inv/ && do { @bytes=map($_^0xf,@bytes); last; }; + /\?rev/ && do { @bytes=reverse(@bytes); last; }; + } + } + + #emit + print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n"; + next; + } + $consts=0 if (m/Lconsts:/o); # end of table + + # instructions prefixed with '?' are endian-specific and need + # to be adjusted accordingly... + if ($flavour =~ /le$/o) { # little-endian + s/le\?//o or + s/be\?/#be#/o or + s/\?lvsr/lvsl/o or + s/\?lvsl/lvsr/o or + s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or + s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or + s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o; + } else { # big-endian + s/le\?/#le#/o or + s/be\?//o or + s/\?([a-z]+)/$1/o; + } + + print $_,"\n"; +} + +close STDOUT; diff --git a/crypto/bn/asm/ppc-mont.pl b/crypto/bn/asm/ppc-mont.pl index f9b6992ccc..420f4d5807 100644 --- a/crypto/bn/asm/ppc-mont.pl +++ b/crypto/bn/asm/ppc-mont.pl @@ -191,7 +191,7 @@ L1st: addi $j,$j,$BNSZ ; j++ addi $tp,$tp,$BNSZ ; tp++ - bdnz- L1st + bdnz L1st ;L1st addc $lo0,$alo,$hi0 addze $hi0,$ahi @@ -253,7 +253,7 @@ Linner: addze $hi1,$hi1 $ST $lo1,0($tp) ; tp[j-1] addi $tp,$tp,$BNSZ ; tp++ - bdnz- Linner + bdnz Linner ;Linner $LD $tj,$BNSZ($tp) ; tp[j] addc $lo0,$alo,$hi0 @@ -276,7 +276,7 @@ Linner: slwi $tj,$num,`log($BNSZ)/log(2)` $UCMP $i,$tj addi $i,$i,$BNSZ - ble- Louter + ble Louter addi $num,$num,2 ; restore $num subfc $j,$j,$j ; j=0 and "clear" XER[CA] @@ -289,7 +289,7 @@ Lsub: $LDX $tj,$tp,$j subfe $aj,$nj,$tj ; 
tp[j]-np[j] $STX $aj,$rp,$j addi $j,$j,$BNSZ - bdnz- Lsub + bdnz Lsub li $j,0 mtctr $num @@ -304,7 +304,7 @@ Lcopy: ; copy or in-place refresh $STX $tj,$rp,$j $STX $j,$tp,$j ; zap at once addi $j,$j,$BNSZ - bdnz- Lcopy + bdnz Lcopy $POP $tj,0($sp) li r3,1 diff --git a/crypto/bn/asm/ppc.pl b/crypto/bn/asm/ppc.pl index 1249ce2299..5e22cd8fc6 100644 --- a/crypto/bn/asm/ppc.pl +++ b/crypto/bn/asm/ppc.pl @@ -1552,7 +1552,7 @@ Lppcasm_sub_mainloop: # if carry = 1 this is r7-r8. Else it # is r7-r8 -1 as we need. $STU r6,$BNSZ(r3) - bdnz- Lppcasm_sub_mainloop + bdnz Lppcasm_sub_mainloop Lppcasm_sub_adios: subfze r3,r0 # if carry bit is set then r3 = 0 else -1 andi. r3,r3,1 # keep only last bit. @@ -1598,7 +1598,7 @@ Lppcasm_add_mainloop: $LDU r8,$BNSZ(r5) adde r8,r7,r8 $STU r8,$BNSZ(r3) - bdnz- Lppcasm_add_mainloop + bdnz Lppcasm_add_mainloop Lppcasm_add_adios: addze r3,r0 #return carry bit. blr @@ -1755,7 +1755,7 @@ Lppcasm_sqr_mainloop: $UMULH r8,r6,r6 $STU r7,$BNSZ(r3) $STU r8,$BNSZ(r3) - bdnz- Lppcasm_sqr_mainloop + bdnz Lppcasm_sqr_mainloop Lppcasm_sqr_adios: blr .long 0 @@ -1819,7 +1819,7 @@ Lppcasm_mw_LOOP: addi r3,r3,`4*$BNSZ` addi r4,r4,`4*$BNSZ` - bdnz- Lppcasm_mw_LOOP + bdnz Lppcasm_mw_LOOP Lppcasm_mw_REM: andi. 
r5,r5,0x3 diff --git a/crypto/bn/asm/ppc64-mont.pl b/crypto/bn/asm/ppc64-mont.pl index a14e769ad0..d565859667 100644 --- a/crypto/bn/asm/ppc64-mont.pl +++ b/crypto/bn/asm/ppc64-mont.pl @@ -561,7 +561,7 @@ $code.=<<___; stfd $T3b,`$FRAME+56`($sp) std $t0,8($tp) ; tp[j-1] stdu $t4,16($tp) ; tp[j] - bdnz- L1st + bdnz L1st fctid $dota,$dota fctid $dotb,$dotb @@ -856,7 +856,7 @@ $code.=<<___; addze $carry,$carry std $t3,-16($tp) ; tp[j-1] std $t5,-8($tp) ; tp[j] - bdnz- Linner + bdnz Linner fctid $dota,$dota fctid $dotb,$dotb @@ -954,7 +954,7 @@ Lsub: ldx $t0,$tp,$i stdx $t0,$rp,$i stdx $t2,$t6,$i addi $i,$i,16 - bdnz- Lsub + bdnz Lsub li $i,0 subfe $ovf,$i,$ovf ; handle upmost overflow bit @@ -981,7 +981,7 @@ Lcopy: ; copy or in-place refresh stdx $i,$tp,$i ; zap tp at once stdx $i,$t4,$i addi $i,$i,16 - bdnz- Lcopy + bdnz Lcopy ___ $code.=<<___ if ($SIZE_T==4); subf $np,$num,$np ; rewind np @@ -1014,7 +1014,7 @@ Lsub: ld $t0,8($tp) ; load tp[j..j+3] in 64-bit word order stw $t5,8($rp) stw $t6,12($rp) stwu $t7,16($rp) - bdnz- Lsub + bdnz Lsub li $i,0 subfe $ovf,$i,$ovf ; handle upmost overflow bit @@ -1046,7 +1046,7 @@ Lcopy: ; copy or in-place refresh stwu $t3,16($rp) std $i,8($tp) ; zap tp at once stdu $i,16($tp) - bdnz- Lcopy + bdnz Lcopy ___ $code.=<<___; diff --git a/crypto/evp/e_aes.c b/crypto/evp/e_aes.c index 9a2de166c5..6f77e7e4b9 100644 --- a/crypto/evp/e_aes.c +++ b/crypto/evp/e_aes.c @@ -140,6 +140,19 @@ void AES_ctr32_encrypt(const unsigned char *in, unsigned char *out, const unsigned char ivec[AES_BLOCK_SIZE]); #endif +#if defined(OPENSSL_CPUID_OBJ) && (defined(__powerpc__) || defined(__ppc__) || defined(_ARCH_PPC)) +extern int OPENSSL_ppccap_P; +# define HWAES_CAPABLE (OPENSSL_ppccap_P & (1<<2)) +# define HWAES_set_encrypt_key aes_p8_set_encrypt_key +# define HWAES_set_decrypt_key aes_p8_set_decrypt_key +# define HWAES_encrypt aes_p8_encrypt +# define HWAES_decrypt aes_p8_decrypt +# define HWAES_cbc_encrypt aes_p8_cbc_encrypt +# define 
HWAES_ctr32_encrypt_blocks aes_p8_ctr32_encrypt_blocks +# define HWAES_xts_encrypt aes_p8_xts_encrypt +# define HWAES_xts_decrypt aes_p8_xts_decrypt +#endif + #if defined(AES_ASM) && !defined(I386_ONLY) && ( \ ((defined(__i386) || defined(__i386__) || \ defined(_M_IX86)) && defined(OPENSSL_IA32_SSE2))|| \ @@ -498,6 +511,13 @@ void HWAES_cbc_encrypt(const unsigned char *in, unsigned char *out, unsigned char *ivec, const int enc); void HWAES_ctr32_encrypt_blocks(const unsigned char *in, unsigned char *out, size_t len, const AES_KEY *key, const unsigned char ivec[16]); +void HWAES_xts_encrypt(const unsigned char *inp, unsigned char *out, + size_t len, const AES_KEY *key1, + const AES_KEY *key2, const unsigned char iv[16]); +void HWAES_xts_decrypt(const unsigned char *inp, unsigned char *out, + size_t len, const AES_KEY *key1, + const AES_KEY *key2, const unsigned char iv[16]); + #endif #define BLOCK_CIPHER_generic_pack(nid,keylen,flags) \ @@ -1172,11 +1192,17 @@ static int aes_xts_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, { HWAES_set_encrypt_key(key, ctx->key_len * 4, &xctx->ks1); xctx->xts.block1 = (block128_f)HWAES_encrypt; +#ifdef HWAES_xts_encrypt + xctx->stream = HWAES_xts_encrypt; +#endif } else { HWAES_set_decrypt_key(key, ctx->key_len * 4, &xctx->ks1); xctx->xts.block1 = (block128_f)HWAES_decrypt; +#ifdef HWAES_xts_decrypt + xctx->stream = HWAES_xts_decrypt; +#endif } HWAES_set_encrypt_key(key + ctx->key_len/2, diff --git a/crypto/modes/Makefile b/crypto/modes/Makefile index f4930c6bd8..5a170498bf 100644 --- a/crypto/modes/Makefile +++ b/crypto/modes/Makefile @@ -58,6 +58,8 @@ ghash-parisc.s: asm/ghash-parisc.pl $(PERL) asm/ghash-parisc.pl $(PERLASM_SCHEME) $@ ghashv8-armx.S: asm/ghashv8-armx.pl $(PERL) asm/ghashv8-armx.pl $(PERLASM_SCHEME) $@ +ghashp8-ppc.s: asm/ghashp8-ppc.pl + $(PERL) asm/ghashp8-ppc.pl $(PERLASM_SCHEME) $@ # GNU make "catch all" ghash-%.S: asm/ghash-%.pl; $(PERL) $< $(PERLASM_SCHEME) $@ diff --git 
a/crypto/modes/asm/ghashp8-ppc.pl b/crypto/modes/asm/ghashp8-ppc.pl new file mode 100755 index 0000000000..82bf125eb1 --- /dev/null +++ b/crypto/modes/asm/ghashp8-ppc.pl @@ -0,0 +1,663 @@ +#!/usr/bin/env perl +# +# ==================================================================== +# Written by Andy Polyakov for the OpenSSL +# project. The module is, however, dual licensed under OpenSSL and +# CRYPTOGAMS licenses depending on where you obtain it. For further +# details see http://www.openssl.org/~appro/cryptogams/. +# ==================================================================== +# +# GHASH for PowerISA v2.07. +# +# July 2014 +# +# Accurate performance measurements are problematic, because it's +# always virtualized setup with possibly throttled processor. +# Relative comparison is therefore more informative. This initial +# version is ~2.1x slower than hardware-assisted AES-128-CTR, ~12x +# faster than "4-bit" integer-only compiler-generated 64-bit code. +# "Initial version" means that there is room for further improvement. + +# May 2016 +# +# 2x aggregated reduction improves performance by 50% (resulting +# performance on POWER8 is 1 cycle per processed byte), and 4x +# aggregated reduction - by 170% or 2.7x (resulting in 0.55 cpb).
+ +$flavour=shift; +$output =shift; + +if ($flavour =~ /64/) { + $SIZE_T=8; + $LRSAVE=2*$SIZE_T; + $STU="stdu"; + $POP="ld"; + $PUSH="std"; + $UCMP="cmpld"; + $SHRI="srdi"; +} elsif ($flavour =~ /32/) { + $SIZE_T=4; + $LRSAVE=$SIZE_T; + $STU="stwu"; + $POP="lwz"; + $PUSH="stw"; + $UCMP="cmplw"; + $SHRI="srwi"; +} else { die "nonsense $flavour"; } + +$sp="r1"; +$FRAME=6*$SIZE_T+13*16; # 13*16 is for v20-v31 offload + +$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; +( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or +( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or +die "can't locate ppc-xlate.pl"; + +open STDOUT,"| $^X $xlate $flavour $output" || die "can't call $xlate: $!"; + +my ($Xip,$Htbl,$inp,$len)=map("r$_",(3..6)); # argument block + +my ($Xl,$Xm,$Xh,$IN)=map("v$_",(0..3)); +my ($zero,$t0,$t1,$t2,$xC2,$H,$Hh,$Hl,$lemask)=map("v$_",(4..12)); +my ($Xl1,$Xm1,$Xh1,$IN1,$H2,$H2h,$H2l)=map("v$_",(13..19)); +my $vrsave="r12"; + +$code=<<___; +.machine "any" + +.text + +.globl .gcm_init_p8 +.align 5 +.gcm_init_p8: + li r0,-4096 + li r8,0x10 + mfspr $vrsave,256 + li r9,0x20 + mtspr 256,r0 + li r10,0x30 + lvx_u $H,0,r4 # load H + + vspltisb $xC2,-16 # 0xf0 + vspltisb $t0,1 # one + vaddubm $xC2,$xC2,$xC2 # 0xe0 + vxor $zero,$zero,$zero + vor $xC2,$xC2,$t0 # 0xe1 + vsldoi $xC2,$xC2,$zero,15 # 0xe1... + vsldoi $t1,$zero,$t0,1 # ...1 + vaddubm $xC2,$xC2,$xC2 # 0xc2... + vspltisb $t2,7 + vor $xC2,$xC2,$t1 # 0xc2....01 + vspltb $t1,$H,0 # most significant byte + vsl $H,$H,$t0 # H<<=1 + vsrab $t1,$t1,$t2 # broadcast carry bit + vand $t1,$t1,$xC2 + vxor $IN,$H,$t1 # twisted H + + vsldoi $H,$IN,$IN,8 # twist even more ... + vsldoi $xC2,$zero,$xC2,8 # 0xc2.0 + vsldoi $Hl,$zero,$H,8 # ... 
and split + vsldoi $Hh,$H,$zero,8 + + stvx_u $xC2,0,r3 # save pre-computed table + stvx_u $Hl,r8,r3 + li r8,0x40 + stvx_u $H, r9,r3 + li r9,0x50 + stvx_u $Hh,r10,r3 + li r10,0x60 + + vpmsumd $Xl,$IN,$Hl # H.lo·H.lo + vpmsumd $Xm,$IN,$H # H.hi·H.lo+H.lo·H.hi + vpmsumd $Xh,$IN,$Hh # H.hi·H.hi + + vpmsumd $t2,$Xl,$xC2 # 1st reduction phase + + vsldoi $t0,$Xm,$zero,8 + vsldoi $t1,$zero,$Xm,8 + vxor $Xl,$Xl,$t0 + vxor $Xh,$Xh,$t1 + + vsldoi $Xl,$Xl,$Xl,8 + vxor $Xl,$Xl,$t2 + + vsldoi $t1,$Xl,$Xl,8 # 2nd reduction phase + vpmsumd $Xl,$Xl,$xC2 + vxor $t1,$t1,$Xh + vxor $IN1,$Xl,$t1 + + vsldoi $H2,$IN1,$IN1,8 + vsldoi $H2l,$zero,$H2,8 + vsldoi $H2h,$H2,$zero,8 + + stvx_u $H2l,r8,r3 # save H^2 + li r8,0x70 + stvx_u $H2,r9,r3 + li r9,0x80 + stvx_u $H2h,r10,r3 + li r10,0x90 +___ +{ +my ($t4,$t5,$t6) = ($Hl,$H,$Hh); +$code.=<<___; + vpmsumd $Xl,$IN,$H2l # H.lo·H^2.lo + vpmsumd $Xl1,$IN1,$H2l # H^2.lo·H^2.lo + vpmsumd $Xm,$IN,$H2 # H.hi·H^2.lo+H.lo·H^2.hi + vpmsumd $Xm1,$IN1,$H2 # H^2.hi·H^2.lo+H^2.lo·H^2.hi + vpmsumd $Xh,$IN,$H2h # H.hi·H^2.hi + vpmsumd $Xh1,$IN1,$H2h # H^2.hi·H^2.hi + + vpmsumd $t2,$Xl,$xC2 # 1st reduction phase + vpmsumd $t6,$Xl1,$xC2 # 1st reduction phase + + vsldoi $t0,$Xm,$zero,8 + vsldoi $t1,$zero,$Xm,8 + vsldoi $t4,$Xm1,$zero,8 + vsldoi $t5,$zero,$Xm1,8 + vxor $Xl,$Xl,$t0 + vxor $Xh,$Xh,$t1 + vxor $Xl1,$Xl1,$t4 + vxor $Xh1,$Xh1,$t5 + + vsldoi $Xl,$Xl,$Xl,8 + vsldoi $Xl1,$Xl1,$Xl1,8 + vxor $Xl,$Xl,$t2 + vxor $Xl1,$Xl1,$t6 + + vsldoi $t1,$Xl,$Xl,8 # 2nd reduction phase + vsldoi $t5,$Xl1,$Xl1,8 # 2nd reduction phase + vpmsumd $Xl,$Xl,$xC2 + vpmsumd $Xl1,$Xl1,$xC2 + vxor $t1,$t1,$Xh + vxor $t5,$t5,$Xh1 + vxor $Xl,$Xl,$t1 + vxor $Xl1,$Xl1,$t5 + + vsldoi $H,$Xl,$Xl,8 + vsldoi $H2,$Xl1,$Xl1,8 + vsldoi $Hl,$zero,$H,8 + vsldoi $Hh,$H,$zero,8 + vsldoi $H2l,$zero,$H2,8 + vsldoi $H2h,$H2,$zero,8 + + stvx_u $Hl,r8,r3 # save H^3 + li r8,0xa0 + stvx_u $H,r9,r3 + li r9,0xb0 + stvx_u $Hh,r10,r3 + li r10,0xc0 + stvx_u $H2l,r8,r3 # save H^4 + stvx_u $H2,r9,r3 + stvx_u 
$H2h,r10,r3 + + mtspr 256,$vrsave + blr + .long 0 + .byte 0,12,0x14,0,0,0,2,0 + .long 0 +.size .gcm_init_p8,.-.gcm_init_p8 +___ +} +$code.=<<___; +.globl .gcm_gmult_p8 +.align 5 +.gcm_gmult_p8: + lis r0,0xfff8 + li r8,0x10 + mfspr $vrsave,256 + li r9,0x20 + mtspr 256,r0 + li r10,0x30 + lvx_u $IN,0,$Xip # load Xi + + lvx_u $Hl,r8,$Htbl # load pre-computed table + le?lvsl $lemask,r0,r0 + lvx_u $H, r9,$Htbl + le?vspltisb $t0,0x07 + lvx_u $Hh,r10,$Htbl + le?vxor $lemask,$lemask,$t0 + lvx_u $xC2,0,$Htbl + le?vperm $IN,$IN,$IN,$lemask + vxor $zero,$zero,$zero + + vpmsumd $Xl,$IN,$Hl # H.lo·Xi.lo + vpmsumd $Xm,$IN,$H # H.hi·Xi.lo+H.lo·Xi.hi + vpmsumd $Xh,$IN,$Hh # H.hi·Xi.hi + + vpmsumd $t2,$Xl,$xC2 # 1st reduction phase + + vsldoi $t0,$Xm,$zero,8 + vsldoi $t1,$zero,$Xm,8 + vxor $Xl,$Xl,$t0 + vxor $Xh,$Xh,$t1 + + vsldoi $Xl,$Xl,$Xl,8 + vxor $Xl,$Xl,$t2 + + vsldoi $t1,$Xl,$Xl,8 # 2nd reduction phase + vpmsumd $Xl,$Xl,$xC2 + vxor $t1,$t1,$Xh + vxor $Xl,$Xl,$t1 + + le?vperm $Xl,$Xl,$Xl,$lemask + stvx_u $Xl,0,$Xip # write out Xi + + mtspr 256,$vrsave + blr + .long 0 + .byte 0,12,0x14,0,0,0,2,0 + .long 0 +.size .gcm_gmult_p8,.-.gcm_gmult_p8 + +.globl .gcm_ghash_p8 +.align 5 +.gcm_ghash_p8: + li r0,-4096 + li r8,0x10 + mfspr $vrsave,256 + li r9,0x20 + mtspr 256,r0 + li r10,0x30 + lvx_u $Xl,0,$Xip # load Xi + + lvx_u $Hl,r8,$Htbl # load pre-computed table + li r8,0x40 + le?lvsl $lemask,r0,r0 + lvx_u $H, r9,$Htbl + li r9,0x50 + le?vspltisb $t0,0x07 + lvx_u $Hh,r10,$Htbl + li r10,0x60 + le?vxor $lemask,$lemask,$t0 + lvx_u $xC2,0,$Htbl + le?vperm $Xl,$Xl,$Xl,$lemask + vxor $zero,$zero,$zero + + ${UCMP}i $len,64 + bge Lgcm_ghash_p8_4x + + lvx_u $IN,0,$inp + addi $inp,$inp,16 + subic. 
$len,$len,16 + le?vperm $IN,$IN,$IN,$lemask + vxor $IN,$IN,$Xl + beq Lshort + + lvx_u $H2l,r8,$Htbl # load H^2 + li r8,16 + lvx_u $H2, r9,$Htbl + add r9,$inp,$len # end of input + lvx_u $H2h,r10,$Htbl + be?b Loop_2x + +.align 5 +Loop_2x: + lvx_u $IN1,0,$inp + le?vperm $IN1,$IN1,$IN1,$lemask + + subic $len,$len,32 + vpmsumd $Xl,$IN,$H2l # H^2.lo·Xi.lo + vpmsumd $Xl1,$IN1,$Hl # H.lo·Xi+1.lo + subfe r0,r0,r0 # borrow?-1:0 + vpmsumd $Xm,$IN,$H2 # H^2.hi·Xi.lo+H^2.lo·Xi.hi + vpmsumd $Xm1,$IN1,$H # H.hi·Xi+1.lo+H.lo·Xi+1.hi + and r0,r0,$len + vpmsumd $Xh,$IN,$H2h # H^2.hi·Xi.hi + vpmsumd $Xh1,$IN1,$Hh # H.hi·Xi+1.hi + add $inp,$inp,r0 + + vxor $Xl,$Xl,$Xl1 + vxor $Xm,$Xm,$Xm1 + + vpmsumd $t2,$Xl,$xC2 # 1st reduction phase + + vsldoi $t0,$Xm,$zero,8 + vsldoi $t1,$zero,$Xm,8 + vxor $Xh,$Xh,$Xh1 + vxor $Xl,$Xl,$t0 + vxor $Xh,$Xh,$t1 + + vsldoi $Xl,$Xl,$Xl,8 + vxor $Xl,$Xl,$t2 + lvx_u $IN,r8,$inp + addi $inp,$inp,32 + + vsldoi $t1,$Xl,$Xl,8 # 2nd reduction phase + vpmsumd $Xl,$Xl,$xC2 + le?vperm $IN,$IN,$IN,$lemask + vxor $t1,$t1,$Xh + vxor $IN,$IN,$t1 + vxor $IN,$IN,$Xl + $UCMP r9,$inp + bgt Loop_2x # done yet? 
+ + cmplwi $len,0 + bne Leven + +Lshort: + vpmsumd $Xl,$IN,$Hl # H.lo·Xi.lo + vpmsumd $Xm,$IN,$H # H.hi·Xi.lo+H.lo·Xi.hi + vpmsumd $Xh,$IN,$Hh # H.hi·Xi.hi + + vpmsumd $t2,$Xl,$xC2 # 1st reduction phase + + vsldoi $t0,$Xm,$zero,8 + vsldoi $t1,$zero,$Xm,8 + vxor $Xl,$Xl,$t0 + vxor $Xh,$Xh,$t1 + + vsldoi $Xl,$Xl,$Xl,8 + vxor $Xl,$Xl,$t2 + + vsldoi $t1,$Xl,$Xl,8 # 2nd reduction phase + vpmsumd $Xl,$Xl,$xC2 + vxor $t1,$t1,$Xh + +Leven: + vxor $Xl,$Xl,$t1 + le?vperm $Xl,$Xl,$Xl,$lemask + stvx_u $Xl,0,$Xip # write out Xi + + mtspr 256,$vrsave + blr + .long 0 + .byte 0,12,0x14,0,0,0,4,0 + .long 0 +___ +{ +my ($Xl3,$Xm2,$IN2,$H3l,$H3,$H3h, + $Xh3,$Xm3,$IN3,$H4l,$H4,$H4h) = map("v$_",(20..31)); +my $IN0=$IN; +my ($H21l,$H21h,$loperm,$hiperm) = ($Hl,$Hh,$H2l,$H2h); + +$code.=<<___; +.align 5 +.gcm_ghash_p8_4x: +Lgcm_ghash_p8_4x: + $STU $sp,-$FRAME($sp) + li r10,`15+6*$SIZE_T` + li r11,`31+6*$SIZE_T` + stvx v20,r10,$sp + addi r10,r10,32 + stvx v21,r11,$sp + addi r11,r11,32 + stvx v22,r10,$sp + addi r10,r10,32 + stvx v23,r11,$sp + addi r11,r11,32 + stvx v24,r10,$sp + addi r10,r10,32 + stvx v25,r11,$sp + addi r11,r11,32 + stvx v26,r10,$sp + addi r10,r10,32 + stvx v27,r11,$sp + addi r11,r11,32 + stvx v28,r10,$sp + addi r10,r10,32 + stvx v29,r11,$sp + addi r11,r11,32 + stvx v30,r10,$sp + li r10,0x60 + stvx v31,r11,$sp + li r0,-1 + stw $vrsave,`$FRAME-4`($sp) # save vrsave + mtspr 256,r0 # preserve all AltiVec registers + + lvsl $t0,0,r8 # 0x0001..0e0f + #lvx_u $H2l,r8,$Htbl # load H^2 + li r8,0x70 + lvx_u $H2, r9,$Htbl + li r9,0x80 + vspltisb $t1,8 # 0x0808..0808 + #lvx_u $H2h,r10,$Htbl + li r10,0x90 + lvx_u $H3l,r8,$Htbl # load H^3 + li r8,0xa0 + lvx_u $H3, r9,$Htbl + li r9,0xb0 + lvx_u $H3h,r10,$Htbl + li r10,0xc0 + lvx_u $H4l,r8,$Htbl # load H^4 + li r8,0x10 + lvx_u $H4, r9,$Htbl + li r9,0x20 + lvx_u $H4h,r10,$Htbl + li r10,0x30 + + vsldoi $t2,$zero,$t1,8 # 0x0000..0808 + vaddubm $hiperm,$t0,$t2 # 0x0001..1617 + vaddubm $loperm,$t1,$hiperm # 0x0809..1e1f + + $SHRI $len,$len,4 
# this allows to use sign bit + # as carry + lvx_u $IN0,0,$inp # load input + lvx_u $IN1,r8,$inp + subic. $len,$len,8 + lvx_u $IN2,r9,$inp + lvx_u $IN3,r10,$inp + addi $inp,$inp,0x40 + le?vperm $IN0,$IN0,$IN0,$lemask + le?vperm $IN1,$IN1,$IN1,$lemask + le?vperm $IN2,$IN2,$IN2,$lemask + le?vperm $IN3,$IN3,$IN3,$lemask + + vxor $Xh,$IN0,$Xl + + vpmsumd $Xl1,$IN1,$H3l + vpmsumd $Xm1,$IN1,$H3 + vpmsumd $Xh1,$IN1,$H3h + + vperm $H21l,$H2,$H,$hiperm + vperm $t0,$IN2,$IN3,$loperm + vperm $H21h,$H2,$H,$loperm + vperm $t1,$IN2,$IN3,$hiperm + vpmsumd $Xm2,$IN2,$H2 # H^2.lo·Xi+2.hi+H^2.hi·Xi+2.lo + vpmsumd $Xl3,$t0,$H21l # H^2.lo·Xi+2.lo+H.lo·Xi+3.lo + vpmsumd $Xm3,$IN3,$H # H.hi·Xi+3.lo +H.lo·Xi+3.hi + vpmsumd $Xh3,$t1,$H21h # H^2.hi·Xi+2.hi+H.hi·Xi+3.hi + + vxor $Xm2,$Xm2,$Xm1 + vxor $Xl3,$Xl3,$Xl1 + vxor $Xm3,$Xm3,$Xm2 + vxor $Xh3,$Xh3,$Xh1 + + blt Ltail_4x + +Loop_4x: + lvx_u $IN0,0,$inp + lvx_u $IN1,r8,$inp + subic. $len,$len,4 + lvx_u $IN2,r9,$inp + lvx_u $IN3,r10,$inp + addi $inp,$inp,0x40 + le?vperm $IN1,$IN1,$IN1,$lemask + le?vperm $IN2,$IN2,$IN2,$lemask + le?vperm $IN3,$IN3,$IN3,$lemask + le?vperm $IN0,$IN0,$IN0,$lemask + + vpmsumd $Xl,$Xh,$H4l # H^4.lo·Xi.lo + vpmsumd $Xm,$Xh,$H4 # H^4.hi·Xi.lo+H^4.lo·Xi.hi + vpmsumd $Xh,$Xh,$H4h # H^4.hi·Xi.hi + vpmsumd $Xl1,$IN1,$H3l + vpmsumd $Xm1,$IN1,$H3 + vpmsumd $Xh1,$IN1,$H3h + + vxor $Xl,$Xl,$Xl3 + vxor $Xm,$Xm,$Xm3 + vxor $Xh,$Xh,$Xh3 + vperm $t0,$IN2,$IN3,$loperm + vperm $t1,$IN2,$IN3,$hiperm + + vpmsumd $t2,$Xl,$xC2 # 1st reduction phase + vpmsumd $Xl3,$t0,$H21l # H.lo·Xi+3.lo +H^2.lo·Xi+2.lo + vpmsumd $Xh3,$t1,$H21h # H.hi·Xi+3.hi +H^2.hi·Xi+2.hi + + vsldoi $t0,$Xm,$zero,8 + vsldoi $t1,$zero,$Xm,8 + vxor $Xl,$Xl,$t0 + vxor $Xh,$Xh,$t1 + + vsldoi $Xl,$Xl,$Xl,8 + vxor $Xl,$Xl,$t2 + + vsldoi $t1,$Xl,$Xl,8 # 2nd reduction phase + vpmsumd $Xm2,$IN2,$H2 # H^2.hi·Xi+2.lo+H^2.lo·Xi+2.hi + vpmsumd $Xm3,$IN3,$H # H.hi·Xi+3.lo +H.lo·Xi+3.hi + vpmsumd $Xl,$Xl,$xC2 + + vxor $Xl3,$Xl3,$Xl1 + vxor $Xh3,$Xh3,$Xh1 + vxor $Xh,$Xh,$IN0 + 
vxor $Xm2,$Xm2,$Xm1 + vxor $Xh,$Xh,$t1 + vxor $Xm3,$Xm3,$Xm2 + vxor $Xh,$Xh,$Xl + bge Loop_4x + +Ltail_4x: + vpmsumd $Xl,$Xh,$H4l # H^4.lo·Xi.lo + vpmsumd $Xm,$Xh,$H4 # H^4.hi·Xi.lo+H^4.lo·Xi.hi + vpmsumd $Xh,$Xh,$H4h # H^4.hi·Xi.hi + + vxor $Xl,$Xl,$Xl3 + vxor $Xm,$Xm,$Xm3 + + vpmsumd $t2,$Xl,$xC2 # 1st reduction phase + + vsldoi $t0,$Xm,$zero,8 + vsldoi $t1,$zero,$Xm,8 + vxor $Xh,$Xh,$Xh3 + vxor $Xl,$Xl,$t0 + vxor $Xh,$Xh,$t1 + + vsldoi $Xl,$Xl,$Xl,8 + vxor $Xl,$Xl,$t2 + + vsldoi $t1,$Xl,$Xl,8 # 2nd reduction phase + vpmsumd $Xl,$Xl,$xC2 + vxor $t1,$t1,$Xh + vxor $Xl,$Xl,$t1 + + addic. $len,$len,4 + beq Ldone_4x + + lvx_u $IN0,0,$inp + ${UCMP}i $len,2 + li $len,-4 + blt Lone + lvx_u $IN1,r8,$inp + beq Ltwo + +Lthree: + lvx_u $IN2,r9,$inp + le?vperm $IN0,$IN0,$IN0,$lemask + le?vperm $IN1,$IN1,$IN1,$lemask + le?vperm $IN2,$IN2,$IN2,$lemask + + vxor $Xh,$IN0,$Xl + vmr $H4l,$H3l + vmr $H4, $H3 + vmr $H4h,$H3h + + vperm $t0,$IN1,$IN2,$loperm + vperm $t1,$IN1,$IN2,$hiperm + vpmsumd $Xm2,$IN1,$H2 # H^2.lo·Xi+1.hi+H^2.hi·Xi+1.lo + vpmsumd $Xm3,$IN2,$H # H.hi·Xi+2.lo +H.lo·Xi+2.hi + vpmsumd $Xl3,$t0,$H21l # H^2.lo·Xi+1.lo+H.lo·Xi+2.lo + vpmsumd $Xh3,$t1,$H21h # H^2.hi·Xi+1.hi+H.hi·Xi+2.hi + + vxor $Xm3,$Xm3,$Xm2 + b Ltail_4x + +.align 4 +Ltwo: + le?vperm $IN0,$IN0,$IN0,$lemask + le?vperm $IN1,$IN1,$IN1,$lemask + + vxor $Xh,$IN0,$Xl + vperm $t0,$zero,$IN1,$loperm + vperm $t1,$zero,$IN1,$hiperm + + vsldoi $H4l,$zero,$H2,8 + vmr $H4, $H2 + vsldoi $H4h,$H2,$zero,8 + + vpmsumd $Xl3,$t0, $H21l # H.lo·Xi+1.lo + vpmsumd $Xm3,$IN1,$H # H.hi·Xi+1.lo+H.lo·Xi+2.hi + vpmsumd $Xh3,$t1, $H21h # H.hi·Xi+1.hi + + b Ltail_4x + +.align 4 +Lone: + le?vperm $IN0,$IN0,$IN0,$lemask + + vsldoi $H4l,$zero,$H,8 + vmr $H4, $H + vsldoi $H4h,$H,$zero,8 + + vxor $Xh,$IN0,$Xl + vxor $Xl3,$Xl3,$Xl3 + vxor $Xm3,$Xm3,$Xm3 + vxor $Xh3,$Xh3,$Xh3 + + b Ltail_4x + +Ldone_4x: + le?vperm $Xl,$Xl,$Xl,$lemask + stvx_u $Xl,0,$Xip # write out Xi + + li r10,`15+6*$SIZE_T` + li r11,`31+6*$SIZE_T` + mtspr 256,$vrsave 
+ lvx v20,r10,$sp + addi r10,r10,32 + lvx v21,r11,$sp + addi r11,r11,32 + lvx v22,r10,$sp + addi r10,r10,32 + lvx v23,r11,$sp + addi r11,r11,32 + lvx v24,r10,$sp + addi r10,r10,32 + lvx v25,r11,$sp + addi r11,r11,32 + lvx v26,r10,$sp + addi r10,r10,32 + lvx v27,r11,$sp + addi r11,r11,32 + lvx v28,r10,$sp + addi r10,r10,32 + lvx v29,r11,$sp + addi r11,r11,32 + lvx v30,r10,$sp + lvx v31,r11,$sp + addi $sp,$sp,$FRAME + blr + .long 0 + .byte 0,12,0x04,0,0x80,0,4,0 + .long 0 +___ +} +$code.=<<___; +.size .gcm_ghash_p8,.-.gcm_ghash_p8 + +.asciz "GHASH for PowerISA 2.07, CRYPTOGAMS by " +.align 2 +___ + +foreach (split("\n",$code)) { + s/\`([^\`]*)\`/eval $1/geo; + + if ($flavour =~ /le$/o) { # little-endian + s/le\?//o or + s/be\?/#be#/o; + } else { + s/le\?/#le#/o or + s/be\?//o; + } + print $_,"\n"; +} + +close STDOUT; # enforce flush diff --git a/crypto/modes/gcm128.c b/crypto/modes/gcm128.c index 2b4df392e7..a46ec61135 100644 --- a/crypto/modes/gcm128.c +++ b/crypto/modes/gcm128.c @@ -683,6 +683,14 @@ void gcm_init_v8(u128 Htable[16],const u64 Xi[2]); void gcm_gmult_v8(u64 Xi[2],const u128 Htable[16]); void gcm_ghash_v8(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len); # endif +# elif defined(OPENSSL_CPUID_OBJ) && (defined(__powerpc__) || defined(__ppc__) || defined(_ARCH_PPC)) +# define GHASH_ASM_PPC +# define GCM_FUNCREF_4BIT +extern int OPENSSL_ppccap_P; +void gcm_init_p8(u128 Htable[16], const u64 Xi[2]); +void gcm_gmult_p8(u64 Xi[2], const u128 Htable[16]); +void gcm_ghash_p8(u64 Xi[2], const u128 Htable[16], const u8 *inp, + size_t len); # elif defined(_TMS320C6400_PLUS) # define GHASH_ASM_C64Xplus # endif @@ -767,6 +775,16 @@ void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx,void *key,block128_f block) ctx->gmult = gcm_gmult_4bit; ctx->ghash = gcm_ghash_4bit; } +# elif defined(GHASH_ASM_PPC) + if (OPENSSL_ppccap_P & (1<<2)) { + gcm_init_p8(ctx->Htable, ctx->H.u); + ctx->gmult = gcm_gmult_p8; + ctx->ghash = gcm_ghash_p8; + } else { + 
gcm_init_4bit(ctx->Htable, ctx->H.u); + ctx->gmult = gcm_gmult_4bit; + ctx->ghash = gcm_ghash_4bit; + } # elif defined(GHASH_ASM_C64Xplus) /* C64x+ assembler doesn't use tables, skip gcm_init_4bit. * This is likely to trigger "function never referenced" diff --git a/crypto/perlasm/ppc-xlate.pl b/crypto/perlasm/ppc-xlate.pl index a3edd982b6..0f46cf06bc 100755 --- a/crypto/perlasm/ppc-xlate.pl +++ b/crypto/perlasm/ppc-xlate.pl @@ -27,7 +27,8 @@ my $globl = sub { /osx/ && do { $name = "_$name"; last; }; - /linux.*32/ && do { $ret .= ".globl $name\n"; + /linux.*(32|64le)/ + && do { $ret .= ".globl $name\n"; $ret .= ".type $name,\@function"; last; }; @@ -37,7 +38,6 @@ my $globl = sub { $ret .= ".align 3\n"; $ret .= "$name:\n"; $ret .= ".quad .$name,.TOC.\@tocbase,0\n"; - $ret .= ".size $name,24\n"; $ret .= ".previous\n"; $name = ".$name"; @@ -50,7 +50,9 @@ my $globl = sub { $ret; }; my $text = sub { - ($flavour =~ /aix/) ? ".csect" : ".text"; + my $ret = ($flavour =~ /aix/) ? ".csect\t.text[PR],7" : ".text"; + $ret = ".abiversion 2\n".$ret if ($flavour =~ /linux.*64le/); + $ret; }; my $machine = sub { my $junk = shift; @@ -62,9 +64,12 @@ my $machine = sub { ".machine $arch"; }; my $size = sub { - if ($flavour =~ /linux.*32/) + if ($flavour =~ /linux/) { shift; - ".size " . 
join(",",@_); + my $name = shift; $name =~ s|^[\.\_]||; + my $ret = ".size $name,.-".($flavour=~/64$/?".":"").$name; + $ret .= "\n.size .$name,.-.$name" if ($flavour=~/64$/); + $ret; } else { ""; } @@ -77,6 +82,25 @@ my $asciz = sub { else { ""; } }; +my $quad = sub { + shift; + my @ret; + my ($hi,$lo); + for (@_) { + if (/^0x([0-9a-f]*?)([0-9a-f]{1,8})$/io) + { $hi=$1?"0x$1":"0"; $lo="0x$2"; } + elsif (/^([0-9]+)$/o) + { $hi=$1>>32; $lo=$1&0xffffffff; } # error-prone with 32-bit perl + else + { $hi=undef; $lo=$_; } + + if (defined($hi)) + { push(@ret,$flavour=~/le$/o?".long\t$lo,$hi":".long\t$hi,$lo"); } + else + { push(@ret,".quad $lo"); } + } + join("\n",@ret); +}; ################################################################ # simplified mnemonics not handled by at least one assembler @@ -122,6 +146,66 @@ my $extrdi = sub { $b = ($b+$n)&63; $n = 64-$n; " rldicl $ra,$rs,$b,$n"; }; +my $vmr = sub { + my ($f,$vx,$vy) = @_; + " vor $vx,$vy,$vy"; +}; + +# Some ABIs specify vrsave, special-purpose register #256, as reserved +# for system use. 
+my $no_vrsave = ($flavour =~ /aix|linux64le/); +my $mtspr = sub { + my ($f,$idx,$ra) = @_; + if ($idx == 256 && $no_vrsave) { + " or $ra,$ra,$ra"; + } else { + " mtspr $idx,$ra"; + } +}; +my $mfspr = sub { + my ($f,$rd,$idx) = @_; + if ($idx == 256 && $no_vrsave) { + " li $rd,-1"; + } else { + " mfspr $rd,$idx"; + } +}; + +# PowerISA 2.06 stuff +sub vsxmem_op { + my ($f, $vrt, $ra, $rb, $op) = @_; + " .long ".sprintf "0x%X",(31<<26)|($vrt<<21)|($ra<<16)|($rb<<11)|($op*2+1); +} +# made-up unaligned memory reference AltiVec/VMX instructions +my $lvx_u = sub { vsxmem_op(@_, 844); }; # lxvd2x +my $stvx_u = sub { vsxmem_op(@_, 972); }; # stxvd2x +my $lvdx_u = sub { vsxmem_op(@_, 588); }; # lxsdx +my $stvdx_u = sub { vsxmem_op(@_, 716); }; # stxsdx +my $lvx_4w = sub { vsxmem_op(@_, 780); }; # lxvw4x +my $stvx_4w = sub { vsxmem_op(@_, 908); }; # stxvw4x + +# PowerISA 2.07 stuff +sub vcrypto_op { + my ($f, $vrt, $vra, $vrb, $op) = @_; + " .long ".sprintf "0x%X",(4<<26)|($vrt<<21)|($vra<<16)|($vrb<<11)|$op; +} +my $vcipher = sub { vcrypto_op(@_, 1288); }; +my $vcipherlast = sub { vcrypto_op(@_, 1289); }; +my $vncipher = sub { vcrypto_op(@_, 1352); }; +my $vncipherlast= sub { vcrypto_op(@_, 1353); }; +my $vsbox = sub { vcrypto_op(@_, 0, 1480); }; +my $vshasigmad = sub { my ($st,$six)=splice(@_,-2); vcrypto_op(@_, $st<<4|$six, 1730); }; +my $vshasigmaw = sub { my ($st,$six)=splice(@_,-2); vcrypto_op(@_, $st<<4|$six, 1666); }; +my $vpmsumb = sub { vcrypto_op(@_, 1032); }; +my $vpmsumd = sub { vcrypto_op(@_, 1224); }; +my $vpmsubh = sub { vcrypto_op(@_, 1096); }; +my $vpmsumw = sub { vcrypto_op(@_, 1160); }; +my $vaddudm = sub { vcrypto_op(@_, 192); }; + +my $mtsle = sub { + my ($f, $arg) = @_; + " .long ".sprintf "0x%X",(31<<26)|($arg<<21)|(147*2); +}; while($line=<>) { @@ -138,7 +222,10 @@ while($line=<>) { { $line =~ s|(^[\.\w]+)\:\s*||; my $label = $1; - printf "%s:",($GLOBALS{$label} or $label) if ($label); + if ($label) { + printf "%s:",($GLOBALS{$label} or $label); + 
printf "\n.localentry\t$GLOBALS{$label},0" if ($GLOBALS{$label} && $flavour =~ /linux.*64le/); + } } { @@ -147,7 +234,7 @@ while($line=<>) { my $mnemonic = $2; my $f = $3; my $opcode = eval("\$$mnemonic"); - $line =~ s|\bc?[rf]([0-9]+)\b|$1|g if ($c ne "." and $flavour !~ /osx/); + $line =~ s/\b(c?[rf]|v|vs)([0-9]+)\b/$2/g if ($c ne "." and $flavour !~ /osx/); if (ref($opcode) eq 'CODE') { $line = &$opcode($f,split(',',$line)); } elsif ($mnemonic) { $line = $c.$mnemonic.$f."\t".$line; } } diff --git a/crypto/ppccap.c b/crypto/ppccap.c index ab89ccaa12..675630e41b 100644 --- a/crypto/ppccap.c +++ b/crypto/ppccap.c @@ -3,13 +3,24 @@ #include #include #include +#include +#if defined(__linux) || defined(_AIX) +# include +#endif +#if defined(_AIX53) /* defined even on post-5.3 */ +# include +# if !defined(__power_set) +# define __power_set(a) (_system_configuration.implementation & (a)) +# endif +#endif #include #include #define PPC_FPU64 (1<<0) #define PPC_ALTIVEC (1<<1) +#define PPC_CRYPTO207 (1<<2) -static int OPENSSL_ppccap_P = 0; +int OPENSSL_ppccap_P = 0; static sigset_t all_masked; @@ -49,10 +60,28 @@ int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_U } #endif +void sha256_block_p8(void *ctx, const void *inp, size_t len); +void sha256_block_ppc(void *ctx, const void *inp, size_t len); +void sha256_block_data_order(void *ctx, const void *inp, size_t len) +{ + OPENSSL_ppccap_P & PPC_CRYPTO207 ? sha256_block_p8(ctx, inp, len) : + sha256_block_ppc(ctx, inp, len); +} + +void sha512_block_p8(void *ctx, const void *inp, size_t len); +void sha512_block_ppc(void *ctx, const void *inp, size_t len); +void sha512_block_data_order(void *ctx, const void *inp, size_t len) +{ + OPENSSL_ppccap_P & PPC_CRYPTO207 ? 
sha512_block_p8(ctx, inp, len) : + sha512_block_ppc(ctx, inp, len); +} + static sigjmp_buf ill_jmp; static void ill_handler (int sig) { siglongjmp(ill_jmp,sig); } void OPENSSL_ppc64_probe(void); +void OPENSSL_altivec_probe(void); +void OPENSSL_crypto207_probe(void); void OPENSSL_cpuid_setup(void) { @@ -82,6 +111,45 @@ void OPENSSL_cpuid_setup(void) OPENSSL_ppccap_P = 0; +#if defined(_AIX) + if (sizeof(size_t) == 4) { + struct utsname uts; +# if defined(_SC_AIX_KERNEL_BITMODE) + if (sysconf(_SC_AIX_KERNEL_BITMODE) != 64) + return; +# endif + if (uname(&uts) != 0 || atoi(uts.version) < 6) + return; + } + +# if defined(__power_set) + /* + * Value used in __power_set is a single-bit 1< for the OpenSSL +# project. The module is, however, dual licensed under OpenSSL and +# CRYPTOGAMS licenses depending on where you obtain it. For further +# details see http://www.openssl.org/~appro/cryptogams/. +# ==================================================================== + +# SHA256/512 for PowerISA v2.07. +# +# Accurate performance measurements are problematic, because it's +# always virtualized setup with possibly throttled processor. +# Relative comparison is therefore more informative. This module is +# ~60% faster than integer-only sha512-ppc.pl. To anchor to something +# else, SHA256 is 24% slower than sha1-ppc.pl and 2.5x slower than +# hardware-assisted aes-128-cbc encrypt. SHA512 is 20% faster than +# sha1-ppc.pl and 1.6x slower than aes-128-cbc. Another interesting +# result is degree of computational resources' utilization. POWER8 is +# "massively multi-threaded chip" and difference between single- and +# maximum multi-process benchmark results tells that utilization is +# whopping 94%. For sha512-ppc.pl we get [not unimpressive] 84% and +# for sha1-ppc.pl - 73%. 100% means that multi-process result equals +# to single-process one, given that all threads end up on the same +# physical core.
+# +####################################################################### +# +# SHA256/pre-2.07(*) SHA512/pre-2.07(*) SHA1(*) +# POWER8 9.3 /14.8 5.8 /9.5 7.1 +# +# (*) presented for reference/comparison purposes; + +$flavour=shift; +$output =shift; + +if ($flavour =~ /64/) { + $SIZE_T=8; + $LRSAVE=2*$SIZE_T; + $STU="stdu"; + $POP="ld"; + $PUSH="std"; +} elsif ($flavour =~ /32/) { + $SIZE_T=4; + $LRSAVE=$SIZE_T; + $STU="stwu"; + $POP="lwz"; + $PUSH="stw"; +} else { die "nonsense $flavour"; } + +$LENDIAN=($flavour=~/le/); + +$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; +( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or +( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or +die "can't locate ppc-xlate.pl"; + +open STDOUT,"| $^X $xlate $flavour $output" || die "can't call $xlate: $!"; + +if ($output =~ /512/) { + $bits=512; + $SZ=8; + $sz="d"; + $rounds=80; +} else { + $bits=256; + $SZ=4; + $sz="w"; + $rounds=64; +} + +$func="sha${bits}_block_p8"; +$FRAME=8*$SIZE_T; + +$sp ="r1"; +$toc="r2"; +$ctx="r3"; +$inp="r4"; +$num="r5"; +$Tbl="r6"; +$idx="r7"; +$lrsave="r8"; +$offload="r11"; +$vrsave="r12"; +($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,10,26..31)); + $x00=0 if ($flavour =~ /osx/); + +@V=($A,$B,$C,$D,$E,$F,$G,$H)=map("v$_",(0..7)); +@X=map("v$_",(8..23)); +($Ki,$Func,$S0,$S1,$s0,$s1,$lemask)=map("v$_",(24..31)); + +sub ROUND { +my ($i,$a,$b,$c,$d,$e,$f,$g,$h)=@_; +my $j=($i+1)%16; + +$code.=<<___ if ($i<15 && ($i%(16/$SZ))==(16/$SZ-1)); + lvx_u @X[$i+1],0,$inp ; load X[i] in advance + addi $inp,$inp,16 +___ +$code.=<<___ if ($i<16 && ($i%(16/$SZ))); + vsldoi @X[$i],@X[$i-1],@X[$i-1],$SZ +___ +$code.=<<___ if ($LENDIAN && $i<16 && ($i%(16/$SZ))==0); + vperm @X[$i],@X[$i],@X[$i],$lemask +___ +$code.=<<___; + `"vshasigma${sz} $s0,@X[($j+1)%16],0,0" if ($i>=15)` + vsel $Func,$g,$f,$e ; Ch(e,f,g) + vshasigma${sz} $S1,$e,1,15 ; Sigma1(e) + vaddu${sz}m $h,$h,@X[$i%16] ; h+=X[i] + vshasigma${sz} $S0,$a,1,0 ; Sigma0(a) + `"vshasigma${sz} 
$s1,@X[($j+14)%16],0,15" if ($i>=15)` + vaddu${sz}m $h,$h,$Func ; h+=Ch(e,f,g) + vxor $Func,$a,$b + `"vaddu${sz}m @X[$j],@X[$j],@X[($j+9)%16]" if ($i>=15)` + vaddu${sz}m $h,$h,$S1 ; h+=Sigma1(e) + vsel $Func,$b,$c,$Func ; Maj(a,b,c) + vaddu${sz}m $g,$g,$Ki ; future h+=K[i] + vaddu${sz}m $d,$d,$h ; d+=h + vaddu${sz}m $S0,$S0,$Func ; Sigma0(a)+Maj(a,b,c) + `"vaddu${sz}m @X[$j],@X[$j],$s0" if ($i>=15)` + lvx $Ki,$idx,$Tbl ; load next K[i] + addi $idx,$idx,16 + vaddu${sz}m $h,$h,$S0 ; h+=Sigma0(a)+Maj(a,b,c) + `"vaddu${sz}m @X[$j],@X[$j],$s1" if ($i>=15)` +___ +} + +$code=<<___; +.machine "any" +.text + +.globl $func +.align 6 +$func: + $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp) + mflr $lrsave + li r10,`$FRAME+8*16+15` + li r11,`$FRAME+8*16+31` + stvx v20,r10,$sp # ABI says so + addi r10,r10,32 + mfspr $vrsave,256 + stvx v21,r11,$sp + addi r11,r11,32 + stvx v22,r10,$sp + addi r10,r10,32 + stvx v23,r11,$sp + addi r11,r11,32 + stvx v24,r10,$sp + addi r10,r10,32 + stvx v25,r11,$sp + addi r11,r11,32 + stvx v26,r10,$sp + addi r10,r10,32 + stvx v27,r11,$sp + addi r11,r11,32 + stvx v28,r10,$sp + addi r10,r10,32 + stvx v29,r11,$sp + addi r11,r11,32 + stvx v30,r10,$sp + stvx v31,r11,$sp + li r11,-1 + stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave + li $x10,0x10 + $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp) + li $x20,0x20 + $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp) + li $x30,0x30 + $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp) + li $x40,0x40 + $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp) + li $x50,0x50 + $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp) + li $x60,0x60 + $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp) + li $x70,0x70 + $PUSH $lrsave,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp) + mtspr 256,r11 + + bl LPICmeup + addi $offload,$sp,$FRAME+15 +___ +$code.=<<___ if ($LENDIAN); + li $idx,8 + lvsl $lemask,0,$idx + vspltisb $Ki,0x0f + vxor $lemask,$lemask,$Ki +___ +$code.=<<___ if ($SZ==4); + lvx_4w $A,$x00,$ctx + lvx_4w $E,$x10,$ctx + vsldoi $B,$A,$A,4 # unpack + vsldoi $C,$A,$A,8 + vsldoi $D,$A,$A,12 + vsldoi 
$F,$E,$E,4 + vsldoi $G,$E,$E,8 + vsldoi $H,$E,$E,12 +___ +$code.=<<___ if ($SZ==8); + lvx_u $A,$x00,$ctx + lvx_u $C,$x10,$ctx + lvx_u $E,$x20,$ctx + vsldoi $B,$A,$A,8 # unpack + lvx_u $G,$x30,$ctx + vsldoi $D,$C,$C,8 + vsldoi $F,$E,$E,8 + vsldoi $H,$G,$G,8 +___ +$code.=<<___; + li r0,`($rounds-16)/16` # inner loop counter + b Loop +.align 5 +Loop: + lvx $Ki,$x00,$Tbl + li $idx,16 + lvx_u @X[0],0,$inp + addi $inp,$inp,16 + stvx $A,$x00,$offload # offload $A-$H + stvx $B,$x10,$offload + stvx $C,$x20,$offload + stvx $D,$x30,$offload + stvx $E,$x40,$offload + stvx $F,$x50,$offload + stvx $G,$x60,$offload + stvx $H,$x70,$offload + vaddu${sz}m $H,$H,$Ki # h+K[i] + lvx $Ki,$idx,$Tbl + addi $idx,$idx,16 +___ +for ($i=0;$i<16;$i++) { &ROUND($i,@V); unshift(@V,pop(@V)); } +$code.=<<___; + mtctr r0 + b L16_xx +.align 5 +L16_xx: +___ +for (;$i<32;$i++) { &ROUND($i,@V); unshift(@V,pop(@V)); } +$code.=<<___; + bdnz L16_xx + + lvx @X[2],$x00,$offload + subic. $num,$num,1 + lvx @X[3],$x10,$offload + vaddu${sz}m $A,$A,@X[2] + lvx @X[4],$x20,$offload + vaddu${sz}m $B,$B,@X[3] + lvx @X[5],$x30,$offload + vaddu${sz}m $C,$C,@X[4] + lvx @X[6],$x40,$offload + vaddu${sz}m $D,$D,@X[5] + lvx @X[7],$x50,$offload + vaddu${sz}m $E,$E,@X[6] + lvx @X[8],$x60,$offload + vaddu${sz}m $F,$F,@X[7] + lvx @X[9],$x70,$offload + vaddu${sz}m $G,$G,@X[8] + vaddu${sz}m $H,$H,@X[9] + bne Loop +___ +$code.=<<___ if ($SZ==4); + lvx @X[0],$idx,$Tbl + addi $idx,$idx,16 + vperm $A,$A,$B,$Ki # pack the answer + lvx @X[1],$idx,$Tbl + vperm $E,$E,$F,$Ki + vperm $A,$A,$C,@X[0] + vperm $E,$E,$G,@X[0] + vperm $A,$A,$D,@X[1] + vperm $E,$E,$H,@X[1] + stvx_4w $A,$x00,$ctx + stvx_4w $E,$x10,$ctx +___ +$code.=<<___ if ($SZ==8); + vperm $A,$A,$B,$Ki # pack the answer + vperm $C,$C,$D,$Ki + vperm $E,$E,$F,$Ki + vperm $G,$G,$H,$Ki + stvx_u $A,$x00,$ctx + stvx_u $C,$x10,$ctx + stvx_u $E,$x20,$ctx + stvx_u $G,$x30,$ctx +___ +$code.=<<___; + li r10,`$FRAME+8*16+15` + mtlr $lrsave + li r11,`$FRAME+8*16+31` + mtspr 256,$vrsave + 
lvx v20,r10,$sp # ABI says so + addi r10,r10,32 + lvx v21,r11,$sp + addi r11,r11,32 + lvx v22,r10,$sp + addi r10,r10,32 + lvx v23,r11,$sp + addi r11,r11,32 + lvx v24,r10,$sp + addi r10,r10,32 + lvx v25,r11,$sp + addi r11,r11,32 + lvx v26,r10,$sp + addi r10,r10,32 + lvx v27,r11,$sp + addi r11,r11,32 + lvx v28,r10,$sp + addi r10,r10,32 + lvx v29,r11,$sp + addi r11,r11,32 + lvx v30,r10,$sp + lvx v31,r11,$sp + $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp) + $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp) + $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp) + $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp) + $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp) + $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp) + addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T` + blr + .long 0 + .byte 0,12,4,1,0x80,6,3,0 + .long 0 +.size $func,.-$func +___ + +# Ugly hack here, because PPC assembler syntax seem to vary too +# much from platforms to platform... +$code.=<<___; +.align 6 +LPICmeup: + mflr r0 + bcl 20,31,\$+4 + mflr $Tbl ; vvvvvv "distance" between . and 1st data entry + addi $Tbl,$Tbl,`64-8` + mtlr r0 + blr + .long 0 + .byte 0,12,0x14,0,0,0,0,0 + .space `64-9*4` +___ + +if ($SZ==8) { + local *table = sub { + foreach(@_) { $code.=".quad $_,$_\n"; } + }; + table( + "0x428a2f98d728ae22","0x7137449123ef65cd", + "0xb5c0fbcfec4d3b2f","0xe9b5dba58189dbbc", + "0x3956c25bf348b538","0x59f111f1b605d019", + "0x923f82a4af194f9b","0xab1c5ed5da6d8118", + "0xd807aa98a3030242","0x12835b0145706fbe", + "0x243185be4ee4b28c","0x550c7dc3d5ffb4e2", + "0x72be5d74f27b896f","0x80deb1fe3b1696b1", + "0x9bdc06a725c71235","0xc19bf174cf692694", + "0xe49b69c19ef14ad2","0xefbe4786384f25e3", + "0x0fc19dc68b8cd5b5","0x240ca1cc77ac9c65", + "0x2de92c6f592b0275","0x4a7484aa6ea6e483", + "0x5cb0a9dcbd41fbd4","0x76f988da831153b5", + "0x983e5152ee66dfab","0xa831c66d2db43210", + "0xb00327c898fb213f","0xbf597fc7beef0ee4", + "0xc6e00bf33da88fc2","0xd5a79147930aa725", + "0x06ca6351e003826f","0x142929670a0e6e70", + "0x27b70a8546d22ffc","0x2e1b21385c26c926", + 
"0x4d2c6dfc5ac42aed","0x53380d139d95b3df", + "0x650a73548baf63de","0x766a0abb3c77b2a8", + "0x81c2c92e47edaee6","0x92722c851482353b", + "0xa2bfe8a14cf10364","0xa81a664bbc423001", + "0xc24b8b70d0f89791","0xc76c51a30654be30", + "0xd192e819d6ef5218","0xd69906245565a910", + "0xf40e35855771202a","0x106aa07032bbd1b8", + "0x19a4c116b8d2d0c8","0x1e376c085141ab53", + "0x2748774cdf8eeb99","0x34b0bcb5e19b48a8", + "0x391c0cb3c5c95a63","0x4ed8aa4ae3418acb", + "0x5b9cca4f7763e373","0x682e6ff3d6b2b8a3", + "0x748f82ee5defb2fc","0x78a5636f43172f60", + "0x84c87814a1f0ab72","0x8cc702081a6439ec", + "0x90befffa23631e28","0xa4506cebde82bde9", + "0xbef9a3f7b2c67915","0xc67178f2e372532b", + "0xca273eceea26619c","0xd186b8c721c0c207", + "0xeada7dd6cde0eb1e","0xf57d4f7fee6ed178", + "0x06f067aa72176fba","0x0a637dc5a2c898a6", + "0x113f9804bef90dae","0x1b710b35131c471b", + "0x28db77f523047d84","0x32caab7b40c72493", + "0x3c9ebe0a15c9bebc","0x431d67c49c100d4c", + "0x4cc5d4becb3e42b6","0x597f299cfc657e2a", + "0x5fcb6fab3ad6faec","0x6c44198c4a475817","0"); +$code.=<<___ if (!$LENDIAN); +.quad 0x0001020304050607,0x1011121314151617 +___ +$code.=<<___ if ($LENDIAN); # quad-swapped +.quad 0x1011121314151617,0x0001020304050607 +___ +} else { + local *table = sub { + foreach(@_) { $code.=".long $_,$_,$_,$_\n"; } + }; + table( + "0x428a2f98","0x71374491","0xb5c0fbcf","0xe9b5dba5", + "0x3956c25b","0x59f111f1","0x923f82a4","0xab1c5ed5", + "0xd807aa98","0x12835b01","0x243185be","0x550c7dc3", + "0x72be5d74","0x80deb1fe","0x9bdc06a7","0xc19bf174", + "0xe49b69c1","0xefbe4786","0x0fc19dc6","0x240ca1cc", + "0x2de92c6f","0x4a7484aa","0x5cb0a9dc","0x76f988da", + "0x983e5152","0xa831c66d","0xb00327c8","0xbf597fc7", + "0xc6e00bf3","0xd5a79147","0x06ca6351","0x14292967", + "0x27b70a85","0x2e1b2138","0x4d2c6dfc","0x53380d13", + "0x650a7354","0x766a0abb","0x81c2c92e","0x92722c85", + "0xa2bfe8a1","0xa81a664b","0xc24b8b70","0xc76c51a3", + "0xd192e819","0xd6990624","0xf40e3585","0x106aa070", + 
"0x19a4c116","0x1e376c08","0x2748774c","0x34b0bcb5", + "0x391c0cb3","0x4ed8aa4a","0x5b9cca4f","0x682e6ff3", + "0x748f82ee","0x78a5636f","0x84c87814","0x8cc70208", + "0x90befffa","0xa4506ceb","0xbef9a3f7","0xc67178f2","0"); +$code.=<<___ if (!$LENDIAN); +.long 0x00010203,0x10111213,0x10111213,0x10111213 +.long 0x00010203,0x04050607,0x10111213,0x10111213 +.long 0x00010203,0x04050607,0x08090a0b,0x10111213 +___ +$code.=<<___ if ($LENDIAN); # word-swapped +.long 0x10111213,0x10111213,0x10111213,0x00010203 +.long 0x10111213,0x10111213,0x04050607,0x00010203 +.long 0x10111213,0x08090a0b,0x04050607,0x00010203 +___ +} +$code.=<<___; +.asciz "SHA${bits} for PowerISA 2.07, CRYPTOGAMS by " +.align 2 +___ + +$code =~ s/\`([^\`]*)\`/eval $1/gem; +print $code; +close STDOUT; diff --git a/fips/fips_premain.c b/fips/fips_premain.c index c68b464e31..b6ec32db4e 100644 --- a/fips/fips_premain.c +++ b/fips/fips_premain.c @@ -140,6 +140,9 @@ void FINGERPRINT_premain(void) } #endif } while(0); +#if defined(__powerpc__) || defined(__ppc__) || defined(_ARCH_PPC) + fips_openssl_cpuid_setup(); +#endif } #else diff --git a/fips/fips_premain.c.sha1 b/fips/fips_premain.c.sha1 index 4dbfbeae69..19c30807a7 100644 --- a/fips/fips_premain.c.sha1 +++ b/fips/fips_premain.c.sha1 @@ -1 +1 @@ -HMAC-SHA1(fips_premain.c)= 65b20c3cec235cec85af848e1cd2dfdfa101804a +HMAC-SHA1(fips_premain.c)= 2bfb57ef540bdba29220a45d65e1b4080de9adc1 diff --git a/fips/fipssyms.h b/fips/fipssyms.h index 76db619cec..8f04eb9dcf 100644 --- a/fips/fipssyms.h +++ b/fips/fipssyms.h @@ -712,6 +712,23 @@ #define _bn_GF2m_mul_2x2 _fips_bn_GF2m_mul_2x2 #define _OPENSSL_cleanse _FIPS_openssl_cleanse #endif +#define aes_p8_encrypt fips_aes_p8_encrypt +#define aes_p8_decrypt fips_aes_p8_decrypt +#define aes_p8_set_encrypt_key fips_aes_p8_set_encrypt_key +#define aes_p8_set_decrypt_key fips_aes_p8_set_decrypt_key +#define aes_p8_cbc_encrypt fips_aes_p8_cbc_encrypt +#define aes_p8_ctr32_encrypt_blocks fips_aes_p8_ctr32_encrypt_blocks +#define 
aes_p8_xts_encrypt fips_aes_p8_xts_encrypt +#define aes_p8_xts_decrypt fips_aes_p8_xts_decrypt +#define gcm_init_p8 fips_gcm_init_p8 +#define gcm_gmult_p8 fips_gcm_gmult_p8 +#define gcm_ghash_p8 fips_gcm_ghash_p8 +#define sha256_block_p8 fips_sha256_block_p8 +#define sha512_block_p8 fips_sha512_block_p8 +#define sha256_block_ppc fips_sha256_block_ppc +#define sha512_block_ppc fips_sha512_block_ppc +#define OPENSSL_ppccap_P fips_openssl_ppccap_p +#define OPENSSL_crypto207_probe fips_openssl_crypto207_probe #if defined(_MSC_VER) # pragma const_seg("fipsro$b") From 10fa6736b137ad55bfcb0e72b4587ec419f1b13e Mon Sep 17 00:00:00 2001 From: Andy Polyakov Date: Fri, 24 Jun 2016 12:53:54 +0200 Subject: [PATCH 116/120] FIPS MIPS assembly pack refresh. Backport CVE-2014-3570 bug and portability fixes. Reviewed-by: Rich Salz --- crypto/aes/asm/aes-mips.pl | 6 +- crypto/bn/asm/mips-mont.pl | 4 +- crypto/bn/asm/mips.pl | 659 ++++++++-------------------------- crypto/sha/asm/sha1-mips.pl | 4 +- crypto/sha/asm/sha512-mips.pl | 8 +- 5 files changed, 165 insertions(+), 516 deletions(-) diff --git a/crypto/aes/asm/aes-mips.pl b/crypto/aes/asm/aes-mips.pl index 2ce6deffc8..76cf130e91 100644 --- a/crypto/aes/asm/aes-mips.pl +++ b/crypto/aes/asm/aes-mips.pl @@ -47,7 +47,7 @@ # ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23)); # ($gp,$sp,$fp,$ra)=map("\$$_",(28..31)); # -$flavour = shift; # supported flavours are o32,n32,64,nubi32,nubi64 +$flavour = shift || "o32"; # supported flavours are o32,n32,64,nubi32,nubi64 if ($flavour =~ /64|n32/i) { $PTR_ADD="dadd"; # incidentally works even on n32 @@ -70,7 +70,7 @@ $pf = ($flavour =~ /nubi/i) ?
$t0 : $t2; # ###################################################################### -$big_endian=(`echo MIPSEL | $ENV{CC} -E -P -`=~/MIPSEL/)?1:0; +$big_endian=(`echo MIPSEL | $ENV{CC} -E -`=~/MIPSEL/)?1:0; for (@ARGV) { $output=$_ if (/^\w[\w\-]*\.\w+$/); } open STDOUT,">$output"; @@ -89,7 +89,7 @@ $code.=<<___; # include #endif -#if !defined(__vxworks) || defined(__pic__) +#if !defined(__mips_eabi) && (!defined(__vxworks) || defined(__pic__)) .option pic2 #endif .set noat diff --git a/crypto/bn/asm/mips-mont.pl b/crypto/bn/asm/mips-mont.pl index b944a12b8e..a33cdf4111 100644 --- a/crypto/bn/asm/mips-mont.pl +++ b/crypto/bn/asm/mips-mont.pl @@ -46,7 +46,7 @@ # ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23)); # ($gp,$sp,$fp,$ra)=map("\$$_",(28..31)); # -$flavour = shift; # supported flavours are o32,n32,64,nubi32,nubi64 +$flavour = shift || "o32"; # supported flavours are o32,n32,64,nubi32,nubi64 if ($flavour =~ /64|n32/i) { $PTR_ADD="dadd"; # incidentally works even on n32 @@ -133,7 +133,7 @@ $code.=<<___; bnez $at,1f li $t0,0 slt $at,$num,17 # on in-order CPU - bnezl $at,bn_mul_mont_internal + bnez $at,bn_mul_mont_internal nop 1: jr $ra li $a0,0 diff --git a/crypto/bn/asm/mips.pl b/crypto/bn/asm/mips.pl index c162a3ec23..acafde5e56 100644 --- a/crypto/bn/asm/mips.pl +++ b/crypto/bn/asm/mips.pl @@ -48,7 +48,7 @@ # has to content with 40-85% improvement depending on benchmark and # key length, more for longer keys. 
-$flavour = shift; +$flavour = shift || "o32"; while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} open STDOUT,">$output"; @@ -140,10 +140,10 @@ $code.=<<___; .set reorder li $minus4,-4 and $ta0,$a2,$minus4 - $LD $t0,0($a1) beqz $ta0,.L_bn_mul_add_words_tail .L_bn_mul_add_words_loop: + $LD $t0,0($a1) $MULTU $t0,$a3 $LD $t1,0($a0) $LD $t2,$BNSZ($a1) @@ -200,10 +200,9 @@ $code.=<<___; $ADDU $v0,$ta2 sltu $at,$ta3,$at $ST $ta3,-$BNSZ($a0) - $ADDU $v0,$at .set noreorder - bgtzl $ta0,.L_bn_mul_add_words_loop - $LD $t0,0($a1) + bgtz $ta0,.L_bn_mul_add_words_loop + $ADDU $v0,$at beqz $a2,.L_bn_mul_add_words_return nop @@ -300,10 +299,10 @@ $code.=<<___; .set reorder li $minus4,-4 and $ta0,$a2,$minus4 - $LD $t0,0($a1) beqz $ta0,.L_bn_mul_words_tail .L_bn_mul_words_loop: + $LD $t0,0($a1) $MULTU $t0,$a3 $LD $t2,$BNSZ($a1) $LD $ta0,2*$BNSZ($a1) @@ -341,10 +340,9 @@ $code.=<<___; $ADDU $v0,$at sltu $ta3,$v0,$at $ST $v0,-$BNSZ($a0) - $ADDU $v0,$ta3,$ta2 .set noreorder - bgtzl $ta0,.L_bn_mul_words_loop - $LD $t0,0($a1) + bgtz $ta0,.L_bn_mul_words_loop + $ADDU $v0,$ta3,$ta2 beqz $a2,.L_bn_mul_words_return nop @@ -429,10 +427,10 @@ $code.=<<___; .set reorder li $minus4,-4 and $ta0,$a2,$minus4 - $LD $t0,0($a1) beqz $ta0,.L_bn_sqr_words_tail .L_bn_sqr_words_loop: + $LD $t0,0($a1) $MULTU $t0,$t0 $LD $t2,$BNSZ($a1) $LD $ta0,2*$BNSZ($a1) @@ -463,11 +461,10 @@ $code.=<<___; mflo $ta3 mfhi $ta2 $ST $ta3,-2*$BNSZ($a0) - $ST $ta2,-$BNSZ($a0) .set noreorder - bgtzl $ta0,.L_bn_sqr_words_loop - $LD $t0,0($a1) + bgtz $ta0,.L_bn_sqr_words_loop + $ST $ta2,-$BNSZ($a0) beqz $a2,.L_bn_sqr_words_return nop @@ -547,10 +544,10 @@ $code.=<<___; .set reorder li $minus4,-4 and $at,$a3,$minus4 - $LD $t0,0($a1) beqz $at,.L_bn_add_words_tail .L_bn_add_words_loop: + $LD $t0,0($a1) $LD $ta0,0($a2) subu $a3,4 $LD $t1,$BNSZ($a1) @@ -589,11 +586,10 @@ $code.=<<___; $ADDU $t3,$ta3,$v0 sltu $v0,$t3,$ta3 $ST $t3,-$BNSZ($a0) - $ADDU $v0,$t9 .set noreorder - bgtzl $at,.L_bn_add_words_loop - $LD $t0,0($a1) + 
bgtz $at,.L_bn_add_words_loop + $ADDU $v0,$t9 beqz $a3,.L_bn_add_words_return nop @@ -679,10 +675,10 @@ $code.=<<___; .set reorder li $minus4,-4 and $at,$a3,$minus4 - $LD $t0,0($a1) beqz $at,.L_bn_sub_words_tail .L_bn_sub_words_loop: + $LD $t0,0($a1) $LD $ta0,0($a2) subu $a3,4 $LD $t1,$BNSZ($a1) @@ -722,11 +718,10 @@ $code.=<<___; $SUBU $t3,$ta3,$v0 sgtu $v0,$t3,$ta3 $ST $t3,-$BNSZ($a0) - $ADDU $v0,$t9 .set noreorder - bgtzl $at,.L_bn_sub_words_loop - $LD $t0,0($a1) + bgtz $at,.L_bn_sub_words_loop + $ADDU $v0,$t9 beqz $a3,.L_bn_sub_words_return nop @@ -819,7 +814,7 @@ ___ $code.=<<___; .set reorder move $ta3,$ra - bal bn_div_words + bal bn_div_words_internal move $ra,$ta3 $MULTU $ta2,$v0 $LD $t2,-2*$BNSZ($a3) @@ -840,8 +835,9 @@ $code.=<<___; sltu $ta0,$a1,$a2 or $t8,$ta0 .set noreorder - beqzl $at,.L_bn_div_3_words_inner_loop + beqz $at,.L_bn_div_3_words_inner_loop $SUBU $v0,1 + $ADDU $v0,1 .set reorder .L_bn_div_3_words_inner_loop_done: .set noreorder @@ -902,7 +898,8 @@ $code.=<<___; and $t2,$a0 $SRL $at,$a1,$t1 .set noreorder - bnezl $t2,.+8 + beqz $t2,.+12 + nop break 6 # signal overflow .set reorder $SLL $a0,$t9 @@ -917,7 +914,8 @@ $code.=<<___; $SRL $DH,$a2,4*$BNSZ # bits sgeu $at,$a0,$a2 .set noreorder - bnezl $at,.+8 + beqz $at,.+12 + nop $SUBU $a0,$a2 .set reorder @@ -1874,6 +1872,41 @@ ___ ($a_4,$a_5,$a_6,$a_7)=($b_0,$b_1,$b_2,$b_3); +sub add_c2 () { +my ($hi,$lo,$c0,$c1,$c2, + $warm, # !$warm denotes first call with specific sequence of + # $c_[XYZ] when there is no Z-carry to accumulate yet; + $an,$bn # these two are arguments for multiplication which + # result is used in *next* step [which is why it's + # commented as "forward multiplication" below]; + )=@_; +$code.=<<___; + mflo $lo + mfhi $hi + $ADDU $c0,$lo + sltu $at,$c0,$lo + $MULTU $an,$bn # forward multiplication + $ADDU $c0,$lo + $ADDU $at,$hi + sltu $lo,$c0,$lo + $ADDU $c1,$at + $ADDU $hi,$lo +___ +$code.=<<___ if (!$warm); + sltu $c2,$c1,$at + $ADDU $c1,$hi + sltu $hi,$c1,$hi + $ADDU 
$c2,$hi +___ +$code.=<<___ if ($warm); + sltu $at,$c1,$at + $ADDU $c1,$hi + $ADDU $c2,$at + sltu $hi,$c1,$hi + $ADDU $c2,$hi +___ +} + $code.=<<___; .align 5 @@ -1922,21 +1955,10 @@ $code.=<<___; sltu $at,$c_2,$t_1 $ADDU $c_3,$t_2,$at $ST $c_2,$BNSZ($a0) - - mflo $t_1 - mfhi $t_2 - slt $c_2,$t_2,$zero - $SLL $t_2,1 - $MULTU $a_1,$a_1 # mul_add_c(a[1],b[1],c3,c1,c2); - slt $a2,$t_1,$zero - $ADDU $t_2,$a2 - $SLL $t_1,1 - $ADDU $c_3,$t_1 - sltu $at,$c_3,$t_1 - $ADDU $t_2,$at - $ADDU $c_1,$t_2 - sltu $at,$c_1,$t_2 - $ADDU $c_2,$at +___ + &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0, + $a_1,$a_1); # mul_add_c(a[1],b[1],c3,c1,c2); +$code.=<<___; mflo $t_1 mfhi $t_2 $ADDU $c_3,$t_1 @@ -1947,67 +1969,19 @@ $code.=<<___; sltu $at,$c_1,$t_2 $ADDU $c_2,$at $ST $c_3,2*$BNSZ($a0) - - mflo $t_1 - mfhi $t_2 - slt $c_3,$t_2,$zero - $SLL $t_2,1 - $MULTU $a_1,$a_2 # mul_add_c2(a[1],b[2],c1,c2,c3); - slt $a2,$t_1,$zero - $ADDU $t_2,$a2 - $SLL $t_1,1 - $ADDU $c_1,$t_1 - sltu $at,$c_1,$t_1 - $ADDU $t_2,$at - $ADDU $c_2,$t_2 - sltu $at,$c_2,$t_2 - $ADDU $c_3,$at - mflo $t_1 - mfhi $t_2 - slt $at,$t_2,$zero - $ADDU $c_3,$at - $MULTU $a_4,$a_0 # mul_add_c2(a[4],b[0],c2,c3,c1); - $SLL $t_2,1 - slt $a2,$t_1,$zero - $ADDU $t_2,$a2 - $SLL $t_1,1 - $ADDU $c_1,$t_1 - sltu $at,$c_1,$t_1 - $ADDU $t_2,$at - $ADDU $c_2,$t_2 - sltu $at,$c_2,$t_2 - $ADDU $c_3,$at +___ + &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,0, + $a_1,$a_2); # mul_add_c2(a[1],b[2],c1,c2,c3); + &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,1, + $a_4,$a_0); # mul_add_c2(a[4],b[0],c2,c3,c1); +$code.=<<___; $ST $c_1,3*$BNSZ($a0) - - mflo $t_1 - mfhi $t_2 - slt $c_1,$t_2,$zero - $SLL $t_2,1 - $MULTU $a_3,$a_1 # mul_add_c2(a[3],b[1],c2,c3,c1); - slt $a2,$t_1,$zero - $ADDU $t_2,$a2 - $SLL $t_1,1 - $ADDU $c_2,$t_1 - sltu $at,$c_2,$t_1 - $ADDU $t_2,$at - $ADDU $c_3,$t_2 - sltu $at,$c_3,$t_2 - $ADDU $c_1,$at - mflo $t_1 - mfhi $t_2 - slt $at,$t_2,$zero - $ADDU $c_1,$at - $MULTU $a_2,$a_2 # mul_add_c(a[2],b[2],c2,c3,c1); - $SLL $t_2,1 - slt $a2,$t_1,$zero - $ADDU $t_2,$a2 
- $SLL $t_1,1 - $ADDU $c_2,$t_1 - sltu $at,$c_2,$t_1 - $ADDU $t_2,$at - $ADDU $c_3,$t_2 - sltu $at,$c_3,$t_2 - $ADDU $c_1,$at +___ + &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,0, + $a_3,$a_1); # mul_add_c2(a[3],b[1],c2,c3,c1); + &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,1, + $a_2,$a_2); # mul_add_c(a[2],b[2],c2,c3,c1); +$code.=<<___; mflo $t_1 mfhi $t_2 $ADDU $c_2,$t_1 @@ -2018,97 +1992,23 @@ $code.=<<___; sltu $at,$c_3,$t_2 $ADDU $c_1,$at $ST $c_2,4*$BNSZ($a0) - - mflo $t_1 - mfhi $t_2 - slt $c_2,$t_2,$zero - $SLL $t_2,1 - $MULTU $a_1,$a_4 # mul_add_c2(a[1],b[4],c3,c1,c2); - slt $a2,$t_1,$zero - $ADDU $t_2,$a2 - $SLL $t_1,1 - $ADDU $c_3,$t_1 - sltu $at,$c_3,$t_1 - $ADDU $t_2,$at - $ADDU $c_1,$t_2 - sltu $at,$c_1,$t_2 - $ADDU $c_2,$at - mflo $t_1 - mfhi $t_2 - slt $at,$t_2,$zero - $ADDU $c_2,$at - $MULTU $a_2,$a_3 # mul_add_c2(a[2],b[3],c3,c1,c2); - $SLL $t_2,1 - slt $a2,$t_1,$zero - $ADDU $t_2,$a2 - $SLL $t_1,1 - $ADDU $c_3,$t_1 - sltu $at,$c_3,$t_1 - $ADDU $t_2,$at - $ADDU $c_1,$t_2 - sltu $at,$c_1,$t_2 - $ADDU $c_2,$at - mflo $t_1 - mfhi $t_2 - slt $at,$t_2,$zero - $MULTU $a_6,$a_0 # mul_add_c2(a[6],b[0],c1,c2,c3); - $ADDU $c_2,$at - $SLL $t_2,1 - slt $a2,$t_1,$zero - $ADDU $t_2,$a2 - $SLL $t_1,1 - $ADDU $c_3,$t_1 - sltu $at,$c_3,$t_1 - $ADDU $t_2,$at - $ADDU $c_1,$t_2 - sltu $at,$c_1,$t_2 - $ADDU $c_2,$at +___ + &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0, + $a_1,$a_4); # mul_add_c2(a[1],b[4],c3,c1,c2); + &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,1, + $a_2,$a_3); # mul_add_c2(a[2],b[3],c3,c1,c2); + &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,1, + $a_6,$a_0); # mul_add_c2(a[6],b[0],c1,c2,c3); +$code.=<<___; $ST $c_3,5*$BNSZ($a0) - - mflo $t_1 - mfhi $t_2 - slt $c_3,$t_2,$zero - $SLL $t_2,1 - $MULTU $a_5,$a_1 # mul_add_c2(a[5],b[1],c1,c2,c3); - slt $a2,$t_1,$zero - $ADDU $t_2,$a2 - $SLL $t_1,1 - $ADDU $c_1,$t_1 - sltu $at,$c_1,$t_1 - $ADDU $t_2,$at - $ADDU $c_2,$t_2 - sltu $at,$c_2,$t_2 - $ADDU $c_3,$at - mflo $t_1 - mfhi $t_2 - slt $at,$t_2,$zero - $ADDU $c_3,$at - $MULTU $a_4,$a_2 # 
mul_add_c2(a[4],b[2],c1,c2,c3); - $SLL $t_2,1 - slt $a2,$t_1,$zero - $ADDU $t_2,$a2 - $SLL $t_1,1 - $ADDU $c_1,$t_1 - sltu $at,$c_1,$t_1 - $ADDU $t_2,$at - $ADDU $c_2,$t_2 - sltu $at,$c_2,$t_2 - $ADDU $c_3,$at - mflo $t_1 - mfhi $t_2 - slt $at,$t_2,$zero - $ADDU $c_3,$at - $MULTU $a_3,$a_3 # mul_add_c(a[3],b[3],c1,c2,c3); - $SLL $t_2,1 - slt $a2,$t_1,$zero - $ADDU $t_2,$a2 - $SLL $t_1,1 - $ADDU $c_1,$t_1 - sltu $at,$c_1,$t_1 - $ADDU $t_2,$at - $ADDU $c_2,$t_2 - sltu $at,$c_2,$t_2 - $ADDU $c_3,$at +___ + &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,0, + $a_5,$a_1); # mul_add_c2(a[5],b[1],c1,c2,c3); + &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,1, + $a_4,$a_2); # mul_add_c2(a[4],b[2],c1,c2,c3); + &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,1, + $a_3,$a_3); # mul_add_c(a[3],b[3],c1,c2,c3); +$code.=<<___; mflo $t_1 mfhi $t_2 $ADDU $c_1,$t_1 @@ -2119,112 +2019,25 @@ $code.=<<___; sltu $at,$c_2,$t_2 $ADDU $c_3,$at $ST $c_1,6*$BNSZ($a0) - - mflo $t_1 - mfhi $t_2 - slt $c_1,$t_2,$zero - $SLL $t_2,1 - $MULTU $a_1,$a_6 # mul_add_c2(a[1],b[6],c2,c3,c1); - slt $a2,$t_1,$zero - $ADDU $t_2,$a2 - $SLL $t_1,1 - $ADDU $c_2,$t_1 - sltu $at,$c_2,$t_1 - $ADDU $t_2,$at - $ADDU $c_3,$t_2 - sltu $at,$c_3,$t_2 - $ADDU $c_1,$at - mflo $t_1 - mfhi $t_2 - slt $at,$t_2,$zero - $ADDU $c_1,$at - $MULTU $a_2,$a_5 # mul_add_c2(a[2],b[5],c2,c3,c1); - $SLL $t_2,1 - slt $a2,$t_1,$zero - $ADDU $t_2,$a2 - $SLL $t_1,1 - $ADDU $c_2,$t_1 - sltu $at,$c_2,$t_1 - $ADDU $t_2,$at - $ADDU $c_3,$t_2 - sltu $at,$c_3,$t_2 - $ADDU $c_1,$at - mflo $t_1 - mfhi $t_2 - slt $at,$t_2,$zero - $ADDU $c_1,$at - $MULTU $a_3,$a_4 # mul_add_c2(a[3],b[4],c2,c3,c1); - $SLL $t_2,1 - slt $a2,$t_1,$zero - $ADDU $t_2,$a2 - $SLL $t_1,1 - $ADDU $c_2,$t_1 - sltu $at,$c_2,$t_1 - $ADDU $t_2,$at - $ADDU $c_3,$t_2 - sltu $at,$c_3,$t_2 - $ADDU $c_1,$at - mflo $t_1 - mfhi $t_2 - slt $at,$t_2,$zero - $ADDU $c_1,$at - $MULTU $a_7,$a_1 # mul_add_c2(a[7],b[1],c3,c1,c2); - $SLL $t_2,1 - slt $a2,$t_1,$zero - $ADDU $t_2,$a2 - $SLL $t_1,1 - $ADDU $c_2,$t_1 - sltu $at,$c_2,$t_1 - 
$ADDU $t_2,$at - $ADDU $c_3,$t_2 - sltu $at,$c_3,$t_2 - $ADDU $c_1,$at +___ + &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,0, + $a_1,$a_6); # mul_add_c2(a[1],b[6],c2,c3,c1); + &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,1, + $a_2,$a_5); # mul_add_c2(a[2],b[5],c2,c3,c1); + &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,1, + $a_3,$a_4); # mul_add_c2(a[3],b[4],c2,c3,c1); + &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,1, + $a_7,$a_1); # mul_add_c2(a[7],b[1],c3,c1,c2); +$code.=<<___; $ST $c_2,7*$BNSZ($a0) - - mflo $t_1 - mfhi $t_2 - slt $c_2,$t_2,$zero - $SLL $t_2,1 - $MULTU $a_6,$a_2 # mul_add_c2(a[6],b[2],c3,c1,c2); - slt $a2,$t_1,$zero - $ADDU $t_2,$a2 - $SLL $t_1,1 - $ADDU $c_3,$t_1 - sltu $at,$c_3,$t_1 - $ADDU $t_2,$at - $ADDU $c_1,$t_2 - sltu $at,$c_1,$t_2 - $ADDU $c_2,$at - mflo $t_1 - mfhi $t_2 - slt $at,$t_2,$zero - $ADDU $c_2,$at - $MULTU $a_5,$a_3 # mul_add_c2(a[5],b[3],c3,c1,c2); - $SLL $t_2,1 - slt $a2,$t_1,$zero - $ADDU $t_2,$a2 - $SLL $t_1,1 - $ADDU $c_3,$t_1 - sltu $at,$c_3,$t_1 - $ADDU $t_2,$at - $ADDU $c_1,$t_2 - sltu $at,$c_1,$t_2 - $ADDU $c_2,$at - mflo $t_1 - mfhi $t_2 - slt $at,$t_2,$zero - $ADDU $c_2,$at - $MULTU $a_4,$a_4 # mul_add_c(a[4],b[4],c3,c1,c2); - $SLL $t_2,1 - slt $a2,$t_1,$zero - $ADDU $t_2,$a2 - $SLL $t_1,1 - $ADDU $c_3,$t_1 - sltu $at,$c_3,$t_1 - $ADDU $t_2,$at - $ADDU $c_1,$t_2 - sltu $at,$c_1,$t_2 - $ADDU $c_2,$at +___ + &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0, + $a_6,$a_2); # mul_add_c2(a[6],b[2],c3,c1,c2); + &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,1, + $a_5,$a_3); # mul_add_c2(a[5],b[3],c3,c1,c2); + &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,1, + $a_4,$a_4); # mul_add_c(a[4],b[4],c3,c1,c2); +$code.=<<___; mflo $t_1 mfhi $t_2 $ADDU $c_3,$t_1 @@ -2235,82 +2048,21 @@ $code.=<<___; sltu $at,$c_1,$t_2 $ADDU $c_2,$at $ST $c_3,8*$BNSZ($a0) - - mflo $t_1 - mfhi $t_2 - slt $c_3,$t_2,$zero - $SLL $t_2,1 - $MULTU $a_3,$a_6 # mul_add_c2(a[3],b[6],c1,c2,c3); - slt $a2,$t_1,$zero - $ADDU $t_2,$a2 - $SLL $t_1,1 - $ADDU $c_1,$t_1 - sltu $at,$c_1,$t_1 - $ADDU $t_2,$at - $ADDU $c_2,$t_2 - sltu $at,$c_2,$t_2 - $ADDU 
$c_3,$at - mflo $t_1 - mfhi $t_2 - slt $at,$t_2,$zero - $ADDU $c_3,$at - $MULTU $a_4,$a_5 # mul_add_c2(a[4],b[5],c1,c2,c3); - $SLL $t_2,1 - slt $a2,$t_1,$zero - $ADDU $t_2,$a2 - $SLL $t_1,1 - $ADDU $c_1,$t_1 - sltu $at,$c_1,$t_1 - $ADDU $t_2,$at - $ADDU $c_2,$t_2 - sltu $at,$c_2,$t_2 - $ADDU $c_3,$at - mflo $t_1 - mfhi $t_2 - slt $at,$t_2,$zero - $ADDU $c_3,$at - $MULTU $a_7,$a_3 # mul_add_c2(a[7],b[3],c2,c3,c1); - $SLL $t_2,1 - slt $a2,$t_1,$zero - $ADDU $t_2,$a2 - $SLL $t_1,1 - $ADDU $c_1,$t_1 - sltu $at,$c_1,$t_1 - $ADDU $t_2,$at - $ADDU $c_2,$t_2 - sltu $at,$c_2,$t_2 - $ADDU $c_3,$at +___ + &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,0, + $a_3,$a_6); # mul_add_c2(a[3],b[6],c1,c2,c3); + &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,1, + $a_4,$a_5); # mul_add_c2(a[4],b[5],c1,c2,c3); + &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,1, + $a_7,$a_3); # mul_add_c2(a[7],b[3],c2,c3,c1); +$code.=<<___; $ST $c_1,9*$BNSZ($a0) - - mflo $t_1 - mfhi $t_2 - slt $c_1,$t_2,$zero - $SLL $t_2,1 - $MULTU $a_6,$a_4 # mul_add_c2(a[6],b[4],c2,c3,c1); - slt $a2,$t_1,$zero - $ADDU $t_2,$a2 - $SLL $t_1,1 - $ADDU $c_2,$t_1 - sltu $at,$c_2,$t_1 - $ADDU $t_2,$at - $ADDU $c_3,$t_2 - sltu $at,$c_3,$t_2 - $ADDU $c_1,$at - mflo $t_1 - mfhi $t_2 - slt $at,$t_2,$zero - $ADDU $c_1,$at - $MULTU $a_5,$a_5 # mul_add_c(a[5],b[5],c2,c3,c1); - $SLL $t_2,1 - slt $a2,$t_1,$zero - $ADDU $t_2,$a2 - $SLL $t_1,1 - $ADDU $c_2,$t_1 - sltu $at,$c_2,$t_1 - $ADDU $t_2,$at - $ADDU $c_3,$t_2 - sltu $at,$c_3,$t_2 - $ADDU $c_1,$at +___ + &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,0, + $a_6,$a_4); # mul_add_c2(a[6],b[4],c2,c3,c1); + &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,1, + $a_5,$a_5); # mul_add_c(a[5],b[5],c2,c3,c1); +$code.=<<___; mflo $t_1 mfhi $t_2 $ADDU $c_2,$t_1 @@ -2321,52 +2073,17 @@ $code.=<<___; sltu $at,$c_3,$t_2 $ADDU $c_1,$at $ST $c_2,10*$BNSZ($a0) - - mflo $t_1 - mfhi $t_2 - slt $c_2,$t_2,$zero - $SLL $t_2,1 - $MULTU $a_5,$a_6 # mul_add_c2(a[5],b[6],c3,c1,c2); - slt $a2,$t_1,$zero - $ADDU $t_2,$a2 - $SLL $t_1,1 - $ADDU $c_3,$t_1 - sltu $at,$c_3,$t_1 - 
$ADDU $t_2,$at - $ADDU $c_1,$t_2 - sltu $at,$c_1,$t_2 - $ADDU $c_2,$at - mflo $t_1 - mfhi $t_2 - slt $at,$t_2,$zero - $ADDU $c_2,$at - $MULTU $a_7,$a_5 # mul_add_c2(a[7],b[5],c1,c2,c3); - $SLL $t_2,1 - slt $a2,$t_1,$zero - $ADDU $t_2,$a2 - $SLL $t_1,1 - $ADDU $c_3,$t_1 - sltu $at,$c_3,$t_1 - $ADDU $t_2,$at - $ADDU $c_1,$t_2 - sltu $at,$c_1,$t_2 - $ADDU $c_2,$at +___ + &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0, + $a_5,$a_6); # mul_add_c2(a[5],b[6],c3,c1,c2); + &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,1, + $a_7,$a_5); # mul_add_c2(a[7],b[5],c1,c2,c3); +$code.=<<___; $ST $c_3,11*$BNSZ($a0) - - mflo $t_1 - mfhi $t_2 - slt $c_3,$t_2,$zero - $SLL $t_2,1 - $MULTU $a_6,$a_6 # mul_add_c(a[6],b[6],c1,c2,c3); - slt $a2,$t_1,$zero - $ADDU $t_2,$a2 - $SLL $t_1,1 - $ADDU $c_1,$t_1 - sltu $at,$c_1,$t_1 - $ADDU $t_2,$at - $ADDU $c_2,$t_2 - sltu $at,$c_2,$t_2 - $ADDU $c_3,$at +___ + &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,0, + $a_6,$a_6); # mul_add_c(a[6],b[6],c1,c2,c3); +$code.=<<___; mflo $t_1 mfhi $t_2 $ADDU $c_1,$t_1 @@ -2377,21 +2094,10 @@ $code.=<<___; sltu $at,$c_2,$t_2 $ADDU $c_3,$at $ST $c_1,12*$BNSZ($a0) - - mflo $t_1 - mfhi $t_2 - slt $c_1,$t_2,$zero - $SLL $t_2,1 - $MULTU $a_7,$a_7 # mul_add_c(a[7],b[7],c3,c1,c2); - slt $a2,$t_1,$zero - $ADDU $t_2,$a2 - $SLL $t_1,1 - $ADDU $c_2,$t_1 - sltu $at,$c_2,$t_1 - $ADDU $t_2,$at - $ADDU $c_3,$t_2 - sltu $at,$c_3,$t_2 - $ADDU $c_1,$at +___ + &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,0, + $a_7,$a_7); # mul_add_c(a[7],b[7],c3,c1,c2); +$code.=<<___; $ST $c_2,13*$BNSZ($a0) mflo $t_1 @@ -2459,21 +2165,10 @@ $code.=<<___; sltu $at,$c_2,$t_1 $ADDU $c_3,$t_2,$at $ST $c_2,$BNSZ($a0) - - mflo $t_1 - mfhi $t_2 - slt $c_2,$t_2,$zero - $SLL $t_2,1 - $MULTU $a_1,$a_1 # mul_add_c(a[1],b[1],c3,c1,c2); - slt $a2,$t_1,$zero - $ADDU $t_2,$a2 - $SLL $t_1,1 - $ADDU $c_3,$t_1 - sltu $at,$c_3,$t_1 - $ADDU $t_2,$at - $ADDU $c_1,$t_2 - sltu $at,$c_1,$t_2 - $ADDU $c_2,$at +___ + &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0, + $a_1,$a_1); # mul_add_c(a[1],b[1],c3,c1,c2); +$code.=<<___; mflo $t_1 
mfhi $t_2 $ADDU $c_3,$t_1 @@ -2484,52 +2179,17 @@ $code.=<<___; sltu $at,$c_1,$t_2 $ADDU $c_2,$at $ST $c_3,2*$BNSZ($a0) - - mflo $t_1 - mfhi $t_2 - slt $c_3,$t_2,$zero - $SLL $t_2,1 - $MULTU $a_1,$a_2 # mul_add_c(a2[1],b[2],c1,c2,c3); - slt $a2,$t_1,$zero - $ADDU $t_2,$a2 - $SLL $t_1,1 - $ADDU $c_1,$t_1 - sltu $at,$c_1,$t_1 - $ADDU $t_2,$at - $ADDU $c_2,$t_2 - sltu $at,$c_2,$t_2 - $ADDU $c_3,$at - mflo $t_1 - mfhi $t_2 - slt $at,$t_2,$zero - $ADDU $c_3,$at - $MULTU $a_3,$a_1 # mul_add_c2(a[3],b[1],c2,c3,c1); - $SLL $t_2,1 - slt $a2,$t_1,$zero - $ADDU $t_2,$a2 - $SLL $t_1,1 - $ADDU $c_1,$t_1 - sltu $at,$c_1,$t_1 - $ADDU $t_2,$at - $ADDU $c_2,$t_2 - sltu $at,$c_2,$t_2 - $ADDU $c_3,$at +___ + &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,0, + $a_1,$a_2); # mul_add_c2(a2[1],b[2],c1,c2,c3); + &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,1, + $a_3,$a_1); # mul_add_c2(a[3],b[1],c2,c3,c1); +$code.=<<___; $ST $c_1,3*$BNSZ($a0) - - mflo $t_1 - mfhi $t_2 - slt $c_1,$t_2,$zero - $SLL $t_2,1 - $MULTU $a_2,$a_2 # mul_add_c(a[2],b[2],c2,c3,c1); - slt $a2,$t_1,$zero - $ADDU $t_2,$a2 - $SLL $t_1,1 - $ADDU $c_2,$t_1 - sltu $at,$c_2,$t_1 - $ADDU $t_2,$at - $ADDU $c_3,$t_2 - sltu $at,$c_3,$t_2 - $ADDU $c_1,$at +___ + &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,0, + $a_2,$a_2); # mul_add_c(a[2],b[2],c2,c3,c1); +$code.=<<___; mflo $t_1 mfhi $t_2 $ADDU $c_2,$t_1 @@ -2540,21 +2200,10 @@ $code.=<<___; sltu $at,$c_3,$t_2 $ADDU $c_1,$at $ST $c_2,4*$BNSZ($a0) - - mflo $t_1 - mfhi $t_2 - slt $c_2,$t_2,$zero - $SLL $t_2,1 - $MULTU $a_3,$a_3 # mul_add_c(a[3],b[3],c1,c2,c3); - slt $a2,$t_1,$zero - $ADDU $t_2,$a2 - $SLL $t_1,1 - $ADDU $c_3,$t_1 - sltu $at,$c_3,$t_1 - $ADDU $t_2,$at - $ADDU $c_1,$t_2 - sltu $at,$c_1,$t_2 - $ADDU $c_2,$at +___ + &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0, + $a_3,$a_3); # mul_add_c(a[3],b[3],c1,c2,c3); +$code.=<<___; $ST $c_3,5*$BNSZ($a0) mflo $t_1 diff --git a/crypto/sha/asm/sha1-mips.pl b/crypto/sha/asm/sha1-mips.pl index f1a702f38f..ca50e1b1ee 100644 --- a/crypto/sha/asm/sha1-mips.pl +++ 
b/crypto/sha/asm/sha1-mips.pl @@ -42,7 +42,7 @@ # ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23)); # ($gp,$sp,$fp,$ra)=map("\$$_",(28..31)); # -$flavour = shift; # supported flavours are o32,n32,64,nubi32,nubi64 +$flavour = shift || "o32"; # supported flavours are o32,n32,64,nubi32,nubi64 if ($flavour =~ /64|n32/i) { $PTR_ADD="dadd"; # incidentally works even on n32 @@ -64,7 +64,7 @@ if ($flavour =~ /64|n32/i) { # ###################################################################### -$big_endian=(`echo MIPSEL | $ENV{CC} -E -P -`=~/MIPSEL/)?1:0; +$big_endian=(`echo MIPSEL | $ENV{CC} -E -`=~/MIPSEL/)?1:0; for (@ARGV) { $output=$_ if (/^\w[\w\-]*\.\w+$/); } open STDOUT,">$output"; diff --git a/crypto/sha/asm/sha512-mips.pl b/crypto/sha/asm/sha512-mips.pl index ba5b250890..00e795b0ad 100644 --- a/crypto/sha/asm/sha512-mips.pl +++ b/crypto/sha/asm/sha512-mips.pl @@ -45,7 +45,7 @@ # ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23)); # ($gp,$sp,$fp,$ra)=map("\$$_",(28..31)); # -$flavour = shift; # supported flavours are o32,n32,64,nubi32,nubi64 +$flavour = shift || "o32"; # supported flavours are o32,n32,64,nubi32,nubi64 if ($flavour =~ /64|n32/i) { $PTR_ADD="dadd"; # incidentally works even on n32 @@ -68,7 +68,7 @@ $pf = ($flavour =~ /nubi/i) ? 
$t0 : $t2; # ###################################################################### -$big_endian=(`echo MIPSEL | $ENV{CC} -E -P -`=~/MIPSEL/)?1:0; +$big_endian=(`echo MIPSEL | $ENV{CC} -E -`=~/MIPSEL/)?1:0; for (@ARGV) { $output=$_ if (/^\w[\w\-]*\.\w+$/); } open STDOUT,">$output"; @@ -244,7 +244,7 @@ $code.=<<___; .text .set noat -#if !defined(__vxworks) || defined(__pic__) +#if !defined(__mips_eabi) && (!defined(__vxworks) || defined(__pic__)) .option pic2 #endif @@ -351,7 +351,7 @@ $code.=<<___; $ST $G,6*$SZ($ctx) $ST $H,7*$SZ($ctx) - bnel $inp,@X[15],.Loop + bne $inp,@X[15],.Loop $PTR_SUB $Ktbl,`($rounds-16)*$SZ` # rewind $Ktbl $REG_L $ra,$FRAMESIZE-1*$SZREG($sp) From d674242a884368083bf1044cc4e6e30d8f452a50 Mon Sep 17 00:00:00 2001 From: Steve Marquess Date: Tue, 25 Apr 2017 08:15:14 -0400 Subject: [PATCH 117/120] Add linux-mips32be target for new platform Reviewed-by: Rich Salz Reviewed-by: Andy Polyakov (Merged from https://github.com/openssl/openssl/pull/3300) --- Configure | 6 +++++- config | 4 ++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/Configure b/Configure index 850948ef3b..84a2bc2d04 100755 --- a/Configure +++ b/Configure @@ -132,7 +132,9 @@ my $ia64_asm="ia64cpuid.o:bn-ia64.o ia64-mont.o::aes_core.o aes_cbc.o aes-ia64.o my $sparcv9_asm="sparcv9cap.o sparccpuid.o:bn-sparcv9.o sparcv9-mont.o sparcv9a-mont.o:des_enc-sparc.o fcrypt_b.o:aes_core.o aes_cbc.o aes-sparcv9.o:::sha1-sparcv9.o sha256-sparcv9.o sha512-sparcv9.o:::::::ghash-sparcv9.o::void"; my $sparcv8_asm=":sparcv8.o:des_enc-sparc.o fcrypt_b.o:::::::::::::void"; my $alpha_asm="alphacpuid.o:bn_asm.o alpha-mont.o:::::sha1-alpha.o:::::::ghash-alpha.o::void"; -my $mips32_asm=":bn-mips.o::aes_cbc.o aes-mips.o:::sha1-mips.o sha256-mips.o::::::::"; +# EXTREME: original asm spec was missing colon and final term. 
+#my $mips32_asm=":bn-mips.o::aes_cbc.o aes-mips.o:::sha1-mips.o sha256-mips.o::::::::"; +my $mips32_asm=":bn-mips.o::aes_cbc.o aes-mips.o:::sha1-mips.o sha256-mips.o:::::::::void"; my $mips64_asm=":bn-mips.o mips-mont.o::aes_cbc.o aes-mips.o:::sha1-mips.o sha256-mips.o sha512-mips.o::::::::"; my $s390x_asm="s390xcap.o s390xcpuid.o:bn-s390x.o s390x-mont.o s390x-gf2m.o::aes_ctr.o aes-s390x.o:::sha1-s390x.o sha256-s390x.o sha512-s390x.o::rc4-s390x.o:::::ghash-s390x.o:"; my $armv4_asm="armcap.o armv4cpuid.o:bn_asm.o armv4-mont.o armv4-gf2m.o::aes_cbc.o aes-armv4.o:::sha1-armv4-large.o sha256-armv4.o sha512-armv4.o:::::::ghash-armv4.o::void"; @@ -342,6 +344,8 @@ my %table=( # *-generic* is endian-neutral target, but ./config is free to # throw in -D[BL]_ENDIAN, whichever appropriate... "linux-generic32","gcc:-DTERMIO -O3 -fomit-frame-pointer -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${no_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", +#### Extreme add linux-mips32be +"linux-mips32be","gcc:-DB_ENDIAN -DTERMIO -O3 -march=mips32 -fomit-frame-pointer -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${mips32_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", "linux-ppc", "gcc:-DB_ENDIAN -DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_RISC1 DES_UNROLL:${ppc32_asm}:linux32:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", # It's believed that majority of ARM toolchains predefine appropriate -march. # If you compiler does not, do complement config command line with one! 
diff --git a/config b/config index 4003e2fe52..36ab9f2ef6 100755 --- a/config +++ b/config @@ -543,6 +543,10 @@ case "$GUESSOS" in #fi OUT="irix-mips3-$CC" ;; + mips32be-*-linux2) + OUT=linux-mips32be + options="$options threads shared zlib-dynamic" + ;; ppc-apple-rhapsody) OUT="rhapsody-ppc-cc" ;; ppc-apple-darwin*) ISA64=`(sysctl -n hw.optional.64bitops) 2>/dev/null` From fe36a698477e7cb1a49de3f4cba5ad7f89f5ad4c Mon Sep 17 00:00:00 2001 From: Steve Marquess Date: Mon, 21 Aug 2017 15:57:25 -0400 Subject: [PATCH 118/120] Add "wishlist" of desired but possibly unobtainable fixes/improvements Reviewed-by: Richard Levitte Reviewed-by: Stephen Henson (Merged from https://github.com/openssl/openssl/pull/4208) --- README.wishlist | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 README.wishlist diff --git a/README.wishlist b/README.wishlist new file mode 100644 index 0000000000..111ee3ce75 --- /dev/null +++ b/README.wishlist @@ -0,0 +1,31 @@ +A "wish list" of changes we'd like to make to the FIPS module if we could. +Note the CMVP requires retesting of all previously tested platforms +("Operational Environments") to implement any changes considered "cryptographically +significant". Since the OpenSSL FIPS module v2.0 has some 250 such formally +tested platforms (and counting), retesting just isn't logistically or economically +feasible. 
+ +-------- +https://github.com/openssl/openssl/pull/4157 +From 2017-08-14, Fix GCM MAC computation for AES-GCM by srahul123 +cryptographically significant, not fixable + +-------- +Andy Polyakov: harmonize with __thumb__ clause in FIPS_ref_point() (#3354), +https://patch-diff.githubusercontent.com/raw/openssl/openssl/pull/3354.patch +https://github.com/openssl/openssl/pull/3354#pullrequestreview-36086406 +May be possible to introduce in future change letter + +-------- +CVE-2016-0701 +cryptographically significant, not fixable + +-------- +CVE-2014-0076 +cryptographically significant, not fixable + +-------- +"Lucky 13", CVE-2013-0169 +cryptographically significant, not fixable + +-------- From 5526e5791f1426553b6f4806d1ac82efd6ab33bc Mon Sep 17 00:00:00 2001 From: Andy Polyakov Date: Fri, 25 Nov 2016 13:11:09 +0100 Subject: [PATCH 119/120] Add some C64x assembly modules [by minor adjustments of C64x+ modules]. AES, SHA256 and SHA512 modules can actually replace corresponding C64x+ modules. This is because C64x+ instructions don't actually provide "killer-argument" advantage in these modules. As for SHA1, even though its performance is exactly the same, C64x+ module is more responsive to interrupts, i.e. doesn't inhibit them for as long periods as C64x module.
Reviewed-by: Rich Salz Reviewed-by: Tim Hudson Reviewed-by: Stephen Henson (Merged from https://github.com/openssl/openssl/pull/4265) --- crypto/aes/asm/aes-c64x.pl | 1375 +++++++++++++++++++++++++++++ crypto/c64xcpuid.pl | 326 +++++++ crypto/sha/asm/sha1-c64x-large.pl | 230 +++++ crypto/sha/asm/sha1-c64x.pl | 330 +++++++ crypto/sha/asm/sha256-c64x.pl | 313 +++++++ crypto/sha/asm/sha512-c64x.pl | 437 +++++++++ 6 files changed, 3011 insertions(+) create mode 100644 crypto/aes/asm/aes-c64x.pl create mode 100644 crypto/c64xcpuid.pl create mode 100644 crypto/sha/asm/sha1-c64x-large.pl create mode 100644 crypto/sha/asm/sha1-c64x.pl create mode 100644 crypto/sha/asm/sha256-c64x.pl create mode 100644 crypto/sha/asm/sha512-c64x.pl diff --git a/crypto/aes/asm/aes-c64x.pl b/crypto/aes/asm/aes-c64x.pl new file mode 100644 index 0000000000..0817128c1b --- /dev/null +++ b/crypto/aes/asm/aes-c64x.pl @@ -0,0 +1,1375 @@ +#!/usr/bin/env perl +# +# ==================================================================== +# Written by Andy Polyakov for the OpenSSL +# project. The module is, however, dual licensed under OpenSSL and +# CRYPTOGAMS licenses depending on where you obtain it. For further +# details see http://www.openssl.org/~appro/cryptogams/. +# ==================================================================== +# +# [Endian-neutral] AES for C64x. +# +# Even though loops are scheduled for 13 cycles, and thus expected +# performance is ~8.5 cycles per byte processed with 128-bit key, +# measured performance turned out to be ~10 cycles per byte. Discrepancy +# must be caused by limitations of L1D memory banking(*), see SPRU871 +# TI publication for further details. If it's any consolation, it's still +# ~20% faster than TI's linear assembly module anyway... Compared to +# aes_core.c compiled with cl6x 6.0 with -mv6400+ -o2 options this +# code is 3.75x faster and almost 3x smaller (tables included).
+# +# (*) This means that there might be subtle correlation between data +# and timing and one can wonder if it can be ... attacked:-( +# On the other hand this also means that *if* one chooses to +# implement *4* T-tables variant [instead of 1 T-table as in +# this implementation, or in addition to], then one ought to +# *interleave* them. Even though it complicates addressing, +# references to interleaved tables would be guaranteed not to +# clash. I reckon that it should be possible to break 8 cycles +# per byte "barrier," i.e. improve by ~20%, naturally at the +# cost of 8x increased pressure on L1D. 8x because you'd have +# to interleave both Te and Td tables... + +while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {} +open STDOUT,">$output"; + +($TEA,$TEB)=("A5","B5"); +($KPA,$KPB)=("A3","B1"); +@K=("A6","B6","A7","B7"); +@s=("A8","B8","A9","B9"); +@Te0=@Td0=("A16","B16","A17","B17"); +@Te1=@Td1=("A18","B18","A19","B19"); +@Te2=@Td2=("A20","B20","A21","B21"); +@Te3=@Td3=("A22","B22","A23","B23"); + +$code=<<___; + .text + + .if .ASSEMBLER_VERSION<7000000 + .asg 0,__TI_EABI__ + .endif + .if __TI_EABI__ + .nocmp + .asg AES_encrypt,_AES_encrypt + .asg AES_decrypt,_AES_decrypt + .asg AES_set_encrypt_key,_AES_set_encrypt_key + .asg AES_set_decrypt_key,_AES_set_decrypt_key + .asg AES_ctr32_encrypt,_AES_ctr32_encrypt + .endif + + .asg B3,RA + .asg A4,INP + .asg B4,OUT + .asg A6,KEY + .asg A4,RET + .asg B15,SP + + .eval 24,EXT0 + .eval 16,EXT1 + .eval 8,EXT2 + .eval 0,EXT3 + .eval 8,TBL1 + .eval 16,TBL2 + .eval 24,TBL3 + + .if .BIG_ENDIAN + .eval 24-EXT0,EXT0 + .eval 24-EXT1,EXT1 + .eval 24-EXT2,EXT2 + .eval 24-EXT3,EXT3 + .eval 32-TBL1,TBL1 + .eval 32-TBL2,TBL2 + .eval 32-TBL3,TBL3 + .endif + + .global _AES_encrypt +_AES_encrypt: + .asmfunc + MVK 1,B2 +__encrypt: + .if __TI_EABI__ + [B2] LDNDW *INP++,A9:A8 ; load input +|| MVKL \$PCR_OFFSET(AES_Te,__encrypt),$TEA +|| ADDKPC __encrypt,B0 + [B2] LDNDW *INP++,B9:B8 +|| MVKH \$PCR_OFFSET(AES_Te,__encrypt),$TEA +|| 
ADD 0,KEY,$KPA +|| ADD 4,KEY,$KPB + .else + [B2] LDNDW *INP++,A9:A8 ; load input +|| MVKL (AES_Te-__encrypt),$TEA +|| ADDKPC __encrypt,B0 + [B2] LDNDW *INP++,B9:B8 +|| MVKH (AES_Te-__encrypt),$TEA +|| ADD 0,KEY,$KPA +|| ADD 4,KEY,$KPB + .endif + LDW *$KPA++[2],$Te0[0] ; zero round key +|| LDW *$KPB++[2],$Te0[1] +|| MVK 60,A0 +|| ADD B0,$TEA,$TEA ; AES_Te + LDW *KEY[A0],B0 ; rounds +|| MVK 1024,A0 ; sizeof(AES_Te) + LDW *$KPA++[2],$Te0[2] +|| LDW *$KPB++[2],$Te0[3] +|| MV $TEA,$TEB + NOP + .if .BIG_ENDIAN + MV A9,$s[0] +|| MV A8,$s[1] +|| MV B9,$s[2] +|| MV B8,$s[3] + .else + MV A8,$s[0] +|| MV A9,$s[1] +|| MV B8,$s[2] +|| MV B9,$s[3] + .endif + XOR $Te0[0],$s[0],$s[0] +|| XOR $Te0[1],$s[1],$s[1] +|| LDW *$KPA++[2],$K[0] ; 1st round key +|| LDW *$KPB++[2],$K[1] + + LDW *$KPA++[2],$K[2] +|| LDW *$KPB++[2],$K[3] +|| EXTU $s[1],EXT1,24,$Te1[1] +|| EXTU $s[0],EXT3,24,$Te3[0] +|| SUB B0,1,B0 +;;==================================================================== +enc_loop?: + LDW *${TEB}[$Te1[1]],$Te1[1] ; Te1[s1>>8], t0 +|| LDW *${TEA}[$Te3[0]],$Te3[0] ; Te3[s0>>24], t1 +|| XOR $s[2],$Te0[2],$s[2] ; modulo-scheduled +|| XOR $s[3],$Te0[3],$s[3] ; modulo-scheduled +|| EXTU $s[1],EXT3,24,$Te3[1] +|| EXTU $s[0],EXT1,24,$Te1[0] + LDW *${TEB}[$Te3[1]],$Te3[1] ; Te3[s1>>24], t2 +|| LDW *${TEA}[$Te1[0]],$Te1[0] ; Te1[s0>>8], t3 +|| EXTU $s[2],EXT2,24,$Te2[2] +|| EXTU $s[3],EXT2,24,$Te2[3] + LDW *${TEA}[$Te2[2]],$Te2[2] ; Te2[s2>>16], t0 +|| LDW *${TEB}[$Te2[3]],$Te2[3] ; Te2[s3>>16], t1 +|| EXTU $s[3],EXT3,24,$Te3[3] +|| EXTU $s[2],EXT1,24,$Te1[2] + LDW *${TEB}[$Te3[3]],$Te3[3] ; Te3[s3>>24], t0 +|| LDW *${TEA}[$Te1[2]],$Te1[2] ; Te1[s2>>8], t1 +|| EXTU $s[0],EXT2,24,$Te2[0] +|| EXTU $s[1],EXT2,24,$Te2[1] + LDW *${TEA}[$Te2[0]],$Te2[0] ; Te2[s0>>16], t2 +|| LDW *${TEB}[$Te2[1]],$Te2[1] ; Te2[s1>>16], t3 +|| EXTU $s[3],EXT1,24,$Te1[3] +|| EXTU $s[2],EXT3,24,$Te3[2] + LDW *${TEB}[$Te1[3]],$Te1[3] ; Te1[s3>>8], t2 +|| LDW *${TEA}[$Te3[2]],$Te3[2] ; Te3[s2>>24], t3 +|| ROTL 
$Te1[1],TBL1,$Te3[0] ; t0 +|| ROTL $Te3[0],TBL3,$Te1[1] ; t1 +|| EXTU $s[0],EXT0,24,$Te0[0] +|| EXTU $s[1],EXT0,24,$Te0[1] + LDW *${TEA}[$Te0[0]],$Te0[0] ; Te0[s0], t0 +|| LDW *${TEB}[$Te0[1]],$Te0[1] ; Te0[s1], t1 +|| ROTL $Te3[1],TBL3,$Te1[0] ; t2 +|| ROTL $Te1[0],TBL1,$Te3[1] ; t3 +|| EXTU $s[2],EXT0,24,$Te0[2] +|| EXTU $s[3],EXT0,24,$Te0[3] +|| [B0] SUB B0,1,B0 + LDW *${TEA}[$Te0[2]],$Te0[2] ; Te0[s2], t2 +|| LDW *${TEB}[$Te0[3]],$Te0[3] ; Te0[s3], t3 +|| ROTL $Te2[2],TBL2,$Te2[2] ; t0 +|| ROTL $Te2[3],TBL2,$Te2[3] ; t1 +|| XOR $K[0],$Te3[0],$s[0] +|| XOR $K[1],$Te1[1],$s[1] +|| [B0] BNOP enc_loop? + ROTL $Te3[3],TBL3,$Te1[2] ; t0 +|| ROTL $Te1[2],TBL1,$Te3[3] ; t1 +|| XOR $K[2],$Te1[0],$s[2] +|| XOR $K[3],$Te3[1],$s[3] +|| LDW *$KPA++[2],$K[0] ; next round key +|| LDW *$KPB++[2],$K[1] + ROTL $Te2[0],TBL2,$Te2[0] ; t2 +|| ROTL $Te2[1],TBL2,$Te2[1] ; t3 +|| XOR $s[0],$Te2[2],$s[0] +|| XOR $s[1],$Te2[3],$s[1] +|| LDW *$KPA++[2],$K[2] +|| LDW *$KPB++[2],$K[3] + ROTL $Te1[3],TBL1,$Te3[2] ; t2 +|| ROTL $Te3[2],TBL3,$Te1[3] ; t3 +|| XOR $s[0],$Te1[2],$s[0] +|| XOR $s[1],$Te3[3],$s[1] + XOR $s[2],$Te2[0],$s[2] +|| XOR $s[3],$Te2[1],$s[3] +|| XOR $s[0],$Te0[0],$s[0] +|| XOR $s[1],$Te0[1],$s[1] + XOR $s[2],$Te3[2],$s[2] +|| XOR $s[3],$Te1[3],$s[3] +|| EXTU $s[1],EXT1,24,$Te1[1] +|| EXTU $s[0],EXT3,24,$Te3[0] +||[!B0] ADD ${TEA},A0,${TEA} ; point to Te4 +||[!B0] ADD ${TEB},A0,${TEB} +;;==================================================================== + LDBU *${TEB}[$Te1[1]],$Te1[1] ; Te1[s1>>8], t0 +|| LDBU *${TEA}[$Te3[0]],$Te3[0] ; Te3[s0>>24], t1 +|| XOR $s[2],$Te0[2],$s[2] ; modulo-scheduled +|| XOR $s[3],$Te0[3],$s[3] ; modulo-scheduled +|| EXTU $s[0],EXT0,24,$Te0[0] +|| EXTU $s[1],EXT0,24,$Te0[1] + LDBU *${TEA}[$Te0[0]],$Te0[0] ; Te0[s0], t0 +|| LDBU *${TEB}[$Te0[1]],$Te0[1] ; Te0[s1], t1 +|| EXTU $s[3],EXT3,24,$Te3[3] +|| EXTU $s[2],EXT1,24,$Te1[2] + LDBU *${TEB}[$Te3[3]],$Te3[3] ; Te3[s3>>24], t0 +|| LDBU *${TEA}[$Te1[2]],$Te1[2] ; Te1[s2>>8], t1 +|| EXTU 
$s[2],EXT2,24,$Te2[2] +|| EXTU $s[3],EXT2,24,$Te2[3] + LDBU *${TEA}[$Te2[2]],$Te2[2] ; Te2[s2>>16], t0 +|| LDBU *${TEB}[$Te2[3]],$Te2[3] ; Te2[s3>>16], t1 +|| EXTU $s[1],EXT3,24,$Te3[1] +|| EXTU $s[0],EXT1,24,$Te1[0] + LDBU *${TEB}[$Te3[1]],$Te3[1] ; Te3[s1>>24], t2 +|| LDBU *${TEA}[$Te1[0]],$Te1[0] ; Te1[s0>>8], t3 +|| EXTU $s[3],EXT1,24,$Te1[3] +|| EXTU $s[2],EXT3,24,$Te3[2] + LDBU *${TEB}[$Te1[3]],$Te1[3] ; Te1[s3>>8], t2 +|| LDBU *${TEA}[$Te3[2]],$Te3[2] ; Te3[s2>>24], t3 +|| EXTU $s[2],EXT0,24,$Te0[2] +|| EXTU $s[3],EXT0,24,$Te0[3] + LDBU *${TEA}[$Te0[2]],$Te0[2] ; Te0[s2], t2 +|| LDBU *${TEB}[$Te0[3]],$Te0[3] ; Te0[s3], t3 +|| EXTU $s[0],EXT2,24,$Te2[0] +|| EXTU $s[1],EXT2,24,$Te2[1] + LDBU *${TEA}[$Te2[0]],$Te2[0] ; Te2[s0>>16], t2 +|| LDBU *${TEB}[$Te2[1]],$Te2[1] ; Te2[s1>>16], t3 + + .if .BIG_ENDIAN + PACK2 $Te0[0],$Te1[1],$Te0[0] +|| PACK2 $Te0[1],$Te1[2],$Te0[1] + PACK2 $Te2[2],$Te3[3],$Te2[2] +|| PACK2 $Te2[3],$Te3[0],$Te2[3] + PACKL4 $Te0[0],$Te2[2],$Te0[0] +|| PACKL4 $Te0[1],$Te2[3],$Te0[1] + XOR $K[0],$Te0[0],$Te0[0] ; s[0] +|| XOR $K[1],$Te0[1],$Te0[1] ; s[1] + + PACK2 $Te0[2],$Te1[3],$Te0[2] +|| PACK2 $Te0[3],$Te1[0],$Te0[3] + PACK2 $Te2[0],$Te3[1],$Te2[0] +|| PACK2 $Te2[1],$Te3[2],$Te2[1] +|| BNOP RA + PACKL4 $Te0[2],$Te2[0],$Te0[2] +|| PACKL4 $Te0[3],$Te2[1],$Te0[3] + XOR $K[2],$Te0[2],$Te0[2] ; s[2] +|| XOR $K[3],$Te0[3],$Te0[3] ; s[3] + + MV $Te0[0],A9 +|| MV $Te0[1],A8 + MV $Te0[2],B9 +|| MV $Te0[3],B8 +|| [B2] STNDW A9:A8,*OUT++ + [B2] STNDW B9:B8,*OUT++ + .else + PACK2 $Te1[1],$Te0[0],$Te1[1] +|| PACK2 $Te1[2],$Te0[1],$Te1[2] + PACK2 $Te3[3],$Te2[2],$Te3[3] +|| PACK2 $Te3[0],$Te2[3],$Te3[0] + PACKL4 $Te3[3],$Te1[1],$Te1[1] +|| PACKL4 $Te3[0],$Te1[2],$Te1[2] + XOR $K[0],$Te1[1],$Te1[1] ; s[0] +|| XOR $K[1],$Te1[2],$Te1[2] ; s[1] + + PACK2 $Te1[3],$Te0[2],$Te1[3] +|| PACK2 $Te1[0],$Te0[3],$Te1[0] + PACK2 $Te3[1],$Te2[0],$Te3[1] +|| PACK2 $Te3[2],$Te2[1],$Te3[2] +|| BNOP RA + PACKL4 $Te3[1],$Te1[3],$Te1[3] +|| PACKL4 $Te3[2],$Te1[0],$Te1[0] + 
XOR $K[2],$Te1[3],$Te1[3] ; s[2] +|| XOR $K[3],$Te1[0],$Te1[0] ; s[3] + + MV $Te1[1],A8 +|| MV $Te1[2],A9 + MV $Te1[3],B8 +|| MV $Te1[0],B9 +|| [B2] STNDW A9:A8,*OUT++ + [B2] STNDW B9:B8,*OUT++ + .endif + .endasmfunc + + .global _AES_decrypt +_AES_decrypt: + .asmfunc + MVK 1,B2 +__decrypt: + .if __TI_EABI__ + [B2] LDNDW *INP++,A9:A8 ; load input +|| MVKL \$PCR_OFFSET(AES_Td,__decrypt),$TEA +|| ADDKPC __decrypt,B0 + [B2] LDNDW *INP++,B9:B8 +|| MVKH \$PCR_OFFSET(AES_Td,__decrypt),$TEA +|| ADD 0,KEY,$KPA +|| ADD 4,KEY,$KPB + .else + [B2] LDNDW *INP++,A9:A8 ; load input +|| MVKL (AES_Td-__decrypt),$TEA +|| ADDKPC __decrypt,B0 + [B2] LDNDW *INP++,B9:B8 +|| MVKH (AES_Td-__decrypt),$TEA +|| ADD 0,KEY,$KPA +|| ADD 4,KEY,$KPB + .endif + LDW *$KPA++[2],$Td0[0] ; zero round key +|| LDW *$KPB++[2],$Td0[1] +|| MVK 60,A0 +|| ADD B0,$TEA,$TEA ; AES_Td + LDW *KEY[A0],B0 ; rounds +|| MVK 1024,A0 ; sizeof(AES_Td) + LDW *$KPA++[2],$Td0[2] +|| LDW *$KPB++[2],$Td0[3] +|| MV $TEA,$TEB + NOP + .if .BIG_ENDIAN + MV A9,$s[0] +|| MV A8,$s[1] +|| MV B9,$s[2] +|| MV B8,$s[3] + .else + MV A8,$s[0] +|| MV A9,$s[1] +|| MV B8,$s[2] +|| MV B9,$s[3] + .endif + XOR $Td0[0],$s[0],$s[0] +|| XOR $Td0[1],$s[1],$s[1] +|| LDW *$KPA++[2],$K[0] ; 1st round key +|| LDW *$KPB++[2],$K[1] + + LDW *$KPA++[2],$K[2] +|| LDW *$KPB++[2],$K[3] +|| EXTU $s[1],EXT3,24,$Td3[1] +|| EXTU $s[0],EXT1,24,$Td1[0] +|| SUB B0,1,B0 +;;==================================================================== +dec_loop?: + LDW *${TEB}[$Td3[1]],$Td3[1] ; Td3[s1>>24], t0 +|| LDW *${TEA}[$Td1[0]],$Td1[0] ; Td1[s0>>8], t1 +|| XOR $s[2],$Td0[2],$s[2] ; modulo-scheduled +|| XOR $s[3],$Td0[3],$s[3] ; modulo-scheduled +|| EXTU $s[1],EXT1,24,$Td1[1] +|| EXTU $s[0],EXT3,24,$Td3[0] + LDW *${TEB}[$Td1[1]],$Td1[1] ; Td1[s1>>8], t2 +|| LDW *${TEA}[$Td3[0]],$Td3[0] ; Td3[s0>>24], t3 +|| EXTU $s[2],EXT2,24,$Td2[2] +|| EXTU $s[3],EXT2,24,$Td2[3] + LDW *${TEA}[$Td2[2]],$Td2[2] ; Td2[s2>>16], t0 +|| LDW *${TEB}[$Td2[3]],$Td2[3] ; Td2[s3>>16], t1 +|| EXTU 
$s[3],EXT1,24,$Td1[3] +|| EXTU $s[2],EXT3,24,$Td3[2] + LDW *${TEB}[$Td1[3]],$Td1[3] ; Td1[s3>>8], t0 +|| LDW *${TEA}[$Td3[2]],$Td3[2] ; Td3[s2>>24], t1 +|| EXTU $s[0],EXT2,24,$Td2[0] +|| EXTU $s[1],EXT2,24,$Td2[1] + LDW *${TEA}[$Td2[0]],$Td2[0] ; Td2[s0>>16], t2 +|| LDW *${TEB}[$Td2[1]],$Td2[1] ; Td2[s1>>16], t3 +|| EXTU $s[3],EXT3,24,$Td3[3] +|| EXTU $s[2],EXT1,24,$Td1[2] + LDW *${TEB}[$Td3[3]],$Td3[3] ; Td3[s3>>24], t2 +|| LDW *${TEA}[$Td1[2]],$Td1[2] ; Td1[s2>>8], t3 +|| ROTL $Td3[1],TBL3,$Td1[0] ; t0 +|| ROTL $Td1[0],TBL1,$Td3[1] ; t1 +|| EXTU $s[0],EXT0,24,$Td0[0] +|| EXTU $s[1],EXT0,24,$Td0[1] + LDW *${TEA}[$Td0[0]],$Td0[0] ; Td0[s0], t0 +|| LDW *${TEB}[$Td0[1]],$Td0[1] ; Td0[s1], t1 +|| ROTL $Td1[1],TBL1,$Td3[0] ; t2 +|| ROTL $Td3[0],TBL3,$Td1[1] ; t3 +|| EXTU $s[2],EXT0,24,$Td0[2] +|| EXTU $s[3],EXT0,24,$Td0[3] +|| [B0] SUB B0,1,B0 + LDW *${TEA}[$Td0[2]],$Td0[2] ; Td0[s2], t2 +|| LDW *${TEB}[$Td0[3]],$Td0[3] ; Td0[s3], t3 +|| ROTL $Td2[2],TBL2,$Td2[2] ; t0 +|| ROTL $Td2[3],TBL2,$Td2[3] ; t1 +|| XOR $K[0],$Td1[0],$s[0] +|| XOR $K[1],$Td3[1],$s[1] +|| [B0] BNOP dec_loop? 
+ ROTL $Td1[3],TBL1,$Td3[2] ; t0 +|| ROTL $Td3[2],TBL3,$Td1[3] ; t1 +|| XOR $K[2],$Td3[0],$s[2] +|| XOR $K[3],$Td1[1],$s[3] +|| LDW *$KPA++[2],$K[0] ; next round key +|| LDW *$KPB++[2],$K[1] + ROTL $Td2[0],TBL2,$Td2[0] ; t2 +|| ROTL $Td2[1],TBL2,$Td2[1] ; t3 +|| XOR $s[0],$Td2[2],$s[0] +|| XOR $s[1],$Td2[3],$s[1] +|| LDW *$KPA++[2],$K[2] +|| LDW *$KPB++[2],$K[3] + ROTL $Td3[3],TBL3,$Td1[2] ; t2 +|| ROTL $Td1[2],TBL1,$Td3[3] ; t3 +|| XOR $s[0],$Td3[2],$s[0] +|| XOR $s[1],$Td1[3],$s[1] + XOR $s[2],$Td2[0],$s[2] +|| XOR $s[3],$Td2[1],$s[3] +|| XOR $s[0],$Td0[0],$s[0] +|| XOR $s[1],$Td0[1],$s[1] + XOR $s[2],$Td1[2],$s[2] +|| XOR $s[3],$Td3[3],$s[3] +|| EXTU $s[1],EXT3,24,$Td3[1] +|| EXTU $s[0],EXT1,24,$Td1[0] +||[!B0] ADD ${TEA},A0,${TEA} ; point to Td4 +||[!B0] ADD ${TEB},A0,${TEB} +;;==================================================================== + LDBU *${TEB}[$Td3[1]],$Td3[1] ; Td3[s1>>24], t0 +|| LDBU *${TEA}[$Td1[0]],$Td1[0] ; Td1[s0>>8], t1 +|| XOR $s[2],$Td0[2],$s[2] ; modulo-scheduled +|| XOR $s[3],$Td0[3],$s[3] ; modulo-scheduled +|| EXTU $s[0],EXT0,24,$Td0[0] +|| EXTU $s[1],EXT0,24,$Td0[1] + LDBU *${TEA}[$Td0[0]],$Td0[0] ; Td0[s0], t0 +|| LDBU *${TEB}[$Td0[1]],$Td0[1] ; Td0[s1], t1 +|| EXTU $s[2],EXT2,24,$Td2[2] +|| EXTU $s[3],EXT2,24,$Td2[3] + LDBU *${TEA}[$Td2[2]],$Td2[2] ; Td2[s2>>16], t0 +|| LDBU *${TEB}[$Td2[3]],$Td2[3] ; Td2[s3>>16], t1 +|| EXTU $s[3],EXT1,24,$Td1[3] +|| EXTU $s[2],EXT3,24,$Td3[2] + LDBU *${TEB}[$Td1[3]],$Td1[3] ; Td1[s3>>8], t0 +|| LDBU *${TEA}[$Td3[2]],$Td3[2] ; Td3[s2>>24], t1 +|| EXTU $s[1],EXT1,24,$Td1[1] +|| EXTU $s[0],EXT3,24,$Td3[0] + LDBU *${TEB}[$Td1[1]],$Td1[1] ; Td1[s1>>8], t2 +|| LDBU *${TEA}[$Td3[0]],$Td3[0] ; Td3[s0>>24], t3 +|| EXTU $s[0],EXT2,24,$Td2[0] +|| EXTU $s[1],EXT2,24,$Td2[1] + LDBU *${TEA}[$Td2[0]],$Td2[0] ; Td2[s0>>16], t2 +|| LDBU *${TEB}[$Td2[1]],$Td2[1] ; Td2[s1>>16], t3 +|| EXTU $s[3],EXT3,24,$Td3[3] +|| EXTU $s[2],EXT1,24,$Td1[2] + LDBU *${TEB}[$Td3[3]],$Td3[3] ; Td3[s3>>24], t2 +|| LDBU 
*${TEA}[$Td1[2]],$Td1[2] ; Td1[s2>>8], t3 +|| EXTU $s[2],EXT0,24,$Td0[2] +|| EXTU $s[3],EXT0,24,$Td0[3] + LDBU *${TEA}[$Td0[2]],$Td0[2] ; Td0[s2], t2 +|| LDBU *${TEB}[$Td0[3]],$Td0[3] ; Td0[s3], t3 + + .if .BIG_ENDIAN + PACK2 $Td0[0],$Td1[3],$Td0[0] +|| PACK2 $Td0[1],$Td1[0],$Td0[1] + PACK2 $Td2[2],$Td3[1],$Td2[2] +|| PACK2 $Td2[3],$Td3[2],$Td2[3] + PACKL4 $Td0[0],$Td2[2],$Td0[0] +|| PACKL4 $Td0[1],$Td2[3],$Td0[1] + XOR $K[0],$Td0[0],$Td0[0] ; s[0] +|| XOR $K[1],$Td0[1],$Td0[1] ; s[1] + + PACK2 $Td0[2],$Td1[1],$Td0[2] +|| PACK2 $Td0[3],$Td1[2],$Td0[3] + PACK2 $Td2[0],$Td3[3],$Td2[0] +|| PACK2 $Td2[1],$Td3[0],$Td2[1] +|| BNOP RA + PACKL4 $Td0[2],$Td2[0],$Td0[2] +|| PACKL4 $Td0[3],$Td2[1],$Td0[3] + XOR $K[2],$Td0[2],$Td0[2] ; s[2] +|| XOR $K[3],$Td0[3],$Td0[3] ; s[3] + + MV $Td0[0],A9 +|| MV $Td0[1],A8 + MV $Td0[2],B9 +|| MV $Td0[3],B8 +|| [B2] STNDW A9:A8,*OUT++ + [B2] STNDW B9:B8,*OUT++ + .else + PACK2 $Td1[3],$Td0[0],$Td1[3] +|| PACK2 $Td1[0],$Td0[1],$Td1[0] + PACK2 $Td3[1],$Td2[2],$Td3[1] +|| PACK2 $Td3[2],$Td2[3],$Td3[2] + PACKL4 $Td3[1],$Td1[3],$Td1[3] +|| PACKL4 $Td3[2],$Td1[0],$Td1[0] + XOR $K[0],$Td1[3],$Td1[3] ; s[0] +|| XOR $K[1],$Td1[0],$Td1[0] ; s[1] + + PACK2 $Td1[1],$Td0[2],$Td1[1] +|| PACK2 $Td1[2],$Td0[3],$Td1[2] + PACK2 $Td3[3],$Td2[0],$Td3[3] +|| PACK2 $Td3[0],$Td2[1],$Td3[0] +|| BNOP RA + PACKL4 $Td3[3],$Td1[1],$Td1[1] +|| PACKL4 $Td3[0],$Td1[2],$Td1[2] + XOR $K[2],$Td1[1],$Td1[1] ; s[2] +|| XOR $K[3],$Td1[2],$Td1[2] ; s[3] + + MV $Td1[3],A8 +|| MV $Td1[0],A9 + MV $Td1[1],B8 +|| MV $Td1[2],B9 +|| [B2] STNDW A9:A8,*OUT++ + [B2] STNDW B9:B8,*OUT++ + .endif + .endasmfunc +___ +{ +my @K=(@K,@s); # extended key +my @Te4=map("B$_",(16..19)); + +my @Kx9=@Te0; # used in AES_set_decrypt_key +my @KxB=@Te1; +my @KxD=@Te2; +my @KxE=@Te3; + +$code.=<<___; + .asg OUT,BITS + + .global _AES_set_encrypt_key +_AES_set_encrypt_key: +__set_encrypt_key: + .asmfunc + MV INP,A0 +|| SHRU BITS,5,BITS ; 128-192-256 -> 4-6-8 +|| MV KEY,A1 + [!A0] B RA +||[!A0] MVK -1,RET 
+||[!A0] MVK 1,A1 ; only one B RA + [!A1] B RA +||[!A1] MVK -1,RET +||[!A1] MVK 0,A0 +|| MVK 0,B0 +|| MVK 0,A1 + [A0] LDNDW *INP++,A9:A8 +|| [A0] CMPEQ 4,BITS,B0 +|| [A0] CMPLT 3,BITS,A1 + [B0] B key128? +|| [A1] LDNDW *INP++,B9:B8 +|| [A0] CMPEQ 6,BITS,B0 +|| [A0] CMPLT 5,BITS,A1 + [B0] B key192? +|| [A1] LDNDW *INP++,B17:B16 +|| [A0] CMPEQ 8,BITS,B0 +|| [A0] CMPLT 7,BITS,A1 + [B0] B key256? +|| [A1] LDNDW *INP++,B19:B18 + + .if __TI_EABI__ + [A0] ADD 0,KEY,$KPA +|| [A0] ADD 4,KEY,$KPB +|| [A0] MVKL \$PCR_OFFSET(AES_Te4,__set_encrypt_key),$TEA +|| [A0] ADDKPC __set_encrypt_key,B6 + [A0] MVKH \$PCR_OFFSET(AES_Te4,__set_encrypt_key),$TEA + [A0] ADD B6,$TEA,$TEA ; AES_Te4 + .else + [A0] ADD 0,KEY,$KPA +|| [A0] ADD 4,KEY,$KPB +|| [A0] MVKL (AES_Te4-__set_encrypt_key),$TEA +|| [A0] ADDKPC __set_encrypt_key,B6 + [A0] MVKH (AES_Te4-__set_encrypt_key),$TEA + [A0] ADD B6,$TEA,$TEA ; AES_Te4 + .endif + NOP + NOP + + BNOP RA,5 +|| MVK -2,RET ; unknown bit length +|| MVK 0,B0 ; redundant +;;==================================================================== +;;==================================================================== +key128?: + .if .BIG_ENDIAN + MV A9,$K[0] +|| MV A8,$K[1] +|| MV B9,$Te4[2] +|| MV B8,$K[3] + .else + MV A8,$K[0] +|| MV A9,$K[1] +|| MV B8,$Te4[2] +|| MV B9,$K[3] + .endif + + MVK 256,A0 +|| MVK 8,B0 + + MV $TEA,$TEB +|| ADD $TEA,A0,A30 ; rcon +;;==================================================================== +loop128?: + LDW *A30++[1],A31 ; rcon[i] +|| MV $Te4[2],$K[2] +|| EXTU $K[3],EXT1,24,$Te4[0] + LDBU *${TEB}[$Te4[0]],$Te4[0] +|| MV $K[3],A0 +|| EXTU $K[3],EXT2,24,$Te4[1] + LDBU *${TEB}[$Te4[1]],$Te4[1] +|| EXTU A0,EXT3,24,A0 +|| EXTU $K[3],EXT0,24,$Te4[3] + .if .BIG_ENDIAN + LDBU *${TEA}[A0],$Te4[3] +|| LDBU *${TEB}[$Te4[3]],A0 + .else + LDBU *${TEA}[A0],A0 +|| LDBU *${TEB}[$Te4[3]],$Te4[3] + .endif + + STW $K[0],*$KPA++[2] +|| STW $K[1],*$KPB++[2] + STW $K[2],*$KPA++[2] +|| STW $K[3],*$KPB++[2] + + XOR A31,$K[0],$K[0] ; ^=rcon[i] + .if 
.BIG_ENDIAN + PACK2 $Te4[0],$Te4[1],$Te4[1] + PACK2 $Te4[3],A0,$Te4[3] +|| BDEC loop128?,B0 + PACKL4 $Te4[1],$Te4[3],$Te4[3] + .else + PACK2 $Te4[1],$Te4[0],$Te4[1] + PACK2 $Te4[3],A0,$Te4[3] +|| BDEC loop128?,B0 + PACKL4 $Te4[3],$Te4[1],$Te4[3] + .endif + XOR $Te4[3],$K[0],$Te4[0] ; K[0] + XOR $Te4[0],$K[1],$K[1] ; K[1] + MV $Te4[0],$K[0] +|| XOR $K[1],$K[2],$Te4[2] ; K[2] + XOR $Te4[2],$K[3],$K[3] ; K[3] +;;==================================================================== + BNOP RA + MV $Te4[2],$K[2] +|| STW $K[0],*$KPA++[2] +|| STW $K[1],*$KPB++[2] + STW $K[2],*$KPA++[2] +|| STW $K[3],*$KPB++[2] + MVK 10,B0 ; rounds + STW B0,*++${KPB}[15] + MVK 0,RET +;;==================================================================== +;;==================================================================== +key192?: + .if .BIG_ENDIAN + MV A9,$K[0] +|| MV A8,$K[1] +|| MV B9,$K[2] +|| MV B8,$K[3] + MV B17,$Te4[2] +|| MV B16,$K[5] + .else + MV A8,$K[0] +|| MV A9,$K[1] +|| MV B8,$K[2] +|| MV B9,$K[3] + MV B16,$Te4[2] +|| MV B17,$K[5] + .endif + + MVK 256,A0 +|| MVK 6,B0 + MV $TEA,$TEB +|| ADD $TEA,A0,A30 ; rcon +;;==================================================================== +loop192?: + LDW *A30++[1],A31 ; rcon[i] +|| MV $Te4[2],$K[4] +|| EXTU $K[5],EXT1,24,$Te4[0] + LDBU *${TEB}[$Te4[0]],$Te4[0] +|| MV $K[5],A0 +|| EXTU $K[5],EXT2,24,$Te4[1] + LDBU *${TEB}[$Te4[1]],$Te4[1] +|| EXTU A0,EXT3,24,A0 +|| EXTU $K[5],EXT0,24,$Te4[3] + .if .BIG_ENDIAN + LDBU *${TEA}[A0],$Te4[3] +|| LDBU *${TEB}[$Te4[3]],A0 + .else + LDBU *${TEA}[A0],A0 +|| LDBU *${TEB}[$Te4[3]],$Te4[3] + .endif + + STW $K[0],*$KPA++[2] +|| STW $K[1],*$KPB++[2] + STW $K[2],*$KPA++[2] +|| STW $K[3],*$KPB++[2] + STW $K[4],*$KPA++[2] +|| STW $K[5],*$KPB++[2] + + XOR A31,$K[0],$K[0] ; ^=rcon[i] + .if .BIG_ENDIAN + PACK2 $Te4[0],$Te4[1],$Te4[1] +|| PACK2 $Te4[3],A0,$Te4[3] + PACKL4 $Te4[1],$Te4[3],$Te4[3] + .else + PACK2 $Te4[1],$Te4[0],$Te4[1] +|| PACK2 $Te4[3],A0,$Te4[3] + PACKL4 $Te4[3],$Te4[1],$Te4[3] + .endif + 
BDEC loop192?,B0 +|| XOR $Te4[3],$K[0],$Te4[0] ; K[0] + XOR $Te4[0],$K[1],$K[1] ; K[1] + MV $Te4[0],$K[0] +|| XOR $K[1],$K[2],$Te4[2] ; K[2] + XOR $Te4[2],$K[3],$K[3] ; K[3] + MV $Te4[2],$K[2] +|| XOR $K[3],$K[4],$Te4[2] ; K[4] + XOR $Te4[2],$K[5],$K[5] ; K[5] +;;==================================================================== + BNOP RA + STW $K[0],*$KPA++[2] +|| STW $K[1],*$KPB++[2] + STW $K[2],*$KPA++[2] +|| STW $K[3],*$KPB++[2] + MVK 12,B0 ; rounds + STW B0,*++${KPB}[7] + MVK 0,RET +;;==================================================================== +;;==================================================================== +key256?: + .if .BIG_ENDIAN + MV A9,$K[0] +|| MV A8,$K[1] +|| MV B9,$K[2] +|| MV B8,$K[3] + MV B17,$K[4] +|| MV B16,$K[5] +|| MV B19,$Te4[2] +|| MV B18,$K[7] + .else + MV A8,$K[0] +|| MV A9,$K[1] +|| MV B8,$K[2] +|| MV B9,$K[3] + MV B16,$K[4] +|| MV B17,$K[5] +|| MV B18,$Te4[2] +|| MV B19,$K[7] + .endif + + MVK 256,A0 +|| MVK 6,B0 + MV $TEA,$TEB +|| ADD $TEA,A0,A30 ; rcon +;;==================================================================== +loop256?: + LDW *A30++[1],A31 ; rcon[i] +|| MV $Te4[2],$K[6] +|| EXTU $K[7],EXT1,24,$Te4[0] + LDBU *${TEB}[$Te4[0]],$Te4[0] +|| MV $K[7],A0 +|| EXTU $K[7],EXT2,24,$Te4[1] + LDBU *${TEB}[$Te4[1]],$Te4[1] +|| EXTU A0,EXT3,24,A0 +|| EXTU $K[7],EXT0,24,$Te4[3] + .if .BIG_ENDIAN + LDBU *${TEA}[A0],$Te4[3] +|| LDBU *${TEB}[$Te4[3]],A0 + .else + LDBU *${TEA}[A0],A0 +|| LDBU *${TEB}[$Te4[3]],$Te4[3] + .endif + + STW $K[0],*$KPA++[2] +|| STW $K[1],*$KPB++[2] + STW $K[2],*$KPA++[2] +|| STW $K[3],*$KPB++[2] + STW $K[4],*$KPA++[2] +|| STW $K[5],*$KPB++[2] + STW $K[6],*$KPA++[2] +|| STW $K[7],*$KPB++[2] +|| XOR A31,$K[0],$K[0] ; ^=rcon[i] + .if .BIG_ENDIAN + PACK2 $Te4[0],$Te4[1],$Te4[1] +|| PACK2 $Te4[3],A0,$Te4[3] + PACKL4 $Te4[1],$Te4[3],$Te4[3] +||[!B0] B done256? + .else + PACK2 $Te4[1],$Te4[0],$Te4[1] +|| PACK2 $Te4[3],A0,$Te4[3] + PACKL4 $Te4[3],$Te4[1],$Te4[3] +||[!B0] B done256? 
+ .endif + XOR $Te4[3],$K[0],$Te4[0] ; K[0] + XOR $Te4[0],$K[1],$K[1] ; K[1] + MV $Te4[0],$K[0] +|| XOR $K[1],$K[2],$Te4[2] ; K[2] + XOR $Te4[2],$K[3],$K[3] ; K[3] + + MV $Te4[2],$K[2] +|| [B0] EXTU $K[3],EXT0,24,$Te4[0] +|| [B0] SUB B0,1,B0 + LDBU *${TEB}[$Te4[0]],$Te4[0] +|| MV $K[3],A0 +|| EXTU $K[3],EXT1,24,$Te4[1] + LDBU *${TEB}[$Te4[1]],$Te4[1] +|| EXTU A0,EXT2,24,A0 +|| EXTU $K[3],EXT3,24,$Te4[3] + + .if .BIG_ENDIAN + LDBU *${TEA}[A0],$Te4[3] +|| LDBU *${TEB}[$Te4[3]],A0 + NOP 3 + PACK2 $Te4[0],$Te4[1],$Te4[1] + PACK2 $Te4[3],A0,$Te4[3] +|| B loop256? + PACKL4 $Te4[1],$Te4[3],$Te4[3] + .else + LDBU *${TEA}[A0],A0 +|| LDBU *${TEB}[$Te4[3]],$Te4[3] + NOP 3 + PACK2 $Te4[1],$Te4[0],$Te4[1] + PACK2 $Te4[3],A0,$Te4[3] +|| B loop256? + PACKL4 $Te4[3],$Te4[1],$Te4[3] + .endif + + XOR $Te4[3],$K[4],$Te4[0] ; K[4] + XOR $Te4[0],$K[5],$K[5] ; K[5] + MV $Te4[0],$K[4] +|| XOR $K[5],$K[6],$Te4[2] ; K[6] + XOR $Te4[2],$K[7],$K[7] ; K[7] +;;==================================================================== +done256?: + BNOP RA + STW $K[0],*$KPA++[2] +|| STW $K[1],*$KPB++[2] + STW $K[2],*$KPA++[2] +|| STW $K[3],*$KPB++[2] + MVK 14,B0 ; rounds + STW B0,*--${KPB}[1] + MVK 0,RET + .endasmfunc + + .global _AES_set_decrypt_key +_AES_set_decrypt_key: + .asmfunc + B __set_encrypt_key ; guarantee local call + MV KEY,B30 ; B30 is not modified + MV RA, B31 ; B31 is not modified + ADDKPC ret?,RA,2 +ret?: ; B0 holds rounds or zero + [!B0] BNOP B31 ; return if zero + [B0] SHL B0,4,A0 ; offset to last round key + [B0] SHRU B0,1,B2 + [B0] SUB B2,2,B2 +|| [B0] MVK 0x0000001B,B3 ; AES polynomial + [B0] MVKH 0x07000000,B3 +|| [B0] MV B30,$KPA + [B0] ADD B30,A0,$KPB +|| [B0] MVK 16,A0 ; sizeof(round key) +;;==================================================================== +flip_loop?: + LDW *${KPA}[0],A16 +|| LDW *${KPB}[0],B16 + LDW *${KPA}[1],A17 +|| LDW *${KPB}[1],B17 + LDW *${KPA}[2],A18 +|| LDW *${KPB}[2],B18 + LDW *${KPA}[3],A19 +|| ADD $KPA,A0,$KPA +|| LDW *${KPB}[3],B19 +|| SUB 
$KPB,A0,$KPB +|| BDEC flip_loop?,B2 + NOP + STW B16,*${KPA}[-4] +|| STW A16,*${KPB}[4] + STW B17,*${KPA}[-3] +|| STW A17,*${KPB}[5] + STW B18,*${KPA}[-2] +|| STW A18,*${KPB}[6] + STW B19,*${KPA}[-1] +|| STW A19,*${KPB}[7] +;;==================================================================== + SUB B0,1,B0 ; skip last round +|| ADD B30,A0,$KPA ; skip first round +|| ADD B30,A0,$KPB +|| MVC GFPGFR,B30 ; save GFPGFR + LDW *${KPA}[0],$K[0] +|| LDW *${KPB}[1],$K[1] +|| MVC B3,GFPGFR + LDW *${KPA}[2],$K[2] +|| LDW *${KPB}[3],$K[3] + MVK 0x00000909,A24 +|| MVK 0x00000B0B,B24 + MVKH 0x09090000,A24 +|| MVKH 0x0B0B0000,B24 + SUB B0,1,B0 + + GMPY4 $K[0],A24,$Kx9[0] ; ·0x09 +|| GMPY4 $K[1],A24,$Kx9[1] +|| MVK 0x00000D0D,A25 +|| MVK 0x00000E0E,B25 + GMPY4 $K[2],A24,$Kx9[2] +|| GMPY4 $K[3],A24,$Kx9[3] +|| MVKH 0x0D0D0000,A25 +|| MVKH 0x0E0E0000,B25 + + GMPY4 $K[0],B24,$KxB[0] ; ·0x0B +|| GMPY4 $K[1],B24,$KxB[1] + GMPY4 $K[2],B24,$KxB[2] +|| GMPY4 $K[3],B24,$KxB[3] + +;;==================================================================== +invmix_loop?: + GMPY4 $K[0],A25,$KxD[0] ; ·0x0D +|| GMPY4 $K[1],A25,$KxD[1] +|| SWAP2 $Kx9[0],$Kx9[0] ; rotate by 16 +|| SWAP2 $Kx9[1],$Kx9[1] +|| MV $K[0],$s[0] ; this or DINT +|| MV $K[1],$s[1] +|| [B0] LDW *${KPA}[4],$K[0] +|| [B0] LDW *${KPB}[5],$K[1] + GMPY4 $K[2],A25,$KxD[2] +|| GMPY4 $K[3],A25,$KxD[3] +|| SWAP2 $Kx9[2],$Kx9[2] +|| SWAP2 $Kx9[3],$Kx9[3] +|| MV $K[2],$s[2] +|| MV $K[3],$s[3] +|| [B0] LDW *${KPA}[6],$K[2] +|| [B0] LDW *${KPB}[7],$K[3] + + GMPY4 $s[0],B25,$KxE[0] ; ·0x0E +|| GMPY4 $s[1],B25,$KxE[1] +|| XOR $Kx9[0],$KxB[0],$KxB[0] +|| XOR $Kx9[1],$KxB[1],$KxB[1] + GMPY4 $s[2],B25,$KxE[2] +|| GMPY4 $s[3],B25,$KxE[3] +|| XOR $Kx9[2],$KxB[2],$KxB[2] +|| XOR $Kx9[3],$KxB[3],$KxB[3] + + ROTL $KxB[0],TBL3,$KxB[0] +|| ROTL $KxB[1],TBL3,$KxB[1] +|| SWAP2 $KxD[0],$KxD[0] ; rotate by 16 +|| SWAP2 $KxD[1],$KxD[1] + ROTL $KxB[2],TBL3,$KxB[2] +|| ROTL $KxB[3],TBL3,$KxB[3] +|| SWAP2 $KxD[2],$KxD[2] +|| SWAP2 $KxD[3],$KxD[3] +|| [B0] B 
invmix_loop? + + XOR $KxE[0],$KxD[0],$KxE[0] +|| XOR $KxE[1],$KxD[1],$KxE[1] +|| [B0] GMPY4 $K[0],A24,$Kx9[0] ; ·0x09 +|| [B0] GMPY4 $K[1],A24,$Kx9[1] +|| ADDAW $KPA,4,$KPA + XOR $KxE[2],$KxD[2],$KxE[2] +|| XOR $KxE[3],$KxD[3],$KxE[3] +|| [B0] GMPY4 $K[2],A24,$Kx9[2] +|| [B0] GMPY4 $K[3],A24,$Kx9[3] +|| ADDAW $KPB,4,$KPB + + XOR $KxB[0],$KxE[0],$KxE[0] +|| XOR $KxB[1],$KxE[1],$KxE[1] +|| [B0] GMPY4 $K[0],B24,$KxB[0] ; ·0x0B +|| [B0] GMPY4 $K[1],B24,$KxB[1] + XOR $KxB[2],$KxE[2],$KxE[2] +|| XOR $KxB[3],$KxE[3],$KxE[3] +|| [B0] GMPY4 $K[2],B24,$KxB[2] +|| [B0] GMPY4 $K[3],B24,$KxB[3] +|| STW $KxE[0],*${KPA}[-4] +|| STW $KxE[1],*${KPB}[-3] + STW $KxE[2],*${KPA}[-2] +|| STW $KxE[3],*${KPB}[-1] +|| [B0] SUB B0,1,B0 +;;==================================================================== + BNOP B31,3 + MVC B30,GFPGFR ; restore GFPGFR(*) + MVK 0,RET + .endasmfunc +___ +# (*) Even though ABI doesn't specify GFPGFR as non-volatile, there +# are code samples out there that *assume* its default value. 
+} +{ +my ($inp,$out,$blocks,$key,$ivp)=("A4","B4","A6","B6","A8"); +$code.=<<___; + .global _AES_ctr32_encrypt +_AES_ctr32_encrypt: + .asmfunc + LDNDW *${ivp}[0],A31:A30 ; load counter value +|| MV $blocks,A2 ; reassign $blocks +|| MV RA,B27 ; reassign RA +|| MV $key,B26 ; reassign $key + LDNDW *${ivp}[1],B31:B30 +|| MVK 0,B2 ; don't let __encrypt load input +|| MVK 0,A1 ; and postpone writing output + .if .BIG_ENDIAN + NOP + .else + NOP 4 + SWAP2 B31,B31 ; keep least significant 32 bits + SWAP4 B31,B31 ; in host byte order + .endif +ctr32_loop?: + [A2] BNOP __encrypt +|| [A1] XOR A29,A9,A9 ; input^Ek(counter) +|| [A1] XOR A28,A8,A8 +|| [A2] LDNDW *INP++,A29:A28 ; load input + [!A2] BNOP B27 ; return +|| [A1] XOR B29,B9,B9 +|| [A1] XOR B28,B8,B8 +|| [A2] LDNDW *INP++,B29:B28 + .if .BIG_ENDIAN + [A1] STNDW A9:A8,*OUT++ ; save output +|| [A2] MV A31,A9 ; pass counter value to __encrypt +|| [A2] MV A30,A8 ; pass counter value to __encrypt + [A1] STNDW B9:B8,*OUT++ +|| [A2] DMV B31,B30,B9:B8 +|| [A2] ADD B30,1,B30 ; counter++ + .else + [A1] STNDW A9:A8,*OUT++ ; save output +|| [A2] MV A31,A9 +|| [A2] MV A30,A8 +|| [A2] SWAP2 B31,B0 +|| [A2] ADD B31,1,B31 ; counter++ + [A1] STNDW B9:B8,*OUT++ +|| [A2] MV B30,B8 +|| [A2] SWAP4 B0,B9 + .endif + [A2] ADDKPC ctr32_loop?,RA ; return to ctr32_loop? 
+|| [A2] MV B26,KEY ; pass $key +|| [A2] SUB A2,1,A2 ; $blocks-- +||[!A1] MVK 1,A1 + NOP + NOP + .endasmfunc +___ +} +# Tables are kept in endian-neutral manner +$code.=<<___; + .if __TI_EABI__ + .sect ".text:aes_asm.const" + .else + .sect ".const:aes_asm" + .endif + .align 128 +AES_Te: + .byte 0xc6,0x63,0x63,0xa5, 0xf8,0x7c,0x7c,0x84 + .byte 0xee,0x77,0x77,0x99, 0xf6,0x7b,0x7b,0x8d + .byte 0xff,0xf2,0xf2,0x0d, 0xd6,0x6b,0x6b,0xbd + .byte 0xde,0x6f,0x6f,0xb1, 0x91,0xc5,0xc5,0x54 + .byte 0x60,0x30,0x30,0x50, 0x02,0x01,0x01,0x03 + .byte 0xce,0x67,0x67,0xa9, 0x56,0x2b,0x2b,0x7d + .byte 0xe7,0xfe,0xfe,0x19, 0xb5,0xd7,0xd7,0x62 + .byte 0x4d,0xab,0xab,0xe6, 0xec,0x76,0x76,0x9a + .byte 0x8f,0xca,0xca,0x45, 0x1f,0x82,0x82,0x9d + .byte 0x89,0xc9,0xc9,0x40, 0xfa,0x7d,0x7d,0x87 + .byte 0xef,0xfa,0xfa,0x15, 0xb2,0x59,0x59,0xeb + .byte 0x8e,0x47,0x47,0xc9, 0xfb,0xf0,0xf0,0x0b + .byte 0x41,0xad,0xad,0xec, 0xb3,0xd4,0xd4,0x67 + .byte 0x5f,0xa2,0xa2,0xfd, 0x45,0xaf,0xaf,0xea + .byte 0x23,0x9c,0x9c,0xbf, 0x53,0xa4,0xa4,0xf7 + .byte 0xe4,0x72,0x72,0x96, 0x9b,0xc0,0xc0,0x5b + .byte 0x75,0xb7,0xb7,0xc2, 0xe1,0xfd,0xfd,0x1c + .byte 0x3d,0x93,0x93,0xae, 0x4c,0x26,0x26,0x6a + .byte 0x6c,0x36,0x36,0x5a, 0x7e,0x3f,0x3f,0x41 + .byte 0xf5,0xf7,0xf7,0x02, 0x83,0xcc,0xcc,0x4f + .byte 0x68,0x34,0x34,0x5c, 0x51,0xa5,0xa5,0xf4 + .byte 0xd1,0xe5,0xe5,0x34, 0xf9,0xf1,0xf1,0x08 + .byte 0xe2,0x71,0x71,0x93, 0xab,0xd8,0xd8,0x73 + .byte 0x62,0x31,0x31,0x53, 0x2a,0x15,0x15,0x3f + .byte 0x08,0x04,0x04,0x0c, 0x95,0xc7,0xc7,0x52 + .byte 0x46,0x23,0x23,0x65, 0x9d,0xc3,0xc3,0x5e + .byte 0x30,0x18,0x18,0x28, 0x37,0x96,0x96,0xa1 + .byte 0x0a,0x05,0x05,0x0f, 0x2f,0x9a,0x9a,0xb5 + .byte 0x0e,0x07,0x07,0x09, 0x24,0x12,0x12,0x36 + .byte 0x1b,0x80,0x80,0x9b, 0xdf,0xe2,0xe2,0x3d + .byte 0xcd,0xeb,0xeb,0x26, 0x4e,0x27,0x27,0x69 + .byte 0x7f,0xb2,0xb2,0xcd, 0xea,0x75,0x75,0x9f + .byte 0x12,0x09,0x09,0x1b, 0x1d,0x83,0x83,0x9e + .byte 0x58,0x2c,0x2c,0x74, 0x34,0x1a,0x1a,0x2e + .byte 0x36,0x1b,0x1b,0x2d, 
0xdc,0x6e,0x6e,0xb2 + .byte 0xb4,0x5a,0x5a,0xee, 0x5b,0xa0,0xa0,0xfb + .byte 0xa4,0x52,0x52,0xf6, 0x76,0x3b,0x3b,0x4d + .byte 0xb7,0xd6,0xd6,0x61, 0x7d,0xb3,0xb3,0xce + .byte 0x52,0x29,0x29,0x7b, 0xdd,0xe3,0xe3,0x3e + .byte 0x5e,0x2f,0x2f,0x71, 0x13,0x84,0x84,0x97 + .byte 0xa6,0x53,0x53,0xf5, 0xb9,0xd1,0xd1,0x68 + .byte 0x00,0x00,0x00,0x00, 0xc1,0xed,0xed,0x2c + .byte 0x40,0x20,0x20,0x60, 0xe3,0xfc,0xfc,0x1f + .byte 0x79,0xb1,0xb1,0xc8, 0xb6,0x5b,0x5b,0xed + .byte 0xd4,0x6a,0x6a,0xbe, 0x8d,0xcb,0xcb,0x46 + .byte 0x67,0xbe,0xbe,0xd9, 0x72,0x39,0x39,0x4b + .byte 0x94,0x4a,0x4a,0xde, 0x98,0x4c,0x4c,0xd4 + .byte 0xb0,0x58,0x58,0xe8, 0x85,0xcf,0xcf,0x4a + .byte 0xbb,0xd0,0xd0,0x6b, 0xc5,0xef,0xef,0x2a + .byte 0x4f,0xaa,0xaa,0xe5, 0xed,0xfb,0xfb,0x16 + .byte 0x86,0x43,0x43,0xc5, 0x9a,0x4d,0x4d,0xd7 + .byte 0x66,0x33,0x33,0x55, 0x11,0x85,0x85,0x94 + .byte 0x8a,0x45,0x45,0xcf, 0xe9,0xf9,0xf9,0x10 + .byte 0x04,0x02,0x02,0x06, 0xfe,0x7f,0x7f,0x81 + .byte 0xa0,0x50,0x50,0xf0, 0x78,0x3c,0x3c,0x44 + .byte 0x25,0x9f,0x9f,0xba, 0x4b,0xa8,0xa8,0xe3 + .byte 0xa2,0x51,0x51,0xf3, 0x5d,0xa3,0xa3,0xfe + .byte 0x80,0x40,0x40,0xc0, 0x05,0x8f,0x8f,0x8a + .byte 0x3f,0x92,0x92,0xad, 0x21,0x9d,0x9d,0xbc + .byte 0x70,0x38,0x38,0x48, 0xf1,0xf5,0xf5,0x04 + .byte 0x63,0xbc,0xbc,0xdf, 0x77,0xb6,0xb6,0xc1 + .byte 0xaf,0xda,0xda,0x75, 0x42,0x21,0x21,0x63 + .byte 0x20,0x10,0x10,0x30, 0xe5,0xff,0xff,0x1a + .byte 0xfd,0xf3,0xf3,0x0e, 0xbf,0xd2,0xd2,0x6d + .byte 0x81,0xcd,0xcd,0x4c, 0x18,0x0c,0x0c,0x14 + .byte 0x26,0x13,0x13,0x35, 0xc3,0xec,0xec,0x2f + .byte 0xbe,0x5f,0x5f,0xe1, 0x35,0x97,0x97,0xa2 + .byte 0x88,0x44,0x44,0xcc, 0x2e,0x17,0x17,0x39 + .byte 0x93,0xc4,0xc4,0x57, 0x55,0xa7,0xa7,0xf2 + .byte 0xfc,0x7e,0x7e,0x82, 0x7a,0x3d,0x3d,0x47 + .byte 0xc8,0x64,0x64,0xac, 0xba,0x5d,0x5d,0xe7 + .byte 0x32,0x19,0x19,0x2b, 0xe6,0x73,0x73,0x95 + .byte 0xc0,0x60,0x60,0xa0, 0x19,0x81,0x81,0x98 + .byte 0x9e,0x4f,0x4f,0xd1, 0xa3,0xdc,0xdc,0x7f + .byte 0x44,0x22,0x22,0x66, 0x54,0x2a,0x2a,0x7e + .byte 
0x3b,0x90,0x90,0xab, 0x0b,0x88,0x88,0x83 + .byte 0x8c,0x46,0x46,0xca, 0xc7,0xee,0xee,0x29 + .byte 0x6b,0xb8,0xb8,0xd3, 0x28,0x14,0x14,0x3c + .byte 0xa7,0xde,0xde,0x79, 0xbc,0x5e,0x5e,0xe2 + .byte 0x16,0x0b,0x0b,0x1d, 0xad,0xdb,0xdb,0x76 + .byte 0xdb,0xe0,0xe0,0x3b, 0x64,0x32,0x32,0x56 + .byte 0x74,0x3a,0x3a,0x4e, 0x14,0x0a,0x0a,0x1e + .byte 0x92,0x49,0x49,0xdb, 0x0c,0x06,0x06,0x0a + .byte 0x48,0x24,0x24,0x6c, 0xb8,0x5c,0x5c,0xe4 + .byte 0x9f,0xc2,0xc2,0x5d, 0xbd,0xd3,0xd3,0x6e + .byte 0x43,0xac,0xac,0xef, 0xc4,0x62,0x62,0xa6 + .byte 0x39,0x91,0x91,0xa8, 0x31,0x95,0x95,0xa4 + .byte 0xd3,0xe4,0xe4,0x37, 0xf2,0x79,0x79,0x8b + .byte 0xd5,0xe7,0xe7,0x32, 0x8b,0xc8,0xc8,0x43 + .byte 0x6e,0x37,0x37,0x59, 0xda,0x6d,0x6d,0xb7 + .byte 0x01,0x8d,0x8d,0x8c, 0xb1,0xd5,0xd5,0x64 + .byte 0x9c,0x4e,0x4e,0xd2, 0x49,0xa9,0xa9,0xe0 + .byte 0xd8,0x6c,0x6c,0xb4, 0xac,0x56,0x56,0xfa + .byte 0xf3,0xf4,0xf4,0x07, 0xcf,0xea,0xea,0x25 + .byte 0xca,0x65,0x65,0xaf, 0xf4,0x7a,0x7a,0x8e + .byte 0x47,0xae,0xae,0xe9, 0x10,0x08,0x08,0x18 + .byte 0x6f,0xba,0xba,0xd5, 0xf0,0x78,0x78,0x88 + .byte 0x4a,0x25,0x25,0x6f, 0x5c,0x2e,0x2e,0x72 + .byte 0x38,0x1c,0x1c,0x24, 0x57,0xa6,0xa6,0xf1 + .byte 0x73,0xb4,0xb4,0xc7, 0x97,0xc6,0xc6,0x51 + .byte 0xcb,0xe8,0xe8,0x23, 0xa1,0xdd,0xdd,0x7c + .byte 0xe8,0x74,0x74,0x9c, 0x3e,0x1f,0x1f,0x21 + .byte 0x96,0x4b,0x4b,0xdd, 0x61,0xbd,0xbd,0xdc + .byte 0x0d,0x8b,0x8b,0x86, 0x0f,0x8a,0x8a,0x85 + .byte 0xe0,0x70,0x70,0x90, 0x7c,0x3e,0x3e,0x42 + .byte 0x71,0xb5,0xb5,0xc4, 0xcc,0x66,0x66,0xaa + .byte 0x90,0x48,0x48,0xd8, 0x06,0x03,0x03,0x05 + .byte 0xf7,0xf6,0xf6,0x01, 0x1c,0x0e,0x0e,0x12 + .byte 0xc2,0x61,0x61,0xa3, 0x6a,0x35,0x35,0x5f + .byte 0xae,0x57,0x57,0xf9, 0x69,0xb9,0xb9,0xd0 + .byte 0x17,0x86,0x86,0x91, 0x99,0xc1,0xc1,0x58 + .byte 0x3a,0x1d,0x1d,0x27, 0x27,0x9e,0x9e,0xb9 + .byte 0xd9,0xe1,0xe1,0x38, 0xeb,0xf8,0xf8,0x13 + .byte 0x2b,0x98,0x98,0xb3, 0x22,0x11,0x11,0x33 + .byte 0xd2,0x69,0x69,0xbb, 0xa9,0xd9,0xd9,0x70 + .byte 0x07,0x8e,0x8e,0x89, 
0x33,0x94,0x94,0xa7 + .byte 0x2d,0x9b,0x9b,0xb6, 0x3c,0x1e,0x1e,0x22 + .byte 0x15,0x87,0x87,0x92, 0xc9,0xe9,0xe9,0x20 + .byte 0x87,0xce,0xce,0x49, 0xaa,0x55,0x55,0xff + .byte 0x50,0x28,0x28,0x78, 0xa5,0xdf,0xdf,0x7a + .byte 0x03,0x8c,0x8c,0x8f, 0x59,0xa1,0xa1,0xf8 + .byte 0x09,0x89,0x89,0x80, 0x1a,0x0d,0x0d,0x17 + .byte 0x65,0xbf,0xbf,0xda, 0xd7,0xe6,0xe6,0x31 + .byte 0x84,0x42,0x42,0xc6, 0xd0,0x68,0x68,0xb8 + .byte 0x82,0x41,0x41,0xc3, 0x29,0x99,0x99,0xb0 + .byte 0x5a,0x2d,0x2d,0x77, 0x1e,0x0f,0x0f,0x11 + .byte 0x7b,0xb0,0xb0,0xcb, 0xa8,0x54,0x54,0xfc + .byte 0x6d,0xbb,0xbb,0xd6, 0x2c,0x16,0x16,0x3a +AES_Te4: + .byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5 + .byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76 + .byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0 + .byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0 + .byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc + .byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15 + .byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a + .byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75 + .byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0 + .byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84 + .byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b + .byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf + .byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85 + .byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8 + .byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5 + .byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2 + .byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17 + .byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73 + .byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88 + .byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb + .byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c + .byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79 + .byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9 + .byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08 + .byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6 + 
.byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a + .byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e + .byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e + .byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94 + .byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf + .byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68 + .byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16 +rcon: + .byte 0x01,0x00,0x00,0x00, 0x02,0x00,0x00,0x00 + .byte 0x04,0x00,0x00,0x00, 0x08,0x00,0x00,0x00 + .byte 0x10,0x00,0x00,0x00, 0x20,0x00,0x00,0x00 + .byte 0x40,0x00,0x00,0x00, 0x80,0x00,0x00,0x00 + .byte 0x1B,0x00,0x00,0x00, 0x36,0x00,0x00,0x00 + .align 128 +AES_Td: + .byte 0x51,0xf4,0xa7,0x50, 0x7e,0x41,0x65,0x53 + .byte 0x1a,0x17,0xa4,0xc3, 0x3a,0x27,0x5e,0x96 + .byte 0x3b,0xab,0x6b,0xcb, 0x1f,0x9d,0x45,0xf1 + .byte 0xac,0xfa,0x58,0xab, 0x4b,0xe3,0x03,0x93 + .byte 0x20,0x30,0xfa,0x55, 0xad,0x76,0x6d,0xf6 + .byte 0x88,0xcc,0x76,0x91, 0xf5,0x02,0x4c,0x25 + .byte 0x4f,0xe5,0xd7,0xfc, 0xc5,0x2a,0xcb,0xd7 + .byte 0x26,0x35,0x44,0x80, 0xb5,0x62,0xa3,0x8f + .byte 0xde,0xb1,0x5a,0x49, 0x25,0xba,0x1b,0x67 + .byte 0x45,0xea,0x0e,0x98, 0x5d,0xfe,0xc0,0xe1 + .byte 0xc3,0x2f,0x75,0x02, 0x81,0x4c,0xf0,0x12 + .byte 0x8d,0x46,0x97,0xa3, 0x6b,0xd3,0xf9,0xc6 + .byte 0x03,0x8f,0x5f,0xe7, 0x15,0x92,0x9c,0x95 + .byte 0xbf,0x6d,0x7a,0xeb, 0x95,0x52,0x59,0xda + .byte 0xd4,0xbe,0x83,0x2d, 0x58,0x74,0x21,0xd3 + .byte 0x49,0xe0,0x69,0x29, 0x8e,0xc9,0xc8,0x44 + .byte 0x75,0xc2,0x89,0x6a, 0xf4,0x8e,0x79,0x78 + .byte 0x99,0x58,0x3e,0x6b, 0x27,0xb9,0x71,0xdd + .byte 0xbe,0xe1,0x4f,0xb6, 0xf0,0x88,0xad,0x17 + .byte 0xc9,0x20,0xac,0x66, 0x7d,0xce,0x3a,0xb4 + .byte 0x63,0xdf,0x4a,0x18, 0xe5,0x1a,0x31,0x82 + .byte 0x97,0x51,0x33,0x60, 0x62,0x53,0x7f,0x45 + .byte 0xb1,0x64,0x77,0xe0, 0xbb,0x6b,0xae,0x84 + .byte 0xfe,0x81,0xa0,0x1c, 0xf9,0x08,0x2b,0x94 + .byte 0x70,0x48,0x68,0x58, 0x8f,0x45,0xfd,0x19 + .byte 0x94,0xde,0x6c,0x87, 0x52,0x7b,0xf8,0xb7 + .byte 0xab,0x73,0xd3,0x23, 0x72,0x4b,0x02,0xe2 + .byte 
0xe3,0x1f,0x8f,0x57, 0x66,0x55,0xab,0x2a + .byte 0xb2,0xeb,0x28,0x07, 0x2f,0xb5,0xc2,0x03 + .byte 0x86,0xc5,0x7b,0x9a, 0xd3,0x37,0x08,0xa5 + .byte 0x30,0x28,0x87,0xf2, 0x23,0xbf,0xa5,0xb2 + .byte 0x02,0x03,0x6a,0xba, 0xed,0x16,0x82,0x5c + .byte 0x8a,0xcf,0x1c,0x2b, 0xa7,0x79,0xb4,0x92 + .byte 0xf3,0x07,0xf2,0xf0, 0x4e,0x69,0xe2,0xa1 + .byte 0x65,0xda,0xf4,0xcd, 0x06,0x05,0xbe,0xd5 + .byte 0xd1,0x34,0x62,0x1f, 0xc4,0xa6,0xfe,0x8a + .byte 0x34,0x2e,0x53,0x9d, 0xa2,0xf3,0x55,0xa0 + .byte 0x05,0x8a,0xe1,0x32, 0xa4,0xf6,0xeb,0x75 + .byte 0x0b,0x83,0xec,0x39, 0x40,0x60,0xef,0xaa + .byte 0x5e,0x71,0x9f,0x06, 0xbd,0x6e,0x10,0x51 + .byte 0x3e,0x21,0x8a,0xf9, 0x96,0xdd,0x06,0x3d + .byte 0xdd,0x3e,0x05,0xae, 0x4d,0xe6,0xbd,0x46 + .byte 0x91,0x54,0x8d,0xb5, 0x71,0xc4,0x5d,0x05 + .byte 0x04,0x06,0xd4,0x6f, 0x60,0x50,0x15,0xff + .byte 0x19,0x98,0xfb,0x24, 0xd6,0xbd,0xe9,0x97 + .byte 0x89,0x40,0x43,0xcc, 0x67,0xd9,0x9e,0x77 + .byte 0xb0,0xe8,0x42,0xbd, 0x07,0x89,0x8b,0x88 + .byte 0xe7,0x19,0x5b,0x38, 0x79,0xc8,0xee,0xdb + .byte 0xa1,0x7c,0x0a,0x47, 0x7c,0x42,0x0f,0xe9 + .byte 0xf8,0x84,0x1e,0xc9, 0x00,0x00,0x00,0x00 + .byte 0x09,0x80,0x86,0x83, 0x32,0x2b,0xed,0x48 + .byte 0x1e,0x11,0x70,0xac, 0x6c,0x5a,0x72,0x4e + .byte 0xfd,0x0e,0xff,0xfb, 0x0f,0x85,0x38,0x56 + .byte 0x3d,0xae,0xd5,0x1e, 0x36,0x2d,0x39,0x27 + .byte 0x0a,0x0f,0xd9,0x64, 0x68,0x5c,0xa6,0x21 + .byte 0x9b,0x5b,0x54,0xd1, 0x24,0x36,0x2e,0x3a + .byte 0x0c,0x0a,0x67,0xb1, 0x93,0x57,0xe7,0x0f + .byte 0xb4,0xee,0x96,0xd2, 0x1b,0x9b,0x91,0x9e + .byte 0x80,0xc0,0xc5,0x4f, 0x61,0xdc,0x20,0xa2 + .byte 0x5a,0x77,0x4b,0x69, 0x1c,0x12,0x1a,0x16 + .byte 0xe2,0x93,0xba,0x0a, 0xc0,0xa0,0x2a,0xe5 + .byte 0x3c,0x22,0xe0,0x43, 0x12,0x1b,0x17,0x1d + .byte 0x0e,0x09,0x0d,0x0b, 0xf2,0x8b,0xc7,0xad + .byte 0x2d,0xb6,0xa8,0xb9, 0x14,0x1e,0xa9,0xc8 + .byte 0x57,0xf1,0x19,0x85, 0xaf,0x75,0x07,0x4c + .byte 0xee,0x99,0xdd,0xbb, 0xa3,0x7f,0x60,0xfd + .byte 0xf7,0x01,0x26,0x9f, 0x5c,0x72,0xf5,0xbc + .byte 0x44,0x66,0x3b,0xc5, 
0x5b,0xfb,0x7e,0x34 + .byte 0x8b,0x43,0x29,0x76, 0xcb,0x23,0xc6,0xdc + .byte 0xb6,0xed,0xfc,0x68, 0xb8,0xe4,0xf1,0x63 + .byte 0xd7,0x31,0xdc,0xca, 0x42,0x63,0x85,0x10 + .byte 0x13,0x97,0x22,0x40, 0x84,0xc6,0x11,0x20 + .byte 0x85,0x4a,0x24,0x7d, 0xd2,0xbb,0x3d,0xf8 + .byte 0xae,0xf9,0x32,0x11, 0xc7,0x29,0xa1,0x6d + .byte 0x1d,0x9e,0x2f,0x4b, 0xdc,0xb2,0x30,0xf3 + .byte 0x0d,0x86,0x52,0xec, 0x77,0xc1,0xe3,0xd0 + .byte 0x2b,0xb3,0x16,0x6c, 0xa9,0x70,0xb9,0x99 + .byte 0x11,0x94,0x48,0xfa, 0x47,0xe9,0x64,0x22 + .byte 0xa8,0xfc,0x8c,0xc4, 0xa0,0xf0,0x3f,0x1a + .byte 0x56,0x7d,0x2c,0xd8, 0x22,0x33,0x90,0xef + .byte 0x87,0x49,0x4e,0xc7, 0xd9,0x38,0xd1,0xc1 + .byte 0x8c,0xca,0xa2,0xfe, 0x98,0xd4,0x0b,0x36 + .byte 0xa6,0xf5,0x81,0xcf, 0xa5,0x7a,0xde,0x28 + .byte 0xda,0xb7,0x8e,0x26, 0x3f,0xad,0xbf,0xa4 + .byte 0x2c,0x3a,0x9d,0xe4, 0x50,0x78,0x92,0x0d + .byte 0x6a,0x5f,0xcc,0x9b, 0x54,0x7e,0x46,0x62 + .byte 0xf6,0x8d,0x13,0xc2, 0x90,0xd8,0xb8,0xe8 + .byte 0x2e,0x39,0xf7,0x5e, 0x82,0xc3,0xaf,0xf5 + .byte 0x9f,0x5d,0x80,0xbe, 0x69,0xd0,0x93,0x7c + .byte 0x6f,0xd5,0x2d,0xa9, 0xcf,0x25,0x12,0xb3 + .byte 0xc8,0xac,0x99,0x3b, 0x10,0x18,0x7d,0xa7 + .byte 0xe8,0x9c,0x63,0x6e, 0xdb,0x3b,0xbb,0x7b + .byte 0xcd,0x26,0x78,0x09, 0x6e,0x59,0x18,0xf4 + .byte 0xec,0x9a,0xb7,0x01, 0x83,0x4f,0x9a,0xa8 + .byte 0xe6,0x95,0x6e,0x65, 0xaa,0xff,0xe6,0x7e + .byte 0x21,0xbc,0xcf,0x08, 0xef,0x15,0xe8,0xe6 + .byte 0xba,0xe7,0x9b,0xd9, 0x4a,0x6f,0x36,0xce + .byte 0xea,0x9f,0x09,0xd4, 0x29,0xb0,0x7c,0xd6 + .byte 0x31,0xa4,0xb2,0xaf, 0x2a,0x3f,0x23,0x31 + .byte 0xc6,0xa5,0x94,0x30, 0x35,0xa2,0x66,0xc0 + .byte 0x74,0x4e,0xbc,0x37, 0xfc,0x82,0xca,0xa6 + .byte 0xe0,0x90,0xd0,0xb0, 0x33,0xa7,0xd8,0x15 + .byte 0xf1,0x04,0x98,0x4a, 0x41,0xec,0xda,0xf7 + .byte 0x7f,0xcd,0x50,0x0e, 0x17,0x91,0xf6,0x2f + .byte 0x76,0x4d,0xd6,0x8d, 0x43,0xef,0xb0,0x4d + .byte 0xcc,0xaa,0x4d,0x54, 0xe4,0x96,0x04,0xdf + .byte 0x9e,0xd1,0xb5,0xe3, 0x4c,0x6a,0x88,0x1b + .byte 0xc1,0x2c,0x1f,0xb8, 0x46,0x65,0x51,0x7f + .byte 
0x9d,0x5e,0xea,0x04, 0x01,0x8c,0x35,0x5d + .byte 0xfa,0x87,0x74,0x73, 0xfb,0x0b,0x41,0x2e + .byte 0xb3,0x67,0x1d,0x5a, 0x92,0xdb,0xd2,0x52 + .byte 0xe9,0x10,0x56,0x33, 0x6d,0xd6,0x47,0x13 + .byte 0x9a,0xd7,0x61,0x8c, 0x37,0xa1,0x0c,0x7a + .byte 0x59,0xf8,0x14,0x8e, 0xeb,0x13,0x3c,0x89 + .byte 0xce,0xa9,0x27,0xee, 0xb7,0x61,0xc9,0x35 + .byte 0xe1,0x1c,0xe5,0xed, 0x7a,0x47,0xb1,0x3c + .byte 0x9c,0xd2,0xdf,0x59, 0x55,0xf2,0x73,0x3f + .byte 0x18,0x14,0xce,0x79, 0x73,0xc7,0x37,0xbf + .byte 0x53,0xf7,0xcd,0xea, 0x5f,0xfd,0xaa,0x5b + .byte 0xdf,0x3d,0x6f,0x14, 0x78,0x44,0xdb,0x86 + .byte 0xca,0xaf,0xf3,0x81, 0xb9,0x68,0xc4,0x3e + .byte 0x38,0x24,0x34,0x2c, 0xc2,0xa3,0x40,0x5f + .byte 0x16,0x1d,0xc3,0x72, 0xbc,0xe2,0x25,0x0c + .byte 0x28,0x3c,0x49,0x8b, 0xff,0x0d,0x95,0x41 + .byte 0x39,0xa8,0x01,0x71, 0x08,0x0c,0xb3,0xde + .byte 0xd8,0xb4,0xe4,0x9c, 0x64,0x56,0xc1,0x90 + .byte 0x7b,0xcb,0x84,0x61, 0xd5,0x32,0xb6,0x70 + .byte 0x48,0x6c,0x5c,0x74, 0xd0,0xb8,0x57,0x42 +AES_Td4: + .byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38 + .byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb + .byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87 + .byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb + .byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d + .byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e + .byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2 + .byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25 + .byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16 + .byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92 + .byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda + .byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84 + .byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a + .byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06 + .byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02 + .byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b + .byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea + .byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73 + .byte 0x96, 0xac, 0x74, 
0x22, 0xe7, 0xad, 0x35, 0x85 + .byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e + .byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89 + .byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b + .byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20 + .byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4 + .byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31 + .byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f + .byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d + .byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef + .byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0 + .byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61 + .byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26 + .byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d + .cstring "AES for C64x, CRYPTOGAMS by " + .align 4 +___ + +print $code; +close STDOUT; diff --git a/crypto/c64xcpuid.pl b/crypto/c64xcpuid.pl new file mode 100644 index 0000000000..88fd153b98 --- /dev/null +++ b/crypto/c64xcpuid.pl @@ -0,0 +1,326 @@ +#! /usr/bin/env perl +# Copyright 2016 The OpenSSL Project Authors. All Rights Reserved. +# +# Licensed under the OpenSSL license (the "License"). You may not use +# this file except in compliance with the License. 
You can obtain a copy +# in the file LICENSE in the source distribution or at +# https://www.openssl.org/source/license.html + +while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {} +open STDOUT,">$output"; + +$code.=<<___; + .text + + .if .ASSEMBLER_VERSION<7000000 + .asg 0,__TI_EABI__ + .endif + .if __TI_EABI__ + .asg OPENSSL_rdtsc,_OPENSSL_rdtsc + .asg OPENSSL_cleanse,_OPENSSL_cleanse + .asg CRYPTO_memcmp,_CRYPTO_memcmp + .asg OPENSSL_atomic_add,_OPENSSL_atomic_add + .asg OPENSSL_wipe_cpu,_OPENSSL_wipe_cpu + .asg OPENSSL_instrument_bus,_OPENSSL_instrument_bus + .asg OPENSSL_instrument_bus2,_OPENSSL_instrument_bus2 + .endif + + .asg B3,RA + .asg 0x01AC0000,TIMER_BASE ; Timer 2 + + .global _OPENSSL_rdtsc +_OPENSSL_rdtsc: + .asmfunc + MVKL TIMER_BASE,A5 + MVKH TIMER_BASE,A5 + LDW *A5[0],A2 ; load CTL + LDW *A5[2],A4 ; load CTN + NOP 2 + .if .BIG_ENDIAN + MVK 0x2c0,A7 ; internal clock source, don't hold, go +|| MVK -1,A6 ; maximum period + .else + MVK 0x2c0,A6 ; internal clock source, don't hold, go +|| MVK -1,A7 ; maximum period + .endif + [!A2] STDW A7:A6,*A5[0] ; fire it up +|| BNOP RA,5 + .endasmfunc + + .global _OPENSSL_cleanse +_OPENSSL_cleanse: + .asmfunc + ZERO A3:A2 +|| ZERO B2 +|| SHRU B4,3,B0 ; is length >= 8 +|| ADD 1,A4,B6 + [!B0] BNOP RA +|| [B0] SUB B0,1,B2 +|| ZERO A1 +|| ZERO B1 + [B2] BDEC cleanse_loop?,B2 +||[!B0] CMPLT 0,B4,A1 +||[!B0] CMPLT 1,B4,B1 +|| ZERO B5 + [A1] STB A2,*A4++[2] +|| [B1] STB B5,*B6++[2] +|| [B2] BDEC cleanse_loop?,B2 +||[!B0] CMPLT 2,B4,A1 +||[!B0] CMPLT 3,B4,B1 + [A1] STB A2,*A4++[2] +|| [B1] STB B5,*B6++[2] +|| [B2] BDEC cleanse_loop?,B2 +||[!B0] CMPLT 4,B4,A1 +||[!B0] CMPLT 5,B4,B1 + [A1] STB A2,*A4++[2] +|| [B1] STB B5,*B6++[2] +|| [B2] BDEC cleanse_loop?,B2 +||[!B0] CMPLT 6,B4,A1 + [A1] STB A2,*A4++[2] +|| [B2] BDEC cleanse_loop?,B2 + +cleanse_loop?: + STNDW A3:A2,*A4++ +|| SUB B4,8,B4 +|| [B2] BDEC cleanse_loop?,B2 + + MV B4,B0 ; remaining bytes +|| ADD 1,A4,B6 +|| BNOP RA + [B0] CMPLT 0,B0,A1 +|| [B0] CMPLT 
1,B0,B1 + [A1] STB A2,*A4++[2] +|| [B1] STB B5,*B6++[2] +|| [B0] CMPLT 2,B0,A1 +|| [B0] CMPLT 3,B0,B1 + [A1] STB A2,*A4++[2] +|| [B1] STB B5,*B6++[2] +|| [B0] CMPLT 4,B0,A1 +|| [B0] CMPLT 5,B0,B1 + [A1] STB A2,*A4++[2] +|| [B1] STB B5,*B6++[2] +|| [B0] CMPLT 6,B0,A1 + [A1] STB A2,*A4++[2] + .endasmfunc + + .if 0 + .global _CRYPTO_memcmp +_CRYPTO_memcmp: + .asmfunc + MV A6,B0 + [!B0] BNOP RA +||[!B0] ZERO A4 +|| [B0] ZERO A1:A0 + [B0] LDBU *A4++,A5 +|| [B0] LDBU *B4++,B5 +|| [B0] BDEC memcmp_loop?,B0 + [B0] LDBU *A4++,A5 +|| [B0] LDBU *B4++,B5 +|| [B0] BDEC memcmp_loop?,B0 + [B0] LDBU *A4++,A5 +|| [B0] LDBU *B4++,B5 +|| [B0] BDEC memcmp_loop?,B0 + [B0] LDBU *A4++,A5 +|| [B0] LDBU *B4++,B5 +|| [B0] BDEC memcmp_loop?,B0 + [B0] LDBU *A4++,A5 +|| [B0] LDBU *B4++,B5 +|| [B0] BDEC memcmp_loop?,B0 + XOR A5,B5,A1 +|| [B0] LDBU *A4++,A5 +|| [B0] LDBU *B4++,B5 +|| [B0] BDEC memcmp_loop?,B0 + +memcmp_loop?: + OR A1,A0,A0 +|| XOR A5,B5,A1 +|| [B0] LDBU *A4++,A5 +|| [B0] LDBU *B4++,B5 +|| [B0] BDEC memcmp_loop?,B0 + + BNOP RA,3 + ZERO A4 + [A0] MVK 1,A4 + .endasmfunc + .endif + + .global _OPENSSL_atomic_add +_OPENSSL_atomic_add: + .asmfunc + BNOP atomic_store? 
; pre-C64x+ systems are uni-processor, it's +|| LDW *A4,B5 ; enough to hold interrupts off through + ; the load-update-store cycle to achieve + ; atomicity + NOP + BNOP RA,3 ; and this branch stretches even over store + ADD B4,B5,B5 +atomic_store?: + STW B5,*A4 +|| MV B5,A4 + .endasmfunc + + .global _OPENSSL_wipe_cpu +_OPENSSL_wipe_cpu: + .asmfunc + ZERO A0 +|| ZERO B0 +|| ZERO A1 +|| ZERO B1 + ZERO A3:A2 +|| MVD B0,B2 +|| ZERO A4 +|| ZERO B4 +|| ZERO A5 +|| ZERO B5 +|| BNOP RA + ZERO A7:A6 +|| ZERO B7:B6 +|| ZERO A8 +|| ZERO B8 +|| ZERO A9 +|| ZERO B9 + ZERO A17:A16 +|| ZERO B17:B16 +|| ZERO A18 +|| ZERO B18 +|| ZERO A19 +|| ZERO B19 + ZERO A21:A20 +|| ZERO B21:B20 +|| ZERO A22 +|| ZERO B22 +|| ZERO A23 +|| ZERO B23 + ZERO A25:A24 +|| ZERO B25:B24 +|| ZERO A26 +|| ZERO B26 +|| ZERO A27 +|| ZERO B27 + ZERO A29:A28 +|| ZERO B29:B28 +|| ZERO A30 +|| ZERO B30 +|| ZERO A31 +|| ZERO B31 + .endasmfunc + +CLFLUSH .macro CONTROL,ADDR,LEN + B passthrough? +|| STW ADDR,*CONTROL[0] + STW LEN,*CONTROL[1] +spinlock?: + LDW *CONTROL[1],A0 + NOP 3 +passthrough?: + NOP + [A0] BNOP spinlock?,5 + .endm + + .global _OPENSSL_instrument_bus +_OPENSSL_instrument_bus: + .asmfunc + MV B4,B0 ; reassign sizeof(output) +|| MV A4,B4 ; reassign output +|| MVK 0x00004030,A3 +|| MVKL TIMER_BASE,B16 + MV B0,A4 ; return value +|| MVK 1,A1 +|| MVKH 0x01840000,A3 ; L1DWIBAR +|| MVKH TIMER_BASE,B16 + LDW *B16[2],B8 ; collect 1st tick +|| MVK 0x00004010,A5 + NOP 4 + MV B8,B9 ; lasttick = tick +|| MVK 0,B7 ; lastdiff = 0 +|| MVKH 0x01840000,A5 ; L2WIBAR + CLFLUSH A3,B4,A1 ; write-back and invalidate L1D line + CLFLUSH A5,B4,A1 ; write-back and invalidate L2 line + LDW *B4,B5 + NOP 4 + ADD B7,B5,B5 + STW B5,*B4 +bus_loop1?: + LDW *B16[2],B8 +|| [B0] SUB B0,1,B0 + NOP 4 + SUB B8,B9,B7 ; lastdiff = tick - lasttick +|| MV B8,B9 ; lasttick = tick + CLFLUSH A3,B4,A1 ; write-back and invalidate L1D line + CLFLUSH A5,B4,A1 ; write-back and invalidate L2 line + LDW *B4,B5 + NOP 4 + ADD B7,B5,B5 + STW B5,*B4 ; 
[!B1] is removed to flatten samples +|| ADDK 4,B4 +|| [B0] BNOP bus_loop1?,5 + + BNOP RA,5 + .endasmfunc + + .global _OPENSSL_instrument_bus2 +_OPENSSL_instrument_bus2: + .asmfunc + MV A6,B0 ; reassign max +|| MV B4,A6 ; reassing sizeof(output) +|| MVK 0x00004030,A3 +|| MVKL TIMER_BASE,B16 + MV A4,B4 ; reassign output +|| MVK 0,A4 ; return value +|| MVK 1,A1 +|| MVKH 0x01840000,A3 ; L1DWIBAR +|| MVKH TIMER_BASE,B16 + + LDW *B16[2],B8 ; collect 1st tick +|| MVK 0x00004010,A5 + NOP 4 + MV B8,B9 ; lasttick = tick +|| MVK 0,B7 ; lastdiff = 0 +|| MVKH 0x01840000,A5 ; L2WIBAR + CLFLUSH A3,B4,A1 ; write-back and invalidate L1D line + CLFLUSH A5,B4,A1 ; write-back and invalidate L2 line + LDW *B4,B5 + NOP 4 + ADD B7,B5,B5 + STW B5,*B4 + + LDW *B16[2],B8 ; collect 1st diff + NOP 4 + SUB B8,B9,B7 ; lastdiff = tick - lasttick +|| MV B8,B9 ; lasttick = tick +|| SUB B0,1,B0 +bus_loop2?: + CLFLUSH A3,B4,A1 ; write-back and invalidate L1D line + CLFLUSH A5,B4,A1 ; write-back and invalidate L2 line + LDW *B4,B5 + NOP 4 + ADD B7,B5,B5 + STW B5,*B4 ; [!B1] is removed to flatten samples +||[!B0] BNOP bus_loop2_done?,2 +|| SUB B0,1,B0 + LDW *B16[2],B8 + NOP 4 + SUB B8,B9,B8 +|| MV B8,B9 + CMPEQ B8,B7,B2 +|| MV B8,B7 + [!B2] ADDAW B4,1,B4 +||[!B2] ADDK 1,A4 + CMPEQ A4,A6,A2 + [!A2] BNOP bus_loop2?,5 + +bus_loop2_done?: + BNOP RA,5 + .endasmfunc + + .if __TI_EABI__ + .sect ".init_array" + .else + .sect ".pinit" + .endif + .align 4 + .long _OPENSSL_rdtsc ; auto-start timer +___ + +print $code; +close STDOUT; diff --git a/crypto/sha/asm/sha1-c64x-large.pl b/crypto/sha/asm/sha1-c64x-large.pl new file mode 100644 index 0000000000..3916ff3a3f --- /dev/null +++ b/crypto/sha/asm/sha1-c64x-large.pl @@ -0,0 +1,230 @@ +#!/usr/bin/env perl +# +# ==================================================================== +# Written by Andy Polyakov for the OpenSSL +# project. The module is, however, dual licensed under OpenSSL and +# CRYPTOGAMS licenses depending on where you obtain it. 
For further +# details see http://www.openssl.org/~appro/cryptogams/. +# ==================================================================== +# +# SHA1 for C64x. +# +# November 2016 +# +# This is a fully-unrolled SHA1 implementation. It's 25% faster than +# one with compact loops, doesn't use in-memory ring buffer, as +# everything is accommodated in registers, and has "perfect" interrupt +# agility. Drawback is obviously the code size... + +while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {} +open STDOUT,">$output"; + +($CTX,$INP,$NUM) = ("A4","B4","A6"); # arguments + +($A,$B,$C,$D,$E, $Arot,$F,$F0,$K) = map("A$_",(16..20, 21..24)); +@V = ($A,$B,$C,$D,$E); +@X = map("B$_",(16..31)); +($Actx,$Bctx,$Cctx,$Dctx,$Ectx) = map("A$_",(3,6..9)); # zaps $NUM + +sub BODY_00_19 { +my ($i,$a,$b,$c,$d,$e) = @_; +my $j = ($i+1)&15; + +$code.=<<___ if ($i<14); + ROTL $a,5,$Arot ;; $i +|| AND $c,$b,$F +|| ANDN $d,$b,$F0 +|| ADD $K,$e,$e ; E+=K +|| LDNW *${INP}++,@X[$i+2] + OR $F0,$F,$F ; F_00_19(B,C,D) +|| ROTL $b,30,$b +|| SWAP2 @X[$i+1],@X[$i+1] +|| ADD @X[$i],$e,$e ; E+=X[i] + ADD $Arot,$e,$e ; E+=rot(A,5) +|| SWAP4 @X[$i+1],@X[$i+1] + ADD $F,$e,$e ; E+=F_00_19(B,C,D) +___ +$code.=<<___ if ($i==14); + ROTL $a,5,$Arot ;; $i +|| AND $c,$b,$F +|| ANDN $d,$b,$F0 +|| ADD $K,$e,$e ; E+=K + OR $F0,$F,$F ; F_00_19(B,C,D) +|| ROTL $b,30,$b +|| ADD @X[$i],$e,$e ; E+=X[i] +|| SWAP2 @X[$i+1],@X[$i+1] + ADD $Arot,$e,$e ; E+=rot(A,5) +|| SWAP4 @X[$i+1],@X[$i+1] + ADD $F,$e,$e ; E+=F_00_19(B,C,D) +___ +$code.=<<___ if ($i==15); +|| XOR @X[($j+2)&15],@X[$j],@X[$j] + ROTL $a,5,$Arot ;; $i +|| AND $c,$b,$F +|| ANDN $d,$b,$F0 +|| ADD $K,$e,$e ; E+=K +|| XOR @X[($j+8)&15],@X[$j],@X[$j] + OR $F0,$F,$F ; F_00_19(B,C,D) +|| ROTL $b,30,$b +|| ADD @X[$i],$e,$e ; E+=X[i] +|| XOR @X[($j+13)&15],@X[$j],@X[$j] + ADD $Arot,$e,$e ; E+=rot(A,5) +|| ROTL @X[$j],1,@X[$j] + ADD $F,$e,$e ; E+=F_00_19(B,C,D) +___ +$code.=<<___ if ($i>15); +|| XOR @X[($j+2)&15],@X[$j],@X[$j] + ROTL $a,5,$Arot ;; $i +|| AND
$c,$b,$F +|| ANDN $d,$b,$F0 +|| ADD $K,$e,$e ; E+=K +|| XOR @X[($j+8)&15],@X[$j],@X[$j] + OR $F0,$F,$F ; F_00_19(B,C,D) +|| ROTL $b,30,$b +|| ADD @X[$i&15],$e,$e ; E+=X[i] +|| XOR @X[($j+13)&15],@X[$j],@X[$j] + ADD $Arot,$e,$e ; E+=rot(A,5) +|| ROTL @X[$j],1,@X[$j] + ADD $F,$e,$e ; E+=F_00_19(B,C,D) +___ +} + +sub BODY_20_39 { +my ($i,$a,$b,$c,$d,$e) = @_; +my $j = ($i+1)&15; + +$code.=<<___ if ($i<79); +|| XOR @X[($j+2)&15],@X[$j],@X[$j] + ROTL $a,5,$Arot ;; $i +|| XOR $c,$b,$F +|| ADD $K,$e,$e ; E+=K +|| XOR @X[($j+8)&15],@X[$j],@X[$j] + XOR $d,$F,$F ; F_20_39(B,C,D) +|| ROTL $b,30,$b +|| ADD @X[$i&15],$e,$e ; E+=X[i] +|| XOR @X[($j+13)&15],@X[$j],@X[$j] + ADD $Arot,$e,$e ; E+=rot(A,5) +|| ROTL @X[$j],1,@X[$j] + ADD $F,$e,$e ; E+=F_20_39(B,C,D) +___ +$code.=<<___ if ($i==79); +|| [A0] B loop? +|| [A0] LDNW *${INP}++,@X[0] ; pre-fetch input + ROTL $a,5,$Arot ;; $i +|| XOR $c,$b,$F +|| ADD $K,$e,$e ; E+=K +|| [A0] LDNW *${INP}++,@X[1] + XOR $d,$F,$F ; F_20_39(B,C,D) +|| ROTL $b,30,$b +|| ADD @X[$i&15],$e,$e ; E+=X[i] + ADD $Arot,$e,$e ; E+=rot(A,5) + ADD $F,$e,$e ; E+=F_20_39(B,C,D) +|| ADD $Bctx,$a,$a ; accumulate context +|| ADD $Cctx,$b,$b + ADD $Dctx,$c,$c +|| ADD $Ectx,$d,$d +|| ADD $Actx,$e,$e +;;===== branch to loop? 
is taken here +___ +} + +sub BODY_40_59 { +my ($i,$a,$b,$c,$d,$e) = @_; +my $j = ($i+1)&15; + +$code.=<<___; +|| XOR @X[($j+2)&15],@X[$j],@X[$j] + ROTL $a,5,$Arot ;; $i +|| AND $c,$b,$F +|| AND $d,$b,$F0 +|| ADD $K,$e,$e ; E+=K +|| XOR @X[($j+8)&15],@X[$j],@X[$j] + XOR $F0,$F,$F +|| AND $c,$d,$F0 +|| ROTL $b,30,$b +|| XOR @X[($j+13)&15],@X[$j],@X[$j] +|| ADD @X[$i&15],$e,$e ; E+=X[i] + XOR $F0,$F,$F ; F_40_59(B,C,D) +|| ADD $Arot,$e,$e ; E+=rot(A,5) +|| ROTL @X[$j],1,@X[$j] + ADD $F,$e,$e ; E+=F_40_59(B,C,D) +___ +} + +$code=<<___; + .text + + .if .ASSEMBLER_VERSION<7000000 + .asg 0,__TI_EABI__ + .endif + .if __TI_EABI__ + .asg sha1_block_data_order,_sha1_block_data_order + .endif + + .asg B3,RA + .asg A15,FP + .asg B15,SP + + .if .BIG_ENDIAN + .asg MV,SWAP2 + .asg MV,SWAP4 + .endif + + .global _sha1_block_data_order +_sha1_block_data_order: + .asmfunc + MV $NUM,A0 ; reassign $NUM + [!A0] BNOP RA ; if ($NUM==0) return; +|| [A0] LDW *${CTX}[0],$A ; load A-E... + [A0] LDW *${CTX}[1],$B + [A0] LDW *${CTX}[2],$C + [A0] LDW *${CTX}[3],$D + [A0] LDW *${CTX}[4],$E + [A0] LDNW *${INP}++,@X[0] ; pre-fetch input + [A0] LDNW *${INP}++,@X[1] + NOP 3 + +loop?: + SUB A0,1,A0 +|| MV $A,$Actx +|| MVD $B,$Bctx +|| SWAP2 @X[0],@X[0] +|| MVKL 0x5a827999,$K + MVKH 0x5a827999,$K ; K_00_19 +|| MV $C,$Cctx +|| MV $D,$Dctx +|| MVD $E,$Ectx +|| SWAP4 @X[0],@X[0] +___ +for ($i=0;$i<20;$i++) { &BODY_00_19($i,@V); unshift(@V,pop(@V)); } +$code.=<<___; +|| MVKL 0x6ed9eba1,$K + MVKH 0x6ed9eba1,$K ; K_20_39 +___ +for (;$i<40;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); } +$code.=<<___; +|| MVKL 0x8f1bbcdc,$K + MVKH 0x8f1bbcdc,$K ; K_40_59 +___ +for (;$i<60;$i++) { &BODY_40_59($i,@V); unshift(@V,pop(@V)); } +$code.=<<___; +|| MVKL 0xca62c1d6,$K + MVKH 0xca62c1d6,$K ; K_60_79 +___ +for (;$i<80;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); } +$code.=<<___; + BNOP RA ; return + STW $A,*${CTX}[0] ; emit A-E...
+ STW $B,*${CTX}[1] + STW $C,*${CTX}[2] + STW $D,*${CTX}[3] + STW $E,*${CTX}[4] + .endasmfunc + + .sect .const + .cstring "SHA1 block transform for C64x, CRYPTOGAMS by " + .align 4 +___ + +print $code; +close STDOUT; diff --git a/crypto/sha/asm/sha1-c64x.pl b/crypto/sha/asm/sha1-c64x.pl new file mode 100644 index 0000000000..d7a9dd1d05 --- /dev/null +++ b/crypto/sha/asm/sha1-c64x.pl @@ -0,0 +1,330 @@ +#!/usr/bin/env perl +# +# ==================================================================== +# Written by Andy Polyakov for the OpenSSL +# project. The module is, however, dual licensed under OpenSSL and +# CRYPTOGAMS licenses depending on where you obtain it. For further +# details see http://www.openssl.org/~appro/cryptogams/. +# ==================================================================== +# +# SHA1 for C64x. +# +# November 2016 +# +# If compared to compiler-generated code with similar characteristics, +# i.e. compiled with OPENSSL_SMALL_FOOTPRINT and utilizing SPLOOPs, +# this implementation is 25% smaller and >2x faster. In absolute terms +# performance is (quite impressive) ~6.5 cycles per processed byte. +# Unlike its predecessor, sha1-c64xplus module, this module has worse +# interrupt agility. While original added up to 5 cycles delay to +# response to interrupt, this module adds up to 100. Fully unrolled +# implementation doesn't add any delay and even 25% faster, but is +# almost 5x larger... +# +# !!! Note that this module uses AMR, which means that all interrupt +# service routines are expected to preserve it and for own well-being +# zero it upon entry. 
+ +while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {} +open STDOUT,">$output"; + +($CTX,$INP,$NUM) = ("A4","B4","A6"); # arguments + +($A,$B,$C,$D,$E, $Arot,$F,$F0,$T,$K) = map("A$_",(16..20, 21..25)); +($X0,$X2,$X8,$X13) = ("A26","B26","A27","B27"); +($TX0,$TX1,$TX2,$TX3) = map("B$_",(28..31)); +($XPA,$XPB) = ("A5","B5"); # X circular buffer +($Actx,$Bctx,$Cctx,$Dctx,$Ectx) = map("A$_",(3,6..9)); # zaps $NUM + +$code=<<___; + .text + + .if .ASSEMBLER_VERSION<7000000 + .asg 0,__TI_EABI__ + .endif + .if __TI_EABI__ + .asg sha1_block_data_order,_sha1_block_data_order + .endif + + .asg B3,RA + .asg A15,FP + .asg B15,SP + + .if .BIG_ENDIAN + .asg MV,SWAP2 + .asg MV,SWAP4 + .endif + + .global _sha1_block_data_order +_sha1_block_data_order: + .asmfunc stack_usage(64) + MV $NUM,A0 ; reassign $NUM +|| MVK -64,B0 + [!A0] BNOP RA ; if ($NUM==0) return; +|| [A0] STW FP,*SP--[16] ; save frame pointer and alloca(64) +|| [A0] MV SP,FP + [A0] LDW *${CTX}[0],$A ; load A-E... +|| [A0] AND B0,SP,SP ; align stack at 64 bytes + [A0] LDW *${CTX}[1],$B +|| [A0] SUBAW SP,2,SP ; reserve two words above buffer + [A0] LDW *${CTX}[2],$C +|| [A0] MVK 0x00404,B0 + [A0] LDW *${CTX}[3],$D +|| [A0] MVKH 0x50000,B0 ; 0x050404, 64 bytes for $XP[AB] + [A0] LDW *${CTX}[4],$E +|| [A0] MVC B0,AMR ; setup circular addressing + LDNW *${INP}++,$TX1 ; pre-fetch input + NOP 1 + +loop?: + MVKL 0x5a827999,$K +|| ADDAW SP,2,$XPB +|| SUB A0,1,A0 + MVKH 0x5a827999,$K ; K_00_19 +|| MV $A,$Actx +|| MV $B,$Bctx +;;================================================== + B body_00_13? 
; BODY_00_13 +|| MVK 11,B0 +|| MV $XPB,$XPA +|| MV $C,$Cctx +|| MV $D,$Dctx +|| MVD $E,$Ectx + +body_00_13?: + ROTL $A,5,$Arot +|| AND $C,$B,$F +|| ANDN $D,$B,$F0 +|| ADD $K,$E,$T ; T=E+K + + XOR $F0,$F,$F ; F_00_19(B,C,D) +|| MV $D,$E ; E=D +|| MV $C,$D ; D=C +|| SWAP2 $TX1,$TX2 +|| LDNW *${INP}++,$TX1 + + ADD $F,$T,$T ; T+=F_00_19(B,C,D) +|| ROTL $B,30,$C ; C=ROL(B,30) +|| SWAP4 $TX2,$TX3 ; byte swap + + ADD $Arot,$T,$T ; T+=ROL(A,5) +|| MV $A,$B ; B=A + + ADD $TX3,$T,$A ; A=T+Xi +|| STW $TX3,*${XPB}++ +|| BDEC body_00_13?,B0 +;;================================================== + ROTL $A,5,$Arot ; BODY_14 +|| AND $C,$B,$F +|| ANDN $D,$B,$F0 +|| ADD $K,$E,$T ; T=E+K + + XOR $F0,$F,$F ; F_00_19(B,C,D) +|| MV $D,$E ; E=D +|| MV $C,$D ; D=C +|| SWAP2 $TX1,$TX2 +|| LDNW *${INP}++,$TX1 + + ADD $F,$T,$T ; T+=F_00_19(B,C,D) +|| ROTL $B,30,$C ; C=ROL(B,30) +|| SWAP4 $TX2,$TX2 ; byte swap +|| LDW *${XPA}++,$X0 ; fetches from X ring buffer are +|| LDW *${XPB}[4],$X2 ; 2 iterations ahead + + ADD $Arot,$T,$T ; T+=ROL(A,5) +|| MV $A,$B ; B=A +|| LDW *${XPA}[7],$X8 +|| MV $TX3,$X13 ; || LDW *${XPB}[15],$X13 +|| MV $TX2,$TX3 + + ADD $TX2,$T,$A ; A=T+Xi +|| STW $TX2,*${XPB}++ +;;================================================== + ROTL $A,5,$Arot ; BODY_15 +|| AND $C,$B,$F +|| ANDN $D,$B,$F0 +|| ADD $K,$E,$T ; T=E+K + + XOR $F0,$F,$F ; F_00_19(B,C,D) +|| MV $D,$E ; E=D +|| MV $C,$D ; D=C +|| SWAP2 $TX1,$TX2 + + ADD $F,$T,$T ; T+=F_00_19(B,C,D) +|| ROTL $B,30,$C ; C=ROL(B,30) +|| SWAP4 $TX2,$TX2 ; byte swap +|| XOR $X0,$X2,$TX0 ; Xupdate XORs are 1 iteration ahead +|| LDW *${XPA}++,$X0 +|| LDW *${XPB}[4],$X2 + + ADD $Arot,$T,$T ; T+=ROL(A,5) +|| MV $A,$B ; B=A +|| XOR $X8,$X13,$TX1 +|| LDW *${XPA}[7],$X8 +|| MV $TX3,$X13 ; || LDW *${XPB}[15],$X13 +|| MV $TX2,$TX3 + + ADD $TX2,$T,$A ; A=T+Xi +|| STW $TX2,*${XPB}++ +|| XOR $TX0,$TX1,$TX1 +;;================================================== +|| B body_16_19? 
; BODY_16_19 +|| MVK 1,B0 + +body_16_19?: + ROTL $A,5,$Arot +|| AND $C,$B,$F +|| ANDN $D,$B,$F0 +|| ADD $K,$E,$T ; T=E+K +|| ROTL $TX1,1,$TX2 ; Xupdate output + + XOR $F0,$F,$F ; F_00_19(B,C,D) +|| MV $D,$E ; E=D +|| MV $C,$D ; D=C + + ADD $F,$T,$T ; T+=F_00_19(B,C,D) +|| ROTL $B,30,$C ; C=ROL(B,30) +|| XOR $X0,$X2,$TX0 +|| LDW *${XPA}++,$X0 +|| LDW *${XPB}[4],$X2 + + ADD $Arot,$T,$T ; T+=ROL(A,5) +|| MV $A,$B ; B=A +|| XOR $X8,$X13,$TX1 +|| LDW *${XPA}[7],$X8 +|| MV $TX3,$X13 ; || LDW *${XPB}[15],$X13 +|| MV $TX2,$TX3 + + ADD $TX2,$T,$A ; A=T+Xi +|| STW $TX2,*${XPB}++ +|| XOR $TX0,$TX1,$TX1 +|| BDEC body_16_19?,B0 + + MVKL 0x6ed9eba1,$K +|| MVK 17,B0 + MVKH 0x6ed9eba1,$K ; K_20_39 +___ +sub BODY_20_39 { +my $label = shift; +$code.=<<___; +;;================================================== +|| B $label ; BODY_20_39 + +$label: + ROTL $A,5,$Arot +|| XOR $B,$C,$F +|| ADD $K,$E,$T ; T=E+K +|| ROTL $TX1,1,$TX2 ; Xupdate output + + XOR $D,$F,$F ; F_20_39(B,C,D) +|| MV $D,$E ; E=D +|| MV $C,$D ; D=C + + ADD $F,$T,$T ; T+=F_20_39(B,C,D) +|| ROTL $B,30,$C ; C=ROL(B,30) +|| XOR $X0,$X2,$TX0 +|| LDW *${XPA}++,$X0 +|| LDW *${XPB}[4],$X2 + + ADD $Arot,$T,$T ; T+=ROL(A,5) +|| MV $A,$B ; B=A +|| XOR $X8,$X13,$TX1 +|| LDW *${XPA}[7],$X8 +|| MV $TX3,$X13 ; || LDW *${XPB}[15],$X13 +|| MV $TX2,$TX3 + + ADD $TX2,$T,$A ; A=T+Xi +|| STW $TX2,*${XPB}++ ; last one is redundant +|| XOR $TX0,$TX1,$TX1 +|| BDEC $label,B0 +___ +} &BODY_20_39("body_20_39?"); +$code.=<<___; +;;================================================== + MVKL 0x8f1bbcdc,$K +|| MVK 17,B0 + MVKH 0x8f1bbcdc,$K ; K_40_59 +|| B body_40_59? 
; BODY_40_59 +|| AND $B,$C,$F +|| AND $B,$D,$F0 + +body_40_59?: + ROTL $A,5,$Arot +|| XOR $F0,$F,$F +|| AND $C,$D,$F0 +|| ADD $K,$E,$T ; T=E+K +|| ROTL $TX1,1,$TX2 ; Xupdate output + + XOR $F0,$F,$F ; F_40_59(B,C,D) +|| MV $D,$E ; E=D +|| MV $C,$D ; D=C + + ADD $F,$T,$T ; T+=F_40_59(B,C,D) +|| ROTL $B,30,$C ; C=ROL(B,30) +|| XOR $X0,$X2,$TX0 +|| LDW *${XPA}++,$X0 +|| LDW *${XPB}[4],$X2 + + ADD $Arot,$T,$T ; T+=ROL(A,5) +|| MV $A,$B ; B=A +|| XOR $X8,$X13,$TX1 +|| LDW *${XPA}[7],$X8 +|| MV $TX3,$X13 ; || LDW *${XPB}[15],$X13 +|| MV $TX2,$TX3 + + ADD $TX2,$T,$A ; A=T+Xi +|| STW $TX2,*${XPB}++ +|| XOR $TX0,$TX1,$TX1 +|| AND $B,$C,$F +|| AND $B,$D,$F0 +|| BDEC body_40_59?,B0 + + MVKL 0xca62c1d6,$K +|| MVK 16,B0 + MVKH 0xca62c1d6,$K ; K_60_79 +___ + &BODY_20_39("body_60_78?"); # BODY_60_78 +$code.=<<___; +;;================================================== + [A0] B loop? +|| ROTL $A,5,$Arot ; BODY_79 +|| XOR $B,$C,$F +|| ROTL $TX1,1,$TX2 ; Xupdate output + + [A0] LDNW *${INP}++,$TX1 ; pre-fetch input +|| ADD $K,$E,$T ; T=E+K +|| XOR $D,$F,$F ; F_20_39(B,C,D) + + ADD $F,$T,$T ; T+=F_20_39(B,C,D) +|| ADD $Ectx,$D,$E ; E=D,E+=Ectx +|| ADD $Dctx,$C,$D ; D=C,D+=Dctx +|| ROTL $B,30,$C ; C=ROL(B,30) + + ADD $Arot,$T,$T ; T+=ROL(A,5) +|| ADD $Bctx,$A,$B ; B=A,B+=Bctx + + ADD $TX2,$T,$A ; A=T+Xi + + ADD $Actx,$A,$A ; A+=Actx +|| ADD $Cctx,$C,$C ; C+=Cctx +;; end of loop? + + BNOP RA ; return +|| MV FP,SP ; restore stack pointer +|| LDW *FP[0],FP ; restore frame pointer + STW $A,*${CTX}[0] ; emit A-E... 
+|| MVK 0,B0 + STW $B,*${CTX}[1] +|| MVC B0,AMR ; clear AMR + STW $C,*${CTX}[2] + STW $D,*${CTX}[3] + STW $E,*${CTX}[4] + .endasmfunc + + .sect .const + .cstring "SHA1 block transform for C64x, CRYPTOGAMS by " + .align 4 +___ + +print $code; +close STDOUT; diff --git a/crypto/sha/asm/sha256-c64x.pl b/crypto/sha/asm/sha256-c64x.pl new file mode 100644 index 0000000000..fbe99c0b7f --- /dev/null +++ b/crypto/sha/asm/sha256-c64x.pl @@ -0,0 +1,313 @@ +#!/usr/bin/env perl +# +# ==================================================================== +# Written by Andy Polyakov for the OpenSSL +# project. The module is, however, dual licensed under OpenSSL and +# CRYPTOGAMS licenses depending on where you obtain it. For further +# details see http://www.openssl.org/~appro/cryptogams/. +# ==================================================================== +# +# SHA256 for C64x. +# +# November 2016 +# +# Performance is just below 10 cycles per processed byte, which is +# almost 40% faster than compiler-generated code. Unroll is unlikely +# to give more than ~8% improvement... +# +# !!! Note that this module uses AMR, which means that all interrupt +# service routines are expected to preserve it and for own well-being +# zero it upon entry. 
+ +while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {} +open STDOUT,">$output"; + +($CTXA,$INP,$NUM) = ("A4","B4","A6"); # arguments + $K256="A3"; + +($A,$Actx,$B,$Bctx,$C,$Cctx,$D,$Dctx,$T2,$S0,$s1,$t0a,$t1a,$t2a,$X9,$X14) + =map("A$_",(16..31)); +($E,$Ectx,$F,$Fctx,$G,$Gctx,$H,$Hctx,$T1,$S1,$s0,$t0e,$t1e,$t2e,$X1,$X15) + =map("B$_",(16..31)); + +($Xia,$Xib)=("A5","B5"); # circular/ring buffer + $CTXB=$t2e; + +($Xn,$X0,$K)=("B7","B8","B9"); +($Maj,$Ch)=($T2,"B6"); + +$code.=<<___; + .text + + .if .ASSEMBLER_VERSION<7000000 + .asg 0,__TI_EABI__ + .endif + .if __TI_EABI__ + .nocmp + .asg sha256_block_data_order,_sha256_block_data_order + .endif + + .asg B3,RA + .asg A15,FP + .asg B15,SP + + .if .BIG_ENDIAN + .asg MV,SWAP2 ; big-endian input needs no byte swap, + .asg MV,SWAP4 ; so SWAP2/SWAP4 degenerate to plain moves + .endif + + .global _sha256_block_data_order +_sha256_block_data_order: +__sha256_block: + .asmfunc stack_usage(64) + MV $NUM,A0 ; reassign $NUM +|| MVK -64,B0 + [!A0] BNOP RA ; if ($NUM==0) return; +|| [A0] STW FP,*SP--[16] ; save frame pointer and alloca(64) +|| [A0] MV SP,FP + [A0] ADDKPC _sha256_block_data_order,B2 +|| [A0] AND B0,SP,SP ; align stack at 64 bytes + .if __TI_EABI__ + [A0] MVK 0x00404,B1 +|| [A0] MVKL \$PCR_OFFSET(K256,__sha256_block),$K256 + [A0] MVKH 0x50000,B1 +|| [A0] MVKH \$PCR_OFFSET(K256,__sha256_block),$K256 + .else + [A0] MVK 0x00404,B1 +|| [A0] MVKL (K256-__sha256_block),$K256 + [A0] MVKH 0x50000,B1 +|| [A0] MVKH (K256-__sha256_block),$K256 + .endif + [A0] MVC B1,AMR ; setup circular addressing +|| [A0] MV SP,$Xia + [A0] MV SP,$Xib +|| [A0] ADD B2,$K256,$K256 +|| [A0] MV $CTXA,$CTXB +|| [A0] SUBAW SP,2,SP ; reserve two words above buffer + LDW *${CTXA}[0],$A ; load ctx +|| LDW *${CTXB}[4],$E + LDW *${CTXA}[1],$B +|| LDW *${CTXB}[5],$F + LDW *${CTXA}[2],$C +|| LDW *${CTXB}[6],$G + LDW *${CTXA}[3],$D +|| LDW *${CTXB}[7],$H + + LDNW *$INP++,$Xn ; pre-fetch input + LDW *$K256++,$K ; pre-fetch K256[0] + NOP + ADDAW $Xia,9,$Xia +outerloop?: + SUB A0,1,A0 +|| MV $A,$Actx +|| MV $E,$Ectx +|| MVD $B,$Bctx
+|| MVD $F,$Fctx + MV $C,$Cctx +|| MV $G,$Gctx +|| MVD $D,$Dctx +|| MVD $H,$Hctx +|| SWAP4 $Xn,$X0 + + MVK 14,B0 ; loop counter +|| SWAP2 $X0,$X0 + +loop_00_14?: ; BODY_00_14 + LDNW *$INP++,$Xn +|| ROTL $A,30,$S0 +|| OR $A,$B,$Maj +|| AND $A,$B,$t2a +|| ROTL $E,26,$S1 +|| AND $F,$E,$Ch +|| ANDN $G,$E,$t2e + ROTL $A,19,$t0a +|| AND $C,$Maj,$Maj +|| ROTL $E,21,$t0e +|| XOR $t2e,$Ch,$Ch ; Ch(e,f,g) = (e&f)^(~e&g) + ROTL $A,10,$t1a +|| OR $t2a,$Maj,$Maj ; Maj(a,b,c) = ((a|b)&c)|(a&b) +|| ROTL $E,7,$t1e +|| ADD $K,$H,$T1 ; T1 = h + K256[i] +|| [B0] BDEC loop_00_14?,B0 + ADD $X0,$T1,$T1 ; T1 += X[i]; +|| STW $X0,*$Xib++ +|| XOR $t0a,$S0,$S0 +|| XOR $t0e,$S1,$S1 + XOR $t1a,$S0,$S0 ; Sigma0(a) +|| XOR $t1e,$S1,$S1 ; Sigma1(e) +|| LDW *$K256++,$K ; pre-fetch K256[i+1] +|| ADD $Ch,$T1,$T1 ; T1 += Ch(e,f,g) + ADD $S1,$T1,$T1 ; T1 += Sigma1(e) +|| ADD $S0,$Maj,$T2 ; T2 = Sigma0(a) + Maj(a,b,c) +|| ROTL $G,0,$H ; h = g +|| MV $F,$G ; g = f +|| MV $X0,$X14 +|| SWAP4 $Xn,$X0 + SWAP2 $X0,$X0 +|| MV $E,$F ; f = e +|| ADD $D,$T1,$E ; e = d + T1 +|| MV $C,$D ; d = c + MV $B,$C ; c = b +|| MV $A,$B ; b = a +|| ADD $T1,$T2,$A ; a = T1 + T2 +;;===== branch to loop00_14? 
is taken here + + ROTL $A,30,$S0 ; BODY_15 +|| OR $A,$B,$Maj +|| AND $A,$B,$t2a +|| ROTL $E,26,$S1 +|| AND $F,$E,$Ch +|| ANDN $G,$E,$t2e +|| LDW *${Xib}[1],$Xn ; modulo-scheduled + ROTL $A,19,$t0a +|| AND $C,$Maj,$Maj +|| ROTL $E,21,$t0e +|| XOR $t2e,$Ch,$Ch ; Ch(e,f,g) = (e&f)^(~e&g) +|| LDW *${Xib}[2],$X1 ; modulo-scheduled + ROTL $A,10,$t1a +|| OR $t2a,$Maj,$Maj ; Maj(a,b,c) = ((a|b)&c)|(a&b) +|| ROTL $E,7,$t1e +|| ADD $K,$H,$T1 ; T1 = h + K256[i] + ADD $X0,$T1,$T1 ; T1 += X[i]; +|| STW $X0,*$Xib++ +|| XOR $t0a,$S0,$S0 +|| XOR $t0e,$S1,$S1 + XOR $t1a,$S0,$S0 ; Sigma0(a) +|| XOR $t1e,$S1,$S1 ; Sigma1(e) +|| LDW *$K256++,$K ; pre-fetch K256[i+1] +|| ADD $Ch,$T1,$T1 ; T1 += Ch(e,f,g) + ADD $S1,$T1,$T1 ; T1 += Sigma1(e) +|| ADD $S0,$Maj,$T2 ; T2 = Sigma0(a) + Maj(a,b,c) +|| ROTL $G,0,$H ; h = g +|| MV $F,$G ; g = f +|| MV $X0,$X15 + MV $E,$F ; f = e +|| ADD $D,$T1,$E ; e = d + T1 +|| MV $C,$D ; d = c +|| MV $Xn,$X0 ; modulo-scheduled +|| LDW *$Xia,$X9 ; modulo-scheduled +|| ROTL $X1,25,$t0e ; modulo-scheduled +|| ROTL $X14,15,$t0a ; modulo-scheduled + SHRU $X1,3,$s0 ; modulo-scheduled +|| SHRU $X14,10,$s1 ; modulo-scheduled +|| ROTL $B,0,$C ; c = b +|| MV $A,$B ; b = a +|| ADD $T1,$T2,$A ; a = T1 + T2 + + MVK 47,B1 ; loop counter +|| ROTL $X1,14,$t1e ; modulo-scheduled +|| ROTL $X14,13,$t1a ; modulo-scheduled + +loop_16_63?: ; BODY_16_63 + XOR $t0e,$s0,$s0 +|| XOR $t0a,$s1,$s1 +|| MV $X15,$X14 +|| MV $X1,$Xn + XOR $t1e,$s0,$s0 ; sigma0(X[i+1]) +|| XOR $t1a,$s1,$s1 ; sigma1(X[i+14]) +|| LDW *${Xib}[2],$X1 ; modulo-scheduled + ROTL $A,30,$S0 +|| OR $A,$B,$Maj +|| AND $A,$B,$t2a +|| ROTL $E,26,$S1 +|| AND $F,$E,$Ch +|| ANDN $G,$E,$t2e +|| ADD $X9,$X0,$X0 ; X[i] += X[i+9] + ROTL $A,19,$t0a +|| AND $C,$Maj,$Maj +|| ROTL $E,21,$t0e +|| XOR $t2e,$Ch,$Ch ; Ch(e,f,g) = (e&f)^(~e&g) +|| ADD $s0,$X0,$X0 ; X[i] += sigma0(X[i+1]) + ROTL $A,10,$t1a +|| OR $t2a,$Maj,$Maj ; Maj(a,b,c) = ((a|b)&c)|(a&b) +|| ROTL $E,7,$t1e +|| ADD $H,$K,$T1 ; T1 = h + K256[i] +|| ADD $s1,$X0,$X0 ;
X[i] += sigma1(X[i+14]) +|| [B1] BDEC loop_16_63?,B1 + XOR $t0a,$S0,$S0 +|| XOR $t0e,$S1,$S1 +|| ADD $X0,$T1,$T1 ; T1 += X[i] +|| STW $X0,*$Xib++ + XOR $t1a,$S0,$S0 ; Sigma0(a) +|| XOR $t1e,$S1,$S1 ; Sigma1(e) +|| ADD $Ch,$T1,$T1 ; T1 += Ch(e,f,g) +|| MV $X0,$X15 +|| ROTL $G,0,$H ; h = g +|| LDW *$K256++,$K ; pre-fetch K256[i+1] + ADD $S1,$T1,$T1 ; T1 += Sigma1(e) +|| ADD $S0,$Maj,$T2 ; T2 = Sigma0(a) + Maj(a,b,c) +|| MV $F,$G ; g = f +|| MV $Xn,$X0 ; modulo-scheduled +|| LDW *++$Xia,$X9 ; modulo-scheduled +|| ROTL $X1,25,$t0e ; modulo-scheduled +|| ROTL $X14,15,$t0a ; modulo-scheduled + ROTL $X1,14,$t1e ; modulo-scheduled +|| ROTL $X14,13,$t1a ; modulo-scheduled +|| MV $E,$F ; f = e +|| ADD $D,$T1,$E ; e = d + T1 +|| MV $C,$D ; d = c +|| MV $B,$C ; c = b + MV $A,$B ; b = a +|| ADD $T1,$T2,$A ; a = T1 + T2 +|| SHRU $X1,3,$s0 ; modulo-scheduled +|| SHRU $X14,10,$s1 ; modulo-scheduled +;;===== branch to loop_16_63? is taken here + + [A0] B outerloop? +|| [A0] LDNW *$INP++,$Xn ; pre-fetch input +|| [A0] ADDK -260,$K256 ; rewind K256 +|| ADD $Actx,$A,$A ; accumulate ctx +|| ADD $Ectx,$E,$E +|| ADD $Bctx,$B,$B + ADD $Fctx,$F,$F +|| ADD $Cctx,$C,$C +|| ADD $Gctx,$G,$G +|| ADD $Dctx,$D,$D +|| ADD $Hctx,$H,$H +|| [A0] LDW *$K256++,$K ; pre-fetch K256[0] + + [!A0] BNOP RA +||[!A0] MV $CTXA,$CTXB + [!A0] MV FP,SP ; restore stack pointer +||[!A0] LDW *FP[0],FP ; restore frame pointer + [!A0] STW $A,*${CTXA}[0] ; save ctx +||[!A0] STW $E,*${CTXB}[4] +||[!A0] MVK 0,B0 + [!A0] STW $B,*${CTXA}[1] +||[!A0] STW $F,*${CTXB}[5] +||[!A0] MVC B0,AMR ; clear AMR + STW $C,*${CTXA}[2] +|| STW $G,*${CTXB}[6] + STW $D,*${CTXA}[3] +|| STW $H,*${CTXB}[7] + .endasmfunc + + .if __TI_EABI__ + .sect ".text:sha_asm.const" + .else + .sect ".const:sha_asm" + .endif + .align 128 +K256: + .uword 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5 + .uword 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5 + .uword 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3 + .uword 0x72be5d74, 0x80deb1fe, 0x9bdc06a7,
0xc19bf174 + .uword 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc + .uword 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da + .uword 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7 + .uword 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967 + .uword 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13 + .uword 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85 + .uword 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3 + .uword 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070 + .uword 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5 + .uword 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3 + .uword 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208 + .uword 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 + .cstring "SHA256 block transform for C64x, CRYPTOGAMS by " + .align 4 + +___ + +print $code; diff --git a/crypto/sha/asm/sha512-c64x.pl b/crypto/sha/asm/sha512-c64x.pl new file mode 100644 index 0000000000..e35a72ade5 --- /dev/null +++ b/crypto/sha/asm/sha512-c64x.pl @@ -0,0 +1,437 @@ +#!/usr/bin/env perl +# +# ==================================================================== +# Written by Andy Polyakov for the OpenSSL +# project. The module is, however, dual licensed under OpenSSL and +# CRYPTOGAMS licenses depending on where you obtain it. For further +# details see http://www.openssl.org/~appro/cryptogams/. +# ==================================================================== +# +# SHA512 for C64x. +# +# November 2016 +# +# Performance is ~19 cycles per processed byte. Compared to block +# transform function from sha512.c compiled with cl6x with -mv6400+ +# -o2 -DOPENSSL_SMALL_FOOTPRINT it's almost 7x faster and 2x smaller. +# Loop unroll won't make it, this implementation, any faster, because +# it's effectively dominated by SHRU||SHL pairs and you can't schedule +# more of them. +# +# !!! Note that this module uses AMR, which means that all interrupt +# service routines are expected to preserve it and for own well-being +# zero it upon entry. 
+ +while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {} +open STDOUT,">$output"; + +($CTXA,$INP,$NUM) = ("A4","B4","A6"); # arguments + $K512="A3"; + +($Ahi,$Actxhi,$Bhi,$Bctxhi,$Chi,$Cctxhi,$Dhi,$Dctxhi, + $Ehi,$Ectxhi,$Fhi,$Fctxhi,$Ghi,$Gctxhi,$Hhi,$Hctxhi)=map("A$_",(16..31)); +($Alo,$Actxlo,$Blo,$Bctxlo,$Clo,$Cctxlo,$Dlo,$Dctxlo, + $Elo,$Ectxlo,$Flo,$Fctxlo,$Glo,$Gctxlo,$Hlo,$Hctxlo)=map("B$_",(16..31)); + +($S1hi,$CHhi,$S0hi,$t0hi)=map("A$_",(10..13)); +($S1lo,$CHlo,$S0lo,$t0lo)=map("B$_",(10..13)); +($T1hi, $T2hi)= ("A6","A7"); +($T1lo,$T1carry,$T2lo,$T2carry)=("B6","B7","B8","B9"); +($Khi,$Klo)=("A9","A8"); +($MAJhi,$MAJlo)=($T2hi,$T2lo); +($t1hi,$t1lo)=($Khi,"B2"); + $CTXB=$t1lo; + +($Xihi,$Xilo)=("A5","B5"); # circular/ring buffer + +$code.=<<___; + .text + + .if .ASSEMBLER_VERSION<7000000 + .asg 0,__TI_EABI__ + .endif + .if __TI_EABI__ + .nocmp + .asg sha512_block_data_order,_sha512_block_data_order + .endif + + .asg B3,RA + .asg A15,FP + .asg B15,SP + + .if .BIG_ENDIAN + .asg $Khi,KHI + .asg $Klo,KLO + .else + .asg $Khi,KLO + .asg $Klo,KHI + .endif + + .global _sha512_block_data_order +_sha512_block_data_order: +__sha512_block: + .asmfunc stack_usage(40+128) + MV $NUM,A0 ; reassign $NUM +|| MVK -128,B0 + [!A0] BNOP RA ; if ($NUM==0) return; +|| [A0] STW FP,*SP--(40) ; save frame pointer +|| [A0] MV SP,FP + [A0] STDW B13:B12,*SP[4] +|| [A0] MVK 0x00404,B1 + [A0] STDW B11:B10,*SP[3] +|| [A0] STDW A13:A12,*FP[-3] +|| [A0] MVKH 0x60000,B1 + [A0] STDW A11:A10,*SP[1] +|| [A0] MVC B1,AMR ; setup circular addressing +|| [A0] ADD B0,SP,SP ; alloca(128) + .if __TI_EABI__ + [A0] AND B0,SP,SP ; align stack at 128 bytes +|| [A0] ADDKPC __sha512_block,B1 +|| [A0] MVKL \$PCR_OFFSET(K512,__sha512_block),$K512 + [A0] MVKH \$PCR_OFFSET(K512,__sha512_block),$K512 +|| [A0] SUBAW SP,2,SP ; reserve two words above buffer + .else + [A0] AND B0,SP,SP ; align stack at 128 bytes +|| [A0] ADDKPC __sha512_block,B1 +|| [A0] MVKL (K512-__sha512_block),$K512 + [A0] MVKH 
(K512-__sha512_block),$K512 +|| [A0] SUBAW SP,2,SP ; reserve two words above buffer + .endif + ADDAW SP,3,$Xilo + ADD SP,4*2,$Xihi ; ADDAW SP,2,$Xihi + +|| MV $CTXA,$CTXB + LDW *${CTXA}[0^.LITTLE_ENDIAN],$Ahi ; load ctx +|| LDW *${CTXB}[1^.LITTLE_ENDIAN],$Alo +|| ADD B1,$K512,$K512 + LDW *${CTXA}[2^.LITTLE_ENDIAN],$Bhi +|| LDW *${CTXB}[3^.LITTLE_ENDIAN],$Blo + LDW *${CTXA}[4^.LITTLE_ENDIAN],$Chi +|| LDW *${CTXB}[5^.LITTLE_ENDIAN],$Clo + LDW *${CTXA}[6^.LITTLE_ENDIAN],$Dhi +|| LDW *${CTXB}[7^.LITTLE_ENDIAN],$Dlo + LDW *${CTXA}[8^.LITTLE_ENDIAN],$Ehi +|| LDW *${CTXB}[9^.LITTLE_ENDIAN],$Elo + LDW *${CTXA}[10^.LITTLE_ENDIAN],$Fhi +|| LDW *${CTXB}[11^.LITTLE_ENDIAN],$Flo + LDW *${CTXA}[12^.LITTLE_ENDIAN],$Ghi +|| LDW *${CTXB}[13^.LITTLE_ENDIAN],$Glo + LDW *${CTXA}[14^.LITTLE_ENDIAN],$Hhi +|| LDW *${CTXB}[15^.LITTLE_ENDIAN],$Hlo + + LDNDW *$INP++,B11:B10 ; pre-fetch input + LDDW *$K512++,$Khi:$Klo ; pre-fetch K512[0] +outerloop?: + MVK 15,B0 ; loop counters +|| MVK 64,B1 +|| SUB A0,1,A0 + MV $Ahi,$Actxhi +|| MV $Alo,$Actxlo +|| MV $Bhi,$Bctxhi +|| MV $Blo,$Bctxlo +|| MV $Chi,$Cctxhi +|| MV $Clo,$Cctxlo +|| MVD $Dhi,$Dctxhi +|| MVD $Dlo,$Dctxlo + MV $Ehi,$Ectxhi +|| MV $Elo,$Ectxlo +|| MV $Fhi,$Fctxhi +|| MV $Flo,$Fctxlo +|| MV $Ghi,$Gctxhi +|| MV $Glo,$Gctxlo +|| MVD $Hhi,$Hctxhi +|| MVD $Hlo,$Hctxlo +loop0_15?: + .if .BIG_ENDIAN + MV B11,$T1hi +|| MV B10,$T1lo + .else + SWAP4 B10,$T1hi +|| SWAP4 B11,$T1lo + SWAP2 $T1hi,$T1hi +|| SWAP2 $T1lo,$T1lo + .endif + STW $T1hi,*$Xihi++[2] ; original loop16_79? 
+|| STW $T1lo,*$Xilo++[2] ; X[i] = T1 +|| ADD $Hhi,$T1hi,$T1hi +|| ADDU $Hlo,$T1lo,$T1carry:$T1lo ; T1 += h +|| SHRU $Ehi,14,$S1hi +|| SHL $Ehi,32-14,$S1lo +loop16_79?: + XOR $Fhi,$Ghi,$CHhi +|| XOR $Flo,$Glo,$CHlo +|| ADD KHI,$T1hi,$T1hi +|| ADDU KLO,$T1carry:$T1lo,$T1carry:$T1lo ; T1 += K512[i] +|| SHRU $Elo,14,$t0lo +|| SHL $Elo,32-14,$t0hi + XOR $t0hi,$S1hi,$S1hi +|| XOR $t0lo,$S1lo,$S1lo +|| AND $Ehi,$CHhi,$CHhi +|| AND $Elo,$CHlo,$CHlo +|| ROTL $Ghi,0,$Hhi +|| ROTL $Glo,0,$Hlo ; h = g +|| SHRU $Ehi,18,$t0hi +|| SHL $Ehi,32-18,$t0lo + XOR $t0hi,$S1hi,$S1hi +|| XOR $t0lo,$S1lo,$S1lo +|| XOR $Ghi,$CHhi,$CHhi +|| XOR $Glo,$CHlo,$CHlo ; Ch(e,f,g) = ((f^g)&e)^g +|| ROTL $Fhi,0,$Ghi +|| ROTL $Flo,0,$Glo ; g = f +|| SHRU $Elo,18,$t0lo +|| SHL $Elo,32-18,$t0hi + XOR $t0hi,$S1hi,$S1hi +|| XOR $t0lo,$S1lo,$S1lo +|| OR $Ahi,$Bhi,$MAJhi +|| OR $Alo,$Blo,$MAJlo +|| ROTL $Ehi,0,$Fhi +|| ROTL $Elo,0,$Flo ; f = e +|| SHRU $Ehi,41-32,$t0lo +|| SHL $Ehi,64-41,$t0hi + XOR $t0hi,$S1hi,$S1hi +|| XOR $t0lo,$S1lo,$S1lo +|| AND $Chi,$MAJhi,$MAJhi +|| AND $Clo,$MAJlo,$MAJlo +|| ROTL $Dhi,0,$Ehi +|| ROTL $Dlo,0,$Elo ; e = d +|| SHRU $Elo,41-32,$t0hi +|| SHL $Elo,64-41,$t0lo + XOR $t0hi,$S1hi,$S1hi +|| XOR $t0lo,$S1lo,$S1lo ; Sigma1(e) +|| AND $Ahi,$Bhi,$t1hi +|| AND $Alo,$Blo,$t1lo +|| ROTL $Chi,0,$Dhi +|| ROTL $Clo,0,$Dlo ; d = c +|| SHRU $Ahi,28,$S0hi +|| SHL $Ahi,32-28,$S0lo + OR $t1hi,$MAJhi,$MAJhi +|| OR $t1lo,$MAJlo,$MAJlo ; Maj(a,b,c) = ((a|b)&c)|(a&b) +|| ADD $CHhi,$T1hi,$T1hi +|| ADDU $CHlo,$T1carry:$T1lo,$T1carry:$T1lo ; T1 += Ch(e,f,g) +|| ROTL $Bhi,0,$Chi +|| ROTL $Blo,0,$Clo ; c = b +|| SHRU $Alo,28,$t0lo +|| SHL $Alo,32-28,$t0hi + XOR $t0hi,$S0hi,$S0hi +|| XOR $t0lo,$S0lo,$S0lo +|| ADD $S1hi,$T1hi,$T1hi +|| ADDU $S1lo,$T1carry:$T1lo,$T1carry:$T1lo ; T1 += Sigma1(e) +|| ROTL $Ahi,0,$Bhi +|| ROTL $Alo,0,$Blo ; b = a +|| SHRU $Ahi,34-32,$t0lo +|| SHL $Ahi,64-34,$t0hi + XOR $t0hi,$S0hi,$S0hi +|| XOR $t0lo,$S0lo,$S0lo +|| ADD $MAJhi,$T1hi,$T2hi +|| ADDU 
$MAJlo,$T1carry:$T1lo,$T2carry:$T2lo ; T2 = T1+Maj(a,b,c) +|| SHRU $Alo,34-32,$t0hi +|| SHL $Alo,64-34,$t0lo + XOR $t0hi,$S0hi,$S0hi +|| XOR $t0lo,$S0lo,$S0lo +|| ADD $Ehi,$T1hi,$T1hi +|| ADDU $Elo,$T1carry:$T1lo,$T1carry:$T1lo ; T1 += e +|| SHRU $Ahi,39-32,$t0lo +|| SHL $Ahi,64-39,$t0hi + [B0] BNOP loop0_15? +|| [B0] LDNDW *$INP++,B11:B10 ; pre-fetch input + XOR $t0hi,$S0hi,$S0hi +|| XOR $t0lo,$S0lo,$S0lo +|| SHRU $Alo,39-32,$t0hi +|| SHL $Alo,64-39,$t0lo +||[!B0] LDW *${Xihi}[28],$T1hi +||[!B0] LDW *${Xilo}[28],$T1lo ; X[i+14] + XOR $t0hi,$S0hi,$S0hi +|| XOR $t0lo,$S0lo,$S0lo ; Sigma0(a) +|| ADD $T1carry,$T1hi,$Ehi +|| ROTL $T1lo,0,$Elo ; e = T1, "ghost" value +||[!B1] BNOP break? + ADD $S0hi,$T2hi,$T2hi +|| ADDU $S0lo,$T2carry:$T2lo,$T2carry:$T2lo ; T2 += Sigma0(a) +|| [B1] LDDW *$K512++,$Khi:$Klo ; pre-fetch K512[i] + NOP ; avoid cross-path stall + ADD $T2carry,$T2hi,$Ahi +|| MV $T2lo,$Alo ; a = T2 +|| [B0] SUB B0,1,B0 +;;===== branch to loop00_15? is taken here + [B1] LDW *${Xihi}[2],$T2hi +|| [B1] LDW *${Xilo}[2],$T2lo ; X[i+1] +|| [B1] SHRU $T1hi,19,$S1hi +|| [B1] SHL $T1hi,32-19,$S1lo + [B1] SHRU $T1lo,19,$t0lo +|| [B1] SHL $T1lo,32-19,$t0hi +;;===== branch to break? 
is taken here + XOR $t0hi,$S1hi,$S1hi +|| XOR $t0lo,$S1lo,$S1lo +|| SHRU $T1hi,61-32,$t0lo +|| SHL $T1hi,64-61,$t0hi + XOR $t0hi,$S1hi,$S1hi +|| XOR $t0lo,$S1lo,$S1lo +|| SHRU $T1lo,61-32,$t0hi +|| SHL $T1lo,64-61,$t0lo + XOR $t0hi,$S1hi,$S1hi +|| XOR $t0lo,$S1lo,$S1lo +|| SHRU $T1hi,6,$t0hi +|| SHL $T1hi,32-6,$t0lo + XOR $t0hi,$S1hi,$S1hi +|| XOR $t0lo,$S1lo,$S1lo +|| SHRU $T1lo,6,$t0lo +|| LDW *${Xihi}[18],$T1hi +|| LDW *${Xilo}[18],$T1lo ; X[i+9] + XOR $t0lo,$S1lo,$S1lo ; sigma1(Xi[i+14]) + +|| LDW *${Xihi}[0],$CHhi +|| LDW *${Xilo}[0],$CHlo ; X[i] +|| SHRU $T2hi,1,$S0hi +|| SHL $T2hi,32-1,$S0lo + SHRU $T2lo,1,$t0lo +|| SHL $T2lo,32-1,$t0hi + XOR $t0hi,$S0hi,$S0hi +|| XOR $t0lo,$S0lo,$S0lo +|| SHRU $T2hi,8,$t0hi +|| SHL $T2hi,32-8,$t0lo + XOR $t0hi,$S0hi,$S0hi +|| XOR $t0lo,$S0lo,$S0lo +|| SHRU $T2lo,8,$t0lo +|| SHL $T2lo,32-8,$t0hi + XOR $t0hi,$S0hi,$S0hi +|| XOR $t0lo,$S0lo,$S0lo +|| ADD $S1hi,$T1hi,$T1hi +|| ADDU $S1lo,$T1lo,$T1carry:$T1lo ; T1 = X[i+9]+sigma1() +|| SHRU $T2hi,7,$t0hi +|| SHL $T2hi,32-7,$t0lo + XOR $t0hi,$S0hi,$S0hi +|| XOR $t0lo,$S0lo,$S0lo +|| ADD $CHhi,$T1hi,$T1hi +|| ADDU $CHlo,$T1carry:$T1lo,$T1carry:$T1lo ; T1 += X[i] +|| SHRU $T2lo,7,$t0lo +|| [B1] BNOP loop16_79? + XOR $t0lo,$S0lo,$S0lo ; sigma0(Xi[i+1] + + ADD $S0hi,$T1hi,$T1hi +|| ADDU $S0lo,$T1carry:$T1lo,$T1carry:$T1lo ; T1 += sigma0() +|| [B1] SUB B1,1,B1 + NOP ; avoid cross-path stall + ADD $T1carry,$T1hi,$T1hi + + STW $T1hi,*$Xihi++[2] ; copied "top" bundle +|| STW $T1lo,*$Xilo++[2] ; X[i] = T1 +|| ADD $Hhi,$T1hi,$T1hi +|| ADDU $Hlo,$T1lo,$T1carry:$T1lo ; T1 += h +|| SHRU $Ehi,14,$S1hi +|| SHL $Ehi,32-14,$S1lo +;;===== branch to loop16_79? 
is taken here + +break?: + ADD $Ahi,$Actxhi,$Ahi ; accumulate ctx +|| ADDU $Alo,$Actxlo,$Actxlo:$Alo +|| [A0] LDNDW *$INP++,B11:B10 ; pre-fetch input +|| [A0] ADDK -640,$K512 ; rewind pointer to K512 + ADD $Bhi,$Bctxhi,$Bhi +|| ADDU $Blo,$Bctxlo,$Bctxlo:$Blo +|| [A0] LDDW *$K512++,$Khi:$Klo ; pre-fetch K512[0] + ADD $Chi,$Cctxhi,$Chi +|| ADDU $Clo,$Cctxlo,$Cctxlo:$Clo +|| ADD $Actxlo,$Ahi,$Ahi +||[!A0] MV $CTXA,$CTXB + ADD $Dhi,$Dctxhi,$Dhi +|| ADDU $Dlo,$Dctxlo,$Dctxlo:$Dlo +|| ADD $Bctxlo,$Bhi,$Bhi +||[!A0] STW $Ahi,*${CTXA}[0^.LITTLE_ENDIAN] ; save ctx +||[!A0] STW $Alo,*${CTXB}[1^.LITTLE_ENDIAN] + ADD $Ehi,$Ectxhi,$Ehi +|| ADDU $Elo,$Ectxlo,$Ectxlo:$Elo +|| ADD $Cctxlo,$Chi,$Chi +|| [A0] BNOP outerloop? +||[!A0] STW $Bhi,*${CTXA}[2^.LITTLE_ENDIAN] +||[!A0] STW $Blo,*${CTXB}[3^.LITTLE_ENDIAN] + ADD $Fhi,$Fctxhi,$Fhi +|| ADDU $Flo,$Fctxlo,$Fctxlo:$Flo +|| ADD $Dctxlo,$Dhi,$Dhi +||[!A0] STW $Chi,*${CTXA}[4^.LITTLE_ENDIAN] +||[!A0] STW $Clo,*${CTXB}[5^.LITTLE_ENDIAN] + ADD $Ghi,$Gctxhi,$Ghi +|| ADDU $Glo,$Gctxlo,$Gctxlo:$Glo +|| ADD $Ectxlo,$Ehi,$Ehi +||[!A0] STW $Dhi,*${CTXA}[6^.LITTLE_ENDIAN] +||[!A0] STW $Dlo,*${CTXB}[7^.LITTLE_ENDIAN] + ADD $Hhi,$Hctxhi,$Hhi +|| ADDU $Hlo,$Hctxlo,$Hctxlo:$Hlo +|| ADD $Fctxlo,$Fhi,$Fhi +||[!A0] STW $Ehi,*${CTXA}[8^.LITTLE_ENDIAN] +||[!A0] STW $Elo,*${CTXB}[9^.LITTLE_ENDIAN] + ADD $Gctxlo,$Ghi,$Ghi +||[!A0] STW $Fhi,*${CTXA}[10^.LITTLE_ENDIAN] +||[!A0] STW $Flo,*${CTXB}[11^.LITTLE_ENDIAN] + ADD $Hctxlo,$Hhi,$Hhi +||[!A0] STW $Ghi,*${CTXA}[12^.LITTLE_ENDIAN] +||[!A0] STW $Glo,*${CTXB}[13^.LITTLE_ENDIAN] +;;===== branch to outerloop? 
is taken here + + STW $Hhi,*${CTXA}[14^.LITTLE_ENDIAN] +|| STW $Hlo,*${CTXB}[15^.LITTLE_ENDIAN] +|| MVK -40,B0 + ADD FP,B0,SP ; destroy circular buffer +|| LDDW *FP[-4],A11:A10 + LDDW *SP[2],A13:A12 +|| LDDW *FP[-2],B11:B10 + LDDW *SP[4],B13:B12 +|| BNOP RA + LDW *++SP(40),FP ; restore frame pointer + MVK 0,B0 + MVC B0,AMR ; clear AMR + NOP 2 ; wait till FP is committed + .endasmfunc + + .if __TI_EABI__ + .sect ".text:sha_asm.const" + .else + .sect ".const:sha_asm" + .endif + .align 128 +K512: + .uword 0x428a2f98,0xd728ae22, 0x71374491,0x23ef65cd + .uword 0xb5c0fbcf,0xec4d3b2f, 0xe9b5dba5,0x8189dbbc + .uword 0x3956c25b,0xf348b538, 0x59f111f1,0xb605d019 + .uword 0x923f82a4,0xaf194f9b, 0xab1c5ed5,0xda6d8118 + .uword 0xd807aa98,0xa3030242, 0x12835b01,0x45706fbe + .uword 0x243185be,0x4ee4b28c, 0x550c7dc3,0xd5ffb4e2 + .uword 0x72be5d74,0xf27b896f, 0x80deb1fe,0x3b1696b1 + .uword 0x9bdc06a7,0x25c71235, 0xc19bf174,0xcf692694 + .uword 0xe49b69c1,0x9ef14ad2, 0xefbe4786,0x384f25e3 + .uword 0x0fc19dc6,0x8b8cd5b5, 0x240ca1cc,0x77ac9c65 + .uword 0x2de92c6f,0x592b0275, 0x4a7484aa,0x6ea6e483 + .uword 0x5cb0a9dc,0xbd41fbd4, 0x76f988da,0x831153b5 + .uword 0x983e5152,0xee66dfab, 0xa831c66d,0x2db43210 + .uword 0xb00327c8,0x98fb213f, 0xbf597fc7,0xbeef0ee4 + .uword 0xc6e00bf3,0x3da88fc2, 0xd5a79147,0x930aa725 + .uword 0x06ca6351,0xe003826f, 0x14292967,0x0a0e6e70 + .uword 0x27b70a85,0x46d22ffc, 0x2e1b2138,0x5c26c926 + .uword 0x4d2c6dfc,0x5ac42aed, 0x53380d13,0x9d95b3df + .uword 0x650a7354,0x8baf63de, 0x766a0abb,0x3c77b2a8 + .uword 0x81c2c92e,0x47edaee6, 0x92722c85,0x1482353b + .uword 0xa2bfe8a1,0x4cf10364, 0xa81a664b,0xbc423001 + .uword 0xc24b8b70,0xd0f89791, 0xc76c51a3,0x0654be30 + .uword 0xd192e819,0xd6ef5218, 0xd6990624,0x5565a910 + .uword 0xf40e3585,0x5771202a, 0x106aa070,0x32bbd1b8 + .uword 0x19a4c116,0xb8d2d0c8, 0x1e376c08,0x5141ab53 + .uword 0x2748774c,0xdf8eeb99, 0x34b0bcb5,0xe19b48a8 + .uword 0x391c0cb3,0xc5c95a63, 0x4ed8aa4a,0xe3418acb + .uword 0x5b9cca4f,0x7763e373, 
0x682e6ff3,0xd6b2b8a3 + .uword 0x748f82ee,0x5defb2fc, 0x78a5636f,0x43172f60 + .uword 0x84c87814,0xa1f0ab72, 0x8cc70208,0x1a6439ec + .uword 0x90befffa,0x23631e28, 0xa4506ceb,0xde82bde9 + .uword 0xbef9a3f7,0xb2c67915, 0xc67178f2,0xe372532b + .uword 0xca273ece,0xea26619c, 0xd186b8c7,0x21c0c207 + .uword 0xeada7dd6,0xcde0eb1e, 0xf57d4f7f,0xee6ed178 + .uword 0x06f067aa,0x72176fba, 0x0a637dc5,0xa2c898a6 + .uword 0x113f9804,0xbef90dae, 0x1b710b35,0x131c471b + .uword 0x28db77f5,0x23047d84, 0x32caab7b,0x40c72493 + .uword 0x3c9ebe0a,0x15c9bebc, 0x431d67c4,0x9c100d4c + .uword 0x4cc5d4be,0xcb3e42b6, 0x597f299c,0xfc657e2a + .uword 0x5fcb6fab,0x3ad6faec, 0x6c44198c,0x4a475817 + .cstring "SHA512 block transform for C64x, CRYPTOGAMS by " + .align 4 +___ + +print $code; +close STDOUT; From 781280094ad389e8958631b97e70f498becbd9cb Mon Sep 17 00:00:00 2001 From: Andy Polyakov Date: Fri, 25 Nov 2016 11:52:06 +0100 Subject: [PATCH 120/120] c6x/* "facelift": - make scripts executable; - "parameterize" platform selection in c6x/do_fips; - add c6x/fips_algvs.mak; - add c6x/run6x.js launcher for more recent CCS versions; Reviewed-by: Rich Salz Reviewed-by: Tim Hudson Reviewed-by: Stephen Henson (Merged from https://github.com/openssl/openssl/pull/4265) --- Configure | 3 +- c6x/do_fips | 7 +++- c6x/fips_algvs.mak | 14 +++++++ c6x/fips_standalone_sha1 | 0 c6x/incore6x | 0 c6x/run6x | 0 c6x/run6x.js | 91 ++++++++++++++++++++++++++++++++++++++++ test/fips_algvs.c | 2 +- util/mk1mf.pl | 2 +- 9 files changed, 115 insertions(+), 4 deletions(-) mode change 100644 => 100755 c6x/do_fips create mode 100644 c6x/fips_algvs.mak mode change 100644 => 100755 c6x/fips_standalone_sha1 mode change 100644 => 100755 c6x/incore6x mode change 100644 => 100755 c6x/run6x create mode 100755 c6x/run6x.js diff --git a/Configure b/Configure index 84a2bc2d04..679252e415 100755 --- a/Configure +++ b/Configure @@ -636,13 +636,14 @@ my %table=( "uClinux-dist64","$ENV{'CC'}:\$(CFLAGS)::-D_REENTRANT::\$(LDFLAGS) 
\$(LDLIBS):SIXTY_FOUR_BIT_LONG:${no_asm}:$ENV{'LIBSSL_dlfcn'}:linux-shared:-fPIC:-shared:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):$ENV{'RANLIB'}::", "c64xplus","cl6x:-mv6400+ -o2 -ox -ms -pden -DNO_SYS_TYPES_H -DGETPID_IS_MEANINGLESS -DMD32_REG_T=int -DOPENSSL_SMALL_FOOTPRINT:::DSPBIOS::BN_LLONG:c64xpluscpuid.o:bn-c64xplus.o c64xplus-gf2m.o::aes-c64xplus.o aes_cbc.o aes_ctr.o:::sha1-c64xplus.o sha256-c64xplus.o sha512-c64xplus.o:::::::ghash-c64xplus.o::void:", +"c64x","cl6x:-mv6400 -o2 -ox -ms -as -pden -DNO_SYS_TYPES_H -DGETPID_IS_MEANINGLESS -DMD32_REG_T=int -DOPENSSL_SMALL_FOOTPRINT:::DSPBIOS:::c64xcpuid.o:::aes-c64x.o aes_cbc.o aes_ctr.o:::sha1-c64x.o sha256-c64x.o sha512-c64x.o:::::::::void:", ); my @MK1MF_Builds=qw(VC-WIN64I VC-WIN64A debug-VC-WIN64I debug-VC-WIN64A VC-NT VC-CE VC-WIN32 debug-VC-WIN32 - BC-32 c64xplus + BC-32 c64xplus c64x netware-clib netware-clib-bsdsock netware-libc netware-libc-bsdsock); diff --git a/c6x/do_fips b/c6x/do_fips old mode 100644 new mode 100755 index c1c29fcf83..4045e605ce --- a/c6x/do_fips +++ b/c6x/do_fips @@ -1,6 +1,11 @@ #!/bin/sh -perl Configure c64xplus fipscanisteronly no-engine +if ! which cl6x > /dev/null 2>&1; then + echo 'fatal: cl6x is not on $PATH' + exit 1 +fi + +perl Configure ${C6XPLATFORM:-c64xplus} fipscanisteronly no-engine perl util/mkfiles.pl > MINFO perl util/mk1mf.pl auto > c6x/fips.mak make -f c6x/fips.mak diff --git a/c6x/fips_algvs.mak b/c6x/fips_algvs.mak new file mode 100644 index 0000000000..7f67927fbd --- /dev/null +++ b/c6x/fips_algvs.mak @@ -0,0 +1,14 @@ +CC=cl6x +CFLAGS=-mv$${C6XSILICON:-6400+} -o2 -I. 
-Ic6x/inc -Ifips -DNO_SYS_TYPES_H +OBJ_D=c6x/tmp +OUT_D=c6x + +all: $(OUT_D)/fips_algvs.out + +$(OBJ_D)/fips_algvs.obj: test/fips_algvs.c + $(CC) --obj_directory=$(OBJ_D) $(CFLAGS) -c $< + +$(OUT_D)/fips_algvs.out: $(OBJ_D)/fips_algvs.obj $(OUT_D)/fipscanister.obj c6x/fips_algvs.cmd + $(OUT_D)/fips_standalone_sha1 -verify $(OUT_D)/fipscanister.obj + $(CC) -z -o $@ -m $(OUT_D)/fips_algvs.map $< $(OUT_D)/fipscanister.obj c6x/fips_algvs.cmd + $(OUT_D)/incore6x $@ || rm $@ diff --git a/c6x/fips_standalone_sha1 b/c6x/fips_standalone_sha1 old mode 100644 new mode 100755 diff --git a/c6x/incore6x b/c6x/incore6x old mode 100644 new mode 100755 diff --git a/c6x/run6x b/c6x/run6x old mode 100644 new mode 100755 diff --git a/c6x/run6x.js b/c6x/run6x.js new file mode 100755 index 0000000000..6d94949751 --- /dev/null +++ b/c6x/run6x.js @@ -0,0 +1,91 @@ +#!/usr/bin/env dss.sh +// +// Debug Server Scripting C6x launcher. +// + +importPackage(Packages.com.ti.debug.engine.scripting); +importPackage(Packages.com.ti.ccstudio.scripting.environment); +importPackage(Packages.java.lang); + +if (arguments.length == 0) { + // Extract script name from eclipse + var regex = new RegExp("-dss\\.rhinoArgs\n(.*)"); + var matches = regex.exec(environment["eclipse.commands"]); + + System.err.println("Usage: " + matches[1] + " executable [args]"); + System.err.println(); + System.err.println("You're also required to set CCSTARGETCONFIG " + + "environment variable to appoint"); + System.err.println("proper .ccxml file, customarily one of " + + "$HOME/ti/CCSTargetConfigurations/*.ccxml"); + quit(1); +} + +try { + var prog = arguments[0]; + var script = ScriptingEnvironment.instance(); + + var debugServer = script.getServer("DebugServer.1"); + + // CCSTARGETCONFIG environment variable should point at proper .ccxml, + // customarily one of $HOME/ti/CCSTargetConfigurations/*.ccxml. 
+ debugServer.setConfig(System.getenv("CCSTARGETCONFIG")); + + var debugSession = debugServer.openSession("*", "*"); + + // Redirect GEL output to |prog|.gel file, so that it doesn't clobber + // standard output from the program... + var dot = prog.lastIndexOf("."); + var gel_out = prog + ".gel"; + if (dot > 0) { + gel_out = prog.substr(0,dot) + ".gel"; + } + debugSession.expression.evaluate('GEL_EnableFileOutput("' + + gel_out + '", 0, 0)'); + + debugSession.target.connect(); + + // It should be noted that "current working directory" for program + // executed on the target system is one where |prog| resides, and + // not where script executed [as one would expect]... + debugSession.memory.loadProgram(prog, arguments); + + // Pull exit()'s address and set breakpoint, then just execute till + // it's reached... + var exitAddr = debugSession.symbol.getAddress("exit"); + debugSession.breakpoint.add(exitAddr); + + while (1) { + debugSession.target.run(); + + var PC = debugSession.expression.evaluate("PC"); + if (PC == exitAddr) { + break; + } + } + + // Snatch value passed to exit(), so that it can be passed down to + // shell as exit code from this script... + var exitCode = debugSession.expression.evaluate("A4"); + + // Last run to termination... + debugSession.target.run(); + // Clean up... + debugSession.terminate(); + debugServer.stop(); + + // It should be noted that there is kind of a bug in C6x run-time. + // Return value from main() is not passed to last implicit exit() + // call [as it would on other systems], but instead constant 1 is + // passed, which conventionally indicates an error. So that if one + // wants to pass specific exit code, or even 0 indicating "success", + // one has to call exit() explicitly instead of relying on value + // returned by main()... + quit(exitCode); + +} catch (e) { + // We catch everything, because default handler terminates script with + // "success" exit code upon exception... 
+ System.err.println(e.rhinoException); + quit(139); +} diff --git a/test/fips_algvs.c b/test/fips_algvs.c index 8ff75dcd2e..2bfd213a0e 100644 --- a/test/fips_algvs.c +++ b/test/fips_algvs.c @@ -150,7 +150,7 @@ extern int fips_rsavtest_main(int argc, char **argv); extern int fips_shatest_main(int argc, char **argv); extern int fips_test_suite_main(int argc, char **argv); -#if !defined(_TMS320C6400_PLUS) +#if !defined(_TMS320C6400_PLUS) && !defined(_TMS320C6400) #include "fips_aesavs.c" #include "fips_cmactest.c" #include "fips_desmovs.c" diff --git a/util/mk1mf.pl b/util/mk1mf.pl index 8934ababa1..5c4c50ab35 100755 --- a/util/mk1mf.pl +++ b/util/mk1mf.pl @@ -249,7 +249,7 @@ elsif (($platform eq "netware-clib") || ($platform eq "netware-libc") || $BSDSOCK=1 if ($platform eq "netware-libc-bsdsock") || ($platform eq "netware-clib-bsdsock"); require 'netware.pl'; } -elsif ($platform eq "c64xplus") +elsif ($platform =~ /^c64x/) { require "TI_CGTOOLS.pl"; }