I’m trying to write a SHA-2 implementation, but the result keeps coming back incorrect-

Question

0

Asked: June 5, 20262026-06-05T04:25:21+00:00 2026-06-05T04:25:21+00:00

I’m trying to write a SHA-2 implementation, but the result keeps coming back incorrect-

0

I’m trying to write a SHA-2 implementation, but the result keeps coming back incorrect- I’ve tested on such things as the empty string. I implemented it in two steps, preprocessing and primary body.

template<typename T> struct Output {
    std::array<T, 8> h;
};
template<typename T> struct Input {
    std::array<T, 16> c;
};
template<typename T> Output<T> sha2(Input<T> in) {
    T w[64];
    for(int i = 0; i < 16; i++)
        w[i] = in.c[i];

    for(int i = 16; i < 64; i++) {
        auto s0 = _rotr(w[i - 15], 7) ^ _rotr(w[i - 15], 18) ^ (w[i - 15] >> 3);

        auto s1 = _rotr(w[i - 2], 17) ^ _rotr(w[i - 2], 19) ^ (w[i - 2] >> 10);
        w[i] = w[i - 16] + s0 + w[i - 7] + s1;        
    }

    static const T k[] = {    
        0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 
        0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
        0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 
        0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
        0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 
        0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
        0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 
        0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
        0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 
        0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
        0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 
        0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
        0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 
        0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
        0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 
        0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
    };

    static const T h[] = {
        0x6a09e667,
        0xbb67ae85,
        0x3c6ef372,
        0xa54ff53a,
        0x510e527f,
        0x9b05688c,
        0x1f83d9ab,
        0x5be0cd19
    };

    T loopvars[8];
    for(int i = 0; i < 8; i++)
        loopvars[i] = h[i];

    for(int i = 0; i < 64; i++) {
        auto&& la = loopvars[0];
        auto&& lb = loopvars[1];
        auto&& lc = loopvars[2];
        auto&& ld = loopvars[3];
        auto&& le = loopvars[4];
        auto&& lf = loopvars[5];
        auto&& lg = loopvars[6];
        auto&& lh = loopvars[7];

        auto s0 = _rotr(la, 2) ^ _rotr(la, 13) ^ _rotr(la, 22);
        auto maj = (la & lb) ^ (la & lc) ^ (lb & lc);
        auto t2 = s0 + maj;

        auto s1 = _rotr(le, 6) ^ _rotr(le, 11) ^ _rotr(le, 25);
        auto ch = (le & lf) ^ ((~le) & lg);
        auto t1 = lh + s1 + ch + k[i] + w[i];

        lh = lg;
        lg = lf;
        lf = le;
        le = ld + t1;
        ld = lc;
        lc = lb;
        lb = la;
        la = t1 + t2;
    }
    Output<T> output;
    for(int i = 0; i < 8; i++) {
        output.h[i] = h[i] + loopvars[i];
    }
    return output;
}
Output<unsigned int> SHA2(std::vector<char> bytes) {
    auto bitlen = bytes.size() * 8;
    auto big_endian_bitlen = ::_byteswap_uint64(bitlen);
    if (bitlen > 440)
        throw std::runtime_error("Epic fail!");
    Input<unsigned int> in;
    for(int i = 0; i < 16; i++) {
        in.c[i] = 0;
    }
    memcpy(&in.c[0], &bytes.front(), bytes.size());
    in.c[bitlen / 32] |= (1 << (bitlen % 32));
    // all zero by default, so no need to append the extra bits
    in.c[14] = (big_endian_bitlen >> 32);
    in.c[15] = big_endian_bitlen;
    return sha2(in);
}

I suspect endianness error. For example, when I listed the input of the primary body, it came back as 1 .. (511x)0, which I’m pretty sure was correct. But when I tried swapping the values to respect endianness, I still did not get the correct output.

I’m fairly sure that the error is in the preprocessing step, as the primary body is endianness-independent, as far as I can tell.

Any suggestions as to where the implementation is incorrect?

Edit: Oh yes, _byteswap_uint64 is an MSVC intrinsic for endianness conversion of 64bit unsigned integer, and _rotr right rotates 32bit unsigned integers. For GCC, you just use macro or define them as functions to the equivalent GCC intrinsics.

Just for reference, the incorrect output is

de5c4195
c21e7e70
e6a365c2
77f6bc03
f651e23a
6fb9b88a
1decb688
d6fddf1f

whereas the correct output is

e3b0c442
98fc1c14
9afbf4c8
996fb924
27ae41e4
649b934c
a495991b
7852b855

Report

Leave an answer
Cancel reply

You must login to add an answer.

Need An Account,

1 Answer

Editorial Team · Answer 1 · 2026-06-05T04:25:23+00:00

Perhaps some extracts from a working implementation would be helpful — especially since it emphasizes following the FIPS description quite closely rather than any mundane considerations like efficiency. 🙂 Probably the biggest deviation is using temp[0]…temp[7] for what the FIPS calls a, b, …h and temp[8] and temp[9] for T₁ and T₂.

namespace {
    uint32_t word(int a, int b, int c, int d) {
        a &= 0xff;
        b &= 0xff;
        c &= 0xff;
        d &= 0xff;
        int val =  a << 24 | b << 16 | c << 8 | d;
        return val;
    }

    uint32_t ROTR(uint32_t number, unsigned bits) { 
        return (number >> bits) | (number << (32-bits));
    }

    uint32_t f1(uint32_t x, uint32_t y, uint32_t z) { 
        return (x & y) ^ (~x & z);
    }
    uint32_t f2(uint32_t x, uint32_t y, uint32_t z) { 
        return (x & y) ^ (x&z) ^ (y&z);
    }
    uint32_t f3(uint32_t x) { 
        return ROTR(x, 2) ^ ROTR(x, 13) ^ ROTR(x, 22);  
    }
    uint32_t f4(uint32_t x) { 
        return ROTR(x, 6) ^ ROTR(x, 11) ^ ROTR(x, 25);
    }
    uint32_t f5(uint32_t x) { 
        return ROTR(x, 7) ^ ROTR(x, 18) ^ (x >> 3);
    }
    uint32_t f6(uint32_t x) { 
        return ROTR(x, 17) ^ ROTR(x, 19) ^ (x >> 10);
    }

    uint32_t add(uint32_t a, uint32_t b) {
        return a+b;
    }
}

sha256::sha256() : H(hash_size), W(64), temp(10) { 
    static const uint32_t H0[hash_size] = {
        0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, 
        0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19
    };

    std::copy(H0, H0+hash_size, H.begin());
}

void sha256::hash_block(std::vector<uint32_t> const &block) {
    static const uint32_t K[] = {
        0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
        0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, 
        0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
        0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
        0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
        0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
        0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
        0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
        0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
        0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
        0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
        0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, 
        0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
        0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
        0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
        0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
    };

    assert(block.size() == 16);

    std::copy(block.begin(), block.end(), W.begin());
    for (int t=16; t<64; ++t) 
        W[t] = f6(W[t-2]) + W[t-7] + f5(W[t-15]) + W[t-16];
    std::copy(H.begin(), H.end(), temp.begin());

    for (int t=0; t<64; ++t) {
        temp[8] = temp[7]+f4(temp[4]) + f1(temp[4],temp[5],temp[6])+K[t]+W[t];
        temp[9] = f3(temp[0]) + f2(temp[0], temp[1], temp[2]);
        temp[7] = temp[6];
        temp[6] = temp[5];
        temp[5] = temp[4];
        temp[4] = temp[3] + temp[8];
        temp[3] = temp[2];
        temp[2] = temp[1];
        temp[1] = temp[0];
        temp[0] = temp[8] + temp[9];
    }
    std::transform(H.begin(), H.end(), temp.begin(), H.begin(), add);
}

I’d tend to agree with your guess though: I’d suspect the padding routine. At least in my experience, the padding is harder to get correct than the hash routine itself (partly because it isn’t described as carefully).

Sign Up

Sign In

Forgot Password

The Archive Base Latest Questions

I’m trying to write a SHA-2 implementation, but the result keeps coming back incorrect-

Leave an answerCancel reply

1 Answer

Leave an answer
Cancel reply