I’m working on optimizing an application . I found that i need to optimize an inner loop for improved performance.
rgiFilter is a 16 bit arrary.
for (i = 0; i < iLen; i++) {
iPredErr = (I32)*rgiResidue;
rgiFilter = rgiFilterBuf;
rgiPrevVal = rgiPrevValRdBuf + iRecent;
rgiUpdate = rgiUpdateRdBuf + iRecent;
iPred = iScalingOffset;
for (j = 0; j < iOrder_Div_8; j++) {
iPred += (I32) rgiFilter[0] * rgiPrevVal[0];
rgiFilter[0] += rgiUpdate[0];
iPred += (I32) rgiFilter[1] * rgiPrevVal[1];
rgiFilter[1] += rgiUpdate[1];
iPred += (I32) rgiFilter[2] * rgiPrevVal[2];
rgiFilter[2] += rgiUpdate[2];
iPred += (I32) rgiFilter[3] * rgiPrevVal[3];
rgiFilter[3] += rgiUpdate[3];
iPred += (I32) rgiFilter[4] * rgiPrevVal[4];
rgiFilter[4] += rgiUpdate[4];
iPred += (I32) rgiFilter[5] * rgiPrevVal[5];
rgiFilter[5] += rgiUpdate[5];
iPred += (I32) rgiFilter[6] * rgiPrevVal[6];
rgiFilter[6] += rgiUpdate[6];
iPred += (I32) rgiFilter[7] * rgiPrevVal[7];
rgiFilter[7] += rgiUpdate[7];
rgiFilter += 8;
rgiPrevVal += 8;
rgiUpdate += 8;
}
ode here
Your only bet is to do more than one operation at a time, and that means one of these 3 options: