Here’s the code that determines whether a matrix has orthogonal columns. It runs properly when n is divisible by 4, but when n is not divisible by 4 the program stops unexpectedly (at the position marked in the code):
// Walks every pair (i, j) with i < j of length-n vectors stored contiguously in D
// (vector i occupies D[n*i] .. D[n*i + n-1]), computes their dot product with SSE
// plus a scalar tail, and returns 1 as soon as one pair's sum compares equal to 0;
// returns 0 if no such pair is found.
// NOTE(review): the function header and the declarations of D, n, acc, X, Y, temp
// and sum are outside this excerpt.
// NOTE(review): returning 1 on the FIRST pair with a zero dot product reports
// "some pair is orthogonal", not "all pairs are orthogonal" — confirm the intent.
for(i=0;i<n-1;i++)
{
for(j=i+1;j<n;j++)
{
sum = 0;
// NOTE(review): acc is not reset anywhere in the visible code, so products
// accumulated for earlier (i, j) pairs appear to carry over into this pair's
// sum — confirm where acc is initialized.
// Vectorized part: 4 floats per iteration. With the bound n-4 (rather than n-3)
// the last full group of 4 is left to the scalar tail loop below when n is a
// multiple of 4; the loop itself never reads past index n-1.
for(k=0;k<n-4;k+=4)
{
// _mm_load_ps requires a 16-byte-aligned address. k is a multiple of 4, so
// &D[n*i+k] is aligned only if D itself is 16-byte aligned AND n*i is a multiple
// of 4 — which does not hold for every row when n is not divisible by 4. This is
// the likely cause of the reported stop; _mm_loadu_ps is the unaligned variant.
X=_mm_load_ps(&D[n*i+k]);
Y=_mm_load_ps(&D[n*j+k]);
// Author's note: this output never appears; the program stops here.
// NOTE(review): the string has no newline and stdout is not flushed, so missing
// output alone may not pinpoint the crash location.
printf("fff"); //not printing , program stops here
acc = _mm_add_ps(acc,_mm_mul_ps(X,Y));
}
// Spill the four partial sums and reduce them horizontally.
// NOTE(review): _mm_store_ps also requires temp to be 16-byte aligned — confirm
// how temp is declared.
_mm_store_ps(&temp[0],acc);
sum = temp[0]+temp[1]+temp[2]+temp[3];
// Scalar tail: handles the elements from k (where the vector loop stopped) to n-1.
for(;k<n;k++){
sum = sum + D[i*n+k]*D[j*n+k];
}
// NOTE(review): exact comparison of a floating-point sum against 0; rounding
// error can make a mathematically zero dot product compare non-zero.
if(sum ==0)
return 1;
}
}
return 0;
}
What could be the reason for that? How do you handle arrays whose size is not divisible by 4?
The condition that determines when your for(k...) loop stops early is not correct.
Try something like this: