I have a unit test that checks behavior on blocking and non-blocking sockets – the server writes a long response, and at some point it should no longer be able to write any more data,
so it should block on write.
Basically, one side writes and the other side does not read.
On Solaris, at some point I get an error, “Not enough space” (after writing about 75 MB), instead of the write blocking.
Program that reproduces the problem:
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/wait.h>
#include <unistd.h>
#include <stdlib.h>
#include <stdio.h>
#include <errno.h>
#include <string.h>
#include <signal.h>
#include <arpa/inet.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <netinet/in.h>
/* IPv4 address, in dotted-quad text form, that the program binds to and connects to. */
const char *address = "127.0.0.1";

/*
 * Evaluate the expression x once; when its value is negative, print a
 * diagnostic via perror() labelled with the expression's source text and
 * terminate the process with exit status 1.
 */
#define check(x)            \
    do {                    \
        if ((x) < 0) {      \
            perror(#x);     \
            exit(1);        \
        }                   \
    } while (0)
/*
 * Reproducer for send() behaviour when the peer never reads.
 *
 * The process forks. The child listens on `address`:8080, accepts a single
 * connection and calls send() in a loop, printing the running byte total
 * after each successful call. The parent waits one second, connects, sleeps
 * five seconds without reading anything, closes its socket and reaps the
 * child.
 *
 * The child stops on the first send() error (printing the errno text), on a
 * zero-length send, or once more than 100 MiB have been accepted.
 *
 * Returns 0 on every handled path; the check() macro exits with status 1 if
 * a setup call (socket/connect/setsockopt/bind/listen/accept) fails.
 */
int main(void)
{
    /* Ignore SIGPIPE so a send() to a closed peer fails with an errno value
     * instead of terminating the process. */
    signal(SIGPIPE, SIG_IGN);

    struct sockaddr_in inaddr = {0};
    inaddr.sin_family = AF_INET;
    inaddr.sin_addr.s_addr = inet_addr(address);
    inaddr.sin_port = htons(8080);

    pid_t res = fork();
    if (res < 0) {
        perror("fork");
        exit(1);
    }

    if (res > 0) {
        /* Parent: the client side, which connects but never reads. */
        int fd = -1;
        int status;

        sleep(1); /* give the child time to reach listen()/accept() */
        check(fd = socket(AF_INET, SOCK_STREAM, 0));
        check(connect(fd, (struct sockaddr *)&inaddr, sizeof(inaddr)));
        sleep(5);
        close(fd);
        wait(&status);
        return 0;
    }

    /* Child: the server side, which writes until send() stops succeeding. */
    int acc, fd;
    int yes = 1;

    check(acc = socket(AF_INET, SOCK_STREAM, 0));
    check(setsockopt(acc, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(yes)));
    check(bind(acc, (struct sockaddr *)&inaddr, sizeof(inaddr)));
    check(listen(acc, 10));
    check(fd = accept(acc, 0, 0));

    /* Zero-filled so no indeterminate stack bytes are put on the wire. */
    char buf[1000] = {0};
    long long total = 0;
    const long long limit = 100LL * 1024 * 1024;

    for (;;) {
        /* send() returns ssize_t; keep the full width of the result. */
        ssize_t r = send(fd, buf, sizeof(buf), 0);
        if (r < 0) {
            printf("write %s\n", strerror(errno));
            return 0;
        }
        if (r == 0) {
            printf("Got eof\n");
            return 0;
        }

        total += r;
        if (total > limit) {
            printf("Too much!!!!\n");
            return 0;
        }
        printf("%lld\n", total);
    }
}
The output on Solaris (last two lines)
75768000
write Not enough space
The expected output on Linux (last two lines)
271760
write Connection reset by peer
On Linux, this error occurs only when the other side closes the socket.
Any ideas why and how can I fix it, what options to set?
P.S.: It is OpenSolaris 2009.06, x86
Edits
- Added full C code that reproduces the problem
Answer:
This seems like a bug in a specific version of the Solaris kernel or libc library.
From OpenSolaris source code, I’m afraid the SO_SNDTIMEO option is unsupported: https://hg.java.net/hg/solaris~on-src/file/tip/usr/src/uts/common/inet/sockmods/socksctp.c#l1233