Greetings,

I'm trying to debug problems associated with an app I'm writing.

The idea here is that the app is a calculation engine with a TCP
server
socket that communicates results to Tcl/TK GUI clients. The GUI can
also send commands
to the calculation engine. With one Tcl/Tk client everything works
fine, results are routinely
sent to the GUI, and commands can be sent from the GUI to the
calculation engine
requesting changes in aspects of the calculation.

The problems start when I have more than one client. The symptoms are
a bit screwy.

I can start off with one client, and execute commands from the GUI
and like I said above,
everything is fine. When I connect a second client, initially things
look good, as that client receives updates
from the calculation engine too. However, the weird thing is, I can
execute one command from the
first client, which works, and then all subsequent commands seem to
vanish in the bit bucket somewhere.
I put trace code in the client and in the calc engine's client-handling
threads. The client is sending the commands, and as far as the client
can tell the socket connection is still available. However, none of
the client threads
show the command coming across the socket, so I have no idea
what happened to it.

The main program starts off by creating a server socket and placing it
in its own thread:

/* Create the listening socket, then start Get_Client on its own thread. */
servSock = CreateTCPServerSocket(echoServPort);
/* Get_Client is handed the ADDRESS of servSock and dereferences it as an
 * int, so servSock has to remain valid until the new thread has read it
 * (NOTE(review): confirm servSock is not a short-lived local). */
if (pthread_create(&threadID, NULL, Get_Client, (void *)
&servSock) != 0)
DieWithError("pthread_create() failed");

Here, Get_Client accepts each incoming connection and creates a thread to handle that client.

The CreateTCPServerSocket function is:

#define MAXPENDING 5 /* Maximum outstanding connection requests */

/*
 * CreateTCPServerSocket - open a TCP socket that listens on `port`.
 *
 * The socket is bound to INADDR_ANY with SO_REUSEADDR requested, then put
 * into the listening state with a backlog of MAXPENDING.  Failures of
 * socket(), bind() and listen() are reported through DieWithError(); the
 * result of setsockopt() is not examined.
 *
 * Returns the listening socket descriptor.
 */
int CreateTCPServerSocket(unsigned short port)
{
    struct sockaddr_in localAddr;   /* Address the server binds to */
    int reuseAddr = 1;              /* Option value for SO_REUSEADDR */
    int listenSock;                 /* Descriptor handed back to the caller */

    listenSock = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP);
    if (listenSock < 0)
        DieWithError("socket() failed");

    /* Start from an all-zero address, then fill in family, interface, port. */
    memset(&localAddr, 0, sizeof(localAddr));
    localAddr.sin_family = AF_INET;
    localAddr.sin_addr.s_addr = htonl(INADDR_ANY);
    localAddr.sin_port = htons(port);

    setsockopt(listenSock, SOL_SOCKET, SO_REUSEADDR,
               &reuseAddr, sizeof(reuseAddr));

    if (bind(listenSock, (struct sockaddr *) &localAddr,
             sizeof(localAddr)) < 0)
        DieWithError("bind() failed");

    if (listen(listenSock, MAXPENDING) < 0)
        DieWithError("listen() failed");

    return listenSock;
}


Then the calculation engine gets going. All of the comms back and
forth between the GUI client and the calculation engine are in the form
of simple strings. The GUI client sends a command in the form of a
simple string and the client handler calls the appropriate function.
Similarly, when the calc engine sends results, those are strings sent
across to the GUI clients, which then "interpret" the information in
the string.

If you have some ideas on how to debug issues like these, I'm all
ears.

I've given the code for Get_Client and other functions involved in the
comms between server and client below.

Yours,

Stuart


Get_Client is :

void *
Get_Client(void *Get_Client_Args)
{

int servSock;
int sock;
struct ThreadArgs *threadArgs; /* Pointer to argument
structure for
* thread */
pthread_t threadID[Max_Num_Clients]; /* Thread ID from
pthread_create() */
SockArg.num_clients = 0;

servSock = *((int *) Get_Client_Args);
for (; {
printf("Waiting for Client %d\n",
SockArg.num_clients);
sock = AcceptTCPConnection(servSock);
if (sock < 0) {
DieWithError("Accept() failed");
break;
}
if (SockArg.num_clients >= Max_Num_Clients ) {
printf("Number of Allowable Clients Exceeded\n
");
close(sock);
continue;
}
pthread_mutex_lock(&SockArg_mutex);
SockArg.clntSock[SockArg.num_clients] = sock;
printf("Have Client %d\n",
SockArg.num_clients);
printf("New Client SockNumber is %d\n",
SockArg.clntSock[SockArg.num_clients]);
if ((threadArgs = (struct ThreadArgs *)
malloc(sizeof(struct ThreadArgs))) == NULL)
DieWithError("malloc() failed");
threadArgs->clntSock = sock;
if
(pthread_create(&threadID[SockArg.num_clients], NULL, ThreadMain,
(void *) threadArgs) != 0)
DieWithError("pthread_create()
failed");
printf("With thread %ld\n", (long int)
threadID[SockArg.num_clients]);
SockArg.num_clients++;
pthread_mutex_unlock(&SockArg_mutex);
}
return (NULL);
}


ThreadMain simply calls the client-handling function HandleTCPClient:

/*
 * ThreadMain - entry point of a per-client handler thread.
 *
 * threadArgs is a heap-allocated struct ThreadArgs carrying the client
 * socket.  The thread detaches itself, copies the descriptor out, frees
 * the argument block and then runs HandleTCPClient() on the socket.
 *
 * Always returns NULL.
 */
void *
ThreadMain(void *threadArgs)
{
    struct ThreadArgs *args = (struct ThreadArgs *) threadArgs;
    int fd;     /* Client socket taken from the argument block */

    /* Detach so the thread's resources are released when it returns. */
    pthread_detach(pthread_self());

    /* Copy the descriptor first; the argument block is freed right after. */
    fd = args->clntSock;
    free(threadArgs);

    HandleTCPClient(fd);

    return NULL;
}


and HandleTCPClient is:

#define RCVBUFSIZE 1024 /* Size of receive buffer */

/* Capacity of SockArg.clntSock[]; Get_Client closes any new connection
 * once num_clients has reached this value. */
#define Max_Num_Clients 12

/*
 * Table of connected client sockets, shared between the acceptor thread
 * (Get_Client) and the per-client handler threads (HandleTCPClient).
 * Both lock SockArg_mutex around their updates of it.
 */
struct ClientThreads {
int clntSock[Max_Num_Clients]; /* Socket descriptor of each registered
client */
int num_clients; /* Number of registered clients; Get_Client stores the
next accepted socket at this index */
} SockArg;


/* NOTE(review): the struct declaration above already defines SockArg, so
 * this extern is redundant within one file; presumably the definition and
 * the extern declarations come from different source files - confirm. */
extern struct ClientThreads SockArg;
extern pthread_mutex_t SockArg_mutex;


/* Global referenced by the "SEND_MAIL" command branch of HandleTCPClient;
 * defined elsewhere. */
extern int SEND_MAIL;


void HandleTCPClient(int clntSocket)
{
register int j;
char echoBuffer[RCVBUFSIZE]; /* Buffer for echo string */
static char strBuffer[RCVBUFSIZE];
int dummy;
float fdummy;
int recvMsgSize; /* Size of received message */
static FILE *fpin, *fpout;


fpin =fdopen(clntSocket,"r");
/*fpout =fdopen(clntSocket,"w");*/
/*setvbuf(fpout, strBuffer, _IOLBF, RCVBUFSIZE);*/
printf ("Fps %d \n", fpin);

/* Receive message from client */
printf("Have message from Client:\n");
for (; {
if (fgets(echoBuffer, RCVBUFSIZE, fpin) == NULL || !
strncmp(echoBuffer,"client_exit",4) ) {
printf("Exiting for Socket %d\n", clntSocket);
fflush(stdout);
pthread_mutex_lock(&SockArg_mutex);
for (j = 0; j < Max_Num_Clients; j++) {
printf("Searching for Socket %d with %d\n",
clntSocket, SockArg.clntSock[j]);
printf("Num Socks CLientH is %d\n",
SockArg.num_clients);
if (SockArg.clntSock[j] == clntSocket) {
printf("CLosing Socket %d \n", j);
SockArg.clntSock[j] = 0;
SockArg.num_clients--;
break;
}
}
condense_sock_array(clntSocket);
pthread_mutex_unlock(&SockArg_mutex);

close(clntSocket); /* Close client socket */
fclose(fpin);
break;
}
printf("Buffer :%s:\n", echoBuffer);
if (!strncmp(echoBuffer,"exit",4) ) {
PRINT();
channels_close();
exit(0);
} else if ( strstr(echoBuffer,"SEND_MAIL")) {
sscanf( echoBuffer,"%*s %d", &dummy);
SEND_MAIL == dummy;
} else if ( strstr(echoBuffer,"Rem_Data")) {
sscanf( echoBuffer,"%*s %d", &dummy);
Rem_Data(dummy);
} else if (strstr(echoBuffer,"PRINT")) {
PRINT();
} else if (strstr(echoBuffer,"WCOMF")) {
sscanf(echoBuffer, "%*s %f %d", &fdummy, &dummy);
printf(":%f %d:\n", fdummy, dummy);
wcomf( fdummy, dummy);
} else {
printf("Command :%s: not understood\n",echoBuffer);
}
fflush(stdout);
}
}
return;
}