I needed to kick off a bunch of background threads and receive
notification if one failed due to a fatal error. I was hoping waitpid()
would work, but quickly found out that although each thread has a
pid_t, threads and processes are not interchangeable. Lacking a
timed pthread_join() over multiple threads (like WIN32
WaitForMultipleObjects), I formulated a seemingly dead-simple
implementation using signals. I hacked out a quick prototype to
ensure I could get the signals to work as desired.

When I ran the prototype, I saw behavior that looks very much like a
bug, but I wanted to post it here in case I am misunderstanding Unix
signals and just doing something silly. I would appreciate comments
or [hopefully clever] suggestions. Code follows:


/* NOTE: the header names were stripped from the posted listing; the seven
   below are reconstructed from the identifiers the program uses. */
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <signal.h>
#include <unistd.h>
#include <pthread.h>
#include <linux/unistd.h>

/* Most recent siginfo_t copied in by fSigChld(); main() polls it and then zeroes it. */
siginfo_t signal_info;
/* Set to pthread_self() at the top of main(); not read by the code shown here. */
pthread_t thread_main;
/* Only referenced by the commented-out (//xx) lock/unlock lines in fThrd(). */
pthread_mutex_t mtx = PTHREAD_MUTEX_INITIALIZER;

/* Presumably the kernel-header macro that generates a zero-argument gettid()
   system-call wrapper returning pid_t -- confirm against the headers in use. */
_syscall0(pid_t,gettid)

/*
 * Worker thread body: report start-up, idle for five seconds, then notify
 * the process by queueing SIGCHLD with this thread's TID as the payload.
 *
 * context  unused.
 * returns  always NULL.
 */
void *fThrd( void *context )
{
    union sigval val;
    int err;

    (void)context;
    val.sival_int = gettid();
    printf( "{%d} TID started OK\n", gettid() );
    sleep( 5 );
    printf( "{%d} TID exiting...\n", gettid() );

    /*
     * SIGCHLD is a standard (non-realtime) signal. Per signal(7), standard
     * signals do not queue, so notifications sent by several threads at
     * nearly the same time can be merged into a single delivery.
     * The //xx lines are the optional serialisation experiment.
     */
    //xxpthread_mutex_lock( &mtx );
    while( 0!=sigqueue( getpid(), SIGCHLD, val ) )
    {
        /* Capture errno first: printf() is allowed to overwrite it. */
        err = errno;
        printf( "ERR=[%d]\n", err );
        if( EAGAIN!=err )
            break;
        //xxusleep( 10000 );
    }
    //xxusleep( 10000 );
    //xxpthread_mutex_unlock( &mtx );

    /* A pthread start routine must return a value. */
    return NULL;
}

/*
 * SA_SIGINFO-style handler for SIGCHLD.
 *
 * Logs every invocation. When it runs on the main thread (TID equal to the
 * PID) for SIGCHLD, it copies *pInfo into the global signal_info so that
 * main() can pick it up.
 *
 * signal   number of the signal being delivered.
 * pInfo    details supplied by the kernel, including the sigqueue() payload.
 * context  unused.
 *
 * NOTE: printf() is not on the async-signal-safe list; it is kept only
 * because this is a diagnostic prototype. errno is saved and restored so
 * the handler cannot disturb an errno check in the interrupted thread.
 */
void fSigChld( int signal, siginfo_t *pInfo, void* context )
{
    const int saved_errno = errno;

    (void)context;
    printf( "{%d} SIGCHLD called...\n", gettid() );
    if( getpid()==gettid() && signal==SIGCHLD )
    {
        memcpy( &signal_info, pInfo, sizeof(siginfo_t) );
        printf( "{%d} SIGCHLD executed OK\n", gettid() );
    }
    errno = saved_errno;
}

/*
 * Prototype driver: installs the SIGCHLD handler, starts five worker
 * threads, then loops forever reporting whatever the handler stored in
 * signal_info.
 *
 * Returns only on a set-up failure (non-zero code from sigaction() or
 * pthread_create()); otherwise the process runs until it is killed.
 */
int main( int argc, char* argv[] )
{
    pthread_t hT;
    struct sigaction act;
    int rc = -1,i;
    const pid_t ppid = getppid();

    (void)argc;
    (void)argv;

    thread_main = pthread_self();

    memset( &signal_info, 0x00, sizeof(signal_info) );
    printf( "{%d} Parent PID={%d}\n", gettid(), ppid );

    /* Install the handler before any worker exists, so a notification can
       never arrive while SIGCHLD still has its default disposition. */
    memset( &act, 0x00, sizeof(act) );
    act.sa_flags = SA_SIGINFO;
    act.sa_sigaction = &fSigChld;
    rc = sigaction( SIGCHLD, &act, NULL );
    if( 0!=rc )
    {
        printf( "sigaction ERR=[%d]\n", errno );
        return rc;
    }

    for( i=0;i<5;i++ )
    {
        /* pthread_create() reports failure through its return value. */
        rc = pthread_create( &hT, NULL, &fThrd, NULL );
        if( 0!=rc )
        {
            printf( "pthread_create ERR=[%d]\n", rc );
            return rc;
        }
    }

    for( ;; )
    {
        printf( "{%d} PID sleeping...\n", gettid() );
        /* sleep() returns early when a handled signal interrupts it. */
        sleep(11);
        if( 0!=signal_info.si_pid )
            printf( " SIGNAL SRC={%d} OK\n",
                    signal_info.si_value.sival_int);
        /* Single slot: anything the handler stores between the check above
           and this reset is discarded. */
        memset( &signal_info, 0x00, sizeof(signal_info) );
    }
    /* Not reached: the loop above never terminates. */
}


When I execute it on kernel 2.6.9-42.EL, I get the following output:
{10321} Parent PID={10235}
{10321} PID sleeping...
{10322} TID started OK
{10323} TID started OK
{10324} TID started OK
{10325} TID started OK
{10326} TID started OK
{10322} TID exiting...
{10323} TID exiting...
{10324} TID exiting...
{10325} TID exiting...
{10326} TID exiting...
{10321} SIGCHLD called...
{10321} SIGCHLD executed OK
SIGNAL SRC={10322} OK
{10321} PID sleeping...
{10321} PID sleeping...

As you can see, four of the five signals are lost.

When I uncomment the //xx lines in fThrd above, the output looks
better:
{10355} Parent PID={10235}
{10355} PID sleeping...
{10356} TID started OK
{10357} TID started OK
{10358} TID started OK
{10359} TID started OK
{10360} TID started OK
{10356} TID exiting...
{10357} TID exiting...
{10358} TID exiting...
{10359} TID exiting...
{10360} TID exiting...
{10355} SIGCHLD called...
{10355} SIGCHLD executed OK
SIGNAL SRC={10356} OK
{10355} PID sleeping...
{10355} SIGCHLD called...
{10355} SIGCHLD executed OK
SIGNAL SRC={10357} OK
{10355} PID sleeping...
{10355} SIGCHLD called...
{10355} SIGCHLD executed OK
SIGNAL SRC={10358} OK
{10355} PID sleeping...
{10355} SIGCHLD called...
{10355} SIGCHLD executed OK
SIGNAL SRC={10359} OK
{10355} PID sleeping...
{10355} SIGCHLD called...
{10355} SIGCHLD executed OK
SIGNAL SRC={10360} OK
{10355} PID sleeping...
{10355} PID sleeping...

James
Beverly, MA USA