Hello;

For the below program, I am trying to create a procnanny program which will monitor the processes according to the specification said in a configuration file. The parent process will create fork() child processes each for monitoring a process as specified in the configuration file.
The configuration file is like this -

a.out 5
elm 34

For the above case, if 2 a.out and 1 elm is existing, 3 child procnanny will be creatd. Each child procnanny will then go to sleep for specified time (5 secs for a.out and 34 secs for elm). It will wake up, see if the process is still existing. If yes, it will kill the process and wait for further signal form the parent.

Sending SIGHUP to the parent will cause the parent to reread the configuration file. The parent will check the child processes if it is busy. If not, it will assign process to the procnanny as specified in the new configuration file.
In my program, when the first time a child process is created, it monitors child processes and send "Killed" message to parent. But when the configuration file is reread, when the parent send the new PID to the process, the child receives it. It also successfully kills the process after waking up (see monitorProcess). Then it sends "Killed" message to parent. But for some strange reason, parent does not get it. But when a new chils process is again created (by runChildProcess()), the parent gets the messages fine, but once the monitorProcess() is run not for the first time [processInfo.firstTime =0], the parent does not get any. COuld anybody help on this?

#include<dirent.h>
#include<errno.h>
#include<sys/stat.h>
#include<signal.h>
#include<fcntl.h>
#include<setjmp.h>
#include<stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <unistd.h>
#include <string.h>
#include <time.h>
#include <sys/time.h>
#include <sys/wait.h>

#include "memwatch.h"
#include "logger.h"

#define MAX_PROC_COUNT 128
#define MAX_LINE_SIZE 1024

typedef struct
{
    pid_t child_pid;            //child's own PID
    pid_t process_pid;              //the PID of the montiroed process
    char process_name[MAX_LINE_SIZE];      //the name of the monitored process
    int p2c_pipe[2];            //pipe for parent to child communication.
    int c2p_pipe[2];            //pipe for child to parent communication.
    int proc_killed;            //number of processes killed
    int alive_time;                //alive tim for the given process
    int isBusy;                //indicates whether the child is busy or not
    int firstTime;                //used to indicate whether the child is monitoring for the first time
} processInfo;

processInfo processes[MAX_PROC_COUNT];
int childCount = 0; /*Number of child process (i.e., number of valid entries in the array above)*/


/*Some helper variables*/
FILE * grepFile;
char grepLine [MAX_LINE_SIZE];
char logfileMessage[MAX_LINE_SIZE];
char bash_command[MAX_LINE_SIZE];
char * config_file_name;

int reloaded = 0;                //flag to indicate whether SIGHUP has been received
sigjmp_buf env;
//sigjmp_buf env1;  //needed to handle "-1" message from parent

int indexChild;                    //used to know the index of the child process
fd_set read_from_child;                //the read file descriptor set for parent


void cleanUpProcnanny();
void readConfigFile(char* configFileName);
void runChildProcess(int childCount, int monitoredPID, int alive_time);
void waitForChildren();
void cleanExit(int status);
void SIGINT_handler(int sig_num);
void SIGHUP_handler(int sig_num);
void reload_config_file(char* configFileName);
void monitor5Seconds();
void monitorProcess(int child_proc_count);

int number_of_process_killed=0;          //stores the total number fo proces killed.
pid_t own_id;                     /*The parent process' own pid*/
int number_of_free_children = 0;         //used to keep info about the number of free children
//int monitoredPIDs[400];                //buffer that holds motiored PIDs as specified in the last read configuration
//int number_of_monitored_pids = 0;        //number of monitored PIDs

FILE* config_file;                //variables
char configLine[MAX_LINE_SIZE];            //needed
char proc_name[100];                //while 
char alive_time[10];                //reading the
int alive_time_int = 0;                //config file
  

int main(int argc, char * argv[])
{
    int ret2, ret3;
    if(argc!=2)
    {
        printf("usage: ./procnanny <path to configuration file> \n");
        exit(0);
    }
    
    signal(SIGHUP, SIGHUP_handler);
    signal(SIGINT, SIGINT_handler);
    /*Start the logging process*/
    initLogger();
    
    /*Clean up previous procnanny processes*/
    cleanUpProcnanny();
    
    /*Read the config file*/
    readConfigFile(argv[1]);

    config_file_name = argv[1];

    sigsetjmp(env, 1);
    if(reloaded == 1)
    {
        //only parent reloads the configuration file
        if(own_id == getpid())
        {
            reload_config_file(config_file_name);
            reloaded = 0;
        }
    }
    if(own_id == getpid())
    {
        while(1)
        {
            sleep(5);
            monitor5Seconds();
            //also listen for child's messages
            int maxdesc = getdtablesize();
            struct timeval tv; 
            tv.tv_sec = 0;
            ret2 = select(maxdesc, &read_from_child, NULL, NULL, &tv);
            //printf("%d FDs ready \n ", ret);
            
            
            if (ret2 < 0) //error
            {
                printf("Got a ha ha : \n");
                perror("select");
                 continue;
            }
            
            else
            {
                //got a message from child
                int i = 0;
                char buf[30];
                for (i = 0; i < childCount; i++)
                {
                    if (FD_ISSET(processes[i].c2p_pipe[0], &read_from_child)) {
                        memset(buf, 0, MAX_LINE_SIZE);
                        ret3 = read(processes[i].c2p_pipe[0], &buf, sizeof(buf));
                        if (ret3 <= 0){printf("return error \n");}
                        else
                        {
                            printf("Child %d Sent %s\n",i,buf);
                            processes[i].isBusy = 0;        //the child becomes free
                            number_of_free_children++;        //increment the number of free children
                            processes[i].process_pid =  0;        //clear the monitored PID of that process
                        }//end of else
                    }//end of if (FD_ISSET(processes[childIndex].p2c_pipe[0], &read_from))
                }//end of for
            }//end of else
        }
    }
    return 0;
}//end of main

void cleanUpProcnanny(){
    pid_t process_pid; /*The pid of a previous procnanny processes*/
    
    grepFile = popen("ps -eo pid,comm | grep  ./procnanny","r");

    if (grepFile == NULL){                            //popen error
        perror("popen");
        printLog(LOG_ERROR, "Could not check for previous procnanny processes. Pipe open failed!\n");
    }
    else                                        //success in popen
    {
        own_id = getpid();
        
        while ( fgets(grepLine, MAX_LINE_SIZE, grepFile) )
        {
            sscanf (grepLine,"%d", &process_pid);
            //To make sure it does not kill itself
            if (own_id!=process_pid)
            {
                //kill the process
                sprintf( bash_command, "kill %d", (int)process_pid);
                if ( system(bash_command) < 0) //kill failed
                {
                      perror("system() error");
                      sprintf(logfileMessage, "failed to kill previous procnanny process with PID %d\n",process_pid);
                      printLog(LOG_ERROR, logfileMessage);
                }
                else //kill succeeded
                {
                    sprintf(logfileMessage, "previous procnanny process PID %d killed\n",process_pid);
                    printLog(LOG_INFO, logfileMessage);
                }
            }//end of if
        }//end of while
    }//end of else and end of checking whether procnanny exists
    pclose(grepFile); // Done processing the output of ps -eo pid,comm | grep 
}

void readConfigFile(char* configFileName){
    
    
    FD_ZERO(&read_from_child);

    int procExists; /*Checks whether there has been an instance of a specific process running*/
    
    pid_t child_pid;
    pid_t proc_pid;
    
    config_file = fopen( configFileName, "r" );

    if ( config_file == NULL ) // fopen failed
    {
        printf( "Could not open configurationfile\n" );
        cleanExit(0);
    }
    
    //get the name of the processes and their alive time as mentioned in the conf file
    while ( fgets(configLine, MAX_LINE_SIZE, config_file) )
    {
        //configLine[strlen(configLine)-1]='\0'; /*remove the \n at the end*/

        //extract the process name and alive_time from the line
        sscanf(configLine,"%s %d",proc_name, &alive_time_int);
        printf ("Alive time = %d \n", alive_time_int);
        
        //convert the alive_time array to alive_time_int
        //alive_time_int = atoi(alive_time);

        /*Prepare and run the grep command for the current line of config file*/
        sprintf(bash_command,"ps -eo pid,comm | grep  %s", proc_name);
        grepFile = popen(bash_command , "r");
        
        if(grepFile==NULL)
        {
            perror("popen");
            sprintf(logfileMessage, "Failed to check for processes running as %s", proc_name);
            printLog(LOG_ERROR, logfileMessage);
            cleanExit(0);
        }
        
        procExists=0;
        
        //initialize array to store the currently monitored PIDs
        //monitoredPIDs = (int *)malloc(sizeof(int)*200);
        //int index_in_monitored_PIDs = 0;  //used to store PIDs in the buffer monitoredPIDs
        
        //iterating through the output of ps aux|grep x(the line read from config file)
        while (fgets(grepLine, MAX_LINE_SIZE, grepFile))
        {
            procExists=1; /*There has been at least one process with this name*/
            sscanf (grepLine,"%d", &proc_pid);
            
            //monitoredPIDs[index_in_monitored_PIDs] = proc_pid;
            //index_in_monitored_PIDs++;
            processes[childCount].process_pid = proc_pid;
            strcpy(processes[childCount].process_name, proc_name);
            processes[childCount].alive_time = alive_time_int;

            //create the pipes for communication
            pipe(processes[childCount].p2c_pipe);
            pipe(processes[childCount].c2p_pipe);
            
            //create a child
            child_pid=fork();
            if ((child_pid) < 0) {
                perror("fork");
                cleanExit(0);
            } 
            else 
            {
                processes[childCount].child_pid = child_pid;
                processes[childCount].isBusy = 1;
                processes[childCount].firstTime = 1;

                //if parent, close the unused end of the pipes
                if (child_pid != 0)
                {
                    close(processes[childCount].p2c_pipe[0]);
                    close(processes[childCount].c2p_pipe[1]);
                    //add the read end of the pipe to fd_set
                    FD_SET (processes[childCount].c2p_pipe[0], &read_from_child);
                }
                if (child_pid == 0) {
                    //child creation successful
                    //close unused end of the pipes
                    close(processes[childCount].p2c_pipe[1]);
                    close(processes[childCount].c2p_pipe[0]);
                    //write(processes[child_proc_count].p2c_pipe[1], proc_pid , 10);
                    runChildProcess(childCount, proc_pid, alive_time_int);
                    //cleanExit(procKilled); //return whether the process has been killed
                }//end of child
            }
            childCount++;
        }//end of while
        if(procExists == 0)
        {
            sprintf(logfileMessage, "No Process with name '%s'.\n",proc_name);
            printLog(LOG_INFO, logfileMessage);
        }
        
    }//end of outer while
    
    fclose(config_file);
}
void reload_config_file(char* configFileName)
{
    printf("Reread successful  \n");

    //int procKilled; /*Return value from a child process, giving the number of processes it has killed, or 0 if nothing has been killed*/
    int procExists; /*Checks whether there has been an instance of a specific process running*/
    
    pid_t child_pid;
    pid_t proc_pid;

    //open the configuration file again
    config_file = fopen( configFileName, "r" );

    if ( config_file == NULL ) // fopen failed
    {
        printf( "Could not open configurationfile\n" );
        cleanExit(0);
    }    
    //go through the config file
    //get the name of the processes and their alive time as mentioned in the conf file
    
    while ( fgets(configLine, MAX_LINE_SIZE, config_file) )
    {
        //configLine[strlen(configLine)-1]='\0'; /*remove the \n at the end*/

        //extract the process name and alive_time from the line
        sscanf(configLine,"%s %d",proc_name, &alive_time_int);
        printf ("Alive time = %d \n", alive_time_int);
        

        /*Prepare and run the grep command for the current line of config file*/
        sprintf(bash_command,"ps -eo pid,comm | grep  %s", proc_name);
        grepFile = popen(bash_command , "r");
        
        if(grepFile==NULL)
        {
            perror("popen");
            sprintf(logfileMessage, "Failed to check for processes running as %s", proc_name);
            printLog(LOG_ERROR, logfileMessage);
            cleanExit(0);
        }
        
        //clear the buffer array that stored the previously monitored PIDs
        //free(monitoredPIDs);

        //reinitialize array to store the currently monitored PIDs
        //monitoredPIDs = (int *)malloc(sizeof(int)*200);

        //int index_in_monitored_PIDs = 0;  //used to store PIDs in the new buffer monitoredPIDs
        
        //iterating through the output of ps aux|grep x(the line read from config file)
        while (fgets(grepLine, MAX_LINE_SIZE, grepFile))
        {
            procExists=1; /*There has been at least one process with this name*/
            sscanf (grepLine,"%d", &proc_pid);
            
            //check if there is any free children.  If yes, assign the PID to them
            //int j = 0;
            int k = 0;
            
            //int assigned = 0;    //flag to know that process has been assigned so that go to the next iteration and 
                        //assign another PID
            printf ("%d \n", number_of_free_children); 

            if(number_of_free_children > 0)
            {
                //find the child who is free
                for (k = 0; k < childCount; k++)
                {
                    if (processes[k].isBusy == 0)
                    {
                        //free child
                        //assign the PID and the process name to the child
                        processes[k].process_pid = proc_pid;
                        strcpy(processes[k].process_name, proc_name);
                        processes[k].alive_time = alive_time_int;
                        processes[k].isBusy = 1;

                        //write the process ID and the time to monitor in the pipe
                        printf("Writing to child again..\n");
                        write(processes[k].p2c_pipe[1], &proc_pid, sizeof(proc_pid));
                        write(processes[k].p2c_pipe[1], &alive_time_int, sizeof(alive_time_int));
                        write(processes[k].p2c_pipe[1], proc_name, strlen(proc_name));
                        

                        //add this PID to the monitored PID buffer
                        //monitoredPIDs[index_in_monitored_PIDs] = proc_pid;
                        //index_in_monitored_PIDs++;
                        
                        //we have lost one free children
                        number_of_free_children--;
                        break;
                    }//end of if
                }//end of for
                
            }
            else
            {
                //create a new child process
                processes[childCount].process_pid = proc_pid;
                strcpy(processes[childCount].process_name, proc_name);
                processes[childCount].alive_time = alive_time_int;
                
                //create the pipes for communication
                pipe(processes[childCount].p2c_pipe);
                pipe(processes[childCount].c2p_pipe);
            
                //create a child
                child_pid=fork();
                if ((child_pid) < 0) {
                    perror("fork");
                    cleanExit(0);
                } 
                else 
                {
                    processes[childCount].child_pid = child_pid;
                    processes[childCount].isBusy = 1;
                    processes[childCount].firstTime = 1;

                    //if parent, close the unused end of the pipes
                    if (child_pid != 0)
                    {
                        close(processes[childCount].p2c_pipe[0]);
                        close(processes[childCount].c2p_pipe[1]);
                        //add the read end of the pipe to fd_set
                        FD_SET (processes[childCount].c2p_pipe[0], &read_from_child);
                    }
                    if (child_pid == 0) {
                        //child creation successful
                        //close unused end of the pipes
                        close(processes[childCount].p2c_pipe[1]);
                        close(processes[childCount].c2p_pipe[0]);
                        //write(processes[child_proc_count].p2c_pipe[1], proc_pid , 10);
                        runChildProcess(childCount, proc_pid, alive_time_int);
                    }//end of child
                }//end of else
                childCount++;
            }//end of else (creating a new child)        
        }//end of while
        if(procExists == 0)
        {
            sprintf(logfileMessage, "No Process with name '%s'.\n",proc_name);
            printLog(LOG_INFO, logfileMessage);
        }
        
    }//end of outer while
    fclose(config_file);
}
void runChildProcess(int childIndex, int monitoredPID, int alive_time){

//only the children execute the following code
if(getpid() != own_id)
{
        
    printf("In Child %d\n", childIndex);
    indexChild = childIndex;

    fd_set read_from;
    FD_ZERO(&read_from);
    //set the parent to child read end of file
    FD_SET (processes[childIndex].p2c_pipe[0], &read_from);

    int ret = 0;
    char buf[MAX_LINE_SIZE];
    char buf2[MAX_LINE_SIZE];
    int maxdesc = getdtablesize();
    pid_t pid_given;

    if (processes[childIndex].firstTime ==1)
    {
        monitorProcess(childIndex);
        processes[indexChild].firstTime = 0;    
    }

    //wait in infinite while loop for instruction from parent
    //if instuction is a PID, monitor that process
    //if the instruction is PID = -1, exit    
    while(1)
    {
        //sigsetjmp(env1, 1);
        ret = select(maxdesc, &read_from, NULL, NULL, NULL);

        if (ret < 0)
        {
            printf("Got a ha ha : \n");
            perror("select");
             continue;
        }
        else
        {
            //got a message from parent
            if (FD_ISSET(processes[childIndex].p2c_pipe[0], &read_from)) {
                memset(buf, 0, MAX_LINE_SIZE);
                
                ret = read(processes[childIndex].p2c_pipe[0], &pid_given, sizeof(pid_given));
                if (ret <= 0)continue;
                else
                {
                    //check whether a process ID is given or -1
                    printf("Parent sent: %s \n", buf);
                    //convert the message sent by parent to a pid_t type
                    // = atoi(buf);
                    if (pid_given == -1)
                    {
                        //it's time to quit
                        printf("Parent sent -1 \n");
                        exit(processes[indexChild].proc_killed);
                    }
                    else
                    //monitor that process
                    {
                        //get the alive time from parent
                        printf("Parent said to continue \n");
                        int new_alive_time;
                        memset(buf2, 0, MAX_LINE_SIZE);
                        ret = read(processes[childIndex].p2c_pipe[0], &new_alive_time, sizeof(new_alive_time));
                        ret = read(processes[childIndex].p2c_pipe[0], &buf2, sizeof(buf2));
                        // = atoi(buf1);
                        printf("New alive time %d \n", new_alive_time);
                        printf("New process name %s \n", buf2);
                        processes[indexChild].process_pid = pid_given;
                        processes[indexChild].alive_time = new_alive_time;
                        monitorProcess(childIndex);
                    }
                }//end of else
            }//end of if (FD_ISSET(processes[childIndex].p2c_pipe[0], &read_from))
        }//end of else
    }//end of main while loop in child
}//end of if (own_id != getpid())
}
void monitorProcess(int child_proc_count)
{
        //firsttime monitor the assigned process
        pid_t proc_id;
        sprintf(logfileMessage, "Initializing monitoring of process '%s' PID[%d]\n",
        processes[child_proc_count].process_name,processes[child_proc_count].process_pid);
        printLog(LOG_INFO, logfileMessage);
    
        //go to sleep for the time specified in logfile                            
        sleep(processes[child_proc_count].alive_time);
            
        //wake up and check for still alive processes
        sprintf(bash_command, "ps -eo pid,comm | grep %s", processes[child_proc_count].process_name);
                
        FILE * grepFile = popen(bash_command , "r");
        
        while(fgets(grepLine, MAX_LINE_SIZE, grepFile)) 
        {
            sscanf (grepLine,"%d", &proc_id);
            if(processes[child_proc_count].process_pid == proc_id) //process with the pid still exists
            {
                sprintf (bash_command, "kill %d", processes[child_proc_count].process_pid);
                if(system(bash_command) < 0)
                {
                    perror("system");
                    sprintf(logfileMessage, "Failed to kill process with PID %d (%s). \n", 
                    processes[child_proc_count].process_pid, 
                        processes[child_proc_count].process_name);
                    printLog(LOG_ERROR, logfileMessage);
                }
                else //killing successful
                {
                    processes[child_proc_count].proc_killed++;
                    //printf("Processes killed %d\n",processes[child_proc_count].proc_killed);
                    number_of_process_killed++;
                    char buf1[20]="Killed";
                    printf("Child %d writing to parent ..\n",child_proc_count);
                    write(processes[child_proc_count].c2p_pipe[1], buf1, strlen(buf1));
                }
            }//end of if
        }//end of while
}
void waitForChildren(){

//only parent is allowed to execute the functionality
if (getpid() == own_id)
{
    pid_t pid4;
    int i = 0;
    int status;
        
    for(i = 0; i < childCount; i++)
    {
        pid4 = waitpid(processes[i].child_pid,&status,0);
        //pid4 = wait(status);
        status = WEXITSTATUS(status);
        number_of_process_killed += status;
        printf("Child with PID %ld exited with status %d.\n", (long)pid4, status);
    }
    sprintf(logfileMessage, "Exiting. %d process(es) killed.\n",number_of_process_killed);
    printLog(LOG_INFO, logfileMessage);

}
}

void monitor5Seconds()
{
    printf("Parent woke up \n");
}

//signal handler for sighup
void SIGHUP_handler(int sig_num) {
//only parent executes the following
//if (getpid() == own_id)
//{
    printf("Signal caught \n");
    reloaded = 1;
    siglongjmp(env,1);
//}
}

//SIGINT handler
void SIGINT_handler(int sig_num) {
    
    //free the buffer containing Monitored PIDs
    //free(monitoredPIDs);

    //parent send the quit signal to everyone
    if(getpid() == own_id)
    {
    printf("Parent Quitting ..\n");
        
    /*int i = 0;
    char buf[4] = "-1";
    //send "-1" to all the existing childnanys to ask them to quit
    for (i = 0; i < childCount; i++)
    {
        printf("Writing to child..");
        write(processes[i].p2c_pipe[1], buf, strlen(buf));
    }*/

    /*Wait for the children to return*/
    waitForChildren();
    
    cleanExit(0);
    }
    else
    {
        //child quits with the number of processes killed
        printf("Child Quitting ..\n");
        //siglongjmp(env,1);
        exit(processes[indexChild].proc_killed);
    }
    
}
void cleanExit(int status)
{
    cleanupLogger();
    exit(status);
}

Let me give you an example of the problem.

1. I run the program for the first time.
2. One a.out and one elm is present in the system. So two child procnannys are created. (by calling readConfiguration())
3. The children run the monitorProcess() function, kills the respective processes and writes "Killed" in the pipe. The parent receivs those and prints those out -
Child 0 Sent Killed [line 145]
Child 1 Sent Killed.
4. Now I create another a.out and elm process (as the previous ones are killed ) and ask the parent to reread the configuration file using SIGHUP.
5. As the number_of_free_children = 2 (from line 147), the parent assigns new processes to existing childnannys (from line 366).
6. The children procnanny again runs monitorProcess() as they see something appearing from parent (line 490)
7. They kill the new processes and writes to parent.
8. But strangely this time, the parent prints out nothing like "Child 0 Sent Killed" or something.
9. And if I create a new process with the name a.out and ask the parent to reread th configuration file, the parent creates a new children (as in the previous case number_of_children did not get incremented following line 147 and thus the parent has to go to the else part (line 398) of reload_config_file())
10. And now the newly created child when kills the process(following the logic of line 470), sends "Killed" message to parent and tha parent gets it.

I hope I was able to give some pointers to my gigantic post.

Be a part of the DaniWeb community

We're a friendly, industry-focused community of developers, IT pros, digital marketers, and technology enthusiasts meeting, networking, learning, and sharing knowledge.