iam trying to optimize a code
i want to run code for arm_no=1000, its running for 100 arms but further increasing causes segmentation fault core dumped.
to run the code use

 $ g++ Epsilon_greed.cpp -lm `gsl-config --libs`
 $ ./a.out

but you'll have to install libgsl0 i.e gnu scientific library.

    #include<stdio.h>
    #include<math.h>
    #include<stdlib.h>
    #include<iostream>
    #include<gsl/gsl_randist.h>

    using namespace std;
    const gsl_rng_type * T;      
    gsl_rng * r;
    double Epsilon;
        const int arm_no=10;
        const int play_no=1000;
        const int bandit_no=2000;
//Action class
//Each actuion corresponds to one arm

class Action
{
    private:    
    double mean_reward;//its for calculating Qt(a)
    int no_chosen;//no of times an action is been chosen
    double cur_rewrd;// gives the current reward on that play        
    double Q_reward; //   its the Q*(a)



       public:
       //constructor for Action       
    Action()
        {

        mean_reward=0;
    no_chosen=0;    
    cur_rewrd=0;
        Q_reward=gsl_ran_gaussian(r, 1);

    }

//every time action gets selected Qt(a) is calculated  and returned

     double selected()
        {
        no_chosen++;
        mean_reward=((mean_reward*(no_chosen-1))+get_current_reward())/no_chosen;
        return mean_reward;
        }


// returns the current reward
        double get_current_reward()
        {
         cur_rewrd=gsl_ran_gaussian (r, 1)+ Q_reward;
         return cur_rewrd;
        }


//returns the Qt(a)
        double get_mean_reward()
        {return mean_reward;}

//returns Q*(a)
    double get_Q_reward()
        {
        return Q_reward;
        }

        }; //end of class action

//class bandit
class Bandit
{       private:
    Action action[arm_no];//each bandit having 10 actions
        int optimal, optimalcount;//optimal stores the current max Qt(a) value action
        int optimalval;//optimal count is flag whether action with maxQ*(a) {optimalval } is chosen or not




    public:

    Bandit()
    {
     optimal=0;
         optimalcount=0;
     optimalval=getmaxval();
        }




//setting the max of Qt(a)
    void set_optimal()
    {
    int max=-80000;//large -ve value
        for(int i=0;i<arm_no;i++)
         {             
            if(action[i].get_mean_reward()>max)
                    {       int x=tiebreak(i);//calling tiebreaker in case of same Qt(a)
                                if(x)
                              {
                               max=action[x].get_mean_reward();
                               optimal=x;               
                              } 
                               else
                             {
                              max=action[i].get_mean_reward();
                              optimal=i;                
                             }
                    }


         }
    }




//function for getting max Q*(a) value


    int getmaxval()
     {       int k=0;
         int max=-800000;
         for(int i=0;i<arm_no;i++)
         {
           if(action[i].get_Q_reward()>max)
            {
            k=i;
            max=action[i].get_Q_reward();
            }
          } 
          return k;
     }



//tie breaker
         int tiebreak(int i)
         {
          int n=0;
              int aux[arm_no];//storing the action nos. as index whose estimated reward is same
          int k=0;
          for(int j=0;j<arm_no;j++)
            {
            if(action[i].get_mean_reward()==action[k].get_mean_reward())
                          {
                          aux[n]=k;
                          n++;
                          }
            k=(k+1)%arm_no;//for checking all the actions
            }

          if(n==0)
           return 0;
          else
           return aux[rand()%n];//returning random action of same Qt(a) value
         }




//function for each play
         double play()
         {  set_optimal();//setting the optimal i.e max Q(a) 
        int choice;
            choice=probab();//choosing choice with probability
            double ret = action[choice].selected();
            if(choice==optimalval)
            optimalcount++;//setting the flag that selected action has max Q*(a)
            return ret;
         }



//returning whether chosen action is the one having Q*(a)?
     int is_optimal_count()
     { 
        if(optimalcount!=0)
        {
         optimalcount--;//resetting the flag
         return 1;
        }
            else 
         return 0;
     }

//choosic action according to the probability
     int probab()
         {
           if(gsl_rng_uniform(r)<=::Epsilon)
            {
               return (int)(gsl_rng_uniform(r)*arm_no);
            }
           else 
            return optimal;
         }


};//end of bandit class



//driver code

int main()
{       gsl_rng_env_setup ();

        ::T = gsl_rng_default;
        ::r = gsl_rng_alloc (T);
       //creating object of 3 bandir problem each for different epsilon
    Bandit bandit1[bandit_no];
//        Bandit bandit2[bandit_no];
//    Bandit bandit3[bandit_no];    
        //plot arrays containing data poins for plotting graph
        double  plot1[play_no],plotpercent1[play_no];
//  double  plot2[play_no],plotpercent2[play_no];   
//  double  plot3[play_no],plotpercent3[play_no];   
    double mean[play_no];
        int i,j;

        //sampling over 2000 bandits for 1000 plays
    ::Epsilon=0.10;          
    for(i=0;i<play_no;i++)
       {
        mean[i]=0;
        plotpercent1[i]=0;
        for(j=0;j<bandit_no;j++)
        {
          mean[i]=mean[i] + bandit1[j].play();      
          plotpercent1[i]+=bandit1[j].is_optimal_count();                   
        }

        plot1[i]=mean[i]/bandit_no;
        plotpercent1[i]=plotpercent1[i]/bandit_no;
       }


/*
    ::Epsilon=0.01;
    for(i=0;i<play_no;i++)
       {
        mean[i]=0;
            plotpercent2[i]=0;  
        for(j=0;j<bandit_no;j++)
        {
           mean[i]=mean[i] + bandit2[j].play();
           plotpercent2[i]+=bandit2[j].is_optimal_count();      
        }

        plot2[i]=mean[i]/bandit_no;
            plotpercent2[i]=plotpercent2[i]/bandit_no; 
       }


    ::Epsilon=0.00;
    for(i=0;i<play_no;i++)
    {
     mean[i]=0;
         plotpercent3[i]=0;
     for(j=0;j<bandit_no;j++)
     {
        mean[i]=mean[i] + bandit3[j].play();
        plotpercent3[i]+=bandit3[j].is_optimal_count();         
     }

     plot3[i]=mean[i]/bandit_no;
         plotpercent3[i]= plotpercent3[i]/bandit_no;        
     }


    for(int i=0;i<play_no;i++)
          { cout<<plot1[i]<<"\t"<<plot2[i]<<"\t"<<plot3[i]<<"\t"<<plotpercent1[i]*100<<"\t"<<plotpercent2[i]*100<<"\t"<<plotpercent3[i]*100<<endl;
          }
          */
          for(int i=0;i<1000;i++)
      { 
        cout<<plot1[i]<<"\t"<<plotpercent1[i]*100<<endl;
      }

    return 0;

    }//end of main

Edited 3 Years Ago by anuran

Have you debugged this? It's pretty hard for someone to compile the code to find the error when we don't have the specific external Libraries.

You should therefore try the following:

Valgrind

Only thing I can think of is that you're going outside the bounds of the array. Have you made sure that your declarations are all up to date when you adjust the static arms value?

Run this is your debugger so when it segfaults you can tell where it was when it happened. Then you can tell more easily why it dumped core. Also, asking us to analyze almost 300 lines of code is really unfair - we donate our time. It is valuable. Don't abuse it!

Comments
A good point well made :)

okay sorry for the posting the long code like a noob, i ran degubber on it, gbd is saying it

Program received signal SIGSEGV, Segmentation fault.
main () at Epsilon_greed.cpp:218
warning: Source file is more recent than executable.
218     {       gsl_rng_env_setup ();

here in the question its line no 206
and i don't know what it means and why this is happening in case of 150 or more but not for 100?

Edited 3 Years Ago by anuran

First, it indicates that you have edited the source file since you built the image. Rebuild, and rerun to be sure it is crashing where it thinks it is. Then, we can take a closer look at the source. In any case, please generate a stack trace so we can see where in the call tree it is, other than right now it thinks it is crashing at line 218 in main() in the call to gsl_rng_env_setup. Since the source has changed since you built it last, this may not be really where it is crashing... :-)

Also, you may want to make sure you can allocate that much memory statically. A quick estimate on your numbers shows that, for arm_no = 100 you are using ~5MB of memory but at arm_no = 1000 you are closer to 53MB. That is basically equivalent to:

int main () {
    char buff[56048000] = {0};
    return 0;
}

Can you compile and run that? It may be the case that you need to allocate your memory dynamically (e.g. using new) to get around this problem.

i ran gdb again after compiling and got this

Program received signal SIGSEGV, Segmentation fault.
0x0000000000400ac2 in main ()

i tried with

char buff[560480000]={0};

since i want to do this for 1000 arms not 100(it works fine),

i also tried

Bandit* bandit1=new Bandit[bandit_no];

in line no 211
still getting the error

This article has been dead for over six months. Start a new discussion instead.