0

iam trying to optimize a code

i want to run code for arm_no=1000, its running for 100 arms but further increasing causes segmentation fault core dumped.

to run the code use

```
$ g++ Epsilon_greed.cpp -lm `gsl-config --libs`
$ ./a.out
```

but you'll have to install libgsl0 i.e gnu scientific library.

```
#include<stdio.h>
#include<math.h>
#include<stdlib.h>
#include<iostream>
#include<gsl/gsl_randist.h>
using namespace std;
const gsl_rng_type * T;
gsl_rng * r;
double Epsilon;
const int arm_no=10;
const int play_no=1000;
const int bandit_no=2000;
//Action class
//Each actuion corresponds to one arm
class Action
{
private:
double mean_reward;//its for calculating Qt(a)
int no_chosen;//no of times an action is been chosen
double cur_rewrd;// gives the current reward on that play
double Q_reward; // its the Q*(a)
public:
//constructor for Action
Action()
{
mean_reward=0;
no_chosen=0;
cur_rewrd=0;
Q_reward=gsl_ran_gaussian(r, 1);
}
//every time action gets selected Qt(a) is calculated and returned
double selected()
{
no_chosen++;
mean_reward=((mean_reward*(no_chosen-1))+get_current_reward())/no_chosen;
return mean_reward;
}
// returns the current reward
double get_current_reward()
{
cur_rewrd=gsl_ran_gaussian (r, 1)+ Q_reward;
return cur_rewrd;
}
//returns the Qt(a)
double get_mean_reward()
{return mean_reward;}
//returns Q*(a)
double get_Q_reward()
{
return Q_reward;
}
}; //end of class action
//class bandit
class Bandit
{ private:
Action action[arm_no];//each bandit having 10 actions
int optimal, optimalcount;//optimal stores the current max Qt(a) value action
int optimalval;//optimal count is flag whether action with maxQ*(a) {optimalval } is chosen or not
public:
Bandit()
{
optimal=0;
optimalcount=0;
optimalval=getmaxval();
}
//setting the max of Qt(a)
void set_optimal()
{
int max=-80000;//large -ve value
for(int i=0;i<arm_no;i++)
{
if(action[i].get_mean_reward()>max)
{ int x=tiebreak(i);//calling tiebreaker in case of same Qt(a)
if(x)
{
max=action[x].get_mean_reward();
optimal=x;
}
else
{
max=action[i].get_mean_reward();
optimal=i;
}
}
}
}
//function for getting max Q*(a) value
int getmaxval()
{ int k=0;
int max=-800000;
for(int i=0;i<arm_no;i++)
{
if(action[i].get_Q_reward()>max)
{
k=i;
max=action[i].get_Q_reward();
}
}
return k;
}
//tie breaker
int tiebreak(int i)
{
int n=0;
int aux[arm_no];//storing the action nos. as index whose estimated reward is same
int k=0;
for(int j=0;j<arm_no;j++)
{
if(action[i].get_mean_reward()==action[k].get_mean_reward())
{
aux[n]=k;
n++;
}
k=(k+1)%arm_no;//for checking all the actions
}
if(n==0)
return 0;
else
return aux[rand()%n];//returning random action of same Qt(a) value
}
//function for each play
double play()
{ set_optimal();//setting the optimal i.e max Q(a)
int choice;
choice=probab();//choosing choice with probability
double ret = action[choice].selected();
if(choice==optimalval)
optimalcount++;//setting the flag that selected action has max Q*(a)
return ret;
}
//returning whether chosen action is the one having Q*(a)?
int is_optimal_count()
{
if(optimalcount!=0)
{
optimalcount--;//resetting the flag
return 1;
}
else
return 0;
}
//choosic action according to the probability
int probab()
{
if(gsl_rng_uniform(r)<=::Epsilon)
{
return (int)(gsl_rng_uniform(r)*arm_no);
}
else
return optimal;
}
};//end of bandit class
//driver code
int main()
{ gsl_rng_env_setup ();
::T = gsl_rng_default;
::r = gsl_rng_alloc (T);
//creating object of 3 bandir problem each for different epsilon
Bandit bandit1[bandit_no];
// Bandit bandit2[bandit_no];
// Bandit bandit3[bandit_no];
//plot arrays containing data poins for plotting graph
double plot1[play_no],plotpercent1[play_no];
// double plot2[play_no],plotpercent2[play_no];
// double plot3[play_no],plotpercent3[play_no];
double mean[play_no];
int i,j;
//sampling over 2000 bandits for 1000 plays
::Epsilon=0.10;
for(i=0;i<play_no;i++)
{
mean[i]=0;
plotpercent1[i]=0;
for(j=0;j<bandit_no;j++)
{
mean[i]=mean[i] + bandit1[j].play();
plotpercent1[i]+=bandit1[j].is_optimal_count();
}
plot1[i]=mean[i]/bandit_no;
plotpercent1[i]=plotpercent1[i]/bandit_no;
}
/*
::Epsilon=0.01;
for(i=0;i<play_no;i++)
{
mean[i]=0;
plotpercent2[i]=0;
for(j=0;j<bandit_no;j++)
{
mean[i]=mean[i] + bandit2[j].play();
plotpercent2[i]+=bandit2[j].is_optimal_count();
}
plot2[i]=mean[i]/bandit_no;
plotpercent2[i]=plotpercent2[i]/bandit_no;
}
::Epsilon=0.00;
for(i=0;i<play_no;i++)
{
mean[i]=0;
plotpercent3[i]=0;
for(j=0;j<bandit_no;j++)
{
mean[i]=mean[i] + bandit3[j].play();
plotpercent3[i]+=bandit3[j].is_optimal_count();
}
plot3[i]=mean[i]/bandit_no;
plotpercent3[i]= plotpercent3[i]/bandit_no;
}
for(int i=0;i<play_no;i++)
{ cout<<plot1[i]<<"\t"<<plot2[i]<<"\t"<<plot3[i]<<"\t"<<plotpercent1[i]*100<<"\t"<<plotpercent2[i]*100<<"\t"<<plotpercent3[i]*100<<endl;
}
*/
for(int i=0;i<1000;i++)
{
cout<<plot1[i]<<"\t"<<plotpercent1[i]*100<<endl;
}
return 0;
}//end of main
```

*Edited 3 Years Ago by anuran*