HuePig 0 Light Poster

I'm trying to learn planning, i have a half baked program and I'm stuck. I don't know whats wrong.
Well, here's an overview: I have an Agent who have goals and actions, doing an action will make the value of a goal lower. Each Action cost a energy.

Actions and Goals object are created and stored in the Agent Object.

from copy import deepcopy


class Action(object):

    def __init__(self, name, goals=None, costs=None):
        self.name = name
        #{} = empty dictionary
        self.goals = goals if goals else {}
        self.costs = costs if costs else {}

        #intialise time
        if 'time' not in self.costs:
            self.costs['time'] = 0

    def __deepcopy__(self, memo):
        # support deep copy
        return Action(self.name, deepcopy(self.goals), deepcopy(self.costs))

    def __str__(self):
        return '%s, %s, Energy: %s' % (self.name, str(self.goals), str(self.costs.get('Energy')))


class Goal(object):
    def __init__(self, name, value=0, rate=0.0):
        self.name = name
        self.value = value
        self.rate = rate

    def update(self, time):
        # update goal value by rate using time
        self.value += self.rate * time

    def _deepcopy__(self, memo):
        #supports deep copy
        return Goal(self.name, self.value, self.rate)

    def apply_action(self, action):
        self.update(action.costs['time'])
        if self.name in action.goals:
            self.value = max(self.value + action.goals[self.name], 0)

    def discontentment(self):
        return self.value**2

    def __str__(self):
        #pretty string format
        return 'Name: %s, Value %s, Rate: %s' % (self.name, str(self.value), str(self.rate))


class WorldState(object):
    id = 0

    def __init__(self, goals, actions, costs):
        self.goals = deepcopy(goals)
        self.actions = deepcopy(actions)
        self.costs = deepcopy(costs)
        self.reset_next_action()
        WorldState.id += 1
        self.id = WorldState.id

    def __deepcopy__(self, memo):
        # support deepcopy
        return WorldState(self.goals, self.actions, self.costs)

    def reset_next_action(self):
        self.current_actions = self.valid_actions()
        self.valid_action_len = len(self.current_actions)

    def apply_action(self, action, goals=None, costs=None):
        #use agent self.goals/costs if none provided
        goals = goals if goals else self.goals
        costs = costs if costs else self.costs

        #go through all goals of the action influences and change
        #apply this action to all goals

        for name, goal in goals.items():
            goal.apply_action(action)

        for cost, change in self.actions[action.name].costs.items():
            costs[cost] = costs[cost] + change

        return goals, costs

    def apply_action_reset(self, action):
        self.apply_action(action)
        self.reset_next_action()

    def try_action(self, action):
        goals = deepcopy(self.goals)
        costs = deepcopy(self.costs)
        return self.apply_action(action, goals, costs)

    def valid_actions(self):
        actions = []
        for name, action in self.actions.items():
            goals, costs = self.try_action(action)
            if all( [value >= 0 for key, value in costs.items() ]):
                actions.append(action)
        return actions

    def next_action(self):
        return self.current_actions.pop() if self.current_actions else None

    def discontentment(self):
        return sum([goal.discontentment() for name, goal in self.goals.items()])

    def __str__(self):
        goals = ', '.join(([ '%s %d' %  (v.name, v.value) for k,v in self.goals.items() ]))
        costs = str(self.costs)
        actions = ', '.join([a.name for a in self.valid_actions() ])
        next = '%d:%d' % (self.valid_action_len, len(self._current_actions))

        return 'id: %d, goals: {%s}, costs: %s, actions: {%s}, next: %s' % (self.id, goals, costs, actions, next)

#==============================================================================
class Agent(object):
    def __init__(self, goals=None, actions=None, costs=None):
        self.goals = deepcopy(goals) if goals else {}
        self.actions = deepcopy(actions) if actions else {}
        self.costs = deepcopy(costs) if costs else {}

        if 'time' not in self.costs:
            self.costs['time'] = 0

    def apply_action(self, action, goals=None, costs=None):
        goals = goals if goals else self.goals
        costs = costs if costs else self.costs

        for name, goal in goals.items():
            goal.apply_action(action)

        for cost, change in self.actions[action.name].costs.items():
            costs[cost] = costs[cost] + change
        return goals, costs

    def overall_discontentment(self, goals=None, costs=None):
        #calculates overall total discontentment
        goals = goals if goals else self.goals
        costs = costs if costs else self.costs
        return sum([goal.discontentment() for name, goal in goals.items()])

    def valid_actions(self):
        for name, action in self.actions.items():
            goals, costs = self.apply_action(action)
            if all( [value >= 0 for key, value in costs.items() ]):
                self.actions.append(action)
        return self.actions

    def action_discontentment(self, action):
        goals, costs = self.try_action(action)
        return self.overall_discontentment(goals, costs)

    def choose_action_goap(self, max_depth):
        #storage for world state and the actions used
        states = [[ WorldState(self.goals, self.actions, self.costs), Action('Base')]]

        #keep track of current best actions
        best_action = None
        best_value = 1000000
        best_plan = []

        verbose = True

        if verbose:
            print('Searching...')

        changed = True

        while states:
            current_value = states[-1][0].discontentment()

            if verbose and changed:
                changed = False
                level = len(states) - 1
                for i, state in enumerate(states[level:], start=level):
                    print('    '*i, '+->[', state[1].name,'] (', str(current_value) ,')')

            if len(states) >= max_depth:
                if current_value < best_value:
                    best_action = states[1][1]
                    best_value = current_value
                    best_plan = [state[1].name for state in states if state[1]] + [best_value]

                states.pop()
                continue

            next_action = states[-1][0].next_action()

            if next_action:
                new_state = deepcopy(states[-1][0])
                state.append([new_state, None])
                states[-1][1] = next_action
                new_state.apply_action_reset(next_action)
                changed = True
            else:
                states.pop()

        return best_action, best_plan


def print_actions(agent):
    print ('ACTIONS:')
    for k, v in agent.actions.items():
        print (" - ", v)


def print_action_evals(agent):
    print ('VALID ACTIONS (DISCONTENTMENT)')
    for action in agent.valid_actions():
        print ("- [%s] (%d " % (action.name, agent.action_discontentment(action)))


def print_goals(agent):
    print ('GOALS:')
    for k, v in agent.goals.items():
        print (' - ', v)

def run_drone_attack_goap(agent, depth=4):
    HR = '-'*60
    print_actions(agent)
    print('>> Start <<')
    while agent.costs['Energy'] > 0:
        print(HR)
        energy = agent.costs['Energy']

        print ('Current Goals (Discontentment= %d) ' % agent.overall_discontentment())
        print (' - ', ', '.join(['%s=%s' % (v.name, str(v.value)) for k, v in agent.goals.items()]))
        print (('Current Costs'), str(agent.costs))

        action, plan = agent.choose_action_goap(depth)
        print ('Best Action: \n => [%s] (%d)' % (action.name, agent.action_discontentment(action)))
        agent.apply_action(action)
        print ('New Goals (Discontentment=%d)' % agent.overall_discontentment())
        print (' - ' ', '.join('%s=%s' % (v.name, str(v.value)) for k, v in agent.goals.items()))
        depth -= 1

    print (HR)
    print ('Energy depleted...victory!')
    print (HR)
    print ('>> Done! <<\n\n')

#==============================================================================

def laser_shield_energy_example():
    #list that is then converted to dictionary
    goals = {
        Goal('Attack', 3, 1),
        Goal('Shield', 4, 0),
    }

    goals = {g.name: g for g in goals}

    actions = {
        Action('Attack with Laser', goals={'Attack': -2}, costs={'Energy': -3}),
        Action('Half Shield Recharge', goals={'Attack': -2}, costs={'Energy': -2}),
        Action('Full Shield Recharge', goals={'Shield': -4}, costs={'Energy': -3}),
    }

    actions = dict([(a.name, a) for a in actions])
    costs = {'Energy': 5}
    drone = Agent(goals, actions, costs)
    run_drone_attack_goap(drone)



if __name__ == '__main__':
    laser_shield_energy_example()