Artificial Intelligence for Robotics - Search Class

A star algorithm

# -----------
# User Instructions:
#
# Modify the the search function so that it becomes
# an A* search algorithm as defined in the previous
# lectures.
#
# Your function should return the expanded grid
# which shows, for each element, the count when
# it was expanded or -1 if the element was never expanded.
# 
# If there is no path from init to goal,
# the function should return the string 'fail'
# ----------

grid = [[0, 1, 0, 0, 0, 0],
        [0, 1, 0, 0, 0, 0],
        [0, 1, 0, 0, 0, 0],
        [0, 1, 0, 0, 0, 0],
        [0, 0, 0, 0, 1, 0]]
heuristic = [[9, 8, 7, 6, 5, 4],
             [8, 7, 6, 5, 4, 3],
             [7, 6, 5, 4, 3, 2],
             [6, 5, 4, 3, 2, 1],
             [5, 4, 3, 2, 1, 0]]

init = [0, 0]
goal = [len(grid)-1, len(grid[0])-1]
cost = 1

delta = [[-1, 0 ], # go up
         [ 0, -1], # go left
         [ 1, 0 ], # go down
         [ 0, 1 ]] # go right

delta_name = ['^', '<', 'v', '>']

def search(grid,init,goal,cost,heuristic):
    # ----------------------------------------
    # modify the code below
    # ----------------------------------------
    closed = [[0 for col in range(len(grid[0]))] for row in range(len(grid))]
    closed[init[0]][init[1]] = 1

    expand = [[-1 for col in range(len(grid[0]))] for row in range(len(grid))]
    action = [[-1 for col in range(len(grid[0]))] for row in range(len(grid))]

    x = init[0]
    y = init[1]
    g = 0
    f = g + heuristic[x][y]

    open = [[f, x, y, g]]

    found = False  # flag that is set when search is complete
    resign = False # flag set if we can't find expand
    count = 0

    while not found and not resign:
        if len(open) == 0:
            resign = True
            return "Fail"
        else:
            open.sort()
            open.reverse()
            next = open.pop()
            x = next[1]
            y = next[2]
            f = next[0]
            g = next[3]
            expand[x][y] = count
            count += 1

            if x == goal[0] and y == goal[1]:
                found = True
            else:
                for i in range(len(delta)):
                    x2 = x + delta[i][0]
                    y2 = y + delta[i][1]
                    if x2 >= 0 and x2 < len(grid) and y2 >=0 and y2 < len(grid[0]):
                        if closed[x2][y2] == 0 and grid[x2][y2] == 0:
                            g2 = g + cost
                            f = g2 + heuristic[x2][y2]
                            open.append([f, x2, y2, g2])
                            closed[x2][y2] = 1

    return expand

print search(grid,init,goal,cost, heuristic)

Value program

# ----------
# User Instructions:
# 
# Create a function compute_value which returns
# a grid of values. The value of a cell is the minimum
# number of moves required to get from the cell to the goal. 
#
# If a cell is a wall or it is impossible to reach the goal from a cell,
# assign that cell a value of 99.
# ----------

grid = [[0, 1, 0, 0, 0, 0],
        [0, 1, 0, 0, 0, 0],
        [0, 1, 0, 0, 0, 0],
        [0, 1, 0, 0, 0, 0],
        [0, 0, 0, 0, 1, 0]]
goal = [len(grid)-1, len(grid[0])-1]
cost = 1 # the cost associated with moving from a cell to an adjacent one

delta = [[-1, 0 ], # go up
         [ 0, -1], # go left
         [ 1, 0 ], # go down
         [ 0, 1 ]] # go right

delta_name = ['^', '<', 'v', '>']

def compute_value(grid,goal,cost):
    # ----------------------------------------
    # insert code below
    # ----------------------------------------

    # make sure your function returns a grid of values as 
    # demonstrated in the previous video.
# closed = [[0 for col in range(len(grid[0]))] for row in range(len(grid))]
# value = [[99 for col in range(len(grid[0]))] for row in range(len(grid))]
# x = goal[0]
# y = goal[1]
# g = 0
# 
# closed[x][y] = 1
# value[x][y] = g
# open = []
# open.append([g, x, y])
# while len(open)>0:
# open.sort()
# open.reverse()
# next = open.pop()
# 
# x = next[1]
# y = next[2]
# g = next[0]
# 
# for i in range(len(delta)):
# x2 = x + delta[i][0]
# y2 = y + delta[i][1]
# g2 = g + cost
# if x2>=0 and x2 < len(grid) and y2>=0 and y2 < len(grid[0]):
# if 0 == closed[x2][y2] and 0 == grid[x2][y2]:
# open.append([g2, x2, y2])
# closed[x2][y2] = 1
# value[x2][y2] = g2

    # ---------- The official program ----------------
    value = [[99 for col in range(len(grid[0]))] for row in range(len(grid))]
    change = True
    while change:
        change = False

        for x in range(len(grid)):
            for y in range(len(grid[0])):

                if goal[0] == x and goal[1] == y:
                    if value[x][y] > 0:
                        value[x][y] = 0
                        change = True

                elif grid[x][y] == 0:
                    for a in range(len(delta)):
                        x2 = x + delta[a][0]
                        y2 = y + delta[a][1]

                        if x2>=0 and x2 < len(grid) and y2>=0 and y2 < len(grid[0]) and grid[x2][y2]==0:
                            v2 = value[x2][y2] + cost

                            if v2 < value[x][y]:
                                change = True
                                value[x][y] = v2





    return value 


xx = compute_value(grid,goal,cost)
for i in range(len(xx)):
    print xx[i]

Optimum policy

# ----------
# User Instructions:
# 
# Write a function optimum_policy that returns
# a grid which shows the optimum policy for robot
# motion. This means there should be an optimum
# direction associated with each navigable cell from
# which the goal can be reached.
# 
# Unnavigable cells as well as cells from which 
# the goal cannot be reached should have a string 
# containing a single space (' '), as shown in the 
# previous video. The goal cell should have '*'.
# ----------

grid = [[0, 1, 0, 0, 0, 0],
        [0, 1, 0, 0, 0, 0],
        [0, 1, 0, 0, 0, 0],
        [0, 1, 0, 0, 0, 0],
        [0, 0, 0, 0, 1, 0]]
init = [0, 0]
goal = [len(grid)-1, len(grid[0])-1]
cost = 1 # the cost associated with moving from a cell to an adjacent one

delta = [[-1, 0 ], # go up
         [ 0, -1], # go left
         [ 1, 0 ], # go down
         [ 0, 1 ]] # go right

delta_name = ['^', '<', 'v', '>']

def optimum_policy(grid,goal,cost):
    # ----------------------------------------
    # modify code below
    # ----------------------------------------
    value = [[99 for row in range(len(grid[0]))] for col in range(len(grid))]
    policy = [[' ' for row in range(len(grid[0]))] for col in range(len(grid))]
    change = True

    while change:
        change = False

        for x in range(len(grid)):
            for y in range(len(grid[0])):
                if goal[0] == x and goal[1] == y:
                    if value[x][y] > 0:
                        value[x][y] = 0
                        policy[x][y] = '*'
                        change = True

                elif grid[x][y] == 0:
                    for a in range(len(delta)):
                        x2 = x + delta[a][0]
                        y2 = y + delta[a][1]

                        if x2 >= 0 and x2 < len(grid) and y2 >= 0 and y2 < len(grid[0]) and grid[x2][y2] == 0:
                            v2 = value[x2][y2] + cost

                            if v2 < value[x][y]:
                                change = True
                                value[x][y] = v2
                                policy[x][y] = delta_name[a]

    return policy

policy = optimum_policy(grid,goal,cost)
for i in range(len(policy)):
    print policy[i]

Left turn policy

# ----------
# User Instructions:
# 
# Implement the function optimum_policy2D below.
#
# You are given a car in grid with initial state
# init. Your task is to compute and return the car's 
# optimal path to the position specified in goal; 
# the costs for each motion are as defined in cost.
#
# There are four motion directions: up, left, down, and right.
# Increasing the index in this array corresponds to making a
# a left turn, and decreasing the index corresponds to making a 
# right turn.

forward = [[-1,  0], # go up
           [ 0, -1], # go left
           [ 1,  0], # go down
           [ 0,  1]] # go right
forward_name = ['up', 'left', 'down', 'right']

# action has 3 values: right turn, no turn, left turn
action = [-1, 0, 1]
action_name = ['R', '#', 'L']

# EXAMPLE INPUTS:
# grid format:
# 0 = navigable space
# 1 = unnavigable space 
#grid = [[1, 1, 1, 0, 0, 0],
# [1, 1, 1, 0, 1, 0],
# [0, 0, 0, 0, 0, 0],
# [1, 1, 1, 0, 1, 1],
# [1, 1, 1, 0, 1, 1]]
#
#init = [4, 3, 0] # given in the form [row,col,direction]
# # direction = 0: up
# # 1: left
# # 2: down
# # 3: right
# 
##init = [2, 3, 1]
# 
#goal = [2, 0] # given in the form [row,col]
#
#cost = [2, 1, 20] # cost has 3 values, corresponding to making 
# # a right turn, no turn, and a left turn


grid = [[0, 0, 0, 0, 0, 0, 0],
        [1, 1, 1, 1, 1, 1, 0],
        [0, 1, 0, 0, 0, 1, 0],
        [0, 1, 0, 1, 0, 1, 0],
        [0, 1, 0, 1, 0, 0, 0],
        [0, 1, 1, 1, 1, 1, 0],
        [0, 0, 0, 0, 0, 0, 0]]
init = [0, 0, 3]
goal = [4, 2]
cost = [10, 40, 65]

# EXAMPLE OUTPUT:
# calling optimum_policy2D with the given parameters should return 
# [[' ', ' ', ' ', 'R', '#', 'R'],
# [' ', ' ', ' ', '#', ' ', '#'],
# ['*', '#', '#', '#', '#', 'R'],
# [' ', ' ', ' ', '#', ' ', ' '],
# [' ', ' ', ' ', '#', ' ', ' ']]
# ----------

# ----------------------------------------
# modify code below
# ----------------------------------------

def optimum_policy2D(grid,init,goal,cost):

# value = [[[200000 for row in range(len(grid[0]))] for col in range(len(grid))], \
# [[200000 for row in range(len(grid[0]))] for col in range(len(grid))], \
# [[200000 for row in range(len(grid[0]))] for col in range(len(grid))], \
# [[200000 for row in range(len(grid[0]))] for col in range(len(grid))]] 
# policy = [[[' ' for row in range(len(grid[0]))] for col in range(len(grid))], \
# [[' ' for row in range(len(grid[0]))] for col in range(len(grid))], \
# [[' ' for row in range(len(grid[0]))] for col in range(len(grid))], \
# [[' ' for row in range(len(grid[0]))] for col in range(len(grid))]] 
# 
# policyvalue = [[[99 for row in range(len(grid[0]))] for col in range(len(grid))], \
# [[99 for row in range(len(grid[0]))] for col in range(len(grid))], \
# [[99 for row in range(len(grid[0]))] for col in range(len(grid))], \
# [[99 for row in range(len(grid[0]))] for col in range(len(grid))]] 
# 
# policy2D = [[' ' for row in range(len(grid[0]))] for col in range(len(grid))]
# change = True
# 
# for i in range(len(value)):
# value[i][goal[0]][goal[1]] = 0
# policy[i][goal[0]][goal[1]] = '*'
#
# while change:
# change = False
#
# for x in range(len(grid)):
# for y in range(len(grid[0])):
# if grid[x][y] == 0:
# for i in range(len(value)):
# for a in range(len(forward)):
# if abs(a-i) == 2:
# continue
# 
# x2 = x - forward[a][0]
# y2 = y - forward[a][1]
# 
# if x2 >= 0 and x2 < len(grid) and y2 >= 0 and y2 < len(grid[0]) and grid[x2][y2] == 0:
# v2 = value[i][x][y] + cost[(1+i-a)%4]
# 
# if v2 < value[a][x2][y2]: 
# change = True
# value[a][x2][y2] = v2
# policy[a][x2][y2] = action_name[(1+i-a)%4]
# policyvalue[a][x2][y2] = i-a
# 
# 
# 
#
# x = init[0]
# y = init[1]
# o = init[2]
# policy2D[x][y] = '#'
# while x != goal[0] or y != goal[1]: 
# x2 = x + forward[o][0]
# y2 = y + forward[o][1]
# o2 = o + policyvalue[o][x][y] 
# 
# print x2, y2, o2
# 
# policy2D[x2][y2] = policy[o][x][y]
# 
# x = x2
# y = y2
# o = o2
#
# policy2D[goal[0]][goal[1]] = '*'

    # -------- The official code --------------
    value = [[[999 for row in range(len(grid[0]))] for col in range(len(grid))], \
             [[999 for row in range(len(grid[0]))] for col in range(len(grid))], \
             [[999 for row in range(len(grid[0]))] for col in range(len(grid))], \
             [[999 for row in range(len(grid[0]))] for col in range(len(grid))]] 
    policy = [[[' ' for row in range(len(grid[0]))] for col in range(len(grid))], \
             [[' ' for row in range(len(grid[0]))] for col in range(len(grid))], \
             [[' ' for row in range(len(grid[0]))] for col in range(len(grid))], \
             [[' ' for row in range(len(grid[0]))] for col in range(len(grid))]] 

    policy2D = [[' ' for row in range(len(grid[0]))] for col in range(len(grid))]

    change = True
    while change:
        change = False
        # go throuth all grid cells and calculate values
        for x in range(len(grid)):
            for y in range(len(grid[0])):
                for orientation in range(4):
                    if goal[0] == x and goal[1] == y:
                        if value[orientation][x][y] > 0:
                            change = True
                            value[orientation][x][y] = 0
                            policy[orientation][x][y] = '*'
                    elif grid[x][y] == 0:
                        # caluculate the three ways to propagate value
                        for i in range(3):
                            o2 = (orientation+action[i])%4
                            x2 = x + forward[o2][0]
                            y2 = y + forward[o2][1]

                            if x2 >= 0 and x2 < len(grid) and y2 >= 0 and y2 < len(grid[0]) and grid[x2][y2] == 0:
                                v2 = value[o2][x2][y2]+cost[i]
                                if v2 < value[orientation][x][y]:
                                    value[orientation][x][y] = v2
                                    policy[orientation][x][y] = action_name[i]
                                    change = True



    x = init[0]
    y = init[1]
    orientation = init[2]

    policy2D[x][y] = policy[orientation][x][y]
    while policy[orientation][x][y] != '*':
        if policy[orientation][x][y] == '#':
            o2 = orientation
        elif policy[orientation][x][y] == 'R':
            o2 = (orientation-1)%4
        elif policy[orientation][x][y] == 'L':
            o2 = (orientation+1)%4
        x = x + forward[o2][0]
        y = y + forward[o2][1]
        orientation = o2
        policy2D[x][y] = policy[orientation][x][y]



    return policy2D


policy = optimum_policy2D(grid,init,goal,cost)
for i in range(len(policy)):
    print policy[i]
相关文章
相关标签/搜索