import numpy
hugeNumber = float("inf")
# Initialize all needed parameters and data here
stages = # number of stages
f = numpy.zeros([stages + 2, (highest - numbered state) + 1])
x = numpy.zeros([stages + 1, (highest - numbered state) + 1], dtype = int)
# If not zero, set each f[stages+1,i] to the terminal value of being in state i at the end
# For states that are not allowed, use hugenumber for minimization, -hugenumber for maximization
for t in range(stages, 0, -1):
# If necessary, determine which states are possible at stage t
# (This is usually not necessary in the examples in this class)
for i in (states that are possible at stage t):
# Determine set of decisions d which are possible from this state and stage
value = # -hugeNumber if maximizing or hugenumber if minimizing
for d in (set of allowed decisions d):
j = # resulting next state
# Compute immediate costs and/or rewards from decision d
moveValue = (immediateCostsAnd/orRewards) + f[t + 1, j]
if moveValue > value: # use < instead of > if minimizing
value = moveValue
bestMove = d
# End of d loop
f[t, i] = value
x[t, i] = bestMove
# End of i loop
# End of t loop
print("Optimal solution is " + str(f[1, (initial state)]))
print("(something explanatory about the solution)")
i = # initial state
for t in range(1, stages + 1):
print("Some explanation" + str(x[t, i]) + " some more explanation " + str(t))
i = # compute next state based on decision x[t,i] being taken
# Optionally, you can print something about ending state here