dynamic programming (subproblem memoization)

LIS   edit   dp   matrix   ssrp   allpairs   knapsack  

longest increasing subsequence

longest increasing subsequence
longest increasing subsequence
0 8 4 12 2 10 6 14 1 9 5 13 3 11 7 15
*   *         *  *                  *

is there a longer increasing subsequence ?

0 8 4 12 2 10 6 14 1 9 5 13 3 11 7 15
*        *    *      *    *         *

key observation:
   if this is index set of LIS
         *    *      *    *         *
  then this is index set of LIS
         that ends here:  +
         *    *      *    *

so: sufficient to know, for each index j,
length of LIS ending at index j
longest increasing subsequence
import random

def genseq(n):
  S = []
  for _ in range(n): S.append(random.randint(1,n))
  return S

def longest(S):
  L= [1]
  for k in range(1,len(S)):
    mymax = 0
    for j in range(k):
      if S[j]<S[k]: mymax = max(mymax,L[j])
    L.append(1+mymax)
  return L

S = genseq(20); print S
L = longest(S); print L

edit distance

edit distance
  • edit: insert, delete, substitute

  • editDistance(A,B): min number of edits to change A to B

  • editDistance( cheese, case ) ?

  • wiki

example
editDistance( cheese, case) at most 3:
    c h e e s e
    c h e s e
    c h s e
    c a s e
edit distance: equivalent definition
  • in each string, insert some number (maybe 0) gaps

  • align strings, and compare position by position

  • the cost of the alignment is the number of positions that

    • have a gap in either or both strings, or

    • have two different characters

example
c h e e s e
c a     s e
  * * *      3 edits
dp edit distance
# track edit distance of prefixes
# best alignment of A[1..i], B[1..j] ?
#  A[1.. i ]   -
#  B[1..j-1]  B[j]
#                        or
#  A[1..i-1]  A[j]
#  B[1.. j ]   -
#                        or
#  A[1..i-1]  B[i]
#  B[1..j-1]  A[j]
#
#  C[i][j]: best of above 3
import sys

def getStrings():
  print 'strings:  ',
  words = sys.stdin.readline().split()
  return words[0], words[1]

A, B = getStrings()
m, n = len(A), len(B)

C = [[0 for y in range(n+1)] for x in range(m+1)]
for r in range(m): C[r+1][0] = r+1
for c in range(n): C[0][c+1] = c+1
for r in range(m):
  for c in range(n):
    t = C[r][c]
    if A[r] != B[c]: t+= 1
    C[r+1][c+1] = min(t, 1+C[r][c+1], 1+C[r+1][c])

print '\n     -',
for c in range(len(B)): print B[c],
print '\n'
S = '-' + A
for r in range(len(S)):
  print '',S[r],' ',
  for c in range(n+1):
    print C[r][c],
  print ''
example
     - d o n u t
 -   0 1 2 3 4 5  initialize
 d   1 . . . . .
 u   2 . . . . .
 n   3 . . . . .
 k   4 . . . . .

 -   0 1 2 3 4 5
 d   1 ?          fill in:
 u   2    look left:    align     -     topchar
 n   3    look up:      align leftchar     -
 k   4    look up-left: align leftchar  topchar

 -   0 1 2 3 4 5
 d   1 0 1 2 3 4
 u   2 1 1 2 2 3
 n   3 2 2 1 2 3
 k   4 3 3 2 2 3   <- edit dist

dynamic programming

  • better name: subproblem memoization

  • when helpful?

    • many (exponential) subproblems

    • fewer (polynomial) critical subproblems

matrix chain multiplication

brute force
  • brute force: try all well-ordered parenthesizations

  • how many well-ordered parenthesizations of n pairs are there?

  • call this number T(n)

T(n)
n     parenthesizations                     T(n)
0           -                                 1
1     ()                                      1
2     ()()   (())                             2
3     ().... (..).. (....)
      T0*T2 + T1*T1 + T2* T0                  5
4     ()...... (..).... (....).. (......)
      T0* T3  + T1* T2 +  T2 *T1 +  T3  *T0   14
Catalan numbers
  • wiki berkeley Tom Davis

  • T(n)

    • = 1   for n = 0,1

    • = Σj=1 to n T(j-1) * T(n-j)   for n ≥ 2

    • = (2n choose n) / (n + 1)   ≈   4n / ( n3/2 √Π )

  • conclusion: brute force matrix-chain-mult Ω( 4n) time

mcm dp example
  A  *  B  *  C  *  D
[2x4] [4x5] [5x1] [1x3]

possible last multiplication:
 ( A )   ( B       C         D )
 ( A       B )   ( C         D )
 ( A       B       C )     ( D )

C[i][j]: min cost of M_i * ...  M_j

initialize:
     A     B     C     D
A    0
B          0
C                0
D                      0

next: min cost A*B =
  cost ((A) (B)) =
  cost(A*A) + cost(B*B) + 2*4*5 =
  0     +     0     +  40

     A     B     C     D
A    0    40
B          0    20
C                0    15
D                      0

next:  min cost A*B*C = min of
  (A)(BC) =  0 + 20 + 2*4*1 = 28
  (AB)(C) = 40 +  0 + 2*5*1 = 50
          = 28    using (A)(BC)

     A     B     C     D
A    0    40    28
B          0    20    32
C                0    15
D                      0

next:  min cost A*B*C*D = min of
  (A)(BCD) =  0 + 32 + 2*4*3 = 56
  (AB)(CD) = 40 + 15 + 2*5*3 = 85
  (ABC)(D) = 28 +  0 + 2*1*3 = 34
           = 34    using (ABC)(D)

     A     B     C     D
A    0    40    28    34
B          0    20    32
C                0    15
D                      0
matrix chain multiplication
import random
def genseq(n):
  S = []
  for _ in range(n): S.append(2+random.randint(1,n))
  return S

def mincost(M): #matrix dimensions
  n = len(M)-1
  C = [[0 for x in xrange(n)] for x in xrange(n)]
  for j in range(n-1):
    C[j][j+1] = M[j] * M[j+1] * M[j+2]
  for span in range(2,n):
    for i in range(n-span):
      vals = []
      j = i+span
      for t in range(i,i+span):
        z = C[i][t] + C[t+1][j] + M[i] * M[t+1] * M[j+1]
        #print i,t,j,z
        vals.append(z)
      C[i][j] = min(vals)
      del vals[:]
  return C

S = [8, 4, 7, 3, 9]
C = mincost(S); print C
S = genseq(9); print S
C = mincost(S); print C

single source reliable path

  • sssp using at most k edges

  • for each k, define D[v,k]: dist to v using at most k edges

  • D[v,k] = min { D[v,k-1], D[u,k-1] + wt(u,v) } (over all edges (u,v))

sssrp
def infinity(): return 999

def sssrp(G,source): #single source shortest reliable paths
  n = len(G)
  D = {}
  for v in G: D[v] = infinity()
  D[source] = 0
  show(D)
  for t in range(n):
    print '\nsource',source,' at most', t+1, 'edges'
    newD = {}
    for v in G: newD[v] = D[v]
    for v in G:
      for wd in G[v]:
        if wd[0]!=source:
          dfromv = D[v] + wd[1]
          if dfromv < newD[wd[0]]:
            newD[wd[0]] = dfromv
            print '   ',wd[0],'via',v,'now',dfromv
    for v in G: D[v] = newD[v]
    show(D)

def show(D):
  for v in sorted(G): print '%4s' % v,
  print ''
  for v in sorted(G): print '%4d' % D[v],
  print ''

G = {'S': [['A',1],['C',2]],
     'A': [['S',1],['B',2],['D',5]],
     'B': [['A',2],['D',1],['T',4]],
     'C': [['S',2],['D',3]],
     'D': [['S',5],['A',5],['B',1],['C',3],['T',1]],
     'T': [['B',4],['D',1]]}
sssrp(G,'S')

all pairs shortest paths

floyd-warshall all-pairs shortest paths
def fwapsp(G):
  n = len(G)
  D = [[0 for x in xrange(n)] for y in xrange(n)]
  for x in range(n):
    for y in range(n):  D[x][y] = G[x][y]
  show(D)
  for t in range(n):
    for x in range(n):
      for y in range(n):
        D[x][y] = min(D[x][y], D[x][t] + D[t][y])
    show(D)

def show(D):
  for x in range(len(D)):
    for y in range(len(D)):
      print D[x][y],
    print ''
  print ''
G = [ [0, 9, 1, 5, 4],
      [9, 0, 7, 8, 2],
      [1, 7, 0, 6, 1],
      [5, 8, 6, 0, 1],
      [4, 2, 1, 1, 0] ]
fwapsp(G)

0-1 knapsack

dp 0-1 knapsack, by total weight
import random, operator
def genvector(n):
  v = []
  for j in range(n): v.append(random.randint(n,2*n))
  return v

def showRows(a,b,K): # last b-a rows
  print '...'
  for w in range(a,b):
    print w, ; print K[w]

def sack(n,W,K):  # get 0/1 solution vector
  inSolution = [0 for j in xrange(n)]
  ndx,w  = n,W
  while (w>0) and (ndx>0):
    while (ndx>0) and (K[w][ndx]==K[w][ndx-1]):
      ndx-=1
    if (ndx>0):
      ndx -= 1
      inSolution[ndx] = 1
      w -= wt[ndx]
  return inSolution

def solveknapsack(val,wt,W): #python indexing, so -1 for wt[ ], val[ ]
  n = len(val)
  K = [[0 for j in xrange(n+1)] for w in xrange(W+1)]
  for j in range(1,n+1):
    for w in range(W+1):
      if wt[j-1]>w: K[w][j] = K[w][j-1]
      else: K[w][j] = max(K[w][j-1], K[w-wt[j-1]][j-1]+val[j-1])
  lastfew = 25
  showRows(W+1-lastfew,W+1,K) # print last few rows of K
  solvec = sack(n,W,K)
  print sum(map(operator.mul, solvec, val)), solvec

n = 10
W = (n*n*3)/4
val,wt = genvector(n),genvector(n)
print 'val  ', ; print(val)
print 'wt   ', ; print(wt)
solveknapsack(val,wt,W)