Tuesday, July 29, 2008

sicp exercise 2.70

;; Exercise 2.70. The following eight-symbol alphabet with associated relative frequencies was designed to efficiently encode the lyrics of 1950s rock songs. (Note that the "symbols" of an "alphabet" need not be individual letters.)

;; A     2    NA   16
;; BOOM  1    SHA   3
;; GET   2    YIP   9
;; JOB   2    WAH   1
;; Use generate-huffman-tree (exercise 2.69) to generate a corresponding Huffman tree, and use encode (exercise 2.68) to encode the following message:

;; Get a job

;; Sha na na na na na na na na

;; Get a job

;; Sha na na na na na na na na

;; Wah yip yip yip yip yip yip yip yip yip

;; Sha boom

;; How many bits are required for the encoding? What is the smallest number of bits that would be needed to encode this song if we used a fixed-length code for the eight-symbol alphabet?

;; Construct a leaf node: a three-element list holding the tag 'leaf,
;; the symbol, and the symbol's weight, in that order.
(define (make-leaf symbol weight)
  (cons 'leaf
        (cons symbol
              (cons weight '()))))
;; True when the first element of OBJECT is the tag 'leaf.
(define (leaf? object)
  (let ((tag (car object)))
    (eq? tag 'leaf)))
;; Selector: the symbol stored in leaf X (its second element).
(define (symbol-leaf x)
  (car (cdr x)))
;; Selector: the weight stored in leaf X (its third element).
(define (weight-leaf x)
  (car (cddr x)))

;; Build an internal node of a Huffman tree from two subtrees.
;;
;; The node is a four-element list: (left right symbols weight), where
;; symbols is the symbol list of LEFT followed by that of RIGHT, and
;; weight is the sum of the two subtrees' weights.
;;
;; The right subtree must occupy the second position: the selectors
;; right-branch, symbols and weight read elements 2, 3 and 4 of the
;; node (cadr, caddr, cadddr). Omitting it shifts every later field by
;; one and breaks all three selectors.
(define (make-code-tree left right)
  (list left
        right
        (append (symbols left) (symbols right))
        (+ (weight left) (weight right))))
;; Selector: the left subtree of an internal node (its first element).
(define (left-branch tree)
  (list-ref tree 0))
;; Selector: the second element of an internal node, which is where
;; the right subtree is expected to be stored.
(define (right-branch tree)
  (car (cdr tree)))
;; Return the list of symbols under TREE: a one-element list for a
;; leaf, otherwise the third element of the internal node.
(define (symbols tree)
  (cond ((leaf? tree) (cons (symbol-leaf tree) '()))
        (else (car (cddr tree)))))
;; Return the weight of TREE: the leaf's own weight for a leaf,
;; otherwise the fourth element of the internal node.
(define (weight tree)
  (cond ((leaf? tree) (weight-leaf tree))
        (else (car (cdddr tree)))))

;; Insert tree X into SET, a list of trees kept in increasing order of
;; weight. X is placed in front of the first element whose weight is
;; strictly greater than X's, or at the end if there is none.
(define (adjoin-set x set)
  (if (null? set)
      (list x)
      (let ((head (car set))
            (tail (cdr set)))
        (if (< (weight x) (weight head))
            (cons x set)
            (cons head (adjoin-set x tail))))))

;; Convert PAIRS, a list of (symbol frequency) lists, into a list of
;; leaves ordered by weight (via adjoin-set).
;;
;; Returns the empty list when PAIRS is empty. This base case is
;; required: without an explicit '() consequent the recursive step
;; would run on the empty list and the non-empty case would have no
;; value at all.
(define (make-leaf-set pairs)
  (if (null? pairs)
      '()
      (let ((pair (car pairs)))
        (adjoin-set (make-leaf (car pair)    ; symbol
                               (cadr pair))  ; frequency
                    (make-leaf-set (cdr pairs))))))

;; Repeatedly combine the first two trees of LEAF-SET into one code
;; tree and re-insert the result with adjoin-set, until a single tree
;; remains. Returns the empty list for an empty set and the sole
;; element for a one-element set.
(define (successive-merge leaf-set)
  (if (null? leaf-set)
      '()
      (let ((first-tree (car leaf-set))
            (others (cdr leaf-set)))
        (if (null? others)
            first-tree
            (let ((merged (make-code-tree first-tree (car others))))
              (successive-merge (adjoin-set merged (cdr others))))))))

;; Build a Huffman tree from PAIRS, a list of (symbol frequency)
;; lists: first turn the pairs into an ordered leaf set, then merge
;; that set down to a single tree.
(define (generate-huffman-tree pairs)
  (let ((ordered-leaves (make-leaf-set pairs)))
    (successive-merge ordered-leaves)))

;; Named aliases for the boolean literals; element-of? below refers to
;; `false` and `true` by name.
(define false #f)
(define true  #t)

;; Encode MESSAGE, a list of symbols, using the Huffman TREE.
;; Returns the concatenation of the bit lists produced by
;; encode-symbol for each symbol, in order.
;;
;; An empty message encodes to the empty list. This base case is
;; required: it terminates the recursion and gives append a list to
;; build on.
(define (encode message tree)
  (if (null? message)
      '()
      (append (encode-symbol (car message) tree)
              (encode (cdr message) tree))))

;; Return the list of bits that encodes SYMBOL in TREE.
;; Walks down from the root, emitting 0 when the symbol is found in
;; the left branch's symbol list and 1 when it is found in the right
;; branch's, stopping at a leaf. Signals an error when the symbol is
;; not in the tree.
(define (encode-symbol symbol tree)
  ;; Does BRANCH list SYMBOL among its symbols?
  (define (in-branch? branch)
    (element-of? symbol (symbols branch)))
  ;; Collect the bits along the path from NODE down to SYMBOL's leaf.
  (define (walk node)
    (cond ((leaf? node) '())
          ((in-branch? (left-branch node))
           (cons 0 (walk (left-branch node))))
          ((in-branch? (right-branch node))
           (cons 1 (walk (right-branch node))))
          (else (error "error not present"))))
  ;; Reject unknown symbols up front, before descending.
  (if (not (element-of? symbol (symbols tree)))
      (error "error not present")
      (walk tree)))
;; Is SYMBOL a member (by eq?) of the list SET?
;; Returns #t on the first match and #f once the list is exhausted.
(define (element-of? symbol set)
  (and (not (null? set))
       (or (eq? symbol (car set))
           (element-of? symbol (cdr set)))))

;; Symbol/frequency table for the eight-symbol alphabet of the exercise.
(define pairs
  '((a 2) (na 16) (boom 1) (Sha 3) (Get 2) (yip 9) (job 2) (Wah 1)))

;; The song to encode, laid out one lyric line per source line.
(define message
  '(Get a job
    Sha na na na na na na na na
    Get a job
    Sha na na na na na na na na
    Wah yip yip yip yip yip yip yip yip yip
    Sha boom))

;; Build the Huffman tree from the frequency table and print it.
(define huff-tree (generate-huffman-tree pairs))
(display huff-tree)
(newline)

;; Encode the song with that tree.
(define encoded-message (encode message huff-tree))

;; Print the message, then its bit encoding.
(display message)
(newline)
(display encoded-message)
(newline)

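;; number of bits with Huffman encoding = 84
;; number of bits with fixed-length encoding = 108
;;   (the message has 36 symbols, and an eight-symbol alphabet needs
;;    log2(8) = 3 bits per symbol: 36 * 3 = 108)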

;; Decode BITS, a list of 0s and 1s, into a list of symbols using the
;; Huffman TREE.
;;
;; decode-1 follows one branch per bit starting from the root. When it
;; lands on a leaf it emits that leaf's symbol and restarts from the
;; root for the remaining bits; otherwise it keeps descending.
;;
;; When the bits run out the result is the empty list. This base case
;; is required: it ends the recursion and gives cons a list to extend.
(define (decode bits tree)
  (define (decode-1 bits current-branch)
    (if (null? bits)
        '()
        (let ((next-branch
               (choose-branch (car bits) current-branch)))
          (if (leaf? next-branch)
              (cons (symbol-leaf next-branch)
                    (decode-1 (cdr bits) tree))
              (decode-1 (cdr bits) next-branch)))))
  (decode-1 bits tree))
;; Pick the subtree of BRANCH selected by BIT: the left branch for 0,
;; the right branch for 1. Any other bit value signals an error.
(define (choose-branch bit branch)
  (if (= bit 0)
      (left-branch branch)
      (if (= bit 1)
          (right-branch branch)
          (error "bad bit -- CHOOSE-BRANCH" bit))))

;; Round-trip check: decode the encoded bits with the same tree and
;; print the result for comparison with the original message.
(define decoded-message
  (decode encoded-message huff-tree))
(display decoded-message)
(newline)

;; decoding also works :-)

