(sicp27)m2.70〜m2.71

;;m2.70
(define sample-tree2
  '((leaf NA 16) ((leaf YIP 9) (((leaf A 2) ((leaf WAH 1) (leaf BOOM 1) (WAH BOOM) 2) (A WAH BOOM) 4) ((leaf SHA 3) ((leaf JOB 2) (leaf GET 2) (JOB GET) 4) (SHA JOB GET) 7) (A WAH BOOM SHA JOB GET) 11) (YIP A WAH BOOM SHA JOB GET) 20) (NA YIP A WAH BOOM SHA JOB GET) 36))

;; (decode (encode  '(GET A JOB SHA NA NA NA NA NA NA NA WAH YIP YIP YIP YIP YIP YIP YIP YIP YIP SHA BOOM) sample-tree2) sample-tree2)
;; 3:user> => (GET A JOB SHA NA NA NA NA NA NA NA WAH YIP YIP YIP YIP YIP YIP YIP YIP YIP SHA BOOM)

;m2.71
;; 2**n - 1 = SUM(2**i)[i->n-1] が成り立つので
;; 最大頻度の記号の符号化の値は常に"0"
;; 一方、最小頻度の記号の符号化の値は"1"*(n-1)

;;==確かめ==

;;bitの長さを返す
(define (bit-cost sym tree)
  (length (encode (list sym) tree)))

(use srfi-1)
;;この関数を使ってcheckする
;; pos = 1が先頭, pos = sizeが末尾(最大頻度)
(define (check size pos)
  (let ((tree (generate-huffman-tree (zip (iota size 1) (f size)))))
    (bit-cost pos tree)))

;;2**(n-1)までの数列をつくる.
(define (f n)
  (define (iter result c state)
    (if (= c n)
        (reverse result)
        (iter (cons state result) (+ c 1) (* state 2))))
  (iter '() 0 1))

;;最低頻度の記号の長さ
(let ((lst (iota 10 1)))
(zip lst (map (cut check <> 1) lst)))
;; 7:user> => ((1 0) (2 1) (3 2) (4 3) (5 4) (6 5) (7 6) (8 7) (9 8) (10 9))

;;最高頻度の記号の長さ
(let ((lst (iota 10 1)))
(zip lst (map (lambda (x) (check x x)) lst)))
;; :user> => ((1 0) (2 1) (3 1) (4 1) (5 1) (6 1) (7 1) (8 1) (9 1) (10 1))

;;n=1のところ以外は予想通り。
;;n=1の時に0になってしまう理由を考えてみよう。
(bit-cost  'A (generate-huffman-tree '((A 1)))) ;;=> 0
;;encode->encode-symbol->いきなり(leaf ..)-> '()が返される.
;;(length '()) => 0