;file: c45.lsp

;purpose: decision tree learning of rules

;programmer: Tom Shultz

;started: 20 jan 2000

;current: 26 may 2000


;Based on Mitchell's id3 code. Replace loop with do. Use a-lists instead of property lists,

;and put everything in functions to get rid of symbols and code lying around (could pose a

;problem in multiple runs). Fix bug in most-common-value. Add m parameter. To be

;used as a new decision node, an attribute must have at least 2 values, each of which

;classifies at least m examples. Decrease m to get deeper trees. Add information gain &

;information ratio default. Add continuous-valued attributes. Add random partitions.

;Add *examples*, *train-examples*, & *test-examples*.




(defvar *examples* nil

  "All examples as list of a-lists in which first item is name of example.")


(defvar *train-examples* nil

  "Training examples.")


(defvar *test-examples* nil

  "Test examples.")


(defvar *predicted-attribute* nil

  "The attribute whose values are being predicted. Specified in the examples procedure.")


(defvar *predicted-values* nil

  "Values of the predicted attribute. Specified in the examples procedure.")


(defvar *tree* nil

  "Holds final decision tree.")


(defvar *print-gain* nil

  "Set to t if gain or gain ratios are to be printed.")


(defvar *print-attribute* nil

  "Set to t if chosen attributes are to be printed.")


(defvar *print-partition* nil

  "Set to t if chosen partitions are to be printed.")


(defvar *gain-ratio* t

  "Set to nil if simple information gain is to be used.")


(defvar *continuous-attributes* nil

  "Holds name of continuous-valued attributes.

Set automatically in find-continuous-attributes.

Should be initiallized to nil in the top-level run procedure.")


(defvar *random-partitions* nil

  "Set to t for randomly-selected partitions, instead of best partitions.")




(defun get-value (attribute example examples)

   "(attribute example examples)

Get value of attribute in example."

   (cadr (assoc attribute (cdr (assoc example examples)))))


(defun print-example (example examples)

   "(example examples)

Print named example."

   (pprint (assoc example examples)))


(defun play-examples ()


Examples for play problem."

   (setf *examples*

         '((day1 (outlook sunny) (temperature hot) (humidity high) (wind weak) (play? no))

           (day2 (outlook sunny) (temperature hot) (humidity high) (wind strong) (play? no))

           (day3 (outlook overcast) (temperature hot) (humidity high) (wind weak) (play? yes))

           (day4 (outlook rain) (temperature mild) (humidity high) (wind weak) (play? yes))

           (day5 (outlook rain) (temperature cool) (humidity normal) (wind weak) (play? yes))

           (day6 (outlook rain) (temperature cool) (humidity normal) (wind strong) (play? no))

           (day7 (outlook overcast) (temperature cool) (humidity normal) (wind strong) (play? yes))

           (day8 (outlook sunny) (temperature mild) (humidity high) (wind weak) (play? no))

           (day9 (outlook sunny) (temperature cool) (humidity normal) (wind weak) (play? yes))

           (day10 (outlook rain) (temperature mild) (humidity normal) (wind weak) (play? yes))

           (day11 (outlook sunny) (temperature mild) (humidity normal) (wind strong) (play? yes))

           (day12 (outlook overcast) (temperature mild) (humidity high) (wind strong) (play? yes))

           (day13 (outlook overcast) (temperature hot) (humidity normal) (wind weak) (play? yes))

           (day14 (outlook rain) (temperature mild) (humidity high) (wind strong) (play? no)))

         *predicted-attribute* 'play?

         *predicted-values* '(yes no)))


(defun continuous-play-examples ()


Continuous examples for play problem."

   (setf *examples*

         '((day1 (outlook sunny) (temperature 75) (humidity 70) (wind strong) (play? yes))

           (day2 (outlook sunny) (temperature 80) (humidity 90) (wind strong) (play? no))

           (day3 (outlook sunny) (temperature 85) (humidity 85) (wind weak) (play? no))

           (day4 (outlook sunny) (temperature 72) (humidity 95) (wind weak) (play? no))

           (day5 (outlook sunny) (temperature 69) (humidity 70) (wind weak) (play? yes))

           (day6 (outlook overcast) (temperature 72) (humidity 90) (wind strong) (play? yes))

           (day7 (outlook overcast) (temperature 83) (humidity 78) (wind weak) (play? yes))

           (day8 (outlook overcast) (temperature 64) (humidity 65) (wind strong) (play? yes))

           (day9 (outlook overcast) (temperature 81) (humidity 75) (wind weak) (play? yes))

           (day10 (outlook rain) (temperature 71) (humidity 80) (wind strong) (play? no))

           (day11 (outlook rain) (temperature 65) (humidity 70) (wind strong) (play? no))

           (day12 (outlook rain) (temperature 75) (humidity 80) (wind weak) (play? yes))

           (day13 (outlook rain) (temperature 68) (humidity 80) (wind weak) (play? yes))

           (day14 (outlook rain) (temperature 70) (humidity 96) (wind weak) (play? yes)))

         *predicted-attribute* 'play?

         *predicted-values* '(yes no)))


(defun example-names ()


Get example names from examples."

   (mapcar #'car *train-examples*))


(defun attributes ()


Get attribute names from examples."

   (mapcar #'car (cdar *train-examples*)))




; Tree Representation: each non-terminal tree node is a list of the form

;  (attribute (value1 subtree1) (value2 subtree2)...)

;  where subtree-n is either a non-terminal node, or a value signifying the

;  target value associated with that terminal node.


(defun learn (examples attributes m)

   "(examples attributes m)

Attributes is a list of attributes (including target-attribute) that

may be included in the learned decision tree. Return a decision tree that

predicts target-attribute over examples. Examples are example names."

   (let (firstvalue information attribute partitions (attributes (remove *predicted-attribute* attributes)))

      (setq firstvalue (get-value *predicted-attribute* (car examples) *train-examples*))


            ;If every example has same predicted-attribute value, return it as leaf node.

            ((every #'(lambda (example) (eq firstvalue (get-value *predicted-attribute* example *train-examples*)))



            ;If no attributes, return most common predicted-attribute value.

            ((null attributes)

             (most-common-value *predicted-attribute* examples))

            ;Otherwise, pick best attribute, partition training data, and

            ;learn recursively to grow subtrees below this node.


              (setq information (information examples))

              (setq partitions

                (do ((atts attributes (cdr atts))

                     (parts nil (append (partitions (car atts) examples)


                    ((null atts) (reverse parts))))

              (setq attribute (if *random-partitions*

                                 (choose-random-partition partitions m)

                                 (choose-best-partition information partitions m)))

              (cons (first attribute)

                (do ((branches (cdr attribute) (cdr branches))

                     (results nil (cons (list (caar branches)

                                          (learn (cdar branches) 

                                            (remove (car attribute) attributes)



                    ((null branches) (reverse results))))))))


(defun partitions (attribute examples)

   "(attribute examples)

Return either a list of a partition of examples according to their values for attribute

or a list of binary partitions for continuous-valued attribute."

   (if (member attribute *continuous-attributes*)

      (binary-partitions attribute examples)

      (list (partition attribute examples))))


(defun partition (attribute examples)

   "(attribute examples)

Return a partition of examples according to their values for attribute.

Partition has form (attribute (value1 e11 e12 ...) (value2 e21 e22 ...)...)."

   (do ((examples examples (cdr examples))

        (result nil)

        (values nil)

        (value nil))

       ((null examples) (cons attribute result))

      (let ((example (car examples)))

         (setq value (get-value attribute example *train-examples*))

         (if (setq values (assoc value result))

            (rplacd values (cons example (cdr values)))

            (setq result (cons (list value example)



(defun binary-partition (attribute examples threshold)

   "(attribute examples threshold)

Return a binary partition for continuous attribute."

   (do ((examples (convert-examples examples attribute threshold) (cdr examples))

        (result nil)

        (values nil)

        (value nil))

       ((null examples) (cons attribute result))

      (let ((example (car examples)))

         (setq value (cadr example))

         (if (setq values (assoc value result))

            (rplacd values (cons (car example) (cdr values)))

            (setq result (cons (list value (car example))



(defun binary-partitions (attribute examples)

   "(attribute examples)

Return n-2 binary partitions of continuous attribute."

   (do ((thresholds (thresholds attribute examples) (cdr thresholds))

        (partitions nil (cons (binary-partition attribute examples (car thresholds))


       ((null thresholds) partitions)))


(defun find-continuous-attributes ()


Find continuous-valued attributes."

   (mapcar #'(lambda (x) (if (numberp (get-value x (caar *train-examples*) *train-examples*))

                            (setq *continuous-attributes* (cons x *continuous-attributes*))))



(defun sort-examples (examples attribute)

   "(examples attribute)

Sort examples on continuous attribute."

   (let* ((pairs (mapcar #'(lambda (x) (list (get-value attribute x *train-examples*) x))


          (sorted-values (sort (copy-list (mapcar #'car pairs)) #'<)))

      (do ((values sorted-values (cdr values))

           (sorted-examples nil (cons (cadr (assoc (car values) pairs))


          ((null values) (reverse sorted-examples)))))


(defun convert-examples (examples attribute threshold)

   "(examples attribute threshold)

Covert examples with continuous attribute to binary values upto & above threshold."

   (do ((examples examples (cdr examples))

        (converted nil))

       ((null examples) (reverse converted))

      (let* ((continuous-value (get-value attribute (car examples) *train-examples*))

             (binary-value (if (> continuous-value threshold)

                              (1+ threshold)


         (setq converted (cons (list (car examples) binary-value)



(defun thresholds (attribute examples)

   "(attribute examples)

Get list of midpoint thresholds for continuous attribute & examples.

Remove smallest & largest values to avoid class of 1 item. Remove duplicates."

   (do ((values (remove-duplicates (butlast (cdr (mapcar #'(lambda (x) (get-value attribute x *train-examples*))

                                                   (sort-examples examples attribute)))))

          (cdr values))

        (result nil (if (> (length values) 1)

                       (cons (round (/ (+ (first values) (second values)) 2))



       ((< (length values) 2) (reverse result))))


(defun choose-random-partition (partitions m)

  "(partitions m)

Choose a partition randomly, satisfying m criterion.

Partitions has form ((attribute1 (val1 e11 e12 ...) (val2 e21 e22 ...)...) (attribute2 ....)."

  (let* ((mpartitions (do ((parts partitions (cdr parts))

                           (mpartitions nil))

                          ((null parts) mpartitions)

                        (if (satisfy-m? (car parts) m)

                            (setq mpartitions (cons (car parts)


         (npartitions (length mpartitions))

         (choice (if (= 0 npartitions)


                   (random npartitions))))

    (nth choice mpartitions)))


(defun choose-best-partition (information partitions m)

   "(information partitions m)

Return partition with highest information gain or gain ratio.

Partitions has form ((attribute1 (val1 e11 e12 ...) (val2 e21 e22 ...)...) (attribute2 ....)

Optional standardization of gain by split entropy of partition.

Optional printing of gain & attribute. Change highest-expected-gain to negative to allow

learning with 0 gain."

   (let ((highest-expected-gain -0.000001) gain best-partition)

      (do ((partitions partitions (cdr partitions)))

          ((null partitions))

         (let ((partition (car partitions)))

            (when (and (> (setq gain

                            (if *gain-ratio*

                               (let ((split-entropy (split-entropy partition)))

                                  (if (= split-entropy 0)


                                     (/ (- information (expected-entropy partition))


                               (- information (expected-entropy partition))))


                       (satisfy-m? partition m))

               (setq highest-expected-gain gain)

               (setq best-partition partition))

            (if *print-gain*

               (format t "gain of ~a = ~,3F~%"

                 (car partition) gain))))

      (if *print-attribute*

         (format t "choose attribute ~a ~%"

           (car best-partition)))

      (if *print-partition*

         (format t "partition ~a ~%"

           (cdr best-partition)))



(defun satisfy-m? (partition m)

   "(partition m)

Does partition satisfy m criterion? Are there >1 branches?

Does each branch classify at least m examples?"

   (and (> (length partition) 2)

        (every #'(lambda (branch) (> (length branch) m))

          (cdr partition))))


(defun information (examples)


Average expected required information for classification before partition."

   (do ((predicted-values *predicted-values* (cdr predicted-values))

        (sum 0))

       ((null predicted-values) sum)

      (setq sum

        (+ sum

           (let ((vcount (do ((exs examples (cdr exs))

                              (n 0 (if (eq (car predicted-values)

                                         (get-value *predicted-attribute* (car exs) *train-examples*))

                                      (1+ n)


                             ((null exs) n)))

                 (proportion 0))

              (setq proportion (/ vcount (length examples)))

              (* -1.0 proportion

                 (if (zerop proportion) 0 (log proportion 2))))))))


(defun expected-entropy (partition)


Return sum over possible values of predicted-attribute the proportion of examples

with this value x sample entropy of this partition."

   (do ((part (cdr partition) (cdr part))

        (sum 0))

       ((null part) sum)

      (let ((p (car part)))

         (setq sum (+ sum

                      (* (/ (length (cdr p)) (length *train-examples*))

                         (do ((predicted-values *predicted-values* (cdr predicted-values))

                              (sum1 0))

                             ((null predicted-values) sum1)

                            (setq sum1

                              (+ sum1

                                 (let ((vcount (do ((examples (cdr p) (cdr examples))

                                                    (n 0 (if (eq (car predicted-values)

                                                               (get-value *predicted-attribute* (car examples) *train-examples*))

                                                            (1+ n)


                                                   ((null examples) n)))

                                       (proportion 0))

                                    (setq proportion (/ vcount (length (cdr p))))

;                                    (format t "p: ~S, vcount: ~d, proportion: ~S~%"

;                                      p vcount proportion)

                                    (* -1.0 proportion

                                       (if (zerop proportion) 0 (log proportion 2)))))))))))))


(defun split-entropy (partition)


Return split entropy for partition."

   (let ((nexamples 0)

         (nexamples-per-partition nil)

         (proportions nil))

      (do ((part (cdr partition) (cdr part))

           (nexs-per-part 0)

           (sum 0))

          ((null part) (progn

                         (setq nexamples-per-partition (reverse nexs-per-part))

                         (setq nexamples sum)))

         (let ((n (- (length (car part)) 1)))

            (setq nexs-per-part

              (cons n


            (setq sum (+ sum n))))

      (do ((parts nexamples-per-partition (cdr parts))

           (props nil (cons (float (/ (car parts) nexamples))


          ((null parts) (setq proportions (reverse props))))

      (do ((props proportions (cdr props))

           (sum 0 (+ sum (* (car props) (log (car props) 2)))))

          ((null props) (* -1 sum)))))


(defun most-common-value (attribute examples)

   "(attribute examples)

Return most common predicted-attribute value in examples.

Mitchell's code is buggy. Add test for listp."

   (let ((length 0)

         (longest nil))

      (do ((partition (partition attribute examples) (cdr partition)))

          ((null partition))

         (let ((p (car partition)))

            (if (listp p)

               (when (> (length p)


                  (setq length (length p)

                    longest p)))))

      (car longest)))




(defun print-tree (tree &optional (depth 0))

   "(tree &optional (depth 0))

Print tree in readable form. Fixed bug for 1 leaf."

   (tab depth)

   (if (listp tree)


        (format t "~A~%" (first tree))

        (do ((rest-tree (cdr tree) (cdr rest-tree)))

            ((null rest-tree))

           (let ((subtree (car rest-tree)))

              (tab (+ depth 1))

              (format t "= ~A" (first subtree))

              (if (atom (second subtree))

                 (format t " => ~A~%" (second subtree))



                   (print-tree (second subtree) (+ depth 5)))))))

      (print tree))





(defun run (&optional (m 1))

  "(&optional (m 1))

Run learn on play examples. m is minimum number of examples classified in

each branch of a decision node."



  (setq *print-attribute* t

      *print-gain* t

      *print-partition* t

      *continuous-attributes* nil

      *gain-ratio* t

      *random-partitions* nil

      *train-examples* *examples*)

  (setq *tree* (learn (example-names)




  (print-tree *tree*))


(defun run-continuous (&optional (m 1))

  "(&optional (m 1))

Run learn on continuous-play examples. m is minimum number of examples classified in

each branch of a decision node."



  (setq *train-examples* *examples*

      *continuous-attributes* nil)


  (setq *print-attribute* t

      *print-gain* t

      *print-partition* t

      *gain-ratio* t

      *random-partitions* nil)

  (setq *tree* (learn (example-names)




  (print-tree *tree*))


;classify example


(defun classify (example tree examples)

  "(example tree examples)

Classify example-name in tree."

  (let (value branch)

    (if (atom tree)

        (return-from classify tree))

    (setq value (get-value (first tree) example examples))

    (if (numberp value)

        (setq branch (second (assoc (closer value (cdr tree)) (cdr tree))))

      (setq branch (second (assoc value (cdr tree)))))

    (classify example branch examples)))


(defun closer (value branch)

  "(value branch)

Return value in branch that is closer to value."

  (let* ((left (car branch))

         (right (cadr branch))

         (left-value (car left))

         (right-value (car right))

         (left-difference (abs (- value left-value)))

         (right-difference (abs (- value right-value))))

    (if (< left-difference right-difference)




;(classify 'day14 *tree* *train-examples*)


(defun test (examples)


Test examples for correctness, returning list of correct and incorrect frequencies & proportion


   (do ((exs examples (cdr exs))

        (ncorrect 0)

        (nwrong 0))

       ((null exs) (list ncorrect nwrong (float (/ ncorrect (+ ncorrect nwrong)))))

      (let* ((ex (car exs))

             (response (classify (car ex) *tree* examples))

             (correct (cadr (assoc *predicted-attribute* (cdr ex)))))

         (if (eq response correct)

            (setq ncorrect (1+ ncorrect))

            (setq nwrong (1+ nwrong))))))


;rules (that have no variables)







;(<- (play? yes) (outlook overcast))

;(<- (play? yes) (and (outlook rain) (wind weak)))

;(<- (play? no) (and (outlook rain) (wind strong)))

;(<- (play? no) (and (outlook sunny) (humidity high)))

;(<- (play? yes) (and (outlook sunny) (humidity normal)))


;random selection of examples


(defun integers (x y)

  "(x y)

Return integers from x to y inclusive."

  (do ((i x (1+ i))

       (result nil (cons i result)))

      ((= i (1+ y)) (reverse result))))


(defun random-range (n x y)

  "(n x y)

Return n random integers between x and y inclusive without replacement."

  (do ((i (1+ (- y x)) (1- i))

       (result nil)

       (pending (integers x y)))

      ((= (length result) n) result)

    (let* ((vec (make-array (length pending) :initial-contents pending))

           (index (random i))

           (selection (aref vec index)))

      (setq pending (remove selection pending))

      (setq result (cons selection result)))))


(defun select-random (n lst)

  "(n lst)

Randomly select n items from lst without replacement."

  (let ((size (length lst)))

    (do ((selections (random-range n 0 (1- size)) (cdr selections))

         (result nil))

        ((null selections) result)

      (push (car (nthcdr (car selections) lst)) result))))


;count rules


(defun count-leaves (tr)


Count leaves in tree."

  (cond ((null tr) 0)

        ((and (atom tr)

              (member tr *predicted-values*))


        ((atom tr) 0)

        (t (+ (count-leaves (car tr))

              (count-leaves (cdr tr))))))


(defun run-continuous-random ()


Run learn on continuous-play examples with random partitions."


  (setq *print-attribute* nil

      *print-gain* nil

      *print-partition* nil

      *gain-ratio* t

      *random-partitions* t)


  (setq *tree* (learn (example-names)




  (print-tree *tree*)

  (let ((count (count-leaves *tree*))

        (correct (test *examples*)))

    (print count)

    (print correct)

    (cons count correct)))


(defun run-n-continuous-random (&optional (n 1))

  "(&optional (n 1))

Run learn n times on continuous-play examples with random partitions."


  (setq *print-attribute* nil

      *print-gain* nil

      *print-partition* nil

      *gain-ratio* t

      *random-partitions* t)


  (setq *train-examples* *examples*

      *continuous-attributes* nil)


  (do ((i 0 (1+ i))

       (results nil (cons (run-continuous-random) results)))

      ((= i n) (lists->file results "c:\\My Documents\\models\\c45\\continuous random rules"))))


;(run-n-continuous-random 20)


;saving to file


(defun lists->file (lst file &optional (separator " "))

  "(lst file &optional (separator " "))

Save reverse lst in file. Items in flat lst are printed 1 per line.

Separator is used to separate items on a line for embedded lst."


    (output-stream file :direction :output)

    (do ((items (reverse lst) (cdr items)))

        ((null items) 'done)

      (let ((sub-item (car items)))

        (if (listp sub-item)

          (print-line sub-item separator output-stream)

          (format output-stream "~a ~%"



(defun print-line (lst &optional (separator " ") output-stream)

  "(lst &optional (separator " ") output-stream)

Print each item in list on a line separated by separator.

Then go to new line."

  (do ((lst lst (cdr lst)))

      ((null lst) (terpri output-stream))

    (princ (car lst) output-stream)

    (princ separator output-stream)))




;(test *examples*)