#!/bin/sh
exec ${GUILE-guile} -e '(ttn-do sgfc)' -s $0 "$@" # -*-scheme-*-
!#
;;; sgfc

;; Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010,
;;   2011 Thien-Thi Nguyen
;;
;; This file is part of ttn-do, released under the terms of the
;; GNU General Public License as published by the Free Software
;; Foundation; either version 3, or (at your option) any later
;; version.  There is NO WARRANTY.  See file COPYING for details.

;;; Commentary:

;; Usage: sgfc [--check-only] [SGF-FILE]
;;
;; Read SGF-FILE and write it out (if well-formed) to stdout.
;; If SGF-FILE is omitted or "-", read from standard input.
;; Optional arg ‘--check-only’ means don't write anything.

;;; Code:

(define-module (ttn-do sgfc)
  #:export (main
            *properties*
            *format-changes*
            read-sgf
            write-sgf
            children
            nodes
            get
            get-one
            analyze)
  #:use-module ((srfi srfi-13) #:select (substring/shared))
  #:use-module ((ttn-do zzz banalities) #:select (check-hv
                                                  qop<-args))
  #:use-module ((ttn-do mogrify) #:select (editing-buffer))
  #:use-module ((ttn-do zzz personally) #:select (accumulator
                                                  FE fs fso)))

;;; TODO: Expose this to documentation extractors.

;; This module provides the procs:
;; @example
;;  (read-sgf-file filename) => collection
;;  (write-sgf-file collection filename)
;;  (children tree) => list or #f
;;  (nodes tree) => list
;;  (get node prop) => list of values
;;  (get-one node prop) => value
;;  (analyze tree [prefix])
;; @end example
;;
;; @var{collection} is a list of one or more game trees.  A game tree is
;; a list w/ car @code{#t} and cdr a list of one or more nodes followed
;; by zero or more children game trees.  A node is a list of pairs, the
;; car of which is a keyword representing an SGF property and the cdr
;; the associated value, typically a string, number, two-element vector,
;; or a pair of one of these simpler types.  A pair for the property
;; value indicates a @dfn{composed type}.
;;
;; The vector elements are integers specifying column and row zero-based
;; coordinates on a board.  For example, @code{#(4 2)} represents
;; position @code{E3}.  For board size @var{n}, the vector
;; @code{#(@var{n} @var{n})} means ``PASS''.
;;
;; Additionally, this module also provides two data tables:
;; @example
;;  *properties*
;;  *format-changes*
;; @end example
;;
;; The data tables were originally from GNU Go 3.3.15 sources, massaged into
;; Scheme and surrounded by reverse-engineered algorithms one summer night,
;; sans net.cnxn.

(define sub substring/shared)
(define vr  vector-ref)

(define a-int (char->integer #\a))

(define (int<-board-letter c)
  (- (char->integer c) a-int))

(define (board-letter<-int n)
  (integer->char (+ n a-int)))

(define (happy! . whatever) #t)

(define (is-colon? c) (char=? #\: c))
(define (is-close? c) (char=? #\] c))

;; [SGF FF[4] - Smart Game Format]; FF[4] property index

;; A list of properties for Smart Game Format version 4,
;; i.e., ``SGF FF[4]''.  Each element has the form:
;;
;; @example
;; (CODE DESCRIPTION CONTEXT TYPE [SUBTYPE...])
;; @end example
;;
;; What all this means, we still need to document.  (TODO)
;;
;;-category: variable
;;
(define *properties*
  '((AB "Add Black"       setup list stone)
    (AE "Add Empty"       game  list point)
    (AN "Annotation"      game  simpletext)
    (AP "Application"     root  (simpletext . simpletext))
    (AR "Arrow"           -     list (point . point))
    (AS "Who adds stones" -     simpletext) ; (LOA)
    (AW "Add White"       setup list stone)
    (B  "Black"           move  move)
    (BL "Black time left" move  real)
    (BM "Bad move"        move  double)
    (BR "Black rank"      game  simpletext)
    (BT "Black team"      game  simpletext)
    (C  "Comment"         -     text)
    (CA "Charset"         root  simpletext)
    (CP "Copyright"       game  simpletext)
    (CR "Circle"          -     list point)
    (DD "Dim points"      -     elist point) ; (inherit)
    (DM "Even position"   -     double)
    (DO "Doubtful"        move  none)
    (DT "Date"            game  simpletext)
    (EV "Event"           game  simpletext)
    (FF "Fileformat"      root  #(number (1 . 4)))
    (FG "Figure"          -     (or none (number . simpletext)))
    (GB "Good for Black"  -     double)
    (GC "Game comment"    game  text)
    (GM "Game"            root  #(number (1 . 20)))
    (GN "Game name"       game  simpletext)
    (GW "Good for White"  -     double)
    (HA "Handicap"        game  number) ; (Go)
    (HO "Hotspot"         -     double)
    (IP "Initial pos."    game  simpletext) ; (LOA)
    (IT "Interesting"     move  none)
    (IY "Invert Y-axis"   game  simpletext)          ; (LOA)
    (KM "Komi"            game  real)                ; (Go)
    (KO "Ko"              move  none)
    (LB "Label"           -     list (point . simpletext))
    (LN "Line"            -     list (point . point))
    (MA "Mark"            -     list point)
    (MN "set move number" move  number)
    (N  "Nodename"        -     simpletext)
    (OB "OtStones Black"  move  number)
    (ON "Opening"         game  text)
    (OT "Overtime"        game  simpletext)
    (OW "OtStones White"  move  number)
    (PB "Player Black"    game  simpletext)
    (PC "Place"           game  simpletext)
    (PL "Player to play"  setup color)
    (PM "Print move mode" -     number) ; (inherit)
    (PW "Player White"    game  simpletext)
    (RE "Result"          game  simpletext)
    (RO "Round"           game  simpletext)
    (RU "Rules"           game  simpletext)
    (SE "Markup"          -     point)  ; (LOA)
    (SL "Selected"        -     list point)
    (SO "Source"          game  simpletext)
    (SQ "Square"          -     list point)
    (ST "Style"           root  #(number (0 . 3)))
    (SU "Setup type"      game  simpletext) ; (LOA)
    (SZ "Size"            root  (or number (number . number)))
    (TB "Territory Black" -     elist point) ; (Go)
    (TE "Tesuji"          move  double)
    (TM "Timelimit"       game  real)
    (TR "Triangle"        -     list point)
    (TW "Territory White" -     elist point) ; (Go)
    (UC "Unclear pos"     -     double)
    (US "User"            game  simpletext)
    (V  "Value"           -     real)
    (VW "View"            -     elist point) ; (inherit)
    (W  "White"           move  move)
    (WL "White time left" move  real)
    (WR "White rank"      game  simpletext)
    (WT "White team"      game  simpletext)
    ;; these were found in various (old) .sgf files
    (LT "Lose on time"    setup simpletext) ; Turtle matches
    (NB "Nordic Black?"   setup number)
    (NW "Nomadic White?"  setup number)
    (SY "Synthesis, Yes?" root  simpletext) ; Cgoban
    ))

;; A vector describing the changes between successive SGF versions.
;; This probably should be completely internalized.  Don't use it!
;;
(define *format-changes*                ; unused for now
  ;; vector elements:
  ;;  #f => no such format version (ERROR!)
  ;;  integer (same as index) => no info for this format version
  ;;  pair => (CHANGED . ADDED)
  (vector #f
          1
          2
          3
          '((DT FG LB RE RU SZ) .
            (AP AR AS DD IP IY LN OT PM SE SQ ST SU VW))))

;; SGF grammar:
;;
;; Collection = GameTree { GameTree }
;; GameTree   = "(" Sequence { GameTree } ")"
;; Sequence   = Node { Node }
;; Node       = ";" { Property }
;; Property   = PropIdent PropValue { PropValue }
;; PropIdent  = UcLetter { UcLetter }
;; PropValue  = "[" CValueType "]"
;; CValueType = (ValueType | Compose)
;; ValueType  = (None | Number | Real | Double | Color | SimpleText |
;;               Text | Point  | Move | Stone)
;;
;; The above grammar has a number of simple properties which enables us
;; to write a simpler parser:
;;   1) There is never a need for backtracking
;;   2) The only recursion is on gametree.
;;   3) Tokens are only one character

(define (property-specs prop)
  (cddr (or (assq-ref *properties* prop)
            (error "no such property:" prop))))

(define (compute-rproc prop)

  (define (base-type-objectifier symbol)
    (case symbol
      ((simpletext text) identity)
      ((color) (lambda (s)
                 (string->symbol (string-downcase s))))
      ((stone move point) (lambda (s)
                            (vector (int<-board-letter (string-ref s 0))
                                    (int<-board-letter (string-ref s 1)))))
      ((real double number) string->number)
      (else (error "badness!:" symbol))))

  (define (rproc<-base x f?)
    (cond ((symbol? x)
           (let ((o (base-type-objectifier x)))
             (lambda (n? r!)
               (or (n?) (error "expecting value:" (list prop x)))
               (o (r! f?)))))
          ((vector? x)
           (let ((o (base-type-objectifier (vr x 0)))
                 (ranges (cdr (vector->list x))))
             (lambda (n? r!)
               (or (n?) (error "expecting value:" (list prop x)))
               (let ((v (o (r! f?)))
                     (ok? #f))
                 (FE ranges (lambda (range)
                              (set! ok? (or ok? (<= (car range)
                                                    v
                                                    (cdr range))))))
                 (or ok? (error "out of range:" v))
                 v))))
          (else
           (error "badness!:" (list x f? prop)))))

  (define (rproc<-composition x)
    (let ((one (rproc<-base (car x) is-colon?))
          (two (rproc<-base (cdr x) is-close?)))
      (lambda (n? r!)
        (or (n?) (error "expecting value:" prop))
        (cons (one happy! r!) (two happy! r!)))))

  (define (rproc<- x f?)
    (if (pair? x)
        (rproc<-composition x)
        (rproc<-base x f?)))

  ;; do it!
  (let* ((full-spec (property-specs prop))
         (ls<- (if (memq (car full-spec) '(list elist))
                   (lambda (rproc)
                     (lambda (n? r!)
                       (let ((acc (accumulator)))
                         (let loop ((another? (n?)))
                           (if another?
                               (let ((v (rproc happy! r!)))
                                 (acc v)
                                 (loop (n?)))
                               (acc))))))
                   identity))
         (single (if (eq? identity ls<-)
                     (car full-spec)
                     (cadr full-spec))))
    (if (eq? 'none single)
        (lambda (n? r!)
          (and (n?) (error "no value expected:" prop)))
        (let* ((choice? (and (pair? single)
                             (pair? (cdr single))
                             (< 2 (length single))
                             (eq? 'or (car single))))
               (firstpick (and choice? (cadr single)))
               (optional? (eq? 'none firstpick))
               (fallback (if choice?
                             (caddr single)
                             single))
               (r-fallback (rproc<- fallback is-close?)))
          (ls<- (cond (optional?
                       (lambda (n? r!)
                         (and (n?) (r-fallback happy! r!))))
                      (choice?
                       ;; we only handle ‘(or FOO (FOO . BAR))’ because
                       ;; otherwise elaborate backtracking is required
                       (or (and (symbol? firstpick)
                                (pair? fallback)
                                (eq? firstpick (car fallback)))
                           (error "spec requires backtracking!:"
                                  (assq prop *properties*)))
                       (let* ((box (list #f))
                              (f? (lambda (c)
                                    (and (or (is-colon? c)
                                             (is-close? c))
                                         (begin
                                           (set-car! box c)
                                           #t))))
                              (r-one (rproc<-base firstpick f?)))
                         (lambda (n? r!)
                           (set-car! box #f)
                           (let ((v1 (r-one n? r!)))
                             (if (is-close? (car box))
                                 v1
                                 (let ((v2 (r-fallback happy! r!)))
                                   (cons v1 v2)))))))
                      (else
                       r-fallback)))))))

(define property-reader
  (let ((ht (make-hash-table (1+ 42))))
    ;; rv
    (lambda (prop)
      (or (hashq-ref ht prop)
          (let ((v (compute-rproc prop)))
            (hashq-set! ht prop v)
            v)))))

;; Return the collection of game trees parsed from reading @var{filename}.
;;
(define (read-sgf port)
  (let* ((s (editing-buffer port
              (buffer-string)))
         (len (string-length s))
         (pos 0))

    (define (at position)
      (string-ref s position))

    (define (sw position)
      (cond ((= len position) position)
            ((char-whitespace? (at position)) (sw (1+ position)))
            (else position)))

    (define (<> start end)
      (sub s start end))

    (define (<!> start end)
      (set! pos end)
      (sub s start end))

    (define (ncbv?)                     ; next char bears value?
      (set! pos (sw pos))
      (if (= len pos)
          #f
          (char=? #\[ (at pos))))

    (define (s1rt finish?)              ; skip 1 read 'til
      (set! pos (1+ pos))
      (if (= len pos)
          (error "unexpected EOF")
          (let ((start pos))
            (let loop ((end start))
              (if (finish? (at end))
                  (begin
                    (set! pos (if (is-close? (at end))
                                  (1+ end)
                                  end))
                    (<> start end))
                  (loop (1+ end)))))))

    (define (read-property)
      (set! pos (sw pos))
      (and (char<=? #\A (at pos) #\Z)
           (let* ((c1 (at (1+ pos)))
                  (plen (if (char<=? #\A c1 #\Z) 2 1))
                  (prop (string->symbol (<!> pos (+ pos plen))))
                  (reader (property-reader prop))
                  (value (reader ncbv? s1rt)))
             (cons prop value))))

    (define (read-node)
      (set! pos (sw pos))
      (if (= len pos)
          (error "unexpected EOF")
          (and (char=? #\; (at pos))
               (let ((acc (accumulator)))
                 (set! pos (1+ pos))
                 (let loop ((prop/value (read-property)))
                   (if prop/value
                       (begin
                         (acc prop/value)
                         (loop (read-property)))
                       (acc)))))))

    (define (read-gametree n)
      (set! pos (sw pos))
      (let ((acc (accumulator)))
        (acc (happy!))
        (let loop ((cur (at pos)))
          (case cur
            ((#\;)
             (let ((node (read-node)))
               (set! pos (sw pos))
               (acc node)
               (loop (at pos))))
            ((#\()
             (set! pos (1+ pos))
             (let ((subtree (read-gametree (1+ n))))
               (acc subtree)
               (loop (at pos))))
            ((#\))
             (set! pos (1+ pos))
             (acc))
            (else
             (set! pos (sw pos))
             (loop (at pos)))))))

    ;; do it!
    (set! pos (sw pos))
    (let ((acc (accumulator)))
      (let loop ()
        (or (and (not (= len pos))
                 (char=? #\( (at pos))
                 (begin
                   (set! pos (1+ pos))
                   (acc (read-gametree 0))
                   (loop)))
            (acc))))))

(define (compute-wproc prop)

  (define (base-type-stringifier symbol)
    (case symbol
      ((simpletext text) identity)
      ((color) (lambda (symbol)
                 (string-capitalize (symbol->string symbol))))
      ((stone move point) (lambda (v)
                            (string (board-letter<-int (vr v 0))
                                    (board-letter<-int (vr v 1)))))
      ((real double number) (lambda (n)
                              (cond ((number? n) n)
                                    ((eq? #f n) "-")
                                    (else (error "bad number:" n)))))
      (else (error "badness!:" symbol))))

  (define (wproc<-base x)
    (cond ((symbol? x)
           (let ((s (base-type-stringifier x)))
             (lambda (val)
               (display (s val)))))
          ((vector? x)
           (let ((s (base-type-stringifier (vector-ref x 0))))
             (lambda (val)
               (display (s val)))))
          (else
           (error "badness!:" (list x prop)))))

  (define (wproc<-composition x)
    (let ((one (wproc<-base (car x)))
          (two (wproc<-base (cdr x))))
      (lambda (v)
        (one (car v))
        (display ":")
        (two (cdr v)))))

  (define (wproc<- x)
    (if (pair? x)
        (wproc<-composition x)
        (wproc<-base x)))

  (let* ((full-spec (property-specs prop))
         (brace (lambda (wproc)
                  (lambda (x)
                    (display "[")
                    (wproc x)
                    (display "]"))))
         (ls<- (if (memq (car full-spec) '(list elist))
                   (lambda (wproc)
                     (lambda (val)
                       (FE val wproc)))
                   identity))
         (single (let ((v (if (eq? identity ls<-)
                              (car full-spec)
                              (cadr full-spec))))
                   ;; unlike for the read path, on the write path the ‘none’
                   ;; case does not make sense; hence this inline sanity check
                   (and (eq? 'none v) (error "insanity detected:" prop))
                   v))
         (choice? (and (pair? single)
                       (pair? (cdr single))
                       (< 2 (length single))
                       (eq? 'or (car single))))
         ;; likewise, ‘optional?’ does not make sense, however the upshot of
         ;; that case is to use ‘w-fallback’ anyway, so we need check neither
         ;; for nor against it
         (w-fallback (wproc<- (if choice?
                                  (caddr single)
                                  single))))
    (ls<- (brace (if choice?
                     ;; anyway, we should keep in mind this distillation of
                     ;; cases falls out of the ‘(or FOO (FOO . BAR))’
                     ;; restriction; the following mapping of a cons cell to
                     ;; ‘(FOO . BAR)’ needs to be updated if that restriction
                     ;; changes
                     (let ((w-one (wproc<-base (cadr single))))
                       (lambda (x)
                         (cond ((pair? x)
                                (w-one (car x))
                                (display ":")
                                (w-fallback (cdr x)))
                               (else
                                (w-one x)))))
                     w-fallback)))))

(define property-writer
  (let ((ht (make-hash-table (1+ 42))))
    ;; rv
    (lambda (prop)
      (or (hashq-ref ht prop)
          (let ((v (compute-wproc prop)))
            (hashq-set! ht prop v)
            v)))))

;; Write the @var{collection} of game trees to @var{filename}.
;;
(define (write-sgf collection)

  (define (is-gametree? x)
    (and (pair? x)
         (pair? (cdr x))
         (eq? (happy!) (car x))))

  (define (check-gametree! x)
    (or (is-gametree? x)
        (error "bad gametree:" x)))

  (define (display-gametree gametree)

    (define (display-property x)
      (let ((name (car x)))
        ;; artistic license
        (and (memq name '(C LB)) (newline))
        (display name)
        ((property-writer name) (cdr x))))

    (define (display-node x)
      (display ";")
      (FE x display-property)
      (and (pair? x) (pair? (cdr x)) (newline)))

    (display "(")
    (FE (cdr gametree)
        (lambda (x)
          ((if (is-gametree? x)
               display-gametree
               display-node)
           x)))
    (newline)
    (display ")"))

  ;; do it!
  (FE collection (lambda (x)
                   (check-gametree! x)
                   (display-gametree x))))

;;;---------------------------------------------------------------------------
;;; game tree utilities

;; Return a list of children game trees for @var{tree},
;; or #f if @var{tree} has no children.
;;
(define (children tree)                 ; => #f if none
  (let loop ((ls (cdr tree)))
    (and (not (null? ls))
         (if (and (pair? (car ls))
                  (eq? #t (caar ls)))
             ls
             (loop (cdr ls))))))

;; Return a list of nodes (minimum is one) for @var{tree}.
;;
(define (nodes tree)                    ; => list (minimum length 1)
  (let ((acc (accumulator)))
    (let loop ((ls (cdr tree)))
      (cond ((null? ls))
            ((and (pair? (car ls))
                  (eq? #t (caar ls))))
            (else (acc (car ls))
                  (loop (cdr ls)))))
    (acc)))

;; In @var{node}, return a list of all values for property @var{prop}.
;; The list may be empty or it may contain several values.
;;
(define (get node prop)
  (let ((acc (accumulator)))
    (let loop ((ls node))
      (cond ((null? ls))
            (else (and (pair? (car ls)) (eq? prop (caar ls))
                       (acc (cdar ls)))
                  (loop (cdr ls)))))
    (acc)))

;; In @var{node}, return the value associated with the first occurance
;; of @var{prop}.
;;
(define (get-one node prop)
  (let ((all (get node prop)))
    (and (pair? all)
         (car all))))

;; Display to the current output port a simple analysis of @var{tree}.
;; (This is more for debugging support than anything serious.)  Optional arg
;; @var{prefix} specifies the number of spaces to insert at the beginning of
;; the line, for recursive analysis of children trees.
;;
;;-args: (- 1 0 prefix)
;;
(define (analyze tree . prefix)
  (let* ((prefix (if (null? prefix)
                     0
                     (car prefix)))
         (fso (let ((hey (fs "HEY:~A " (make-string prefix #\space))))
                (lambda (s . args)
                  (display hey)
                  (apply fso s args)
                  (newline))))
         (all-nodes (nodes tree)))
    (fso "nodes: ~A" (length all-nodes))
    (FE all-nodes (lambda (props)
                    (and=> (get-one props #:B)
                           (lambda (B)
                             (fso "black: ~A" B)))
                    (and=> (get-one props #:W)
                           (lambda (W)
                             (fso "white: ~A" W)))))
    (and=> (children tree)
           (lambda (kids)
             (fso "kids: ~A" (length kids))
             (FE kids (lambda (kid)
                        (analyze kid (+ prefix 2))))))))

(define (main args)
  (check-hv args '((package . "ttn-do")
                   (version . "1.0")
                   (help . commentary)))
  (let* ((qop (qop<-args args '((check-only))))
         (sgf-file (and (not (null? (qop '())))
                        (car (qop '()))))
         (inp (if (or (not sgf-file)
                      (string=? "-" sgf-file))
                  (current-input-port)
                  (open-input-file sgf-file)))
         (tree (read-sgf inp)))
    (or (eq? inp (current-input-port))
        (close-port inp))
    (or (qop 'check-only)
        (write-sgf tree))))

;;; sgfc ends here