#!/bin/sh
exec ${GUILE-guile} -e "(ttn-do magic)" -s $0 "$@" # -*- scheme -*-
!#
;;; magic

;; Copyright (C) 2008, 2009, 2010 Thien-Thi Nguyen
;;
;; This file is part of ttn-do, released under the terms of the
;; GNU General Public License as published by the Free Software
;; Foundation; either version 3, or (at your option) any later
;; version.  There is NO WARRANTY.  See file COPYING for details.

;;; Commentary:

;; Usage: magic [options] [FILE...]
;;
;; Display results of scanning FILE, or "data" if type and other
;; attributes cannot be determined.  By default definitions are
;; read from ~/.magic (see magic(5) for information on its format),
;; or, preferentially, from ~/.magic.ttn-do-magic, if it exists.
;;
;; Options are:
;;
;;  -m, --magic-file MAGIC  -- read definitions from MAGIC instead
;;                             of default ~/.magic
;;  -C, --compile           -- used in conjunction with -m MAGIC
;;                             to write out MAGIC.ttn-do-magic
;;
;; The output is similar, but not identical, to that of file(1).
;;
;; Presently, this program understands ~/.magic.ttn-do-magic in
;; either .ttn-do-magic-1 or .ttn-do-magic-2 format, and writes
;; in .ttn-do-magic-2 format only.

;;; Code:

(define-module (ttn-do magic)
  #:export (main)
  #:use-module ((ttn-do zzz banalities) #:select (check-hv
                                                  qop<-args))
  #:use-module ((ice-9 mapping) #:select (hash-table-mapping
                                          mapping-ref
                                          mapping-set!))
  #:use-module ((ice-9 editing-buffer) #:select (find-file-read-only
                                                 editing-buffer))
  #:use-module ((ttn-do scm2bin) #:select ((write-punily . write-string)))
  #:use-module ((ttn-do zzz personally) #:select (accumulator
                                                  forms<-port
                                                  FE fs fso)))

(define DEBUG? (getenv "DEBUG"))

(define (NYI tag . args)                ; "Not Yet Implemented"
  (define (spew)
    (fso "NYI: ~A" tag)
    (FE args (lambda (x)
               (display #\space)
               ((if (string? x) write-string write) x)))
    (newline))
  (with-output-to-port (current-error-port) spew))

(define MAGIC (hash-table-mapping #:test string=?))

(define (compiled-name filename)
  (string-append filename ".ttn-do-magic"))

(define (rule-control magic-file compiled-ok)

  (define (make-kpair)
    (let* ((compiled (compiled-name magic-file))
           (acc (accumulator))
           (compiled? (and compiled-ok (file-exists? compiled)))
           (init (if compiled? "" (find-file-read-only magic-file))))
      (and compiled? (let ((p (open-input-file compiled)))
                       (or (memq (read p) '(.ttn-do-magic-1
                                            .ttn-do-magic-2))
                           (error "bad compiled magic file:" compiled))
                       (FE (forms<-port p) acc)
                       (close-port p)))
      (cons acc (editing-buffer init))))

  (let ((kpair (or (mapping-ref MAGIC magic-file #f)
                   (mapping-set! MAGIC magic-file (make-kpair)))))

    (editing-buffer (cdr kpair)

      (define (l-a/move rx)
        (and (looking-at rx)
             (goto-char (match-end 0))))

      (define (skip-ws)
        (l-a/move  "[ \t]+"))

      (define (read-offset)
        (define (n<- radix b e)
          (string->number (buffer-substring b e) radix))
        (let ((p (point)))
          (define (n<-hex)
            (and (l-a/move "0x[0-9a-fA-F]+")
                 (n<- 16 (+ 2 p) (point))))
          (define (n<-oct)
            (and (l-a/move "0[0-7]+")
                 (n<- 8 (1+ p) (point))))
          (define (n<-dec)
            (and (l-a/move "-*[0-9]+")
                 (n<- 10 p (point))))
          (define (n<-k)
            (or (n<-hex) (n<-oct) (n<-dec)))
          (cond ((n<-hex))
                ((n<-dec))
                ((char=? #\& (char-after))
                 (forward-char 1)
                 (list '& (read-offset)))
                ((char=? #\( (char-after))
                 (forward-char 1)
                 (let ((more-indirection? (char=? #\& (char-after)))
                       (x #f) (y #f))
                   (and more-indirection? (forward-char 1))
                   (set! p (point))
                   (set! x (n<-k))
                   (cond ((l-a/move "[.]*([bslBSL]*)([-+*/%&|^]*)")
                          (set! p (point))
                          (let ((sz (match-string 1))
                                (op (match-string 2)))
                            (set! y (if (string-null? op)
                                        '()
                                        (list (string->symbol op)
                                              (if (char=? #\( (char-after))
                                                  (begin
                                                    (forward-char 1)
                                                    (set! p (point))
                                                    (let ((rv (list (n<-k))))
                                                      (forward-char 1)
                                                      rv))
                                                  (n<-k)))))
                            (or (string-null? sz)
                                (set! y (cons (string->symbol sz) y)))
                            (forward-char 1)
                            (cons (if more-indirection?
                                      (list '& x)
                                      x)
                                  y)))
                         (else
                          (if more-indirection?
                              (list '& x)
                              x)))))
                (else
                 (l-a/move "[^ \t]+")
                 (match-string 0)))))

      (define (read-c-integer)
        (cond ((l-a/move "0x([0-9a-fA-F]+)")
               (string->number (match-string 1) 16))
              ((l-a/move "0[0-7]+")
               (string->number (match-string 0) 8))
              (else
               (let ((rv (read (buffer-port))))
                 ;; Hmm, seems `read' eats one char after token.
                 (forward-char -1)
                 rv))))

      (define (read-type)
        (re-search-forward "[ \t]+([a-z16]+)[&/ \t]")
        (let ((one (string->symbol (match-string 1))))
          (goto-char (match-end 1))
          (if (looking-at "[ \t]")
              one
              (cons one (begin
                          (forward-char 1)
                          (if (eq? 'string one)
                              (map (lambda (c)
                                     (string->symbol (make-string 1 c)))
                                   (string->list
                                    (symbol->string
                                     (read (buffer-port)))))
                              (read-c-integer)))))))

      (define (read-string-until delims . fc?)
        (set! fc? (and (not (null? fc?)) (car fc?)))
        (let* ((s (accumulator))
               (weird #f)
               (fc (and fc? (case (char-after)
                              ((#\< #\= #\> #\!)
                               (forward-char 1)
                               (string->symbol (make-string 1 (char-before))))
                              ((#\x)
                               (set! weird #t) ; sigh
                               (forward-char 1)
                               '>)
                              (else '=)))))
          (define (char<-nstr radix str)
            (integer->char (string->number str radix)))
          (let loop ((c (char-after)))
            (cond ((memq c delims))
                  ((char=? #\\ c)
                   (forward-char 1)
                   (s (cond ((looking-at "[0-3][0-7][0-7]")
                             (forward-char 3)
                             (char<-nstr 8 (match-string 0)))
                            ((looking-at "[0-7]")
                             (forward-char 1)
                             (char<-nstr 8 (match-string 0)))
                            ((looking-at "x[0-9a-fA-F][0-9a-fA-F]")
                             (forward-char 3)
                             (char<-nstr 16 (buffer-substring
                                             (- (point) 2) (point))))
                            ((looking-at "x[0-9a-fA-F][^0-9a-fA-F]")
                             (forward-char 2)
                             (char<-nstr 16 (buffer-substring
                                             (1- (point)) (point))))
                            (else
                             (forward-char 1)
                             (case (char-before)
                               ((#\t) #\ht)
                               ((#\n) #\newline)
                               ((#\f) #\np)
                               ((#\a) #\bel)
                               ((#\b) #\bs)
                               ((#\r) #\cr)
                               ((#\v) #\vt)
                               ((#\space) #\space)
                               (else (char-before))))))
                   (loop (char-after)))
                  (else
                   (s c)
                   (forward-char 1)
                   (loop (char-after)))))
          (set! s (apply string (s)))
          (and weird (not (string-null? s))
               (begin (set! fc '=)
                      (set! s (string-append "x" s))))
          (if fc? (cons s fc) s)))

      (define (read-value typ)
        (define (s/fc)
          (read-string-until '(#\space #\ht #\newline) #t))
        (skip-ws)
        (case (if (pair? typ)
                  (car typ)
                  typ)
          ((string)
           (let* ((pair (s/fc))
                  (s (car pair))
                  (fc (cdr pair))
                  (flags (if (pair? typ) (cdr typ) '()))
                  (elab (cond ((memq 'b flags)
                               (editing-buffer s
                                 (goto-char (point-min))
                                 (while (re-search-forward " " #f #t)
                                   (insert "*"))
                                 (buffer-string)))
                              ((memq 'B flags)
                               (editing-buffer s
                                 (goto-char (point-min))
                                 (while (search-forward " " #f #t)
                                   (insert "+"))
                                 (buffer-string)))
                              (else
                               s))))
             (cons fc elab)))
          ((pstring regex search)
           (let* ((pair (read-string-until '(#\space #\ht #\newline) #t))
                  (s (car pair))
                  (fc (cdr pair)))
             (cons fc s)))
          (else
           (let* ((mod (if (looking-at "[=<>&^~!]")
                           (let ((rv (string->symbol
                                       (make-string 1 (char-after)))))
                             (forward-char 1)
                             rv)
                           '=))
                  (n (begin
                       ;; sigh
                       (skip-ws)
                       ;; sigh
                       (and (char=? #\= (char-after))
                            (forward-char 1))
                       (read-c-integer))))
             (if (memq n '(x X))
                 'x
                 (list mod n))))))

      (define (read-entry)
        (while (memq (char-after) '(#\newline #\#))
          (forward-line 1))
        (and (char-after)
             (let* ((cur (if (looking-at ">+")
                             (- (match-end 0) (match-beginning 0))
                             0))
                    (ofs (begin
                           (forward-char cur)
                           (read-offset)))
                    (typ (read-type))
                    (val (read-value typ))
                    (str (begin
                           (skip-ws)
                           (let ((rv (read-string-until '(#\newline))))
                             (forward-char 1)
                             rv))))
               (list cur ofs typ val str))))

      (define (dump)
        (FE (list ".ttn-do-magic-2"
                  " ;; dump (-*- mode: text; coding: raw-text; -*-)\n"
                  ";;; from " magic-file "\n\n")
            display)
        (let loop ((ent (read-entry)))
          (cond (ent (display "(")
                     (write (car ent))
                     (FE (cdr ent)
                         (lambda (x)
                           (display " ")
                           (cond ((string? x)
                                  (write-string x))
                                 ((and (pair? x)
                                       (string? (cdr x)))
                                  (display "(")
                                  (write (car x))
                                  (display " . ")
                                  (write-string (cdr x))
                                  (display ")"))
                                 (else
                                  (write x)))))
                     (display ")")
                     (newline)
                     (loop (read-entry))))))

      (let ((rules (car kpair))
            (kls (unspecified)))
        ;; rv
        (lambda (command)
          (case command
            ((#:top!) (set! kls (rules)))
            ((#:next) (or (and (not (null? kls))
                               (let ((rv (car kls)))
                                 (set! kls (cdr kls))
                                 rv))
                          (let ((ent (read-entry)))
                            (and ent (rules ent))
                            ent)))
            ((#:dump) (dump))))))))

(define NATIVE-ENESS (if (= 1 (ntohs 1)) 'big 'little))

(define (scan magic-file stranger)
  (let ((limit (stat:size (stat stranger)))
        (rc (rule-control magic-file #t))
        (oprefix (string-append (port-filename stranger) ":" ))
        (outs ""))

    (define (uve-read!/ref uve)
      (uniform-vector-read! uve stranger)
      (uniform-vector-ref uve 0))

    (define x-u8
      (let ((uve (make-uniform-vector 1 #\nul 0)))
        (lambda ()
          (let ((rv (uve-read!/ref uve)))
            (if (negative? rv)
                (+ 256 rv)
                rv)))))

    (define x-u16
      (let ((uve (make-uniform-vector 1 's 0))
            (tem #f))
        (lambda (eness)
          (let ((rv (if (eq? NATIVE-ENESS eness)
                        (uve-read!/ref uve)
                        (begin
                          (set! tem (x-u8))
                          (if (eq? 'big eness)
                              (logior (ash tem 8) (x-u8))
                              (logior (ash (x-u8) 8) tem))))))
            (if (negative? rv)
                (+ 65536 rv)
                rv)))))

    (define x-u32
      (let ((uve (make-uniform-vector 1 1 0))
            (tem #f) (por #f) (ary #f))
        (lambda (eness)
          (let ((rv (if (eq? NATIVE-ENESS eness)
                        (uve-read!/ref uve)
                        (begin
                          (set! tem (x-u8))
                          (set! por (x-u8))
                          (set! ary (x-u8))
                          (if (eq? 'big eness)
                              (logior (ash tem 24)
                                      (ash por 16)
                                      (ash ary  8)
                                      (x-u8))
                              (logior (ash tem     0)
                                      (ash por     8)
                                      (ash ary    16)
                                      (ash (x-u8) 24)))))))
            (if (negative? rv)
                (+ 4294967296 rv)
                rv)))))

    (define (extract-integer type mask)
      (and=> (case type
               ((byte ubyte)                    (x-u8))
               ((short ushort)                  (x-u16 NATIVE-ENESS))
               ((long ulong date ldate)         (x-u32 NATIVE-ENESS))
               ((beshort ubeshort)              (x-u16 'big))
               ((belong ubelong bedate beldate) (x-u32 'big))
               ((leshort uleshort)              (x-u16 'little))
               ((lelong ulelong ledate leldate) (x-u32 'little))
               ((melong medate meldate)
                (let ((hi (x-u16 'little))
                      (lo (x-u16 'little)))
                  (+ (ash hi 16) lo)))
               (else
                (error "bad type:" type)))
             (lambda (bits)
               (if mask
                   (logand bits mask)
                   bits))))

    (define (cmp typ val)
      (define (snarf-string len)
        (let ((s (make-string len)))
          (do ((i 0 (1+ i)))
              ((= len i))
            (let ((c (read-char stranger)))
              (or (eof-object? c)
                  (string-set! s i c))))
          s))
      (define (read-line/zt-string)
        (let ((s (accumulator)))
          (let loop ((c (read-char stranger)))
            (cond ((or (eof-object? c) (memq c '(#\nul #\newline #\cr))))
                  (else (s c) (loop (read-char stranger)))))
          (apply string (s))))

      (and (eq? 'x val) (set! val '(#t #t)))
      (let* ((more (and (pair? typ) (cdr typ)))
             (type (if more (car typ) typ)))

        (define (numeric-match)
          (and=> (extract-integer type more)
                 (lambda (act)
                   (define (make-signed-maybe!)
                     (case type
                       ((byte)
                        (or (< act 128)
                            (set! act (- act 256))))
                       ((short beshort leshort)
                        (or (< act 32768)
                            (set! act (- act 65536))))
                       ((long belong lelong melong)
                        (or (< act 2147483648)
                            (set! act (- act 4294967296))))))
                   (and ((case (car val)
                           ((=)                      =)
                           ((<) (make-signed-maybe!) <)
                           ((>) (make-signed-maybe!) >)
                           ((&) (lambda (a b)
                                  (= (logand a b) b)))
                           ((^) (lambda (a b)
                                  (zero? (logand a b))))
                           ((~) (lambda (a b)
                                  (= a (lognot b))))
                           ((!) (lambda (a b)
                                  (not (= a b))))
                           ((#t) (lambda (a b)
                                   #t))
                           (else
                            (error "bad op:" (car val))))
                         act (cadr val))
                        act))))

        (define (time-match convert)
          (and=> (numeric-match)
                 (lambda (n)
                   (strftime "%c" (convert n)))))

        (case type
          ((string)
           ;; TODO: move elaboration to read phase
           (let* ((ci? (and more (memq 'c more)))
                  (fc (car val))
                  (s (cdr val))
                  (scmp (cond ((not more)
                               (case fc
                                 ((<) string>?)
                                 ((=) string=?)
                                 ((>) string<?)
                                 (else (lambda (a b)
                                         (not (string=? a b))))))
                              ((or (memq 'b more) (memq 'B more))
                               (let ((rx (apply make-regexp s
                                                (if ci?
                                                    (list regexp/icase)
                                                    '()))))
                                 (lambda (a b)
                                   (regexp-exec rx b))))
                              (ci?
                               (lambda (a b)
                                 ;; TODO: and-map char-ci=?
                                 (string-ci=? a b)))
                              (else
                               (error "bad typ:" typ))))
                  (snarfed (if (eq? '= fc)
                               (snarf-string (string-length s))
                               (read-line/zt-string))))
             (and (scmp s snarfed)
                  snarfed)))
          ((byte ubyte
                 short ushort long ulong
                 beshort ubeshort belong ubelong
                 leshort uleshort lelong ulelong
                 melong umelong)
           (numeric-match))
          ((date bedate ledate medate)
           (time-match localtime))
          ((ldate beldate leledate meldate)
           (time-match gmtime))
          ((search)
           (let ((chunk (editing-buffer (snarf-string more)))
                 (fc (car val))
                 (s (cdr val)))
             (editing-buffer chunk
               (toggle-read-only)
               (goto-char (point-min))
               (if (eq? '! fc)
                   (not (search-forward s #f #t))
                   (search-forward s #f #t)))))
          ((regex)
           (let* ((chunk (editing-buffer stranger (buffer-string))) ; sigh
                  (fc (car val))
                  (s (cdr val))
                  (rx (apply make-regexp s (if (eq? 'c more)
                                               (list regexp/icase)
                                               '()))))
             (regexp-exec rx chunk)))
          (else
           (NYI 'cmp typ val)
           #f))))

    (define (try-rule lev ofs typ val str)

      (define (safe-seek n)
        (and (< -1 n limit)
             (seek stranger n SEEK_SET)))

      (define (resolve ofs)
        (cond ((integer? ofs)
               ofs)
              ((eq? '& (car ofs))
               (let ((now (seek stranger 0 SEEK_CUR)))
                 (and=> (resolve (cadr ofs))
                        (lambda (n)
                          (+ now n)))))
              (else
               (and=>
                (resolve (car ofs))
                (lambda (base)
                  (let* ((type (case (cadr ofs)
                                 ((b B) 'byte)
                                 ((s) 'leshort) ((S) 'beshort)
                                 ((l) 'lelong) ((L) 'belong)
                                 ((m) 'melong)
                                 (else #f)))
                         (before (safe-seek base))
                         (bval (and before (extract-integer (or type 'long) #f)))
                         (opts ((if type cddr cdr) ofs)))
                    (and bval
                         (if (null? opts)
                             bval
                             (let* ((op (case (car opts)
                                          ((+) +)
                                          ((-) -)
                                          ((*) *)
                                          ((/) /)
                                          ((%) modulo)
                                          ((&) logand)
                                          ((|) logior)
                                          ((^) logxor)
                                          ;; TODO: move check earlier
                                          (else (error "bad opts:" opts))))
                                    (adj (cadr opts)))
                               (if (integer? adj)
                                   (op bval adj)
                                   (and (safe-seek (+ before (car adj)))
                                        (and=> (extract-integer 'long #f)
                                               (lambda (adj2)
                                                 (op bval adj2))))))))))))))

      (and (zero? lev) (seek stranger 0 SEEK_SET))
      (let ((okv (and (and=> (resolve ofs) safe-seek)
                      (cmp typ val))))

        (define (printable-string s)

          (define (good-snippet n)
            (cond ((= 92 n)
                   "\\\\")
                  ((<= 32 n 126)
                   (make-string 1 (integer->char n)))
                  (else
                   (let* ((d (number->string n 8))
                          (pad (case (string-length d)
                                 ((1) "00")
                                 ((2) "0")
                                 (else ""))))
                     (fs "\\~A~A" pad d)))))

          (apply string-append
                 (map good-snippet
                      (map char->integer
                           (string->list s)))))

        (define (good-news!)
          (cond ((string-index str #\%)
                 => (lambda (pct)
                      (editing-buffer str
                        (goto-char (1+ pct))
                        ;; TODO: %.8s, %02x, %ld, %hd, etc.
                        (looking-at "%[0-9.-]*[hl]*[cdiuxXs]")
                        (case (char-before (match-end 0))
                          ((#\c)
                           (set! okv (integer->char okv)))
                          ((#\x #\X)
                           (set! okv (number->string okv 16)))
                          ((#\s)
                           (set! okv (printable-string okv))))
                        (replace-match "~A")
                        (fs (buffer-string) okv))))
                (else
                 str)))

        (define (spew! s)
          (and DEBUG? (fso "~A[~A]: ~A~%" (port-filename stranger) lev s))
          (let ((sep (and (not (string-null? outs))
                          (zero? lev)
                          (string-append "\n" oprefix)))
                (bs (and (not (string-null? s))
                         (char=? #\bs (string-ref s 0)))))
            (set! outs (string-append
                        outs
                        (or sep "")
                        (if bs              "" " ")
                        (if bs (substring s 1) s)))))

        (and okv str (not (string-null? str)) (spew! (good-news!)))
        (->bool okv)))

    (rc #:top!)
    (let loop ((pre 0) (hit? #f))
      (and=> (rc #:next)
             (lambda (ent)
               (let ((cur (car ent)))
                 (and (< cur pre)
                      (set! pre cur))
                 (cond ((and (< pre cur) (not hit?))
                        (and DEBUG? (pk 'skip pre cur ent))
                        (loop pre #f))
                       (else
                        (and DEBUG? (pk 'trying ent))
                        (loop cur (apply try-rule ent))))))))
    (fso "~A~A~%" oprefix
         (if (string-null? outs)
             " data"
             outs))))

(define (magic/qop qop)
  (let ((magic-files (or (qop 'magic-file)
                         (list (in-vicinity (passwd:dir (getpwuid (getuid)))
                                            ".magic")))))
    (if (qop 'compile)
        (FE magic-files
            (lambda (filename)
              (with-output-to-file (compiled-name filename)
                (lambda () ((rule-control filename #f) #:dump)))))
        (FE (qop '())
            (lambda (given)
              (let ((p (open-input-file given)))
                (or-map (lambda (magic)
                          (scan magic p))
                        magic-files)))))))

(define (main args)
  (check-hv args '((package . "ttn-do")
                   (version . "1.1")
                   (help . commentary)))
  (magic/qop
   (qop<-args
    args '((compile (single-char #\C))
           (magic-file (single-char #\m) (value #t)
                       (merge-multiple? #t))))))

;;; magic ends here