#!/bin/sh
exec ${GUILE-guile} -e "(ttn-do magic)" -s $0 "$@" # -*- scheme -*-
!#
;;; magic
;; Copyright (C) 2008, 2009, 2010 Thien-Thi Nguyen
;;
;; This file is part of ttn-do, released under the terms of the
;; GNU General Public License as published by the Free Software
;; Foundation; either version 3, or (at your option) any later
;; version. There is NO WARRANTY. See file COPYING for details.
;;; Commentary:
;; Usage: magic [options] [FILE...]
;;
;; Display results of scanning FILE, or "data" if type and other
;; attributes cannot be determined. By default definitions are
;; read from ~/.magic (see magic(5) for information on its format),
;; or, preferentially, from ~/.magic.ttn-do-magic, if it exists.
;;
;; Options are:
;;
;; -m, --magic-file MAGIC -- read definitions from MAGIC instead
;; of default ~/.magic
;; -C, --compile -- used in conjunction with -m MAGIC
;; to write out MAGIC.ttn-do-magic
;;
;; The output is similar, but not identical, to that of file(1).
;;
;; Presently, this program understands ~/.magic.ttn-do-magic in
;; either .ttn-do-magic-1 or .ttn-do-magic-2 format, and writes
;; in .ttn-do-magic-2 format only.
;;; Code:
(define-module (ttn-do magic)
#:export (main)
#:use-module ((ttn-do zzz banalities) #:select (check-hv
qop<-args))
#:use-module ((ice-9 mapping) #:select (hash-table-mapping
mapping-ref
mapping-set!))
#:use-module ((ice-9 editing-buffer) #:select (find-file-read-only
editing-buffer))
#:use-module ((ttn-do scm2bin) #:select ((write-punily . write-string)))
#:use-module ((ttn-do zzz personally) #:select (accumulator
forms<-port
FE fs fso)))
(define DEBUG? (getenv "DEBUG"))
(define (NYI tag . args) ; "Not Yet Implemented"
(define (spew)
(fso "NYI: ~A" tag)
(FE args (lambda (x)
(display #\space)
((if (string? x) write-string write) x)))
(newline))
(with-output-to-port (current-error-port) spew))
(define MAGIC (hash-table-mapping #:test string=?))
(define (compiled-name filename)
(string-append filename ".ttn-do-magic"))
(define (rule-control magic-file compiled-ok)
(define (make-kpair)
(let* ((compiled (compiled-name magic-file))
(acc (accumulator))
(compiled? (and compiled-ok (file-exists? compiled)))
(init (if compiled? "" (find-file-read-only magic-file))))
(and compiled? (let ((p (open-input-file compiled)))
(or (memq (read p) '(.ttn-do-magic-1
.ttn-do-magic-2))
(error "bad compiled magic file:" compiled))
(FE (forms<-port p) acc)
(close-port p)))
(cons acc (editing-buffer init))))
(let ((kpair (or (mapping-ref MAGIC magic-file #f)
(mapping-set! MAGIC magic-file (make-kpair)))))
(editing-buffer (cdr kpair)
(define (l-a/move rx)
(and (looking-at rx)
(goto-char (match-end 0))))
(define (skip-ws)
(l-a/move "[ \t]+"))
(define (read-offset)
(define (n<- radix b e)
(string->number (buffer-substring b e) radix))
(let ((p (point)))
(define (n<-hex)
(and (l-a/move "0x[0-9a-fA-F]+")
(n<- 16 (+ 2 p) (point))))
(define (n<-oct)
(and (l-a/move "0[0-7]+")
(n<- 8 (1+ p) (point))))
(define (n<-dec)
(and (l-a/move "-*[0-9]+")
(n<- 10 p (point))))
(define (n<-k)
(or (n<-hex) (n<-oct) (n<-dec)))
(cond ((n<-hex))
((n<-dec))
((char=? #\& (char-after))
(forward-char 1)
(list '& (read-offset)))
((char=? #\( (char-after))
(forward-char 1)
(let ((more-indirection? (char=? #\& (char-after)))
(x #f) (y #f))
(and more-indirection? (forward-char 1))
(set! p (point))
(set! x (n<-k))
(cond ((l-a/move "[.]*([bslBSL]*)([-+*/%&|^]*)")
(set! p (point))
(let ((sz (match-string 1))
(op (match-string 2)))
(set! y (if (string-null? op)
'()
(list (string->symbol op)
(if (char=? #\( (char-after))
(begin
(forward-char 1)
(set! p (point))
(let ((rv (list (n<-k))))
(forward-char 1)
rv))
(n<-k)))))
(or (string-null? sz)
(set! y (cons (string->symbol sz) y)))
(forward-char 1)
(cons (if more-indirection?
(list '& x)
x)
y)))
(else
(if more-indirection?
(list '& x)
x)))))
(else
(l-a/move "[^ \t]+")
(match-string 0)))))
(define (read-c-integer)
(cond ((l-a/move "0x([0-9a-fA-F]+)")
(string->number (match-string 1) 16))
((l-a/move "0[0-7]+")
(string->number (match-string 0) 8))
(else
(let ((rv (read (buffer-port))))
;; Hmm, seems `read' eats one char after token.
(forward-char -1)
rv))))
(define (read-type)
(re-search-forward "[ \t]+([a-z16]+)[&/ \t]")
(let ((one (string->symbol (match-string 1))))
(goto-char (match-end 1))
(if (looking-at "[ \t]")
one
(cons one (begin
(forward-char 1)
(if (eq? 'string one)
(map (lambda (c)
(string->symbol (make-string 1 c)))
(string->list
(symbol->string
(read (buffer-port)))))
(read-c-integer)))))))
(define (read-string-until delims . fc?)
(set! fc? (and (not (null? fc?)) (car fc?)))
(let* ((s (accumulator))
(weird #f)
(fc (and fc? (case (char-after)
((#\< #\= #\> #\!)
(forward-char 1)
(string->symbol (make-string 1 (char-before))))
((#\x)
(set! weird #t) ; sigh
(forward-char 1)
'>)
(else '=)))))
(define (char<-nstr radix str)
(integer->char (string->number str radix)))
(let loop ((c (char-after)))
(cond ((memq c delims))
((char=? #\\ c)
(forward-char 1)
(s (cond ((looking-at "[0-3][0-7][0-7]")
(forward-char 3)
(char<-nstr 8 (match-string 0)))
((looking-at "[0-7]")
(forward-char 1)
(char<-nstr 8 (match-string 0)))
((looking-at "x[0-9a-fA-F][0-9a-fA-F]")
(forward-char 3)
(char<-nstr 16 (buffer-substring
(- (point) 2) (point))))
((looking-at "x[0-9a-fA-F][^0-9a-fA-F]")
(forward-char 2)
(char<-nstr 16 (buffer-substring
(1- (point)) (point))))
(else
(forward-char 1)
(case (char-before)
((#\t) #\ht)
((#\n) #\newline)
((#\f) #\np)
((#\a) #\bel)
((#\b) #\bs)
((#\r) #\cr)
((#\v) #\vt)
((#\space) #\space)
(else (char-before))))))
(loop (char-after)))
(else
(s c)
(forward-char 1)
(loop (char-after)))))
(set! s (apply string (s)))
(and weird (not (string-null? s))
(begin (set! fc '=)
(set! s (string-append "x" s))))
(if fc? (cons s fc) s)))
(define (read-value typ)
(define (s/fc)
(read-string-until '(#\space #\ht #\newline) #t))
(skip-ws)
(case (if (pair? typ)
(car typ)
typ)
((string)
(let* ((pair (s/fc))
(s (car pair))
(fc (cdr pair))
(flags (if (pair? typ) (cdr typ) '()))
(elab (cond ((memq 'b flags)
(editing-buffer s
(goto-char (point-min))
(while (re-search-forward " " #f #t)
(insert "*"))
(buffer-string)))
((memq 'B flags)
(editing-buffer s
(goto-char (point-min))
(while (search-forward " " #f #t)
(insert "+"))
(buffer-string)))
(else
s))))
(cons fc elab)))
((pstring regex search)
(let* ((pair (read-string-until '(#\space #\ht #\newline) #t))
(s (car pair))
(fc (cdr pair)))
(cons fc s)))
(else
(let* ((mod (if (looking-at "[=<>&^~!]")
(let ((rv (string->symbol
(make-string 1 (char-after)))))
(forward-char 1)
rv)
'=))
(n (begin
;; sigh
(skip-ws)
;; sigh
(and (char=? #\= (char-after))
(forward-char 1))
(read-c-integer))))
(if (memq n '(x X))
'x
(list mod n))))))
(define (read-entry)
(while (memq (char-after) '(#\newline #\#))
(forward-line 1))
(and (char-after)
(let* ((cur (if (looking-at ">+")
(- (match-end 0) (match-beginning 0))
0))
(ofs (begin
(forward-char cur)
(read-offset)))
(typ (read-type))
(val (read-value typ))
(str (begin
(skip-ws)
(let ((rv (read-string-until '(#\newline))))
(forward-char 1)
rv))))
(list cur ofs typ val str))))
(define (dump)
(FE (list ".ttn-do-magic-2"
" ;; dump (-*- mode: text; coding: raw-text; -*-)\n"
";;; from " magic-file "\n\n")
display)
(let loop ((ent (read-entry)))
(cond (ent (display "(")
(write (car ent))
(FE (cdr ent)
(lambda (x)
(display " ")
(cond ((string? x)
(write-string x))
((and (pair? x)
(string? (cdr x)))
(display "(")
(write (car x))
(display " . ")
(write-string (cdr x))
(display ")"))
(else
(write x)))))
(display ")")
(newline)
(loop (read-entry))))))
(let ((rules (car kpair))
(kls (unspecified)))
;; rv
(lambda (command)
(case command
((#:top!) (set! kls (rules)))
((#:next) (or (and (not (null? kls))
(let ((rv (car kls)))
(set! kls (cdr kls))
rv))
(let ((ent (read-entry)))
(and ent (rules ent))
ent)))
((#:dump) (dump))))))))
(define NATIVE-ENESS (if (= 1 (ntohs 1)) 'big 'little))
(define (scan magic-file stranger)
(let ((limit (stat:size (stat stranger)))
(rc (rule-control magic-file #t))
(oprefix (string-append (port-filename stranger) ":" ))
(outs ""))
(define (uve-read!/ref uve)
(uniform-vector-read! uve stranger)
(uniform-vector-ref uve 0))
(define x-u8
(let ((uve (make-uniform-vector 1 #\nul 0)))
(lambda ()
(let ((rv (uve-read!/ref uve)))
(if (negative? rv)
(+ 256 rv)
rv)))))
(define x-u16
(let ((uve (make-uniform-vector 1 's 0))
(tem #f))
(lambda (eness)
(let ((rv (if (eq? NATIVE-ENESS eness)
(uve-read!/ref uve)
(begin
(set! tem (x-u8))
(if (eq? 'big eness)
(logior (ash tem 8) (x-u8))
(logior (ash (x-u8) 8) tem))))))
(if (negative? rv)
(+ 65536 rv)
rv)))))
(define x-u32
(let ((uve (make-uniform-vector 1 1 0))
(tem #f) (por #f) (ary #f))
(lambda (eness)
(let ((rv (if (eq? NATIVE-ENESS eness)
(uve-read!/ref uve)
(begin
(set! tem (x-u8))
(set! por (x-u8))
(set! ary (x-u8))
(if (eq? 'big eness)
(logior (ash tem 24)
(ash por 16)
(ash ary 8)
(x-u8))
(logior (ash tem 0)
(ash por 8)
(ash ary 16)
(ash (x-u8) 24)))))))
(if (negative? rv)
(+ 4294967296 rv)
rv)))))
(define (extract-integer type mask)
(and=> (case type
((byte ubyte) (x-u8))
((short ushort) (x-u16 NATIVE-ENESS))
((long ulong date ldate) (x-u32 NATIVE-ENESS))
((beshort ubeshort) (x-u16 'big))
((belong ubelong bedate beldate) (x-u32 'big))
((leshort uleshort) (x-u16 'little))
((lelong ulelong ledate leldate) (x-u32 'little))
((melong medate meldate)
(let ((hi (x-u16 'little))
(lo (x-u16 'little)))
(+ (ash hi 16) lo)))
(else
(error "bad type:" type)))
(lambda (bits)
(if mask
(logand bits mask)
bits))))
(define (cmp typ val)
(define (snarf-string len)
(let ((s (make-string len)))
(do ((i 0 (1+ i)))
((= len i))
(let ((c (read-char stranger)))
(or (eof-object? c)
(string-set! s i c))))
s))
(define (read-line/zt-string)
(let ((s (accumulator)))
(let loop ((c (read-char stranger)))
(cond ((or (eof-object? c) (memq c '(#\nul #\newline #\cr))))
(else (s c) (loop (read-char stranger)))))
(apply string (s))))
(and (eq? 'x val) (set! val '(#t #t)))
(let* ((more (and (pair? typ) (cdr typ)))
(type (if more (car typ) typ)))
(define (numeric-match)
(and=> (extract-integer type more)
(lambda (act)
(define (make-signed-maybe!)
(case type
((byte)
(or (< act 128)
(set! act (- act 256))))
((short beshort leshort)
(or (< act 32768)
(set! act (- act 65536))))
((long belong lelong melong)
(or (< act 2147483648)
(set! act (- act 4294967296))))))
(and ((case (car val)
((=) =)
((<) (make-signed-maybe!) <)
((>) (make-signed-maybe!) >)
((&) (lambda (a b)
(= (logand a b) b)))
((^) (lambda (a b)
(zero? (logand a b))))
((~) (lambda (a b)
(= a (lognot b))))
((!) (lambda (a b)
(not (= a b))))
((#t) (lambda (a b)
#t))
(else
(error "bad op:" (car val))))
act (cadr val))
act))))
(define (time-match convert)
(and=> (numeric-match)
(lambda (n)
(strftime "%c" (convert n)))))
(case type
((string)
;; TODO: move elaboration to read phase
(let* ((ci? (and more (memq 'c more)))
(fc (car val))
(s (cdr val))
(scmp (cond ((not more)
(case fc
((<) string>?)
((=) string=?)
((>) string<?)
(else (lambda (a b)
(not (string=? a b))))))
((or (memq 'b more) (memq 'B more))
(let ((rx (apply make-regexp s
(if ci?
(list regexp/icase)
'()))))
(lambda (a b)
(regexp-exec rx b))))
(ci?
(lambda (a b)
;; TODO: and-map char-ci=?
(string-ci=? a b)))
(else
(error "bad typ:" typ))))
(snarfed (if (eq? '= fc)
(snarf-string (string-length s))
(read-line/zt-string))))
(and (scmp s snarfed)
snarfed)))
((byte ubyte
short ushort long ulong
beshort ubeshort belong ubelong
leshort uleshort lelong ulelong
melong umelong)
(numeric-match))
((date bedate ledate medate)
(time-match localtime))
((ldate beldate leledate meldate)
(time-match gmtime))
((search)
(let ((chunk (editing-buffer (snarf-string more)))
(fc (car val))
(s (cdr val)))
(editing-buffer chunk
(toggle-read-only)
(goto-char (point-min))
(if (eq? '! fc)
(not (search-forward s #f #t))
(search-forward s #f #t)))))
((regex)
(let* ((chunk (editing-buffer stranger (buffer-string))) ; sigh
(fc (car val))
(s (cdr val))
(rx (apply make-regexp s (if (eq? 'c more)
(list regexp/icase)
'()))))
(regexp-exec rx chunk)))
(else
(NYI 'cmp typ val)
#f))))
(define (try-rule lev ofs typ val str)
(define (safe-seek n)
(and (< -1 n limit)
(seek stranger n SEEK_SET)))
(define (resolve ofs)
(cond ((integer? ofs)
ofs)
((eq? '& (car ofs))
(let ((now (seek stranger 0 SEEK_CUR)))
(and=> (resolve (cadr ofs))
(lambda (n)
(+ now n)))))
(else
(and=>
(resolve (car ofs))
(lambda (base)
(let* ((type (case (cadr ofs)
((b B) 'byte)
((s) 'leshort) ((S) 'beshort)
((l) 'lelong) ((L) 'belong)
((m) 'melong)
(else #f)))
(before (safe-seek base))
(bval (and before (extract-integer (or type 'long) #f)))
(opts ((if type cddr cdr) ofs)))
(and bval
(if (null? opts)
bval
(let* ((op (case (car opts)
((+) +)
((-) -)
((*) *)
((/) /)
((%) modulo)
((&) logand)
((|) logior)
((^) logxor)
;; TODO: move check earlier
(else (error "bad opts:" opts))))
(adj (cadr opts)))
(if (integer? adj)
(op bval adj)
(and (safe-seek (+ before (car adj)))
(and=> (extract-integer 'long #f)
(lambda (adj2)
(op bval adj2))))))))))))))
(and (zero? lev) (seek stranger 0 SEEK_SET))
(let ((okv (and (and=> (resolve ofs) safe-seek)
(cmp typ val))))
(define (printable-string s)
(define (good-snippet n)
(cond ((= 92 n)
"\\\\")
((<= 32 n 126)
(make-string 1 (integer->char n)))
(else
(let* ((d (number->string n 8))
(pad (case (string-length d)
((1) "00")
((2) "0")
(else ""))))
(fs "\\~A~A" pad d)))))
(apply string-append
(map good-snippet
(map char->integer
(string->list s)))))
(define (good-news!)
(cond ((string-index str #\%)
=> (lambda (pct)
(editing-buffer str
(goto-char (1+ pct))
;; TODO: %.8s, %02x, %ld, %hd, etc.
(looking-at "%[0-9.-]*[hl]*[cdiuxXs]")
(case (char-before (match-end 0))
((#\c)
(set! okv (integer->char okv)))
((#\x #\X)
(set! okv (number->string okv 16)))
((#\s)
(set! okv (printable-string okv))))
(replace-match "~A")
(fs (buffer-string) okv))))
(else
str)))
(define (spew! s)
(and DEBUG? (fso "~A[~A]: ~A~%" (port-filename stranger) lev s))
(let ((sep (and (not (string-null? outs))
(zero? lev)
(string-append "\n" oprefix)))
(bs (and (not (string-null? s))
(char=? #\bs (string-ref s 0)))))
(set! outs (string-append
outs
(or sep "")
(if bs "" " ")
(if bs (substring s 1) s)))))
(and okv str (not (string-null? str)) (spew! (good-news!)))
(->bool okv)))
(rc #:top!)
(let loop ((pre 0) (hit? #f))
(and=> (rc #:next)
(lambda (ent)
(let ((cur (car ent)))
(and (< cur pre)
(set! pre cur))
(cond ((and (< pre cur) (not hit?))
(and DEBUG? (pk 'skip pre cur ent))
(loop pre #f))
(else
(and DEBUG? (pk 'trying ent))
(loop cur (apply try-rule ent))))))))
(fso "~A~A~%" oprefix
(if (string-null? outs)
" data"
outs))))
(define (magic/qop qop)
(let ((magic-files (or (qop 'magic-file)
(list (in-vicinity (passwd:dir (getpwuid (getuid)))
".magic")))))
(if (qop 'compile)
(FE magic-files
(lambda (filename)
(with-output-to-file (compiled-name filename)
(lambda () ((rule-control filename #f) #:dump)))))
(FE (qop '())
(lambda (given)
(let ((p (open-input-file given)))
(or-map (lambda (magic)
(scan magic p))
magic-files)))))))
(define (main args)
(check-hv args '((package . "ttn-do")
(version . "1.1")
(help . commentary)))
(magic/qop
(qop<-args
args '((compile (single-char #\C))
(magic-file (single-char #\m) (value #t)
(merge-multiple? #t))))))
;;; magic ends here