#!/bin/sh
exec ${GUILE-guile} -e '(ttn-do xml2sexp)' -s $0 "$@" # -*-scheme-*-
!#
;;; xml2sexp
;; Copyright (C) 2007, 2008, 2009, 2010 Thien-Thi Nguyen
;;
;; This file is part of ttn-do, released under the terms of the
;; GNU General Public License as published by the Free Software
;; Foundation; either version 3, or (at your option) any later
;; version. There is NO WARRANTY. See file COPYING for details.
;;; Commentary:
;; Usage: xml2sexp [options] XMLFILE...
;;
;; Pretty-print contents of XMLFILE... to stdout.
;; If XMLFILE is "-", read from standard input.
;; If XMLFILE ends with ".gz", filter it through "gzip -dc" first.
;; The contents are read as xml and written as a sexp.
;;
;; Options:
;; -f, --name-format [FMT] -- do (simple-format cep FMT filename) and
;; (newline cep), where cep is the current
;; error port, prior to the normal output for
;; each XMLFILE; disabled if XMLFILE is "-"
;;
;; -w, --keep-whitespace -- do NOT ignore `character-data' nodes where
;; all characters are `char-whitespace?'
;;
;; -e, --empty STRING -- suppress "XML_ERROR_NO_ELEMENTS" error
;; on empty files; display STRING followed
;; by newline, instead
;;
;; -x, --omit NODETYPE -- ignore NODETYPE, one of:
;; character-data
;; comment
;; this option may be specified multiply
;;; Code:
(define-module (ttn-do xml2sexp)
#:export (main xml2sexp)
#:use-module ((ttn-do zzz banalities) #:select (check-hv
qop<-args))
#:use-module ((ice-9 regex) #:select (string-match))
#:use-module ((ice-9 popen) #:select (open-input-pipe))
#:use-module ((ice-9 pretty-print) #:select (pretty-print))
#:use-module ((ttn-do zzz personally) #:select (accumulator
FE fs fse))
#:use-module ((ttn-do zz xml mixp) #:select (xml->tree)))
(define (pp x)
(pretty-print x #:escape-strings? #t)
(force-output))
(define (crunch qop form)
(let* ((omit (or (qop 'omit (lambda (v) (map string->symbol v)))
'()))
(no-comment (memq 'comment omit))
(no-character-data (memq 'character-data omit))
(no-s (if (qop 'keep-whitespace)
(lambda (s)
#f)
(lambda (s)
(and-map char-whitespace? (string->list s))))))
(define (crunch-1 form)
(let ((acc (accumulator)))
(let loop ((ls form))
(if (null? ls)
(acc)
(let ((head (car ls))
(tail (cdr ls)))
(or (pair? head)
(error "(not (pair? head)) !!!" head))
(and (pair? (car head))
(error "(pair? (car head)) !!!" head))
(case (car head)
((element)
(let* ((x (cadr head))
(name (string->symbol (car x)))
(attrs (map (lambda (pair)
(cons (string->symbol (car pair))
(cdr pair)))
(reverse! (cadr x)))))
(acc name)
(acc attrs)
(FE (map crunch-1 (map list (cddr head)))
(lambda (res)
(cond ((null? res))
((and-map string? res)
(FE res acc))
(else
(acc res)))))))
((character-data)
(or no-character-data
(let ((s (cadr head)))
(or (no-s s)
(acc s)))))
((comment)
(or no-comment
(acc head)))
(else
(acc head)))
(loop tail))))))
(crunch-1 form)))
;; Read XML from @var{port} and return a sexp.
;;
;; @var{options} is an alist with symbolic keys @code{keep-whitespace},
;; @code{empty}, @code{omit}; these control behavior identically to the
;; command-line options of the same name described above. The value for
;; @code{keep-whitespace} is a boolean, whereas that for @code{omit} is a
;; list of symbols.
;;
(define (xml2sexp options port)
(crunch (lambda (k . proc)
(cond ((assq-ref options k)
=> (lambda (v)
(if (not (null? proc))
((car proc) v)
v)))
(else #f)))
(xml->tree port)))
(define (main/qop qop)
(let ((pre (or (qop 'name-format
(lambda (fmt)
(set! fmt (string-append fmt "~%"))
(lambda (filename)
(fse fmt filename))))
identity))
(all (qop '())))
(define (one filename)
(let* ((stdin? (string=? "-" filename))
(port (cond (stdin?
(current-input-port))
((string-match "\\.gz$" filename)
(open-input-pipe (fs "gzip -dc ~S" filename)))
(else
(open-input-file filename)))))
(or stdin? (pre filename))
(pp (crunch qop (xml->tree port)))
(or stdin? (close-port port))))
(or (qop 'empty
(lambda (empty-string)
(FE all (lambda (filename)
(catch 'XML_ERROR_NO_ELEMENTS
(lambda () (one filename))
(lambda ignored
(write-line empty-string)
(force-output)))))))
(FE all one))))
(define (main args)
(check-hv args '((package . "ttn-do")
(version . "1.3")
(help . commentary)))
(main/qop
(qop<-args
args '((name-format (single-char #\f) (value #t))
(keep-whitespace (single-char #\w))
(omit (single-char #\x) (value #t) (merge-multiple? #t))
(empty (single-char #\e) (value #t))))))
;;; xml2sexp ends here