#!/bin/sh
exec ${GUILE-guile} -e '(ttn-do xml2sexp)' -s $0 "$@" # -*-scheme-*-
!#
;;; xml2sexp
;; Copyright (C) 2007, 2008, 2009, 2010, 2011, 2012 Thien-Thi Nguyen
;;
;; This file is part of ttn-do, released under the terms of the
;; GNU General Public License as published by the Free Software
;; Foundation; either version 3, or (at your option) any later
;; version. There is NO WARRANTY. See file COPYING for details.
;;; Commentary:
;; Usage: xml2sexp [options] XMLFILE...
;;
;; Pretty-print contents of XMLFILE... to stdout.
;; If XMLFILE is "-", read from standard input.
;; If XMLFILE ends with ".gz", filter it through "gzip -dc" first.
;; The contents are read as xml and written as a sexp.
;;
;; Options:
;; -f, --name-format [FMT] -- do (simple-format cep FMT filename) and
;; (newline cep), where cep is the current
;; error port, prior to the normal output for
;; each XMLFILE; disabled if XMLFILE is "-"
;;
;; -w, --keep-whitespace -- do NOT ignore ‘character-data’ nodes where
;; all characters are ‘char-whitespace?’
;;
;; -e, --empty STRING -- suppress "XML_ERROR_NO_ELEMENTS" error
;; on empty files; display STRING followed
;; by newline, instead
;;
;; -x, --omit NODETYPE -- ignore NODETYPE, one of:
;; character-data
;; comment
;; this option may be specified multiply
;;
;; -s, --sxml N -- output SXML N normal form, where N
;; is one of 0, 1, 2, 3 (EXPERIMENTAL)
;;; Code:
(define-module (ttn-do xml2sexp)
#:export (main xml2sexp)
#:use-module ((ttn-do zzz banalities) #:select (check-hv
qop<-args))
#:use-module ((srfi srfi-13) #:select (string-suffix?))
#:use-module ((ice-9 rdelim) #:select (write-line))
#:use-module ((ice-9 popen) #:select (open-input-pipe))
#:use-module ((ttn-do pp) #:select (pp))
#:use-module ((ttn-do zzz personally) #:select (accumulator
FE fs fse))
#:use-module ((mixp utils) #:select (xml->tree)))
(define (crunch qop form)
(let* ((omit (or (qop 'omit (lambda (v) (map string->symbol v)))
'()))
(sxml (qop 'sxml string->number))
(attribute-association (if sxml list cons))
(attribute-collect (case sxml
((#f) (lambda (acc attr)
(acc attr)))
((0 1) (lambda (acc attr)
(or (null? attr)
(acc (cons '@ attr)))))
((2 3) (lambda (acc attr)
(acc (cons '@ attr))))
(else (error "bad sxml normal form:" sxml))))
(no-comment (or (memq 'comment omit)
(and sxml (<= 2 sxml))))
;; TODO: SXML 2NF also specifies ‘no-entity’.
(no-character-data (memq 'character-data omit))
(no-s (if (qop 'keep-whitespace)
(lambda (s)
#f)
(lambda (s)
(and-map char-whitespace? (string->list s))))))
(define (crunch-1 form)
(let ((acc (accumulator)))
(let loop ((ls form))
(if (null? ls)
(acc)
(let ((head (car ls))
(tail (cdr ls)))
(or (pair? head)
(error "(not (pair? head)) !!!" head))
(and (pair? (car head))
(error "(pair? (car head)) !!!" head))
(case (car head)
((element)
(let* ((x (cadr head))
(name (string->symbol (car x)))
(attrs (map (lambda (pair)
(attribute-association
(string->symbol (car pair))
(cdr pair)))
(cadr x))))
(acc name)
(attribute-collect acc attrs)
(FE (map crunch-1 (map list (cddr head)))
(lambda (res)
(cond ((null? res))
((and-map string? res)
(FE res acc))
(else
(acc res)))))))
((character-data)
(or no-character-data
(let ((s (cadr head)))
(or (no-s s)
;; TODO: SXML 3NF specifies all text
;; strings must be joined.
(acc s)))))
((comment)
(or no-comment
(acc head)))
(else
(acc head)))
(loop tail))))))
(crunch-1 form)))
;; Read XML from @var{port} and return a sexp.
;;
;; @var{options} is an alist with symbolic keys @code{keep-whitespace},
;; @code{empty}, @code{omit}; these control behavior identically to the
;; command-line options of the same name described above. The value for
;; @code{keep-whitespace} is a boolean, whereas that for @code{omit} is a
;; list of symbols.
;;
(define (xml2sexp options port)
;; FIXME: ‘crunch’ takes ‘qop’, which expects strings as the alist
;; values, requiring (suboptimal) normalization of ‘options’.
(define (norm x)
(case (car x)
((sxml) (cons (car x) (number->string (cdr x))))
((omit) (cons (car x) (map symbol->string (cdr x))))
(else x)))
(let ((normalized (map norm options)))
(define (fake-qop k . proc)
(cond ((assq-ref normalized k)
=> (lambda (v)
(if (not (null? proc))
((car proc) v)
v)))
(else #f)))
(crunch fake-qop (xml->tree port))))
(define (main/qop qop)
(let ((pre (or (qop 'name-format
(lambda (fmt)
(set! fmt (string-append fmt "~%"))
(lambda (filename)
(fse fmt filename))))
identity))
(all (qop '())))
(define (one filename)
(let* ((stdin? (string=? "-" filename))
(port (cond (stdin?
(current-input-port))
((string-suffix? ".gz" filename)
(open-input-pipe (fs "gzip -dc ~S" filename)))
(else
(open-input-file filename)))))
(or stdin? (pre filename))
(pp (crunch qop (xml->tree port)))
(force-output)
(or stdin? (close-port port))))
(or (qop 'empty
(lambda (empty-string)
(FE all (lambda (filename)
(catch 'XML_ERROR_NO_ELEMENTS
(lambda () (one filename))
(lambda ignored
(write-line empty-string)
(force-output)))))))
(FE all one))))
(define (main args)
(check-hv args '((package . "ttn-do")
(version . "1.4")
;; 1.5 -- add EXPERIMENTAL ‘--sxml N’
;; 1.4 -- bugfix: maintain elem attr order
;; 1.3 -- export ‘xml2sexp’
;; 1.2 -- handle stdin, gzipped input
;; 1.1 -- add more callbacks; add ‘--omit X’
;; 1.0 -- initial revision
(help . commentary)))
(main/qop
(qop<-args
args '((name-format (single-char #\f) (value #t))
(keep-whitespace (single-char #\w))
(omit (single-char #\x) (value #t) (merge-multiple? #t))
(empty (single-char #\e) (value #t))
(sxml (single-char #\s) (value #t))))))
;;; xml2sexp ends here