#!/bin/sh
exec ${GUILE-guile} -e '(ttn-do valid-ucs-p)' -s $0 "$@" # -*-scheme-*-
!#
;;; valid-ucs-p
;; Copyright (C) 2009, 2010, 2011 Thien-Thi Nguyen
;;
;; This file is part of ttn-do, released under the terms of the
;; GNU General Public License as published by the Free Software
;; Foundation; either version 3, or (at your option) any later
;; version. There is NO WARRANTY. See file COPYING for details.
;;; Commentary:
;; Usage: valid-ucs-p [--utf8] FILENAME...
;;
;; Check that each file in FILENAME... contains valid UCS data.
;; Optional arg ‘--utf8’ means check only that the file contains
;; valid UTF8 data (some of which may not be valid UCS).
;;
;; If there are no problems, exit successfully (and silently).
;; Otherwise, display a message to stderr, one of:
;; FILENAME:LNO:COL: KEY (HEX-BYTE-VALUE...)
;; where HEX-BYTE-VALUE is a hexadecimal integer and KEY is
;; one of: ‘invalid-utf8’, ‘invalid-ucs’; and exit failurefully.
;;; Code:
(define-module (ttn-do valid-ucs-p)
#:export (scan!-proc
check-ports
main)
#:use-module ((ttn-do zzz banalities) #:select (check-hv
qop<-args))
#:use-module ((srfi srfi-1) #:select (car+cdr))
#:use-module ((srfi srfi-11) #:select (let-values))
#:use-module ((ttn-do zzz personally) #:select (FE condition-case fs fse))
#:use-module ((ttn-do zzz emacsdream) #:select (utf8-reader valid-ucs?)))
;; Return a procedure @code{p} capable of reading from a port
;; and checking for valid UCS data. If @var{skip-ucs-check?}
;; is non-@code{#f}, check for valid UTF8 data only.
;;
;; Procedure @code{p} may throw @code{invalid-utf8} or
;; @code{invalid-ucs}, with a single list argument:
;; @code{(@var{bytes} @var{port} @var{lno} @var{col})},
;; where @var{bytes} is the list of offending bytes read from
;; @var{port} at line number @var{lno}, column @var{col}.
;;
(define (scan!-proc skip-ucs-check?)
(lambda (port)
(let* ((lno 1)
(col 0)
(r (utf8-reader port))
(b (r #:rvbox)))
(define (another)
(set! col (1+ col))
(r))
(define (badness key bytes)
(throw key (list bytes port lno col)))
(let loop ()
(and (catch 'invalid-utf8 another badness)
(let ((c (cdr b)))
(or skip-ucs-check?
(valid-ucs? c)
(badness 'invalid-ucs c))
(cond ((= 10 c)
(set! lno (1+ lno))
(set! col 0)))
#t)
(loop))))))
;; Check each port in @var{ports} using the procedure @var{p}
;; returned calling @code{scan!-proc} with arg @var{skip-ucs-check?}.
;; @var{ports} may also be a single input-port.
;;
;; If scanning results in @code{invalid-utf8} or @code{invalid-ucs}
;; errors, display the location of the error in the form:
;;
;; @example
;; FILENAME:LNO:COL: KEY (HEX-BYTE-VALUE...)
;; @end example
;;
;; where @var{key} is either @code{invalid-utf8} or @code{invalid-ucs},
;; and @code{hex-byte-value} is a hexadecimal integer in the range [0,255];
;; then do @code{(exit #f)}.
;;
(define (check-ports skip-ucs-check? ports)
(condition-case badness
(FE (if (pair? ports)
ports
(list ports))
(scan!-proc skip-ucs-check?))
((invalid-utf8 invalid-ucs)
(let-values (((bytes where) (car+cdr (cadr badness))))
(fse "~A:~A: ~A ~A~%"
(port-filename (car where))
(apply fs "~A:~A" (cdr where))
(car badness)
(map (lambda (n)
(number->string n 16))
(if (pair? bytes)
bytes
(list bytes)))))
(exit #f))))
(define (valid-ucs-p/qop qop)
(check-ports (qop 'utf8)
(map (lambda (filename)
(if (file-is-directory? filename)
(error (fs "Invalid filename: ~S (is a directory)"
filename))
(open-input-file filename)))
(qop '()))))
(define (main args)
(check-hv args '((package . "ttn-do")
(version . "1.0")
(help . commentary)))
(valid-ucs-p/qop
(qop<-args
args '((utf8)))))
;;; valid-ucs-p ends here