#!/bin/sh
exec ${GUILE-guile} -e '(ttn-do valid-ucs-p)' -s $0 "$@" # -*-scheme-*-
!#
;;; valid-ucs-p

;; Copyright (C) 2009, 2010, 2011 Thien-Thi Nguyen
;;
;; This file is part of ttn-do, released under the terms of the
;; GNU General Public License as published by the Free Software
;; Foundation; either version 3, or (at your option) any later
;; version.  There is NO WARRANTY.  See file COPYING for details.

;;; Commentary:

;; Usage: valid-ucs-p [--utf8] FILENAME...
;;
;; Check that each file in FILENAME... contains valid UCS data.
;; Optional arg ‘--utf8’ means check only that the file contains
;; valid UTF8 data (some of which may not be valid UCS).
;;
;; If there are no problems, exit successfully (and silently).
;; Otherwise, display a message to stderr, one of:
;;   FILENAME:LNO:COL: KEY (HEX-BYTE-VALUE...)
;; where HEX-BYTE-VALUE is a hexadecimal integer and KEY is
;; one of: ‘invalid-utf8’, ‘invalid-ucs’; and exit failurefully.

;;; Code:

(define-module (ttn-do valid-ucs-p)
  #:export (scan!-proc
            check-ports
            main)
  #:use-module ((ttn-do zzz banalities) #:select (check-hv
                                                  qop<-args))
  #:use-module ((srfi srfi-1) #:select (car+cdr))
  #:use-module ((srfi srfi-11) #:select (let-values))
  #:use-module ((ttn-do zzz personally) #:select (FE condition-case fs fse))
  #:use-module ((ttn-do zzz emacsdream) #:select (utf8-reader valid-ucs?)))

;; Return a procedure @code{p} capable of reading from a port
;; and checking for valid UCS data.  If @var{skip-ucs-check?}
;; is non-@code{#f}, check for valid UTF8 data only.
;;
;; Procedure @code{p} may throw @code{invalid-utf8} or
;; @code{invalid-ucs}, with a single list argument:
;; @code{(@var{bytes} @var{port} @var{lno} @var{col})},
;; where @var{bytes} is the list of offending bytes read from
;; @var{port} at line number @var{lno}, column @var{col}.
;;
(define (scan!-proc skip-ucs-check?)
  (lambda (port)
    (let* ((lno 1)
           (col 0)
           (r (utf8-reader port))
           (b (r #:rvbox)))
      (define (another)
        (set! col (1+ col))
        (r))
      (define (badness key bytes)
        (throw key (list bytes port lno col)))
      (let loop ()
        (and (catch 'invalid-utf8 another badness)
             (let ((c (cdr b)))
               (or skip-ucs-check?
                   (valid-ucs? c)
                   (badness 'invalid-ucs c))
               (cond ((= 10 c)
                      (set! lno (1+ lno))
                      (set! col 0)))
               #t)
             (loop))))))

;; Check each port in @var{ports} using the procedure @var{p}
;; returned calling @code{scan!-proc} with arg @var{skip-ucs-check?}.
;; @var{ports} may also be a single input-port.
;;
;; If scanning results in @code{invalid-utf8} or @code{invalid-ucs}
;; errors, display the location of the error in the form:
;;
;; @example
;; FILENAME:LNO:COL: KEY (HEX-BYTE-VALUE...)
;; @end example
;;
;; where @var{key} is either @code{invalid-utf8} or @code{invalid-ucs},
;; and @code{hex-byte-value} is a hexadecimal integer in the range [0,255];
;; then do @code{(exit #f)}.
;;
(define (check-ports skip-ucs-check? ports)
  (condition-case badness
      (FE (if (pair? ports)
              ports
              (list ports))
          (scan!-proc skip-ucs-check?))
    ((invalid-utf8 invalid-ucs)
     (let-values (((bytes where) (car+cdr (cadr badness))))
       (fse "~A:~A: ~A ~A~%"
            (port-filename (car where))
            (apply fs "~A:~A" (cdr where))
            (car badness)
            (map (lambda (n)
                   (number->string n 16))
                 (if (pair? bytes)
                     bytes
                     (list bytes)))))
     (exit #f))))

(define (valid-ucs-p/qop qop)
  (check-ports (qop 'utf8)
               (map (lambda (filename)
                      (if (file-is-directory? filename)
                          (error (fs "Invalid filename: ~S (is a directory)"
                                     filename))
                          (open-input-file filename)))
                    (qop '()))))

(define (main args)
  (check-hv args '((package . "ttn-do")
                   (version . "1.0")
                   (help . commentary)))
  (valid-ucs-p/qop
   (qop<-args
    args '((utf8)))))

;;; valid-ucs-p ends here