;;; lookingfor.scm

;; Copyright (C) 2001, 2002, 2004, 2005, 2006, 2007, 2009,
;;   2010, 2011, 2012 Thien-Thi Nguyen
;;
;; This file is part of ttn-do, released under the terms of the
;; GNU General Public License as published by the Free Software
;; Foundation; either version 3, or (at your option) any later
;; version.  There is NO WARRANTY.  See file COPYING for details.

;;; Code:

(define-module (ttn-do zzz lookingfor)
  #:export (grep grep-matches grep-l)
  #:use-module ((ice-9 regex) #:select (regexp-quote))
  #:use-module ((ice-9 rdelim) #:select (read-line))
  #:use-module ((ttn-do zzz personally) #:select (accumulator
                                                  FE)))

(define (grepper-proc l/picker flags)
  (let ((flags (map (lambda (x)
                      (if (symbol? x)
                          (symbol->keyword x)
                          x))
                    flags)))
    (lambda (re strings)
      (let* ((rx (or (and (regexp? re) re)
                     (apply make-regexp
                            (if (memq #:literal flags)
                                (regexp-quote re)
                                re)
                            (or (and (memq #:fold-case flags)
                                     (list regexp/icase))
                                '()))))
             (criteria (if (memq #:invert flags)
                           (lambda (string) (not (regexp-exec rx string)))
                           (lambda (string) (regexp-exec rx string)))))
        ((if (memq #:count flags) car cdr)
         (l/picker criteria strings))))))

;; Return list of matches to regexp @var{re} from @var{strings} list.
;; Optional @var{flags} are keywords (or symbols with the same name)
;; which modify regexp matching:
;;
;; @table @code
;; @item #:invert
;; the result list is those strings that do NOT match
;;
;; @item #:fold-case
;; consider upper and lower case to be identical
;;
;; @item #:count
;; return length of result list instead of list
;;
;; @item #:literal
;; @var{re} specifies a literal string to match
;; @end table
;;
;; Return list order is the same as in the input list.
;;
(define (grep re strings . flags)
  (define (len/pick pred ls)
    (let ((count 0) (acc (accumulator)))
      (FE ls (lambda (x)
               (cond ((pred x)
                      (set! count (1+ count))
                      (acc x)))))
      (cons count (acc))))
  ((grepper-proc len/pick flags) re strings))

;; Return non-#f match results of @code{regexp-exec} of regexp @var{re} on
;; @var{strings} list.  Optional @var{flags} are keywords (or symbols with
;; the same name) which modify regexp matching:
;;
;; @table @code
;; @item #:invert
;; the result list is composed entirely of #t values, with
;; length equal to the number of strings that do NOT match
;; (useful with @code{count} flag below)
;;
;; @item #:fold-case
;; consider upper and lower case to be identical
;;
;; @item #:count
;; return length of result list instead of list
;;
;; @item #:literal
;; @var{re} specifies a literal string to match
;; @end table
;;
;; Return list order is the same as in the input list.
;;
(define (grep-matches re strings . flags)
  (define (len/pick-mappings pred ls)
    (let ((count 0) (acc (accumulator)))
      (FE ls (lambda (x)
               (and=> (pred x)
                      (lambda (mapping)
                        (set! count (1+ count))
                        (acc mapping)))))
      (cons count (acc))))
  ((grepper-proc len/pick-mappings flags) re strings))

;; Search for regular expression @var{re} in @var{files}.
;; Return a list of those that match.  @var{re} specifies a regular expression
;; that matches on one line (multi-line results not currently supported).
;; @var{files} is a list, each element of which can either be a filename or a
;; seekable port.  In the returned list, if the element is a port, its read
;; offset is left at the beginning of the line of the first match.
;;
(define (grep-l re files)
  (let* ((rx (if (regexp? re)
                 re
                 (make-regexp re)))
         (acc (accumulator)))
    (FE files (lambda (file)
                (let* ((bol (and (port? file) 0))
                       (in? (string? file))
                       (p (cond (bol (seek file bol SEEK_SET) file)
                                (in? (open-input-file file))
                                (else (error "bad file:" file)))))
                  (let loop ((line (read-line p)))
                    (cond ((eof-object? line)
                           (and in? (close-port p)))
                          ((regexp-exec rx line)
                           (and bol (seek p bol SEEK_SET))
                           (acc (if bol p file)))
                          (else
                           (and bol (set! bol (seek p 0 SEEK_CUR)))
                           (loop (read-line p))))))))
    (acc)))

;;; lookingfor.scm ends here