#! /bin/sh
exec ${GUILE-guile} -e "(ttn-do rcs-fast-export)" -s $0 "$@" # -*-scheme-*-
!#
;;; rcs-fast-export

;; Copyright (C) 2010, 2011 Thien-Thi Nguyen
;;
;; This file is part of ttn-do, released under the terms of the
;; GNU General Public License as published by the Free Software
;; Foundation; either version 3, or (at your option) any later
;; version.  There is NO WARRANTY.  See file COPYING for details.
;;
;; Description: Spew RCS info to stdout in "fast-import" format.

;;; Commentary:

;; rcs-fast-export [OPTIONS] [FILE...]
;;
;; Write "fast import" format data from RCS repositories to stdout.
;; FILE should be either the repository filename (typically foo,v),
;; or the working file name, or a directory name, in which case its
;; {RCS/}*,v (if any) are scanned, without descending into subdirs.
;; If no FILE is specified, scan "." (the current directory).
;;
;; Options:
;;  -A, --authors FILE   -- Read FILE for author information.
;;  -p, --prefix RELDIR  -- Prefix each filename with RELDIR.
;;
;; Each line in the authors file has the form:
;;
;;  USERNAME=FULL-INFO
;;
;; where USERNAME should correspond to the Unix username, and
;; FULL-INFO should include actual name and email address, such
;; as "J. R. Hacker <jrh@example.com>", but without the quotes.
;; See git-cvsimport(1) for more info.
;;
;; Normally, to compute the relative filename of the exported
;; repository, rcs-fast-export strips the common root directory of
;; all the working filenames (both specified and discovered).
;; The option --prefix RELDIR adds RELDIR to the beginning of
;; each exported filename. For example, given the two filenames:
;;
;;  /etc/foo/bar/RCS/f1,v
;;  /etc/foo/f2  (presuming /etc/foo/RCS/f2,v exists)
;;
;; then the working files are /etc/foo/bar/f1, /etc/foo/f2;
;; the common root is /etc/foo; and the exported filenames
;; are (normally, with "-p baz/qux", e.g.):
;;
;;   bar/f1, f2
;;   baz/qux/bar/f1, baz/qux/f2
;;
;; RELDIR must not be absolute.
;;
;; Limitations:
;;
;; - No tags (symbolic names) handling.
;;
;; - State changes may show up as empty commits.
;;
;; - The branch created is "from-rcs-BRNUM", where BRNUM is the
;;   part of the RCS revision prior to the last integer, with
;;   "." replaced by "-".  For example, for revision "1.3.5.7",
;;   the BRNUM will be "1-3-5".
;;
;;   Furthermore, branch detection is not file-specific.  This
;;   means that foo:1.3.5.7 and bar:1.3.5.7 will eventually map
;;   to the same branch (BRNUM "1-3-5").
;;
;; - Incomplete error checking, recovery, diagnostics, testing.
;;   In other words, code enhancement opportunities abound!

;;; Code:

(define-module (ttn-do rcs-fast-export)
  #:export (main)
  #:use-module ((ttn-do zzz banalities) #:select (check-hv
                                                  qop<-args))
  #:use-module ((ttn-do zzz personally) #:select (FE fs fso))
  #:use-module ((ttn-do zzz filesystem) #:select (dir-exists?
                                                  filtered-files-in-vicinity
                                                  filename-components-append
                                                  filename-components
                                                  expand-file-name))
  #:use-module ((srfi srfi-1) #:select (drop-right
                                        fold))
  #:use-module ((srfi srfi-13) #:select (string-trim-both
                                         string-take
                                         string-drop-right
                                         string-join
                                         string-prefix-length
                                         string-index-right
                                         string-prefix?
                                         string-suffix?))
  #:use-module ((ttn-do mogrify) #:select (find-file-read-only
                                           editing-buffer))
  #:use-module ((ice-9 q) #:select (make-q
                                    q-push!
                                    enq!))
  #:use-module ((ice-9 popen) #:select (open-input-pipe)))

(define (grok-authors filename)
  (let ((q (make-q)))
    (editing-buffer (find-file-read-only filename)
      (while (re-search-forward "^([^=]+)=(.+)$" #f #t)
        (enq! q (cons (string->symbol (string-trim-both (match-string 1)))
                      (string-trim-both (match-string 2))))))
    (car q)))

(define (progress s . args)
  (display "progress ")
  (apply fso s args)
  (newline))

(define (find-common-root filenames)
  (and (pair? filenames)
       (fold (lambda (filename so-far)
               (let* ((len (string-prefix-length filename so-far))
                      (slash (string-index-right filename #\/ 0 len)))
                 (string-take so-far (if slash
                                         (min len (1+ slash))
                                         len))))
             (car filenames)
             filenames)))

(define (canonicalize-revision string)
  (let ((q (make-q)))
    (editing-buffer string
      (goto-char (point-min))
      (while (re-search-forward "[0-9]+" #f #t)
        (enq! q (string->number (match-string 0)))))
    (car q)))

(define (utc<-string s)
  (car (mktime (car (strptime "%Y/%m/%d %H:%M:%S" s)) "UTC")))

(define (grok authors filenames)

  (define (utc< a b)
    (< (car a) (car b)))

  (define (working filename)
    (let ((rev (reverse! (filename-components (expand-file-name filename)))))
      (and (string-suffix? ",v" filename)
           (let ((stem (car rev)))
             (set-car! rev (string-drop-right stem 2))))
      (and (pair? (cdr rev))
           (string=? "RCS" (cadr rev))
           (set-cdr! rev (cddr rev)))
      (filename-components-append (reverse! rev))))

  (define (scan filename)
    (let ((buf (editing-buffer (open-input-pipe (fs "rlog '~A'" filename))))
          (wfn (working filename))
          (q (make-q))
          (last-utc -1))

      (define (add! utc . rest)
        ((if (< utc last-utc)
             q-push!
             enq!)
         q (cons* utc wfn rest))
        (set! last-utc utc))

      (editing-buffer buf
        (toggle-read-only 1)
        (goto-char (point-min))
        (while (re-search-forward "^revision ([0-9.]+)" #f #t)
          (let ((rev (canonicalize-revision (match-string 1)))
                (utc (and (search-forward "date: ")
                          (utc<-string (buffer-substring (point) (+ 19 (point))))))
                (author (and (re-search-forward "author: ([^;]+);")
                             (let ((raw (string->symbol (match-string 1))))
                               (or (assq-ref authors raw)
                                   raw))))
                (log-beg (begin (forward-line 1) (point)))
                (log-end (and (re-search-forward "^[-=]+$" #f #t)
                              (match-beginning 0))))
            (add! utc rev author (list log-beg log-end buf)))))
      (sort! (car q) utc<)))

  (fold (lambda (one all)
          (merge! all one utc<))
        '()
        (map scan filenames)))

(define LF #\newline)

(define (spew outp prefix rlen mark)
  (let ((blob-mark (mark))
        (buf (editing-buffer ""))
        (branches '())
        (latest #f))

    (define (output-blob! retrieve)
      (editing-buffer buf
        (erase-buffer)
        (insert (open-input-pipe retrieve) LF)
        (let ((size (- (point) 2)))
          (goto-char (point-min))
          (insert "blob" LF
                  "mark :" blob-mark LF
                  "data " size LF))
        (goto-char (point-max))))

    (define (output-commit filename utc actor rev blurb)
      (let* ((branch (drop-right rev 1))
             (cur-branch (assoc-ref branches branch))
             (br-name (or (and=> cur-branch car)
                          (let ((name (fs "refs/heads/from-rcs-~A"
                                          (string-join
                                           (map number->string branch)
                                           "-"))))
                            (set! cur-branch (cons name #f))
                            (set! branches (acons branch cur-branch branches))
                            (editing-buffer buf
                              (insert "reset " name LF))
                            name)))
             (m (mark)))
        (editing-buffer buf
          (insert "commit " br-name LF
                  "mark :" m LF
                  "author " actor LF
                  "committer " actor LF)
          (apply-to-args
           blurb (lambda (log-beg log-end why-buf)
                   (insert "data " (- log-end log-beg) LF
                           (editing-buffer why-buf
                             (buffer-substring log-beg log-end)))))
          (and=> (cdr cur-branch)
                 (lambda (from)
                   (insert "from :" from LF)))
          (insert "M 100644 :" blob-mark #\space prefix
                  (substring filename rlen)
                  LF LF)
          (write-to-port outp))
        (set-cdr! cur-branch m)
        m))

    (lambda (x)
      (apply-to-args
       x (lambda (utc filename rev author blurb)
           (let ((ym (strftime "%Y %B" (gmtime utc))))
             (or (equal? ym latest)
                 (begin (progress "starting ~A" ym)
                        (set! latest ym))))
           (output-blob! (fs "co -kk -p~A '~A' 2>/dev/null"
                             (string-join
                              (map number->string rev)
                              ".")
                             filename))
           (output-commit filename
                          utc (fs "~A ~A +0000" author utc)
                          rev blurb))))))

(define (comma-v-under directory)
  (define (yes dir)
    (filtered-files-in-vicinity dir (lambda (filename)
                                      (string-suffix? ",v" filename))
                                #:filter-prefixed))
  (append! (yes directory)
           (let ((more (fs "~A/RCS" directory)))
             (if (dir-exists? more)
                 (yes more)
                 '()))))

(define (main/qop qop)
  (let* ((n 0)
         (ls (map expand-file-name
                  (fold (lambda (one all)
                          (if (file-is-directory? one)
                              (append! (comma-v-under one) all)
                              (cons one all)))
                        '()
                        (let ((files (qop '())))
                          (if (null? files)
                              '(".")
                              files)))))
         (count (length ls))
         (root (and (positive? count) (find-common-root ls)))
         (rlen (and root (string-length root))))
    (progress "~A file~A~A" count (if (= 1 count) "" "s")
              (if root (fs ", root ~A" root) ""))
    (FE (grok (or (qop 'authors grok-authors)
                  '())
              ls)
        (spew (current-output-port)
              (or (qop 'prefix (lambda (prefix)
                                 (if (string-suffix? "/" prefix)
                                     prefix
                                     (fs "~A/" prefix))))
                  "")
              rlen
              (lambda ()
                (set! n (1+ n))
                n)))
    (progress "~A commits" (1- n)))
  #t)

(define (main args)
  (check-hv args '((package . "ttn-do")
                   (version . "0.2")
                   (help . commentary)))
  (main/qop
   (qop<-args
    args `((prefix (single-char #\p) (value #t)
                   (predicate ,(lambda (s)
                                 (and (not (string-null? s))
                                      (not (string-prefix? "/" s))))))
           (authors (single-char #\A) (value #t)
                    (predicate ,file-exists?))))))

;;; rcs-fast-export ends here