cl-grep

Simple implementation of grep
Log | Files | Refs

grep.lisp (7423B)


      1 (in-package :cl-grep)
      2 
      3 (declaim (optimize (speed 3) (debug 0) (safety 0)))
      4 (declaim (inline format-file-result search-line))
      5 
      6 (defparameter +numw+ 4)
      7 (defparameter +file-lc-blacklist+ '(#\~ #\#))
      8 (defparameter +file-ext-blacklist+
      9   '("pwd" "png" "svg" "jpeg" "gif" "mov" "mp4" "mkv"
     10     "blob" "o" "so" "a"))
     11 (defparameter +dir-blacklist+ '(".git" ".venv"))
     12 
     13 (defparameter *match* nil)
     14 
     15 (defparameter *queue* nil)
     16 (defparameter *print-queue* nil)
     17 (defun setup-tasks-mgmt ()
     18   (setf *queue* (lq:make-queue))
     19   (setf *print-queue* (lq:make-queue)))
     20 
     21 (defmacro color-magenta (str)
     22    `(format nil "~c[35m~a~c[0m" #\ESC ,str #\ESC))
     23 (defmacro color-green (str)
     24    `(format nil "~c[32m~a~c[0m" #\ESC ,str #\ESC))
     25 (defmacro color-red (str)
     26    `(format nil "~c[91m~a~c[0m" #\ESC ,str #\ESC))
     27 
     28 (defstruct file-result
     29   name entries)
     30 
     31 ;; rg like combined format
     32 (defun format-file-result (fr stream)
     33   (format stream "~a: ~%" (color-magenta (file-result-name fr)))
     34   (loop for e in (file-result-entries fr) do
     35     ;; print the file name
     36     (format stream "~a: " (color-green (car e)))
     37     ;; now print the line with color coding of matching portions
     38     (let ((line (cadr e))
     39           (locs (caddr e))
     40           (lm (length *match*)))
     41       (loop with s = 0
     42             for n from 0 below (length locs) do
     43             (let ((m (nth n locs)))
     44               (write-string (subseq line s m) stream)
     45               (write-string (color-red (subseq line m (+ m lm))) stream)
     46               (setf s (+ m lm)))
     47             finally
     48             (format stream "~a~%" (subseq line s (length line))))))
     49   (format stream "~%"))
     50 
     51 ;; ;; alternative grep like format to easily test parity
     52 ;; (defun format-file-result (fr stream)
     53 ;;   (loop for e in (file-result-entries fr) do
     54 ;;     (write-string (format nil "~a: ~a~%" (file-result-name fr) e) stream)))
     55 
     56 ;; Search a line for all matches of our target
     57 ;; returns a list of positions in the line
     58 (defun search-line (line match-idx fmatch)
     59   (declare (type fixnum fmatch))
     60   (loop with res = (list fmatch)
     61         ;; todo: of-type fixnum here leads to failure
     62         with curr = (+ fmatch (length *match*))
     63         while (setf curr (sm:search-bmh8 match-idx line :start2 curr))
     64         do
     65           (setf res (append res (list curr)))
     66           (incf curr (length *match*))
     67         finally (return res)))
     68 
     69 ;; TODO: de-duplicate the functions below
     70 ;; it has a lot of common code copied
     71 
     72 (defun grep-file-disk (file match-idx)
     73   (with-open-file (stream file)
     74     (let ((results
     75            (loop for line = (read-line stream nil :eof)
     76                  with i of-type fixnum = 0 with fmatch = nil
     77                  until (eq line :eof)
     78                  do (incf i)
     79                  ;; search for location of first match
     80                  if (setf fmatch (sm:search-bmh8 match-idx line))
     81                  ;; now run the rest of the matches
     82                  collect (list i line (search-line line match-idx fmatch)))))
     83       (when results
     84         (lq:push-queue (make-file-result :name file :entries results)
     85                        *print-queue*)))))
     86 
     87 (defun grep-file-memory (file match-idx)
     88   (let* ((mf (init-mfile file))
     89          (results (loop for line = (fetch-line mf)
     90                         with i of-type fixnum = 0 with fmatch = nil
     91                         until (eq line :eof)
     92                         do (incf i)
     93                         ;; search for location of first match
     94                         if (setf fmatch (sm:search-bmh8 match-idx line))
     95                         ;; now run the rest of the matches
     96                         collect (list i line (search-line line match-idx fmatch)))))
     97     (if results
     98       (lq:push-queue (make-file-result :name file :entries results) *print-queue*))
     99     (end-mfile mf)))
    100 
    101 (defparameter grep-file-mode 'disk)
    102 
    103 (defun grep-file (file match-idx)
    104   (lo "Grepping file: ~a" file)
    105   (handler-case
    106     (if (eq grep-file-mode 'disk)
    107       (grep-file-disk file match-idx)
    108       (grep-file-memory file match-idx))
    109     (stream-error (stream)
    110       (lo "Skipping binary file: ~a" file))
    111     (error (c)
    112       (lo "Unknown error: ~a when reading file ~a" c file))))
    113 
    114 (defun process-task ()
    115   (let ((match-idx (sm:initialize-bmh8 *match*)))
    116     (loop
    117      (let ((task (lq:pop-queue *queue*)))
    118        (if (eq task :done)
    119          (return))
    120        (grep-file task match-idx)))))
    121 
    122 (defun result-printer ()
    123   (let ((bufstream (make-string-output-stream))
    124         (count 0))
    125     (declare (type fixnum count))
    126     (loop
    127       (handler-case
    128         (let ((task (lq:pop-queue *print-queue*)))
    129           (when (eq task :done)
    130              ;; flush out remaining stuff
    131              (princ (get-output-stream-string bufstream))
    132              (return))
    133           (format-file-result task bufstream)
    134           (incf count)
    135           ;; buffer and print out
    136           (when (eq count 10)
    137             (princ (get-output-stream-string bufstream))
    138             (clear-output)
    139             (setf count 0)))
    140         (error (c)
    141           (lo "Error in printing: ~a~%" c))))))
    142 
    143 (defun queue-file (file)
    144   (lo "Visiting file: ~a" file)
    145   (let* ((name (pu:file-name file))
    146          (lc (char name (- (length name) 1))))
    147     (when (member lc +file-lc-blacklist+)
    148       (lo "Skipping ignorable file: ~a" file)
    149       (return-from queue-file nil))
    150     (when (member (pathname-type file) +file-ext-blacklist+ :test #'equalp)
    151       (return-from queue-file nil))
    152     ;; do the actual thing here
    153     (lq:push-queue file *queue*)))
    154 
    155 (defun fs-walker (dir)
    156   (lo "Visiting dir: ~a" dir)
    157   (when (not dir)
    158     (lo "Obtained invalid dir nil, somehow.")
    159     (return-from fs-walker nil))
    160 
    161   (handler-case
    162     (progn
    163       ;; check if we should visit this dir
    164       (let ((dirname (pu:directory-name dir)))
    165         (when (member dirname +dir-blacklist+ :test #'equalp)
    166           (lo "Skipping dir: ~a" dir)
    167           (return-from fs-walker nil)))
    168       ;; process files in this dir
    169       (fsu:map-directory #'queue-file dir
    170                          :type ':file)
    171       ;; go through sub-folders
    172       (fsu:map-directory #'fs-walker dir
    173                             :type ':directory))
    174     (error (c)
    175       (lo "Error in walking dir ~a: ~a" dir c))))
    176 
    177 ;; Main entrypoint for the grep functionality
    178 (defun grep-launcher (str dir)
    179   (log-setup)
    180   (setf *match* str)
    181   (setup-tasks-mgmt)
    182   (let* ((workers (loop for i from 0 below +numw+
    183                         collect (bt:make-thread #'process-task)))
    184          (print-worker (bt:make-thread #'result-printer)))
    185     (fs-walker dir)
    186     (lo "Finished processing target walking!")
    187     (loop for i from 0 below +numw+
    188           do (lq:push-queue :done *queue*))
    189     (loop for w in workers do (bt:join-thread w))
    190     (lq:push-queue :done *print-queue*)
    191     (bt:join-thread print-worker)))
    192 
    193 (defun grep-func (str dirstr)
    194   (let ((dir (car (directory dirstr))))
    195     (grep-launcher str dir)))
    196 
    197 (defun main ()
    198   (let* ((raw-args (uiop:command-line-arguments))
    199          (args (process-options raw-args))
    200          (nargs (length args)))
    201     (when (or (eq nargs 0) (> nargs 2))
    202       (format t "Incorrect number of args passed!~%")
    203       (format t "Usage: cl-grep <str> <dir>~%")
    204       (return-from main nil))
    205     (let* ((arg2 (if (eq nargs 2) (cadr args) "."))
    206            (dirs (directory arg2)))
    207       (when (not dirs)
    208         (format t "~a found to not be a valid directory.~%" arg2)
    209         (format t "Quitting!~%"))
    210       (grep-launcher (car args) (car dirs)))))