cl-grep

Simple implementation of grep
Log | Files | Refs

commit b7be5964b64783d21d4e1933cdff638c90a95926
parent 6eaa3635f00bb1b143ea149abc2fe18f8938a199
Author: ChanderG <[email protected]>
Date:   Thu,  4 Dec 2025 20:35:34 +0530

add v1 of mmap based mechanism to read; NO PERF IMPROV

current version does not deal correctly with lines at chunk borders - tbd
surprisingly, still slow!

Diffstat:
Mgrep.asd | 4+++-
Mgrep.lisp | 48++++++++++++++++++++++++++++++++++--------------
Ammap.lisp | 44++++++++++++++++++++++++++++++++++++++++++++
Mpackage.lisp | 2+-
4 files changed, 82 insertions(+), 16 deletions(-)

diff --git a/grep.asd b/grep.asd @@ -3,6 +3,7 @@ :author "Chander Govindarajan" :components ((:file "package") (:file "log") + (:file "mmap") (:file "grep")) :depends-on (:uiop :filesystem-utils @@ -10,7 +11,8 @@ :vom :lparallel :bordeaux-threads - :cl-string-match) + :cl-string-match + :mmap) :build-operation "program-op" :build-pathname "cl-grep" :entry-point "cl-grep:main") diff --git a/grep.lisp b/grep.lisp @@ -42,7 +42,7 @@ (write-string (color-red (subseq line m (+ m lm))) stream) (setf s (+ m lm))) finally - (format stream "~a~%" (subseq line s (- (length line) 1)))))) + (format stream "~a~%" (subseq line s (- (length line) 1)))))) (format stream "~%")) ;; ;; alternative grep like format to easily test parity @@ -63,22 +63,42 @@ (incf curr (length *match*)) finally (return res))) +;; TODO: de-duplicate the functions below +;; it has a lot of common code copied + +(defun grep-file-disk (file match-idx) + (with-open-file (stream file) + (let ((results + (loop for line = (read-line stream nil :eof) + with i of-type fixnum = 0 with fmatch = nil + until (eq line :eof) + do (incf i) + ;; search for location of first match + if (setf fmatch (sm:search-bmh8 match-idx line)) + ;; now run the rest of the matches + collect (list i line (search-line line match-idx fmatch))))) + (when results + (lq:push-queue (make-file-result :name file :entries results) + *print-queue*))))) + +(defun grep-file-memory (file match-idx) + (let* ((mf (init-mfile file)) + (results (loop for line = (fetch-line mf) + with i of-type fixnum = 0 with fmatch = nil + until (eq line :eof) + do (incf i) + ;; search for location of first match + if (setf fmatch (sm:search-bmh8 match-idx line)) + ;; now run the rest of the matches + collect (list i line (search-line line match-idx fmatch))))) + (if results + (lq:push-queue (make-file-result :name file :entries results) *print-queue*)) + (end-mfile mf))) + (defun grep-file (file match-idx) (lo "Grepping file: ~a" file) (handler-case - (with-open-file (stream file) - (let ((results - (loop for line = (read-line stream nil :eof) - with i of-type fixnum = 0 with fmatch = nil - until (eq line :eof) - do (incf i) - ;; search for location of first match - if (setf fmatch (sm:search-bmh8 match-idx line)) - ;; now run the rest of the matches - collect (list i line (search-line line match-idx fmatch))))) - (when results - (lq:push-queue (make-file-result :name file :entries results) - *print-queue*)))) + (grep-file-memory file match-idx) (stream-error (stream) (lo "Skipping binary file: ~a" file)) (error (c) diff --git a/mmap.lisp b/mmap.lisp @@ -0,0 +1,44 @@ +(in-package :cl-grep) + +(defstruct mfile + name + ptr fd size + offset + buf) + +(defun init-mfile (path) + (let ((mf (make-mfile :name path :buf nil :offset 0))) + (multiple-value-bind (addr fd size) (mmap:mmap path + :open '(:read :direct) + :mmap '(:private :populate)) + (setf (mfile-ptr mf) addr) + (setf (mfile-fd mf) fd) + (setf (mfile-size mf) size)) + mf)) + +;; TODO: deal correctly with partial entries +;; For now: lines at the border are split and line-numbers are disturbed throughout + +;; TODO: deal with partial entries here +(defun update-buffer (mf) + (with-slots (ptr size offset buf) mf + (when (eq offset size) + (setf buf (list :eof)) + (return-from update-buffer nil)) + (let* ((num (if (> (+ offset 4000) size) + (- size offset) + 4000)) + (nptr (cffi:inc-pointer ptr offset)) + (data (cffi:foreign-string-to-lisp nptr :count num :encoding :utf-8))) + (incf offset num) + (setf buf (uiop:split-string data :separator '(#\Newline #\Return)))))) + +;; TODO: deal with partial entries here +(defun fetch-line (mf) + (with-slots (ptr size offset buf) mf + (if (not buf) + (update-buffer mf)) + (pop buf))) + +(defun end-mfile (mf) + (mmap:munmap (mfile-ptr mf) (mfile-fd mf) (mfile-size mf))) diff --git a/package.lisp b/package.lisp @@ -3,4 +3,4 @@ (:local-nicknames (:fsu :org.shirakumo.filesystem-utils) (:pu :org.shirakumo.pathname-utils) (:lq :lparallel.queue)) - (:export #:main)) + (:export #:main #:grep-launcher))