commit b7be5964b64783d21d4e1933cdff638c90a95926
parent 6eaa3635f00bb1b143ea149abc2fe18f8938a199
Author: ChanderG <[email protected]>
Date: Thu, 4 Dec 2025 20:35:34 +0530
add v1 of mmap based mechanism to read; NO PERF IMPROV
current version does not deal correctly with lines at chunk borders - tbd
surprisingly, still slow!
Diffstat:
4 files changed, 82 insertions(+), 16 deletions(-)
diff --git a/grep.asd b/grep.asd
@@ -3,6 +3,7 @@
:author "Chander Govindarajan"
:components ((:file "package")
(:file "log")
+ (:file "mmap")
(:file "grep"))
:depends-on (:uiop
:filesystem-utils
@@ -10,7 +11,8 @@
:vom
:lparallel
:bordeaux-threads
- :cl-string-match)
+ :cl-string-match
+ :mmap)
:build-operation "program-op"
:build-pathname "cl-grep"
:entry-point "cl-grep:main")
diff --git a/grep.lisp b/grep.lisp
@@ -42,7 +42,7 @@
(write-string (color-red (subseq line m (+ m lm))) stream)
(setf s (+ m lm)))
finally
- (format stream "~a~%" (subseq line s (- (length line) 1))))))
+ (format stream "~a~%" (subseq line s (- (length line) 1))))))
(format stream "~%"))
;; ;; alternative grep like format to easily test parity
@@ -63,22 +63,42 @@
(incf curr (length *match*))
finally (return res)))
+;; TODO: de-duplicate the functions below
+;; it has a lot of common code copied
+
+(defun grep-file-disk (file match-idx)
+ (with-open-file (stream file)
+ (let ((results
+ (loop for line = (read-line stream nil :eof)
+ with i of-type fixnum = 0 with fmatch = nil
+ until (eq line :eof)
+ do (incf i)
+ ;; search for location of first match
+ if (setf fmatch (sm:search-bmh8 match-idx line))
+ ;; now run the rest of the matches
+ collect (list i line (search-line line match-idx fmatch)))))
+ (when results
+ (lq:push-queue (make-file-result :name file :entries results)
+ *print-queue*)))))
+
+(defun grep-file-memory (file match-idx)
+ (let* ((mf (init-mfile file))
+ (results (loop for line = (fetch-line mf)
+ with i of-type fixnum = 0 with fmatch = nil
+ until (eq line :eof)
+ do (incf i)
+ ;; search for location of first match
+ if (setf fmatch (sm:search-bmh8 match-idx line))
+ ;; now run the rest of the matches
+ collect (list i line (search-line line match-idx fmatch)))))
+ (if results
+ (lq:push-queue (make-file-result :name file :entries results) *print-queue*))
+ (end-mfile mf)))
+
(defun grep-file (file match-idx)
(lo "Grepping file: ~a" file)
(handler-case
- (with-open-file (stream file)
- (let ((results
- (loop for line = (read-line stream nil :eof)
- with i of-type fixnum = 0 with fmatch = nil
- until (eq line :eof)
- do (incf i)
- ;; search for location of first match
- if (setf fmatch (sm:search-bmh8 match-idx line))
- ;; now run the rest of the matches
- collect (list i line (search-line line match-idx fmatch)))))
- (when results
- (lq:push-queue (make-file-result :name file :entries results)
- *print-queue*))))
+ (grep-file-memory file match-idx)
(stream-error (stream)
(lo "Skipping binary file: ~a" file))
(error (c)
diff --git a/mmap.lisp b/mmap.lisp
@@ -0,0 +1,44 @@
+(in-package :cl-grep)
+
+(defstruct mfile
+ name
+ ptr fd size
+ offset
+ buf)
+
+(defun init-mfile (path)
+ (let ((mf (make-mfile :name path :buf nil :offset 0)))
+ (multiple-value-bind (addr fd size) (mmap:mmap path
+ :open '(:read :direct)
+ :mmap '(:private :populate))
+ (setf (mfile-ptr mf) addr)
+ (setf (mfile-fd mf) fd)
+ (setf (mfile-size mf) size))
+ mf))
+
+;; TODO: deal correctly with partial entries
+;; For now: lines at the border are split and line-numbers are disturbed throughout
+
+;; TODO: deal with partial entries here
+(defun update-buffer (mf)
+ (with-slots (ptr size offset buf) mf
+ (when (eq offset size)
+ (setf buf (list :eof))
+ (return-from update-buffer nil))
+ (let* ((num (if (> (+ offset 4000) size)
+ (- size offset)
+ 4000))
+ (nptr (cffi:inc-pointer ptr offset))
+ (data (cffi:foreign-string-to-lisp nptr :count num :encoding :utf-8)))
+ (incf offset num)
+ (setf buf (uiop:split-string data :separator '(#\Newline #\Return))))))
+
+;; TODO: deal with partial entries here
+(defun fetch-line (mf)
+ (with-slots (ptr size offset buf) mf
+ (if (not buf)
+ (update-buffer mf))
+ (pop buf)))
+
+(defun end-mfile (mf)
+ (mmap:munmap (mfile-ptr mf) (mfile-fd mf) (mfile-size mf)))
diff --git a/package.lisp b/package.lisp
@@ -3,4 +3,4 @@
(:local-nicknames (:fsu :org.shirakumo.filesystem-utils)
(:pu :org.shirakumo.pathname-utils)
(:lq :lparallel.queue))
- (:export #:main))
+ (:export #:main #:grep-launcher))