cl-grep

Simple implementation of grep
Log | Files | Refs

commit 6eb2dfa20a332dc6530f59a78e7df499069749ad
parent 93a2b4f5a90f0f8f40453191832df59cdd39a28f
Author: ChanderG <[email protected]>
Date:   Sun,  7 Dec 2025 21:50:18 +0530

deal with incomplete entries at end of chunks

result parity now with rg (in terms of line-numbers)

Diffstat:
M mmap.lisp | 40 +++++++++++++++++++++++++++++-----------
1 file changed, 29 insertions(+), 11 deletions(-)

diff --git a/mmap.lisp b/mmap.lisp @@ -1,13 +1,16 @@ (in-package :cl-grep) +(defparameter +chunk-size+ 4000) + (defstruct mfile name ptr fd size offset - buf) + buf + extra) (defun init-mfile (path) - (let ((mf (make-mfile :name path :buf nil :offset 0))) + (let ((mf (make-mfile :name path :buf nil :offset 0 :extra nil))) (multiple-value-bind (addr fd size) (mmap:mmap path :open '(:read :direct) :mmap '(:private :populate)) @@ -16,28 +19,43 @@ (setf (mfile-size mf) size)) mf)) -;; TODO: deal correctly with partial entries -;; For now: lines at the border are split and line-numbers are disturbed throughout +(defun format-buffer (mf raw) + (with-slots (ptr size offset buf extra) mf + (setf buf (uiop:split-string raw :separator '(#\Newline))) + (when extra + (setf (nth 0 buf) (concatenate 'string extra (nth 0 buf))) + (lo "Using extra from previous batch: ~a. New starter: ~a" extra (nth 0 buf)) + (setf extra nil)) + (when (not (eq #\Newline (char raw (- (length raw) 1)))) + (lo "Last line is not clean. 
Marking as extra: ~a" (car (last buf))) + (setf extra (car (last buf)))) + ;; always pop-off the last entry + ;; in normal cases that's the extra + ;; in case of newline being at the very end, + ;; there is a spurious empty line otherwise + (if (> (length buf) 1) + (nbutlast buf) + (setf buf nil)))) -;; TODO: deal with partial entries here (defun update-buffer (mf) (with-slots (ptr size offset buf) mf (when (eq offset size) (setf buf (list :eof)) (return-from update-buffer nil)) - (let* ((num (if (> (+ offset 4000) size) + (let* ((num (if (> (+ offset +chunk-size+) size) (- size offset) - 4000)) + +chunk-size+)) (nptr (cffi:inc-pointer ptr offset)) (data (cffi:foreign-string-to-lisp nptr :count num :encoding :utf-8))) (incf offset num) - (setf buf (uiop:split-string data :separator '(#\Newline #\Return)))))) + (format-buffer mf data)))) -;; TODO: deal with partial entries here (defun fetch-line (mf) (with-slots (ptr size offset buf) mf - (if (not buf) - (update-buffer mf)) + ;; in case the line is too long that it doesn't full the buffer immediately + (loop while (not buf) + do (update-buffer mf)) + (lo "Item in buf: ~a" (car buf)) (pop buf))) (defun end-mfile (mf)