From a21354a0cfc16990676d286724f85f25adf0d13e Mon Sep 17 00:00:00 2001 From: hat0uma <55551571+hat0uma@users.noreply.github.com> Date: Sun, 7 Dec 2025 18:56:19 +0900 Subject: [PATCH 1/5] refactor(metrics): modularize into column tracker, row builder, and row mapper --- lua/csvview/metrics.lua | 402 ++++------------------------ lua/csvview/metrics_column.lua | 133 +++++++++ lua/csvview/metrics_row_builder.lua | 134 ++++++++++ lua/csvview/metrics_row_mapper.lua | 223 +++++++++++++++ 4 files changed, 548 insertions(+), 344 deletions(-) create mode 100644 lua/csvview/metrics_column.lua create mode 100644 lua/csvview/metrics_row_builder.lua create mode 100644 lua/csvview/metrics_row_mapper.lua diff --git a/lua/csvview/metrics.lua b/lua/csvview/metrics.lua index 072feb4..3d1374b 100644 --- a/lua/csvview/metrics.lua +++ b/lua/csvview/metrics.lua @@ -1,23 +1,24 @@ local nop = function() end +local ColumnTracker = require("csvview.metrics_column") local CsvViewMetricsRow = require("csvview.metrics_row") +local RowBuilder = require("csvview.metrics_row_builder") +local RowMapper = require("csvview.metrics_row_mapper") ----------------------------------------------------------------------------- --- Metrics class +-- Metrics class (Facade) +-- Coordinates row storage, column tracking, and line mapping ----------------------------------------------------------------------------- ---- Get field by byte offset + --- @class CsvView.Metrics --- @field private _rows CsvView.Metrics.Row[] ---- @field private _columns CsvView.Metrics.Column[] +--- @field private _columns CsvView.ColumnTracker +--- @field private _mapper CsvView.RowMapper --- @field private _bufnr integer --- @field private _opts CsvView.InternalOptions --- @field private _parser CsvView.Parser --- @field private _current_parse { cancelled: boolean }? 
local CsvViewMetrics = {} ---- @class CsvView.Metrics.Column ---- @field max_width integer ---- @field max_row integer - --- Create new CsvViewMetrics instance ---@param bufnr integer ---@param opts CsvView.InternalOptions @@ -31,8 +32,16 @@ function CsvViewMetrics:new(bufnr, opts, parser) obj._opts = opts obj._parser = parser obj._rows = {} - obj._columns = {} + -- Create row mapper with callbacks to access rows + local get_row = function(lnum) + return obj._rows[lnum] + end + local row_count = function() + return #obj._rows + end + obj._columns = ColumnTracker:new(get_row, row_count) + obj._mapper = RowMapper:new(get_row, row_count) return setmetatable(obj, self) end @@ -41,10 +50,7 @@ function CsvViewMetrics:clear() for _ = 1, #self._rows do table.remove(self._rows) end - - for _ = 1, #self._columns do - table.remove(self._columns) - end + self._columns:clear() end --- @@ -69,9 +75,9 @@ function CsvViewMetrics:row(opts) assert(not (opts.lnum and opts.row_idx), "opts.lnum and opts.row_idx are mutually exclusive") if opts.lnum then - return self:_get_row_by_lnum(opts.lnum) + return self._rows[opts.lnum] else - return self:_get_row_by_row_idx(opts.row_idx) + return self._mapper:get_row_by_row_idx(opts.row_idx) end end @@ -85,14 +91,14 @@ end ---@param col_idx 1-indexed column index ---@return CsvView.Metrics.Column? function CsvViewMetrics:column(col_idx) - return self._columns[col_idx] + return self._columns:get(col_idx) end --- Compute metrics for the entire buffer ---@param on_end fun(err:string|nil)? callback for when the update is complete function CsvViewMetrics:compute_buffer(on_end) on_end = on_end or nop - self:_compute_metrics(nil, nil, {}, on_end) + self:_compute_metrics(nil, nil, on_end) end --- Update metrics for specified range @@ -100,11 +106,11 @@ end --- Metrics are optimized to recalculate only the changed range. --- However, the entire column is recalculated in the following cases. 
--- (1) If the line recorded as the maximum width of the column is deleted. ---- See: [MAX_ROW_DELETION] (in `_mark_recalculation_on_delete`) +--- See: [MAX_ROW_DELETION] (in ColumnTracker:mark_dirty_on_row_delete) --- (2) If a field was deleted and it was the maximum width in its column. ---- See: [MAX_FIELD_DELETION] (in `_mark_recalculation_on_decrease_fields`) +--- See: [MAX_FIELD_DELETION] (in ColumnTracker:mark_dirty_on_field_decrease) --- (3) If the maximum width has shrunk. ---- See: [SHRINK_WIDTH] (in `_adjust_column_metrics_for_row`) +--- See: [SHRINK_WIDTH] (in ColumnTracker:update_width) --- ---@param first integer first line number ---@param prev_last integer previous last line @@ -121,20 +127,16 @@ function CsvViewMetrics:update(first, prev_last, last, on_end) -- Get the range of affected lines local start_reparse, end_reparse = self:_calculate_reparse_range(first, prev_last, last) - ---@type table - local recalculate_columns = {} - - -- print("update", first, prev_last, last) local delta = last - prev_last if delta > 0 then self:_add_row_placeholders(prev_last + 1, delta) elseif delta < 0 then self:_remove_rows(last + 1, math.abs(delta)) - self:_mark_recalculation_on_delete(prev_last, last, recalculate_columns) + self:_mark_recalculation_on_delete(prev_last, last) end -- update metrics - self:_compute_metrics(start_reparse, end_reparse, recalculate_columns, on_end) + self:_compute_metrics(start_reparse, end_reparse, on_end) end --- Calculate the range of logical CSV rows for the changed lines @@ -148,12 +150,12 @@ function CsvViewMetrics:_calculate_reparse_range(first, prev_last, last) local start_reparse, end_reparse --- @type integer, integer if (first + 1) <= #self._rows then -- if adding a new row before the last row - local field_start_lnum, field_end_lnum = self:get_logical_row_range(first + 1) + local field_start_lnum, field_end_lnum = self._mapper:get_logical_row_range(first + 1) start_reparse = field_start_lnum end_reparse = 
math.max(field_end_lnum, last) elseif first ~= 0 and first <= #self._rows then -- if adding a new row at the end of the last row - local field_start_lnum, field_end_lnum = self:get_logical_row_range(first) + local field_start_lnum, field_end_lnum = self._mapper:get_logical_row_range(first) start_reparse = field_start_lnum end_reparse = math.max(field_end_lnum, last) else @@ -173,144 +175,16 @@ function CsvViewMetrics:_calculate_reparse_range(first, prev_last, last) return start_reparse, end_reparse end ---- Get row metrics by line number ----@param lnum integer 1-indexed line number ----@return CsvView.Metrics.Row? -function CsvViewMetrics:_get_row_by_lnum(lnum) - return self._rows[lnum] -end - ---- Get row metrics by CSV row index ----@param row_idx integer 1-indexed CSV row index ----@return CsvView.Metrics.Row? -function CsvViewMetrics:_get_row_by_row_idx(row_idx) - local logical_row_count = 0 - - for i = 1, #self._rows do - local row = self._rows[i] - - -- Count only the start of logical rows - if row.type == "singleline" or row.type == "multiline_start" then - logical_row_count = logical_row_count + 1 - if logical_row_count == row_idx then - return row - end - end - end - - return nil -- Row not found -end - ---- Compute row metrics ----@param lnum integer line number ----@param is_comment boolean ----@param parsed_fields CsvView.Parser.FieldInfo[] ----@param parsed_endlnum integer end line number of the parsed row ----@param terminated boolean whether the row is terminated, if false, parser reached lookahead limit ----@return CsvView.Metrics.Row[] -local function construct_rows(lnum, is_comment, parsed_fields, parsed_endlnum, terminated) - if is_comment then - return { CsvViewMetricsRow.new_comment_row() } - end - - if parsed_endlnum == lnum then -- Single line row - local row_fields = {} ---@type CsvView.Metrics.Field[] - for _, field in ipairs(parsed_fields) do - local field_text = field.text - assert(type(field_text) == "string") - - local width = 
vim.fn.strdisplaywidth(field_text) - table.insert(row_fields, { - offset = field.start_pos - 1, - len = #field_text, - display_width = width, - is_number = tonumber(field_text) ~= nil, - }) - end - return { CsvViewMetricsRow.new_single_row(row_fields) } - end - - -- Multi-line row - local total_rows = parsed_endlnum - lnum + 1 - local row_fields = {} --- @type table - local row_skipped_ncol = {} --- @type table - - -- Initialize field arrays for each row - for i = 1, total_rows do - row_fields[i] = {} - row_skipped_ncol[i] = 0 - end - - -- First pass: distribute fields to rows and calculate skipped columns - local current_row_index = 1 - for field_index, field in ipairs(parsed_fields) do - local field_text = field.text - - if type(field_text) == "table" then - -- Multi-line field - for i, text in ipairs(field_text) do - -- first line starts at field.start_pos, others are 0 - local offset = i == 1 and field.start_pos - 1 or 0 - local width = vim.fn.strdisplaywidth(text) - table.insert(row_fields[current_row_index], { - offset = offset, - len = #text, - display_width = width, - is_number = false, - }) - - -- Set skipped columns for continuation rows - if i > 1 and row_skipped_ncol[current_row_index] == 0 then - row_skipped_ncol[current_row_index] = field_index - 1 - end - - -- Move to next row if not the last line of this field - if i ~= #field_text then - current_row_index = current_row_index + 1 - end - end - else - -- Single-line field - table.insert(row_fields[current_row_index], { - offset = field.start_pos - 1, - len = #field.text, - display_width = vim.fn.strdisplaywidth(field_text), - is_number = tonumber(field.text) ~= nil, - }) - end - end - - -- Second pass: create rows with all fields initialized - local rows = {} --- @type CsvView.Metrics.Row[] - for i = 1, total_rows do - if i == 1 then - rows[i] = CsvViewMetricsRow.new_multiline_start_row(parsed_endlnum - lnum, terminated, row_fields[i]) - else - rows[i] = 
CsvViewMetricsRow.new_multiline_continuation_row( - i - 1, -- relative start line offset - parsed_endlnum - lnum - i + 1, -- relative end line offset - row_skipped_ncol[i], - terminated, - row_fields[i] - ) - end - end - - return rows -end - --- Compute metrics ---- ---@param startlnum integer? if present, compute only specified range ---@param endlnum integer? if present, compute only specified range ----@param recalculate_columns table recalculate specified columns ---@param on_end fun(err:string|nil) callback for when the update is complete -function CsvViewMetrics:_compute_metrics(startlnum, endlnum, recalculate_columns, on_end) +function CsvViewMetrics:_compute_metrics(startlnum, endlnum, on_end) -- Parse specified range and update metrics. self._parser:parse_lines(self._opts.parser.async_chunksize, { on_line = function(lnum, is_comment, fields, parsed_endlnum, terminated) local new_endlnum = nil ---@type integer? - local rows = construct_rows(lnum, is_comment, fields, parsed_endlnum, terminated) + local rows = RowBuilder.construct_rows(lnum, is_comment, fields, parsed_endlnum, terminated) assert(#rows == parsed_endlnum - lnum + 1, "Invalid number of rows computed") -- Update row metrics and adjust column metrics @@ -325,8 +199,8 @@ function CsvViewMetrics:_compute_metrics(startlnum, endlnum, recalculate_columns new_endlnum = vim.api.nvim_buf_line_count(self._bufnr) - 1 end - self:_mark_recalculation_on_decrease_fields(line, prev_row, recalculate_columns) - self:_adjust_column_metrics_for_row(line, recalculate_columns) + self:_mark_recalculation_on_decrease_fields(line, prev_row, row) + self:_update_column_metrics_for_row(line) end return new_endlnum end, @@ -336,48 +210,18 @@ function CsvViewMetrics:_compute_metrics(startlnum, endlnum, recalculate_columns return end - -- Recalculate column metrics if necessary - -- vim.print("recalculate_columns", recalculate_columns) - for col_idx, _ in pairs(recalculate_columns) do - self:_recalculate_column(col_idx) - end 
+ -- Recalculate dirty columns + self._columns:recalculate_dirty() - -- notify the end of the update on_end() end, }, startlnum, endlnum, self._current_parse) end ---- Recalculate column metrics for the specified column ----@param col_idx integer -function CsvViewMetrics:_recalculate_column(col_idx) - local max_width = 0 - local max_row = nil - - -- Find the maximum width in the column - for row_idx, row in ipairs(self._rows) do - local field = row:field(col_idx) - if field and field.display_width > max_width then - max_width = field.display_width - max_row = row_idx - end - end - - if max_row then - -- Update column metrics - self._columns[col_idx].max_width = max_width - self._columns[col_idx].max_row = max_row - else - -- Remove column if it is empty - self._columns[col_idx] = nil - end -end - --- Mark column for recalculation on delete ---@param prev_last integer ---@param last integer ----@param recalculate_columns table -function CsvViewMetrics:_mark_recalculation_on_delete(prev_last, last, recalculate_columns) +function CsvViewMetrics:_mark_recalculation_on_delete(prev_last, last) -- [MAX_ROW_DELETION] -- If the deleted line was the maximum line of the column, it is recalculated. -- e.g. @@ -389,9 +233,9 @@ function CsvViewMetrics:_mark_recalculation_on_delete(prev_last, last, recalcula -- -- -> prev_last = 1, last = 0 -- In this case, the column metrics for the first column need to be recalculated. 
- for col_idx, column in ipairs(self._columns) do + for col_idx, column in self._columns:iter() do if column.max_row > last and column.max_row <= prev_last then - recalculate_columns[col_idx] = true + self._columns:mark_dirty(col_idx) end end end @@ -399,8 +243,8 @@ end --- Mark column for recalculation on decrease fields ---@param row_idx integer ---@param prev_row CsvView.Metrics.Row | nil ----@param recalculate_columns table -function CsvViewMetrics:_mark_recalculation_on_decrease_fields(row_idx, prev_row, recalculate_columns) +---@param curr_row CsvView.Metrics.Row +function CsvViewMetrics:_mark_recalculation_on_decrease_fields(row_idx, prev_row, curr_row) -- [MAX_FIELD_DELETION] -- If a field is deleted and it was the maximum width in its column, mark the column for recalculation. -- e.g. @@ -416,14 +260,13 @@ function CsvViewMetrics:_mark_recalculation_on_decrease_fields(row_idx, prev_row return end - local row = self._rows[row_idx] - for col_idx, _ in prev_row:iter() do -- Check if the column exists and if the current row was the maximum width row for this column. - if self._columns[col_idx] and self._columns[col_idx].max_row == row_idx then - local current_field = row:field(col_idx) + local column = self._columns:get(col_idx) + if column and column.max_row == row_idx then + local current_field = curr_row:field(col_idx) if not current_field then - recalculate_columns[col_idx] = true + self._columns:mark_dirty(col_idx) end end end @@ -431,33 +274,15 @@ end --- Adjust column metrics for the specified row ---@param row_idx integer row index ----@param recalculate_columns table recalculate columns -function CsvViewMetrics:_adjust_column_metrics_for_row(row_idx, recalculate_columns) +function CsvViewMetrics:_update_column_metrics_for_row(row_idx) local row = self._rows[row_idx] -- Update column metrics + -- [SHRINK_WIDTH] is handled in ColumnTracker:update_width + -- If the max width shrinks, the column is marked for recalculation. 
for col_idx, field in row:iter() do - local column = self:_ensure_column(col_idx) - local width = field.display_width - - if width > column.max_width then - column.max_width = width - column.max_row = row_idx - elseif column.max_row == row_idx and width < column.max_width then - -- [SHRINK_WIDTH] Mark for recalculation if max width shrinks - recalculate_columns[col_idx] = true - end - end -end - ---- Ensure column metrics ----@param col_idx integer ----@return CsvView.Metrics.Column -function CsvViewMetrics:_ensure_column(col_idx) - if not self._columns[col_idx] then - self._columns[col_idx] = { max_width = 0, max_row = 0 } + self._columns:update_width(col_idx, row_idx, field.display_width) end - return self._columns[col_idx] end --- Add row placeholders @@ -500,125 +325,40 @@ function CsvViewMetrics:_remove_rows(start, num) end end +----------------------------------------------------------------------------- +-- Mapping API (delegated to RowMapper) +----------------------------------------------------------------------------- + --- Find the start of the logical row containing the given physical line number ---@param lnum integer physical line number ---@return integer logical_start_lnum, integer logical_end_lnum function CsvViewMetrics:get_logical_row_range(lnum) - local row = self._rows[lnum] - if not row then - error(string.format("Row out of bounds lnum=%d", lnum)) - end - - if row.type == "multiline_continuation" then - local start_lnum = lnum - row.start_loffset - local endlnum = start_lnum + self._rows[start_lnum].end_loffset - return start_lnum, endlnum - elseif row.type == "multiline_start" then - return lnum, lnum + row.end_loffset - else - return lnum, lnum - end + return self._mapper:get_logical_row_range(lnum) end --- Get logical row number from physical line number ---@param physical_lnum integer Physical line number (1-based) ---@return integer? 
logical_row_num Logical row number (1-based) function CsvViewMetrics:get_logical_row_idx(physical_lnum) - local logical_row_num = 0 - - for i = 1, physical_lnum do - local row = self._rows[i] - if not row then - return nil -- Out of bounds - end - - -- Count only the start of logical rows - if row.type == "singleline" or row.type == "multiline_start" or row.type == "comment" then - logical_row_num = logical_row_num + 1 - end - end - - return logical_row_num + return self._mapper:physical_to_logical(physical_lnum) end --- Get the physical line number for a logical row number ---@param logical_row_num integer Logical row number (1-based) ---@return integer? physical_lnum Physical line number (1-based) function CsvViewMetrics:get_physical_line_number(logical_row_num) - local logical_count = 0 - - for i = 1, #self._rows do - local row = self._rows[i] - - -- Count only the start of logical rows - if row.type == "singleline" or row.type == "multiline_start" or row.type == "comment" then - logical_count = logical_count + 1 - if logical_count == logical_row_num then - return i - end - end - end - - return nil -- Not found + return self._mapper:logical_to_physical(logical_row_num) end ---- @alias CsvView.Metrics.LogicalFieldRange { start_row: integer, start_col: integer, end_row: integer, end_col: integer } - --- Get field ranges for a logical row containing the given physical line number. 
---@param opts { lnum?: integer, row_idx?:integer } specify either `lnum` or `row_idx` ---@return CsvView.Metrics.LogicalFieldRange[] ranges List of logical field ranges for the row function CsvViewMetrics:get_logical_row_fields(opts) - local lnum = opts.lnum or self:get_physical_line_number(opts.row_idx) + local lnum = opts.lnum or self._mapper:logical_to_physical(opts.row_idx) if not lnum then error(string.format("Invalid lnum or row_idx: lnum=%s, row_idx=%s", opts.lnum, opts.row_idx)) end - - local row = self:row({ lnum = lnum }) - if not row then - error(string.format("Row not found for lnum=%d", lnum)) - end - local ranges = {} --- @type CsvView.Metrics.LogicalFieldRange[] - - -- Handle comment or empty rows - if row.type == "comment" or row:field_count() == 0 then - return ranges - end - - if row.type == "singleline" then - for _, field in row:iter() do - local start_col = field.offset - local range = { --- @type CsvView.Metrics.LogicalFieldRange - start_row = lnum, - start_col = start_col, - end_row = lnum, - end_col = math.max(field.offset + field.len, start_col), - } - table.insert(ranges, range) - end - return ranges - end - - -- Handle multi-line rows - local logical_start_lnum, logical_end_lnum = self:get_logical_row_range(lnum) - for i = logical_start_lnum, logical_end_lnum do - local logical_row = assert(self:row({ lnum = i })) - for col_idx, field in logical_row:iter() do - if not ranges[col_idx] then - ranges[col_idx] = { --- @type CsvView.Metrics.LogicalFieldRange - start_row = i, - start_col = field.offset, - end_row = i, - end_col = field.offset + field.len, - } - else - -- Extend the end row and column if this field continues on the same logical row - ranges[col_idx].end_row = i - ranges[col_idx].end_col = field.offset + field.len - end - end - end - - return ranges + return self._mapper:get_logical_row_fields(lnum) end --- Get the logical field range for a given line number and byte offset. 
@@ -627,33 +367,7 @@ end ---@return integer col_idx Column index of the field containing the byte offset ---@return CsvView.Metrics.LogicalFieldRange range Logical field range for the given line and offset function CsvViewMetrics:get_logical_field_by_offet(lnum, offset) - -- Convert the byte position to a column index - local ranges = self:get_logical_row_fields({ lnum = lnum }) - if #ranges == 0 then - error(string.format("No fields found for lnum=%d", lnum)) - end - - local col_idx ---@type integer - for i = 2, #ranges do - if lnum < ranges[i].start_row then - col_idx = i - 1 - break - end - if lnum == ranges[i].start_row and offset < ranges[i].start_col then - -- If the line number is the same but the byte position is before the start of this range - col_idx = i - 1 - break - end - end - if not col_idx then - col_idx = #ranges - end - - return col_idx, ranges[col_idx] + return self._mapper:get_logical_field_by_offset(lnum, offset) end ----------------------------------------------------- --- Row functions ----------------------------------------------------- - return CsvViewMetrics diff --git a/lua/csvview/metrics_column.lua b/lua/csvview/metrics_column.lua new file mode 100644 index 0000000..b32c320 --- /dev/null +++ b/lua/csvview/metrics_column.lua @@ -0,0 +1,133 @@ +----------------------------------------------------------------------------- +-- Column Tracker Module +-- Responsible for tracking column max widths and managing recalculation +----------------------------------------------------------------------------- + +--- @class CsvView.ColumnTracker +--- @field private _columns CsvView.Metrics.Column[] +--- @field private _get_row fun(lnum: integer): CsvView.Metrics.Row? function to get row by physical line number +--- @field private _row_count fun(): integer function to get total row count +local ColumnTracker = {} + +--- @class CsvView.Metrics.Column +--- @field max_width integer +--- @field max_row integer +--- @field dirty boolean? 
whether the column needs recalculation + +--- Create new ColumnTracker instance +---@param get_row fun(lnum: integer): CsvView.Metrics.Row? function to get row by physical line number +---@param row_count fun(): integer function to get total row count +---@return CsvView.ColumnTracker +function ColumnTracker:new(get_row, row_count) + self.__index = self + local obj = { + _columns = {}, + _get_row = get_row, + _row_count = row_count, + } + return setmetatable(obj, self) +end + +--- Clear all column data +function ColumnTracker:clear() + self._columns = {} +end + +--- Get column metrics +---@param col_idx integer 1-indexed column index +---@return CsvView.Metrics.Column? +function ColumnTracker:get(col_idx) + return self._columns[col_idx] +end + +--- Get number of columns +---@return integer +function ColumnTracker:count() + local max_col = 0 + for col_idx, _ in pairs(self._columns) do + if col_idx > max_col then + max_col = col_idx + end + end + return max_col +end + +--- Ensure column exists +---@param col_idx integer +---@return CsvView.Metrics.Column +function ColumnTracker:ensure(col_idx) + if not self._columns[col_idx] then + self._columns[col_idx] = { max_width = 0, max_row = 0, dirty = false } + end + return self._columns[col_idx] +end + +--- Update column width for a specific row +--- Returns true if the global max width increased +---@param col_idx integer 1-indexed column index +---@param row_idx integer 1-indexed row index +---@param width integer display width of the field +function ColumnTracker:update_width(col_idx, row_idx, width) + local column = self:ensure(col_idx) + + if width > column.max_width then + column.max_width = width + column.max_row = row_idx + elseif column.max_row == row_idx and width < column.max_width then + -- [SHRINK_WIDTH] Mark for recalculation if max width shrinks + column.dirty = true + end +end + +--- Mark a column as dirty (needs recalculation) +---@param col_idx integer +function ColumnTracker:mark_dirty(col_idx) + local 
column = self._columns[col_idx] + if column then + column.dirty = true + end +end + +--- Iterate over all columns +---@return fun(): integer?, CsvView.Metrics.Column? +---@return CsvView.Metrics.Column[] +function ColumnTracker:iter() + return pairs(self._columns) +end + +--- Recalculate a column's max width by scanning all rows +---@param col_idx integer +function ColumnTracker:recalculate(col_idx) + local max_width = 0 + local max_row = nil + + for row_idx = 1, self._row_count() do + local row = self._get_row(row_idx) + local field = row and row:field(col_idx) or nil + if field and field.display_width > max_width then + max_width = field.display_width + max_row = row_idx + end + end + + local column = self._columns[col_idx] + if max_row then + column.max_width = max_width + column.max_row = max_row + column.dirty = false + else + -- Remove column if it is empty + self._columns[col_idx] = nil + end +end + +--- Recalculate all dirty columns +function ColumnTracker:recalculate_dirty() + for col_idx, column in pairs(self._columns) do + if column.dirty then + self:recalculate(col_idx) + end + end +end + +return ColumnTracker diff --git a/lua/csvview/metrics_row_builder.lua b/lua/csvview/metrics_row_builder.lua new file mode 100644 index 0000000..3d1b0a6 --- /dev/null +++ b/lua/csvview/metrics_row_builder.lua @@ -0,0 +1,134 @@ +local CsvViewMetricsRow = require("csvview.metrics_row") + +----------------------------------------------------------------------------- +-- Row Builder Module +-- Responsible for constructing CsvView.Metrics.Row objects from parser output +----------------------------------------------------------------------------- + +local M = {} + +--- Build field info from parser field +---@param field CsvView.Parser.FieldInfo +---@param field_text string +---@return CsvView.Metrics.Field +local function build_field(field, field_text) + return { + offset = field.start_pos - 1, + len = #field_text, + display_width = vim.fn.strdisplaywidth(field_text), + 
is_number = tonumber(field_text) ~= nil, + } +end + +--- Build field info for multiline field continuation +---@param text string +---@param offset integer +---@return CsvView.Metrics.Field +local function build_multiline_field(text, offset) + return { + offset = offset, + len = #text, + display_width = vim.fn.strdisplaywidth(text), + is_number = false, + } +end + +--- Build single line row +---@param parsed_fields CsvView.Parser.FieldInfo[] +---@return CsvView.Metrics.Row[] +local function build_single_row(parsed_fields) + local row_fields = {} ---@type CsvView.Metrics.Field[] + for _, field in ipairs(parsed_fields) do + local field_text = field.text + assert(type(field_text) == "string") + table.insert(row_fields, build_field(field, field_text)) + end + return { CsvViewMetricsRow.new_single_row(row_fields) } +end + +--- Build multiline rows +---@param lnum integer line number +---@param parsed_fields CsvView.Parser.FieldInfo[] +---@param parsed_endlnum integer end line number of the parsed row +---@param terminated boolean whether the row is terminated +---@return CsvView.Metrics.Row[] +local function build_multiline_rows(lnum, parsed_fields, parsed_endlnum, terminated) + local total_rows = parsed_endlnum - lnum + 1 + local row_fields = {} --- @type table + local row_skipped_ncol = {} --- @type table + + -- Initialize field arrays for each row + for i = 1, total_rows do + row_fields[i] = {} + row_skipped_ncol[i] = 0 + end + + -- First pass: distribute fields to rows and calculate skipped columns + local current_row_index = 1 + for field_index, field in ipairs(parsed_fields) do + local field_text = field.text + + if type(field_text) == "table" then + -- Multi-line field + for i, text in ipairs(field_text) do + -- first line starts at field.start_pos, others are 0 + local offset = i == 1 and field.start_pos - 1 or 0 + table.insert(row_fields[current_row_index], build_multiline_field(text, offset)) + + -- Set skipped columns for continuation rows + if i > 1 and 
row_skipped_ncol[current_row_index] == 0 then + row_skipped_ncol[current_row_index] = field_index - 1 + end + + -- Move to next row if not the last line of this field + if i ~= #field_text then + current_row_index = current_row_index + 1 + end + end + else + -- Single-line field + table.insert(row_fields[current_row_index], build_field(field, field_text)) + end + end + + -- Second pass: create rows with all fields initialized + local rows = {} --- @type CsvView.Metrics.Row[] + for i = 1, total_rows do + if i == 1 then + rows[i] = CsvViewMetricsRow.new_multiline_start_row(parsed_endlnum - lnum, terminated, row_fields[i]) + else + rows[i] = CsvViewMetricsRow.new_multiline_continuation_row( + i - 1, -- relative start line offset + parsed_endlnum - lnum - i + 1, -- relative end line offset + row_skipped_ncol[i], + terminated, + row_fields[i] + ) + end + end + + return rows +end + +--- Construct row metrics from parser output +---@param lnum integer line number +---@param is_comment boolean +---@param parsed_fields CsvView.Parser.FieldInfo[] +---@param parsed_endlnum integer end line number of the parsed row +---@param terminated boolean whether the row is terminated, if false, parser reached lookahead limit +---@return CsvView.Metrics.Row[] +function M.construct_rows(lnum, is_comment, parsed_fields, parsed_endlnum, terminated) + if is_comment then + return { CsvViewMetricsRow.new_comment_row() } + end + + if parsed_endlnum == lnum then + -- Single line row + return build_single_row(parsed_fields) + end + + -- Multi-line row + return build_multiline_rows(lnum, parsed_fields, parsed_endlnum, terminated) +end + +return M diff --git a/lua/csvview/metrics_row_mapper.lua b/lua/csvview/metrics_row_mapper.lua new file mode 100644 index 0000000..c417640 --- /dev/null +++ b/lua/csvview/metrics_row_mapper.lua @@ -0,0 +1,223 @@ +----------------------------------------------------------------------------- +-- Row Mapper Module +-- Responsible for mapping between physical line 
numbers and logical row indices +----------------------------------------------------------------------------- + +--- @class CsvView.RowMapper +--- @field private _get_row fun(lnum: integer): CsvView.Metrics.Row? function to get row by physical line number +--- @field private _row_count fun(): integer function to get total row count +local RowMapper = {} + +--- Create new RowMapper instance +---@param get_row fun(lnum: integer): CsvView.Metrics.Row? function to get row by physical line number +---@param row_count fun(): integer function to get total row count +---@return CsvView.RowMapper +function RowMapper:new(get_row, row_count) + self.__index = self + local obj = { + _get_row = get_row, + _row_count = row_count, + } + return setmetatable(obj, self) +end + +--- Get row by physical line number +---@param lnum integer 1-indexed physical line number +---@return CsvView.Metrics.Row? +function RowMapper:_row(lnum) + return self._get_row(lnum) +end + +--- Check if row type is a logical row start +---@param row_type string +---@return boolean +local function is_logical_row_start(row_type) + return row_type == "singleline" or row_type == "multiline_start" or row_type == "comment" +end + +--- Get logical row number from physical line number +---@param physical_lnum integer Physical line number (1-based) +---@return integer? logical_row_num Logical row number (1-based) +function RowMapper:physical_to_logical(physical_lnum) + local logical_row_num = 0 + + for i = 1, physical_lnum do + local row = self:_row(i) + if not row then + return nil -- Out of bounds + end + + -- Count only the start of logical rows + if is_logical_row_start(row.type) then + logical_row_num = logical_row_num + 1 + end + end + + return logical_row_num +end + +--- Get the physical line number for a logical row number +---@param logical_row_num integer Logical row number (1-based) +---@return integer? 
physical_lnum Physical line number (1-based) +function RowMapper:logical_to_physical(logical_row_num) + local logical_count = 0 + local total_rows = self._row_count() + + for i = 1, total_rows do + local row = self:_row(i) + if not row then + return nil + end + + -- Count only the start of logical rows + if is_logical_row_start(row.type) then + logical_count = logical_count + 1 + if logical_count == logical_row_num then + return i + end + end + end + + return nil -- Not found +end + +--- Get row by logical row index (1-indexed) +---@param row_idx integer 1-indexed CSV row index +---@return CsvView.Metrics.Row? +function RowMapper:get_row_by_row_idx(row_idx) + local logical_row_count = 0 + local total_rows = self._row_count() + + for i = 1, total_rows do + local row = self:_row(i) + if not row then + return nil + end + + -- Count only the start of logical rows + if row.type == "singleline" or row.type == "multiline_start" then + logical_row_count = logical_row_count + 1 + if logical_row_count == row_idx then + return row + end + end + end + + return nil -- Row not found +end + +--- Find the start and end of the logical row containing the given physical line number +---@param lnum integer physical line number +---@return integer logical_start_lnum, integer logical_end_lnum +function RowMapper:get_logical_row_range(lnum) + local row = self:_row(lnum) + if not row then + error(string.format("Row out of bounds lnum=%d", lnum)) + end + + if row.type == "multiline_continuation" then + local start_lnum = lnum - row.start_loffset + local start_row = self:_row(start_lnum) + if not start_row then + error(string.format("Start row not found for lnum=%d", lnum)) + end + local endlnum = start_lnum + start_row.end_loffset + return start_lnum, endlnum + elseif row.type == "multiline_start" then + return lnum, lnum + row.end_loffset + else + return lnum, lnum + end +end + +--- @alias CsvView.Metrics.LogicalFieldRange { start_row: integer, start_col: integer, end_row: integer, 
end_col: integer } + +--- Get field ranges for a logical row containing the given physical line number. +---@param lnum integer physical line number +---@return CsvView.Metrics.LogicalFieldRange[] ranges List of logical field ranges for the row +function RowMapper:get_logical_row_fields(lnum) + local row = self:_row(lnum) + if not row then + error(string.format("Row not found for lnum=%d", lnum)) + end + + local ranges = {} --- @type CsvView.Metrics.LogicalFieldRange[] + + -- Handle comment or empty rows + if row.type == "comment" or row:field_count() == 0 then + return ranges + end + + if row.type == "singleline" then + for _, field in row:iter() do + local start_col = field.offset + local range = { --- @type CsvView.Metrics.LogicalFieldRange + start_row = lnum, + start_col = start_col, + end_row = lnum, + end_col = math.max(field.offset + field.len, start_col), + } + table.insert(ranges, range) + end + return ranges + end + + -- Handle multi-line rows + local logical_start_lnum, logical_end_lnum = self:get_logical_row_range(lnum) + for i = logical_start_lnum, logical_end_lnum do + local logical_row = self:_row(i) + if not logical_row then + error(string.format("Logical row not found for lnum=%d", i)) + end + + for col_idx, field in logical_row:iter() do + if not ranges[col_idx] then + ranges[col_idx] = { --- @type CsvView.Metrics.LogicalFieldRange + start_row = i, + start_col = field.offset, + end_row = i, + end_col = field.offset + field.len, + } + else + -- Extend the end row and column if this field continues on the same logical row + ranges[col_idx].end_row = i + ranges[col_idx].end_col = field.offset + field.len + end + end + end + + return ranges +end + +--- Get the logical field range for a given line number and byte offset. 
+---@param lnum integer Line number (1-based) +---@param offset integer Byte offset within the line +---@return integer col_idx Column index of the field containing the byte offset +---@return CsvView.Metrics.LogicalFieldRange range Logical field range for the given line and offset +function RowMapper:get_logical_field_by_offset(lnum, offset) + -- Convert the byte position to a column index + local ranges = self:get_logical_row_fields(lnum) + if #ranges == 0 then + error(string.format("No fields found for lnum=%d", lnum)) + end + + local col_idx ---@type integer + for i = 2, #ranges do + if lnum < ranges[i].start_row then + col_idx = i - 1 + break + end + if lnum == ranges[i].start_row and offset < ranges[i].start_col then + -- If the line number is the same but the byte position is before the start of this range + col_idx = i - 1 + break + end + end + if not col_idx then + col_idx = #ranges + end + + return col_idx, ranges[col_idx] +end + +return RowMapper From e44603c767e9c56cec2dbb6acd2480eb3deda018 Mon Sep 17 00:00:00 2001 From: hat0uma <55551571+hat0uma@users.noreply.github.com> Date: Sun, 14 Dec 2025 01:18:03 +0900 Subject: [PATCH 2/5] refactor(parser): use event-based callbacks and pass line with offset/endpos - Refactor parser to use event-based callbacks (on_field, on_record_start, on_record_end) - Change on_field signature to pass full line with offset/endpos instead of extracted field text - Remove parse_lines, consolidate into parse_line and parse_records - Move FieldBuffer from metrics_row_builder.lua into metrics_row.lua - Update parser tests to use parse_line instead of parse_lines --- lua/csvview/init.lua | 2 + lua/csvview/metrics.lua | 105 ++++-- lua/csvview/metrics_row.lua | 145 ++++--- lua/csvview/metrics_row_builder.lua | 134 ------- lua/csvview/parser.lua | 564 +++++++++++++++++----------- tests/parser_spec.lua | 42 ++- 6 files changed, 564 insertions(+), 428 deletions(-) delete mode 100644 lua/csvview/metrics_row_builder.lua diff --git 
a/lua/csvview/init.lua b/lua/csvview/init.lua index fc7457f..5856c39 100644 --- a/lua/csvview/init.lua +++ b/lua/csvview/init.lua @@ -70,6 +70,8 @@ function M.enable(bufnr, opts) else -- Handle normal buffer update events view:lock() + -- clear line cache before parsing to ensure fresh data + parser:invalidate_cache() metrics:update(first, last, last_updated, function(err) if err and err ~= "cancelled" then vim.notify("csvview: failed to update metrics: " .. err, vim.log.levels.ERROR) diff --git a/lua/csvview/metrics.lua b/lua/csvview/metrics.lua index 3d1374b..1932784 100644 --- a/lua/csvview/metrics.lua +++ b/lua/csvview/metrics.lua @@ -1,11 +1,10 @@ local nop = function() end local ColumnTracker = require("csvview.metrics_column") -local CsvViewMetricsRow = require("csvview.metrics_row") -local RowBuilder = require("csvview.metrics_row_builder") +local Row = require("csvview.metrics_row") local RowMapper = require("csvview.metrics_row_mapper") ----------------------------------------------------------------------------- --- Metrics class (Facade) +-- Metrics class -- Coordinates row storage, column tracking, and line mapping ----------------------------------------------------------------------------- @@ -180,30 +179,91 @@ end ---@param endlnum integer? if present, compute only specified range ---@param on_end fun(err:string|nil) callback for when the update is complete function CsvViewMetrics:_compute_metrics(startlnum, endlnum, on_end) - -- Parse specified range and update metrics. - self._parser:parse_lines(self._opts.parser.async_chunksize, { - on_line = function(lnum, is_comment, fields, parsed_endlnum, terminated) - local new_endlnum = nil ---@type integer? 
- local rows = RowBuilder.construct_rows(lnum, is_comment, fields, parsed_endlnum, terminated) - assert(#rows == parsed_endlnum - lnum + 1, "Invalid number of rows computed") - - -- Update row metrics and adjust column metrics - for i, row in ipairs(rows) do - local line = lnum + i - 1 - local prev_row = self._rows[line] - self._rows[line] = row - - if prev_row and prev_row.type == "multiline_start" and row.type == "multiline_continuation" then + -- State for building rows from parse events + local field_buffer = Row.FieldBuffer:new() + local line_field_counts = {} ---@type integer[] + local record_start_lnum = 0 + + -- Parse using parse_records with event callbacks + self._parser:parse_records(self._opts.parser.async_chunksize, { + on_comment = function(lnum) + local prev_row = self._rows[lnum] + local row = Row.new_comment() + self._rows[lnum] = row + self:_mark_recalculation_on_decrease_fields(lnum, prev_row, row) + end, + + on_record_start = function(lnum) + -- clear record state + record_start_lnum = lnum + field_buffer:reset() + for k in pairs(line_field_counts) do + line_field_counts[k] = nil + end + end, + + on_field = function(_, lnum, line, offset, endpos) + local len = endpos - offset -- endpos is 1-based end position, offset is 0-based start + local text = string.sub(line, offset + 1, endpos) + local display_width = vim.fn.strdisplaywidth(text, offset) + local is_number = tonumber(text) ~= nil + field_buffer:add(offset, len, display_width, is_number) + + -- field count per lines + local rel_idx = lnum - record_start_lnum + line_field_counts[rel_idx] = (line_field_counts[rel_idx] or 0) + 1 + end, + + on_record_end = function(record_start, record_end, terminated) + local new_endlnum = nil + local is_multiline = record_start ~= record_end + local current_skipped = 0 + + -- Track buffer offset as we consume fields for each row + local current_buffer_offset = 0 + for lnum = record_start, record_end do + local prev_row = self._rows[lnum] + + local rel_idx = 
lnum - record_start + local field_count = line_field_counts[rel_idx] or 0 + local skipped_ncol = current_skipped + + -- Create appropriate row type + local new_row --- @type CsvView.Metrics.Row + if not is_multiline then + new_row = Row.new_singleline(field_count) + elseif lnum == record_start then + local endloffset = record_end - record_start + new_row = Row.new_multiline_start(field_count, endloffset, terminated) + else + local start_loffset = lnum - record_start + local end_loffset = record_end - lnum + new_row = Row.new_multiline_continuation(field_count, start_loffset, end_loffset, skipped_ncol, terminated) + end + + -- Copy fields from buffer to row + field_buffer:copy_to_row(new_row, current_buffer_offset, field_count) + current_buffer_offset = current_buffer_offset + field_count + + -- update skipped count + if field_count > 0 then + current_skipped = current_skipped + (field_count - 1) + end + + self._rows[lnum] = new_row + if prev_row and prev_row.type == "multiline_start" and new_row.type == "multiline_continuation" then -- If the structure of the multi-line field is broken, it affects all subsequent rows, -- so all rows need to be recalculated. 
new_endlnum = vim.api.nvim_buf_line_count(self._bufnr) - 1 end - self:_mark_recalculation_on_decrease_fields(line, prev_row, row) - self:_update_column_metrics_for_row(line) + self:_mark_recalculation_on_decrease_fields(lnum, prev_row, new_row) + self:_update_column_metrics_for_row(lnum) end + return new_endlnum end, + on_end = function(err) if err then on_end(err) @@ -212,7 +272,6 @@ function CsvViewMetrics:_compute_metrics(startlnum, endlnum, on_end) -- Recalculate dirty columns self._columns:recalculate_dirty() - on_end() end, }, startlnum, endlnum, self._current_parse) @@ -302,7 +361,7 @@ function CsvViewMetrics:_add_row_placeholders(start, num) self._rows[i + num] = self._rows[i] end for i = start, start + num - 1 do - self._rows[i] = CsvViewMetricsRow.new_single_row({}) + self._rows[i] = Row.new_singleline(0) end end @@ -325,10 +384,6 @@ function CsvViewMetrics:_remove_rows(start, num) end end ------------------------------------------------------------------------------ --- Mapping API (delegated to RowMapper) ------------------------------------------------------------------------------ - --- Find the start of the logical row containing the given physical line number ---@param lnum integer physical line number ---@return integer logical_start_lnum, integer logical_end_lnum diff --git a/lua/csvview/metrics_row.lua b/lua/csvview/metrics_row.lua index 7e8726d..6b3f167 100644 --- a/lua/csvview/metrics_row.lua +++ b/lua/csvview/metrics_row.lua @@ -1,6 +1,6 @@ local ffi = require("ffi") -local CsvViewMetricsRow = {} +local M = {} --- @class CsvView.Metrics.Field: ffi.cdata* --- @field offset integer @@ -180,6 +180,19 @@ function prototype.get_type(row) end end +--- Set field data at the specified index +---@param row CsvView.Metrics._RowStruct +---@param index integer 1-based index +---@param text string +---@param offset integer +function prototype.set_field(row, index, text, offset) + local field = row._fields[index - 1] ---@type CsvView.Metrics.Field + 
field.offset = offset + field.len = #text + field.display_width = vim.fn.strdisplaywidth(text) + field.is_number = tonumber(text) ~= nil +end + ----------------------------------------- -- Create a new row type ----------------------------------------- @@ -188,7 +201,7 @@ local csvview_row_t = ffi.metatype("csvview_row_t", mt) --- Create a new comment row ---@return CsvView.Metrics.CommentRow -function CsvViewMetricsRow.new_comment_row() +function M.new_comment() ---@diagnostic disable-next-line: assign-type-mismatch local row = csvview_row_t(0) ---@type CsvView.Metrics.CommentRow row._type = ROW_TYPE.COMMENT @@ -197,81 +210,123 @@ function CsvViewMetricsRow.new_comment_row() end --- Create a new single line row ----@param fields CsvView.Metrics.Field[] +---@param field_count integer ---@return CsvView.Metrics.SinglelineRow -function CsvViewMetricsRow.new_single_row(fields) - local field_count = #fields - +function M.new_singleline(field_count) ---@diagnostic disable-next-line: assign-type-mismatch local row = csvview_row_t(field_count) ---@type CsvView.Metrics.SinglelineRow row._type = ROW_TYPE.SINGLELINE - row._field_count = field_count - for i, field_info in ipairs(fields) do - local field = row._fields[i - 1] ---@type CsvView.Metrics.Field - field.offset = field_info.offset - field.len = field_info.len - field.display_width = field_info.display_width - field.is_number = field_info.is_number - end - return row end --- Create a new multiline start row +---@param field_count integer ---@param end_loffset integer ---@param terminated boolean ----@param fields CsvView.Metrics.Field[] ---@return CsvView.Metrics.MultilineStartRow -function CsvViewMetricsRow.new_multiline_start_row(end_loffset, terminated, fields) - local field_count = #fields - +function M.new_multiline_start(field_count, end_loffset, terminated) ---@diagnostic disable-next-line: assign-type-mismatch local row = csvview_row_t(field_count) ---@type CsvView.Metrics.MultilineStartRow row._type = 
ROW_TYPE.MULTILINE_START - row._terminated = terminated and 1 or 0 - row._end_loffset = end_loffset - row._field_count = field_count - for i, field_info in ipairs(fields) do - local field = row._fields[i - 1] ---@type CsvView.Metrics.Field - field.offset = field_info.offset - field.len = field_info.len - field.display_width = field_info.display_width - field.is_number = field_info.is_number - end - + row._end_loffset = end_loffset + row._terminated = terminated and 1 or 0 return row end --- Create a new multiline continuation row +---@param field_count integer ---@param start_loffset integer ---@param end_loffset integer ---@param skipped_ncol integer ---@param terminated boolean ----@param fields CsvView.Metrics.Field[] ---@return CsvView.Metrics.MultilineContinuationRow -function CsvViewMetricsRow.new_multiline_continuation_row(start_loffset, end_loffset, skipped_ncol, terminated, fields) - local field_count = #fields - - --- @diagnostic disable-next-line: assign-type-mismatch +function M.new_multiline_continuation(field_count, start_loffset, end_loffset, skipped_ncol, terminated) + ---@diagnostic disable-next-line: assign-type-mismatch local row = csvview_row_t(field_count) ---@type CsvView.Metrics.MultilineContinuationRow row._type = ROW_TYPE.MULTILINE_CONTINUATION - row._terminated = terminated and 1 or 0 + row._field_count = field_count row._start_loffset = start_loffset row._end_loffset = end_loffset row._skipped_ncol = skipped_ncol + row._terminated = terminated and 1 or 0 + return row +end - row._field_count = field_count - for i, field_info in ipairs(fields) do - local field = row._fields[i - 1] ---@type CsvView.Metrics.Field - field.offset = field_info.offset - field.len = field_info.len - field.display_width = field_info.display_width - field.is_number = field_info.is_number +----------------------------------------------------------------------------- +-- FieldBuffer: Temporary buffer for parsing 
+----------------------------------------------------------------------------- + +--- @class CsvView.FieldBuffer +--- @field private _buffer ffi.cdata* FFI array of csvview_field_t +--- @field private _capacity integer current buffer capacity +--- @field private _index integer current write index (0-based) +local FieldBuffer = {} +FieldBuffer.__index = FieldBuffer + +--- Create a new FieldBuffer +---@param initial_capacity integer? initial capacity (default: 8192) +---@return CsvView.FieldBuffer +function FieldBuffer:new(initial_capacity) + initial_capacity = initial_capacity or 8192 + local obj = setmetatable({}, self) + obj._buffer = ffi.new("csvview_field_t[?]", initial_capacity) + obj._capacity = initial_capacity + obj._index = 0 + return obj +end + +--- Grow the buffer to accommodate more fields +---@private +function FieldBuffer:_grow() + local new_capacity = self._capacity * 2 + local new_buffer = ffi.new("csvview_field_t[?]", new_capacity) + ffi.copy(new_buffer, self._buffer, self._index * ffi.sizeof("csvview_field_t")) + self._buffer = new_buffer + self._capacity = new_capacity +end + +--- Reset the buffer for a new record +function FieldBuffer:reset() + self._index = 0 +end + +--- Add a field to the buffer +---@param offset integer 0-based byte offset +---@param len integer field length in bytes +---@param display_width integer display width +---@param is_number boolean whether the field is a number +function FieldBuffer:add(offset, len, display_width, is_number) + if self._index >= self._capacity then + self:_grow() end - return row + local field = self._buffer[self._index] ---@type CsvView.Metrics.Field + field.offset = offset + field.len = len + field.display_width = display_width + field.is_number = is_number + + self._index = self._index + 1 +end + +--- Get current field count +---@return integer +function FieldBuffer:count() + return self._index end -return CsvViewMetricsRow +--- Copy a slice of fields to a row's _fields array +---@param row 
CsvView.Metrics._RowStruct destination row +---@param start_offset integer 0-based start index in buffer +---@param field_count integer number of fields to copy +function FieldBuffer:copy_to_row(row, start_offset, field_count) + if field_count > 0 then + ffi.copy(row._fields, self._buffer + start_offset, field_count * ffi.sizeof("csvview_field_t")) + end +end + +M.FieldBuffer = FieldBuffer + +return M diff --git a/lua/csvview/metrics_row_builder.lua b/lua/csvview/metrics_row_builder.lua deleted file mode 100644 index 3d1b0a6..0000000 --- a/lua/csvview/metrics_row_builder.lua +++ /dev/null @@ -1,134 +0,0 @@ -local CsvViewMetricsRow = require("csvview.metrics_row") - ------------------------------------------------------------------------------ --- Row Builder Module --- Responsible for constructing CsvView.Metrics.Row objects from parser output ------------------------------------------------------------------------------ - -local M = {} - ---- Build field info from parser field ----@param field CsvView.Parser.FieldInfo ----@param field_text string ----@return CsvView.Metrics.Field -local function build_field(field, field_text) - return { - offset = field.start_pos - 1, - len = #field_text, - display_width = vim.fn.strdisplaywidth(field_text), - is_number = tonumber(field_text) ~= nil, - } -end - ---- Build field info for multiline field continuation ----@param text string ----@param offset integer ----@return CsvView.Metrics.Field -local function build_multiline_field(text, offset) - return { - offset = offset, - len = #text, - display_width = vim.fn.strdisplaywidth(text), - is_number = false, - } -end - ---- Build single line row ----@param parsed_fields CsvView.Parser.FieldInfo[] ----@return CsvView.Metrics.Row[] -local function build_single_row(parsed_fields) - local row_fields = {} ---@type CsvView.Metrics.Field[] - for _, field in ipairs(parsed_fields) do - local field_text = field.text - assert(type(field_text) == "string") - table.insert(row_fields, 
build_field(field, field_text)) - end - return { CsvViewMetricsRow.new_single_row(row_fields) } -end - ---- Build multiline rows ----@param lnum integer line number ----@param parsed_fields CsvView.Parser.FieldInfo[] ----@param parsed_endlnum integer end line number of the parsed row ----@param terminated boolean whether the row is terminated ----@return CsvView.Metrics.Row[] -local function build_multiline_rows(lnum, parsed_fields, parsed_endlnum, terminated) - local total_rows = parsed_endlnum - lnum + 1 - local row_fields = {} --- @type table - local row_skipped_ncol = {} --- @type table - - -- Initialize field arrays for each row - for i = 1, total_rows do - row_fields[i] = {} - row_skipped_ncol[i] = 0 - end - - -- First pass: distribute fields to rows and calculate skipped columns - local current_row_index = 1 - for field_index, field in ipairs(parsed_fields) do - local field_text = field.text - - if type(field_text) == "table" then - -- Multi-line field - for i, text in ipairs(field_text) do - -- first line starts at field.start_pos, others are 0 - local offset = i == 1 and field.start_pos - 1 or 0 - table.insert(row_fields[current_row_index], build_multiline_field(text, offset)) - - -- Set skipped columns for continuation rows - if i > 1 and row_skipped_ncol[current_row_index] == 0 then - row_skipped_ncol[current_row_index] = field_index - 1 - end - - -- Move to next row if not the last line of this field - if i ~= #field_text then - current_row_index = current_row_index + 1 - end - end - else - -- Single-line field - table.insert(row_fields[current_row_index], build_field(field, field_text)) - end - end - - -- Second pass: create rows with all fields initialized - local rows = {} --- @type CsvView.Metrics.Row[] - for i = 1, total_rows do - if i == 1 then - rows[i] = CsvViewMetricsRow.new_multiline_start_row(parsed_endlnum - lnum, terminated, row_fields[i]) - else - rows[i] = CsvViewMetricsRow.new_multiline_continuation_row( - i - 1, -- relative start line 
offset - parsed_endlnum - lnum - i + 1, -- relative end line offset - row_skipped_ncol[i], - terminated, - row_fields[i] - ) - end - end - - return rows -end - ---- Construct row metrics from parser output ----@param lnum integer line number ----@param is_comment boolean ----@param parsed_fields CsvView.Parser.FieldInfo[] ----@param parsed_endlnum integer end line number of the parsed row ----@param terminated boolean whether the row is terminated, if false, parser reached lookahead limit ----@return CsvView.Metrics.Row[] -function M.construct_rows(lnum, is_comment, parsed_fields, parsed_endlnum, terminated) - if is_comment then - return { CsvViewMetricsRow.new_comment_row() } - end - - if parsed_endlnum == lnum then - -- Single line row - return build_single_row(parsed_fields) - end - - -- Multi-line row - return build_multiline_rows(lnum, parsed_fields, parsed_endlnum, terminated) -end - -return M diff --git a/lua/csvview/parser.lua b/lua/csvview/parser.lua index b4a9f12..64f30be 100644 --- a/lua/csvview/parser.lua +++ b/lua/csvview/parser.lua @@ -1,58 +1,147 @@ local util = require("csvview.util") ----@class CsvView.Parser.FieldInfo ----@field start_pos integer 1-based start position of the fields ----@field text string|string[] the text of the field. if the field is a quoted field, it will be a string array. +local str_byte = string.byte +local str_sub = string.sub + +---@class CsvView.Parser.AsyncChunkOptions +---@field chunksize integer +---@field startlnum integer +---@field endlnum integer +---@field cancel_token? { cancelled: boolean } +---@field on_end fun(err: string?) + +--- Run async chunked processing +---@param opts CsvView.Parser.AsyncChunkOptions +---@param process_chunk fun(chunk_start: integer, chunk_end: integer): integer, integer? +--- Returns: next_lnum, new_endlnum? 
+local function run_async_chunked(opts, process_chunk) + local current_lnum = opts.startlnum + local endlnum = opts.endlnum + + -- Notification wrapper for long operations + local iter_num = (endlnum - opts.startlnum) / opts.chunksize + local on_success = opts.on_end + if iter_num > 1000 then + local start_time = vim.uv.now() + vim.notify("csvview: parsing buffer, please wait...") + on_success = function() + opts.on_end() + local elapsed = vim.uv.now() - start_time + vim.notify(string.format("csvview: parsing buffer done in %d[ms]", elapsed)) + end + end ---- ----@class Csvview.Parser.Callbacks ---- ---- the callback to be called for each parsed line ---- If the callback returns a new end line number, the parser will continue parsing until that line. ----@field on_line fun(lnum:integer,is_comment:boolean,fields:CsvView.Parser.FieldInfo[], endlnum: integer, terminated:boolean): integer? ---- ---- the callback to be called when parsing is done. ---- If an error occurs, the `err` parameter will be a string with the error message. ---- "cancelled" will be passed if the parsing was cancelled. 
----@field on_end fun(err?:string) - ----@class CsvView.Parser.DelimiterPolicy ----@field match fun(s:string, pos:integer, char:integer, match_count:integer): CsvView.Parser.DelimiterPolicy.MatchState - ----@enum CsvView.Parser.DelimiterPolicy.MatchState -local MatchState = { - NO_MATCH = 0, - MATCHING = 1, - MATCH_COMPLETE = 2, -} - ---- Plain text delimiter ----@param delim string ----@return CsvView.Parser.DelimiterPolicy -local function plain_text_delimiter(delim) - local delim_len = #delim - local delim_bytes = { string.byte(delim, 1, delim_len) } - return { ---@type CsvView.Parser.DelimiterPolicy - match = function(_, _, char, match_count) - if char == delim_bytes[match_count + 1] then - return match_count + 1 == delim_len and MatchState.MATCH_COMPLETE or MatchState.MATCHING - else - return MatchState.NO_MATCH - end - end, - } + local iter ---@type fun() + local function do_step() + local ok, err = xpcall(iter, util.wrap_stacktrace) + if not ok then + opts.on_end(util.format_error(err)) + end + end + + iter = function() + if opts.cancel_token and opts.cancel_token.cancelled then + opts.on_end("cancelled") + return + end + + local chunk_end = math.min(current_lnum + opts.chunksize - 1, endlnum) + local next_lnum, new_endlnum = process_chunk(current_lnum, chunk_end) + current_lnum = next_lnum + if new_endlnum then + endlnum = new_endlnum + end + + if current_lnum <= endlnum then + vim.schedule(do_step) + else + on_success() + end + end + + do_step() end --- @class CsvView.Parser.Source ---- @field get_line fun(lnum:integer):string? Function to get a line by line number. lnum is 1-indexed. ---- @field get_line_count fun():integer Function to get the total number of lines in the buffer. +--- @field get_line fun(lnum:integer):string? +--- @field get_line_count fun():integer +--- @field invalidate? 
fun() + +--- New buffer source +---@param bufnr integer +---@param chunk_size integer +---@return CsvView.Parser.Source +local function new_buffer_source(bufnr, chunk_size) + local cache = nil --- @type string[]? + local cache_start = 0 --- @type integer + local cache_end = -1 --- @type integer + local total_lines = nil --- @type integer? + + --- Get line + ---@param lnum integer + ---@return string + local function get_line(lnum) + -- Check if line is in current cache + if cache and lnum >= cache_start and lnum <= cache_end then + return cache[lnum - cache_start + 1] + end + + -- Ensure total_lines is initialized + if not total_lines then + total_lines = vim.api.nvim_buf_line_count(bufnr) + end + + -- Cache miss: Fetch next chunk (e.g., 100 lines) + local start_row = lnum - 1 + local end_row = math.min(start_row + chunk_size, total_lines) + + cache = vim.api.nvim_buf_get_lines(bufnr, start_row, end_row, true) + cache_start = lnum + cache_end = lnum + #cache - 1 + + return cache[1] + end + + local function get_line_count() + if total_lines then + return total_lines + end + + total_lines = vim.api.nvim_buf_line_count(bufnr) + return total_lines + end + + local function invalidate() + cache = nil + cache_start = 0 + cache_end = -1 + total_lines = nil + end + + return { --- @type CsvView.Parser.Source + get_line_count = get_line_count, + get_line = get_line, + invalidate = invalidate, + } +end + +---@class CsvView.Parser.FieldInfo +---@field start_pos integer 1-based start position of the fields +---@field text string|string[] the text of the field. if the field is a quoted field, it will be a string array. 
+ +---@class CsvView.Parser.Events +---@field comment fun(lnum: integer) +---@field record_start fun(startlnum: integer) +---@field record_end fun(startlnum: integer, endlnum: integer, terminated: boolean) +---@field field fun(col_idx: integer, lnum: integer, line: string, offset: integer, endpos: integer) ---@class CsvView.Parser ----@field private _quote_char integer Quote character byte. ----@field private _delimiter CsvView.Parser.DelimiterPolicy Delimiter policy. ----@field private _comments string[] Comment prefixes. ----@field private _max_lookahead integer Maximum number of lines to look ahead for multi-line fields. ----@field private _source CsvView.Parser.Source Source for getting lines +---@field private _quote_char integer +---@field private _delim_bytes integer[] +---@field private _delim_str string +---@field private _comments string[] +---@field private _max_lookahead integer +---@field private _source CsvView.Parser.Source local CsvViewParser = {} CsvViewParser.__index = CsvViewParser @@ -65,17 +154,11 @@ CsvViewParser.__index = CsvViewParser function CsvViewParser:new(bufnr, opts, quote_char, delimiter) local obj = setmetatable({}, self) obj._quote_char = quote_char:byte() - obj._delimiter = plain_text_delimiter(delimiter) + obj._delim_bytes = { delimiter:byte(1, #delimiter) } + obj._delim_str = delimiter obj._comments = opts.parser.comments obj._max_lookahead = opts.parser.max_lookahead - obj._source = { - get_line = function(lnum) - return vim.api.nvim_buf_get_lines(bufnr, lnum - 1, lnum, true)[1] - end, - get_line_count = function() - return vim.api.nvim_buf_line_count(bufnr) - end, - } + obj._source = new_buffer_source(bufnr, 1000) return obj end @@ -89,7 +172,8 @@ end function CsvViewParser:new_with_source(quote_char, delimiter, comments, max_lookahead, source) local obj = setmetatable({}, self) obj._quote_char = quote_char - obj._delimiter = plain_text_delimiter(delimiter) + obj._delim_bytes = { delimiter:byte(1, #delimiter) } + 
obj._delim_str = delimiter obj._comments = comments or {} obj._max_lookahead = max_lookahead obj._source = source @@ -108,212 +192,266 @@ function CsvViewParser:_is_comment_line(line) return false end ---- Parse CSV logical line. ----@param lnum integer 1-indexed line number. ----@return boolean is_comment_line Whether the line is a comment line. ----@return CsvView.Parser.FieldInfo[] fields An array of field information. ----@return integer endlnum The end line number. ----@return boolean terminated Whether the closing quote was found within the lookahead limit. -function CsvViewParser:parse_line(lnum) - -- Assume CSV format compliant with RFC 4180 - -- - Each record is separated by a newline or delimiter. - -- - If a field contains commas or newlines, enclose it in quote characters. - -- - If a field contains quote characters, escape them by doubling the quote characters. - -- - -- Additional rules - -- - Ignore comment lines - -- - Limit the logical line parsing to a certain number of lines ahead - -- (Parsing is triggered by user edits, so without this limit, adding a quote would re-parse all lines.) - local fields = {} ---@type CsvView.Parser.FieldInfo[] - local terminated = true - local current_lnum = lnum +function CsvViewParser:invalidate_cache() + if self._source.invalidate then + self._source.invalidate() + end +end - -- Get initial line +--- Returns an iterator that yields parsing events for the record starting at `lnum`. 
--- Parse the record that starts at line `lnum` and report it through `events`.
---
--- Nothing is returned; results are delivered by calling the handlers:
---   * `events.comment(lnum)` when the line is a comment (no other event fires);
---   * otherwise `events.record_start(lnum)`, then `events.field(...)` once per
---     field segment, then `events.record_end(lnum, endlnum, terminated)`.
--- A quoted field that continues onto following lines produces one `field` event
--- per physical line, all carrying the same `col_idx`.
--- `events.field` receives a 0-based start offset and a 1-based inclusive end position.
--- If the source has no line `lnum`, no event fires at all.
--- An empty line yields `record_start` + `record_end` with no `field` event.
---@param lnum integer 1-indexed line where the record starts
---@param events CsvView.Parser.Events
function CsvViewParser:parse_record(lnum, events)
  local line = self._source.get_line(lnum)
  if not line then
    return
  end

  -- Comment lines are reported on their own and never start a record.
  if self:_is_comment_line(line) then
    events.comment(lnum)
    return
  end

  events.record_start(lnum)
  if #line == 0 then
    events.record_end(lnum, lnum, true)
    return
  end

  local len = #line
  local pos = 1
  local field_start = 1
  local col_idx = 1
  local current_lnum = lnum

  -- Hoist fields used in the scan loop into locals.
  local delim_bytes = self._delim_bytes
  local delim_first_byte = delim_bytes[1]
  local delim_len = #delim_bytes
  local max_lookahead = self._max_lookahead
  local quote_char = self._quote_char
  local source = self._source

  while pos <= len do
    local b = str_byte(line, pos)

    if b == quote_char then
      -- Quoted field: skip the opening quote, then look for the closing one,
      -- pulling in following lines when it is not on the current line.
      pos = pos + 1

      while true do
        local close_pos = self:_find_closing_quote(line, pos)
        if close_pos then
          pos = close_pos + 1
          break
        end

        -- No closing quote on this line: the rest of the line belongs to the field.
        events.field(col_idx, current_lnum, line, field_start - 1, len)

        -- Stop at the lookahead limit or at the last line of the source.
        if current_lnum >= math.min(lnum + max_lookahead, source.get_line_count()) then
          events.record_end(lnum, current_lnum, false)
          return
        end

        local next_line = source.get_line(current_lnum + 1)
        if not next_line then
          -- The source ran out of lines. Report the last line that was actually
          -- read as the record end, not the missing one.
          events.record_end(lnum, current_lnum, false)
          return
        end

        -- Continue the same field on the next physical line.
        current_lnum = current_lnum + 1
        line = next_line
        len = #line
        pos = 1
        field_start = 1
      end
    elseif b == delim_first_byte then
      -- First delimiter byte matched; verify the remaining bytes (if any).
      -- Reading past the end of the line yields nil, which never matches.
      local is_match = true
      for i = 2, delim_len do
        if str_byte(line, pos + i - 1) ~= delim_bytes[i] then
          is_match = false
          break
        end
      end

      if is_match then
        -- The field ends right before the delimiter.
        events.field(col_idx, current_lnum, line, field_start - 1, pos - 1)
        col_idx = col_idx + 1
        pos = pos + delim_len
        field_start = pos
      else
        pos = pos + 1
      end
    else
      pos = pos + 1
    end
  end

  -- Whatever follows the last delimiter (possibly empty) is the final field.
  events.field(col_idx, current_lnum, line, field_start - 1, len)
  events.record_end(lnum, current_lnum, true)
end
--- Parse the record starting at `lnum` and return its fields as a table.
--- Thin wrapper that feeds `parse_record` events into a field collector.
---@param lnum integer
---@return boolean is_comment
---@return CsvView.Parser.FieldInfo[] fields
---@return integer endlnum
---@return boolean terminated
function CsvViewParser:parse_line(lnum)
  local collector, collect = create_field_collector()

  -- Defaults apply when parse_record never calls record_end
  -- (comment line, or `lnum` missing from the source).
  local last_lnum, closed = lnum, true

  -- Wrap the collector's record_end so the end line and the termination flag
  -- are captured before the collector flushes its pending field.
  local flush = collector.record_end
  collector.record_end = function(_, endlnum, terminated)
    last_lnum, closed = endlnum, terminated
    flush()
  end

  self:parse_record(lnum, collector)

  local fields, is_comment = collect()
  return is_comment, fields, last_lnum, closed
end
--- Parse the records in `[startlnum, endlnum]` and forward every parser event to `cb`.
---
--- The work is handed to `run_async_chunked` in chunks of `async_chunksize` lines;
--- `cb.on_end` and `cancel_token` are passed through to it unchanged.
--- NOTE(review): scheduling and cancellation are implemented by run_async_chunked,
--- which is not visible here.
---
--- `cb.on_record_end` may return a new end line. The most recent non-nil value
--- returned during a chunk is handed back to `run_async_chunked` as the chunk's
--- second result.
---@param async_chunksize integer
---@param cb CsvView.Parser.RecordCallbacks
---@param startlnum? integer defaults to 1
---@param endlnum? integer defaults to the source's line count
---@param cancel_token? { cancelled: boolean }
function CsvViewParser:parse_records(async_chunksize, cb, startlnum, endlnum, cancel_token)
  startlnum = startlnum or 1
  endlnum = endlnum or self._source.get_line_count()

  local current_record_end = startlnum
  local endlnum_override = nil

  local events = {
    comment = function(lnum)
      current_record_end = lnum
      cb.on_comment(lnum)
    end,
    record_start = function(lnum)
      cb.on_record_start(lnum)
    end,
    record_end = function(start_lnum, end_lnum, terminated)
      current_record_end = end_lnum
      -- Keep the latest non-nil override. A later record returning nil must not
      -- discard an override requested by an earlier record in the same chunk.
      local new_end = cb.on_record_end(start_lnum, end_lnum, terminated)
      if new_end then
        endlnum_override = new_end
      end
    end,
    field = cb.on_field,
  }

  run_async_chunked({
    chunksize = async_chunksize,
    startlnum = startlnum,
    endlnum = endlnum,
    cancel_token = cancel_token,
    on_end = cb.on_end,
  }, function(chunk_start, chunk_end)
    local lnum = chunk_start
    while lnum <= chunk_end do
      -- parse_record fires no event when the line is missing from the source.
      -- Seed the end marker with the current line so the loop always advances.
      current_record_end = lnum
      self:parse_record(lnum, events)
      lnum = current_record_end + 1
    end

    local new_end = endlnum_override
    endlnum_override = nil
    return lnum, new_end
  end)
end
local thread = coroutine.running() local results = {} ---@type { is_comment: boolean?, fields: CsvView.Parser.FieldInfo[] }[] - parser:parse_lines(opts.parser.async_chunksize, { - on_end = vim.schedule_wrap(function() - coroutine.resume(thread) - end), - on_line = function(lnum, is_comment, fields) - table.insert(results, { is_comment = is_comment, fields = fields }) - end, - }, case.startlnum, case.endlnum) + local startlnum = case.startlnum or 1 + local endlnum = case.endlnum or #case.lines + local lnum = startlnum + + while lnum <= endlnum do + local is_comment, fields, parse_endlnum = parser:parse_line(lnum) + table.insert(results, { is_comment = is_comment, fields = fields }) + lnum = parse_endlnum + 1 + end - coroutine.yield() vim.api.nvim_buf_delete(bufnr, { force = true }) - assert.are.same(#case.expected, #results) + assert.are.same( + #case.expected, + #results, + vim.inspect({ + expected = case.expected, + result = results, + }) + ) for i = 1, #results do assert.are.same(case.expected[i].is_comment, results[i].is_comment) assert.are.same(case.expected[i].fields, results[i].fields) From 1f4f37b33c63f295f8c6fcd7770ac3e70293de50 Mon Sep 17 00:00:00 2001 From: hat0uma <55551571+hat0uma@users.noreply.github.com> Date: Mon, 15 Dec 2025 02:29:19 +0900 Subject: [PATCH 3/5] test(perf): add performance benchmark script Add perfcheck.lua for measuring parser and metrics calculation performance. The script generates configurable test data and measures: - Execution time with warmup iterations - Memory usage (peak and retained) - Throughput (lines/sec) --- tests/perfcheck.lua | 244 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 244 insertions(+) create mode 100644 tests/perfcheck.lua diff --git a/tests/perfcheck.lua b/tests/perfcheck.lua new file mode 100644 index 0000000..ab59c28 --- /dev/null +++ b/tests/perfcheck.lua @@ -0,0 +1,244 @@ +-- perfcheck.lua +-- Usage: +-- nvim --headless -c "luafile tests/perfcheck.lua" -c "qa!" 
---Read a numeric setting from the environment.
---Falls back to `default` when the variable is unset or is not parseable by `tonumber`.
---@param name string environment variable name
---@param default integer value used when no number can be read
---@return integer
local function get_env_num(name, default)
  local raw = os.getenv(name)
  local parsed = tonumber(raw)
  if parsed == nil then
    return default
  end
  return parsed
end
---Release the per-iteration resources of a run that is being abandoned.
---Every step is best-effort: the autocmd may already have fired (it is `once`)
---and the plugin may not have attached.
---@param buf integer scratch buffer created for the iteration
---@param autocmd_id integer id returned by nvim_create_autocmd
---@param attached boolean whether `csvview.enable` succeeded for `buf`
local function abandon_iteration(buf, autocmd_id, attached)
  pcall(vim.api.nvim_del_autocmd, autocmd_id)
  if attached then
    pcall(require("csvview").disable, buf)
  end
  pcall(vim.api.nvim_buf_delete, buf, { force = true })
end

---Run the benchmark.
---Generates the CSV file, then for every warmup and measured iteration loads it
---into a scratch buffer, enables csvview, waits (up to 60s) for the `CsvViewAttach`
---user event carrying that buffer, and records time, memory and throughput.
---Prints a summary and removes the generated file at the end.
---The loop stops early on an enable error or a timeout; the summary then covers
---only the runs completed so far.
local function run_perfcheck()
  log("Configuration: %s", vim.inspect(cfg))

  -----------------------------------------------------
  -- Generate Data
  local t_gen_start = uv.hrtime()
  prepare_data_file()
  local lines = load_data_file()
  local t_gen_end = uv.hrtime()
  log("Data generation took %.2f ms", (t_gen_end - t_gen_start) / 1e6)

  -----------------------------------------------------
  -- perfcheck loop
  log("Starting perfcheck loop...")

  local times = {} ---@type number[]
  local mems_retained = {} ---@type number[]
  local mems_peak = {} ---@type number[]
  local throughputs = {} ---@type number[]

  for i = 1, cfg.warmup + cfg.iterations do
    -- Create Buffer
    local buf = vim.api.nvim_create_buf(false, true)
    vim.api.nvim_buf_set_lines(buf, 0, -1, false, lines)

    -- Collect twice so the baseline is taken after a full GC cycle.
    collectgarbage("collect")
    collectgarbage("collect")

    local start_mem = collectgarbage("count") ---@type integer
    local start_time = uv.hrtime()
    local done = false

    -- Setup event listener
    local autocmd_id = vim.api.nvim_create_autocmd("User", {
      pattern = "CsvViewAttach",
      callback = function(args)
        if args.data == buf then
          done = true
        end
      end,
      once = true,
    })

    -- Run Target
    local status, err = pcall(require("csvview").enable, buf, cfg.opts)
    if not status then
      -- tostring: the error value is not guaranteed to be a string.
      log("Error during enable: %s", tostring(err))
      abandon_iteration(buf, autocmd_id, false)
      break
    end

    -- Wait for async completion and observe peak.
    local peak_mem = start_mem
    local wait_ok = vim.wait(60000, function()
      local current = collectgarbage("count")
      if current > peak_mem then
        peak_mem = current
      end
      return done
    end, 5)

    if not wait_ok then
      log("TIMEOUT on iteration %d", i)
      -- Do not leave the listener, the attached view or the buffer behind.
      abandon_iteration(buf, autocmd_id, true)
      break
    end

    -- Collect data
    local end_time = uv.hrtime()

    -- Cleanup
    require("csvview").disable(buf)
    vim.api.nvim_buf_delete(buf, { force = true })

    collectgarbage("collect")
    collectgarbage("collect")
    local end_mem_retained = collectgarbage("count") ---@type integer

    -- Calculate Metrics
    local duration_ms = (end_time - start_time) / 1e6
    local retained_delta = end_mem_retained - start_mem
    local peak_delta = peak_mem - start_mem
    local lines_per_sec = cfg.lines / (duration_ms / 1000)

    if i <= cfg.warmup then
      log("[Warmup %d] %.2f ms", i, duration_ms)
    else
      local run_idx = i - cfg.warmup
      table.insert(times, duration_ms)
      table.insert(mems_retained, retained_delta)
      table.insert(mems_peak, peak_delta)
      table.insert(throughputs, lines_per_sec)
      log(
        "[Run %02d] Time: %.2f ms | Mem Peak: %+.2f KB | Mem Retained: %+.2f KB | %.0f lines/s",
        run_idx,
        duration_ms,
        peak_delta,
        retained_delta,
        lines_per_sec
      )
    end

    -- Small pause between runs
    uv.sleep(10)
  end

  -- Final Report
  local t_avg, t_min, t_max, t_std = calculate_stats(times)
  local mr_avg, mr_min, mr_max = calculate_stats(mems_retained)
  local mp_avg, mp_min, mp_max = calculate_stats(mems_peak)
  local tp_avg = calculate_stats(throughputs)

  print("\n")
  print("================================================================")
  print(string.format(" PERFCHECK RESULTS (N=%d, Lines=%d, Cols=%d)", #times, cfg.lines, cfg.columns))
  print("================================================================")
  print(string.format(" Execution Time : %.2f ms (±%.2f) [Min: %.2f, Max: %.2f]", t_avg, t_std, t_min, t_max))
  print(string.format(" Throughput : %.0f lines/sec", tp_avg))
  print(string.format(" Observed Peak : %.2f KB (Avg) [Min: %.2f, Max: %.2f]", mp_avg, mp_min, mp_max))
  print(string.format(" Retained Mem : %.2f KB (Avg) [Min: %.2f, Max: %.2f]", mr_avg, mr_min, mr_max))
  print("================================================================")

  -- Cleanup file
  os.remove(cfg.filename)
end
--- Make sure the shared scratch buffer can hold `len` bytes plus one more byte
--- (callers write a terminating 0 at index `len`).
--- When the buffer is too small it is replaced by a new one of at least double
--- the size; the old contents are not copied over.
---@param len integer number of payload bytes about to be written
local function ensure_buffer(len)
  local capacity = M._str_buf_size
  if len < capacity then
    return
  end

  local grown = math.max(capacity * 2, len + 1)
  M._str_buf = ffi.new("char[?]", grown)
  M._str_buf_size = grown
end
--- Copy the byte range `(offset, endpos]` of `text` into the shared C buffer and
--- NUL-terminate it.
--- The returned pointer is the module-level scratch buffer: it is overwritten by
--- the next call and may be replaced when the buffer grows.
--- The range is clamped to the bounds of `text`, so an out-of-range request
--- behaves like `string.sub` instead of reading arbitrary memory.
---@param text string
---@param offset integer 0-based
---@param endpos integer 1-based
---@return integer len number of bytes copied (0 for an empty range)
---@return ffi.cdata* char_ptr
local function to_char_array(text, offset, endpos)
  -- ffi.copy reads raw memory without bounds checks; never let the range
  -- extend outside the Lua string.
  if offset < 0 then
    offset = 0
  end
  if endpos > #text then
    endpos = #text
  end

  local len = endpos - offset
  if len <= 0 then
    M._str_buf[0] = 0 ---@diagnostic disable-line: no-unknown
    return 0, M._str_buf
  end

  ensure_buffer(len)

  local text_ptr = ffi.cast("const char*", text)
  ffi.copy(M._str_buf, text_ptr + offset, len)
  M._str_buf[len] = 0 ---@diagnostic disable-line: no-unknown

  return len, M._str_buf
end
M.is_number = (function()
  local byte = string.byte

  -- Byte values of the characters the scanner recognises
  local DIGIT_0, DIGIT_9 = byte("0"), byte("9")
  local PLUS, MINUS = byte("+"), byte("-")
  local DOT = byte(".")
  local COMMA = byte(",")
  local LOWER_E, UPPER_E = byte("e"), byte("E")
  local SPACE, TAB = byte(" "), byte("\t")

  --- Decide whether the byte range of `line` looks like a decimal number.
  --- Accepted shape: optional blanks, optional sign, digits (commas are skipped
  --- wherever they appear in the integer part), optional fraction, optional
  --- exponent, optional trailing blanks. At least one digit is required before
  --- the exponent. The scan works on bytes in place; no substring is created.
  ---@param line string full line text
  ---@param offset? integer 0-based start position of the text
  ---@param endpos? integer end position (1-based, inclusive)
  ---@return boolean is_number
  return function(line, offset, endpos)
    offset = offset or 0
    endpos = endpos or #line

    local pos = offset + 1
    local ch

    -- Leading blanks
    while pos <= endpos do
      ch = byte(line, pos)
      if ch ~= SPACE and ch ~= TAB then
        break
      end
      pos = pos + 1
    end
    if pos > endpos then
      -- nothing but blanks (or an empty range)
      return false
    end

    -- Optional sign; a sign with nothing after it is not a number
    ch = byte(line, pos)
    if ch == PLUS or ch == MINUS then
      pos = pos + 1
      if pos > endpos then
        return false
      end
    end

    local saw_digit = false

    -- Integer part: digits, with commas tolerated (e.g. "1,234,567")
    while pos <= endpos do
      ch = byte(line, pos)
      if ch >= DIGIT_0 and ch <= DIGIT_9 then
        saw_digit = true
      elseif ch ~= COMMA then
        break
      end
      pos = pos + 1
    end

    -- Fraction: a dot followed by any number of digits
    if pos <= endpos and byte(line, pos) == DOT then
      pos = pos + 1
      while pos <= endpos do
        ch = byte(line, pos)
        if ch >= DIGIT_0 and ch <= DIGIT_9 then
          saw_digit = true
          pos = pos + 1
        else
          break
        end
      end
    end

    if not saw_digit then
      -- ".", "+." and "-." carry no digits at all
      return false
    end

    -- Exponent: e/E, optional sign, at least one digit
    if pos <= endpos then
      ch = byte(line, pos)
      if ch == LOWER_E or ch == UPPER_E then
        pos = pos + 1
        if pos > endpos then
          return false -- "123e"
        end

        ch = byte(line, pos)
        if ch == PLUS or ch == MINUS then
          pos = pos + 1
          if pos > endpos then
            return false -- "123e+"
          end
        end

        local exp_start = pos
        while pos <= endpos do
          ch = byte(line, pos)
          if ch >= DIGIT_0 and ch <= DIGIT_9 then
            pos = pos + 1
          else
            break
          end
        end
        if pos == exp_start then
          -- no digit followed the exponent marker
          return false
        end
      end
    end

    -- Only blanks may remain; anything else (e.g. "123abc") disqualifies the text
    while pos <= endpos do
      ch = byte(line, pos)
      if ch ~= SPACE and ch ~= TAB then
        return false
      end
      pos = pos + 1
    end

    return true
  end
end)()
-- Tab cases (tabstop=8)
describe("with tabs", function()
  -- A describe body runs while the suite is being collected, before any `it`
  -- callback executes. Options therefore have to be installed and restored
  -- around each test, not in the describe body itself.
  before_each(function()
    save_options()
    vim.o.tabstop = 8
  end)
  after_each(restore_options)

  local tab_cases = {
    -- tab at position 0 expands to 8
    { str = "\tabc", expected = 11, desc = "tab at start" },
    -- "abc" is 3 chars, tab expands to fill to next 8 (8-3=5)
    { str = "abc\tde", expected = 10, desc = "tab after 3 chars" },
    -- only tab after 3 chars
    { str = "abc\tde", offset = 3, endpos = 4, expected = 5, desc = "only tab after 3 chars" },
  }

  for _, case in ipairs(tab_cases) do
    it(case.desc, function()
      local offset = case.offset or 0
      local endpos = case.endpos or #case.str
      assert.are.same(case.expected, strings.display_width(case.str, offset, endpos))
    end)
  end
end)

-- East Asian Ambiguous width cases
describe("with ambiwidth", function()
  -- save_options() records the current 'ambiwidth' and installs ASCII-only
  -- 'fillchars'; restore_options() puts everything back, so the value set
  -- inside a test no longer leaks into later tests.
  before_each(save_options)
  after_each(restore_options)

  -- Characters like "├─┤" are East Asian Ambiguous
  local ambi_cases = {
    { str = "├─┤", ambiwidth = "single", expected = 3, desc = "box drawing single" },
    { str = "├─┤", ambiwidth = "double", expected = 6, desc = "box drawing double" },
    { str = "①②③", ambiwidth = "single", expected = 3, desc = "circled numbers single" },
    { str = "①②③", ambiwidth = "double", expected = 6, desc = "circled numbers double" },
  }

  for _, case in ipairs(ambi_cases) do
    it(case.desc, function()
      vim.o.ambiwidth = case.ambiwidth
      local offset = case.offset or 0
      local endpos = case.endpos or #case.str
      assert.are.same(case.expected, strings.display_width(case.str, offset, endpos))
    end)
  end
end)
<55551571+hat0uma@users.noreply.github.com> Date: Tue, 16 Dec 2025 01:52:44 +0900 Subject: [PATCH 5/5] docs(parser): add performance notes to convenience APIs Document that `parse_line()` and `create_field_collector()` are not intended for performance-critical paths due to string allocation overhead. Users should use `parse_records()` with event callbacks directly for optimal performance. --- lua/csvview/parser.lua | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/lua/csvview/parser.lua b/lua/csvview/parser.lua index 64f30be..5359138 100644 --- a/lua/csvview/parser.lua +++ b/lua/csvview/parser.lua @@ -308,6 +308,9 @@ function CsvViewParser:parse_record(lnum, events) events.record_end(lnum, current_lnum, terminated) end +--- Create a field collector for convenience APIs. +--- NOTE: This collector extracts field text via string.sub, which has allocation overhead. +--- For performance-critical paths, use parse_records() with event callbacks directly. local function create_field_collector() local fields = {} ---@type CsvView.Parser.FieldInfo[] local current_field = nil ---@type CsvView.Parser.FieldInfo? @@ -350,7 +353,9 @@ local function create_field_collector() end end ---- Parse a single line and return field info table +--- Parse a single line and return field info table. +--- NOTE: This is a convenience API for testing and simple use cases. +--- For performance-critical paths, use parse_records() with event callbacks directly. ---@param lnum integer ---@return boolean is_comment ---@return CsvView.Parser.FieldInfo[] fields