@@ -29,12 +29,16 @@ defmodule Opal.Tool.Grep do
2929 parsed and applied during directory traversal, so ignored files and
3030 directories are skipped automatically.
3131
32+ Set `no_ignore: true` to bypass `.gitignore` rules and search all
33+ non-binary files (hardcoded skip directories like `.git` are still
34+ excluded).
35+
3236 Binary files (containing null bytes) are silently skipped.
3337 """
3438
3539 @ behaviour Opal.Tool
3640
37- @ dialyzer { :no_opaque , [ do_walk_dir: 5 , walk_dir: 5 ] }
41+ @ dialyzer { :no_opaque , [ do_walk_dir: 6 , walk_dir: 6 ] }
3842
3943 alias Opal.Tool.Encoding
4044 alias Opal.Tool.FileHelper
@@ -57,6 +61,10 @@ defmodule Opal.Tool.Grep do
5761 @ max_output_bytes 50 * 1024
5862 @ max_depth 25
5963
64+ # Parallelism: only fan out when there are enough files to justify it.
65+ @ parallel_threshold 4
66+ @ max_concurrency System . schedulers_online ( )
67+
@impl true
@spec name() :: String.t()
# Tool identifier reported to the Opal.Tool behaviour.
def name, do: "grep"
@@ -100,6 +108,11 @@ defmodule Opal.Tool.Grep do
100108 "type" => "integer" ,
101109 "description" =>
102110 "Maximum number of matching lines returned across all files (default: 50)"
111+ } ,
112+ "no_ignore" => % {
113+ "type" => "boolean" ,
114+ "description" =>
115+ "When true, search files even if they are excluded by .gitignore rules (default: false)"
103116 }
104117 } ,
105118 "required" => [ "pattern" ]
@@ -115,12 +128,13 @@ defmodule Opal.Tool.Grep do
115128 include = Map . get ( args , "include" )
116129 ctx_lines = Map . get ( args , "context_lines" , @ max_context_default ) |> max ( 0 ) |> min ( 10 )
117130 max_results = Map . get ( args , "max_results" , @ max_results_default ) |> max ( 1 ) |> min ( 500 )
131+ no_ignore = Map . get ( args , "no_ignore" , false )
118132
119133 allow_bases = FileHelper . allowed_bases ( context )
120134
121135 case FileHelper . resolve_path ( search_path , working_dir , allow_bases: allow_bases ) do
122136 { :ok , resolved } ->
123- do_search ( resolved , regex , include , ctx_lines , max_results , working_dir )
137+ do_search ( resolved , regex , include , ctx_lines , max_results , working_dir , no_ignore )
124138
125139 { :error , reason } ->
126140 { :error , reason }
@@ -136,10 +150,10 @@ defmodule Opal.Tool.Grep do
136150
137151 # -- Search implementation --------------------------------------------------
138152
139- defp do_search ( resolved , regex , include , ctx_lines , max_results , working_dir ) do
153+ defp do_search ( resolved , regex , include , ctx_lines , max_results , working_dir , no_ignore ) do
140154 glob = Opal.Platform . compile_glob ( include )
141155
142- files = collect_files ( resolved , glob )
156+ files = collect_files ( resolved , glob , no_ignore )
143157
144158 { results , total_matches , capped? } =
145159 search_files ( files , regex , ctx_lines , max_results , working_dir )
@@ -154,27 +168,28 @@ defmodule Opal.Tool.Grep do
154168
155169 # -- File collection --------------------------------------------------------
156170
# Builds the list of files to search. A regular file is returned as a
# singleton (if it matches the include glob); a directory is walked
# recursively. When `no_ignore` is set, an empty Gitignore rooted at the
# path is used so no .gitignore rules apply during the walk.
defp collect_files(path, glob, no_ignore) do
  cond do
    not File.regular?(path) ->
      gitignore = if no_ignore, do: %Gitignore{root: path}, else: Gitignore.load(path)
      walk_dir(path, glob, 0, MapSet.new(), gitignore, no_ignore)

    Opal.Platform.matches_glob?(Path.basename(path), glob) ->
      [path]

    true ->
      []
  end
end
165179
# Walks directories with depth limiting and symlink-loop protection.
# `visited` tracks real (resolved) directory paths to break cycles.
# `gitignore` accumulates rules from nested .gitignore files.
# `no_ignore` bypasses .gitignore rules when true.
defp walk_dir(dir, glob, depth, visited, gitignore, no_ignore) do
  # Guard clause: stop descending once the depth budget is spent.
  if depth > @max_depth,
    do: [],
    else: do_walk_dir(dir, glob, depth, visited, gitignore, no_ignore)
end
176191
177- defp do_walk_dir ( dir , glob , depth , visited , gitignore ) do
192+ defp do_walk_dir ( dir , glob , depth , visited , gitignore , no_ignore ) do
178193 # Resolve symlinks to detect cycles on all platforms
179194 real_dir = Path . expand ( dir )
180195
@@ -185,9 +200,9 @@ defmodule Opal.Tool.Grep do
185200
186201 # Merge nested .gitignore when descending into subdirectories.
187202 # The root .gitignore is already loaded in collect_files, so skip
188- # re-reading it at depth 0.
203+ # re-reading it at depth 0. Skip entirely when no_ignore is set.
189204 gitignore =
190- if depth > 0 do
205+ if not no_ignore and depth > 0 do
191206 case File . read ( Path . join ( dir , ".gitignore" ) ) do
192207 { :ok , content } ->
193208 child = Gitignore . parse ( content , gitignore . root )
@@ -213,11 +228,11 @@ defmodule Opal.Tool.Grep do
213228 skip_dir? ( entry ) ->
214229 [ ]
215230
216- Gitignore . ignored? ( gitignore , rel , is_dir ) ->
231+ not no_ignore and Gitignore . ignored? ( gitignore , rel , is_dir ) ->
217232 [ ]
218233
219234 is_dir ->
220- walk_dir ( full , glob , depth + 1 , visited , gitignore )
235+ walk_dir ( full , glob , depth + 1 , visited , gitignore , no_ignore )
221236
222237 Opal.Platform . matches_glob? ( entry , glob ) ->
223238 [ full ]
@@ -241,8 +256,21 @@ defmodule Opal.Tool.Grep do
241256 defp skip_dir? ( name ) , do: MapSet . member? ( @ skip_dirs , name )
242257
243258 # -- Search across files ----------------------------------------------------
259+ #
260+ # Files are searched in parallel when there are enough to justify the
261+ # overhead. Each task is fully independent (read → regex → hashline),
262+ # so there is no shared mutable state. Results stream back in file
263+ # order via `ordered: true`, then we apply the max_results cap.
244264
defp search_files(files, regex, ctx_lines, max_results, working_dir) do
  # Fan out to the parallel path only when there are enough files to
  # amortize the task overhead. Enum.count_until/2 stops counting at the
  # threshold instead of traversing the whole (possibly long) file list
  # the way length/1 would.
  if Enum.count_until(files, @parallel_threshold) < @parallel_threshold do
    search_files_sequential(files, regex, ctx_lines, max_results, working_dir)
  else
    search_files_parallel(files, regex, ctx_lines, max_results, working_dir)
  end
end
272+
273+ defp search_files_sequential ( files , regex , ctx_lines , max_results , working_dir ) do
246274 Enum . reduce_while ( files , { [ ] , 0 , false } , fn file , { acc , count , _capped? } ->
247275 case search_file ( file , regex , ctx_lines , max_results - count , working_dir ) do
248276 { :ok , matches , match_count } when match_count > 0 ->
@@ -261,6 +289,72 @@ defmodule Opal.Tool.Grep do
261289 end )
262290 end
263291
# Searches files concurrently with bounded fan-out. Each task is fully
# independent (read → regex → hashline), so there is no shared mutable
# state. `ordered: true` streams results back in file order, after which
# the global max_results cap is applied.
defp search_files_parallel(files, regex, ctx_lines, max_results, working_dir) do
  # Each task searches with the full max_results cap; per-file results
  # are trimmed after collection. `timeout: :infinity` is required: the
  # default 5_000 ms would exit the caller on a slow filesystem read,
  # and the reducer below only handles {:ok, _} elements.
  {results, count, capped?} =
    files
    |> Task.async_stream(
      fn file -> {file, search_file(file, regex, ctx_lines, max_results, working_dir)} end,
      ordered: true,
      max_concurrency: @max_concurrency,
      timeout: :infinity
    )
    |> Enum.reduce_while({[], 0, false}, fn {:ok, {file, result}}, {acc, count, _capped?} ->
      case result do
        {:ok, matches, match_count} when match_count > 0 ->
          # Trim this file's matches if adding all would exceed the cap.
          kept = min(match_count, max_results - count)
          matches = if kept < match_count, do: trim_matches(matches, kept), else: matches
          new_count = count + kept

          # Prepend (O(1)) instead of `acc ++ [...]` (O(n) per step,
          # O(n²) overall); reversed once below.
          new_acc = [{file, matches} | acc]

          if new_count >= max_results do
            {:halt, {new_acc, new_count, true}}
          else
            {:cont, {new_acc, new_count, false}}
          end

        _ ->
          # Errors and empty files contribute nothing.
          {:cont, {acc, count, false}}
      end
    end)

  {Enum.reverse(results), count, capped?}
end
330+
# Rebuild the tagged output keeping only the first `keep` match lines.
# Context lines around kept matches are preserved. Groups are lists of
# `{tagged_line, is_match?}` tuples; a partially kept group retains its
# trailing context lines, matching grep-style output.
defp trim_matches({rel_path, groups}, keep) do
  {trimmed_groups, _remaining} =
    Enum.reduce_while(groups, {[], keep}, fn group, {acc, remaining} ->
      match_lines_in_group = Enum.count(group, fn {_tagged, is_match} -> is_match end)

      cond do
        # Budget already spent: stop without emitting an empty group
        # (the previous version appended a spurious empty partial here).
        remaining == 0 ->
          {:halt, {acc, 0}}

        match_lines_in_group <= remaining ->
          {:cont, {[group | acc], remaining - match_lines_in_group}}

        true ->
          # Partial group: keep only enough match lines. Non-match
          # (context) lines are always kept, as before.
          {partial, _} =
            Enum.reduce(group, {[], remaining}, fn {_tagged, is_match} = entry, {kept, rem} ->
              cond do
                not is_match -> {[entry | kept], rem}
                rem > 0 -> {[entry | kept], rem - 1}
                true -> {kept, 0}
              end
            end)

          {:halt, {[Enum.reverse(partial) | acc], 0}}
      end
    end)

  # Accumulators are built by prepending (O(1)); restore order once here.
  {rel_path, Enum.reverse(trimmed_groups)}
end
357+
264358 defp search_file ( file , regex , ctx_lines , remaining , working_dir ) do
265359 with { :ok , raw } <- File . read ( file ) ,
266360 true <- String . valid? ( raw ) ,