@@ -172,6 +172,16 @@ pub struct Page {
172172 pub data : Vec < u32 > ,
173173}
174174
175+ impl Default for Page {
176+ fn default ( ) -> Self {
177+ Self {
178+ index : 1 ,
179+ size : 1 ,
180+ data : vec ! [ 0 ] ,
181+ }
182+ }
183+ }
184+
175185impl Page {
176186 pub fn new ( index : usize , size : usize , data : Vec < u32 > ) -> Self {
177187 Self { index, size, data }
@@ -186,38 +196,6 @@ impl Page {
186196 }
187197}
188198
189- pub struct PagePtr < ' a > {
190- #[ allow( dead_code) ]
191- mmap : Option < Mmap > ,
192- pub index : usize ,
193- pub size : usize ,
194- pub data : & ' a [ u32 ] ,
195- }
196-
197- impl < ' a > PagePtr < ' a > {
198- pub fn new ( mmap : Option < Mmap > , index : usize , size : usize , data : & ' a [ u32 ] ) -> Self {
199- Self {
200- mmap,
201- index,
202- size,
203- data,
204- }
205- }
206- }
207-
208- impl < ' a > Default for PagePtr < ' a > {
209- fn default ( ) -> Self {
210- // 创建一个包含0的静态数组
211- static DEFAULT_DATA : [ u32 ; 1 ] = [ 0 ] ;
212- Self {
213- mmap : None ,
214- index : 1 ,
215- size : 1 ,
216- data : & DEFAULT_DATA ,
217- }
218- }
219- }
220-
221199use std:: fmt:: { self , Debug } ;
222200
223201#[ derive( Clone , Copy ) ]
@@ -353,11 +331,11 @@ pub trait K2Compact: std::marker::Sync + Send {
353331}
354332
355333#[ allow( unused) ]
356- pub struct CHPage < ' a > {
334+ pub struct CHPage {
357335 // 哈希表的容量
358336 pub config : HashConfig ,
359337 pub page : Page ,
360- pub next_page : PagePtr < ' a > ,
338+ pub next_page : Page ,
361339}
362340
363341fn _read_page_from_file < P : AsRef < Path > > ( filename : P ) -> Result < Page > {
@@ -406,71 +384,59 @@ fn read_page_from_file<P: AsRef<Path>>(filename: P) -> Result<Page> {
406384 Ok ( Page :: new ( index, capacity, data) )
407385}
408386
409- fn _read_pageptr_from_file < ' a , P : AsRef < Path > > ( filename : P ) -> Result < PagePtr < ' a > > {
410- let file = OpenOptions :: new ( ) . read ( true ) . open ( & filename) ?;
411- let mmap = unsafe { MmapOptions :: new ( ) . populate ( ) . map ( & file) ? } ;
412- let index = LittleEndian :: read_u64 ( & mmap[ 0 ..8 ] ) as usize ;
413- let capacity = LittleEndian :: read_u64 ( & mmap[ 8 ..16 ] ) as usize ;
414-
415- let raw_page_data =
416- unsafe { std:: slice:: from_raw_parts ( mmap. as_ptr ( ) . add ( 16 ) as * const u32 , capacity) } ;
417-
418- let first_zero_end = raw_page_data
419- . iter ( )
420- . position ( |& x| x == 0 )
421- . map ( |pos| pos + 1 )
422- . unwrap_or ( capacity) ;
423- let page_data = & raw_page_data[ ..first_zero_end] ;
424-
425- // let page_data =
426- // unsafe { std::slice::from_raw_parts(mmap.as_ptr().add(16) as *const u32, capacity) };
427-
428- Ok ( PagePtr :: new ( Some ( mmap) , index, first_zero_end, page_data) )
429- }
387+ fn read_pageptr_from_file_chunk < P : AsRef < Path > > ( filename : P ) -> Result < Page > {
388+ let mut file = std:: fs:: File :: open ( filename) ?;
430389
431- fn read_pageptr_from_file_chunk < ' a , P : AsRef < Path > > ( filename : P ) -> Result < PagePtr < ' a > > {
432- let file = OpenOptions :: new ( ) . read ( true ) . open ( & filename ) ? ;
433- let mmap = unsafe { MmapOptions :: new ( ) . populate ( ) . map ( & file ) ? } ;
434- let index = LittleEndian :: read_u64 ( & mmap [ 0 ..8 ] ) as usize ;
435- let capacity = LittleEndian :: read_u64 ( & mmap [ 8 ..16 ] ) as usize ;
390+ // Read the index and capacity
391+ let mut buffer = [ 0u8 ; 16 ] ;
392+ file . read_exact ( & mut buffer ) ? ;
393+ let index = LittleEndian :: read_u64 ( & buffer [ 0 ..8 ] ) as usize ;
394+ let capacity = LittleEndian :: read_u64 ( & buffer [ 8 ..16 ] ) as usize ;
436395
437396 let mut first_zero_end = capacity;
438- let chunk_size = 1024 ; // 定义每次读取的块大小
397+ let chunk_size = 1024 ; // Define the chunk size for reading
439398 let mut found_zero = false ;
440-
441- for i in ( 0 ..capacity) . step_by ( chunk_size) {
442- let end = usize:: min ( i + chunk_size, capacity) ;
443- let chunk = unsafe {
444- std:: slice:: from_raw_parts ( mmap. as_ptr ( ) . add ( 16 + i * 4 ) as * const u32 , end - i)
445- } ;
446- if let Some ( pos) = chunk. iter ( ) . position ( |& x| x == 0 ) {
447- first_zero_end = i + pos + 1 ;
399+ let mut data = vec ! [ 0u32 ; capacity] ;
400+ let mut read_pos = 0 ;
401+
402+ while read_pos < capacity {
403+ let end = usize:: min ( read_pos + chunk_size, capacity) ;
404+ let bytes_to_read = ( end - read_pos) * std:: mem:: size_of :: < u32 > ( ) ;
405+ let mut chunk = vec ! [ 0u8 ; bytes_to_read] ;
406+ file. read_exact ( & mut chunk) ?;
407+ let chunk_u32 =
408+ unsafe { std:: slice:: from_raw_parts ( chunk. as_ptr ( ) as * const u32 , end - read_pos) } ;
409+ data[ read_pos..end] . copy_from_slice ( chunk_u32) ;
410+
411+ if let Some ( pos) = chunk_u32. iter ( ) . position ( |& x| x == 0 ) {
412+ first_zero_end = read_pos + pos + 1 ;
448413 found_zero = true ;
449414 break ;
450415 }
416+ read_pos = end;
451417 }
452418
453419 if !found_zero {
454420 first_zero_end = capacity;
421+ eprintln ! ( "Warning: No zero value found in the data, using full capacity." ) ;
455422 }
456423
457- let page_data =
458- unsafe { std:: slice:: from_raw_parts ( mmap. as_ptr ( ) . add ( 16 ) as * const u32 , first_zero_end) } ;
424+ data. truncate ( first_zero_end) ;
459425
460- Ok ( PagePtr :: new ( Some ( mmap ) , index, first_zero_end, page_data ) )
426+ Ok ( Page :: new ( index, first_zero_end, data ) )
461427}
462428
463- impl < ' a > CHPage < ' a > {
429+ impl CHPage {
464430 pub fn from < P : AsRef < Path > + Debug > (
465431 config : HashConfig ,
466432 chunk_file1 : P ,
467433 chunk_file2 : P ,
468- ) -> Result < CHPage < ' a > > {
434+ ) -> Result < CHPage > {
469435 let page = read_page_from_file ( chunk_file1) ?;
470436 let next_page = if page. data . last ( ) . map_or ( false , |& x| x == 0 ) {
471437 read_pageptr_from_file_chunk ( chunk_file2) ?
472438 } else {
473- PagePtr :: default ( )
439+ Page :: default ( )
474440 } ;
475441
476442 let chtm = CHPage {
@@ -506,7 +472,7 @@ impl<'a> CHPage<'a> {
506472 }
507473}
508474
509- impl < ' a > K2Compact for CHPage < ' a > {
475+ impl K2Compact for CHPage {
510476 fn get_idx_mask ( & self ) -> usize {
511477 let idx_bits = ( ( self . config . hash_capacity as f64 ) . log2 ( ) . ceil ( ) as usize ) . max ( 1 ) ;
512478 ( 1 << idx_bits) - 1
0 commit comments