Skip to content

Commit e748b57

Browse files
author
dagou
committed
bug fix
1 parent 669fea1 commit e748b57

File tree

1 file changed

+42
-76
lines changed

1 file changed

+42
-76
lines changed

kr2r/src/compact_hash.rs

Lines changed: 42 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,16 @@ pub struct Page {
172172
pub data: Vec<u32>,
173173
}
174174

175+
/// Default `Page`: `index` 1, `size` 1, and `data` holding a single `0`.
///
/// `CHPage::from` uses this value for `next_page` when the first page's data is
/// empty or its last entry is not `0`.
impl Default for Page {
176+
fn default() -> Self {
177+
Self {
178+
index: 1,
179+
size: 1,
180+
// One-element vector containing just `0`.
data: vec![0],
181+
}
182+
}
183+
}
184+
175185
impl Page {
176186
pub fn new(index: usize, size: usize, data: Vec<u32>) -> Self {
177187
Self { index, size, data }
@@ -186,38 +196,6 @@ impl Page {
186196
}
187197
}
188198

189-
/// Page view whose `data` is a borrowed `&'a [u32]` instead of an owned `Vec<u32>`.
///
/// This commit deletes the type; `CHPage::next_page` becomes an owned `Page`.
pub struct PagePtr<'a> {
190-
// Not read anywhere in this diff; presumably stored so that the mapping `data`
// points into is kept alive alongside it — TODO confirm.
#[allow(dead_code)]
191-
mmap: Option<Mmap>,
192-
pub index: usize,
193-
// Number of entries in `data` as supplied by the constructor's caller.
pub size: usize,
194-
// Borrowed page entries.
pub data: &'a [u32],
195-
}
196-
197-
impl<'a> PagePtr<'a> {
198-
/// Builds a `PagePtr` directly from its four fields; no validation is performed
/// (for example, `size` is not checked against `data.len()`).
pub fn new(mmap: Option<Mmap>, index: usize, size: usize, data: &'a [u32]) -> Self {
199-
Self {
200-
mmap,
201-
index,
202-
size,
203-
data,
204-
}
205-
}
206-
}
207-
208-
/// Default `PagePtr`: no mapping, `index` 1, `size` 1, and `data` borrowing a
/// static one-element array `[0]`.
impl<'a> Default for PagePtr<'a> {
209-
fn default() -> Self {
210-
// Create a static array containing a single 0
211-
static DEFAULT_DATA: [u32; 1] = [0];
212-
Self {
213-
mmap: None,
214-
index: 1,
215-
size: 1,
216-
data: &DEFAULT_DATA,
217-
}
218-
}
219-
}
220-
221199
use std::fmt::{self, Debug};
222200

223201
#[derive(Clone, Copy)]
@@ -353,11 +331,11 @@ pub trait K2Compact: std::marker::Sync + Send {
353331
}
354332

355333
/// Hash configuration together with two pages, `page` and `next_page`.
///
/// After this commit `next_page` is an owned `Page` (previously a borrowed
/// `PagePtr<'a>`), so the struct no longer carries a lifetime parameter.
#[allow(unused)]
356-
pub struct CHPage<'a> {
334+
pub struct CHPage {
357335
// Capacity of the hash table
358336
pub config: HashConfig,
359337
pub page: Page,
360-
pub next_page: PagePtr<'a>,
338+
pub next_page: Page,
361339
}
362340

363341
fn _read_page_from_file<P: AsRef<Path>>(filename: P) -> Result<Page> {
@@ -406,71 +384,59 @@ fn read_page_from_file<P: AsRef<Path>>(filename: P) -> Result<Page> {
406384
Ok(Page::new(index, capacity, data))
407385
}
408386

409-
/// Memory-maps `filename` and returns a `PagePtr` over its `u32` entries
/// (function deleted by this commit).
///
/// Layout as read here: bytes 0..8 hold the index and bytes 8..16 the capacity,
/// both little-endian `u64`; the `u32` entries start at byte 16. The returned
/// slice ends just after the first `0` entry, or covers all `capacity` entries
/// when none is `0`.
///
/// NOTE(review): `'a` is an unconstrained lifetime parameter, so the slice built
/// with `from_raw_parts` is not tied by the type system to the `Mmap` that is
/// stored next to it. The code also never checks that the mapping actually holds
/// `16 + capacity * 4` bytes. Presumably this is what the "bug fix" commit
/// addresses — TODO confirm.
fn _read_pageptr_from_file<'a, P: AsRef<Path>>(filename: P) -> Result<PagePtr<'a>> {
410-
let file = OpenOptions::new().read(true).open(&filename)?;
411-
let mmap = unsafe { MmapOptions::new().populate().map(&file)? };
412-
let index = LittleEndian::read_u64(&mmap[0..8]) as usize;
413-
let capacity = LittleEndian::read_u64(&mmap[8..16]) as usize;
414-
415-
// View of `capacity` u32 values starting 16 bytes into the mapping.
let raw_page_data =
416-
unsafe { std::slice::from_raw_parts(mmap.as_ptr().add(16) as *const u32, capacity) };
417-
418-
// One past the position of the first zero entry; `capacity` when there is none.
let first_zero_end = raw_page_data
419-
.iter()
420-
.position(|&x| x == 0)
421-
.map(|pos| pos + 1)
422-
.unwrap_or(capacity);
423-
let page_data = &raw_page_data[..first_zero_end];
424-
425-
// let page_data =
426-
//     unsafe { std::slice::from_raw_parts(mmap.as_ptr().add(16) as *const u32, capacity) };
427-
428-
Ok(PagePtr::new(Some(mmap), index, first_zero_end, page_data))
429-
}
387+
// NOTE: this span is a rendered diff. A line that follows an `NNN-` gutter belongs
// to the removed mmap-based version; a line that follows an `NNN+` gutter belongs
// to the new version that reads the file into an owned `Page`.

/// Reads a page from `filename` into an owned `Page`, keeping the entries up to
/// and including the first `0`.
///
/// File layout as read here: a 16-byte header (little-endian `u64` index, then
/// little-endian `u64` capacity) followed by `capacity` `u32` entries, which are
/// read in chunks of up to 1024 entries.
///
/// Returns `Page::new(index, n, data)` where `n` is one past the position of the
/// first `0` entry, or `capacity` (after a warning on stderr) when no entry is `0`.
///
/// # Errors
/// Propagates the error from opening the file and from each `read_exact` call,
/// which fails when the file ends before the requested header or chunk is complete.
fn read_pageptr_from_file_chunk<P: AsRef<Path>>(filename: P) -> Result<Page> {
388+
let mut file = std::fs::File::open(filename)?;
430389

431-
fn read_pageptr_from_file_chunk<'a, P: AsRef<Path>>(filename: P) -> Result<PagePtr<'a>> {
432-
let file = OpenOptions::new().read(true).open(&filename)?;
433-
let mmap = unsafe { MmapOptions::new().populate().map(&file)? };
434-
let index = LittleEndian::read_u64(&mmap[0..8]) as usize;
435-
let capacity = LittleEndian::read_u64(&mmap[8..16]) as usize;
390+
// Read the index and capacity
391+
let mut buffer = [0u8; 16];
392+
file.read_exact(&mut buffer)?;
393+
let index = LittleEndian::read_u64(&buffer[0..8]) as usize;
394+
let capacity = LittleEndian::read_u64(&buffer[8..16]) as usize;
436395

437396
// Starts at `capacity`; only overwritten when a zero entry is found.
let mut first_zero_end = capacity;
438-
let chunk_size = 1024; // Define the chunk size for reading
397+
let chunk_size = 1024; // Define the chunk size for reading
439398
let mut found_zero = false;
440-
441-
for i in (0..capacity).step_by(chunk_size) {
442-
let end = usize::min(i + chunk_size, capacity);
443-
let chunk = unsafe {
444-
std::slice::from_raw_parts(mmap.as_ptr().add(16 + i * 4) as *const u32, end - i)
445-
};
446-
if let Some(pos) = chunk.iter().position(|&x| x == 0) {
447-
first_zero_end = i + pos + 1;
399+
// Allocates and zero-fills room for all `capacity` entries up front, even though
// reading stops at the first zero.
// NOTE(review): `capacity` comes straight from the file header and is not
// validated before being used as an allocation size.
let mut data = vec![0u32; capacity];
400+
let mut read_pos = 0;
401+
402+
while read_pos < capacity {
403+
let end = usize::min(read_pos + chunk_size, capacity);
404+
let bytes_to_read = (end - read_pos) * std::mem::size_of::<u32>();
405+
// A fresh byte buffer is allocated for every chunk.
let mut chunk = vec![0u8; bytes_to_read];
406+
file.read_exact(&mut chunk)?;
407+
// NOTE(review): `chunk` is a `Vec<u8>`, whose buffer is only guaranteed 1-byte
// alignment, while `slice::from_raw_parts` for `u32` requires a pointer aligned
// for `u32`; this cast is therefore not guaranteed to be sound. It also
// reinterprets the bytes in native byte order, whereas the header above is
// decoded as little-endian. Decoding each 4-byte group of
// `chunk.chunks_exact(4)` with `u32::from_le_bytes` (or `from_ne_bytes`) would
// remove the `unsafe` block.
let chunk_u32 =
408+
unsafe { std::slice::from_raw_parts(chunk.as_ptr() as *const u32, end - read_pos) };
409+
// The whole chunk is copied, including any entries after the first zero; those
// are dropped by `truncate` below.
data[read_pos..end].copy_from_slice(chunk_u32);
410+
411+
if let Some(pos) = chunk_u32.iter().position(|&x| x == 0) {
412+
// One past the zero's position, counted from the start of the page.
first_zero_end = read_pos + pos + 1;
448413
found_zero = true;
449414
break;
450415
}
416+
read_pos = end;
451417
}
452418

453419
if !found_zero {
454420
// Redundant: `first_zero_end` still holds its initial value `capacity` here.
first_zero_end = capacity;
421+
eprintln!("Warning: No zero value found in the data, using full capacity.");
455422
}
456423

457-
let page_data =
458-
unsafe { std::slice::from_raw_parts(mmap.as_ptr().add(16) as *const u32, first_zero_end) };
424+
// Keep only the entries up to and including the first zero.
data.truncate(first_zero_end);
459425

460-
Ok(PagePtr::new(Some(mmap), index, first_zero_end, page_data))
426+
Ok(Page::new(index, first_zero_end, data))
461427
}
462428

463-
impl<'a> CHPage<'a> {
429+
impl CHPage {
464430
pub fn from<P: AsRef<Path> + Debug>(
465431
config: HashConfig,
466432
chunk_file1: P,
467433
chunk_file2: P,
468-
) -> Result<CHPage<'a>> {
434+
) -> Result<CHPage> {
469435
let page = read_page_from_file(chunk_file1)?;
470436
let next_page = if page.data.last().map_or(false, |&x| x == 0) {
471437
read_pageptr_from_file_chunk(chunk_file2)?
472438
} else {
473-
PagePtr::default()
439+
Page::default()
474440
};
475441

476442
let chtm = CHPage {
@@ -506,7 +472,7 @@ impl<'a> CHPage<'a> {
506472
}
507473
}
508474

509-
impl<'a> K2Compact for CHPage<'a> {
475+
impl K2Compact for CHPage {
510476
fn get_idx_mask(&self) -> usize {
511477
let idx_bits = ((self.config.hash_capacity as f64).log2().ceil() as usize).max(1);
512478
(1 << idx_bits) - 1

0 commit comments

Comments
 (0)