2 * Copyright (C) 2017 Cisco Inc.
4 * This program is free software: you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License, version 2,
6 * as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 // @author Changxue Deng <chadeng@cisco.com>
27 #include "integer_4b_5b.h"
29 #include "resource_pool.h"
30 #include "async_writer.h"
32 #define OFFSET_SIZE_P1 7
34 #define MAX_BUFFER_RESERVE_SIZE 8192
35 #define NUM_BUFFER_RESERVE MAX_BUFFER_RESERVE_SIZE/BUFFER_ALIGNMENT
39 /////////////////////////////////////////////////////////////////////////////////////
40 /////////////////////////////////////////////////////////////////////////////////////
41 /////////////////////////////////////////////////////////////////////////////////////
43 // Edge size is 13 bytes.
44 // X************ leading byte of edge key offset
45 // *XXXX******** edge key string or edge key offset
46 // *****X******* edge key length
47 // ******X****** flag (0x01) indicating the data offset; This bit can used to
48 // determine if the node if a leaf-node.
49 // flag (0x02) indicating this edge owns the allocated bufifer
50 // *******X***** leading byte of next node offset or data offset
51 // ********xxxxX next node offset of data offset
52 /////////////////////////////////////////////////////////////////////////////////////
54 // Node size is 1 + 1 + 6 + NT + NT*13
55 // X************ flags (0x01 bit indicating match found)
56 // *X*********** nt-1, nt is the number of edges for this node.
57 // **XXXXXX***** data offset
58 // NT bytes of first characters of each edge
59 // NT edges NT*13 bytes
60 // Since we use 6-byte to store both the index and data offset, the maximum size for
61 // data and index is 281474976710655 bytes (or 255T).
62 /////////////////////////////////////////////////////////////////////////////////////
63 /////////////////////////////////////////////////////////////////////////////////////
64 /////////////////////////////////////////////////////////////////////////////////////
67 DictMem::DictMem(const std::string &mbdir, bool init_header, size_t memsize,
68 int mode, uint32_t block_size, int max_num_blk, uint32_t queue_size)
77 assert(sizeof(IndexHeader) <= (unsigned) RollableFile::page_size);
79 bool create_hdr = false;
80 size_t hdr_size = RollableFile::page_size;
81 if(mode & CONSTS::ACCESS_MODE_WRITER)
84 hdr_size += sizeof(AsyncNode) * queue_size;
86 header_file = ResourcePool::getInstance().OpenFile(mbdir + "_mabain_h",
91 if(header_file == NULL)
93 std::cerr << "failed top open header file: " << mbdir + "_mabain_h\n";
96 header = reinterpret_cast<IndexHeader *>(header_file->GetMapAddr());
100 // Both reader and writer need to open the mmapped file.
103 if(block_size != 0 && header->index_block_size != block_size)
105 std::cerr << "mabain index block size not match " << block_size << ": "
106 << header->index_block_size << std::endl;
108 throw (int) MBError::INVALID_SIZE;
113 memset(header, 0, sizeof(IndexHeader));
114 header->index_block_size = block_size;
116 kv_file = new RollableFile(mbdir + "_mabain_i",
117 static_cast<size_t>(header->index_block_size),
118 memsize, mode, max_num_blk);
120 kv_file->InitShmSlidingAddr(&header->shm_index_sliding_start);
122 if(!(mode & CONSTS::ACCESS_MODE_WRITER))
125 if(header->async_queue_size == 0)
127 std::cerr << "async_queue_size not set yet in header\n";
134 ////////////////////////////////////
135 // Writor only initialization below
136 ////////////////////////////////////
138 node_size = new int[NUM_ALPHABET];
140 for(int i = 0; i < NUM_ALPHABET; i++)
143 node_size[i] = 1 + 1 + OFFSET_SIZE + nt + nt*EDGE_SIZE;
146 node_ptr = new uint8_t[ node_size[NUM_ALPHABET-1] ];
147 free_lists = new FreeList(mbdir+"_ibfl", BUFFER_ALIGNMENT, NUM_BUFFER_RESERVE);
152 header->version[0] = version[0];
153 header->version[1] = version[1];
154 header->version[2] = version[2];
155 header->version[3] = 0;
156 // Cannot set is_valid to true.
157 // More init to be dobe in InitRootNode.
163 Logger::Log(LOG_LEVEL_INFO, "set up mabain db version to %u.%u.%u",
164 header->version[0], header->version[1], header->version[2]);
167 // The whole edge is initizlized to zero.
168 const uint8_t DictMem::empty_edge[] = {0};
170 void DictMem::InitRootNode()
173 assert(header != NULL);
175 header->m_index_offset = 0;
180 node_move = ReserveNode(NUM_ALPHABET-1, root_offset, root_node);
182 assert(root_offset == 0);
185 root_node[0] = FLAG_NODE_NONE;
186 root_node[1] = NUM_ALPHABET-1;
187 for(int i = 0; i < NUM_ALPHABET; i++)
189 root_node[NODE_EDGE_KEY_FIRST+i] = static_cast<uint8_t>(i);
193 WriteData(root_node, node_size[NUM_ALPHABET-1], root_offset);
195 // Everything is running fine if reaching this point.
203 void DictMem::Destroy()
208 if(free_lists != NULL)
211 if(node_size != NULL)
217 bool DictMem::IsValid() const
223 void DictMem::AddRootEdge(EdgePtrs &edge_ptrs, const uint8_t *key,
224 int len, size_t data_offset)
226 edge_ptrs.len_ptr[0] = len;
227 if(len > LOCAL_EDGE_LEN)
230 ReserveData(key+1, len-1, edge_str_off);
231 Write5BInteger(edge_ptrs.ptr, edge_str_off);
235 memcpy(edge_ptrs.ptr, key+1, len-1);
238 edge_ptrs.flag_ptr[0] = EDGE_FLAG_DATA_OFF;
239 Write6BInteger(edge_ptrs.offset_ptr, data_offset);
242 header->excep_lf_offset = edge_ptrs.offset;
243 header->excep_updating_status = EXCEP_STATUS_ADD_EDGE;
244 lfree->WriterLockFreeStart(edge_ptrs.offset);
246 WriteEdge(edge_ptrs);
248 lfree->WriterLockFreeStop();
249 header->excep_updating_status = EXCEP_STATUS_NONE;
253 void DictMem::UpdateTailEdge(EdgePtrs &edge_ptrs, int match_len, MBData &data,
254 EdgePtrs &tail_edge, uint8_t &new_key_first,
255 bool &map_new_sliding)
257 int edge_len = edge_ptrs.len_ptr[0] - match_len;
258 tail_edge.len_ptr[0] = edge_len;
259 if(edge_len > LOCAL_EDGE_LEN)
261 // Old key len must be greater than LOCAL_EDGE_LEN too.
262 // Load the string with length edge_len.
264 size_t edge_str_off = Get5BInteger(edge_ptrs.ptr);
265 if(ReadData(data.node_buff, edge_len, edge_str_off + match_len - 1)
267 throw (int) MBError::READ_ERROR;
268 new_key_first = data.node_buff[0];
270 // Reserve the key buffer
271 ReserveData(data.node_buff+1, edge_len-1, new_key_off, map_new_sliding);
272 map_new_sliding = false;
273 Write5BInteger(tail_edge.ptr, new_key_off);
277 if(edge_ptrs.len_ptr[0] > LOCAL_EDGE_LEN)
279 if(ReadData(data.node_buff, edge_len, Get5BInteger(edge_ptrs.ptr)+match_len-1)
281 throw (int) MBError::READ_ERROR;
282 new_key_first = data.node_buff[0];
284 memcpy(tail_edge.ptr, data.node_buff+1, edge_len-1);
288 // Both new and old keys are local
289 new_key_first = edge_ptrs.ptr[match_len - 1];
291 memcpy(tail_edge.ptr, edge_ptrs.ptr+match_len, edge_len-1);
295 // 7 = 1 + OFFSET_SIZE
296 memcpy(tail_edge.flag_ptr, edge_ptrs.flag_ptr, OFFSET_SIZE_P1);
299 //The old edge becomes head edge.
300 void DictMem::UpdateHeadEdge(EdgePtrs &edge_ptrs, int match_len,
301 MBData &data, int &release_buffer_size,
302 size_t &edge_str_off, bool &map_new_sliding)
304 int match_len_m1 = match_len - 1;
305 if(edge_ptrs.len_ptr[0] > LOCAL_EDGE_LEN)
307 if(match_len <= LOCAL_EDGE_LEN)
309 edge_str_off = Get5BInteger(edge_ptrs.ptr);
310 release_buffer_size = edge_ptrs.len_ptr[0] - 1;
311 // Old key is remote but new key is local. Need to read the old key.
314 if(ReadData(edge_ptrs.ptr, match_len_m1, edge_str_off) != match_len_m1)
315 throw (int) MBError::READ_ERROR;
320 edge_str_off = Get5BInteger(edge_ptrs.ptr);
321 release_buffer_size = edge_ptrs.len_ptr[0] - 1;
322 // Load the string with length edge_len - 1
323 if(ReadData(data.node_buff, match_len_m1, edge_str_off) != match_len_m1)
324 throw (int) MBError::READ_ERROR;
325 // Reserve the key buffer
327 ReserveData(data.node_buff, match_len_m1, new_key_off, map_new_sliding);
328 map_new_sliding = false;
329 Write5BInteger(edge_ptrs.ptr, new_key_off);
333 edge_ptrs.len_ptr[0] = match_len;
334 edge_ptrs.flag_ptr[0] = 0;
337 // Insert a new node on the current edge
338 // The old edge becomes two edges.
339 int DictMem::InsertNode(EdgePtrs &edge_ptrs, int match_len,
340 size_t data_offset, MBData &data)
343 EdgePtrs new_edge_ptrs;
346 bool map_new_sliding = false;
348 // The new node has one edge. nt1 = nt - 1 = 0
349 node_move = ReserveNode(0, node_ptrs.offset, node);
351 map_new_sliding = true;
353 InitNodePtrs(node, 0, node_ptrs);
355 InitEdgePtrs(node_ptrs, 0, new_edge_ptrs);
357 uint8_t new_key_first;
358 UpdateTailEdge(edge_ptrs, match_len, data, new_edge_ptrs, new_key_first,
361 int release_buffer_size = 0;
362 size_t edge_str_off = 0;
363 UpdateHeadEdge(edge_ptrs, match_len, data, release_buffer_size, edge_str_off,
365 Write6BInteger(edge_ptrs.offset_ptr, node_ptrs.offset);
367 // Update the new node
368 // match found for the new node
369 node[0] = FLAG_NODE_NONE | FLAG_NODE_MATCH;
370 // Update data offset in the node
371 Write6BInteger(node_ptrs.ptr+2, data_offset);
372 // Update the first character in edge key
373 node_ptrs.edge_key_ptr[0] = new_key_first;
376 WriteData(node, node_size[0], node_ptrs.offset);
378 if(release_buffer_size > 0)
379 ReleaseBuffer(edge_str_off, release_buffer_size);
381 header->excep_lf_offset = edge_ptrs.offset;
382 header->excep_updating_status = EXCEP_STATUS_ADD_EDGE;
383 lfree->WriterLockFreeStart(edge_ptrs.offset);
385 WriteEdge(edge_ptrs);
387 lfree->WriterLockFreeStop();
388 header->excep_updating_status = EXCEP_STATUS_NONE;
392 return MBError::SUCCESS;
395 // Insert a new node on the current edge.
396 // Add a new edge to the new node. The new node will have two edges.
397 // The old edge becomes two edges.
398 int DictMem::AddLink(EdgePtrs &edge_ptrs, int match_len, const uint8_t *key,
399 int key_len, size_t data_off, MBData &data)
402 EdgePtrs new_edge_ptrs[2];
405 bool map_new_sliding = false;
407 // The new node has two edge. nt1 = nt - 1 = 1
408 node_move = ReserveNode(1, node_ptrs.offset, node);
410 map_new_sliding = true;
411 InitNodePtrs(node, 1, node_ptrs);
412 node[0] = FLAG_NODE_NONE;
414 InitEdgePtrs(node_ptrs, 0, new_edge_ptrs[0]);
415 InitEdgePtrs(node_ptrs, 1, new_edge_ptrs[1]);
417 uint8_t new_key_first;
418 UpdateTailEdge(edge_ptrs, match_len, data, new_edge_ptrs[0], new_key_first,
421 int release_buffer_size = 0;
423 UpdateHeadEdge(edge_ptrs, match_len, data, release_buffer_size, edge_str_off,
425 Write6BInteger(edge_ptrs.offset_ptr, node_ptrs.offset);
427 // Update the new node
428 // match not found for the new node, should not set node[1] and data offset
429 node_ptrs.edge_key_ptr[0] = new_key_first;
430 node_ptrs.edge_key_ptr[1] = key[0];
432 // Update the new edge
433 new_edge_ptrs[1].len_ptr[0] = key_len;
434 if(key_len > LOCAL_EDGE_LEN)
437 ReserveData(key+1, key_len-1, new_key_off, map_new_sliding);
438 Write5BInteger(new_edge_ptrs[1].ptr, new_key_off);
444 memcpy(new_edge_ptrs[1].ptr, key+1, key_len-1);
446 // Indicate this new edge holds a data offset
447 new_edge_ptrs[1].flag_ptr[0] = EDGE_FLAG_DATA_OFF;
448 Write6BInteger(new_edge_ptrs[1].offset_ptr, data_off);
451 WriteData(node, node_size[1], node_ptrs.offset);
453 // Update the parent edge
454 if(release_buffer_size > 0)
455 ReleaseBuffer(edge_str_off, release_buffer_size);
457 header->excep_lf_offset = edge_ptrs.offset;
458 header->excep_updating_status = EXCEP_STATUS_ADD_EDGE;
459 lfree->WriterLockFreeStart(edge_ptrs.offset);
461 WriteEdge(edge_ptrs);
463 lfree->WriterLockFreeStop();
464 header->excep_updating_status = EXCEP_STATUS_NONE;
467 header->n_edges += 2;
468 return MBError::SUCCESS;
471 // Add a new edge in current node
472 // This invloves creating a new node and copying data from old node to the new node
473 // and updating the child node offset in edge_ptrs (parent edge).
474 int DictMem::UpdateNode(EdgePtrs &edge_ptrs, const uint8_t *key, int key_len,
477 int nt = edge_ptrs.curr_nt + 1;
481 bool map_new_sliding = false;
483 node_move = ReserveNode(nt, node_ptrs.offset, node);
485 map_new_sliding = true;
486 InitNodePtrs(node, nt, node_ptrs);
489 size_t old_node_off = Get6BInteger(edge_ptrs.offset_ptr);
490 int release_node_index = -1;
493 // Change from empty node to node with one edge
494 // The old empty node stored the data offset instead of child node off.
495 if(edge_ptrs.flag_ptr[0] & EDGE_FLAG_DATA_OFF)
497 Write6BInteger(node_ptrs.ptr+2, old_node_off);
498 edge_ptrs.flag_ptr[0] &= ~EDGE_FLAG_DATA_OFF;
499 node[0] = FLAG_NODE_MATCH | FLAG_NODE_NONE;
509 int copy_size = NODE_EDGE_KEY_FIRST + nt;
510 if(ReadData(node_ptrs.ptr, copy_size, old_node_off) != copy_size)
511 return MBError::READ_ERROR;
512 if(ReadData(node_ptrs.ptr+copy_size+1, EDGE_SIZE*nt, old_node_off+copy_size) !=
514 return MBError::READ_ERROR;
516 release_node_index = nt - 1;
519 node[1] = static_cast<uint8_t>(nt);
521 // Update the first edge key character for the new edge
522 node_ptrs.edge_key_ptr[nt] = key[0];
524 Write6BInteger(edge_ptrs.offset_ptr, node_ptrs.offset);
526 // Create the new edge
527 EdgePtrs new_edge_ptrs;
528 InitEdgePtrs(node_ptrs, nt, new_edge_ptrs);
529 new_edge_ptrs.len_ptr[0] = key_len;
530 if(key_len > LOCAL_EDGE_LEN)
533 ReserveData(key+1, key_len-1, new_key_off, map_new_sliding);
534 Write5BInteger(new_edge_ptrs.ptr, new_key_off);
540 memcpy(new_edge_ptrs.ptr, key+1, key_len-1);
543 // Indicate this new edge holds a data offset
544 new_edge_ptrs.flag_ptr[0] = EDGE_FLAG_DATA_OFF;
545 Write6BInteger(new_edge_ptrs.offset_ptr, data_off);
548 WriteData(node, node_size[nt], node_ptrs.offset);
550 if(release_node_index >= 0)
551 ReleaseNode(old_node_off, release_node_index);
553 header->excep_lf_offset = edge_ptrs.offset;
554 header->excep_updating_status = EXCEP_STATUS_ADD_EDGE;
555 lfree->WriterLockFreeStart(edge_ptrs.offset);
557 WriteEdge(edge_ptrs);
559 lfree->WriterLockFreeStop();
560 header->excep_updating_status = EXCEP_STATUS_NONE;
564 return MBError::SUCCESS;
567 bool DictMem::FindNext(const unsigned char *key, int keylen, int &match_len,
568 EdgePtrs &edge_ptr, uint8_t *key_tmp) const
570 if(edge_ptr.flag_ptr[0] & EDGE_FLAG_DATA_OFF)
572 edge_ptr.curr_nt = -1;
576 size_t node_off = Get6BInteger(edge_ptr.offset_ptr);
578 assert(node_off != 0);
580 if(ReadData(key_tmp, 1, node_off+1) != 1)
583 edge_ptr.curr_nt = nt;
585 // Load edge key first
586 node_off += NODE_EDGE_KEY_FIRST;
587 if(ReadData(key_tmp, nt, node_off) != nt)
590 for(i = 0; i < nt; i++)
592 if(key_tmp[i] == key[0])
602 edge_ptr.offset = node_off + nt + i*EDGE_SIZE;
603 if(ReadData(header->excep_buff, EDGE_SIZE, edge_ptr.offset) != EDGE_SIZE)
605 uint8_t *key_string_ptr;
606 int len = edge_ptr.len_ptr[0] - 1;
607 if(len > LOCAL_EDGE_LEN_M1)
609 if(ReadData(key_tmp, len, Get5BInteger(edge_ptr.ptr)) != len)
611 key_string_ptr = key_tmp;
615 key_string_ptr = header->excep_buff;
622 for(i = 1; i < keylen; i++)
624 if(key_string_ptr[i-1] != key[i] || i > len)
632 // Reserve buffer for a new node.
633 // The allocated in-memory buffer must be initialized to zero.
634 bool DictMem::ReserveNode(int nt, size_t &offset, uint8_t* &ptr)
637 assert(nt >= 0 && nt < 256);
640 int buf_size = free_lists->GetAlignmentSize(node_size[nt]);
641 int buf_index = free_lists->GetBufferIndex(buf_size);
645 if(free_lists->GetBufferByIndex(buf_index, offset))
648 memset(ptr, 0, buf_size);
649 header->pending_index_buff_size -= buf_size;
653 if(free_lists->GetBufferCountByIndex(buf_index) > 0)
655 offset = free_lists->RemoveBufferByIndex(buf_index);
657 memset(ptr, 0, buf_size);
658 header->pending_index_buff_size -= buf_size;
664 size_t old_off = header->m_index_offset;
665 bool node_move = false;
666 int rval = kv_file->Reserve(header->m_index_offset, buf_size, ptr);
667 if(rval != MBError::SUCCESS)
675 //Checking missing buffer due to alignment
676 if(old_off < header->m_index_offset)
678 free_lists->ReleaseAlignmentBuffer(old_off, header->m_index_offset);
679 header->pending_index_buff_size += header->m_index_offset - old_off;
682 memset(ptr, 0, buf_size);
683 offset = header->m_index_offset;
684 header->m_index_offset += buf_size;
688 // Reserve buffer for a new key
689 void DictMem::ReserveData(const uint8_t* key, int size, size_t &offset,
690 bool map_new_sliding)
692 int buf_index = free_lists->GetBufferIndex(size);
693 int buf_size = free_lists->GetAlignmentSize(size);
696 if(free_lists->GetBufferByIndex(buf_index, offset))
698 WriteData(key, size, offset);
699 header->pending_index_buff_size -= buf_size;
702 if(free_lists->GetBufferCountByIndex(buf_index) > 0)
704 offset = free_lists->RemoveBufferByIndex(buf_index);
705 WriteData(key, size, offset);
706 header->pending_index_buff_size -= buf_size;
711 size_t old_off = header->m_index_offset;
714 int rval = kv_file->Reserve(header->m_index_offset, buf_size, ptr,
716 if(rval != MBError::SUCCESS)
719 //Checking missing buffer due to alignment
720 if(old_off < header->m_index_offset)
722 free_lists->ReleaseAlignmentBuffer(old_off, header->m_index_offset);
723 header->pending_index_buff_size += header->m_index_offset - old_off;
726 offset = header->m_index_offset;
727 header->m_index_offset += buf_size;
729 memcpy(ptr, key, size);
731 WriteData(key, size, offset);
734 header->edge_str_size += buf_size;
737 // Release node buffer
738 void DictMem::ReleaseNode(size_t offset, int nt)
743 int buf_index = free_lists->GetBufferIndex(node_size[nt]);
744 int rval = free_lists->AddBufferByIndex(buf_index, offset);
745 if(rval == MBError::SUCCESS)
748 Logger::Log(LOG_LEVEL_ERROR, "failed to release node buffer");
749 header->pending_index_buff_size += free_lists->GetAlignmentSize(node_size[nt]);
752 // Release edge string buffer
753 void DictMem::ReleaseBuffer(size_t offset, int size)
755 int rval = free_lists->ReleaseBuffer(offset, size);
756 if(rval != MBError::SUCCESS)
757 Logger::Log(LOG_LEVEL_ERROR, "failed to release buffer");
759 header->edge_str_size -= free_lists->GetAlignmentSize(size);
760 header->pending_index_buff_size += free_lists->GetAlignmentSize(size);
763 int DictMem::GetRootEdge(size_t rc_off, int nt, EdgePtrs &edge_ptrs) const
766 edge_ptrs.offset = rc_off + NODE_EDGE_KEY_FIRST + NUM_ALPHABET + nt*EDGE_SIZE;
768 edge_ptrs.offset = root_offset + NODE_EDGE_KEY_FIRST + NUM_ALPHABET + nt*EDGE_SIZE;
769 if(ReadData(edge_ptrs.edge_buff, EDGE_SIZE, edge_ptrs.offset) != EDGE_SIZE)
770 return MBError::READ_ERROR;
772 InitTempEdgePtrs(edge_ptrs);
773 return MBError::SUCCESS;
776 // The temp edge is written to shared memory for handling segfault situations.
777 // When writer restarts from segfault, it will retry WriteEdge so that the DB is
778 // maintained consistently.
779 int DictMem::GetRootEdge_Writer(bool rc_mode, int nt, EdgePtrs &edge_ptrs) const
783 if(root_offset_rc == 0)
784 throw (int) MBError::UNKNOWN_ERROR;
785 edge_ptrs.offset = root_offset_rc + NODE_EDGE_KEY_FIRST + NUM_ALPHABET + nt*EDGE_SIZE;
789 edge_ptrs.offset = root_offset + NODE_EDGE_KEY_FIRST + NUM_ALPHABET + nt*EDGE_SIZE;
791 if(ReadData(header->excep_buff, EDGE_SIZE, edge_ptrs.offset) != EDGE_SIZE)
792 return MBError::READ_ERROR;
794 edge_ptrs.ptr = header->excep_buff;
795 edge_ptrs.len_ptr = edge_ptrs.ptr + EDGE_LEN_POS;
796 edge_ptrs.flag_ptr = edge_ptrs.ptr + EDGE_FLAG_POS;
797 edge_ptrs.offset_ptr = edge_ptrs.flag_ptr + 1;
798 return MBError::SUCCESS;
801 /////////////////////////////////////////////
802 // Init root node in resource collection mode
803 /////////////////////////////////////////////
804 size_t DictMem::InitRootNode_RC()
809 node_move = ReserveNode(NUM_ALPHABET-1, root_offset_rc, root_node);
810 root_node[0] = FLAG_NODE_NONE;
811 root_node[1] = NUM_ALPHABET-1;
812 for(int i = 0; i < NUM_ALPHABET; i++)
814 root_node[NODE_EDGE_KEY_FIRST+i] = static_cast<uint8_t>(i);
818 WriteData(root_node, node_size[NUM_ALPHABET-1], root_offset_rc);
820 return root_offset_rc;
823 // No need to call LokcFree for removing all DB entries.
824 // Note readers may get READ_ERROR as return, which should be expected
825 // considering the full DB is deleted.
826 int DictMem::ClearRootEdge(int nt) const
828 size_t offset = root_offset + NODE_EDGE_KEY_FIRST + NUM_ALPHABET + nt*EDGE_SIZE;
830 header->excep_lf_offset = offset;
831 header->excep_updating_status = EXCEP_STATUS_CLEAR_EDGE;
832 lfree->WriterLockFreeStart(offset);
834 WriteData(DictMem::empty_edge, EDGE_SIZE, offset);
836 lfree->WriterLockFreeStop();
837 header->excep_updating_status = EXCEP_STATUS_NONE;
840 return MBError::SUCCESS;
843 int DictMem::ClearRootEdges_RC() const
845 if(root_offset_rc == 0)
846 return MBError::INVALID_ARG;
849 for(int i = 0; i < NUM_ALPHABET; i++)
851 offset = root_offset_rc + NODE_EDGE_KEY_FIRST + NUM_ALPHABET + i*EDGE_SIZE;
853 header->excep_lf_offset = offset;
854 header->excep_updating_status = EXCEP_STATUS_CLEAR_EDGE;
855 lfree->WriterLockFreeStart(offset);
857 DRMBase::WriteData(DictMem::empty_edge, EDGE_SIZE, offset);
859 lfree->WriterLockFreeStop();
860 header->excep_updating_status = EXCEP_STATUS_NONE;
864 return MBError::SUCCESS;
867 void DictMem::ClearMem() const
869 int root_node_size = free_lists->GetAlignmentSize(node_size[NUM_ALPHABET-1]);
870 header->m_index_offset = root_offset + root_node_size;
871 header->n_states = 1; // Keep the root node
873 header->edge_str_size = 0;
875 header->pending_index_buff_size = 0;
878 int DictMem::NextEdge(const uint8_t *key, EdgePtrs &edge_ptrs, uint8_t *node_buff,
879 MBData &mbdata) const
882 // Check if need to read saved edge
883 if((mbdata.options & CONSTS::OPTION_READ_SAVED_EDGE) && edge_ptrs.offset == mbdata.edge_ptrs.offset)
884 node_off = Get6BInteger(mbdata.edge_ptrs.offset_ptr);
886 node_off = Get6BInteger(edge_ptrs.offset_ptr);
889 byte_read = ReadData(node_buff, NODE_EDGE_KEY_FIRST, node_off);
890 if(byte_read != NODE_EDGE_KEY_FIRST)
891 return MBError::READ_ERROR;
893 int nt = node_buff[1] + 1;
894 byte_read = ReadData(node_buff+NODE_EDGE_KEY_FIRST, nt, node_off+NODE_EDGE_KEY_FIRST);
896 return MBError::READ_ERROR;
898 int ret = MBError::NOT_EXIST;
899 for(int i = 0; i < nt; i++)
901 if(node_buff[i+NODE_EDGE_KEY_FIRST] == key[0])
903 if(mbdata.options & CONSTS::OPTION_FIND_AND_STORE_PARENT)
905 // update parent node/edge info for deletion
906 edge_ptrs.curr_nt = nt;
907 edge_ptrs.curr_edge_index = i;
908 edge_ptrs.parent_offset = edge_ptrs.offset;
909 edge_ptrs.curr_node_offset = node_off;
911 size_t offset_new = node_off + NODE_EDGE_KEY_FIRST + nt + i*EDGE_SIZE;
912 byte_read = ReadData(edge_ptrs.edge_buff, EDGE_SIZE, offset_new);
913 if(byte_read != EDGE_SIZE)
915 ret = MBError::READ_ERROR;
919 edge_ptrs.offset = offset_new;
920 ret = MBError::SUCCESS;
928 void DictMem::RemoveRootEdge(const EdgePtrs &edge_ptrs)
931 // Root node needs special handling.
932 if(edge_ptrs.len_ptr[0] > LOCAL_EDGE_LEN)
933 ReleaseBuffer(Get5BInteger(edge_ptrs.ptr), edge_ptrs.len_ptr[0]-1);
935 header->excep_lf_offset = edge_ptrs.offset;
936 header->excep_updating_status = EXCEP_STATUS_CLEAR_EDGE;
937 lfree->WriterLockFreeStart(edge_ptrs.offset);
939 WriteData(DictMem::empty_edge, EDGE_SIZE, edge_ptrs.offset);
941 lfree->WriterLockFreeStop();
942 header->excep_updating_status = EXCEP_STATUS_NONE;
946 int DictMem::RemoveEdgeSizeN(const EdgePtrs &edge_ptrs,
949 uint8_t *old_node_buffer,
952 size_t parent_edge_offset)
954 // Reserve a new node with nt-1
956 size_t new_node_offset;
959 // Reserve for the new node
960 node_move = ReserveNode(nt-2, new_node_offset, node);
962 // Copy data from old node
963 uint8_t *first_key_ptr = node + NODE_EDGE_KEY_FIRST;
964 uint8_t *edge_ptr = first_key_ptr + nt - 1;
965 uint8_t old_edge_buff[16];
966 size_t old_edge_offset = node_offset + NODE_EDGE_KEY_FIRST + nt;
967 memcpy(node, old_node_buffer, NODE_EDGE_KEY_FIRST);
969 for(int i = 0; i < nt; i++)
972 if(ReadData(old_edge_buff, EDGE_SIZE, old_edge_offset) != EDGE_SIZE)
973 return MBError::READ_ERROR;
975 if(i == edge_ptrs.curr_edge_index)
977 // Need to release this edge string buffer
978 if(old_edge_buff[EDGE_LEN_POS] > LOCAL_EDGE_LEN)
980 str_off_rel = Get5BInteger(old_edge_buff);
981 str_size_rel = old_edge_buff[EDGE_LEN_POS]-1;
986 first_key_ptr[0] = old_node_buffer[NODE_EDGE_KEY_FIRST+i];
987 memcpy(edge_ptr, old_edge_buff, EDGE_SIZE);
990 edge_ptr += EDGE_SIZE;
992 old_edge_offset += EDGE_SIZE;
995 // Write the new node before free
997 WriteData(node, node_size[nt-2], new_node_offset);
999 // Update the link from parent edge to the new node offset
1000 Write6BInteger(header->excep_buff, new_node_offset);
1001 #ifdef __LOCK_FREE__
1002 lfree->WriterLockFreeStart(parent_edge_offset);
1004 WriteData(header->excep_buff, OFFSET_SIZE, parent_edge_offset+EDGE_NODE_LEADING_POS);
1005 #ifdef __LOCK_FREE__
1006 lfree->WriterLockFreeStop();
1009 return MBError::SUCCESS;
1012 int DictMem::RemoveEdgeSizeOne(uint8_t *old_node_buffer,
1013 size_t parent_edge_offset,
1016 size_t &str_off_rel,
1019 int rval = MBError::SUCCESS;
1021 if(old_node_buffer[0] & FLAG_NODE_MATCH)
1023 uint8_t *parent_edge_buff = header->excep_buff;
1024 size_t data_offset = Get6BInteger(old_node_buffer+2);
1025 parent_edge_buff[0] = EDGE_FLAG_DATA_OFF;
1026 Write6BInteger(parent_edge_buff+1, data_offset);
1027 #ifdef __LOCK_FREE__
1028 lfree->WriterLockFreeStart(parent_edge_offset);
1030 // Write the one-byte flag and 6-byte offset
1031 WriteData(parent_edge_buff, OFFSET_SIZE_P1, parent_edge_offset+EDGE_FLAG_POS);
1032 #ifdef __LOCK_FREE__
1033 lfree->WriterLockFreeStop();
1038 // This is an internal node.
1039 rval = MBError::TRY_AGAIN;
1042 uint8_t old_edge_buff[16];
1043 size_t old_edge_offset = node_offset + NODE_EDGE_KEY_FIRST + nt;
1044 if(ReadData(old_edge_buff, EDGE_SIZE, old_edge_offset) != EDGE_SIZE)
1045 return MBError::READ_ERROR;
1046 if(old_edge_buff[EDGE_LEN_POS] > LOCAL_EDGE_LEN)
1048 str_off_rel = Get5BInteger(old_edge_buff);
1049 str_size_rel = old_edge_buff[EDGE_LEN_POS]-1;
1055 int DictMem::RemoveEdgeByIndex(const EdgePtrs &edge_ptrs, MBData &data)
1057 header->excep_offset = edge_ptrs.curr_node_offset;
1059 if(header->excep_offset == root_offset)
1061 RemoveRootEdge(edge_ptrs);
1062 return MBError::SUCCESS;
1065 int nt = edge_ptrs.curr_nt;
1066 if(nt < 1) return MBError::INVALID_ARG;
1068 uint8_t *old_node_buffer = data.node_buff;
1069 // load the current node
1070 if(ReadData(old_node_buffer, NODE_EDGE_KEY_FIRST + nt, edge_ptrs.curr_node_offset)
1071 != NODE_EDGE_KEY_FIRST + nt)
1072 return MBError::READ_ERROR;
1074 int rval = MBError::SUCCESS;
1076 int str_size_rel = 0;
1078 header->excep_lf_offset = edge_ptrs.parent_offset;
1079 header->excep_updating_status = EXCEP_STATUS_REMOVE_EDGE;
1082 rval = RemoveEdgeSizeN(edge_ptrs, nt, header->excep_offset, old_node_buffer,
1083 str_off_rel, str_size_rel, header->excep_lf_offset);
1087 rval = DictMem::RemoveEdgeSizeOne(old_node_buffer, header->excep_lf_offset,
1088 header->excep_offset, nt, str_off_rel, str_size_rel);
1090 header->excep_updating_status = EXCEP_STATUS_NONE;
1093 ReleaseNode(header->excep_offset, nt-1);
1094 if(str_size_rel > 0)
1095 ReleaseBuffer(str_off_rel, str_size_rel);
1098 #ifdef __LOCK_FREE__
1099 header->excep_lf_offset = edge_ptrs.offset;
1100 header->excep_updating_status = EXCEP_STATUS_CLEAR_EDGE;
1101 lfree->WriterLockFreeStart(edge_ptrs.offset);
1103 WriteData(DictMem::empty_edge, EDGE_SIZE, edge_ptrs.offset);
1104 #ifdef __LOCK_FREE__
1105 lfree->WriterLockFreeStop();
1106 header->excep_updating_status = EXCEP_STATUS_NONE;
1112 // Should only be called by writer
1113 void DictMem::PrintStats(std::ostream &out_stream) const
1118 out_stream << "Dict Memory Stats:" << std::endl;
1119 out_stream << "\tIndex size: " << header->m_index_offset << std::endl;
1120 out_stream << "\tIndex block size: " << header->index_block_size << std::endl;
1121 out_stream << "\tNumber of edges: " << header->n_edges << std::endl;
1122 out_stream << "\tNumber of nodes: " << header->n_states << std::endl;
1123 out_stream << "\tEdge string size: " << header->edge_str_size << std::endl;
1124 out_stream << "\tEdge size: " << header->n_edges*EDGE_SIZE << std::endl;
1125 out_stream << "\tException flag: " << header->excep_updating_status << std::endl;
1126 out_stream << "\tPending Buffer Size: " << header->pending_index_buff_size << std::endl;
1127 if(free_lists != NULL)
1128 out_stream << "\tTrackable Buffer Size: " << free_lists->GetTotSize() << std::endl;
1129 kv_file->PrintStats(out_stream);
1132 const int* DictMem::GetNodeSizePtr() const
1137 void DictMem::ResetSlidingWindow() const
1139 kv_file->ResetSlidingWindow();
1141 header->shm_index_sliding_start.store(0, std::memory_order_relaxed);
1144 void DictMem::InitLockFreePtr(LockFree *lf)
1149 void DictMem::Flush() const
1153 if(header_file != NULL)
1154 header_file->Flush();
1157 void DictMem::WriteData(const uint8_t *buff, unsigned len, size_t offset) const
1159 if(offset + len > header->m_index_offset)
1161 std::cerr << "invalid dmm write: " << offset << " " << len << " "
1162 << header->m_index_offset << "\n";
1163 throw (int) MBError::OUT_OF_BOUND;
1166 if(kv_file->RandomWrite(buff, len, offset) != len)
1167 throw (int) MBError::WRITE_ERROR;