重建保存在文件上的huffman树



将huffman树保存到文件后,我试图再次读取和重建该相同的树,具有与以前相同的结构。我遵循这篇文章并编写了这个函数来完成:

void build_tree(BinaryTree<HuffmanNode> * tree, List<NodeTree<HuffmanNode>> * list, int start, int end) {
if(start > end)
return;
tree->insert(list->get(start)->getData().getData());
int i;
for(i = start; i <= end; i++) {
if(list->get(i)->getData().getData() > list->get(start)->getData().getData()) {
break;
}
}
build_tree(tree, list, start + 1, i - 1);
build_tree(tree, list, i, end);
}

示例中的ListBinaryTree类为:

List.h

template<class T>
class List {
private:
NodeList<T> * first;
public:
List();
~List();
void insert(T data);
void update(int index, T data);
void remove(int index);
int size();
NodeList<T> * get(int index);
void set(int index, NodeList<T> * value);
int find(T data);
void sort();
void print();
T * toArray();
};

NodeList.h

template<class T>
class NodeList {
private:
T data;
NodeList * next;
public:
NodeList();
~NodeList();
T getData();
void setData(T value);
NodeList * getNext();
void setNext(NodeList<T> * next);
void setNext(NodeList<T> next);
};

binaryTree.h

template<class T>
class BinaryTree {
protected:
NodeTree<T> * root;
public:
BinaryTree();
~BinaryTree();
NodeTree<T> * getRoot();
void setRoot(NodeTree<T> * node);
void insert(T value);
void update(T old_value, T new_value);
void remove(T value);
List<T> preOrder();
List<T> inOrder();
List<T> postOrder();
void preOrder(NodeTree<T> * node, List<T> * list);
void inOrder(NodeTree<T> * node, List<T> * list);
void postOrder(NodeTree<T> * node, List<T> * list);
List<T> level(int value);
int levelOf(T data);
int levelOf(NodeTree<T> * node, T data, int level);
int height();
int height(NodeTree<T> * node, int height);
List<T> leafs();
};

NodeTree.h

template<class T>
class NodeTree {
private:
T data;
NodeTree * left;
NodeTree * right;
public:
NodeTree();
NodeTree(T data);
~NodeTree();
T getData();
void setData(T value);
NodeTree * getLeft();
void setLeft(NodeTree<T> * left);
void setLeft(NodeTree<T> left);
NodeTree * getRight();
void setRight(NodeTree<T> * right);
void setRight(NodeTree<T> right);
};

HuffmanNode的实现如下:

struct HuffmanNode {
char data;
int frequency;
bool operator==(HuffmanNode other) { return this->data == other.data; }
bool operator==(char data) { return this->data == data; }
bool operator!=(HuffmanNode other) { return this->data != other.data; }
bool operator!=(char data) { return this->data != data; }
bool operator<(HuffmanNode other) { return frequency < other.frequency; }
bool operator<=(HuffmanNode other) { return frequency <= other.frequency; }
bool operator>(HuffmanNode other) { return frequency > other.frequency; }
bool operator>=(HuffmanNode other) { return frequency >= other.frequency; }
HuffmanNode operator++() { this->frequency++; return *this; }
HuffmanNode operator++(int) { this->frequency++; return *this; }
HuffmanNode operator--() { this->frequency--; return *this; }
HuffmanNode operator--(int) { this->frequency--; return *this; }
friend ostream &operator<<( ostream &output, const HuffmanNode &node ) { output << node.data << " ( " << node.frequency << " ) "; return output; }
friend istream &operator>>( istream  &input, HuffmanNode &node ) { input >> node.data >> node.frequency; return input; }
};
typedef struct HuffmanNode HuffmanNode;

谁能告诉我我在这里做错了什么?完整代码:https://pastebin.com/5dfHRhLe

经过一些搜索,我根据我在这里和这里找到的答案更改了我的代码。第二题,我用的两个函数和答案中显示的完全一样。我试着让第一个适应我的用例,但我怀疑这是错误的。

对于写入文件,我有:

getCode(&encodeTable, toEncode.getRoot());
if (output.is_open()) {
List<HuffmanNode> list = toEncode.preOrder();
vector<bool> bits;
for(int i=1; i<=list.size(); i++) {
HuffmanNode node = list.get(i)->getData();
if(node.data == 0x0)
bits.push_back(true);
else
bits.push_back(false);
}
cout << endl;
binary_write(output, bits);
for(long unsigned int i=1; i<=bits.size(); i++) {
HuffmanNode node = list.get(i)->getData();
if(node.data != 0x0) {
char c = node.data;
output.write(&c, sizeof(c));
}
}
input.clear();
input.seekg(0, ios::beg);
string encoded_file = "";
char c;
while (input.get(c))
if(encodeTable.get(c) != nullptr)
encoded_file = encoded_file + encodeTable.get(c)->getValue();
if(encoded_file.length() % 8 != 0)
encoded_file = encoded_file + getSubstring(encoded_file.length() % 8);
for(long unsigned int i=0; i<encoded_file.length(); i+=8) {
string data = encoded_file.substr(i, 8);
bitset<8> b(data);
unsigned long x = b.to_ulong();
unsigned char c = static_cast<unsigned char>( x );
output.write(reinterpret_cast<char*>(&c), sizeof(c));
}
}

用于从文件中读取比特,我有:

string coded_file = "";
if (input.is_open()) {
vector<bool> bits;
binary_read(input, bits);
toDecode.insert(HuffmanNode());
NodeTree<HuffmanNode> * temp = toDecode.getRoot();
for(long unsigned int i=1; i<bits.size();) {
if(bits[i++]) {
temp->setLeft(NodeTree<HuffmanNode>());
temp = temp->getLeft();
} else {
char c;
input.read(&c, sizeof(c));
HuffmanNode node;
node.data = c;
temp->setLeft(NodeTree<HuffmanNode>(node));
}
if(bits[i++]) {
temp->setRight(NodeTree<HuffmanNode>());
temp = temp->getRight();
} else {
char c;
input.read(&c, sizeof(c));
HuffmanNode node;
node.data = c;
temp->setRight(NodeTree<HuffmanNode>(node));
}
}
char c;
while(input.read(reinterpret_cast<char*>(&c), sizeof(c))) {
bitset<8> b(c);
coded_file = coded_file + b.to_string();
}
}
getCode(&decodeTable, toDecode.getRoot());

你遇到的问题是如何序列化随机内存块(在这种情况下是树节点),在你的例子中,你试图编码内容,但这是没有必要的,因为你可以只写内存块,你没有在文件中包含必要的数据来重建树。

本例使用唯一索引来标识每个节点,因此您可以以任意顺序保存它们,并在重建时使用

查询它们:
int left_child_index = parent_index * 2 + 1;
int right_child_index = parent_index * 2 + 2;
#include <iostream>
#include <fstream>
#include <vector>
template<typename V> 
struct Node {
size_t    size = 0; // weight
V         data  = NULL;
Node<V> * left  = nullptr;
Node<V> * right = nullptr;
// New data node
Node(V value, size_t weight){
this->size = weight; 
this->data = value;
};
// New parent node
Node(Node<V> * left, Node<V> * right){
this->size = right->size + left->size;
this->right = right;
this->left = left; 
};
};
template<typename V> 
class Huffman {
struct save_chunk {
size_t index;
size_t weight;
V data;
};
static void save_nodes(std::ostream * out, Node<V> * node, int i){
if(node->left != nullptr){
save_nodes(out, node->left, i * 2 + 1);
}
if(node->right != nullptr){
save_nodes(out, node->right, i * 2 + 2);
}
// it is recursive, but still one threaded
// so making only one instance for all calls
// is a great optimization
static save_chunk chunk;
chunk.weight = node->size;
chunk.data = node->data;
chunk.index = i;
out->write((const char *)&chunk, sizeof(save_chunk));
}
static void load_nodes(std::vector<save_chunk*> & chunks, Node<V> * node, int _e){
size_t i; 
int l = _e * 2 + 1;
int r = _e * 2 + 2;
for (i = 0; i < chunks.size(); i++){
// if the next node is found join it 
// to the parent node
if (chunks[i]->index == l){
node->left = new Node<V>(
chunks[i]->data, 
chunks[i]->weight
);
chunks[i] = chunks[chunks.size() - 1];
chunks.pop_back();  
// repeat to the posible e child nodes
load_nodes(chunks, node->left, l);
break;
}
}
for (i = 0; i < chunks.size(); i++){
if (chunks[i]->index == r){
node->right = new Node<V>(
chunks[i]->data, 
chunks[i]->weight
);
chunks[i] = chunks[chunks.size() - 1];
chunks.pop_back();  
load_nodes(chunks, node->right, r);
break;
}
}
}
public:
static void save_tree(const char * path, Node<V> * tree){
std::ofstream file;
file.open(path, std::ofstream::binary);
file.seekp(0);  // move the cursor to the start
save_nodes(&file, tree, 0);
file.close();
}
static Node<V> * load_tree(const char * path){
std::ifstream file;
file.open(path, std::ifstream::binary);
// obtaind how log is the file, the length 
// is a multimple of save_chunk
file.seekg (0, file.end);
size_t len = file.tellg();
file.seekg (0, file.beg);
// this just moves the file content into 
// memory as an array of the chunks
save_chunk * chunk = nullptr;
std::vector<save_chunk*> chunks;
for (int i = 0; i < len / sizeof(save_chunk); i++){
chunk = new save_chunk;
file.read((char *)chunk, sizeof(save_chunk));
chunks.push_back(chunk);
}
// becouse its recursive, the root element
// needs to be created first
size_t i;
Node<V> * tree;
for (i = 0; i < chunks.size(); i++){
if (chunks[i]->index == 0){
break;
}
}
tree = new Node<V>(chunks[i]->data, chunks[i]->weight);
chunks[i] = chunks[chunks.size() - 1];
chunks.pop_back();  
// and just create the rest
load_nodes(chunks, tree, 0);
file.close();
return tree;
}
static Node<V> * gen_tree(std::vector<int> & weight, std::vector<V> & data){
size_t i, min;
Node<V> * selected = nullptr;
std::vector<Node<V>*> nodes;
if (weight.size() != data.size()){
return nullptr;
}
for (i = 0; i < weight.size(); i++){
nodes.push_back(new Node<V>(data[i], weight[i]));
}
while(!nodes.empty()){
min = 0;
for (i = 1; i < nodes.size(); i++){
if(nodes[i]->size < nodes[min]->size){
min = i;
}
}
if(selected != nullptr){
nodes[min] = new Node<V>(nodes[min], selected);
selected = nullptr;
} else {
selected = nodes[min];
nodes[min] = nodes[i - 1];
nodes.pop_back();  
}
}
return selected;
}
static void erase_tree(Node<V> * node, bool _root = true){
if(node->left != nullptr){
erase_tree(node->left, false);
delete node->left;
}
if(node->right != nullptr){
erase_tree(node->right, false);
delete node->right;
}
if (_root){
delete node;
}
}

static void print_tree(Node<V> * node, int _c = 0, int _i = 0){
for (int i = 0; i < _c; i++){ 
std::cout << "   "; 
}
std::cout << "(" << _i << ")" << "-> " << node->size;
if(node->data != NULL){
std::cout << " > " << node->data << std::endl;
} else {
std::cout << std::endl;
}
if(node->right != nullptr){
print_tree(node->right, _c + 1, _i * 2 + 2);
}
if(node->left != nullptr){
print_tree(node->left, _c + 1, _i * 2 + 1);
}
}
};

int main(void) {
Node<char> * tree;
std::vector<char> data = { 'a', 'b', 'c', 'd', 'e', 'f', 'g' };
std::vector<int>  wei  = { 15,  10,   8,  12,  20,  16,  25  };
tree = Huffman<char>::gen_tree(wei, data); 
std::cout << "Tree addr -> " << tree << std::endl;
Huffman<char>::save_tree("D:/Escritorio/tmp_cpp/tree.bin", tree);
Huffman<char>::print_tree(tree);
Huffman<char>::erase_tree(tree);
std::cout << std::endl;
tree = Huffman<char>::load_tree("D:/Escritorio/tmp_cpp/tree.bin");
std::cout << "Tree addr -> " << tree << std::endl;
Huffman<char>::print_tree(tree);
Huffman<char>::erase_tree(tree);
return 0;
}

的示例输出
Tree addr -> 000002DE2A6C2380
(0)-> 106
(2)-> 45
(6)-> 20 > e
(5)-> 25 > g
(1)-> 61
(4)-> 27
(10)-> 12 > d
(9)-> 15 > a
(3)-> 34
(8)-> 16 > f
(7)-> 18
(16)-> 8 > c
(15)-> 10 > b
Tree addr -> 000002DE2A6C2AA0
(0)-> 106
(2)-> 45
(6)-> 20 > e
(5)-> 25 > g
(1)-> 61
(4)-> 27
(10)-> 12 > d
(9)-> 15 > a
(3)-> 34
(8)-> 16 > f
(7)-> 18
(16)-> 8 > c
(15)-> 10 > b

最新更新