struct priority_queue
{
// A number of active (carrying data) nodes currently in the queue
unsigned int size;
// A total number of nodes in "nodes" array
unsigned int capacity;
// An array of pointers to nodes
struct priority_queue_node** nodes;
};
struct priority_queue_node
{
unsigned int weight;
};
int priority_queue_push(struct priority_queue* queue, struct priority_queue_node* node)
{
if (queue->size >= queue->capacity)
{
int new_capacity = queue->capacity * 2;
if (new_capacity == 0)
new_capacity = 1;
struct priority_queue_node** new_nodes = (struct priority_queue_node**) malloc(sizeof(struct priority_queue_node*) * new_capacity);
if (! new_nodes)
{
return 0;
}
memcpy(new_nodes, queue->nodes, sizeof(struct priority_queue_node*) * queue->size);
if (queue->capacity)
free(queue->nodes);
queue->nodes = new_nodes;
queue->capacity = new_capacity;
}
queue->nodes[queue->size++] = node;
heapify(queue);
return 1;
}
template <class T> class priority_queue
{
struct node
{
unsigned int m_weight;
T m_data;
};
using node_ptr = std::unique_ptr<node>;
std::size_t m_capacity;
std::size_t m_size;
std::unique_ptr<node_ptr[]> m_nodes;
void heapify() noexcept;
void increase_capacity();
public:
explicit priority_queue(std::size_t capacity = 16) ;
// …
};
template <class U>
push(unsigned int weight, U&& obj)
{
if (m_size >= m_capacity)
increase_capacity();
m_nodes[m_size++].reset(new node({weight, std::forward<U>(obj)}));
heapify();
}
void increase_capacity()
{
const auto new_capacity = m_capacity ? m_capacity * 2 : 1;
std::unique_ptr<node_ptr[]> new_nodes(new node_ptr[new_capacity]);
for (auto src = m_nodes.get(), dest = new_nodes.get(); src != m_nodes.get() + m_size; ++src, ++dest)
*dest = std::move(*src);
m_nodes = std::move(new_nodes);
m_capacity = new_capacity;
}
#define NODE_TYPE_TERM 1
#define NODE_TYPE_NODE 2
struct char_node_base
{
int type;
};
struct char_node_terminal
{
struct char_node_base base;
char c;
};
struct char_node
{
struct char_node_base base;
struct char_node_base* left;
struct char_node_base* right;
};
struct char_node_root
{
struct priority_queue_node pq_node;
int height;
struct char_node_base* node;
};
struct char_node_base
{
virtual ~char_node_base() = default;
};
using char_node_ptr = std::unique_ptr<char_node_base>;
struct char_node_terminal : char_node_base
{
const unsigned char m_c;
char_node_terminal(char c) noexcept : m_c(c) {}
};
struct char_node : char_node_base
{
char_node_ptr m_left;
char_node_ptr m_right;
};
struct nodes_root
{
int m_height;
char_node_ptr m_node;
};
static struct priority_queue* build_priority_queue(
char* buffer, unsigned int size)
{
unsigned char table[256];
memset(table, 0, sizeof(table));
for (unsigned int i = 0; i < size; ++i)
if (table[(unsigned char)buffer[i]] != 255)
++table[(unsigned char)buffer[i]];
struct priority_queue* queue = priority_queue_create(16);
for (unsigned short i = 0; i < 256; ++i)
{
if (table[i])
{
struct char_node_root* node = (struct char_node_root*) malloc(sizeof(struct char_node_root));
struct char_node_terminal* term = (struct char_node_terminal*) malloc(sizeof(struct char_node_terminal));
term->base.type = NODE_TYPE_TERM;
term->c = (char)i;
node->node = (struct char_node_base*) term;
node->height = 0;
node->pq_node.weight = table[i];
priority_queue_push(queue, (struct priority_queue_node*) node);
}
}
return queue;
}
static struct char_node_root* queue_to_tree(struct priority_queue* queue)
{
while (priority_queue_size(queue) > 1)
{
struct char_node_root* node1 = (struct char_node_root*) priority_queue_pop(queue);
struct char_node_root* node2 = (struct char_node_root*) priority_queue_pop(queue);
struct char_node_base* int_node1 = node1->node;
struct char_node_base* int_node2 = node2->node;
struct char_node* join_node = (struct char_node*) malloc(sizeof(struct char_node));
join_node->base.type = NODE_TYPE_NODE;
join_node->left = int_node1;
join_node->right = int_node2;
int new_weight = node1->pq_node.weight;
if (new_weight + node2->pq_node.weight <= 65535)
new_weight += node2->pq_node.weight;
else
new_weight = 65535;
node1->pq_node.weight = new_weight;
if (node1->height > node2->height)
++node1->height;
else
node1->height = node2->height + 1;
free(node2);
node1->node = (struct char_node_base*) join_node;
priority_queue_push(queue, (struct priority_queue_node*) node1);
}
return (struct char_node_root*) priority_queue_pop(queue);
}
void fill_priority_queue(
const unsigned char* buffer,
std::size_t buffer_size,
queue_t& queue)
{
unsigned char counts_table[256]{};
for (auto ptr = buffer; ptr != buffer + buffer_size; ++ptr)
if (counts_table[*ptr] != 255)
++counts_table[*ptr];
for (unsigned short i = 0; i != 256; ++i)
if (counts_table[i])
queue.push(counts_table[i], nodes_root {0, char_node_ptr(new char_node_terminal(i))});
}
void queue_to_tree(queue_t& queue)
{
while (queue.size() > 1)
{
auto old_root1_node = std::move(queue.top());
const auto old_root1_weight = queue.top_weight();
queue.pop();
auto old_root2_node = std::move(queue.top());
const auto old_root2_weight = queue.top_weight();
queue.pop();
auto joined_node = std::unique_ptr<char_node>(new char_node);
joined_node->m_left = std::move(old_root1_node.m_node);
joined_node->m_right = std::move(old_root2_node.m_node);
const auto new_weight = std::min(old_root1_weight + old_root2_weight, 65535U);
const auto new_height = std::max(old_root1_node.m_height, old_root2_node.m_height) + 1;
queue.push(new_weight, nodes_root {new_height, std::move(joined_node)});
}
}
struct bits_line
{
unsigned char bits_count;
unsigned char* bits;
};
static int build_encoding_map_node(struct char_node_base* node, struct bits_line* bits_table, unsigned char* bits_pattern, int bits_count)
{
if (node->type == NODE_TYPE_TERM)
{
unsigned char index = (unsigned char)((struct char_node_terminal*)node)->c;
bits_table[index].bits_count = bits_count;
bits_table[index].bits = (unsigned char*) malloc(bytes_count_from_bits(bits_count + 1));
if (! bits_table[index].bits)
return 0;
memcpy(bits_table[index].bits, bits_pattern, bytes_count_from_bits(bits_count));
return 1;
}
static const unsigned char bit_mask[] = {1, 2, 4, 8, 16, 32, 64, 128};
bits_pattern[bits_count >> 3] &= ~bit_mask[bits_count & 7];
if (! build_encoding_map_node(((struct char_node*)node)->left, bits_table, bits_pattern, bits_count + 1))
return 0;
bits_pattern[bits_count >> 3] |= bit_mask[bits_count & 7];
if (! build_encoding_map_node(((struct char_node*)node)->right, bits_table, bits_pattern, bits_count + 1))
return 0;
return 1;
}
using unique_bytes_ptr = std::unique_ptr<unsigned char[]>;
class bit_ostream
{
std::size_t m_capacity;
unsigned long m_bits_count = 0;
unique_bytes_ptr m_data;
public:
explicit bit_ostream(std::size_t initial_capacity = 0) noexcept
: m_capacity(initial_capacity)
{
}
bit_ostream& push(const unsigned char* bits, unsigned long const bits_count)
{
if (bits_count == 0)
return *this;
const auto new_bits_count = m_bits_count + bits_count;
if (covered_bytes(new_bits_count) + 1 > m_capacity || m_bits_count == 0)
{
decltype(m_capacity) new_capacity = m_capacity * 2;
const auto cov_bytes = static_cast<decltype(m_capacity)>(covered_bytes(new_bits_count) + 1);
if (new_capacity < cov_bytes)
new_capacity = cov_bytes;
unique_bytes_ptr new_data(new unsigned char[new_capacity]);
std::memcpy(new_data.get(), m_data.get(), covered_bytes(m_bits_count));
m_capacity = new_capacity;
m_data = std::move(new_data);
}
unsigned char* curr = m_data.get() + (m_bits_count >> 3);
if ((m_bits_count & 7) == 0)
{
// All it's simple when current output data size is integer number of bytes
std::memcpy(curr, bits, covered_bytes(bits_count));
}
else
{
const unsigned char shift = m_bits_count & 7;
for (auto bytes_count = covered_bytes(bits_count); bytes_count > 0; ++curr, ++bits, --bytes_count)
{
unsigned short val = static_cast<unsigned short>(*bits) << shift;
val |= static_cast<unsigned short>(*curr & g_bits_fill_mask[shift]);
*curr = static_cast<unsigned char>(val & 0xff);
*(curr + 1) = static_cast<unsigned char>(val >> 8);
}
}
m_bits_count += bits_count;
assert(covered_bytes(m_bits_count) <= m_capacity);
return *this;
}
bit_ostream& push(const bit_ostream& other)
{
return push(other.data(), other.bits_count());
}
bit_ostream& clear_tail() noexcept
{
if (m_bits_count & 7)
m_data.get()[m_bits_count >> 3] &= g_bits_fill_mask[m_bits_count & 7];
return *this;
}
unsigned long bits_count() const noexcept { return m_bits_count; }
bool empty() const noexcept { return ! m_bits_count; }
unsigned char* data() noexcept { return m_data.get(); }
const unsigned char* data() const noexcept { return m_data.get(); }
};
template <class T>
constexpr inline std::size_t covered_bytes(T bits_count) noexcept
{
return (bits_count >> 3) + (bits_count & 7 ? 1 : 0);
}
> build-c/pack-c -m3 ../sample-1.dat data-c.dat
File packing is done. Read 31962362 bytes, written 32031809 bytes. Total ticks = 1053432 (0.754 seconds), t1 = 209957, t2 = 31023, t3 = 811377.
> build-cpp/pack-cpp -m3 ../sample-1.dat data-cpp.dat
File packing is done. Read 31962362 bytes, written 32031809 bytes. Total ticks = 1182005 (0.846 seconds), t1 = 228527, t2 = 52680, t3 = 894081
> build-c/pack-c -m3 -b1024 ../sample-1.dat data-c.dat
File packing is done. Read 31962362 bytes, written 31160081 bytes. Total ticks = 9397894 (6.731 seconds), t1 = 5320910, t2 = 1943422, t3 = 2094688.
> build-cpp/pack-cpp -m3 -b1024 ../sample-1.dat data-cpp.dat
File packing is done. Read 31962362 bytes, written 31160081 bytes. Total ticks = 11586220 (8.3 seconds), t1 = 6399593, t2 = 3125111, t3 = 1663035
class small_bit_ostream
{
unique_bytes_ptr m_data;
unsigned short m_bits_count = 0;
public:
small_bit_ostream& push(const unsigned char* bits, const unsigned short bits_count)
{
const auto cov_bytes {covered_bytes(bits_count)};
m_data.reset(new unsigned char[cov_bytes]);
std::memcpy(m_data.get(), bits, cov_bytes);
m_bits_count = bits_count;
return *this;
}
unsigned long bits_count() const noexcept { return m_bits_count; }
bool empty() const noexcept { return ! m_bits_count; }
unsigned char* data() noexcept { return m_data.get(); }
const unsigned char* data() const noexcept { return m_data.get(); }
};
> build-cpp/pack-cpp -m3 ../sample-1.dat data-cpp.dat
File packing is done. Read 31962362 bytes, written 32031809 bytes. Total ticks = 1173692 (0.84 seconds), t1 = 229942, t2 = 46677, t3 = 890323
> build-cpp/pack-cpp -m3 -b1024 ../sample-1.dat data-cpp.dat
File packing is done. Read 31962362 bytes, written 31160081 bytes. Total ticks = 11198578 (8.02 seconds), t1 = 6404650, t2 = 2752852, t3 = 1641317
==2794== I refs: 2,313,382,347
==2794== I1 misses: 14,482
==2794== LLi misses: 1,492
==2794== I1 miss rate: 0.00%
==2794== LLi miss rate: 0.00%
==2794==
==2794== D refs: 601,604,444 (472,330,278 rd + 129,274,166 wr)
==2794== D1 misses: 3,966,884 ( 2,279,553 rd + 1,687,331 wr)
==2794== LLd misses: 7,030 ( 3,034 rd + 3,996 wr)
==2794== D1 miss rate: 0.7% ( 0.5% + 1.3% )
==2794== LLd miss rate: 0.0% ( 0.0% + 0.0% )
==2794==
==2794== LL refs: 3,981,366 ( 2,294,035 rd + 1,687,331 wr)
==2794== LL misses: 8,522 ( 4,526 rd + 3,996 wr)
==2794== LL miss rate: 0.0% ( 0.0% + 0.0% )
==2794==
==2794== Branches: 299,244,261 (298,085,895 cond + 1,158,366 ind)
==2794== Mispredicts: 8,779,093 ( 8,778,920 cond + 173 ind)
==2794== Mispred rate: 2.9% ( 2.9% + 0.0% )
==2994== I refs: 2,464,681,889
==2994== I1 misses: 2,032
==2994== LLi misses: 1,888
==2994== I1 miss rate: 0.00%
==2994== LLi miss rate: 0.00%
==2994==
==2994== D refs: 633,267,329 (491,590,332 rd + 141,676,997 wr)
==2994== D1 misses: 3,992,071 ( 2,298,593 rd + 1,693,478 wr)
==2994== LLd misses: 8,292 ( 3,173 rd + 5,119 wr)
==2994== D1 miss rate: 0.6% ( 0.5% + 1.2% )
==2994== LLd miss rate: 0.0% ( 0.0% + 0.0% )
==2994==
==2994== LL refs: 3,994,103 ( 2,300,625 rd + 1,693,478 wr)
==2994== LL misses: 10,180 ( 5,061 rd + 5,119 wr)
==2994== LL miss rate: 0.0% ( 0.0% + 0.0% )
==2994==
==2994== Branches: 348,146,710 (346,241,481 cond + 1,905,229 ind)
==2994== Mispredicts: 6,977,260 ( 6,792,066 cond + 185,194 ind)
==2994== Mispred rate: 2.0% ( 2.0% + 9.7% )
struct free_deleter
{
void operator()(void* p) const noexcept { std::free(p); }
};
template <class T> inline T* allocate_with_malloc(std::size_t size)
{
T* res = static_cast<T*>(std::malloc(sizeof(T) * size));
if (! res)
throw std::bad_alloc();
return res;
}
template <class T>
using unique_malloc_array_ptr = std::unique_ptr<T[], free_deleter>;
template <class T>
inline unique_malloc_array_ptr<T> unique_allocate_with_malloc(std::size_t size)
{
return unique_malloc_array_ptr<T>(allocate_with_malloc<T>(size));
}
// Typedefs for byte arrays
using unique_bytes_ptr = unique_malloc_array_ptr<std::uint8_t>;
inline unique_bytes_ptr allocate_bytes(std::size_t size)
{
return unique_bytes_ptr(unique_allocate_with_malloc<std::uint8_t>(size));
}
> build-cpp/pack-cpp -m3 ../sample-1.dat data-cpp.dat
File packing is done. Read 31962362 bytes, written 32031809 bytes. Total ticks = 1042665 (0.746 seconds), t1 = 250480, t2 = 45393, t3 = 740163
> build-cpp/pack-cpp -m3 -b1024 ../sample-1.dat data-cpp.dat
File packing is done. Read 31962362 bytes, written 31160081 bytes. Total ticks = 11068384 (7.93 seconds), t1 = 6488100, t2 = 2694562, t3 = 1501027
> build-cpp/pack-cpp -m3 ../sample-1.dat data-cpp.dat
File packing is done. Read 31962362 bytes, written 32031809 bytes. Total ticks = 1008990 (0.722 seconds), t1 = 221001, t2 = 44870, t3 = 736557
> build-cpp/pack-cpp -m3 -b1024 ../sample-1.dat data-cpp.dat
File packing is done. Read 31962362 bytes, written 31160081 bytes. Total ticks = 10683068 (7.65 seconds), t1 = 6101534, t2 = 2689178, t3 = 1505929
> build-c/pack-c -m3 ../sample-1.dat data-c.dat
File packing is done. Read 31962362 bytes, written 32031809 bytes. Total ticks = 1139373 (0.816 seconds), t1 = 206305, t2 = 29559, t3 = 902493.
> build-cpp/pack-cpp -m3 ../sample-1.dat data-cpp.dat
File packing is done. Read 31962362 bytes, written 32031809 bytes. Total ticks = 1417576 (1.01 seconds), t1 = 223441, t2 = 53057, t3 = 1134400
> build-cpp/pack-cpp -m3 ../sample-1.dat data-cpp.dat
File packing is done. Read 31962362 bytes, written 32031809 bytes. Total ticks = 1174028 (0.84 seconds), t1 = 215059, t2 = 59738, t3 = 892821
Source: https://habr.com/ru/post/347688/
All Articles