#pragma pack(push,1)
, the fields cannot cross the size limits of their basic types, in our case unsigned - 32 bits): /* Fields */ enum T_field_enum { amount_of_money_e, gender_e, age_e, code_e, height_e, last_e }; struct T_cash_account_row { // 1 – double word (32 bits) unsigned code:20; // 0 - 1000000 unsigned gender:1; // 0 - 1 unsigned age:7; // 0 - 100 // 2 – double word (32 bits) unsigned amount_of_money:20; // 0 - 1000000 unsigned height:9; // 0 – 300 };
const size_t c_array_size = 10000000; struct T_cash_account_row *const array_ptr = ( struct T_cash_account_row *)calloc(c_array_size, sizeof(struct T_cash_account_row)); if (array_ptr == NULL) { printf ("calloc error\n"); exit(1); }
/* Generate random data for the one row */ static inline struct T_cash_account_row generate_row() { struct T_cash_account_row cash_account_row; cash_account_row.age = rand() % 100; cash_account_row.amount_of_money = (rand() % 1000)*(rand() % 1000); cash_account_row.code = (rand() % 1000)*(rand() % 1000); cash_account_row.gender = rand() % 2; cash_account_row.height = rand() % 300; return cash_account_row; } /* ----------------------------------------------------------------------- */ /* in int main() { … } */ /* Fill table random data */ for(i = 0; i < c_array_size; ++i) array_ptr[i] = generate_row();
last_e
is an enumeration with the value of the number of fields in a string (C ++ 03 7.2 Enumeration declarations) : /* Filters */ struct T_range_filters { struct T_cash_account_row begin, end; /* bytes array or bitset from https://gist.github.com/jmbr/667605 */ unsigned char use_filter[last_e]; }; /* ----------------------------------------------------------------------- */
use_filter[]
used to specify whether to filter by this condition-field or not. /* Compare row with filters */ static inline unsigned char test_predicate(struct T_cash_account_row const*const row, struct T_range_filters const*const range_filters) { return (!range_filters->use_filter[amount_of_money_e] || (row->amount_of_money >= range_filters->begin.amount_of_money && row->amount_of_money <= range_filters->end.amount_of_money)) && (!range_filters->use_filter[gender_e] || (row->gender >= range_filters->begin.gender && row->gender <= range_filters->end.gender)) && (!range_filters->use_filter[age_e] || (row->age >= range_filters->begin.age && row->age <= range_filters->end.age)) && (!range_filters->use_filter[code_e] || (row->code >= range_filters->begin.code && row->code <= range_filters->end.code)) && (!range_filters->use_filter[height_e] || (row->height >= range_filters->begin.height && row->height <= range_filters->end.height)); } /* ----------------------------------------------------------------------- */ /* search */ static inline size_t search(struct T_cash_account_row const*const array_ptr, const size_t c_array_size, struct T_cash_account_row *const result_ptr, struct T_range_filters const*const range_filters) { size_t result_size = 0; size_t i; /* loop index */ for(i = 0; i < c_array_size; ++i) { if(test_predicate(array_ptr + i, range_filters)) result_ptr[result_size] = array_ptr[i], ++result_size; } return result_size; }
/* srand (time(NULL)); */
to compare the results of different program launches. But you can uncomment it and make sure that the absolute execution time varies, but the relative acceleration from my optimizations remains about the same.search_12()
function: /* Compare row with filters */ static inline unsigned char test_predicate_12(struct T_cash_account_row const*const __restrict row, struct T_range_filters const*const __restrict range_filters) { return (row->age >= range_filters->begin.age && row->age <= range_filters->end.age) && (row->code >= range_filters->begin.code && row->code <= range_filters->end.code); } /* ----------------------------------------------------------------------- */ /* search */ static inline size_t search_12(struct T_cash_account_row const*const __restrict array_ptr, const size_t c_array_size, struct T_cash_account_row *const __restrict result_ptr, struct T_range_filters const*const __restrict range_filters) { size_t result_size = 0; size_t i; /* loop index */ for(i = 0; i < c_array_size; ++i) { if(test_predicate_12(array_ptr + i, range_filters)) result_ptr[result_size] = array_ptr[i], ++result_size; } return result_size; }
test_predicate
function: github.com . This happened because after each comparison of use_filter[] == false
was a conditional transition to comparisons in the next field. Those. in addition to these 4-ex comparisons, only 5 more comparisons with use_filter[]
were performed there.char[]
compared via strcmp()
. In C ++, this is solved by creating a custom type with an overloaded comparison operator. (Operators for fundamental types in C ++ cannot be overloaded - one of the operator parameters must be a custom class.)inline
operator is called to call the next instance of the template. In this promotion can participate, as template functions, and template classes. // The templated constructor of unrolling of the class (no return, mandatory call to the constructor, called once) template<unsigned unroll_count, template<unsigned> class T> struct T_unroll_constructor { T_unroll_constructor<unroll_count-1, T> next_unroll; T<unroll_count-1> functor; template<typename T1> inline T_unroll_constructor(T1 & val1) : next_unroll(val1), functor(val1) {} }; // End of unroll template<template<unsigned> class T> struct T_unroll_constructor<0, T> { template<typename T1> inline T_unroll_constructor(T1 &) {} }; // -------------------------------------------------------------------------
unsigned int
value as a template parameter, each bit of which will mean whether or not the corresponding filter is used: // Abstract base class of filters for each search variant (range_filters) struct T_filter { // search virtual size_t search(T_cash_account_row const*const __restrict array_ptr, const size_t c_array_size, T_cash_account_row *const __restrict result_ptr, T_range_filters const*const __restrict range_filters) = 0; }; // ------------------------------------------------------------------------- // The filters for each search variant (range_filters) template<unsigned index_pred> struct T_custom_filter : T_filter { inline unsigned char test_predicate(T_cash_account_row const*const __restrict row, T_range_filters const*const __restrict range_filters) { return (!(index_pred & 1<<amount_of_money_e) || (row->amount_of_money >= range_filters->begin.amount_of_money && row->amount_of_money <= range_filters->end.amount_of_money)) && (!(index_pred & 1<<gender_e) || (row->gender >= range_filters->begin.gender && row->gender <= range_filters->end.gender)) && (!(index_pred & 1<<age_e) || (row->age >= range_filters->begin.age && row->age <= range_filters->end.age)) && (!(index_pred & 1<<code_e) || (row->code >= range_filters->begin.code && row->code <= range_filters->end.code)) && (!(index_pred & 1<<height_e) || (row->height >= range_filters->begin.height && row->height <= range_filters->end.height)); } // ------------------------------------------------------------------------- // search virtual size_t search(T_cash_account_row const*const __restrict array_ptr, const size_t c_array_size, T_cash_account_row *const __restrict result_ptr, T_range_filters const*const __restrict range_filters) final { size_t result_size = 0; size_t i; // loop index for(i = 0; i < c_array_size; ++i) { if(test_predicate(array_ptr + i, range_filters)) result_ptr[result_size] = array_ptr[i], ++result_size; } return result_size; } }; // -------------------------------------------------------------------------
index_pred
and enumeration amount_of_money_e, gender_e …
known at the compilation stage, the compiler will throw out some of the conditions, as always true. In fact, we help the compiler to optimize our program. This is the most important decision in this!template<unsigned index_pred> struct T_custom_filter
into 32 classes. Create 32 objects of each of them and store the pointers of the base type on them into a static array std::array<>
. And at the time of execution we will polymorphically refer to the desired object, depending on the search conditions: class T_optimized_search { // unroll tamplates template<unsigned index_pred> struct T_unroll_find { template<typename T> T_unroll_find(T &filters) { filters[index_pred].reset( new T_custom_filter<index_pred>() ); } }; // ------------------------------------------------------------------------- // Get index of T_test_pred version for current search range inline unsigned get_index_pred(T_range_filters const*const __restrict range_filters) { unsigned result = 0; for(size_t i = 0; i < last_e; ++i) result |= range_filters->use_filter[i]?(1<<i):0; return result; } std::array<std::unique_ptr<T_filter>, 1<<last_e> filters; T_unroll_constructor< 1<<last_e, T_unroll_find> fill_filter; public: T_optimized_search() : fill_filter(filters) {} // C++ optimized search inline size_t search(T_cash_account_row const*const __restrict array_ptr, const size_t c_array_size, T_cash_account_row *const __restrict result_ptr, T_range_filters const*const __restrict range_filters) { auto const& filter = filters[get_index_pred(range_filters)]; return filter->search(array_ptr, c_array_size, result_ptr, range_filters); } }; // -------------------------------------------------------------------------
unsigned get_index_pred((T_range_filters const*const __restrict range_filters)
returns the index number of the required search object for the given search condition range_filters
. T_optimized_search optimized_search; // C++ optimized search result_size = optimized_search.search(array_ptr, c_array_size, result_ptr, &range_filters);
test_predicate
functions on C and optimized in C ++ compiled on MSVC11 (MSVS 2012) with my comments - the difference is clearly visible if you look through TortoiseDiff : diff link on GitHub.comuse_filter[]
filters use_filter[]
known in compile-time were obtained, which allowed the compiler to exclude them during optimization. Those. This optimization is applicable to all similar cases of the removal of calculations or checks from a loop to an out.In the C ++ example, I used the C-style method of passing parameters to a function using the constant pointer*const
, so that in diff between C and C ++, the changes related only to the discussed optimization. However, using interfaces in C ++ - style, the function can also take parameters via the & link, which will exclude the possibility of forgettingconst
after * and this is somewhat shorter. But Google C ++ Style Guide recommends transferring immutable parameters via a constant linkconst&
, and modifiable using a constant pointer*const
. If the code is written in this style, then you completely control the change (or not change) of your variables passed to another function - that is, if you pass by value
void func(int const& a, int *b) {} // function of other developer in Google C++ Style int* a = new int(1); int b = 2; func(*a, b);
then the compiler will generate an error stating that the function wants to change your b parameter. This is especially important when developing through TDD testing, when external calls to tests rigidly specify the interface format, and in this case such a call in external tests would tell the developer of the function that b cannot be changed.
And if we pass by the pointer (or by taking the address):
void func(int const& a, int *b) {} // function of other developer in Google C++ Style int* a = new int(1); int b = 2; func(*a, &b);
it will compile without errors. And even from the function call, it is obvious to us that the functiona
does not change the variable, and the variableb
changes. And in the case of TDD, we say that the developer must takeb
by pointer, and therefore must change it. Anda
he will have to take a constant link or value, and will not be able to change its external value.
But in C, where there are no links, this approach is not possible, since if the function always accepts only by the pointer, then it is impossible to guarantee on the side of the caller that they cannot be modified, and passing on the value of variables of a custom type can have significant overhead costs.
Generated rows: 10,000,000
C ++ - Searching ...
C ++ - optimized search took 0.061000 seconds.
Found rows: 38
C-Searching ...
C-search took 0.089000 seconds.
The C ++ is faster than C: 1.459016 times
Found rows: 38
Generated rows: 10,000,000
C ++ - Searching ...
C ++ - optimized search took 0.056000 seconds.
Found rows: 38
C-Searching ...
C-search took 0.074000 seconds.
The C ++ faster than C: 1.321429 times
Found rows: 38
Source: https://habr.com/ru/post/182356/
All Articles