void ext_sort(const std::string filename, const size_t memory)
const size_t type_size = sizeof(T); const uint64_t filesize = file_size(filename); std::fstream data(filename, std::ios::in | std::ios::out | std::ios::binary); const uint64_t chunk_number = filesize / memory; const size_t chunk_size = memory / type_size - (memory / type_size) % 2, chunk_byte_size = chunk_size * type_size, half_chunk_byte_size = chunk_byte_size / 2, half_chunk_size = chunk_size / 2; std::vector<T> *chunk = new std::vector<T>(chunk_size);
for (uint64_t i = 0; i < chunk_number; ++i) { data.seekg(chunk_byte_size * i); data.read((char *) chunk->data(), chunk_byte_size); flat_quick_sort(chunk->begin(), chunk->end()); data.seekp(chunk_byte_size * i); data.write((char *) chunk->data(), chunk_byte_size); }
int64_t s = chunk_number, start = 0; while (s > 0) { data.seekp(half_chunk_byte_size * start); data.read((char *) chunk->data(), half_chunk_byte_size); for (int64_t q = 1; q < s; ++q) { data.seekg(half_chunk_byte_size * start + chunk_byte_size * q); data.read((char *) (chunk->data() + half_chunk_size), half_chunk_byte_size); in_place_merge(chunk->begin(), chunk->begin() + half_chunk_size - 1, chunk->begin() + chunk_size); data.seekp(half_chunk_byte_size * start + chunk_byte_size * q); data.write((char *) (chunk->data() + half_chunk_size), half_chunk_byte_size); } data.seekp(half_chunk_byte_size * start); data.write((char *) chunk->data(), half_chunk_byte_size);
data.seekp(half_chunk_byte_size * start + chunk_byte_size * s - half_chunk_byte_size); data.read((char *) (chunk->data() + half_chunk_size), half_chunk_byte_size); for (int64_t q = s - 2; q >= 0; --q) { data.seekg(half_chunk_byte_size * (start + 1) + chunk_byte_size * q); data.read((char *) chunk->data(), half_chunk_byte_size); in_place_merge(chunk->begin(), chunk->begin() + half_chunk_size - 1, chunk->begin() + chunk_size); data.seekp(half_chunk_byte_size * (start + 1) + chunk_byte_size * q); data.write((char *) chunk->data(), half_chunk_byte_size); } data.seekg(half_chunk_byte_size * start + chunk_byte_size * s - half_chunk_byte_size); data.write((char *) (chunk->data() + half_chunk_size), half_chunk_byte_size);
--s; ++start; for (int64_t p = 0; p < s; ++p) { data.seekp(half_chunk_byte_size * start + chunk_byte_size * p); data.read((char *) chunk->data(), chunk_byte_size); in_place_merge(chunk->begin(), chunk->begin() + half_chunk_size - 1, chunk->begin() + chunk_size); data.seekg(half_chunk_byte_size * start + chunk_byte_size * p); data.write((char *) chunk->data(), chunk_byte_size); } } delete chunk; }
template<typename T> void ext_sort(const std::string filename, const size_t memory) { const size_t type_size = sizeof(T); const uint64_t filesize = file_size(filename); std::fstream data(filename, std::ios::in | std::ios::out | std::ios::binary); const uint64_t chunk_number = filesize / memory; const size_t chunk_size = memory / type_size - (memory / type_size) % 2, chunk_byte_size = chunk_size * type_size, half_chunk_byte_size = chunk_byte_size / 2, half_chunk_size = chunk_size / 2; std::vector<T> *chunk = new std::vector<T>(chunk_size); for (uint64_t i = 0; i < chunk_number; ++i) { data.seekg(chunk_byte_size * i); data.read((char *) chunk->data(), chunk_byte_size); flat_quick_sort(chunk->begin(), chunk->end()); data.seekp(chunk_byte_size * i); data.write((char *) chunk->data(), chunk_byte_size); } int64_t s = chunk_number, start = 0; while (s > 0) { data.seekp(half_chunk_byte_size * start); data.read((char *) chunk->data(), half_chunk_byte_size); for (int64_t q = 1; q < s; ++q) { data.seekg(half_chunk_byte_size * start + chunk_byte_size * q); data.read((char *) (chunk->data() + half_chunk_size), half_chunk_byte_size); in_place_merge(chunk->begin(), chunk->begin() + half_chunk_size - 1, chunk->begin() + chunk_size); data.seekp(half_chunk_byte_size * start + chunk_byte_size * q); data.write((char *) (chunk->data() + half_chunk_size), half_chunk_byte_size); } data.seekp(half_chunk_byte_size * start); data.write((char *) chunk->data(), half_chunk_byte_size); data.seekp(half_chunk_byte_size * start + chunk_byte_size * s - half_chunk_byte_size); data.read((char *) (chunk->data() + half_chunk_size), half_chunk_byte_size); for (int64_t q = s - 2; q >= 0; --q) { data.seekg(half_chunk_byte_size * (start + 1) + chunk_byte_size * q); data.read((char *) chunk->data(), half_chunk_byte_size); in_place_merge(chunk->begin(), chunk->begin() + half_chunk_size - 1, chunk->begin() + chunk_size); data.seekp(half_chunk_byte_size * (start + 1) + chunk_byte_size * q); data.write((char *) chunk->data(), half_chunk_byte_size); } data.seekg(half_chunk_byte_size * start + chunk_byte_size * s - half_chunk_byte_size); data.write((char *) (chunk->data() + half_chunk_size), half_chunk_byte_size); --s; ++start; for (int64_t p = 0; p < s; ++p) { data.seekp(half_chunk_byte_size * start + chunk_byte_size * p); data.read((char *) chunk->data(), chunk_byte_size); in_place_merge(chunk->begin(), chunk->begin() + half_chunk_size - 1, chunk->begin() + chunk_size); data.seekg(half_chunk_byte_size * start + chunk_byte_size * p); data.write((char *) chunk->data(), chunk_byte_size); } } delete chunk; }
#ifndef EXTERNAL_SORT_FLAT_QUICKSORT_H #define EXTERNAL_SORT_FLAT_QUICKSORT_H template<class ForwIt> void insertion_sort(ForwIt be, ForwIt en) { for (ForwIt ii = be + 1; ii != en; ii++) { ForwIt jj = ii - 1; auto val = *ii; while (jj >= be and *jj > val) { *(jj + 1) = *jj; --jj; } *(jj + 1) = val; } } template<class ForwIt> ForwIt median_of_3(ForwIt it1, ForwIt it2, ForwIt it3) { return (*it1 > *it2) ? (*it3 > *it2) ? (*it1 > *it3) ? it3 : it1 : it2 : (*it3 > *it1) ? (*it2 > *it3) ? it3 : it2 : it1; } template<class ForwIt> ForwIt choose_pivot(ForwIt be, ForwIt en) { ptrdiff_t s = (en - be) / 8; ForwIt mid = be + (en - be) / 2; ForwIt best1 = median_of_3(be, be + s, be + 2 * s), best2 = median_of_3(mid - s, mid, mid + s), best3 = median_of_3( en - 2 * s, en - s, en); return median_of_3(best1, best2, best3); } // search for the end of the current block template<class ForwIt> ForwIt block_range(ForwIt be, ForwIt en) { ForwIt it = be; while (it != en) { if (*be < *it) return it; ++it; } return it; } // warning: after the partition outer pivot may point to random element template<class ForwIt> std::pair<ForwIt, ForwIt> partition_range(ForwIt be, ForwIt en, ForwIt pivot) { std::pair<ForwIt, ForwIt> re; ForwIt j = be; for (ForwIt i = be; i != en; ++i) if (*i < *pivot) { if (pivot == i) pivot = j; else if (pivot == j) pivot = i; std::swap(*j, *i); ++j; } re.first = j; for (ForwIt i = j; i != en; ++i) if (*pivot == *i) { if (pivot == i) pivot = j; else if (pivot == j) pivot = i; std::swap(*j, *i); ++j; } re.second = j; return re; } // makes largest element the first template<class ForwIt> void blockify(ForwIt be, ForwIt en) { for (ForwIt it = be; it != en; ++it) if (*be < *it) std::swap(*be, *it); } template<class ForwIt> void flat_quick_sort(ForwIt be, ForwIt en) { ForwIt tmp = en; // the current end of block while (be != en) { if (std::is_sorted(be, tmp)) { be = tmp; tmp = block_range(be, en); continue; } if (tmp - be < 32) insertion_sort(be, tmp); else { ForwIt pivot = choose_pivot(be, tmp); std::pair<ForwIt, ForwIt> range = partition_range(be, tmp, pivot); blockify(range.second, tmp); tmp = range.first; } } } #endif //EXTERNAL_SORT_FLAT_QUICKSORT_H
template<typename T> void merge(std::vector<T> &chunk, size_t s, size_t q, size_t r) { std::vector<T> *chunk2 = new std::vector<T>(q - s + 1); auto it2 = chunk2->begin(), it1 = chunk.begin() + q + 1, it = chunk.begin() + s; std::copy(it, it1, it2); while (it2 != chunk2->end() && it1 != chunk.begin() + r + 1) { if (*it1 > *it2) { *it = *it2; ++it2; } else { *it = *it1; ++it1; } ++it; } if (it1 == chunk.begin() + r + 1) std::copy(it2, chunk2->end(), it); else std::copy(it1, chunk.begin() + r + 1, it); delete chunk2; }
template<typename Iter> void merge_B_and_Y(Iter z, Iter y, Iter yn) { for (; z < y && y < yn; ++z) { Iter j = std::min_element(z, y); if (*j <= *y) std::swap(*z, *j); else { std::swap(*z, *y); ++y; } } if (z < y) flat_quick_sort(z, yn); } template<typename Iter> Iter find_next_X_block(Iter x0, Iter z, Iter y, size_t k, size_t f, Iter b1, Iter b2, auto max) { auto min1 = max, min2 = max; Iter m = x0 + (ptrdiff_t) floor((z - x0 - f) / k) * k + f, x = x0; if (m <= z) m += k; for (auto i = m; i + k <= y; i += k) { if (i != b1 && i != b2) { Iter j = (i < b1 && b1 < i + k) ? m - 1 : i + k - 1; if (*i <= min1 && *j <= min2) { x = i; min1 = *i; min2 = *j; } } } return x; } template<typename Iter> void in_place_merge(Iter x0, Iter y0, Iter yn, int64_t k, bool rec) { if (k == -1) k = (int64_t) sqrt(yn - x0); size_t f = (y0 - x0) % k; Iter x = (f == 0) ? y0 - 2 * k : y0 - k - f; auto t = *x, max = *std::max_element(x0, yn); *x = *x0; Iter z = x0, y = y0, b1 = x + 1, b2 = y0 - k; int i = 0; while (y - z > 2 * k) { ++i; if (*x <= *y || y >= yn) { *z = *x; *x = *b1; ++x; if ((x - x0) % k == f) if (z < x - k) b2 = x - k; x = find_next_X_block(x0, z, y, k, f, b1, b2, max); } else { *z = *y; *y = *b1; ++y; if ((y - y0) % k == 0) b2 = y - k; } ++z; *b1 = *z; if (z == x) x = b1; if (z == b2) b2 = yn + 1; ++b1; if ((b1 - x0) % k == f) b1 = (b2 == yn + 1) ? b1 - k : b2; } *z = t; if (rec) merge_B_and_Y(z, y, yn); else { flat_quick_sort(z, y); in_place_merge(z,y,yn,(int64_t)sqrt(k),true); } }
Algorithm | Time (75MB int64 in 7.5MB of memory) | Speed (75MB int64 in 7.5MB of memory) | Time (7.5MB int64 to 75KB of memory) | Speed (7.5MB int64 in 75KB of memory) | Time (750MB int64 to 75MB of memory) | Speed (750MB int64 in 75MB memory) |
---|---|---|---|---|---|---|
In-place merge | 6.04329 s | 1,241,045 b / s | 24.2993 s | 3,086,508 b / s | - | - |
Merge | 0.932663 s | 8,041,489 b / s | 2.73895 s | 27,382,756 B / s | 47.7946 s | 15 691 689 b / s |
Algorithm SLY_G | 1.79629 s | 4175272 b / s | 3.84775 s | 19,491,910 B / s | 39.77 s | 18,858,436 B / s |
Source: https://habr.com/ru/post/268535/
All Articles