__kernel void sum(__global const float *a, __global const float *b, __global float *c) { int gid = get_global_id(0); c[gid] = a[gid] + b[gid]; }
import pyopencl as cl import numpy import numpy.linalg as la a = numpy.random.rand(50000).astype(numpy.float32) b = numpy.random.rand(50000).astype(numpy.float32) ctx = cl.create_some_context() queue = cl.CommandQueue(ctx) mf = cl.mem_flags a_buf = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=a) b_buf = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=b) dest_buf = cl.Buffer(ctx, mf.WRITE_ONLY, b.nbytes) prg = cl.Program(ctx, """ __kernel void sum(__global const float *a, __global const float *b, __global float *c) { int gid = get_global_id(0); c[gid] = a[gid] + b[gid]; } """).build() prg.sum(queue, a.shape, None, a_buf, b_buf, dest_buf) a_plus_b = numpy.empty_like(a) cl.enqueue_copy(queue, a_plus_b, dest_buf) print la.norm(a_plus_b - (a+b))
#define K1 0x5A827999 #define K2 0x6ED9EBA1 #define K3 0x8F1BBCDC #define K4 0xCA62C1D6 #define a0 0x67452301; #define b0 0xEFCDAB89; #define c0 0x98BADCFE; #define d0 0x10325476; #define e0 0xC3D2E1F0; #define f1(x,y,z) ( z ^ ( x & ( y ^ z ) ) ) /* Rounds 0-19 */ #define f2(x,y,z) ( x ^ y ^ z ) /* Rounds 20-39 */ #define f3(x,y,z) ( ( x & y ) | ( z & ( x | y ) ) ) /* Rounds 40-59 */ #define f4(x,y,z) ( x ^ y ^ z ) /* Rounds 60-79 */ #define ROTL(n,X) ( ( ( X ) << n ) | ( ( X ) >> ( 32 - n ) ) ) #define expand(W,i) ( W[ i & 15 ] = ROTL( 1, ( W[ i & 15 ] ^ W[ (i - 14) & 15 ] ^ \ W[ (i - 8) & 15 ] ^ W[ (i - 3) & 15 ] ) ) ) #define subRound(a, b, c, d, e, f, k, data) \ ( e += ROTL( 5, a ) + f( b, c, d ) + k + data, b = ROTL( 30, b ) ) #define REVERSE(value) value = ((value & 0xFF000000) >> 24) | ((value & 0x00FF0000) >> 8) | ((value & 0x0000FF00) << 8) | ((value & 0x000000FF) << 24) long sha1(uint *eData, const int length) { unsigned int A = a0; unsigned int B = b0; unsigned int C = c0; unsigned int D = d0; unsigned int E = e0; ((__local char *)eData)[length] = 0x80; for (int i = 0; i <= length / 4; i++) { REVERSE(eData[i]); } eData[14] = 0; eData[15] = length * 8; subRound( A, B, C, D, E, f1, K1, eData[ 0 ] ); subRound( E, A, B, C, D, f1, K1, eData[ 1 ] ); subRound( D, E, A, B, C, f1, K1, eData[ 2 ] ); subRound( C, D, E, A, B, f1, K1, eData[ 3 ] ); subRound( B, C, D, E, A, f1, K1, eData[ 4 ] ); subRound( A, B, C, D, E, f1, K1, eData[ 5 ] ); subRound( E, A, B, C, D, f1, K1, eData[ 6 ] ); subRound( D, E, A, B, C, f1, K1, eData[ 7 ] ); subRound( C, D, E, A, B, f1, K1, eData[ 8 ] ); subRound( B, C, D, E, A, f1, K1, eData[ 9 ] ); subRound( A, B, C, D, E, f1, K1, eData[ 10 ] ); subRound( E, A, B, C, D, f1, K1, eData[ 11 ] ); subRound( D, E, A, B, C, f1, K1, eData[ 12 ] ); subRound( C, D, E, A, B, f1, K1, eData[ 13 ] ); subRound( B, C, D, E, A, f1, K1, eData[ 14 ] ); subRound( A, B, C, D, E, f1, K1, eData[ 15 ] ); subRound( E, A, B, C, D, f1, K1, expand( eData, 16 ) ); subRound( D, E, A, B, C, f1, K1, expand( eData, 17 ) ); subRound( C, D, E, A, B, f1, K1, expand( eData, 18 ) ); subRound( B, C, D, E, A, f1, K1, expand( eData, 19 ) ); subRound( A, B, C, D, E, f2, K2, expand( eData, 20 ) ); subRound( E, A, B, C, D, f2, K2, expand( eData, 21 ) ); subRound( D, E, A, B, C, f2, K2, expand( eData, 22 ) ); subRound( C, D, E, A, B, f2, K2, expand( eData, 23 ) ); subRound( B, C, D, E, A, f2, K2, expand( eData, 24 ) ); subRound( A, B, C, D, E, f2, K2, expand( eData, 25 ) ); subRound( E, A, B, C, D, f2, K2, expand( eData, 26 ) ); subRound( D, E, A, B, C, f2, K2, expand( eData, 27 ) ); subRound( C, D, E, A, B, f2, K2, expand( eData, 28 ) ); subRound( B, C, D, E, A, f2, K2, expand( eData, 29 ) ); subRound( A, B, C, D, E, f2, K2, expand( eData, 30 ) ); subRound( E, A, B, C, D, f2, K2, expand( eData, 31 ) ); subRound( D, E, A, B, C, f2, K2, expand( eData, 32 ) ); subRound( C, D, E, A, B, f2, K2, expand( eData, 33 ) ); subRound( B, C, D, E, A, f2, K2, expand( eData, 34 ) ); subRound( A, B, C, D, E, f2, K2, expand( eData, 35 ) ); subRound( E, A, B, C, D, f2, K2, expand( eData, 36 ) ); subRound( D, E, A, B, C, f2, K2, expand( eData, 37 ) ); subRound( C, D, E, A, B, f2, K2, expand( eData, 38 ) ); subRound( B, C, D, E, A, f2, K2, expand( eData, 39 ) ); subRound( A, B, C, D, E, f3, K3, expand( eData, 40 ) ); subRound( E, A, B, C, D, f3, K3, expand( eData, 41 ) ); subRound( D, E, A, B, C, f3, K3, expand( eData, 42 ) ); subRound( C, D, E, A, B, f3, K3, expand( eData, 43 ) ); subRound( B, C, D, E, A, f3, K3, expand( eData, 44 ) ); subRound( A, B, C, D, E, f3, K3, expand( eData, 45 ) ); subRound( E, A, B, C, D, f3, K3, expand( eData, 46 ) ); subRound( D, E, A, B, C, f3, K3, expand( eData, 47 ) ); subRound( C, D, E, A, B, f3, K3, expand( eData, 48 ) ); subRound( B, C, D, E, A, f3, K3, expand( eData, 49 ) ); subRound( A, B, C, D, E, f3, K3, expand( eData, 50 ) ); subRound( E, A, B, C, D, f3, K3, expand( eData, 51 ) ); subRound( D, E, A, B, C, f3, K3, expand( eData, 52 ) ); subRound( C, D, E, A, B, f3, K3, expand( eData, 53 ) ); subRound( B, C, D, E, A, f3, K3, expand( eData, 54 ) ); subRound( A, B, C, D, E, f3, K3, expand( eData, 55 ) ); subRound( E, A, B, C, D, f3, K3, expand( eData, 56 ) ); subRound( D, E, A, B, C, f3, K3, expand( eData, 57 ) ); subRound( C, D, E, A, B, f3, K3, expand( eData, 58 ) ); subRound( B, C, D, E, A, f3, K3, expand( eData, 59 ) ); subRound( A, B, C, D, E, f4, K4, expand( eData, 60 ) ); subRound( E, A, B, C, D, f4, K4, expand( eData, 61 ) ); subRound( D, E, A, B, C, f4, K4, expand( eData, 62 ) ); subRound( C, D, E, A, B, f4, K4, expand( eData, 63 ) ); subRound( B, C, D, E, A, f4, K4, expand( eData, 64 ) ); subRound( A, B, C, D, E, f4, K4, expand( eData, 65 ) ); subRound( E, A, B, C, D, f4, K4, expand( eData, 66 ) ); subRound( D, E, A, B, C, f4, K4, expand( eData, 67 ) ); subRound( C, D, E, A, B, f4, K4, expand( eData, 68 ) ); subRound( B, C, D, E, A, f4, K4, expand( eData, 69 ) ); subRound( A, B, C, D, E, f4, K4, expand( eData, 70 ) ); subRound( E, A, B, C, D, f4, K4, expand( eData, 71 ) ); subRound( D, E, A, B, C, f4, K4, expand( eData, 72 ) ); subRound( C, D, E, A, B, f4, K4, expand( eData, 73 ) ); subRound( B, C, D, E, A, f4, K4, expand( eData, 74 ) ); subRound( A, B, C, D, E, f4, K4, expand( eData, 75 ) ); subRound( E, A, B, C, D, f4, K4, expand( eData, 76 ) ); subRound( D, E, A, B, C, f4, K4, expand( eData, 77 ) ); subRound( C, D, E, A, B, f4, K4, expand( eData, 78 ) ); subRound( B, C, D, E, A, f4, K4, expand( eData, 79 ) ); A += a0; B += b0; C += c0; D += d0; E += e0; return as_ulong((uint2)(D, E)); }
__kernel void do_brute( __global const long *table, const ulong start_num, __global ulong *result, __global uint *res_ind) { char s[64]; uint *s_l = (__local uint *)s; int i, j; ulong _n, n; ulong start = start_num + get_global_id(0) * HASHES_PER_WORKITEM; for (j = 0; j < HASHES_PER_WORKITEM; j++) { n = _n = j + start; for (i = 15; i >= 0; i--) { s_l[i] = 0; } for (i = COMB_LEN - 1; i >= 0; i--) { s[i] = charset[n % CHARS_CNT]; n /= CHARS_CNT; } if (check_in_table(table, sha1(s_l, COMB_LEN))) { result[atomic_inc(res_ind)] = _n; } } }
ulong start = start_num + get_global_id(0) * HASHES_PER_WORKITEM;
calculates the number for a specific thread (get_global_id (0) - a standard function that returns a thread index from 0 to the current global work size). bool check_in_table( __global const long *table, const long value) { uint index = calc_index(value); uint step = calc_step(value); for (uint a = 1; ; a++) { index %= TABLE_SIZE; if (table[index] == 0) { return false; } if (table[index] == value) { return true; } index += a * step + 1; } }
Source: https://habr.com/ru/post/150289/
All Articles