Math.Sin (double) function for GPU

Foreword

I needed to calculate the arc with increased accuracy on the video card processor in real time.

The author did not set himself the goal of exceeding the standard function System.Math.Sin () (C #) and did not achieve it.

The result of the work and my choice (for those who do not want to read):

Sin_3 (rad)

using System;

/// <summary>
/// Double-precision sine computed by linear interpolation in two pre-calculated
/// tables (sin and cos sampled on [0, pi/4]). Any angle is first reduced to one
/// of the eight pi/4-wide octants of the circle and then mapped onto the tables.
/// </summary>
class Math_d
{
    /// <summary>Width of one octant (pi/4); both tables cover the range [0, PI025].</summary>
    const double PI025 = Math.PI / 4;

    /// <summary>
    /// Number of entries per table: 2^21 = 2097152 doubles, i.e. 16 MiB per table
    /// (2^17 = 131072 entries would be 1 MiB). The previous literal 22097152 had a
    /// stray leading digit, was not 2^21, and allocated roughly 177 MB per table.
    /// </summary>
    const int length_mem = 1 << 21;

    /// <summary>Index of the last table entry, which corresponds to the angle pi/4.</summary>
    const int length_mem_M1 = length_mem - 1;

    /// <summary>Samples of Math.Sin on [0, pi/4]; filled by <see cref="Initialise"/>.</summary>
    static double[] mem_sin;

    /// <summary>Samples of Math.Cos on [0, pi/4]; filled by <see cref="Initialise"/>.</summary>
    static double[] mem_cos;

    /// <summary>
    /// Builds both lookup tables. Call once before heavy use; Sin_3 also calls it
    /// on demand if the tables are still missing.
    /// </summary>
    public static void Initialise()
    {
        Ini_Mem_Sin();
        Ini_Mem_Cos();
    }

    /// <summary>
    /// Returns sin(rad) using the interpolated sin/cos tables.
    /// </summary>
    /// <param name="rad">Angle in radians; negative angles are supported.</param>
    /// <returns>The interpolated sine, or NaN when <paramref name="rad"/> is NaN or infinite.</returns>
    public static double Sin_3(double rad)
    {
        if (double.IsNaN(rad) || double.IsInfinity(rad))
        {
            return double.NaN;
        }

        if (mem_sin == null || mem_cos == null)
        {
            Initialise();
        }

        // Floor (not truncation) keeps the remainder inside [0, pi/4) for negative
        // angles as well; truncation produced negative table indices.
        double octant = Math.Floor(rad / PI025);
        double rad_025 = rad - PI025 * octant;

        // octant modulo 8, done in floating point so huge angles cannot overflow an int cast.
        int i = (int)(octant - 8.0 * Math.Floor(octant / 8.0));

        // Each octant maps onto sin or cos of an angle inside [0, pi/4].
        switch (i)
        {
            case 0: return Sin_Lerp(rad_025);
            case 1: return Cos_Lerp(PI025 - rad_025);
            case 2: return Cos_Lerp(rad_025);
            case 3: return Sin_Lerp(PI025 - rad_025);
            case 4: return -Sin_Lerp(rad_025);
            case 5: return -Cos_Lerp(PI025 - rad_025);
            case 6: return -Cos_Lerp(rad_025);
            case 7: return -Sin_Lerp(PI025 - rad_025);
        }

        return 0;
    }

    /// <summary>Fills mem_sin with Math.Sin sampled evenly from 0 to pi/4 inclusive.</summary>
    static void Ini_Mem_Sin()
    {
        mem_sin = new double[length_mem];
        for (int i = 0; i < length_mem; i++)
        {
            double rad = (i * PI025) / length_mem_M1;
            mem_sin[i] = Math.Sin(rad);
        }
    }

    /// <summary>Fills mem_cos with Math.Cos sampled evenly from 0 to pi/4 inclusive.</summary>
    static void Ini_Mem_Cos()
    {
        mem_cos = new double[length_mem];
        for (int i = 0; i < length_mem; i++)
        {
            double rad = (i * PI025) / length_mem_M1;
            mem_cos[i] = Math.Cos(rad);
        }
    }

    /// <summary>Interpolated sine for an angle inside the table range.</summary>
    /// <param name="rad">Angle in radians, expected in [0, pi/4].</param>
    static double Sin_Lerp(double rad)
    {
        return Table_Lerp(mem_sin, rad);
    }

    /// <summary>Interpolated cosine for an angle inside the table range.</summary>
    /// <param name="rad">Angle in radians, expected in [0, pi/4].</param>
    static double Cos_Lerp(double rad)
    {
        return Table_Lerp(mem_cos, rad);
    }

    /// <summary>
    /// Looks up the two table entries surrounding <paramref name="rad"/> and blends them.
    /// Positions at or beyond either end of the table are clamped to the end entry, so
    /// rad == pi/4 (and tiny rounding overshoots) no longer read past the array.
    /// </summary>
    static double Table_Lerp(double[] table, double rad)
    {
        double position = (rad / PI025) * length_mem_M1;

        if (!(position > 0))
        {
            return table[0];
        }

        if (position >= length_mem_M1)
        {
            return table[length_mem_M1];
        }

        int i_0 = (int)position;
        return Lerp(table[i_0], table[i_0 + 1], position - i_0);
    }

    /// <summary>Linear interpolation between two values: a + s * (b - a).</summary>
    /// <param name="a">Value returned for s = 0.</param>
    /// <param name="b">Value returned for s = 1.</param>
    /// <param name="s">Blend factor: 0 = a, 1 = b, 0.5 = midpoint of a and b.</param>
    public static double Lerp(double a, double b, double s)
    {
        return a + s * (b - a);
    }
}

Reasons for posting

HLSL has no standard Sin function for the double type (although I am not entirely sure of this).
On the Internet there is little information available on this topic.

Considered approaches

Taylor series (Wikipedia)
Polynomials ( author of the function: "asvp" )
Linear interpolation of the pre-calculated Math.Sin and Math.Cos results (author: I)

Analyzed parameters

Accuracy with respect to Math.Sin
Speed relative to Math.Sin

In addition to the analysis, we will improve their speed.
')

Taylor series

Pros:

Highest accuracy
In theory, this series can compute Sin to arbitrary accuracy: the more terms are summed, the more accurate the result. In programming practice, however, you must take into account the rounding errors of the numeric type used (double, float, decimal, etc.).
Calculates any angle
As an argument, you can enter any value in the function, so there is no need to monitor the input parameters.
Independence
It does not require preliminary calculations (as functions discussed below), and often is the basis on which faster functions are assembled.

Minuses:

Very low speed (4-10%)
It requires a lot of iteration for accuracy to be close to the accuracy of Math.Sin, as a result of which it works 25 times slower than the standard function.
The greater the angle, the lower the accuracy.
The greater the angle entered in the function, the more iterations needed to achieve the same accuracy as Math.Sin.

Original appearance (speed: 4%):

The standard function involves the calculation of factorials, as well as the exponentiation of each iteration.

Modified (speed: 10%):

The powers can be built up incrementally inside the loop (a *= aa;), the factorials can be precomputed and stored in an array, and the alternating signs (+, -, +, ...) need not be obtained by raising -1 to a power: each sign is derived from the previous one.

The result is the following code:

Sin (rad, steps)

  /// <summary>Cache of factorials, fact[n] == n!. Filled by Init_Fact and read through Fact.</summary>
  static double[] fact;

  /// <summary>
  /// Approximates sin(rad) by summing the first <paramref name="steps"/> terms of the
  /// Taylor series rad - rad^3/3! + rad^5/5! - ...
  /// The original note records a speed of about 4% of Math.Sin (fps) with steps = 17.
  /// </summary>
  /// <param name="rad">Angle in radians. The original note refers to angles up to pi/4.</param>
  /// <param name="steps">
  /// Number of series terms to sum. The original note mentions E-15 at pi/4 with 8 terms.
  /// Terms whose factorial lies beyond the cached table are skipped.
  /// </param>
  /// <returns>The partial sum of the series.</returns>
  public static double Sin(double rad, int steps)
  {
      if (fact == null)
      {
          Init_Fact();
      }

      double ret = 0;
      double a = rad;          // current odd power of rad: rad^1, rad^3, rad^5, ...
      double aa = rad * rad;   // factor that advances a to the next odd power
      int sign = -1;           // flipped before use, so the first term is positive

      // i_f is the factorial index of the current term (1, 3, 5, ...). Stopping at the
      // end of the table prevents dividing a term by the -1 sentinel returned by Fact.
      for (int n = 0, i_f = 1; n < steps && i_f < fact.Length; n++, i_f += 2)
      {
          sign *= -1;
          ret += sign * a / Fact(i_f);
          a *= aa;
      }

      return ret;
  }

  /// <summary>
  /// Returns n! from the cached table, building the table on first use.
  /// Logs a message and returns -1 when n is negative or not covered by the table.
  /// </summary>
  /// <param name="n">Non-negative integer whose factorial is requested.</param>
  public static double Fact(int n)
  {
      if (fact == null)
      {
          Init_Fact();
      }

      if (n >= 0 && n < fact.Length)
      {
          return fact[n];
      }

      Debug.Log("    . n = " + n + ",   = " + fact.Length);
      return -1;
  }

  /// <summary>Fills the factorial table with 0! through 45!.</summary>
  static void Init_Fact()
  {
      int steps = 46;
      fact = new double[steps];
      fact[0] = 1;
      for (int n = 1; n < steps; n++)
      {
          fact[n] = fact[n - 1] * n;
      }
  }

Improved view (speed: 19%):

We know that the smaller the angle, the fewer iterations are needed. We only have to cover angles up to 0.25 * PI, i.e. 45 degrees: by evaluating Sin and Cos within a 45-degree range we can obtain every value of Sin from -1 to 1 (over the full 2 * PI). To do this, we divide the circle (2 * PI) into 8 parts and give each part its own way of calculating the sine. Moreover, to speed up the calculation, we avoid the remainder operator (%) when finding the position of the angle inside its 45-degree zone.

Sin_2 (rad)

  /// <summary>Cache of factorials, fact[n] == n!. Filled by Init_Fact and read through Fact.</summary>
  static double[] fact;

  /// <summary>
  /// Returns sin(rad) by reducing the angle to one of eight pi/4-wide octants and
  /// evaluating an 8-term Taylor series of sin or cos on the reduced angle.
  /// </summary>
  /// <param name="rad">Angle in radians; negative angles are supported.</param>
  /// <returns>The approximated sine, or NaN when <paramref name="rad"/> is NaN or infinite.</returns>
  public static double Sin_2(double rad)
  {
      if (double.IsNaN(rad) || double.IsInfinity(rad))
      {
          return double.NaN;
      }

      // The % operator is deliberately not used for the reduction; the original
      // note reports a noticeable fps gain from avoiding it.
      // Floor (not truncation) keeps the remainder inside [0, pi/4) for negative
      // angles too, so the 8-term series stays within its accurate range.
      double octant = System.Math.Floor(rad / PI025);
      double rad_025 = rad - PI025 * octant;

      // octant modulo 8, done in floating point so huge angles cannot overflow an int cast.
      int i = (int)(octant - 8.0 * System.Math.Floor(octant / 8.0));

      // Each octant maps onto sin or cos of an angle inside [0, pi/4].
      switch (i)
      {
          case 0: return Sin(rad_025, 8);
          case 1: return Cos(PI025 - rad_025, 8);
          case 2: return Cos(rad_025, 8);
          case 3: return Sin(PI025 - rad_025, 8);
          case 4: return -Sin(rad_025, 8);
          case 5: return -Cos(PI025 - rad_025, 8);
          case 6: return -Cos(rad_025, 8);
          case 7: return -Sin(PI025 - rad_025, 8);
      }

      return 0;
  }

  /// <summary>
  /// Approximates sin(rad) by summing the first <paramref name="steps"/> terms of the
  /// Taylor series rad - rad^3/3! + rad^5/5! - ...
  /// The original note records a speed of about 10% of Math.Sin (fps) with steps = 17.
  /// </summary>
  /// <param name="rad">Angle in radians. The original note refers to angles up to pi/4.</param>
  /// <param name="steps">
  /// Number of series terms to sum. The original note mentions E-15 at pi/4 with 8 terms.
  /// Terms whose factorial lies beyond the cached table are skipped.
  /// </param>
  public static double Sin(double rad, int steps)
  {
      if (fact == null)
      {
          Init_Fact();
      }

      double ret = 0;
      double a = rad;          // current odd power of rad: rad^1, rad^3, rad^5, ...
      double aa = rad * rad;   // factor that advances a to the next power
      int sign = -1;           // flipped before use, so the first term is positive

      // i_f is the factorial index of the current term (1, 3, 5, ...). Stopping at the
      // end of the table prevents dividing a term by the -1 sentinel returned by Fact.
      for (int n = 0, i_f = 1; n < steps && i_f < fact.Length; n++, i_f += 2)
      {
          sign *= -1;
          ret += sign * a / Fact(i_f);
          a *= aa;
      }

      return ret;
  }

  /// <summary>
  /// Approximates cos(rad) by summing the first <paramref name="steps"/> terms of the
  /// Taylor series 1 - rad^2/2! + rad^4/4! - ...
  /// The original note records about 10% (fps) and 26% (test) of the Math speed with steps = 17.
  /// </summary>
  /// <param name="rad">Angle in radians. The original note refers to angles up to pi/4.</param>
  /// <param name="steps">
  /// Number of series terms to sum. The original note mentions E-15 at pi/4 with 8 terms.
  /// Terms whose factorial lies beyond the cached table are skipped.
  /// </param>
  public static double Cos(double rad, int steps)
  {
      if (fact == null)
      {
          Init_Fact();
      }

      double ret = 0;
      double a = 1;            // current even power of rad: rad^0, rad^2, rad^4, ...
      double aa = rad * rad;   // factor that advances a to the next power
      int sign = -1;           // flipped before use, so the first term is positive

      // i_f is the factorial index of the current term (0, 2, 4, ...).
      for (int n = 0, i_f = 0; n < steps && i_f < fact.Length; n++, i_f += 2)
      {
          sign *= -1;
          ret += sign * a / Fact(i_f);
          a *= aa;
      }

      return ret;
  }

  /// <summary>
  /// Returns n! from the cached table, building the table on first use.
  /// Logs a message and returns -1 when n is negative or not covered by the table.
  /// </summary>
  /// <param name="n">Non-negative integer whose factorial is requested.</param>
  public static double Fact(int n)
  {
      if (fact == null)
      {
          Init_Fact();
      }

      if (n >= 0 && n < fact.Length)
      {
          return fact[n];
      }

      Debug.Log("    . n = " + n + ",   = " + fact.Length);
      return -1;
  }

  /// <summary>Fills the factorial table with 0! through 45!.</summary>
  static void Init_Fact()
  {
      int steps = 46;
      fact = new double[steps];
      fact[0] = 1;
      for (int n = 1; n < steps; n++)
      {
          fact[n] = fact[n - 1] * n;
      }
  }

Polynomials

I encountered this method on the Internet, the author needed a quick search function Sin for doubles of lower accuracy (error <0.000 001) without using libraries of previously calculated values.

Pros:

High speed (9-84%)
The initial polynomial unchanged showed a speed of 9% of the original Math.Sin, which is 10 times slower. Due to small changes, the speed increases dramatically to 84%, which is not bad if you close your eyes to accuracy.
No additional preliminary calculations and memory required.
If above and below we need to make arrays of variables to speed up the calculations, then all the key coefficients were kindly calculated and put into the formula by the author himself in the form of constants.
Accuracy is higher than Mathf.Sin (float)
For comparison:

0.841471 - Mathf.Sin (1) (Unity engine);
0.841470984807897 - Math.Sin (1) (standard C # function);
0.841470956802368 - sin (1) (GPU, hlsl language);
0.841471184637935 - Sin_0 (1) .

Minuses:

Not universal
You can not adjust the accuracy manually because it is not known what tools the author used to calculate this polynomial.
What for?
Why did the author need a function that does not require any arrays and which has such low (compared to double) accuracy?

Original appearance:

Sin_1 (x)

 /// <summary>
 /// Odd polynomial approximation of sine (degrees 1 through 15), with every power
 /// obtained from Math.Pow. Recorded speed relative to Math: 9% (fps).
 /// </summary>
 /// <param name="x">Angle in radians; the original note gives the range -2*Pi to 2*Pi.</param>
 /// <returns>The value of the polynomial at <paramref name="x"/>.</returns>
 public static double Sin_1(double x)
 {
     // Terms are accumulated in ascending degree, the same order as a single
     // left-to-right expression, so the rounding is unchanged.
     double total = 0.9999997192673006 * x;
     total -= 0.1666657564532464 * Math.Pow(x, 3);
     total += 0.008332803647181511 * Math.Pow(x, 5);
     total -= 0.00019830197237204295 * Math.Pow(x, 7);
     total += 2.7444305061093514e-6 * Math.Pow(x, 9);
     total -= 2.442176561869478e-8 * Math.Pow(x, 11);
     total += 1.407555708887347e-10 * Math.Pow(x, 13);
     total -= 4.240664814288337e-13 * Math.Pow(x, 15);
     return total;
 }

Superior view:

Sin_0 (rad)

 /// <summary>
 /// Odd polynomial approximation of sine (degrees 1 through 15) in which each power
 /// is derived from the previous one by multiplying with rad squared, so no call to
 /// Math.Pow is needed. Recorded speed relative to Math: 83% (fps).
 /// </summary>
 /// <param name="rad">Angle in radians; the original note gives the range -2*Pi to 2*Pi.</param>
 /// <returns>The value of the polynomial at <paramref name="rad"/>.</returns>
 public static double Sin_0(double rad)
 {
     double square = rad * rad;
     double power = rad;                                   // rad^1
     double total = 0.9999997192673006 * power;

     power *= square;                                      // rad^3
     total -= 0.1666657564532464 * power;

     power *= square;                                      // rad^5
     total += 0.008332803647181511 * power;

     power *= square;                                      // rad^7
     total -= 0.00019830197237204295 * power;

     power *= square;                                      // rad^9
     total += 2.7444305061093514e-6 * power;

     power *= square;                                      // rad^11
     total -= 2.442176561869478e-8 * power;

     power *= square;                                      // rad^13
     total += 1.407555708887347e-10 * power;

     power *= square;                                      // rad^15
     total -= 4.240664814288337e-13 * power;

     return total;
 }

Linear interpolation

This method is based on linear interpolation between the results of two records in an array.
The records are divided into mem_sin and mem_cos, they contain the pre-calculated results of the standard function Math.Sin and Math.Cos on a segment of input parameters from 0 to 0.25 * PI.

The reduction of the angle to the range from 0 to 45 degrees is the same as in the improved Taylor-series version, but instead of summing a series it calls a function that finds the two table records between which the angle lies and interpolates a value between them.

Pros:

High speed (65%)
Due to the simplicity of the interpolation algorithm, the speed reaches 65% of the speed of Math.Sin. I consider speed> 33% satisfactory.
Highest accuracy
Example of a rare deviation case:
0.255835595715180 - Math.Sin;
0.255835595715179 - Sin_3 .
Fast leg
I love this function because it was born in agony, written by me and surpassed the requirements: speed> 33%, accuracy higher than 1e-14. I will give her a proud name - Vēlōx Pes.

Minuses:

Requires memory space
To work, you must first calculate two arrays: for sin and for cos; each array weighs ~ 16mb (16 * 2 = 32mb)

Original appearance:

Sin_3 (rad)

 /// <summary>
 /// Double-precision sine computed by linear interpolation in two pre-calculated
 /// tables (sin and cos sampled on [0, pi/4]). Any angle is first reduced to one
 /// of the eight pi/4-wide octants of the circle and then mapped onto the tables.
 /// </summary>
 class Math_d
 {
     /// <summary>Width of one octant (pi/4); both tables cover the range [0, PI025].</summary>
     const double PI025 = Math.PI / 4;

     /// <summary>
     /// Number of entries per table: 2^21 = 2097152 doubles, i.e. 16 MiB per table
     /// (2^17 = 131072 entries would be 1 MiB). The previous literal 22097152 had a
     /// stray leading digit, was not 2^21, and allocated roughly 177 MB per table.
     /// </summary>
     const int length_mem = 1 << 21;

     /// <summary>Index of the last table entry, which corresponds to the angle pi/4.</summary>
     const int length_mem_M1 = length_mem - 1;

     /// <summary>Samples of Math.Sin on [0, pi/4]; filled by <see cref="Initialise"/>.</summary>
     static double[] mem_sin;

     /// <summary>Samples of Math.Cos on [0, pi/4]; filled by <see cref="Initialise"/>.</summary>
     static double[] mem_cos;

     /// <summary>
     /// Builds both lookup tables. Call once before heavy use; Sin_3 also calls it
     /// on demand if the tables are still missing.
     /// </summary>
     public static void Initialise()
     {
         Ini_Mem_Sin();
         Ini_Mem_Cos();
     }

     /// <summary>
     /// Returns sin(rad) using the interpolated sin/cos tables.
     /// </summary>
     /// <param name="rad">Angle in radians; negative angles are supported.</param>
     /// <returns>The interpolated sine, or NaN when <paramref name="rad"/> is NaN or infinite.</returns>
     public static double Sin_3(double rad)
     {
         if (double.IsNaN(rad) || double.IsInfinity(rad))
         {
             return double.NaN;
         }

         if (mem_sin == null || mem_cos == null)
         {
             Initialise();
         }

         // Floor (not truncation) keeps the remainder inside [0, pi/4) for negative
         // angles as well; truncation produced negative table indices.
         double octant = Math.Floor(rad / PI025);
         double rad_025 = rad - PI025 * octant;

         // octant modulo 8, done in floating point so huge angles cannot overflow an int cast.
         int i = (int)(octant - 8.0 * Math.Floor(octant / 8.0));

         // Each octant maps onto sin or cos of an angle inside [0, pi/4].
         switch (i)
         {
             case 0: return Sin_Lerp(rad_025);
             case 1: return Cos_Lerp(PI025 - rad_025);
             case 2: return Cos_Lerp(rad_025);
             case 3: return Sin_Lerp(PI025 - rad_025);
             case 4: return -Sin_Lerp(rad_025);
             case 5: return -Cos_Lerp(PI025 - rad_025);
             case 6: return -Cos_Lerp(rad_025);
             case 7: return -Sin_Lerp(PI025 - rad_025);
         }

         return 0;
     }

     /// <summary>Fills mem_sin with Math.Sin sampled evenly from 0 to pi/4 inclusive.</summary>
     static void Ini_Mem_Sin()
     {
         mem_sin = new double[length_mem];
         for (int i = 0; i < length_mem; i++)
         {
             double rad = (i * PI025) / length_mem_M1;
             mem_sin[i] = Math.Sin(rad);
         }
     }

     /// <summary>Fills mem_cos with Math.Cos sampled evenly from 0 to pi/4 inclusive.</summary>
     static void Ini_Mem_Cos()
     {
         mem_cos = new double[length_mem];
         for (int i = 0; i < length_mem; i++)
         {
             double rad = (i * PI025) / length_mem_M1;
             mem_cos[i] = Math.Cos(rad);
         }
     }

     /// <summary>Interpolated sine for an angle inside the table range.</summary>
     /// <param name="rad">Angle in radians, expected in [0, pi/4].</param>
     static double Sin_Lerp(double rad)
     {
         return Table_Lerp(mem_sin, rad);
     }

     /// <summary>Interpolated cosine for an angle inside the table range.</summary>
     /// <param name="rad">Angle in radians, expected in [0, pi/4].</param>
     static double Cos_Lerp(double rad)
     {
         return Table_Lerp(mem_cos, rad);
     }

     /// <summary>
     /// Looks up the two table entries surrounding <paramref name="rad"/> and blends them.
     /// Positions at or beyond either end of the table are clamped to the end entry, so
     /// rad == pi/4 (and tiny rounding overshoots) no longer read past the array.
     /// </summary>
     static double Table_Lerp(double[] table, double rad)
     {
         double position = (rad / PI025) * length_mem_M1;

         if (!(position > 0))
         {
             return table[0];
         }

         if (position >= length_mem_M1)
         {
             return table[length_mem_M1];
         }

         int i_0 = (int)position;
         return Lerp(table[i_0], table[i_0 + 1], position - i_0);
     }

     /// <summary>Linear interpolation between two values: a + s * (b - a).</summary>
     /// <param name="a">Value returned for s = 0.</param>
     /// <param name="b">Value returned for s = 1.</param>
     /// <param name="s">Blend factor: 0 = a, 1 = b, 0.5 = midpoint of a and b.</param>
     public static double Lerp(double a, double b, double s)
     {
         return a + s * (b - a);
     }
 }

UPD: Fixed the error in determining the index in Sin_Lerp (), Cos_Lerp (), Ini_Mem_Sin () and Ini_Mem_Cos ().

Source: https://habr.com/ru/post/426355/

All Articles

Math.Sin (double) function for GPU

Foreword

Reasons for posting

Considered approaches

Analyzed parameters

Taylor Rows

Polynomials

Linear interpolation

More articles: