32 #if defined(BUILD_CUDA_MODULE) && defined(__CUDACC__)
// Numeric constants for the 3x3 SVD routines.
//
// The three integer-valued macros are IEEE-754 binary32 bit patterns written
// as decimal integers, not ordinary integer quantities. Presumably they are
// loaded through the integer view (`.ui`) of the float/uint union the
// routines operate on -- the use sites are not visible here, TODO confirm.

// 1065353216 == 0x3F800000, the binary32 encoding of 1.0f.
39 #define gone 1065353216
// 1053028117 == 0x3EC3EF15, the binary32 encoding of ~0.38268343 (sin(pi/8)).
40 #define gsine_pi_over_eight 1053028117
// 1064076127 == 0x3F6C835F, the binary32 encoding of ~0.92387953 (cos(pi/8)).
42 #define gcosine_pi_over_eight 1064076127
// Very small floating-point value. Note the literal has no `f` suffix, so it
// is double-typed wherever it is substituted.
43 #define gtiny_number 1.e-20
// Numerically ~= 3 + 2*sqrt(2) (5.828427...); the digits appear to be that
// value after rounding to single precision. Also a double-typed literal.
44 #define gfour_gamma_squared 5.8284273147583007813
// Device build: taken only when BUILD_CUDA_MODULE is defined and the
// translation unit is being compiled by the CUDA compiler (__CUDACC__).
52 #if defined(BUILD_CUDA_MODULE) && defined(__CUDACC__)
// Single-precision names map onto themselves. A macro is not re-expanded
// inside its own expansion, so each of these resolves to the CUDA intrinsic
// of the same name and exists only so both build flavours share one spelling.
53 #define __fadd_rn(x, y) __fadd_rn(x, y)
54 #define __fsub_rn(x, y) __fsub_rn(x, y)
55 #define __frsqrt_rn(x) __frsqrt_rn(x)
// Double-precision add/sub are likewise pass-throughs to the intrinsics.
57 #define __dadd_rn(x, y) __dadd_rn(x, y)
58 #define __dsub_rn(x, y) __dsub_rn(x, y)
// Double-precision reciprocal square root is composed from two intrinsics:
// reciprocal (__drcp_rn) of the square root (__dsqrt_rn).
// NOTE(review): presumably because no direct double-precision intrinsic of
// this name is provided -- confirm against the CUDA Math API.
59 #define __drsqrt_rn(x) __drcp_rn(__dsqrt_rn(x))
// Fallback definitions of the rounding "intrinsics" as plain arithmetic, so
// code written against the intrinsic names still builds without them.
//
// Every parameter of the add/sub macros is parenthesized so that arguments
// containing lower-precedence operators expand correctly, e.g.
// __fsub_rn(a, b + c) is a - (b + c), not a - b + c.
//
// NOTE(review): the reciprocal-square-root fallbacks use the literal 1.0 and
// an unqualified sqrt(), so the division is carried out in double precision
// even for the "f" variant. This is left unchanged to preserve results.

// Single-precision names.
#define __fadd_rn(x, y) ((x) + (y))
#define __fsub_rn(x, y) ((x) - (y))
#define __frsqrt_rn(x) (1.0 / sqrt(x))

// Double-precision names.
#define __dadd_rn(x, y) ((x) + (y))
#define __dsub_rn(x, y) ((x) - (y))
#define __drsqrt_rn(x) (1.0 / sqrt(x))

// Type-agnostic names.
#define __add_rn(x, y) ((x) + (y))
#define __sub_rn(x, y) ((x) - (y))
#define __rsqrt_rn(x) (1.0 / sqrt(x))
80 template <
typename scalar_t>
86 template <
typename scalar_t>
97 double gsmall_number = 1.e-12;
99 un<double> Sa11, Sa21, Sa31, Sa12, Sa22, Sa32, Sa13, Sa23, Sa33;
100 un<double> Su11, Su21, Su31, Su12, Su22, Su32, Su13, Su23, Su33;
101 un<double> Sv11, Sv21, Sv31, Sv12, Sv22, Sv32, Sv13, Sv23, Sv33;
104 un<double> Ss11, Ss21, Ss31, Ss22, Ss32, Ss33;
121 Ss11.
f = Sa11.
f * Sa11.
f;
122 Stmp1.
f = Sa21.
f * Sa21.
f;
124 Stmp1.
f = Sa31.
f * Sa31.
f;
127 Ss21.
f = Sa12.
f * Sa11.
f;
128 Stmp1.
f = Sa22.
f * Sa21.
f;
130 Stmp1.
f = Sa32.
f * Sa31.
f;
133 Ss31.
f = Sa13.
f * Sa11.
f;
134 Stmp1.
f = Sa23.
f * Sa21.
f;
136 Stmp1.
f = Sa33.
f * Sa31.
f;
139 Ss22.
f = Sa12.
f * Sa12.
f;
140 Stmp1.
f = Sa22.
f * Sa22.
f;
142 Stmp1.
f = Sa32.
f * Sa32.
f;
145 Ss32.
f = Sa13.
f * Sa12.
f;
146 Stmp1.
f = Sa23.
f * Sa22.
f;
148 Stmp1.
f = Sa33.
f * Sa32.
f;
151 Ss33.
f = Sa13.
f * Sa13.
f;
152 Stmp1.
f = Sa23.
f * Sa23.
f;
154 Stmp1.
f = Sa33.
f * Sa33.
f;
165 for (
int i = 0; i < 4; i++) {
166 Ssh.
f = Ss21.
f * 0.5f;
169 Stmp2.
f = Ssh.
f * Ssh.
f;
171 Ssh.
ui = Stmp1.
ui & Ssh.
ui;
172 Sch.
ui = Stmp1.
ui & Stmp5.
ui;
174 Sch.
ui = Sch.
ui | Stmp2.
ui;
176 Stmp1.
f = Ssh.
f * Ssh.
f;
177 Stmp2.
f = Sch.
f * Sch.
f;
181 Ssh.
f = Stmp4.
f * Ssh.
f;
182 Sch.
f = Stmp4.
f * Sch.
f;
184 Stmp1.
ui = (Stmp2.
f <= Stmp1.
f) ? 0xffffffff : 0;
187 Ssh.
ui = ~Stmp1.
ui & Ssh.
ui;
188 Ssh.
ui = Ssh.
ui | Stmp2.
ui;
190 Sch.
ui = ~Stmp1.
ui & Sch.
ui;
191 Sch.
ui = Sch.
ui | Stmp2.
ui;
193 Stmp1.
f = Ssh.
f * Ssh.
f;
194 Stmp2.
f = Sch.
f * Sch.
f;
196 Ss.
f = Sch.
f * Ssh.
f;
199 #ifdef DEBUG_JACOBI_CONJUGATE
200 printf(
"GPU s %.20g, c %.20g, sh %.20g, ch %.20g\n", Ss.
f, Sc.
f, Ssh.
f,
208 Ss33.
f = Ss33.
f * Stmp3.
f;
209 Ss31.
f = Ss31.
f * Stmp3.
f;
210 Ss32.
f = Ss32.
f * Stmp3.
f;
211 Ss33.
f = Ss33.
f * Stmp3.
f;
213 Stmp1.
f = Ss.
f * Ss31.
f;
214 Stmp2.
f = Ss.
f * Ss32.
f;
215 Ss31.
f = Sc.
f * Ss31.
f;
216 Ss32.
f = Sc.
f * Ss32.
f;
220 Stmp2.
f = Ss.
f * Ss.
f;
221 Stmp1.
f = Ss22.
f * Stmp2.
f;
222 Stmp3.
f = Ss11.
f * Stmp2.
f;
223 Stmp4.
f = Sc.
f * Sc.
f;
224 Ss11.
f = Ss11.
f * Stmp4.
f;
225 Ss22.
f = Ss22.
f * Stmp4.
f;
230 Ss21.
f = Ss21.
f * Stmp4.
f;
231 Stmp4.
f = Sc.
f * Ss.
f;
232 Stmp2.
f = Stmp2.
f * Stmp4.
f;
233 Stmp5.
f = Stmp5.
f * Stmp4.
f;
238 #ifdef DEBUG_JACOBI_CONJUGATE
239 printf(
"%.20g\n", Ss11.
f);
240 printf(
"%.20g %.20g\n", Ss21.
f, Ss22.
f);
241 printf(
"%.20g %.20g %.20g\n", Ss31.
f, Ss32.
f, Ss33.
f);
248 Stmp1.
f = Ssh.
f * Sqvvx.
f;
249 Stmp2.
f = Ssh.
f * Sqvvy.
f;
250 Stmp3.
f = Ssh.
f * Sqvvz.
f;
251 Ssh.
f = Ssh.
f * Sqvs.
f;
253 Sqvs.
f = Sch.
f * Sqvs.
f;
254 Sqvvx.
f = Sch.
f * Sqvvx.
f;
255 Sqvvy.
f = Sch.
f * Sqvvy.
f;
256 Sqvvz.
f = Sch.
f * Sqvvz.
f;
263 #ifdef DEBUG_JACOBI_CONJUGATE
264 printf(
"GPU q %.20g %.20g %.20g %.20g\n", Sqvvx.
f, Sqvvy.
f, Sqvvz.
f,
271 Ssh.
f = Ss32.
f * 0.5f;
274 Stmp2.
f = Ssh.
f * Ssh.
f;
276 Ssh.
ui = Stmp1.
ui & Ssh.
ui;
277 Sch.
ui = Stmp1.
ui & Stmp5.
ui;
279 Sch.
ui = Sch.
ui | Stmp2.
ui;
281 Stmp1.
f = Ssh.
f * Ssh.
f;
282 Stmp2.
f = Sch.
f * Sch.
f;
286 Ssh.
f = Stmp4.
f * Ssh.
f;
287 Sch.
f = Stmp4.
f * Sch.
f;
289 Stmp1.
ui = (Stmp2.
f <= Stmp1.
f) ? 0xffffffff : 0;
292 Ssh.
ui = ~Stmp1.
ui & Ssh.
ui;
293 Ssh.
ui = Ssh.
ui | Stmp2.
ui;
295 Sch.
ui = ~Stmp1.
ui & Sch.
ui;
296 Sch.
ui = Sch.
ui | Stmp2.
ui;
298 Stmp1.
f = Ssh.
f * Ssh.
f;
299 Stmp2.
f = Sch.
f * Sch.
f;
301 Ss.
f = Sch.
f * Ssh.
f;
304 #ifdef DEBUG_JACOBI_CONJUGATE
305 printf(
"GPU s %.20g, c %.20g, sh %.20g, ch %.20g\n", Ss.
f, Sc.
f, Ssh.
f,
314 Ss11.
f = Ss11.
f * Stmp3.
f;
315 Ss21.
f = Ss21.
f * Stmp3.
f;
316 Ss31.
f = Ss31.
f * Stmp3.
f;
317 Ss11.
f = Ss11.
f * Stmp3.
f;
319 Stmp1.
f = Ss.
f * Ss21.
f;
320 Stmp2.
f = Ss.
f * Ss31.
f;
321 Ss21.
f = Sc.
f * Ss21.
f;
322 Ss31.
f = Sc.
f * Ss31.
f;
326 Stmp2.
f = Ss.
f * Ss.
f;
327 Stmp1.
f = Ss33.
f * Stmp2.
f;
328 Stmp3.
f = Ss22.
f * Stmp2.
f;
329 Stmp4.
f = Sc.
f * Sc.
f;
330 Ss22.
f = Ss22.
f * Stmp4.
f;
331 Ss33.
f = Ss33.
f * Stmp4.
f;
336 Ss32.
f = Ss32.
f * Stmp4.
f;
337 Stmp4.
f = Sc.
f * Ss.
f;
338 Stmp2.
f = Stmp2.
f * Stmp4.
f;
339 Stmp5.
f = Stmp5.
f * Stmp4.
f;
344 #ifdef DEBUG_JACOBI_CONJUGATE
345 printf(
"%.20g\n", Ss11.
f);
346 printf(
"%.20g %.20g\n", Ss21.
f, Ss22.
f);
347 printf(
"%.20g %.20g %.20g\n", Ss31.
f, Ss32.
f, Ss33.
f);
354 Stmp1.
f = Ssh.
f * Sqvvx.
f;
355 Stmp2.
f = Ssh.
f * Sqvvy.
f;
356 Stmp3.
f = Ssh.
f * Sqvvz.
f;
357 Ssh.
f = Ssh.
f * Sqvs.
f;
359 Sqvs.
f = Sch.
f * Sqvs.
f;
360 Sqvvx.
f = Sch.
f * Sqvvx.
f;
361 Sqvvy.
f = Sch.
f * Sqvvy.
f;
362 Sqvvz.
f = Sch.
f * Sqvvz.
f;
369 #ifdef DEBUG_JACOBI_CONJUGATE
370 printf(
"GPU q %.20g %.20g %.20g %.20g\n", Sqvvx.
f, Sqvvy.
f, Sqvvz.
f,
378 Ssh.
f = Ss31.
f * 0.5f;
381 Stmp2.
f = Ssh.
f * Ssh.
f;
383 Ssh.
ui = Stmp1.
ui & Ssh.
ui;
384 Sch.
ui = Stmp1.
ui & Stmp5.
ui;
386 Sch.
ui = Sch.
ui | Stmp2.
ui;
388 Stmp1.
f = Ssh.
f * Ssh.
f;
389 Stmp2.
f = Sch.
f * Sch.
f;
393 Ssh.
f = Stmp4.
f * Ssh.
f;
394 Sch.
f = Stmp4.
f * Sch.
f;
396 Stmp1.
ui = (Stmp2.
f <= Stmp1.
f) ? 0xffffffff : 0;
399 Ssh.
ui = ~Stmp1.
ui & Ssh.
ui;
400 Ssh.
ui = Ssh.
ui | Stmp2.
ui;
402 Sch.
ui = ~Stmp1.
ui & Sch.
ui;
403 Sch.
ui = Sch.
ui | Stmp2.
ui;
405 Stmp1.
f = Ssh.
f * Ssh.
f;
406 Stmp2.
f = Sch.
f * Sch.
f;
408 Ss.
f = Sch.
f * Ssh.
f;
411 #ifdef DEBUG_JACOBI_CONJUGATE
412 printf(
"GPU s %.20g, c %.20g, sh %.20g, ch %.20g\n", Ss.
f, Sc.
f, Ssh.
f,
421 Ss22.
f = Ss22.
f * Stmp3.
f;
422 Ss32.
f = Ss32.
f * Stmp3.
f;
423 Ss21.
f = Ss21.
f * Stmp3.
f;
424 Ss22.
f = Ss22.
f * Stmp3.
f;
426 Stmp1.
f = Ss.
f * Ss32.
f;
427 Stmp2.
f = Ss.
f * Ss21.
f;
428 Ss32.
f = Sc.
f * Ss32.
f;
429 Ss21.
f = Sc.
f * Ss21.
f;
433 Stmp2.
f = Ss.
f * Ss.
f;
434 Stmp1.
f = Ss11.
f * Stmp2.
f;
435 Stmp3.
f = Ss33.
f * Stmp2.
f;
436 Stmp4.
f = Sc.
f * Sc.
f;
437 Ss33.
f = Ss33.
f * Stmp4.
f;
438 Ss11.
f = Ss11.
f * Stmp4.
f;
443 Ss31.
f = Ss31.
f * Stmp4.
f;
444 Stmp4.
f = Sc.
f * Ss.
f;
445 Stmp2.
f = Stmp2.
f * Stmp4.
f;
446 Stmp5.
f = Stmp5.
f * Stmp4.
f;
451 #ifdef DEBUG_JACOBI_CONJUGATE
452 printf(
"%.20g\n", Ss11.
f);
453 printf(
"%.20g %.20g\n", Ss21.
f, Ss22.
f);
454 printf(
"%.20g %.20g %.20g\n", Ss31.
f, Ss32.
f, Ss33.
f);
461 Stmp1.
f = Ssh.
f * Sqvvx.
f;
462 Stmp2.
f = Ssh.
f * Sqvvy.
f;
463 Stmp3.
f = Ssh.
f * Sqvvz.
f;
464 Ssh.
f = Ssh.
f * Sqvs.
f;
466 Sqvs.
f = Sch.
f * Sqvs.
f;
467 Sqvvx.
f = Sch.
f * Sqvvx.
f;
468 Sqvvy.
f = Sch.
f * Sqvvy.
f;
469 Sqvvz.
f = Sch.
f * Sqvvz.
f;
482 Stmp2.
f = Sqvs.
f * Sqvs.
f;
483 Stmp1.
f = Sqvvx.
f * Sqvvx.
f;
485 Stmp1.
f = Sqvvy.
f * Sqvvy.
f;
487 Stmp1.
f = Sqvvz.
f * Sqvvz.
f;
491 Stmp4.
f = Stmp1.
f * 0.5f;
492 Stmp3.
f = Stmp1.
f * Stmp4.
f;
493 Stmp3.
f = Stmp1.
f * Stmp3.
f;
494 Stmp3.
f = Stmp2.
f * Stmp3.
f;
498 Sqvs.
f = Sqvs.
f * Stmp1.
f;
499 Sqvvx.
f = Sqvvx.
f * Stmp1.
f;
500 Sqvvy.
f = Sqvvy.
f * Stmp1.
f;
501 Sqvvz.
f = Sqvvz.
f * Stmp1.
f;
507 Stmp1.
f = Sqvvx.
f * Sqvvx.
f;
508 Stmp2.
f = Sqvvy.
f * Sqvvy.
f;
509 Stmp3.
f = Sqvvz.
f * Sqvvz.
f;
510 Sv11.
f = Sqvs.
f * Sqvs.
f;
522 Sv32.
f = Sqvs.
f * Stmp1.
f;
523 Sv13.
f = Sqvs.
f * Stmp2.
f;
524 Sv21.
f = Sqvs.
f * Stmp3.
f;
525 Stmp1.
f = Sqvvy.
f * Stmp1.
f;
526 Stmp2.
f = Sqvvz.
f * Stmp2.
f;
527 Stmp3.
f = Sqvvx.
f * Stmp3.
f;
541 Sa12.
f = Sv12.
f * Sa11.
f;
542 Sa13.
f = Sv13.
f * Sa11.
f;
543 Sa11.
f = Sv11.
f * Sa11.
f;
544 Stmp1.
f = Sv21.
f * Stmp2.
f;
546 Stmp1.
f = Sv31.
f * Stmp3.
f;
548 Stmp1.
f = Sv22.
f * Stmp2.
f;
550 Stmp1.
f = Sv32.
f * Stmp3.
f;
552 Stmp1.
f = Sv23.
f * Stmp2.
f;
554 Stmp1.
f = Sv33.
f * Stmp3.
f;
559 Sa22.
f = Sv12.
f * Sa21.
f;
560 Sa23.
f = Sv13.
f * Sa21.
f;
561 Sa21.
f = Sv11.
f * Sa21.
f;
562 Stmp1.
f = Sv21.
f * Stmp2.
f;
564 Stmp1.
f = Sv31.
f * Stmp3.
f;
566 Stmp1.
f = Sv22.
f * Stmp2.
f;
568 Stmp1.
f = Sv32.
f * Stmp3.
f;
570 Stmp1.
f = Sv23.
f * Stmp2.
f;
572 Stmp1.
f = Sv33.
f * Stmp3.
f;
577 Sa32.
f = Sv12.
f * Sa31.
f;
578 Sa33.
f = Sv13.
f * Sa31.
f;
579 Sa31.
f = Sv11.
f * Sa31.
f;
580 Stmp1.
f = Sv21.
f * Stmp2.
f;
582 Stmp1.
f = Sv31.
f * Stmp3.
f;
584 Stmp1.
f = Sv22.
f * Stmp2.
f;
586 Stmp1.
f = Sv32.
f * Stmp3.
f;
588 Stmp1.
f = Sv23.
f * Stmp2.
f;
590 Stmp1.
f = Sv33.
f * Stmp3.
f;
597 Stmp1.
f = Sa11.
f * Sa11.
f;
598 Stmp4.
f = Sa21.
f * Sa21.
f;
600 Stmp4.
f = Sa31.
f * Sa31.
f;
603 Stmp2.
f = Sa12.
f * Sa12.
f;
604 Stmp4.
f = Sa22.
f * Sa22.
f;
606 Stmp4.
f = Sa32.
f * Sa32.
f;
609 Stmp3.
f = Sa13.
f * Sa13.
f;
610 Stmp4.
f = Sa23.
f * Sa23.
f;
612 Stmp4.
f = Sa33.
f * Sa33.
f;
617 Stmp4.
ui = (Stmp1.
f < Stmp2.
f) ? 0xffffffff : 0;
618 Stmp5.
ui = Sa11.
ui ^ Sa12.
ui;
619 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
620 Sa11.
ui = Sa11.
ui ^ Stmp5.
ui;
621 Sa12.
ui = Sa12.
ui ^ Stmp5.
ui;
623 Stmp5.
ui = Sa21.
ui ^ Sa22.
ui;
624 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
625 Sa21.
ui = Sa21.
ui ^ Stmp5.
ui;
626 Sa22.
ui = Sa22.
ui ^ Stmp5.
ui;
628 Stmp5.
ui = Sa31.
ui ^ Sa32.
ui;
629 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
630 Sa31.
ui = Sa31.
ui ^ Stmp5.
ui;
631 Sa32.
ui = Sa32.
ui ^ Stmp5.
ui;
633 Stmp5.
ui = Sv11.
ui ^ Sv12.
ui;
634 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
635 Sv11.
ui = Sv11.
ui ^ Stmp5.
ui;
636 Sv12.
ui = Sv12.
ui ^ Stmp5.
ui;
638 Stmp5.
ui = Sv21.
ui ^ Sv22.
ui;
639 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
640 Sv21.
ui = Sv21.
ui ^ Stmp5.
ui;
641 Sv22.
ui = Sv22.
ui ^ Stmp5.
ui;
643 Stmp5.
ui = Sv31.
ui ^ Sv32.
ui;
644 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
645 Sv31.
ui = Sv31.
ui ^ Stmp5.
ui;
646 Sv32.
ui = Sv32.
ui ^ Stmp5.
ui;
648 Stmp5.
ui = Stmp1.
ui ^ Stmp2.
ui;
649 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
650 Stmp1.
ui = Stmp1.
ui ^ Stmp5.
ui;
651 Stmp2.
ui = Stmp2.
ui ^ Stmp5.
ui;
657 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
661 Sa12.
f = Sa12.
f * Stmp4.
f;
662 Sa22.
f = Sa22.
f * Stmp4.
f;
663 Sa32.
f = Sa32.
f * Stmp4.
f;
665 Sv12.
f = Sv12.
f * Stmp4.
f;
666 Sv22.
f = Sv22.
f * Stmp4.
f;
667 Sv32.
f = Sv32.
f * Stmp4.
f;
671 Stmp4.
ui = (Stmp1.
f < Stmp3.
f) ? 0xffffffff : 0;
672 Stmp5.
ui = Sa11.
ui ^ Sa13.
ui;
673 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
674 Sa11.
ui = Sa11.
ui ^ Stmp5.
ui;
675 Sa13.
ui = Sa13.
ui ^ Stmp5.
ui;
677 Stmp5.
ui = Sa21.
ui ^ Sa23.
ui;
678 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
679 Sa21.
ui = Sa21.
ui ^ Stmp5.
ui;
680 Sa23.
ui = Sa23.
ui ^ Stmp5.
ui;
682 Stmp5.
ui = Sa31.
ui ^ Sa33.
ui;
683 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
684 Sa31.
ui = Sa31.
ui ^ Stmp5.
ui;
685 Sa33.
ui = Sa33.
ui ^ Stmp5.
ui;
687 Stmp5.
ui = Sv11.
ui ^ Sv13.
ui;
688 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
689 Sv11.
ui = Sv11.
ui ^ Stmp5.
ui;
690 Sv13.
ui = Sv13.
ui ^ Stmp5.
ui;
692 Stmp5.
ui = Sv21.
ui ^ Sv23.
ui;
693 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
694 Sv21.
ui = Sv21.
ui ^ Stmp5.
ui;
695 Sv23.
ui = Sv23.
ui ^ Stmp5.
ui;
697 Stmp5.
ui = Sv31.
ui ^ Sv33.
ui;
698 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
699 Sv31.
ui = Sv31.
ui ^ Stmp5.
ui;
700 Sv33.
ui = Sv33.
ui ^ Stmp5.
ui;
702 Stmp5.
ui = Stmp1.
ui ^ Stmp3.
ui;
703 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
704 Stmp1.
ui = Stmp1.
ui ^ Stmp5.
ui;
705 Stmp3.
ui = Stmp3.
ui ^ Stmp5.
ui;
711 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
715 Sa11.
f = Sa11.
f * Stmp4.
f;
716 Sa21.
f = Sa21.
f * Stmp4.
f;
717 Sa31.
f = Sa31.
f * Stmp4.
f;
719 Sv11.
f = Sv11.
f * Stmp4.
f;
720 Sv21.
f = Sv21.
f * Stmp4.
f;
721 Sv31.
f = Sv31.
f * Stmp4.
f;
725 Stmp4.
ui = (Stmp2.
f < Stmp3.
f) ? 0xffffffff : 0;
726 Stmp5.
ui = Sa12.
ui ^ Sa13.
ui;
727 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
728 Sa12.
ui = Sa12.
ui ^ Stmp5.
ui;
729 Sa13.
ui = Sa13.
ui ^ Stmp5.
ui;
731 Stmp5.
ui = Sa22.
ui ^ Sa23.
ui;
732 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
733 Sa22.
ui = Sa22.
ui ^ Stmp5.
ui;
734 Sa23.
ui = Sa23.
ui ^ Stmp5.
ui;
736 Stmp5.
ui = Sa32.
ui ^ Sa33.
ui;
737 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
738 Sa32.
ui = Sa32.
ui ^ Stmp5.
ui;
739 Sa33.
ui = Sa33.
ui ^ Stmp5.
ui;
741 Stmp5.
ui = Sv12.
ui ^ Sv13.
ui;
742 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
743 Sv12.
ui = Sv12.
ui ^ Stmp5.
ui;
744 Sv13.
ui = Sv13.
ui ^ Stmp5.
ui;
746 Stmp5.
ui = Sv22.
ui ^ Sv23.
ui;
747 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
748 Sv22.
ui = Sv22.
ui ^ Stmp5.
ui;
749 Sv23.
ui = Sv23.
ui ^ Stmp5.
ui;
751 Stmp5.
ui = Sv32.
ui ^ Sv33.
ui;
752 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
753 Sv32.
ui = Sv32.
ui ^ Stmp5.
ui;
754 Sv33.
ui = Sv33.
ui ^ Stmp5.
ui;
756 Stmp5.
ui = Stmp2.
ui ^ Stmp3.
ui;
757 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
758 Stmp2.
ui = Stmp2.
ui ^ Stmp5.
ui;
759 Stmp3.
ui = Stmp3.
ui ^ Stmp5.
ui;
765 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
769 Sa13.
f = Sa13.
f * Stmp4.
f;
770 Sa23.
f = Sa23.
f * Stmp4.
f;
771 Sa33.
f = Sa33.
f * Stmp4.
f;
773 Sv13.
f = Sv13.
f * Stmp4.
f;
774 Sv23.
f = Sv23.
f * Stmp4.
f;
775 Sv33.
f = Sv33.
f * Stmp4.
f;
791 Ssh.
f = Sa21.
f * Sa21.
f;
792 Ssh.
ui = (Ssh.
f >= gsmall_number) ? 0xffffffff : 0;
793 Ssh.
ui = Ssh.
ui & Sa21.
ui;
797 Sch.
f = max(Sch.
f, Sa11.
f);
798 Sch.
f = max(Sch.
f, gsmall_number);
799 Stmp5.
ui = (Sa11.
f >= Stmp5.
f) ? 0xffffffff : 0;
801 Stmp1.
f = Sch.
f * Sch.
f;
802 Stmp2.
f = Ssh.
f * Ssh.
f;
806 Stmp4.
f = Stmp1.
f * 0.5f;
807 Stmp3.
f = Stmp1.
f * Stmp4.
f;
808 Stmp3.
f = Stmp1.
f * Stmp3.
f;
809 Stmp3.
f = Stmp2.
f * Stmp3.
f;
812 Stmp1.
f = Stmp1.
f * Stmp2.
f;
816 Stmp1.
ui = ~Stmp5.
ui & Ssh.
ui;
817 Stmp2.
ui = ~Stmp5.
ui & Sch.
ui;
818 Sch.
ui = Stmp5.
ui & Sch.
ui;
819 Ssh.
ui = Stmp5.
ui & Ssh.
ui;
820 Sch.
ui = Sch.
ui | Stmp1.
ui;
821 Ssh.
ui = Ssh.
ui | Stmp2.
ui;
823 Stmp1.
f = Sch.
f * Sch.
f;
824 Stmp2.
f = Ssh.
f * Ssh.
f;
828 Stmp4.
f = Stmp1.
f * 0.5f;
829 Stmp3.
f = Stmp1.
f * Stmp4.
f;
830 Stmp3.
f = Stmp1.
f * Stmp3.
f;
831 Stmp3.
f = Stmp2.
f * Stmp3.
f;
835 Sch.
f = Sch.
f * Stmp1.
f;
836 Ssh.
f = Ssh.
f * Stmp1.
f;
838 Sc.
f = Sch.
f * Sch.
f;
839 Ss.
f = Ssh.
f * Ssh.
f;
841 Ss.
f = Ssh.
f * Sch.
f;
848 Stmp1.
f = Ss.
f * Sa11.
f;
849 Stmp2.
f = Ss.
f * Sa21.
f;
850 Sa11.
f = Sc.
f * Sa11.
f;
851 Sa21.
f = Sc.
f * Sa21.
f;
855 Stmp1.
f = Ss.
f * Sa12.
f;
856 Stmp2.
f = Ss.
f * Sa22.
f;
857 Sa12.
f = Sc.
f * Sa12.
f;
858 Sa22.
f = Sc.
f * Sa22.
f;
862 Stmp1.
f = Ss.
f * Sa13.
f;
863 Stmp2.
f = Ss.
f * Sa23.
f;
864 Sa13.
f = Sc.
f * Sa13.
f;
865 Sa23.
f = Sc.
f * Sa23.
f;
873 Stmp1.
f = Ss.
f * Su11.
f;
874 Stmp2.
f = Ss.
f * Su12.
f;
875 Su11.
f = Sc.
f * Su11.
f;
876 Su12.
f = Sc.
f * Su12.
f;
880 Stmp1.
f = Ss.
f * Su21.
f;
881 Stmp2.
f = Ss.
f * Su22.
f;
882 Su21.
f = Sc.
f * Su21.
f;
883 Su22.
f = Sc.
f * Su22.
f;
887 Stmp1.
f = Ss.
f * Su31.
f;
888 Stmp2.
f = Ss.
f * Su32.
f;
889 Su31.
f = Sc.
f * Su31.
f;
890 Su32.
f = Sc.
f * Su32.
f;
896 Ssh.
f = Sa31.
f * Sa31.
f;
897 Ssh.
ui = (Ssh.
f >= gsmall_number) ? 0xffffffff : 0;
898 Ssh.
ui = Ssh.
ui & Sa31.
ui;
902 Sch.
f = max(Sch.
f, Sa11.
f);
903 Sch.
f = max(Sch.
f, gsmall_number);
904 Stmp5.
ui = (Sa11.
f >= Stmp5.
f) ? 0xffffffff : 0;
906 Stmp1.
f = Sch.
f * Sch.
f;
907 Stmp2.
f = Ssh.
f * Ssh.
f;
911 Stmp4.
f = Stmp1.
f * 0.5;
912 Stmp3.
f = Stmp1.
f * Stmp4.
f;
913 Stmp3.
f = Stmp1.
f * Stmp3.
f;
914 Stmp3.
f = Stmp2.
f * Stmp3.
f;
917 Stmp1.
f = Stmp1.
f * Stmp2.
f;
921 Stmp1.
ui = ~Stmp5.
ui & Ssh.
ui;
922 Stmp2.
ui = ~Stmp5.
ui & Sch.
ui;
923 Sch.
ui = Stmp5.
ui & Sch.
ui;
924 Ssh.
ui = Stmp5.
ui & Ssh.
ui;
925 Sch.
ui = Sch.
ui | Stmp1.
ui;
926 Ssh.
ui = Ssh.
ui | Stmp2.
ui;
928 Stmp1.
f = Sch.
f * Sch.
f;
929 Stmp2.
f = Ssh.
f * Ssh.
f;
933 Stmp4.
f = Stmp1.
f * 0.5f;
934 Stmp3.
f = Stmp1.
f * Stmp4.
f;
935 Stmp3.
f = Stmp1.
f * Stmp3.
f;
936 Stmp3.
f = Stmp2.
f * Stmp3.
f;
940 Sch.
f = Sch.
f * Stmp1.
f;
941 Ssh.
f = Ssh.
f * Stmp1.
f;
943 Sc.
f = Sch.
f * Sch.
f;
944 Ss.
f = Ssh.
f * Ssh.
f;
946 Ss.
f = Ssh.
f * Sch.
f;
953 Stmp1.
f = Ss.
f * Sa11.
f;
954 Stmp2.
f = Ss.
f * Sa31.
f;
955 Sa11.
f = Sc.
f * Sa11.
f;
956 Sa31.
f = Sc.
f * Sa31.
f;
960 Stmp1.
f = Ss.
f * Sa12.
f;
961 Stmp2.
f = Ss.
f * Sa32.
f;
962 Sa12.
f = Sc.
f * Sa12.
f;
963 Sa32.
f = Sc.
f * Sa32.
f;
967 Stmp1.
f = Ss.
f * Sa13.
f;
968 Stmp2.
f = Ss.
f * Sa33.
f;
969 Sa13.
f = Sc.
f * Sa13.
f;
970 Sa33.
f = Sc.
f * Sa33.
f;
978 Stmp1.
f = Ss.
f * Su11.
f;
979 Stmp2.
f = Ss.
f * Su13.
f;
980 Su11.
f = Sc.
f * Su11.
f;
981 Su13.
f = Sc.
f * Su13.
f;
985 Stmp1.
f = Ss.
f * Su21.
f;
986 Stmp2.
f = Ss.
f * Su23.
f;
987 Su21.
f = Sc.
f * Su21.
f;
988 Su23.
f = Sc.
f * Su23.
f;
992 Stmp1.
f = Ss.
f * Su31.
f;
993 Stmp2.
f = Ss.
f * Su33.
f;
994 Su31.
f = Sc.
f * Su31.
f;
995 Su33.
f = Sc.
f * Su33.
f;
1001 Ssh.
f = Sa32.
f * Sa32.
f;
1002 Ssh.
ui = (Ssh.
f >= gsmall_number) ? 0xffffffff : 0;
1003 Ssh.
ui = Ssh.
ui & Sa32.
ui;
1007 Sch.
f = max(Sch.
f, Sa22.
f);
1008 Sch.
f = max(Sch.
f, gsmall_number);
1009 Stmp5.
ui = (Sa22.
f >= Stmp5.
f) ? 0xffffffff : 0;
1011 Stmp1.
f = Sch.
f * Sch.
f;
1012 Stmp2.
f = Ssh.
f * Ssh.
f;
1016 Stmp4.
f = Stmp1.
f * 0.5f;
1017 Stmp3.
f = Stmp1.
f * Stmp4.
f;
1018 Stmp3.
f = Stmp1.
f * Stmp3.
f;
1019 Stmp3.
f = Stmp2.
f * Stmp3.
f;
1022 Stmp1.
f = Stmp1.
f * Stmp2.
f;
1026 Stmp1.
ui = ~Stmp5.
ui & Ssh.
ui;
1027 Stmp2.
ui = ~Stmp5.
ui & Sch.
ui;
1028 Sch.
ui = Stmp5.
ui & Sch.
ui;
1029 Ssh.
ui = Stmp5.
ui & Ssh.
ui;
1030 Sch.
ui = Sch.
ui | Stmp1.
ui;
1031 Ssh.
ui = Ssh.
ui | Stmp2.
ui;
1033 Stmp1.
f = Sch.
f * Sch.
f;
1034 Stmp2.
f = Ssh.
f * Ssh.
f;
1038 Stmp4.
f = Stmp1.
f * 0.5f;
1039 Stmp3.
f = Stmp1.
f * Stmp4.
f;
1040 Stmp3.
f = Stmp1.
f * Stmp3.
f;
1041 Stmp3.
f = Stmp2.
f * Stmp3.
f;
1045 Sch.
f = Sch.
f * Stmp1.
f;
1046 Ssh.
f = Ssh.
f * Stmp1.
f;
1048 Sc.
f = Sch.
f * Sch.
f;
1049 Ss.
f = Ssh.
f * Ssh.
f;
1051 Ss.
f = Ssh.
f * Sch.
f;
1058 Stmp1.
f = Ss.
f * Sa21.
f;
1059 Stmp2.
f = Ss.
f * Sa31.
f;
1060 Sa21.
f = Sc.
f * Sa21.
f;
1061 Sa31.
f = Sc.
f * Sa31.
f;
1065 Stmp1.
f = Ss.
f * Sa22.
f;
1066 Stmp2.
f = Ss.
f * Sa32.
f;
1067 Sa22.
f = Sc.
f * Sa22.
f;
1068 Sa32.
f = Sc.
f * Sa32.
f;
1072 Stmp1.
f = Ss.
f * Sa23.
f;
1073 Stmp2.
f = Ss.
f * Sa33.
f;
1074 Sa23.
f = Sc.
f * Sa23.
f;
1075 Sa33.
f = Sc.
f * Sa33.
f;
1083 Stmp1.
f = Ss.
f * Su12.
f;
1084 Stmp2.
f = Ss.
f * Su13.
f;
1085 Su12.
f = Sc.
f * Su12.
f;
1086 Su13.
f = Sc.
f * Su13.
f;
1090 Stmp1.
f = Ss.
f * Su22.
f;
1091 Stmp2.
f = Ss.
f * Su23.
f;
1092 Su22.
f = Sc.
f * Su22.
f;
1093 Su23.
f = Sc.
f * Su23.
f;
1097 Stmp1.
f = Ss.
f * Su32.
f;
1098 Stmp2.
f = Ss.
f * Su33.
f;
1099 Su32.
f = Sc.
f * Su32.
f;
1100 Su33.
f = Sc.
f * Su33.
f;
1136 float gsmall_number = 1.e-12;
1138 un<float> Sa11, Sa21, Sa31, Sa12, Sa22, Sa32, Sa13, Sa23, Sa33;
1139 un<float> Su11, Su21, Su31, Su12, Su22, Su32, Su13, Su23, Su33;
1140 un<float> Sv11, Sv21, Sv31, Sv12, Sv22, Sv32, Sv13, Sv23, Sv33;
1142 un<float> Stmp1, Stmp2, Stmp3, Stmp4, Stmp5;
1143 un<float> Ss11, Ss21, Ss31, Ss22, Ss32, Ss33;
1160 Ss11.
f = Sa11.
f * Sa11.
f;
1161 Stmp1.
f = Sa21.
f * Sa21.
f;
1163 Stmp1.
f = Sa31.
f * Sa31.
f;
1166 Ss21.
f = Sa12.
f * Sa11.
f;
1167 Stmp1.
f = Sa22.
f * Sa21.
f;
1169 Stmp1.
f = Sa32.
f * Sa31.
f;
1172 Ss31.
f = Sa13.
f * Sa11.
f;
1173 Stmp1.
f = Sa23.
f * Sa21.
f;
1175 Stmp1.
f = Sa33.
f * Sa31.
f;
1178 Ss22.
f = Sa12.
f * Sa12.
f;
1179 Stmp1.
f = Sa22.
f * Sa22.
f;
1181 Stmp1.
f = Sa32.
f * Sa32.
f;
1184 Ss32.
f = Sa13.
f * Sa12.
f;
1185 Stmp1.
f = Sa23.
f * Sa22.
f;
1187 Stmp1.
f = Sa33.
f * Sa32.
f;
1190 Ss33.
f = Sa13.
f * Sa13.
f;
1191 Stmp1.
f = Sa23.
f * Sa23.
f;
1193 Stmp1.
f = Sa33.
f * Sa33.
f;
1204 for (
int i = 0; i < 4; i++) {
1205 Ssh.
f = Ss21.
f * 0.5f;
1208 Stmp2.
f = Ssh.
f * Ssh.
f;
1210 Ssh.
ui = Stmp1.
ui & Ssh.
ui;
1211 Sch.
ui = Stmp1.
ui & Stmp5.
ui;
1213 Sch.
ui = Sch.
ui | Stmp2.
ui;
1215 Stmp1.
f = Ssh.
f * Ssh.
f;
1216 Stmp2.
f = Sch.
f * Sch.
f;
1220 Ssh.
f = Stmp4.
f * Ssh.
f;
1221 Sch.
f = Stmp4.
f * Sch.
f;
1223 Stmp1.
ui = (Stmp2.
f <= Stmp1.
f) ? 0xffffffff : 0;
1226 Ssh.
ui = ~Stmp1.
ui & Ssh.
ui;
1227 Ssh.
ui = Ssh.
ui | Stmp2.
ui;
1229 Sch.
ui = ~Stmp1.
ui & Sch.
ui;
1230 Sch.
ui = Sch.
ui | Stmp2.
ui;
1232 Stmp1.
f = Ssh.
f * Ssh.
f;
1233 Stmp2.
f = Sch.
f * Sch.
f;
1235 Ss.
f = Sch.
f * Ssh.
f;
1238 #ifdef DEBUG_JACOBI_CONJUGATE
1239 printf(
"GPU s %.20g, c %.20g, sh %.20g, ch %.20g\n", Ss.
f, Sc.
f, Ssh.
f,
1247 Ss33.
f = Ss33.
f * Stmp3.
f;
1248 Ss31.
f = Ss31.
f * Stmp3.
f;
1249 Ss32.
f = Ss32.
f * Stmp3.
f;
1250 Ss33.
f = Ss33.
f * Stmp3.
f;
1252 Stmp1.
f = Ss.
f * Ss31.
f;
1253 Stmp2.
f = Ss.
f * Ss32.
f;
1254 Ss31.
f = Sc.
f * Ss31.
f;
1255 Ss32.
f = Sc.
f * Ss32.
f;
1259 Stmp2.
f = Ss.
f * Ss.
f;
1260 Stmp1.
f = Ss22.
f * Stmp2.
f;
1261 Stmp3.
f = Ss11.
f * Stmp2.
f;
1262 Stmp4.
f = Sc.
f * Sc.
f;
1263 Ss11.
f = Ss11.
f * Stmp4.
f;
1264 Ss22.
f = Ss22.
f * Stmp4.
f;
1269 Ss21.
f = Ss21.
f * Stmp4.
f;
1270 Stmp4.
f = Sc.
f * Ss.
f;
1271 Stmp2.
f = Stmp2.
f * Stmp4.
f;
1272 Stmp5.
f = Stmp5.
f * Stmp4.
f;
1277 #ifdef DEBUG_JACOBI_CONJUGATE
1278 printf(
"%.20g\n", Ss11.
f);
1279 printf(
"%.20g %.20g\n", Ss21.
f, Ss22.
f);
1280 printf(
"%.20g %.20g %.20g\n", Ss31.
f, Ss32.
f, Ss33.
f);
1287 Stmp1.
f = Ssh.
f * Sqvvx.
f;
1288 Stmp2.
f = Ssh.
f * Sqvvy.
f;
1289 Stmp3.
f = Ssh.
f * Sqvvz.
f;
1290 Ssh.
f = Ssh.
f * Sqvs.
f;
1292 Sqvs.
f = Sch.
f * Sqvs.
f;
1293 Sqvvx.
f = Sch.
f * Sqvvx.
f;
1294 Sqvvy.
f = Sch.
f * Sqvvy.
f;
1295 Sqvvz.
f = Sch.
f * Sqvvz.
f;
1302 #ifdef DEBUG_JACOBI_CONJUGATE
1303 printf(
"GPU q %.20g %.20g %.20g %.20g\n", Sqvvx.
f, Sqvvy.
f, Sqvvz.
f,
1310 Ssh.
f = Ss32.
f * 0.5f;
1313 Stmp2.
f = Ssh.
f * Ssh.
f;
1315 Ssh.
ui = Stmp1.
ui & Ssh.
ui;
1316 Sch.
ui = Stmp1.
ui & Stmp5.
ui;
1318 Sch.
ui = Sch.
ui | Stmp2.
ui;
1320 Stmp1.
f = Ssh.
f * Ssh.
f;
1321 Stmp2.
f = Sch.
f * Sch.
f;
1325 Ssh.
f = Stmp4.
f * Ssh.
f;
1326 Sch.
f = Stmp4.
f * Sch.
f;
1328 Stmp1.
ui = (Stmp2.
f <= Stmp1.
f) ? 0xffffffff : 0;
1331 Ssh.
ui = ~Stmp1.
ui & Ssh.
ui;
1332 Ssh.
ui = Ssh.
ui | Stmp2.
ui;
1334 Sch.
ui = ~Stmp1.
ui & Sch.
ui;
1335 Sch.
ui = Sch.
ui | Stmp2.
ui;
1337 Stmp1.
f = Ssh.
f * Ssh.
f;
1338 Stmp2.
f = Sch.
f * Sch.
f;
1340 Ss.
f = Sch.
f * Ssh.
f;
1343 #ifdef DEBUG_JACOBI_CONJUGATE
1344 printf(
"GPU s %.20g, c %.20g, sh %.20g, ch %.20g\n", Ss.
f, Sc.
f, Ssh.
f,
1353 Ss11.
f = Ss11.
f * Stmp3.
f;
1354 Ss21.
f = Ss21.
f * Stmp3.
f;
1355 Ss31.
f = Ss31.
f * Stmp3.
f;
1356 Ss11.
f = Ss11.
f * Stmp3.
f;
1358 Stmp1.
f = Ss.
f * Ss21.
f;
1359 Stmp2.
f = Ss.
f * Ss31.
f;
1360 Ss21.
f = Sc.
f * Ss21.
f;
1361 Ss31.
f = Sc.
f * Ss31.
f;
1365 Stmp2.
f = Ss.
f * Ss.
f;
1366 Stmp1.
f = Ss33.
f * Stmp2.
f;
1367 Stmp3.
f = Ss22.
f * Stmp2.
f;
1368 Stmp4.
f = Sc.
f * Sc.
f;
1369 Ss22.
f = Ss22.
f * Stmp4.
f;
1370 Ss33.
f = Ss33.
f * Stmp4.
f;
1375 Ss32.
f = Ss32.
f * Stmp4.
f;
1376 Stmp4.
f = Sc.
f * Ss.
f;
1377 Stmp2.
f = Stmp2.
f * Stmp4.
f;
1378 Stmp5.
f = Stmp5.
f * Stmp4.
f;
1383 #ifdef DEBUG_JACOBI_CONJUGATE
1384 printf(
"%.20g\n", Ss11.
f);
1385 printf(
"%.20g %.20g\n", Ss21.
f, Ss22.
f);
1386 printf(
"%.20g %.20g %.20g\n", Ss31.
f, Ss32.
f, Ss33.
f);
1393 Stmp1.
f = Ssh.
f * Sqvvx.
f;
1394 Stmp2.
f = Ssh.
f * Sqvvy.
f;
1395 Stmp3.
f = Ssh.
f * Sqvvz.
f;
1396 Ssh.
f = Ssh.
f * Sqvs.
f;
1398 Sqvs.
f = Sch.
f * Sqvs.
f;
1399 Sqvvx.
f = Sch.
f * Sqvvx.
f;
1400 Sqvvy.
f = Sch.
f * Sqvvy.
f;
1401 Sqvvz.
f = Sch.
f * Sqvvz.
f;
1408 #ifdef DEBUG_JACOBI_CONJUGATE
1409 printf(
"GPU q %.20g %.20g %.20g %.20g\n", Sqvvx.
f, Sqvvy.
f, Sqvvz.
f,
1417 Ssh.
f = Ss31.
f * 0.5f;
1420 Stmp2.
f = Ssh.
f * Ssh.
f;
1422 Ssh.
ui = Stmp1.
ui & Ssh.
ui;
1423 Sch.
ui = Stmp1.
ui & Stmp5.
ui;
1425 Sch.
ui = Sch.
ui | Stmp2.
ui;
1427 Stmp1.
f = Ssh.
f * Ssh.
f;
1428 Stmp2.
f = Sch.
f * Sch.
f;
1432 Ssh.
f = Stmp4.
f * Ssh.
f;
1433 Sch.
f = Stmp4.
f * Sch.
f;
1435 Stmp1.
ui = (Stmp2.
f <= Stmp1.
f) ? 0xffffffff : 0;
1438 Ssh.
ui = ~Stmp1.
ui & Ssh.
ui;
1439 Ssh.
ui = Ssh.
ui | Stmp2.
ui;
1441 Sch.
ui = ~Stmp1.
ui & Sch.
ui;
1442 Sch.
ui = Sch.
ui | Stmp2.
ui;
1444 Stmp1.
f = Ssh.
f * Ssh.
f;
1445 Stmp2.
f = Sch.
f * Sch.
f;
1447 Ss.
f = Sch.
f * Ssh.
f;
1450 #ifdef DEBUG_JACOBI_CONJUGATE
1451 printf(
"GPU s %.20g, c %.20g, sh %.20g, ch %.20g\n", Ss.
f, Sc.
f, Ssh.
f,
1460 Ss22.
f = Ss22.
f * Stmp3.
f;
1461 Ss32.
f = Ss32.
f * Stmp3.
f;
1462 Ss21.
f = Ss21.
f * Stmp3.
f;
1463 Ss22.
f = Ss22.
f * Stmp3.
f;
1465 Stmp1.
f = Ss.
f * Ss32.
f;
1466 Stmp2.
f = Ss.
f * Ss21.
f;
1467 Ss32.
f = Sc.
f * Ss32.
f;
1468 Ss21.
f = Sc.
f * Ss21.
f;
1472 Stmp2.
f = Ss.
f * Ss.
f;
1473 Stmp1.
f = Ss11.
f * Stmp2.
f;
1474 Stmp3.
f = Ss33.
f * Stmp2.
f;
1475 Stmp4.
f = Sc.
f * Sc.
f;
1476 Ss33.
f = Ss33.
f * Stmp4.
f;
1477 Ss11.
f = Ss11.
f * Stmp4.
f;
1482 Ss31.
f = Ss31.
f * Stmp4.
f;
1483 Stmp4.
f = Sc.
f * Ss.
f;
1484 Stmp2.
f = Stmp2.
f * Stmp4.
f;
1485 Stmp5.
f = Stmp5.
f * Stmp4.
f;
1490 #ifdef DEBUG_JACOBI_CONJUGATE
1491 printf(
"%.20g\n", Ss11.
f);
1492 printf(
"%.20g %.20g\n", Ss21.
f, Ss22.
f);
1493 printf(
"%.20g %.20g %.20g\n", Ss31.
f, Ss32.
f, Ss33.
f);
1500 Stmp1.
f = Ssh.
f * Sqvvx.
f;
1501 Stmp2.
f = Ssh.
f * Sqvvy.
f;
1502 Stmp3.
f = Ssh.
f * Sqvvz.
f;
1503 Ssh.
f = Ssh.
f * Sqvs.
f;
1505 Sqvs.
f = Sch.
f * Sqvs.
f;
1506 Sqvvx.
f = Sch.
f * Sqvvx.
f;
1507 Sqvvy.
f = Sch.
f * Sqvvy.
f;
1508 Sqvvz.
f = Sch.
f * Sqvvz.
f;
1521 Stmp2.
f = Sqvs.
f * Sqvs.
f;
1522 Stmp1.
f = Sqvvx.
f * Sqvvx.
f;
1524 Stmp1.
f = Sqvvy.
f * Sqvvy.
f;
1526 Stmp1.
f = Sqvvz.
f * Sqvvz.
f;
1530 Stmp4.
f = Stmp1.
f * 0.5f;
1531 Stmp3.
f = Stmp1.
f * Stmp4.
f;
1532 Stmp3.
f = Stmp1.
f * Stmp3.
f;
1533 Stmp3.
f = Stmp2.
f * Stmp3.
f;
1537 Sqvs.
f = Sqvs.
f * Stmp1.
f;
1538 Sqvvx.
f = Sqvvx.
f * Stmp1.
f;
1539 Sqvvy.
f = Sqvvy.
f * Stmp1.
f;
1540 Sqvvz.
f = Sqvvz.
f * Stmp1.
f;
1546 Stmp1.
f = Sqvvx.
f * Sqvvx.
f;
1547 Stmp2.
f = Sqvvy.
f * Sqvvy.
f;
1548 Stmp3.
f = Sqvvz.
f * Sqvvz.
f;
1549 Sv11.
f = Sqvs.
f * Sqvs.
f;
1561 Sv32.
f = Sqvs.
f * Stmp1.
f;
1562 Sv13.
f = Sqvs.
f * Stmp2.
f;
1563 Sv21.
f = Sqvs.
f * Stmp3.
f;
1564 Stmp1.
f = Sqvvy.
f * Stmp1.
f;
1565 Stmp2.
f = Sqvvz.
f * Stmp2.
f;
1566 Stmp3.
f = Sqvvx.
f * Stmp3.
f;
1580 Sa12.
f = Sv12.
f * Sa11.
f;
1581 Sa13.
f = Sv13.
f * Sa11.
f;
1582 Sa11.
f = Sv11.
f * Sa11.
f;
1583 Stmp1.
f = Sv21.
f * Stmp2.
f;
1585 Stmp1.
f = Sv31.
f * Stmp3.
f;
1587 Stmp1.
f = Sv22.
f * Stmp2.
f;
1589 Stmp1.
f = Sv32.
f * Stmp3.
f;
1591 Stmp1.
f = Sv23.
f * Stmp2.
f;
1593 Stmp1.
f = Sv33.
f * Stmp3.
f;
1598 Sa22.
f = Sv12.
f * Sa21.
f;
1599 Sa23.
f = Sv13.
f * Sa21.
f;
1600 Sa21.
f = Sv11.
f * Sa21.
f;
1601 Stmp1.
f = Sv21.
f * Stmp2.
f;
1603 Stmp1.
f = Sv31.
f * Stmp3.
f;
1605 Stmp1.
f = Sv22.
f * Stmp2.
f;
1607 Stmp1.
f = Sv32.
f * Stmp3.
f;
1609 Stmp1.
f = Sv23.
f * Stmp2.
f;
1611 Stmp1.
f = Sv33.
f * Stmp3.
f;
1616 Sa32.
f = Sv12.
f * Sa31.
f;
1617 Sa33.
f = Sv13.
f * Sa31.
f;
1618 Sa31.
f = Sv11.
f * Sa31.
f;
1619 Stmp1.
f = Sv21.
f * Stmp2.
f;
1621 Stmp1.
f = Sv31.
f * Stmp3.
f;
1623 Stmp1.
f = Sv22.
f * Stmp2.
f;
1625 Stmp1.
f = Sv32.
f * Stmp3.
f;
1627 Stmp1.
f = Sv23.
f * Stmp2.
f;
1629 Stmp1.
f = Sv33.
f * Stmp3.
f;
1636 Stmp1.
f = Sa11.
f * Sa11.
f;
1637 Stmp4.
f = Sa21.
f * Sa21.
f;
1639 Stmp4.
f = Sa31.
f * Sa31.
f;
1642 Stmp2.
f = Sa12.
f * Sa12.
f;
1643 Stmp4.
f = Sa22.
f * Sa22.
f;
1645 Stmp4.
f = Sa32.
f * Sa32.
f;
1648 Stmp3.
f = Sa13.
f * Sa13.
f;
1649 Stmp4.
f = Sa23.
f * Sa23.
f;
1651 Stmp4.
f = Sa33.
f * Sa33.
f;
1656 Stmp4.
ui = (Stmp1.
f < Stmp2.
f) ? 0xffffffff : 0;
1657 Stmp5.
ui = Sa11.
ui ^ Sa12.
ui;
1658 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
1659 Sa11.
ui = Sa11.
ui ^ Stmp5.
ui;
1660 Sa12.
ui = Sa12.
ui ^ Stmp5.
ui;
1662 Stmp5.
ui = Sa21.
ui ^ Sa22.
ui;
1663 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
1664 Sa21.
ui = Sa21.
ui ^ Stmp5.
ui;
1665 Sa22.
ui = Sa22.
ui ^ Stmp5.
ui;
1667 Stmp5.
ui = Sa31.
ui ^ Sa32.
ui;
1668 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
1669 Sa31.
ui = Sa31.
ui ^ Stmp5.
ui;
1670 Sa32.
ui = Sa32.
ui ^ Stmp5.
ui;
1672 Stmp5.
ui = Sv11.
ui ^ Sv12.
ui;
1673 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
1674 Sv11.
ui = Sv11.
ui ^ Stmp5.
ui;
1675 Sv12.
ui = Sv12.
ui ^ Stmp5.
ui;
1677 Stmp5.
ui = Sv21.
ui ^ Sv22.
ui;
1678 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
1679 Sv21.
ui = Sv21.
ui ^ Stmp5.
ui;
1680 Sv22.
ui = Sv22.
ui ^ Stmp5.
ui;
1682 Stmp5.
ui = Sv31.
ui ^ Sv32.
ui;
1683 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
1684 Sv31.
ui = Sv31.
ui ^ Stmp5.
ui;
1685 Sv32.
ui = Sv32.
ui ^ Stmp5.
ui;
1687 Stmp5.
ui = Stmp1.
ui ^ Stmp2.
ui;
1688 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
1689 Stmp1.
ui = Stmp1.
ui ^ Stmp5.
ui;
1690 Stmp2.
ui = Stmp2.
ui ^ Stmp5.
ui;
1696 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
1700 Sa12.
f = Sa12.
f * Stmp4.
f;
1701 Sa22.
f = Sa22.
f * Stmp4.
f;
1702 Sa32.
f = Sa32.
f * Stmp4.
f;
1704 Sv12.
f = Sv12.
f * Stmp4.
f;
1705 Sv22.
f = Sv22.
f * Stmp4.
f;
1706 Sv32.
f = Sv32.
f * Stmp4.
f;
1710 Stmp4.
ui = (Stmp1.
f < Stmp3.
f) ? 0xffffffff : 0;
1711 Stmp5.
ui = Sa11.
ui ^ Sa13.
ui;
1712 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
1713 Sa11.
ui = Sa11.
ui ^ Stmp5.
ui;
1714 Sa13.
ui = Sa13.
ui ^ Stmp5.
ui;
1716 Stmp5.
ui = Sa21.
ui ^ Sa23.
ui;
1717 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
1718 Sa21.
ui = Sa21.
ui ^ Stmp5.
ui;
1719 Sa23.
ui = Sa23.
ui ^ Stmp5.
ui;
1721 Stmp5.
ui = Sa31.
ui ^ Sa33.
ui;
1722 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
1723 Sa31.
ui = Sa31.
ui ^ Stmp5.
ui;
1724 Sa33.
ui = Sa33.
ui ^ Stmp5.
ui;
1726 Stmp5.
ui = Sv11.
ui ^ Sv13.
ui;
1727 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
1728 Sv11.
ui = Sv11.
ui ^ Stmp5.
ui;
1729 Sv13.
ui = Sv13.
ui ^ Stmp5.
ui;
1731 Stmp5.
ui = Sv21.
ui ^ Sv23.
ui;
1732 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
1733 Sv21.
ui = Sv21.
ui ^ Stmp5.
ui;
1734 Sv23.
ui = Sv23.
ui ^ Stmp5.
ui;
1736 Stmp5.
ui = Sv31.
ui ^ Sv33.
ui;
1737 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
1738 Sv31.
ui = Sv31.
ui ^ Stmp5.
ui;
1739 Sv33.
ui = Sv33.
ui ^ Stmp5.
ui;
1741 Stmp5.
ui = Stmp1.
ui ^ Stmp3.
ui;
1742 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
1743 Stmp1.
ui = Stmp1.
ui ^ Stmp5.
ui;
1744 Stmp3.
ui = Stmp3.
ui ^ Stmp5.
ui;
1750 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
1754 Sa11.
f = Sa11.
f * Stmp4.
f;
1755 Sa21.
f = Sa21.
f * Stmp4.
f;
1756 Sa31.
f = Sa31.
f * Stmp4.
f;
1758 Sv11.
f = Sv11.
f * Stmp4.
f;
1759 Sv21.
f = Sv21.
f * Stmp4.
f;
1760 Sv31.
f = Sv31.
f * Stmp4.
f;
1764 Stmp4.
ui = (Stmp2.
f < Stmp3.
f) ? 0xffffffff : 0;
1765 Stmp5.
ui = Sa12.
ui ^ Sa13.
ui;
1766 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
1767 Sa12.
ui = Sa12.
ui ^ Stmp5.
ui;
1768 Sa13.
ui = Sa13.
ui ^ Stmp5.
ui;
1770 Stmp5.
ui = Sa22.
ui ^ Sa23.
ui;
1771 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
1772 Sa22.
ui = Sa22.
ui ^ Stmp5.
ui;
1773 Sa23.
ui = Sa23.
ui ^ Stmp5.
ui;
1775 Stmp5.
ui = Sa32.
ui ^ Sa33.
ui;
1776 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
1777 Sa32.
ui = Sa32.
ui ^ Stmp5.
ui;
1778 Sa33.
ui = Sa33.
ui ^ Stmp5.
ui;
1780 Stmp5.
ui = Sv12.
ui ^ Sv13.
ui;
1781 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
1782 Sv12.
ui = Sv12.
ui ^ Stmp5.
ui;
1783 Sv13.
ui = Sv13.
ui ^ Stmp5.
ui;
1785 Stmp5.
ui = Sv22.
ui ^ Sv23.
ui;
1786 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
1787 Sv22.
ui = Sv22.
ui ^ Stmp5.
ui;
1788 Sv23.
ui = Sv23.
ui ^ Stmp5.
ui;
1790 Stmp5.
ui = Sv32.
ui ^ Sv33.
ui;
1791 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
1792 Sv32.
ui = Sv32.
ui ^ Stmp5.
ui;
1793 Sv33.
ui = Sv33.
ui ^ Stmp5.
ui;
1795 Stmp5.
ui = Stmp2.
ui ^ Stmp3.
ui;
1796 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
1797 Stmp2.
ui = Stmp2.
ui ^ Stmp5.
ui;
1798 Stmp3.
ui = Stmp3.
ui ^ Stmp5.
ui;
1804 Stmp5.
ui = Stmp5.
ui & Stmp4.
ui;
1808 Sa13.
f = Sa13.
f * Stmp4.
f;
1809 Sa23.
f = Sa23.
f * Stmp4.
f;
1810 Sa33.
f = Sa33.
f * Stmp4.
f;
1812 Sv13.
f = Sv13.
f * Stmp4.
f;
1813 Sv23.
f = Sv23.
f * Stmp4.
f;
1814 Sv33.
f = Sv33.
f * Stmp4.
f;
1830 Ssh.
f = Sa21.
f * Sa21.
f;
1831 Ssh.
ui = (Ssh.
f >= gsmall_number) ? 0xffffffff : 0;
1832 Ssh.
ui = Ssh.
ui & Sa21.
ui;
1836 Sch.
f = max(Sch.
f, Sa11.
f);
1837 Sch.
f = max(Sch.
f, gsmall_number);
1838 Stmp5.
ui = (Sa11.
f >= Stmp5.
f) ? 0xffffffff : 0;
1840 Stmp1.
f = Sch.
f * Sch.
f;
1841 Stmp2.
f = Ssh.
f * Ssh.
f;
1845 Stmp4.
f = Stmp1.
f * 0.5f;
1846 Stmp3.
f = Stmp1.
f * Stmp4.
f;
1847 Stmp3.
f = Stmp1.
f * Stmp3.
f;
1848 Stmp3.
f = Stmp2.
f * Stmp3.
f;
1851 Stmp1.
f = Stmp1.
f * Stmp2.
f;
1855 Stmp1.
ui = ~Stmp5.
ui & Ssh.
ui;
1856 Stmp2.
ui = ~Stmp5.
ui & Sch.
ui;
1857 Sch.
ui = Stmp5.
ui & Sch.
ui;
1858 Ssh.
ui = Stmp5.
ui & Ssh.
ui;
1859 Sch.
ui = Sch.
ui | Stmp1.
ui;
1860 Ssh.
ui = Ssh.
ui | Stmp2.
ui;
1862 Stmp1.
f = Sch.
f * Sch.
f;
1863 Stmp2.
f = Ssh.
f * Ssh.
f;
1867 Stmp4.
f = Stmp1.
f * 0.5f;
1868 Stmp3.
f = Stmp1.
f * Stmp4.
f;
1869 Stmp3.
f = Stmp1.
f * Stmp3.
f;
1870 Stmp3.
f = Stmp2.
f * Stmp3.
f;
1874 Sch.
f = Sch.
f * Stmp1.
f;
1875 Ssh.
f = Ssh.
f * Stmp1.
f;
1877 Sc.
f = Sch.
f * Sch.
f;
1878 Ss.
f = Ssh.
f * Ssh.
f;
1880 Ss.
f = Ssh.
f * Sch.
f;
1887 Stmp1.
f = Ss.
f * Sa11.
f;
1888 Stmp2.
f = Ss.
f * Sa21.
f;
1889 Sa11.
f = Sc.
f * Sa11.
f;
1890 Sa21.
f = Sc.
f * Sa21.
f;
1894 Stmp1.
f = Ss.
f * Sa12.
f;
1895 Stmp2.
f = Ss.
f * Sa22.
f;
1896 Sa12.
f = Sc.
f * Sa12.
f;
1897 Sa22.
f = Sc.
f * Sa22.
f;
1901 Stmp1.
f = Ss.
f * Sa13.
f;
1902 Stmp2.
f = Ss.
f * Sa23.
f;
1903 Sa13.
f = Sc.
f * Sa13.
f;
1904 Sa23.
f = Sc.
f * Sa23.
f;
1912 Stmp1.
f = Ss.
f * Su11.
f;
1913 Stmp2.
f = Ss.
f * Su12.
f;
1914 Su11.
f = Sc.
f * Su11.
f;
1915 Su12.
f = Sc.
f * Su12.
f;
1919 Stmp1.
f = Ss.
f * Su21.
f;
1920 Stmp2.
f = Ss.
f * Su22.
f;
1921 Su21.
f = Sc.
f * Su21.
f;
1922 Su22.
f = Sc.
f * Su22.
f;
1926 Stmp1.
f = Ss.
f * Su31.
f;
1927 Stmp2.
f = Ss.
f * Su32.
f;
1928 Su31.
f = Sc.
f * Su31.
f;
1929 Su32.
f = Sc.
f * Su32.
f;
1935 Ssh.
f = Sa31.
f * Sa31.
f;
1936 Ssh.
ui = (Ssh.
f >= gsmall_number) ? 0xffffffff : 0;
1937 Ssh.
ui = Ssh.
ui & Sa31.
ui;
1941 Sch.
f = max(Sch.
f, Sa11.
f);
1942 Sch.
f = max(Sch.
f, gsmall_number);
1943 Stmp5.
ui = (Sa11.
f >= Stmp5.
f) ? 0xffffffff : 0;
1945 Stmp1.
f = Sch.
f * Sch.
f;
1946 Stmp2.
f = Ssh.
f * Ssh.
f;
1950 Stmp4.
f = Stmp1.
f * 0.5;
1951 Stmp3.
f = Stmp1.
f * Stmp4.
f;
1952 Stmp3.
f = Stmp1.
f * Stmp3.
f;
1953 Stmp3.
f = Stmp2.
f * Stmp3.
f;
1956 Stmp1.
f = Stmp1.
f * Stmp2.
f;
1960 Stmp1.
ui = ~Stmp5.
ui & Ssh.
ui;
1961 Stmp2.
ui = ~Stmp5.
ui & Sch.
ui;
1962 Sch.
ui = Stmp5.
ui & Sch.
ui;
1963 Ssh.
ui = Stmp5.
ui & Ssh.
ui;
1964 Sch.
ui = Sch.
ui | Stmp1.
ui;
1965 Ssh.
ui = Ssh.
ui | Stmp2.
ui;
1967 Stmp1.
f = Sch.
f * Sch.
f;
1968 Stmp2.
f = Ssh.
f * Ssh.
f;
1972 Stmp4.
f = Stmp1.
f * 0.5f;
1973 Stmp3.
f = Stmp1.
f * Stmp4.
f;
1974 Stmp3.
f = Stmp1.
f * Stmp3.
f;
1975 Stmp3.
f = Stmp2.
f * Stmp3.
f;
1979 Sch.
f = Sch.
f * Stmp1.
f;
1980 Ssh.
f = Ssh.
f * Stmp1.
f;
1982 Sc.
f = Sch.
f * Sch.
f;
1983 Ss.
f = Ssh.
f * Ssh.
f;
1985 Ss.
f = Ssh.
f * Sch.
f;
1992 Stmp1.
f = Ss.
f * Sa11.
f;
1993 Stmp2.
f = Ss.
f * Sa31.
f;
1994 Sa11.
f = Sc.
f * Sa11.
f;
1995 Sa31.
f = Sc.
f * Sa31.
f;
1999 Stmp1.
f = Ss.
f * Sa12.
f;
2000 Stmp2.
f = Ss.
f * Sa32.
f;
2001 Sa12.
f = Sc.
f * Sa12.
f;
2002 Sa32.
f = Sc.
f * Sa32.
f;
2006 Stmp1.
f = Ss.
f * Sa13.
f;
2007 Stmp2.
f = Ss.
f * Sa33.
f;
2008 Sa13.
f = Sc.
f * Sa13.
f;
2009 Sa33.
f = Sc.
f * Sa33.
f;
2017 Stmp1.
f = Ss.
f * Su11.
f;
2018 Stmp2.
f = Ss.
f * Su13.
f;
2019 Su11.
f = Sc.
f * Su11.
f;
2020 Su13.
f = Sc.
f * Su13.
f;
2024 Stmp1.
f = Ss.
f * Su21.
f;
2025 Stmp2.
f = Ss.
f * Su23.
f;
2026 Su21.
f = Sc.
f * Su21.
f;
2027 Su23.
f = Sc.
f * Su23.
f;
2031 Stmp1.
f = Ss.
f * Su31.
f;
2032 Stmp2.
f = Ss.
f * Su33.
f;
2033 Su31.
f = Sc.
f * Su31.
f;
2034 Su33.
f = Sc.
f * Su33.
f;
2040 Ssh.
f = Sa32.
f * Sa32.
f;
2041 Ssh.
ui = (Ssh.
f >= gsmall_number) ? 0xffffffff : 0;
2042 Ssh.
ui = Ssh.
ui & Sa32.
ui;
2046 Sch.
f = max(Sch.
f, Sa22.
f);
2047 Sch.
f = max(Sch.
f, gsmall_number);
2048 Stmp5.
ui = (Sa22.
f >= Stmp5.
f) ? 0xffffffff : 0;
2050 Stmp1.
f = Sch.
f * Sch.
f;
2051 Stmp2.
f = Ssh.
f * Ssh.
f;
2055 Stmp4.
f = Stmp1.
f * 0.5f;
2056 Stmp3.
f = Stmp1.
f * Stmp4.
f;
2057 Stmp3.
f = Stmp1.
f * Stmp3.
f;
2058 Stmp3.
f = Stmp2.
f * Stmp3.
f;
2061 Stmp1.
f = Stmp1.
f * Stmp2.
f;
2065 Stmp1.
ui = ~Stmp5.
ui & Ssh.
ui;
2066 Stmp2.
ui = ~Stmp5.
ui & Sch.
ui;
2067 Sch.
ui = Stmp5.
ui & Sch.
ui;
2068 Ssh.
ui = Stmp5.
ui & Ssh.
ui;
2069 Sch.
ui = Sch.
ui | Stmp1.
ui;
2070 Ssh.
ui = Ssh.
ui | Stmp2.
ui;
2072 Stmp1.
f = Sch.
f * Sch.
f;
2073 Stmp2.
f = Ssh.
f * Ssh.
f;
2077 Stmp4.
f = Stmp1.
f * 0.5f;
2078 Stmp3.
f = Stmp1.
f * Stmp4.
f;
2079 Stmp3.
f = Stmp1.
f * Stmp3.
f;
2080 Stmp3.
f = Stmp2.
f * Stmp3.
f;
2084 Sch.
f = Sch.
f * Stmp1.
f;
2085 Ssh.
f = Ssh.
f * Stmp1.
f;
2087 Sc.
f = Sch.
f * Sch.
f;
2088 Ss.
f = Ssh.
f * Ssh.
f;
2090 Ss.
f = Ssh.
f * Sch.
f;
2097 Stmp1.
f = Ss.
f * Sa21.
f;
2098 Stmp2.
f = Ss.
f * Sa31.
f;
2099 Sa21.
f = Sc.
f * Sa21.
f;
2100 Sa31.
f = Sc.
f * Sa31.
f;
2104 Stmp1.
f = Ss.
f * Sa22.
f;
2105 Stmp2.
f = Ss.
f * Sa32.
f;
2106 Sa22.
f = Sc.
f * Sa22.
f;
2107 Sa32.
f = Sc.
f * Sa32.
f;
2111 Stmp1.
f = Ss.
f * Sa23.
f;
2112 Stmp2.
f = Ss.
f * Sa33.
f;
2113 Sa23.
f = Sc.
f * Sa23.
f;
2114 Sa33.
f = Sc.
f * Sa33.
f;
2122 Stmp1.
f = Ss.
f * Su12.
f;
2123 Stmp2.
f = Ss.
f * Su13.
f;
2124 Su12.
f = Sc.
f * Su12.
f;
2125 Su13.
f = Sc.
f * Su13.
f;
2129 Stmp1.
f = Ss.
f * Su22.
f;
2130 Stmp2.
f = Ss.
f * Su23.
f;
2131 Su22.
f = Sc.
f * Su22.
f;
2132 Su23.
f = Sc.
f * Su23.
f;
2136 Stmp1.
f = Ss.
f * Su32.
f;
2137 Stmp2.
f = Ss.
f * Su33.
f;
2138 Su32.
f = Sc.
f * Su32.
f;
2139 Su33.
f = Sc.
f * Su33.
f;
2170 template <
typename scalar_t>
2172 const scalar_t *A_3x3,
2173 const scalar_t *B_3x1,
2184 const scalar_t epsilon = 1e-10;
2185 S[0] = abs(S[0]) < epsilon ? 0 : 1.0 / S[0];
2186 S[1] = abs(S[1]) < epsilon ? 0 : 1.0 / S[1];
2187 S[2] = abs(S[2]) < epsilon ? 0 : 1.0 / S[2];
2194 S_UT[0] = U[0] * S[0];
2195 S_UT[1] = U[3] * S[0];
2196 S_UT[2] = U[6] * S[0];
2197 S_UT[3] = U[1] * S[1];
2198 S_UT[4] = U[4] * S[1];
2199 S_UT[5] = U[7] * S[1];
2200 S_UT[6] = U[2] * S[2];
2201 S_UT[7] = U[5] * S[2];
2202 S_UT[8] = U[8] * S[2];
2207 scalar_t Ainv[9] = {0};
#define OPEN3D_DEVICE
Definition: CUDAUtils.h:45
#define OPEN3D_FORCE_INLINE
Definition: CUDAUtils.h:43
#define __fsub_rn(x, y)
Definition: SVD3x3.h:63
#define __fadd_rn(x, y)
Definition: SVD3x3.h:62
#define __drsqrt_rn(x)
Definition: SVD3x3.h:68
#define gone
Definition: SVD3x3.h:39
#define __frsqrt_rn(x)
Definition: SVD3x3.h:64
#define gcosine_pi_over_eight
Definition: SVD3x3.h:42
#define __dadd_rn(x, y)
Definition: SVD3x3.h:66
#define gsine_pi_over_eight
Definition: SVD3x3.h:40
#define gfour_gamma_squared
Definition: SVD3x3.h:44
#define __dsub_rn(x, y)
Definition: SVD3x3.h:67
#define gtiny_number
Definition: SVD3x3.h:43
OPEN3D_DEVICE OPEN3D_FORCE_INLINE void svd3x3< double >(const double *A_3x3, double *U_3x3, double *S_3x1, double *V_3x3)
Definition: SVD3x3.h:93
OPEN3D_DEVICE OPEN3D_FORCE_INLINE void matmul3x3_3x3(const scalar_t *A_3x3, const scalar_t *B_3x3, scalar_t *C_3x3)
Definition: Matrix.h:48
OPEN3D_DEVICE OPEN3D_FORCE_INLINE void svd3x3< float >(const float *A_3x3, float *U_3x3, float *S_3x1, float *V_3x3)
Definition: SVD3x3.h:1132
OPEN3D_DEVICE OPEN3D_FORCE_INLINE void svd3x3(const scalar_t *A_3x3, scalar_t *U_3x3, scalar_t *S_3x1, scalar_t *V_3x3)
OPEN3D_DEVICE OPEN3D_FORCE_INLINE void solve_svd3x3(const scalar_t *A_3x3, const scalar_t *B_3x1, scalar_t *X_3x1)
Definition: SVD3x3.h:2171
OPEN3D_HOST_DEVICE OPEN3D_FORCE_INLINE void matmul3x3_3x1(const scalar_t *A_3x3, const scalar_t *B_3x1, scalar_t *C_3x1)
Definition: Matrix.h:39
Definition: PinholeCameraIntrinsic.cpp:16
unsigned int ui
Definition: SVD3x3.h:83
scalar_t f
Definition: SVD3x3.h:82