49 #include <m4ri/m4ri.h>
50 #include <m4rie/gf2e.h>
51 #include <m4rie/m4ri_functions.h>
114 mzd_concat(C->
x, A->
x, B->
x);
138 mzd_stack(C->
x, A->
x, B->
x);
161 mzd_submatrix(S->
x, M->
x, lowr, lowc*M->
w, highr, highc*M->
w);
191 B->
nrows = highr - lowr;
192 B->
ncols = highc - lowc;
193 B->
x = mzd_init_window(A->
x, lowr, B->
w*lowc, highr, B->
w*highc);
206 mzd_free_window(A->
x);
247 #define mzed_sub mzed_add
259 #define _mzed_sub _mzed_add
425 return __mzd_read_bits(A->
x, row, A->
w*col, A->
w);
440 __mzd_xor_bits(A->
x, row, A->
w*col, A->
w, elem);
455 __mzd_clear_bits(A->
x, row, A->
w*col, A->
w);
456 __mzd_xor_bits(A->
x, row, A->
w*col, A->
w, elem);
473 return mzd_cmp(A->
x,B->
x);
485 return mzd_is_zero(A->
x);
517 assert(start_col < A->ncols);
519 const rci_t start = A->
w*start_col;
520 const wi_t startblock = start/m4ri_radix;
521 const word bitmask_begin = __M4RI_RIGHT_BITMASK(m4ri_radix - (start%m4ri_radix));
522 const word bitmask_end = A->
x->high_bitmask;
524 word *_a = A->
x->rows[ar];
525 const word *_b = B->
x->rows[br];
528 if (A->
x->width - startblock > 1) {
529 _a[startblock] ^= _b[startblock] & bitmask_begin;
530 for(j=startblock+1; j<A->
x->width-1; j++)
532 _a[j] ^= _b[j] & bitmask_end;
534 _a[startblock] ^= _b[startblock] & (bitmask_begin & bitmask_end);
550 assert(start_col < A->ncols);
553 const rci_t start = A->
w*start_col;
554 const wi_t startblock = start/m4ri_radix;
555 word *_a = A->
x->rows[r];
556 const word bitmask_begin = __M4RI_RIGHT_BITMASK(m4ri_radix - (start%m4ri_radix));
557 const word bitmask_end = A->
x->high_bitmask;
558 register word __a = _a[startblock]>>(start%m4ri_radix);
559 register word __t = 0;
563 switch( (start/2) % 32 ) {
564 case 0: __t ^= ff->
mul(ff, x, __a & 0x0000000000000003ULL)<< 0; __a >>= 2;
565 case 1: __t ^= ff->
mul(ff, x, __a & 0x0000000000000003ULL)<< 2; __a >>= 2;
566 case 2: __t ^= ff->
mul(ff, x, __a & 0x0000000000000003ULL)<< 4; __a >>= 2;
567 case 3: __t ^= ff->
mul(ff, x, __a & 0x0000000000000003ULL)<< 6; __a >>= 2;
568 case 4: __t ^= ff->
mul(ff, x, __a & 0x0000000000000003ULL)<< 8; __a >>= 2;
569 case 5: __t ^= ff->
mul(ff, x, __a & 0x0000000000000003ULL)<<10; __a >>= 2;
570 case 6: __t ^= ff->
mul(ff, x, __a & 0x0000000000000003ULL)<<12; __a >>= 2;
571 case 7: __t ^= ff->
mul(ff, x, __a & 0x0000000000000003ULL)<<14; __a >>= 2;
572 case 8: __t ^= ff->
mul(ff, x, __a & 0x0000000000000003ULL)<<16; __a >>= 2;
573 case 9: __t ^= ff->
mul(ff, x, __a & 0x0000000000000003ULL)<<18; __a >>= 2;
574 case 10: __t ^= ff->
mul(ff, x, __a & 0x0000000000000003ULL)<<20; __a >>= 2;
575 case 11: __t ^= ff->
mul(ff, x, __a & 0x0000000000000003ULL)<<22; __a >>= 2;
576 case 12: __t ^= ff->
mul(ff, x, __a & 0x0000000000000003ULL)<<24; __a >>= 2;
577 case 13: __t ^= ff->
mul(ff, x, __a & 0x0000000000000003ULL)<<26; __a >>= 2;
578 case 14: __t ^= ff->
mul(ff, x, __a & 0x0000000000000003ULL)<<28; __a >>= 2;
579 case 15: __t ^= ff->
mul(ff, x, __a & 0x0000000000000003ULL)<<30; __a >>= 2;
580 case 16: __t ^= ff->
mul(ff, x, __a & 0x0000000000000003ULL)<<32; __a >>= 2;
581 case 17: __t ^= ff->
mul(ff, x, __a & 0x0000000000000003ULL)<<34; __a >>= 2;
582 case 18: __t ^= ff->
mul(ff, x, __a & 0x0000000000000003ULL)<<36; __a >>= 2;
583 case 19: __t ^= ff->
mul(ff, x, __a & 0x0000000000000003ULL)<<38; __a >>= 2;
584 case 20: __t ^= ff->
mul(ff, x, __a & 0x0000000000000003ULL)<<40; __a >>= 2;
585 case 21: __t ^= ff->
mul(ff, x, __a & 0x0000000000000003ULL)<<42; __a >>= 2;
586 case 22: __t ^= ff->
mul(ff, x, __a & 0x0000000000000003ULL)<<44; __a >>= 2;
587 case 23: __t ^= ff->
mul(ff, x, __a & 0x0000000000000003ULL)<<46; __a >>= 2;
588 case 24: __t ^= ff->
mul(ff, x, __a & 0x0000000000000003ULL)<<48; __a >>= 2;
589 case 25: __t ^= ff->
mul(ff, x, __a & 0x0000000000000003ULL)<<50; __a >>= 2;
590 case 26: __t ^= ff->
mul(ff, x, __a & 0x0000000000000003ULL)<<52; __a >>= 2;
591 case 27: __t ^= ff->
mul(ff, x, __a & 0x0000000000000003ULL)<<54; __a >>= 2;
592 case 28: __t ^= ff->
mul(ff, x, __a & 0x0000000000000003ULL)<<56; __a >>= 2;
593 case 29: __t ^= ff->
mul(ff, x, __a & 0x0000000000000003ULL)<<58; __a >>= 2;
594 case 30: __t ^= ff->
mul(ff, x, __a & 0x0000000000000003ULL)<<60; __a >>= 2;
595 case 31: __t ^= ff->
mul(ff, x, __a & 0x0000000000000003ULL)<<62;
break;
596 default: m4ri_die(
"impossible");
598 if(A->
x->width-startblock == 1) {
599 _a[startblock] &= ~(bitmask_begin & bitmask_end);
600 _a[startblock] ^= __t & bitmask_begin & bitmask_end;
603 _a[startblock] &= ~bitmask_begin;
604 _a[startblock] ^= __t & bitmask_begin;
607 for(j=startblock+1; j<A->
x->width -1; j++) {
608 __a = _a[j], __t = 0;
609 __t ^= ff->
mul(ff, x, __a & 0x0000000000000003ULL)<< 0; __a >>= 2;
610 __t ^= ff->
mul(ff, x, __a & 0x0000000000000003ULL)<< 2; __a >>= 2;
611 __t ^= ff->
mul(ff, x, __a & 0x0000000000000003ULL)<< 4; __a >>= 2;
612 __t ^= ff->
mul(ff, x, __a & 0x0000000000000003ULL)<< 6; __a >>= 2;
613 __t ^= ff->
mul(ff, x, __a & 0x0000000000000003ULL)<< 8; __a >>= 2;
614 __t ^= ff->
mul(ff, x, __a & 0x0000000000000003ULL)<<10; __a >>= 2;
615 __t ^= ff->
mul(ff, x, __a & 0x0000000000000003ULL)<<12; __a >>= 2;
616 __t ^= ff->
mul(ff, x, __a & 0x0000000000000003ULL)<<14; __a >>= 2;
617 __t ^= ff->
mul(ff, x, __a & 0x0000000000000003ULL)<<16; __a >>= 2;
618 __t ^= ff->
mul(ff, x, __a & 0x0000000000000003ULL)<<18; __a >>= 2;
619 __t ^= ff->
mul(ff, x, __a & 0x0000000000000003ULL)<<20; __a >>= 2;
620 __t ^= ff->
mul(ff, x, __a & 0x0000000000000003ULL)<<22; __a >>= 2;
621 __t ^= ff->
mul(ff, x, __a & 0x0000000000000003ULL)<<24; __a >>= 2;
622 __t ^= ff->
mul(ff, x, __a & 0x0000000000000003ULL)<<26; __a >>= 2;
623 __t ^= ff->
mul(ff, x, __a & 0x0000000000000003ULL)<<28; __a >>= 2;
624 __t ^= ff->
mul(ff, x, __a & 0x0000000000000003ULL)<<30; __a >>= 2;
625 __t ^= ff->
mul(ff, x, __a & 0x0000000000000003ULL)<<32; __a >>= 2;
626 __t ^= ff->
mul(ff, x, __a & 0x0000000000000003ULL)<<34; __a >>= 2;
627 __t ^= ff->
mul(ff, x, __a & 0x0000000000000003ULL)<<36; __a >>= 2;
628 __t ^= ff->
mul(ff, x, __a & 0x0000000000000003ULL)<<38; __a >>= 2;
629 __t ^= ff->
mul(ff, x, __a & 0x0000000000000003ULL)<<40; __a >>= 2;
630 __t ^= ff->
mul(ff, x, __a & 0x0000000000000003ULL)<<42; __a >>= 2;
631 __t ^= ff->
mul(ff, x, __a & 0x0000000000000003ULL)<<44; __a >>= 2;
632 __t ^= ff->
mul(ff, x, __a & 0x0000000000000003ULL)<<46; __a >>= 2;
633 __t ^= ff->
mul(ff, x, __a & 0x0000000000000003ULL)<<48; __a >>= 2;
634 __t ^= ff->
mul(ff, x, __a & 0x0000000000000003ULL)<<50; __a >>= 2;
635 __t ^= ff->
mul(ff, x, __a & 0x0000000000000003ULL)<<52; __a >>= 2;
636 __t ^= ff->
mul(ff, x, __a & 0x0000000000000003ULL)<<54; __a >>= 2;
637 __t ^= ff->
mul(ff, x, __a & 0x0000000000000003ULL)<<56; __a >>= 2;
638 __t ^= ff->
mul(ff, x, __a & 0x0000000000000003ULL)<<58; __a >>= 2;
639 __t ^= ff->
mul(ff, x, __a & 0x0000000000000003ULL)<<60; __a >>= 2;
640 __t ^= ff->
mul(ff, x, __a & 0x0000000000000003ULL)<<62;
644 __t = _a[j] & ~bitmask_end;
645 switch(A->
x->ncols % m4ri_radix) {
646 case 0: __t ^= ff->
mul(ff, x, (_a[j] & 0xC000000000000000ULL)>>62)<<62;
647 case 62: __t ^= ff->
mul(ff, x, (_a[j] & 0x3000000000000000ULL)>>60)<<60;
648 case 60: __t ^= ff->
mul(ff, x, (_a[j] & 0x0C00000000000000ULL)>>58)<<58;
649 case 58: __t ^= ff->
mul(ff, x, (_a[j] & 0x0300000000000000ULL)>>56)<<56;
650 case 56: __t ^= ff->
mul(ff, x, (_a[j] & 0x00C0000000000000ULL)>>54)<<54;
651 case 54: __t ^= ff->
mul(ff, x, (_a[j] & 0x0030000000000000ULL)>>52)<<52;
652 case 52: __t ^= ff->
mul(ff, x, (_a[j] & 0x000C000000000000ULL)>>50)<<50;
653 case 50: __t ^= ff->
mul(ff, x, (_a[j] & 0x0003000000000000ULL)>>48)<<48;
654 case 48: __t ^= ff->
mul(ff, x, (_a[j] & 0x0000C00000000000ULL)>>46)<<46;
655 case 46: __t ^= ff->
mul(ff, x, (_a[j] & 0x0000300000000000ULL)>>44)<<44;
656 case 44: __t ^= ff->
mul(ff, x, (_a[j] & 0x00000C0000000000ULL)>>42)<<42;
657 case 42: __t ^= ff->
mul(ff, x, (_a[j] & 0x0000030000000000ULL)>>40)<<40;
658 case 40: __t ^= ff->
mul(ff, x, (_a[j] & 0x000000C000000000ULL)>>38)<<38;
659 case 38: __t ^= ff->
mul(ff, x, (_a[j] & 0x0000003000000000ULL)>>36)<<36;
660 case 36: __t ^= ff->
mul(ff, x, (_a[j] & 0x0000000C00000000ULL)>>34)<<34;
661 case 34: __t ^= ff->
mul(ff, x, (_a[j] & 0x0000000300000000ULL)>>32)<<32;
662 case 32: __t ^= ff->
mul(ff, x, (_a[j] & 0x00000000C0000000ULL)>>30)<<30;
663 case 30: __t ^= ff->
mul(ff, x, (_a[j] & 0x0000000030000000ULL)>>28)<<28;
664 case 28: __t ^= ff->
mul(ff, x, (_a[j] & 0x000000000C000000ULL)>>26)<<26;
665 case 26: __t ^= ff->
mul(ff, x, (_a[j] & 0x0000000003000000ULL)>>24)<<24;
666 case 24: __t ^= ff->
mul(ff, x, (_a[j] & 0x0000000000C00000ULL)>>22)<<22;
667 case 22: __t ^= ff->
mul(ff, x, (_a[j] & 0x0000000000300000ULL)>>20)<<20;
668 case 20: __t ^= ff->
mul(ff, x, (_a[j] & 0x00000000000C0000ULL)>>18)<<18;
669 case 18: __t ^= ff->
mul(ff, x, (_a[j] & 0x0000000000030000ULL)>>16)<<16;
670 case 16: __t ^= ff->
mul(ff, x, (_a[j] & 0x000000000000C000ULL)>>14)<<14;
671 case 14: __t ^= ff->
mul(ff, x, (_a[j] & 0x0000000000003000ULL)>>12)<<12;
672 case 12: __t ^= ff->
mul(ff, x, (_a[j] & 0x0000000000000C00ULL)>>10)<<10;
673 case 10: __t ^= ff->
mul(ff, x, (_a[j] & 0x0000000000000300ULL)>> 8)<< 8;
674 case 8: __t ^= ff->
mul(ff, x, (_a[j] & 0x00000000000000C0ULL)>> 6)<< 6;
675 case 6: __t ^= ff->
mul(ff, x, (_a[j] & 0x0000000000000030ULL)>> 4)<< 4;
676 case 4: __t ^= ff->
mul(ff, x, (_a[j] & 0x000000000000000CULL)>> 2)<< 2;
677 case 2: __t ^= ff->
mul(ff, x, (_a[j] & 0x0000000000000003ULL)>> 0)<< 0;
681 }
else if(A->
w == 4) {
682 switch( (start/4)%16 ) {
683 case 0: __t ^= ff->
mul(ff, x, __a & 0x000000000000000FULL)<< 0; __a >>= 4;
684 case 1: __t ^= ff->
mul(ff, x, __a & 0x000000000000000FULL)<< 4; __a >>= 4;
685 case 2: __t ^= ff->
mul(ff, x, __a & 0x000000000000000FULL)<< 8; __a >>= 4;
686 case 3: __t ^= ff->
mul(ff, x, __a & 0x000000000000000FULL)<<12; __a >>= 4;
687 case 4: __t ^= ff->
mul(ff, x, __a & 0x000000000000000FULL)<<16; __a >>= 4;
688 case 5: __t ^= ff->
mul(ff, x, __a & 0x000000000000000FULL)<<20; __a >>= 4;
689 case 6: __t ^= ff->
mul(ff, x, __a & 0x000000000000000FULL)<<24; __a >>= 4;
690 case 7: __t ^= ff->
mul(ff, x, __a & 0x000000000000000FULL)<<28; __a >>= 4;
691 case 8: __t ^= ff->
mul(ff, x, __a & 0x000000000000000FULL)<<32; __a >>= 4;
692 case 9: __t ^= ff->
mul(ff, x, __a & 0x000000000000000FULL)<<36; __a >>= 4;
693 case 10: __t ^= ff->
mul(ff, x, __a & 0x000000000000000FULL)<<40; __a >>= 4;
694 case 11: __t ^= ff->
mul(ff, x, __a & 0x000000000000000FULL)<<44; __a >>= 4;
695 case 12: __t ^= ff->
mul(ff, x, __a & 0x000000000000000FULL)<<48; __a >>= 4;
696 case 13: __t ^= ff->
mul(ff, x, __a & 0x000000000000000FULL)<<52; __a >>= 4;
697 case 14: __t ^= ff->
mul(ff, x, __a & 0x000000000000000FULL)<<56; __a >>= 4;
698 case 15: __t ^= ff->
mul(ff, x, __a & 0x000000000000000FULL)<<60;
break;
699 default: m4ri_die(
"impossible");
701 if(A->
x->width-startblock == 1) {
702 _a[startblock] &= ~(bitmask_begin & bitmask_end);
703 _a[startblock] ^= __t & bitmask_begin & bitmask_end;
706 _a[startblock] &= ~bitmask_begin;
707 _a[startblock] ^= __t & bitmask_begin;
710 for(j=startblock+1; j<A->
x->width -1; j++) {
711 __a = _a[j], __t = 0;
712 __t ^= ff->
mul(ff, x, __a & 0x000000000000000FULL)<< 0; __a >>= 4;
713 __t ^= ff->
mul(ff, x, __a & 0x000000000000000FULL)<< 4; __a >>= 4;
714 __t ^= ff->
mul(ff, x, __a & 0x000000000000000FULL)<< 8; __a >>= 4;
715 __t ^= ff->
mul(ff, x, __a & 0x000000000000000FULL)<<12; __a >>= 4;
716 __t ^= ff->
mul(ff, x, __a & 0x000000000000000FULL)<<16; __a >>= 4;
717 __t ^= ff->
mul(ff, x, __a & 0x000000000000000FULL)<<20; __a >>= 4;
718 __t ^= ff->
mul(ff, x, __a & 0x000000000000000FULL)<<24; __a >>= 4;
719 __t ^= ff->
mul(ff, x, __a & 0x000000000000000FULL)<<28; __a >>= 4;
720 __t ^= ff->
mul(ff, x, __a & 0x000000000000000FULL)<<32; __a >>= 4;
721 __t ^= ff->
mul(ff, x, __a & 0x000000000000000FULL)<<36; __a >>= 4;
722 __t ^= ff->
mul(ff, x, __a & 0x000000000000000FULL)<<40; __a >>= 4;
723 __t ^= ff->
mul(ff, x, __a & 0x000000000000000FULL)<<44; __a >>= 4;
724 __t ^= ff->
mul(ff, x, __a & 0x000000000000000FULL)<<48; __a >>= 4;
725 __t ^= ff->
mul(ff, x, __a & 0x000000000000000FULL)<<52; __a >>= 4;
726 __t ^= ff->
mul(ff, x, __a & 0x000000000000000FULL)<<56; __a >>= 4;
727 __t ^= ff->
mul(ff, x, __a & 0x000000000000000FULL)<<60;
731 __t = _a[j] & ~bitmask_end;
732 switch(A->
x->ncols % m4ri_radix) {
733 case 0: __t ^= ff->
mul(ff, x, (_a[j] & 0xF000000000000000ULL)>>60)<<60;
734 case 60: __t ^= ff->
mul(ff, x, (_a[j] & 0x0F00000000000000ULL)>>56)<<56;
735 case 56: __t ^= ff->
mul(ff, x, (_a[j] & 0x00F0000000000000ULL)>>52)<<52;
736 case 52: __t ^= ff->
mul(ff, x, (_a[j] & 0x000F000000000000ULL)>>48)<<48;
737 case 48: __t ^= ff->
mul(ff, x, (_a[j] & 0x0000F00000000000ULL)>>44)<<44;
738 case 44: __t ^= ff->
mul(ff, x, (_a[j] & 0x00000F0000000000ULL)>>40)<<40;
739 case 40: __t ^= ff->
mul(ff, x, (_a[j] & 0x000000F000000000ULL)>>36)<<36;
740 case 36: __t ^= ff->
mul(ff, x, (_a[j] & 0x0000000F00000000ULL)>>32)<<32;
741 case 32: __t ^= ff->
mul(ff, x, (_a[j] & 0x00000000F0000000ULL)>>28)<<28;
742 case 28: __t ^= ff->
mul(ff, x, (_a[j] & 0x000000000F000000ULL)>>24)<<24;
743 case 24: __t ^= ff->
mul(ff, x, (_a[j] & 0x0000000000F00000ULL)>>20)<<20;
744 case 20: __t ^= ff->
mul(ff, x, (_a[j] & 0x00000000000F0000ULL)>>16)<<16;
745 case 16: __t ^= ff->
mul(ff, x, (_a[j] & 0x000000000000F000ULL)>>12)<<12;
746 case 12: __t ^= ff->
mul(ff, x, (_a[j] & 0x0000000000000F00ULL)>> 8)<< 8;
747 case 8: __t ^= ff->
mul(ff, x, (_a[j] & 0x00000000000000F0ULL)>> 4)<< 4;
748 case 4: __t ^= ff->
mul(ff, x, (_a[j] & 0x000000000000000FULL)>> 0)<< 0;
752 }
else if (A->
w == 8) {
754 register word __a0 = _a[startblock]>>(start%m4ri_radix);
756 register word __t0 = 0;
759 switch( (start/8) %8 ) {
760 case 0: __t0 ^= ff->
mul(ff, x, (__a0 & 0x00000000000000FFULL))<< 0; __a0 >>= 8;
761 case 1: __t0 ^= ff->
mul(ff, x, (__a0 & 0x00000000000000FFULL))<< 8; __a0 >>= 8;
762 case 2: __t0 ^= ff->
mul(ff, x, (__a0 & 0x00000000000000FFULL))<<16; __a0 >>= 8;
763 case 3: __t0 ^= ff->
mul(ff, x, (__a0 & 0x00000000000000FFULL))<<24; __a0 >>= 8;
764 case 4: __t0 ^= ff->
mul(ff, x, (__a0 & 0x00000000000000FFULL))<<32; __a0 >>= 8;
765 case 5: __t0 ^= ff->
mul(ff, x, (__a0 & 0x00000000000000FFULL))<<40; __a0 >>= 8;
766 case 6: __t0 ^= ff->
mul(ff, x, (__a0 & 0x00000000000000FFULL))<<48; __a0 >>= 8;
767 case 7: __t0 ^= ff->
mul(ff, x, (__a0 & 0x00000000000000FFULL))<<56;
break;
768 default: m4ri_die(
"impossible");
770 if(A->
x->width-startblock == 1) {
771 _a[startblock] &= ~(bitmask_begin & bitmask_end);
772 _a[startblock] ^= __t0 & bitmask_begin & bitmask_end;
775 _a[startblock] &= ~bitmask_begin;
776 _a[startblock] ^= __t0 & bitmask_begin;
779 for(j=startblock+1; j+2 < A->
x->width; j+=2) {
780 __a0 = _a[j], __t0 = 0;
781 __a1 = _a[j+1], __t1 = 0;
782 __t0 ^= ff->
mul(ff, x, __a0 & 0x00000000000000FFULL)<< 0; __a0 >>= 8;
783 __t1 ^= ff->
mul(ff, x, __a1 & 0x00000000000000FFULL)<< 0; __a1 >>= 8;
784 __t0 ^= ff->
mul(ff, x, __a0 & 0x00000000000000FFULL)<< 8; __a0 >>= 8;
785 __t1 ^= ff->
mul(ff, x, __a1 & 0x00000000000000FFULL)<< 8; __a1 >>= 8;
786 __t0 ^= ff->
mul(ff, x, __a0 & 0x00000000000000FFULL)<<16; __a0 >>= 8;
787 __t1 ^= ff->
mul(ff, x, __a1 & 0x00000000000000FFULL)<<16; __a1 >>= 8;
788 __t0 ^= ff->
mul(ff, x, __a0 & 0x00000000000000FFULL)<<24; __a0 >>= 8;
789 __t1 ^= ff->
mul(ff, x, __a1 & 0x00000000000000FFULL)<<24; __a1 >>= 8;
790 __t0 ^= ff->
mul(ff, x, __a0 & 0x00000000000000FFULL)<<32; __a0 >>= 8;
791 __t1 ^= ff->
mul(ff, x, __a1 & 0x00000000000000FFULL)<<32; __a1 >>= 8;
792 __t0 ^= ff->
mul(ff, x, __a0 & 0x00000000000000FFULL)<<40; __a0 >>= 8;
793 __t1 ^= ff->
mul(ff, x, __a1 & 0x00000000000000FFULL)<<40; __a1 >>= 8;
794 __t0 ^= ff->
mul(ff, x, __a0 & 0x00000000000000FFULL)<<48; __a0 >>= 8;
795 __t1 ^= ff->
mul(ff, x, __a1 & 0x00000000000000FFULL)<<48; __a1 >>= 8;
796 __t0 ^= ff->
mul(ff, x, __a0 & 0x00000000000000FFULL)<<56; __a0 >>= 8;
797 __t1 ^= ff->
mul(ff, x, __a1 & 0x00000000000000FFULL)<<56;
802 for(; j < A->
x->width-1; j++) {
803 __a0 = _a[j], __t0 = 0;
804 __t0 ^= ff->
mul(ff, x, __a0 & 0x00000000000000FFULL)<< 0; __a0 >>= 8;
805 __t0 ^= ff->
mul(ff, x, __a0 & 0x00000000000000FFULL)<< 8; __a0 >>= 8;
806 __t0 ^= ff->
mul(ff, x, __a0 & 0x00000000000000FFULL)<<16; __a0 >>= 8;
807 __t0 ^= ff->
mul(ff, x, __a0 & 0x00000000000000FFULL)<<24; __a0 >>= 8;
808 __t0 ^= ff->
mul(ff, x, __a0 & 0x00000000000000FFULL)<<32; __a0 >>= 8;
809 __t0 ^= ff->
mul(ff, x, __a0 & 0x00000000000000FFULL)<<40; __a0 >>= 8;
810 __t0 ^= ff->
mul(ff, x, __a0 & 0x00000000000000FFULL)<<48; __a0 >>= 8;
811 __t0 ^= ff->
mul(ff, x, __a0 & 0x00000000000000FFULL)<<56;
815 __t = _a[j] & ~bitmask_end;
816 switch(A->
x->ncols % m4ri_radix ) {
817 case 0: __t ^= ff->
mul(ff, x, (_a[j] & 0xFF00000000000000ULL)>>56)<<56;
818 case 56: __t ^= ff->
mul(ff, x, (_a[j] & 0x00FF000000000000ULL)>>48)<<48;
819 case 48: __t ^= ff->
mul(ff, x, (_a[j] & 0x0000FF0000000000ULL)>>40)<<40;
820 case 40: __t ^= ff->
mul(ff, x, (_a[j] & 0x000000FF00000000ULL)>>32)<<32;
821 case 32: __t ^= ff->
mul(ff, x, (_a[j] & 0x00000000FF000000ULL)>>24)<<24;
822 case 24: __t ^= ff->
mul(ff, x, (_a[j] & 0x0000000000FF0000ULL)>>16)<<16;
823 case 16: __t ^= ff->
mul(ff, x, (_a[j] & 0x000000000000FF00ULL)>> 8)<< 8;
824 case 8: __t ^= ff->
mul(ff, x, (_a[j] & 0x00000000000000FFULL)>> 0)<< 0;
828 }
else if (A->
w == 16) {
829 switch( (start/16) %4 ) {
830 case 0: __t ^= ff->
mul(ff, x, __a & 0x000000000000FFFFULL)<< 0; __a >>= 16;
831 case 1: __t ^= ff->
mul(ff, x, __a & 0x000000000000FFFFULL)<<16; __a >>= 16;
832 case 2: __t ^= ff->
mul(ff, x, __a & 0x000000000000FFFFULL)<<32; __a >>= 16;
833 case 3: __t ^= ff->
mul(ff, x, __a & 0x000000000000FFFFULL)<<48;
break;
834 default: m4ri_die(
"impossible");
836 if(A->
x->width-startblock == 1) {
837 _a[startblock] &= ~(bitmask_begin & bitmask_end);
838 _a[startblock] ^= __t & bitmask_begin & bitmask_end;
841 _a[startblock] &= ~bitmask_begin;
842 _a[startblock] ^= __t & bitmask_begin;
845 for(j=startblock+1; j+4<A->
x->width; j+=4) {
846 __a = _a[j], __t = 0;
847 __t ^= ff->
mul(ff, x, __a & 0x000000000000FFFFULL)<< 0; __a >>= 16;
848 __t ^= ff->
mul(ff, x, __a & 0x000000000000FFFFULL)<<16; __a >>= 16;
849 __t ^= ff->
mul(ff, x, __a & 0x000000000000FFFFULL)<<32; __a >>= 16;
850 __t ^= ff->
mul(ff, x, __a & 0x000000000000FFFFULL)<<48;
853 __a = _a[j+1], __t = 0;
854 __t ^= ff->
mul(ff, x, __a & 0x000000000000FFFFULL)<< 0; __a >>= 16;
855 __t ^= ff->
mul(ff, x, __a & 0x000000000000FFFFULL)<<16; __a >>= 16;
856 __t ^= ff->
mul(ff, x, __a & 0x000000000000FFFFULL)<<32; __a >>= 16;
857 __t ^= ff->
mul(ff, x, __a & 0x000000000000FFFFULL)<<48;
860 __a = _a[j+2], __t = 0;
861 __t ^= ff->
mul(ff, x, __a & 0x000000000000FFFFULL)<< 0; __a >>= 16;
862 __t ^= ff->
mul(ff, x, __a & 0x000000000000FFFFULL)<<16; __a >>= 16;
863 __t ^= ff->
mul(ff, x, __a & 0x000000000000FFFFULL)<<32; __a >>= 16;
864 __t ^= ff->
mul(ff, x, __a & 0x000000000000FFFFULL)<<48;
867 __a = _a[j+3], __t = 0;
868 __t ^= ff->
mul(ff, x, __a & 0x000000000000FFFFULL)<< 0; __a >>= 16;
869 __t ^= ff->
mul(ff, x, __a & 0x000000000000FFFFULL)<<16; __a >>= 16;
870 __t ^= ff->
mul(ff, x, __a & 0x000000000000FFFFULL)<<32; __a >>= 16;
871 __t ^= ff->
mul(ff, x, __a & 0x000000000000FFFFULL)<<48;
874 for( ; j<A->
x->width-1; j++) {
875 __a = _a[j], __t = 0;
876 __t ^= ff->
mul(ff, x, __a & 0x000000000000FFFFULL)<< 0; __a >>= 16;
877 __t ^= ff->
mul(ff, x, __a & 0x000000000000FFFFULL)<<16; __a >>= 16;
878 __t ^= ff->
mul(ff, x, __a & 0x000000000000FFFFULL)<<32; __a >>= 16;
879 __t ^= ff->
mul(ff, x, __a & 0x000000000000FFFFULL)<<48;
883 __t = _a[j] & ~bitmask_end;
884 switch(A->
x->ncols % m4ri_radix) {
885 case 0: __t ^= ff->
mul(ff, x, (_a[j] & 0xFFFF000000000000ULL)>>48)<<48;
886 case 48: __t ^= ff->
mul(ff, x, (_a[j] & 0x0000FFFF00000000ULL)>>32)<<32;
887 case 32: __t ^= ff->
mul(ff, x, (_a[j] & 0x00000000FFFF0000ULL)>>16)<<16;
888 case 16: __t ^= ff->
mul(ff, x, (_a[j] & 0x000000000000FFFFULL)>> 0)<< 0;
893 for(rci_t j=start_col; j<A->
ncols; j++) {
910 mzd_row_swap(M->
x, rowa, rowb);
927 mzd_copy_row(B->
x, i, A->
x, j);
941 for(rci_t i=0; i<M->
w; i++)
942 mzd_col_swap(M->
x,M->
w*cola+i, M->
w*colb+i);
959 mzd_col_swap_in_rows(A->
x, A->
w*cola+e, A->
w*colb+e, start_row, stop_row);
977 mzd_row_add(M->
x, sourcerow, destrow);
991 return mzd_first_zero_row(A->
x);
1020 #endif //M4RIE_MATRIX_H
mzed_t * _mzed_addmul(mzed_t *C, const mzed_t *A, const mzed_t *B)
.
Definition: mzed.c:110
static void mzed_row_add(mzed_t *M, const rci_t sourcerow, const rci_t destrow)
Add the rows sourcerow and destrow and stores the total in the row destrow.
Definition: mzed.h:976
static void mzed_free_window(mzed_t *A)
Free a matrix window created with mzed_init_window().
Definition: mzed.h:205
static void mzed_rescale_row(mzed_t *A, rci_t r, rci_t start_col, const word x)
Rescale the row r in A by X starting c.
Definition: mzed.h:549
mzed_t * mzed_init(const gf2e *ff, const rci_t m, const rci_t n)
Create a new matrix of dimension m x n over ff.
Definition: mzed.c:28
const gf2e * finite_field
Definition: mzed.h:61
static void mzed_write_elem(mzed_t *A, const rci_t row, const rci_t col, const word elem)
Write the element elem to the position (row,col) in the matrix A.
Definition: mzed.h:454
word(* mul)(const gf2e *ff, const word a, const word b)
Definition: gf2e.h:59
Dense matrices over represented as packed matrices.
Definition: mzed.h:59
static size_t gf2e_degree_to_w(const gf2e *ff)
Definition: gf2e.h:120
void mzed_print(const mzed_t *M)
Print a matrix to stdout.
Definition: mzed.c:254
mzed_t * _mzed_mul_naive(mzed_t *C, const mzed_t *A, const mzed_t *B)
using naive cubic multiplication.
Definition: mzed.c:128
deg_t degree
Definition: gf2e.h:51
static void mzed_add_elem(mzed_t *A, const rci_t row, const rci_t col, const word elem)
At the element elem to the element at position (row,col) in the matrix A.
Definition: mzed.h:439
mzd_t * x
Definition: mzed.h:60
rci_t nrows
Definition: mzed.h:62
rci_t mzed_echelonize_naive(mzed_t *A, int full)
Gaussian elimination.
Definition: mzed.c:208
static int mzed_cmp(mzed_t *A, mzed_t *B)
Return -1,0,1 if if A < B, A == B or A > B respectively.
Definition: mzed.h:472
mzed_t * mzed_mul_scalar(mzed_t *C, const word a, const mzed_t *B)
.
Definition: mzed.c:141
void mzed_randomize(mzed_t *A)
Fill matrix A with random elements.
Definition: mzed.c:44
void mzed_free(mzed_t *A)
Free a matrix created with mzed_init().
Definition: mzed.c:39
mzed_t * _mzed_mul_init(mzed_t *C, const mzed_t *A, const mzed_t *B, int clear)
Definition: mzed.c:74
void mzed_set_ui(mzed_t *A, word value)
Return diagonal matrix with value on the diagonal.
Definition: mzed.c:245
rci_t ncols
Definition: mzed.h:63
static mzed_t * mzed_init_window(const mzed_t *A, const rci_t lowr, const rci_t lowc, const rci_t highr, const rci_t highc)
Create a window/view into the matrix A.
Definition: mzed.h:187
mzed_t * _mzed_add(mzed_t *C, const mzed_t *A, const mzed_t *B)
.
Definition: mzed.c:68
static mzed_t * mzed_stack(mzed_t *C, const mzed_t *A, const mzed_t *B)
Stack A on top of B and write the result to C.
Definition: mzed.h:135
static mzed_t * mzed_concat(mzed_t *C, const mzed_t *A, const mzed_t *B)
Concatenate B to A and write the result to C.
Definition: mzed.h:111
static void mzed_row_swap(mzed_t *M, const rci_t rowa, const rci_t rowb)
Swap the two rows rowa and rowb.
Definition: mzed.h:909
static void mzed_col_swap_in_rows(mzed_t *A, const rci_t cola, const rci_t colb, const rci_t start_row, rci_t stop_row)
Swap the two columns cola and colb but only between start_row and stop_row.
Definition: mzed.h:957
mzed_t * mzed_mul(mzed_t *C, const mzed_t *A, const mzed_t *B)
.
Definition: mzed.c:90
mzed_t * mzed_addmul(mzed_t *C, const mzed_t *A, const mzed_t *B)
.
Definition: mzed.c:96
static rci_t mzed_first_zero_row(mzed_t *A)
Return the first row with all zero entries.
Definition: mzed.h:990
mzed_t * mzed_add(mzed_t *C, const mzed_t *A, const mzed_t *B)
.
Definition: mzed.c:53
static int mzed_is_zero(const mzed_t *A)
Zero test for matrix.
Definition: mzed.h:484
void mzed_add_multiple_of_row(mzed_t *A, rci_t ar, const mzed_t *B, rci_t br, word x, rci_t start_col)
Definition: mzed.c:272
mzed_t * mzed_copy(mzed_t *B, const mzed_t *A)
Copy matrix A to B.
Definition: mzed.c:196
mzed_t * _mzed_mul(mzed_t *C, const mzed_t *A, const mzed_t *B)
.
Definition: mzed.c:102
static void mzed_copy_row(mzed_t *B, rci_t i, const mzed_t *A, rci_t j)
copy row j from A to row i from B.
Definition: mzed.h:926
mzed_t * mzed_mul_naive(mzed_t *C, const mzed_t *A, const mzed_t *B)
using naive cubic multiplication.
Definition: mzed.c:118
mzed_t * mzed_addmul_naive(mzed_t *C, const mzed_t *A, const mzed_t *B)
using naive cubic multiplication.
Definition: mzed.c:123
static mzed_t * mzed_submatrix(mzed_t *S, const mzed_t *M, const rci_t lowr, const rci_t lowc, const rci_t highr, const rci_t highc)
Copy a submatrix.
Definition: mzed.h:157
static void mzed_add_row(mzed_t *A, rci_t ar, const mzed_t *B, rci_t br, rci_t start_col)
Definition: mzed.h:515
wi_t w
Definition: mzed.h:64
static void mzed_col_swap(mzed_t *M, const rci_t cola, const rci_t colb)
Swap the two columns cola and colb.
Definition: mzed.h:940
static word mzed_read_elem(const mzed_t *A, const rci_t row, const rci_t col)
Get the element at position (row,col) from the matrix A.
Definition: mzed.h:424