m4rie/mzed_8h_source.html

 #ifndef M4RIE_MZED_H

 #define M4RIE_MZED_H


 /******************************************************************************

 *

 *            M4RIE: Linear Algebra over GF(2^e)

 *

 *    Copyright (C) 2010,2011 Martin Albrecht <martinralbrecht@googlemail.com>

 *

 *  Distributed under the terms of the GNU General Public License (GPL)

 *  version 2 or higher.

 *

 *    This code is distributed in the hope that it will be useful,

 *    but WITHOUT ANY WARRANTY; without even the implied warranty of

 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU

 *    General Public License for more details.

 *

 *  The full text of the GPL is available at:

 *

 *                  http://www.gnu.org/licenses/

 ******************************************************************************/


 #include <m4ri/m4ri.h>

 #include <m4rie/gf2e.h>

 #include <m4rie/m4ri_functions.h>


 /**
  * \brief Dense matrix over GF(2^e), stored as a packed M4RI bit matrix.
  *
  * Each field element occupies w consecutive bits of a row of x: the
  * accessors in this header address element (row, col) at bit offset
  * w*col of that row.
  */
 typedef struct {

   mzd_t *x; /**< underlying packed bit matrix holding all elements */

   const gf2e *finite_field; /**< field the elements belong to */

   rci_t nrows; /**< number of rows, counted in field elements */

   rci_t ncols; /**< number of columns, counted in field elements */

   wi_t w; /**< bits reserved per element (mzed_init_window() derives it via gf2e_degree_to_w()) */

 } mzed_t;


 /**
  * \brief Create a new matrix of dimension m x n over ff.
  *
  * \param ff Finite field the elements live in.
  * \param m Number of rows.
  * \param n Number of columns.
  *
  * \return The new matrix; release it with mzed_free().
  */
 mzed_t *mzed_init(const gf2e *ff, const rci_t m, const rci_t n);


 /**
  * \brief Free a matrix created with mzed_init().
  *
  * \param A Matrix to release.
  */
 void mzed_free(mzed_t *A);


 /**
  * \brief Concatenate B to the right of A and write the result to C.
  *
  * \param C Destination, or NULL to have a matrix of A->nrows rows and
  *          A->ncols + B->ncols columns allocated over A's field.
  * \param A Left-hand block.
  * \param B Right-hand block.
  *
  * \return The destination matrix (C, or the freshly allocated one).
  */
 static inline mzed_t *mzed_concat(mzed_t *C, const mzed_t *A, const mzed_t *B) {
   mzed_t *result = C;
   if (!result) {
     const rci_t total_cols = A->ncols + B->ncols;
     result = mzed_init(A->finite_field, A->nrows, total_cols);
   }
   /* The packed representations are concatenated directly. */
   mzd_concat(result->x, A->x, B->x);
   return result;
 }


 /**
  * \brief Stack A on top of B and write the result to C.
  *
  * \param C Destination, or NULL to have a matrix of A->nrows + B->nrows
  *          rows and A->ncols columns allocated over A's field.
  * \param A Upper block.
  * \param B Lower block.
  *
  * \return The destination matrix (C, or the freshly allocated one).
  */
 static inline mzed_t *mzed_stack(mzed_t *C, const mzed_t *A, const mzed_t *B) {
   mzed_t *result = C;
   if (!result) {
     const rci_t total_rows = A->nrows + B->nrows;
     result = mzed_init(A->finite_field, total_rows, A->ncols);
   }
   /* The packed representations are stacked directly. */
   mzd_stack(result->x, A->x, B->x);
   return result;
 }


 /**
  * \brief Copy a submatrix of M into S.
  *
  * Row and column bounds are given in field elements; column bounds are
  * scaled by M->w before being handed to mzd_submatrix().
  *
  * \param S Destination, or NULL to have a (highr - lowr) x (highc - lowc)
  *          matrix allocated over M's field.
  * \param M Source matrix.
  * \param lowr First row of the submatrix.
  * \param lowc First column of the submatrix.
  * \param highr Row bound passed as the upper limit.
  * \param highc Column bound passed as the upper limit.
  *
  * \return The destination matrix (S, or the freshly allocated one).
  */
 static inline mzed_t *mzed_submatrix(mzed_t *S, const mzed_t *M, const rci_t lowr, const rci_t lowc, const rci_t highr, const rci_t highc) {
   mzed_t *dst = S;
   if (!dst) {
     dst = mzed_init(M->finite_field, highr - lowr, highc - lowc);
   }

   /* Translate element columns into bit columns of the packed matrix. */
   const rci_t low_bit = lowc * M->w;
   const rci_t high_bit = highc * M->w;
   mzd_submatrix(dst->x, M->x, lowr, low_bit, highr, high_bit);
   return dst;
 }


 static inline mzed_t *mzed_init_window(const mzed_t *A, const rci_t lowr, const rci_t lowc, const rci_t highr, const rci_t highc) {

   mzed_t *B = (mzed_t *)m4ri_mm_malloc(sizeof(mzed_t));

   B->finite_field = A->finite_field;

   B->w = gf2e_degree_to_w(A->finite_field);

   B->nrows = highr - lowr;

   B->ncols = highc - lowc;

   B->x = mzd_init_window(A->x, lowr, B->w*lowc, highr, B->w*highc);

   return B;

 }


 /**
  * \brief Free a matrix window created with mzed_init_window().
  *
  * \param A Window to release: first its packed window, then the header.
  */
 static inline void mzed_free_window(mzed_t *A) {
   mzd_t *packed = A->x;
   mzd_free_window(packed);
   m4ri_mm_free(A);
 }


 /**
  * \brief Matrix addition entry point.
  *
  * NOTE(review): presumably computes C = A + B and returns C -- the
  * implementation lives in mzed.c and is not visible here; confirm.
  */
 mzed_t *mzed_add(mzed_t *C, const mzed_t *A, const mzed_t *B);


 /**
  * \brief Underscore variant of mzed_add() with the same signature.
  *
  * NOTE(review): presumably the worker behind mzed_add() that skips
  * argument checks -- confirm against mzed.c.
  */
 mzed_t *_mzed_add(mzed_t *C, const mzed_t *A, const mzed_t *B);


 /** Subtraction is an alias for addition: over GF(2^e) every element is its own additive inverse. */
 #define mzed_sub mzed_add


 /** Alias of _mzed_add(), for the same reason mzed_sub aliases mzed_add(). */
 #define _mzed_sub _mzed_add


 /**
  * \brief Matrix multiplication entry point.
  *
  * NOTE(review): presumably computes C = A * B and returns C -- confirm
  * against mzed.c.
  */
 mzed_t *mzed_mul(mzed_t *C, const mzed_t *A, const mzed_t *B);


 /**
  * \brief Multiply-accumulate entry point.
  *
  * NOTE(review): presumably computes C = C + A * B -- confirm.
  */
 mzed_t *mzed_addmul(mzed_t *C, const mzed_t *A, const mzed_t *B);


 /**
  * \brief Underscore variant of mzed_mul() with the same signature.
  *
  * NOTE(review): presumably skips argument checks -- confirm.
  */
 mzed_t *_mzed_mul(mzed_t *C, const mzed_t *A, const mzed_t *B);


 /**
  * \brief Underscore variant of mzed_addmul() with the same signature.
  *
  * NOTE(review): presumably skips argument checks -- confirm.
  */
 mzed_t *_mzed_addmul(mzed_t *C, const mzed_t *A, const mzed_t *B);


 /**
  * \brief Multiply-accumulate using naive cubic multiplication.
  *
  * NOTE(review): presumably C = C + A * B -- confirm.
  */
 mzed_t *mzed_addmul_naive(mzed_t *C, const mzed_t *A, const mzed_t *B);


 /**
  * \brief Matrix product using naive cubic multiplication.
  *
  * NOTE(review): presumably C = A * B -- confirm.
  */
 mzed_t *mzed_mul_naive(mzed_t *C, const mzed_t *A, const mzed_t *B);


 /**
  * \brief Underscore variant of the naive cubic multiplication.
  *
  * NOTE(review): whether this overwrites or accumulates into C cannot be
  * told from the prototype -- confirm against mzed.c.
  */
 mzed_t *_mzed_mul_naive(mzed_t *C, const mzed_t *A, const mzed_t *B);


 /**
  * \brief Multiplication of the matrix B by the scalar a.
  *
  * NOTE(review): presumably C = a * B with a a field element -- confirm.
  */
 mzed_t *mzed_mul_scalar(mzed_t *C, const word a, const mzed_t *B);


 /**
  * \brief Prepare the destination matrix for a multiplication.
  *
  * NOTE(review): presumably validates/allocates C for the product of A and B
  * and zeroes it when clear is non-zero -- confirm against mzed.c.
  */
 mzed_t *_mzed_mul_init(mzed_t *C, const mzed_t *A, const mzed_t *B, int clear);


 /**
  * \brief Fill matrix A with random elements.
  */
 void mzed_randomize(mzed_t *A);


 /**
  * \brief Copy matrix A to B.
  *
  * \param B Destination matrix.
  * \param A Source matrix.
  *
  * NOTE(review): presumably allocates B when it is NULL and returns the
  * destination -- confirm against mzed.c.
  */
 mzed_t *mzed_copy(mzed_t *B, const mzed_t *A);


 /**
  * \brief Make A the diagonal matrix with value on the diagonal.
  *
  * \param A Matrix to overwrite.
  * \param value Element written to the diagonal.
  */
 void mzed_set_ui(mzed_t *A, word value);


 /**
  * \brief Get the element at position (row,col) from the matrix A.
  *
  * \param A Matrix to read from.
  * \param row Row index.
  * \param col Column index, in field elements.
  *
  * \return The A->w bits stored at bit offset A->w*col of the given row.
  */
 static inline word mzed_read_elem(const mzed_t *A, const rci_t row, const rci_t col) {
   const rci_t bit_offset = A->w * col;
   const word elem = __mzd_read_bits(A->x, row, bit_offset, A->w);
   return elem;
 }


 /**
  * \brief Add the element elem to the element at position (row,col) in A.
  *
  * Addition is performed by XOR-ing elem into the A->w bits of the slot.
  *
  * \param A Matrix to modify.
  * \param row Row index.
  * \param col Column index, in field elements.
  * \param elem Element to add.
  */
 static inline void mzed_add_elem(mzed_t *A, const rci_t row, const rci_t col, const word elem) {
   const rci_t bit_offset = A->w * col;
   __mzd_xor_bits(A->x, row, bit_offset, A->w, elem);
 }


 /**
  * \brief Write the element elem to the position (row,col) in the matrix A.
  *
  * \param A Matrix to modify.
  * \param row Row index.
  * \param col Column index, in field elements.
  * \param elem Element to store.
  */
 static inline void mzed_write_elem(mzed_t *A, const rci_t row, const rci_t col, const word elem) {
   const rci_t bit_offset = A->w * col;
   /* Zero the slot first so that the XOR below acts as a plain store. */
   __mzd_clear_bits(A->x, row, bit_offset, A->w);
   __mzd_xor_bits(A->x, row, bit_offset, A->w, elem);
 }


 /**
  * \brief Compare A and B through their packed representations.
  *
  * \param A First matrix.
  * \param B Second matrix.
  *
  * \return The result of mzd_cmp() on A->x and B->x. The finite fields are
  *         not compared.
  */
 static inline int mzed_cmp(mzed_t *A, mzed_t *B) {
   const int order = mzd_cmp(A->x, B->x);
   return order;
 }


 /**
  * \brief Zero test for matrix.
  *
  * \param A Matrix to test.
  *
  * \return The result of mzd_is_zero() on the packed representation.
  */
 static inline int mzed_is_zero(const mzed_t *A) {
   const int is_zero = mzd_is_zero(A->x);
   return is_zero;
 }


 /**
  * \brief Add a multiple of a row of B to a row of A.
  *
  * NOTE(review): presumably A[ar, c] += x * B[br, c] for all columns
  * c >= start_col -- the implementation lives in mzed.c; confirm.
  *
  * \param A Matrix whose row is updated.
  * \param ar Row index in A.
  * \param B Matrix supplying the source row.
  * \param br Row index in B.
  * \param x Scalar multiplier.
  * \param start_col First column taken into account.
  */
 void mzed_add_multiple_of_row(mzed_t *A, rci_t ar, const mzed_t *B, rci_t br, word x, rci_t start_col);


 /**
  * \brief Add row br of B to row ar of A, starting at column start_col.
  *
  * Works word-wise on the packed rows: the first affected word is masked
  * so that bits before start_col stay untouched, the last word of the row
  * is masked with the packed matrix's high_bitmask.
  *
  * \param A Matrix whose row is updated.
  * \param ar Row index in A.
  * \param B Matrix supplying the source row (same ncols and field as A).
  * \param br Row index in B.
  * \param start_col First column to add, in field elements.
  */
 static inline void mzed_add_row(mzed_t *A, rci_t ar, const mzed_t *B, rci_t br, rci_t start_col) {
   assert(A->ncols == B->ncols && A->finite_field == B->finite_field);
   assert(start_col < A->ncols);

   const rci_t first_bit = A->w * start_col;
   const wi_t first_word = first_bit / m4ri_radix;
   const word head_mask = __M4RI_RIGHT_BITMASK(m4ri_radix - (first_bit % m4ri_radix));
   const word tail_mask = A->x->high_bitmask;

   word *dst = A->x->rows[ar];
   const word *src = B->x->rows[br];

   /* Start and end fall into the same word: apply both masks at once. */
   if (!(A->x->width - first_word > 1)) {
     dst[first_word] ^= src[first_word] & (head_mask & tail_mask);
     return;
   }

   dst[first_word] ^= src[first_word] & head_mask;

   /* Full words strictly between the first and the last one. */
   wi_t k = first_word + 1;
   while (k < A->x->width - 1) {
     dst[k] ^= src[k];
     k++;
   }

   /* k now indexes the last word of the row. */
   dst[k] ^= src[k] & tail_mask;
 }


 static inline void mzed_rescale_row(mzed_t *A, rci_t r, rci_t start_col, const word x) {

   assert(start_col < A->ncols);


   const gf2e *ff = A->finite_field;

   const rci_t start = A->w*start_col;

   const wi_t startblock = start/m4ri_radix;

   word *_a = A->x->rows[r];

   const word bitmask_begin = __M4RI_RIGHT_BITMASK(m4ri_radix - (start%m4ri_radix));

   const word bitmask_end   = A->x->high_bitmask;

   register word __a = _a[startblock]>>(start%m4ri_radix);

   register word __t = 0;

   int j;


   if(A->w == 2) {

     switch( (start/2) % 32 ) {

     case  0:  __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<< 0;  __a >>= 2;

     case  1:  __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<< 2;  __a >>= 2;

     case  2:  __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<< 4;  __a >>= 2;

     case  3:  __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<< 6;  __a >>= 2;

     case  4:  __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<< 8;  __a >>= 2;

     case  5:  __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<10;  __a >>= 2;

     case  6:  __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<12;  __a >>= 2;

     case  7:  __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<14;  __a >>= 2;

     case  8:  __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<16;  __a >>= 2;

     case  9:  __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<18;  __a >>= 2;

     case 10:  __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<20;  __a >>= 2;

     case 11:  __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<22;  __a >>= 2;

     case 12:  __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<24;  __a >>= 2;

     case 13:  __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<26;  __a >>= 2;

     case 14:  __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<28;  __a >>= 2;

     case 15:  __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<30;  __a >>= 2;

     case 16:  __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<32;  __a >>= 2;

     case 17:  __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<34;  __a >>= 2;

     case 18:  __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<36;  __a >>= 2;

     case 19:  __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<38;  __a >>= 2;

     case 20:  __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<40;  __a >>= 2;

     case 21:  __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<42;  __a >>= 2;

     case 22:  __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<44;  __a >>= 2;

     case 23:  __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<46;  __a >>= 2;

     case 24:  __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<48;  __a >>= 2;

     case 25:  __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<50;  __a >>= 2;

     case 26:  __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<52;  __a >>= 2;

     case 27:  __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<54;  __a >>= 2;

     case 28:  __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<56;  __a >>= 2;

     case 29:  __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<58;  __a >>= 2;

     case 30:  __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<60;  __a >>= 2;

     case 31:  __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<62;  break;

     default: m4ri_die("impossible");

     }

     if(A->x->width-startblock == 1) {

       _a[startblock] &= ~(bitmask_begin & bitmask_end);

       _a[startblock] ^= __t & bitmask_begin & bitmask_end;

       return;

     } else {

       _a[startblock] &= ~bitmask_begin;

       _a[startblock] ^= __t & bitmask_begin;

     }


     for(j=startblock+1; j<A->x->width -1; j++) {

       __a = _a[j], __t = 0;

       __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<< 0;  __a >>= 2;

       __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<< 2;  __a >>= 2;

       __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<< 4;  __a >>= 2;

       __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<< 6;  __a >>= 2;

       __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<< 8;  __a >>= 2;

       __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<10;  __a >>= 2;

       __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<12;  __a >>= 2;

       __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<14;  __a >>= 2;

       __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<16;  __a >>= 2;

       __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<18;  __a >>= 2;

       __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<20;  __a >>= 2;

       __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<22;  __a >>= 2;

       __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<24;  __a >>= 2;

       __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<26;  __a >>= 2;

       __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<28;  __a >>= 2;

       __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<30;  __a >>= 2;

       __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<32;  __a >>= 2;

       __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<34;  __a >>= 2;

       __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<36;  __a >>= 2;

       __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<38;  __a >>= 2;

       __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<40;  __a >>= 2;

       __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<42;  __a >>= 2;

       __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<44;  __a >>= 2;

       __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<46;  __a >>= 2;

       __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<48;  __a >>= 2;

       __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<50;  __a >>= 2;

       __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<52;  __a >>= 2;

       __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<54;  __a >>= 2;

       __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<56;  __a >>= 2;

       __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<58;  __a >>= 2;

       __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<60;  __a >>= 2;

       __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<62;

       _a[j] = __t;

     }


     __t = _a[j] & ~bitmask_end;

     switch(A->x->ncols % m4ri_radix) {

     case  0: __t ^= ff->mul(ff, x, (_a[j] & 0xC000000000000000ULL)>>62)<<62;

     case 62: __t ^= ff->mul(ff, x, (_a[j] & 0x3000000000000000ULL)>>60)<<60;

     case 60: __t ^= ff->mul(ff, x, (_a[j] & 0x0C00000000000000ULL)>>58)<<58;

     case 58: __t ^= ff->mul(ff, x, (_a[j] & 0x0300000000000000ULL)>>56)<<56;

     case 56: __t ^= ff->mul(ff, x, (_a[j] & 0x00C0000000000000ULL)>>54)<<54;

     case 54: __t ^= ff->mul(ff, x, (_a[j] & 0x0030000000000000ULL)>>52)<<52;

     case 52: __t ^= ff->mul(ff, x, (_a[j] & 0x000C000000000000ULL)>>50)<<50;

     case 50: __t ^= ff->mul(ff, x, (_a[j] & 0x0003000000000000ULL)>>48)<<48;

     case 48: __t ^= ff->mul(ff, x, (_a[j] & 0x0000C00000000000ULL)>>46)<<46;

     case 46: __t ^= ff->mul(ff, x, (_a[j] & 0x0000300000000000ULL)>>44)<<44;

     case 44: __t ^= ff->mul(ff, x, (_a[j] & 0x00000C0000000000ULL)>>42)<<42;

     case 42: __t ^= ff->mul(ff, x, (_a[j] & 0x0000030000000000ULL)>>40)<<40;

     case 40: __t ^= ff->mul(ff, x, (_a[j] & 0x000000C000000000ULL)>>38)<<38;

     case 38: __t ^= ff->mul(ff, x, (_a[j] & 0x0000003000000000ULL)>>36)<<36;

     case 36: __t ^= ff->mul(ff, x, (_a[j] & 0x0000000C00000000ULL)>>34)<<34;

     case 34: __t ^= ff->mul(ff, x, (_a[j] & 0x0000000300000000ULL)>>32)<<32;

     case 32: __t ^= ff->mul(ff, x, (_a[j] & 0x00000000C0000000ULL)>>30)<<30;

     case 30: __t ^= ff->mul(ff, x, (_a[j] & 0x0000000030000000ULL)>>28)<<28;

     case 28: __t ^= ff->mul(ff, x, (_a[j] & 0x000000000C000000ULL)>>26)<<26;

     case 26: __t ^= ff->mul(ff, x, (_a[j] & 0x0000000003000000ULL)>>24)<<24;

     case 24: __t ^= ff->mul(ff, x, (_a[j] & 0x0000000000C00000ULL)>>22)<<22;

     case 22: __t ^= ff->mul(ff, x, (_a[j] & 0x0000000000300000ULL)>>20)<<20;

     case 20: __t ^= ff->mul(ff, x, (_a[j] & 0x00000000000C0000ULL)>>18)<<18;

     case 18: __t ^= ff->mul(ff, x, (_a[j] & 0x0000000000030000ULL)>>16)<<16;

     case 16: __t ^= ff->mul(ff, x, (_a[j] & 0x000000000000C000ULL)>>14)<<14;

     case 14: __t ^= ff->mul(ff, x, (_a[j] & 0x0000000000003000ULL)>>12)<<12;

     case 12: __t ^= ff->mul(ff, x, (_a[j] & 0x0000000000000C00ULL)>>10)<<10;

     case 10: __t ^= ff->mul(ff, x, (_a[j] & 0x0000000000000300ULL)>> 8)<< 8;

     case  8: __t ^= ff->mul(ff, x, (_a[j] & 0x00000000000000C0ULL)>> 6)<< 6;

     case  6: __t ^= ff->mul(ff, x, (_a[j] & 0x0000000000000030ULL)>> 4)<< 4;

     case  4: __t ^= ff->mul(ff, x, (_a[j] & 0x000000000000000CULL)>> 2)<< 2;

     case  2: __t ^= ff->mul(ff, x, (_a[j] & 0x0000000000000003ULL)>> 0)<< 0;

     };

     _a[j] = __t;


   } else if(A->w == 4) {

     switch( (start/4)%16 ) {

     case  0: __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<< 0;  __a >>= 4;

     case  1: __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<< 4;  __a >>= 4;

     case  2: __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<< 8;  __a >>= 4;

     case  3: __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<12;  __a >>= 4;

     case  4: __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<16;  __a >>= 4;

     case  5: __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<20;  __a >>= 4;

     case  6: __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<24;  __a >>= 4;

     case  7: __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<28;  __a >>= 4;

     case  8: __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<32;  __a >>= 4;

     case  9: __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<36;  __a >>= 4;

     case 10: __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<40;  __a >>= 4;

     case 11: __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<44;  __a >>= 4;

     case 12: __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<48;  __a >>= 4;

     case 13: __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<52;  __a >>= 4;

     case 14: __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<56;  __a >>= 4;

     case 15: __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<60;  break;

     default: m4ri_die("impossible");

     }

     if(A->x->width-startblock == 1) {

       _a[startblock] &= ~(bitmask_begin & bitmask_end);

       _a[startblock] ^= __t & bitmask_begin & bitmask_end;

       return;

     } else {

       _a[startblock] &= ~bitmask_begin;

       _a[startblock] ^= __t & bitmask_begin;

     }


     for(j=startblock+1; j<A->x->width -1; j++) {

       __a = _a[j], __t = 0;

       __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<< 0;  __a >>= 4;

       __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<< 4;  __a >>= 4;

       __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<< 8;  __a >>= 4;

       __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<12;  __a >>= 4;

       __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<16;  __a >>= 4;

       __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<20;  __a >>= 4;

       __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<24;  __a >>= 4;

       __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<28;  __a >>= 4;

       __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<32;  __a >>= 4;

       __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<36;  __a >>= 4;

       __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<40;  __a >>= 4;

       __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<44;  __a >>= 4;

       __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<48;  __a >>= 4;

       __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<52;  __a >>= 4;

       __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<56;  __a >>= 4;

       __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<60;

       _a[j] = __t;

     }


     __t = _a[j] & ~bitmask_end;

     switch(A->x->ncols % m4ri_radix) {

     case  0: __t ^= ff->mul(ff, x, (_a[j] & 0xF000000000000000ULL)>>60)<<60;

     case 60: __t ^= ff->mul(ff, x, (_a[j] & 0x0F00000000000000ULL)>>56)<<56;

     case 56: __t ^= ff->mul(ff, x, (_a[j] & 0x00F0000000000000ULL)>>52)<<52;

     case 52: __t ^= ff->mul(ff, x, (_a[j] & 0x000F000000000000ULL)>>48)<<48;

     case 48: __t ^= ff->mul(ff, x, (_a[j] & 0x0000F00000000000ULL)>>44)<<44;

     case 44: __t ^= ff->mul(ff, x, (_a[j] & 0x00000F0000000000ULL)>>40)<<40;

     case 40: __t ^= ff->mul(ff, x, (_a[j] & 0x000000F000000000ULL)>>36)<<36;

     case 36: __t ^= ff->mul(ff, x, (_a[j] & 0x0000000F00000000ULL)>>32)<<32;

     case 32: __t ^= ff->mul(ff, x, (_a[j] & 0x00000000F0000000ULL)>>28)<<28;

     case 28: __t ^= ff->mul(ff, x, (_a[j] & 0x000000000F000000ULL)>>24)<<24;

     case 24: __t ^= ff->mul(ff, x, (_a[j] & 0x0000000000F00000ULL)>>20)<<20;

     case 20: __t ^= ff->mul(ff, x, (_a[j] & 0x00000000000F0000ULL)>>16)<<16;

     case 16: __t ^= ff->mul(ff, x, (_a[j] & 0x000000000000F000ULL)>>12)<<12;

     case 12: __t ^= ff->mul(ff, x, (_a[j] & 0x0000000000000F00ULL)>> 8)<< 8;

     case  8: __t ^= ff->mul(ff, x, (_a[j] & 0x00000000000000F0ULL)>> 4)<< 4;

     case  4: __t ^= ff->mul(ff, x, (_a[j] & 0x000000000000000FULL)>> 0)<< 0;

     };

     _a[j] = __t;


   } else if (A->w == 8) {


     register word __a0 = _a[startblock]>>(start%m4ri_radix);

     register word __a1;

     register word __t0 = 0;

     register word __t1;


     switch( (start/8) %8 ) {

     case 0: __t0 ^= ff->mul(ff, x, (__a0 & 0x00000000000000FFULL))<< 0; __a0 >>= 8;

     case 1: __t0 ^= ff->mul(ff, x, (__a0 & 0x00000000000000FFULL))<< 8; __a0 >>= 8;

     case 2: __t0 ^= ff->mul(ff, x, (__a0 & 0x00000000000000FFULL))<<16; __a0 >>= 8;

     case 3: __t0 ^= ff->mul(ff, x, (__a0 & 0x00000000000000FFULL))<<24; __a0 >>= 8;

     case 4: __t0 ^= ff->mul(ff, x, (__a0 & 0x00000000000000FFULL))<<32; __a0 >>= 8;

     case 5: __t0 ^= ff->mul(ff, x, (__a0 & 0x00000000000000FFULL))<<40; __a0 >>= 8;

     case 6: __t0 ^= ff->mul(ff, x, (__a0 & 0x00000000000000FFULL))<<48; __a0 >>= 8;

     case 7: __t0 ^= ff->mul(ff, x, (__a0 & 0x00000000000000FFULL))<<56; break;

     default: m4ri_die("impossible");

     }

     if(A->x->width-startblock == 1) {

       _a[startblock] &= ~(bitmask_begin & bitmask_end);

       _a[startblock] ^= __t0 & bitmask_begin & bitmask_end;

       return;

     } else {

       _a[startblock] &= ~bitmask_begin;

       _a[startblock] ^= __t0 & bitmask_begin;

     }


     for(j=startblock+1; j+2 < A->x->width; j+=2) {

       __a0 = _a[j], __t0 = 0;

       __a1 = _a[j+1], __t1 = 0;

       __t0 ^= ff->mul(ff, x, __a0 & 0x00000000000000FFULL)<< 0; __a0 >>= 8;

       __t1 ^= ff->mul(ff, x, __a1 & 0x00000000000000FFULL)<< 0; __a1 >>= 8;

       __t0 ^= ff->mul(ff, x, __a0 & 0x00000000000000FFULL)<< 8; __a0 >>= 8;

       __t1 ^= ff->mul(ff, x, __a1 & 0x00000000000000FFULL)<< 8; __a1 >>= 8;

       __t0 ^= ff->mul(ff, x, __a0 & 0x00000000000000FFULL)<<16; __a0 >>= 8;

       __t1 ^= ff->mul(ff, x, __a1 & 0x00000000000000FFULL)<<16; __a1 >>= 8;

       __t0 ^= ff->mul(ff, x, __a0 & 0x00000000000000FFULL)<<24; __a0 >>= 8;

       __t1 ^= ff->mul(ff, x, __a1 & 0x00000000000000FFULL)<<24; __a1 >>= 8;

       __t0 ^= ff->mul(ff, x, __a0 & 0x00000000000000FFULL)<<32; __a0 >>= 8;

       __t1 ^= ff->mul(ff, x, __a1 & 0x00000000000000FFULL)<<32; __a1 >>= 8;

       __t0 ^= ff->mul(ff, x, __a0 & 0x00000000000000FFULL)<<40; __a0 >>= 8;

       __t1 ^= ff->mul(ff, x, __a1 & 0x00000000000000FFULL)<<40; __a1 >>= 8;

       __t0 ^= ff->mul(ff, x, __a0 & 0x00000000000000FFULL)<<48; __a0 >>= 8;

       __t1 ^= ff->mul(ff, x, __a1 & 0x00000000000000FFULL)<<48; __a1 >>= 8;

       __t0 ^= ff->mul(ff, x, __a0 & 0x00000000000000FFULL)<<56; __a0 >>= 8;

       __t1 ^= ff->mul(ff, x, __a1 & 0x00000000000000FFULL)<<56;

       _a[j+0] = __t0;

       _a[j+1] = __t1;

     }


     for(; j < A->x->width-1; j++) {

       __a0 = _a[j], __t0 = 0;

       __t0 ^= ff->mul(ff, x, __a0 & 0x00000000000000FFULL)<< 0; __a0 >>= 8;

       __t0 ^= ff->mul(ff, x, __a0 & 0x00000000000000FFULL)<< 8; __a0 >>= 8;

       __t0 ^= ff->mul(ff, x, __a0 & 0x00000000000000FFULL)<<16; __a0 >>= 8;

       __t0 ^= ff->mul(ff, x, __a0 & 0x00000000000000FFULL)<<24; __a0 >>= 8;

       __t0 ^= ff->mul(ff, x, __a0 & 0x00000000000000FFULL)<<32; __a0 >>= 8;

       __t0 ^= ff->mul(ff, x, __a0 & 0x00000000000000FFULL)<<40; __a0 >>= 8;

       __t0 ^= ff->mul(ff, x, __a0 & 0x00000000000000FFULL)<<48; __a0 >>= 8;

       __t0 ^= ff->mul(ff, x, __a0 & 0x00000000000000FFULL)<<56;

       _a[j] = __t0;

     }


     __t = _a[j] & ~bitmask_end;

     switch(A->x->ncols % m4ri_radix ) {

     case  0: __t ^= ff->mul(ff, x, (_a[j] & 0xFF00000000000000ULL)>>56)<<56;

     case 56: __t ^= ff->mul(ff, x, (_a[j] & 0x00FF000000000000ULL)>>48)<<48;

     case 48: __t ^= ff->mul(ff, x, (_a[j] & 0x0000FF0000000000ULL)>>40)<<40;

     case 40: __t ^= ff->mul(ff, x, (_a[j] & 0x000000FF00000000ULL)>>32)<<32;

     case 32: __t ^= ff->mul(ff, x, (_a[j] & 0x00000000FF000000ULL)>>24)<<24;

     case 24: __t ^= ff->mul(ff, x, (_a[j] & 0x0000000000FF0000ULL)>>16)<<16;

     case 16: __t ^= ff->mul(ff, x, (_a[j] & 0x000000000000FF00ULL)>> 8)<< 8;

     case  8: __t ^= ff->mul(ff, x, (_a[j] & 0x00000000000000FFULL)>> 0)<< 0;

     };

     _a[j] = __t;


   } else if (A->w == 16) {

     switch( (start/16) %4 ) {

     case 0: __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<< 0; __a >>= 16;

     case 1: __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<<16; __a >>= 16;

     case 2: __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<<32; __a >>= 16;

     case 3: __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<<48; break;

     default: m4ri_die("impossible");

     }

     if(A->x->width-startblock == 1) {

       _a[startblock] &= ~(bitmask_begin & bitmask_end);

       _a[startblock] ^= __t & bitmask_begin & bitmask_end;

       return;

     } else {

       _a[startblock] &= ~bitmask_begin;

       _a[startblock] ^= __t & bitmask_begin;

     }


     for(j=startblock+1; j+4<A->x->width; j+=4) {

       __a = _a[j], __t = 0;

       __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<< 0; __a >>= 16;

       __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<<16; __a >>= 16;

       __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<<32; __a >>= 16;

       __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<<48;

       _a[j] = __t;


       __a = _a[j+1], __t = 0;

       __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<< 0; __a >>= 16;

       __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<<16; __a >>= 16;

       __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<<32; __a >>= 16;

       __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<<48;

       _a[j+1] = __t;


       __a = _a[j+2], __t = 0;

       __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<< 0; __a >>= 16;

       __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<<16; __a >>= 16;

       __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<<32; __a >>= 16;

       __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<<48;

       _a[j+2] = __t;


       __a = _a[j+3], __t = 0;

       __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<< 0; __a >>= 16;

       __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<<16; __a >>= 16;

       __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<<32; __a >>= 16;

       __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<<48;

       _a[j+3] = __t;

     }

     for( ; j<A->x->width-1; j++) {

       __a = _a[j], __t = 0;

       __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<< 0; __a >>= 16;

       __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<<16; __a >>= 16;

       __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<<32; __a >>= 16;

       __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<<48;

       _a[j] = __t;

     }


     __t = _a[j] & ~bitmask_end;

     switch(A->x->ncols % m4ri_radix) {

     case  0: __t ^= ff->mul(ff, x, (_a[j] & 0xFFFF000000000000ULL)>>48)<<48;

     case 48: __t ^= ff->mul(ff, x, (_a[j] & 0x0000FFFF00000000ULL)>>32)<<32;

     case 32: __t ^= ff->mul(ff, x, (_a[j] & 0x00000000FFFF0000ULL)>>16)<<16;

     case 16: __t ^= ff->mul(ff, x, (_a[j] & 0x000000000000FFFFULL)>> 0)<< 0;

     };

     _a[j] = __t;


   } else {

     for(rci_t j=start_col; j<A->ncols; j++) {

       mzed_write_elem(A, r, j, ff->mul(ff, x, mzed_read_elem(A, r, j)));

     }

   }

 }


 /**
  * \brief Swap the two rows rowa and rowb.
  *
  * \param M Matrix to modify.
  * \param rowa Index of the first row.
  * \param rowb Index of the second row.
  */
 static inline void mzed_row_swap(mzed_t *M, const rci_t rowa, const rci_t rowb) {
   mzd_t *packed = M->x;
   mzd_row_swap(packed, rowa, rowb);
 }


 /**
  * \brief Copy row j of A to row i of B.
  *
  * \param B Destination matrix.
  * \param i Destination row index.
  * \param A Source matrix.
  * \param j Source row index.
  */
 static inline void mzed_copy_row(mzed_t* B, rci_t i, const mzed_t* A, rci_t j) {
   mzd_t *dst = B->x;
   mzd_copy_row(dst, i, A->x, j);
 }


 /**
  * \brief Swap the two columns cola and colb.
  *
  * An element column spans M->w bit columns of the packed matrix; they are
  * swapped one bit column at a time.
  *
  * \param M Matrix to modify.
  * \param cola Index of the first column, in field elements.
  * \param colb Index of the second column, in field elements.
  */
 static inline void mzed_col_swap(mzed_t *M, const rci_t cola, const rci_t colb) {
   const rci_t base_a = M->w * cola;
   const rci_t base_b = M->w * colb;
   rci_t bit = 0;
   while (bit < M->w) {
     mzd_col_swap(M->x, base_a + bit, base_b + bit);
     bit++;
   }
 }


 /**
  * \brief Swap the two columns cola and colb, but only between start_row
  *        and stop_row.
  *
  * Only the first finite_field->degree bit columns of each element column
  * are swapped.
  *
  * \param A Matrix to modify.
  * \param cola Index of the first column, in field elements.
  * \param colb Index of the second column, in field elements.
  * \param start_row Row bound forwarded to mzd_col_swap_in_rows().
  * \param stop_row Row bound forwarded to mzd_col_swap_in_rows().
  */
 static inline void mzed_col_swap_in_rows(mzed_t *A, const rci_t cola, const rci_t colb, const rci_t start_row, rci_t stop_row) {
   const rci_t base_a = A->w * cola;
   const rci_t base_b = A->w * colb;
   unsigned int bit = 0;
   while (bit < A->finite_field->degree) {
     mzd_col_swap_in_rows(A->x, base_a + bit, base_b + bit, start_row, stop_row);
     bit++;
   }
 }


 /**
  * \brief Add the rows sourcerow and destrow and store the total in the row
  *        destrow.
  *
  * \param M Matrix to modify.
  * \param sourcerow Index of the row that is added.
  * \param destrow Index of the row that receives the sum.
  */
 static inline void mzed_row_add(mzed_t *M, const rci_t sourcerow, const rci_t destrow) {
   mzd_t *packed = M->x;
   mzd_row_add(packed, sourcerow, destrow);
 }


 /**
  * \brief Return the first row with all zero entries.
  *
  * \param A Matrix to inspect.
  *
  * \return The result of mzd_first_zero_row() on the packed representation.
  */
 static inline rci_t mzed_first_zero_row(mzed_t *A) {
   const rci_t first_zero = mzd_first_zero_row(A->x);
   return first_zero;
 }


 /**
  * \brief Gaussian elimination.
  *
  * NOTE(review): presumably returns the rank of A and computes the reduced
  * form when full is non-zero -- the implementation lives in mzed.c; confirm.
  *
  * \param A Matrix to echelonize.
  * \param full Elimination mode flag.
  */
 rci_t mzed_echelonize_naive(mzed_t *A, int full);


 /**
  * \brief Print a matrix to stdout.
  *
  * \param M Matrix to print.
  */
 void mzed_print(const mzed_t *M);


 #endif //M4RIE_MZED_H

_mzed_addmul
mzed_t * _mzed_addmul(mzed_t *C, const mzed_t *A, const mzed_t *B)
$C = C + A \cdot B$.
Definition: mzed.c:110

gf2e_struct

Definition: gf2e.h:50

mzed_row_add
static void mzed_row_add(mzed_t *M, const rci_t sourcerow, const rci_t destrow)
Add the rows sourcerow and destrow and store the total in the row destrow.
Definition: mzed.h:976

mzed_free_window
static void mzed_free_window(mzed_t *A)
Free a matrix window created with mzed_init_window().
Definition: mzed.h:205

mzed_rescale_row
static void mzed_rescale_row(mzed_t *A, rci_t r, rci_t start_col, const word x)
Rescale the row r in A by x, starting at column start_col.
Definition: mzed.h:549

mzed_init
mzed_t * mzed_init(const gf2e *ff, const rci_t m, const rci_t n)
Create a new matrix of dimension m x n over ff.
Definition: mzed.c:28

mzed_t::finite_field
const gf2e * finite_field
Definition: mzed.h:61

mzed_write_elem
static void mzed_write_elem(mzed_t *A, const rci_t row, const rci_t col, const word elem)
Write the element elem to the position (row,col) in the matrix A.
Definition: mzed.h:454

gf2e_struct::mul
word(* mul)(const gf2e *ff, const word a, const word b)
Definition: gf2e.h:59

mzed_t
Dense matrices over $\mathbb{F}_{2^e}$ represented as packed matrices.
Definition: mzed.h:59

gf2e_degree_to_w
static size_t gf2e_degree_to_w(const gf2e *ff)
Definition: gf2e.h:120

mzed_print
void mzed_print(const mzed_t *M)
Print a matrix to stdout.
Definition: mzed.c:254

_mzed_mul_naive
mzed_t * _mzed_mul_naive(mzed_t *C, const mzed_t *A, const mzed_t *B)
$C = C + A \cdot B$ using naive cubic multiplication.
Definition: mzed.c:128

gf2e_struct::degree
deg_t degree
Definition: gf2e.h:51

mzed_add_elem
static void mzed_add_elem(mzed_t *A, const rci_t row, const rci_t col, const word elem)
Add the element elem to the element at position (row,col) in the matrix A.
Definition: mzed.h:439

mzed_t::x
mzd_t * x
Definition: mzed.h:60

mzed_t::nrows
rci_t nrows
Definition: mzed.h:62

mzed_echelonize_naive
rci_t mzed_echelonize_naive(mzed_t *A, int full)
Gaussian elimination.
Definition: mzed.c:208

mzed_cmp
static int mzed_cmp(mzed_t *A, mzed_t *B)
Return -1, 0 or 1 if A < B, A == B or A > B, respectively.
Definition: mzed.h:472

mzed_mul_scalar
mzed_t * mzed_mul_scalar(mzed_t *C, const word a, const mzed_t *B)
$C = a \cdot B$.
Definition: mzed.c:141

mzed_randomize
void mzed_randomize(mzed_t *A)
Fill matrix A with random elements.
Definition: mzed.c:44

mzed_free
void mzed_free(mzed_t *A)
Free a matrix created with mzed_init().
Definition: mzed.c:39

_mzed_mul_init
mzed_t * _mzed_mul_init(mzed_t *C, const mzed_t *A, const mzed_t *B, int clear)
Definition: mzed.c:74

mzed_set_ui
void mzed_set_ui(mzed_t *A, word value)
Return diagonal matrix with value on the diagonal.
Definition: mzed.c:245

mzed_t::ncols
rci_t ncols
Definition: mzed.h:63

mzed_init_window
static mzed_t * mzed_init_window(const mzed_t *A, const rci_t lowr, const rci_t lowc, const rci_t highr, const rci_t highc)
Create a window/view into the matrix A.
Definition: mzed.h:187

_mzed_add
mzed_t * _mzed_add(mzed_t *C, const mzed_t *A, const mzed_t *B)
$C = A + B$.
Definition: mzed.c:68

mzed_stack
static mzed_t * mzed_stack(mzed_t *C, const mzed_t *A, const mzed_t *B)
Stack A on top of B and write the result to C.
Definition: mzed.h:135

mzed_concat
static mzed_t * mzed_concat(mzed_t *C, const mzed_t *A, const mzed_t *B)
Concatenate B to A and write the result to C.
Definition: mzed.h:111

mzed_row_swap
static void mzed_row_swap(mzed_t *M, const rci_t rowa, const rci_t rowb)
Swap the two rows rowa and rowb.
Definition: mzed.h:909

mzed_col_swap_in_rows
static void mzed_col_swap_in_rows(mzed_t *A, const rci_t cola, const rci_t colb, const rci_t start_row, rci_t stop_row)
Swap the two columns cola and colb but only between start_row and stop_row.
Definition: mzed.h:957

mzed_mul
mzed_t * mzed_mul(mzed_t *C, const mzed_t *A, const mzed_t *B)
$C = A \cdot B$.
Definition: mzed.c:90

mzed_addmul
mzed_t * mzed_addmul(mzed_t *C, const mzed_t *A, const mzed_t *B)
$C = C + A \cdot B$.
Definition: mzed.c:96

mzed_first_zero_row
static rci_t mzed_first_zero_row(mzed_t *A)
Return the first row with all zero entries.
Definition: mzed.h:990

mzed_add
mzed_t * mzed_add(mzed_t *C, const mzed_t *A, const mzed_t *B)
$C = A + B$.
Definition: mzed.c:53

mzed_is_zero
static int mzed_is_zero(const mzed_t *A)
Zero test for matrix.
Definition: mzed.h:484

mzed_add_multiple_of_row
void mzed_add_multiple_of_row(mzed_t *A, rci_t ar, const mzed_t *B, rci_t br, word x, rci_t start_col)
Definition: mzed.c:272

mzed_copy
mzed_t * mzed_copy(mzed_t *B, const mzed_t *A)
Copy matrix A to B.
Definition: mzed.c:196

_mzed_mul
mzed_t * _mzed_mul(mzed_t *C, const mzed_t *A, const mzed_t *B)
$C = A \cdot B$.
Definition: mzed.c:102

mzed_copy_row
static void mzed_copy_row(mzed_t *B, rci_t i, const mzed_t *A, rci_t j)
copy row j from A to row i from B.
Definition: mzed.h:926

mzed_mul_naive
mzed_t * mzed_mul_naive(mzed_t *C, const mzed_t *A, const mzed_t *B)
$C = A \cdot B$ using naive cubic multiplication.
Definition: mzed.c:118

mzed_addmul_naive
mzed_t * mzed_addmul_naive(mzed_t *C, const mzed_t *A, const mzed_t *B)
$C = C + A \cdot B$ using naive cubic multiplication.
Definition: mzed.c:123

mzed_submatrix
static mzed_t * mzed_submatrix(mzed_t *S, const mzed_t *M, const rci_t lowr, const rci_t lowc, const rci_t highr, const rci_t highc)
Copy a submatrix.
Definition: mzed.h:157

mzed_add_row
static void mzed_add_row(mzed_t *A, rci_t ar, const mzed_t *B, rci_t br, rci_t start_col)
Definition: mzed.h:515

mzed_t::w
wi_t w
Definition: mzed.h:64

mzed_col_swap
static void mzed_col_swap(mzed_t *M, const rci_t cola, const rci_t colb)
Swap the two columns cola and colb.
Definition: mzed.h:940

mzed_read_elem
static word mzed_read_elem(const mzed_t *A, const rci_t row, const rci_t col)
Get the element at position (row,col) from the matrix A.
Definition: mzed.h:424