M4RIE  0.20111004
 All Data Structures Files Functions Variables Typedefs Macros Groups Pages
mzed.h
Go to the documentation of this file.
1 
27 #ifndef M4RIE_MZED_H
28 #define M4RIE_MZED_H
29 
30 /******************************************************************************
31 *
32 * M4RIE: Linear Algebra over GF(2^e)
33 *
34 * Copyright (C) 2010,2011 Martin Albrecht <martinralbrecht@googlemail.com>
35 *
36 * Distributed under the terms of the GNU General Public License (GEL)
37 * version 2 or higher.
38 *
39 * This code is distributed in the hope that it will be useful,
40 * but WITHOUT ANY WARRANTY; without even the implied warranty of
41 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
42 * General Public License for more details.
43 *
44 * The full text of the GPL is available at:
45 *
46 * http://www.gnu.org/licenses/
47 ******************************************************************************/
48 
49 #include <m4ri/m4ri.h>
50 #include <m4rie/gf2e.h>
51 #include <m4rie/m4ri_functions.h>
52 
59 typedef struct {
60  mzd_t *x;
61  const gf2e *finite_field;
62  rci_t nrows;
63  rci_t ncols;
64  wi_t w;
65 } mzed_t;
66 
67 
80 mzed_t *mzed_init(const gf2e *ff, const rci_t m, const rci_t n);
81 
90 void mzed_free(mzed_t *A);
91 
92 
111 static inline mzed_t *mzed_concat(mzed_t *C, const mzed_t *A, const mzed_t *B) {
112  if(C==NULL)
113  C = mzed_init(A->finite_field, A->nrows, A->ncols + B->ncols);
114  mzd_concat(C->x, A->x, B->x);
115  return C;
116 }
117 
135 static inline mzed_t *mzed_stack(mzed_t *C, const mzed_t *A, const mzed_t *B) {
136  if(C==NULL)
137  C = mzed_init(A->finite_field, A->nrows + B->nrows, A->ncols);
138  mzd_stack(C->x, A->x, B->x);
139  return C;
140 }
141 
142 
157 static inline mzed_t *mzed_submatrix(mzed_t *S, const mzed_t *M, const rci_t lowr, const rci_t lowc, const rci_t highr, const rci_t highc) {
158  if(S==NULL)
159  S = mzed_init(M->finite_field, highr - lowr, highc - lowc);
160 
161  mzd_submatrix(S->x, M->x, lowr, lowc*M->w, highr, highc*M->w);
162  return S;
163 }
164 
187 static inline mzed_t *mzed_init_window(const mzed_t *A, const rci_t lowr, const rci_t lowc, const rci_t highr, const rci_t highc) {
188  mzed_t *B = (mzed_t *)m4ri_mm_malloc(sizeof(mzed_t));
189  B->finite_field = A->finite_field;
191  B->nrows = highr - lowr;
192  B->ncols = highc - lowc;
193  B->x = mzd_init_window(A->x, lowr, B->w*lowc, highr, B->w*highc);
194  return B;
195 }
196 
205 static inline void mzed_free_window(mzed_t *A) {
206  mzd_free_window(A->x);
207  m4ri_mm_free(A);
208 }
209 
223 mzed_t *mzed_add(mzed_t *C, const mzed_t *A, const mzed_t *B);
224 
235 mzed_t *_mzed_add(mzed_t *C, const mzed_t *A, const mzed_t *B);
236 
247 #define mzed_sub mzed_add
248 
259 #define _mzed_sub _mzed_add
260 
271 mzed_t *mzed_mul(mzed_t *C, const mzed_t *A, const mzed_t *B);
272 
283 mzed_t *mzed_addmul(mzed_t *C, const mzed_t *A, const mzed_t *B);
284 
295 mzed_t *_mzed_mul(mzed_t *C, const mzed_t *A, const mzed_t *B);
296 
307 mzed_t *_mzed_addmul(mzed_t *C, const mzed_t *A, const mzed_t *B);
308 
309 
323 mzed_t *mzed_addmul_naive(mzed_t *C, const mzed_t *A, const mzed_t *B);
324 
338 mzed_t *mzed_mul_naive(mzed_t *C, const mzed_t *A, const mzed_t *B);
339 
350 mzed_t *_mzed_mul_naive(mzed_t *C, const mzed_t *A, const mzed_t *B);
351 
362 mzed_t *mzed_mul_scalar(mzed_t *C, const word a, const mzed_t *B);
363 
374 mzed_t *_mzed_mul_init(mzed_t *C, const mzed_t *A, const mzed_t *B, int clear);
375 
386 void mzed_randomize(mzed_t *A);
387 
397 mzed_t *mzed_copy(mzed_t *B, const mzed_t *A);
398 
411 void mzed_set_ui(mzed_t *A, word value);
412 
413 
424 static inline word mzed_read_elem(const mzed_t *A, const rci_t row, const rci_t col) {
425  return __mzd_read_bits(A->x, row, A->w*col, A->w);
426 }
427 
439 static inline void mzed_add_elem(mzed_t *A, const rci_t row, const rci_t col, const word elem) {
440  __mzd_xor_bits(A->x, row, A->w*col, A->w, elem);
441 }
442 
454 static inline void mzed_write_elem(mzed_t *A, const rci_t row, const rci_t col, const word elem) {
455  __mzd_clear_bits(A->x, row, A->w*col, A->w);
456  __mzd_xor_bits(A->x, row, A->w*col, A->w, elem);
457 }
458 
472 static inline int mzed_cmp(mzed_t *A, mzed_t *B) {
473  return mzd_cmp(A->x,B->x);
474 }
475 
476 
484 static inline int mzed_is_zero(const mzed_t *A) {
485  return mzd_is_zero(A->x);
486 }
487 
501 void mzed_add_multiple_of_row(mzed_t *A, rci_t ar, const mzed_t *B, rci_t br, word x, rci_t start_col);
502 
515 static inline void mzed_add_row(mzed_t *A, rci_t ar, const mzed_t *B, rci_t br, rci_t start_col) {
516  assert(A->ncols == B->ncols && A->finite_field == B->finite_field);
517  assert(start_col < A->ncols);
518 
519  const rci_t start = A->w*start_col;
520  const wi_t startblock = start/m4ri_radix;
521  const word bitmask_begin = __M4RI_RIGHT_BITMASK(m4ri_radix - (start%m4ri_radix));
522  const word bitmask_end = A->x->high_bitmask;
523 
524  word *_a = A->x->rows[ar];
525  const word *_b = B->x->rows[br];
526  wi_t j;
527 
528  if (A->x->width - startblock > 1) {
529  _a[startblock] ^= _b[startblock] & bitmask_begin;
530  for(j=startblock+1; j<A->x->width-1; j++)
531  _a[j] ^= _b[j];
532  _a[j] ^= _b[j] & bitmask_end;
533  } else {
534  _a[startblock] ^= _b[startblock] & (bitmask_begin & bitmask_end);
535  }
536 }
537 
549 static inline void mzed_rescale_row(mzed_t *A, rci_t r, rci_t start_col, const word x) {
550  assert(start_col < A->ncols);
551 
552  const gf2e *ff = A->finite_field;
553  const rci_t start = A->w*start_col;
554  const wi_t startblock = start/m4ri_radix;
555  word *_a = A->x->rows[r];
556  const word bitmask_begin = __M4RI_RIGHT_BITMASK(m4ri_radix - (start%m4ri_radix));
557  const word bitmask_end = A->x->high_bitmask;
558  register word __a = _a[startblock]>>(start%m4ri_radix);
559  register word __t = 0;
560  int j;
561 
562  if(A->w == 2) {
563  switch( (start/2) % 32 ) {
564  case 0: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<< 0; __a >>= 2;
565  case 1: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<< 2; __a >>= 2;
566  case 2: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<< 4; __a >>= 2;
567  case 3: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<< 6; __a >>= 2;
568  case 4: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<< 8; __a >>= 2;
569  case 5: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<10; __a >>= 2;
570  case 6: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<12; __a >>= 2;
571  case 7: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<14; __a >>= 2;
572  case 8: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<16; __a >>= 2;
573  case 9: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<18; __a >>= 2;
574  case 10: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<20; __a >>= 2;
575  case 11: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<22; __a >>= 2;
576  case 12: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<24; __a >>= 2;
577  case 13: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<26; __a >>= 2;
578  case 14: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<28; __a >>= 2;
579  case 15: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<30; __a >>= 2;
580  case 16: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<32; __a >>= 2;
581  case 17: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<34; __a >>= 2;
582  case 18: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<36; __a >>= 2;
583  case 19: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<38; __a >>= 2;
584  case 20: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<40; __a >>= 2;
585  case 21: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<42; __a >>= 2;
586  case 22: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<44; __a >>= 2;
587  case 23: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<46; __a >>= 2;
588  case 24: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<48; __a >>= 2;
589  case 25: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<50; __a >>= 2;
590  case 26: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<52; __a >>= 2;
591  case 27: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<54; __a >>= 2;
592  case 28: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<56; __a >>= 2;
593  case 29: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<58; __a >>= 2;
594  case 30: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<60; __a >>= 2;
595  case 31: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<62; break;
596  default: m4ri_die("impossible");
597  }
598  if(A->x->width-startblock == 1) {
599  _a[startblock] &= ~(bitmask_begin & bitmask_end);
600  _a[startblock] ^= __t & bitmask_begin & bitmask_end;
601  return;
602  } else {
603  _a[startblock] &= ~bitmask_begin;
604  _a[startblock] ^= __t & bitmask_begin;
605  }
606 
607  for(j=startblock+1; j<A->x->width -1; j++) {
608  __a = _a[j], __t = 0;
609  __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<< 0; __a >>= 2;
610  __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<< 2; __a >>= 2;
611  __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<< 4; __a >>= 2;
612  __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<< 6; __a >>= 2;
613  __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<< 8; __a >>= 2;
614  __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<10; __a >>= 2;
615  __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<12; __a >>= 2;
616  __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<14; __a >>= 2;
617  __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<16; __a >>= 2;
618  __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<18; __a >>= 2;
619  __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<20; __a >>= 2;
620  __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<22; __a >>= 2;
621  __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<24; __a >>= 2;
622  __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<26; __a >>= 2;
623  __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<28; __a >>= 2;
624  __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<30; __a >>= 2;
625  __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<32; __a >>= 2;
626  __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<34; __a >>= 2;
627  __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<36; __a >>= 2;
628  __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<38; __a >>= 2;
629  __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<40; __a >>= 2;
630  __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<42; __a >>= 2;
631  __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<44; __a >>= 2;
632  __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<46; __a >>= 2;
633  __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<48; __a >>= 2;
634  __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<50; __a >>= 2;
635  __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<52; __a >>= 2;
636  __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<54; __a >>= 2;
637  __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<56; __a >>= 2;
638  __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<58; __a >>= 2;
639  __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<60; __a >>= 2;
640  __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<62;
641  _a[j] = __t;
642  }
643 
644  __t = _a[j] & ~bitmask_end;
645  switch(A->x->ncols % m4ri_radix) {
646  case 0: __t ^= ff->mul(ff, x, (_a[j] & 0xC000000000000000ULL)>>62)<<62;
647  case 62: __t ^= ff->mul(ff, x, (_a[j] & 0x3000000000000000ULL)>>60)<<60;
648  case 60: __t ^= ff->mul(ff, x, (_a[j] & 0x0C00000000000000ULL)>>58)<<58;
649  case 58: __t ^= ff->mul(ff, x, (_a[j] & 0x0300000000000000ULL)>>56)<<56;
650  case 56: __t ^= ff->mul(ff, x, (_a[j] & 0x00C0000000000000ULL)>>54)<<54;
651  case 54: __t ^= ff->mul(ff, x, (_a[j] & 0x0030000000000000ULL)>>52)<<52;
652  case 52: __t ^= ff->mul(ff, x, (_a[j] & 0x000C000000000000ULL)>>50)<<50;
653  case 50: __t ^= ff->mul(ff, x, (_a[j] & 0x0003000000000000ULL)>>48)<<48;
654  case 48: __t ^= ff->mul(ff, x, (_a[j] & 0x0000C00000000000ULL)>>46)<<46;
655  case 46: __t ^= ff->mul(ff, x, (_a[j] & 0x0000300000000000ULL)>>44)<<44;
656  case 44: __t ^= ff->mul(ff, x, (_a[j] & 0x00000C0000000000ULL)>>42)<<42;
657  case 42: __t ^= ff->mul(ff, x, (_a[j] & 0x0000030000000000ULL)>>40)<<40;
658  case 40: __t ^= ff->mul(ff, x, (_a[j] & 0x000000C000000000ULL)>>38)<<38;
659  case 38: __t ^= ff->mul(ff, x, (_a[j] & 0x0000003000000000ULL)>>36)<<36;
660  case 36: __t ^= ff->mul(ff, x, (_a[j] & 0x0000000C00000000ULL)>>34)<<34;
661  case 34: __t ^= ff->mul(ff, x, (_a[j] & 0x0000000300000000ULL)>>32)<<32;
662  case 32: __t ^= ff->mul(ff, x, (_a[j] & 0x00000000C0000000ULL)>>30)<<30;
663  case 30: __t ^= ff->mul(ff, x, (_a[j] & 0x0000000030000000ULL)>>28)<<28;
664  case 28: __t ^= ff->mul(ff, x, (_a[j] & 0x000000000C000000ULL)>>26)<<26;
665  case 26: __t ^= ff->mul(ff, x, (_a[j] & 0x0000000003000000ULL)>>24)<<24;
666  case 24: __t ^= ff->mul(ff, x, (_a[j] & 0x0000000000C00000ULL)>>22)<<22;
667  case 22: __t ^= ff->mul(ff, x, (_a[j] & 0x0000000000300000ULL)>>20)<<20;
668  case 20: __t ^= ff->mul(ff, x, (_a[j] & 0x00000000000C0000ULL)>>18)<<18;
669  case 18: __t ^= ff->mul(ff, x, (_a[j] & 0x0000000000030000ULL)>>16)<<16;
670  case 16: __t ^= ff->mul(ff, x, (_a[j] & 0x000000000000C000ULL)>>14)<<14;
671  case 14: __t ^= ff->mul(ff, x, (_a[j] & 0x0000000000003000ULL)>>12)<<12;
672  case 12: __t ^= ff->mul(ff, x, (_a[j] & 0x0000000000000C00ULL)>>10)<<10;
673  case 10: __t ^= ff->mul(ff, x, (_a[j] & 0x0000000000000300ULL)>> 8)<< 8;
674  case 8: __t ^= ff->mul(ff, x, (_a[j] & 0x00000000000000C0ULL)>> 6)<< 6;
675  case 6: __t ^= ff->mul(ff, x, (_a[j] & 0x0000000000000030ULL)>> 4)<< 4;
676  case 4: __t ^= ff->mul(ff, x, (_a[j] & 0x000000000000000CULL)>> 2)<< 2;
677  case 2: __t ^= ff->mul(ff, x, (_a[j] & 0x0000000000000003ULL)>> 0)<< 0;
678  };
679  _a[j] = __t;
680 
681  } else if(A->w == 4) {
682  switch( (start/4)%16 ) {
683  case 0: __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<< 0; __a >>= 4;
684  case 1: __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<< 4; __a >>= 4;
685  case 2: __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<< 8; __a >>= 4;
686  case 3: __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<12; __a >>= 4;
687  case 4: __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<16; __a >>= 4;
688  case 5: __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<20; __a >>= 4;
689  case 6: __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<24; __a >>= 4;
690  case 7: __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<28; __a >>= 4;
691  case 8: __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<32; __a >>= 4;
692  case 9: __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<36; __a >>= 4;
693  case 10: __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<40; __a >>= 4;
694  case 11: __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<44; __a >>= 4;
695  case 12: __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<48; __a >>= 4;
696  case 13: __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<52; __a >>= 4;
697  case 14: __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<56; __a >>= 4;
698  case 15: __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<60; break;
699  default: m4ri_die("impossible");
700  }
701  if(A->x->width-startblock == 1) {
702  _a[startblock] &= ~(bitmask_begin & bitmask_end);
703  _a[startblock] ^= __t & bitmask_begin & bitmask_end;
704  return;
705  } else {
706  _a[startblock] &= ~bitmask_begin;
707  _a[startblock] ^= __t & bitmask_begin;
708  }
709 
710  for(j=startblock+1; j<A->x->width -1; j++) {
711  __a = _a[j], __t = 0;
712  __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<< 0; __a >>= 4;
713  __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<< 4; __a >>= 4;
714  __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<< 8; __a >>= 4;
715  __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<12; __a >>= 4;
716  __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<16; __a >>= 4;
717  __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<20; __a >>= 4;
718  __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<24; __a >>= 4;
719  __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<28; __a >>= 4;
720  __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<32; __a >>= 4;
721  __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<36; __a >>= 4;
722  __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<40; __a >>= 4;
723  __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<44; __a >>= 4;
724  __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<48; __a >>= 4;
725  __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<52; __a >>= 4;
726  __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<56; __a >>= 4;
727  __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<60;
728  _a[j] = __t;
729  }
730 
731  __t = _a[j] & ~bitmask_end;
732  switch(A->x->ncols % m4ri_radix) {
733  case 0: __t ^= ff->mul(ff, x, (_a[j] & 0xF000000000000000ULL)>>60)<<60;
734  case 60: __t ^= ff->mul(ff, x, (_a[j] & 0x0F00000000000000ULL)>>56)<<56;
735  case 56: __t ^= ff->mul(ff, x, (_a[j] & 0x00F0000000000000ULL)>>52)<<52;
736  case 52: __t ^= ff->mul(ff, x, (_a[j] & 0x000F000000000000ULL)>>48)<<48;
737  case 48: __t ^= ff->mul(ff, x, (_a[j] & 0x0000F00000000000ULL)>>44)<<44;
738  case 44: __t ^= ff->mul(ff, x, (_a[j] & 0x00000F0000000000ULL)>>40)<<40;
739  case 40: __t ^= ff->mul(ff, x, (_a[j] & 0x000000F000000000ULL)>>36)<<36;
740  case 36: __t ^= ff->mul(ff, x, (_a[j] & 0x0000000F00000000ULL)>>32)<<32;
741  case 32: __t ^= ff->mul(ff, x, (_a[j] & 0x00000000F0000000ULL)>>28)<<28;
742  case 28: __t ^= ff->mul(ff, x, (_a[j] & 0x000000000F000000ULL)>>24)<<24;
743  case 24: __t ^= ff->mul(ff, x, (_a[j] & 0x0000000000F00000ULL)>>20)<<20;
744  case 20: __t ^= ff->mul(ff, x, (_a[j] & 0x00000000000F0000ULL)>>16)<<16;
745  case 16: __t ^= ff->mul(ff, x, (_a[j] & 0x000000000000F000ULL)>>12)<<12;
746  case 12: __t ^= ff->mul(ff, x, (_a[j] & 0x0000000000000F00ULL)>> 8)<< 8;
747  case 8: __t ^= ff->mul(ff, x, (_a[j] & 0x00000000000000F0ULL)>> 4)<< 4;
748  case 4: __t ^= ff->mul(ff, x, (_a[j] & 0x000000000000000FULL)>> 0)<< 0;
749  };
750  _a[j] = __t;
751 
752  } else if (A->w == 8) {
753 
754  register word __a0 = _a[startblock]>>(start%m4ri_radix);
755  register word __a1;
756  register word __t0 = 0;
757  register word __t1;
758 
759  switch( (start/8) %8 ) {
760  case 0: __t0 ^= ff->mul(ff, x, (__a0 & 0x00000000000000FFULL))<< 0; __a0 >>= 8;
761  case 1: __t0 ^= ff->mul(ff, x, (__a0 & 0x00000000000000FFULL))<< 8; __a0 >>= 8;
762  case 2: __t0 ^= ff->mul(ff, x, (__a0 & 0x00000000000000FFULL))<<16; __a0 >>= 8;
763  case 3: __t0 ^= ff->mul(ff, x, (__a0 & 0x00000000000000FFULL))<<24; __a0 >>= 8;
764  case 4: __t0 ^= ff->mul(ff, x, (__a0 & 0x00000000000000FFULL))<<32; __a0 >>= 8;
765  case 5: __t0 ^= ff->mul(ff, x, (__a0 & 0x00000000000000FFULL))<<40; __a0 >>= 8;
766  case 6: __t0 ^= ff->mul(ff, x, (__a0 & 0x00000000000000FFULL))<<48; __a0 >>= 8;
767  case 7: __t0 ^= ff->mul(ff, x, (__a0 & 0x00000000000000FFULL))<<56; break;
768  default: m4ri_die("impossible");
769  }
770  if(A->x->width-startblock == 1) {
771  _a[startblock] &= ~(bitmask_begin & bitmask_end);
772  _a[startblock] ^= __t0 & bitmask_begin & bitmask_end;
773  return;
774  } else {
775  _a[startblock] &= ~bitmask_begin;
776  _a[startblock] ^= __t0 & bitmask_begin;
777  }
778 
779  for(j=startblock+1; j+2 < A->x->width; j+=2) {
780  __a0 = _a[j], __t0 = 0;
781  __a1 = _a[j+1], __t1 = 0;
782  __t0 ^= ff->mul(ff, x, __a0 & 0x00000000000000FFULL)<< 0; __a0 >>= 8;
783  __t1 ^= ff->mul(ff, x, __a1 & 0x00000000000000FFULL)<< 0; __a1 >>= 8;
784  __t0 ^= ff->mul(ff, x, __a0 & 0x00000000000000FFULL)<< 8; __a0 >>= 8;
785  __t1 ^= ff->mul(ff, x, __a1 & 0x00000000000000FFULL)<< 8; __a1 >>= 8;
786  __t0 ^= ff->mul(ff, x, __a0 & 0x00000000000000FFULL)<<16; __a0 >>= 8;
787  __t1 ^= ff->mul(ff, x, __a1 & 0x00000000000000FFULL)<<16; __a1 >>= 8;
788  __t0 ^= ff->mul(ff, x, __a0 & 0x00000000000000FFULL)<<24; __a0 >>= 8;
789  __t1 ^= ff->mul(ff, x, __a1 & 0x00000000000000FFULL)<<24; __a1 >>= 8;
790  __t0 ^= ff->mul(ff, x, __a0 & 0x00000000000000FFULL)<<32; __a0 >>= 8;
791  __t1 ^= ff->mul(ff, x, __a1 & 0x00000000000000FFULL)<<32; __a1 >>= 8;
792  __t0 ^= ff->mul(ff, x, __a0 & 0x00000000000000FFULL)<<40; __a0 >>= 8;
793  __t1 ^= ff->mul(ff, x, __a1 & 0x00000000000000FFULL)<<40; __a1 >>= 8;
794  __t0 ^= ff->mul(ff, x, __a0 & 0x00000000000000FFULL)<<48; __a0 >>= 8;
795  __t1 ^= ff->mul(ff, x, __a1 & 0x00000000000000FFULL)<<48; __a1 >>= 8;
796  __t0 ^= ff->mul(ff, x, __a0 & 0x00000000000000FFULL)<<56; __a0 >>= 8;
797  __t1 ^= ff->mul(ff, x, __a1 & 0x00000000000000FFULL)<<56;
798  _a[j+0] = __t0;
799  _a[j+1] = __t1;
800  }
801 
802  for(; j < A->x->width-1; j++) {
803  __a0 = _a[j], __t0 = 0;
804  __t0 ^= ff->mul(ff, x, __a0 & 0x00000000000000FFULL)<< 0; __a0 >>= 8;
805  __t0 ^= ff->mul(ff, x, __a0 & 0x00000000000000FFULL)<< 8; __a0 >>= 8;
806  __t0 ^= ff->mul(ff, x, __a0 & 0x00000000000000FFULL)<<16; __a0 >>= 8;
807  __t0 ^= ff->mul(ff, x, __a0 & 0x00000000000000FFULL)<<24; __a0 >>= 8;
808  __t0 ^= ff->mul(ff, x, __a0 & 0x00000000000000FFULL)<<32; __a0 >>= 8;
809  __t0 ^= ff->mul(ff, x, __a0 & 0x00000000000000FFULL)<<40; __a0 >>= 8;
810  __t0 ^= ff->mul(ff, x, __a0 & 0x00000000000000FFULL)<<48; __a0 >>= 8;
811  __t0 ^= ff->mul(ff, x, __a0 & 0x00000000000000FFULL)<<56;
812  _a[j] = __t0;
813  }
814 
815  __t = _a[j] & ~bitmask_end;
816  switch(A->x->ncols % m4ri_radix ) {
817  case 0: __t ^= ff->mul(ff, x, (_a[j] & 0xFF00000000000000ULL)>>56)<<56;
818  case 56: __t ^= ff->mul(ff, x, (_a[j] & 0x00FF000000000000ULL)>>48)<<48;
819  case 48: __t ^= ff->mul(ff, x, (_a[j] & 0x0000FF0000000000ULL)>>40)<<40;
820  case 40: __t ^= ff->mul(ff, x, (_a[j] & 0x000000FF00000000ULL)>>32)<<32;
821  case 32: __t ^= ff->mul(ff, x, (_a[j] & 0x00000000FF000000ULL)>>24)<<24;
822  case 24: __t ^= ff->mul(ff, x, (_a[j] & 0x0000000000FF0000ULL)>>16)<<16;
823  case 16: __t ^= ff->mul(ff, x, (_a[j] & 0x000000000000FF00ULL)>> 8)<< 8;
824  case 8: __t ^= ff->mul(ff, x, (_a[j] & 0x00000000000000FFULL)>> 0)<< 0;
825  };
826  _a[j] = __t;
827 
828  } else if (A->w == 16) {
829  switch( (start/16) %4 ) {
830  case 0: __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<< 0; __a >>= 16;
831  case 1: __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<<16; __a >>= 16;
832  case 2: __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<<32; __a >>= 16;
833  case 3: __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<<48; break;
834  default: m4ri_die("impossible");
835  }
836  if(A->x->width-startblock == 1) {
837  _a[startblock] &= ~(bitmask_begin & bitmask_end);
838  _a[startblock] ^= __t & bitmask_begin & bitmask_end;
839  return;
840  } else {
841  _a[startblock] &= ~bitmask_begin;
842  _a[startblock] ^= __t & bitmask_begin;
843  }
844 
845  for(j=startblock+1; j+4<A->x->width; j+=4) {
846  __a = _a[j], __t = 0;
847  __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<< 0; __a >>= 16;
848  __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<<16; __a >>= 16;
849  __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<<32; __a >>= 16;
850  __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<<48;
851  _a[j] = __t;
852 
853  __a = _a[j+1], __t = 0;
854  __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<< 0; __a >>= 16;
855  __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<<16; __a >>= 16;
856  __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<<32; __a >>= 16;
857  __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<<48;
858  _a[j+1] = __t;
859 
860  __a = _a[j+2], __t = 0;
861  __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<< 0; __a >>= 16;
862  __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<<16; __a >>= 16;
863  __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<<32; __a >>= 16;
864  __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<<48;
865  _a[j+2] = __t;
866 
867  __a = _a[j+3], __t = 0;
868  __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<< 0; __a >>= 16;
869  __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<<16; __a >>= 16;
870  __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<<32; __a >>= 16;
871  __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<<48;
872  _a[j+3] = __t;
873  }
874  for( ; j<A->x->width-1; j++) {
875  __a = _a[j], __t = 0;
876  __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<< 0; __a >>= 16;
877  __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<<16; __a >>= 16;
878  __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<<32; __a >>= 16;
879  __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<<48;
880  _a[j] = __t;
881  }
882 
883  __t = _a[j] & ~bitmask_end;
884  switch(A->x->ncols % m4ri_radix) {
885  case 0: __t ^= ff->mul(ff, x, (_a[j] & 0xFFFF000000000000ULL)>>48)<<48;
886  case 48: __t ^= ff->mul(ff, x, (_a[j] & 0x0000FFFF00000000ULL)>>32)<<32;
887  case 32: __t ^= ff->mul(ff, x, (_a[j] & 0x00000000FFFF0000ULL)>>16)<<16;
888  case 16: __t ^= ff->mul(ff, x, (_a[j] & 0x000000000000FFFFULL)>> 0)<< 0;
889  };
890  _a[j] = __t;
891 
892  } else {
893  for(rci_t j=start_col; j<A->ncols; j++) {
894  mzed_write_elem(A, r, j, ff->mul(ff, x, mzed_read_elem(A, r, j)));
895  }
896  }
897 }
898 
909 static inline void mzed_row_swap(mzed_t *M, const rci_t rowa, const rci_t rowb) {
910  mzd_row_swap(M->x, rowa, rowb);
911 }
912 
926 static inline void mzed_copy_row(mzed_t* B, rci_t i, const mzed_t* A, rci_t j) {
927  mzd_copy_row(B->x, i, A->x, j);
928 }
929 
940 static inline void mzed_col_swap(mzed_t *M, const rci_t cola, const rci_t colb) {
941  for(rci_t i=0; i<M->w; i++)
942  mzd_col_swap(M->x,M->w*cola+i, M->w*colb+i);
943 }
944 
957 static inline void mzed_col_swap_in_rows(mzed_t *A, const rci_t cola, const rci_t colb, const rci_t start_row, rci_t stop_row) {
958  for(unsigned int e=0; e < A->finite_field->degree; e++) {
959  mzd_col_swap_in_rows(A->x, A->w*cola+e, A->w*colb+e, start_row, stop_row);
960  };
961 }
962 
976 static inline void mzed_row_add(mzed_t *M, const rci_t sourcerow, const rci_t destrow) {
977  mzd_row_add(M->x, sourcerow, destrow);
978 }
979 
990 static inline rci_t mzed_first_zero_row(mzed_t *A) {
991  return mzd_first_zero_row(A->x);
992 }
993 
994 
1008 rci_t mzed_echelonize_naive(mzed_t *A, int full);
1009 
1018 void mzed_print(const mzed_t *M);
1019 
1020 #endif //M4RIE_MATRIX_H
mzed_t * _mzed_addmul(mzed_t *C, const mzed_t *A, const mzed_t *B)
.
Definition: mzed.c:110
Definition: gf2e.h:50
static void mzed_row_add(mzed_t *M, const rci_t sourcerow, const rci_t destrow)
Add the rows sourcerow and destrow and stores the total in the row destrow.
Definition: mzed.h:976
static void mzed_free_window(mzed_t *A)
Free a matrix window created with mzed_init_window().
Definition: mzed.h:205
static void mzed_rescale_row(mzed_t *A, rci_t r, rci_t start_col, const word x)
Rescale the row r in A by X starting c.
Definition: mzed.h:549
mzed_t * mzed_init(const gf2e *ff, const rci_t m, const rci_t n)
Create a new matrix of dimension m x n over ff.
Definition: mzed.c:28
const gf2e * finite_field
Definition: mzed.h:61
static void mzed_write_elem(mzed_t *A, const rci_t row, const rci_t col, const word elem)
Write the element elem to the position (row,col) in the matrix A.
Definition: mzed.h:454
word(* mul)(const gf2e *ff, const word a, const word b)
Definition: gf2e.h:59
Dense matrices over represented as packed matrices.
Definition: mzed.h:59
static size_t gf2e_degree_to_w(const gf2e *ff)
Definition: gf2e.h:120
void mzed_print(const mzed_t *M)
Print a matrix to stdout.
Definition: mzed.c:254
mzed_t * _mzed_mul_naive(mzed_t *C, const mzed_t *A, const mzed_t *B)
using naive cubic multiplication.
Definition: mzed.c:128
deg_t degree
Definition: gf2e.h:51
static void mzed_add_elem(mzed_t *A, const rci_t row, const rci_t col, const word elem)
At the element elem to the element at position (row,col) in the matrix A.
Definition: mzed.h:439
mzd_t * x
Definition: mzed.h:60
rci_t nrows
Definition: mzed.h:62
rci_t mzed_echelonize_naive(mzed_t *A, int full)
Gaussian elimination.
Definition: mzed.c:208
static int mzed_cmp(mzed_t *A, mzed_t *B)
Return -1,0,1 if if A < B, A == B or A > B respectively.
Definition: mzed.h:472
mzed_t * mzed_mul_scalar(mzed_t *C, const word a, const mzed_t *B)
.
Definition: mzed.c:141
void mzed_randomize(mzed_t *A)
Fill matrix A with random elements.
Definition: mzed.c:44
void mzed_free(mzed_t *A)
Free a matrix created with mzed_init().
Definition: mzed.c:39
mzed_t * _mzed_mul_init(mzed_t *C, const mzed_t *A, const mzed_t *B, int clear)
Definition: mzed.c:74
void mzed_set_ui(mzed_t *A, word value)
Return diagonal matrix with value on the diagonal.
Definition: mzed.c:245
rci_t ncols
Definition: mzed.h:63
static mzed_t * mzed_init_window(const mzed_t *A, const rci_t lowr, const rci_t lowc, const rci_t highr, const rci_t highc)
Create a window/view into the matrix A.
Definition: mzed.h:187
mzed_t * _mzed_add(mzed_t *C, const mzed_t *A, const mzed_t *B)
.
Definition: mzed.c:68
static mzed_t * mzed_stack(mzed_t *C, const mzed_t *A, const mzed_t *B)
Stack A on top of B and write the result to C.
Definition: mzed.h:135
static mzed_t * mzed_concat(mzed_t *C, const mzed_t *A, const mzed_t *B)
Concatenate B to A and write the result to C.
Definition: mzed.h:111
static void mzed_row_swap(mzed_t *M, const rci_t rowa, const rci_t rowb)
Swap the two rows rowa and rowb.
Definition: mzed.h:909
static void mzed_col_swap_in_rows(mzed_t *A, const rci_t cola, const rci_t colb, const rci_t start_row, rci_t stop_row)
Swap the two columns cola and colb but only between start_row and stop_row.
Definition: mzed.h:957
mzed_t * mzed_mul(mzed_t *C, const mzed_t *A, const mzed_t *B)
.
Definition: mzed.c:90
mzed_t * mzed_addmul(mzed_t *C, const mzed_t *A, const mzed_t *B)
.
Definition: mzed.c:96
static rci_t mzed_first_zero_row(mzed_t *A)
Return the first row with all zero entries.
Definition: mzed.h:990
mzed_t * mzed_add(mzed_t *C, const mzed_t *A, const mzed_t *B)
.
Definition: mzed.c:53
static int mzed_is_zero(const mzed_t *A)
Zero test for matrix.
Definition: mzed.h:484
void mzed_add_multiple_of_row(mzed_t *A, rci_t ar, const mzed_t *B, rci_t br, word x, rci_t start_col)
Definition: mzed.c:272
mzed_t * mzed_copy(mzed_t *B, const mzed_t *A)
Copy matrix A to B.
Definition: mzed.c:196
mzed_t * _mzed_mul(mzed_t *C, const mzed_t *A, const mzed_t *B)
.
Definition: mzed.c:102
static void mzed_copy_row(mzed_t *B, rci_t i, const mzed_t *A, rci_t j)
copy row j from A to row i from B.
Definition: mzed.h:926
mzed_t * mzed_mul_naive(mzed_t *C, const mzed_t *A, const mzed_t *B)
using naive cubic multiplication.
Definition: mzed.c:118
mzed_t * mzed_addmul_naive(mzed_t *C, const mzed_t *A, const mzed_t *B)
using naive cubic multiplication.
Definition: mzed.c:123
static mzed_t * mzed_submatrix(mzed_t *S, const mzed_t *M, const rci_t lowr, const rci_t lowc, const rci_t highr, const rci_t highc)
Copy a submatrix.
Definition: mzed.h:157
static void mzed_add_row(mzed_t *A, rci_t ar, const mzed_t *B, rci_t br, rci_t start_col)
Definition: mzed.h:515
wi_t w
Definition: mzed.h:64
static void mzed_col_swap(mzed_t *M, const rci_t cola, const rci_t colb)
Swap the two columns cola and colb.
Definition: mzed.h:940
static word mzed_read_elem(const mzed_t *A, const rci_t row, const rci_t col)
Get the element at position (row,col) from the matrix A.
Definition: mzed.h:424