#ifndef _LinearSystemF2_H
#define _LinearSystemF2_H

/* for time measurements */
#include <time.h>
/* for 64-bit arithmetic */
#include <inttypes.h>

#include <algorithm>
/* for assert */
#include <cassert>
#include <cmath>
#include <fstream>
/* for std::cout diagnostics */
#include <iostream>
/* for vector template */
#include <vector>

#include "F2.h"
#include "TwoTuple.h"
#include "ThreeTuple.h"

/**
 * Linear system Ax = b over the two-element field F2.
 *
 * The matrix is stored row by row: each row is a vector holding the column
 * indices of its non-zero entries (over F2 every non-zero coefficient is 1).
 * addEntry() keeps those indices sorted and the row operations rely on that.
 * The linear system is given in the form [A|-b], i.e. the last column
 * (index n-1) of the constraint matrix is the right hand side.
 */
template <>
class LinearSystem<F2>
{
    private:
        /** number of columns in [A|-b], i.e. including the right hand side */
        int n;
        /** number of rows in A */
        int m;

        /** sparse row: column indices of the non-zero entries */
        typedef std::vector<int> Row;
        typedef std::vector<Row> Matrix;
        Matrix matrix;

        /** solution vector */
        std::vector<F2> soln;

        /** pivot rows */
        std::vector<int> pivots;

        /** row operations */
        void add_rows(int i, int j);     // Add row i to row j.
        void add_rows(Row& ri, Row& rj); // Add row ri to row rj.
        void swap_rows(int i, int j);    // Swap rows i and j.

        void diagonalise();

        /** if consistent, find actual solution */
        void back_solve2();

        void check_solution();

        /** Determine whether or not the system is consistent using different algorithms */
        bool normal_ge_consistent();
        bool striped_ge_consistent();
        bool striped_ge_noGrayCode_consistent();
        bool striped_ge_original();

        /** Find the rank of the system using different algorithms */
        int normal_ge_rank();
        int striped_ge_rank();

        /** Helper functions for the Striped Gaussian Elimination functions */
        void make_stripe(std::vector<std::vector<int> >& col_entries, int thisCol, int s,
                         std::vector<unsigned long>& stripe, std::vector<int>& stripe_row_indices);

        int find_stripe_basis(std::vector<unsigned long>& stripe2, std::vector<int>& stripe_row_indices,
                              std::vector<int>& independentRows, std::vector<int>& independentCols);

        void find_standard_basis_patterns(std::vector<unsigned long>& stripe, std::vector<int>& independentCols,
                                          int stripeRank,
                                          std::vector<unsigned long>& standard_basis_patterns);

        void diagonalize_stripe_basis(std::vector<unsigned long>& stripe, std::vector<int>& independentRows,
                                      std::vector<int>& independentCols, int stripeRank);

        void sort_by_gray_code(std::vector<unsigned long>& stripe, std::vector<int>& stripe_row_indices,
                               std::vector<int>& independentCols, int stripeRank,
                               std::vector<unsigned long>& standard_basis_patterns,
                               std::vector<ThreeTuple>& stripe_gray_codes);

        void sort_by_gray_code_origional(std::vector<unsigned long>& stripe,
                                         std::vector<int>& independentCols, int stripeRank,
                                         std::vector<TwoTuple>& stripe_gray_codes);

        bool eliminate_stripe(std::vector<unsigned long>& stripe, std::vector<int>& stripe_row_indices,
                              std::vector<unsigned long>& standard_basis_patterns,
                              std::vector<ThreeTuple>& stripe_gray_codes, std::vector<std::vector<int> >& col_entries,
                              std::vector<int>& independentCols, int stripeRank);

        bool eliminate_stripe_origional(std::vector<unsigned long>& stripe, std::vector<int>& stripe_row_indices,
                                        std::vector<TwoTuple>& stripe_gray_codes, std::vector<std::vector<int> >& col_entries,
                                        std::vector<int>& independentCols, int stripeRank);

        /** does not use gray codes */
        bool eliminate_stripe_noGrayCodes(std::vector<unsigned long>& stripe, std::vector<int>& stripe_row_indices,
                                          std::vector<ThreeTuple>& stripe_gray_codes, std::vector<std::vector<int> >& col_entries,
                                          std::vector<int>& independentCols, int stripeRank);

        /** used for finding rank */
        void eliminate_stripe2(std::vector<unsigned long>& stripe, std::vector<int>& stripe_row_indices,
                               std::vector<ThreeTuple>& stripe_gray_codes, std::vector<std::vector<int> >& col_entries,
                               std::vector<int>& independentCols, int stripeRank);

        /** for time measurement purposes: processor time in seconds */
        double tic (void) {return (clock () / (double) CLOCKS_PER_SEC);}
        /** seconds elapsed since the tic() value t */
        double toc (const double &t) {double s = tic ();return (s - t);}

    public:
        /** preprocessing */
        void preprocessMatrix();

        /**
         * public constructors.
         * Declared with the injected class name: repeating the template
         * arguments in a constructor/destructor name is rejected in C++20.
         */
        LinearSystem(int num_col);
        LinearSystem(const LinearSystem<F2> & ls2);

        /** public destructor */
        virtual ~LinearSystem();

        /** Get the solution vector. */
        const std::vector<F2>& getSolution();

        /** add entry (i,j) to the constraint matrix. */
        void addEntry(int i, int j);
        void addEntry(int i, int j, F2 v);
        bool addEntryIfNoExist(int i, int j); //true=added, false = element already exists

        /** add entry i to the right hand side. */
        void addRHSEntry(int i);
        void addRHSEntry(int i, F2 v);

        /** deletes all data from all the vectors **/
        void clearMatrix();

        /* get number of rows in matrix */
        int getNumRows() const {return m;}
        /* set the number of rows in matrix */
        void setNumRows(Index s) { m = s; matrix.resize(s); }
        /* get number of columns in matrix (not including rhs) */
        int getNumCols() const {return n-1;}
        /* set number of columns in matrix (not including rhs) */
        void setNumCols(int _n) { n = _n+1; }

        /* Get the number of non-zero entries. */
        long int getNumNonZeros() const;

        /* input: xVar, output: b such that Ax = b */
        void matrixVectorMultiplication(std::vector<int> & xVar, std::vector<int> & b);

        /** Determine if the linear system is consistent. Can choose algorithm */
        bool isConsistent();
        bool isConsistent(int alg);

        /** Computes the rank of the linear system */
        int rank();
        int rank(int alg);

        /** Computes the rank assuming some rows have already been processed */
        int rank_reverse();
        int rank_reverse(Index cutoff);

        /** Print the linear system. */
        void print() const;
        void print_triangle() const;
        void print_dense() const;

        /** Print the solution of the linear system. */
        void printSolution() const;

        int get_codim(Index start, Index end) const;

        /** update n and m **/
        void updateRowCol(int newRowSize, int newColSize);

        /**
         * Read-only cursor over the non-zero entries of one row.
         * Only LinearSystem<F2> can create one from a raw row iterator.
         */
        class RowIter
        {
        public:
            RowIter(const RowIter& ri) { it = ri.it; }
            RowIter& operator=(const RowIter& ri) { it = ri.it; return *this; }
            void operator++() { ++it; }
            bool operator!=(const RowIter& i) const { return it != i.it; }
            bool operator==(const RowIter& i) const { return it == i.it; }
            /** column index of the current entry */
            const Index& index() const { return *it; }
            /** coefficient of the current entry; stored entries are always 1 */
            const F2 coeff() const { return 1; }
        protected:
            Row::const_iterator it;
            RowIter(Row::const_iterator _it) { it = _it; }
            friend class LinearSystem<F2>;
        };

        RowIter begin(Index i) const { return RowIter(matrix[i].begin()); }
        RowIter end(Index i) const { return RowIter(matrix[i].end()); }
        /**
         * Iterator to the last entry of row i; the row must not be empty.
         * Written as end() - 1 because decrementing the temporary returned by
         * end() does not compile when the iterator type is a raw pointer.
         */
        RowIter last(Index i) const { return RowIter(matrix[i].end() - 1); }
};




/** Exchanges the contents of rows i and j without copying their entries. */
inline
void
LinearSystem<F2>::swap_rows(int i, int j)
{
    std::swap(matrix[i], matrix[j]);
}


/**
 * Creates an empty system (no rows).
 * _n is stored directly as the column count n, which includes the right
 * hand side column (getNumCols() reports n-1).
 */
inline
LinearSystem<F2>::LinearSystem(int _n)
    : n(_n), m(0)
{
    assert(_n >= 0);
}


/** Copy constructor: duplicates the dimensions, all rows, the solution and the pivots. */
inline
LinearSystem<F2>::LinearSystem(const LinearSystem<F2> & ls2)
    : n(ls2.n),
      m(ls2.m),
      matrix(ls2.matrix),
      soln(ls2.soln),
      pivots(ls2.pivots)
{
}


/** Nothing to release explicitly: every member cleans up after itself. */
inline
LinearSystem<F2>::~LinearSystem()
{}

/**
 * Add an entry to the constraint matrix.
 * If row i lies beyond the current rows, the matrix grows to i+1 rows and j
 * is appended to row i. Otherwise j is placed at its sorted position in
 * row i; if j is already present it is removed instead, so adding the same
 * entry twice cancels it.
 */
inline
void
LinearSystem<F2>::addEntry(int i, int j)
{
    assert(0 <= j && j < n);

    if (i >= m) {
        m = i + 1;
        matrix.resize(m);
        matrix[i].push_back(j);
        return;
    }

    Row& row = matrix[i];
    const Row::iterator pos = std::lower_bound(row.begin(), row.end(), j);
    const bool alreadyThere = (pos != row.end() && *pos == j);
    if (alreadyThere) {
        row.erase(pos);
    } else {
        row.insert(pos, j);
    }
}

/** Adds entry (i,j) only when the coefficient v is non-zero. */
inline
void
LinearSystem<F2>::addEntry(int i, int j, F2 v)
{
    if (!(v != 0)) { return; }
    addEntry(i, j);
}

/**
 * Adds entry (i,j) unless row i already contains column j.
 * A row index beyond the rows stored so far cannot contain the entry, so it
 * is handed straight to addEntry() (which grows the matrix) instead of
 * indexing past the end of the row storage.
 * Returns true when the entry was added, false when it already existed.
 */
inline
bool
LinearSystem<F2>::addEntryIfNoExist(int i, int j)
{
    if (i < (int) matrix.size()) {
        const Row& row = matrix[i];
        if (std::find(row.begin(), row.end(), j) != row.end()) {
            return false;
        }
    }
    addEntry(i, j);
    return true;
}

/**
 * Add an entry to the right hand side of row i.
 * The right hand side is stored as the last column (n-1), so this simply
 * forwards to addEntry() for that column.
 */
inline
void
LinearSystem<F2>::addRHSEntry(int i)
{
    const int rhsColumn = n - 1;
    addEntry(i, rhsColumn);
}

/** Adds a right hand side entry to row i only when v is non-zero. */
inline
void
LinearSystem<F2>::addRHSEntry(int i, F2 v)
{
    if (!(v != 0)) { return; }
    addRHSEntry(i);
}

/** Drops all rows, the solution and the pivots, and resets both dimensions to zero. */
inline
void
LinearSystem<F2>::clearMatrix()
{
    // Clearing the outer vector destroys every row along with it.
    matrix.clear();
    soln.clear();
    pivots.clear();

    n = 0;
    m = 0;
}

/** Returns the total number of stored entries, summed over all rows. */
inline
long int
LinearSystem<F2>::getNumNonZeros() const
{
    long int total = 0;
    for (size_t r = 0; r < matrix.size(); ++r) {
        total += matrix[r].size();
    }
    return total;
}

/**
 * Computes b such that A*(xVar) = b.
 * xVar lists column indices and must be sorted; b is cleared and then
 * receives the index of every row that shares an odd number of column
 * indices with xVar.
 */
inline
void
LinearSystem<F2>::matrixVectorMultiplication(std::vector<int> & xVar, std::vector<int> & b)
{
    b.clear();
    for (int rowIdx = 0; rowIdx < m; ++rowIdx)
    {
        const Row& entries = matrix[rowIdx];
        bool odd = false;   // parity of the number of matches in this row
        int pos = 0;        // read position in xVar

        // Walk the row and xVar together; both are in increasing order.
        for (int k = 0; k < (int) entries.size() && pos < (int) xVar.size(); ++k)
        {
            while (pos < (int) xVar.size() && xVar[pos] < entries[k]) {
                ++pos;
            }
            if (pos < (int) xVar.size() && xVar[pos] == entries[k]) {
                odd = !odd;
            }
        }

        if (odd) {
            b.push_back(rowIdx);
        }
    }
}

/**
 * Simplifies the matrix before elimination.
 * Every row holding exactly one entry marks that entry's column; the marked
 * columns are then removed from every row that has more than one entry.
 * Single-entry rows themselves are left untouched.
 * NOTE(review): a single-entry row in the right hand side column (n-1) is
 * treated like any other column here -- confirm that this is intended.
 */
inline
void
LinearSystem<F2>::preprocessMatrix()
{
    std::vector<bool> zeros(n, false);
    for (int r = 0; r < m; ++r) {
        if (matrix[r].size() == 1) { zeros[matrix[r][0]] = true; }
    }

    for (int r = 0; r < m; ++r) {
        Row& row = matrix[r];
        if (row.size() <= 1) { continue; }

        // Compact the surviving entries in a single pass (order preserved)
        // rather than erasing one element at a time, which is quadratic.
        size_t kept = 0;
        for (size_t k = 0; k < row.size(); ++k) {
            if (!zeros[row[k]]) {
                row[kept] = row[k];
                ++kept;
            }
        }
        row.resize(kept);
    }
}

/**
 * Adds row i to row j (row j is overwritten, row i is unchanged).
 * Both indices must be valid rows; the rows must hold sorted entries.
 */
inline
void
LinearSystem<F2>::add_rows(int i, int j)
{
    assert(0 <= i && i < m);
    assert(0 <= j && j < m);
    add_rows(matrix[i], matrix[j]);
}

/**
 * Adds row ri to row rj over F2: rj ends up holding the column indices that
 * occur in exactly one of the two rows; ri is not modified.
 * Assumes that the entries in both rows are in sorted order and that ri and
 * rj are different objects.
 * Uses a function-local static scratch row to avoid reallocating on each call.
 */
inline
void
LinearSystem<F2>::add_rows(Row& ri, Row& rj)
{
    static Row merged;
    merged.reserve(ri.size() + rj.size());
    merged.resize(0);

    Row::iterator src = ri.begin();
    Row::iterator dst = rj.begin();
    while (src != ri.end() && dst != rj.end())
    {
        if (*src == *dst) {
            // Present in both rows: 1 + 1 = 0, so the entry disappears.
            ++src;
            ++dst;
        } else if (*src < *dst) {
            merged.push_back(*src);
            ++src;
        } else {
            merged.push_back(*dst);
            ++dst;
        }
    }

    // At most one of the two tails is non-empty. The tail of rj has to be
    // copied before rj is overwritten; the tail of ri is appended afterwards.
    merged.insert(merged.end(), dst, rj.end());

    rj = merged;
    rj.insert(rj.end(), src, ri.end());
}


/** Consistency check with the default algorithm (index 1, striped elimination). */
inline
bool
LinearSystem<F2>::isConsistent()
{
    const int stripedAlgorithm = 1;
    return isConsistent(stripedAlgorithm);
}

/**
 * Consistency check with a selectable algorithm:
 *   0 = ordinary Gaussian elimination, 1 = striped, 2 = striped without
 *   gray codes, 3 = original striped version.
 * Any other index prints an error and falls back to ordinary Gaussian elimination.
 */
inline
bool
LinearSystem<F2>::isConsistent(int alg)
{
    if (alg == 0) { return normal_ge_consistent(); }
    if (alg == 1) { return striped_ge_consistent(); }
    if (alg == 2) { return striped_ge_noGrayCode_consistent(); }
    if (alg == 3) { return striped_ge_original(); }

    std::cout << "ERROR: unknown algorithm index; using Gaussian Elimination" << std::endl;
    return normal_ge_consistent();
}

/**
 * Check whether the linear system is consistent.
 * Uses ordinary Gaussian Elimination, processing columns in increasing order.
 * Rows are bucketed by their leading (smallest) column. For each column the
 * sparsest row in its bucket becomes the pivot and is added to the others.
 * The system is inconsistent as soon as some row has the right hand side
 * column (n-1) as its leading entry.
 * The chosen pivot rows are recorded in pivots (used by back_solve2()); the
 * list is reset first so that repeated calls do not accumulate stale pivots.
 */
inline
bool
LinearSystem<F2>::normal_ge_consistent()
{
    preprocessMatrix();

    // Start from a clean pivot list; leftovers from an earlier elimination
    // would otherwise be replayed by back_solve2().
    pivots.clear();

    std::vector<std::vector<int> > col_entries(n);
    for (int r = 0; r < m; ++r) {
        if (!matrix[r].empty()) {
            if (matrix[r].front() == n-1) { return false; }
            col_entries[matrix[r].front()].push_back(r);
        }
    }

    for (int c = 0; c < n; ++c) {
        // Choose the next row: the sparsest one leading with column c.
        std::vector<int>& col = col_entries[c];
        if (col.empty()) { continue; }
        int next = col.front();
        for (int i = 1; i < (int) col.size(); ++i) {
            if (matrix[col[i]].size() < matrix[next].size()) { next = col[i]; }
        }

        pivots.push_back(next);

        for (int i = 0; i < (int) col.size(); ++i) {
            int r = col[i];
            if (r != next) {
                add_rows(next, r);
                if (!matrix[r].empty()) {
                    // Check if system is inconsistent.
                    if (matrix[r].front() == n-1) { return false; }
                    // Update first non-zero entries data structure.
                    col_entries[matrix[r].front()].push_back(r);
                }
            }
        }
    }
    return true;
}

/** Rank computed with the default algorithm (index 1, striped elimination). */
inline
int
LinearSystem<F2>::rank()
{
    const int stripedAlgorithm = 1;
    return rank(stripedAlgorithm);
}

/**
 * Rank computed with a selectable algorithm:
 *   0 = ordinary Gaussian elimination, 1 = striped.
 * Any other index prints an error and falls back to ordinary Gaussian elimination.
 */
inline
int
LinearSystem<F2>::rank(int alg)
{
    if (alg == 0) { return normal_ge_rank(); }
    if (alg == 1) { return striped_ge_rank(); }

    std::cout << "ERROR: unknown algorithm index; using Gaussian Elimination" << std::endl;
    return normal_ge_rank();
}

/**
 * Computes the rank of the matrix using Gaussian Elimination. Processes
 * columns in increasing order.
 * Rows are bucketed by their leading (smallest) column. For each column the
 * sparsest row in its bucket becomes the pivot and is added to the others.
 * The rank is the number of pivots found. The pivot list is reset first, so
 * calling this again (or after another elimination that filled pivots) does
 * not count old pivots a second time.
 */
inline
int
LinearSystem<F2>::normal_ge_rank()
{
    preprocessMatrix();

    // The result is pivots.size(), so stale pivots must not be carried over.
    pivots.clear();

    std::vector<std::vector<int> > col_entries(n);
    for (int r = 0; r < m; ++r) {
        if (!matrix[r].empty()) {
            col_entries[matrix[r].front()].push_back(r);
        }
    }

    for (int c = 0; c < n; ++c) {
        // Choose the next row: the sparsest one leading with column c.
        std::vector<int>& col = col_entries[c];
        if (col.empty()) { continue; }
        int next = col.front();
        for (int i = 1; i < (int) col.size(); ++i) {
            if (matrix[col[i]].size() < matrix[next].size()) { next = col[i]; }
        }

        pivots.push_back(next);

        for (int i = 0; i < (int) col.size(); ++i) {
            int r = col[i];
            if (r != next) {
                add_rows(next, r);
                if (!matrix[r].empty()) {
                    // Update first non-zero entries data structure.
                    col_entries[matrix[r].front()].push_back(r);
                }
            }
        }
    }
    return pivots.size();
}

/**
 * Computes the rank of the matrix, processing columns in decreasing order
 * and treating no row as already processed (cutoff 0).
 */
inline
int
LinearSystem<F2>::rank_reverse()
{
    const Index noRowsProcessed = 0;
    return rank_reverse(noRowsProcessed);
}

/**
 * Computes the rank of the matrix.  Processes columns in decreasing order.
 * Rows are bucketed by their last (largest) column. For each column one row
 * of the bucket is the pivot and is added to all the others. The pivot is
 * the first row of the bucket when that row's index is below cutoff;
 * otherwise it is the sparsest row of the bucket.
 * Afterwards all empty rows are removed from the matrix, m is updated and
 * the number of remaining rows is returned.
 */
inline
int
LinearSystem<F2>::rank_reverse(Index cutoff)
{
    preprocessMatrix();

    // trailing[c] lists the rows whose last non-zero entry is in column c.
    std::vector<std::vector<int> > trailing(n);
    for (int row = 0; row < m; ++row) {
        if (matrix[row].empty()) { continue; }
        trailing[matrix[row].back()].push_back(row);
    }

    for (int c = n-1; c >= 0; --c) {
        std::vector<int>& bucket = trailing[c];
        if (bucket.empty()) { continue; }

        int pivot = bucket.front();
        if (pivot >= cutoff) {
            for (int k = 1; k < (int) bucket.size(); ++k) {
                if (matrix[bucket[k]].size() < matrix[pivot].size()) { pivot = bucket[k]; }
            }
        }

        for (int k = 0; k < (int) bucket.size(); ++k) {
            const int row = bucket[k];
            if (row == pivot) { continue; }
            add_rows(pivot, row);
            if (matrix[row].empty()) { continue; }
            // Re-file the row under its new last column.
            trailing[matrix[row].back()].push_back(row);
        }
    }

    // Move the non-empty rows to the front (keeping their order) and cut
    // off the empty remainder.
    Index kept = 0;
    for (size_t i = 0; i < matrix.size(); ++i) {
        if (matrix[i].empty()) { continue; }
        matrix[i].swap(matrix[kept]);
        ++kept;
    }
    matrix.erase(matrix.begin()+kept, matrix.end());
    m = kept;

    return m;
}

/**
 * Assumes that rank_reverse has been called.
 * Records, for every column, the row whose last entry lies in that column.
 * Then, going through the columns in increasing order, each such row gets
 * the recorded rows of all its other (non-last) entries added to it.
 */
inline
void
LinearSystem<F2>::diagonalise()
{
    // rowEndingAt[c] = row whose last entry is column c, or -1 if none.
    // (Local table; unrelated to the member pivots.)
    std::vector<int> rowEndingAt(n, -1);
    for (int row = 0; row < m; ++row) {
        if (matrix[row].empty()) { continue; }
        rowEndingAt[matrix[row].back()] = row;
    }

    std::vector<int> reducers;
    for (int c = 0; c < n; ++c) {
        const int target = rowEndingAt[c];
        if (target == -1) { continue; }

        // Collect first, add afterwards: adding rows changes the target row
        // and would invalidate the iterators used to scan it.
        reducers.clear();
        const Row::iterator lastEntry = matrix[target].end() - 1;
        for (Row::iterator it = matrix[target].begin(); it != lastEntry; ++it) {
            const int reducer = rowEndingAt[*it];
            if (reducer != -1) { reducers.push_back(reducer); }
        }

        for (size_t k = 0; k < reducers.size(); ++k) {
            add_rows(reducers[k], target);
        }
    }
}

/**
 * Assumes that rank_reverse has just been called.
 * Counts the rows whose last entry lies in the column range [start, end)
 * and returns (end - start) minus that count.
 * Empty rows have no last entry and are skipped.
 */
inline
int
LinearSystem<F2>::get_codim(Index start, Index end) const
{
    int dim = 0;
    for (size_t i = 0; i < matrix.size(); ++i) {
        const Row& row = matrix[i];
        if (row.empty()) { continue; }
        if (row.back() >= start && row.back() < end) { ++dim; }
    }
    return (end-start) - dim;
}

/**
 * Computes the solution of the linear system by back substitution and
 * returns a reference to the stored solution vector.
 */
inline
const std::vector<F2>&
LinearSystem<F2>::getSolution()
{
    back_solve2();
    return soln;
}

/**
 * Computes the solution of the linear system.
 * This function assumes that the linear system is consistent and
 * that the matrix is in upper triangle form with no zero rows.
 * Pivot rows are visited in reverse order; the variable of each row's first
 * column is set to the F2 sum of the values of the row's other columns.
 * The right hand side column (n-1) takes part as a value fixed to 1 and is
 * dropped from the result at the end.
 * With no columns at all (n <= 0) the solution is left empty.
 */
inline
void
LinearSystem<F2>::back_solve2()
{
    soln.clear();
    if (n <= 0) { return; }   // nothing to solve; avoids writing soln[-1]

    soln.resize(n, 0);
    soln[n-1] = 1;
    for (std::vector<int>::reverse_iterator i = pivots.rbegin(); i != pivots.rend(); ++i) {
        const Row& r = matrix[*i];
        if (r.empty()) { continue; }   // an empty row has no pivot column to solve for
        const int c = r.front();
        for (int j = 1; j < (int) r.size(); ++j) {
            soln[c] = (soln[r[j]] != soln[c]);
        }
    }
    soln.resize(n-1);
}

/**
 * Forms the stripe covering columns thisCol-s+1 .. thisCol in bit-packed form.
 * Only rows listed in col_entries for those columns are visited.
 * For each such row, its entries that fall inside the stripe are removed
 * from the sparse row and packed into one word, where bit k stands for
 * column thisCol-k. The word is appended to stripe and the row's matrix
 * index to stripe_row_indices.
 */
inline
void
LinearSystem<F2>::make_stripe(std::vector<std::vector<int> >& col_entries, int thisCol, int s,
			      std::vector<unsigned long>& stripe, std::vector<int>& stripe_row_indices)
{
    stripe.clear();
    stripe_row_indices.clear();

    const int firstCol = thisCol - s + 1;
    for (int col = thisCol; col >= firstCol; --col) {
        const std::vector<int>& rowsInCol = col_entries[col];
        for (int idx = 0; idx < (int) rowsInCol.size(); ++idx) {
            const int rowIndex = rowsInCol[idx];
            Row& sparseRow = matrix[rowIndex];

            // Peel stripe entries off the back of the row until an entry
            // left of the stripe (or the row's start) is reached.
            unsigned long packed = 0;
            while (!sparseRow.empty()) {
                const int bit = thisCol - sparseRow.back();
                if (bit >= s) { break; }
                sparseRow.pop_back();
                packed |= (1uL << bit);
            }

            stripe.push_back(packed);
            stripe_row_indices.push_back(rowIndex);
        }
    }
}

/**
 * Finds a basis for the given stripe.
 * Elimination runs on a private copy of stripe2; stripe2 and
 * stripe_row_indices only have their rows reordered so that the basis rows
 * end up at the top.
 * independentRows receives the matrix indices of the basis rows.
 * independentCols receives the pivot bit position of each basis row
 * (columns in the stripe, counting from the right).
 * Returns the rank of the stripe (the number of basis vectors it found).
 */
inline
int
LinearSystem<F2>::find_stripe_basis(std::vector<unsigned long>& stripe2, std::vector<int>& stripe_row_indices,
				    std::vector<int>& independentRows, std::vector<int>& independentCols)
{
    static std::vector<unsigned long> work;   // elimination is done on this copy of stripe2

    independentRows.clear();
    independentCols.clear();
    work = stripe2;

    int stripeRank = 0;
    for (;;) {
        // Among the remaining non-zero stripe rows pick the one whose sparse
        // matrix row is shortest.
        int chosen = -1;
        int bestDensity = n + m;    // upper bound
        for (int i = stripeRank; i < (int) work.size(); ++i) {
            if (work[i] == 0) { continue; }
            const int density = (int) matrix[stripe_row_indices[i]].size();
            if (density < bestDensity) {
                chosen = i;
                bestDensity = density;
            }
        }
        if (chosen < 0) { break; }  // all remaining rows are zero

        // The lowest set bit of the chosen row is the pivot column.
        unsigned long pivotBit = (~(work[chosen] - 1uL) & work[chosen]);

        std::swap(work[chosen], work[stripeRank]);
        std::swap(stripe2[chosen], stripe2[stripeRank]);
        std::swap(stripe_row_indices[chosen], stripe_row_indices[stripeRank]);

        // Clear the pivot bit from every row below (copy only).
        for (int i = stripeRank + 1; i < (int) work.size(); ++i) {
            if (work[i] & pivotBit) {
                work[i] ^= work[stripeRank];
            }
        }

        independentRows.push_back(stripe_row_indices[stripeRank]);

        // Turn the single-bit mask into its bit position.
        int bitPosition = 0;
        while (pivotBit > 1uL) {
            pivotBit >>= 1;
            ++bitPosition;
        }
        independentCols.push_back(bitPosition);

        ++stripeRank;
    }
    return stripeRank;
}

/**
 * Forward-eliminates within the first stripeRank rows of the stripe: for
 * each basis row i, its pivot bit (independentCols[i]) is cleared from every
 * later basis row that has it set.
 * Each elimination is applied both to the sparse rows of the whole matrix
 * and to the packed stripe rows.
 * Assumes that these rows are linearly independent.
 */
inline
void
LinearSystem<F2>::diagonalize_stripe_basis(std::vector<unsigned long>& stripe, std::vector<int>& independentRows,
					   std::vector<int>& independentCols, int stripeRank)
{
    for (int pivot = 0; pivot < stripeRank; ++pivot) {
        const unsigned long pivotBit = 1uL << independentCols[pivot];
        for (int lower = pivot + 1; lower < stripeRank; ++lower) {
            if ((stripe[lower] & pivotBit) == 0) { continue; }
            add_rows(independentRows[pivot], independentRows[lower]);
            stripe[lower] ^= stripe[pivot];
        }
    }
}

/**
 * Calculates the linear combination of rows of the basis necessary
 * to generate each standard basis vector. Standard basis vectors
 * correspond to the columns in independentCols. The values of these
 * so-called standard basis vectors are undefined in dependent columns.
 *
 * The first stripeRank stripe rows are reduced on a scratch copy; in
 * standard_basis_patterns[j], bit i records that basis row i is part of the
 * combination that produced reduced row j.
 */
inline
void
LinearSystem<F2>::find_standard_basis_patterns(std::vector<unsigned long>& stripe, std::vector<int>& independentCols, 
					       int stripeRank,
					       std::vector<unsigned long>& standard_basis_patterns)
{
    static std::vector<unsigned long> reduced(64);
    reduced.clear();
    standard_basis_patterns.clear();

    // Start with each basis row being "itself": pattern k has only bit k set.
    for (int k = 0; k < stripeRank; ++k) {
        reduced.push_back(stripe[k]);
        standard_basis_patterns.push_back(1uL << k);
    }

    // Clear every pivot column from all rows except its own pivot row,
    // mirroring each row operation on the patterns.
    for (int pivot = 0; pivot < stripeRank; ++pivot) {
        const unsigned long pivotBit = 1uL << independentCols[pivot];
        assert(reduced[pivot] & pivotBit);
        for (int other = 0; other < stripeRank; ++other) {
            if (other == pivot) { continue; }
            if ((reduced[other] & pivotBit) == 0) { continue; }
            reduced[other] ^= reduced[pivot];
            standard_basis_patterns[other] ^= standard_basis_patterns[pivot];
        }
    }
}

/**
 * Each non-basis row is described by the coefficients of the basis vectors
 * needed to construct it. That coefficient word is interpreted as a gray
 * code and converted to its index in the standard gray code; the rows are
 * then sorted with compThreeTuple. This should reduce the number of row
 * operations needed to maintain the cursor row.
 *
 * stripe_gray_codes receives one tuple per row from stripeRank onwards:
 * (index in stripe, gray code index, size of the sparse matrix row).
 */
inline
void
LinearSystem<F2>::sort_by_gray_code(std::vector<unsigned long>& stripe, std::vector<int>& stripe_row_indices,
				    std::vector<int>& independentCols, int stripeRank,
				    std::vector<unsigned long>& standard_basis_patterns,
				    std::vector<ThreeTuple>& stripe_gray_codes)
{
    stripe_gray_codes.clear();

    for (int i = stripeRank; i < (int) stripe.size(); ++i) {
        const unsigned long rowBits = stripe[i];

        // Combine the patterns of every independent column set in this row.
        unsigned long coeffs = 0u;
        for (int j = 0; j < stripeRank; ++j) {
            const unsigned long colBit = 1uL << independentCols[j];
            if (rowBits & colBit) {
                coeffs ^= standard_basis_patterns[j];
            }
        }

        // Taking coeffs as a gray code, compute its index in the standard
        // gray code (xor of all right shifts).
        unsigned long gray_code = 0u;
        for (; coeffs != 0; coeffs >>= 1) {
            gray_code ^= coeffs;
        }

        stripe_gray_codes.push_back(ThreeTuple(i, gray_code, matrix[stripe_row_indices[i]].size()));
    }

    sort(stripe_gray_codes.begin(), stripe_gray_codes.end(), compThreeTuple);
}


/**
 * Original variant of sort_by_gray_code: uses a 2-tuple, not a 3-tuple, and
 * no standard basis patterns.
 * For every row from stripeRank onwards, the bits found in the independent
 * columns are condensed into one word (bit j = independent column j), that
 * word is converted from gray code to its index, and the (stripe index,
 * gray code index) pairs are sorted with compTwoTuple.
 *
 * All masks are built from 1uL so that they are as wide as the
 * unsigned long stripe words; shifting a plain unsigned int by 32 or more
 * positions is undefined.
 */
inline
void
LinearSystem<F2>::sort_by_gray_code_origional(std::vector<unsigned long>& stripe,
				    std::vector<int>& independentCols, int stripeRank,
				    std::vector<TwoTuple>& stripe_gray_codes)
{
    stripe_gray_codes.clear();

    for (int i = stripeRank; i < (int) stripe.size(); ++i) {
        const unsigned long stripe_row = stripe[i];

        unsigned long condensed_row = 0uL;
        for (int j = 0; j < stripeRank; ++j) {
            if (stripe_row & (1uL << independentCols[j])) {
                condensed_row |= (1uL << j);
            }
        }

        // Gray code -> index: xor of all right shifts.
        unsigned long gray_code = 0uL;
        while (condensed_row != 0) {
            gray_code ^= condensed_row;
            condensed_row >>= 1;
        }
        stripe_gray_codes.push_back(TwoTuple(i, gray_code));
    }//for i. Make the list of 2-tuples.

    std::sort(stripe_gray_codes.begin(), stripe_gray_codes.end(), compTwoTuple);
}

/**
 * Eliminates the given stripe, using the first stripeRank rows as a basis.
 * The cursor row starts out as an all zero row and is moved from one target
 * row to the next (in stripe_gray_codes order) by adding basis rows to it.
 * Once its stripe bits equal those of the target row, it is added to that
 * row, which clears the row's stripe part.
 * Every processed row that is still non-empty is re-filed in col_entries
 * under its last column.
 * Returns false as soon as a processed row ends in a negative column index,
 * true otherwise.
 * NOTE(review): the negative-index test looks like the inconsistency check;
 * how such an index can arise is not visible here -- confirm with the caller.
 */
inline
bool
LinearSystem<F2>::eliminate_stripe(std::vector<unsigned long>& stripe, std::vector<int>& stripe_row_indices,
				   std::vector<unsigned long>& standard_basis_patterns,
				   std::vector<ThreeTuple>& stripe_gray_codes, std::vector<std::vector<int> >& col_entries,
				   std::vector<int>& independentCols, int stripeRank)
{
    static Row cursor_row;              // sparse part of the cursor row (outside the stripe)
    cursor_row.clear();
    unsigned long cursorBits = 0u;      // stripe part of the cursor row

    const int numTargets = (int) stripe_gray_codes.size();
    for (int i = 0; i < numTargets; ++i) {
        const int stripeIdx = stripe_gray_codes[i].index;
        const unsigned long targetBits = stripe[stripeIdx];
        const int matrixIdx = stripe_row_indices[stripeIdx];
        Row& row = matrix[matrixIdx];

        if (cursorBits != targetBits) {
            // Which basis rows must be added to turn the cursor into the target?
            const unsigned long difference = cursorBits ^ targetBits;
            unsigned long combination = 0u;
            for (int j = 0; j < stripeRank; ++j) {
                if (difference & (1uL << independentCols[j])) {
                    combination ^= standard_basis_patterns[j];
                }
            }
            for (int j = 0; j < stripeRank; ++j) {
                if ((combination & (1uL << j)) == 0) { continue; }
                cursorBits ^= stripe[j];
                add_rows(matrix[stripe_row_indices[j]], cursor_row);
            }
        }

        // When the target row is sparser than the cursor row, use it (instead
        // of the cursor) to eliminate the following rows with identical
        // stripe bits.
        if (cursor_row.size() > row.size()) {
            while (i + 1 < numTargets) {
                const int nextStripeIdx = stripe_gray_codes[i + 1].index;
                if (stripe[nextStripeIdx] != targetBits) { break; }

                const int nextMatrixIdx = stripe_row_indices[nextStripeIdx];
                ++i;
                Row& nextRow = matrix[nextMatrixIdx];
                add_rows(row, nextRow);
                if (!nextRow.empty()) {
                    if (nextRow.back() < 0) { return false; }
                    col_entries[nextRow.back()].push_back(nextMatrixIdx);
                }
            }
        }

        // The target's stripe bits are not updated: they are never read again.
        add_rows(cursor_row, row);

        if (!row.empty()) {
            if (row.back() < 0) { return false; }
            col_entries[row.back()].push_back(matrixIdx);
        }
    }
    return true;
}


/**
 * Original variant of eliminate_stripe (same function as in
 * LinearSystemF2.h.r5); works on 2-tuples and without standard basis
 * patterns.
 * For each row in stripe_gray_codes order the cursor row is updated by
 * adding basis row j for every independent column j in which cursor and
 * target differed; the cursor is then added to the target row. Rows that
 * remain non-empty are re-filed in col_entries under their last column.
 * Returns false as soon as a processed row ends in a negative column index,
 * true otherwise.
 *
 * Column masks are built from 1uL so that they are as wide as the
 * unsigned long stripe words; shifting a plain unsigned int by 32 or more
 * positions is undefined.
 */
inline
bool
LinearSystem<F2>::eliminate_stripe_origional(std::vector<unsigned long>& stripe, std::vector<int>& stripe_row_indices,
				   std::vector<TwoTuple>& stripe_gray_codes, std::vector<std::vector<int> >& col_entries,
				   std::vector<int>& independentCols, int stripeRank)
{
    static Row cursor_row;               // sparse part of the cursor row (outside the stripe)
    cursor_row.clear();
    unsigned long cursor_row_bits = 0uL; // representation of the cursor row in the stripe

    for (int k = 0; k < (int) stripe_gray_codes.size(); ++k) {
        const int i = stripe_gray_codes[k].index; // index of the row in stripe, from the 2-tuple
        const int abs_index = stripe_row_indices[i];
        Row& row = matrix[abs_index];

        if (cursor_row_bits != stripe[i]) {
            // The difference is taken once, before the cursor starts changing.
            const unsigned long different_bits = cursor_row_bits ^ stripe[i];
            for (int j = 0; j < stripeRank; ++j) {
                const unsigned long col_mask = 1uL << independentCols[j];
                if (col_mask & different_bits) {
                    cursor_row_bits ^= stripe[j];
                    add_rows(matrix[stripe_row_indices[j]], cursor_row);
                }
            }
        }//if. Get the cursor row equal to the current row.

        // stripe[i] is not updated: it is never looked at again.
        add_rows(cursor_row, row);

        if (!row.empty()) {
            if (row.back() < 0) { return false; }
            col_entries[row.back()].push_back(abs_index);
        } // Update col_entries, and check for solutions.
    }
    return true;
}





/**
 * Eliminates the given stripe, using the first stripeRank rows as a basis.
 * The cursor row is taken to be an all zero row.
 * Does NOT use gray codes: the rows from stripeRank onwards are processed in
 * their stored order and stripe_gray_codes is not read.
 * Rows that remain non-empty are re-filed in col_entries under their last
 * column. Returns false as soon as a processed row ends in a negative
 * column index, true otherwise.
 */
inline
bool
LinearSystem<F2>::eliminate_stripe_noGrayCodes(std::vector<unsigned long>& stripe, std::vector<int>& stripe_row_indices,
				   std::vector<ThreeTuple>& stripe_gray_codes, std::vector<std::vector<int> >& col_entries,
				   std::vector<int>& independentCols, int stripeRank)
{
    static Row cursor_row;              // sparse part of the cursor row (outside the stripe)
    cursor_row.clear();
    unsigned long cursorBits = 0u;      // stripe part of the cursor row

    for (int stripeIdx = stripeRank; stripeIdx < (int) stripe.size(); ++stripeIdx) {
        const unsigned long targetBits = stripe[stripeIdx];
        const int matrixIdx = stripe_row_indices[stripeIdx];
        Row& row = matrix[matrixIdx];

        if (cursorBits != targetBits) {
            // The difference is re-evaluated for every column because the
            // cursor changes while basis rows are being added to it.
            for (int j = 0; j < stripeRank; ++j) {
                const unsigned long colBit = 1uL << independentCols[j];
                if ((colBit & (cursorBits ^ targetBits)) == 0) { continue; }
                cursorBits ^= stripe[j];
                add_rows(matrix[stripe_row_indices[j]], cursor_row);
            }
        }

        // The target's stripe bits are not updated: they are never read again.
        add_rows(cursor_row, row);

        if (!row.empty()) {
            if (row.back() < 0) { return false; }
            col_entries[row.back()].push_back(matrixIdx);
        }
    }
    return true;
}




/**
 * Cancels the rows listed in stripe_gray_codes against the stripe basis
 * (the first stripeRank stripe rows). The cursor row starts out as the
 * all-zero row. Unlike the bool-returning variants, this one never aborts:
 * it does not look for rows that end in a negative column index.
 */
inline
void
LinearSystem<F2>::eliminate_stripe2(std::vector<unsigned long>& stripe, std::vector<int>& stripe_row_indices,
				    std::vector<ThreeTuple>& stripe_gray_codes,
				    std::vector<std::vector<int> >& col_entries,
				    std::vector<int>& independentCols, int stripeRank)
{
    static Row accum_row;               // sparse part of the cursor row, outside the stripe
    accum_row.clear();
    unsigned long accum_bits = 0u;      // stripe bits of the cursor row

    const int num_codes = (int) stripe_gray_codes.size();
    for (int k = 0; k < num_codes; ++k) {
        const int pos = stripe_gray_codes[k].index;     // which stripe row to process next
        const unsigned long target_bits = stripe[pos];
        const int matrix_row = stripe_row_indices[pos];
        Row& target = matrix[matrix_row];

        // Move the cursor until it agrees with the target in every pivot column.
        // The difference is re-evaluated after each basis row is folded in.
        if (accum_bits != target_bits) {
            for (int b = 0; b < stripeRank; ++b) {
                const unsigned long pivot_mask = 1uL << independentCols[b];
                if ((accum_bits ^ target_bits) & pivot_mask) {
                    accum_bits ^= stripe[b];
                    add_rows(matrix[stripe_row_indices[b]], accum_row);
                }
            }
        }

        // The dense word stripe[pos] is left untouched; it is not read again.
        add_rows(accum_row, target);

        if (target.empty()) {
            continue;
        }
        // File the row under the column of its last remaining entry.
        col_entries[target.back()].push_back(matrix_row);
    }
}





/* ************************************************************************************************************************

Uses Striped Gaussian Elimination to put the matrix in row echelon form.
Aborts and returns false if system is infeasible. Otherwise returns true.
 striped_ge_consistent
     make_stripe
     find_stripe_basis
     find_standard_basis_patterns  (the diagonalize_stripe_basis call is commented out)
     sort_by_gray_code
     eliminate_stripe
     


Like striped_ge_consistent, only does not use gray codes!
striped_ge_noGrayCode_consistent()
   make_stripe
   find_stripe_basis
   diagonalize_stripe_basis
   eliminate_stripe_noGrayCodes
 



Like striped_ge_consistent, but it is meant to find the rank of the system.
striped_ge_rank()
  make_stripe()
  find_stripe_basis
  diagonalize_stripe_basis
  sort_by_gray_code  (this call is currently commented out in striped_ge_rank)
  eliminate_stripe2  
  
  
See LinearSystemF2.h.r5. Updates were needed for "32 vs 64bits" and small things like that.
striped_ge_original
  make_stripe
  find_stripe_basis
  diagonalize_stripe_basis
  sort_by_gray_code_origional: should be same function as in LinearSystemF2.h.r5
  eliminate_stripe_origional: should be same function as in LinearSystemF2.h.r5
  
*/






/**
 * Uses Striped Gaussian Elimination to put the matrix in row echelon form.
 * This version of SGE does not do any column pivoting.
 * Aborts and returns false if system is infeasible. Otherwise returns true.
 *
 * Infeasibility is detected as a row whose last entry is negative: rows that
 * contain the right-hand-side column n - 1 have that entry replaced by a -1
 * at the front of the row before elimination starts.
 *
 * The matrix is modified in place. When false is returned, elimination has
 * stopped part-way through.
 *
 * @return true if no inconsistent row was found, false otherwise.
 */
inline
bool
LinearSystem<F2>::striped_ge_consistent()
{
/**
 * We will walk the columns from RIGHT to LEFT. This allows us to use vector.push_back() vs 
 *   vector.push_front(). This also makes working with the bits of ints easier.
 * Matrix is of the form [b, A].
 * Will convert [b, A] to [ b * * .. *  * 1 ]
 *                        [ b * * .. 1      ]
 *                        [ ...             ]
 *                        [ b 1             ]
 *                        [ 0               ]
 */

    int s = 64;                                 // This should be an upper bound on s (chosen below)
    float singularity = 1.0;                    // Guess of number of columns needed to find an independent column
    
    int thisCol = n - 2;                        // current col
    std::vector<int> independentRows(s);        //list of rows.
    std::vector<int> independentCols(s);        //list of cols.
    std::vector<unsigned long> stripe(m);        //dense stripe stored in the bits of an unsigned long.
    std::vector<unsigned long> standard_basis_patterns(s);
    std::vector<ThreeTuple> stripe_gray_codes(m); //list of (index, gray_code) tuples.
    std::vector<int> stripe_row_indices(m);     //stripe_row_indices(r) = index of matrix row stored in stripe(r).
                                                //  This is a parallel vector to stripe.
    int stripeRank;                             //how many lin. independent rows do we have in the current stripe?
    int row, i, j;                              //for loop var's.
    unsigned long col_mask;			//for bit shifting things.
    int rank = 0;                               //sum of the stripe ranks found so far (the s <= 1 branch does not add to it)
    int num_zero_cols;                          //number of empty columns the current stripe was widened by

    for (row = 0; row < m; ++row) {
        if (!matrix[row].empty() && matrix[row].back() == n - 1) {
		    matrix[row].pop_back();
		    matrix[row].insert(matrix[row].begin(), -1);
		}
    }// convert the matrix from [A, b] to [b, A].
    // (This is a hack to make feasibility easier to calculate for reverse col order)

    std::vector<std::vector<int> > col_entries(n); // col_entries(c) = vector of row indices for col c.
    for (row = 0; row < m; ++row) {
        if (!matrix[row].empty()) {
			// A row whose last entry is the -1 marker is reported as inconsistent.
			if (matrix[row].back() < 0) { return false; }
    		col_entries[matrix[row].back()].push_back(row);
        }
    }//file every non-empty row under the column of its last entry.

    //std::cout << "s: " << s << "\n";

    //start of SGE	
    while (thisCol >= 0) {
        // Nothing to eliminate in an empty column.
        if (col_entries[thisCol].empty()) {
	    --thisCol;
	    continue;
	}

	//std::cout << "\nsingularity = " << singularity << "\n";
        // Stripe width estimate: singularity times log2 of (m - rank).
        s = (int) (singularity * log2(m - rank));

	/*
	int num_rows = 0;
	for (i = 0; i < s; ++i) {
	    //std::cout << "\ntemp... " << thisCol - i << std::endl;
	    num_rows += (int) col_entries[thisCol - i].size();
	}
	*/

	// Widen the stripe by one column for every empty column found inside it
	// (columns thisCol-1 down to the stripe's left edge j), up to 64 columns.
	i = thisCol - 1;
	j = thisCol - s + 1;
	num_zero_cols = 0;
	while (s < 64 && j >= 0 && i >= j) {
	    if (col_entries[i].empty()) {
	        ++s;
		--j;
		++num_zero_cols;
	    }
	    --i;
	}

	if (s > 64) s = 64; // 64 here is the number of bits in a computer word
	//if (s == 64) std::cout << "\n64\n";

	if (s < 1) s = 1;
	// Do not let the stripe extend to the left of column 0.
	if (thisCol + 1 - s < 0) s = thisCol + 1;

	//std::cout << "\ns = " << s << "; num_rows = " << num_rows << "\n";
	
	if (s <= 1) {

	    // Do a turn of Gaussian elimination

	    // Eliminate the other rows of this column using its least dense row
	    int row_choice = -1;
	    int row_density = n + m;	//upper bound.
	    std::vector<int>& colRef = col_entries[thisCol];
	    for(i = 0; i < (int) colRef.size(); ++i) {
	        if( (int) matrix[colRef[i]].size() < row_density) {
		    row_choice = colRef[i];
		    row_density = (int) matrix[colRef[i]].size();
		}
	    }//for i. find least dense row.
	    
	    int pivot_row = row_choice; //set the pivot_row to the least dense row.
	    for (i = 0; i < (int) col_entries[thisCol].size(); ++i) {
	        row = col_entries[thisCol][i];
		if (row != pivot_row) {
		    add_rows(pivot_row, row);
		    if (!matrix[row].empty()) {
		        // Inconsistent row found; otherwise re-file the row under its new last column.
		        if (matrix[row].back() < 0) { return false; }
			col_entries[matrix[row].back()].push_back(row);
		    }
		}
	    }
	    --thisCol;
	    continue;
	}//if s <= 1

        // Find up to s independent rows and columns in the current stripe

        // (Construct dense stripe representation)

	make_stripe(col_entries, thisCol, s, stripe, stripe_row_indices);

	//Now, stripe is a dense representation of the stripe's columns.

	// (Gaussian elimination on dense stripe)

	stripeRank = find_stripe_basis(stripe, stripe_row_indices, independentRows, independentCols);
	rank += stripeRank;

	//std::cout << "\nstripeRank / s = " << ((float) stripeRank) / ((float) s) << "\n";
	
	// Gaussian elimination on stripeRank-width band

	//diagonalize_stripe_basis(stripe, independentRows, independentCols, stripeRank);
	find_standard_basis_patterns(stripe, independentCols, stripeRank, standard_basis_patterns);

	// New estimate: non-empty columns in this stripe per independent row found.
	if (stripeRank != 0) singularity = ((float) (s - num_zero_cols)) / ((float) stripeRank);

	// Sort remaining rows by Gray code
	
	sort_by_gray_code(stripe, stripe_row_indices, independentCols, stripeRank, standard_basis_patterns, stripe_gray_codes);

	//(Eliminate rows)

	if(!eliminate_stripe(stripe, stripe_row_indices, standard_basis_patterns, stripe_gray_codes, col_entries, independentCols, stripeRank)) {
	    return false;
	}

	// Reconstruct sparse entries for the s by s block:
	// bit j of a basis row's stripe word is written back as column thisCol - j.

	for (i = 0; i < stripeRank; ++i) {
	    unsigned long stripe_row = stripe[i];
	    col_mask = 1uL;
	    for (j = 0; j < s; ++j) {
	        if (stripe_row & col_mask) {
		    matrix[independentRows[i]].push_back(thisCol-j);
		}
		col_mask <<= 1;
	    }
	}

	// Move on to the next stripe (s columns further left)
	thisCol -= s;

    }// while (thisCol >= 0) 

    //std::cout << "rank / n = " << ((float) rank) / ((float) n) << "\n";

    //No inconsistency found.
    return true;
}




/**
 * Original striped Gaussian elimination (see LinearSystemF2.h.r5), using a
 * fixed stripe width of 32 columns. The matrix is not fully diagonalized;
 * only each stripe's basis is.
 *
 * Steps:
 *   1. Rows containing the right-hand-side column n - 1 get that entry
 *      replaced by a -1 at the front of the row ([A, b] -> [b, A]).
 *   2. While at least a full stripe of columns is left, the stripe is made
 *      dense (make_stripe), a basis is found and diagonalized, the remaining
 *      rows are sorted (sort_by_gray_code_origional) and eliminated
 *      (eliminate_stripe_origional), and the basis rows' stripe entries are
 *      written back to the sparse matrix.
 *   3. The remaining columns are handled one at a time with ordinary
 *      Gaussian elimination, pivoting on the least dense row of the column.
 *
 * The matrix is modified in place. When false is returned, elimination has
 * stopped part-way through.
 *
 * @return false as soon as a row whose last entry is negative (the -1 marker)
 *         is found, i.e. the system is inconsistent; true otherwise.
 */
inline
bool
LinearSystem<F2>::striped_ge_original()
{
    // Fixed stripe width. (A formula based on log2(min(m, n)) was used here
    // at some point; with a constant width no lower-bound clamp is needed.)
    const int s = 32;

    int thisCol = n - 2;                        // current col
    std::vector<int> independentRows(s);        //list of rows.
    std::vector<int> independentCols(s);        //list of cols.
    std::vector<unsigned long> stripe(m);       //dense stripe, one word per row.
    std::vector<TwoTuple> stripe_gray_codes(m); //list of (index, gray_code) tuples.
    std::vector<int> stripe_row_indices(m);     //stripe_row_indices(r) = index of matrix row stored in stripe(r).
                                                //  This is a parallel vector to stripe.
    int stripeRank;                             //how many lin. independent rows do we have in the current stripe?
    int row, i, j;                              //for loop var's.
    unsigned long col_mask;                     //same width as a stripe word.

    for (row = 0; row < m; ++row) {
        if (!matrix[row].empty() && matrix[row].back() == n - 1) {
            matrix[row].pop_back();
            matrix[row].insert(matrix[row].begin(), -1);
        }
    }// convert the matrix from [A, b] to [b, A].

    std::vector<std::vector<int> > col_entries(n); // col_entries(c) = vector of row indices for col c.
    for (row = 0; row < m; ++row) {
        if (!matrix[row].empty()) {
            if (matrix[row].back() < 0) { return false; }
            col_entries[matrix[row].back()].push_back(row);
        }
    }//file every non-empty row under the column of its last entry.

    //start of SGE
    while (thisCol >= s) {

        // Construct the dense stripe representation.
        make_stripe(col_entries, thisCol, s, stripe, stripe_row_indices);

        // Gaussian elimination on the dense stripe: find and diagonalize a basis.
        stripeRank = find_stripe_basis(stripe, stripe_row_indices, independentRows, independentCols);
        diagonalize_stripe_basis(stripe, independentRows, independentCols, stripeRank);

        // Sort and eliminate only if there are non-basis rows in the stripe.
        if ((int) stripe.size() != stripeRank) {
            sort_by_gray_code_origional(stripe, independentCols, stripeRank, stripe_gray_codes);

            if (!eliminate_stripe_origional(stripe, stripe_row_indices, stripe_gray_codes, col_entries, independentCols, stripeRank)) {
                return false;
            }
        }

        // Reconstruct sparse entries for the s by s block: bit j of a basis
        // row's stripe word is written back as column thisCol - j.
        // This must also happen when the stripe held nothing but basis rows;
        // skipping it would leave those rows without their stripe columns.
        // NOTE(review): assumes make_stripe removed the stripe's entries from
        // the sparse rows (which is why they are pushed back here) - confirm.
        for (i = 0; i < stripeRank; ++i) {
            const unsigned long stripe_row = stripe[i];
            col_mask = 1uL;
            for (j = 0; j < s; ++j) {
                if (stripe_row & col_mask) {
                    matrix[independentRows[i]].push_back(thisCol - j);
                }
                col_mask <<= 1;
            }
        }

        // Move on to the next stripe (s columns further left)
        thisCol -= s;

    }//while (thisCol >= s)

    // Gaussian elimination on remainder
    while (thisCol >= 0) {
        if (col_entries[thisCol].empty()) {
            --thisCol;
            continue;
        }

        // Pick the least dense row of this column as the pivot.
        int row_choice = -1;
        int row_density = n + m;    //upper bound.
        std::vector<int>& colRef = col_entries[thisCol];
        for (i = 0; i < (int) colRef.size(); ++i) {
            if ((int) matrix[colRef[i]].size() < row_density) {
                row_choice = colRef[i];
                row_density = (int) matrix[colRef[i]].size();
            }
        }//for i. find least dense row.

        // Eliminate every other row of this column with the pivot row.
        int pivot_row = row_choice;
        for (i = 0; i < (int) col_entries[thisCol].size(); ++i) {
            row = col_entries[thisCol][i];
            if (row != pivot_row) {
                add_rows(pivot_row, row);
                if (!matrix[row].empty()) {
                    if (matrix[row].back() < 0) { return false; }
                    col_entries[matrix[row].back()].push_back(row);
                }
            }
        }
        --thisCol;
    }//while thisCol >= 0

    // No inconsistent row was found.
    return true;
}
/* end of original striped ge */





/**
 * Uses Striped Gaussian Elimination to put the matrix in row echelon form.
 * This version of SGE does not do any column pivoting.
 * Aborts and returns false if system is infeasible. Otherwise returns true.
 * Does not use gray codes: the rows of a stripe are eliminated in storage
 * order by eliminate_stripe_noGrayCodes.
 *
 * We walk the columns from RIGHT to LEFT. This allows us to use
 * vector.push_back() instead of inserting at the front, and makes working
 * with the bits of a word easier.
 * Matrix is of the form [b, A].
 * Will convert [b, A] to [ b * * .. *  * 1 ]
 *                        [ b * * .. 1      ]
 *                        [ ...             ]
 *                        [ b 1             ]
 *                        [ 0               ]
 *
 * The matrix is modified in place. When false is returned, elimination has
 * stopped part-way through.
 *
 * @return false as soon as a row whose last entry is negative (the -1 marker
 *         of the right-hand side) is found; true otherwise.
 */
inline
bool
LinearSystem<F2>::striped_ge_noGrayCode_consistent()
{
    int s = 64;                                 // This should be an upper bound on s (chosen below)
    float singularity = 1.0;                    // Guess of number of columns needed to find an independent column

    int thisCol = n - 2;                        // current col
    std::vector<int> independentRows(s);        //list of rows.
    std::vector<int> independentCols(s);        //list of cols.
    std::vector<unsigned long> stripe(m);       //dense stripe, one word per row.
    std::vector<ThreeTuple> stripe_gray_codes(m); //only passed through; eliminate_stripe_noGrayCodes does not read it.
    std::vector<int> stripe_row_indices(m);     //stripe_row_indices(r) = index of matrix row stored in stripe(r).
                                                //  This is a parallel vector to stripe.
    int stripeRank;                             //how many lin. independent rows do we have in the current stripe?
    int row, i, j;                              //for loop var's.
    unsigned long col_mask;                     //for bit shifting things.
    int rank = 0;                               //sum of the stripe ranks found so far
    int num_zero_cols;                          //number of empty columns the current stripe was widened by

    for (row = 0; row < m; ++row) {
        if (!matrix[row].empty() && matrix[row].back() == n - 1) {
            matrix[row].pop_back();
            matrix[row].insert(matrix[row].begin(), -1);
        }
    }// convert the matrix from [A, b] to [b, A].
    // (This is a hack to make feasibility easier to calculate for reverse col order)

    std::vector<std::vector<int> > col_entries(n); // col_entries(c) = vector of row indices for col c.
    for (row = 0; row < m; ++row) {
        if (!matrix[row].empty()) {
            if (matrix[row].back() < 0) { return false; }
            col_entries[matrix[row].back()].push_back(row);
        }
    }//file every non-empty row under the column of its last entry.

    //start of SGE
    while (thisCol >= 0) {

        // Nothing to eliminate in an empty column.
        if (col_entries[thisCol].empty()) {
            --thisCol;
            continue;
        }

        // Stripe width estimate: singularity times log2 of (m - rank).
        s = (int) (singularity * log2(m - rank));

        // Widen the stripe by one column for every empty column found inside
        // it (columns thisCol-1 down to the stripe's left edge j), up to 64.
        i = thisCol - 1;
        j = thisCol - s + 1;
        num_zero_cols = 0;
        while (s < 64 && j >= 0 && i >= j) {
            if (col_entries[i].empty()) {
                ++s;
                --j;
                ++num_zero_cols;
            }
            --i;
        }

        if (s > 64) s = 64; // 64 here is the number of bits in a computer word
        if (s < 1) s = 1;
        // Do not let the stripe extend to the left of column 0.
        if (thisCol + 1 - s < 0) s = thisCol + 1;

        if (s <= 1) {

            // Do a turn of Gaussian elimination on this single column.

            // Pick the least dense row of this column as the pivot.
            int row_choice = -1;
            int row_density = n + m;    //upper bound.
            std::vector<int>& colRef = col_entries[thisCol];
            for (i = 0; i < (int) colRef.size(); ++i) {
                if ((int) matrix[colRef[i]].size() < row_density) {
                    row_choice = colRef[i];
                    row_density = (int) matrix[colRef[i]].size();
                }
            }//for i. find least dense row.

            // Eliminate every other row of this column with the pivot row.
            int pivot_row = row_choice;
            for (i = 0; i < (int) col_entries[thisCol].size(); ++i) {
                row = col_entries[thisCol][i];
                if (row != pivot_row) {
                    add_rows(pivot_row, row);
                    if (!matrix[row].empty()) {
                        if (matrix[row].back() < 0) { return false; }
                        col_entries[matrix[row].back()].push_back(row);
                    }
                }
            }
            --thisCol;
            continue;
        }//if s <= 1

        // Find up to s independent rows and columns in the current stripe.

        // (Construct dense stripe representation)
        make_stripe(col_entries, thisCol, s, stripe, stripe_row_indices);

        // (Gaussian elimination on dense stripe)
        stripeRank = find_stripe_basis(stripe, stripe_row_indices, independentRows, independentCols);
        rank += stripeRank;

        // Gaussian elimination on stripeRank-width band
        diagonalize_stripe_basis(stripe, independentRows, independentCols, stripeRank);

        // Update the estimate and eliminate only if there are non-basis rows.
        if ((int) stripe.size() != stripeRank) {
            // New estimate: non-empty columns in this stripe per independent row found.
            if (stripeRank != 0) singularity = ((float) (s - num_zero_cols)) / ((float) stripeRank);

            if (!eliminate_stripe_noGrayCodes(stripe, stripe_row_indices, stripe_gray_codes, col_entries, independentCols, stripeRank)) {
                return false;
            }
        }

        // Reconstruct sparse entries for the s by s block: bit j of a basis
        // row's stripe word is written back as column thisCol - j.
        // This must also happen when the stripe held nothing but basis rows;
        // skipping it would leave those rows without their stripe columns.
        // NOTE(review): assumes make_stripe removed the stripe's entries from
        // the sparse rows (which is why they are pushed back here) - confirm.
        for (i = 0; i < stripeRank; ++i) {
            const unsigned long stripe_row = stripe[i];
            col_mask = 1uL;
            for (j = 0; j < s; ++j) {
                if (stripe_row & col_mask) {
                    matrix[independentRows[i]].push_back(thisCol - j);
                }
                col_mask <<= 1;
            }
        }

        // Move on to the next stripe (s columns further left)
        thisCol -= s;

    }//while (thisCol >= 0)

    //No inconsistency found.
    return true;
}










/**
 * Computes the rank of the matrix with Striped Gaussian Elimination.
 *
 * All n columns take part (elimination starts at column n - 1) and the
 * columns are walked from right to left. Every pivot is counted, whether it
 * comes from a stripe basis or from a single-column Gaussian step.
 *
 * The matrix is modified in place and the stripe columns of basis rows are
 * not written back, so the matrix is not in a usable echelon form afterwards.
 *
 * @return the number of pivots found. If a row whose last entry is negative
 *         shows up during a single-column step, elimination stops and 0 is
 *         returned.
 */
inline
int
LinearSystem<F2>::striped_ge_rank()
{
    int s = 64;                                 // This should be an upper bound on s (chosen below)
    float singularity = 1.0;                    // Guess of number of columns needed to find an independent column

    int thisCol = n - 1;                        // current col
    std::vector<int> independentRows(s);        //list of rows.
    std::vector<int> independentCols(s);        //list of cols.
    std::vector<unsigned long> stripe(m);       //dense stripe, one word per row.
    std::vector<ThreeTuple> stripe_gray_codes(m); //list of (index, gray_code) tuples.
    std::vector<int> stripe_row_indices(m);     //stripe_row_indices(r) = index of matrix row stored in stripe(r).
                                                //  This is a parallel vector to stripe.
    int stripeRank;                             //how many lin. independent rows do we have in the current stripe?
    int row, i, j;                              //for loop var's.
    int rank = 0;                               //number of pivots found so far
    int num_zero_cols;                          //number of empty columns the current stripe was widened by

    std::vector<std::vector<int> > col_entries(n); // col_entries(c) = vector of row indices for col c.
    for (row = 0; row < m; ++row) {
        if (!matrix[row].empty()) {
            col_entries[matrix[row].back()].push_back(row);
        }
    }//file every non-empty row under the column of its last entry.

    //start of SGE
    while (thisCol >= 0) {

        // Nothing to eliminate in an empty column.
        if (col_entries[thisCol].empty()) {
            --thisCol;
            continue;
        }

        // Stripe width estimate: singularity times log2 of (m - rank).
        s = (int) (singularity * log2(m - rank));

        // Widen the stripe by one column for every empty column found inside
        // it (columns thisCol-1 down to the stripe's left edge j), up to 64.
        i = thisCol - 1;
        j = thisCol - s + 1;
        num_zero_cols = 0;
        while (s < 64 && j >= 0 && i >= j) {
            if (col_entries[i].empty()) {
                ++s;
                --j;
                ++num_zero_cols;
            }
            --i;
        }

        if (s > 64) s = 64; // 64 here is the number of bits in an unsigned long stripe word
        if (s < 1) s = 1;
        // Do not let the stripe extend to the left of column 0.
        if (thisCol + 1 - s < 0) s = thisCol + 1;

        if (s <= 1) {

            // Do a turn of Gaussian elimination on this single column.

            // Pick the least dense row of this column as the pivot.
            int row_choice = -1;
            int row_density = n + m;    //upper bound.
            std::vector<int>& colRef = col_entries[thisCol];
            for (i = 0; i < (int) colRef.size(); ++i) {
                if ((int) matrix[colRef[i]].size() < row_density) {
                    row_choice = colRef[i];
                    row_density = (int) matrix[colRef[i]].size();
                }
            }//for i. find least dense row.

            // Eliminate every other row of this column with the pivot row.
            int pivot_row = row_choice;
            for (i = 0; i < (int) col_entries[thisCol].size(); ++i) {
                row = col_entries[thisCol][i];
                if (row != pivot_row) {
                    add_rows(pivot_row, row);
                    if (!matrix[row].empty()) {
                        // Same early exit as before, spelled as the int it
                        // always was (the old code returned `false`).
                        // NOTE(review): a rank of 0 is a questionable result
                        // for this case - confirm what callers expect.
                        if (matrix[row].back() < 0) { return 0; }
                        col_entries[matrix[row].back()].push_back(row);
                    }
                }
            }

            // The column was non-empty, so it supplied a pivot: count it.
            // (Without this, pivots found by single-column steps were
            // missing from the returned rank.)
            ++rank;
            --thisCol;
            continue;
        }//if s <= 1

        make_stripe(col_entries, thisCol, s, stripe, stripe_row_indices);

        stripeRank = find_stripe_basis(stripe, stripe_row_indices, independentRows, independentCols);
        rank += stripeRank;

        diagonalize_stripe_basis(stripe, independentRows, independentCols, stripeRank);

        // Elimination only if there are rows to eliminate
        if ((int) stripe.size() == stripeRank) {
            thisCol -= s;
            continue;
        }

        // New estimate: non-empty columns in this stripe per independent row found.
        if (stripeRank != 0) singularity = ((float) (s - num_zero_cols)) / ((float) stripeRank);

        // NOTE(review): stripe_gray_codes is never filled in by this function
        // (the sort_by_gray_code call was disabled), yet eliminate_stripe2
        // reads stripe_gray_codes[k].index for every element - verify that
        // the rows are visited as intended.
        //sort_by_gray_code(stripe, stripe_row_indices, independentCols, stripeRank, stripe_gray_codes);

        eliminate_stripe2(stripe, stripe_row_indices, stripe_gray_codes, col_entries, independentCols, stripeRank);

        // Move on to the next stripe (s columns further left)
        thisCol -= s;

    }

    return rank;
}

/**
 * Verifies the stored solution against every row of the matrix.
 * soln is temporarily grown to n entries (new entries get the value 1), each
 * row's entries are summed over soln, and the program is terminated with an
 * error message if any sum is non-zero. Afterwards soln is shrunk to n - 1
 * entries.
 */
inline
void
LinearSystem<F2>::check_solution()
{
    soln.resize(n,1);

    for (int r = 0; r < m; ++r) {
        const Row& entries = matrix[r];

        // Sum of the solution values selected by this row.
        F2 row_sum = 0;
        for (size_t k = 0; k < entries.size(); ++k) {
            row_sum += soln[entries[k]];
        }

        if (row_sum != 0) {
            std::cerr << "ERROR: Check solution failed!\n";
            exit(1);
        }
    }

    soln.resize(n-1);
}

/**
 * Print the linear system in sparse format: first a line "m n", then one
 * line "<row index>: <col> <col> ..." per non-empty row, then an empty line.
 */
inline
void
LinearSystem<F2>::print() const
{
    std::cout << m << " " << n << "\n";

    for (int i = 0; i < m; ++i) {
        const Row& r = matrix[i];
        if (r.empty()) {
            continue;   // empty rows are not printed
        }

        std::cout << i << ":";
        for (Row::const_iterator it = r.begin(); it != r.end(); ++it) {
            std::cout << " " << *it;
        }
        std::cout << "\n";
    }

    std::cout << std::endl;
}

/**
 * Print the linear system in sparse format, grouping the rows by the value
 * of their last entry: first the rows whose last entry is -1, then 0, and so
 * on up to n - 1. Within a group the rows appear in index order. Empty rows
 * are not printed.
 */
inline
void
LinearSystem<F2>::print_triangle() const
{
    std::cout << m << " " << n << "\n";

    for (int col = -1; col < n; ++col) {
        for (int i = 0; i < m; ++i) {
            const Row& r = matrix[i];
            if (r.empty() || r.back() != col) {
                continue;   // not a row of this group
            }

            std::cout << i << ":";
            for (Row::const_iterator it = r.begin(); it != r.end(); ++it) {
                std::cout << " " << *it;
            }
            std::cout << "\n";
        }
    }

    std::cout << std::endl;
}

/**
 * Print the linear system in dense format: one line per row with n
 * space-separated 0/1 values. The row's sparse entries are consumed from
 * left to right while the column counter advances.
 */
inline
void
LinearSystem<F2>::print_dense() const
{
    for (int i = 0; i < m; ++i) {
        const Row& r = matrix[i];
        Row::const_iterator next = r.begin();

        for (int col = 0; col < n; ++col) {
            // A 1 is printed when there is an unconsumed entry that is not
            // greater than the current column; that entry is then consumed.
            const bool is_set = (next != r.end()) && !(col < *next);
            if (is_set) {
                std::cout << " 1";
                ++next;
            } else {
                std::cout << " 0";
            }
        }
        std::cout << "\n";
    }
}


/** Print the entries of soln on one line, each preceded by a space. */
inline
void
LinearSystem<F2>::printSolution() const
{
    for (std::vector<F2>::const_iterator it = soln.begin(); it != soln.end(); ++it) {
        std::cout << " " << *it;
    }
    std::cout << "\n";
}



/**
 * Set new dimensions for the system: calls clearMatrix(), stores the new
 * row count in m and the new column count in n, and resizes matrix to m rows.
 * Nothing happens if either size is negative.
 */
inline
void
LinearSystem<F2>::updateRowCol(int newRowSize, int newColSize)
{
	// Reject negative dimensions without touching the current state.
	if (newRowSize < 0 || newColSize < 0) {
		return;
	}

	clearMatrix();
	m = newRowSize;
	n = newColSize;

	matrix.resize(m);
}//updateRowCol()
#endif

