/*=============================================================================

    This file is part of FLINT.

    FLINT is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    FLINT is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with FLINT; if not, write to the Free Software
    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA

=============================================================================*/
/******************************************************************************

    Copyright (C) 2010 Fredrik Johansson

******************************************************************************/

*******************************************************************************

    Memory management

*******************************************************************************

void nmod_mat_init(nmod_mat_t mat, slong rows, slong cols, mp_limb_t n)

    Initialises \code{mat} to a \code{rows}-by-\code{cols} matrix with 
    coefficients modulo~$n$, where $n$ can be any nonzero integer that 
    fits in a limb. All elements are set to zero.

void nmod_mat_init_set(nmod_mat_t mat, nmod_mat_t src)

    Initialises \code{mat} and sets its dimensions, modulus and elements 
    to those of \code{src}.

void nmod_mat_clear(nmod_mat_t mat)

    Clears the matrix and releases any memory it used. The matrix 
    cannot be used again until it is initialised. This function must be
    called exactly once when finished using an \code{nmod_mat_t} object.

void nmod_mat_set(nmod_mat_t mat, nmod_mat_t src)

    Sets \code{mat} to a copy of \code{src}. It is assumed 
    that \code{mat} and \code{src} have identical dimensions.

*******************************************************************************

    Basic properties and manipulation

*******************************************************************************

MACRO nmod_mat_entry(nmod_mat_t mat, slong i, slong j)

    Directly accesses the entry in \code{mat} in row $i$ and column $j$,
    indexed from zero. No bounds checking is performed. This macro can be
    used both for reading and writing coefficients.

slong nmod_mat_nrows(nmod_mat_t mat)

    Returns the number of rows in \code{mat}. This function is implemented
    as a macro.

slong nmod_mat_ncols(nmod_mat_t mat)

    Returns the number of columns in \code{mat}. This function is implemented
    as a macro.

*******************************************************************************

    Printing

*******************************************************************************

void nmod_mat_print_pretty(nmod_mat_t mat)

    Pretty-prints \code{mat} to \code{stdout}. A header is printed followed 
    by the rows enclosed in brackets. Each column is right-aligned to the 
    width of the modulus written in decimal, and the columns are separated by 
    spaces.
    For example:
    \begin{lstlisting}
    <2 x 3 integer matrix mod 2903>
    [   0    0 2607]
    [ 622    0    0]
    \end{lstlisting}

*******************************************************************************

    Random matrix generation

*******************************************************************************

void nmod_mat_randtest(nmod_mat_t mat, flint_rand_t state)

    Sets the elements to a random matrix with entries between $0$ and $m-1$
    inclusive, where $m$ is the modulus of \code{mat}. A sparse matrix is
    generated with increased probability.

void nmod_mat_randfull(nmod_mat_t mat, flint_rand_t state)

    Sets the element to random numbers likely to be close to the modulus
    of the matrix. This is used to test potential overflow-related bugs.

int nmod_mat_randpermdiag(nmod_mat_t mat, 
                        mp_limb_t * diag, slong n, flint_rand_t state)

    Sets \code{mat} to a random permutation of the diagonal matrix 
    with $n$ leading entries given by the vector \code{diag}. It is 
    assumed that the main diagonal of \code{mat} has room for at 
    least $n$ entries.

    Returns $0$ or $1$, depending on whether the permutation is even 
    or odd respectively.

void nmod_mat_randrank(nmod_mat_t mat, slong rank, flint_rand_t state)

    Sets \code{mat} to a random sparse matrix with the given rank, 
    having exactly as many non-zero elements as the rank, with the 
    non-zero elements being uniformly random integers between $0$ 
    and $m-1$ inclusive, where $m$ is the modulus of \code{mat}.

    The matrix can be transformed into a dense matrix with unchanged
    rank by subsequently calling \code{nmod_mat_randops()}.

void nmod_mat_randops(nmod_mat_t mat, slong count, flint_rand_t state)

    Randomises \code{mat} by performing elementary row or column 
    operations. More precisely, at most \code{count} random additions 
    or subtractions of distinct rows and columns will be performed. 
    This leaves the rank (and for square matrices, determinant) 
    unchanged.

void nmod_mat_randtril(nmod_mat_t mat, flint_rand_t state, int unit)

    Sets \code{mat} to a random lower triangular matrix. If \code{unit} is 1,
    it will have ones on the main diagonal, otherwise it will have random
    nonzero entries on the main diagonal.

void nmod_mat_randtriu(nmod_mat_t mat, flint_rand_t state, int unit)

    Sets \code{mat} to a random upper triangular matrix. If \code{unit} is 1,
    it will have ones on the main diagonal, otherwise it will have random
    nonzero entries on the main diagonal.


*******************************************************************************

    Comparison

*******************************************************************************

int nmod_mat_equal(nmod_mat_t mat1, nmod_mat_t mat2)

    Returns nonzero if mat1 and mat2 have the same dimensions and elements,
    and zero otherwise. The moduli are ignored.

*******************************************************************************

    Transpose

*******************************************************************************

void nmod_mat_transpose(nmod_mat_t B, nmod_mat_t A)

    Sets $B$ to the transpose of $A$. Dimensions must be compatible.
    $B$ and $A$ may be the same object if and only if the matrix is square.


*******************************************************************************

    Addition and subtraction

*******************************************************************************

void nmod_mat_add(nmod_mat_t C, nmod_mat_t A, nmod_mat_t B)

    Computes $C = A + B$. Dimensions must be identical.

void nmod_mat_sub(nmod_mat_t C, nmod_mat_t A, nmod_mat_t B)

    Computes $C = A - B$. Dimensions must be identical.

void nmod_mat_neg(nmod_mat_t A, nmod_mat_t B)

    Sets $B = -A$. Dimensions must be identical.

*******************************************************************************

    Matrix-scalar arithmetic

*******************************************************************************

void nmod_mat_scalar_mul(nmod_mat_t B, const nmod_mat_t A, mp_limb_t c)

    Sets $B = cA$, where the scalar $c$ is assumed to be reduced
    modulo the modulus. Dimensions of $A$ and $B$ must be identical.


*******************************************************************************

    Matrix multiplication

*******************************************************************************

void nmod_mat_mul(nmod_mat_t C, nmod_mat_t A, nmod_mat_t B)

    Sets $C = AB$. Dimensions must be compatible for matrix multiplication.
    $C$ is not allowed to be aliased with $A$ or $B$. This function
    automatically chooses between classical and Strassen multiplication.

void nmod_mat_mul_classical(nmod_mat_t C, nmod_mat_t A, nmod_mat_t B)

    Sets $C = AB$. Dimensions must be compatible for matrix multiplication.
    $C$ is not allowed to be aliased with $A$ or $B$. Uses classical
    matrix multiplication, creating a temporary transposed copy of $B$
    to improve memory locality if the matrices are large enough,
    and packing several entries of $B$ into each word if the modulus
    is very small.

void nmod_mat_mul_strassen(nmod_mat_t C, nmod_mat_t A, nmod_mat_t B)

    Sets $C = AB$. Dimensions must be compatible for matrix multiplication.
    $C$ is not allowed to be aliased with $A$ or $B$. Uses Strassen
    multiplication (the Strassen-Winograd variant).

void nmod_mat_addmul(nmod_mat_t D, const nmod_mat_t C,
    const nmod_mat_t A, const nmod_mat_t B)

    Sets $D = C + AB$. $C$ and $D$ may be aliased with each other but
    not with $A$ or $B$. Automatically selects between classical
    and Strassen multiplication.

void nmod_mat_submul(nmod_mat_t D, const nmod_mat_t C,
    const nmod_mat_t A, const nmod_mat_t B)

    Sets $D = C + AB$. $C$ and $D$ may be aliased with each other but
    not with $A$ or $B$.

*******************************************************************************

    Trace

*******************************************************************************

mp_limb_t nmod_mat_trace(const nmod_mat_t mat)

    Computes the trace of the matrix, i.e. the sum of the entries on
    the main diagonal. The matrix is required to be square.

*******************************************************************************

    Determinant and rank

*******************************************************************************

mp_limb_t nmod_mat_det(nmod_mat_t A)

    Returns the determinant of $A$. The modulus of $A$ must be a prime number.

slong nmod_mat_rank(nmod_mat_t A)

    Returns the rank of $A$. The modulus of $A$ must be a prime number.


*******************************************************************************

    Inverse

*******************************************************************************

int nmod_mat_inv(nmod_mat_t B, nmod_mat_t A)

    Sets $B = A^{-1}$ and returns $1$ if $A$ is invertible. 
    If $A$ is singular, returns $0$ and sets the elements of 
    $B$ to undefined values.

    $A$ and $B$ must be square matrices with the same dimensions
    and modulus. The modulus must be prime.


*******************************************************************************

    Triangular solving

*******************************************************************************

void nmod_mat_solve_tril(nmod_mat_t X, const nmod_mat_t L,
                            const nmod_mat_t B, int unit)

    Sets $X = L^{-1} B$ where $L$ is a full rank lower triangular square
    matrix. If \code{unit} = 1, $L$ is assumed to have ones on its
    main diagonal, and the main diagonal will not be read.
    $X$ and $B$ are allowed to be the same matrix, but no other
    aliasing is allowed. Automatically chooses between the classical and
    recursive algorithms.

void nmod_mat_solve_tril_classical(nmod_mat_t X, const nmod_mat_t L,
                            const nmod_mat_t B, int unit)

    Sets $X = L^{-1} B$ where $L$ is a full rank lower triangular square
    matrix. If \code{unit} = 1, $L$ is assumed to have ones on its
    main diagonal, and the main diagonal will not be read.
    $X$ and $B$ are allowed to be the same matrix, but no other
    aliasing is allowed. Uses forward substitution.

void nmod_mat_solve_tril_recursive(nmod_mat_t X, const nmod_mat_t L,
                            const nmod_mat_t B, int unit)

    Sets $X = L^{-1} B$ where $L$ is a full rank lower triangular square
    matrix. If \code{unit} = 1, $L$ is assumed to have ones on its
    main diagonal, and the main diagonal will not be read.
    $X$ and $B$ are allowed to be the same matrix, but no other
    aliasing is allowed.

    Uses the block inversion formula

    $$
    \begin{pmatrix} A & 0 \\ C & D \end{pmatrix}^{-1}
    \begin{pmatrix} X \\ Y \end{pmatrix} =
    \begin{pmatrix} A^{-1} X \\ D^{-1} ( Y - C A^{-1} X ) \end{pmatrix}
    $$

    to reduce the problem to matrix multiplication and triangular solving
    of smaller systems.

void nmod_mat_solve_triu(nmod_mat_t X, const nmod_mat_t U,
                            const nmod_mat_t B, int unit)

    Sets $X = U^{-1} B$ where $U$ is a full rank upper triangular square
    matrix. If \code{unit} = 1, $U$ is assumed to have ones on its
    main diagonal, and the main diagonal will not be read.
    $X$ and $B$ are allowed to be the same matrix, but no other
    aliasing is allowed. Automatically chooses between the classical and
    recursive algorithms.

void nmod_mat_solve_triu_classical(nmod_mat_t X, const nmod_mat_t U,
                            const nmod_mat_t B, int unit)

    Sets $X = U^{-1} B$ where $U$ is a full rank upper triangular square
    matrix. If \code{unit} = 1, $U$ is assumed to have ones on its
    main diagonal, and the main diagonal will not be read.
    $X$ and $B$ are allowed to be the same matrix, but no other
    aliasing is allowed. Uses forward substitution.

void nmod_mat_solve_triu_recursive(nmod_mat_t X, const nmod_mat_t U,
                            const nmod_mat_t B, int unit)

    Sets $X = U^{-1} B$ where $U$ is a full rank upper triangular square
    matrix. If \code{unit} = 1, $U$ is assumed to have ones on its
    main diagonal, and the main diagonal will not be read.
    $X$ and $B$ are allowed to be the same matrix, but no other
    aliasing is allowed.

    Uses the block inversion formula

    $$
    \begin{pmatrix} A & B \\ 0 & D \end{pmatrix}^{-1}
    \begin{pmatrix} X \\ Y \end{pmatrix} =
    \begin{pmatrix} A^{-1} (X - B D^{-1} Y) \\ D^{-1} Y \end{pmatrix}
    $$

    to reduce the problem to matrix multiplication and triangular solving
    of smaller systems.


*******************************************************************************

    Nonsingular square solving

*******************************************************************************

int nmod_mat_solve(nmod_mat_t X, nmod_mat_t A, nmod_mat_t B)

    Solves the matrix-matrix equation $AX = B$ over $\Z / p \Z$ where $p$
    is the modulus of $X$ which must be a prime number. $X$, $A$, and $B$
    should have the same moduli.

    Returns $1$ if $A$ has full rank; otherwise returns $0$ and sets the
    elements of $X$ to undefined values.

int nmod_mat_solve_vec(mp_limb_t * x, nmod_mat_t A, mp_limb_t * b)

    Solves the matrix-vector equation $Ax = b$ over $\Z / p \Z$ where $p$
    is the modulus of $A$ which must be a prime number.

    Returns $1$ if $A$ has full rank; otherwise returns $0$ and sets the
    elements of $x$ to undefined values.


*******************************************************************************

    LU decomposition

*******************************************************************************

slong nmod_mat_lu(slong * P, nmod_mat_t A, int rank_check)

    Computes a generalised LU decomposition $LU = PA$ of a given
    matrix $A$, returning the rank of $A$.

    If $A$ is a nonsingular square matrix, it will be overwritten with
    a unit diagonal lower triangular matrix $L$ and an upper triangular
    matrix $U$ (the diagonal of $L$ will not be stored explicitly).

    If $A$ is an arbitrary matrix of rank $r$, $U$ will be in row echelon
    form having $r$ nonzero rows, and $L$ will be lower triangular
    but truncated to $r$ columns, having implicit ones on the $r$ first
    entries of the main diagonal. All other entries will be zero.

    If a nonzero value for \code{rank_check} is passed, the
    function will abandon the output matrix in an undefined state and
    return 0 if $A$ is detected to be rank-deficient.

    This function calls \code{nmod_mat_lu_recursive}.

slong nmod_mat_lu_classical(slong * P, nmod_mat_t A, int rank_check)

    Computes a generalised LU decomposition $LU = PA$ of a given
    matrix $A$, returning the rank of $A$. The behavior of this function
    is identical to that of \code{nmod_mat_lu}. Uses Gaussian elimination.

slong nmod_mat_lu_recursive(slong * P, nmod_mat_t A, int rank_check)

    Computes a generalised LU decomposition $LU = PA$ of a given
    matrix $A$, returning the rank of $A$. The behavior of this function
    is identical to that of \code{nmod_mat_lu}. Uses recursive block
    decomposition, switching to classical Gaussian elimination for
    sufficiently small blocks.


*******************************************************************************

    Reduced row echelon form

*******************************************************************************

slong nmod_mat_rref(nmod_mat_t A)

    Puts $A$ in reduced row echelon form and returns the rank of $A$.

    The rref is computed by first obtaining an unreduced row echelon
    form via LU decomposition and then solving an additional
    triangular system.


*******************************************************************************

    Nullspace

*******************************************************************************

slong nmod_mat_nullspace(nmod_mat_t X, const nmod_mat_t A)

    Computes the nullspace of $A$ and returns the nullity.

    More precisely, this function sets $X$ to a maximum rank matrix
    such that $AX = 0$ and returns the rank of $X$. The columns of
    $X$ will form a basis for the nullspace of $A$.

    $X$ must have sufficient space to store all basis vectors
    in the nullspace.

    This function computes the reduced row echelon form and then reads
    off the basis vectors.
