function result = band_Arnoldi(A,R,nmax,tol,n,result)
%
%  Band Arnoldi process
%  
%  This implementation is essentially Algorithm 6.1 in
%  
%  Roland W. Freund, Model reduction methods based on Krylov subspaces, 
%  Acta Numerica, 12 (2003), pp. 267-319.
%
% -----------------------------------------------------------------------------
%  
%  Notes:  1) The matrices A and R are allowed to be complex.
%
%          2) The orthogonalizations performed in the algorithm are with 
%             respect to the Euclidean inner product
% 
%                   (w,v) = w^H v (= w' * v),
%
%             where w^H (= w') denotes the complex conjugate transpose of w.
%
%          3) The entries above and on the diagonal of the k-th column of the
%             Arnoldi matrix H are computed via (modified) Gram-Schmidt 
%             orthogonalization of the vector A v_k against v_1, v_2, ..., v_k;
%             the entries to the left of the diagonal of the k-th row of H are 
%             computed via inner products and norms of candidate vectors and
%             previously deflated vectors.
%
% -----------------------------------------------------------------------------
%
%  Usage:  result = band_Arnoldi(A,R,nmax)
%          result = band_Arnoldi(A,R,nmax,tol)
%          result = band_Arnoldi(A,R,nmax,tol,n,result)
% 
%          where A is a matrix, or,
%
%          result = band_Arnoldi(@(x) afun(x,...),R,nmax)
%          result = band_Arnoldi(@(x) afun(x,...),R,nmax,tol)
%          result = band_Arnoldi(@(x) afun(x,...),R,nmax,tol,n,result) 
%
%          where "afun" is a function such that 
%
%          y = afun(x,...)
%
%          computes the matrix-vector product y = A x (= A * x) with A
%
% -----------------------------------------------------------------------------
%         
%  Required inputs:  A = a square matrix or an (anonymous) function that 
%                        computes matrix-vector products with A
%                    R = matrix the m columns of which are the starting vectors
%                 nmax = maximum number of Arnoldi vectors to be generated;
%                        nmax needs to be a real scalar with nmax > n;  a
%                        non-integer value of nmax is rounded down
%
%                 It is assumed that A is a square matrix and that A and R have
%                 the same number of rows;  these assumptions are checked when
%                 the input A is a matrix, but not when A is a function. 
%
%  Optional inputs:  tol = a structure that contains tolerances and parameters
%                          for the deflation procedure
%                      n = a nonnegative integer;  n > 0 means that a previous
%                          call to the function "band_Arnoldi" has generated n
%                          Arnoldi vectors and that the current call to 
%                          "band_Arnoldi" resumes the iteration at step n+1;
%                          n = 0 means that the band Arnoldi process is started
%                          from scratch
%                 result = the output structure of the previous call to
%                          "band_Arnoldi" if n > 0;  if n = 0, this input is
%                          ignored
%
%                 If "tol" is provided as an input, it needs to contain 
%
%                    tol.defl_tol = unscaled deflation tolerance
%
%                 and values of the following two flags:
%
%                    tol.defl_flag = 0  (use unscaled deflation tolerance)
%                                    1  (use scaled deflation tolerance)
%                                    2  (use scaled deflation tolerance only
%                                                   for the starting block R)
%                                    3  (use scaled deflation tolerance except
%                                                     for the starting block R)
%
%                    tol.normA_flag = 1  (an estimate for the norm of A is
%                                           generated within the algorithm)
%                                     2  (an estimate for the norm of A is
%                                                    provided as tol.normA)
%
%                 If tol.normA_flag = 2, then an estimate for the norm of A 
%                 needs to be provided as 
%
%                    tol.normA
%
%                 If "tol" is not provided as an input (or is empty), the
%                 following default values are used:
%
%                    tol.defl_tol = sqrt(eps)  (eps = machine precision)
%                    tol.defl_flag = 1
%                    tol.normA_flag = 1
%
%                 If n is not provided as an input (or is empty), n = 0 is
%                 used and the optional input "result" is ignored.
%
%                 If n > 0, the input "result" needs to be provided. It is 
%                 assumed, but not checked, that "result" is the output 
%                 structure of a previous call to the function "band_Arnoldi"
%                 (applied to the same matrices A and R).  In this case, the
%                 input "tol" is ignored and
%              
%                    tol = result.tol 
%
%                 is used instead.
%
%                 If n > 0 and the previous run ended because the block
%                 Krylov subspace was exhausted (result.exh_flag > 0), a
%                 message is printed and the input "result" is returned
%                 unchanged.
%
% -----------------------------------------------------------------------------
%
%  On return, "result" is a structure the fields of which include
%  the following quantities:
%
%         result.n = number of Arnoldi vectors that were generated
%
%         result.V = matrix V the n columns of which are the Arnoldi vectors  
%
%   result.Vh_defl = matrix Vh_defl the columns of which are the candidate
%                    vectors for the next mc Arnoldi vectors and the m-mc 
%                    deflated vectors 
%      
%         result.H = the n x n matrix H that represents the projection of the
%                    matrix A onto the subspace spanned by the columns of V; 
%                    A, V, and H are connected via the relation 
%                   
%                             V' * A * V = H
%
%       result.rho = the n x m matrix rho that contains the coefficients used 
%                    to turn the starting vectors (in R) into the first Arnoldi
%                    vectors;  R, V, and rho are connected via the relation
%               
%                             V' * R = rho
%
%  result.exh_flag = 1 if the iteration stopped because the block Krylov
%                    subspace is exhausted (no candidate vectors are left),
%                    0 otherwise
%
% -----------------------------------------------------------------------------
%
%  This routine can be run in incremental fashion.
%
%  Example 1:  
%
%      result = band_Arnoldi(A,R,nmax0,tol)
%    
%      n = result.n
%              
%      result = band_Arnoldi(A,R,nmax,tol,n,result)
% 
%      The first call to the function "band_Arnoldi" runs the band Arnoldi 
%      process from scratch and generates n Arnoldi vectors.
% 
%      The second call to the function "band_Arnoldi" resumes the iteration at
%      step n+1.
%  
%  Example 2 (Band Arnoldi process, run one step at a time):
%
%      result = band_Arnoldi(A,R,1,tol,0,[])
% 
%      for n = 1 : nmax - 1,
%
%         result = band_Arnoldi(A,R,n+1,[],result.n,result)
%                
%      end
% 
%      This will run the band Arnoldi process for nmax steps.
%
% -----------------------------------------------------------------------------
%
%  BANDITS: a Matlab Package of Band Krylov Subspace Iterations
%
%  Copyright (c) 2018-2019 Roland W. Freund
%  See LICENSE.txt for license
%
% -----------------------------------------------------------------------------

%  ===========================
%  Beginning of initialization
%  ===========================
%
if nargin < 3,
   error('** Not enough input arguments! **')
end
%
%  Default deflation parameters
%
if (nargin < 4) || isempty(tol),
   tol.defl_flag = 1;
   tol.defl_tol = sqrt(eps);
   tol.normA_flag = 1;
end
%
%  The scalar/type test comes first so that the short-circuit operators
%  below are only ever applied to a scalar value
%
if (nargin < 5) || isempty(n),
   n = 0;
else
   if ~isscalar(n) || ~(isnumeric(n) || islogical(n)) ...
                   || (n < 0) || (rem(n,1) ~= 0),
      error('** n needs to be a nonnegative integer **')
   end   
end
%
%  Validate nmax.  Without this check, an empty nmax or a non-integer nmax 
%  with n < nmax < n+1 would pass the test "nmax <= n" below although the
%  range n+1 : nmax is empty, so that the iteration would be skipped and an
%  invalid value would be stored in result.n.
%
if ~isscalar(nmax) || ~(isnumeric(nmax) || islogical(nmax)) ...
                   || ~isreal(nmax) || isnan(nmax),
   error('** nmax needs to be a real scalar **')
end
if isfloat(nmax),
   nmax = floor(nmax);
end
%
if nmax <= n,
   error('** nmax is not large enough;  we need to have nmax > n **')
end  
%
[N,m] = size(R);
% 
%  mvec_A = 1:  A is a (floating-point) matrix and products are formed as A * v
%  mvec_A = 0:  A is passed to "feval" to compute products 
%
if isfloat(A),
   mvec_A = 1;
   [Nt1,Nt2] = size(A);
   if Nt1 ~= Nt2,
      error('** The matrix A is not square **')
   end
   if Nt1 ~= N,
      error('** The matrices A and R need to have the same number of rows **')
   end
else
   mvec_A = 0;
end
%
if n > 0,
%
%  Resume a previous run:  restore the state of the iteration from "result"
%
   if (nargin < 6) || isempty(result),
      error('** n > 0, but there is no input "result" **')
   end 
   V = result.V;
   Vh_defl = result.Vh_defl;
   rho = result.rho;
   H = result.H;
   mc = result.mc;
   Iv = result.Iv;
   n_check = result.n;
   tol = result.tol;
   exh_flag = result.exh_flag;
%  
%  Nothing left to do if the previous run exhausted the block Krylov 
%  subspace;  the input "result" is returned unchanged
%
   if exh_flag > 0,
      fprintf(' \n')
      disp('**---------------------------------**')    
      disp('** Previous run ended due to an    **')
      disp('** exhausted block Krylov subspace **')     
      disp('**---------------------------------**')
      fprintf(' \n') 
      return
   end  
%
   if n ~= n_check,
      error('** n does not match the value of n in result **')
   end
%  
   n1 = n + 1;
%
else
%
%  Start from scratch:  all m columns of R are candidate vectors, and 
%  there are no Arnoldi vectors and no deflated vectors yet
%
   V = zeros(N,0);
   Vh_defl(:,1:m) = R;
   rho = [];
   H = [];
   mc = m;
   Iv.ph = 1:m;
   Iv.I = [];
   Iv.pd = [];  
   Iv.nd = 0;
   n1 = 1;
   exh_flag = 0;  
end
%
%  Extract and check tolerance, flags, and norm estimate for deflation check
%
[defl_tol,defl_flag,normA_flag,normA] = check_tolerances(tol,n);
%
%  ==================================================
%  End of initialization and beginning of iteration
%  ==================================================
%
for n = n1 : nmax,
%
%  =================================
%  Construct n-th Arnoldi vector v_n
%  =================================
%
   foundvn = 0;
%   
%  If necessary, deflate v vector;  the helper "deflation" updates the
%  number mc of candidate vectors and the index structure Iv, and returns
%  the norm normv that is used below to normalize the candidate vector
%
   while foundvn == 0,
%
      [mc,foundvn,Vh_defl,Iv,normv] = deflation(0,n,m,mc,foundvn, ...
                               Vh_defl,R,Iv,defl_flag,defl_tol, ...
                                              normA);
%      
%     Check if block Krylov subspace is exhausted    
%
      if mc == 0,
%
         disp('**-----------------------------------------**')
         disp('** There are no more Krylov vectors, and   **')
         disp('** so the algorithm has to terminate: STOP **')
         disp('**-----------------------------------------**')
         disp(['  Number of Arnoldi steps performed: ' num2str(n-1)])
%
         tol.normA = normA;
         result = save_result(0,V,Vh_defl,mc,Iv,H,rho,tol);
%         
%        Step n was not completed, so only n-1 Arnoldi vectors exist
%
         result.n = n - 1;
         result.exh_flag = 1;
         return
%      
      end
%        
%     End of: while foundvn == 0
%
   end
%
%  Make sure rho has n rows
%
   rho(n,1) = 0;
%
%  Normalize v_n;  the normalization factor is stored in H if its column
%  index n-mc is positive, and in column n-mc+m of rho otherwise
%
   V(:,n) = Vh_defl(:,Iv.ph(1)) / normv;
   if n > mc,
      H(n,n-mc) = normv;
   else
      rho(n,n-mc+m) = normv;
   end
%
%  Orthogonalize the candidate vectors against v_n
%
%  First rotate the list of candidate positions:  the position just used
%  for v_n moves to the end of the list, where it is reused below to store
%  the new candidate vector obtained from A v_n
%
   ivph1 = Iv.ph(1);
   Itmp = Iv.ph(2:mc);
   Iv.ph(1:mc-1) = Itmp;
   Iv.ph(mc) = ivph1;
%
   tmp = (V(:,n))' * Vh_defl(:,Itmp);
   Vh_defl(:,Itmp) = Vh_defl(:,Itmp) - V(:,n) * tmp; 
%  
%  The k-th coefficient belongs to column k-mc+n;  positive column indices
%  refer to H, the remaining ones (shifted by m) refer to rho
%
   Ktmp = find([1:mc-1] > mc-n);
   H(n,Ktmp-mc+n) = tmp(Ktmp);
%  
   Ktmp = find([1:mc-1] <= mc-n);
   rho(n,Ktmp-mc+n+m) = tmp(Ktmp);  
%  
%  Advance block Krylov subspace by computing tmpv = A * V(:,n) (=A v_n)
%
   if mvec_A == 1,
      tmpv = A * V(:,n);
   else
      tmpv = feval(A,V(:,n));
   end
%
%  Update the running estimate for the norm of A;  since v_n has unit 
%  norm, the norm of A v_n is a lower bound for the 2-norm of A
%
   if normA_flag == 1,
      normA = max([normA,norm(tmpv,2)]);
   end
%
%  Orthogonalize tmpv against previous v vectors
%
   for k = 1 : n,
      H(k,n) = (V(:,k))' * tmpv;
      tmpv   = tmpv - V(:,k) * H(k,n);
   end
%
%  Store the result as the last candidate vector
%
   Vh_defl(:,Iv.ph(mc)) = tmpv;
%
%  Compute entries in H and rho due to nonzero deflated vectors
%    
   nd = Iv.nd;
   tmp = (V(:,n))' * Vh_defl(:,Iv.pd(1:nd));
%    
%  Iv.I holds the column index of each of these entries:  positive indices
%  refer to H, the remaining ones (shifted by m) refer to rho
%
   Itmp = Iv.I(1:nd);
   Ktmp = find(Itmp > 0);
   H(n,Itmp(Ktmp)) = tmp(Ktmp);
%
   Ktmp = find(Itmp <= 0);
   rho(n,Itmp(Ktmp)+m) = tmp(Ktmp);
%
end
%
%  ================
%  End of iteration
%  ================
%
%  Store the current norm estimate with the tolerances, so that a resumed 
%  run (which uses tol = result.tol) has access to it
%
tol.normA = normA;
result = save_result(0,V,Vh_defl,mc,Iv,H,rho,tol);
%         
result.n = n;
result.exh_flag = 0;

