1\documentclass[11pt,a4paper]{article} 2 3\usepackage{amssymb} 4\usepackage{amsmath} 5\usepackage{amsthm} 6 7\usepackage{fullpage} 8 9\begin{document} 10\title{Solution of Specialized Sylvester Equation} 11\author{Ondra Kamenik} 12\date{2004} 13\maketitle 14 15\renewcommand{\vec}{\mathop{\hbox{vec}}} 16\newcommand{\otimesi}{\mathop{\overset{i}{\otimes}}} 17\newcommand{\iF}{\,^i\!F} 18\newcommand{\imF}{\,^{i-1}\!F} 19\newcommand{\solvi}{\mathbf{solv1}} 20\newcommand{\solvii}{\mathbf{solv2}} 21\newcommand{\solviip}{\mathbf{solv2p}} 22 23\newtheorem{lemma}{Lemma} 24 25Given the following matrix equation 26$$AX+BX\left(\otimesi C\right)=D,$$ where $A$ is regular $n\times n$ 27matrix, $X$ is $n\times m^i$ matrix of unknowns, $B$ is singular 28$n\times n$ matrix, $C$ is $m\times m$ regular matrix with 29$|\beta(C)|<1$ (i.e. modulus of largest eigenvalue is less than one), 30$i$ is an order of Kronecker product, and finally $D$ is $n\times m^i$ 31matrix. 32 33First we multiply the equation from the left by $A^{-1}$ to obtain: 34$$X+A^{-1}BX\left(\otimesi C\right)=A^{-1}D$$ 35Then we find real Schur decomposition $K=UA^{-1}BU^T$, and 36$F=VCV^T$. The equation can be written as 37$$UX\left(\otimesi V^T\right) + 38KUX\left(\otimesi V^T\right)\left(\otimesi F\right) = 39UA^{-1}D\left(\otimesi V^T\right)$$ 40This can be rewritten as 41$$Y + KY\left(\otimesi F\right)=\widehat{D},$$ 42and vectorized 43$$\left(I+\otimesi F^T\otimes K\right)\vec(Y)=\vec(\widehat{D})$$ 44Let $\iF$ denote $\otimesi F^T$ for the rest of the text. 
45 46\section{Preliminary results} 47 48\begin{lemma} 49\label{sylv:first-lemma} 50For any $n\times n$ matrix $A$ and $\beta_1\beta_2>0$, if there is 51exactly one solution of 52$$\left(I_2\otimes I_n + 53\begin{pmatrix}\alpha&\beta_1\cr-\beta_2&\alpha\end{pmatrix} 54\otimes A\right)\begin{pmatrix} x_1\cr x_2\end{pmatrix} = 55\begin{pmatrix} d_1\cr d_2\end{pmatrix},$$ 56then it can be obtained as solution of 57\begin{align*} 58\left(I_n + 2\alpha A+(\alpha^2+\beta^2)A^2\right)x_1 & = 59\widehat{d_1}\\ 60\left(I_n + 2\alpha A+(\alpha^2+\beta^2)A^2\right)x_2 & = 61\widehat{d_2} 62\end{align*} 63where $\beta=\sqrt{\beta_1\beta_2}$, and 64$$ 65\begin{pmatrix} \widehat{d_1}\cr\widehat{d_2}\end{pmatrix} = 66\left(I_2\otimes I_n + 67\begin{pmatrix}\alpha&-\beta_1\cr\beta_2&\alpha\end{pmatrix} 68\otimes A\right)\begin{pmatrix} d_1\cr d_2\end{pmatrix}$$ 69\end{lemma} 70 71\begin{proof} Since 72$$ 73\begin{pmatrix} \alpha&\beta_1\cr-\beta_2&\alpha\end{pmatrix} 74\begin{pmatrix} \alpha&-\beta_1\cr\beta_2&\alpha\end{pmatrix} 75= 76\begin{pmatrix} \alpha&-\beta_1\cr\beta_2&\alpha\end{pmatrix} 77\begin{pmatrix} \alpha&\beta_1\cr-\beta_2&\alpha\end{pmatrix} 78= 79\begin{pmatrix} \alpha^2+\beta^2&0\cr 0&\alpha^2+\beta^2\end{pmatrix}, 80$$ 81it is easy to see that if the equation is multiplied by 82$$I_2\otimes I_n + 83\begin{pmatrix}\alpha&-\beta_1\cr\beta_2&\alpha\end{pmatrix} 84\otimes A$$ 85we obtain the result. We only need to prove that the matrix is 86regular. But this is clear because matrix 87$$\begin{pmatrix} \alpha&-\beta_1\cr\beta_2&\alpha\end{pmatrix}$$ 88collapses an eigenvalue of $A$ to $-1$ iff the matrix 89$$\begin{pmatrix} \alpha&\beta_1\cr-\beta_2&\alpha\end{pmatrix}$$ 90does. 
91\end{proof} 92 93\begin{lemma} 94\label{sylv:second-lemma} 95For any $n\times n$ matrix $A$ and $\delta_1\delta_2>0$, if there is 96exactly one solution of 97$$\left(I_2\otimes I_n + 982\alpha\begin{pmatrix}\gamma&\delta_1\cr-\delta_2&\gamma\end{pmatrix}\otimes A 99+(\alpha^2+\beta^2) 100\begin{pmatrix}\gamma&\delta_1\cr-\delta_2&\gamma\end{pmatrix}^2\otimes A^2\right) 101\begin{pmatrix} x_1\cr x_2\end{pmatrix}= 102\begin{pmatrix} d_1\cr d_2\end{pmatrix} 103$$ 104it can be obtained as 105\begin{align*} 106\left(I_n+2a_1A+(a_1^2+b_1^2)A^2\right)\left(I_n+2a_2A+(a_2^2+b_2^2)A^2\right) 107x_1 & =\widehat{d_1}\\ 108\left(I_n+2a_1A+(a_1^2+b_1^2)A^2\right)\left(I_n+2a_2A+(a_2^2+b_2^2)A^2\right) 109x_2 & =\widehat{d_2} 110\end{align*} 111where 112$$ 113\begin{pmatrix} \widehat{d_1}\cr\widehat{d_2}\end{pmatrix} = 114\left(I_2\otimes I_n + 1152\alpha\begin{pmatrix}\gamma&-\delta_1\cr\delta_2&\gamma\end{pmatrix}\otimes A 116+(\alpha^2+\beta^2) 117\begin{pmatrix}\gamma&-\delta_1\cr\delta_2&\gamma\end{pmatrix}^2\otimes A^2\right) 118\begin{pmatrix} d_1\cr d_2\end{pmatrix} 119$$ 120and 121\begin{align*} 122a_1 & = \alpha\gamma - \beta\delta\\ 123b_1 & = \alpha\delta + \gamma\beta\\ 124a_2 & = \alpha\gamma + \beta\delta\\ 125b_2 & = \alpha\delta - \gamma\beta\\ 126\delta & = \sqrt{\delta_1\delta_2} 127\end{align*} 128 129\end{lemma} 130 131\begin{proof} 132The matrix can be written as 133$$\left(I_2\otimes I_n+(\alpha+\mathrm i\beta) 134\begin{pmatrix}\gamma&\delta_1\cr-\delta_2&\gamma\end{pmatrix}\otimes A\right) 135\left(I_2\otimes I_n +(\alpha-\mathrm i\beta) 136\begin{pmatrix}\gamma&\delta_1\cr-\delta_2&\gamma\end{pmatrix}\otimes A\right). 137$$ 138Note that both matrices are regular since their product is 139regular. 
For the same reason as in the previous proof, the following 140matrix is also regular 141$$\left(I_2\otimes I_n+(\alpha+\mathrm i\beta) 142\begin{pmatrix}\gamma&-\delta_1\cr\delta_2&\gamma\end{pmatrix}\otimes A\right) 143\left(I_2\otimes I_n +(\alpha-\mathrm i\beta) 144\begin{pmatrix}\gamma&-\delta_1\cr\delta_2&\gamma\end{pmatrix}\otimes A\right), 145$$ 146and we may multiply the equation by this matrix obtaining 147$\widehat{d_1}$ and $\widehat{d_2}$. Note that the four matrices 148commute, that is why we can write the whole product as 149\begin{align*} 150\left(I_2\otimes I_n + (\alpha+\mathrm i\beta) 151\begin{pmatrix}\gamma&\delta_1\cr-\delta_2&\gamma\end{pmatrix}\otimes A\right)\cdot 152\left(I_2\otimes I_n + (\alpha+\mathrm i\beta) 153\begin{pmatrix}\gamma&-\delta_1\cr\delta_2&\gamma\end{pmatrix}\otimes A\right)&\cdot\\ 154\left(I_2\otimes I_n + (\alpha-\mathrm i\beta) 155\begin{pmatrix}\gamma&\delta_1\cr-\delta_2&\gamma\end{pmatrix}\otimes A\right)\cdot 156\left(I_2\otimes I_n + (\alpha-\mathrm i\beta) 157\begin{pmatrix}\gamma&-\delta_1\cr\delta_2&\gamma\end{pmatrix}\otimes A\right)&=\\ 158\left(I_2\otimes I_n + 2(\alpha + \mathrm i\beta) 159\begin{pmatrix}\gamma&0\cr 0&\gamma\end{pmatrix}\otimes A + 160(\alpha + \mathrm i\beta)^2 161\begin{pmatrix}\gamma^2+\delta^2&0\cr 0&\gamma^2+\delta^2\end{pmatrix}\otimes A^2 162\right)&\cdot\\ 163\left(I_2\otimes I_n + 2(\alpha - \mathrm i\beta) 164\begin{pmatrix}\gamma&0\cr 0&\gamma\end{pmatrix}\otimes A + 165(\alpha - \mathrm i\beta)^2 166\begin{pmatrix}\gamma^2+\delta^2&0\cr 0&\gamma^2+\delta^2\end{pmatrix}\otimes A^2 167\right)& 168\end{align*} 169 170The product is a diagonal consisting of two $n\times n$ blocks, which are the 171same. 
The block can be rewritten as product: 172\begin{align*} 173(I_n+(\alpha+\mathrm i\beta)(\gamma+\mathrm i\delta)A)\cdot 174(I_n+(\alpha+\mathrm i\beta)(\gamma-\mathrm i\delta)A)&\cdot\\ 175(I_n+(\alpha-\mathrm i\beta)(\gamma+\mathrm i\delta)A)\cdot 176(I_n+(\alpha-\mathrm i\beta)(\gamma-\mathrm i\delta)A)& 177\end{align*} 178and after reordering 179\begin{align*} 180(I_n+(\alpha+\mathrm i\beta)(\gamma+\mathrm i\delta)A)\cdot 181(I_n+(\alpha-\mathrm i\beta)(\gamma-\mathrm i\delta)A)&\cdot\\ 182(I_n+(\alpha+\mathrm i\beta)(\gamma-\mathrm i\delta)A)\cdot 183(I_n+(\alpha-\mathrm i\beta)(\gamma+\mathrm i\delta)A)&=\\ 184(I_n+2(\alpha\gamma-\beta\delta)A+ 185(\alpha^2+\beta^2)(\gamma^2+\delta^2)A^2)&\cdot\\ 186(I_n+2(\alpha\gamma+\beta\delta)A+ 187(\alpha^2+\beta^2)(\gamma^2+\delta^2)A^2)& 188\end{align*} 189Now it suffices to compare $a_1=\alpha\gamma-\beta\delta$ and verify 190that 191\begin{align*} 192b_1^2 & = (\alpha^2+\beta^2)(\gamma^2+\delta^2)-a_1^2 =\cr 193 & = \alpha^2\gamma^2+\beta^2\gamma^2+\alpha^2\delta^2+\beta^2\delta^2- 194 (\alpha\gamma)^2+2\alpha\beta\gamma\delta-(\beta\delta)^2=\cr 195 & = (\beta\gamma)^2 + (\alpha\delta)^2 + 2\alpha\beta\gamma\delta=\cr 196 & = (\beta\gamma +\alpha\delta)^2 197\end{align*} 198For $b_2$ it is done in the same way. 199\end{proof} 200 201\section{The Algorithm} 202 203Below we define three functions of which 204$\vec(Y)=\solvi(1,\vec(\widehat{D}),i)$ provides the solution $Y$. $X$ 205is then obtained as $X=U^TY\left(\otimesi V\right)$. 206 207\subsection{Synopsis} 208 209$F^T$ is $m\times m$ lower quasi-triangular matrix. Let $m_r$ be a 210number of real eigenvalues, $m_c$ number of complex pairs. Thus 211$m=m_r+2m_c$. Let $F_j$ denote 212$j$-th diagonal block of $F^T$ ($1\times 1$ or $2\times 2$ matrix) for 213$j=1,\ldots, m_r+m_c$. For a fixed $j$, let $\bar j$ denote index of the 214first column of $F_j$ in $F^T$. 
Whenever we write something like 215$(I_{m^i}\otimes I_n+r\iF\otimes K)x = d$, $x$ and $d$ denote column 216vectors of appropriate dimensions, and $x_{\bar j}$ is $\bar j$-th 217partition of $x$, and $x_j=(x_{\bar j}\quad x_{\bar j+1})^T$ if $j$-th 218eigenvalue is complex, and $x_j=x_{\bar j}$ if $j$-th eigenvalue is real. 219 220\subsection{Function $\solvi$} 221 222The function $x=\solvi(r,d,i)$ solves equation 223$$\left(I_{m^i}\otimes I_n+r\iF\otimes K\right)x = d.$$ 224The function proceeds as follows: 225 226If $i=0$, the equation is solved directly, $K$ is upper 227quasi-triangular matrix, so this is easy. 228 229If $i>0$, then we go through diagonal blocks $F_j$ for 230$j=1,\ldots, m_r+m_c$ and perform: 231\begin{enumerate} 232\item if $F_j=(f_{\bar j\bar j}) = (f)$, then we return 233$x_j=\solvi(rf,d_{\bar j}, i-1)$. Then precalculate $y=d_j-x_j$, and 234eliminate guys below $F_j$. This is, for each $k=\bar j+1,\ldots, m$, we 235put 236$$d_k = d_k-rf_{\bar jk}\left(\imF\otimes K\right)x_{\bar j}= 237d_k - \frac{f_{\bar jk}}{f}y$$ 238 239\item if $F_j=\begin{pmatrix}\alpha&\beta_1\cr -\beta_2&\alpha\end{pmatrix}$, 240we return $x_j=\solvii(r\alpha, r\beta_1, r\beta_2, d_j, i-1)$. Then 241we precalculate 242$$y=\left(\begin{pmatrix}\alpha&-\beta_1\cr \beta_2&\alpha\end{pmatrix} 243\otimes I_{m^{i-1}}\otimes I_n\right) 244\begin{pmatrix} d_{\bar j} - x_{\bar j}\cr 245 d_{\bar j+1} - x_{\bar j+1} 246\end{pmatrix}$$ 247and eliminate guys below $F_j$. 
This is, for each $k=\bar j+2,\ldots, m$ 248we put 249\begin{align*} 250d_k &= d_k - r(f_{{\bar j}k}\quad f_{{\bar j}+1 k}) 251 \otimes\left(\imF\otimes K\right)x_j\\ 252 &= d_k - \frac{1}{\alpha^2+\beta_1\beta_2} 253 \left((f_{{\bar j}k}\quad f_{{\bar j}+1 k}) 254 \otimes I_{m^{i-1}}\otimes I_n\right)y 255\end{align*} 256 257\end{enumerate} 258 259\subsection{Function $\solvii$} 260 261The function $x=\solvii(\alpha, \beta_1, \beta_2, d, i)$ solves 262equation 263$$ 264\left(I_2\otimes I_{m^i}\otimes I_n + 265\begin{pmatrix}\alpha&\beta_1\cr-\beta_2&\alpha\end{pmatrix} 266\otimes\iF\otimes K\right)x=d 267$$ 268 269According to Lemma \ref{sylv:first-lemma} the function returns 270$$ 271x=\begin{pmatrix}\solviip(\alpha,\beta_1\beta_2,\widehat{d_1},i)\cr 272 \solviip(\alpha,\beta_1\beta_2,\widehat{d_2},i)\end{pmatrix}, 273$$ 274where $\widehat{d_1}$, and $\widehat{d_2}$ are partitions of 275$\widehat{d}$ from the lemma. 276 277\subsection{Function $\solviip$} 278 279The function $x=\solviip(\alpha,\beta^2,d,i)$ solves equation 280$$ 281\left(I_{m^i}\otimes I_n + 2\alpha\iF\otimes K + 282(\alpha^2+\beta^2)\iF^2\otimes K^2\right)x = d 283$$ 284The function proceeds as follows: 285 286If $i=0$, the matrix $I_n+2\alpha K+(\alpha^2+\beta^2)K^2$ is 287calculated and the solution is obtained directly. 288 289Now note that diagonal blocks of $F^{2T}$ are of the form $F_j^2$, 290since if $F^T$ is block partitioned according to diagonal blocks, 291then it is lower triangular. 292 293If $i>0$, then we go through diagonal blocks $F_j$ for $j=1,\ldots, m_r+m_c$ 294and perform: 295\begin{enumerate} 296\item if $F_j=(f_{\bar j\bar j})=(f)$ then $j$-th diagonal block of 297$$ 298I_{m^i}\otimes I_n + 2\alpha\iF\otimes K + 299(\alpha^2+\beta^2)\iF^2\otimes K^2 300$$ 301takes the form 302$$ 303I_{m^{i-1}}\otimes I_n +2\alpha f\imF\otimes K + 304(\alpha^2+\beta^2)f^2\imF^2\otimes K^2 305$$ 306and we can put $x_j = \solviip(f\alpha,f^2\beta^2,d_j,i-1)$. 
307 308Then we need to eliminate guys below $F_j$. Note that $|f^2|<|f|$, 309therefore we precalculate $y_2=(\alpha^2+\beta^2)f^2(\imF^2\otimes K^2)x_j$, 310and then precalculate 311$$y_1 = 2\alpha f(\imF\otimes K)x_j = d_j-x_j-y_2.$$ 312Let $g_{pq}$ denote element of $F^{2T}$ at position $(q,p)$. 313The elimination is done by going through $k=\bar j+1,\ldots, m$ and 314putting 315\begin{align*} 316d_k &= d_k - \left(2\alpha f_{\bar j k}\imF\otimes K + 317(\alpha^2+\beta^2)g_{\bar j k}\imF^2\otimes K^2\right)x_j\\ 318 &= d_k - \frac{f_{\bar j k}}{f}y_1 - 319 \frac{g_{\bar j k}}{f^2}y_2 320\end{align*} 321 322\item if $F_j=\begin{pmatrix}\gamma&\delta_1\cr -\delta_2&\gamma\end{pmatrix}$, 323then $j$-th diagonal block of 324$$ 325I_{m^i}\otimes I_n + 2\alpha\iF\otimes K + 326(\alpha^2+\beta^2)\iF^2\otimes K^2 327$$ 328takes the form 329$$ 330I_{m^{i-1}}\otimes I_n +2\alpha 331\begin{pmatrix}\gamma&\delta_1\cr -\delta_2&\gamma\end{pmatrix}\imF\otimes K 332+(\alpha^2+\beta^2) 333\begin{pmatrix}\gamma&\delta_1\cr -\delta_2&\gamma\end{pmatrix}^2\imF^2\otimes K^2 334$$ 335According to Lemma \ref{sylv:second-lemma}, we need to calculate 336$\widehat{d}_{\bar j}$, and $\widehat{d}_{\bar j+1}$, and $a_1$, 337$b_1$, $a_2$, $b_2$. Then we obtain 338\begin{align*} 339x_{\bar j}&= 340 \solviip(a_1,b_1^2,\solviip(a_2,b_2^2,\widehat{d}_{\bar j},i-1),i-1)\\ 341x_{\bar j+1}&= 342 \solviip(a_1,b_1^2,\solviip(a_2,b_2^2,\widehat{d}_{\bar j+1},i-1),i-1) 343\end{align*} 344 345Now we need to eliminate guys below $F_j$. 
Since $\Vert F^2_j\Vert < 346\Vert F_j\Vert$, we precalculate 347 348\begin{align*} 349y_2&=(\alpha^2+\beta^2)(\gamma^2+\delta^2) 350\left(I_2\otimes\imF^2\otimes K^2\right)x_j\\ 351y_1&=2\alpha(\gamma^2+\delta^2)\left(I_2\otimes\imF\otimes 352K\right)x_j\\ 353 &=(\gamma^2+\delta^2) 354 \left(F_j^{-1} 355 \otimes I_{m^{i-1}n}\right) 356 \left(d_j-x_j-\frac{1}{(\gamma^2+\delta^2)} 357 \left( 358 F_j^2 359 \otimes I_{m^{i-1}n}\right)y_2\right)\\ 360 &=\left(\begin{pmatrix}\gamma&-\delta_1\cr\delta_2&\gamma\end{pmatrix} 361 \otimes I_{m^{i-1}n}\right)(d_j-x_j) 362 -\left(\begin{pmatrix}\gamma&\delta_1\cr-\delta_2&\gamma\end{pmatrix} 363 \otimes I_{m^{i-1}n}\right)y_2 364\end{align*} 365 366Then we go through all $k=\bar j+2,\ldots, m$. For clearer formulas, let 367$\mathbf f_k$ denote pair of $F^T$ elements in $k$-th line below $F_j$, 368this is $\mathbf f_k=(f_{\bar jk}\quad f_{\bar j+1k})$. And let $\mathbf g_k$ 369denote the same for $F^{2T}$, this is $\mathbf g_k=(g_{\bar jk}\quad 370g_{\bar j+1k})$. For each $k$ we put 371 372\begin{align*} 373d_k &= d_k - \left(2\alpha\mathbf f_k\otimes 374 \imF\otimes K + 375 (\alpha^2+\beta^2)\mathbf g_k\otimes 376 \imF^2\otimes K^2\right)x_j\\ 377 &= d_k - \frac{1}{\gamma^2+\delta^2} 378 \left(\mathbf f_k\otimes 379 I_{m^{i-1}n}\right)y_1 380 - \frac{1}{\gamma^2+\delta^2} 381 \left(\mathbf g_k\otimes 382 I_{m^{i-1}n}\right)y_2 383\end{align*} 384 385\end{enumerate} 386 387\section{Final Notes} 388 389\subsection{Numerical Issues of $A^{-1}B$} 390 391We began the solution of the Sylvester equation with multiplication by 392$A^{-1}$. This can introduce numerical errors, and we need more 393numerically stable supplement. Its aim is to make $A$ and $B$ 394commutative, this is we need to find a regular matrix $P$, such that 395$(PA)(PB)=(PB)(PA)$. 
Recall that this is necessary in solution of 396$$ 397(I_2\otimes I_{m^i}\otimes (PA)+(D+C)\otimes\,\iF\otimes (PB))x=d, 398$$ 399since this equation is 400multiplied by $I_2\otimes I_{m^i}\otimes (PA)+(D-C)\otimes\,\iF\otimes PB$, 401and the diagonal 402result 403$$ 404I_2\otimes I_{m^i}\otimes (PAPA) + 2D\otimes\,\iF\otimes (PAPB) + 405(D^2-C^2)\otimes\,\iF^2\otimes (PBPB) 406$$ 407is obtained only if 408$(PA)(PB)=(PB)(PA)$. 409 410Finding regular solution of $(PA)(PB)=(PB)(PA)$ is equivalent to 411finding regular solution of $APB-BPA=0$. Numerical error of the former 412equation is $P$-times greater than the numerical error of the latter 413equation. And the numerical error of the latter equation also grows 414with the size of $P$. On the other hand, truncation error in $P$ 415multiplication decreases with growing the size of $P$. By intuition, 416stability analysis will show that the best choice is some orthonormal 417$P$. 418 419Obviously, since $A$ is regular, the equation $APB-BPA=0$ has solution 420of the form $P=\alpha A^{-1}$, where $\alpha\neq 0$. There is a vector 421space of all solutions $P$ (including singular ones). In precise 422arithmetics, its dimension is $\sum n^2_i$, where $n_i$ is number of 423repetitions of $i$-th eigenvalue of $A^{-1}B$ which is similar to 424$BA^{-1}$. In floating point arithmetics, without any further 425knowledge about $A$, and $B$, we are only sure about dimension $n$ 426which is implied by similarity of $A^{-1}B$ and $BA^{-1}$. Now we try 427to find the base of the vector space of solutions. 428 429Let $L$ denote the following linear operator: 430$$L(X)=(AXB-BXA)^T.$$ 431 432Let $\vec(X)$ denote a vector made by stacking all the 433columns of $X$. Let $T_n$ denote $n^2\times n^2$ matrix representing 434operator $\vec(X)\mapsto \vec(X^T)$. And 435finally let $M$ denote $n^2\times n^2$ matrix representing the operator 436$L$. 
It is not difficult to verify that: 437$$M=T_n(B^T\otimes A - A^T\otimes B)$$ 438Now we show that $M$ is skew symmetric. Recall that $T_n(X\otimes 439Y)=(Y\otimes X)T_n$, we have: 440$$M^T=(B^T\otimes A - A^T\otimes B)^TT_n=(B\otimes A^T - A\otimes B^T)T_n= 441T_n(A^T\otimes B - B^T\otimes A) = -M$$ 442 443We try to solve $M\vec(X) = T_n(0) = 0$. Since $M$ is 444skew symmetric, there is real orthonormal matrix $Q$, such that 445$M=Q\widehat{M}Q^T$, and $\widehat{M}$ is block diagonal matrix 446consisting of $2\times 2$ blocks of the form 447$$\left(\begin{matrix} 0&\alpha_i\cr-\alpha_i&0\end{matrix}\right),$$ 448and of an additional zero, if $n^2$ is odd. 449 450Now we solve equation $\widehat{M}y=0$, where $y=Q^T\vec(X)$. Now 451there are $n$ zero rows in $\widehat{M}$ coming from similarity of 452$A^{-1}B$ and $BA^{-1}$ (structural zeros). Note that the additional 453zero for odd $n^2$ is already included in that number, since for odd 454$n^2$, $n^2-n$ is even. Besides those, there are also zeros (esp. in 455floating point arithmetics), coming from repetitive (or close) 456eigenvalues of $A^{-1}B$. If we are able to select the rows with the 457structural zeros, a solution is obtained by picking arbitrary numbers 458for the same positions in $y$, and putting $\vec(X)=Qy$. 459 460The following questions need to be answered: 461\begin{enumerate} 462\item How to recognize the structural rows? 463\item Is $A^{-1}$ generated by a $y$, which has non-zero elements only 464on structural rows? Note that $A$ can have repetitive eigenvalues. The 465positive answer to the question implies that in each $n$-partition of 466$y$ there is exactly one structural row. 467\item And a very difficult one: How to pick $y$ so that $X$ would be 468regular, or even close to orthonormal (pure orthonormality 469overdeterminates $y$)? 
470\end{enumerate} 471 472\subsection{Making Zeros in $F$} 473 474It is clear that the numerical complexity of the proposed algorithm 475strongly depends on a number of non-zero elements in the Schur factor 476$F$. If we were able to find $P$, such that $PFP^{-1}$ has 477substantially more zeros than $F$, then the computation would be 478substantially faster. However, it seems that we have to pay a price for 479any additional zero in terms of less numerical stability of $PFP^{-1}$ 480multiplication. Consider $P$, and $F$ in form 481$$P=\begin{pmatrix} I &X\cr 0&I\end{pmatrix}, 482\qquad F=\begin{pmatrix} A&C\cr 0&B\end{pmatrix}$$ 483we obtain 484$$PFP^{-1}=\begin{pmatrix} A& C + XB - AX\cr 0&B \end{pmatrix}$$ 485 486Thus, we need to solve $C = AX - XB$. It's clear that numerical 487stability of operator $Y\mapsto PYP^{-1}$ and its inverse $Y\mapsto 488P^{-1}YP$ is worse with growing norm $\Vert X\Vert$. The norm can be 489as large as $\Vert F\Vert/\delta$, where $\delta$ is a distance of 490eigenspectra of $A$ and $B$. Also, a numerical error of the solution is 491proportional to $\Vert C\Vert/\delta$. 492 493Although, these difficulties cannot be overcome completely, we may 494introduce an algorithm, which works on $F$ with ordered eigenvalues on 495diagonal, and seeks such partitioning to maximize $\delta$ and 496minimize $C$. If the partitioning is found, the algorithm finds $P$ 497and then is run for $A$ and $B$ blocks. It stops when further 498partitioning is not possible without breaking some user given limit 499for numerical errors. We have to keep in mind that the numerical 500errors are accumulated in product of all $P$'s of every step. 501 502\subsection{Exploiting constant rows in $F$} 503 504If some of $F$'s rows consist of the same numbers, or a number of 505distinct values within a row is small, then this structure can be 506easily exploited in the algorithm. 
Recall, that in both functions 507$\solvi$, and $\solviip$, we eliminate guys below diagonal element (or 508block) (of $F^T$), by multiplying solution of the diagonal and 509cancelling it from right side. If the elements below the diagonal 510block are the same, we save one vector multiplication. Note that in 511$\solviip$ we still need to multiply by elements below diagonal of the 512matrix $F^{2T}$, which obviously does not have the property. However, the 513heaviest elimination is done at the very top level, in the first call 514to $\solvi$. 515 516Another way of exploiting the property is to perform all 517calculations in complex numbers. In that case, only $\solvi$ is run. 518 519How can the structure be introduced into the matrix? Following the 520same notation as in previous section, we solve $C = AX - XB$ in order 521to obtain zeros at place of $C$. If it is not possible, we may relax 522the equation by solving $C - R = AX - XB$, where $R$ is suitable 523matrix with constant rows. The matrix $R$ minimizes $\Vert C-R\Vert$ 524in order to minimize $\Vert X\Vert$ if $A$, and $B$ are given. Now, in 525the next step we need to introduce zeros (or constant rows) to matrix 526$A$, so we seek for regular matrix $P$, doing the 527job. If found, the product looks like: 528$$\begin{pmatrix} P&0\cr 0&I\end{pmatrix}\begin{pmatrix} A&R\cr 0&B\end{pmatrix} 529\begin{pmatrix} P^{-1}&0\cr 0&I\end{pmatrix}= 530\begin{pmatrix} PAP^{-1}&PR\cr 0&B\end{pmatrix}$$ 531Now note, that matrix $PR$ has also constant rows. Thus, 532preconditioning of the matrix in upper left corner doesn't affect the 533property. However, a preconditioning of the matrix in lower right 534corner breaks the property, since we would obtain $RP^{-1}$. 535 536\end{document} 537