1\documentclass[11pt,a4paper]{article}
2
3\usepackage{amssymb}
4\usepackage{amsmath}
5\usepackage{amsthm}
6
7\usepackage{fullpage}
8
9\begin{document}
10\title{Solution of Specialized Sylvester Equation}
11\author{Ondra Kamenik}
12\date{2004}
13\maketitle
14
15\renewcommand{\vec}{\mathop{\hbox{vec}}}
16\newcommand{\otimesi}{\mathop{\overset{i}{\otimes}}}
17\newcommand{\iF}{\,^i\!F}
18\newcommand{\imF}{\,^{i-1}\!F}
19\newcommand{\solvi}{\mathbf{solv1}}
20\newcommand{\solvii}{\mathbf{solv2}}
21\newcommand{\solviip}{\mathbf{solv2p}}
22
23\newtheorem{lemma}{Lemma}
24
25Given the following matrix equation
26$$AX+BX\left(\otimesi C\right)=D,$$ where $A$ is regular $n\times n$
27matrix, $X$ is $n\times m^i$ matrix of unknowns, $B$ is singular
28$n\times n$ matrix, $C$ is $m\times m$ regular matrix with
29$|\beta(C)|<1$ (i.e. modulus of largest eigenvalue is less than one),
30$i$ is an order of Kronecker product, and finally $D$ is $n\times m^i$
31matrix.
32
33First we multiply the equation from the left by $A^{-1}$ to obtain:
34$$X+A^{-1}BX\left(\otimesi C\right)=A^{-1}D$$
35Then we find real Schur decomposition $K=UA^{-1}BU^T$, and
36$F=VCV^T$. The equation can be written as
37$$UX\left(\otimesi V^T\right) +
38KUX\left(\otimesi V^T\right)\left(\otimesi F\right) =
39UA^{-1}D\left(\otimesi V^T\right)$$
40This can be rewritten as
41$$Y + KY\left(\otimesi F\right)=\widehat{D},$$
42and vectorized
43$$\left(I+\otimesi F^T\otimes K\right)\vec(Y)=\vec(\widehat{D})$$
44Let $\iF$ denote $\otimesi F^T$ for the rest of the text.
45
46\section{Preliminary results}
47
48\begin{lemma}
49\label{sylv:first-lemma}
50For any $n\times n$ matrix $A$ and $\beta_1\beta_2>0$, if there is
51exactly one solution of
52$$\left(I_2\otimes I_n +
53\begin{pmatrix}\alpha&\beta_1\cr-\beta_2&\alpha\end{pmatrix}
54\otimes A\right)\begin{pmatrix} x_1\cr x_2\end{pmatrix} =
55\begin{pmatrix} d_1\cr d_2\end{pmatrix},$$
56then it can be obtained as solution of
57\begin{align*}
58\left(I_n + 2\alpha A+(\alpha^2+\beta^2)A^2\right)x_1 & =
59\widehat{d_1}\\
60\left(I_n + 2\alpha A+(\alpha^2+\beta^2)A^2\right)x_2 & =
61\widehat{d_2}
62\end{align*}
63where $\beta=\sqrt{\beta_1\beta_2}$, and
64$$
65\begin{pmatrix} \widehat{d_1}\cr\widehat{d_2}\end{pmatrix} =
66\left(I_2\otimes I_n +
67\begin{pmatrix}\alpha&-\beta_1\cr\beta_2&\alpha\end{pmatrix}
68\otimes A\right)\begin{pmatrix} d_1\cr d_2\end{pmatrix}$$
69\end{lemma}
70
71\begin{proof} Since
72$$
73\begin{pmatrix} \alpha&\beta_1\cr-\beta_2&\alpha\end{pmatrix}
74\begin{pmatrix} \alpha&-\beta_1\cr\beta_2&\alpha\end{pmatrix}
75=
76\begin{pmatrix} \alpha&-\beta_1\cr\beta_2&\alpha\end{pmatrix}
77\begin{pmatrix} \alpha&\beta_1\cr-\beta_2&\alpha\end{pmatrix}
78=
79\begin{pmatrix} \alpha^2+\beta^2&0\cr 0&\alpha^2+\beta^2\end{pmatrix},
80$$
81it is easy to see that if the equation is multiplied by
82$$I_2\otimes I_n +
83\begin{pmatrix}\alpha&-\beta_1\cr\beta_2&\alpha\end{pmatrix}
84\otimes A$$
85we obtain the result. We only need to prove that the matrix is
86regular. But this is clear because matrix
87$$\begin{pmatrix} \alpha&-\beta_1\cr\beta_2&\alpha\end{pmatrix}$$
88collapses an eigenvalue of $A$ to $-1$ iff the matrix
89$$\begin{pmatrix} \alpha&\beta_1\cr-\beta_2&\alpha\end{pmatrix}$$
90does.
91\end{proof}
92
93\begin{lemma}
94\label{sylv:second-lemma}
95For any $n\times n$ matrix $A$ and $\delta_1\delta_2>0$, if there is
96exactly one solution of
97$$\left(I_2\otimes I_n +
982\alpha\begin{pmatrix}\gamma&\delta_1\cr-\delta_2&\gamma\end{pmatrix}\otimes A
99+(\alpha^2+\beta^2)
100\begin{pmatrix}\gamma&\delta_1\cr-\delta_2&\gamma\end{pmatrix}^2\otimes A^2\right)
101\begin{pmatrix} x_1\cr x_2\end{pmatrix}=
102\begin{pmatrix} d_1\cr d_2\end{pmatrix}
103$$
104it can be obtained as
105\begin{align*}
106\left(I_n+2a_1A+(a_1^2+b_1^2)A^2\right)\left(I_n+2a_2A+(a_2^2+b_2^2)A^2\right)
107x_1 & =\widehat{d_1}\\
108\left(I_n+2a_1A+(a_1^2+b_1^2)A^2\right)\left(I_n+2a_2A+(a_2^2+b_2^2)A^2\right)
109x_2 & =\widehat{d_2}
110\end{align*}
111where
112$$
113\begin{pmatrix} \widehat{d_1}\cr\widehat{d_2}\end{pmatrix} =
114\left(I_2\otimes I_n +
1152\alpha\begin{pmatrix}\gamma&-\delta_1\cr\delta_2&\gamma\end{pmatrix}\otimes A
116+(\alpha^2+\beta^2)
117\begin{pmatrix}\gamma&-\delta_1\cr\delta_2&\gamma\end{pmatrix}^2\otimes A^2\right)
118\begin{pmatrix} d_1\cr d_2\end{pmatrix}
119$$
120and
121\begin{align*}
122a_1 & = \alpha\gamma - \beta\delta\\
123b_1 & = \alpha\delta + \gamma\beta\\
124a_2 & = \alpha\gamma + \beta\delta\\
125b_2 & = \alpha\delta - \gamma\beta\\
126\delta & = \sqrt{\delta_1\delta_2}
127\end{align*}
128
129\end{lemma}
130
131\begin{proof}
132The matrix can be written as
133$$\left(I_2\otimes I_n+(\alpha+\mathrm i\beta)
134\begin{pmatrix}\gamma&\delta_1\cr-\delta_2&\gamma\end{pmatrix}\otimes A\right)
135\left(I_2\otimes I_n +(\alpha-\mathrm i\beta)
136\begin{pmatrix}\gamma&\delta_1\cr-\delta_2&\gamma\end{pmatrix}\otimes A\right).
137$$
138Note that the both matrices are regular since their product is
139regular. For the same reason as in the previous proof, the following
140matrix is also regular
141$$\left(I_2\otimes I_n+(\alpha+\mathrm i\beta)
142\begin{pmatrix}\gamma&-\delta_1\cr\delta_2&\gamma\end{pmatrix}\otimes A\right)
143\left(I_2\otimes I_n +(\alpha-\mathrm i\beta)
144\begin{pmatrix}\gamma&-\delta_1\cr\delta_2&\gamma\end{pmatrix}\otimes A\right),
145$$
146and we may multiply the equation by this matrix obtaining
147$\widehat{d_1}$ and $\widehat{d_2}$. Note that the four matrices
148commute, that is why we can write the whole product as
149\begin{align*}
150\left(I_2\otimes I_n + (\alpha+\mathrm i\beta)
151\begin{pmatrix}\gamma&\delta_1\cr-\delta_2&\gamma\end{pmatrix}\otimes A\right)\cdot
152\left(I_2\otimes I_n + (\alpha+\mathrm i\beta)
153\begin{pmatrix}\gamma&-\delta_1\cr\delta_2&\gamma\end{pmatrix}\otimes A\right)&\cdot\\
154\left(I_2\otimes I_n + (\alpha-\mathrm i\beta)
155\begin{pmatrix}\gamma&\delta_1\cr-\delta_2&\gamma\end{pmatrix}\otimes A\right)\cdot
156\left(I_2\otimes I_n + (\alpha-\mathrm i\beta)
157\begin{pmatrix}\gamma&-\delta_1\cr\delta_2&\gamma\end{pmatrix}\otimes A\right)&=\\
158\left(I_2\otimes I_n + 2(\alpha + \mathrm i\beta)
159\begin{pmatrix}\gamma&0\cr 0&\gamma\end{pmatrix}\otimes A +
160(\alpha + \mathrm i\beta)^2
161\begin{pmatrix}\gamma^2+\delta^2&0\cr 0&\gamma^2+\delta^2\end{pmatrix}\otimes A^2
162\right)&\cdot\\
163\left(I_2\otimes I_n + 2(\alpha - \mathrm i\beta)
164\begin{pmatrix}\gamma&0\cr 0&\gamma\end{pmatrix}\otimes A +
165(\alpha - \mathrm i\beta)^2
166\begin{pmatrix}\gamma^2+\delta^2&0\cr 0&\gamma^2+\delta^2\end{pmatrix}\otimes A^2
167\right)&
168\end{align*}
169
170The product is a diagonal consisting of two $n\times n$ blocks, which are the
171same. The block can be rewritten as product:
172\begin{align*}
173(I_n+(\alpha+\mathrm i\beta)(\gamma+\mathrm i\delta)A)\cdot
174(I_n+(\alpha+\mathrm i\beta)(\gamma-\mathrm i\delta)A)&\cdot\\
175(I_n+(\alpha-\mathrm i\beta)(\gamma+\mathrm i\delta)A)\cdot
176(I_n+(\alpha-\mathrm i\beta)(\gamma-\mathrm i\delta)A)&
177\end{align*}
178and after reordering
179\begin{align*}
180(I_n+(\alpha+\mathrm i\beta)(\gamma+\mathrm i\delta)A)\cdot
181(I_n+(\alpha-\mathrm i\beta)(\gamma-\mathrm i\delta)A)&\cdot\\
182(I_n+(\alpha+\mathrm i\beta)(\gamma-\mathrm i\delta)A)\cdot
183(I_n+(\alpha-\mathrm i\beta)(\gamma+\mathrm i\delta)A)&=\\
184(I_n+2(\alpha\gamma-\beta\delta)A+
185(\alpha^2+\beta^2)(\gamma^2+\delta^2)A^2)&\cdot\\
186(I_n+2(\alpha\gamma+\beta\delta)A+
187(\alpha^2+\beta^2)(\gamma^2+\delta^2)A^2)&
188\end{align*}
189Now it suffices to compare $a_1=\alpha\gamma-\beta\delta$ and verify
190that
191\begin{align*}
192b_1^2 & = (\alpha^2+\beta^2)(\gamma^2+\delta^2)-a_1^2 =\cr
193      & = \alpha^2\gamma^2+\beta^2\gamma^2+\alpha^2\delta^2+\beta^2\delta^2-
194          (\alpha\gamma)^2+2\alpha\beta\gamma\delta-(\beta\delta)^2=\cr
195      & = (\beta\gamma)^2 + (\alpha\delta)^2 + 2\alpha\beta\gamma\delta=\cr
196      & = (\beta\gamma +\alpha\delta)^2
197\end{align*}
198For $b_2$ it is done in the same way.
199\end{proof}
200
201\section{The Algorithm}
202
203Below we define three functions of which
204$\vec(Y)=\solvi(1,\vec(\widehat{D}),i)$ provides the solution $Y$. $X$
205is then obtained as $X=U^TY\left(\otimesi V\right)$.
206
207\subsection{Synopsis}
208
209$F^T$ is $m\times m$ lower quasi-triangular matrix. Let $m_r$ be a
210number of real eigenvalues, $m_c$ number of complex pairs. Thus
211$m=m_r+2m_c$. Let $F_j$ denote
212$j$-th diagonal block of $F^T$ ($1\times 1$ or $2\times 2$ matrix) for
213$j=1,\ldots, m_r+m_c$. For a fixed $j$, let $\bar j$ denote index of the
214first column of $F_j$ in $F^T$. Whenever we write something like
215$(I_{m^i}\otimes I_n+r\iF\otimes K)x = d$, $x$ and $d$ denote column
216vectors of appropriate dimensions, and $x_{\bar j}$ is $\bar j$-th
217partition of $x$, and $x_j=(x_{\bar j}\quad x_{\bar j+1})^T$ if $j$-th
218eigenvalue is complex, and $x_j=x_{\bar j}$ if $j$-th eigenvalue is real.
219
220\subsection{Function $\solvi$}
221
222The function $x=\solvi(r,d,i)$ solves equation
223$$\left(I_{m^i}\otimes I_n+r\iF\otimes K\right)x = d.$$
224The function proceeds as follows:
225
226If $i=0$, the equation is solved directly, $K$ is upper
227quasi-triangular matrix, so this is easy.
228
229If $i>0$, then we go through diagonal blocks $F_j$ for
230$j=1,\ldots, m_r+m_c$ and perform:
231\begin{enumerate}
232\item if $F_j=(f_{\bar j\bar j}) = (f)$, then we return
233$x_j=\solvi(rf,d_{\bar j}, i-1)$. Then precalculate $y=d_j-x_j$, and
234eliminate guys below $F_j$. That is, for each $k=\bar j+1,\ldots, m$, we
235put
236$$d_k = d_k-rf_{\bar jk}\left(\imF\otimes K\right)x_{\bar j}=
237d_k - \frac{f_{\bar jk}}{f}y$$
238
239\item if $F_j=\begin{pmatrix}\alpha&\beta_1\cr -\beta_2&\alpha\end{pmatrix}$,
240we return $x_j=\solvii(r\alpha, r\beta_1, r\beta_2, d_j, i-1)$. Then
241we precalculate
242$$y=\left(\begin{pmatrix}\alpha&-\beta_1\cr \beta_2&\alpha\end{pmatrix}
243\otimes I_{m^{i-1}}\otimes I_n\right)
244\begin{pmatrix} d_{\bar j} - x_{\bar j}\cr
245         d_{\bar j+1} - x_{\bar j+1}
246\end{pmatrix}$$
247and eliminate guys below $F_j$. That is, for each $k=\bar j+2,\ldots, m$
248we put
249\begin{align*}
250d_k &= d_k - r(f_{{\bar j}k}\quad f_{{\bar j}+1 k})
251               \otimes\left(\imF\otimes K\right)x_j\\
252    &= d_k - \frac{1}{\alpha^2+\beta_1\beta_2}
253              \left((f_{{\bar j}k}\quad f_{{\bar j}+1 k})
254               \otimes I_{m^{i-1}}\otimes I_n\right)y
255\end{align*}
256
257\end{enumerate}
258
259\subsection{Function $\solvii$}
260
261The function $x=\solvii(\alpha, \beta_1, \beta_2, d, i)$ solves
262equation
263$$
264\left(I_2\otimes I_{m^i}\otimes I_n +
265\begin{pmatrix}\alpha&\beta_1\cr-\beta_2&\alpha\end{pmatrix}
266\otimes\iF\otimes K\right)x=d
267$$
268
269According to Lemma \ref{sylv:first-lemma} the function returns
270$$
271x=\begin{pmatrix}\solviip(\alpha,\beta_1\beta_2,\widehat{d_1},i)\cr
272          \solviip(\alpha,\beta_1\beta_2,\widehat{d_2},i)\end{pmatrix},
273$$
274where $\widehat{d_1}$, and $\widehat{d_2}$ are partitions of
275$\widehat{d}$ from the lemma.
276
277\subsection{Function $\solviip$}
278
279The function $x=\solviip(\alpha,\beta^2,d,i)$ solves equation
280$$
281\left(I_{m^i}\otimes I_n + 2\alpha\iF\otimes K +
282(\alpha^2+\beta^2)\iF^2\otimes K^2\right)x = d
283$$
284The function proceeds as follows:
285
286If $i=0$, the matrix $I_n+2\alpha K+(\alpha^2+\beta^2)K^2$ is
287calculated and the solution is obtained directly.
288
289Now note that diagonal blocks of $F^{2T}$ are of the form $F_j^2$,
290since if the $F^T$ is block partitioned according to diagonal blocks,
291then it is lower triangular.
292
293If $i>0$, then we go through diagonal blocks $F_j$ for $j=1,\ldots, m_r+m_c$
294and perform:
295\begin{enumerate}
296\item if $F_j=(f_{\bar j\bar j})=(f)$ then $j$-th diagonal block of
297$$
298I_{m^i}\otimes I_n + 2\alpha\iF\otimes K +
299(\alpha^2+\beta^2)\iF^2\otimes K^2
300$$
301takes the form
302$$
303I_{m^{i-1}}\otimes I_n +2\alpha f\imF\otimes K +
304(\alpha^2+\beta^2)f^2\imF^2\otimes K^2
305$$
306and we can put $x_j = \solviip(f\alpha,f^2\beta^2,d_j,i-1)$.
307
308Then we need to eliminate guys below $F_j$. Note that $|f^2|<|f|$,
309therefore we precalculate $y_2=(\alpha^2+\beta^2)f^2(\imF^2\otimes K^2)x_j$,
310and then precalculate
311$$y_1 = 2\alpha f(\imF\otimes K)x_j = d_j-x_j-y_2.$$
312Let $g_{pq}$ denote element of $F^{2T}$ at position $(q,p)$.
313The elimination is done by going through $k=\bar j+1,\ldots, m$ and
314putting
315\begin{align*}
316d_k &= d_k - \left(2\alpha f_{\bar j k}\imF\otimes K +
317(\alpha^2+\beta^2)g_{\bar j k}\imF^2\otimes K^2\right)x_j\\
318    &= d_k - \frac{f_{\bar j k}}{f}y_1 -
319       \frac{g_{\bar j k}}{f^2}y_2
320\end{align*}
321
322\item if $F_j=\begin{pmatrix}\gamma&\delta_1\cr -\delta_2&\gamma\end{pmatrix}$,
323then $j$-th diagonal block of
324$$
325I_{m^i}\otimes I_n + 2\alpha\iF\otimes K +
326(\alpha^2+\beta^2)\iF^2\otimes K^2
327$$
328takes the form
329$$
330I_{m^{i-1}}\otimes I_n +2\alpha
331\begin{pmatrix}\gamma&\delta_1\cr -\delta_2&\gamma\end{pmatrix}\imF\otimes K
332+(\alpha^2+\beta^2)
333\begin{pmatrix}\gamma&\delta_1\cr -\delta_2&\gamma\end{pmatrix}^2\imF^2\otimes K^2
334$$
335According to Lemma \ref{sylv:second-lemma}, we need to calculate
336$\widehat{d}_{\bar j}$, and $\widehat{d}_{\bar j+1}$, and $a_1$,
337$b_1$, $a_2$, $b_2$. Then we obtain
338\begin{align*}
339x_{\bar j}&=
340 \solviip(a_1,b_1^2,\solviip(a_2,b_2^2,\widehat{d}_{\bar j},i-1),i-1)\\
341x_{\bar j+1}&=
342 \solviip(a_1,b_1^2,\solviip(a_2,b_2^2,\widehat{d}_{\bar j+1},i-1),i-1)
343\end{align*}
344
345Now we need to eliminate guys below $F_j$. Since $\Vert F^2_j\Vert <
346\Vert F_j\Vert$, we precalculate
347
348\begin{align*}
349y_2&=(\alpha^2+\beta^2)(\gamma^2+\delta^2)
350\left(I_2\otimes\imF^2\otimes K^2\right)x_j\\
351y_1&=2\alpha(\gamma^2+\delta^2)\left(I_2\otimes\imF\otimes
352K\right)x_j\\
353   &=(\gamma^2+\delta^2)
354      \left(F_j^{-1}
355         \otimes I_{m^{i-1}n}\right)
356      \left(d_j-x_j-\frac{1}{(\gamma^2+\delta^2)}
357        \left(
358         F_j^2
359         \otimes I_{m^{i-1}n}\right)y_2\right)\\
360   &=\left(\begin{pmatrix}\gamma&-\delta_1\cr\delta_2&\gamma\end{pmatrix}
361           \otimes I_{m^{i-1}n}\right)(d_j-x_j)
362     -\left(\begin{pmatrix}\gamma&\delta_1\cr-\delta_2&\gamma\end{pmatrix}
363           \otimes I_{m^{i-1}n}\right)y_2
364\end{align*}
365
366Then we go through all $k=\bar j+2,\ldots, m$. For clearer formulas, let
367$\mathbf f_k$ denote pair of $F^T$ elements in $k$-th line below $F_j$,
368this is $\mathbf f_k=(f_{\bar jk}\quad f_{\bar j+1k})$. And let $\mathbf g_k$
369denote the same for $F^{2T}$, this is $\mathbf g_k=(g_{\bar jk}\quad
370g_{\bar j+1k})$. For each $k$ we put
371
372\begin{align*}
373d_k &= d_k - \left(2\alpha\mathbf f_k\otimes
374                   \imF\otimes K +
375                   (\alpha^2+\beta^2)\mathbf g_k\otimes
376                   \imF^2\otimes K^2\right)x_j\\
377    &= d_k - \frac{1}{\gamma^2+\delta^2}
378             \left(\mathbf f_k\otimes
379                   I_{m^{i-1}n}\right)y_1
380           - \frac{1}{\gamma^2+\delta^2}
381             \left(\mathbf g_k\otimes
382                   I_{m^{i-1}n}\right)y_2
383\end{align*}
384
385\end{enumerate}
386
387\section{Final Notes}
388
389\subsection{Numerical Issues of $A^{-1}B$}
390
391We began the solution of the Sylvester equation with multiplication by
392$A^{-1}$.  This can introduce numerical errors, and we need more
393numerically stable supplement. Its aim is to make $A$ and $B$
394commutative, that is, we need to find a regular matrix $P$, such that
395$(PA)(PB)=(PB)(PA)$. Recall that this is necessary in solution of
396$$
397(I_2\otimes I_{m^i}\otimes (PA)+(D+C)\otimes\,\iF\otimes (PB))x=d,
398$$
399since this equation is
400multiplied by $I_2\otimes I_{m^i}\otimes (PA)+(D-C)\otimes\,\iF\otimes PB$,
401and the diagonal
402result
403$$
404I_2\otimes I_{m^i}\otimes (PAPA) + 2D\otimes\,\iF\otimes (PAPB) +
405(D^2-C^2)\otimes\,\iF^2\otimes (PBPB)
406$$
407is obtained only if
408$(PA)(PB)=(PB)(PA)$.
409
410Finding regular solution of $(PA)(PB)=(PB)(PA)$ is equivalent to
411finding regular solution of $APB-BPA=0$. Numerical error of the former
412equation is $P$-times greater than the numerical error of the latter
413equation. And the numerical error of the latter equation also grows
414with the size of $P$. On the other hand, truncation error in $P$
415multiplication decreases with growing the size of $P$. By intuition,
416stability analysis will show that the best choice is some orthonormal
417$P$.
418
419Obviously, since $A$ is regular, the equation $APB-BPA=0$ has solution
420of the form $P=\alpha A^{-1}$, where $\alpha\neq 0$. There is a vector
421space of all solutions $P$ (including singular ones). In precise
422arithmetics, its dimension is $\sum n^2_i$, where $n_i$ is number of
423repetitions of $i$-th eigenvalue of $A^{-1}B$ which is similar to
424$BA^{-1}$. In floating point arithmetics, without any further
425knowledge about $A$, and $B$, we are only sure about dimension $n$
426which is implied by similarity of $A^{-1}B$ and $BA^{-1}$. Now we try
427to find the base of the vector space of solutions.
428
429Let $L$ denote the following linear operator:
430$$L(X)=(AXB-BXA)^T.$$
431
432Let $\vec(X)$ denote a vector made by stacking all the
433columns of $X$. Let $T_n$ denote $n^2\times n^2$ matrix representing
434operator $\vec(X)\mapsto \vec(X^T)$. And
435finally let $M$ denote $n^2\times n^2$ matrix representing the operator
436$L$. It is not difficult to verify that:
437$$M=T_n(B^T\otimes A - A^T\otimes B)$$
438Now we show that $M$ is skew symmetric. Recall that $T_n(X\otimes
439Y)=(Y\otimes X)T_n$, we have:
440$$M^T=(B^T\otimes A - A^T\otimes B)^TT_n=(B\otimes A^T - A\otimes B^T)T_n=
441T_n(A^T\otimes B - B^T\otimes A) = -M$$
442
443We try to solve $M\vec(X) = T_n(0) = 0$. Since $M$ is
444skew symmetric, there is real orthonormal matrix $Q$, such that
445$M=Q\widehat{M}Q^T$, and $\widehat{M}$ is block diagonal matrix
446consisting of $2\times 2$ blocks of the form
447$$\left(\begin{matrix} 0&\alpha_i\cr-\alpha_i&0\end{matrix}\right),$$
448and of an additional zero, if $n^2$ is odd.
449
450Now we solve equation $\widehat{M}y=0$, where $y=Q^T\vec(X)$. Now
451there are $n$ zero rows in $\widehat{M}$ coming from similarity of
452$A^{-1}B$ and $BA^{-1}$ (structural zeros). Note that the additional
453zero for odd $n^2$ is already included in that number, since for odd
454$n^2$, $n^2-n$ is even. Besides those, there are also zeros (esp. in
455floating point arithmetics), coming from repetitive (or close)
456eigenvalues of $A^{-1}B$. If we are able to select the rows with the
457structural zeros, a solution is obtained by picking arbitrary numbers
458for the same positions in $y$, and put $\vec(X)=Qy$.
459
460The following questions need to be answered:
461\begin{enumerate}
462\item How to recognize the structural rows?
463\item Is $A^{-1}$ generated by a $y$, which has non-zero elements only
464on structural rows? Note that $A$ can have repetitive eigenvalues. The
465positive answer to the question implies that in each $n$-partition of
466$y$ there is exactly one structural row.
467\item And very difficult one: How to pick $y$ so that $X$ would be
468regular, or even close to orthonormal (pure orthonormality
469overdeterminates $y$)?
470\end{enumerate}
471
472\subsection{Making Zeros in $F$}
473
474It is clear that the numerical complexity of the proposed algorithm
475strongly depends on a number of non-zero elements in the Schur factor
476$F$. If we were able to find $P$, such that $PFP^{-1}$ has
477substantially more zeros than $F$, then the computation would be
478substantially faster. However, it seems that we have to pay price for
479any additional zero in terms of less numerical stability of $PFP^{-1}$
480multiplication. Consider $P$, and $F$ in form
481$$P=\begin{pmatrix} I &X\cr 0&I\end{pmatrix},
482\qquad F=\begin{pmatrix} A&C\cr 0&B\end{pmatrix}$$
483we obtain
484$$PFP^{-1}=\begin{pmatrix} A& C + XB - AX\cr 0&B \end{pmatrix}$$
485
486Thus, we need to solve $C = AX - XB$. It is clear that numerical
487stability of operator $Y\mapsto PYP^{-1}$ and its inverse $Y\mapsto
488P^{-1}YP$ is worse with growing norm $\Vert X\Vert$. The norm can be
489as large as $\Vert F\Vert/\delta$, where $\delta$ is a distance of
490eigenspectra of $A$ and $B$. Also, a numerical error of the solution is
491proportional to $\Vert C\Vert/\delta$.
492
493Although, these difficulties cannot be overcome completely, we may
494introduce an algorithm, which works on $F$ with ordered eigenvalues on
495diagonal, and seeks such partitioning to maximize $\delta$ and
496minimize $C$. If the partitioning is found, the algorithm finds $P$
497and then is run for $A$ and $B$ blocks. It stops when further
498partitioning is not possible without breaking some user given limit
499for numerical errors. We have to keep in mind that the numerical
500errors are accumulated in product of all $P$'s of every step.
501
502\subsection{Exploiting constant rows in $F$}
503
504If some of $F$'s rows consists of the same numbers, or a number of
505distinct values within a row is small, then this structure can be
506easily exploited in the algorithm. Recall, that in both functions
507$\solvi$, and $\solviip$, we eliminate guys below diagonal element (or
508block) (of $F^T$), by multiplying solution of the diagonal and
509cancelling it from right side. If the elements below the diagonal
510block are the same, we save one vector multiplication. Note that in
511$\solviip$ we still need to multiply by elements below diagonal of the
512matrix $F^{2T}$, which obviously does not have the property. However, the
513heaviest elimination is done at the very top level, in the first call
514to $\solvi$.
515
516Another way of exploiting the property is to perform all
517calculations in complex numbers. In that case, only $\solvi$ is run.
518
519How the structure can be introduced into the matrix? Following the
520same notation as in previous section, we solve $C = AX - XB$ in order
521to obtain zeros at place of $C$. If it is not possible, we may relax
522the equation by solving $C - R = AX - XB$, where $R$ is suitable
523matrix with constant rows. The matrix $R$ minimizes $\Vert C-R\Vert$
524in order to minimize $\Vert X\Vert$ if $A$, and $B$ are given. Now, in
525the next step we need to introduce zeros (or constant rows) to matrix
526$A$, so we seek for regular matrix $P$, doing the
527job. If found, the product looks like:
528$$\begin{pmatrix} P&0\cr 0&I\end{pmatrix}\begin{pmatrix} A&R\cr 0&B\end{pmatrix}
529\begin{pmatrix} P^{-1}&0\cr 0&I\end{pmatrix}=
530\begin{pmatrix} PAP^{-1}&PR\cr 0&B\end{pmatrix}$$
531Now note, that matrix $PR$ has also constant rows. Thus,
532preconditioning of the matrix in upper left corner doesn't affect the
533property. However, a preconditioning of the matrix in lower right
534corner breaks the property, since we would obtain $RP^{-1}$.
535
536\end{document}
537