1\documentclass[11pt,reqno]{amsart}
2\usepackage{amsmath, amssymb, amsthm}
3\usepackage{graphicx}
4\usepackage{xtab}
5\usepackage{color}
6\usepackage{hyperref}
7\usepackage[linesnumbered,ruled]{algorithm2e}
8
9\SetKw{KwGoTo}{goto}
10
11\numberwithin{equation}{section}
12\newtheorem{theorem}{Theorem}[section]
13\newtheorem{definition}[theorem]{Definition}
14\newtheorem{lemma}[theorem]{Lemma}
15\newtheorem{remark}[theorem]{Remark}
16\newtheorem{entry}[theorem]{Entry}
17\newtheorem{corollary}[theorem]{Corollary}
18\newtheorem{proposition}[theorem]{Proposition}
19\newtheorem{example}[theorem]{Example}
20\newtheorem{myalgorithm}[theorem]{Algorithm}
21
22\addtolength{\textwidth}{1.5in}
23\addtolength{\hoffset}{-0.75in}
24%\addtolength{\textheight}{0.6in}
25%\addtolength{\voffset}{-0.3in}
26
27\newcommand\T{\rule{0pt}{4.0ex}}       % Top strut
28\newcommand\B{\rule[-2.5ex]{0pt}{0pt}} % Bottom strut
29
30\newcommand{\smat}[4] {(\begin{smallmatrix} #1 & #2 \\ #3 & #4 \end{smallmatrix} )}
31\newcommand{\mattt}[4]  { \left(\begin{array}{cc} #1 & #2 \\ #3 & #4 \end{array} \right)}
32\newcommand{\matto}[2]  { \left(\begin{array}{cc} #1 \\ #2 \end{array} \right)}
33\newcommand{\schar}[2] {( \begin{smallmatrix} #1 \\ #2 \end{smallmatrix})}
34
35\newcommand{\op}[1]  { \operatorname{ #1 }}
36\newcommand{\olbbH}[0]  { \overline{\mathbb{H}}}
37\newcommand{\olbbQ}[0]  { \overline{\mathbb{Q}}}
38\newcommand{\olG}[0]  { \overline{\Gamma}}
39\newcommand{\bbH}[0]  { \mathbb{H}}
40\newcommand{\bbC}[0]  { \mathbb{C}}
41\newcommand{\bbZ}[0]  { \mathbb{Z}}
42\newcommand{\bbF}[0]  { \mathbb{F}}
43\newcommand{\bbQ}[0]  { \mathbb{Q}}
44\newcommand{\bbR}[0]  { \mathbb{R}}
45\newcommand{\gok}[0]  { \mathfrak{k}}
46\newcommand{\goe}[0]  { \mathfrak{e}}
47\newcommand{\goR}[0]  { \mathfrak{R}}
48
49\title{Algorithms for Multivariate Polynomials}
50\author{}
51
52\begin{document}
53
54\begin{abstract}
55Algorithms for multivariate polynomials in flint are discussed.
56\end{abstract}
57
58
59\maketitle
60
61\section{Introduction}
62
63A polynomial $A \in R[x_1,\dots,x_n]$ is representation as a sums of terms
64\begin{equation*}
65A = t_1 + \cdots + t_a
66\end{equation*}
67where the terms are ordered as $t_1 > t_2 > \cdots > t_a$ according to some term ordering. The basic operations of addition and subtraction are then equivalent to a merge operation and run in time proportional to the sum of the input term counts.
68
69
70\section{Monomial Representation}
71
72The {\tt mpoly} module implements the low level packing and unpacking of exponents
73for multivariate polynomials. If the variables in the polynomial are, say,
74$x$, $y$ and $z$ with $x > y > z$ in the monomial ordering, then the monomial
75$x^a y^b z^c$ is represented as the array $\{a, b, c\}$ from the user's perspective.
76
77Polynomial exponents are stored in packed format. This means that monomials are
78actually stored as an array of integer 'fields' that may be packed within
79a machine word or across multiple machine words if needed.
80This facilitates basic operations on the monomials, and we make the following assumptions about
81the correspondence between the variables' exponents and the
82fields in the packing:
83
84\begin{enumerate}
85\item {The monomial ordering is a total ordering, i.e. 1 is the smallest.}
86\item{Multiplication of monomials corresponds to field-wise addition.}
87\item{Monomials can be compared by comparing their packed representation possibly with an xor mask on certain fields.}
88\item{The exponent of each variable is itself one of the fields.}
89\item{The fields are all non-negative.}
90\end{enumerate}
91
92For the three supported ordering {\tt ORD\_LEX}, {\tt ORD\_DEGLEX}, and {\tt ORD\_DEGREVLEX}, the
93monomial $x^a y^b z^c$ is converted into fields in the following ways (the
94least significant field is on the left, the most significant is on the right),
95and the comparison mask is shown below.
96
97\begin{verbatim}
98    ORD_LEX:         | c | b | a |         ( 3 fields)
99                      000 000 000
100
101    ORD_DEGLEX:      | c | b | a | a+b+c | ( 4 fields)
102                      000 000 000  00000
103
104    ORD_DEGREVLEX:   | a | b | c | a+b+c |  ( 4 fields)
105                      111 111 111 0000000
106\end{verbatim}
107
108If one wanted to support, for example, a block ordering which was {\tt ORD\_DEGLEX}
109in $x, y$ and {\tt ORD\_DEGREVLEX} in $z, w$ with $x>y>z>w$,
110the monomial $x^a y^b z^c w^d$ would need to be stored as
111
112\begin{verbatim}
113    | c | d | c+d | b | a | a+b |    (6 fields)
114     111 111 00000 000 000 00000
115\end{verbatim}
116
117No such interface is currently implemented.
118
119
120There is no limit to the size of the fields. The fields themselves are packed
121to a uniform bit width, usually denoted by {\tt bits} in the functions. This bit count should contain an extra sign bit used for overflow detection. Thus, if the maximum field is $15$, then the fields only fit into a packing with {\tt bits >= 5}. The total number of machine words taken by an exponent packed into fields is usually denoted by {\tt N} in the code.
122
123If {\tt bits <= FLINT\_BITS} then precisely a maximum of {\tt floor(FLINT\_BITS/bits)} number of fields may be packed into a single word. Within a word, the packing is from low to high, and unused fields (as well as unused bits) at the top of the word are zero.
124
125\section{Multiplication}
126\subsection{Dense multiplication in $\bbZ[x_1,\dots,x_n]$ or $\bbZ_p[x_1,\dots,x_n]$}\
127
128Given $A(x_1,\dots,x_n), B(x_1,\dots,x_n) \in R[x_1,\dots,x_n]$, set $r_i = 1 + \op{deg}_{x_i}(a) + \op{deg}_{x_i}(b)$. The Kronecker substitution
129\begin{equation*}
130x_1 \to x, \quad x_2 \to x^{r_1}, \quad x_3 \to x^{r_1 r_2}, \quad \dots, \quad x_n \to x^{r_1 \cdots r_{n-1}}
131\end{equation*}
132gives two univariate polynomials to multiply in $\bbZ[x]$ or $\bbZ_p[x]$. This Kronecker substitution is chosen so that it can be reversed to find $A \cdot B \in R[x_1,\dots,x_n]$ from the univariate product. The flint functions {\tt \_mpoly\_mul\_\{dense|array\}} implement such techniques. The {\tt dense} functions use the ordinary polynomial multiplication functions while the {\tt array} functions use a multiply and accumulate technique that might be better for semi-sparse polynomials.
133
134\subsection{Sparse multiplication in $\bbZ[x_1,\dots,x_n]$ or $\bbZ_p[x_1,\dots,x_n]$}\
135
136Given $A = t_1 + \cdots + t_a, B = s_1 + \cdots + s_b, \in R[x_1,\dots,x_n]$, we need to calculate all products $t_i s_j$, sort them, and combine like terms. This is done using a heap in the functions {\tt \_mpoly\_mul\_johnson} as in \cite{Johnson}. The essential idea is to read off the product terms in order from a heap. The heap never needs to become too large if one uses the relations
137\begin{equation*}
138t_i s_j > t_{i+1} s_j, \quad t_i s_j > t_i s_{j+1}\text{.}
139\end{equation*}
140
141
142\section{Division}
143
144The techniques used for multiplication (Kronecker substitutions in the dense case and heaps in the sparse case) apply to division as well.
145
146
147\section{Powering}
148
149Implements a corrected version of an algorithm called FPS in \cite{FPS}. The basic idea is to map the problem to $R[x]$ via a Kronecker substitution and use a recursion for the coefficients of $f^k$ derived from
150\begin{equation*}
151f (f^k)' = k f' (f^k)\text{.}
152\end{equation*}
153Since solving for the coefficients of $f^k$ involves division, this requires some modification for $R=\bbZ_p$.
154
155
156
157\section{Interpolation}
158
159All of the interpolation methods for $f(x_1, \dots, x_n) \in R[x_1, \dots, x_n]$ require strict degree bounds $r_i$ with $\op{deg}_{x_i}(f) < r_i$.
160
161\subsection{Dense Newton Interpolation}
162
163Straightforward, variable-by-variable, recursive, dense interpolation. Number of probes to $f$ is $\prod_i r_i$. There is only one problem with this approach.
164\begin{itemize}
165\item insufficient evaluation points
166\end{itemize}
167
168
169\subsection{Sparse Zippel Interpolation}
170Similar to Newton interpolation, but we use the assumption that monomials don't disappear under evaluation. For example, suppose $r_x, r_y, r_z$ are the strict degree bounds. We first find $f(x,1,1)$ using dense interpolation with $r_x$ values of $x$, say $x_1, \dots, x_{r_x},$. If
171\begin{equation*}
172f(x,1,1) = x^5 + 2x^2 + 1\text{,}
173\end{equation*}
174we make the assumption that
175\begin{equation*}
176f(x,y,1) = a_1(y)x^5 + a_2(y)x^2 + a_3(y)\text{,}
177\end{equation*}
178and proceed to interpolate the $a_i(y)$ using dense univariate interpolation in $y$. We need $r_y$ values of $y$, say $y_1, \dots, y_{r_y}$. For each of these values $y=y_i$ we can find the coefficients (?) in
179\begin{equation*}
180f(x,y_i,1) = (?) x^5 + (?) x^2 + (?)
181\end{equation*}
182by plugging in \emph{three} random values of $x$ and solving the linear system. To find $f(x,y,1)$ at this point the number of probes to $f$ we have used is $r_x + 3r_y$, which is probably fewer than $r_x r_y$.
183
184Now suppose we obtain
185\begin{equation*}
186f(x,y,1) = y^2 x^5 + x^2 + y^7 x^2 + y^3.
187\end{equation*}
188Make the assumption
189\begin{equation*}
190f(x,y,z) = b_1(z)y^2 x^5 + b_2(z)x^2 + b_3(z)y^7 x^2 + b_4(z)y^3\text{,}
191\end{equation*}
192and interpolate the $b_i(z)$ using dense univariate interpolation in $z$. We need $r_z$ values of $z$, say $z_1, \dots, z_{r_z}$. For each of these values $z=z_i$ we can find the coefficients (?) in
193\begin{equation*}
194f(x,y,z) = (?)y^2 x^5 + (?)x^2 + (?)y^7 x^2 + (?)y^3
195\end{equation*}
196by plugging in \emph{four} random pairs of values of $(x,y)$ and solving the linear system. To find $f(x,y,z)$ at this point the number of probes to $f$ we have used is $r_x + 3r_y + 4r_z$, which is probably fewer than $r_x r_y r_z$.
197
198This approach has an additional problems.
199\begin{itemize}
200\item insufficient evaluation points
201\item inconsistent/underdetermined linear equations
202\item associated linear algebra costs
203\end{itemize}
204
205\subsection{Sparse Interpolation with the Berlekamp-Massey Algorithm}
206Given the strict degree bounds $r_i$, in order to interpolate $f(x_1, \dots, x_n)$ it suffices to interpolate $f(\xi, \xi^{r_1}, \xi^{r_1 r_2}, \dots, \xi^{r_1 \cdots r_{n-1}})$, which is a univarate with degree bound $\prod_i r_i$. If $t$ is the number of terms of $f$, then we can summarize the probe counts of the three methods.
207\begin{enumerate}
208\item dense: $\prod_i r_i$
209\item zippel: approximately $t \cdot \sum_i r_i$.
210\item bma: $2t$.
211\end{enumerate}
212
213This approach has problems too.
214\begin{itemize}
215\item insufficient evaluation points
216\item costs of the associated linear algebra and discrete logarithms.
217\end{itemize}
218
219
220Since the presentation in \cite{BMAR} is overly complicated and does not deal with the half gcd, it seems reasonable to review the Berlekamp-Massey Algorithm here. Given a formal power series
221\begin{equation*}
222\frac{a_1}{x} + \frac{a_2}{x^2} + \frac{a_3}{x^3} + \cdots\text{,} \quad a_i \in \bbF
223\end{equation*}
224vanishing at $x = \infty$ and the fact that this power series represents a rational function, we are interested in computing this rational function. The following theorem says that we can use the extended euclidean algorithm and stop when the first remainder of degree $<\frac{n}{2}$ is obtained.
225
226\begin{theorem}
227Suppose that
228\begin{equation*}
229\frac{a_1}{x} + \frac{a_2}{x^2} + \frac{a_3}{x^3} + \cdots = -\frac{\bar{u}}{\bar{v}}
230\end{equation*}
231for some $\bar{u}, \bar{v} \in \bbF[x]$ with $\op{deg}(\bar{u}) < \op{deg}(\bar{v}) \le \frac{n}{2}$. Suppose further that
232\begin{equation}
233\label{equ_bma1}
234u x^{n} + v (a_1 x^{n-1} + a_2 x^{n-2} + \cdots + a_{n-1} x + a_{n}) = r
235\end{equation}
236for some $u, v, r \in \bbF[x]$ with $\op{deg}(u) < \op{deg}(v) \le \frac{n}{2}$ and $\op{deg}(r) < \frac{n}{2}$ and $\op{deg}(r) < \op{deg}(v)$. Then,
237\begin{equation*}
238\frac{\bar{u}}{\bar{v}} = \frac{u}{v}\text{.}
239\end{equation*}
240\end{theorem}
241\begin{proof}
242Dividing both sides of \eqref{equ_bma1} by $v x^{n}$ shows that
243\begin{equation*}
244\frac{\bar{u}}{\bar{v}} = \frac{u}{v} + O \left( \frac{1}{x^{n+1}}\right)\text{,}
245\end{equation*}
246which, on account of the degree bounds $\op{deg}(\bar{v}), \op{deg}(v) \le \frac{n}{2}$, proves the equality.
247\end{proof}
248
249This reconstruction may be applied to reconstruct an $f(\xi) = c_1 \xi^{e_1} + \cdots + c_t \xi^{e_t} \in \bbF[\xi]$ from the sequence of evaluation points
250\begin{equation*}
251a_i = f(\alpha^{s+i-1})\text{,} \quad \alpha \neq 0, \quad s \in \bbZ\text{,}
252\end{equation*}
253for in this case we have
254\begin{equation*}
255\frac{a_1}{x} + \frac{a_2}{x^2} + \frac{a_3}{x^3} + \cdots = \frac{c_1\alpha^{e_1 s}}{x - \alpha^{e_1}} + \cdots + \frac{c_t\alpha^{e_t s}}{x - \alpha^{e_t}}\text{.}
256\end{equation*}
257If this rational function is known and the $e_i$ can be found, then $f$ is known as well.
258
259The main problem with this approach is that the term bound $t$ is not known in advance. The approach we take is to calculate the $v$ in \eqref{equ_bma1} for some $n$ points $a_1, \dots, a_{n}$. Then, we add another $m$ points to form the sequence $a_1, \dots, a_{n+m}$ and calculate the corresponding $v'$. If $v=v'$, then it is likely that $v$ is the correct denominator. The extent to which previous computations may be reused is addressed in Theorem \ref{thm_nm}.
260We follow \cite{YAP} for the presentation of the half gcd. An elementary matrix is one of the form $\smat{0}{1}{1}{q}$ for $\deg(q) > 0$ and a regular matrix is a product of zero or more elementary matrices. The notation $U \overset{M}{\longrightarrow} V$ shall mean that $M$ is a regular matrix and $U=MV$. If $\deg(A)>\deg(B)$ then $\op{hgcd}(A,B)$ is defined (see \cite{YAP}) as the (unique) regular matrix $M$ such that
261\begin{gather*}
262\matto{A}{B} \overset{M}{\longrightarrow} \matto{C'}{D'}\text{,}\\
263\deg(C') \ge \frac{\deg(A)}{2} > \deg(D')\text{.}
264\end{gather*}
265
266\begin{theorem}
267\label{thm_correctness}
268Suppose that
269\begin{align*}
270\matto{A_0}{B_0} &\overset{M}{\longrightarrow} \matto{A_0'}{B_0'}\\
271\op{deg}(A_0') &> \deg(B_0')\\
272\op{deg}(A_0) &\le 2 \op{deg}(A_0')
273\end{align*}
274Then, for any $A_1$, $B_1$ with $\deg(A_1),\deg(B_1) < m$,
275\begin{align*}
276\matto{A_0 x^m + A_1}{B_0 x^m + B_1} &\overset{M}{\longrightarrow} \matto{A'}{B'}\\
277\op{deg}(A') &= m + \op{deg}(A_0')\\
278\op{deg}(B') &\le m + \op{max}( \deg(B_0'), \deg(A_0')-1)\\
279\op{deg}(B') &\le m + \op{max}( \deg(B_0'), \frac{\deg(A_0)}{2}-1)
280\end{align*}
281for some $A', B'$.
282\end{theorem}
283\begin{proof}
284This is a trivial rearrangement of Lemma 1 in \cite{YAP}.
285\end{proof}
286
287\begin{theorem}
288\label{thm_nm}
289Suppose $\deg(s_n) < n$, $\deg(s_m) < m$ and
290\begin{gather*}
291\matto{x^n}{s_n} \overset{M}{\longrightarrow} \matto{r_0}{r_1}\\
292\deg(r_0) \ge \frac{n}{2} > \deg(r_1)
293\end{gather*}
294Then, a regular matrix $M'$ (and thus $r_0', r_1'$) such that
295\begin{gather*}
296\matto{x^{n+m}}{s_n x^m + s_m} \overset{M'}{\longrightarrow} \matto{r_0'}{r_1'}\\
297\deg(r_0') \ge \frac{n+m}{2} > \deg(r_1')
298\end{gather*}
299may be calculated as follows. Define $A', B'$ by
300\begin{equation*}
301\matto{x^{n+m}}{s_n x^m + s_m} \overset{M}{\longrightarrow} \matto{A'}{B'}
302\end{equation*}
303It will be the case that $\deg(A') \ge \frac{n+m}{2}$. If $\frac{n+m}{2} > \deg(B')$, return with $M'=M$. Otherwise set $C = B'$, $D = \op{rem}(A',B')$ and $q=\op{quo}(A',B')$. Define $k := n + m - \deg(C)$. It will be the case that $0 < k \le \deg(C)$. Return with
304\begin{equation*}
305M' = M \cdot \mattt{0}{1}{1}{q} \cdot \op{hgcd} \matto{\op{quo}(C,x^k)}{\op{quo}(D,x^k)}
306\end{equation*}
307\end{theorem}
308\begin{proof}
309By Theorem \ref{thm_correctness}, $\deg(A') = m + \deg(r_0) \ge m + \frac{n}{2} \ge \frac{n+m}{2}$. Now suppose $\frac{n+m}{2} \le \deg(B')$, from which the assertion $k \le \deg(C)$ follows automatically. By Theorem \ref{thm_correctness}, $\deg(B') \le m + \max(\deg(r_1), \deg(r_0) - 1) < m + n$. Thus, the assertion $0 < k$ is proved. Finally, suppose
310\begin{gather*}
311\matto{C_0 := \op{quo}(C,x^k)}{D_0 := \op{quo}(D,x^k)} \overset{H}{\longrightarrow} \matto{C_0'}{D_0'}\text{,}\\
312\deg(C_0') \ge \frac{\deg(C_0)}{2} > \deg(D_0')\text{.}
313\end{gather*}
314If $C', D'$ are defined by
315\begin{equation*}
316\matto{C}{D} \overset{H}{\longrightarrow} \matto{C'}{D'}\text{,}
317\end{equation*}
318it suffices to prove that $\deg(C') \ge \frac{n+m}{2} > \deg(D')$. By Theorem \ref{thm_correctness},
319\begin{align*}
320\deg(C') &= k + \deg(C_0')\\
321		 &\ge k + \frac{\deg(C_0)}{2}\\
322		 &= k + \frac{\deg(C) - k}{2}\\
323		 &= \frac{n+m}{2}\text{.}
324\end{align*}
325Also by Theorem \ref{thm_correctness},
326\begin{align*}
327\deg(D') &\le k + \max(\deg(D_0'), \frac{\deg(C_0)}{2} - 1)\\
328		 & < k + \max(\frac{\deg(C_0)}{2}, \frac{\deg(C_0)}{2})\\
329		 &= \frac{n+m}{2}\text{.}
330\end{align*}
331\end{proof}
332
333
334
335\section{Greatest Common Divisor}
336
337\subsection{Dense GCD in $\bbZ_p[x_1,\dots,x_n]$}\
338Brown's algorithm \cite{Brown} is used here. This comes in two versions - a small prime version and a large prime version. These refer not to the size of the $p$'s involved, but rather to the field from which evaluation points are chosen: it can either be $\bbF_p$ or an extension of $\bbF_p$. The small prime version interpolates in each variable by choosing evaluation points from $\bbF_p$. If this fails, then the large prime method uses interpolation in $\bbF_p/(f(x_n))[x_1,\dots,x_{n-1}]$, i.e. $\bbF_q[x_1,\dots,x_{n-1}]$, for sufficiently many irreducible $f(x) \in \bbZ_p[x]$. No explicit divisibility checks need to be performed because the cofactors are reconstructed along with the GCD.
339
340\subsection{Dense GCD in $\bbZ[x_1,\dots,x_n]$}\
341We simply reconstruct the GCD from its image in $\bbZ_p[x_1,\dots,x_n]$ for sufficiently many $p$. Only large $p$'s are used, and dense GCD's in $\bbZ_p[x_1,\dots,x_n]$ only use the small prime version. Each image GCD in $\bbZ_p$ is correct and Brown's coefficient bounds \cite{Brown} are used instead of a divisibility check. Some pseudocode is Section \ref{Pseudocode}.
342
343\subsection{Sparse GCD in $R[x_1,\dots,x_n]$}\
344Assuming that we have a gcd algorithm for $R[x_1,\dots,x_m]$, we can view the inputs as elements of $R[x_1,\dots,x_m][x_{m+1},\dots,x_n]$ and use interpolation to extend this algorithm from $m$ variables to $n$ variables. Brown's algorithm corresponds to taking $m=n-1$, using univariate interpolation for the extension of $n-1$ variables to $n$ variables, and recursively solving the $n-1$ variable gcd problem with the Euclidean algorithm as the base case. Taking $m=1$ gives Zippel's approach \cite{ZIPPEL}.
345If the inputs are made primitive with respect to $x_1,\dots,x_m$ by factoring out polynomials in $R[x_{m+1},\dots,x_n]$, the gcd of the leading coefficients of the input with respect to $x_1,\dots,x_m$ may be imposed as the leading coefficient of the interpolated gcd. Finally, if the primitive part with respect to $x_1,\dots,x_m$ of this interpolated gcd divides both inputs, it must be the true gcd.
346
347Of note here is an algorithm stated slightly incorrectly in \cite{SULING} and \cite{LINZIP}; The basic idea is to reconstruct the correct leading term of the gcd $\in R[x_1,\dots,x_m]$ using some linear algebra directly instead of constructing some known multiple and then removing content. This is in {\tt fmpz\_mpolyl\_gcd\_zippel} and a rough overview is:
348\ \\
349{\tt fmpz\_mpolyl\_gcdm\_zippel}($A, B \in \bbZ_p[x_1,\dots,x_n][X]$, $n \ge 1$):\\
350\indent $\left[\begin{tabular}{l}
351The GCD is assumed to have no content w.r.t. $X$ (content in $\bbZ[x_1, \dots, x_n]$)\\
352Pick a prime $p$ and call {\tt nmod\_polyl\_gcdp\_zippel} to get an probable image of $G \mod p$ \\
353Assume that the true gcd $G$ over $\bbZ$ has the same monomials as this image mod $p$.\\
354Pick more primes $p$ and call {\tt nmod\_mpolyl\_gcds\_zippel} to get more images of $G \mod p$. \\
355Combine the images via chinese remaindering and test divisibility.
356\end{tabular}\right.$
357
358\ \\
359The ``p'' versions produce a correct gcd when the inputs have no content in $\bbF_p[x_1,\dots,x_n]$.
360\ \\
361{\tt nmod\_mpolyl\_gcdp\_zippel}($A, B \in \bbF_p[x_1,\dots,x_n][X]$, $n \ge 1$):\\
362\indent $\left[\begin{tabular}{l}
363If the GCD has content w.r.t. $X, x_1, \dots, x_1$ (content in $\bbF_p[x_n]$), fail.\\
364Pick an evaluation point $x_n \to \alpha$ for $\alpha \in \bbF_p$.\\
365(1) Call {\tt nmod\_mpolyl\_gcdp\_zippel} recursively on the evaluated inputs in $\bbF_p[x_1,\dots,x_{n-1}][X]$.\\
366Record the form $f$ of the GCD obtained for step (2) below.\\
367Pick severial evaluation points $x_n \to \alpha$ for $\alpha \in \bbF_q$.\\
368(2) Call {\tt [fq\_]nmod\_mpoly\_gcds\_zippel} on the evaluated inputs in $\bbF_q[x_1,\dots,x_{n-1}][X]$.\\
369Combine the answer from (1) and the answers from (2) via interpolation in $x_n$.\\
370Check divisibility on the proposed interpolated GCD.
371\end{tabular}\right.$
372
373\ \\
374The ``s'' versions are the heart of Zippel's sparse interpolation.
375\ \\
376{\tt nmod\_mpolyl\_gcds\_zippel}($A, B \in \bbF_q[x_1,\dots,x_n][X]$, assumed monomial form $f$ of gcd):\\
377\indent $\left[\begin{tabular}{l}
378Via evaluations of the form $(x_1,\dots,x_n) \to (\alpha_1,\dots,\alpha_n) \in \bbF_p^n$,\\ and GCD computations in $\bbF_p[X]$, and linear algebra, try to compute the coefficients \\of the assumed form $f$ to match the GCD of the inputs (up to scalar multiples in $\bbF_p$).
379\end{tabular}\right.$
380
381\subsection{PRS}
382The PRS algorithm works over any gcd domain $R$. It starts with a primitive input with respect to some main variable and calculates a pseudo gcd with a pseudo remainder sequence. Content is removed from the pseudo gcd to produce the true gcd by a recursive call. The final content can be computed without expensive recursive calls to gcd in the case when we know the leading or trailing coefficient in the main variable must be a monomial in the remaining variables.
383
384This algorithm has been discarded because it is so bad but may be reintroduced for low degrees.
385
386\subsection{Hensel Lifting}
387The gcd can also be calculated using Hensel lifting \cite{EZGCD}. The gcd of the resulting univariates when all variables but one are substituted away gives two factorizations which can be lifted to obtain the multivariate gcd.
388
389
390\section{Factorization}
391
392
393\subsection{Squarefree Factorization in $K[x_1,\dots,x_n]$}
394\label{section_sqfr}
395By taking derivatives and greatest common divisors, we may assume that the input polynomial is squarefree and primitive with respect to each variable. Thus in characteristic zero the input polynomial $f \in K[x_1,\dots,x_n]$ may be assumed to satisfy
396\begin{equation*}
397\forall_i \quad f_{x_i} \neq 0 \quad \text{and} \quad \gcd(f,f_{x_i}) = 1\text{.}
398\end{equation*}
399Over a finite field ($K=\mathbb{F}_q$) of characteristic $p$, we have the slightly weaker conditions
400\begin{equation}
401\label{sqrfp_cond}
402\begin{alignedat}{3}
403&f_{x_1} \ne 0 \quad &\text{and}& \quad \gcd(f,f_{x_1}) = 1\\
404\forall_{i>1} \quad &f_{x_i} = 0 \quad &\text{or}& \quad \gcd(f,f_{x_i}) = 1
405\end{alignedat}
406\end{equation}
407
408While we could apply the factorization algorithms directly to this $f$ with $x_1$ as the main variable, it is possible to a bit better when some of the other derivatives vanish.
409
410\begin{theorem}
411With the assumption \ref{sqrfp_cond} on $f$ and prime powers $p^{e_2},\dots,p^{e_n}$ and a deflated polynomial $g$ with
412\begin{equation*}
413g(x_1,x_2^{p^{e_2}},\dots, x_n^{p^{e_n}}) = f(x_1,x_2,\dots,x_n)\text{,}
414\end{equation*}
415the factorization of $f$ is the inflated factorization of $g$.
416\end{theorem}
417The proof follows by induction from the following lemma: the polynomials $g(x_1,x_2, x_3,\dots, x_n)$ and $g(x_1,x_2^p, x_3,\dots, x_n)$ have the same factorization (up to inflation $x_2 \to x_2^p$).
418\begin{lemma}
419If $p = \operatorname{char}(K) > 0$ and $f(x,y) \in K[x,y] \setminus (K[x] \cup K[y])$ is irreducible and $f(x^p,y)$ is squarefree, then $f(x^p,y)$ is irreducible.
420\end{lemma}
421\begin{proof}
422Suppose that $f(x^p,y)=g(x,y)h(x,y)$ for $g,h \not \in K$. Since $f(x^p,y)$ is squarefree, $g$ and $h$ are squarefree, and there are $s, t \in K(y)[x]$ with $1=sg+th$. By differentiating $f(x^p,y)=g(x,y)h(x,y)$, we obtain $0=h g_x + g h_x$, which when combined with $1=sg+th$ gives $h(t h_x - s g_x)=h_x$. This implies that $h_x=0$ and in turn that $g_x=0$, which implies that $f(x,y)$ is reducible, a contradiction.
423\end{proof}
424
425\subsection{Factorization in $R[x]$}
426\subsubsection{Quadratic over characteristic $\ne 2$}
427The primitive polynomial $ax^2+bx+c$ factors if and only if $b^2-4ac$ is a square in $R$, in which case the factors are the primitive parts of $2ax+b\pm \sqrt{b^2-4ac}$.
428
429\subsubsection{Quadratic in $R[X]$ for $R=\mathbb{F}_{2^k}[x_1,\dots,x_n]$}
430
431We wish to determine if $X^2+AX+B$ has a root in $R$. Since $X_0+A$ is a root if $X_0$ is, at least one of the two roots does not have $\operatorname{lt}(A)$ as a term. (It very well may be the case that both roots have a monomial matching $\operatorname{lm}(A)$, but then both corresponding coefficients must be different from the leading coffcient of $A$). Therefore, we make the important assumption that \emph{we are searching for a root $X_0$ with $\operatorname{lt}(A)$ not a term of $X_0$}. Let $m$ denote the leading term of $X_0$. By taking leading terms in $X_0^2+AX_0+B$ and applying the assumption, we have
432\begin{equation*}
433\operatorname{lt}(m^2+\operatorname{lt}(A)m) = \operatorname{lt}(B)\text{,} \quad \text{and} \quad m \neq \operatorname{lt}(A)\text{.}
434\end{equation*}
435For any specific given terms $\operatorname{lt}(A)$, $\operatorname{lt}(B)$, this equation is easy to solve for $m$ or to determine that there is no solution.
436\begin{align*}
437\operatorname{lm}(m)>\operatorname{lm}(A)&: \quad m = \sqrt{\operatorname{lt}(B)}\\
438\operatorname{lm}(m)=\operatorname{lm}(A)&: \quad m = \zeta /\operatorname{lc}(A)\sqrt{\operatorname{lm}(B)}\text{,} \quad \zeta^2+\zeta =\operatorname{lc}(B)/\operatorname{lc}(A)^2\\
439\operatorname{lm}(m)<\operatorname{lm}(A)&: \quad m = \operatorname{lt}(B)/\operatorname{lt}(A)
440\end{align*}
441
442Once $m$ is found, the equation satisfied by $X_0-m$ has the same $A$ and a new $B$ with a smaller leading monomial. In this way the solution may be written down in order, and this process is a simplification of Sections 4 and 5 in \cite{QuadraticFactor}, which does not present a sparse algorithm due to the many (possibly disastrous) divisions performed. The quadratic $\zeta^2+\zeta+c \in \mathbb{F}_{2^k}$ has a root if and only if $\operatorname{Tr}(c)=0$, in which case $c=\sum_{i=1}^{k-1}c^{2^i}\sum_{j=0}^{i-1}u^{2^j}$ is a root where $u$ is any element of $\mathbb{F}_{2^k}$ with $\operatorname{Tr}(u)=1$. If $\mathbb{F}_{2^k} = \mathbb{F}_2[\theta]/P(\theta)$, then $u=1/(\theta P'(\theta))$ will do.
443
444\subsubsection{Cubic over $\mathbb{Z}$}
445To factor a cubic over $\mathbb{Z}$, we first find the roots over the more friendly ring $\mathbb{Z}_2$ and then test these roots over $\mathbb{Z}$. Since it is easy to bound the roots over $\mathbb{Z}$, the roots over $\mathbb{Z}_2$ only need to be calculated to some finite precision $p$, that is, to order $O(2^p)$.
446
447Factor $x^3+2^\alpha ax+2^\beta b $ over $\mathbb{Z}_2$ where $\alpha, \beta\geq 0$ and $a, b$ are odd integers:
448\begin{enumerate}
449	\item $2\beta=3\alpha$: irreducible, as replacing $x\leftarrow 2^{\beta/3}y$ has no roots modulo $2$ for $y$.
450	\item $ 2\beta< 3\alpha$:
451	\begin{enumerate}
452		\item $3\nmid \beta$: irreducible as all roots have valuation $\beta/3$.
453		\item $3\mid \beta$: Replacing $x\leftarrow 2^{\beta/3}y$ gives $y^3+2^{\alpha-2\beta/3}a y+b=0$, which factors as $(y^2+y+1)(y+1)=0$ modulo $2$. Hence there is a unique root in $\mathbb{Z}_2$, and this root has valuation $\beta/3$.
454	\end{enumerate}
455	\item $2\beta > 3\alpha$: Replacing $x\leftarrow 2^{\beta-\alpha}y$ gives $2^{2\beta-3\alpha}y^3+ay+b=0$, which has $y=1$ mod $2$ as a root. This gives a factorization
456	$$2^{2\beta-3\alpha}y^3+ay+b=(y+r)(2^{2\beta-3\alpha}y^2-2^{2\beta-3\alpha}ry+s)$$ for some odd $r, s\in \mathbb{Z}_2$. This becomes
457	$$ (x+2^{\beta-\alpha}r)(x^2-2^{\beta-\alpha}rx+2^\alpha s)=0$$
458	\begin{enumerate}
459		\item $2\nmid \alpha$: quadratic is irreducible and $-2^{\beta-\alpha}r$ is the only root.
460		\item $2\mid \alpha$: assuming the square roots exist, the roots of the quadratic, which have valuation $\alpha/2$, are
461		\begin{equation*}
462		2^{\beta-\alpha-1}r \pm 2^{\alpha/2} \sqrt{2^{2\beta-3\alpha-2}r^2-s}
463		\end{equation*}
464		If $r$ and $s$ are calculated to some absolute precision $O(2^p)$, then this expression is also known to absolute precision $O(2^p)$ except when $\alpha=0$ and $\beta=1$, in which case the square root loses more than one bit of precision.
465	\end{enumerate}
466\end{enumerate}
467
468
469\subsection{Factorization in $K[x,y]$}
470For $K=\mathbb{Q}$, an irreducible bivariate polynomial $f(x,y)$ remains irreducible modulo $y=y_0$ for a generic $y_0 \in \mathbb{Q}$. Hence, all of the difficult recombination may be pushed to the univariate factorization. When $K=\mathbb{F}_q$ the recombination in \cite{GlobalFactor} is necessary.
471\subsubsection{Bivariate factorization over $\mathbb{Q}$}
472We begin with $f(x,y)$ satisfying
473\begin{enumerate}
474\item $f(x,y) \in \mathbb{Z}[x,y]$ and $f(x,0) \in \mathbb{Z}[x]$ are squarefree so that we can lift.
475\item $f(x,y)$ is primitive with respect to $x$ (i.e. $\op{cont}_x(f) \in \mathbb{Z}[y]$ is $1$) so that any factor is also primitive with respect to $x$.
476\item $\op{deg}_x (f(x,y)) = \op{deg}_x (f(x,0))$ (i.e. $\op{lc}_x(f)$ does not vanish at $y=0$) so that we can make $f$ monic.
477\end{enumerate}
478Let
479\begin{equation*}
480\tilde{f}(x,y) = f(x,y) / \op{lc}_x (f(x,y)) \in \mathbb{Q}[[y]][x]
481\end{equation*}
482be the monic version of $f$ computed to precision $O(y^{1 + \op{deg}_y f})$. We can factor $\tilde{f}(x,0) \in \mathbb{Q}[x]$ by a univariate algorithm and lift the factors to produce an irreducible factorization in $\mathbb{Q}[[y]][x]$ as
483\begin{equation*}
484\tilde{f}(x,y) = \prod_{i=1}^{l} \tilde{f}_i(x,y)\text{.}
485\end{equation*}
486The $\tilde{f}_i(x,y)$ are also monic and need to be computed to precision $O(y^{1 + \op{deg}_y f})$. For each subset $S$ of $\{1,\dots, l\}$, we then have the candidate true factor
487\begin{equation*}
488\op{ppart}_x \left(\op{lc}_x(f) \prod_{i \in S} \tilde{f}_i(x,y) \right)\text{,}
489\end{equation*}
490where, before taking the primitive part, the elements of $\mathbb{Q}[[y]][x]$ must be mapped to $\mathbb{Q}[y][x]$ via remainder upon division by $y^{1 + \op{deg}_y f}$. Since we are only interested in candidate factors over $\mathbb{Z}$, $\mathbb{Q}$ may be replaced by $\mathbb{Z}/p^k \mathbb{Z}$ for appropriate $p^k$ (in particular $p \nmid \op{lc}_x (f(x,0))$). The coefficients in $\mathbb{Z}/p^k \mathbb{Z}$ must then be mapped to $\mathbb{Z}$ via the symmetric remainder before taking the primitive part.
491
492\subsubsection{Bivariate Factorization over $\mathbb{F}_q$}
493We begin with $f(x,y) \in \mathbb{F}_q[x,y]$ and an irreducible $\alpha(y) \in \mathbb{F}_q[y]$ (with $\mathbb{F}_{q^k} := \mathbb{F}_q[y]/\alpha(y)$) such that
494\begin{enumerate}
495	\item $\alpha(y)$ does not divide $\op{lc}_x f(x,y)$ so that we can make $f$ monic.
496	\item $f(x,y) \bmod \alpha(y) \in \mathbb{F}_{q^k}[x]$ is squarefree so that we can lift.
497	\item $f(x,y)$ is primitive with respect to $x$.
498\end{enumerate}
499
500The irreducible factorization of $f(x,y) \bmod \alpha(y)$ can be lifted to a monic factorization in $\mathbb{F}_q[[\alpha(y)]][x]$. With the help of some linear algebra over $\mathbb{F}_p$ these factors can be recombined into true factors.
501
502
503\subsection{Factorization in $R[x_1,\dots,x_n][X]$}
504Factoring of a multivariate squarefree primitive polynomial $f$ over $R[x_1,...,x_n][X]$ (satisfying the assumptions of Section \ref{section_sqfr} works by reducing $f$ modulo the ideal
505\begin{equation*}
506\langle x_1 = \alpha_1, x_2 = \alpha_2, \dots, x_n = \alpha_n \rangle
507\end{equation*}
508for some $\alpha_i \in R$, factoring the resulting univariate into, say, $r$, factors, and then lifting the univariate factorization to a multivariate factorization. The evaluation points must be good in the sense that $f(\alpha_1, \dots, \alpha_n, X)$ is squarefree and has the same degree as $f(x_1, \dots, x_n, X)$ in $X$. This lifting process does not change the leading coefficients in $X$, hence it is necessary that the leading coefficients be ``correct" before the lifting. In the most general setting, we can determine $d_i \in R[x_1,...,x_n]$, such that it is known that $d_i$ divides the leading coefficient of the $i$-th lifted factor. Then, before lifting, we compute $m=\operatorname{lc}_X(f)/(d_1 \cdots d_r)$, impose a leading coefficient of $d_i m$ on the $i$-th factor, and multiply $f$ by $m^{r-1}$. If the lifting succeeds, then the actual factors can be obtained by taking principle parts. Doing no work to precompute leading coefficients corresponds to taking $d_i=1$, which can obviously lead to large swells.
509
510\subsubsection{Wang's leading coefficient computation}
511Wang \cite{WANG} has a good solution to the leading coefficient problem over $\mathbb{Z}$. The idea can be illustrated by a simple example.
512\begin{equation*}
513(2x_1^3 x_2+2x_1^3 x_2)X^2 + \cdots = (2x_1(x_1+x_2)X+x_1)(x_1 x_2 X + 6)
514\end{equation*}
515First the irreducible factorization of the leading coefficient is computed
516\begin{equation*}
517(2x_1^2 x_2+2x_1^2 x_2)=2x_1^2x_2(x_1+x_2)
518\end{equation*}
519Next, an evaluation point $x_i=\alpha_i$ such that there exists primes $p_i$ such that
520\begin{align*}
521&p_3 \mid \alpha_1 + \alpha_2, \quad p_3 \nmid \alpha_2, \quad p_3 \nmid \alpha_1,\\
522&p_2 \mid \alpha_2, \quad p_2 \nmid \alpha_1,\\
523&p_1 \mid \alpha_1
524\end{align*}
525Lets take $\alpha_1=10, \alpha_2=14$ and $p_1=5, p_2=7, p_3=3$. The univariate factorization comes out as
526\begin{equation*}
52720(48X+1)(70X + 3)
528\end{equation*}
529What is of interest here is the leading coefficients of the primitive factors over $\mathbb{Z}$. From $p_3=3$ we can correctly distribute $x_1+x_2$ to the first multivariate factor. From $p_2=7$ we can distribute $x_2^2$ to both factors, and from $p_1=5$, we can distribute $x_1$ to the second factor.
530
531When $R$ is a finite field, there is no useful notion of ``prime''. Furthermore, the probability that an irreducible univariate factorization can be lifted to a multivariate factorization is low and sometimes zero. Hence this does not work as stated. One may replace $R$ by $R[Y]$ for an auxiliary indeterminate $Y$ and consider polynomial substitutions of the form
532\begin{align*}
533x_1 &= \alpha_1 + \beta_1 Y + \gamma_1 Y^2 + \cdots\\
534x_2 &= \alpha_2 + \beta_2 Y + \gamma_2 Y^2 + \cdots\\
535&\cdots\\
536x_n &= \alpha_n + \beta_n Y + \gamma_n Y^2 + \cdots\text{.}
537\end{align*}
538The base case factorization is now not $R[X]$ but $R[Y][X]$. The points $\alpha_1, ..., \alpha_n$ still need to be good because the lifting will ultimately begin with univariates. However, the univariate factors come not from an irreducible univariate factorization, but from the $Y=0$ image of a bivariate factorization, which should greatly increases the changes of success in the lifting.
539
540\subsubsection{Kaltofen's leading coefficient computation}
541In this recursive approach \cite{KALTOFEN}, after substituting away all but \emph{two} of the variables, the bivariate polynomial is factored and the leading coefficients of the bivariate factors can be lifted against the leading cofficient of the original polynomial. Since only squarefree lifting is implemented, it is actually the squarefree parts of everything that are lifted.
542
543\subsubsection{Dense Hensel lifting}
544Some pseudocode is Section \ref{Pseudocode}. Of note here is that when lifting over $\mathbb{Z}$, we do not lift over $\mathbb{Z}/p^k\mathbb{Z}$ as Wang \cite{WANG} advises but do the lifting directly over $\mathbb{Z}$.
545
546\subsubsection{Sparse Hensel lifting}
547
548
549\section{Absolute Factorization}
550
551The goal of absolute factorization is to take an irreducible $f \in R[x_1, \dots, x_n]$ and either determine that $f$ is absolutely irreducible or provide a factorization
552\begin{equation*}
553f = g h \text{,} \quad g, h \in R'[x_1, \dots, x_n]
554\end{equation*}
555where $g$ is absolutely irreducible. $h$ may or may not be absolutely irreducible: it is simply the product of the rest.
556
557\subsection{Absolute Irreduciblity Testing}
558Here we follow Gao \cite{GAO}. For a multivariate polynomial $f = \sum_{\bold{i} \in \mathbb{Z}^n}{c_{\bold{i}} \, \pmb{x}^{\bold{i}}}$, the Newton polygon $N(f)$ is defined to be the convex hull of $\{\bold{i} \in \mathbb{Z}^n | c_{\bold{i}} \ne 0\}$ in $\mathbb{R}^n$.
559Since $N(fg) = N(f) + N(g)$ where $+$ denotes the Minkowski sum, if $N(f)$ is indecomposable, then $f$ is absolutely irreducible. Although indecomposability testing is hard, Gao gives a reasonable algorithm in two dimensions \cite{GAO2}, that is, for bivariate polynomials, and projects higher dimensional polytopes onto a two-dimensional ``shadow'' to test them for indecomposability.
560
561If $f \in K[\pmb{x}]$ happens to be irreducible over $K$ but not over the algebraic closure $\overline{K}$, then $N(f)$ will never be sufficient to prove the irreducibility over $K$. In the case that we are able to prove that $f$ is irreducible over $K$ using other methods, $N(f)$ can still be used to obtain some information on the degree of an extension of $K$ needed to factor $f$ absolutely. An absolute factorization of an irreducible $f(\pmb{x}) \in K[\pmb{x}]$ looks like
562\begin{equation*}
563f(\pmb{x}) = \operatorname{resultant}_{\alpha} (u(\alpha), g(\alpha, \pmb{x}))
564\end{equation*}
565for some irreducible $u(\alpha) \in K[\alpha]$ of degree, say, $m$. Since all $m$ of the $g(\alpha, \pmb{x})$ have the same Newton polygon, it follows that $N(f)=m\cdot N(g)$, and thus $m$ divides the coordinates of every vertex in $N(f)$. This can severely limit the possibilities for the extension degree required for an absolute factorization.
566
567\subsection{Bivariate Absolute Factorization over $\mathbb{Q}$}
568The idea here is that an absolutely irreducible $g(x,y) \in \overline{\mathbb{Q}}[x,y]$ remains absolutely irreducible in $\overline{\mathbb{F}}_p[x,y]$ for generic $p$.
569
570Assume that $f(x,y) \in \mathbb{Q}[y][x]$ is irreducible. Pick a good $\alpha \in \mathbb{Q}$ and a good rational prime $p$. The definition of ``good'' is that none of the following steps or assumptions fail. Determine an $\mathbb{F}_q = \mathbb{F}_{p^?}$ such that $f(x,\alpha)$ splits completely into distinct linear irreducibles:
571\begin{equation*}
572\frac{f(x,\alpha)}{\operatorname{lc}_x(f(x,y)) |_{y=\alpha}} = \prod_i x - r_i \text{ in } \mathbb{F}_q[x]\text{.}
573\end{equation*}
574Lift this to power series:
575\begin{equation*}
576\frac{f(x,y)}{\operatorname{lc}_x(f(x,y))} = \prod_i x - r_i(y) \text{ in } \mathbb{F}_q[[y-\alpha]][x]\text{.}
577\end{equation*}
578Do some linear algebra to recombine the factors into a real factorization:
579\begin{equation*}
580f(x,y) = \prod_j g_j(x,y) \text{ in } \mathbb{F}_q[y][x]\text{.}
581\end{equation*}
582The $g_i(x,y) \in \mathbb{F}_q[y][x]$ are absolutely irreducible. It might be possible to reduce the size of $q$ at this point.
583
584We then try to lift this to a factorization in $\mathbb{Q}_q[y][x]$:
585\begin{equation*}
586f(x,y) = \prod_j \widetilde{g}_j(x,y) \text{ in } \mathbb{Q}_q[y][x]\text{.}
587\end{equation*}
588In order to attemp this lift the $\operatorname{lc}_x(\widetilde{g}_j(x,y)) \in \mathbb{Q}_q[y]$ must be correct before starting. Assume $\operatorname{lc}_x(f(x,y))$ is monic in $y$, and that its squarefree part remains squarefree modulo $p$. Then, the squarefree factors of the $\operatorname{lc}_x \widetilde{g}_j(x,y)$ can be lifted and we can recover the monic $\operatorname{lc}_x(\widetilde{g}_j(x,y)) \in \mathbb{Q}_q[y]$.
589
590Finally, we map $\widetilde{g}_1(x,y)$ to some number field $K[x,y]$ (so that the other $\widetilde{g}_j(x,y)$ are its conjugates) and test divisibility $g_1|f$.
591
592\subsection{Bivariate Absolute Factorization over $\mathbb{F}_q$}
593
594
595
596\subsection{Multivariate Absolute Factorization}
597For absolutely factoring  an irreducible in $R[x_1,\dots,x_n][X]$, the plan is to substitute good auxiliary polynomials
598\begin{align*}
599x_1 &= \alpha_1 + \beta_1 Y + \gamma_1 Y^2 + \cdots\\
600x_2 &= \alpha_2 + \beta_2 Y + \gamma_2 Y^2 + \cdots\\
601&\cdots\\
602x_n &= \alpha_n + \beta_n Y + \gamma_n Y^2 + \cdots\text{,}
603\end{align*}
604and absolutely factor the resulting bivariate in $R[X,Y]$, and then lift the $Y=0$ images of the two factors back to a multivariate factorization. This would require the fact that an absolutely irreducible multivariate remains an absolutely irreducible bivariate under a generic substitution of this form.
605
606\begin{thebibliography}{99}
607\bibitem{Brown} W. S. Brown. On Euclid’s Algorithm and theComputation of Polynomial Greatest Common Divisors. J. ACM 18 (1971), 478-504.
608
609\bibitem{Johnson} Johnson, S.C., 1974. Sparse polynomial arithmetic. ACM SIGSAM Bulletin 8 (3), pp. 63--71.
610
611\bibitem{FPS} Monagan M., Pearce R.: Sparse polynomial powering using heaps. “Computer Algebra in Scientific Computing”, Springer, 2012, s.236-247.
612
613\bibitem{ZIPPEL} Zippel, Richard, Probabilistic algorithms for sparse polynomials. Lecture Notes in Computer Science. 72. pp. 216--226, 1979
614
615\bibitem{LINZIP}  J. de Kleine, M. Monagan and A. Wittkopf, Algorithms for the non-monic case of the sparse modular GCD algorithm. Proceedings of ISSAC ’05, ACM Press, pp. 124--131, 2005.
616
617\bibitem{SULING} Yang, Suling. Computing the Greatest Common Divisor of Multivariate Polynomials over Finite Fields. http://www.cecm.sfu.ca/CAG/theses/suling.pdf
618
619\bibitem{BMAR} The Berlekamp-Massey Algorithm revisited, N. B. Atti, G. M. Diaz–Toca, H. Lombardi, 9 March 2006
620
621\bibitem{YAP} A Unified Approach to HGCD Algorithms for polynomials and integers by Klaus Thull , Chee K. Yap
622
623\bibitem{GlobalFactor}  Factoring polynomials over global fields Belabas, Karim; van Hoeij, Mark; Klüners, Jürgen; Steel, Allan Journal de théorie des nombres de Bordeaux, Volume 21 (2009) no. 1, p. 15-39
624
625\bibitem{EZGCD} P. S. Wang, The EEZ-GCD Algorithm, ACM SIGSAM Bulletin 14, pp. 50--60, 1980
626
627\bibitem{WANG} P. S. Wang, An improved multivariate polynomial factoring algorithm. Mathematics of Computation 32, no. 144, 1215--1231, 1978
628
629\bibitem{GAO} S. Gao, Absolute irreducibility of polynomials via Newton polytopes, Journal of Algebra
630237 (2001), 501--520.
631
632\bibitem{GAO2} S. Gao and A.G.B. Lauder, Decomposition of polytopes and polynomials, Discrete and Computational Geometry 26 (2001), 89--104.
633
634\bibitem{QuadraticFactor} Jørgen Cherly, Luis Gallardo, Leonid Vaserstein and Ethel Wheland:  Solving Quadratic Equations over Polynomial Rings of Characteristic Two. Publicacions Matemàtiques, Vol. 42, No. 1 (1998), pp. 131-142
635
636\bibitem{KALTOFEN} E. Kaltofen.  Sparse Hensel lifting.  In EUROCAL 85 European Conf. Comput. Algebra Proc. Vol. 2, pages 4–17, 1985
637\end{thebibliography}
638
639\section{Pseudocode}
640\label{Pseudocode}
641
642\subsection{gcd} For the dense gcd over finite fields, if one runs out of primes of the form $x-\alpha$, instead of failing it is possible to use any irreducible polynomial in place of $x-\alpha$ in Algorithm \ref{algo_brownp}, and this would constitute the large prime version of the algorithm.
643
644\begin{algorithm}[H]
645	\DontPrintSemicolon
646	\KwIn{
647		\begin{enumerate}
648			\item $A,B \in \mathbb{F}_q[x][x_1, \dots, x_n]$ neither is zero
649		\end{enumerate}
650	}
651	\KwOut{
652		\begin{enumerate}
653			\item monic $G= \op{gcd}(A,B)$, $\bar{A}=A/G$, $\bar{B}=B/G$
654		\end{enumerate}
655	}
656	\lIf{$n=0$}{\textbf{return} using univariate arithmetic}
657	set $cA= \op{cont}_{x_1,\dots, x_n}(A)$ and $ cB=\op{cont}_{x_1, \dots, x_n}(B) \in \mathbb{F}_p[x]$\;
658	set $A= A/cA$ and $B=B/cB$ \tcp*{content $cA, cB, \dots$ is always monic}
659	set $cG= \op{gcd}(cA, cB)$, $c\bar{A}=cA/cG$ and $c\bar{B}=cB/cG$\;
660	set $\gamma=\op{gcd}(\op{lc}_{x_1,\dots, x_n}(A),\op{lc}_{x_1,\dots, x_n}(B))\in \mathbb{F}_q[x]$\;
661	set $bound= 1+ \op{deg}_{x}\gamma+ \max(\op{deg}_x(A), \op{deg}_x(B))$,
662	and set $m=1\in \mathbb{F}_p[x]$\;
663	\texttt{pick a prime}: \tcp*{primes are $(x-\alpha)$}
664
665	choose a new $\alpha\in \mathbb{F}_q$ else \textbf{return} FAIL\;
666	set $\gamma^*=\gamma\op{mod} {(x-\alpha)}$\;
667	set $A^*=A \op{mod} (x-\alpha)$ and $B^*=B\op{mod} (x-\alpha)\in \mathbb{F}_q[x_n][x_1,\dots,x_{n-1}]$\;
668	\lIf{$\gamma^*=0$}{\textbf{goto} \texttt{pick a prime}}
669	set $(G^*,\bar{A}^*, \bar{B}^*)= \textbf{brownp}(A^*,B^*)$ or \textbf{goto} \texttt{pick a prime} if the call failed\;
670	\lIf{$G^*=1$}
671	{set $G=1, A=\bar{A},B=\bar{B}$,
672		\textbf{goto} \texttt{put content}}
673	\If{$\op{deg}_x(m)>0$}
674	{\lIf{$\op{lm}_{x_1, \dots,x_n}(G^*)<\op{lm}_{x_1, \dots,x_n}(G)$}{set $m=1$}\lIf{$\op{lm}_{x_1, \dots,x_n}(G^*)>\op{lm}_{x_1, \dots,x_n}(G)$}{\textbf{goto} \texttt{pick a prime}}
675	}
676	set $\bar{A}=\op{crt}(\bar{A} \op{mod} m,\ \bar{A}^* \op{mod} \ (x-\alpha))$
677	and $\bar{B}=\op{crt}(\bar{B} \op{mod} m,\ \bar{B}^* \op{mod} \ (x-\alpha))$\; \label{algo_brownp_abcrt}
678	\If{$\bar{A}$ did not change and, with $T=\bar{A}/\op{cont}_{x_1, \dots, x_n}(\bar{A})$, $T\mid A$ and $A/T\mid B$ \label{algo_brownp_astab}}
679	{ set $G=A/T$, $\bar{A}=T$ and $\bar{B}={B}/G$, \textbf{goto} \texttt{fix lcs}
680	}
681	set $G=\op{crt}(G \op{mod} m,\ \gamma^*\cdot G^* \op{mod} \ (x-\alpha))$
682	and $m= m \cdot (x-\alpha)$\; \label{algo_brownp_gcrt}
683	\If{$G$ did not change and, with $T=G/\op{cont}_{x_1,\dots,x_n}(G)$, $T \mid A$ and $T\mid B$ \label{algo_brownp_gstab}}{set $G=T$, $\bar{A}=\bar{A}/G$ and $\bar{B}=B/G$, \textbf{goto} \texttt{fix lcs}}
684	\lIf{$\op{deg}_x(m)< bound$}{\textbf{goto} \texttt{pick a prime}}
685	\lIf{$\op{deg}_x\gamma+\op{deg}_xA=\op{deg}_xG+\op{deg}_x\bar{A}$ and $\op{deg}_x\gamma+\op{deg}_xB=\op{deg}_xG+\op{deg}_x\bar{B}$}{\textbf{goto} \texttt{success}}
686	set $m=1$, \textbf{goto} \texttt{pick a prime}
687
688	\texttt{success:}\;
689	set $G=G/\op{cont}_{x_1,\dots, x_n}(G)$, $A=A/\op{lc}_{x_1,\dots, x_n}(G)$ and $B=B/\op{lc}_{x_1,\dots, x_n}(G)$\;
690	\texttt{put content:}\;
691	set $G=G \cdot cG$, $\bar{A}=\bar{A}\cdot c\bar{A}$ and $\bar{B}=\bar{B}\cdot c\bar{B}$\;
692	\Return{$(G, \bar{A}, \bar{B})$}\;
693	\texttt{fix lcs:}\;
694	with $\delta = \op{lc}_{x,x_1,\dots, x_n}(G)$, set $G=\delta^{-1} G$, $A=\delta A$ and $B=\delta B$, \textbf{goto} \texttt{put content}
695	\caption{$\textbf{brownp}$ dense gcd over finite field}
696	\label{algo_brownp}
697\end{algorithm}
698
699On lines \ref{algo_brownp_abcrt} and \ref{algo_brownp_gcrt}, the inputs $G, \bar{A}, \bar{B}$ are undefined only when $m=1$, in which case the $\op{crt}$ ignores them anyways. There should also be a check analogous to line \ref{algo_brownp_astab} for the stabilization of $\bar{B}$. This was omitted simply due to space constraints. Finally, the stability checks in lines \ref{algo_brownp_astab} and \ref{algo_brownp_gstab} (and the missing one for $\bar{B}$) are completely optional and may be executed or skipped on every iteration at the user's discretion.
700
701Similarly to the previous algorithm, divisibility checks could be performed over the integers as well.
702
703\begin{algorithm}[H]
704	\DontPrintSemicolon
705	\KwIn{ $n \ge 1$
706		\begin{enumerate}
707			\item $A,B \in \mathbb{Z}[x_1, \dots, x_n]$ neither is zero
708		\end{enumerate}
709	}
710	\KwOut{
711		\begin{enumerate}
712			\item unit normal $G= \op{gcd}(A,B)$, $\bar{A}=A/G$, $\bar{B}=B/G$
713		\end{enumerate}
714	}
715	set $cA= \op{cont}_{x_1,\dots, x_n}(A)$ and $cB=\op{cont}_{x_1, \dots, x_n}(B) \in \mathbb{Z}$\;
716	set $A= A/cA$ and $B=B/cB$ \tcp*{content $cA, cB, \dots$ is always positive}
717	set $cG= \op{gcd}(cA, cB)$, $c\bar{A}=cA/cG$ and $c\bar{B}=cB/cG$\;
718	set $\gamma=\op{gcd}(\op{lc}_{x_1,\dots, x_n}(A),\op{lc}_{x_1,\dots, x_n}(B))\in \mathbb{Z}[x]$\;
719	set $bound= 2 \cdot \gamma \cdot \max(|A|_{\infty}, |B|_{\infty})$,
720	and set $m=1 \in \mathbb{Z}$\;
721	\texttt{pick a prime}: \tcp*{primes are numbers}
722	choose a new prime $p \in \mathbb{Z}$ else \textbf{return} FAIL\;
723	set $\gamma^*=\gamma\op{mod} p$\;
724	set $A^*=A \op{mod} p$ and $B^*=B\op{mod} p\in \mathbb{F}_p[x_n][x_1,\dots,x_{n-1}]$\;
725	\lIf{$\gamma^*=0$}{\textbf{goto} \texttt{pick a prime}}
726	set $(G^*,\bar{A}^*, \bar{B}^*)= \textbf{brownp}(A^*,B^*)$ or \textbf{goto} \texttt{pick a prime} if the call failed\;
727	\lIf{$G^*=1$}
728	{set $G=1, A=\bar{A},B=\bar{B}$,
729		\textbf{goto} \texttt{put content}}
730	\If{$m>1$}
731	{\lIf{$\op{lm}_{x_1, \dots,x_n}(G^*)<\op{lm}_{x_1, \dots,x_n}(G)$}{set $m=1$}\lIf{$\op{lm}_{x_1, \dots,x_n}(G^*)>\op{lm}_{x_1, \dots,x_n}(G)$}{\textbf{goto} \texttt{pick a prime}}
732	}
733	set $\bar{A}=\op{crt}(\bar{A} \op{mod} m,\ \bar{A}^* \op{mod} p)$
734	and $\bar{B}=\op{crt}(\bar{B} \op{mod} m,\ \bar{B}^* \op{mod} p)$\;
735	set $G=\op{crt}(G \op{mod} m,\ \gamma^*\cdot G^* \op{mod} p)$
736	and $m= m \cdot p$\;
737	\lIf{$m< bound$}{\textbf{goto} \texttt{pick a prime}}
738	set $hA = \min(|G|_1 \cdot |\bar{A}|_\infty, |G|_\infty \cdot |\bar{A}|_1)$ \tcp*{upper bound on $|G\cdot\bar{A}|_\infty$}
739	set $hB = \min(|G|_1 \cdot |\bar{B}|_\infty, |G|_\infty \cdot |\bar{B}|_1)$ \tcp*{upper bound on $|G\cdot\bar{B}|_\infty$}
740	\lIf{$hA < m$ and $hB < m$}{\textbf{goto} \texttt{success}}
741	\textbf{goto }\texttt{pick a prime}
742
743	\texttt{success:}\;
744	set $G=G/\op{cont}_{x_1,\dots, x_n}(G)$, $A=A/\op{lc}_{x_1,\dots, x_n}(G)$ and $B=B/\op{lc}_{x_1,\dots, x_n}(G)$\;
745	\texttt{put content:}\;
746	set $G=G \cdot cG$, $\bar{A}=\bar{A}\cdot c\bar{A}$ and $\bar{B}=\bar{B}\cdot c\bar{B}$\;
747	\Return{$(G, \bar{A}, \bar{B})$}\;
748	\caption{$\textbf{brownm}$ dense gcd over integers}
749	\label{algo_brownm}
750\end{algorithm}
751
752\subsection{factoring}
753
754The lifting algorithms with be stated with $3$ factors.
755
756
757\begin{algorithm}[H]
758\DontPrintSemicolon
759\KwIn{	$m \ge 2$
760	\begin{enumerate}
761 	\item $(\alpha_1, \dots, \alpha_m) \in R^m$
762 	\item $A \in R[x_1, \dots, x_m][X]$ with $A(X, \alpha_1, \dots, \alpha_m)$ squarefree
763 	\item $(B_1, B_2, B_3) \in R[x_1, \dots, x_m][X]$ (however, all but the leading coefficients of each $B_i$ are in $R[x_1, \dots, x_{m-1}]$) such that $A(X, x_1, \dots, x_{m-1}, \alpha_m) = (B_1 B_2 B_3)(X, x_1, \dots, x_{m-1}, \alpha_m)$
764	\end{enumerate}
765}
766\KwOut{
767	\begin{enumerate}
768 	\item $(B_1, B_2, B_3) \in R[x_1, \dots, x_m][X]$ such that $A(X, x_1, \dots, x_m) = (B_1 B_2 B_3)(X, x_1, \dots, x_m)$ or FAIL
769 	\end{enumerate}
770}
771
772set $e = A - B_1 B_2 B_3$ \tcp*{current error}
773set $\beta_i = B_i(X, x_1, \dots, x_{m-1}, \alpha_m) \in R[x_1, \dots, x_{m-1}][X]$\;
774\For{$j=1$ \KwTo $\op{deg}_{x_m}(A)$}
775{
776	assert that $e$ is divisible by $(x_m - \alpha_m)^j$\;
777	set $t =$ taylor coefficient of $(x_m - \alpha_m)^j$ in $e$ \tcp*{ $t \in R[x_1, \dots, x_{m-1}][X]$}
778	$(\delta_1, \delta_2, \delta_3) = \textbf{pfrac}(t, (\beta_1, \beta_2, \beta_3),(\alpha_1, \dots,\alpha_{m-1}), (\op{deg}_{x_1}A, \dots, \op{deg}_{x_{m-1}}A))$ \;
779		\tcp*{solve $t = \delta_1 \beta_2 \beta_3 + \delta_2 \beta_1 \beta_3 + \delta_3 \beta_1 \beta_2$}
780	\lIf{the solved failed}{\Return{FAIL}}
781	set $B_i = B_i + \delta_i (x_m - \alpha_m)^j$ for each $i$\;
782	set $e = A - B_1 B_2 B_3$
783}
784\leIf{$e=0$}{
785	\Return{$(B_1, B_2, B_3)$}
786}
787{
788	\Return{FAIL}
789}
790\caption{$\textbf{hlift}$ (Multivariate Hensel Lifting - Quintic version)}
791\label{algo_mlift}
792\end{algorithm}
793
794Since the solutions $\delta_i$ must satisfy $\op{deg}_{X} \delta_i < \op{deg}_{X} B_i$, the leading coefficients of the $B_i$ will not be changed by Algorithm \ref{algo_mlift}.
795
796\begin{algorithm}[H]
797\DontPrintSemicolon
798\KwIn{	$m \ge 2$
799	\begin{enumerate}
800 	\item $(\alpha_1, \dots, \alpha_m) \in R^m$
801 	\item $F \in R[x_1, \dots, x_m][X]$ with $F(X, \alpha_1, \dots, \alpha_m)$ squarefree
802 	\item $(A, B, C) \in R[x_1, \dots, x_m][X]$ (however, all but the leading coefficients of each $A,B,C$ are in $R[x_1, \dots, x_{m-1}]$) such that $F(X, x_1, \dots, x_{m-1}, \alpha_m) = (A B C)(X, x_1, \dots, x_{m-1}, \alpha_m)$
803	\end{enumerate}
804}
805\KwOut{
806	\begin{enumerate}
807 	\item $(A, B, C) \in R[x_1, \dots, x_m][X]$ such that $A(X, x_1, \dots, x_m) = (A B C)(X, x_1, \dots, x_m)$ or FAIL
808 	\end{enumerate}
809}
810
811set $a_0 = [(x_m - \alpha_m)^0] A$ and set $dA = 0$\;
812set $b_0 = [(x_m - \alpha_m)^0] B$ and set $dB = 0$\;
813set $c_0 = [(x_m - \alpha_m)^0] C$ and set $dC = 0$\;
814\For{$d=1$ \KwTo $\op{deg}_{x_m}(A)$}
815{
816	set $t = [(x_m - \alpha_m)^d]F - \sum_{\substack{i+j+k=d \\ i \le dA, \ j \le dB, \ k \le dC}} a_i b_j c_k$\;
817	use \textbf{pfrac} to find $a_d, b_d, c_d$ from $t=a_d b_0 c_0+a_0 b_d c_0+a_0 b_0 c_d$\;
818	\lIf{the solved failed}{\Return{FAIL}}
819	set $a_d = a_d + [(x_m - \alpha_m)^d]A$\;
820	set $b_d = b_d + [(x_m - \alpha_m)^d]B$\;
821	set $c_d = c_d + [(x_m - \alpha_m)^d]C$\;
822	\lIf{ $a_d \neq 0$}{set $dA = d$}
823	\lIf{ $b_d \neq 0$}{set $dB = d$}
824	\lIf{ $c_d \neq 0$}{set $dC = d$}
825	\lIf{$dA + dB + dC > \op{deg}_{x_m}(A)$}{\Return{FAIL}}
826}
827assert that $dA + dB + dC = \op{deg}_{x_m}(A)$\;
828set $A = \sum_{i=0}^{dA} a_i (x_m - \alpha_m)^i$\;
829set $B = \sum_{i=0}^{dB} b_i (x_m - \alpha_m)^i$\;
830set $C = \sum_{i=0}^{dC} c_i (x_m - \alpha_m)^i$\;
831\Return{(A,B,C)}
832\caption{$\textbf{hlift}$ (Multivariate Hensel Lifting - Quartic version)}
833\label{algo_mlift2}
834\end{algorithm}
835
836Finally the main work horse. It is easy to solve $t=\delta_1\beta_2\beta_3+\delta_2\beta_1\beta_3+\delta_3\beta_1\beta_2$ in $\op{frac}(R)(x_1,\dots,x_{m-1})[X]$ with pseudo remainder sequences, since
837$\delta_i= t(\beta_j\beta_k)^{-1}\pmod {\beta_i}$ and check if the $\delta_i$'s are  defined in $R[x_1,\dots, x_{m-1}][X]$. However, as intermediate expression swell is a problem in this approach. We will use a different algorithm described as below.
838
839\begin{algorithm}[H]
840\DontPrintSemicolon
841\KwIn{ 	 $l\geq 0$
842	\begin{enumerate}
843		\item $t\in R[x_1,\dots, x_l][X]$
844		\item $(\beta_1,\beta_2,\beta_3)$, $\text{where } \beta_i\in R[x_1,\dots, x_{l}][X],$ $ \beta_i \text{ pairwise coprime in } \op{frac}{(R)}(x_1,\dots,x_{l})[X]$
845		\item $(\alpha_1,\dots,\alpha_l) \in R^l$
846		\item $(d_1,\dots, d_l)\in \mathbb{N}^l$ degree bounds
847	\end{enumerate}
848}
849
850\KwOut{
851    \begin{enumerate}
852    	\item $(\delta_1,\delta_2,\delta_3), \delta_i\in R[x_1,\dots, x_r][X]\text{ such that }t=\delta_1\beta_2\beta_3+\delta_2\beta_1\beta_3+\delta_3\beta_1\beta_2$ and $\op{deg}_{X}\delta_i<\op{deg}_{X}\beta_i$
853    	or \it{FAIL}
854    \end{enumerate}
855}
856\eIf{$r=0$}{
857	set $\delta_i=t(\beta_j\beta_k)^{-1}\pmod {\beta_i} $ in  $\op{frac}(R)[X]$\\
858	\leIf{ each $\delta_i\in R[X]$}{
859		\Return{$(\delta_1,\delta_2,\delta_3)$}
860	}
861	{
862		\Return{FAIL}
863	}
864	}
865{
866	set $\tilde{\beta}_i(X)=\beta_i(X,x_1,\dots,x_{r-1},\alpha_l)\in R[x_1,\dots,x_{l-1}][X]$\;
867	set $\delta_i=0$ for each $i$\;
868	set $e=t$\;
869	\For{$j=0$ \KwTo $d_r$}
870	  {
871	  	assert that $e$ is divisible by $(x_r - \alpha_r)^j$\;
872	  	set $\tilde{t}=$ taylor coefficient of $(x_r-\alpha_r)^j$ in $e$\;
873	  	set $(\tilde{\delta_1},\tilde{\delta}_2, \tilde{\delta}_3 )=\textbf{pfrac}(\tilde{t},(\alpha_1,\dots, \alpha_{l-1}),(\tilde{\beta}_1,\tilde{\beta}_2, \tilde{\beta}_3),(d_1,\dots, d_{l-1}))$\;
874	  	\lIf{the solved failed}{\Return{FAIL}}
875	    set $\delta_i=\delta_i+\tilde{\delta}_i(x_r-\alpha_r)^j$\;
876	    set $e=t-(\delta_1\beta_2\beta_3+\delta_2\beta_1\beta_3+\delta_3\beta_1\beta_2)$
877	  }
878  \leIf{$e=0$}{
879  	\Return{$(\delta_1, \delta_2, \delta_3)$}
880  }
881  {
882  	\Return{FAIL}
883  }
884}
885
886\caption{\textbf{pfrac} (Multivariate partial fraction solver)}
887\label{algo_pfrac}
888\end{algorithm}
889
890\end{document}
891