1\documentclass[11pt,reqno]{amsart} 2\usepackage{amsmath, amssymb, amsthm} 3\usepackage{graphicx} 4\usepackage{xtab} 5\usepackage{color} 6\usepackage{hyperref} 7\usepackage[linesnumbered,ruled]{algorithm2e} 8 9\SetKw{KwGoTo}{goto} 10 11\numberwithin{equation}{section} 12\newtheorem{theorem}{Theorem}[section] 13\newtheorem{definition}[theorem]{Definition} 14\newtheorem{lemma}[theorem]{Lemma} 15\newtheorem{remark}[theorem]{Remark} 16\newtheorem{entry}[theorem]{Entry} 17\newtheorem{corollary}[theorem]{Corollary} 18\newtheorem{proposition}[theorem]{Proposition} 19\newtheorem{example}[theorem]{Example} 20\newtheorem{myalgorithm}[theorem]{Algorithm} 21 22\addtolength{\textwidth}{1.5in} 23\addtolength{\hoffset}{-0.75in} 24%\addtolength{\textheight}{0.6in} 25%\addtolength{\voffset}{-0.3in} 26 27\newcommand\T{\rule{0pt}{4.0ex}} % Top strut 28\newcommand\B{\rule[-2.5ex]{0pt}{0pt}} % Bottom strut 29 30\newcommand{\smat}[4] {(\begin{smallmatrix} #1 & #2 \\ #3 & #4 \end{smallmatrix} )} 31\newcommand{\mattt}[4] { \left(\begin{array}{cc} #1 & #2 \\ #3 & #4 \end{array} \right)} 32\newcommand{\matto}[2] { \left(\begin{array}{cc} #1 \\ #2 \end{array} \right)} 33\newcommand{\schar}[2] {( \begin{smallmatrix} #1 \\ #2 \end{smallmatrix})} 34 35\newcommand{\op}[1] { \operatorname{ #1 }} 36\newcommand{\olbbH}[0] { \overline{\mathbb{H}}} 37\newcommand{\olbbQ}[0] { \overline{\mathbb{Q}}} 38\newcommand{\olG}[0] { \overline{\Gamma}} 39\newcommand{\bbH}[0] { \mathbb{H}} 40\newcommand{\bbC}[0] { \mathbb{C}} 41\newcommand{\bbZ}[0] { \mathbb{Z}} 42\newcommand{\bbF}[0] { \mathbb{F}} 43\newcommand{\bbQ}[0] { \mathbb{Q}} 44\newcommand{\bbR}[0] { \mathbb{R}} 45\newcommand{\gok}[0] { \mathfrak{k}} 46\newcommand{\goe}[0] { \mathfrak{e}} 47\newcommand{\goR}[0] { \mathfrak{R}} 48 49\title{Algorithms for Multivariate Polynomials} 50\author{} 51 52\begin{document} 53 54\begin{abstract} 55Algorithms for multivariate polynomials in flint are discussed. 56\end{abstract} 57 58 59\maketitle 60 61\section{Introduction} 62 63A polynomial $A \in R[x_1,\dots,x_n]$ is representation as a sums of terms 64\begin{equation*} 65A = t_1 + \cdots + t_a 66\end{equation*} 67where the terms are ordered as $t_1 > t_2 > \cdots > t_a$ according to some term ordering. The basic operations of addition and subtraction are then equivalent to a merge operation and run in time proportional to the sum of the input term counts. 68 69 70\section{Monomial Representation} 71 72The {\tt mpoly} module implements the low level packing and unpacking of exponents 73for multivariate polynomials. If the variables in the polynomial are, say, 74$x$, $y$ and $z$ with $x > y > z$ in the monomial ordering, then the monomial 75$x^a y^b z^c$ is represented as the array $\{a, b, c\}$ from the user's perspective. 76 77Polynomial exponents are stored in packed format. This means that monomials are 78actually stored as an array of integer 'fields' that may be packed within 79a machine word or across multiple machine words if needed. 80This facilitates basic operations on the monomials, and we make the following assumptions about 81the correspondence between the variables' exponents and the 82fields in the packing: 83 84\begin{enumerate} 85\item {The monomial ordering is a total ordering, i.e. 1 is the smallest.} 86\item{Multiplication of monomials corresponds to field-wise addition.} 87\item{Monomials can be compared by comparing their packed representation possibly with an xor mask on certain fields.} 88\item{The exponent of each variable is itself one of the fields.} 89\item{The fields are all non-negative.} 90\end{enumerate} 91 92For the three supported ordering {\tt ORD\_LEX}, {\tt ORD\_DEGLEX}, and {\tt ORD\_DEGREVLEX}, the 93monomial $x^a y^b z^c$ is converted into fields in the following ways (the 94least significant field is on the left, the most significant is on the right), 95and the comparison mask is shown below. 96 97\begin{verbatim} 98 ORD_LEX: | c | b | a | ( 3 fields) 99 000 000 000 100 101 ORD_DEGLEX: | c | b | a | a+b+c | ( 4 fields) 102 000 000 000 00000 103 104 ORD_DEGREVLEX: | a | b | c | a+b+c | ( 4 fields) 105 111 111 111 0000000 106\end{verbatim} 107 108If one wanted to support, for example, a block ordering which was {\tt ORD\_DEGLEX} 109in $x, y$ and {\tt ORD\_DEGREVLEX} in $z, w$ with $x>y>z>w$, 110the monomial $x^a y^b z^c w^d$ would need to be stored as 111 112\begin{verbatim} 113 | c | d | c+d | b | a | a+b | (6 fields) 114 111 111 00000 000 000 00000 115\end{verbatim} 116 117No such interface is currently implemented. 118 119 120There is no limit to the size of the fields. The fields themselves are packed 121to a uniform bit width, usually denoted by {\tt bits} in the functions. This bit count should contain an extra sign bit used for overflow detection. Thus, if the maximum field is $15$, then the fields only fit into a packing with {\tt bits >= 5}. The total number of machine words taken by an exponent packed into fields is usually denoted by {\tt N} in the code. 122 123If {\tt bits <= FLINT\_BITS} then precisely a maximum of {\tt floor(FLINT\_BITS/bits)} number of fields may be packed into a single word. Within a word, the packing is from low to high, and unused fields (as well as unused bits) at the top of the word are zero. 124 125\section{Multiplication} 126\subsection{Dense multiplication in $\bbZ[x_1,\dots,x_n]$ or $\bbZ_p[x_1,\dots,x_n]$}\ 127 128Given $A(x_1,\dots,x_n), B(x_1,\dots,x_n) \in R[x_1,\dots,x_n]$, set $r_i = 1 + \op{deg}_{x_i}(a) + \op{deg}_{x_i}(b)$. The Kronecker substitution 129\begin{equation*} 130x_1 \to x, \quad x_2 \to x^{r_1}, \quad x_3 \to x^{r_1 r_2}, \quad \dots, \quad x_n \to x^{r_1 \cdots r_{n-1}} 131\end{equation*} 132gives two univariate polynomials to multiply in $\bbZ[x]$ or $\bbZ_p[x]$. This Kronecker substitution is chosen so that it can be reversed to find $A \cdot B \in R[x_1,\dots,x_n]$ from the univariate product. The flint functions {\tt \_mpoly\_mul\_\{dense|array\}} implement such techniques. The {\tt dense} functions use the ordinary polynomial multiplication functions while the {\tt array} functions use a multiply and accumulate technique that might be better for semi-sparse polynomials. 133 134\subsection{Sparse multiplication in $\bbZ[x_1,\dots,x_n]$ or $\bbZ_p[x_1,\dots,x_n]$}\ 135 136Given $A = t_1 + \cdots + t_a, B = s_1 + \cdots + s_b, \in R[x_1,\dots,x_n]$, we need to calculate all products $t_i s_j$, sort them, and combine like terms. This is done using a heap in the functions {\tt \_mpoly\_mul\_johnson} as in \cite{Johnson}. The essential idea is to read off the product terms in order from a heap. The heap never needs to become too large if one uses the relations 137\begin{equation*} 138t_i s_j > t_{i+1} s_j, \quad t_i s_j > t_i s_{j+1}\text{.} 139\end{equation*} 140 141 142\section{Division} 143 144The techniques used for multiplication (Kronecker substitutions in the dense case and heaps in the sparse case) apply to division as well. 145 146 147\section{Powering} 148 149Implements a corrected version of an algorithm called FPS in \cite{FPS}. The basic idea is to map the problem to $R[x]$ via a Kronecker substitution and use a recursion for the coefficients of $f^k$ derived from 150\begin{equation*} 151f (f^k)' = k f' (f^k)\text{.} 152\end{equation*} 153Since solving for the coefficients of $f^k$ involves division, this requires some modification for $R=\bbZ_p$. 154 155 156 157\section{Interpolation} 158 159All of the interpolation methods for $f(x_1, \dots, x_n) \in R[x_1, \dots, x_n]$ require strict degree bounds $r_i$ with $\op{deg}_{x_i}(f) < r_i$. 160 161\subsection{Dense Newton Interpolation} 162 163Straightforward, variable-by-variable, recursive, dense interpolation. Number of probes to $f$ is $\prod_i r_i$. There is only one problem with this approach. 164\begin{itemize} 165\item insufficient evaluation points 166\end{itemize} 167 168 169\subsection{Sparse Zippel Interpolation} 170Similar to Newton interpolation, but we use the assumption that monomials don't disappear under evaluation. For example, suppose $r_x, r_y, r_z$ are the strict degree bounds. We first find $f(x,1,1)$ using dense interpolation with $r_x$ values of $x$, say $x_1, \dots, x_{r_x},$. If 171\begin{equation*} 172f(x,1,1) = x^5 + 2x^2 + 1\text{,} 173\end{equation*} 174we make the assumption that 175\begin{equation*} 176f(x,y,1) = a_1(y)x^5 + a_2(y)x^2 + a_3(y)\text{,} 177\end{equation*} 178and proceed to interpolate the $a_i(y)$ using dense univariate interpolation in $y$. We need $r_y$ values of $y$, say $y_1, \dots, y_{r_y}$. For each of these values $y=y_i$ we can find the coefficients (?) in 179\begin{equation*} 180f(x,y_i,1) = (?) x^5 + (?) x^2 + (?) 181\end{equation*} 182by plugging in \emph{three} random values of $x$ and solving the linear system. To find $f(x,y,1)$ at this point the number of probes to $f$ we have used is $r_x + 3r_y$, which is probably fewer than $r_x r_y$. 183 184Now suppose we obtain 185\begin{equation*} 186f(x,y,1) = y^2 x^5 + x^2 + y^7 x^2 + y^3. 187\end{equation*} 188Make the assumption 189\begin{equation*} 190f(x,y,z) = b_1(z)y^2 x^5 + b_2(z)x^2 + b_3(z)y^7 x^2 + b_4(z)y^3\text{,} 191\end{equation*} 192and interpolate the $b_i(z)$ using dense univariate interpolation in $z$. We need $r_z$ values of $z$, say $z_1, \dots, z_{r_z}$. For each of these values $z=z_i$ we can find the coefficients (?) in 193\begin{equation*} 194f(x,y,z) = (?)y^2 x^5 + (?)x^2 + (?)y^7 x^2 + (?)y^3 195\end{equation*} 196by plugging in \emph{four} random pairs of values of $(x,y)$ and solving the linear system. To find $f(x,y,z)$ at this point the number of probes to $f$ we have used is $r_x + 3r_y + 4r_z$, which is probably fewer than $r_x r_y r_z$. 197 198This approach has an additional problems. 199\begin{itemize} 200\item insufficient evaluation points 201\item inconsistent/underdetermined linear equations 202\item associated linear algebra costs 203\end{itemize} 204 205\subsection{Sparse Interpolation with the Berlekamp-Massey Algorithm} 206Given the strict degree bounds $r_i$, in order to interpolate $f(x_1, \dots, x_n)$ it suffices to interpolate $f(\xi, \xi^{r_1}, \xi^{r_1 r_2}, \dots, \xi^{r_1 \cdots r_{n-1}})$, which is a univarate with degree bound $\prod_i r_i$. If $t$ is the number of terms of $f$, then we can summarize the probe counts of the three methods. 207\begin{enumerate} 208\item dense: $\prod_i r_i$ 209\item zippel: approximately $t \cdot \sum_i r_i$. 210\item bma: $2t$. 211\end{enumerate} 212 213This approach has problems too. 214\begin{itemize} 215\item insufficient evaluation points 216\item costs of the associated linear algebra and discrete logarithms. 217\end{itemize} 218 219 220Since the presentation in \cite{BMAR} is overly complicated and does not deal with the half gcd, it seems reasonable to review the Berlekamp-Massey Algorithm here. Given a formal power series 221\begin{equation*} 222\frac{a_1}{x} + \frac{a_2}{x^2} + \frac{a_3}{x^3} + \cdots\text{,} \quad a_i \in \bbF 223\end{equation*} 224vanishing at $x = \infty$ and the fact that this power series represents a rational function, we are interested in computing this rational function. The following theorem says that we can use the extended euclidean algorithm and stop when the first remainder of degree $<\frac{n}{2}$ is obtained. 225 226\begin{theorem} 227Suppose that 228\begin{equation*} 229\frac{a_1}{x} + \frac{a_2}{x^2} + \frac{a_3}{x^3} + \cdots = -\frac{\bar{u}}{\bar{v}} 230\end{equation*} 231for some $\bar{u}, \bar{v} \in \bbF[x]$ with $\op{deg}(\bar{u}) < \op{deg}(\bar{v}) \le \frac{n}{2}$. Suppose further that 232\begin{equation} 233\label{equ_bma1} 234u x^{n} + v (a_1 x^{n-1} + a_2 x^{n-2} + \cdots + a_{n-1} x + a_{n}) = r 235\end{equation} 236for some $u, v, r \in \bbF[x]$ with $\op{deg}(u) < \op{deg}(v) \le \frac{n}{2}$ and $\op{deg}(r) < \frac{n}{2}$ and $\op{deg}(r) < \op{deg}(v)$. Then, 237\begin{equation*} 238\frac{\bar{u}}{\bar{v}} = \frac{u}{v}\text{.} 239\end{equation*} 240\end{theorem} 241\begin{proof} 242Dividing both sides of \eqref{equ_bma1} by $v x^{n}$ shows that 243\begin{equation*} 244\frac{\bar{u}}{\bar{v}} = \frac{u}{v} + O \left( \frac{1}{x^{n+1}}\right)\text{,} 245\end{equation*} 246which, on account of the degree bounds $\op{deg}(\bar{v}), \op{deg}(v) \le \frac{n}{2}$, proves the equality. 247\end{proof} 248 249This reconstruction may be applied to reconstruct an $f(\xi) = c_1 \xi^{e_1} + \cdots + c_t \xi^{e_t} \in \bbF[\xi]$ from the sequence of evaluation points 250\begin{equation*} 251a_i = f(\alpha^{s+i-1})\text{,} \quad \alpha \neq 0, \quad s \in \bbZ\text{,} 252\end{equation*} 253for in this case we have 254\begin{equation*} 255\frac{a_1}{x} + \frac{a_2}{x^2} + \frac{a_3}{x^3} + \cdots = \frac{c_1\alpha^{e_1 s}}{x - \alpha^{e_1}} + \cdots + \frac{c_t\alpha^{e_t s}}{x - \alpha^{e_t}}\text{.} 256\end{equation*} 257If this rational function is known and the $e_i$ can be found, then $f$ is known as well. 258 259The main problem with this approach is that the term bound $t$ is not known in advance. The approach we take is to calculate the $v$ in \eqref{equ_bma1} for some $n$ points $a_1, \dots, a_{n}$. Then, we add another $m$ points to form the sequence $a_1, \dots, a_{n+m}$ and calculate the corresponding $v'$. If $v=v'$, then it is likely that $v$ is the correct denominator. The extent to which previous computations may be reused is addressed in Theorem \ref{thm_nm}. 260We follow \cite{YAP} for the presentation of the half gcd. An elementary matrix is one of the form $\smat{0}{1}{1}{q}$ for $\deg(q) > 0$ and a regular matrix is a product of zero or more elementary matrices. The notation $U \overset{M}{\longrightarrow} V$ shall mean that $M$ is a regular matrix and $U=MV$. If $\deg(A)>\deg(B)$ then $\op{hgcd}(A,B)$ is defined (see \cite{YAP}) as the (unique) regular matrix $M$ such that 261\begin{gather*} 262\matto{A}{B} \overset{M}{\longrightarrow} \matto{C'}{D'}\text{,}\\ 263\deg(C') \ge \frac{\deg(A)}{2} > \deg(D')\text{.} 264\end{gather*} 265 266\begin{theorem} 267\label{thm_correctness} 268Suppose that 269\begin{align*} 270\matto{A_0}{B_0} &\overset{M}{\longrightarrow} \matto{A_0'}{B_0'}\\ 271\op{deg}(A_0') &> \deg(B_0')\\ 272\op{deg}(A_0) &\le 2 \op{deg}(A_0') 273\end{align*} 274Then, for any $A_1$, $B_1$ with $\deg(A_1),\deg(B_1) < m$, 275\begin{align*} 276\matto{A_0 x^m + A_1}{B_0 x^m + B_1} &\overset{M}{\longrightarrow} \matto{A'}{B'}\\ 277\op{deg}(A') &= m + \op{deg}(A_0')\\ 278\op{deg}(B') &\le m + \op{max}( \deg(B_0'), \deg(A_0')-1)\\ 279\op{deg}(B') &\le m + \op{max}( \deg(B_0'), \frac{\deg(A_0)}{2}-1) 280\end{align*} 281for some $A', B'$. 282\end{theorem} 283\begin{proof} 284This is a trivial rearrangement of Lemma 1 in \cite{YAP}. 285\end{proof} 286 287\begin{theorem} 288\label{thm_nm} 289Suppose $\deg(s_n) < n$, $\deg(s_m) < m$ and 290\begin{gather*} 291\matto{x^n}{s_n} \overset{M}{\longrightarrow} \matto{r_0}{r_1}\\ 292\deg(r_0) \ge \frac{n}{2} > \deg(r_1) 293\end{gather*} 294Then, a regular matrix $M'$ (and thus $r_0', r_1'$) such that 295\begin{gather*} 296\matto{x^{n+m}}{s_n x^m + s_m} \overset{M'}{\longrightarrow} \matto{r_0'}{r_1'}\\ 297\deg(r_0') \ge \frac{n+m}{2} > \deg(r_1') 298\end{gather*} 299may be calculated as follows. Define $A', B'$ by 300\begin{equation*} 301\matto{x^{n+m}}{s_n x^m + s_m} \overset{M}{\longrightarrow} \matto{A'}{B'} 302\end{equation*} 303It will be the case that $\deg(A') \ge \frac{n+m}{2}$. If $\frac{n+m}{2} > \deg(B')$, return with $M'=M$. Otherwise set $C = B'$, $D = \op{rem}(A',B')$ and $q=\op{quo}(A',B')$. Define $k := n + m - \deg(C)$. It will be the case that $0 < k \le \deg(C)$. Return with 304\begin{equation*} 305M' = M \cdot \mattt{0}{1}{1}{q} \cdot \op{hgcd} \matto{\op{quo}(C,x^k)}{\op{quo}(D,x^k)} 306\end{equation*} 307\end{theorem} 308\begin{proof} 309By Theorem \ref{thm_correctness}, $\deg(A') = m + \deg(r_0) \ge m + \frac{n}{2} \ge \frac{n+m}{2}$. Now suppose $\frac{n+m}{2} \le \deg(B')$, from which the assertion $k \le \deg(C)$ follows automatically. By Theorem \ref{thm_correctness}, $\deg(B') \le m + \max(\deg(r_1), \deg(r_0) - 1) < m + n$. Thus, the assertion $0 < k$ is proved. Finally, suppose 310\begin{gather*} 311\matto{C_0 := \op{quo}(C,x^k)}{D_0 := \op{quo}(D,x^k)} \overset{H}{\longrightarrow} \matto{C_0'}{D_0'}\text{,}\\ 312\deg(C_0') \ge \frac{\deg(C_0)}{2} > \deg(D_0')\text{.} 313\end{gather*} 314If $C', D'$ are defined by 315\begin{equation*} 316\matto{C}{D} \overset{H}{\longrightarrow} \matto{C'}{D'}\text{,} 317\end{equation*} 318it suffices to prove that $\deg(C') \ge \frac{n+m}{2} > \deg(D')$. By Theorem \ref{thm_correctness}, 319\begin{align*} 320\deg(C') &= k + \deg(C_0')\\ 321 &\ge k + \frac{\deg(C_0)}{2}\\ 322 &= k + \frac{\deg(C) - k}{2}\\ 323 &= \frac{n+m}{2}\text{.} 324\end{align*} 325Also by Theorem \ref{thm_correctness}, 326\begin{align*} 327\deg(D') &\le k + \max(\deg(D_0'), \frac{\deg(C_0)}{2} - 1)\\ 328 & < k + \max(\frac{\deg(C_0)}{2}, \frac{\deg(C_0)}{2})\\ 329 &= \frac{n+m}{2}\text{.} 330\end{align*} 331\end{proof} 332 333 334 335\section{Greatest Common Divisor} 336 337\subsection{Dense GCD in $\bbZ_p[x_1,\dots,x_n]$}\ 338Brown's algorithm \cite{Brown} is used here. This comes in two versions - a small prime version and a large prime version. These refer not to the size of the $p$'s involved, but rather to the field from which evaluation points are chosen: it can either be $\bbF_p$ or an extension of $\bbF_p$. The small prime version interpolates in each variable by choosing evaluation points from $\bbF_p$. If this fails, then the large prime method uses interpolation in $\bbF_p/(f(x_n))[x_1,\dots,x_{n-1}]$, i.e. $\bbF_q[x_1,\dots,x_{n-1}]$, for sufficiently many irreducible $f(x) \in \bbZ_p[x]$. No explicit divisibility checks need to be performed because the cofactors are reconstructed along with the GCD. 339 340\subsection{Dense GCD in $\bbZ[x_1,\dots,x_n]$}\ 341We simply reconstruct the GCD from its image in $\bbZ_p[x_1,\dots,x_n]$ for sufficiently many $p$. Only large $p$'s are used, and dense GCD's in $\bbZ_p[x_1,\dots,x_n]$ only use the small prime version. Each image GCD in $\bbZ_p$ is correct and Brown's coefficient bounds \cite{Brown} are used instead of a divisibility check. Some pseudocode is Section \ref{Pseudocode}. 342 343\subsection{Sparse GCD in $R[x_1,\dots,x_n]$}\ 344Assuming that we have a gcd algorithm for $R[x_1,\dots,x_m]$, we can view the inputs as elements of $R[x_1,\dots,x_m][x_{m+1},\dots,x_n]$ and use interpolation to extend this algorithm from $m$ variables to $n$ variables. Brown's algorithm corresponds to taking $m=n-1$, using univariate interpolation for the extension of $n-1$ variables to $n$ variables, and recursively solving the $n-1$ variable gcd problem with the Euclidean algorithm as the base case. Taking $m=1$ gives Zippel's approach \cite{ZIPPEL}. 345If the inputs are made primitive with respect to $x_1,\dots,x_m$ by factoring out polynomials in $R[x_{m+1},\dots,x_n]$, the gcd of the leading coefficients of the input with respect to $x_1,\dots,x_m$ may be imposed as the leading coefficient of the interpolated gcd. Finally, if the primitive part with respect to $x_1,\dots,x_m$ of this interpolated gcd divides both inputs, it must be the true gcd. 346 347Of note here is an algorithm stated slightly incorrectly in \cite{SULING} and \cite{LINZIP}; The basic idea is to reconstruct the correct leading term of the gcd $\in R[x_1,\dots,x_m]$ using some linear algebra directly instead of constructing some known multiple and then removing content. This is in {\tt fmpz\_mpolyl\_gcd\_zippel} and a rough overview is: 348\ \\ 349{\tt fmpz\_mpolyl\_gcdm\_zippel}($A, B \in \bbZ_p[x_1,\dots,x_n][X]$, $n \ge 1$):\\ 350\indent $\left[\begin{tabular}{l} 351The GCD is assumed to have no content w.r.t. $X$ (content in $\bbZ[x_1, \dots, x_n]$)\\ 352Pick a prime $p$ and call {\tt nmod\_polyl\_gcdp\_zippel} to get an probable image of $G \mod p$ \\ 353Assume that the true gcd $G$ over $\bbZ$ has the same monomials as this image mod $p$.\\ 354Pick more primes $p$ and call {\tt nmod\_mpolyl\_gcds\_zippel} to get more images of $G \mod p$. \\ 355Combine the images via chinese remaindering and test divisibility. 356\end{tabular}\right.$ 357 358\ \\ 359The ``p'' versions produce a correct gcd when the inputs have no content in $\bbF_p[x_1,\dots,x_n]$. 360\ \\ 361{\tt nmod\_mpolyl\_gcdp\_zippel}($A, B \in \bbF_p[x_1,\dots,x_n][X]$, $n \ge 1$):\\ 362\indent $\left[\begin{tabular}{l} 363If the GCD has content w.r.t. $X, x_1, \dots, x_1$ (content in $\bbF_p[x_n]$), fail.\\ 364Pick an evaluation point $x_n \to \alpha$ for $\alpha \in \bbF_p$.\\ 365(1) Call {\tt nmod\_mpolyl\_gcdp\_zippel} recursively on the evaluated inputs in $\bbF_p[x_1,\dots,x_{n-1}][X]$.\\ 366Record the form $f$ of the GCD obtained for step (2) below.\\ 367Pick severial evaluation points $x_n \to \alpha$ for $\alpha \in \bbF_q$.\\ 368(2) Call {\tt [fq\_]nmod\_mpoly\_gcds\_zippel} on the evaluated inputs in $\bbF_q[x_1,\dots,x_{n-1}][X]$.\\ 369Combine the answer from (1) and the answers from (2) via interpolation in $x_n$.\\ 370Check divisibility on the proposed interpolated GCD. 371\end{tabular}\right.$ 372 373\ \\ 374The ``s'' versions are the heart of Zippel's sparse interpolation. 375\ \\ 376{\tt nmod\_mpolyl\_gcds\_zippel}($A, B \in \bbF_q[x_1,\dots,x_n][X]$, assumed monomial form $f$ of gcd):\\ 377\indent $\left[\begin{tabular}{l} 378Via evaluations of the form $(x_1,\dots,x_n) \to (\alpha_1,\dots,\alpha_n) \in \bbF_p^n$,\\ and GCD computations in $\bbF_p[X]$, and linear algebra, try to compute the coefficients \\of the assumed form $f$ to match the GCD of the inputs (up to scalar multiples in $\bbF_p$). 379\end{tabular}\right.$ 380 381\subsection{PRS} 382The PRS algorithm works over any gcd domain $R$. It starts with a primitive input with respect to some main variable and calculates a pseudo gcd with a pseudo remainder sequence. Content is removed from the pseudo gcd to produce the true gcd by a recursive call. The final content can be computed without expensive recursive calls to gcd in the case when we know the leading or trailing coefficient in the main variable must be a monomial in the remaining variables. 383 384This algorithm has been discarded because it is so bad but may be reintroduced for low degrees. 385 386\subsection{Hensel Lifting} 387The gcd can also be calculated using Hensel lifting \cite{EZGCD}. The gcd of the resulting univariates when all variables but one are substituted away gives two factorizations which can be lifted to obtain the multivariate gcd. 388 389 390\section{Factorization} 391 392 393\subsection{Squarefree Factorization in $K[x_1,\dots,x_n]$} 394\label{section_sqfr} 395By taking derivatives and greatest common divisors, we may assume that the input polynomial is squarefree and primitive with respect to each variable. Thus in characteristic zero the input polynomial $f \in K[x_1,\dots,x_n]$ may be assumed to satisfy 396\begin{equation*} 397\forall_i \quad f_{x_i} \neq 0 \quad \text{and} \quad \gcd(f,f_{x_i}) = 1\text{.} 398\end{equation*} 399Over a finite field ($K=\mathbb{F}_q$) of characteristic $p$, we have the slightly weaker conditions 400\begin{equation} 401\label{sqrfp_cond} 402\begin{alignedat}{3} 403&f_{x_1} \ne 0 \quad &\text{and}& \quad \gcd(f,f_{x_1}) = 1\\ 404\forall_{i>1} \quad &f_{x_i} = 0 \quad &\text{or}& \quad \gcd(f,f_{x_i}) = 1 405\end{alignedat} 406\end{equation} 407 408While we could apply the factorization algorithms directly to this $f$ with $x_1$ as the main variable, it is possible to a bit better when some of the other derivatives vanish. 409 410\begin{theorem} 411With the assumption \ref{sqrfp_cond} on $f$ and prime powers $p^{e_2},\dots,p^{e_n}$ and a deflated polynomial $g$ with 412\begin{equation*} 413g(x_1,x_2^{p^{e_2}},\dots, x_n^{p^{e_n}}) = f(x_1,x_2,\dots,x_n)\text{,} 414\end{equation*} 415the factorization of $f$ is the inflated factorization of $g$. 416\end{theorem} 417The proof follows by induction from the following lemma: the polynomials $g(x_1,x_2, x_3,\dots, x_n)$ and $g(x_1,x_2^p, x_3,\dots, x_n)$ have the same factorization (up to inflation $x_2 \to x_2^p$). 418\begin{lemma} 419If $p = \operatorname{char}(K) > 0$ and $f(x,y) \in K[x,y] \setminus (K[x] \cup K[y])$ is irreducible and $f(x^p,y)$ is squarefree, then $f(x^p,y)$ is irreducible. 420\end{lemma} 421\begin{proof} 422Suppose that $f(x^p,y)=g(x,y)h(x,y)$ for $g,h \not \in K$. Since $f(x^p,y)$ is squarefree, $g$ and $h$ are squarefree, and there are $s, t \in K(y)[x]$ with $1=sg+th$. By differentiating $f(x^p,y)=g(x,y)h(x,y)$, we obtain $0=h g_x + g h_x$, which when combined with $1=sg+th$ gives $h(t h_x - s g_x)=h_x$. This implies that $h_x=0$ and in turn that $g_x=0$, which implies that $f(x,y)$ is reducible, a contradiction. 423\end{proof} 424 425\subsection{Factorization in $R[x]$} 426\subsubsection{Quadratic over characteristic $\ne 2$} 427The primitive polynomial $ax^2+bx+c$ factors if and only if $b^2-4ac$ is a square in $R$, in which case the factors are the primitive parts of $2ax+b\pm \sqrt{b^2-4ac}$. 428 429\subsubsection{Quadratic in $R[X]$ for $R=\mathbb{F}_{2^k}[x_1,\dots,x_n]$} 430 431We wish to determine if $X^2+AX+B$ has a root in $R$. Since $X_0+A$ is a root if $X_0$ is, at least one of the two roots does not have $\operatorname{lt}(A)$ as a term. (It very well may be the case that both roots have a monomial matching $\operatorname{lm}(A)$, but then both corresponding coefficients must be different from the leading coffcient of $A$). Therefore, we make the important assumption that \emph{we are searching for a root $X_0$ with $\operatorname{lt}(A)$ not a term of $X_0$}. Let $m$ denote the leading term of $X_0$. By taking leading terms in $X_0^2+AX_0+B$ and applying the assumption, we have 432\begin{equation*} 433\operatorname{lt}(m^2+\operatorname{lt}(A)m) = \operatorname{lt}(B)\text{,} \quad \text{and} \quad m \neq \operatorname{lt}(A)\text{.} 434\end{equation*} 435For any specific given terms $\operatorname{lt}(A)$, $\operatorname{lt}(B)$, this equation is easy to solve for $m$ or to determine that there is no solution. 436\begin{align*} 437\operatorname{lm}(m)>\operatorname{lm}(A)&: \quad m = \sqrt{\operatorname{lt}(B)}\\ 438\operatorname{lm}(m)=\operatorname{lm}(A)&: \quad m = \zeta /\operatorname{lc}(A)\sqrt{\operatorname{lm}(B)}\text{,} \quad \zeta^2+\zeta =\operatorname{lc}(B)/\operatorname{lc}(A)^2\\ 439\operatorname{lm}(m)<\operatorname{lm}(A)&: \quad m = \operatorname{lt}(B)/\operatorname{lt}(A) 440\end{align*} 441 442Once $m$ is found, the equation satisfied by $X_0-m$ has the same $A$ and a new $B$ with a smaller leading monomial. In this way the solution may be written down in order, and this process is a simplification of Sections 4 and 5 in \cite{QuadraticFactor}, which does not present a sparse algorithm due to the many (possibly disastrous) divisions performed. The quadratic $\zeta^2+\zeta+c \in \mathbb{F}_{2^k}$ has a root if and only if $\operatorname{Tr}(c)=0$, in which case $c=\sum_{i=1}^{k-1}c^{2^i}\sum_{j=0}^{i-1}u^{2^j}$ is a root where $u$ is any element of $\mathbb{F}_{2^k}$ with $\operatorname{Tr}(u)=1$. If $\mathbb{F}_{2^k} = \mathbb{F}_2[\theta]/P(\theta)$, then $u=1/(\theta P'(\theta))$ will do. 443 444\subsubsection{Cubic over $\mathbb{Z}$} 445To factor a cubic over $\mathbb{Z}$, we first find the roots over the more friendly ring $\mathbb{Z}_2$ and then test these roots over $\mathbb{Z}$. Since it is easy to bound the roots over $\mathbb{Z}$, the roots over $\mathbb{Z}_2$ only need to be calculated to some finite precision $p$, that is, to order $O(2^p)$. 446 447Factor $x^3+2^\alpha ax+2^\beta b $ over $\mathbb{Z}_2$ where $\alpha, \beta\geq 0$ and $a, b$ are odd integers: 448\begin{enumerate} 449 \item $2\beta=3\alpha$: irreducible, as replacing $x\leftarrow 2^{\beta/3}y$ has no roots modulo $2$ for $y$. 450 \item $ 2\beta< 3\alpha$: 451 \begin{enumerate} 452 \item $3\nmid \beta$: irreducible as all roots have valuation $\beta/3$. 453 \item $3\mid \beta$: Replacing $x\leftarrow 2^{\beta/3}y$ gives $y^3+2^{\alpha-2\beta/3}a y+b=0$, which factors as $(y^2+y+1)(y+1)=0$ modulo $2$. Hence there is a unique root in $\mathbb{Z}_2$, and this root has valuation $\beta/3$. 454 \end{enumerate} 455 \item $2\beta > 3\alpha$: Replacing $x\leftarrow 2^{\beta-\alpha}y$ gives $2^{2\beta-3\alpha}y^3+ay+b=0$, which has $y=1$ mod $2$ as a root. This gives a factorization 456 $$2^{2\beta-3\alpha}y^3+ay+b=(y+r)(2^{2\beta-3\alpha}y^2-2^{2\beta-3\alpha}ry+s)$$ for some odd $r, s\in \mathbb{Z}_2$. This becomes 457 $$ (x+2^{\beta-\alpha}r)(x^2-2^{\beta-\alpha}rx+2^\alpha s)=0$$ 458 \begin{enumerate} 459 \item $2\nmid \alpha$: quadratic is irreducible and $-2^{\beta-\alpha}r$ is the only root. 460 \item $2\mid \alpha$: assuming the square roots exist, the roots of the quadratic, which have valuation $\alpha/2$, are 461 \begin{equation*} 462 2^{\beta-\alpha-1}r \pm 2^{\alpha/2} \sqrt{2^{2\beta-3\alpha-2}r^2-s} 463 \end{equation*} 464 If $r$ and $s$ are calculated to some absolute precision $O(2^p)$, then this expression is also known to absolute precision $O(2^p)$ except when $\alpha=0$ and $\beta=1$, in which case the square root loses more than one bit of precision. 465 \end{enumerate} 466\end{enumerate} 467 468 469\subsection{Factorization in $K[x,y]$} 470For $K=\mathbb{Q}$, an irreducible bivariate polynomial $f(x,y)$ remains irreducible modulo $y=y_0$ for a generic $y_0 \in \mathbb{Q}$. Hence, all of the difficult recombination may be pushed to the univariate factorization. When $K=\mathbb{F}_q$ the recombination in \cite{GlobalFactor} is necessary. 471\subsubsection{Bivariate factorization over $\mathbb{Q}$} 472We begin with $f(x,y)$ satisfying 473\begin{enumerate} 474\item $f(x,y) \in \mathbb{Z}[x,y]$ and $f(x,0) \in \mathbb{Z}[x]$ are squarefree so that we can lift. 475\item $f(x,y)$ is primitive with respect to $x$ (i.e. $\op{cont}_x(f) \in \mathbb{Z}[y]$ is $1$) so that any factor is also primitive with respect to $x$. 476\item $\op{deg}_x (f(x,y)) = \op{deg}_x (f(x,0))$ (i.e. $\op{lc}_x(f)$ does not vanish at $y=0$) so that we can make $f$ monic. 477\end{enumerate} 478Let 479\begin{equation*} 480\tilde{f}(x,y) = f(x,y) / \op{lc}_x (f(x,y)) \in \mathbb{Q}[[y]][x] 481\end{equation*} 482be the monic version of $f$ computed to precision $O(y^{1 + \op{deg}_y f})$. We can factor $\tilde{f}(x,0) \in \mathbb{Q}[x]$ by a univariate algorithm and lift the factors to produce an irreducible factorization in $\mathbb{Q}[[y]][x]$ as 483\begin{equation*} 484\tilde{f}(x,y) = \prod_{i=1}^{l} \tilde{f}_i(x,y)\text{.} 485\end{equation*} 486The $\tilde{f}_i(x,y)$ are also monic and need to be computed to precision $O(y^{1 + \op{deg}_y f})$. For each subset $S$ of $\{1,\dots, l\}$, we then have the candidate true factor 487\begin{equation*} 488\op{ppart}_x \left(\op{lc}_x(f) \prod_{i \in S} \tilde{f}_i(x,y) \right)\text{,} 489\end{equation*} 490where, before taking the primitive part, the elements of $\mathbb{Q}[[y]][x]$ must be mapped to $\mathbb{Q}[y][x]$ via remainder upon division by $y^{1 + \op{deg}_y f}$. Since we are only interested in candidate factors over $\mathbb{Z}$, $\mathbb{Q}$ may be replaced by $\mathbb{Z}/p^k \mathbb{Z}$ for appropriate $p^k$ (in particular $p \nmid \op{lc}_x (f(x,0))$). The coefficients in $\mathbb{Z}/p^k \mathbb{Z}$ must then be mapped to $\mathbb{Z}$ via the symmetric remainder before taking the primitive part. 491 492\subsubsection{Bivariate Factorization over $\mathbb{F}_q$} 493We begin with $f(x,y) \in \mathbb{F}_q[x,y]$ and an irreducible $\alpha(y) \in \mathbb{F}_q[y]$ (with $\mathbb{F}_{q^k} := \mathbb{F}_q[y]/\alpha(y)$) such that 494\begin{enumerate} 495 \item $\alpha(y)$ does not divide $\op{lc}_x f(x,y)$ so that we can make $f$ monic. 496 \item $f(x,y) \bmod \alpha(y) \in \mathbb{F}_{q^k}[x]$ is squarefree so that we can lift. 497 \item $f(x,y)$ is primitive with respect to $x$. 498\end{enumerate} 499 500The irreducible factorization of $f(x,y) \bmod \alpha(y)$ can be lifted to a monic factorization in $\mathbb{F}_q[[\alpha(y)]][x]$. With the help of some linear algebra over $\mathbb{F}_p$ these factors can be recombined into true factors. 501 502 503\subsection{Factorization in $R[x_1,\dots,x_n][X]$} 504Factoring of a multivariate squarefree primitive polynomial $f$ over $R[x_1,...,x_n][X]$ (satisfying the assumptions of Section \ref{section_sqfr} works by reducing $f$ modulo the ideal 505\begin{equation*} 506\langle x_1 = \alpha_1, x_2 = \alpha_2, \dots, x_n = \alpha_n \rangle 507\end{equation*} 508for some $\alpha_i \in R$, factoring the resulting univariate into, say, $r$, factors, and then lifting the univariate factorization to a multivariate factorization. The evaluation points must be good in the sense that $f(\alpha_1, \dots, \alpha_n, X)$ is squarefree and has the same degree as $f(x_1, \dots, x_n, X)$ in $X$. This lifting process does not change the leading coefficients in $X$, hence it is necessary that the leading coefficients be ``correct" before the lifting. In the most general setting, we can determine $d_i \in R[x_1,...,x_n]$, such that it is known that $d_i$ divides the leading coefficient of the $i$-th lifted factor. Then, before lifting, we compute $m=\operatorname{lc}_X(f)/(d_1 \cdots d_r)$, impose a leading coefficient of $d_i m$ on the $i$-th factor, and multiply $f$ by $m^{r-1}$. If the lifting succeeds, then the actual factors can be obtained by taking principle parts. Doing no work to precompute leading coefficients corresponds to taking $d_i=1$, which can obviously lead to large swells. 509 510\subsubsection{Wang's leading coefficient computation} 511Wang \cite{WANG} has a good solution to the leading coefficient problem over $\mathbb{Z}$. The idea can be illustrated by a simple example. 512\begin{equation*} 513(2x_1^3 x_2+2x_1^3 x_2)X^2 + \cdots = (2x_1(x_1+x_2)X+x_1)(x_1 x_2 X + 6) 514\end{equation*} 515First the irreducible factorization of the leading coefficient is computed 516\begin{equation*} 517(2x_1^2 x_2+2x_1^2 x_2)=2x_1^2x_2(x_1+x_2) 518\end{equation*} 519Next, an evaluation point $x_i=\alpha_i$ such that there exists primes $p_i$ such that 520\begin{align*} 521&p_3 \mid \alpha_1 + \alpha_2, \quad p_3 \nmid \alpha_2, \quad p_3 \nmid \alpha_1,\\ 522&p_2 \mid \alpha_2, \quad p_2 \nmid \alpha_1,\\ 523&p_1 \mid \alpha_1 524\end{align*} 525Lets take $\alpha_1=10, \alpha_2=14$ and $p_1=5, p_2=7, p_3=3$. The univariate factorization comes out as 526\begin{equation*} 52720(48X+1)(70X + 3) 528\end{equation*} 529What is of interest here is the leading coefficients of the primitive factors over $\mathbb{Z}$. From $p_3=3$ we can correctly distribute $x_1+x_2$ to the first multivariate factor. From $p_2=7$ we can distribute $x_2^2$ to both factors, and from $p_1=5$, we can distribute $x_1$ to the second factor. 530 531When $R$ is a finite field, there is no useful notion of ``prime''. Furthermore, the probability that an irreducible univariate factorization can be lifted to a multivariate factorization is low and sometimes zero. Hence this does not work as stated. One may replace $R$ by $R[Y]$ for an auxiliary indeterminate $Y$ and consider polynomial substitutions of the form 532\begin{align*} 533x_1 &= \alpha_1 + \beta_1 Y + \gamma_1 Y^2 + \cdots\\ 534x_2 &= \alpha_2 + \beta_2 Y + \gamma_2 Y^2 + \cdots\\ 535&\cdots\\ 536x_n &= \alpha_n + \beta_n Y + \gamma_n Y^2 + \cdots\text{.} 537\end{align*} 538The base case factorization is now not $R[X]$ but $R[Y][X]$. The points $\alpha_1, ..., \alpha_n$ still need to be good because the lifting will ultimately begin with univariates. However, the univariate factors come not from an irreducible univariate factorization, but from the $Y=0$ image of a bivariate factorization, which should greatly increases the changes of success in the lifting. 539 540\subsubsection{Kaltofen's leading coefficient computation} 541In this recursive approach \cite{KALTOFEN}, after substituting away all but \emph{two} of the variables, the bivariate polynomial is factored and the leading coefficients of the bivariate factors can be lifted against the leading cofficient of the original polynomial. Since only squarefree lifting is implemented, it is actually the squarefree parts of everything that are lifted. 542 543\subsubsection{Dense Hensel lifting} 544Some pseudocode is Section \ref{Pseudocode}. Of note here is that when lifting over $\mathbb{Z}$, we do not lift over $\mathbb{Z}/p^k\mathbb{Z}$ as Wang \cite{WANG} advises but do the lifting directly over $\mathbb{Z}$. 545 546\subsubsection{Sparse Hensel lifting} 547 548 549\section{Absolute Factorization} 550 551The goal of absolute factorization is to take an irreducible $f \in R[x_1, \dots, x_n]$ and either determine that $f$ is absolutely irreducible or provide a factorization 552\begin{equation*} 553f = g h \text{,} \quad g, h \in R'[x_1, \dots, x_n] 554\end{equation*} 555where $g$ is absolutely irreducible. $h$ may or may not be absolutely irreducible: it is simply the product of the rest. 556 557\subsection{Absolute Irreduciblity Testing} 558Here we follow Gao \cite{GAO}. For a multivariate polynomial $f = \sum_{\bold{i} \in \mathbb{Z}^n}{c_{\bold{i}} \, \pmb{x}^{\bold{i}}}$, the Newton polygon $N(f)$ is defined to be the convex hull of $\{\bold{i} \in \mathbb{Z}^n | c_{\bold{i}} \ne 0\}$ in $\mathbb{R}^n$. 559Since $N(fg) = N(f) + N(g)$ where $+$ denotes the Minkowski sum, if $N(f)$ is indecomposable, then $f$ is absolutely irreducible. Although indecomposability testing is hard, Gao gives a reasonable algorithm in two dimensions \cite{GAO2}, that is, for bivariate polynomials, and projects higher dimensional polytopes onto a two-dimensional ``shadow'' to test them for indecomposability. 560 561If $f \in K[\pmb{x}]$ happens to be irreducible over $K$ but not over the algebraic closure $\overline{K}$, then $N(f)$ will never be sufficient to prove the irreducibility over $K$. In the case that we are able to prove that $f$ is irreducible over $K$ using other methods, $N(f)$ can still be used to obtain some information on the degree of an extension of $K$ needed to factor $f$ absolutely. An absolute factorization of an irreducible $f(\pmb{x}) \in K[\pmb{x}]$ looks like 562\begin{equation*} 563f(\pmb{x}) = \operatorname{resultant}_{\alpha} (u(\alpha), g(\alpha, \pmb{x})) 564\end{equation*} 565for some irreducible $u(\alpha) \in K[\alpha]$ of degree, say, $m$. Since all $m$ of the $g(\alpha, \pmb{x})$ have the same Newton polygon, it follows that $N(f)=m\cdot N(g)$, and thus $m$ divides the coordinates of every vertex in $N(f)$. This can severely limit the possibilities for the extension degree required for an absolute factorization. 566 567\subsection{Bivariate Absolute Factorization over $\mathbb{Q}$} 568The idea here is that an absolutely irreducible $g(x,y) \in \overline{\mathbb{Q}}[x,y]$ remains absolutely irreducible in $\overline{\mathbb{F}}_p[x,y]$ for generic $p$. 569 570Assume that $f(x,y) \in \mathbb{Q}[y][x]$ is irreducible. Pick a good $\alpha \in \mathbb{Q}$ and a good rational prime $p$. The definition of ``good'' is that none of the following steps or assumptions fail. Determine an $\mathbb{F}_q = \mathbb{F}_{p^?}$ such that $f(x,\alpha)$ splits completely into distinct linear irreducibles: 571\begin{equation*} 572\frac{f(x,\alpha)}{\operatorname{lc}_x(f(x,y)) |_{y=\alpha}} = \prod_i x - r_i \text{ in } \mathbb{F}_q[x]\text{.} 573\end{equation*} 574Lift this to power series: 575\begin{equation*} 576\frac{f(x,y)}{\operatorname{lc}_x(f(x,y))} = \prod_i x - r_i(y) \text{ in } \mathbb{F}_q[[y-\alpha]][x]\text{.} 577\end{equation*} 578Do some linear algebra to recombine the factors into a real factorization: 579\begin{equation*} 580f(x,y) = \prod_j g_j(x,y) \text{ in } \mathbb{F}_q[y][x]\text{.} 581\end{equation*} 582The $g_i(x,y) \in \mathbb{F}_q[y][x]$ are absolutely irreducible. It might be possible to reduce the size of $q$ at this point. 583 584We then try to lift this to a factorization in $\mathbb{Q}_q[y][x]$: 585\begin{equation*} 586f(x,y) = \prod_j \widetilde{g}_j(x,y) \text{ in } \mathbb{Q}_q[y][x]\text{.} 587\end{equation*} 588In order to attemp this lift the $\operatorname{lc}_x(\widetilde{g}_j(x,y)) \in \mathbb{Q}_q[y]$ must be correct before starting. Assume $\operatorname{lc}_x(f(x,y))$ is monic in $y$, and that its squarefree part remains squarefree modulo $p$. Then, the squarefree factors of the $\operatorname{lc}_x \widetilde{g}_j(x,y)$ can be lifted and we can recover the monic $\operatorname{lc}_x(\widetilde{g}_j(x,y)) \in \mathbb{Q}_q[y]$. 589 590Finally, we map $\widetilde{g}_1(x,y)$ to some number field $K[x,y]$ (so that the other $\widetilde{g}_j(x,y)$ are its conjugates) and test divisibility $g_1|f$. 591 592\subsection{Bivariate Absolute Factorization over $\mathbb{F}_q$} 593 594 595 596\subsection{Multivariate Absolute Factorization} 597For absolutely factoring an irreducible in $R[x_1,\dots,x_n][X]$, the plan is to substitute good auxiliary polynomials 598\begin{align*} 599x_1 &= \alpha_1 + \beta_1 Y + \gamma_1 Y^2 + \cdots\\ 600x_2 &= \alpha_2 + \beta_2 Y + \gamma_2 Y^2 + \cdots\\ 601&\cdots\\ 602x_n &= \alpha_n + \beta_n Y + \gamma_n Y^2 + \cdots\text{,} 603\end{align*} 604and absolutely factor the resulting bivariate in $R[X,Y]$, and then lift the $Y=0$ images of the two factors back to a multivariate factorization. This would require the fact that an absolutely irreducible multivariate remains an absolutely irreducible bivariate under a generic substitution of this form. 605 606\begin{thebibliography}{99} 607\bibitem{Brown} W. S. Brown. On Euclid’s Algorithm and theComputation of Polynomial Greatest Common Divisors. J. ACM 18 (1971), 478-504. 608 609\bibitem{Johnson} Johnson, S.C., 1974. Sparse polynomial arithmetic. ACM SIGSAM Bulletin 8 (3), pp. 63--71. 610 611\bibitem{FPS} Monagan M., Pearce R.: Sparse polynomial powering using heaps. “Computer Algebra in Scientific Computing”, Springer, 2012, s.236-247. 612 613\bibitem{ZIPPEL} Zippel, Richard, Probabilistic algorithms for sparse polynomials. Lecture Notes in Computer Science. 72. pp. 216--226, 1979 614 615\bibitem{LINZIP} J. de Kleine, M. Monagan and A. Wittkopf, Algorithms for the non-monic case of the sparse modular GCD algorithm. Proceedings of ISSAC ’05, ACM Press, pp. 124--131, 2005. 616 617\bibitem{SULING} Yang, Suling. Computing the Greatest Common Divisor of Multivariate Polynomials over Finite Fields. http://www.cecm.sfu.ca/CAG/theses/suling.pdf 618 619\bibitem{BMAR} The Berlekamp-Massey Algorithm revisited, N. B. Atti, G. M. Diaz–Toca, H. Lombardi, 9 March 2006 620 621\bibitem{YAP} A Unified Approach to HGCD Algorithms for polynomials and integers by Klaus Thull , Chee K. Yap 622 623\bibitem{GlobalFactor} Factoring polynomials over global fields Belabas, Karim; van Hoeij, Mark; Klüners, Jürgen; Steel, Allan Journal de théorie des nombres de Bordeaux, Volume 21 (2009) no. 1, p. 15-39 624 625\bibitem{EZGCD} P. S. Wang, The EEZ-GCD Algorithm, ACM SIGSAM Bulletin 14, pp. 50--60, 1980 626 627\bibitem{WANG} P. S. Wang, An improved multivariate polynomial factoring algorithm. Mathematics of Computation 32, no. 144, 1215--1231, 1978 628 629\bibitem{GAO} S. Gao, Absolute irreducibility of polynomials via Newton polytopes, Journal of Algebra 630237 (2001), 501--520. 631 632\bibitem{GAO2} S. Gao and A.G.B. Lauder, Decomposition of polytopes and polynomials, Discrete and Computational Geometry 26 (2001), 89--104. 633 634\bibitem{QuadraticFactor} Jørgen Cherly, Luis Gallardo, Leonid Vaserstein and Ethel Wheland: Solving Quadratic Equations over Polynomial Rings of Characteristic Two. Publicacions Matemàtiques, Vol. 42, No. 1 (1998), pp. 131-142 635 636\bibitem{KALTOFEN} E. Kaltofen. Sparse Hensel lifting. In EUROCAL 85 European Conf. Comput. Algebra Proc. Vol. 2, pages 4–17, 1985 637\end{thebibliography} 638 639\section{Pseudocode} 640\label{Pseudocode} 641 642\subsection{gcd} For the dense gcd over finite fields, if one runs out of primes of the form $x-\alpha$, instead of failing it is possible to use any irreducible polynomial in place of $x-\alpha$ in Algorithm \ref{algo_brownp}, and this would constitute the large prime version of the algorithm. 643 644\begin{algorithm}[H] 645 \DontPrintSemicolon 646 \KwIn{ 647 \begin{enumerate} 648 \item $A,B \in \mathbb{F}_q[x][x_1, \dots, x_n]$ neither is zero 649 \end{enumerate} 650 } 651 \KwOut{ 652 \begin{enumerate} 653 \item monic $G= \op{gcd}(A,B)$, $\bar{A}=A/G$, $\bar{B}=B/G$ 654 \end{enumerate} 655 } 656 \lIf{$n=0$}{\textbf{return} using univariate arithmetic} 657 set $cA= \op{cont}_{x_1,\dots, x_n}(A)$ and $ cB=\op{cont}_{x_1, \dots, x_n}(B) \in \mathbb{F}_p[x]$\; 658 set $A= A/cA$ and $B=B/cB$ \tcp*{content $cA, cB, \dots$ is always monic} 659 set $cG= \op{gcd}(cA, cB)$, $c\bar{A}=cA/cG$ and $c\bar{B}=cB/cG$\; 660 set $\gamma=\op{gcd}(\op{lc}_{x_1,\dots, x_n}(A),\op{lc}_{x_1,\dots, x_n}(B))\in \mathbb{F}_q[x]$\; 661 set $bound= 1+ \op{deg}_{x}\gamma+ \max(\op{deg}_x(A), \op{deg}_x(B))$, 662 and set $m=1\in \mathbb{F}_p[x]$\; 663 \texttt{pick a prime}: \tcp*{primes are $(x-\alpha)$} 664 665 choose a new $\alpha\in \mathbb{F}_q$ else \textbf{return} FAIL\; 666 set $\gamma^*=\gamma\op{mod} {(x-\alpha)}$\; 667 set $A^*=A \op{mod} (x-\alpha)$ and $B^*=B\op{mod} (x-\alpha)\in \mathbb{F}_q[x_n][x_1,\dots,x_{n-1}]$\; 668 \lIf{$\gamma^*=0$}{\textbf{goto} \texttt{pick a prime}} 669 set $(G^*,\bar{A}^*, \bar{B}^*)= \textbf{brownp}(A^*,B^*)$ or \textbf{goto} \texttt{pick a prime} if the call failed\; 670 \lIf{$G^*=1$} 671 {set $G=1, A=\bar{A},B=\bar{B}$, 672 \textbf{goto} \texttt{put content}} 673 \If{$\op{deg}_x(m)>0$} 674 {\lIf{$\op{lm}_{x_1, \dots,x_n}(G^*)<\op{lm}_{x_1, \dots,x_n}(G)$}{set $m=1$}\lIf{$\op{lm}_{x_1, \dots,x_n}(G^*)>\op{lm}_{x_1, \dots,x_n}(G)$}{\textbf{goto} \texttt{pick a prime}} 675 } 676 set $\bar{A}=\op{crt}(\bar{A} \op{mod} m,\ \bar{A}^* \op{mod} \ (x-\alpha))$ 677 and $\bar{B}=\op{crt}(\bar{B} \op{mod} m,\ \bar{B}^* \op{mod} \ (x-\alpha))$\; \label{algo_brownp_abcrt} 678 \If{$\bar{A}$ did not change and, with $T=\bar{A}/\op{cont}_{x_1, \dots, x_n}(\bar{A})$, $T\mid A$ and $A/T\mid B$ \label{algo_brownp_astab}} 679 { set $G=A/T$, $\bar{A}=T$ and $\bar{B}={B}/G$, \textbf{goto} \texttt{fix lcs} 680 } 681 set $G=\op{crt}(G \op{mod} m,\ \gamma^*\cdot G^* \op{mod} \ (x-\alpha))$ 682 and $m= m \cdot (x-\alpha)$\; \label{algo_brownp_gcrt} 683 \If{$G$ did not change and, with $T=G/\op{cont}_{x_1,\dots,x_n}(G)$, $T \mid A$ and $T\mid B$ \label{algo_brownp_gstab}}{set $G=T$, $\bar{A}=\bar{A}/G$ and $\bar{B}=B/G$, \textbf{goto} \texttt{fix lcs}} 684 \lIf{$\op{deg}_x(m)< bound$}{\textbf{goto} \texttt{pick a prime}} 685 \lIf{$\op{deg}_x\gamma+\op{deg}_xA=\op{deg}_xG+\op{deg}_x\bar{A}$ and $\op{deg}_x\gamma+\op{deg}_xB=\op{deg}_xG+\op{deg}_x\bar{B}$}{\textbf{goto} \texttt{success}} 686 set $m=1$, \textbf{goto} \texttt{pick a prime} 687 688 \texttt{success:}\; 689 set $G=G/\op{cont}_{x_1,\dots, x_n}(G)$, $A=A/\op{lc}_{x_1,\dots, x_n}(G)$ and $B=B/\op{lc}_{x_1,\dots, x_n}(G)$\; 690 \texttt{put content:}\; 691 set $G=G \cdot cG$, $\bar{A}=\bar{A}\cdot c\bar{A}$ and $\bar{B}=\bar{B}\cdot c\bar{B}$\; 692 \Return{$(G, \bar{A}, \bar{B})$}\; 693 \texttt{fix lcs:}\; 694 with $\delta = \op{lc}_{x,x_1,\dots, x_n}(G)$, set $G=\delta^{-1} G$, $A=\delta A$ and $B=\delta B$, \textbf{goto} \texttt{put content} 695 \caption{$\textbf{brownp}$ dense gcd over finite field} 696 \label{algo_brownp} 697\end{algorithm} 698 699On lines \ref{algo_brownp_abcrt} and \ref{algo_brownp_gcrt}, the inputs $G, \bar{A}, \bar{B}$ are undefined only when $m=1$, in which case the $\op{crt}$ ignores them anyways. There should also be a check analogous to line \ref{algo_brownp_astab} for the stabilization of $\bar{B}$. This was omitted simply due to space constraints. Finally, the stability checks in lines \ref{algo_brownp_astab} and \ref{algo_brownp_gstab} (and the missing one for $\bar{B}$) are completely optional and may be executed or skipped on every iteration at the user's discretion. 700 701Similarly to the previous algorithm, divisibility checks could be performed over the integers as well. 702 703\begin{algorithm}[H] 704 \DontPrintSemicolon 705 \KwIn{ $n \ge 1$ 706 \begin{enumerate} 707 \item $A,B \in \mathbb{Z}[x_1, \dots, x_n]$ neither is zero 708 \end{enumerate} 709 } 710 \KwOut{ 711 \begin{enumerate} 712 \item unit normal $G= \op{gcd}(A,B)$, $\bar{A}=A/G$, $\bar{B}=B/G$ 713 \end{enumerate} 714 } 715 set $cA= \op{cont}_{x_1,\dots, x_n}(A)$ and $cB=\op{cont}_{x_1, \dots, x_n}(B) \in \mathbb{Z}$\; 716 set $A= A/cA$ and $B=B/cB$ \tcp*{content $cA, cB, \dots$ is always positive} 717 set $cG= \op{gcd}(cA, cB)$, $c\bar{A}=cA/cG$ and $c\bar{B}=cB/cG$\; 718 set $\gamma=\op{gcd}(\op{lc}_{x_1,\dots, x_n}(A),\op{lc}_{x_1,\dots, x_n}(B))\in \mathbb{Z}[x]$\; 719 set $bound= 2 \cdot \gamma \cdot \max(|A|_{\infty}, |B|_{\infty})$, 720 and set $m=1 \in \mathbb{Z}$\; 721 \texttt{pick a prime}: \tcp*{primes are numbers} 722 choose a new prime $p \in \mathbb{Z}$ else \textbf{return} FAIL\; 723 set $\gamma^*=\gamma\op{mod} p$\; 724 set $A^*=A \op{mod} p$ and $B^*=B\op{mod} p\in \mathbb{F}_p[x_n][x_1,\dots,x_{n-1}]$\; 725 \lIf{$\gamma^*=0$}{\textbf{goto} \texttt{pick a prime}} 726 set $(G^*,\bar{A}^*, \bar{B}^*)= \textbf{brownp}(A^*,B^*)$ or \textbf{goto} \texttt{pick a prime} if the call failed\; 727 \lIf{$G^*=1$} 728 {set $G=1, A=\bar{A},B=\bar{B}$, 729 \textbf{goto} \texttt{put content}} 730 \If{$m>1$} 731 {\lIf{$\op{lm}_{x_1, \dots,x_n}(G^*)<\op{lm}_{x_1, \dots,x_n}(G)$}{set $m=1$}\lIf{$\op{lm}_{x_1, \dots,x_n}(G^*)>\op{lm}_{x_1, \dots,x_n}(G)$}{\textbf{goto} \texttt{pick a prime}} 732 } 733 set $\bar{A}=\op{crt}(\bar{A} \op{mod} m,\ \bar{A}^* \op{mod} p)$ 734 and $\bar{B}=\op{crt}(\bar{B} \op{mod} m,\ \bar{B}^* \op{mod} p)$\; 735 set $G=\op{crt}(G \op{mod} m,\ \gamma^*\cdot G^* \op{mod} p)$ 736 and $m= m \cdot p$\; 737 \lIf{$m< bound$}{\textbf{goto} \texttt{pick a prime}} 738 set $hA = \min(|G|_1 \cdot |\bar{A}|_\infty, |G|_\infty \cdot |\bar{A}|_1)$ \tcp*{upper bound on $|G\cdot\bar{A}|_\infty$} 739 set $hB = \min(|G|_1 \cdot |\bar{B}|_\infty, |G|_\infty \cdot |\bar{B}|_1)$ \tcp*{upper bound on $|G\cdot\bar{B}|_\infty$} 740 \lIf{$hA < m$ and $hB < m$}{\textbf{goto} \texttt{success}} 741 \textbf{goto }\texttt{pick a prime} 742 743 \texttt{success:}\; 744 set $G=G/\op{cont}_{x_1,\dots, x_n}(G)$, $A=A/\op{lc}_{x_1,\dots, x_n}(G)$ and $B=B/\op{lc}_{x_1,\dots, x_n}(G)$\; 745 \texttt{put content:}\; 746 set $G=G \cdot cG$, $\bar{A}=\bar{A}\cdot c\bar{A}$ and $\bar{B}=\bar{B}\cdot c\bar{B}$\; 747 \Return{$(G, \bar{A}, \bar{B})$}\; 748 \caption{$\textbf{brownm}$ dense gcd over integers} 749 \label{algo_brownm} 750\end{algorithm} 751 752\subsection{factoring} 753 754The lifting algorithms with be stated with $3$ factors. 755 756 757\begin{algorithm}[H] 758\DontPrintSemicolon 759\KwIn{ $m \ge 2$ 760 \begin{enumerate} 761 \item $(\alpha_1, \dots, \alpha_m) \in R^m$ 762 \item $A \in R[x_1, \dots, x_m][X]$ with $A(X, \alpha_1, \dots, \alpha_m)$ squarefree 763 \item $(B_1, B_2, B_3) \in R[x_1, \dots, x_m][X]$ (however, all but the leading coefficients of each $B_i$ are in $R[x_1, \dots, x_{m-1}]$) such that $A(X, x_1, \dots, x_{m-1}, \alpha_m) = (B_1 B_2 B_3)(X, x_1, \dots, x_{m-1}, \alpha_m)$ 764 \end{enumerate} 765} 766\KwOut{ 767 \begin{enumerate} 768 \item $(B_1, B_2, B_3) \in R[x_1, \dots, x_m][X]$ such that $A(X, x_1, \dots, x_m) = (B_1 B_2 B_3)(X, x_1, \dots, x_m)$ or FAIL 769 \end{enumerate} 770} 771 772set $e = A - B_1 B_2 B_3$ \tcp*{current error} 773set $\beta_i = B_i(X, x_1, \dots, x_{m-1}, \alpha_m) \in R[x_1, \dots, x_{m-1}][X]$\; 774\For{$j=1$ \KwTo $\op{deg}_{x_m}(A)$} 775{ 776 assert that $e$ is divisible by $(x_m - \alpha_m)^j$\; 777 set $t =$ taylor coefficient of $(x_m - \alpha_m)^j$ in $e$ \tcp*{ $t \in R[x_1, \dots, x_{m-1}][X]$} 778 $(\delta_1, \delta_2, \delta_3) = \textbf{pfrac}(t, (\beta_1, \beta_2, \beta_3),(\alpha_1, \dots,\alpha_{m-1}), (\op{deg}_{x_1}A, \dots, \op{deg}_{x_{m-1}}A))$ \; 779 \tcp*{solve $t = \delta_1 \beta_2 \beta_3 + \delta_2 \beta_1 \beta_3 + \delta_3 \beta_1 \beta_2$} 780 \lIf{the solved failed}{\Return{FAIL}} 781 set $B_i = B_i + \delta_i (x_m - \alpha_m)^j$ for each $i$\; 782 set $e = A - B_1 B_2 B_3$ 783} 784\leIf{$e=0$}{ 785 \Return{$(B_1, B_2, B_3)$} 786} 787{ 788 \Return{FAIL} 789} 790\caption{$\textbf{hlift}$ (Multivariate Hensel Lifting - Quintic version)} 791\label{algo_mlift} 792\end{algorithm} 793 794Since the solutions $\delta_i$ must satisfy $\op{deg}_{X} \delta_i < \op{deg}_{X} B_i$, the leading coefficients of the $B_i$ will not be changed by Algorithm \ref{algo_mlift}. 795 796\begin{algorithm}[H] 797\DontPrintSemicolon 798\KwIn{ $m \ge 2$ 799 \begin{enumerate} 800 \item $(\alpha_1, \dots, \alpha_m) \in R^m$ 801 \item $F \in R[x_1, \dots, x_m][X]$ with $F(X, \alpha_1, \dots, \alpha_m)$ squarefree 802 \item $(A, B, C) \in R[x_1, \dots, x_m][X]$ (however, all but the leading coefficients of each $A,B,C$ are in $R[x_1, \dots, x_{m-1}]$) such that $F(X, x_1, \dots, x_{m-1}, \alpha_m) = (A B C)(X, x_1, \dots, x_{m-1}, \alpha_m)$ 803 \end{enumerate} 804} 805\KwOut{ 806 \begin{enumerate} 807 \item $(A, B, C) \in R[x_1, \dots, x_m][X]$ such that $A(X, x_1, \dots, x_m) = (A B C)(X, x_1, \dots, x_m)$ or FAIL 808 \end{enumerate} 809} 810 811set $a_0 = [(x_m - \alpha_m)^0] A$ and set $dA = 0$\; 812set $b_0 = [(x_m - \alpha_m)^0] B$ and set $dB = 0$\; 813set $c_0 = [(x_m - \alpha_m)^0] C$ and set $dC = 0$\; 814\For{$d=1$ \KwTo $\op{deg}_{x_m}(A)$} 815{ 816 set $t = [(x_m - \alpha_m)^d]F - \sum_{\substack{i+j+k=d \\ i \le dA, \ j \le dB, \ k \le dC}} a_i b_j c_k$\; 817 use \textbf{pfrac} to find $a_d, b_d, c_d$ from $t=a_d b_0 c_0+a_0 b_d c_0+a_0 b_0 c_d$\; 818 \lIf{the solved failed}{\Return{FAIL}} 819 set $a_d = a_d + [(x_m - \alpha_m)^d]A$\; 820 set $b_d = b_d + [(x_m - \alpha_m)^d]B$\; 821 set $c_d = c_d + [(x_m - \alpha_m)^d]C$\; 822 \lIf{ $a_d \neq 0$}{set $dA = d$} 823 \lIf{ $b_d \neq 0$}{set $dB = d$} 824 \lIf{ $c_d \neq 0$}{set $dC = d$} 825 \lIf{$dA + dB + dC > \op{deg}_{x_m}(A)$}{\Return{FAIL}} 826} 827assert that $dA + dB + dC = \op{deg}_{x_m}(A)$\; 828set $A = \sum_{i=0}^{dA} a_i (x_m - \alpha_m)^i$\; 829set $B = \sum_{i=0}^{dB} b_i (x_m - \alpha_m)^i$\; 830set $C = \sum_{i=0}^{dC} c_i (x_m - \alpha_m)^i$\; 831\Return{(A,B,C)} 832\caption{$\textbf{hlift}$ (Multivariate Hensel Lifting - Quartic version)} 833\label{algo_mlift2} 834\end{algorithm} 835 836Finally the main work horse. It is easy to solve $t=\delta_1\beta_2\beta_3+\delta_2\beta_1\beta_3+\delta_3\beta_1\beta_2$ in $\op{frac}(R)(x_1,\dots,x_{m-1})[X]$ with pseudo remainder sequences, since 837$\delta_i= t(\beta_j\beta_k)^{-1}\pmod {\beta_i}$ and check if the $\delta_i$'s are defined in $R[x_1,\dots, x_{m-1}][X]$. However, as intermediate expression swell is a problem in this approach. We will use a different algorithm described as below. 838 839\begin{algorithm}[H] 840\DontPrintSemicolon 841\KwIn{ $l\geq 0$ 842 \begin{enumerate} 843 \item $t\in R[x_1,\dots, x_l][X]$ 844 \item $(\beta_1,\beta_2,\beta_3)$, $\text{where } \beta_i\in R[x_1,\dots, x_{l}][X],$ $ \beta_i \text{ pairwise coprime in } \op{frac}{(R)}(x_1,\dots,x_{l})[X]$ 845 \item $(\alpha_1,\dots,\alpha_l) \in R^l$ 846 \item $(d_1,\dots, d_l)\in \mathbb{N}^l$ degree bounds 847 \end{enumerate} 848} 849 850\KwOut{ 851 \begin{enumerate} 852 \item $(\delta_1,\delta_2,\delta_3), \delta_i\in R[x_1,\dots, x_r][X]\text{ such that }t=\delta_1\beta_2\beta_3+\delta_2\beta_1\beta_3+\delta_3\beta_1\beta_2$ and $\op{deg}_{X}\delta_i<\op{deg}_{X}\beta_i$ 853 or \it{FAIL} 854 \end{enumerate} 855} 856\eIf{$r=0$}{ 857 set $\delta_i=t(\beta_j\beta_k)^{-1}\pmod {\beta_i} $ in $\op{frac}(R)[X]$\\ 858 \leIf{ each $\delta_i\in R[X]$}{ 859 \Return{$(\delta_1,\delta_2,\delta_3)$} 860 } 861 { 862 \Return{FAIL} 863 } 864 } 865{ 866 set $\tilde{\beta}_i(X)=\beta_i(X,x_1,\dots,x_{r-1},\alpha_l)\in R[x_1,\dots,x_{l-1}][X]$\; 867 set $\delta_i=0$ for each $i$\; 868 set $e=t$\; 869 \For{$j=0$ \KwTo $d_r$} 870 { 871 assert that $e$ is divisible by $(x_r - \alpha_r)^j$\; 872 set $\tilde{t}=$ taylor coefficient of $(x_r-\alpha_r)^j$ in $e$\; 873 set $(\tilde{\delta_1},\tilde{\delta}_2, \tilde{\delta}_3 )=\textbf{pfrac}(\tilde{t},(\alpha_1,\dots, \alpha_{l-1}),(\tilde{\beta}_1,\tilde{\beta}_2, \tilde{\beta}_3),(d_1,\dots, d_{l-1}))$\; 874 \lIf{the solved failed}{\Return{FAIL}} 875 set $\delta_i=\delta_i+\tilde{\delta}_i(x_r-\alpha_r)^j$\; 876 set $e=t-(\delta_1\beta_2\beta_3+\delta_2\beta_1\beta_3+\delta_3\beta_1\beta_2)$ 877 } 878 \leIf{$e=0$}{ 879 \Return{$(\delta_1, \delta_2, \delta_3)$} 880 } 881 { 882 \Return{FAIL} 883 } 884} 885 886\caption{\textbf{pfrac} (Multivariate partial fraction solver)} 887\label{algo_pfrac} 888\end{algorithm} 889 890\end{document} 891