/***********************************************************************
*                                                                      *
*               This software is part of the ast package               *
*          Copyright (c) 1992-2012 AT&T Intellectual Property          *
*                      and is licensed under the                       *
*                 Eclipse Public License, Version 1.0                  *
*                    by AT&T Intellectual Property                     *
*                                                                      *
*                A copy of the License is available at                 *
*          http://www.eclipse.org/org/documents/epl-v10.html           *
*         (with md5 checksum b35adb5213ca9657e911e9befb180842)         *
*                                                                      *
*              Information and Software Systems Research               *
*                            AT&T Research                             *
*                           Florham Park NJ                            *
*                                                                      *
*                 Glenn Fowler <gsf@research.att.com>                  *
*                  David Korn <dgk@research.att.com>                   *
*                                                                      *
***********************************************************************/
#pragma prototyped
/*
 * David Korn
 * Glenn Fowler
 * AT&T Research
 *
 * join
 */
29*b30d1939SAndy Fiddaman 
30*b30d1939SAndy Fiddaman static const char usage[] =
31*b30d1939SAndy Fiddaman "[-?\n@(#)$Id: join (AT&T Research) 2009-12-10 $\n]"
32*b30d1939SAndy Fiddaman USAGE_LICENSE
33*b30d1939SAndy Fiddaman "[+NAME?join - relational database operator]"
34*b30d1939SAndy Fiddaman "[+DESCRIPTION?\bjoin\b performs an \aequality join\a on the files \afile1\a "
35*b30d1939SAndy Fiddaman 	"and \afile2\a and writes the resulting joined files to standard "
36*b30d1939SAndy Fiddaman 	"output.  By default, a field is delimited by one or more spaces "
37*b30d1939SAndy Fiddaman 	"and tabs with leading spaces and/or tabs ignored.  The \b-t\b option "
38*b30d1939SAndy Fiddaman 	"can be used to change the field delimiter.]"
39*b30d1939SAndy Fiddaman "[+?The \ajoin field\a is a field in each file on which files are compared. "
40*b30d1939SAndy Fiddaman 	"By default \bjoin\b writes one line in the output for each pair "
41*b30d1939SAndy Fiddaman 	"of lines in \afiles1\a and \afiles2\a that have identical join "
42*b30d1939SAndy Fiddaman 	"fields.  The default output line consists of the join field, "
43*b30d1939SAndy Fiddaman 	"then the remaining fields from \afile1\a, then the remaining "
44*b30d1939SAndy Fiddaman 	"fields from \afile2\a, but this can be changed with the \b-o\b "
45*b30d1939SAndy Fiddaman 	"option.  The \b-a\b option can be used to add unmatched lines "
46*b30d1939SAndy Fiddaman 	"to the output.  The \b-v\b option can be used to output only "
47*b30d1939SAndy Fiddaman 	"unmatched lines.]"
48*b30d1939SAndy Fiddaman "[+?The files \afile1\a and \afile2\a must be ordered in the collating "
49*b30d1939SAndy Fiddaman 	"sequence of \bsort -b\b on the fields on which they are to be "
50*b30d1939SAndy Fiddaman 	"joined otherwise the results are unspecified.]"
51*b30d1939SAndy Fiddaman "[+?If either \afile1\a or \afile2\a is \b-\b, \bjoin\b "
52*b30d1939SAndy Fiddaman         "uses standard input starting at the current location.]"
53*b30d1939SAndy Fiddaman 
54*b30d1939SAndy Fiddaman "[e:empty]:[string?Replace empty output fields in the list selected with"
55*b30d1939SAndy Fiddaman "	\b-o\b with \astring\a.]"
56*b30d1939SAndy Fiddaman "[o:output]:[list?Construct the output line to comprise the fields specified "
57*b30d1939SAndy Fiddaman 	"in a blank or comma separated list \alist\a.  Each element in "
58*b30d1939SAndy Fiddaman 	"\alist\a consists of a file number (either 1 or 2), a period, "
59*b30d1939SAndy Fiddaman 	"and a field number or \b0\b representing the join field.  "
60*b30d1939SAndy Fiddaman 	"As an obsolete feature multiple occurrences of \b-o\b can "
61*b30d1939SAndy Fiddaman 	"be specified.]"
62*b30d1939SAndy Fiddaman "[t:separator|tabs]:[delim?Use \adelim\a as the field separator for both input"
63*b30d1939SAndy Fiddaman "	and output.]"
64*b30d1939SAndy Fiddaman "[1:j1]#[field?Join on field \afield\a of \afile1\a.  Fields start at 1.]"
65*b30d1939SAndy Fiddaman "[2:j2]#[field?Join on field \afield\a of \afile2\a.  Fields start at 1.]"
66*b30d1939SAndy Fiddaman "[j:join]#[field?Equivalent to \b-1\b \afield\a \b-2\b \afield\a.]"
67*b30d1939SAndy Fiddaman "[a:unpairable]#[fileno?Write a line for each unpairable line in file"
68*b30d1939SAndy Fiddaman "	\afileno\a, where \afileno\a is either 1 or 2, in addition to the"
69*b30d1939SAndy Fiddaman "	normal output.  If \b-a\b options appear for both 1 and 2, then "
70*b30d1939SAndy Fiddaman 	"all unpairable lines will be output.]"
71*b30d1939SAndy Fiddaman "[v:suppress]#[fileno?Write a line for each unpairable line in file"
72*b30d1939SAndy Fiddaman "	\afileno\a, where \afileno\a is either 1 or 2, instead of the normal "
73*b30d1939SAndy Fiddaman 	"output.  If \b-v\b options appear for both 1 and 2, then "
74*b30d1939SAndy Fiddaman 	"all unpairable lines will be output.] ]"
75*b30d1939SAndy Fiddaman "[i:ignorecase?Ignore case in field comparisons.]"
76*b30d1939SAndy Fiddaman "[B!:mmap?Enable memory mapped reads instead of buffered.]"
77*b30d1939SAndy Fiddaman 
78*b30d1939SAndy Fiddaman "[+?The following obsolete option forms are also recognized: \b-j\b \afield\a"
79*b30d1939SAndy Fiddaman "	is equivalent to \b-1\b \afield\a \b-2\b \afield\a, \b-j1\b \afield\a"
80*b30d1939SAndy Fiddaman "	is equivalent to \b-1\b \afield\a, and \b-j2\b \afield\a is"
81*b30d1939SAndy Fiddaman "	equivalent to \b-2\b \afield\a.]"
82*b30d1939SAndy Fiddaman 
83*b30d1939SAndy Fiddaman "\n"
84*b30d1939SAndy Fiddaman "\nfile1 file2\n"
85*b30d1939SAndy Fiddaman "\n"
86*b30d1939SAndy Fiddaman "[+EXIT STATUS?]{"
87*b30d1939SAndy Fiddaman 	"[+0?Both files processed successfully.]"
88*b30d1939SAndy Fiddaman 	"[+>0?An error occurred.]"
89*b30d1939SAndy Fiddaman "}"
90*b30d1939SAndy Fiddaman "[+SEE ALSO?\bcut\b(1), \bcomm\b(1), \bpaste\b(1), \bsort\b(1), \buniq\b(1)]"
91*b30d1939SAndy Fiddaman ;
92*b30d1939SAndy Fiddaman 
93*b30d1939SAndy Fiddaman #include <cmd.h>
94*b30d1939SAndy Fiddaman #include <sfdisc.h>
95*b30d1939SAndy Fiddaman 
96*b30d1939SAndy Fiddaman #if _hdr_wchar && _hdr_wctype && _lib_iswctype
97*b30d1939SAndy Fiddaman 
98*b30d1939SAndy Fiddaman #include <wchar.h>
99*b30d1939SAndy Fiddaman #include <wctype.h>
100*b30d1939SAndy Fiddaman 
101*b30d1939SAndy Fiddaman #else
102*b30d1939SAndy Fiddaman 
103*b30d1939SAndy Fiddaman #include <ctype.h>
104*b30d1939SAndy Fiddaman 
105*b30d1939SAndy Fiddaman #ifndef iswspace
106*b30d1939SAndy Fiddaman #define iswspace(x)	isspace(x)
107*b30d1939SAndy Fiddaman #endif
108*b30d1939SAndy Fiddaman 
109*b30d1939SAndy Fiddaman #endif
110*b30d1939SAndy Fiddaman 
/*
 * output mode bits: C_FILE1 and C_FILE2 are (1<<index) for file index
 * 0 and 1, which is how getrec() clears them at end of input
 */
#define C_FILE1		001
#define C_FILE2		002
#define C_COMMON	004
#define C_ALL		(C_FILE1|C_FILE2|C_COMMON)

/*
 * NFIELD is the initial capacity of the field and output lists
 *
 * JOINFIELD marks "the join field" in the -o output list; ordinary
 * entries are (field<<2)|file so they never have bit 1 set
 */
#define NFIELD		10
#define JOINFIELD	2

/*
 * character classes stored in Join_t.state[]; 0 means ordinary data
 */
#define S_DELIM		1
#define S_SPACE		2
#define S_NL		3
#define S_WIDE		4
123*b30d1939SAndy Fiddaman 
/*
 * one field of the current record, described by pointers into the
 * record buffer
 */
typedef struct Field_s
{
	char*		beg;		/* first byte of the field	*/
	char*		end;		/* the byte that ended the field (separator or newline) */
} Field_t;
129*b30d1939SAndy Fiddaman 
130*b30d1939SAndy Fiddaman typedef struct File_s
131*b30d1939SAndy Fiddaman {
132*b30d1939SAndy Fiddaman 	Sfio_t*		iop;
133*b30d1939SAndy Fiddaman 	char*		name;
134*b30d1939SAndy Fiddaman 	char*		recptr;
135*b30d1939SAndy Fiddaman 	int		reclen;
136*b30d1939SAndy Fiddaman 	int		field;
137*b30d1939SAndy Fiddaman 	int		fieldlen;
138*b30d1939SAndy Fiddaman 	int		nfields;
139*b30d1939SAndy Fiddaman 	int		maxfields;
140*b30d1939SAndy Fiddaman 	int		spaces;
141*b30d1939SAndy Fiddaman 	int		hit;
142*b30d1939SAndy Fiddaman 	int		discard;
143*b30d1939SAndy Fiddaman 	Field_t*	fields;
144*b30d1939SAndy Fiddaman } File_t;
145*b30d1939SAndy Fiddaman 
146*b30d1939SAndy Fiddaman typedef struct Join_s
147*b30d1939SAndy Fiddaman {
148*b30d1939SAndy Fiddaman 	unsigned char	state[1<<CHAR_BIT];
149*b30d1939SAndy Fiddaman 	Sfio_t*		outfile;
150*b30d1939SAndy Fiddaman 	int*		outlist;
151*b30d1939SAndy Fiddaman 	int		outmode;
152*b30d1939SAndy Fiddaman 	int		ooutmode;
153*b30d1939SAndy Fiddaman 	char*		nullfield;
154*b30d1939SAndy Fiddaman 	char*		delimstr;
155*b30d1939SAndy Fiddaman 	int		delim;
156*b30d1939SAndy Fiddaman 	int		delimlen;
157*b30d1939SAndy Fiddaman 	int		buffered;
158*b30d1939SAndy Fiddaman 	int		ignorecase;
159*b30d1939SAndy Fiddaman 	int		mb;
160*b30d1939SAndy Fiddaman 	char*		same;
161*b30d1939SAndy Fiddaman 	int		samesize;
162*b30d1939SAndy Fiddaman 	Shbltin_t*	context;
163*b30d1939SAndy Fiddaman 	File_t		file[2];
164*b30d1939SAndy Fiddaman } Join_t;
165*b30d1939SAndy Fiddaman 
166*b30d1939SAndy Fiddaman static void
done(register Join_t * jp)167*b30d1939SAndy Fiddaman done(register Join_t* jp)
168*b30d1939SAndy Fiddaman {
169*b30d1939SAndy Fiddaman 	if (jp->file[0].iop && jp->file[0].iop != sfstdin)
170*b30d1939SAndy Fiddaman 		sfclose(jp->file[0].iop);
171*b30d1939SAndy Fiddaman 	if (jp->file[1].iop && jp->file[1].iop != sfstdin)
172*b30d1939SAndy Fiddaman 		sfclose(jp->file[1].iop);
173*b30d1939SAndy Fiddaman 	if (jp->outlist)
174*b30d1939SAndy Fiddaman 		free(jp->outlist);
175*b30d1939SAndy Fiddaman 	if (jp->file[0].fields)
176*b30d1939SAndy Fiddaman 		free(jp->file[0].fields);
177*b30d1939SAndy Fiddaman 	if (jp->file[1].fields)
178*b30d1939SAndy Fiddaman 		free(jp->file[1].fields);
179*b30d1939SAndy Fiddaman 	if (jp->same)
180*b30d1939SAndy Fiddaman 		free(jp->same);
181*b30d1939SAndy Fiddaman 	free(jp);
182*b30d1939SAndy Fiddaman }
183*b30d1939SAndy Fiddaman 
184*b30d1939SAndy Fiddaman static Join_t*
init(void)185*b30d1939SAndy Fiddaman init(void)
186*b30d1939SAndy Fiddaman {
187*b30d1939SAndy Fiddaman 	register Join_t*	jp;
188*b30d1939SAndy Fiddaman 	register int		i;
189*b30d1939SAndy Fiddaman 
190*b30d1939SAndy Fiddaman 	setlocale(LC_ALL, "");
191*b30d1939SAndy Fiddaman 	if (jp = newof(0, Join_t, 1, 0))
192*b30d1939SAndy Fiddaman 	{
193*b30d1939SAndy Fiddaman 		if (jp->mb = mbwide())
194*b30d1939SAndy Fiddaman 			for (i = 0x80; i <= 0xff; i++)
195*b30d1939SAndy Fiddaman 				jp->state[i] = S_WIDE;
196*b30d1939SAndy Fiddaman 		jp->state[' '] = jp->state['\t'] = S_SPACE;
197*b30d1939SAndy Fiddaman 		jp->state['\n'] = S_NL;
198*b30d1939SAndy Fiddaman 		jp->delim = -1;
199*b30d1939SAndy Fiddaman 		jp->nullfield = 0;
200*b30d1939SAndy Fiddaman 		if (!(jp->file[0].fields = newof(0, Field_t, NFIELD + 1, 0)) ||
201*b30d1939SAndy Fiddaman 		    !(jp->file[1].fields = newof(0, Field_t, NFIELD + 1, 0)))
202*b30d1939SAndy Fiddaman 		{
203*b30d1939SAndy Fiddaman 			done(jp);
204*b30d1939SAndy Fiddaman 			return 0;
205*b30d1939SAndy Fiddaman 		}
206*b30d1939SAndy Fiddaman 		jp->file[0].maxfields = NFIELD;
207*b30d1939SAndy Fiddaman 		jp->file[1].maxfields = NFIELD;
208*b30d1939SAndy Fiddaman 		jp->outmode = C_COMMON;
209*b30d1939SAndy Fiddaman 	}
210*b30d1939SAndy Fiddaman 	return jp;
211*b30d1939SAndy Fiddaman }
212*b30d1939SAndy Fiddaman 
213*b30d1939SAndy Fiddaman static int
getolist(Join_t * jp,const char * first,char ** arglist)214*b30d1939SAndy Fiddaman getolist(Join_t* jp, const char* first, char** arglist)
215*b30d1939SAndy Fiddaman {
216*b30d1939SAndy Fiddaman 	register const char*	cp = first;
217*b30d1939SAndy Fiddaman 	char**			argv = arglist;
218*b30d1939SAndy Fiddaman 	register int		c;
219*b30d1939SAndy Fiddaman 	int*			outptr;
220*b30d1939SAndy Fiddaman 	int*			outmax;
221*b30d1939SAndy Fiddaman 	int			nfield = NFIELD;
222*b30d1939SAndy Fiddaman 	char*			str;
223*b30d1939SAndy Fiddaman 
224*b30d1939SAndy Fiddaman 	outptr = jp->outlist = newof(0, int, NFIELD + 1, 0);
225*b30d1939SAndy Fiddaman 	outmax = outptr + NFIELD;
226*b30d1939SAndy Fiddaman 	while (c = *cp++)
227*b30d1939SAndy Fiddaman 	{
228*b30d1939SAndy Fiddaman 		if (c==' ' || c=='\t' || c==',')
229*b30d1939SAndy Fiddaman 			continue;
230*b30d1939SAndy Fiddaman 		str = (char*)--cp;
231*b30d1939SAndy Fiddaman 		if (*cp=='0' && ((c=cp[1])==0 || c==' ' || c=='\t' || c==','))
232*b30d1939SAndy Fiddaman 		{
233*b30d1939SAndy Fiddaman 			str++;
234*b30d1939SAndy Fiddaman 			c = JOINFIELD;
235*b30d1939SAndy Fiddaman 			goto skip;
236*b30d1939SAndy Fiddaman 		}
237*b30d1939SAndy Fiddaman 		if (cp[1]!='.' || (*cp!='1' && *cp!='2') || (c=strtol(cp+2,&str,10)) <=0)
238*b30d1939SAndy Fiddaman 		{
239*b30d1939SAndy Fiddaman 			error(2,"%s: invalid field list",first);
240*b30d1939SAndy Fiddaman 			break;
241*b30d1939SAndy Fiddaman 		}
242*b30d1939SAndy Fiddaman 		c--;
243*b30d1939SAndy Fiddaman 		c <<=2;
244*b30d1939SAndy Fiddaman 		if (*cp=='2')
245*b30d1939SAndy Fiddaman 			c |=1;
246*b30d1939SAndy Fiddaman 	skip:
247*b30d1939SAndy Fiddaman 		if (outptr >= outmax)
248*b30d1939SAndy Fiddaman 		{
249*b30d1939SAndy Fiddaman 			jp->outlist = newof(jp->outlist, int, 2 * nfield + 1, 0);
250*b30d1939SAndy Fiddaman 			outptr = jp->outlist + nfield;
251*b30d1939SAndy Fiddaman 			nfield *= 2;
252*b30d1939SAndy Fiddaman 			outmax = jp->outlist + nfield;
253*b30d1939SAndy Fiddaman 		}
254*b30d1939SAndy Fiddaman 		*outptr++ = c;
255*b30d1939SAndy Fiddaman 		cp = str;
256*b30d1939SAndy Fiddaman 	}
257*b30d1939SAndy Fiddaman 	/* need to accept obsolescent command syntax */
258*b30d1939SAndy Fiddaman 	while (cp = *argv)
259*b30d1939SAndy Fiddaman 	{
260*b30d1939SAndy Fiddaman 		if (cp[1]!='.' || (*cp!='1' && *cp!='2'))
261*b30d1939SAndy Fiddaman 		{
262*b30d1939SAndy Fiddaman 			if (*cp=='0' && cp[1]==0)
263*b30d1939SAndy Fiddaman 			{
264*b30d1939SAndy Fiddaman 				c = JOINFIELD;
265*b30d1939SAndy Fiddaman 				goto skip2;
266*b30d1939SAndy Fiddaman 			}
267*b30d1939SAndy Fiddaman 			break;
268*b30d1939SAndy Fiddaman 		}
269*b30d1939SAndy Fiddaman 		str = (char*)cp;
270*b30d1939SAndy Fiddaman 		c = strtol(cp+2, &str,10);
271*b30d1939SAndy Fiddaman 		if (*str || --c<0)
272*b30d1939SAndy Fiddaman 			break;
273*b30d1939SAndy Fiddaman 		argv++;
274*b30d1939SAndy Fiddaman 		c <<= 2;
275*b30d1939SAndy Fiddaman 		if (*cp=='2')
276*b30d1939SAndy Fiddaman 			c |=1;
277*b30d1939SAndy Fiddaman 	skip2:
278*b30d1939SAndy Fiddaman 		if (outptr >= outmax)
279*b30d1939SAndy Fiddaman 		{
280*b30d1939SAndy Fiddaman 			jp->outlist = newof(jp->outlist, int, 2 * nfield + 1, 0);
281*b30d1939SAndy Fiddaman 			outptr = jp->outlist + nfield;
282*b30d1939SAndy Fiddaman 			nfield *= 2;
283*b30d1939SAndy Fiddaman 			outmax = jp->outlist + nfield;
284*b30d1939SAndy Fiddaman 		}
285*b30d1939SAndy Fiddaman 		*outptr++ = c;
286*b30d1939SAndy Fiddaman 	}
287*b30d1939SAndy Fiddaman 	*outptr = -1;
288*b30d1939SAndy Fiddaman 	return argv-arglist;
289*b30d1939SAndy Fiddaman }
290*b30d1939SAndy Fiddaman 
291*b30d1939SAndy Fiddaman /*
292*b30d1939SAndy Fiddaman  * read in a record from file <index> and split into fields
293*b30d1939SAndy Fiddaman  */
294*b30d1939SAndy Fiddaman static unsigned char*
getrec(Join_t * jp,int index,int discard)295*b30d1939SAndy Fiddaman getrec(Join_t* jp, int index, int discard)
296*b30d1939SAndy Fiddaman {
297*b30d1939SAndy Fiddaman 	register unsigned char*	sp = jp->state;
298*b30d1939SAndy Fiddaman 	register File_t*	fp = &jp->file[index];
299*b30d1939SAndy Fiddaman 	register Field_t*	field = fp->fields;
300*b30d1939SAndy Fiddaman 	register Field_t*	fieldmax = field + fp->maxfields;
301*b30d1939SAndy Fiddaman 	register char*		cp;
302*b30d1939SAndy Fiddaman 	register int		n;
303*b30d1939SAndy Fiddaman 	char*			tp;
304*b30d1939SAndy Fiddaman 
305*b30d1939SAndy Fiddaman 	if (sh_checksig(jp->context))
306*b30d1939SAndy Fiddaman 		return 0;
307*b30d1939SAndy Fiddaman 	if (discard && fp->discard)
308*b30d1939SAndy Fiddaman 		sfraise(fp->iop, SFSK_DISCARD, NiL);
309*b30d1939SAndy Fiddaman 	fp->spaces = 0;
310*b30d1939SAndy Fiddaman 	fp->hit = 0;
311*b30d1939SAndy Fiddaman 	if (!(cp = sfgetr(fp->iop, '\n', 0)))
312*b30d1939SAndy Fiddaman 	{
313*b30d1939SAndy Fiddaman 		jp->outmode &= ~(1<<index);
314*b30d1939SAndy Fiddaman 		return 0;
315*b30d1939SAndy Fiddaman 	}
316*b30d1939SAndy Fiddaman 	fp->recptr = cp;
317*b30d1939SAndy Fiddaman 	fp->reclen = sfvalue(fp->iop);
318*b30d1939SAndy Fiddaman 	if (jp->delim == '\n')	/* handle new-line delimiter specially */
319*b30d1939SAndy Fiddaman 	{
320*b30d1939SAndy Fiddaman 		field->beg = cp;
321*b30d1939SAndy Fiddaman 		cp += fp->reclen;
322*b30d1939SAndy Fiddaman 		field->end = cp - 1;
323*b30d1939SAndy Fiddaman 		field++;
324*b30d1939SAndy Fiddaman 	}
325*b30d1939SAndy Fiddaman 	else
326*b30d1939SAndy Fiddaman 		do /* separate into fields */
327*b30d1939SAndy Fiddaman 		{
328*b30d1939SAndy Fiddaman 			if (field >= fieldmax)
329*b30d1939SAndy Fiddaman 			{
330*b30d1939SAndy Fiddaman 				n = 2 * fp->maxfields;
331*b30d1939SAndy Fiddaman 				fp->fields = newof(fp->fields, Field_t, n + 1, 0);
332*b30d1939SAndy Fiddaman 				field = fp->fields + fp->maxfields;
333*b30d1939SAndy Fiddaman 				fp->maxfields = n;
334*b30d1939SAndy Fiddaman 				fieldmax = fp->fields + n;
335*b30d1939SAndy Fiddaman 			}
336*b30d1939SAndy Fiddaman 			field->beg = cp;
337*b30d1939SAndy Fiddaman 			if (jp->delim == -1)
338*b30d1939SAndy Fiddaman 			{
339*b30d1939SAndy Fiddaman 				switch (sp[*(unsigned char*)cp])
340*b30d1939SAndy Fiddaman 				{
341*b30d1939SAndy Fiddaman 				case S_SPACE:
342*b30d1939SAndy Fiddaman 					cp++;
343*b30d1939SAndy Fiddaman 					break;
344*b30d1939SAndy Fiddaman 				case S_WIDE:
345*b30d1939SAndy Fiddaman 					tp = cp;
346*b30d1939SAndy Fiddaman 					if (iswspace(mbchar(tp)))
347*b30d1939SAndy Fiddaman 					{
348*b30d1939SAndy Fiddaman 						cp = tp;
349*b30d1939SAndy Fiddaman 						break;
350*b30d1939SAndy Fiddaman 					}
351*b30d1939SAndy Fiddaman 					/*FALLTHROUGH*/
352*b30d1939SAndy Fiddaman 				default:
353*b30d1939SAndy Fiddaman 					goto next;
354*b30d1939SAndy Fiddaman 				}
355*b30d1939SAndy Fiddaman 				fp->spaces = 1;
356*b30d1939SAndy Fiddaman 				if (jp->mb)
357*b30d1939SAndy Fiddaman 					for (;;)
358*b30d1939SAndy Fiddaman 					{
359*b30d1939SAndy Fiddaman 						switch (sp[*(unsigned char*)cp++])
360*b30d1939SAndy Fiddaman 						{
361*b30d1939SAndy Fiddaman 						case S_SPACE:
362*b30d1939SAndy Fiddaman 							continue;
363*b30d1939SAndy Fiddaman 						case S_WIDE:
364*b30d1939SAndy Fiddaman 							tp = cp - 1;
365*b30d1939SAndy Fiddaman 							if (iswspace(mbchar(tp)))
366*b30d1939SAndy Fiddaman 							{
367*b30d1939SAndy Fiddaman 								cp = tp;
368*b30d1939SAndy Fiddaman 								continue;
369*b30d1939SAndy Fiddaman 							}
370*b30d1939SAndy Fiddaman 							break;
371*b30d1939SAndy Fiddaman 						}
372*b30d1939SAndy Fiddaman 						break;
373*b30d1939SAndy Fiddaman 					}
374*b30d1939SAndy Fiddaman 				else
375*b30d1939SAndy Fiddaman 					while (sp[*(unsigned char*)cp++]==S_SPACE);
376*b30d1939SAndy Fiddaman 				cp--;
377*b30d1939SAndy Fiddaman 			}
378*b30d1939SAndy Fiddaman 		next:
379*b30d1939SAndy Fiddaman 			if (jp->mb)
380*b30d1939SAndy Fiddaman 			{
381*b30d1939SAndy Fiddaman 				for (;;)
382*b30d1939SAndy Fiddaman 				{
383*b30d1939SAndy Fiddaman 					tp = cp;
384*b30d1939SAndy Fiddaman 					switch (n = sp[*(unsigned char*)cp++])
385*b30d1939SAndy Fiddaman 					{
386*b30d1939SAndy Fiddaman 					case 0:
387*b30d1939SAndy Fiddaman 						continue;
388*b30d1939SAndy Fiddaman 					case S_WIDE:
389*b30d1939SAndy Fiddaman 						cp--;
390*b30d1939SAndy Fiddaman 						n = mbchar(cp);
391*b30d1939SAndy Fiddaman 						if (n == jp->delim)
392*b30d1939SAndy Fiddaman 						{
393*b30d1939SAndy Fiddaman 							n = S_DELIM;
394*b30d1939SAndy Fiddaman 							break;
395*b30d1939SAndy Fiddaman 						}
396*b30d1939SAndy Fiddaman 						if (jp->delim == -1 && iswspace(n))
397*b30d1939SAndy Fiddaman 						{
398*b30d1939SAndy Fiddaman 							n = S_SPACE;
399*b30d1939SAndy Fiddaman 							break;
400*b30d1939SAndy Fiddaman 						}
401*b30d1939SAndy Fiddaman 						continue;
402*b30d1939SAndy Fiddaman 					}
403*b30d1939SAndy Fiddaman 					break;
404*b30d1939SAndy Fiddaman 				}
405*b30d1939SAndy Fiddaman 				field->end = tp;
406*b30d1939SAndy Fiddaman 			}
407*b30d1939SAndy Fiddaman 			else
408*b30d1939SAndy Fiddaman 			{
409*b30d1939SAndy Fiddaman 				while (!(n = sp[*(unsigned char*)cp++]));
410*b30d1939SAndy Fiddaman 				field->end = cp - 1;
411*b30d1939SAndy Fiddaman 			}
412*b30d1939SAndy Fiddaman 			field++;
413*b30d1939SAndy Fiddaman 		} while (n != S_NL);
414*b30d1939SAndy Fiddaman 	fp->nfields = field - fp->fields;
415*b30d1939SAndy Fiddaman 	if ((n = fp->field) < fp->nfields)
416*b30d1939SAndy Fiddaman 	{
417*b30d1939SAndy Fiddaman 		cp = fp->fields[n].beg;
418*b30d1939SAndy Fiddaman 		/* eliminate leading spaces */
419*b30d1939SAndy Fiddaman 		if (fp->spaces)
420*b30d1939SAndy Fiddaman 		{
421*b30d1939SAndy Fiddaman 			if (jp->mb)
422*b30d1939SAndy Fiddaman 				for (;;)
423*b30d1939SAndy Fiddaman 				{
424*b30d1939SAndy Fiddaman 					switch (sp[*(unsigned char*)cp++])
425*b30d1939SAndy Fiddaman 					{
426*b30d1939SAndy Fiddaman 					case S_SPACE:
427*b30d1939SAndy Fiddaman 						continue;
428*b30d1939SAndy Fiddaman 					case S_WIDE:
429*b30d1939SAndy Fiddaman 						tp = cp - 1;
430*b30d1939SAndy Fiddaman 						if (iswspace(mbchar(tp)))
431*b30d1939SAndy Fiddaman 						{
432*b30d1939SAndy Fiddaman 							cp = tp;
433*b30d1939SAndy Fiddaman 							continue;
434*b30d1939SAndy Fiddaman 						}
435*b30d1939SAndy Fiddaman 						break;
436*b30d1939SAndy Fiddaman 					}
437*b30d1939SAndy Fiddaman 					break;
438*b30d1939SAndy Fiddaman 				}
439*b30d1939SAndy Fiddaman 			else
440*b30d1939SAndy Fiddaman 				while (sp[*(unsigned char*)cp++]==S_SPACE);
441*b30d1939SAndy Fiddaman 			cp--;
442*b30d1939SAndy Fiddaman 		}
443*b30d1939SAndy Fiddaman 		fp->fieldlen = fp->fields[n].end - cp;
444*b30d1939SAndy Fiddaman 		return (unsigned char*)cp;
445*b30d1939SAndy Fiddaman 	}
446*b30d1939SAndy Fiddaman 	fp->fieldlen = 0;
447*b30d1939SAndy Fiddaman 	return (unsigned char*)"";
448*b30d1939SAndy Fiddaman }
449*b30d1939SAndy Fiddaman 
#if DEBUG_TRACE
/*
 * trace wrapper: calls getrec() and prints the source line, the file
 * index, the stream offset and up to 8 bytes of the returned join
 * field on sfstdout
 */
static unsigned char* u1;
#define getrec(p,n,d)	(u1 = getrec(p, n, d), sfprintf(sfstdout, "[G%d#%d@%I*d:%-.8s]", __LINE__, n, sizeof(Sfoff_t), sftell(p->file[n].iop), u1), u1)
#endif
454*b30d1939SAndy Fiddaman 
455*b30d1939SAndy Fiddaman /*
456*b30d1939SAndy Fiddaman  * print field <n> from file <index>
457*b30d1939SAndy Fiddaman  */
458*b30d1939SAndy Fiddaman static int
outfield(Join_t * jp,int index,register int n,int last)459*b30d1939SAndy Fiddaman outfield(Join_t* jp, int index, register int n, int last)
460*b30d1939SAndy Fiddaman {
461*b30d1939SAndy Fiddaman 	register File_t*	fp = &jp->file[index];
462*b30d1939SAndy Fiddaman 	register char*		cp;
463*b30d1939SAndy Fiddaman 	register char*		cpmax;
464*b30d1939SAndy Fiddaman 	register int		size;
465*b30d1939SAndy Fiddaman 	register Sfio_t*	iop = jp->outfile;
466*b30d1939SAndy Fiddaman 	char*			tp;
467*b30d1939SAndy Fiddaman 
468*b30d1939SAndy Fiddaman 	if (n < fp->nfields)
469*b30d1939SAndy Fiddaman 	{
470*b30d1939SAndy Fiddaman 		cp = fp->fields[n].beg;
471*b30d1939SAndy Fiddaman 		cpmax = fp->fields[n].end + 1;
472*b30d1939SAndy Fiddaman 	}
473*b30d1939SAndy Fiddaman 	else
474*b30d1939SAndy Fiddaman 		cp = 0;
475*b30d1939SAndy Fiddaman 	if ((n = jp->delim) == -1)
476*b30d1939SAndy Fiddaman 	{
477*b30d1939SAndy Fiddaman 		if (cp && fp->spaces)
478*b30d1939SAndy Fiddaman 		{
479*b30d1939SAndy Fiddaman 			register unsigned char*	sp = jp->state;
480*b30d1939SAndy Fiddaman 
481*b30d1939SAndy Fiddaman 			/*eliminate leading spaces */
482*b30d1939SAndy Fiddaman 			if (jp->mb)
483*b30d1939SAndy Fiddaman 				for (;;)
484*b30d1939SAndy Fiddaman 				{
485*b30d1939SAndy Fiddaman 					switch (sp[*(unsigned char*)cp++])
486*b30d1939SAndy Fiddaman 					{
487*b30d1939SAndy Fiddaman 					case S_SPACE:
488*b30d1939SAndy Fiddaman 						continue;
489*b30d1939SAndy Fiddaman 					case S_WIDE:
490*b30d1939SAndy Fiddaman 						tp = cp - 1;
491*b30d1939SAndy Fiddaman 						if (iswspace(mbchar(tp)))
492*b30d1939SAndy Fiddaman 						{
493*b30d1939SAndy Fiddaman 							cp = tp;
494*b30d1939SAndy Fiddaman 							continue;
495*b30d1939SAndy Fiddaman 						}
496*b30d1939SAndy Fiddaman 						break;
497*b30d1939SAndy Fiddaman 					}
498*b30d1939SAndy Fiddaman 					break;
499*b30d1939SAndy Fiddaman 				}
500*b30d1939SAndy Fiddaman 			else
501*b30d1939SAndy Fiddaman 				while (sp[*(unsigned char*)cp++]==S_SPACE);
502*b30d1939SAndy Fiddaman 			cp--;
503*b30d1939SAndy Fiddaman 		}
504*b30d1939SAndy Fiddaman 		n = ' ';
505*b30d1939SAndy Fiddaman 	}
506*b30d1939SAndy Fiddaman 	else if (jp->delimstr)
507*b30d1939SAndy Fiddaman 		n = -1;
508*b30d1939SAndy Fiddaman 	if (last)
509*b30d1939SAndy Fiddaman 		n = '\n';
510*b30d1939SAndy Fiddaman 	if (cp)
511*b30d1939SAndy Fiddaman 		size = cpmax - cp;
512*b30d1939SAndy Fiddaman 	else
513*b30d1939SAndy Fiddaman 		size = 0;
514*b30d1939SAndy Fiddaman 	if (n == -1)
515*b30d1939SAndy Fiddaman 	{
516*b30d1939SAndy Fiddaman 		if (size<=1)
517*b30d1939SAndy Fiddaman 		{
518*b30d1939SAndy Fiddaman 			if (jp->nullfield && sfputr(iop, jp->nullfield, -1) < 0)
519*b30d1939SAndy Fiddaman 				return -1;
520*b30d1939SAndy Fiddaman 		}
521*b30d1939SAndy Fiddaman 		else if (sfwrite(iop, cp, size) < 0)
522*b30d1939SAndy Fiddaman 			return -1;
523*b30d1939SAndy Fiddaman 		if (sfwrite(iop, jp->delimstr, jp->delimlen) < 0)
524*b30d1939SAndy Fiddaman 			return -1;
525*b30d1939SAndy Fiddaman 	}
526*b30d1939SAndy Fiddaman 	else if (size <= 1)
527*b30d1939SAndy Fiddaman 	{
528*b30d1939SAndy Fiddaman 		if (!jp->nullfield)
529*b30d1939SAndy Fiddaman 			sfputc(iop, n);
530*b30d1939SAndy Fiddaman 		else if (sfputr(iop, jp->nullfield, n) < 0)
531*b30d1939SAndy Fiddaman 			return -1;
532*b30d1939SAndy Fiddaman 	}
533*b30d1939SAndy Fiddaman 	else
534*b30d1939SAndy Fiddaman 	{
535*b30d1939SAndy Fiddaman 		last = cp[size-1];
536*b30d1939SAndy Fiddaman 		cp[size-1] = n;
537*b30d1939SAndy Fiddaman 		if (sfwrite(iop, cp, size) < 0)
538*b30d1939SAndy Fiddaman 			return -1;
539*b30d1939SAndy Fiddaman 		cp[size-1] = last;
540*b30d1939SAndy Fiddaman 	}
541*b30d1939SAndy Fiddaman 	return 0;
542*b30d1939SAndy Fiddaman }
543*b30d1939SAndy Fiddaman 
#if DEBUG_TRACE
/*
 * trace wrapper: prints the source line and the arguments on sfstdout
 * and then calls outfield(); i1..i3 hold the arguments so each is
 * evaluated only once
 */
static int i1,i2,i3;
#define outfield(p,i,n,f)	(sfprintf(sfstdout, "[F%d#%d:%d,%d]", __LINE__, i1=i, i2=n, i3=f), outfield(p, i1, i2, i3))
#endif
548*b30d1939SAndy Fiddaman 
549*b30d1939SAndy Fiddaman static int
outrec(register Join_t * jp,int mode)550*b30d1939SAndy Fiddaman outrec(register Join_t* jp, int mode)
551*b30d1939SAndy Fiddaman {
552*b30d1939SAndy Fiddaman 	register File_t*	fp;
553*b30d1939SAndy Fiddaman 	register int		i;
554*b30d1939SAndy Fiddaman 	register int		j;
555*b30d1939SAndy Fiddaman 	register int		k;
556*b30d1939SAndy Fiddaman 	register int		n;
557*b30d1939SAndy Fiddaman 	int*			out;
558*b30d1939SAndy Fiddaman 
559*b30d1939SAndy Fiddaman 	if (mode < 0 && jp->file[0].hit++)
560*b30d1939SAndy Fiddaman 		return 0;
561*b30d1939SAndy Fiddaman 	if (mode > 0 && jp->file[1].hit++)
562*b30d1939SAndy Fiddaman 		return 0;
563*b30d1939SAndy Fiddaman 	if (out = jp->outlist)
564*b30d1939SAndy Fiddaman 	{
565*b30d1939SAndy Fiddaman 		while ((n = *out++) >= 0)
566*b30d1939SAndy Fiddaman 		{
567*b30d1939SAndy Fiddaman 			if (n == JOINFIELD)
568*b30d1939SAndy Fiddaman 			{
569*b30d1939SAndy Fiddaman 				i = mode >= 0;
570*b30d1939SAndy Fiddaman 				j = jp->file[i].field;
571*b30d1939SAndy Fiddaman 			}
572*b30d1939SAndy Fiddaman 			else
573*b30d1939SAndy Fiddaman 			{
574*b30d1939SAndy Fiddaman 				i = n & 1;
575*b30d1939SAndy Fiddaman 				j = (mode<0 && i || mode>0 && !i) ?
576*b30d1939SAndy Fiddaman 					jp->file[i].nfields :
577*b30d1939SAndy Fiddaman 					n >> 2;
578*b30d1939SAndy Fiddaman 			}
579*b30d1939SAndy Fiddaman 			if (outfield(jp, i, j, *out < 0) < 0)
580*b30d1939SAndy Fiddaman 				return -1;
581*b30d1939SAndy Fiddaman 		}
582*b30d1939SAndy Fiddaman 		return 0;
583*b30d1939SAndy Fiddaman 	}
584*b30d1939SAndy Fiddaman 	k = jp->file[0].nfields;
585*b30d1939SAndy Fiddaman 	if (mode >= 0)
586*b30d1939SAndy Fiddaman 		k += jp->file[1].nfields - 1;
587*b30d1939SAndy Fiddaman 	for (i=0; i<2; i++)
588*b30d1939SAndy Fiddaman 	{
589*b30d1939SAndy Fiddaman 		fp = &jp->file[i];
590*b30d1939SAndy Fiddaman 		if (mode>0 && i==0)
591*b30d1939SAndy Fiddaman 		{
592*b30d1939SAndy Fiddaman 			k -= (fp->nfields - 1);
593*b30d1939SAndy Fiddaman 			continue;
594*b30d1939SAndy Fiddaman 		}
595*b30d1939SAndy Fiddaman 		n = fp->field;
596*b30d1939SAndy Fiddaman 		if (mode||i==0)
597*b30d1939SAndy Fiddaman 		{
598*b30d1939SAndy Fiddaman 			/* output join field first */
599*b30d1939SAndy Fiddaman 			if (outfield(jp,i,n,!--k) < 0)
600*b30d1939SAndy Fiddaman 				return -1;
601*b30d1939SAndy Fiddaman 			if (!k)
602*b30d1939SAndy Fiddaman 				return 0;
603*b30d1939SAndy Fiddaman 			for (j=0; j<n; j++)
604*b30d1939SAndy Fiddaman 			{
605*b30d1939SAndy Fiddaman 				if (outfield(jp,i,j,!--k) < 0)
606*b30d1939SAndy Fiddaman 					return -1;
607*b30d1939SAndy Fiddaman 				if (!k)
608*b30d1939SAndy Fiddaman 					return 0;
609*b30d1939SAndy Fiddaman 			}
610*b30d1939SAndy Fiddaman 			j = n + 1;
611*b30d1939SAndy Fiddaman 		}
612*b30d1939SAndy Fiddaman 		else
613*b30d1939SAndy Fiddaman 			j = 0;
614*b30d1939SAndy Fiddaman 		for (;j<fp->nfields; j++)
615*b30d1939SAndy Fiddaman 		{
616*b30d1939SAndy Fiddaman 			if (j!=n && outfield(jp,i,j,!--k) < 0)
617*b30d1939SAndy Fiddaman 				return -1;
618*b30d1939SAndy Fiddaman 			if (!k)
619*b30d1939SAndy Fiddaman 				return 0;
620*b30d1939SAndy Fiddaman 		}
621*b30d1939SAndy Fiddaman 	}
622*b30d1939SAndy Fiddaman 	return 0;
623*b30d1939SAndy Fiddaman }
624*b30d1939SAndy Fiddaman 
#if DEBUG_TRACE
/*
 * trace wrapper: prints the comparison state on sfstdout and then
 * calls outrec(); it uses the names jp, lo, hi, cp1 and cp2 of the
 * calling function and the i1 temporary declared with the outfield
 * trace macro
 */
#define outrec(p,n)	(sfprintf(sfstdout, "[R#%d,%d,%lld,%lld:%-.*s{%d}:%-.*s{%d}]", __LINE__, i1=n, lo, hi, jp->file[0].fieldlen, cp1, jp->file[0].hit, jp->file[1].fieldlen, cp2, jp->file[1].hit), outrec(p, i1))
#endif
628*b30d1939SAndy Fiddaman 
629*b30d1939SAndy Fiddaman static int
join(Join_t * jp)630*b30d1939SAndy Fiddaman join(Join_t* jp)
631*b30d1939SAndy Fiddaman {
632*b30d1939SAndy Fiddaman 	register unsigned char*	cp1;
633*b30d1939SAndy Fiddaman 	register unsigned char*	cp2;
634*b30d1939SAndy Fiddaman 	register int		n1;
635*b30d1939SAndy Fiddaman 	register int		n2;
636*b30d1939SAndy Fiddaman 	register int		n;
637*b30d1939SAndy Fiddaman 	register int		cmp;
638*b30d1939SAndy Fiddaman 	register int		same;
639*b30d1939SAndy Fiddaman 	int			o2;
640*b30d1939SAndy Fiddaman 	Sfoff_t			lo = -1;
641*b30d1939SAndy Fiddaman 	Sfoff_t			hi = -1;
642*b30d1939SAndy Fiddaman 
643*b30d1939SAndy Fiddaman 	if ((cp1 = getrec(jp, 0, 0)) && (cp2 = getrec(jp, 1, 0)) || (cp2 = 0))
644*b30d1939SAndy Fiddaman 	{
645*b30d1939SAndy Fiddaman 		n1 = jp->file[0].fieldlen;
646*b30d1939SAndy Fiddaman 		n2 = jp->file[1].fieldlen;
647*b30d1939SAndy Fiddaman 		same = 0;
648*b30d1939SAndy Fiddaman 		for (;;)
649*b30d1939SAndy Fiddaman 		{
650*b30d1939SAndy Fiddaman 			n = n1 < n2 ? n1 : n2;
651*b30d1939SAndy Fiddaman #if DEBUG_TRACE
652*b30d1939SAndy Fiddaman 			if (!n && !(cmp = n1 < n2 ? -1 : (n1 > n2)) || n && !(cmp = (int)*cp1 - (int)*cp2) && !(cmp = jp->ignorecase ? strncasecmp((char*)cp1, (char*)cp2, n) : memcmp(cp1, cp2, n)))
653*b30d1939SAndy Fiddaman 				cmp = n1 - n2;
654*b30d1939SAndy Fiddaman sfprintf(sfstdout, "[C#%d:%d(%c-%c),%d,%lld,%lld%s]", __LINE__, cmp, *cp1, *cp2, same, lo, hi, (jp->outmode & C_COMMON) ? ",COMMON" : "");
655*b30d1939SAndy Fiddaman 			if (!cmp)
656*b30d1939SAndy Fiddaman #else
657*b30d1939SAndy Fiddaman 			if (!n && !(cmp = n1 < n2 ? -1 : (n1 > n2)) || n && !(cmp = (int)*cp1 - (int)*cp2) && !(cmp = jp->ignorecase ? strncasecmp((char*)cp1, (char*)cp2, n) : memcmp(cp1, cp2, n)) && !(cmp = n1 - n2))
658*b30d1939SAndy Fiddaman #endif
659*b30d1939SAndy Fiddaman 			{
660*b30d1939SAndy Fiddaman 				if (!(jp->outmode & C_COMMON))
661*b30d1939SAndy Fiddaman 				{
662*b30d1939SAndy Fiddaman 					if (cp1 = getrec(jp, 0, 1))
663*b30d1939SAndy Fiddaman 					{
664*b30d1939SAndy Fiddaman 						n1 = jp->file[0].fieldlen;
665*b30d1939SAndy Fiddaman 						same = 1;
666*b30d1939SAndy Fiddaman 						continue;
667*b30d1939SAndy Fiddaman 					}
668*b30d1939SAndy Fiddaman 					if ((jp->ooutmode & (C_FILE1|C_FILE2)) != C_FILE2)
669*b30d1939SAndy Fiddaman 						break;
670*b30d1939SAndy Fiddaman 					if (sfseek(jp->file[0].iop, (Sfoff_t)-jp->file[0].reclen, SEEK_CUR) < 0 || !(cp1 = getrec(jp, 0, 0)))
671*b30d1939SAndy Fiddaman 					{
672*b30d1939SAndy Fiddaman 						error(ERROR_SYSTEM|2, "%s: seek error", jp->file[0].name);
673*b30d1939SAndy Fiddaman 						return -1;
674*b30d1939SAndy Fiddaman 					}
675*b30d1939SAndy Fiddaman 				}
676*b30d1939SAndy Fiddaman 				else if (outrec(jp, 0) < 0)
677*b30d1939SAndy Fiddaman 					return -1;
678*b30d1939SAndy Fiddaman 				else if (lo < 0 && (jp->outmode & C_COMMON))
679*b30d1939SAndy Fiddaman 				{
680*b30d1939SAndy Fiddaman 					if ((lo = sfseek(jp->file[1].iop, (Sfoff_t)0, SEEK_CUR)) < 0)
681*b30d1939SAndy Fiddaman 					{
682*b30d1939SAndy Fiddaman 						error(ERROR_SYSTEM|2, "%s: seek error", jp->file[1].name);
683*b30d1939SAndy Fiddaman 						return -1;
684*b30d1939SAndy Fiddaman 					}
685*b30d1939SAndy Fiddaman 					lo -= jp->file[1].reclen;
686*b30d1939SAndy Fiddaman 				}
687*b30d1939SAndy Fiddaman 				if (cp2 = getrec(jp, 1, lo < 0))
688*b30d1939SAndy Fiddaman 				{
689*b30d1939SAndy Fiddaman 					n2 = jp->file[1].fieldlen;
690*b30d1939SAndy Fiddaman 					continue;
691*b30d1939SAndy Fiddaman 				}
692*b30d1939SAndy Fiddaman #if DEBUG_TRACE
693*b30d1939SAndy Fiddaman sfprintf(sfstdout, "[2#%d:0,%lld,%lld]", __LINE__, lo, hi);
694*b30d1939SAndy Fiddaman #endif
695*b30d1939SAndy Fiddaman 			}
696*b30d1939SAndy Fiddaman 			else if (cmp > 0)
697*b30d1939SAndy Fiddaman 			{
698*b30d1939SAndy Fiddaman 				if (same)
699*b30d1939SAndy Fiddaman 				{
700*b30d1939SAndy Fiddaman 					same = 0;
701*b30d1939SAndy Fiddaman 				next:
702*b30d1939SAndy Fiddaman 					if (n2 > jp->samesize)
703*b30d1939SAndy Fiddaman 					{
704*b30d1939SAndy Fiddaman 						jp->samesize = roundof(n2, 16);
705*b30d1939SAndy Fiddaman 						if (!(jp->same = newof(jp->same, char, jp->samesize, 0)))
706*b30d1939SAndy Fiddaman 						{
707*b30d1939SAndy Fiddaman 							error(ERROR_SYSTEM|2, "out of space");
708*b30d1939SAndy Fiddaman 							return -1;
709*b30d1939SAndy Fiddaman 						}
710*b30d1939SAndy Fiddaman 					}
711*b30d1939SAndy Fiddaman 					memcpy(jp->same, cp2, o2 = n2);
712*b30d1939SAndy Fiddaman 					if (!(cp2 = getrec(jp, 1, 0)))
713*b30d1939SAndy Fiddaman 						break;
714*b30d1939SAndy Fiddaman 					n2 = jp->file[1].fieldlen;
715*b30d1939SAndy Fiddaman 					if (n2 == o2 && *cp2 == *jp->same && !memcmp(cp2, jp->same, n2))
716*b30d1939SAndy Fiddaman 						goto next;
717*b30d1939SAndy Fiddaman 					continue;
718*b30d1939SAndy Fiddaman 				}
719*b30d1939SAndy Fiddaman 				if (hi >= 0)
720*b30d1939SAndy Fiddaman 				{
721*b30d1939SAndy Fiddaman 					if (sfseek(jp->file[1].iop, hi, SEEK_SET) != hi)
722*b30d1939SAndy Fiddaman 					{
723*b30d1939SAndy Fiddaman 						error(ERROR_SYSTEM|2, "%s: seek error", jp->file[1].name);
724*b30d1939SAndy Fiddaman 						return -1;
725*b30d1939SAndy Fiddaman 					}
726*b30d1939SAndy Fiddaman 					hi = -1;
727*b30d1939SAndy Fiddaman 				}
728*b30d1939SAndy Fiddaman 				else if ((jp->outmode & C_FILE2) && outrec(jp, 1) < 0)
729*b30d1939SAndy Fiddaman 					return -1;
730*b30d1939SAndy Fiddaman 				lo = -1;
731*b30d1939SAndy Fiddaman 				if (cp2 = getrec(jp, 1, 1))
732*b30d1939SAndy Fiddaman 				{
733*b30d1939SAndy Fiddaman 					n2 = jp->file[1].fieldlen;
734*b30d1939SAndy Fiddaman 					continue;
735*b30d1939SAndy Fiddaman 				}
736*b30d1939SAndy Fiddaman #if DEBUG_TRACE
737*b30d1939SAndy Fiddaman sfprintf(sfstdout, "[2#%d:0,%lld,%lld]", __LINE__, lo, hi);
738*b30d1939SAndy Fiddaman #endif
739*b30d1939SAndy Fiddaman 			}
740*b30d1939SAndy Fiddaman 			else if (same)
741*b30d1939SAndy Fiddaman 			{
742*b30d1939SAndy Fiddaman 				same = 0;
743*b30d1939SAndy Fiddaman 				if (!(cp1 = getrec(jp, 0, 0)))
744*b30d1939SAndy Fiddaman 					break;
745*b30d1939SAndy Fiddaman 				n1 = jp->file[0].fieldlen;
746*b30d1939SAndy Fiddaman 				continue;
747*b30d1939SAndy Fiddaman 			}
748*b30d1939SAndy Fiddaman 			if (lo >= 0)
749*b30d1939SAndy Fiddaman 			{
750*b30d1939SAndy Fiddaman 				if ((hi = sfseek(jp->file[1].iop, (Sfoff_t)0, SEEK_CUR)) < 0 ||
751*b30d1939SAndy Fiddaman 				    (hi -= jp->file[1].reclen) < 0 ||
752*b30d1939SAndy Fiddaman 				    sfseek(jp->file[1].iop, lo, SEEK_SET) != lo ||
753*b30d1939SAndy Fiddaman 				    !(cp2 = getrec(jp, 1, 0)))
754*b30d1939SAndy Fiddaman 				{
755*b30d1939SAndy Fiddaman 					error(ERROR_SYSTEM|2, "%s: seek error", jp->file[1].name);
756*b30d1939SAndy Fiddaman 					return -1;
757*b30d1939SAndy Fiddaman 				}
758*b30d1939SAndy Fiddaman 				n2 = jp->file[1].fieldlen;
759*b30d1939SAndy Fiddaman 				lo = -1;
760*b30d1939SAndy Fiddaman 				if (jp->file[1].discard)
761*b30d1939SAndy Fiddaman 					sfseek(jp->file[1].iop, (Sfoff_t)-1, SEEK_SET);
762*b30d1939SAndy Fiddaman 			}
763*b30d1939SAndy Fiddaman 			else if (!cp2)
764*b30d1939SAndy Fiddaman 				break;
765*b30d1939SAndy Fiddaman 			else if ((jp->outmode & C_FILE1) && outrec(jp, -1) < 0)
766*b30d1939SAndy Fiddaman 				return -1;
767*b30d1939SAndy Fiddaman 			if (!(cp1 = getrec(jp, 0, 1)))
768*b30d1939SAndy Fiddaman 				break;
769*b30d1939SAndy Fiddaman 			n1 = jp->file[0].fieldlen;
770*b30d1939SAndy Fiddaman 		}
771*b30d1939SAndy Fiddaman 	}
772*b30d1939SAndy Fiddaman #if DEBUG_TRACE
773*b30d1939SAndy Fiddaman sfprintf(sfstdout, "[X#%d:?,%p,%p,%d,%d,%d%s]", __LINE__, cp1, cp2, cmp, lo, hi, (jp->outmode & C_COMMON) ? ",COMMON" : "");
774*b30d1939SAndy Fiddaman #endif
775*b30d1939SAndy Fiddaman 	if (cp2)
776*b30d1939SAndy Fiddaman 	{
777*b30d1939SAndy Fiddaman 		if (hi >= 0 &&
778*b30d1939SAndy Fiddaman 		    sfseek(jp->file[1].iop, (Sfoff_t)0, SEEK_CUR) < hi &&
779*b30d1939SAndy Fiddaman 		    sfseek(jp->file[1].iop, hi, SEEK_SET) != hi)
780*b30d1939SAndy Fiddaman 		{
781*b30d1939SAndy Fiddaman 			error(ERROR_SYSTEM|2, "%s: seek error", jp->file[1].name);
782*b30d1939SAndy Fiddaman 			return -1;
783*b30d1939SAndy Fiddaman 		}
784*b30d1939SAndy Fiddaman #if DEBUG_TRACE
785*b30d1939SAndy Fiddaman sfprintf(sfstdout, "[O#%d:%02o:%02o]", __LINE__, jp->ooutmode, jp->outmode);
786*b30d1939SAndy Fiddaman #endif
787*b30d1939SAndy Fiddaman 		cp1 = (!cp1 && cmp && hi < 0 && !jp->file[1].hit && ((jp->ooutmode ^ C_ALL) <= 1 || jp->outmode == 2)) ? cp2 : getrec(jp, 1, 0);
788*b30d1939SAndy Fiddaman 		cmp = 1;
789*b30d1939SAndy Fiddaman 		n = 1;
790*b30d1939SAndy Fiddaman 	}
791*b30d1939SAndy Fiddaman 	else
792*b30d1939SAndy Fiddaman 	{
793*b30d1939SAndy Fiddaman 		cmp = -1;
794*b30d1939SAndy Fiddaman 		n = 0;
795*b30d1939SAndy Fiddaman 	}
796*b30d1939SAndy Fiddaman #if DEBUG_TRACE
797*b30d1939SAndy Fiddaman sfprintf(sfstdout, "[X#%d:%d,%p,%p,%d,%02o,%02o%s]", __LINE__, n, cp1, cp2, cmp, jp->ooutmode, jp->outmode, (jp->outmode & C_COMMON) ? ",COMMON" : "");
798*b30d1939SAndy Fiddaman #endif
799*b30d1939SAndy Fiddaman 	if (!cp1 || !(jp->outmode & (1<<n)))
800*b30d1939SAndy Fiddaman 	{
801*b30d1939SAndy Fiddaman 		if (cp1 && jp->file[n].iop == sfstdin)
802*b30d1939SAndy Fiddaman 			sfseek(sfstdin, (Sfoff_t)0, SEEK_END);
803*b30d1939SAndy Fiddaman 		return 0;
804*b30d1939SAndy Fiddaman 	}
805*b30d1939SAndy Fiddaman 	if (outrec(jp, cmp) < 0)
806*b30d1939SAndy Fiddaman 		return -1;
807*b30d1939SAndy Fiddaman 	do
808*b30d1939SAndy Fiddaman 	{
809*b30d1939SAndy Fiddaman 		if (!getrec(jp, n, 1))
810*b30d1939SAndy Fiddaman 			return 0;
811*b30d1939SAndy Fiddaman 	} while (outrec(jp, cmp) >= 0);
812*b30d1939SAndy Fiddaman 	return -1;
813*b30d1939SAndy Fiddaman }
814*b30d1939SAndy Fiddaman 
815*b30d1939SAndy Fiddaman int
b_join(int argc,char ** argv,Shbltin_t * context)816*b30d1939SAndy Fiddaman b_join(int argc, char** argv, Shbltin_t* context)
817*b30d1939SAndy Fiddaman {
818*b30d1939SAndy Fiddaman 	register int		n;
819*b30d1939SAndy Fiddaman 	register char*		cp;
820*b30d1939SAndy Fiddaman 	register Join_t*	jp;
821*b30d1939SAndy Fiddaman 	char*			e;
822*b30d1939SAndy Fiddaman 
823*b30d1939SAndy Fiddaman #if !DEBUG_TRACE
824*b30d1939SAndy Fiddaman 	cmdinit(argc, argv, context, ERROR_CATALOG, ERROR_NOTIFY);
825*b30d1939SAndy Fiddaman #endif
826*b30d1939SAndy Fiddaman 	if (!(jp = init()))
827*b30d1939SAndy Fiddaman 		error(ERROR_system(1),"out of space");
828*b30d1939SAndy Fiddaman 	jp->context = context;
829*b30d1939SAndy Fiddaman 	for (;;)
830*b30d1939SAndy Fiddaman 	{
831*b30d1939SAndy Fiddaman 		switch (n = optget(argv, usage))
832*b30d1939SAndy Fiddaman 		{
833*b30d1939SAndy Fiddaman  		case 'j':
834*b30d1939SAndy Fiddaman 			/*
835*b30d1939SAndy Fiddaman 			 * check for obsolete "-j1 field" and "-j2 field"
836*b30d1939SAndy Fiddaman 			 */
837*b30d1939SAndy Fiddaman 
838*b30d1939SAndy Fiddaman 			if (opt_info.offset == 0)
839*b30d1939SAndy Fiddaman 			{
840*b30d1939SAndy Fiddaman 				cp = argv[opt_info.index - 1];
841*b30d1939SAndy Fiddaman 				for (n = strlen(cp) - 1; n > 0 && cp[n] != 'j'; n--);
842*b30d1939SAndy Fiddaman 				n = cp[n] == 'j';
843*b30d1939SAndy Fiddaman 			}
844*b30d1939SAndy Fiddaman 			else
845*b30d1939SAndy Fiddaman 				n = 0;
846*b30d1939SAndy Fiddaman 			if (n)
847*b30d1939SAndy Fiddaman 			{
848*b30d1939SAndy Fiddaman 				if (opt_info.num!=1 && opt_info.num!=2)
849*b30d1939SAndy Fiddaman 					error(2,"-jfileno field: fileno must be 1 or 2");
850*b30d1939SAndy Fiddaman 				n = '0' + opt_info.num;
851*b30d1939SAndy Fiddaman 				if (!(cp = argv[opt_info.index]))
852*b30d1939SAndy Fiddaman 				{
853*b30d1939SAndy Fiddaman 					argc = 0;
854*b30d1939SAndy Fiddaman 					break;
855*b30d1939SAndy Fiddaman 				}
856*b30d1939SAndy Fiddaman 				opt_info.num = strtol(cp, &e, 10);
857*b30d1939SAndy Fiddaman 				if (*e)
858*b30d1939SAndy Fiddaman 				{
859*b30d1939SAndy Fiddaman 					argc = 0;
860*b30d1939SAndy Fiddaman 					break;
861*b30d1939SAndy Fiddaman 				}
862*b30d1939SAndy Fiddaman 				opt_info.index++;
863*b30d1939SAndy Fiddaman 			}
864*b30d1939SAndy Fiddaman 			else
865*b30d1939SAndy Fiddaman 			{
866*b30d1939SAndy Fiddaman 				jp->file[0].field = (int)(opt_info.num-1);
867*b30d1939SAndy Fiddaman 				n = '2';
868*b30d1939SAndy Fiddaman 			}
869*b30d1939SAndy Fiddaman 			/*FALLTHROUGH*/
870*b30d1939SAndy Fiddaman  		case '1':
871*b30d1939SAndy Fiddaman 		case '2':
872*b30d1939SAndy Fiddaman 			if (opt_info.num <=0)
873*b30d1939SAndy Fiddaman 				error(2,"field number must positive");
874*b30d1939SAndy Fiddaman 			jp->file[n-'1'].field = (int)(opt_info.num-1);
875*b30d1939SAndy Fiddaman 			continue;
876*b30d1939SAndy Fiddaman 		case 'v':
877*b30d1939SAndy Fiddaman 			jp->outmode &= ~C_COMMON;
878*b30d1939SAndy Fiddaman 			/*FALLTHROUGH*/
879*b30d1939SAndy Fiddaman 		case 'a':
880*b30d1939SAndy Fiddaman 			if (opt_info.num!=1 && opt_info.num!=2)
881*b30d1939SAndy Fiddaman 				error(2,"%s: file number must be 1 or 2", opt_info.name);
882*b30d1939SAndy Fiddaman 			jp->outmode |= 1<<(opt_info.num-1);
883*b30d1939SAndy Fiddaman 			continue;
884*b30d1939SAndy Fiddaman 		case 'e':
885*b30d1939SAndy Fiddaman 			jp->nullfield = opt_info.arg;
886*b30d1939SAndy Fiddaman 			continue;
887*b30d1939SAndy Fiddaman 		case 'o':
888*b30d1939SAndy Fiddaman 			/* need to accept obsolescent command syntax */
889*b30d1939SAndy Fiddaman 			n = getolist(jp, opt_info.arg, argv+opt_info.index);
890*b30d1939SAndy Fiddaman 			opt_info.index += n;
891*b30d1939SAndy Fiddaman 			continue;
892*b30d1939SAndy Fiddaman 		case 't':
893*b30d1939SAndy Fiddaman 			jp->state[' '] = jp->state['\t'] = 0;
894*b30d1939SAndy Fiddaman 			if (jp->mb)
895*b30d1939SAndy Fiddaman 			{
896*b30d1939SAndy Fiddaman 				cp = opt_info.arg;
897*b30d1939SAndy Fiddaman 				jp->delim = mbchar(cp);
898*b30d1939SAndy Fiddaman 				if ((n = cp - opt_info.arg) > 1)
899*b30d1939SAndy Fiddaman 				{
900*b30d1939SAndy Fiddaman 					jp->delimlen = n;
901*b30d1939SAndy Fiddaman 					jp->delimstr = opt_info.arg;
902*b30d1939SAndy Fiddaman 					continue;
903*b30d1939SAndy Fiddaman 				}
904*b30d1939SAndy Fiddaman 			}
905*b30d1939SAndy Fiddaman 			n = *(unsigned char*)opt_info.arg;
906*b30d1939SAndy Fiddaman 			jp->state[n] = S_DELIM;
907*b30d1939SAndy Fiddaman 			jp->delim = n;
908*b30d1939SAndy Fiddaman 			continue;
909*b30d1939SAndy Fiddaman 		case 'i':
910*b30d1939SAndy Fiddaman 			jp->ignorecase = !opt_info.num;
911*b30d1939SAndy Fiddaman 			continue;
912*b30d1939SAndy Fiddaman 		case 'B':
913*b30d1939SAndy Fiddaman 			jp->buffered = !opt_info.num;
914*b30d1939SAndy Fiddaman 			continue;
915*b30d1939SAndy Fiddaman 		case ':':
916*b30d1939SAndy Fiddaman 			error(2, "%s", opt_info.arg);
917*b30d1939SAndy Fiddaman 			break;
918*b30d1939SAndy Fiddaman 		case '?':
919*b30d1939SAndy Fiddaman 			done(jp);
920*b30d1939SAndy Fiddaman 			error(ERROR_usage(2), "%s", opt_info.arg);
921*b30d1939SAndy Fiddaman 			break;
922*b30d1939SAndy Fiddaman 		}
923*b30d1939SAndy Fiddaman 		break;
924*b30d1939SAndy Fiddaman 	}
925*b30d1939SAndy Fiddaman 	argv += opt_info.index;
926*b30d1939SAndy Fiddaman 	argc -= opt_info.index;
927*b30d1939SAndy Fiddaman 	if (error_info.errors || argc!=2)
928*b30d1939SAndy Fiddaman 	{
929*b30d1939SAndy Fiddaman 		done(jp);
930*b30d1939SAndy Fiddaman 		error(ERROR_usage(2),"%s", optusage(NiL));
931*b30d1939SAndy Fiddaman 	}
932*b30d1939SAndy Fiddaman 	jp->ooutmode = jp->outmode;
933*b30d1939SAndy Fiddaman 	jp->file[0].name = cp = *argv++;
934*b30d1939SAndy Fiddaman 	if (streq(cp,"-"))
935*b30d1939SAndy Fiddaman 	{
936*b30d1939SAndy Fiddaman 		if (sfseek(sfstdin,(Sfoff_t)0,SEEK_CUR) < 0)
937*b30d1939SAndy Fiddaman 		{
938*b30d1939SAndy Fiddaman 			if (sfdcseekable(sfstdin))
939*b30d1939SAndy Fiddaman 				error(ERROR_warn(0),"%s: seek may fail",cp);
940*b30d1939SAndy Fiddaman 			else
941*b30d1939SAndy Fiddaman 				jp->file[0].discard = 1;
942*b30d1939SAndy Fiddaman 		}
943*b30d1939SAndy Fiddaman 		jp->file[0].iop = sfstdin;
944*b30d1939SAndy Fiddaman 	}
945*b30d1939SAndy Fiddaman 	else if (!(jp->file[0].iop = sfopen(NiL, cp, "r")))
946*b30d1939SAndy Fiddaman 	{
947*b30d1939SAndy Fiddaman 		done(jp);
948*b30d1939SAndy Fiddaman 		error(ERROR_system(1),"%s: cannot open",cp);
949*b30d1939SAndy Fiddaman 	}
950*b30d1939SAndy Fiddaman 	jp->file[1].name = cp = *argv;
951*b30d1939SAndy Fiddaman 	if (streq(cp,"-"))
952*b30d1939SAndy Fiddaman 	{
953*b30d1939SAndy Fiddaman 		if (sfseek(sfstdin,(Sfoff_t)0,SEEK_CUR) < 0)
954*b30d1939SAndy Fiddaman 		{
955*b30d1939SAndy Fiddaman 			if (sfdcseekable(sfstdin))
956*b30d1939SAndy Fiddaman 				error(ERROR_warn(0),"%s: seek may fail",cp);
957*b30d1939SAndy Fiddaman 			else
958*b30d1939SAndy Fiddaman 				jp->file[1].discard = 1;
959*b30d1939SAndy Fiddaman 		}
960*b30d1939SAndy Fiddaman 		jp->file[1].iop = sfstdin;
961*b30d1939SAndy Fiddaman 	}
962*b30d1939SAndy Fiddaman 	else if (!(jp->file[1].iop = sfopen(NiL, cp, "r")))
963*b30d1939SAndy Fiddaman 	{
964*b30d1939SAndy Fiddaman 		done(jp);
965*b30d1939SAndy Fiddaman 		error(ERROR_system(1),"%s: cannot open",cp);
966*b30d1939SAndy Fiddaman 	}
967*b30d1939SAndy Fiddaman 	if (jp->buffered)
968*b30d1939SAndy Fiddaman 	{
969*b30d1939SAndy Fiddaman 		sfsetbuf(jp->file[0].iop, jp->file[0].iop, SF_UNBOUND);
970*b30d1939SAndy Fiddaman 		sfsetbuf(jp->file[1].iop, jp->file[1].iop, SF_UNBOUND);
971*b30d1939SAndy Fiddaman 	}
972*b30d1939SAndy Fiddaman 	jp->outfile = sfstdout;
973*b30d1939SAndy Fiddaman 	if (!jp->outlist)
974*b30d1939SAndy Fiddaman 		jp->nullfield = 0;
975*b30d1939SAndy Fiddaman 	if (join(jp) < 0)
976*b30d1939SAndy Fiddaman 	{
977*b30d1939SAndy Fiddaman 		done(jp);
978*b30d1939SAndy Fiddaman 		error(ERROR_system(1),"write error");
979*b30d1939SAndy Fiddaman 	}
980*b30d1939SAndy Fiddaman 	else if (jp->file[0].iop==sfstdin || jp->file[1].iop==sfstdin)
981*b30d1939SAndy Fiddaman 		sfseek(sfstdin,(Sfoff_t)0,SEEK_END);
982*b30d1939SAndy Fiddaman 	done(jp);
983*b30d1939SAndy Fiddaman 	return error_info.errors;
984*b30d1939SAndy Fiddaman }
985