xref: /netbsd/distrib/sets/join.awk (revision 3c49a566)
1*3c49a566Sriz#	$NetBSD: join.awk,v 1.6 2014/10/24 22:19:44 riz Exp $
282871120Slukem#
382871120Slukem# Copyright (c) 2002 The NetBSD Foundation, Inc.
482871120Slukem# All rights reserved.
582871120Slukem#
682871120Slukem# This code is derived from software contributed to The NetBSD Foundation
782871120Slukem# by Luke Mewburn of Wasabi Systems.
882871120Slukem#
982871120Slukem# Redistribution and use in source and binary forms, with or without
1082871120Slukem# modification, are permitted provided that the following conditions
1182871120Slukem# are met:
1282871120Slukem# 1. Redistributions of source code must retain the above copyright
1382871120Slukem#    notice, this list of conditions and the following disclaimer.
1482871120Slukem# 2. Redistributions in binary form must reproduce the above copyright
1582871120Slukem#    notice, this list of conditions and the following disclaimer in the
1682871120Slukem#    documentation and/or other materials provided with the distribution.
1782871120Slukem#
1882871120Slukem# THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
1982871120Slukem# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
2082871120Slukem# TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
2182871120Slukem# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
2282871120Slukem# BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
2382871120Slukem# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
2482871120Slukem# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
2582871120Slukem# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
2682871120Slukem# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
2782871120Slukem# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
2882871120Slukem# POSSIBILITY OF SUCH DAMAGE.
2982871120Slukem#
3082871120Slukem# join.awk F1 F2
3182871120Slukem#	Similar to join(1), this reads a list of words from F1
3282871120Slukem#	and outputs lines in F2 with a first word that is in F1.
33*3c49a566Sriz#	Neither file needs to be sorted
3482871120Slukem
# unvis(s)
#	Return s with a subset of vis-style backslash escapes decoded:
#	    \\    -> a single backslash
#	    \ooo  -> the character whose code is the three octal digits ooo
#	Any other backslash-plus-character pair is reported on /dev/stderr
#	and the backslash is kept literally.  A backslash that is the very
#	last character of s has nothing after it to form an escape and is
#	copied through unchanged.
#	"result" and "pos" are locals (awk's extra-parameter idiom); callers
#	pass only s.
function unvis(s,	result, pos) \
{
	# XXX: We don't handle the complete range of vis encodings
	result = ""
	while (length(s) > 0) {
		# find the next backslash that is followed by some character
		pos = match(s, "\\\\.")
		if (pos == 0) {
			# no escapes left: the remainder is literal text
			result = result "" s
			break
		}
		# copy the part before the next backslash
		result = result "" substr(s, 1, pos - 1)
		s = substr(s, pos)
		# process the backslash and next few chars
		if (substr(s, 1, 2) == "\\\\") {
			# double backslash -> single backslash
			result = result "\\"
			s = substr(s, 3)
		} else if (match(s, "\\\\[0-7][0-7][0-7]") == 1) {
			# \ooo with three octal digits.
			# XXX: use strtonum() if that is available
			# NOTE(review): a multibyte-aware awk may emit a
			# multibyte sequence for %c with values >= 128 --
			# confirm for the awk in use.
			result = result "" sprintf("%c", \
				(substr(s, 2, 1) * 64) + \
				(substr(s, 3, 1) * 8) + \
				(0 + substr(s, 4, 1)))
			s = substr(s, 5)
		} else {
			# unrecognised escape: keep the literal backslash
			printf "%s: %s:%s: unrecognised escape %s\n", \
				ARGV[0], (FILENAME ? FILENAME : "stdin"), FNR, \
				substr(s, 1, 2) \
				>"/dev/stderr"
			result = result "" substr(s, 1, 1)
			# resume scanning right after the backslash
			s = substr(s, 2)
		}
	}
	return result
}
742bd53f24Sapb
# Validate the command line, then read F1 (ARGV[1]) completely:
# each record's first word is decoded with unvis() and the (rebuilt)
# record is stored in words[], indexed by that decoded word.
# ARGV[1] is then deleted so that only F2 is processed by the main rules.
BEGIN \
{
	if (ARGC != 3) {
		printf("Usage: join file1 file2\n") >"/dev/stderr"
		exit 1
	}
	while ( (join_status = (getline < ARGV[1])) > 0) {
		$1 = unvis($1)
		words[$1] = $0
	}
	if (join_status < 0) {
		# getline returns -1 when the file cannot be opened or read;
		# report it instead of silently joining against nothing
		printf("%s: cannot read %s\n", ARGV[0], ARGV[1]) >"/dev/stderr"
		exit 1
	}
	close(ARGV[1])
	delete ARGV[1]
}
8782871120Slukem
# For every F2 record: decode its first word with unvis(); when that
# word is an index of words[], print the stored F1 record followed by
# the F2 record with its first field blanked out.
// {
	$1 = unvis($1)
	if ($1 in words) {
		join_key = $1
		$1 = ""
		print words[join_key] $0
	}
}
96