# $NetBSD: join.awk,v 1.6 2014/10/24 22:19:44 riz Exp $
#
# Copyright (c) 2002 The NetBSD Foundation, Inc.
# All rights reserved.
#
# This code is derived from software contributed to The NetBSD Foundation
# by Luke Mewburn of Wasabi Systems.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
# TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
# BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#
# join.awk F1 F2
#	Similar to join(1), this reads a list of words from F1
#	and outputs lines in F2 with a first word that is in F1.
#	Neither file needs to be sorted
#
#	The first word of every line in both files is passed through
#	unvis() before it is stored or looked up.  Each matching F2 line
#	is printed as the stored F1 line followed by the F2 line with
#	its first word blanked out.

# unvis(s)
#	Return s with the vis-style escapes that this script understands
#	decoded:
#	    \\    becomes a single backslash
#	    \ooo  (exactly three octal digits) becomes the character
#	          with that octal value
#	Any other backslash sequence is reported on /dev/stderr and the
#	backslash is kept literally.  A backslash that is the very last
#	character of s is copied through unchanged.
#	"out" and "pos" are local variables; callers pass only s.
#	XXX: We don't handle the complete range of vis encodings
function unvis(s,	out, pos) \
{
	out = ""
	# Each iteration consumes the text up to and including one escape.
	while ((pos = match(s, "\\\\.")) > 0) {
		# copy the part before the next backslash
		out = out substr(s, 1, pos - 1)
		s = substr(s, pos)
		# process the backslash and next few chars
		if (substr(s, 1, 2) == "\\\\") {
			# double backslash -> single backslash
			out = out "\\"
			s = substr(s, 3)
		} else if (match(s, "\\\\[0-7][0-7][0-7]") == 1) {
			# \ooo with three octal digits.
			# XXX: use gawk's strtonum() if that is available
			out = out sprintf("%c", \
				0 + substr(s, 2, 1) * 64 + \
				substr(s, 3, 1) * 8 + \
				substr(s, 4, 1))
			s = substr(s, 5)
		} else {
			# unrecognised escape: keep the literal backslash
			printf "%s: %s:%s: unrecognised escape %s\n", \
				ARGV[0], (FILENAME ? FILENAME : "stdin"), FNR, \
				substr(s, 1, 2) \
				>"/dev/stderr"
			out = out substr(s, 1, 1)
			s = substr(s, 2)
		}
	}
	# no (further) escapes: whatever is left is literal text
	return out s
}

# Validate the arguments, then load F1 into words[], indexed by the
# decoded first word of each line.
BEGIN \
{
	if (ARGC != 3) {
		printf("Usage: join file1 file2\n") >"/dev/stderr"
		exit 1
	}
	# Assigning $1 makes awk rebuild $0, so the line stored in words[]
	# carries the decoded first word.
	while ((join_rc = (getline < ARGV[1])) > 0) {
		$1 = unvis($1)
		words[$1] = $0
	}
	# getline returns -1 when the file cannot be read; report that
	# instead of silently treating F1 as empty.
	if (join_rc < 0) {
		printf("join: cannot read %s\n", ARGV[1]) >"/dev/stderr"
		exit 1
	}
	close(ARGV[1])
	# F1 has been consumed above; remove it so that only F2 is
	# processed as main input.
	delete ARGV[1]
}

# Decode the first word of every F2 line before it is looked up.
{ $1 = unvis($1) }

# F2 lines whose first word was listed in F1: print the stored F1 line
# followed by the F2 line with its first word blanked out.
$1 in words \
{
	f1 = $1
	$1 = ""
	print words[f1] $0
}