# Written by Aleksey Cheusov <vle@gmx.net>, public domain
#
# This awk module is a part of RunAWK distribution,
#        http://sourceforge.net/projects/runawk
#
############################################################

# =head2 tokenre.awk
#
# By default AWK splits input lines into tokens according to a regular
# expression that defines the "spaces" between tokens, using the special
# variable FS. In many situations it is more useful to define a regular
# expression for the tokens themselves. This is what this module does.
#
# Matches are taken from the left to the right and never overlap.
# Zero-length matches (possible with regexps such as "[0-9]*") never
# produce a token; the scan just moves one character forward.
# All functions call match() and therefore overwrite RSTART and RLENGTH.
#
# =over 2
#
# =item I<tokenre(STRING, REGEXP)>
#
# extracts substrings from STRING
# according to REGEXP from the left to the right and assigns $1, $2
# etc. and NF variable.
#
# =item I<tokenre0(REGEXP)>
#
# Does the same as `tokenre' but splits $0 instead.
#
# =item I<splitre(STRING, ARR, REGEXP)>
#
# The same as `tokenre' but ARR[1], ARR[2]... are assigned.
# The number of extracted tokens is the return value.
# Elements of ARR with other indices are left untouched.
#
# =item I<splitre0(ARR, REGEXP)>
#
# Does the same as `splitre' but splits $0 instead.
#
# =item I<TRE>
#
# global variable. If it is set to a non-empty string, all input
# lines are split automatically.
#
# =back
#

# See example/demo_tokenre for the sample of usage

# tokenre(s, re): reset the current record and fill $1..$NF with the
# successive non-empty substrings of `s' that match `re'.
# `s' is a by-value copy, so resetting NF (and thereby $0) below does
# not disturb the text being scanned, even when the caller passes $0.
function tokenre (s, re){
	NF = 0
	# The `s != ""' guard is required for termination: a regexp that
	# matches the empty string also matches an exhausted `s'.
	while (s != "" && match(s, re)){
		if (RLENGTH > 0){
			++NF
			$NF = substr(s, RSTART, RLENGTH)
			s = substr(s, RSTART+RLENGTH)
		}else{
			# Zero-length match: consuming nothing would loop forever,
			# so drop everything up to and including the character
			# at RSTART and try again.
			s = substr(s, RSTART+1)
		}
	}
}

# tokenre0(re): apply tokenre() to the current input record.
function tokenre0 (re){
	tokenre($0, re)
}

# splitre(s, arr, re): store the successive non-empty substrings of `s'
# that match `re' into arr[1], arr[2], ... and return their number.
# `cnt' is a local variable, not an argument for callers.
function splitre (s, arr, re,                cnt){
	cnt = 0
	# Same termination guard and zero-length handling as in tokenre().
	while (s != "" && match(s, re)){
		if (RLENGTH > 0){
			++cnt
			arr [cnt] = substr(s, RSTART, RLENGTH)
			s = substr(s, RSTART+RLENGTH)
		}else{
			s = substr(s, RSTART+1)
		}
	}
	return cnt
}

# splitre0(arr, re): apply splitre() to the current input record.
function splitre0 (arr, re){
	return splitre($0, arr, re)
}

# Automatic mode: when TRE is non-empty every input record is
# re-tokenized before rules that follow this module are evaluated.
TRE != "" {
	tokenre0(TRE)
}