1#!/bin/sh
2
3# Public domain notice for all NCBI EDirect scripts is located at:
4# https://www.ncbi.nlm.nih.gov/books/NBK179288/#chapter6.Public_Domain_Notice
5
# An optional leading flag chooses the stage at which processing starts.
# The later stage guards compare against this number; 0 runs everything.
# When several flags are given, the last one wins.
startat=0

while [ "$#" -gt 0 ]
do
  case "$1" in
    -collect ) startat=1 ;;
    -index   ) startat=2 ;;
    -invert  ) startat=3 ;;
    -merge   ) startat=4 ;;
    -promote ) startat=5 ;;
    *        ) break ;;
  esac
  # Only reached for a recognized flag; the default arm has left the loop.
  shift
done
36
# Skip an optional "-path" keyword in front of the master archive path.
# Any other dash-prefixed word is reported on stderr and ends the script.
while [ "$#" -ne 0 ]
do
  case "$1" in
    -path )
      shift
      continue
      ;;
    -* )
      echo "$0: Unrecognized option $1" >&2
      exit 1
      ;;
  esac
  # First word that is not an option: leave it in place for the next step.
  break
done
53
# Resolve the master archive directory.  The first remaining argument wins;
# otherwise EDIRECT_PUBMED_MASTER is used with one trailing slash removed.
# CONFIG is filled in only when the path came from the command line; the end
# of the script uses it to suggest saving the path in a startup file.  It is
# reset here so a CONFIG inherited from the environment cannot trigger that
# suggestion with an unrelated value.
CONFIG=""

if [ "$#" -gt 0 ]
then
  target="$1"
  # CDPATH is cleared so cd neither searches it nor echoes the directory it
  # picked into the captured output.
  MASTER=$(CDPATH= cd -- "$target" && pwd)
  if [ -z "$MASTER" ]
  then
    # Without this check an unusable path left MASTER empty and the script
    # went on to create "/Archive" and "/Postings".
    echo "$0: Unable to access master archive path $target" >&2
    exit 1
  fi
  CONFIG=${MASTER}
  shift
else
  if [ -z "${EDIRECT_PUBMED_MASTER}" ]
  then
    echo "Must supply path to master archive area or set EDIRECT_PUBMED_MASTER environment variable" >&2
    exit 1
  else
    MASTER="${EDIRECT_PUBMED_MASTER}"
    MASTER=${MASTER%/}
  fi
fi
70
# Skip an optional "-temp", "-work" or "-working" keyword in front of the
# working directory path.  Any other dash-prefixed word is reported on stderr
# and ends the script.
while [ "$#" -ne 0 ]
do
  case "$1" in
    -temp | -work | -working )
      shift
      continue
      ;;
    -* )
      echo "$0: Unrecognized option $1" >&2
      exit 1
      ;;
  esac
  # First word that is not an option: leave it in place for the next step.
  break
done
87
# Resolve the working directory.  The next remaining argument wins; otherwise
# EDIRECT_PUBMED_WORKING is used with one trailing slash removed, and when
# that is empty too the working area is the master archive directory itself.
if [ "$#" -gt 0 ]
then
  working="$1"
  # CDPATH is cleared so cd neither searches it nor echoes the directory it
  # picked into the captured output.
  WORKING=$(CDPATH= cd -- "$working" && pwd)
  if [ -z "$WORKING" ]
  then
    # Without this check an unusable path left WORKING empty and the working
    # subdirectories were created directly under "/".
    echo "$0: Unable to access working path $working" >&2
    exit 1
  fi
  shift
else
  if [ -z "${EDIRECT_PUBMED_WORKING}" ]
  then
    WORKING=${MASTER}
  else
    WORKING="${EDIRECT_PUBMED_WORKING}"
    WORKING=${WORKING%/}
  fi
fi
102
# Show the resolved archive and working locations before any work starts.
{
  echo "MASTER $MASTER"
  echo "WORKING $WORKING"
}
106
# confirm_or_exit QUESTION
# Print QUESTION, read one line from standard input, and carry on only when
# the reply starts with "y" or "Y".  Any other reply, an empty line, or end
# of input ends the script with status 1.
confirm_or_exit() {
  echo "$1"
  # -r keeps backslashes in the reply literal.
  read -r response
  case "$response" in
    [Yy]*      ) echo "OK, PROCEEDING." ;;
    [Nn]* | '' ) echo "Holding off, then."; exit 1 ;;
    *          ) echo "Conservatively taking that as a no."; exit 1 ;;
  esac
}

# Kernel name with Windows-style names collapsed ("..._NT-x" becomes
# "..._NT", a leading "MINGWnn" becomes "CYGWIN").
osname=$(uname -s | sed -e 's/_NT-.*$/_NT/; s/^MINGW[0-9]*/CYGWIN/')

if [ "$osname" = "Darwin" ]
then
  # Last field of the last df line for the archive path.  The path variables
  # are quoted so an archive location containing spaces reaches df and
  # diskutil as a single argument.
  MASTER_ROOT=$(df "$MASTER" | awk 'END { print $NF }')

  # The SolidState property with its XML markup stripped; only the literal
  # "true" counts as a solid-state drive.
  sdst=$(diskutil info -plist "$MASTER_ROOT" | plutil -extract SolidState xml1 - -o - | sed -ne 's,<,,pg' | sed -ne 's,/>,,pg')
  if [ "$sdst" != "true" ]
  then
    echo ""
    echo "$MASTER IS A HARD DISK DRIVE, NOT THE EXPECTED SOLID-STATE DRIVE."
    echo ""
    confirm_or_exit "WOULD YOU LIKE TO PROCEED WITH ARCHIVING EVEN THOUGH IT IS NOT RECOMMENDED? [y/N]"
  fi

  # The FilesystemType property with the surrounding <string> tags removed.
  ftyp=$(diskutil info -plist "$MASTER_ROOT" | plutil -extract FilesystemType xml1 - -o - | sed -ne 's,</*string>,,pg')
  if [ "$ftyp" != "apfs" ]
  then
    echo ""
    echo "$MASTER IS OF TYPE '$ftyp'"
    echo ""
    echo "IT NEEDS TO BE REFORMATTED AS APFS BEFORE YOU CAN PROCEED:"
    echo ""
    echo "  Run Utilities -> Disk Utility"
    echo ""
    echo "  Switch the View option to 'Show All Devices'."
    echo ""
    echo "  Select the entry named 'PCIe SSD Media' (not the two entries indented below it)."
    echo ""
    echo "  Click on 'Erase'."
    echo ""
    echo "  Change the Scheme to 'GUID Partition Map' (which will expand the Format choices)."
    echo ""
    echo "  Set the Format to 'APFS'."
    echo ""
    echo "  Press Erase."
    echo ""
    echo "ALSO RUN:"
    echo ""
    echo "  sudo trimforce enable"
    echo ""
    echo "IF NECESSARY TO ENABLE TRIM SUPPORT ON THE SOLID STATE DRIVE."
    echo ""
    confirm_or_exit "WOULD YOU LIKE TO PROCEED WITH ARCHIVING ON THE NON-APFS VOLUME ANYWAY? [y/N]"
  fi
fi
163
# Permanent results live under MASTER.
mkdir -p "$MASTER/Archive" "$MASTER/Postings"

# Downloads and intermediate index files live under WORKING.
mkdir -p \
  "$WORKING/Current" \
  "$WORKING/Data" \
  "$WORKING/Indexed" \
  "$WORKING/Inverted" \
  "$WORKING/Merged" \
  "$WORKING/Pubmed"

# pm-prepare is run only while the archive has no CACHEDIR.TAG file.
[ -f "$MASTER/Archive/CACHEDIR.TAG" ] || pm-prepare "$MASTER/Archive"
178
# Print the time at which processing begins.
date

# Elapsed seconds per stage; a stage that is skipped keeps its 0.
DWN=0 POP=0 REF=0
CLR=0
COL=0 IDX=0 INV=0 MRG=0 PST=0
190
# Initial stage, run only for a full build (startat 0): download the PubMed
# files and the MeSH tree, stash the records into the archive, then refresh
# versioned records.  Each step records its elapsed time in DWN, POP or REF.
# Every cd is checked: if a working subdirectory is unavailable, the script
# stops instead of running the tools in whatever directory happens to be
# current.
if [ "$startat" -lt 1 ]
then
  seconds_start=$(date "+%s")
  echo "Downloading PubMed Files"
  cd "$WORKING/Pubmed" || exit 1
  download-pubmed baseline updatefiles
  echo "Downloading MeSH Tree"
  cd "$WORKING/Data" || exit 1
  download-ncbi-data meshtree
  seconds_end=$(date "+%s")
  seconds=$((seconds_end - seconds_start))
  echo "$seconds seconds"
  DWN=$seconds

  seconds_start=$(date "+%s")
  echo "Populating PubMed Archive"
  cd "$WORKING/Pubmed" || exit 1
  pm-stash "$MASTER/Archive"
  seconds_end=$(date "+%s")
  seconds=$((seconds_end - seconds_start))
  echo "$seconds seconds"
  POP=$seconds

  seconds_start=$(date "+%s")
  echo "Refreshing Versioned Records"
  # Runs from the Pubmed directory entered for pm-stash above.
  pm-refresh "$MASTER/Archive"
  seconds_end=$(date "+%s")
  seconds=$((seconds_end - seconds_start))
  echo "$seconds seconds"
  REF=$seconds
fi
222
# Delete intermediate files left by an earlier run, but only for the stages
# that are about to be regenerated.  Each table entry reads
# "threshold:subdirectory:extension": the files are removed when startat is
# below the threshold.  Elapsed time is recorded in CLR.
if [ "$startat" -lt 5 ]
then
  seconds_start=$(date "+%s")
  echo "Removing Previous Indices"
  for spec in 2:Indexed:e2x 3:Inverted:inv 4:Merged:mrg
  do
    limit=${spec%%:*}
    rest=${spec#*:}
    subdir=${rest%%:*}
    ext=${rest#*:}
    if [ "$startat" -lt "$limit" ]
    then
      cd "$WORKING/$subdir"
      target="$WORKING/$subdir"
      # Plain and gzip-compressed files are removed in separate passes.
      find "$target" -name "*.$ext" -delete
      find "$target" -name "*.$ext.gz" -delete
    fi
  done
  seconds_end=$(date "+%s")
  seconds=$((seconds_end - seconds_start))
  echo "$seconds seconds"
  CLR=$seconds
fi
253
# run_stage TITLE DIRECTORY COMMAND [ARG...]
# Print TITLE, change into DIRECTORY, run COMMAND with its arguments, then
# print the elapsed time.  The elapsed seconds are left in $seconds for the
# caller to store.  The script stops with status 1 when DIRECTORY cannot be
# entered, so COMMAND never runs in an unintended directory.
run_stage() {
  stage_title=$1
  stage_dir=$2
  shift 2
  seconds_start=$(date "+%s")
  echo "$stage_title"
  cd "$stage_dir" || exit 1
  "$@"
  seconds_end=$(date "+%s")
  seconds=$((seconds_end - seconds_start))
  echo "$seconds seconds"
}

# Each stage runs only when startat is below its threshold, and works from
# the directory the previous stage wrote into.
if [ "$startat" -lt 2 ]
then
  run_stage "Collecting PubMed Records" "$WORKING/Pubmed" \
    pm-collect "$MASTER/Archive" "$WORKING/Current"
  COL=$seconds
fi

if [ "$startat" -lt 3 ]
then
  run_stage "Indexing PubMed Records" "$WORKING/Current" \
    pm-index "$WORKING/Indexed" "$WORKING/Data"
  IDX=$seconds
fi

if [ "$startat" -lt 4 ]
then
  run_stage "Inverting PubMed Indices" "$WORKING/Indexed" \
    pm-invert "$WORKING/Inverted"
  INV=$seconds
fi

if [ "$startat" -lt 5 ]
then
  run_stage "Merging Inverted Indices" "$WORKING/Inverted" \
    pm-merge "$WORKING/Merged"
  MRG=$seconds
fi

if [ "$startat" -lt 6 ]
then
  run_stage "Producing Postings Files" "$WORKING/Merged" \
    pm-promote "$MASTER/Postings"
  PST=$seconds
fi
313
# Timing summary: one "<label> <seconds> seconds" line per stage, followed by
# a blank line.  printf reuses its format for each label/value pair.
printf '%s %s seconds\n' \
  DWN "$DWN" \
  POP "$POP" \
  REF "$REF" \
  CLR "$CLR" \
  COL "$COL" \
  IDX "$IDX" \
  INV "$INV" \
  MRG "$MRG" \
  PST "$PST"

echo ""
325
# Final check that exercises both products of the build: search the new
# postings for a fixed phrase, fetch the matching records from the archive,
# and print the initials of authors whose affiliation contains "Medicine",
# prefixed with "Archive and Index are ".
# NOTE(review): presumably the printed initials complete a short confirmation
# message -- confirm against a finished build.
phrase-search -path "$MASTER/Postings" \
  -query "mapping of spatio-temporal pollution status AND 2008 [YEAR]" |
  fetch-pubmed -path "$MASTER/Archive" |
  xtract -pattern Author -if Affiliation -contains Medicine \
    -pfx "Archive and Index are " -element Initials

echo ""

# Print the time at which processing finished.
date
334
# When the archive path was given on the command line (CONFIG is non-empty),
# print a command the user can run to store it in a shell startup file.
# .bash_profile is suggested when .bashrc mentions it, or when it exists and
# does not mention bashrc; in every other case .bashrc is suggested.
if [ -n "$CONFIG" ]
then
  target=bash_profile
  if ! grep "$target" "$HOME/.bashrc" >/dev/null 2>&1
  then
    # The path is quoted so a HOME containing whitespace is tested as one
    # file name rather than making the test fail with a syntax error.
    if [ ! -f "$HOME/.$target" ] || grep 'bashrc' "$HOME/.$target" >/dev/null 2>&1
    then
      target=bashrc
    fi
  fi
  echo ""
  echo "For convenience, please execute the following to save the archive path to a variable:"
  echo ""
  # \$HOME is printed literally so the user's own shell expands it later.
  echo "  echo \"export EDIRECT_PUBMED_MASTER='${CONFIG}'\" >>" "\$HOME/.$target"
  echo ""
fi
351