#!/bin/sh

# Public domain notice for all NCBI EDirect scripts is located at:
# https://www.ncbi.nlm.nih.gov/books/NBK179288/#chapter6.Public_Domain_Notice

# Builds (or refreshes) a local PubMed archive and its postings files.
#
# Usage:
#   [-collect|-index|-invert|-merge|-promote] [-path] [MASTER]
#   [-temp|-work|-working] [WORKING]
#
# Stage selection (each flag skips every stage before the named one):
#   (none)    download, populate, refresh, then everything below
#   -collect  start at collecting records
#   -index    start at indexing
#   -invert   start at inverting indices
#   -merge    start at merging inverted indices
#   -promote  only produce postings files
#
# Environment:
#   EDIRECT_PUBMED_MASTER   master archive path, used when MASTER is omitted
#   EDIRECT_PUBMED_WORKING  working area path, used when WORKING is omitted
#                           (falls back to the master path if also unset)

# Print a message on stderr and abort the script.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Change into a directory, aborting instead of letting later commands
# run in whatever directory happened to be current.
enter_dir() {
  cd "$1" || die "$0: Unable to change directory to $1"
}

# Record the start time of a stage in seconds_start.
start_timer() {
  seconds_start=$(date "+%s")
}

# Compute and report elapsed time since start_timer; result is left in $seconds.
stop_timer() {
  seconds_end=$(date "+%s")
  seconds=$((seconds_end - seconds_start))
  echo "$seconds seconds"
}

# Read a yes/no answer from stdin; anything other than a "y" answer
# (including empty input) exits the script.
confirm_or_exit() {
  read -r response
  case "$response" in
    [Yy]* )
      echo "OK, PROCEEDING."
      ;;
    [Nn]* | '' )
      echo "Holding off, then."
      exit 1
      ;;
    * )
      echo "Conservatively taking that as a no."
      exit 1
      ;;
  esac
}

startat=0

# Explicitly cleared so a CONFIG variable inherited from the environment
# cannot trigger the "save the archive path" hint at the end.
CONFIG=""

while [ $# -gt 0 ]
do
  case "$1" in
    -collect )
      startat=1
      shift
      ;;
    -index )
      startat=2
      shift
      ;;
    -invert )
      startat=3
      shift
      ;;
    -merge )
      startat=4
      shift
      ;;
    -promote )
      startat=5
      shift
      ;;
    * )
      break
      ;;
  esac
done

while [ $# -gt 0 ]
do
  case "$1" in
    -path )
      shift
      ;;
    -* )
      die "$0: Unrecognized option $1"
      ;;
    * )
      break
      ;;
  esac
done

if [ "$#" -gt 0 ]
then
  target="$1"
  # CDPATH is cleared so cd cannot echo a directory name into the result.
  # An inaccessible directory must stop the script here: an empty MASTER
  # would otherwise make every later path resolve under the filesystem root.
  MASTER=$(CDPATH= cd -- "$target" && pwd) ||
    die "$0: Unable to access master archive directory '$target'"
  [ -n "$MASTER" ] ||
    die "$0: Unable to access master archive directory '$target'"
  CONFIG=${MASTER}
  shift
else
  if [ -z "${EDIRECT_PUBMED_MASTER}" ]
  then
    die "Must supply path to master archive area or set EDIRECT_PUBMED_MASTER environment variable"
  else
    MASTER="${EDIRECT_PUBMED_MASTER}"
    MASTER=${MASTER%/}
  fi
fi

while [ $# -gt 0 ]
do
  case "$1" in
    -temp | -work | -working )
      shift
      ;;
    -* )
      die "$0: Unrecognized option $1"
      ;;
    * )
      break
      ;;
  esac
done

if [ "$#" -gt 0 ]
then
  working="$1"
  WORKING=$(CDPATH= cd -- "$working" && pwd) ||
    die "$0: Unable to access working directory '$working'"
  [ -n "$WORKING" ] ||
    die "$0: Unable to access working directory '$working'"
  shift
else
  if [ -z "${EDIRECT_PUBMED_WORKING}" ]
  then
    WORKING=${MASTER}
  else
    WORKING="${EDIRECT_PUBMED_WORKING}"
    WORKING=${WORKING%/}
  fi
fi

echo "MASTER $MASTER"

echo "WORKING $WORKING"

osname=$(uname -s | sed -e 's/_NT-.*$/_NT/; s/^MINGW[0-9]*/CYGWIN/')

if [ "$osname" = "Darwin" ]
then
  # On macOS, warn when the master volume is not a solid-state APFS volume.
  MASTER_ROOT=$(df "$MASTER" | awk 'END { print $NF }')
  sdst=$(diskutil info -plist "$MASTER_ROOT" | plutil -extract SolidState xml1 - -o - | sed -ne 's,<,,pg' | sed -ne 's,/>,,pg')
  if [ "$sdst" != "true" ]
  then
    echo ""
    echo "$MASTER IS A HARD DISK DRIVE, NOT THE EXPECTED SOLID-STATE DRIVE."
    echo ""
    echo "WOULD YOU LIKE TO PROCEED WITH ARCHIVING EVEN THOUGH IT IS NOT RECOMMENDED? [y/N]"
    confirm_or_exit
  fi
  ftyp=$(diskutil info -plist "$MASTER_ROOT" | plutil -extract FilesystemType xml1 - -o - | sed -ne 's,</*string>,,pg')
  if [ "$ftyp" != "apfs" ]
  then
    echo ""
    echo "$MASTER IS OF TYPE '$ftyp'"
    echo ""
    echo "IT NEEDS TO BE REFORMATTED AS APFS BEFORE YOU CAN PROCEED:"
    echo ""
    echo " Run Utilities -> Disk Utility"
    echo ""
    echo " Switch the View option to 'Show All Devices'."
    echo ""
    echo " Select the entry named 'PCIe SSD Media' (not the two entries indented below it)."
    echo ""
    echo " Click on 'Erase'."
    echo ""
    echo " Change the Scheme to 'GUID Partition Map' (which will expand the Format choices)."
    echo ""
    echo " Set the Format to 'APFS'."
    echo ""
    echo " Press Erase."
    echo ""
    echo "ALSO RUN:"
    echo ""
    echo " sudo trimforce enable"
    echo ""
    echo "IF NECESSARY TO ENABLE TRIM SUPPORT ON THE SOLID STATE DRIVE."
    echo ""
    echo "WOULD YOU LIKE TO PROCEED WITH ARCHIVING ON THE NON-APFS VOLUME ANYWAY? [y/N]"
    confirm_or_exit
  fi
fi

for dir in Archive Postings
do
  mkdir -p "$MASTER/$dir" || die "$0: Unable to create $MASTER/$dir"
done

for dir in Current Data Indexed Inverted Merged Pubmed
do
  mkdir -p "$WORKING/$dir" || die "$0: Unable to create $WORKING/$dir"
done

if [ ! -f "$MASTER/Archive/CACHEDIR.TAG" ]
then
  pm-prepare "$MASTER/Archive"
fi

date

# Per-stage elapsed times in seconds; stages that are skipped report 0.
DWN=0
POP=0
REF=0
CLR=0
COL=0
IDX=0
INV=0
MRG=0
PST=0

if [ "$startat" -lt 1 ]
then
  start_timer
  echo "Downloading PubMed Files"
  enter_dir "$WORKING/Pubmed"
  download-pubmed baseline updatefiles
  echo "Downloading MeSH Tree"
  enter_dir "$WORKING/Data"
  download-ncbi-data meshtree
  stop_timer
  DWN=$seconds

  start_timer
  echo "Populating PubMed Archive"
  enter_dir "$WORKING/Pubmed"
  pm-stash "$MASTER/Archive"
  stop_timer
  POP=$seconds

  start_timer
  echo "Refreshing Versioned Records"
  pm-refresh "$MASTER/Archive"
  stop_timer
  REF=$seconds
fi

if [ "$startat" -lt 5 ]
then
  # Only intermediate files of stages that are about to be re-run are removed.
  start_timer
  echo "Removing Previous Indices"
  if [ "$startat" -lt 2 ]
  then
    enter_dir "$WORKING/Indexed"
    target="$WORKING/Indexed"
    find "$target" -name "*.e2x" -delete
    find "$target" -name "*.e2x.gz" -delete
  fi
  if [ "$startat" -lt 3 ]
  then
    enter_dir "$WORKING/Inverted"
    target="$WORKING/Inverted"
    find "$target" -name "*.inv" -delete
    find "$target" -name "*.inv.gz" -delete
  fi
  if [ "$startat" -lt 4 ]
  then
    enter_dir "$WORKING/Merged"
    target="$WORKING/Merged"
    find "$target" -name "*.mrg" -delete
    find "$target" -name "*.mrg.gz" -delete
  fi
  stop_timer
  CLR=$seconds
fi

if [ "$startat" -lt 2 ]
then
  start_timer
  echo "Collecting PubMed Records"
  enter_dir "$WORKING/Pubmed"
  pm-collect "$MASTER/Archive" "$WORKING/Current"
  stop_timer
  COL=$seconds
fi

if [ "$startat" -lt 3 ]
then
  start_timer
  echo "Indexing PubMed Records"
  enter_dir "$WORKING/Current"
  pm-index "$WORKING/Indexed" "$WORKING/Data"
  stop_timer
  IDX=$seconds
fi

if [ "$startat" -lt 4 ]
then
  start_timer
  echo "Inverting PubMed Indices"
  enter_dir "$WORKING/Indexed"
  pm-invert "$WORKING/Inverted"
  stop_timer
  INV=$seconds
fi

if [ "$startat" -lt 5 ]
then
  start_timer
  echo "Merging Inverted Indices"
  enter_dir "$WORKING/Inverted"
  pm-merge "$WORKING/Merged"
  stop_timer
  MRG=$seconds
fi

if [ "$startat" -lt 6 ]
then
  start_timer
  echo "Producing Postings Files"
  enter_dir "$WORKING/Merged"
  pm-promote "$MASTER/Postings"
  stop_timer
  PST=$seconds
fi

echo "DWN $DWN seconds"
echo "POP $POP seconds"
echo "REF $REF seconds"
echo "CLR $CLR seconds"
echo "COL $COL seconds"
echo "IDX $IDX seconds"
echo "INV $INV seconds"
echo "MRG $MRG seconds"
echo "PST $PST seconds"

echo ""

# Smoke test: run a phrase query against the new postings and fetch the
# matching record from the archive.
phrase-search -path "$MASTER/Postings" -query "mapping of spatio-temporal pollution status AND 2008 [YEAR]" |
fetch-pubmed -path "$MASTER/Archive" |
xtract -pattern Author -if Affiliation -contains Medicine \
  -pfx "Archive and Index are " -element Initials

echo ""

date

# CONFIG is only set when the master path came from the command line, in
# which case the user is shown how to persist it in a shell startup file.
if [ -n "$CONFIG" ]
then
  target=bash_profile
  if ! grep "$target" "$HOME/.bashrc" >/dev/null 2>&1
  then
    if [ ! -f "$HOME/.$target" ] || grep 'bashrc' "$HOME/.$target" >/dev/null 2>&1
    then
      target=bashrc
    fi
  fi
  echo ""
  echo "For convenience, please execute the following to save the archive path to a variable:"
  echo ""
  echo " echo \"export EDIRECT_PUBMED_MASTER='${CONFIG}'\" >>" "\$HOME/.$target"
  echo ""
fi