1#!/bin/sh -e
2# Clustering workflow script
# Abort the workflow with an error message.
# Arguments: $1 - description of what went wrong
# Outputs:   writes "Error: <message>" to stderr, so the diagnostic does not
#            mix with regular output on stdout
# Exits:     always terminates the shell with status 1
fail() {
    echo "Error: $1" >&2
    exit 1
}
7
# Report whether a path is missing as a regular file.
# Arguments: $1 - path to check
# Returns:   0 when "$1" is not an existing regular file, 1 when it is
notExists() {
	if [ -f "$1" ]; then
		return 1
	fi
	return 0
}
11
# Validate the command line before any work is done.
# Expected arguments: $1 = <sequenceDB>, $2 = <outDB>, $3 = <tmp>.
# Fatal problems are reported on stderr and end the script with status 1.
# Each check is a full if-statement so that the exit does not depend on the
# preceding echo having succeeded.
if [ "$#" -ne 3 ]; then
    echo "Please provide <sequenceDB> <outDB> <tmp>" >&2
    exit 1
fi
# the input database must already exist ...
if [ ! -f "$1.dbtype" ]; then
    echo "$1.dbtype not found!" >&2
    exit 1
fi
# ... while an already existing output database is refused
if [ -f "$2.dbtype" ]; then
    echo "$2.dbtype exists already!" >&2
    exit 1
fi
# a missing tmp directory is not fatal: report it and create it
if [ ! -d "$3" ]; then
    echo "tmp directory $3 not found!"
    # stop here even when the shebang's -e is not in effect (e.g. "sh script")
    mkdir -p "$3" || exit 1
fi
18
# $1 is the input sequence database and $3 the tmp directory; $2 (the output
# database) is used directly by the final merge step.
TMP_PATH="$3"
INPUT="$1"

# Intermediate databases, all located inside the tmp directory.
redundancy_aln="${TMP_PATH}/aln_redundancy"
redundancy_clu="${TMP_PATH}/clu_redundancy"
redundancy_sub="${TMP_PATH}/input_step_redundancy"
pref_db="${TMP_PATH}/pref"
aln_db="${TMP_PATH}/aln"
step0_clu="${TMP_PATH}/clu_step0"

# Except for the final merge, a step only runs while its result's .dbtype
# file is missing from the tmp directory.
# NOTE(review): STEP is not assigned anywhere in the visible script; unless it
# is inherited from the environment it expands to an empty string in the
# messages of the first three steps - confirm.

# clusthash: input database -> aln_redundancy
if notExists "${redundancy_aln}.dbtype"; then
    # shellcheck disable=SC2086
    "$MMSEQS" clusthash "$INPUT" "$redundancy_aln" ${DETECTREDUNDANCY_PAR} \
        || fail "Fast filter step $STEP died"
fi

# clust: input database + aln_redundancy -> clu_redundancy
if notExists "${redundancy_clu}.dbtype"; then
    # shellcheck disable=SC2086
    "$MMSEQS" clust "$INPUT" "$redundancy_aln" "$redundancy_clu" ${CLUSTER_PAR} \
        || fail "Fast Cluster filter step $STEP died"
fi

# createsubdb: clu_redundancy + input database -> input_step_redundancy
if notExists "${redundancy_sub}.dbtype"; then
    # shellcheck disable=SC2086
    "$MMSEQS" createsubdb "$redundancy_clu" "$INPUT" "$redundancy_sub" ${VERBOSITY} --subdb-mode 1 \
        || fail "MMseqs order step $STEP died"
fi

# From here on the steps work on the sub-database; the database passed as $1
# is remembered in ORIGINAL for the final merge.
ORIGINAL="$INPUT"
INPUT="$redundancy_sub"

# prefilter: sub-database against itself -> pref
# ($RUNNER stays unquoted on purpose: it may be empty or hold several words)
if notExists "${pref_db}.dbtype"; then
    # shellcheck disable=SC2086
    $RUNNER "$MMSEQS" prefilter "$INPUT" "$INPUT" "$pref_db" $PREFILTER_PAR \
        || fail "Prefilter died"
fi

# alignment: the module named by ALIGN_MODULE turns pref into aln
if notExists "${aln_db}.dbtype"; then
    # shellcheck disable=SC2086
    $RUNNER "$MMSEQS" "${ALIGN_MODULE}" "$INPUT" "$INPUT" "$pref_db" "$aln_db" $ALIGNMENT_PAR \
        || fail "Alignment died"
fi

# clust: sub-database + aln -> clu_step0
if notExists "${step0_clu}.dbtype"; then
    # shellcheck disable=SC2086
    "$MMSEQS" clust "$INPUT" "$aln_db" "$step0_clu" $CLUSTER_PAR \
        || fail "Clustering died"
fi

# mergeclusters: combine clu_redundancy and clu_step0 into the output
# database $2, relative to the original input; this step always runs.
# shellcheck disable=SC2086
"$MMSEQS" mergeclusters "$ORIGINAL" "$2" "$redundancy_clu" "$step0_clu" $MERGECLU_PAR \
    || fail "Merging of clusters has died"
67
# Optional cleanup: when REMOVE_TMP is non-empty, remove every intermediate
# database from the tmp directory with rmdb (same order as they are listed),
# followed by two plain files.
if [ -n "${REMOVE_TMP}" ]; then
    for tmp_db in pref aln clu_step0 clu_redundancy aln_redundancy input_step_redundancy; do
        # shellcheck disable=SC2086
        "$MMSEQS" rmdb "${TMP_PATH}/${tmp_db}" ${VERBOSITY}
    done
    # kept as two separate commands so each removal is checked on its own
    rm -f "${TMP_PATH}/order_redundancy"
    rm -f "${TMP_PATH}/clustering.sh"
fi
84