1 /* 2 Copyright 2007, 2008 Daniel Zerbino (zerbino@ebi.ac.uk) 3 4 This file is part of Velvet. 5 6 Velvet is free software; you can redistribute it and/or modify 7 it under the terms of the GNU General Public License as published by 8 the Free Software Foundation; either version 2 of the License, or 9 (at your option) any later version. 10 11 Velvet is distributed in the hope that it will be useful, 12 but WITHOUT ANY WARRANTY; without even the implied warranty of 13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 GNU General Public License for more details. 15 16 You should have received a copy of the GNU General Public License 17 along with Velvet; if not, write to the Free Software 18 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 19 20 */ 21 #ifndef _GRAPHSTATS_H_ 22 #define _GRAPHSTATS_H_ 23 24 // This header file and the corresponding code file contain a load of 25 // miscellaneous functions, many of which coded quickly and used only once 26 // for reasons long forgotten since. 27 // Sorry for the mess ;-) 28 29 /////////////////////////////////////////////////////////////////// 30 // Useful functions 31 /////////////////////////////////////////////////////////////////// 32 33 // Original 34 double getNodeDensity(Node * node); 35 int * makeDummySubgraphMask(Graph * graph); 36 int estimated_cov_multi(Graph * graph, int * subgraphMask, double expCovMulti[100]); 37 void resolveRepeatOfAllSubgraphs(Graph * graph, ReadSet * reads, double expCovMulti[100], 38 boolean * dubious, boolean force_jumps, int pebbleRounds, 39 double rateChimericSubgraph, boolean discardChimericSubgraph, 40 double repeatNodeCovSD); 41 void resetUniqueness(Graph * graph); 42 // Original 43 44 void displayGraphStatistics(Graph * graph); 45 46 void displayGeneralStatistics(Graph * graph, char *filename, ReadSet * reads); 47 48 void exportLongNodeSequences(char *filename, Graph * graph, 49 Coordinate minLength); 50 51 void exportMediumNodeSequences(char *filename, Graph * graph, 52 Coordinate minLength); 53 54 IDnum readStarts(Node * node); 55 56 Coordinate readCoverage(Node * node); 57 58 IDnum strainMarkerCount(Node * node, IDnum firstStrain); 59 60 IDnum nodeMultiplicity(Node * node); 61 62 Coordinate n50(Graph * graph); 63 64 double estimated_cov(Graph * graph, char * directory); 65 66 Coordinate maxLength(Graph * graph); 67 68 boolean *removeLowCoverageNodesAndDenounceDubiousReads(Graph * graph, 69 double minCov); 70 71 void exportAMOSContigs(char *filename, Graph * graph, 72 Coordinate cutoff_length, ReadSet * reads); 73 74 IDnum usedReads(Graph * graph, Coordinate minContigLength); 75 76 Coordinate totalAssemblyLength(Graph * graph); 77 78 void logFinalStats(Graph * graph, Coordinate minContigKmerLength, char *directory); 79 80 void exportUnusedReads(Graph* graph, ReadSet * reads, Coordinate minContigKmerLength, char* filename); 81 82 /////////////////////////////////////////////////////////////////// 83 // Dodgy functions 84 /////////////////////////////////////////////////////////////////// 85 86 IDnum countSinksAndSources(Graph * graph); 87 88 IDnum countTangles(Graph * graph); 89 90 IDnum countRepeats(Graph * graph); 91 92 IDnum countSNPs(Graph * graph, IDnum firstStrain, int WORDLENGTH); 93 94 void displayGraphStatisticsSelective(Graph * graph, IDnum first); 95 96 void grossErrorRemoval(Graph * graph, IDnum firstStrain); 97 98 Coordinate countCommonLength(Graph * graph, IDnum firstStrain); 99 100 IDnum countBreakpoints(Graph * graph, IDnum firstStrain); 101 102 IDnum countStrainOnlyNodes(Graph * graph, IDnum firstStrain); 103 104 Coordinate countStrainOnlyBp(Graph * graph, IDnum firstStrain); 105 106 void displayStrainOnlySequences(Graph * graph, IDnum firstStrain, 107 char *inputFilename, char *filename, 108 int WORDLENGTH); 109 110 void displayStrainOnlyDescriptors(Graph * graph, IDnum firstStrain); 111 112 void chainSawCorrection(Graph * graph, int minMult); 113 114 void displayBreakpoints(Graph * graph, IDnum firstStrain); 115 116 void destroyStrainSpecificIslands(Graph * graph, IDnum firstStrain); 117 118 void spotIrregularReads(Graph * graph, IDnum firstStrain, 119 char *sequenceFile, char *outputFile); 120 121 void displayAlignmentToReference(Graph * graph, IDnum seqID, 122 IDnum firstStrain, 123 TightString ** sequences, int WORDLENGTH, 124 char *filename); 125 126 void removeReferenceMarkers(Graph * graph, IDnum firstStrain); 127 128 void testForBizarreMarkers(Graph * graph); 129 130 void surveyPaths(Graph * graph); 131 132 void destroyMixedReads(Graph * graph, IDnum minCoverage); 133 134 void destroySinglePoolNodes(Graph * graph); 135 void destroySinglePoolNodesStrict(Graph * graph); 136 void destroyShortTips(Graph * graph); 137 138 void destroyDisconnectedElements(Graph * graph); 139 void measureTangleSizes(Graph * graph, Coordinate maxLength); 140 141 void destroyEmptyNodes(Graph * graph); 142 143 void removeShortReads(Graph * graph); 144 145 Coordinate totalGraphLength(Graph * graph); 146 147 void contigStats(Node ** node, IDnum readCount); 148 149 void exportContigs(Node ** contigs, ReadSet * reads, char *filename, 150 int WORDLENGTH, int pairedReadsCount); 151 152 void removeLowCoverageNodes(Graph * graph, double minCov); 153 void removeHighCoverageNodes(Graph * graph, double maxCov); 154 155 void removeMissingStrain(Graph * graph, Category cat); 156 157 boolean isNatural(Graph * graph); 158 159 void searchForHallidayJunction(Graph * graph); 160 161 #endif 162