1 /*
2 Copyright 2007, 2008 Daniel Zerbino (zerbino@ebi.ac.uk)
3 
4     This file is part of Velvet.
5 
6     Velvet is free software; you can redistribute it and/or modify
7     it under the terms of the GNU General Public License as published by
8     the Free Software Foundation; either version 2 of the License, or
9     (at your option) any later version.
10 
11     Velvet is distributed in the hope that it will be useful,
12     but WITHOUT ANY WARRANTY; without even the implied warranty of
13     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14     GNU General Public License for more details.
15 
16     You should have received a copy of the GNU General Public License
17     along with Velvet; if not, write to the Free Software
18     Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
19 
20 */
// Include guard: makes repeated inclusion of this header a no-op.
// NOTE(review): no #include directive appears in this header, yet the
// prototypes below use Node, Graph, ReadSet, TightString, Category, IDnum,
// Coordinate and boolean. The including file must therefore declare those
// types before including this header -- presumably through the project's
// own type/graph headers; confirm the required include order.
// NOTE(review): identifiers that begin with an underscore followed by an
// uppercase letter are reserved for the implementation in C; renaming the
// guard would be safer but is left untouched here.
21 #ifndef _GRAPHSTATS_H_
22 #define _GRAPHSTATS_H_
23 
24 // This header file and the corresponding code file contain a load of
25 // miscellaneous functions, many of which were coded quickly and used only
26 // once, for reasons long since forgotten.
27 // Sorry for the mess ;-)
28 
29 ///////////////////////////////////////////////////////////////////
30 // Useful functions
31 ///////////////////////////////////////////////////////////////////
32 
33 // Original
// NOTE(review): the two "// Original" markers bracket this group of
// prototypes; what the label refers to is not stated in this header.
// All behavioural notes below are inferred from names and types only and
// should be confirmed against the implementation file.
//
// Takes one node and returns a double; judging by the name, some density
// measure for that node -- the exact definition is not visible here.
34 double getNodeDensity(Node * node);
// Returns a pointer to int, presumably a freshly built mask for the graph.
// Who owns (and frees) the returned buffer is not stated -- TODO confirm.
35 int * makeDummySubgraphMask(Graph * graph);
// expCovMulti is declared as double[100]; in a parameter list that is simply
// a pointer to double, so the 100 is not enforced by the compiler.
// Presumably callers must supply at least 100 elements -- confirm.
// The meaning of the int return value is not visible from this header.
36 int estimated_cov_multi(Graph * graph, int * subgraphMask, double expCovMulti[100]);
// Same remark on expCovMulti as above: the [100] is documentation only.
// dubious has the same type (boolean *) as the value returned by
// removeLowCoverageNodesAndDenounceDubiousReads(), declared further down in
// this header; presumably it is that array -- verify against the caller.
// The remaining parameters look like tuning options for the repeat
// resolution; their units and valid ranges are not shown here.
37 void resolveRepeatOfAllSubgraphs(Graph * graph, ReadSet * reads, double expCovMulti[100],
38 				 boolean * dubious, boolean force_jumps, int pebbleRounds,
39 				 double rateChimericSubgraph, boolean discardChimericSubgraph,
40 				 double repeatNodeCovSD);
// Takes the graph and returns nothing; by its name it resets some
// per-node "uniqueness" state -- confirm what exactly is reset.
41 void resetUniqueness(Graph * graph);
42 // Original
43 
// NOTE(review): the descriptions in this group are inferred from the names
// and signatures only; confirm details against the implementation file.
//
// Takes only the graph and returns nothing; presumably reports statistics
// about it (output destination not visible here).
44 void displayGraphStatistics(Graph * graph);
45 
// Like the above but also receives a file name and the read set;
// presumably filename is where the statistics are written -- confirm.
46 void displayGeneralStatistics(Graph * graph, char *filename, ReadSet * reads);
47 
// The two export functions share one signature: an output file name, the
// graph, and a minLength threshold expressed as a Coordinate. Presumably
// only nodes at least minLength long are exported, and "Long"/"Medium"
// select different node classes -- neither is visible from this header.
48 void exportLongNodeSequences(char *filename, Graph * graph,
49 			     Coordinate minLength);
50 
51 void exportMediumNodeSequences(char *filename, Graph * graph,
52 			       Coordinate minLength);
53 
// Per-node and whole-graph metrics. What each metric measures is inferred
// from the function names only -- confirm against the implementation file.
//
// Returns an IDnum (a count-like type) for one node; presumably the number
// of reads starting in that node.
54 IDnum readStarts(Node * node);
55 
// Returns a Coordinate (a length-like type) for one node; presumably the
// total read coverage of the node.
56 Coordinate readCoverage(Node * node);
57 
// firstStrain is an IDnum; presumably the first read ID belonging to the
// "strain" data set, so that markers with IDs from there on are counted --
// TODO confirm the boundary convention (inclusive or exclusive).
58 IDnum strainMarkerCount(Node * node, IDnum firstStrain);
59 
// Returns an IDnum for one node; the definition of "multiplicity" used
// here is not visible from this header.
60 IDnum nodeMultiplicity(Node * node);
61 
// Returns a Coordinate for the whole graph; presumably the N50 length
// statistic over its nodes -- confirm which lengths are used.
62 Coordinate n50(Graph * graph);
63 
// Returns a double; presumably an estimated coverage for the graph.
// The role of directory (input, output, or both) is not shown here.
64 double estimated_cov(Graph * graph, char * directory);
65 
// Returns a Coordinate for the whole graph; presumably the longest node.
// NOTE(review): measureTangleSizes(), declared further down in this header,
// uses maxLength as a parameter name as well.
66 Coordinate maxLength(Graph * graph);
67 
// Coverage filtering, export and final reporting. Descriptions are inferred
// from names and signatures only -- confirm against the implementation.
//
// Returns a pointer to boolean; presumably one flag per read marking the
// "dubious" ones found while removing nodes whose coverage is below minCov.
// Ownership of the returned array is not stated here -- TODO confirm who
// frees it. resolveRepeatOfAllSubgraphs(), declared earlier in this header,
// accepts a boolean * named dubious.
68 boolean *removeLowCoverageNodesAndDenounceDubiousReads(Graph * graph,
69 						       double minCov);
70 
// Receives an output file name, the graph, a length cutoff and the reads;
// presumably writes contigs in AMOS format, skipping those shorter than
// cutoff_length -- confirm.
71 void exportAMOSContigs(char *filename, Graph * graph,
72 		       Coordinate cutoff_length, ReadSet * reads);
73 
// Returns an IDnum; presumably the number of reads used by contigs of at
// least minContigLength.
74 IDnum usedReads(Graph * graph, Coordinate minContigLength);
75 
// Returns a Coordinate for the whole graph; presumably the summed length of
// the assembly (which nodes are included is not visible here).
76 Coordinate totalAssemblyLength(Graph * graph);
77 
// minContigKmerLength suggests the threshold is measured in k-mers rather
// than bases -- TODO confirm units. directory is presumably where the log
// is written.
78 void logFinalStats(Graph * graph, Coordinate minContigKmerLength, char *directory);
79 
// Same minContigKmerLength remark as above; filename is presumably the
// output file for the reads that were not used.
80 void exportUnusedReads(Graph* graph, ReadSet * reads, Coordinate minContigKmerLength, char* filename);
81 
82 ///////////////////////////////////////////////////////////////////
83 // Dodgy functions
84 ///////////////////////////////////////////////////////////////////
85 
// Counting helpers from the section the original author labels "Dodgy".
// Each takes the graph and returns a count (IDnum) or a length (Coordinate).
// What exactly is counted is inferred from the names only -- confirm against
// the implementation before relying on any of them.
//
// The firstStrain parameter recurs throughout this section; presumably it is
// the ID of the first read belonging to a second ("strain") data set, so
// that reads can be split into reference and strain by ID -- TODO confirm.
86 IDnum countSinksAndSources(Graph * graph);
87 
88 IDnum countTangles(Graph * graph);
89 
90 IDnum countRepeats(Graph * graph);
91 
// WORDLENGTH is an ordinary int parameter despite the macro-style
// upper-case name; presumably the k-mer length.
92 IDnum countSNPs(Graph * graph, IDnum firstStrain, int WORDLENGTH);
93 
// first is an IDnum; its role (presumably a read or node ID from which the
// statistics are restricted) is not visible here.
94 void displayGraphStatisticsSelective(Graph * graph, IDnum first);
95 
// Returns nothing and takes the graph by pointer, so it presumably modifies
// the graph in place.
96 void grossErrorRemoval(Graph * graph, IDnum firstStrain);
97 
98 Coordinate countCommonLength(Graph * graph, IDnum firstStrain);
99 
100 IDnum countBreakpoints(Graph * graph, IDnum firstStrain);
101 
// The next two differ only in return type: a node count (IDnum) versus a
// length (Coordinate, presumably in base pairs given the "Bp" suffix).
102 IDnum countStrainOnlyNodes(Graph * graph, IDnum firstStrain);
103 
104 Coordinate countStrainOnlyBp(Graph * graph, IDnum firstStrain);
105 
// Strain-related display and correction helpers. All return void;
// their effects are inferred from the names only -- confirm against the
// implementation file.
//
// Takes two file names: inputFilename and filename. Presumably the first is
// read and the second written, but the header does not say -- TODO confirm.
// WORDLENGTH is a plain int parameter (presumably the k-mer length).
106 void displayStrainOnlySequences(Graph * graph, IDnum firstStrain,
107 				char *inputFilename, char *filename,
108 				int WORDLENGTH);
109 
110 void displayStrainOnlyDescriptors(Graph * graph, IDnum firstStrain);
111 
// minMult is an int; presumably a minimum multiplicity threshold.
112 void chainSawCorrection(Graph * graph, int minMult);
113 
114 void displayBreakpoints(Graph * graph, IDnum firstStrain);
115 
// Presumably modifies the graph in place (void return, non-const pointer).
116 void destroyStrainSpecificIslands(Graph * graph, IDnum firstStrain);
117 
// Takes two file names; presumably sequenceFile is the input and outputFile
// the destination -- confirm.
118 void spotIrregularReads(Graph * graph, IDnum firstStrain,
119 			char *sequenceFile, char *outputFile);
120 
// sequences is a TightString **, presumably an array of sequence pointers
// indexed by sequence ID, with seqID selecting the one to align -- TODO
// confirm the indexing convention. filename is presumably the output file.
121 void displayAlignmentToReference(Graph * graph, IDnum seqID,
122 				 IDnum firstStrain,
123 				 TightString ** sequences, int WORDLENGTH,
124 				 char *filename);
125 
126 void removeReferenceMarkers(Graph * graph, IDnum firstStrain);
127 
// The next two take only the graph and return nothing; presumably
// diagnostic passes. What they report is not visible here.
128 void testForBizarreMarkers(Graph * graph);
129 
130 void surveyPaths(Graph * graph);
131 
// Graph clean-up helpers. The "destroy"/"remove" functions return void and
// take a non-const Graph pointer, so they presumably edit the graph in
// place; the selection criteria are inferred from the names only -- confirm
// against the implementation file.
//
// minCoverage is an IDnum (a count-like type), not a double as in the
// coverage-based removal functions declared elsewhere in this header.
132 void destroyMixedReads(Graph * graph, IDnum minCoverage);
133 
// How the "Strict" variant differs from the plain one is not visible here.
134 void destroySinglePoolNodes(Graph * graph);
135 void destroySinglePoolNodesStrict(Graph * graph);
136 void destroyShortTips(Graph * graph);
137 
138 void destroyDisconnectedElements(Graph * graph);
// NOTE(review): the parameter name maxLength is also the name of a function
// declared earlier in this header; inside this prototype the parameter
// hides it. Harmless here, but easy to trip over in the definition.
139 void measureTangleSizes(Graph * graph, Coordinate maxLength);
140 
141 void destroyEmptyNodes(Graph * graph);
142 
143 void removeShortReads(Graph * graph);
144 
// Returns a Coordinate for the whole graph; presumably the summed node
// length. How it differs from totalAssemblyLength(), declared earlier in
// this header, is not visible here.
145 Coordinate totalGraphLength(Graph * graph);
146 
// Contig-level helpers and miscellaneous graph operations. Descriptions are
// inferred from names and signatures only -- confirm against the
// implementation file.
//
// node is a Node **, presumably an array of contig node pointers despite the
// singular name; how its length is known (readCount? a terminator?) is not
// stated here -- TODO confirm.
147 void contigStats(Node ** node, IDnum readCount);
148 
// contigs is a Node **, presumably an array of node pointers; its length is
// not passed explicitly. WORDLENGTH is a plain int parameter (presumably the
// k-mer length). filename is presumably the output file.
149 void exportContigs(Node ** contigs, ReadSet * reads, char *filename,
150 		   int WORDLENGTH, int pairedReadsCount);
151 
// A matching pair: presumably nodes with coverage below minCov, respectively
// above maxCov, are removed from the graph. Whether the bound itself is
// inclusive is not visible here.
152 void removeLowCoverageNodes(Graph * graph, double minCov);
153 void removeHighCoverageNodes(Graph * graph, double maxCov);
154 
// cat is a Category value; presumably it selects the read category whose
// absence triggers removal -- confirm.
155 void removeMissingStrain(Graph * graph, Category cat);
156 
// Returns a boolean for the whole graph; the property being tested
// ("natural") is not defined in this header.
157 boolean isNatural(Graph * graph);
158 
// NOTE(review): "Halliday" is presumably a misspelling of "Holliday"
// (junction); the name is part of the public interface and is left as is.
159 void searchForHallidayJunction(Graph * graph);
160 
161 #endif /* _GRAPHSTATS_H_ */
162