1 #include "CommonHelp.h"
2 
groupby_help(void)3 void groupby_help(void) {
4 
5     cerr << "\nTool:    bedtools groupby " << endl;
6     cerr << "Version: " << VERSION << "\n";
7     cerr << "Summary: Summarizes a dataset column based upon" << endl;
8     cerr << "\t common column groupings. Akin to the SQL \"group by\" command." << endl << endl;
9 
10     cerr << "Usage:\t " << "bedtools groupby" << " -g [group_column(s)] -c [op_column(s)] -o [ops] " << endl;
11     cerr << "\t "     << "cat [FILE] | " << "bedtools groupby" << " -g [group_column(s)] -c [op_column(s)] -o [ops] " << endl << endl;
12 
13     cerr << "Options: " << endl;
14     cerr << "\t-i\t\t"        << "Input file. Assumes \"stdin\" if omitted." << endl << endl;
15 
16     cerr << "\t-g -grp\t\t"      << "Specify the columns (1-based) for the grouping." << endl;
17     cerr                         << "\t\t\tThe columns must be comma separated." << endl;
18     cerr                         << "\t\t\t- Default: 1,2,3" << endl << endl;
19 
20     cerr << "\t-c -opCols\t"     << "Specify the column (1-based) that should be summarized." << endl;
21     cerr                         << "\t\t\t- Required." << endl << endl;
22 
23     cerr << "\t-o -ops\t\t"      << "Specify the operation that should be applied to opCol." << endl;
24     cerr                         << "\t\t\tValid operations:" << endl;
25     cerr                         << "\t\t\t    sum, count, count_distinct, min, max," << endl;
26     cerr                         << "\t\t\t    mean, median, mode, antimode," << endl;
27     cerr                         << "\t\t\t    stdev, sstdev (sample standard dev.)," << endl;
28     cerr                         << "\t\t\t    collapse (i.e., print a comma separated list (duplicates allowed)), " << endl;
29     cerr                         << "\t\t\t    distinct (i.e., print a comma separated list (NO duplicates allowed)), " << endl;
30     cerr                         << "\t\t\t    distinct_sort_num (as distinct, but sorted numerically, ascending), " << endl;
31     cerr                         << "\t\t\t    distinct_sort_num_desc (as distinct, but sorted numerically, descending), " << endl;
32     cerr                         << "\t\t\t    concat   (i.e., merge values into a single, non-delimited string), " << endl;
33     cerr                         << "\t\t\t    freqdesc (i.e., print desc. list of values:freq)" << endl;
34     cerr                         << "\t\t\t    freqasc (i.e., print asc. list of values:freq)" << endl;
35     cerr                         << "\t\t\t    first (i.e., print first value)" << endl;
36     cerr                         << "\t\t\t    last (i.e., print last value)" << endl;
37 
38     cerr                         << "\t\t\t- Default: sum" << endl << endl;
39 
40     cerr						<< "\t\tIf there is only column, but multiple operations, all operations will be" << endl;
41     cerr						<< "\t\tapplied on that column. Likewise, if there is only one operation, but" << endl;
42     cerr						<< "\t\tmultiple columns, that operation will be applied to all columns." << endl;
43     cerr						<< "\t\tOtherwise, the number of columns must match the the number of operations," << endl;
44     cerr						<< "\t\tand will be applied in respective order." << endl;
45     cerr						<< "\t\tE.g., \"-c 5,4,6 -o sum,mean,count\" will give the sum of column 5," << endl;
46     cerr						<< "\t\tthe mean of column 4, and the count of column 6." << endl;
47     cerr						<< "\t\tThe order of output columns will match the ordering given in the command." << endl << endl<<endl;
48 
49     cerr << "\t-full\t\t"   << "Print all columns from input file.  The first line in the group is used." << endl;
50     cerr            << "\t\t\tDefault: print only grouped columns." << endl << endl;
51 
52     cerr << "\t-inheader\t" << "Input file has a header line - the first line will be ignored." << endl << endl ;
53 
54     cerr << "\t-outheader\t"    << "Print header line in the output, detailing the column names. " << endl;
55     cerr            << "\t\t\tIf the input file has headers (-inheader), the output file" << endl;
56     cerr            << "\t\t\twill use the input's column names." << endl;
57     cerr            << "\t\t\tIf the input file has no headers, the output file" << endl;
58     cerr            << "\t\t\twill use \"col_1\", \"col_2\", etc. as the column names." << endl << endl;
59 
60     cerr << "\t-header\t\t" << "same as '-inheader -outheader'" << endl << endl;
61 
62     cerr << "\t-ignorecase\t"   << "Group values regardless of upper/lower case." << endl << endl;
63 
64     cerr << "\t-prec\t"   << "Sets the decimal precision for output (Default: 5)" << endl << endl;
65     cerr << "\t-delim\t"                 << "Specify a custom delimiter for the collapse operations." << endl;
66     cerr                                 << "\t\t- Example: -delim \"|\"" << endl;
67     cerr                                 << "\t\t- Default: \",\"." << endl << endl;
68 
69     cerr << "Examples: " << endl;
70     cerr << "\t$ cat ex1.out" << endl;
71     cerr << "\tchr1 10  20  A   chr1    15  25  B.1 1000    ATAT" << endl;
72     cerr << "\tchr1 10  20  A   chr1    25  35  B.2 10000   CGCG" << endl << endl;
73     cerr << "\t$ groupBy -i ex1.out -g 1,2,3,4 -c 9 -o sum" << endl;
74     cerr << "\tchr1 10  20  A   11000" << endl << endl;
75     cerr << "\t$ groupBy -i ex1.out -grp 1,2,3,4 -opCols 9,9 -ops sum,max" << endl;
76     cerr << "\tchr1 10  20  A   11000   10000" << endl << endl;
77     cerr << "\t$ groupBy -i ex1.out -g 1,2,3,4 -c 8,9 -o collapse,mean" << endl;
78     cerr << "\tchr1 10  20  A   B.1,B.2,    5500" << endl << endl;
79     cerr << "\t$ cat ex1.out | groupBy -g 1,2,3,4 -c 8,9 -o collapse,mean" << endl;
80     cerr << "\tchr1 10  20  A   B.1,B.2,    5500" << endl << endl;
81     cerr << "\t$ cat ex1.out | groupBy -g 1,2,3,4 -c 10 -o concat" << endl;
82     cerr << "\tchr1 10  20  A   ATATCGCG" << endl << endl;
83 
84     cerr << "Notes: " << endl;
85     cerr << "\t(1)  The input file/stream should be sorted/grouped by the -grp. columns" << endl;
86     cerr << "\t(2)  If -i is unspecified, input is assumed to come from stdin." << endl << endl;
87 
88 
89     // end the program here
90     exit(1);
91 
92 }
93