1 /*
2 Copyright (c) by respective owners including Yahoo!, Microsoft, and
3 individual contributors. All rights reserved. Released under a BSD
4 license as described in the file LICENSE.
5 */
6 #include "example.h"
7
order_features(const void * first,const void * second)8 int order_features(const void* first, const void* second)
9 { return ((feature*)first)->weight_index - ((feature*)second)->weight_index;}
10
order_audit_features(const void * first,const void * second)11 int order_audit_features(const void* first, const void* second)
12 {
13 return (int)(((audit_data*)first)->weight_index) - (int)(((audit_data*)second)->weight_index);
14 }
15
unique_features(v_array<feature> & features,int max=-1)16 void unique_features(v_array<feature>& features, int max=-1)
17 {
18 if (features.empty())
19 return;
20 feature* last = features.begin;
21 if (max < 0)
22 {
23 for (feature* current = features.begin+1; current != features.end; current++)
24 if (current->weight_index != last->weight_index)
25 *(++last) = *current;
26 }
27 else
28 for (feature* current = features.begin+1; current != features.end && last+1 < features.begin+max; current++)
29 if (current->weight_index != last->weight_index)
30 *(++last) = *current;
31
32 features.end = ++last;
33 }
34
unique_audit_features(v_array<audit_data> & features,int max=-1)35 void unique_audit_features(v_array<audit_data>& features, int max = -1)
36 {
37 if (features.empty())
38 return;
39 audit_data* last = features.begin;
40 if (max < 0)
41 {
42 for (audit_data* current = features.begin+1;
43 current != features.end; current++)
44 if (current->weight_index != last->weight_index)
45 *(++last) = *current;
46 }
47 else
48 for (audit_data* current = features.begin+1;
49 current != features.end && last+1 < features.begin+max; current++)
50 if (current->weight_index != last->weight_index)
51 *(++last) = *current;
52
53 features.end = ++last;
54 }
55
unique_sort_features(bool audit,uint32_t parse_mask,example * ae)56 void unique_sort_features(bool audit, uint32_t parse_mask, example* ae)
57 {
58 for (unsigned char* b = ae->indices.begin; b != ae->indices.end; b++)
59 {
60 v_array<feature> features = ae->atomics[*b];
61
62 for (size_t i = 0; i < features.size(); i++)
63 features[i].weight_index &= parse_mask;
64 qsort(features.begin, features.size(), sizeof(feature),
65 order_features);
66 unique_features(ae->atomics[*b]);
67
68 if (audit)
69 {
70 v_array<audit_data> afeatures = ae->audit_features[*b];
71
72 for (size_t i = 0; i < ae->atomics[*b].size(); i++)
73 afeatures[i].weight_index &= parse_mask;
74
75 qsort(afeatures.begin, afeatures.size(), sizeof(audit_data),
76 order_audit_features);
77 unique_audit_features(afeatures);
78 }
79 }
80 ae->sorted=true;
81 }
82