1 /*
2 Copyright (c) by respective owners including Yahoo!, Microsoft, and
3 individual contributors. All rights reserved.  Released under a BSD
4 license as described in the file LICENSE.
5  */
6 #include "example.h"
7 
order_features(const void * first,const void * second)8 int order_features(const void* first, const void* second)
9 { return ((feature*)first)->weight_index - ((feature*)second)->weight_index;}
10 
order_audit_features(const void * first,const void * second)11 int order_audit_features(const void* first, const void* second)
12 {
13   return (int)(((audit_data*)first)->weight_index) - (int)(((audit_data*)second)->weight_index);
14 }
15 
unique_features(v_array<feature> & features,int max=-1)16 void unique_features(v_array<feature>& features, int max=-1)
17 {
18   if (features.empty())
19     return;
20   feature* last = features.begin;
21   if (max < 0)
22     {
23       for (feature* current = features.begin+1; current != features.end; current++)
24 	if (current->weight_index != last->weight_index)
25 	  *(++last) = *current;
26     }
27   else
28     for (feature* current = features.begin+1; current != features.end && last+1 < features.begin+max; current++)
29       if (current->weight_index != last->weight_index)
30 	*(++last) = *current;
31 
32   features.end = ++last;
33 }
34 
unique_audit_features(v_array<audit_data> & features,int max=-1)35 void unique_audit_features(v_array<audit_data>& features, int max = -1)
36 {
37   if (features.empty())
38     return;
39   audit_data* last = features.begin;
40   if (max < 0)
41     {
42       for (audit_data* current = features.begin+1;
43 	   current != features.end; current++)
44 	if (current->weight_index != last->weight_index)
45 	  *(++last) = *current;
46     }
47   else
48     for (audit_data* current = features.begin+1;
49 	 current != features.end && last+1 < features.begin+max; current++)
50       if (current->weight_index != last->weight_index)
51 	*(++last) = *current;
52 
53   features.end = ++last;
54 }
55 
unique_sort_features(bool audit,uint32_t parse_mask,example * ae)56 void unique_sort_features(bool audit, uint32_t parse_mask, example* ae)
57 {
58   for (unsigned char* b = ae->indices.begin; b != ae->indices.end; b++)
59     {
60       v_array<feature> features = ae->atomics[*b];
61 
62       for (size_t i = 0; i < features.size(); i++)
63 	features[i].weight_index &= parse_mask;
64       qsort(features.begin, features.size(), sizeof(feature),
65 	    order_features);
66       unique_features(ae->atomics[*b]);
67 
68       if (audit)
69 	{
70 	  v_array<audit_data> afeatures = ae->audit_features[*b];
71 
72 	  for (size_t i = 0; i < ae->atomics[*b].size(); i++)
73 	    afeatures[i].weight_index &= parse_mask;
74 
75 	  qsort(afeatures.begin, afeatures.size(), sizeof(audit_data),
76 		order_audit_features);
77 	  unique_audit_features(afeatures);
78 	}
79     }
80   ae->sorted=true;
81 }
82