1 import libsvm.*;
2 import java.io.*;
3 import java.util.*;
4 import java.text.DecimalFormat;
5 
/**
 * Command-line tool that linearly rescales the attributes (and optionally the
 * targets) of a sparse data file and prints the result to standard output.
 *
 * Each data line is read as "target index:value index:value ...". Indices are
 * assumed to start at 1 and to be ascending within a line; an attribute that
 * does not appear on a line is treated as having the value 0.
 *
 * Processing is done in three passes over the data file:
 *   pass 1   finds the largest attribute index,
 *   pass 2   finds the minimum/maximum of every attribute and of the target,
 *   pass 2.5 optionally restores (-r) or saves (-s) those scaling parameters,
 *   pass 3   writes the scaled data.
 */
class svm_scale
{
	/** Characters separating the target, the indices and the values of a data line. */
	private static final String DATA_DELIMITERS = " \t\n\r\f:";

	private String line = null;		// last line returned by readline()
	private double lower = -1.0;		// x scaling lower limit (-l)
	private double upper = 1.0;		// x scaling upper limit (-u)
	private double y_lower;			// y scaling limits (-y, or read from the restore file)
	private double y_upper;
	private boolean y_scaling = false;
	private double[] feature_max;		// per-attribute maximum, indexed by attribute index
	private double[] feature_min;		// per-attribute minimum, indexed by attribute index
	private double y_max = -Double.MAX_VALUE;
	private double y_min = Double.MAX_VALUE;
	private int max_index;			// largest attribute index seen so far
	private long num_nonzeros = 0;		// index:value pairs present in the input
	private long new_num_nonzeros = 0;	// index:value pairs written to the output
	private String save_filename = null;	// argument of -s, if given
	private String restore_filename = null;	// argument of -r, if given

	/** Prints the usage message to standard output and terminates with exit status 1. */
	private static void exit_with_help()
	{
		System.out.print(
		 "Usage: svm-scale [options] data_filename\n"
		+"options:\n"
		+"-l lower : x scaling lower limit (default -1)\n"
		+"-u upper : x scaling upper limit (default +1)\n"
		+"-y y_lower y_upper : y scaling limits (default: no y scaling)\n"
		+"-s save_filename : save scaling parameters to save_filename\n"
		+"-r restore_filename : restore scaling parameters from restore_filename\n"
		);
		System.exit(1);
	}

	/**
	 * Returns argv[i], the value belonging to an option. If the command line
	 * ends before that value, the usage message is printed and the program
	 * exits instead of failing with an ArrayIndexOutOfBoundsException.
	 */
	private static String option_value(String []argv, int i)
	{
		if(i >= argv.length)
			exit_with_help();
		return argv[i];
	}

	/**
	 * Opens filename for buffered reading. If the file cannot be opened,
	 * prints "can't open file ..." to standard error and exits with status 1.
	 */
	private static BufferedReader open_or_exit(String filename)
	{
		try {
			return new BufferedReader(new FileReader(filename));
		} catch (FileNotFoundException e) {
			System.err.println("can't open file " + filename);
			System.exit(1);
			return null;	// not reached
		}
	}

	/** Closes fp and returns a new reader positioned at the start of filename. */
	private BufferedReader rewind(BufferedReader fp, String filename) throws IOException
	{
		fp.close();
		return new BufferedReader(new FileReader(filename));
	}

	/**
	 * Prints the target followed by a space. With y scaling enabled the value
	 * is first mapped linearly from [y_min, y_max] to [y_lower, y_upper]; the
	 * end points are assigned directly so they come out exact.
	 */
	private void output_target(double value)
	{
		if(y_scaling)
		{
			if(value == y_min)
				value = y_lower;
			else if(value == y_max)
				value = y_upper;
			else
				value = y_lower + (y_upper-y_lower) *
				(value-y_min) / (y_max-y_min);
		}

		System.out.print(value + " ");
	}

	/**
	 * Prints "index:scaled_value " for one attribute, mapping the value
	 * linearly from [feature_min, feature_max] to [lower, upper]. Attributes
	 * whose minimum equals their maximum are skipped, and so are results that
	 * scale to exactly 0 (the output stays sparse).
	 */
	private void output(int index, double value)
	{
		/* skip single-valued attribute */
		if(feature_max[index] == feature_min[index])
			return;

		if(value == feature_min[index])
			value = lower;
		else if(value == feature_max[index])
			value = upper;
		else
			value = lower + (upper-lower) *
				(value-feature_min[index])/
				(feature_max[index]-feature_min[index]);

		if(value != 0)
		{
			System.out.print(index + ":" + value + " ");
			new_num_nonzeros++;
		}
	}

	/** Reads the next line of fp into the field line and returns it (null at end of file). */
	private String readline(BufferedReader fp) throws IOException
	{
		line = fp.readLine();
		return line;
	}

	/**
	 * Parses the leading options of the command line into the corresponding
	 * fields and returns the index of the first argument that is not an
	 * option (argv.length if there is none).
	 *
	 * A bare "-" is reported as an unknown option, and an option that is
	 * missing its value produces the usage message; an empty argument is
	 * treated as a non-option.
	 */
	private int parse_options(String []argv)
	{
		int i;
		for(i=0;i<argv.length;i++)
		{
			String option = argv[i];
			if(option.length() == 0 || option.charAt(0) != '-')
				break;

			/* '\0' never matches a case label, so "-" ends up in default */
			char flag = option.length() > 1 ? option.charAt(1) : '\0';
			switch(flag)
			{
				case 'l': lower = Double.parseDouble(option_value(argv, ++i));	break;
				case 'u': upper = Double.parseDouble(option_value(argv, ++i));	break;
				case 'y':
					  y_lower = Double.parseDouble(option_value(argv, ++i));
					  y_upper = Double.parseDouble(option_value(argv, ++i));
					  y_scaling = true;
					  break;
				case 's': save_filename = option_value(argv, ++i);	break;
				case 'r': restore_filename = option_value(argv, ++i);	break;
				default:
					  System.err.println("unknown option");
					  exit_with_help();
			}
		}
		return i;
	}

	/**
	 * Pass 1, restore part: raises max_index to the largest attribute index
	 * listed in the restore file, so the min/max arrays are large enough for
	 * every restored entry.
	 */
	private void scan_restore_max_index() throws IOException
	{
		BufferedReader fp_restore = open_or_exit(restore_filename);
		try {
			/* an optional "y" section occupies three lines ... */
			if(fp_restore.read() == 'y')
			{
				fp_restore.readLine();
				fp_restore.readLine();
				fp_restore.readLine();
			}
			/* ... followed by the "x" line and the "lower upper" line */
			fp_restore.readLine();
			fp_restore.readLine();

			String restore_line = null;
			while((restore_line = fp_restore.readLine())!=null)
			{
				StringTokenizer st = new StringTokenizer(restore_line);
				max_index = Math.max(max_index, Integer.parseInt(st.nextToken()));
			}
		} finally {
			fp_restore.close();
		}
	}

	/**
	 * Pass 1, data part: raises max_index to the largest attribute index in
	 * the data file and counts its index:value pairs in num_nonzeros.
	 */
	private void scan_data_max_index(BufferedReader fp) throws IOException
	{
		while(readline(fp) != null)
		{
			StringTokenizer st = new StringTokenizer(line, DATA_DELIMITERS);
			st.nextToken();		// skip the target
			while(st.hasMoreTokens())
			{
				int index = Integer.parseInt(st.nextToken());
				max_index = Math.max(max_index, index);
				st.nextToken();	// skip the value
				num_nonzeros++;
			}
		}
	}

	/**
	 * Pass 2: computes y_min/y_max and feature_min/feature_max from the data.
	 * Attributes missing from a line contribute the value 0.
	 */
	private void scan_min_max(BufferedReader fp) throws IOException
	{
		while(readline(fp) != null)
		{
			int next_index = 1;
			int i;

			StringTokenizer st = new StringTokenizer(line, DATA_DELIMITERS);
			double target = Double.parseDouble(st.nextToken());
			y_max = Math.max(y_max, target);
			y_min = Math.min(y_min, target);

			while(st.hasMoreTokens())
			{
				int index = Integer.parseInt(st.nextToken());
				double value = Double.parseDouble(st.nextToken());

				/* indices skipped since the previous pair are implicit zeros */
				for(i=next_index;i<index;i++)
				{
					feature_max[i] = Math.max(feature_max[i], 0);
					feature_min[i] = Math.min(feature_min[i], 0);
				}

				feature_max[index] = Math.max(feature_max[index], value);
				feature_min[index] = Math.min(feature_min[index], value);
				next_index = index + 1;
			}

			/* trailing indices that the line does not mention */
			for(i=next_index;i<=max_index;i++)
			{
				feature_max[i] = Math.max(feature_max[i], 0);
				feature_min[i] = Math.min(feature_min[i], 0);
			}
		}
	}

	/**
	 * Pass 2.5 (-r): replaces the scaling parameters with those stored in the
	 * restore file. An optional "y" section overrides the y limits and enables
	 * y scaling; the "x" section overrides lower/upper and the min/max of
	 * every attribute it lists.
	 */
	private void restore_parameters() throws IOException
	{
		BufferedReader fp_restore = open_or_exit(restore_filename);
		try {
			fp_restore.mark(2);				// for reset
			if(fp_restore.read() == 'y')
			{
				fp_restore.readLine();		// pass the '\n' after 'y'
				StringTokenizer st = new StringTokenizer(fp_restore.readLine());
				y_lower = Double.parseDouble(st.nextToken());
				y_upper = Double.parseDouble(st.nextToken());
				st = new StringTokenizer(fp_restore.readLine());
				y_min = Double.parseDouble(st.nextToken());
				y_max = Double.parseDouble(st.nextToken());
				y_scaling = true;
			}
			else
				fp_restore.reset();		// no "y" section: un-read the character

			if(fp_restore.read() == 'x')
			{
				fp_restore.readLine();		// pass the '\n' after 'x'
				StringTokenizer st = new StringTokenizer(fp_restore.readLine());
				lower = Double.parseDouble(st.nextToken());
				upper = Double.parseDouble(st.nextToken());

				String restore_line = null;
				while((restore_line = fp_restore.readLine())!=null)
				{
					StringTokenizer st2 = new StringTokenizer(restore_line);
					int idx = Integer.parseInt(st2.nextToken());
					double fmin = Double.parseDouble(st2.nextToken());
					double fmax = Double.parseDouble(st2.nextToken());
					if(idx <= max_index)
					{
						feature_min[idx] = fmin;
						feature_max[idx] = fmax;
					}
				}
			}
		} finally {
			fp_restore.close();
		}
	}

	/**
	 * Pass 2.5 (-s): writes the scaling parameters to save_filename in the
	 * layout that restore_parameters() reads back. Single-valued attributes
	 * are not written.
	 */
	private void save_parameters() throws IOException
	{
		BufferedWriter fp_save = null;

		try {
			fp_save = new BufferedWriter(new FileWriter(save_filename));
		} catch(IOException e) {
			System.err.println("can't open file " + save_filename);
			System.exit(1);
		}

		try {
			/*
			 * Locale.ROOT keeps '.' as the decimal separator. With the default
			 * locale a decimal-comma environment would write numbers that
			 * Double.parseDouble cannot read back when the file is restored.
			 */
			Formatter formatter = new Formatter(new StringBuilder(), Locale.ROOT);

			if(y_scaling)
			{
				formatter.format("y\n");
				formatter.format("%.16g %.16g\n", y_lower, y_upper);
				formatter.format("%.16g %.16g\n", y_min, y_max);
			}
			formatter.format("x\n");
			formatter.format("%.16g %.16g\n", lower, upper);
			for(int i=1;i<=max_index;i++)
			{
				if(feature_min[i] != feature_max[i])
					formatter.format("%d %.16g %.16g\n", i, feature_min[i], feature_max[i]);
			}
			fp_save.write(formatter.toString());
		} finally {
			fp_save.close();
		}
	}

	/** Pass 3: writes every data line, scaled, to standard output. */
	private void scale(BufferedReader fp) throws IOException
	{
		while(readline(fp) != null)
		{
			int next_index = 1;
			int i;

			StringTokenizer st = new StringTokenizer(line, DATA_DELIMITERS);
			output_target(Double.parseDouble(st.nextToken()));

			while(st.hasMoreTokens())
			{
				int index = Integer.parseInt(st.nextToken());
				double value = Double.parseDouble(st.nextToken());

				/* implicit zeros may scale to a nonzero value, so emit them too */
				for(i=next_index;i<index;i++)
					output(i, 0);
				output(index, value);
				next_index = index + 1;
			}

			for(i=next_index;i<=max_index;i++)
				output(i, 0);
			System.out.print("\n");
		}
	}

	/**
	 * Runs the tool for the given command line: validates the options, then
	 * performs the passes described in the class comment. Usage errors and
	 * files that cannot be opened terminate the program with exit status 1.
	 *
	 * @param argv command-line arguments: options followed by the data file name
	 * @throws IOException if reading the data/restore file or writing the save file fails
	 */
	private void run(String []argv) throws IOException
	{
		int data_arg = parse_options(argv);

		if(!(upper > lower) || (y_scaling && !(y_upper > y_lower)))
		{
			System.err.println("inconsistent lower/upper specification");
			System.exit(1);
		}
		if(restore_filename != null && save_filename != null)
		{
			System.err.println("cannot use -r and -s simultaneously");
			System.exit(1);
		}

		/* exactly one argument, the data file, must follow the options */
		if(argv.length != data_arg+1)
			exit_with_help();

		String data_filename = argv[data_arg];
		BufferedReader fp = open_or_exit(data_filename);

		try {
			/* assumption: min index of attributes is 1 */
			/* pass 1: find out max index of attributes */
			max_index = 0;
			if(restore_filename != null)
				scan_restore_max_index();
			scan_data_max_index(fp);

			try {
				feature_max = new double[(max_index+1)];
				feature_min = new double[(max_index+1)];
			} catch(OutOfMemoryError e) {
				System.err.println("can't allocate enough memory");
				System.exit(1);
			}
			Arrays.fill(feature_max, -Double.MAX_VALUE);
			Arrays.fill(feature_min, Double.MAX_VALUE);

			/* pass 2: find out min/max value */
			fp = rewind(fp, data_filename);
			scan_min_max(fp);

			/* pass 2.5: save/restore feature_min/feature_max */
			fp = rewind(fp, data_filename);
			if(restore_filename != null)
				restore_parameters();
			if(save_filename != null)
				save_parameters();

			/* pass 3: scale */
			scale(fp);

			if(new_num_nonzeros > num_nonzeros)
				System.err.print(
				 "WARNING: original #nonzeros " + num_nonzeros+"\n"
				+"         new      #nonzeros " + new_num_nonzeros+"\n"
				+"Use -l 0 if many original feature values are zeros\n");
		} finally {
			/* closing an already closed reader is a no-op, so this is safe after rewind() */
			fp.close();
		}
	}

	/**
	 * Program entry point; see exit_with_help() for the accepted options.
	 *
	 * @param argv command-line arguments
	 * @throws IOException if an input or output file cannot be read or written
	 */
	public static void main(String argv[]) throws IOException
	{
		svm_scale s = new svm_scale();
		s.run(argv);
	}
}
351