function Problem = ssread (directory, tmp)
%SSREAD read a Problem in Matrix Market or Rutherford/Boeing format
% Problem = ssread (directory) reads a Problem from a directory containing a
% set of files created by sswrite, in either Matrix Market or Rutherford/Boeing
% format.  See sswrite for a description of the Problem struct.
%
% Usage: Problem = ssread (directory)
%        Problem = ssread (directory, tmp)
%
% Example:
%
%   load west0479
%   clear Problem
%   Problem.name = 'HB/west0479' ;
%   Problem.title = '8 STAGE COLUMN SECTION, ALL SECTIONS RIGOROUS (CHEM.ENG.)';
%   Problem.A = west0479 ;
%   Problem.id = 267 ;          % the id number of west0479 in the collection
%   Problem.date = '1983' ;
%   Problem.author = 'A. Westerberg' ;
%   Problem.ed = 'I. Duff, R. Grimes, J. Lewis'
%   Problem.kind = 'chemical process simulation problem' ;
%   sswrite (Problem, 'RB/', '') ;
%   Prob3 = ssread ('RB/HB/west0479')
%   isequal (Problem, Prob3)
%
% This part of the example requires CHOLMOD, for the mread function:
%
%   sswrite (Problem, 'MM/') ;
%   Prob2 = ssread ('MM/HB/west0479')
%   isequal (Problem, Prob2)
%
% You can also compare this Problem with the version in the SuiteSparse Matrix
% Collection, with ssget(267) or ssget('HB/west0479').  Note that this includes
% the 22 explicit zero entries present in the west0479 Harwell/Boeing matrix,
% but not included in the MATLAB west0479.mat demo matrix.  Those entries are
% present in the SuiteSparse Matrix Collection.  This example assumes your current
% directory is the RBio directory, containing the west0479 problem in the
% RBio/Test directory:
%
%   Prob5 = ssget ('HB/west0479')
%   Prob6 = ssread ('Test/west0479')
%   isequal (Prob5, Prob6)
%
% The directory can be a compressed tar file of the form "name.tar.gz", in
% which case the tarfile is uncompressed into a temporary directory, and
% the temporary directory is deleted when done.  The '.tar.gz' should not be
% part of the directory argument.  In this case, a 2nd input argument can be
% provided:  Problem = ssread (directory, tmp).  The problem is extracted into
% the tmp directory.  If tmp is not present, a unique name generated by the
% tempname function is used instead.
%
% Either '/' or '\' may be used as the path separator in the directory
% argument, and trailing separators are ignored.
%
% Note that ssget is much faster than ssread.  ssread is useful if you are
% short on disk space, and want to have just one copy of the collection that
% can be read by MATLAB (via ssread) and a non-MATLAB program (the MM or RB
% versions of the collection).
%
% See also sswrite, mread, mwrite, RBread, RBwrite, ssget, untar, tempdir.

% Optionally uses the CHOLMOD mread mexFunction, for reading Problems in
% Matrix Market format.

% Copyright 2006-2007, Timothy A. Davis, http://www.suitesparse.com

%-------------------------------------------------------------------------------
% determine the Problem name from the directory name
%-------------------------------------------------------------------------------

% normalize all path separators to '/'
directory = regexprep (directory, '[\/\\]', '/') ;

% strip trailing separators, so that 'RB/HB/west0479/' yields the name
% 'west0479' rather than an empty name
while (length (directory) > 1 && directory (end) == '/')
    directory = directory (1:end-1) ;
end

% the Problem name is the last component of the path
t = find (directory == '/') ;
if (isempty (t))
    name = directory ;
else
    name = directory (t(end)+1:end) ;
end

%-------------------------------------------------------------------------------
% open the directory, or untar the tar.gz file
%-------------------------------------------------------------------------------

d = dir (directory) ;
is_tar = 0 ;

if (isempty (d))
    % the directory does not exist; look for a .tar.gz file instead
    if (nargin < 2)
        tmpdir = [tempname '_ssread_' name] ;
    else
        tmpdir = [tmp '/' name] ;
    end
    try
        % try untaring the problem
        untar ([directory '.tar.gz'], tmpdir) ;
    catch
        % untar failed, make sure tmpdir is deleted (best effort only)
        try
            rmdir (tmpdir, 's') ;
        catch
        end
        error (['unable to read problem: ' directory]) ;
    end
    % the tar file is expected to contain a single directory called name
    directory = [tmpdir '/' name] ;
    d = dir (directory) ;
    is_tar = 1 ;
end

%-------------------------------------------------------------------------------
% read the problem
%-------------------------------------------------------------------------------

try

    %---------------------------------------------------------------------------
    % get name, title, id, kind, date, author, editor, notes from master file
    %---------------------------------------------------------------------------

    masterfile = [directory '/' name] ;
    [Problem, notes, RB] = get_header (masterfile) ;

    %---------------------------------------------------------------------------
    % get the A and Zero matrices from the master file and add to the Problem
    %---------------------------------------------------------------------------

    if (RB)
        % read in the primary Rutherford/Boeing file
        [Problem.A, Zeros] = RBread ([masterfile '.rb']) ;
    else
        % read in the primary Matrix Market file.  Get patterns as binary.
        [Problem.A, Zeros] = mread ([masterfile '.mtx'], 1) ;
    end
    if (nnz (Zeros) > 0)
        Problem.Zeros = Zeros ;
    end

    % add the notes after A and Zeros
    if (~isempty (notes))
        Problem.notes = notes ;
    end

    namelen = length (name) ;

    %---------------------------------------------------------------------------
    % read b, x, aux (incl. any aux.cell sequences), stored as separate files
    %---------------------------------------------------------------------------

    for k = 1:length(d)

        % get the next filename in the directory
        file = d(k).name ;
        fullfilename = [directory '/' file] ;

        if (length (file) < namelen + 1)

            % unrecognized file; skip it
            continue

        elseif (strcmp (file, [name '.mtx']))

            % skip the master file; already read in
            continue

        elseif (strcmp (file, [name '_b.mtx']))

            % read in b as a Matrix Market file
            Problem.b = mtx_read (fullfilename, RB) ;

        elseif (strcmp (file, [name '_x.mtx']))

            % read in x as a Matrix Market file
            Problem.x = mtx_read (fullfilename, RB) ;

        elseif (strcmp (file, [name '_b.rb']))

            % read in b as a Rutherford/Boeing file
            Problem.b = RBread (fullfilename) ;

        elseif (strcmp (file, [name '_x.rb']))

            % read in x as a Rutherford/Boeing file
            Problem.x = RBread (fullfilename) ;

        elseif (strcmp (file (1:namelen+1), [name '_']))

            % read in an aux component, in the form name_whatever.mtx
            thedot = find (file == '.', 1, 'last') ;
            ext = file (thedot:end) ;

            if (strcmp (ext, '.txt'))

                % get a txt file as either a char array or cell array of strings
                C = sstextread (fullfilename, Problem.id > 2776) ;

            elseif (strcmp (ext, '.mtx'))

                % read a full or sparse auxiliary matrix in the Matrix Market
                % form, or a full auxiliary matrix in the Rutherford/Boeing form.
                C = mtx_read (fullfilename, RB) ;

            elseif (strcmp (ext, '.rb'))

                % read in a sparse matrix, for a Rutherford/Boeing collection
                C = RBread (fullfilename) ;

            else

                % this file is not recognized - skip it.
                C = [ ] ;

            end

            % determine the name of the component and place it in the Problem
            if (~isempty (C))
                % Determine if this is part of an aux.whatever cell sequence.
                % These filenames have the form name_whatever_#.mtx, where name
                % is the name of the Problem, and # is a number (1 or more
                % digits) greater than zero.  If # = i, this becomes the
                % aux.whatever{i} matrix.
                suffix = file (namelen+2:thedot-1) ;
                t = find (suffix == '_', 1, 'last') ;
                what = '' ;
                i = [ ] ;
                if (~isempty (t))
                    what = suffix (1:t-1) ;
                    digits = suffix (t+1:end) ;
                    % Only a pure digit string is treated as a cell index.
                    % str2num is deliberately not used: it evaluates its
                    % argument, so a suffix such as 'pi', 'inf', or the name
                    % of any function on the path would be executed.
                    if (~isempty (digits) && all (digits >= '0' & digits <= '9'))
                        i = str2double (digits) ;
                    end
                end
                if (~isempty (i) && i > 0 && ~isempty (what))
                    % this is part of aux.whatever{i} cell array
                    Problem.aux.(what) {i,1} = C ;
                elseif (~isempty (suffix))
                    % this is not a cell, simply an aux.whatever matrix
                    Problem.aux.(suffix) = C ;
                end
            end
        end
    end

    %---------------------------------------------------------------------------
    % delete the uncompressed version of the tar file
    %---------------------------------------------------------------------------

    if (is_tar)
        rmdir (tmpdir, 's') ;
    end

catch

    %---------------------------------------------------------------------------
    % catch the error, delete the temp directory, and rethrow the error
    %---------------------------------------------------------------------------

    % Save the error before cleaning up: if the rmdir below fails, its own
    % error would otherwise replace the one that is meant to be reported.
    err = lasterror ;
    try
        if (is_tar)
            rmdir (tmpdir, 's') ;
        end
    catch
    end
    rethrow (err) ;

end
253
254
255%-------------------------------------------------------------------------------
256% get_header: get the header of the master file (Group/name/name.txt or .mtx)
257%-------------------------------------------------------------------------------
258
function [Problem, notes, RB] = get_header (masterfile)
% Get the name, title, id, kind, date, author, editor and notes from the master
% file.  They appear as structured comments in the Matrix Market file
% (masterfile.mtx) or in the text file for a problem in Rutherford/Boeing
% format (masterfile.txt).
%
% Input:
%   masterfile  path of the master file, without its extension
%
% Outputs:
%   Problem     struct with the fields found in the header; valid_problem
%               raises an error if a required field is missing
%   notes       char array of the notes, or empty if there are none
%   RB          1 if the problem is in Rutherford/Boeing format, 0 otherwise
%
% An error is raised if neither masterfile.mtx nor masterfile.txt can be
% opened, or if the name, title, or id is malformed.  The file is always
% closed before an error is raised or the function returns.

% first assume it's in Matrix Market format
f = fopen ([masterfile '.mtx'], 'r') ;
if (f < 0)
    % oops, that failed.  This must be a problem in Rutherford/Boeing format
    RB = 1 ;
    f = fopen ([masterfile '.txt'], 'r') ;
    if (f < 0)
        % oops again, this is not a valid problem in the SuiteSparse collection
        error (['invalid problem: ' masterfile]) ;
    end
else
    % we found the Matrix Market file
    RB = 0 ;
end
Problem = [ ] ;
notes = [ ] ;

while (1)

    % get the next line
    s = fgetl (f) ;
    if (~ischar (s) || length (s) < 3 || s (1) ~= '%')
        % end of file or end of leading comments ... no notes found
        fclose (f) ;
        [Problem, notes] = valid_problem (Problem, [ ]) ;
        return ;
    end

    % remove the leading '% ' and get the first token
    s = s (3:end) ;
    [t, r] = strtok (s) ;

    % parse the line
    if (strcmp (t, 'name:'))

        % get the Problem.name.  It must be of the form Group/Name.
        Problem.name = strtrim (r) ;
        if (length (find (Problem.name == '/')) ~= 1)
            fclose (f) ;
            error (['invalid problem name ' Problem.name]) ;
        end

    elseif (s (1) == '[')

        % get the Problem.title, which is enclosed in square brackets
        k = find (s == ']', 1, 'last') ;
        if (isempty (k))
            fclose (f) ;
            error ('invalid problem title') ;
        end
        Problem.title = s (2:k-1) ;

    elseif (strcmp (t, 'id:'))

        % get the Problem.id.  str2double is used instead of str2num, since
        % str2num evaluates its argument and the text comes from a file.
        % str2double always returns a scalar (NaN if the text is not a number).
        id = str2double (strtrim (r)) ;
        if (~isreal (id) || ~isfinite (id) || id < 0)
            fclose (f) ;
            error ('invalid problem id') ;
        end
        Problem.id = id ;

    elseif (strcmp (t, 'kind:'))

        % get the Problem.kind
        Problem.kind = strtrim (r) ;

    elseif (strcmp (t, 'date:'))

        % get the Problem.date
        Problem.date = strtrim (r) ;

    elseif (strcmp (t, 'author:'))

        % get the Problem.author
        Problem.author = strtrim (r) ;

    elseif (strcmp (t, 'ed:'))

        % get the Problem.ed
        Problem.ed = strtrim (r) ;

    elseif (strcmp (t, 'notes:'))

        % get the notes, which always appear last
        k = 0 ;
        notes = [ ] ;
        while (1)
            % get the next line
            s = fgetl (f) ;
            if (~ischar (s) || length (s) < 2 || ~strcmp (s (1:2), '% '))
                % end of file or end of notes ... convert notes to char array
                fclose (f) ;
                [Problem, notes] = valid_problem (Problem, notes) ;
                return ;
            end
            % add the line to the notes (the leading '% ' is stripped later,
            % by valid_problem)
            k = k + 1 ;
            notes {k} = s ;                                                 %#ok
        end
    end
end
367
368
369%-------------------------------------------------------------------------------
% valid_problem: determine if a problem is valid, and finalize the notes
371%-------------------------------------------------------------------------------
372
function [Problem, notes] = valid_problem (Problem, notes)
% Check that every required field (name, title, id, date, author, ed, kind) is
% present in the Problem, and raise an error if any is missing.  The Problem
% itself is returned unchanged.  If notes is non-empty, it is converted to a
% char array and the first two columns (the leading '% ' inserted when the
% notes were printed in the Matrix Market file) are removed.
required = {'name', 'title', 'id', 'date', 'author', 'ed', 'kind'} ;
for j = 1:length (required)
    if (~isfield (Problem, required {j}))
        error ('invalid Problem mfile') ;
    end
end
if (isempty (notes))
    % nothing to convert
    return ;
end
% one row per note line, padded with blanks to a common width
lines = char (notes) ;
notes = lines (:, 3:end) ;
387
388
389%-------------------------------------------------------------------------------
390% mtx_read: read a *.mtx file
391%-------------------------------------------------------------------------------
392
393% In the Rutherford/Boeing form, a *.mtx file is used only for full matrices,
394% using a tiny subset of the Matrix Market format.  In the Matrix Market form,
395% the *.mtx is used for all b, x, and aux matrices (both full and sparse).
396
function C = mtx_read (file, RB)
% Read the matrix C from a *.mtx file.  RB selects the reader: when RB is
% true (nonzero) the file belongs to a Rutherford/Boeing problem and is read
% with ssfull_read; otherwise it is read with mread.

if (RB)

    % In a Rutherford/Boeing problem, a *.mtx file uses a tiny subset of the
    % Matrix Market format for full matrices: just the one header line, and no
    % comment or blank lines permitted.  Allowable header lines are:
    %   %%MatrixMarket matrix array real general
    %   %%MatrixMarket matrix array complex general
    % This tiny subset can be read by ssfull_read, so mread is not needed here.
    C = ssfull_read (file) ;

else

    % Get a Matrix Market file, using full Matrix Market features.
    C = mread (file, 1) ;

end
415
416