1 /**
2  * Author: Nigel Brown
3  *
4  * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson.
5  */
6 /**
7  * InFileStream subclasses std::ifstream, adding a check for the end-of-line
8  * character convention in the input file. This is then used by the getline()
9  * member as the line delimiter, unless the caller supplies an explicit
10  * delimiter.
11  *
12  * Note: This is an ugly workaround; at present various operations repeatedly
13  * construct/destruct an instance and open/close a sequence file up to 12
14  * times! A cleaner class will probably derive this class from something like
15  * 'istream' aggregating a 'filebuf' under control of istream::seekg().
16  *
17  * Created: 09-02-07,Nigel Brown(EMBL)
18  *
19  * Changes:
20  * Mark Larkin 13-2-07: I removed the dynamic cast from the getline functions.
21  ***************************************************************************/
22 #ifdef HAVE_CONFIG_H
23     #include "config.h"
24 #endif
25 #include <string>
26 #include <fstream>
27 #include <iostream>
28 #include "InFileStream.h"
29 using namespace std;
30 
31 const char LF = 0x0a;  //linefeed
32 const char CR = 0x0d;  //carriage return
33 
InFileStream()34 InFileStream::InFileStream() :
35     ifstream()
36 {
37     delim = '\n'; // default
38     //cout << "InFileStream() constructor 1" << endl;
39 }
40 
InFileStream(const char * filename)41 InFileStream::InFileStream(const char *filename) :
42     ifstream(filename, ios::in), filename(filename)
43 {
44     //cout << "InFileStream(f) constructor 2" << endl;
45     delim = findDelimiter();
46 }
47 
48 //- copy-constructor: can't copy superclass private members
49 //- InFileStream::InFileStream(const InFileStream &copy) :
50 //-     ifstream(static_cast<const ifstream&>(copy))
51 //- {
52 //-     cout << "InFileStream() constructor 3" << endl;
53 //-     delim = copy.delim;
54 //- }
55 
open(const char * filename)56 void InFileStream::open(const char *filename)
57 {
58 
59     this->filename = filename;
60     ifstream::open(filename, ios::in);
61     if  (ifstream::fail())
62         return;
63     delim = findDelimiter();
64 }
65 
66 //not necessary, but for symmetry to open()
close()67 void InFileStream::close()
68 {
69     ifstream::close();
70 }
71 
72 
73 //getline with stored delimiter
getline(char * s,streamsize n)74 std::istream& InFileStream::getline(char *s, streamsize n)
75 {
76     return ifstream::getline(s, n, delim);
77 }
78 
79 //getline with caller supplied delimiter
getline(char * s,streamsize n,char delim)80 std::istream& InFileStream::getline(char *s, streamsize n, char delim)
81 {
82     return ifstream::getline(s, n, delim);
83 }
84 
85 
86 /**
87  * Mark 24-1-2007. I added the function findDelimiter to determine if '\r' or
88  * '\n' will be used as the line delimiter when parsing the file.
89  *
90  * 25-01-07,Nigel Brown(EMBL): changed body of loop to check successive chars
91  * in case of DOS/Windows
92  *
93  * 09-02-07,Nigel Brown(EMBL): moved member into new InFileStream subclassed
94  * from std::ifstream, so this is called automatically for any file reader
95  * that uses InFileStream in place of std::ifstream. Replaced if/then/else
96  * with switch.
97  */
findDelimiter()98 char InFileStream::findDelimiter()
99 {
100     ifstream in;
101     int type = 0;
102 
103     in.open(filename.c_str(), ios::in);
104     if (in.fail())
105         return delim;
106 
107     in.seekg(0, ios::beg);
108 
109     //look for CR or LF or CRLF (or LFCR)
110     if (in.is_open()) {
111         char c;
112         while (in.get(c)) {
113             if (c == CR)
114                 type |= 1;
115             else if (c == LF)
116                 type |= 2;
117             else if (type)
118                 break;
119         }
120     }
121     in.close();
122 
123     switch (type) {
124 	case 1:
125 	    //cout << "file is Mac System 9" << endl;
126 	    delim = '\r';
127 	    break;
128 	case 2:
129 	    //cout << "file is UNIX" << endl;
130 	    delim = '\n';
131 	    break;
132 	case 3:
133 	    //cout << "file is DOS" << endl;
134 	    delim = '\n';
135 	    break;
136 	default: //short or empty file
137 	    //cout << "file is UNIX (default)" << endl;
138 	    delim = '\n';
139     }
140     return delim;
141 }
142 
143 
144