xref: /reactos/sdk/lib/ucrt/stdio/ftell.cpp (revision e98e9000)
1 //
2 // ftell.cpp
3 //
4 //      Copyright (c) Microsoft Corporation.  All rights reserved.
5 //
6 // Defines the ftell() family of functions, which computes the current position
7 // of the file pointer of a stream.
8 //
9 #include <corecrt_internal_stdio.h>
10 #include <corecrt_internal_ptd_propagation.h>
11 
12 
buffer_contains_wide_characters(__crt_lowio_text_mode const text_mode)13 static bool __cdecl buffer_contains_wide_characters(__crt_lowio_text_mode const text_mode) throw()
14 {
15     return text_mode == __crt_lowio_text_mode::utf8
16         || text_mode == __crt_lowio_text_mode::utf16le;
17 }
18 
buffer_character_size(__crt_lowio_text_mode const text_mode)19 static size_t __cdecl buffer_character_size(__crt_lowio_text_mode const text_mode) throw()
20 {
21     return buffer_contains_wide_characters(text_mode) ? sizeof(wchar_t) : sizeof(char);
22 }
23 
24 // This function counts the number of newlines (LFs) in the buffer that contains
25 // elements of type Character.  When sizeof(Character) != 1, the caller must
26 // ensure that the buffer consists of a whole number of Character elements.
27 template <typename Character>
28 static __int64 __cdecl count_newlines_of_type(
29     _In_reads_(buffer_last - buffer_first) char const* const buffer_first,
30     _In_reads_(0)                          char const* const buffer_last
31     ) throw()
32 {
33     // This invariant is maintained by the lowio library.  When Character==wchar_t,
34     // all writes to the buffer are in wide characters.
35     _ASSERTE((buffer_last - buffer_first) % sizeof(Character) == 0);
36 
37     Character const* const typed_first = reinterpret_cast<Character const*>(buffer_first);
38     Character const* const typed_last  = reinterpret_cast<Character const*>(buffer_last);
39 
40     __int64 newline_count = 0;
41     for (Character const* it = typed_first; it != typed_last; ++it)
42     {
43         if (*it == '\n')
44         {
45             ++newline_count;
46         }
47     }
48 
49     return newline_count;
50 }
51 
52 static __int64 __cdecl count_newline_bytes(
53     _In_reads_(buffer_last - buffer_first) char const*           const buffer_first,
54     _In_reads_(0)                          char const*           const buffer_last,
55     _In_                                   __crt_lowio_text_mode const text_mode
56     ) throw()
57 {
58     if (buffer_contains_wide_characters(text_mode))
59     {
60         return count_newlines_of_type<wchar_t>(buffer_first, buffer_last) * sizeof(wchar_t);
61     }
62     else
63     {
64         return count_newlines_of_type<char>(buffer_first, buffer_last);
65     }
66 }
67 
68 // This function handles the case where the file is open in UTF-8 text mode and
69 // the translated UTF-16 form of the text has a different number of characters
70 // than the original UTF-8 text (remember: when reading a file in UTF-8 mode, the
71 // lowio library converts the UTF-8 to UTF-16).
common_ftell_translated_utf8_nolock(__crt_stdio_stream const stream,__int64 const lowio_position,__crt_cached_ptd_host & ptd)72 static __int64 __cdecl common_ftell_translated_utf8_nolock(
73     __crt_stdio_stream const stream,
74     __int64            const lowio_position,
75     __crt_cached_ptd_host&   ptd
76     ) throw()
77 {
78     int const fh = _fileno(stream.public_stream());
79 
80     // If the buffer has been exhausted, then the current lowio position is also
81     // the current stdio position:
82     if (stream->_cnt == 0)
83     {
84         return lowio_position;
85     }
86 
87     __int64 const current_buffer_position = (stream->_ptr - stream->_base) / static_cast<__int64>(sizeof(wchar_t));
88 
89     // Otherwise, we have to re-read the buffer, in binary mode, so that we can
90     // analyze the original UTF-8 text to compute the actual position in the
91     // file.  To do this, we seek the lowio pointer back to the beginning of
92     // the stdio buffer, re-read the buffer, then seek the lowio pointer back
93     // to its original location:
94     __int64 const base_buffer_position = _lseeki64_internal(fh, _startpos(fh), SEEK_SET, ptd);
95     if (base_buffer_position != _startpos(fh))
96     {
97         return -1;
98     }
99 
100     DWORD bytes_read;
101     char  raw_buffer[_INTERNAL_BUFSIZ];
102     if (!ReadFile(reinterpret_cast<HANDLE>(_osfhnd(fh)), raw_buffer, _INTERNAL_BUFSIZ, &bytes_read, nullptr))
103         return -1;
104 
105     // Seek back to where we were, to ensure the stdio stream is left in a
106     // consistent state (and "unmodified" from before the call):
107     if (_lseeki64_internal(fh, lowio_position, SEEK_SET, ptd) < 0)
108     {
109         return -1;
110     }
111 
112     // This should not normally happen:  we should always read enough bytes:
113     if (current_buffer_position > static_cast<__int64>(bytes_read))
114     {
115         return -1;
116     }
117 
118     // Scan the raw, untranslated buffer to find the current position, updating
119     // the file pointer to account for newline translation in the buffer:
120     char const* const raw_first = raw_buffer;
121 #pragma warning(disable:__WARNING_UNUSED_POINTER_ASSIGNMENT) // 28930
122     char const* const raw_last = raw_buffer + bytes_read;
123 
124     char const* raw_it = raw_first;
125     for (__int64 i = 0; i != current_buffer_position && raw_it < raw_last; ++i, ++raw_it)
126     {
127         if (*raw_it == CR)
128         {
129             if (raw_it < raw_last - 1 && *(raw_it + 1) == LF)
130                 ++raw_it;
131         }
132         else
133         {
134             raw_it += _utf8_no_of_trailbytes(static_cast<const unsigned char>(*raw_it));
135         }
136     }
137 
138     return base_buffer_position + (raw_it - raw_first);
139 }
140 
141 
142 
143 // This function handles the extra adjustments that need to be made to the file
144 // position returned by ftell when a stream is opened in read mode.
common_ftell_read_mode_nolock(__crt_stdio_stream const stream,__int64 const lowio_position,__int64 const buffer_offset,__crt_cached_ptd_host & ptd)145 static __int64 __cdecl common_ftell_read_mode_nolock(
146     __crt_stdio_stream const stream,
147     __int64            const lowio_position,
148     __int64            const buffer_offset,
149     __crt_cached_ptd_host&   ptd
150     ) throw()
151 {
152     int const fh = _fileno(stream.public_stream());
153 
154     // We will need to adjust the file position of UTF-8 files to account for
155     // UTF-8 to UTF-16 translation:
156     __crt_lowio_text_mode const text_mode = _textmode(fh);
157 
158     __int64 const translation_factor = text_mode == __crt_lowio_text_mode::utf8
159         ? static_cast<__int64>(sizeof(wchar_t))
160         : static_cast<__int64>(sizeof(char));
161 
162     // If the buffer has been exhausted, then the current lowio position is also
163     // the current stdio position:
164     if (stream->_cnt == 0)
165     {
166         return lowio_position;
167     }
168 
169     // The lowio position points one-past-the-end of the current stdio buffer.
170     // We need to find the position of the beginning of the buffer.  To start,
171     // we compute the number of bytes in the buffer.  Note that we cannot just
172     // use the buffer size, because the buffer will not be full if EOF is
173     // readhed before the buffer is full.
174     __int64 bytes_read = stream->_cnt + static_cast<__int64>(stream->_ptr - stream->_base);
175 
176     // If this is a binary mode stream, we can simply subtract this from the
177     // lowio position, and combine it with the buffer offset to get the stdio
178     // position:
179     if ((_osfile(fh) & FTEXT) == 0)
180     {
181         return lowio_position
182             - (bytes_read    / translation_factor)
183             + (buffer_offset / translation_factor);
184     }
185 
186     // If this is a text mode stream, we need to adjust the number of bytes that
187     // were read into the buffer to account for newline translation.
188     //
189     // If we are _not_ at EOF, the number of untranslated characters read is the
190     // buffer size.  However, if we are not at EOF, the buffer may not be full,
191     // so we need to scan the buffer to count newline characters.  (Note:  we
192     // only count newline characters if the stream is at EOF, because doing so
193     // is more expensive than seeking to the end and seeking back).
194 
195     // Seek to the end of the file.  If the current position is the end of the
196     // file, then scan the buffer for newlines and adjust bytes_read:
197     if (_lseeki64_internal(fh, 0, SEEK_END, ptd) == lowio_position)
198     {
199         bytes_read += count_newline_bytes(stream->_base, stream->_base + bytes_read, text_mode);
200 
201         // If the last byte was a ^Z, that character will not be present in the
202         // buffer (it is omitted by lowio):
203         if (stream.ctrl_z())
204         {
205             bytes_read += buffer_character_size(text_mode);
206         }
207     }
208     // Otherwise, the current position is not at the end of the file; we need to
209     // seek back to the original position and compute the size of the buffer:
210     else
211     {
212         if (_lseeki64_internal(fh, lowio_position, SEEK_SET, ptd) == -1)
213             return -1;
214 
215         // If the number of bytes read is smaller than the small buffer and was
216         // not user-provided, the buffer size was set to _SMALL_BUFSIZ during
217         // the last call to __acrt_stdio_refill_and_read_{narrow,wide}_nolock:
218         if (bytes_read <= _SMALL_BUFSIZ &&
219             stream.has_crt_buffer() &&
220             !stream.has_setvbuf_buffer())
221         {
222             bytes_read = _SMALL_BUFSIZ;
223         }
224         // Otherwise, the buffer size is what is stated in the stream object:
225         else
226         {
227             bytes_read = stream->_bufsiz;
228         }
229 
230         // If the first byte in the untranslated buffer was a '\n', we assume it
231         // was preceded by a '\r', which was discarded by the previous read
232         // operation:
233         if (_osfile(fh) & FCRLF)
234         {
235             bytes_read += buffer_character_size(text_mode);
236         }
237     }
238 
239     return lowio_position
240         - (bytes_read    / translation_factor)
241         + (buffer_offset / translation_factor);
242 }
243 
244 
245 
246 template <typename Integer>
247 static Integer __cdecl common_ftell_nolock(__crt_stdio_stream, __crt_cached_ptd_host& ptd) throw();
248 
249 template <>
common_ftell_nolock(__crt_stdio_stream const stream,__crt_cached_ptd_host & ptd)250 __int64 __cdecl common_ftell_nolock(__crt_stdio_stream const stream, __crt_cached_ptd_host& ptd) throw()
251 {
252     _UCRT_VALIDATE_RETURN(ptd, stream.public_stream(), EINVAL, -1);
253 
254     int const fh = _fileno(stream.public_stream());
255 
256     if (stream->_cnt < 0)
257     {
258         stream->_cnt = 0;
259     }
260 
261     // Get the current lowio file position.  If stdio is buffering the stream,
262     // this position will point one past the end of the current stdio buffer.
263     __int64 const lowio_position = _lseeki64_internal(fh, 0, SEEK_CUR, ptd);
264     if (lowio_position < 0)
265     {
266         return -1;
267     }
268 
269     // If the stream is unbuffered or no buffering is designated, we can simply
270     // compute the stdio position via the remaining stdio stream count:
271     if (!stream.has_big_buffer())
272     {
273         return lowio_position - stream->_cnt;
274     }
275 
276     // The above lseek validates the handle, so it's okay to get the text mode:
277     __crt_lowio_text_mode const text_mode = _textmode(fh);
278 
279     // This is the current offset into the stdio buffer; we will adjust this to
280     // account for translation and updates as this function progresses:
281     __int64 buffer_offset = stream->_ptr - stream->_base;
282 
283     // If the file is in read or write mode, we need special handling for UTF-8
284     // and text mode files, to account for newline translation and UTF-8 to
285     // UTF-16 conversion:
286     if (stream.has_any_of(_IOWRITE | _IOREAD))
287     {
288         if (text_mode == __crt_lowio_text_mode::utf8 && _utf8translations(fh))
289         {
290             return common_ftell_translated_utf8_nolock(stream, lowio_position, ptd);
291         }
292 
293         // For text mode files, adjust the buffer offset to account for newline
294         // translation:
295         if (_osfile(fh) & FTEXT)
296         {
297             buffer_offset += count_newline_bytes(stream->_base, stream->_ptr, text_mode);
298         }
299     }
300     // Otherwise, if the file is not in read/write mode, ftell cannot proceed:
301     else if (!stream.has_all_of(_IOUPDATE))
302     {
303         ptd.get_errno().set(EINVAL);
304         return -1;
305     }
306 
307     // If the current lowio position is at the beginning of the file, the stdio
308     // position is whatever the offset is:
309     if (lowio_position == 0)
310     {
311         return buffer_offset;
312     }
313 
314     if (stream.has_all_of(_IOREAD))
315     {
316         return common_ftell_read_mode_nolock(stream, lowio_position, buffer_offset, ptd);
317     }
318 
319     if (text_mode == __crt_lowio_text_mode::utf8)
320     {
321         buffer_offset /= sizeof(wchar_t);
322     }
323 
324     return lowio_position + buffer_offset;
325 }
326 
327 template <>
common_ftell_nolock(__crt_stdio_stream const stream,__crt_cached_ptd_host & ptd)328 long __cdecl common_ftell_nolock(__crt_stdio_stream const stream, __crt_cached_ptd_host& ptd) throw()
329 {
330     __int64 const result = common_ftell_nolock<__int64>(stream, ptd);
331     if (result > LONG_MAX)
332     {
333         ptd.get_errno().set(EINVAL);
334         return -1;
335     }
336 
337     return static_cast<long>(result);
338 }
339 
340 
341 
342 // Queries the position of the file pointer of a stream.  This function computes
343 // the position of the pointer, accounting for stdio buffering.  This is not the
344 // same as fseek(stream, 0, SEEK_SET), because fseek will remove an ungetc and
345 // may flush buffers.
346 //
347 // Returns the present file position on success; returns -1 and sets errno on
348 // failure.
349 template <typename Integer>
common_ftell(__crt_stdio_stream const stream,__crt_cached_ptd_host & ptd)350 static Integer __cdecl common_ftell(__crt_stdio_stream const stream, __crt_cached_ptd_host& ptd) throw()
351 {
352     _UCRT_VALIDATE_RETURN(ptd, stream.valid(), EINVAL, -1);
353 
354     Integer return_value = 0;
355 
356     _lock_file(stream.public_stream());
357     __try
358     {
359         return_value = common_ftell_nolock<Integer>(stream, ptd);
360     }
361     __finally
362     {
363         _unlock_file(stream.public_stream());
364     }
365     __endtry
366 
367     return return_value;
368 }
369 
370 
371 
372 
ftell(FILE * const public_stream)373 extern "C" long __cdecl ftell(FILE* const public_stream)
374 {
375     __crt_cached_ptd_host ptd;
376     return common_ftell<long>(__crt_stdio_stream(public_stream), ptd);
377 }
378 
_ftell_nolock(FILE * const public_stream)379 extern "C" long __cdecl _ftell_nolock(FILE* const public_stream)
380 {
381     __crt_cached_ptd_host ptd;
382     return common_ftell_nolock<long>(__crt_stdio_stream(public_stream), ptd);
383 }
384 
_ftelli64(FILE * const public_stream)385 extern "C" __int64 __cdecl _ftelli64(FILE* const public_stream)
386 {
387     __crt_cached_ptd_host ptd;
388     return common_ftell<__int64>(__crt_stdio_stream(public_stream), ptd);
389 }
390 
_ftelli64_nolock(FILE * const public_stream)391 extern "C" __int64 __cdecl _ftelli64_nolock(FILE* const public_stream)
392 {
393     __crt_cached_ptd_host ptd;
394     return common_ftell_nolock<__int64>(__crt_stdio_stream(public_stream), ptd);
395 }
396 
_ftelli64_nolock_internal(FILE * const public_stream,__crt_cached_ptd_host & ptd)397 extern "C" __int64 __cdecl _ftelli64_nolock_internal(FILE* const public_stream, __crt_cached_ptd_host& ptd)
398 {
399     return common_ftell_nolock<__int64>(__crt_stdio_stream(public_stream), ptd);
400 }
401