1 // 2 // ftell.cpp 3 // 4 // Copyright (c) Microsoft Corporation. All rights reserved. 5 // 6 // Defines the ftell() family of functions, which computes the current position 7 // of the file pointer of a stream. 8 // 9 #include <corecrt_internal_stdio.h> 10 #include <corecrt_internal_ptd_propagation.h> 11 12 13 static bool __cdecl buffer_contains_wide_characters(__crt_lowio_text_mode const text_mode) throw() 14 { 15 return text_mode == __crt_lowio_text_mode::utf8 16 || text_mode == __crt_lowio_text_mode::utf16le; 17 } 18 19 static size_t __cdecl buffer_character_size(__crt_lowio_text_mode const text_mode) throw() 20 { 21 return buffer_contains_wide_characters(text_mode) ? sizeof(wchar_t) : sizeof(char); 22 } 23 24 // This function counts the number of newlines (LFs) in the buffer that contains 25 // elements of type Character. When sizeof(Character) != 1, the caller must 26 // ensure that the buffer consists of a whole number of Character elements. 27 template <typename Character> 28 static __int64 __cdecl count_newlines_of_type( 29 _In_reads_(buffer_last - buffer_first) char const* const buffer_first, 30 _In_reads_(0) char const* const buffer_last 31 ) throw() 32 { 33 // This invariant is maintained by the lowio library. When Character==wchar_t, 34 // all writes to the buffer are in wide characters. 35 _ASSERTE((buffer_last - buffer_first) % sizeof(Character) == 0); 36 37 Character const* const typed_first = reinterpret_cast<Character const*>(buffer_first); 38 Character const* const typed_last = reinterpret_cast<Character const*>(buffer_last); 39 40 __int64 newline_count = 0; 41 for (Character const* it = typed_first; it != typed_last; ++it) 42 { 43 if (*it == '\n') 44 { 45 ++newline_count; 46 } 47 } 48 49 return newline_count; 50 } 51 52 static __int64 __cdecl count_newline_bytes( 53 _In_reads_(buffer_last - buffer_first) char const* const buffer_first, 54 _In_reads_(0) char const* const buffer_last, 55 _In_ __crt_lowio_text_mode const text_mode 56 ) throw() 57 { 58 if (buffer_contains_wide_characters(text_mode)) 59 { 60 return count_newlines_of_type<wchar_t>(buffer_first, buffer_last) * sizeof(wchar_t); 61 } 62 else 63 { 64 return count_newlines_of_type<char>(buffer_first, buffer_last); 65 } 66 } 67 68 // This function handles the case where the file is open in UTF-8 text mode and 69 // the translated UTF-16 form of the text has a different number of characters 70 // than the original UTF-8 text (remember: when reading a file in UTF-8 mode, the 71 // lowio library converts the UTF-8 to UTF-16). 72 static __int64 __cdecl common_ftell_translated_utf8_nolock( 73 __crt_stdio_stream const stream, 74 __int64 const lowio_position, 75 __crt_cached_ptd_host& ptd 76 ) throw() 77 { 78 int const fh = _fileno(stream.public_stream()); 79 80 // If the buffer has been exhausted, then the current lowio position is also 81 // the current stdio position: 82 if (stream->_cnt == 0) 83 { 84 return lowio_position; 85 } 86 87 __int64 const current_buffer_position = (stream->_ptr - stream->_base) / static_cast<__int64>(sizeof(wchar_t)); 88 89 // Otherwise, we have to re-read the buffer, in binary mode, so that we can 90 // analyze the original UTF-8 text to compute the actual position in the 91 // file. To do this, we seek the lowio pointer back to the beginning of 92 // the stdio buffer, re-read the buffer, then seek the lowio pointer back 93 // to its original location: 94 __int64 const base_buffer_position = _lseeki64_internal(fh, _startpos(fh), SEEK_SET, ptd); 95 if (base_buffer_position != _startpos(fh)) 96 { 97 return -1; 98 } 99 100 DWORD bytes_read; 101 char raw_buffer[_INTERNAL_BUFSIZ]; 102 if (!ReadFile(reinterpret_cast<HANDLE>(_osfhnd(fh)), raw_buffer, _INTERNAL_BUFSIZ, &bytes_read, nullptr)) 103 return -1; 104 105 // Seek back to where we were, to ensure the stdio stream is left in a 106 // consistent state (and "unmodified" from before the call): 107 if (_lseeki64_internal(fh, lowio_position, SEEK_SET, ptd) < 0) 108 { 109 return -1; 110 } 111 112 // This should not normally happen: we should always read enough bytes: 113 if (current_buffer_position > static_cast<__int64>(bytes_read)) 114 { 115 return -1; 116 } 117 118 // Scan the raw, untranslated buffer to find the current position, updating 119 // the file pointer to account for newline translation in the buffer: 120 char const* const raw_first = raw_buffer; 121 #pragma warning(disable:__WARNING_UNUSED_POINTER_ASSIGNMENT) // 28930 122 char const* const raw_last = raw_buffer + bytes_read; 123 124 char const* raw_it = raw_first; 125 for (__int64 i = 0; i != current_buffer_position && raw_it < raw_last; ++i, ++raw_it) 126 { 127 if (*raw_it == CR) 128 { 129 if (raw_it < raw_last - 1 && *(raw_it + 1) == LF) 130 ++raw_it; 131 } 132 else 133 { 134 raw_it += _utf8_no_of_trailbytes(static_cast<const unsigned char>(*raw_it)); 135 } 136 } 137 138 return base_buffer_position + (raw_it - raw_first); 139 } 140 141 142 143 // This function handles the extra adjustments that need to be made to the file 144 // position returned by ftell when a stream is opened in read mode. 145 static __int64 __cdecl common_ftell_read_mode_nolock( 146 __crt_stdio_stream const stream, 147 __int64 const lowio_position, 148 __int64 const buffer_offset, 149 __crt_cached_ptd_host& ptd 150 ) throw() 151 { 152 int const fh = _fileno(stream.public_stream()); 153 154 // We will need to adjust the file position of UTF-8 files to account for 155 // UTF-8 to UTF-16 translation: 156 __crt_lowio_text_mode const text_mode = _textmode(fh); 157 158 __int64 const translation_factor = text_mode == __crt_lowio_text_mode::utf8 159 ? static_cast<__int64>(sizeof(wchar_t)) 160 : static_cast<__int64>(sizeof(char)); 161 162 // If the buffer has been exhausted, then the current lowio position is also 163 // the current stdio position: 164 if (stream->_cnt == 0) 165 { 166 return lowio_position; 167 } 168 169 // The lowio position points one-past-the-end of the current stdio buffer. 170 // We need to find the position of the beginning of the buffer. To start, 171 // we compute the number of bytes in the buffer. Note that we cannot just 172 // use the buffer size, because the buffer will not be full if EOF is 173 // readhed before the buffer is full. 174 __int64 bytes_read = stream->_cnt + static_cast<__int64>(stream->_ptr - stream->_base); 175 176 // If this is a binary mode stream, we can simply subtract this from the 177 // lowio position, and combine it with the buffer offset to get the stdio 178 // position: 179 if ((_osfile(fh) & FTEXT) == 0) 180 { 181 return lowio_position 182 - (bytes_read / translation_factor) 183 + (buffer_offset / translation_factor); 184 } 185 186 // If this is a text mode stream, we need to adjust the number of bytes that 187 // were read into the buffer to account for newline translation. 188 // 189 // If we are _not_ at EOF, the number of untranslated characters read is the 190 // buffer size. However, if we are not at EOF, the buffer may not be full, 191 // so we need to scan the buffer to count newline characters. (Note: we 192 // only count newline characters if the stream is at EOF, because doing so 193 // is more expensive than seeking to the end and seeking back). 194 195 // Seek to the end of the file. If the current position is the end of the 196 // file, then scan the buffer for newlines and adjust bytes_read: 197 if (_lseeki64_internal(fh, 0, SEEK_END, ptd) == lowio_position) 198 { 199 bytes_read += count_newline_bytes(stream->_base, stream->_base + bytes_read, text_mode); 200 201 // If the last byte was a ^Z, that character will not be present in the 202 // buffer (it is omitted by lowio): 203 if (stream.ctrl_z()) 204 { 205 bytes_read += buffer_character_size(text_mode); 206 } 207 } 208 // Otherwise, the current position is not at the end of the file; we need to 209 // seek back to the original position and compute the size of the buffer: 210 else 211 { 212 if (_lseeki64_internal(fh, lowio_position, SEEK_SET, ptd) == -1) 213 return -1; 214 215 // If the number of bytes read is smaller than the small buffer and was 216 // not user-provided, the buffer size was set to _SMALL_BUFSIZ during 217 // the last call to __acrt_stdio_refill_and_read_{narrow,wide}_nolock: 218 if (bytes_read <= _SMALL_BUFSIZ && 219 stream.has_crt_buffer() && 220 !stream.has_setvbuf_buffer()) 221 { 222 bytes_read = _SMALL_BUFSIZ; 223 } 224 // Otherwise, the buffer size is what is stated in the stream object: 225 else 226 { 227 bytes_read = stream->_bufsiz; 228 } 229 230 // If the first byte in the untranslated buffer was a '\n', we assume it 231 // was preceded by a '\r', which was discarded by the previous read 232 // operation: 233 if (_osfile(fh) & FCRLF) 234 { 235 bytes_read += buffer_character_size(text_mode); 236 } 237 } 238 239 return lowio_position 240 - (bytes_read / translation_factor) 241 + (buffer_offset / translation_factor); 242 } 243 244 245 246 template <typename Integer> 247 static Integer __cdecl common_ftell_nolock(__crt_stdio_stream, __crt_cached_ptd_host& ptd) throw(); 248 249 template <> 250 __int64 __cdecl common_ftell_nolock(__crt_stdio_stream const stream, __crt_cached_ptd_host& ptd) throw() 251 { 252 _UCRT_VALIDATE_RETURN(ptd, stream.public_stream(), EINVAL, -1); 253 254 int const fh = _fileno(stream.public_stream()); 255 256 if (stream->_cnt < 0) 257 { 258 stream->_cnt = 0; 259 } 260 261 // Get the current lowio file position. If stdio is buffering the stream, 262 // this position will point one past the end of the current stdio buffer. 263 __int64 const lowio_position = _lseeki64_internal(fh, 0, SEEK_CUR, ptd); 264 if (lowio_position < 0) 265 { 266 return -1; 267 } 268 269 // If the stream is unbuffered or no buffering is designated, we can simply 270 // compute the stdio position via the remaining stdio stream count: 271 if (!stream.has_big_buffer()) 272 { 273 return lowio_position - stream->_cnt; 274 } 275 276 // The above lseek validates the handle, so it's okay to get the text mode: 277 __crt_lowio_text_mode const text_mode = _textmode(fh); 278 279 // This is the current offset into the stdio buffer; we will adjust this to 280 // account for translation and updates as this function progresses: 281 __int64 buffer_offset = stream->_ptr - stream->_base; 282 283 // If the file is in read or write mode, we need special handling for UTF-8 284 // and text mode files, to account for newline translation and UTF-8 to 285 // UTF-16 conversion: 286 if (stream.has_any_of(_IOWRITE | _IOREAD)) 287 { 288 if (text_mode == __crt_lowio_text_mode::utf8 && _utf8translations(fh)) 289 { 290 return common_ftell_translated_utf8_nolock(stream, lowio_position, ptd); 291 } 292 293 // For text mode files, adjust the buffer offset to account for newline 294 // translation: 295 if (_osfile(fh) & FTEXT) 296 { 297 buffer_offset += count_newline_bytes(stream->_base, stream->_ptr, text_mode); 298 } 299 } 300 // Otherwise, if the file is not in read/write mode, ftell cannot proceed: 301 else if (!stream.has_all_of(_IOUPDATE)) 302 { 303 ptd.get_errno().set(EINVAL); 304 return -1; 305 } 306 307 // If the current lowio position is at the beginning of the file, the stdio 308 // position is whatever the offset is: 309 if (lowio_position == 0) 310 { 311 return buffer_offset; 312 } 313 314 if (stream.has_all_of(_IOREAD)) 315 { 316 return common_ftell_read_mode_nolock(stream, lowio_position, buffer_offset, ptd); 317 } 318 319 if (text_mode == __crt_lowio_text_mode::utf8) 320 { 321 buffer_offset /= sizeof(wchar_t); 322 } 323 324 return lowio_position + buffer_offset; 325 } 326 327 template <> 328 long __cdecl common_ftell_nolock(__crt_stdio_stream const stream, __crt_cached_ptd_host& ptd) throw() 329 { 330 __int64 const result = common_ftell_nolock<__int64>(stream, ptd); 331 if (result > LONG_MAX) 332 { 333 ptd.get_errno().set(EINVAL); 334 return -1; 335 } 336 337 return static_cast<long>(result); 338 } 339 340 341 342 // Queries the position of the file pointer of a stream. This function computes 343 // the position of the pointer, accounting for stdio buffering. This is not the 344 // same as fseek(stream, 0, SEEK_SET), because fseek will remove an ungetc and 345 // may flush buffers. 346 // 347 // Returns the present file position on success; returns -1 and sets errno on 348 // failure. 349 template <typename Integer> 350 static Integer __cdecl common_ftell(__crt_stdio_stream const stream, __crt_cached_ptd_host& ptd) throw() 351 { 352 _UCRT_VALIDATE_RETURN(ptd, stream.valid(), EINVAL, -1); 353 354 Integer return_value = 0; 355 356 _lock_file(stream.public_stream()); 357 __try 358 { 359 return_value = common_ftell_nolock<Integer>(stream, ptd); 360 } 361 __finally 362 { 363 _unlock_file(stream.public_stream()); 364 } 365 __endtry 366 367 return return_value; 368 } 369 370 371 372 373 extern "C" long __cdecl ftell(FILE* const public_stream) 374 { 375 __crt_cached_ptd_host ptd; 376 return common_ftell<long>(__crt_stdio_stream(public_stream), ptd); 377 } 378 379 extern "C" long __cdecl _ftell_nolock(FILE* const public_stream) 380 { 381 __crt_cached_ptd_host ptd; 382 return common_ftell_nolock<long>(__crt_stdio_stream(public_stream), ptd); 383 } 384 385 extern "C" __int64 __cdecl _ftelli64(FILE* const public_stream) 386 { 387 __crt_cached_ptd_host ptd; 388 return common_ftell<__int64>(__crt_stdio_stream(public_stream), ptd); 389 } 390 391 extern "C" __int64 __cdecl _ftelli64_nolock(FILE* const public_stream) 392 { 393 __crt_cached_ptd_host ptd; 394 return common_ftell_nolock<__int64>(__crt_stdio_stream(public_stream), ptd); 395 } 396 397 extern "C" __int64 __cdecl _ftelli64_nolock_internal(FILE* const public_stream, __crt_cached_ptd_host& ptd) 398 { 399 return common_ftell_nolock<__int64>(__crt_stdio_stream(public_stream), ptd); 400 } 401