1 /*======================================================================*\
2 |* Editor mined *|
3 |* file handling functions *|
4 \*======================================================================*/
5
6 /*to get a prototype for readlink and symlink:*/
7 /*#define _POSIX_C_SOURCE 200112*/
8 /*#define _XOPEN_SOURCE 500*/
9 /*#define _XOPEN_SOURCE & #define _XOPEN_SOURCE_EXTENDED*/
10 #define _BSD_SOURCE
11
12 #ifdef __TURBOC__
13 #include <utime.h>
14 #endif
15
16 #include "mined.h"
17
18 #ifndef FORK
19 #define fork vfork
20 #endif
21
22 #ifndef vms
23 #include <dirent.h>
24 #endif
25 #ifndef VAXC
26 #include <sys/stat.h>
27 #endif
28 #include <errno.h>
29
30
31 #include "textfile.h"
32 #include "charprop.h"
33 #include "termprop.h" /* utf8_screen */
34 #include "io.h"
35
36 #define use_locking
37
38 #if defined (msdos) || defined (vms) || defined (__MINGW32__)
39 #undef use_locking
40 #endif
41
42
43 #ifdef vms_without_access_fix
44 #define dont_check_modtime
45 #warning [41m obsolete, fixed: avoiding O_RDWR on VMS [0m
46 #endif
47
48
49 #ifdef __clang__
50 #pragma clang diagnostic ignored "-Wincompatible-pointer-types"
51 #endif
52
53
54 /*======================================================================*\
55 |* Data section *|
56 \*======================================================================*/
57
/* Position to restore after (re)opening a file. */
static int open_linum;		/* line # to re-position to */
static int open_col;		/* line column to re-position to */
static int open_pos;		/* character index to re-position to */

FLAG overwriteOK = False;	/* Set if current file is OK for overwrite */
static FLAG file_locked = False;	/* lock file status */
static FLAG recovery_exists = False;	/* Set if newer #file# exists for current file */
/* NOTE(review): starts out True; presumably a backup of the file is
   still to be made before it is first overwritten -- confirm at the
   places where this flag is consumed. */
static FLAG backup_pending = True;
static struct stat filestat;	/* Properties of file read for editing */

/* Initialised to the empty string. */
char * default_text_encoding = "";

/* NOTE(review): the save_... variables look like a snapshot of editing
   state (restriction flag, file name, cursor position and line) to be
   restored later -- confirm against the code that fills them. */
static FLAG save_restricted;
static char save_file_name [maxFILENAMElen];
static int save_cur_pos;
static int save_cur_line;

/* Tested by set_file_type_flags (), which forces mark_HTML while set. */
FLAG viewing_help = False;


/* options */
static FLAG multiexit = True;	/* Should exit command go to next file? */
char * preselect_quote_style = NIL_PTR;

/* Line-end handling options; all are off by default.
   NOTE(review): the names suggest conversion on load/save and CR
   detection -- confirm where they are evaluated. */
FLAG lineends_LFtoCRLF = False;
FLAG lineends_CRLFtoLF = False;
FLAG lineends_CRtoLF = False;
FLAG lineends_detectCR = False;


/* state */
FLAG pasting = False;
/* True while pasting and pastebuf_utf8 (declared elsewhere) is not set. */
#define pasting_encoded (pasting && ! pastebuf_utf8)

lineend_type got_lineend;

static FLAG loaded_from_filename = False;
95
96
97 /*======================================================================*\
98 |* File loading performance monitoring *|
99 \*======================================================================*/
100
101 #define dont_debug_timing
102 #ifdef __TURBOC__
103 #undef debug_timing
104 #endif
105
106 #ifdef debug_timing
107
108 #include <sys/time.h>
109
/*
 * gettime () returns the current wall-clock time from gettimeofday ()
 * as a single count of microseconds.
 */
static
long
gettime ()
{
  struct timeval tv;
  long micros;

  gettimeofday (& tv, 0);

  /* whole seconds scaled to microseconds, plus the sub-second part */
  micros = (long) tv.tv_sec;
  micros = micros * 1000000;
  micros = micros + tv.tv_usec;
  return micros;
}
118
119 #define mark_time(timer) timer -= gettime ()
120 #define elapsed_time(timer) timer += gettime ()
121 #define elapsed_mark_time(timer1, timer2) {long t = gettime (); timer1 += t; timer2 -= t;}
122
123 #else
124
125 #define mark_time(timer)
126 #define elapsed_time(timer)
127 #define elapsed_mark_time(timer1, timer2)
128
129 #endif
130
131
132 /*======================================================================*\
133 |* Text string routines *|
134 \*======================================================================*/
135
/*
 * UTF8_len () returns the sequence length (1..6) announced by a UTF-8
 * lead byte. Bytes that cannot start a sequence (continuation bytes
 * 0x80..0xBF, and 0xFE / 0xFF) are counted as length 1.
 */
int
UTF8_len (c)
  char c;
{
  unsigned int lead = (unsigned char) c;
  int ones = 0;

  /* count the run of 1 bits at the top of the byte */
  while (ones < 8 && (lead & (0x80u >> ones)) != 0) {
    ones ++;
  }

  /* 2..6 leading ones announce a multi-byte sequence of that length;
     0 is ASCII, 1 is a continuation byte, 7 and 8 are illegal */
  if (ones >= 2 && ones <= 6) {
    return ones;
  }
  return 1;
}
156
157 int
CJK_len(text)158 CJK_len (text)
159 character * text;
160 {
161 if (multichar (* text)) {
162 if (* text == 0x8E && text_encoding_tag == 'C') {
163 return 4;
164 } else if (* text == 0x8F &&
165 (text_encoding_tag == 'J' || text_encoding_tag == 'X')) {
166 return 3;
167 } else if (text_encoding_tag == 'G'
168 && * (text + 1) <= '9'
169 && * (text + 1) >= '0') {
170 return 4;
171 } else {
172 return 2;
173 }
174 } else {
175 return 1;
176 }
177 }
178
179
180 /*
181 * char_count () returns the number of characters in the string
182 * excluding the '\0'.
183 */
184 int
char_count(string)185 char_count (string)
186 char * string;
187 {
188 int count = 0;
189
190 if (string != NIL_PTR) {
191 while (* string != '\0') {
192 advance_char (& string);
193 count ++;
194 }
195 }
196 return count;
197 }
198
199
200 /*
201 * col_count () returns the number of screen columns in the string
202 */
203 int
col_count(string)204 col_count (string)
205 char * string;
206 {
207 int count = 0;
208 char * start = string;
209
210 if (string != NIL_PTR) {
211 while (* string != '\0') {
212 advance_char_scr (& string, & count, start);
213 }
214 }
215 return count;
216 }
217
218 /**
219 determine Unicode information from UTF-8 character
220 return parameters:
221 length: the number of UTF-8 bytes in the character
222 ucs: its Unicode value
223 */
224 void
utf8_info(u,length,ucs)225 utf8_info (u, length, ucs)
226 char * u;
227 int * length;
228 unsigned long * ucs;
229 {
230 char * textpoi = u;
231 character c = * textpoi;
232 int utfcount;
233 unsigned long unichar;
234
235 if ((c & 0x80) == 0x00) {
236 utfcount = 1;
237 unichar = c;
238 } else if ((c & 0xE0) == 0xC0) {
239 utfcount = 2;
240 unichar = c & 0x1F;
241 } else if ((c & 0xF0) == 0xE0) {
242 utfcount = 3;
243 unichar = c & 0x0F;
244 } else if ((c & 0xF8) == 0xF0) {
245 utfcount = 4;
246 unichar = c & 0x07;
247 } else if ((c & 0xFC) == 0xF8) {
248 utfcount = 5;
249 unichar = c & 0x03;
250 } else if ((c & 0xFE) == 0xFC) {
251 utfcount = 6;
252 unichar = c & 0x01;
253 } else if (c == 0xFE) {
254 /* illegal UTF-8 code */
255 utfcount = 1;
256 unichar = '4';
257 } else if (c == 0xFF) {
258 /* illegal UTF-8 code */
259 utfcount = 1;
260 unichar = '5';
261 } else {
262 /* illegal UTF-8 sequence character */
263 utfcount = 1;
264 unichar = '8';
265 }
266
267 * length = utfcount;
268
269 utfcount --;
270 textpoi ++;
271 while (utfcount > 0 && (* textpoi & 0xC0) == 0x80) {
272 unichar = (unichar << 6) | (* textpoi & 0x3F);
273 utfcount --;
274 textpoi ++;
275 }
276 if (utfcount > 0) {
277 /* too short UTF-8 sequence */
278 unichar = (character) '<'; /* '«' may raise a CJK terminal width problem */
279 * length -= utfcount;
280 }
281
282 * ucs = unichar;
283 }
284
285 /**
286 Determine if a Unicode character is joined to a ligature
287 with the previous character in the string or line
288 (which may be in any encoding).
289 */
290 int
isjoined(unichar,charpos,linebegin)291 isjoined (unichar, charpos, linebegin)
292 unsigned long unichar;
293 char * charpos;
294 char * linebegin;
295 {
296 if ((joining_screen || apply_joining) && encoding_has_combining ()) {
297 if (unichar == 0x0622 || unichar == 0x0623 || unichar == 0x0625 || unichar == 0x0627) {
298 /* ALEF may be joined to a ligature with preceding LAM */
299 register unsigned long prev_unichar;
300
301 precede_char (& charpos, linebegin);
302 prev_unichar = unicodevalue (charpos);
303 if (prev_unichar == 0x0644) {
304 /* LAM joins to a ligature with any of the above */
305 return 1;
306 }
307 }
308 }
309 return 0;
310 }
311
312 /**
313 Determine if a Unicode character is effectively of zero width, i.e.
314 if it combines with the previous character in the string or line
315 (which may be in any encoding).
316 */
317 int
iscombined(unichar,charpos,linebegin)318 iscombined (unichar, charpos, linebegin)
319 unsigned long unichar;
320 char * charpos;
321 char * linebegin;
322 {
323 if (mapped_term && no_char (mappedtermchar (unichar))) {
324 return False;
325 }
326
327 return isjoined (unichar, charpos, linebegin) || iscombining (unichar);
328 }
329
330 int
iscombining(ucs)331 iscombining (ucs)
332 unsigned long ucs;
333 {
334 if (mapped_term && no_char (mappedtermchar (ucs))) {
335 return False;
336 } else {
337 return term_iscombining (ucs);
338 }
339 }
340
/**
   Check whether a character value is wide on the terminal.
   Values with bit 0x80000000 set are a special encoding of 2 Unicode
   chars, mapped from 1 CJK character: the pair 0x02E9 0x02E5 or
   0x02E5 0x02E9 is reported as wide directly; for any other such
   value only the low 16 bits are looked up with term_iswide ().
 */
int
iswide (ucs)
  unsigned long ucs;
{
  unsigned long code = ucs;

  if ((code & 0x80000000) != 0) {
    unsigned long low = code & 0xFFFF;
    if ((low & 0xFFF3) == 0x02E1) {
      /* low half is 0x02E5 or 0x02E9 */
      return 1;
    }
    /* strip accent indication for width lookup */
    code = low;
  }

  return term_iswide (code);
}
358
359 /**
360 Determine the effective screen width of a Unicode character.
361 */
362 int
uniscrwidth(unichar,charpos,linebegin)363 uniscrwidth (unichar, charpos, linebegin)
364 unsigned long unichar;
365 char * charpos;
366 char * linebegin;
367 {
368 if (combining_mode && iscombined (unichar, charpos, linebegin)) {
369 if (separate_isolated_combinings) {
370 if (charpos == linebegin || * (charpos - 1) == '\t') {
371 if (iswide (unichar)) {
372 return 2;
373 } else {
374 return 1;
375 }
376 }
377 }
378 return 0;
379 }
380
381 if (mapped_term || (cjk_term && ! cjk_uni_term)) {
382 unsigned long cjktermchar = mappedtermchar (unichar);
383 if (! no_char (cjktermchar)) {
384 if (cjktermchar < 0x100
385 || /*GB18030 kludge*/ (unichar >= 0x80 && unichar <= 0x9F)) {
386 return 1;
387 } else if ((term_encoding_tag == 'J' || term_encoding_tag == 'X')
388 && (cjktermchar >> 8) == 0x8E) {
389 return 1;
390 } else {
391 return 2;
392 }
393 }
394 }
395
396 if (iswide (unichar)) {
397 return 2;
398 } else {
399 return 1;
400 }
401 }
402
/* Tracing of cjkscrwidth () decisions is compiled in only if
   debug_cjkscrwidth is defined (here dont_debug_cjkscrwidth is
   defined instead, so tracing is off).
   The tracing macro refers to the identifiers cjkchar and unichar by
   name: inside cjkscrwidth () they resolve to the parameter and, where
   one is in scope, the local unichar; otherwise to the file-level
   unichar below.
 */
#define dont_debug_cjkscrwidth

#ifdef debug_cjkscrwidth
static unsigned long unichar = -1;
#define trace_cjk(tag, w) if (cjkchar > 0x80) printf ("[%s] %04lX (U+%04lX): %d\n", tag, cjkchar, unichar, w);
#else
#define trace_cjk(tag, w)
#endif

/**
   Determine the effective screen width of a CJK character.
   Parameters:
	cjkchar: the character code in the text encoding
	charpos: position of the character in the line
	linebegin: start of that line
   Returns 0, 1 or 2 (screen columns).
 */
int
cjkscrwidth (cjkchar, charpos, linebegin)
  unsigned long cjkchar;
  char * charpos;
  char * linebegin;
{
  char encoding_tag;

  /* Case 1: the terminal is not a CJK terminal, or is a CJK terminal
     flagged cjk_uni_term: decide by the Unicode value of the character */
  if (! cjk_term || cjk_uni_term) {
	unsigned long unichar = lookup_encodedchar (cjkchar);
	if (no_unichar (unichar) && ! valid_cjk (cjkchar, NIL_PTR)) {
		/* neither mapped to Unicode nor a valid CJK code */
		trace_cjk ("noval", 1);
		return 1;
	} else if (combining_mode && iscombined (unichar, charpos, linebegin)) {
		/* combined characters take no column unless they are shown
		   separately at line start or after a TAB */
		if (separate_isolated_combinings) {
			if (charpos == linebegin || * (charpos - 1) == '\t') {
				if (utf_cjk_wide_padding || iswide (unichar)) {
					trace_cjk ("no pad", 2);
					return 2;
				} else {
					trace_cjk ("comb", 1);
					return 1;
				}
			}
		}
		trace_cjk ("comb", 0);
		return 0;
	} else if (utf_cjk_wide_padding || iswide (unichar)) {
		trace_cjk ("pad", 2);
		return 2;
	} else if (no_unichar (unichar) && cjk_term) {
		/* valid CJK code without Unicode mapping on a CJK terminal */
		trace_cjk ("nouni", 2);
		return 2;
	} else {
		trace_cjk ("ut", 1);
		return 1;
	}
  }

  /* Case 2: remaining CJK terminals: decide by the byte structure of
     the code; if characters are remapped for the terminal, use the
     terminal's code and encoding tag instead of the text's */
  encoding_tag = text_encoding_tag;
  if (mapped_term || (cjk_term && remapping_chars ())) {
	unsigned long unichar = lookup_encodedchar (cjkchar);
	if (! no_unichar (unichar)) {
		unsigned long cjktermchar = mappedtermchar (unichar);
		if (! no_char (cjktermchar)) {
			cjkchar = cjktermchar;
			encoding_tag = term_encoding_tag;
		}
	}
  }

  if (cjkchar < 0x100) {
	/* single-byte code */
	trace_cjk ("<", 1);
	return 1;
  } else if ((encoding_tag == 'J' || encoding_tag == 'X')
	     && (cjkchar >> 8) == 0x8E) {
	/* two-byte code with lead byte 0x8E in encodings 'J'/'X' is
	   one column wide */
	trace_cjk ("jx", 1);
	return 1;
  } else {
	trace_cjk ("2", 2);
	return 2;
  }
}
478
/*
 * Move the character pointer past the UTF-8 character it points to.
 * The lead byte is always skipped; after it, at most as many
 * continuation bytes (10xxxxxx) as the lead byte announces.
 */
void
advance_utf8 (poipoi)
  char * * poipoi;
{
  char * p = * poipoi;
  int pending = UTF8_len (* p) - 1;

  p ++;
  for (; pending > 0 && (* p & 0xC0) == 0x80; pending --) {
    p ++;
  }
  * poipoi = p;
}
495
/*
 * Move the character pointer past the CJK character it points to.
 * The first byte is always skipped; further bytes of the length
 * given by CJK_len () are skipped only up to a '\0' or '\n'.
 */
static
void
advance_cjk (poipoi)
  char * * poipoi;
{
  int remaining = CJK_len (* poipoi) - 1;
  char * p = * poipoi;

  p ++;
  while (remaining > 0 && * p != '\0' && * p != '\n') {
    p ++;
    remaining --;
  }
  * poipoi = p;
}
514
515 /*
516 * Advance only character pointer to next character.
517 * Handle tab characters and different character encodings correctly.
518 */
519 void
advance_char(poipoi)520 advance_char (poipoi)
521 char * * poipoi;
522 {
523 if (utf8_text) {
524 advance_utf8 (poipoi);
525 } else if (cjk_text) {
526 advance_cjk (poipoi);
527 } else {
528 (* poipoi) ++;
529 }
530 }
531
532 /*
533 charbegin () determines the first byte of the character pointed to
534 in the given line
535 */
536 char *
charbegin(line,s)537 charbegin (line, s)
538 char * line;
539 char * s;
540 {
541 char * char_search;
542 char * char_prev;
543
544 if (utf8_text || cjk_text) {
545 char_search = line;
546 char_prev = char_search;
547 while (char_search < s) {
548 char_prev = char_search;
549 advance_char (& char_search);
550 }
551 if (char_search > s) {
552 return char_prev;
553 } else {
554 return s;
555 }
556 }
557 return s;
558 }
559
560 /*
561 * precede_char () moves the character pointer within line "begin_line"
562 * left by 1 character
563 */
564 void
precede_char(poipoi,begin_line)565 precede_char (poipoi, begin_line)
566 char * * poipoi;
567 char * begin_line;
568 {
569 if (utf8_text) {
570 char * char_search = * poipoi;
571 int l = 0;
572 while (char_search != begin_line && l < 6) {
573 char_search --;
574 l ++;
575 if ((* char_search & 0xC0) != 0x80) {
576 break;
577 }
578 }
579 if (l > 0 && l > UTF8_len (* char_search)) {
580 (* poipoi) --;
581 } else {
582 * poipoi = char_search;
583 }
584 } else if (cjk_text) {
585 char * char_search = begin_line;
586 char * char_prev = char_search;
587 while (char_search < * poipoi) {
588 char_prev = char_search;
589 advance_cjk (& char_search);
590 }
591 * poipoi = char_prev;
592 } else if (* poipoi != begin_line) {
593 (* poipoi) --;
594 }
595 }
596
597 /*
598 * utf8value () determines the value of the UTF-8 character pointed to
599 */
600 unsigned long
utf8value(poi)601 utf8value (poi)
602 character * poi;
603 {
604 int len;
605 unsigned long unichar;
606 utf8_info (poi, & len, & unichar);
607 return unichar;
608 }
609
610 /*
611 * charvalue () determines the value of the character pointed to
612 */
613 unsigned long
charvalue(poi)614 charvalue (poi)
615 character * poi;
616 {
617 int len;
618
619 if (utf8_text) {
620 unsigned long unichar;
621 utf8_info (poi, & len, & unichar);
622 return unichar;
623 } else if (cjk_text && multichar (* poi)) {
624 unsigned long cjkchar;
625 len = CJK_len (poi);
626 cjkchar = * poi ++;
627 len --;
628 while (len > 0 && * poi != '\0' && * poi != '\n') {
629 cjkchar = (cjkchar << 8) | * poi ++;
630 len --;
631 }
632 if (len > 0) {
633 return CHAR_INVALID;
634 } else {
635 return cjkchar;
636 }
637 } else {
638 return * poi;
639 }
640 }
641
642 /**
643 unicode () returns the Unicode value of the character code
644 */
645 unsigned long
unicode(code)646 unicode (code)
647 unsigned long code;
648 {
649 if (cjk_text || mapped_text) {
650 return lookup_encodedchar (code);
651 } else {
652 return code;
653 }
654 }
655
656 /**
657 unicodevalue () determines the Unicode value of the character pointed to
658 */
659 unsigned long
unicodevalue(poi)660 unicodevalue (poi)
661 character * poi;
662 {
663 return unicode (charvalue (poi));
664 }
665
/*
 * precedingchar () returns the value of the character before curpoi
 * in the line starting at begin_line, or '\n' if curpoi is at the
 * line beginning.
 */
unsigned long
precedingchar (curpoi, begin_line)
  char * curpoi;
  char * begin_line;
{
  if (curpoi != begin_line) {
    char * before = curpoi;

    precede_char (& before, begin_line);
    return charvalue (before);
  }
  return '\n';
}
684
/*
 * Move the character pointer past the current UTF-8 character and
 * add that character's screen width (uniscrwidth ()) to the column
 * counter.
 */
void
advance_utf8_scr (poipoi, colpoi, linebegin)
  char * * poipoi;
  int * colpoi;
  char * linebegin;
{
  char * p = * poipoi;
  unsigned long ucs;
  int pending;

  utf8_info (p, & pending, & ucs);
  * colpoi += uniscrwidth (ucs, p, linebegin);

  /* skip the lead byte, then the continuation bytes that follow it */
  p ++;
  for (pending --; pending > 0 && (* p & 0xC0) == 0x80; pending --) {
    p ++;
  }
  * poipoi = p;
}
707
708 /*
709 * Advance character pointer and screen column counter to next character.
710 * Handle tab characters and different character encodings correctly.
711 */
712 void
advance_char_scr(poipoi,colpoi,linebegin)713 advance_char_scr (poipoi, colpoi, linebegin)
714 char * * poipoi;
715 int * colpoi;
716 char * linebegin;
717 {
718 if (ebcdic_text ? * * poipoi == code_TAB : * * poipoi == '\t') {
719 * colpoi = tab (* colpoi);
720 (* poipoi) ++;
721 } else if (utf8_text) {
722 advance_utf8_scr (poipoi, colpoi, linebegin);
723 } else if (cjk_text) {
724 int len = CJK_len (* poipoi);
725
726 (* colpoi) += cjkscrwidth (charvalue (* poipoi), * poipoi, linebegin);
727
728 /* make sure pointer is incremented at least once in case it's \n */
729 (* poipoi) ++;
730 len --;
731 while (len > 0 && * * poipoi != '\0' && * * poipoi != '\n') {
732 (* poipoi) ++;
733 len --;
734 }
735 } else if (mapped_text) {
736 unsigned long unichar = lookup_encodedchar ((character) * * poipoi);
737 if (combining_mode && iscombined (unichar, * poipoi, linebegin)) {
738 if (separate_isolated_combinings) {
739 if (* poipoi == linebegin || * (* poipoi - 1) == '\t') {
740 if (iswide (unichar)) {
741 (* colpoi) += 2;
742 } else {
743 (* colpoi) ++;
744 }
745 }
746 } else {
747 /* * colpoi stays where it is */
748 }
749 } else if (cjk_term || cjk_width_data_version) {
750 (* colpoi) += uniscrwidth (unichar, * poipoi, linebegin);
751 } else {
752 (* colpoi) ++;
753 }
754 (* poipoi) ++;
755 } else if (cjk_term || cjk_width_data_version) {
756 (* colpoi) += uniscrwidth ((character) * * poipoi, * poipoi, linebegin);
757 (* poipoi) ++;
758 } else {
759 (* colpoi) ++;
760 (* poipoi) ++;
761 }
762 }
763
764
765 /*======================================================================*\
766 |* UTF-8 character statistics and quote style detection *|
767 \*======================================================================*/
768
769 /**
770 determine if character precedes opening rather than closing quotation
771 - add further characters (=, /) to quote openers?
772 */
773 FLAG
opensquote(prevchar)774 opensquote (prevchar)
775 unsigned long prevchar;
776 {
777 switch (prevchar) {
778 case '\n':
779 case '(':
780 case '[':
781 case '{':
782 return True;
783 }
784 if (iswhitespace (prevchar) || isdash (prevchar) || isopeningparenthesis (prevchar)) {
785 return True;
786 }
787 return False;
788 }
789
790 /**
791 determine if current position (if quote mark) is opening
792 */
793 static
794 FLAG
isopeningquote(s,beg)795 isopeningquote (s, beg)
796 char * s;
797 char * beg;
798 {
799 /* simplified approach; don't consider quotes after quotes
800 or CJK embedded quotes
801 */
802 return opensquote (precedingchar (s, beg));
803 }
804
/**
   Quotation mark usage counters, one per quotation style;
   updated by utf8_count () and evaluated by determine_quote_style ()
 */
static unsigned long count_plain = 0;
static unsigned long count_English = 0;
static unsigned long count_German = 0;
static unsigned long count_Swiss = 0;
static unsigned long count_inwards = 0;
static unsigned long count_Dutch = 0;
static unsigned long count_SwedFinn_q = 0;
static unsigned long count_SwedFinn_g = 0;
static unsigned long count_Greek = 0;
static unsigned long count_CJKcorners = 0;
static unsigned long count_CJKtitles = 0;
static unsigned long count_CJKsquares = 0;
static unsigned long count_Danish = 0;
static unsigned long count_French = 0;
static unsigned long count_Norwegian = 0;
static unsigned long count_Russian = 0;
static unsigned long count_Polish = 0;
static unsigned long count_Macedonian = 0;
static unsigned long count_Serbian = 0;

/**
   Set all quotation style counters back to zero.
 */
static
void
reset_quote_statistics ()
{
  count_plain = count_English = count_German = count_Swiss = 0;
  count_inwards = count_Dutch = 0;
  count_SwedFinn_q = count_SwedFinn_g = 0;
  count_Greek = 0;
  count_CJKcorners = count_CJKtitles = count_CJKsquares = 0;
  count_Danish = count_French = count_Norwegian = 0;
  count_Russian = count_Polish = 0;
  count_Macedonian = count_Serbian = 0;
}
852
/* highest counter value seen so far by check_quote_style () */
static unsigned long count_quotes;

/**
   Select quote style s (via set_quote_style ()) if its counter c is
   strictly higher than the best counter seen so far.
 */
static
void
check_quote_style (c, s)
  unsigned long c;
  char * s;
{
  if (c <= count_quotes) {
    return;
  }
  count_quotes = c;
  set_quote_style (s);
}
867
868 /* Unicode quotation marks
869 “ 201C; LEFT DOUBLE QUOTATION MARK; DOUBLE TURNED COMMA QUOTATION MARK
870 ” 201D; RIGHT DOUBLE QUOTATION MARK; DOUBLE COMMA QUOTATION MARK
871 „ 201E; DOUBLE LOW-9 QUOTATION MARK; LOW DOUBLE COMMA QUOTATION MARK
872 ‟ 201F; DOUBLE HIGH-REVERSED-9 QUOTATION MARK; DOUBLE REVERSED COMMA QUOTATION MARK
873 ‘ 2018; LEFT SINGLE QUOTATION MARK; SINGLE TURNED COMMA QUOTATION MARK
874 ’ 2019; RIGHT SINGLE QUOTATION MARK; SINGLE COMMA QUOTATION MARK
875 ‚ 201A; SINGLE LOW-9 QUOTATION MARK; LOW SINGLE COMMA QUOTATION MARK
876 ‛ 201B; SINGLE HIGH-REVERSED-9 QUOTATION MARK; SINGLE REVERSED COMMA QUOTATION MARK
877 « 00AB; LEFT-POINTING DOUBLE ANGLE QUOTATION MARK; LEFT POINTING GUILLEMET
878 » 00BB; RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK; RIGHT POINTING GUILLEMET
879 ‹ 2039; SINGLE LEFT-POINTING ANGLE QUOTATION MARK; LEFT POINTING SINGLE GUILLEMET
880 › 203A; SINGLE RIGHT-POINTING ANGLE QUOTATION MARK; RIGHT POINTING SINGLE GUILLEMET
881 〈3008; LEFT ANGLE BRACKET; OPENING ANGLE BRACKET
882 〉3009; RIGHT ANGLE BRACKET; CLOSING ANGLE BRACKET
883 《300A; LEFT DOUBLE ANGLE BRACKET; OPENING DOUBLE ANGLE BRACKET
884 》300B; RIGHT DOUBLE ANGLE BRACKET; CLOSING DOUBLE ANGLE BRACKET
885 「300C; LEFT CORNER BRACKET; OPENING CORNER BRACKET
886 」300D; RIGHT CORNER BRACKET; CLOSING CORNER BRACKET
887 『300E; LEFT WHITE CORNER BRACKET; OPENING WHITE CORNER BRACKET
888 』300F; RIGHT WHITE CORNER BRACKET; CLOSING WHITE CORNER BRACKET
889 【 U+3010; LEFT BLACK LENTICULAR BRACKET; OPENING BLACK LENTICULAR BRACKET
890 】 U+3011; RIGHT BLACK LENTICULAR BRACKET; CLOSING BLACK LENTICULAR BRACKET
891 〖 U+3016; LEFT WHITE LENTICULAR BRACKET; OPENING WHITE LENTICULAR BRACKET
892 〗 U+3017; RIGHT WHITE LENTICULAR BRACKET; CLOSING WHITE LENTICULAR BRACKET
893 */
894
895 static
896 void
determine_quote_style()897 determine_quote_style ()
898 {
899 count_quotes = 0;
900 check_quote_style (count_plain, "\"\"");
901 check_quote_style (count_English, "“”");
902 check_quote_style (count_inwards, "»«");
903 check_quote_style (count_German, "„“");
904 check_quote_style (count_Swiss, "«» ‹›");
905 check_quote_style (count_Dutch, "„”");
906 check_quote_style (count_SwedFinn_q, "””");
907 check_quote_style (count_SwedFinn_g, "»»");
908 check_quote_style (count_Greek, "«» ‟”");
909 check_quote_style (count_CJKcorners, "『』");
910 check_quote_style (count_CJKtitles, "《》");
911 check_quote_style (count_CJKsquares, "【】");
912 check_quote_style (count_Danish, "»« ’’"); /* alt Danish */
913 check_quote_style (count_French, "« » “ ”");
914 check_quote_style (count_Norwegian, "«» ‘’");
915 check_quote_style (count_Russian, "«» „“");
916 check_quote_style (count_Polish, "„” «»");
917 check_quote_style (count_Macedonian, "„“ ’‘");
918 check_quote_style (count_Serbian, "„“ ’’");
919 }
920
/*
 * utf8_count () returns the number of UTF-8 characters in the string
   (like char_count) and also detects quotation marks and updates
   their statistics.
   A NIL_PTR string yields 0.
   Each quotation mark found increments the counters of the quotation
   styles that use this mark in this role; the role (opening or
   closing) is judged by isopeningquote (), i.e. by the character in
   front of the mark.
 */
static
int
utf8_count (string)
  char * string;
{
  char * start = string;
  int count = 0;
  /* value of the most recent character decoded in the filter branch
     below; characters skipped by the filter do not update it */
  unsigned long unichar = 0;
  /* NOTE(review): this is the previously decoded filter candidate,
     not necessarily the character directly in front of the current
     one -- confirm that this is intended where it is tested with
     iswhitespace () below */
  unsigned long prev_unichar;
  int utflen;

  if (string != NIL_PTR) {
    while (* string != '\0') {
	/* Detect quotation marks.
	   The UTF-8 codes of all quotation marks are either 
	   C2AB or C2BB or start with either E280 or E380.
	   This may help for efficient detection during file loading.
	*/
	/* Cheap pre-filter before decoding: accepts ASCII ' and ",
	   any sequence starting with byte C2, and lead bytes
	   C3, E2, E3 (mask 0xDE) if followed by byte 80 */
	if ((((character) * string) <= 0x27
	     && (* string == '\'' || * string == '"')
	    )
	    ||
	    ((* string & 0xDE) == 0xC2
	     && (((character) * string) == 0xC2 || ((character) * (string + 1)) == 0x80)
	    )
	   )
	{
		prev_unichar = unichar;
		utf8_info (string, & utflen, & unichar);
		/* characters passing the filter that are no quotation
		   marks match no case and leave the counters alone */
		switch ((unsigned int) unichar) {
		case (character) '"':
		case (character) '\'':
			count_plain ++;
			break;
		case 0x201C: /* “ LEFT DOUBLE QUOTATION MARK; DOUBLE TURNED COMMA QUOTATION MARK */
			if (isopeningquote (string, start)) {
				count_English ++;
				count_French ++;
			} else {
				count_German ++;
				count_Russian ++;
				count_Macedonian ++;
				count_Serbian ++;
			}
			break;
		case 0x2018: /* ‘ LEFT SINGLE QUOTATION MARK; SINGLE TURNED COMMA QUOTATION MARK */
			if (isopeningquote (string, start)) {
				count_English ++;
				count_Norwegian ++;
			} else {
				count_German ++;
				count_Macedonian ++;
			}
			break;
		case 0x201D: /* ” RIGHT DOUBLE QUOTATION MARK; DOUBLE COMMA QUOTATION MARK */
			/* counted for this style in either role */
			count_SwedFinn_q ++;
			if (! isopeningquote (string, start)) {
				count_English ++;
				count_Dutch ++;
				count_Greek ++;
				count_Polish ++;
				count_French ++;
				/* extra weight for French if the previous
				   candidate was whitespace */
				if (iswhitespace (prev_unichar)) {
					count_French ++;
				}
			}
			break;
		case 0x2019: /* ’ RIGHT SINGLE QUOTATION MARK; SINGLE COMMA QUOTATION MARK */
			/* counted for these styles in either role */
			count_SwedFinn_q ++;
			count_Danish ++;
			count_Serbian ++;
			if (isopeningquote (string, start)) {
				count_Macedonian ++;
			} else {
				count_English ++;
				count_Dutch ++;
				count_Norwegian ++;
			}
			break;
		case 0x201E: /* „ DOUBLE LOW-9 QUOTATION MARK; LOW DOUBLE COMMA QUOTATION MARK */
			if (isopeningquote (string, start)) {
				count_German ++;
				count_Dutch ++;
				count_Russian ++;
				count_Polish ++;
				count_Macedonian ++;
				count_Serbian ++;
			}
			break;
		case 0x201A: /* ‚ SINGLE LOW-9 QUOTATION MARK; LOW SINGLE COMMA QUOTATION MARK */
			if (isopeningquote (string, start)) {
				count_German ++;
				count_Dutch ++;
			}
			break;
		case 0x201F: /* ‟ DOUBLE HIGH-REVERSED-9 QUOTATION MARK; DOUBLE REVERSED COMMA QUOTATION MARK */
			if (isopeningquote (string, start)) {
				count_Greek ++;
			}
			break;
		case 0x201B: /* ‛ SINGLE HIGH-REVERSED-9 QUOTATION MARK; SINGLE REVERSED COMMA QUOTATION MARK */
			/* recognised but not counted for any style */
			break;
		case 0x00AB: /* « LEFT-POINTING DOUBLE ANGLE QUOTATION MARK; LEFT POINTING GUILLEMET */
			if (isopeningquote (string, start)) {
				count_Swiss ++;
				count_French ++;
				count_Norwegian ++;
				count_Russian ++;
				count_Polish ++;
				count_Greek ++;
			} else {
				count_inwards ++;
				count_Danish ++;
			}
			break;
		case 0x2039: /* ‹ SINGLE LEFT-POINTING ANGLE QUOTATION MARK; LEFT POINTING SINGLE GUILLEMET */
			if (isopeningquote (string, start)) {
				count_Swiss ++;
			} else {
				count_inwards ++;
			}
			break;
		case 0x00BB: /* » RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK; RIGHT POINTING GUILLEMET */
			/* counted for this style in either role */
			count_SwedFinn_g ++;
			if (isopeningquote (string, start)) {
				count_inwards ++;
				count_Danish ++;
			} else {
				count_Swiss ++;
				count_Norwegian ++;
				count_Russian ++;
				count_Polish ++;
				count_Greek ++;
				count_French ++;
				/* extra weight for French if the previous
				   candidate was whitespace */
				if (iswhitespace (prev_unichar)) {
					count_French ++;
				}
			}
			break;
		case 0x203A: /* › SINGLE RIGHT-POINTING ANGLE QUOTATION MARK; RIGHT POINTING SINGLE GUILLEMET */
			/* counted for this style in either role */
			count_SwedFinn_g ++;
			if (isopeningquote (string, start)) {
				count_inwards ++;
			} else {
				count_Swiss ++;
			}
			break;
		/* CJK brackets are counted regardless of their role */
		case 0x300A: /* 《 LEFT DOUBLE ANGLE BRACKET; OPENING DOUBLE ANGLE BRACKET */
		case 0x3008: /* 〈 LEFT ANGLE BRACKET; OPENING ANGLE BRACKET */
		case 0x300B: /* 》 RIGHT DOUBLE ANGLE BRACKET; CLOSING DOUBLE ANGLE BRACKET */
		case 0x3009: /* 〉 RIGHT ANGLE BRACKET; CLOSING ANGLE BRACKET */
			count_CJKtitles ++;
			break;
		case 0x300C: /* 「 LEFT CORNER BRACKET; OPENING CORNER BRACKET */
		case 0x300E: /* 『 LEFT WHITE CORNER BRACKET; OPENING WHITE CORNER BRACKET */
		case 0x300D: /* 」 RIGHT CORNER BRACKET; CLOSING CORNER BRACKET */
		case 0x300F: /* 』 RIGHT WHITE CORNER BRACKET; CLOSING WHITE CORNER BRACKET */
			count_CJKcorners ++;
			break;
		case 0x3010: /* 【 LEFT BLACK LENTICULAR BRACKET; OPENING BLACK LENTICULAR BRACKET */
		case 0x3011: /* 】 RIGHT BLACK LENTICULAR BRACKET; CLOSING BLACK LENTICULAR BRACKET */
		case 0x3016: /* 〖 LEFT WHITE LENTICULAR BRACKET; OPENING WHITE LENTICULAR BRACKET */
		case 0x3017: /* 〗 RIGHT WHITE LENTICULAR BRACKET; CLOSING WHITE LENTICULAR BRACKET */
			count_CJKsquares ++;
			break;
		}
	}

	/* Advance and count */
	advance_utf8 (& string);
	count ++;
    }
  }
  return count;
}
1101
1102
1103 /*======================================================================*\
1104 |* Auxiliary functions *|
1105 \*======================================================================*/
1106
1107 static
1108 int
strcaseeq(s1,s2)1109 strcaseeq (s1, s2)
1110 char * s1;
1111 char * s2;
1112 {
1113 do {
1114 char c1, c2;
1115 if (! * s1 && ! * s2) {
1116 return True;
1117 }
1118 c1 = * s1;
1119 if (c1 >= 'a' && c1 <= 'z') {
1120 c1 = c1 - 'a' + 'A';
1121 }
1122 c2 = * s2;
1123 if (c2 >= 'a' && c2 <= 'z') {
1124 c2 = c2 - 'a' + 'A';
1125 }
1126 if (c1 != c2) {
1127 return False;
1128 }
1129 s1 ++;
1130 s2 ++;
1131 } while (True);
1132 }
1133
1134
1135 /*
1136 Set flags depending on file type.
1137 */
1138 static
1139 void
set_file_type_flags()1140 set_file_type_flags ()
1141 {
1142 char * bn = getbasename (file_name);
1143 char * suffix = strrchr (file_name, '.');
1144 if (suffix != NIL_PTR) {
1145 suffix ++;
1146 } else {
1147 suffix = "";
1148 }
1149
1150 if (hide_passwords == UNSURE) {
1151 hide_passwords = * bn == '.';
1152 }
1153
1154 if (viewing_help) {
1155 mark_HTML = True;
1156 } else if (mark_HTML == UNSURE) {
1157 if (
1158 strcaseeq (suffix, "html")
1159 || strcaseeq (suffix, "htm")
1160 || strcaseeq (suffix, "xhtml")
1161 || strcaseeq (suffix, "shtml")
1162 || strcaseeq (suffix, "mhtml")
1163 || strcaseeq (suffix, "sgml")
1164 || strcaseeq (suffix, "xml")
1165 || strcaseeq (suffix, "xul")
1166 || strcaseeq (suffix, "xsd")
1167 || strcaseeq (suffix, "xsl")
1168 || strcaseeq (suffix, "xslt")
1169 || strcaseeq (suffix, "wsdl")
1170 || strcaseeq (suffix, "dtd")
1171 )
1172 {
1173 mark_HTML = True;
1174 mark_JSP = False; /* no effect (not checked yet) */
1175 } else if
1176 ( strcaseeq (suffix, "jsp")
1177 || strcaseeq (suffix, "php")
1178 || strcaseeq (suffix, "asp")
1179 || strcaseeq (suffix, "aspx")
1180 )
1181 {
1182 mark_HTML = True;
1183 mark_JSP = True;
1184 } else {
1185 mark_HTML = False;
1186 mark_JSP = False;
1187 }
1188 }
1189
1190 if (! strop_selected) {
1191 if (strcaseeq (suffix, "a68")) {
1192 lowcapstrop = True;
1193 dispunstrop = True;
1194 } else {
1195 lowcapstrop = False;
1196 dispunstrop = False;
1197 }
1198 }
1199 }
1200
1201
1202 /*======================================================================*\
1203 |* File I/O buffer *|
1204 \*======================================================================*/
1205
/* File I/O buffer size: 20 KiB normally; a tiny 16 byte buffer is used
   instead if debug_filebuf is defined (here dont_debug_filebuf is
   defined, so the normal size applies). */
#define dont_debug_filebuf
#ifdef debug_filebuf
#define filebuflen 16
#else
#define filebuflen (20 * 1024)
#endif
static char filebuf [filebuflen + 1];	/* allow for 1 byte overflow, e.g. from Cygwin /dev/clipboard */
1213
1214
1215 /* filebuf read parameters */
1216 static char * last_bufpos = NIL_PTR;
1217 static char * current_bufpos = NIL_PTR;
1218 static char * UTF16buf = NIL_PTR;
1219 static char * fini_byte = NIL_PTR;
1220 static char * next_byte = NIL_PTR;
1221 static long read_bytes;
1222 static long read_chars;
1223 /*long long file_position;*/
1224
1225 /**
1226 Clear filebuf read parameters
1227 */
1228 static
1229 void
clear_get_line()1230 clear_get_line ()
1231 {
1232 last_bufpos = NIL_PTR;
1233 current_bufpos = NIL_PTR;
1234 }
1235
1236
/* filebuf write parameters */
static unsigned int filebuf_count = 0;

/**
   Reset filebuf for both directions:
   drop the pending write count and the read position.
 */
void
clear_filebuf ()
{
  /* the read side must be cleared as well, so that the buffer is
     cleared completely (see break in configure_preferences) */
  clear_get_line ();
  filebuf_count = 0;
}
1250
1251
1252 /*======================================================================*\
1253 |* File line reading *|
1254 \*======================================================================*/
1255
1256 int
line_gotten(ret)1257 line_gotten (ret)
1258 int ret;
1259 {
1260 return ret != ERRORS && ret != NO_INPUT;
1261 }
1262
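/*
 * get_line reads one line from file descriptor fd into the line buffer
 * and stores the length of the line in * len.
 * It returns ERRORS if reading fails and NO_INPUT if the end of the file
 * is reached before any byte of a line was read; otherwise it returns
 * a status describing the line (FINE, NO_LINE, NUL_LINE, SPLIT_LINE).
 */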
1267
/* Statistics collected while reading, used for auto-detection of
   character encoding and line end type; reset by reset_get_line. */
static long count_good_utf; /* count good UTF-8 sequences */
static long count_bad_utf; /* count bad UTF-8 sequences */
static long count_utf_bytes; /* continuation bytes still expected in the current UTF-8 sequence */
static long counted_utf_bytes; /* continuation bytes expected when the current UTF-8 sequence started */
static long count_good_iso; /* count good ISO-8859 bytes */
static long count_good_cp1252; /* count good CP1252 (Windows Western) bytes */
static long count_good_cp850; /* count good CP850 (DOS) bytes */
static long count_good_mac; /* count good MacRoman bytes */
static long count_good_ebcdic; /* count good EBCDIC bytes */
static long count_good_viscii; /* count good VISCII bytes */
static long count_good_tcvn; /* count good TCVN bytes */
static long count_1read_op; /* 0 until get_line has examined its first buffer of input */
static long count_lineend_LF; /* count Unix lines */
static long count_lineend_CRLF; /* count MSDOS lines */
static long count_lineend_CR; /* count Mac lines */
static long count_lineend_NL; /* count ISO 8859 lines */
static FLAG BOM; /* BOM found at beginning of file? */
static FLAG consider_transform; /* consider UTF-16 or EBCDIC when reading? */

/*
 CJK character encoding auto-detection
 */
static character last_cjkbyte = '\0';	/* previous byte, or 0 if it completed a CJK pair */
static character last2_cjkbyte = '\0';	/* value of last_cjkbyte one step earlier */
static long count_good_cjk; /* count good CJK codes */
static long count_weak_cjk; /* count weak (unsure) CJK codes */
static long count_bad_cjk; /* count bad CJK codes */
static long count_big5; /* count Big5 codes */
static long count_gb; /* count GB (GB2312, GBK, GB18030) codes */
static int detect_gb18030 = 0; /* observe GB18030 byte state (counted down per byte) */
static long count_uhc; /* count UHC (KS C 5601/KS X 1001) codes */
static long count_jp; /* count EUC-JP codes */
static long count_jisx; /* count JIS X 0213 codes */
static long count_sjis; /* count Shift-JIS/CP932 codes */
static long count_sjisx; /* count Shift JIS X 0213 codes */
static long count_sjis1; /* count Shift-JIS single-byte codes */
static long count_johab; /* count Johab codes */
static long count_cns; /* count CNS codes */

/* highest CJK count seen so far and the tag passed with it; see max_cjk_count */
static long count_max_cjk;
static char max_cjk_tag;

/* first error recorded while reading (NIL_PTR if none), and its errno
   value or -1; see set_error and set_errno */
static char * get_line_error;
static int get_line_errno;
1312
1313 static
1314 void
set_error(err)1315 set_error (err)
1316 char * err;
1317 {
1318 if (err == NIL_PTR || get_line_error == NIL_PTR) {
1319 get_line_error = err;
1320 get_line_errno = -1;
1321 }
1322 }
1323
1324 static
1325 void
set_errno(err)1326 set_errno (err)
1327 char * err;
1328 {
1329 set_error (err);
1330 get_line_errno = geterrno ();
1331 }
1332
1333 static
1334 void
max_cjk_count(cnt,tag)1335 max_cjk_count (cnt, tag)
1336 long cnt;
1337 char tag;
1338 {
1339 if (cnt > count_max_cjk) {
1340 count_max_cjk = cnt;
1341 max_cjk_tag = tag;
1342 }
1343 }
1344
/*
   UTF-16 transformation state:
   pending value of a high surrogate, carried over by UTF16_transform
   until the matching low surrogate arrives (0 if none)
 */
static unsigned long surrogate = 0;

/*
   Drop any pending high surrogate.
 */
static
void
clear_UTF16_transform ()
{
  surrogate = 0UL;
}
1356
1357 #define dont_debug_auto_detect
1358 #define dont_debug_read
1359
1360 /**
1361 if preparing for text file reading,
1362 make sure utf16_file is adjusted afterwards,
1363 e.g. by calling set_text_encoding (default_text_encoding)
1364 */
1365 void
reset_get_line(from_text_file)1366 reset_get_line (from_text_file)
1367 FLAG from_text_file; /* consider UTF-16/EBCDIC transformation ? */
1368 {
1369 set_error (NIL_PTR);
1370
1371 count_good_utf = 0;
1372 count_bad_utf = 0;
1373 count_utf_bytes = 0;
1374
1375 count_good_cjk = 0;
1376 count_weak_cjk = 0;
1377 count_bad_cjk = 0;
1378 last_cjkbyte = '\0';
1379 last2_cjkbyte = '\0';
1380
1381 count_good_iso = 0;
1382 count_good_cp1252 = 0;
1383 count_good_cp850 = 0;
1384 count_good_mac = 0;
1385 count_good_ebcdic = 0;
1386 count_good_viscii = 0;
1387 count_good_tcvn = 0;
1388 count_big5 = 0;
1389 count_gb = 0;
1390 count_uhc = 0;
1391 count_jp = 0;
1392 count_jisx = 0;
1393 count_sjis = 0;
1394 count_sjis1 = 0;
1395 count_sjisx = 0;
1396 count_johab = 0;
1397 count_cns = 0;
1398
1399 count_lineend_LF = 0;
1400 count_lineend_CRLF = 0;
1401 count_lineend_CR = 0;
1402 count_lineend_NL = 0;
1403
1404 count_1read_op = 0;
1405
1406 reset_quote_statistics ();
1407
1408 BOM = False;
1409 consider_transform = from_text_file;
1410
1411 clear_UTF16_transform ();
1412
1413 /* file_position = 0;*/
1414 }
1415
1416 static FLAG save_utf16_file;
1417 static FLAG save_BOM;
1418
1419 /**
1420 save_text_info and restore_text_info are needed
1421 to avoid spoiling UTF-16 information while pasting
1422 -> refactoring!
1423 */
1424 void
save_text_info()1425 save_text_info ()
1426 {
1427 save_utf16_file = utf16_file;
1428 save_BOM = BOM;
1429 }
1430
1431 void
restore_text_info()1432 restore_text_info ()
1433 {
1434 utf16_file = save_utf16_file;
1435 BOM = save_BOM;
1436 }
1437
1438 void
show_get_l_errors()1439 show_get_l_errors ()
1440 {
1441 if (! only_detect_text_encoding && get_line_error != NIL_PTR) {
1442 ring_bell ();
1443 status_fmt2 (get_line_error, " - Loading failed!");
1444 /*
1445 while (readcharacter () != ' ' && quit == False) {
1446 ring_bell ();
1447 flush ();
1448 }
1449 */
1450 }
1451 }
1452
1453
/* Debug toggle: tracing of the UTF-16 transformation is compiled in
   only if debug_utf16 is defined instead. */
#define dont_debug_utf16

/* trace_utf16 takes a parenthesized printf argument list;
   it expands to nothing unless debug_utf16 is defined. */
#ifdef debug_utf16
#define trace_utf16(params) printf params
#else
#define trace_utf16(params)
#endif
1461
1462 /*
1463 Transform UTF-16 input into UTF-8.
1464 */
1465 static
1466 int
UTF16_transform(little_endian,UTF8buf,maxbufl,next_byte_poi,fini_byte)1467 UTF16_transform (little_endian, UTF8buf, maxbufl, next_byte_poi, fini_byte)
1468 FLAG little_endian;
1469 char * UTF8buf;
1470 int maxbufl;
1471 character * * next_byte_poi;
1472 character * fini_byte;
1473 {
1474 register char * ptr = UTF8buf;
1475 int trans_bytes = 0;
1476 unsigned int utf16char;
1477
1478 while (trans_bytes + 4 < maxbufl && * next_byte_poi < fini_byte) {
1479 utf16char = * * next_byte_poi;
1480 (* next_byte_poi) ++;
1481 if (* next_byte_poi < fini_byte) {
1482 if (little_endian) {
1483 utf16char |= (* * next_byte_poi) << 8;
1484 } else {
1485 utf16char = (utf16char << 8) | (* * next_byte_poi);
1486 }
1487 (* next_byte_poi) ++;
1488 } else if (! little_endian) {
1489 utf16char = 0;
1490 }
1491
1492 if ((utf16char & 0xFC00) == 0xD800) {
1493 /* high surrogates */
1494 surrogate = (unsigned long) (utf16char - 0xD7C0) << 10;
1495 trace_utf16 (("%04X -> surrogate %04lX\n", utf16char, surrogate));
1496 } else if ((utf16char & 0xFC00) == 0xDC00) {
1497 /* low surrogates */
1498 unsigned long unichar = surrogate | (utf16char & 0x03FF);
1499 trace_utf16 (("surrogate %04lX + %04X -> %04lX -> %02lX %02lX %02lX %02lX\n", surrogate, utf16char, unichar, 0xF0 | (unichar >> 18), 0x80 | ((unichar >> 12) & 0x3F), 0x80 | ((unichar >> 6) & 0x3F), 0x80 | (unichar & 0x3F)));
1500 surrogate = 0;
1501 * ptr ++ = 0xF0 | (unichar >> 18);
1502 * ptr ++ = 0x80 | ((unichar >> 12) & 0x3F);
1503 * ptr ++ = 0x80 | ((unichar >> 6) & 0x3F);
1504 * ptr ++ = 0x80 | (unichar & 0x3F);
1505 trans_bytes += 4;
1506 } else if (utf16char < 0x80) {
1507 trace_utf16 (("%04X -> (1) %02X\n", utf16char, utf16char));
1508 * ptr ++ = utf16char;
1509 trans_bytes ++;
1510 } else if (utf16char < 0x800) {
1511 trace_utf16 (("%04X -> (2) %02X %02X\n", utf16char, 0xC0 | (utf16char >> 6), 0x80 | (utf16char & 0x3F)));
1512 * ptr ++ = 0xC0 | (utf16char >> 6);
1513 * ptr ++ = 0x80 | (utf16char & 0x3F);
1514 trans_bytes += 2;
1515 } else {
1516 trace_utf16 (("%04X -> (3) %02X %02X %02X\n", utf16char, 0xE0 | (utf16char >> 12), 0x80 | ((utf16char >> 6) & 0x3F), 0x80 | (utf16char & 0x3F)));
1517 * ptr ++ = 0xE0 | (utf16char >> 12);
1518 * ptr ++ = 0x80 | ((utf16char >> 6) & 0x3F);
1519 * ptr ++ = 0x80 | (utf16char & 0x3F);
1520 trans_bytes += 3;
1521 }
1522 }
1523
1524 return trans_bytes;
1525 }
1526
/* trace_read takes a parenthesized printf argument list;
   it expands to nothing unless debug_read is defined. */
#ifdef debug_read
#define trace_read(params) printf params
#else
#define trace_read(params)
#endif
1532
1533 static
1534 int
alloc_UTF16buf()1535 alloc_UTF16buf ()
1536 {
1537 if (UTF16buf == NIL_PTR) {
1538 UTF16buf = alloc (filebuflen + 1);
1539 if (UTF16buf == NIL_PTR) {
1540 set_error ("Not enough memory for UTF-16 transformation");
1541 viewonly_err = True;
1542 modified = False;
1543 return ERRORS;
1544 }
1545 }
1546 return FINE;
1547 }
1548
1549
/**
 Tables supporting auto-detection of some 8-bit character encodings;
 for each character >= 0x80 they indicate whether the character is
 a letter (cl), another valid character (cc), or invalid (nc).
 The EBCDIC table covers all byte values from 0x00.
 */
1555
/* Weights added to an encoding's count for each byte, by byte class;
   the weight of an invalid character is negative, so it lowers the count. */
#define cl 2 /* count weight of letter */
#define cc 1 /* count weight of non-letter */
#define nc -3 /* count weight of invalid character */
1559
/* Weights of bytes 0x80..0xFF for the ISO-8859 count;
   indexed by byte - 0x80 (see auto_detect_byte). */
static signed char good_iso [0x80] = {
/*80*/ nc, nc, nc, nc, nc, cl, nc, nc, nc, nc, nc, nc, nc, nc, nc, nc,
/*90*/ nc, nc, nc, nc, nc, nc, nc, nc, nc, nc, nc, nc, nc, nc, nc, nc,
/*A0*/ cc, cc, cc, cc, cc, cc, cc, cc, cc, cc, cc, cc, cc, cc, cc, cc,
/*B0*/ cc, cc, cc, cc, cc, cc, cc, cc, cc, cc, cc, cc, cc, cc, cc, cc,
/*C0*/ cl, cl, cl, cl, cl, cl, cl, cl, cl, cl, cl, cl, cl, cl, cl, cl,
/*D0*/ cl, cl, cl, cl, cl, cl, cl, cc, cl, cl, cl, cl, cl, cl, cl, cl,
/*E0*/ cl, cl, cl, cl, cl, cl, cl, cl, cl, cl, cl, cl, cl, cl, cl, cl,
/*F0*/ cl, cl, cl, cl, cl, cl, cl, cc, cl, cl, cl, cl, cl, cl, cl, cl,
};
1570
/* Weights of bytes 0x80..0xFF for the CP1252 count;
   indexed by byte - 0x80 (see auto_detect_byte). */
static signed char good_cp1252 [0x80] = {
/*80*/ cc, nc, cc, cc, cc, cc, cc, cc, cc, cc, cl, cc, cl, nc, cl, nc,
/*90*/ nc, cc, cc, cc, cc, cc, cc, cc, cc, cc, cl, cc, cl, nc, cl, cl,
/*A0*/ cc, cc, cc, cc, cc, cc, cc, cc, cc, cc, cc, cc, cc, cc, cc, cc,
/*B0*/ cc, cc, cc, cc, cc, cc, cc, cc, cc, cc, cc, cc, cc, cc, cc, cc,
/*C0*/ cl, cl, cl, cl, cl, cl, cl, cl, cl, cl, cl, cl, cl, cl, cl, cl,
/*D0*/ cl, cl, cl, cl, cl, cl, cl, cc, cl, cl, cl, cl, cl, cl, cl, cl,
/*E0*/ cl, cl, cl, cl, cl, cl, cl, cl, cl, cl, cl, cl, cl, cl, cl, cl,
/*F0*/ cl, cl, cl, cl, cl, cl, cl, cc, cl, cl, cl, cl, cl, cl, cl, cl,
};
1581
/* Weights of bytes 0x80..0xFF for the CP850 count;
   indexed by byte - 0x80 (see auto_detect_byte). */
static signed char good_cp850 [0x80] = {
/*80*/ cl, cl, cl, cl, cl, cl, cl, cl, cl, cl, cl, cl, cl, cl, cl, cl,
/*90*/ cl, cl, cl, cl, cl, cl, cl, cl, cl, cl, cl, cl, cc, cl, cc, cc,
/*A0*/ cl, cl, cl, cl, cl, cl, cc, cc, cc, cc, cc, cc, cc, cc, cc, cc,
/*B0*/ cc, cc, cc, cc, cc, cl, cl, cl, cc, cc, cc, cc, cc, cc, cc, cc,
/*C0*/ cc, cc, cc, cc, cc, cc, cl, cl, cc, cc, cc, cc, cc, cc, cc, cc,
/*D0*/ cl, cl, cl, cl, cl, cl, cl, cl, cl, cc, cc, cc, cc, cc, cl, cc,
/*E0*/ cl, cl, cl, cl, cl, cl, cc, cl, cl, cl, cl, cl, cl, cl, cc, cc,
/*F0*/ cc, cc, cc, cc, cc, cc, cc, cc, cc, cc, cc, cc, cc, cc, cc, cc,
};
1592
/* Weights of bytes 0x80..0xFF for the MacRoman count;
   indexed by byte - 0x80 (see auto_detect_byte). */
static signed char good_mac [0x80] = {
/*80*/ cl, cl, cl, cl, cl, cl, cl, cl, cl, cl, cl, cl, cl, cl, cl, cl,
/*90*/ cl, cl, cl, cl, cl, cl, cl, cl, cl, cl, cl, cl, cl, cl, cl, cl,
/*A0*/ cc, cc, cc, cc, cc, cc, cc, cl, cc, cc, cc, cc, cc, cc, cl, cl,
/*B0*/ cc, cc, cc, cc, cc, cc, cc, cc, cc, cc, cc, cc, cc, cc, cl, cl,
/*C0*/ cc, cc, cc, cc, cc, cc, cc, cc, cc, cc, cc, cl, cl, cl, cl, cl,
/*D0*/ cc, cc, cc, cc, cc, cc, cc, cc, cl, cl, cc, cc, cc, cc, cc, cc,
/*E0*/ cc, cc, cc, cc, cc, cl, cl, cl, cl, cl, cl, cl, cl, cl, cl, cl,
/*F0*/ nc, cl, cl, cl, cl, cl, cc, cc, cc, cc, cc, cc, cc, cc, cc, cc,
};
1603
/* Weights of all byte values for the EBCDIC count; indexed by the byte
   itself. auto_detect_byte only consults it for bytes >= 0x80. */
static signed char good_ebcdic [0x100] = {
/*00*/ nc, nc, nc, nc, cc, cc, cc, cc, cc, cc, cc, nc, nc, nc, nc, nc,
/*10*/ nc, nc, nc, nc, cc, cc, cc, cc, nc, nc, cc, cc, nc, nc, nc, nc,
/*20*/ cc, cc, cc, cc, cc, cc, cc, cc, cc, cc, cc, cc, cc, cc, cc, cc,
/*30*/ cc, cc, cc, cc, cc, cc, cc, cc, cc, cc, cc, cc, cc, cc, cc, cc,
/*40*/ cc, cc, cc, cc, cc, cc, cc, cc, cc, cc, cc, cc, cc, cc, cc, cc,
/*50*/ cc, cc, cc, cc, cc, cc, cc, cc, cc, cc, cc, cc, cc, cc, cc, cc,
/*60*/ cc, cc, cc, cc, cc, cc, cc, cc, cc, cc, cc, cc, cc, cc, cc, cc,
/*70*/ cc, cc, cc, cc, cc, cc, cc, cc, cc, cc, cc, cc, cc, cc, cc, cc,
/*80*/ cc, cl, cl, cl, cl, cl, cl, cl, cl, cl, cc, cc, cc, cc, cc, cc,
/*90*/ cc, cl, cl, cl, cl, cl, cl, cl, cl, cl, cc, cc, cc, cc, cc, cc,
/*A0*/ cc, cc, cl, cl, cl, cl, cl, cl, cl, cl, cc, cc, cc, cc, cc, cc,
/*B0*/ cc, cc, cc, cc, cc, cc, cc, cc, cc, cc, cc, cc, cc, cc, cc, cc,
/*C0*/ cc, cl, cl, cl, cl, cl, cl, cl, cl, cl, cc, cc, cc, cc, cc, cc,
/*D0*/ cc, cl, cl, cl, cl, cl, cl, cl, cl, cl, cc, cc, cc, cc, cc, cc,
/*E0*/ cc, cc, cl, cl, cl, cl, cl, cl, cl, cl, cc, cc, cc, cc, cc, cc,
/*F0*/ cl, cl, cl, cl, cl, cl, cl, cl, cl, cl, cc, cc, cc, cc, cc, cc,
};
1622
1623 static
auto_detect_byte(curbyte,do_auto_detect)1624 void auto_detect_byte (curbyte, do_auto_detect)
1625 character curbyte;
1626 FLAG do_auto_detect;
1627 {
1628 /* begin character encoding auto-detection */
1629 character followbyte = curbyte;
1630
1631 /* UTF-8 auto-detection */
1632 #define dont_debug_utf_detect
1633
1634 #ifdef debug_utf_detect
1635 printf ("count_utf_bytes %d cur %02X\n", count_utf_bytes, curbyte);
1636 #endif
1637
1638 if (count_utf_bytes == 0) {
1639 if ((curbyte & 0xC0) == 0x80) {
1640 count_bad_utf ++;
1641 } else {
1642 count_utf_bytes = UTF8_len (curbyte) - 1;
1643 counted_utf_bytes = count_utf_bytes;
1644 }
1645 } else if ((curbyte & 0xC0) == 0x80) {
1646 count_utf_bytes --;
1647 if (count_utf_bytes == 0) {
1648 #ifdef ignore_ambigous_utf__does_not_work
1649 if (counted_utf_bytes == 2
1650 && strchr ("���������������", curbyte)
1651 ) {
1652 /* ignoring some sequences that could
1653 as well be Latin-1 */
1654 } else {
1655 count_good_utf ++;
1656 }
1657 #else
1658 count_good_utf ++;
1659 #endif
1660 }
1661 } else {
1662 count_utf_bytes = 0;
1663 count_bad_utf ++;
1664 }
1665
1666 /* VISCII and ISO-8859 auto-detection */
1667 if (curbyte >= 0x80) {
1668
1669 if (do_auto_detect) {
1670 count_good_viscii += cl;
1671 count_good_tcvn += cl;
1672
1673 count_good_iso += good_iso [curbyte - 0x80];
1674 count_good_cp1252 += good_cp1252 [curbyte - 0x80];
1675 count_good_cp850 += good_cp850 [curbyte - 0x80];
1676 count_good_mac += good_mac [curbyte - 0x80];
1677 count_good_ebcdic += good_ebcdic [curbyte];
1678 #ifdef debug_auto_detect
1679 printf ("%02X -> iso %ld viscii %ld\n", curbyte, count_good_iso, count_good_viscii);
1680 #endif
1681 } else {
1682 /* Defending ISO-8859 vs. CJK auto-detection */
1683 count_good_iso += good_iso [curbyte - 0x80];
1684 }
1685
1686 } else if (do_auto_detect) {
1687 switch (curbyte) {
1688 case '':
1689 case '':
1690 case '':
1691 case '':
1692 count_good_viscii += 2 * cl;
1693 count_good_tcvn += 2 * cl;
1694 break;
1695
1696 case '':
1697 case '':
1698 count_good_viscii += 2 * cl;
1699 count_good_tcvn += nc;
1700 break;
1701
1702 case '':
1703 case '':
1704 case '':
1705 case '':
1706 case '':
1707 case '':
1708 case '':
1709 case '':
1710 count_good_viscii += nc;
1711 count_good_tcvn += 2 * cl;
1712 break;
1713
1714 case '\000':
1715 case '':
1716 case '':
1717 case '':
1718 case '':
1719 case '':
1720 case '':
1721 case '':
1722 case '':
1723 case '':
1724 case 0x1A:
1725 case '':
1726 case '':
1727 case '':
1728 case '':
1729 count_good_viscii += nc;
1730 count_good_tcvn += nc;
1731 break;
1732 }
1733 }
1734
1735 /* CJK/Han encoding auto-detection */
1736 /* maintain GB18030 detection state */
1737 if (detect_gb18030 != 0) {
1738 detect_gb18030 --;
1739 }
1740
1741 /* perform detection on bytes after non-ASCII bytes */
1742 if (last_cjkbyte >= 0x80) {
1743 if (curbyte >= 0x80) {
1744 if (curbyte != 0xA0 || last_cjkbyte != 0xA0) {
1745 count_good_cjk += cl;
1746 }
1747 } else if (curbyte < 0x30) {
1748 count_bad_cjk += cl;
1749 } else if (curbyte <= 0x39) {
1750 if (detect_gb18030 == 1) {
1751 count_good_cjk += cl;
1752 }
1753 } else {
1754 count_weak_cjk ++;
1755 }
1756
1757 /* detect specific CJK encoding */
1758 if (do_auto_detect && ! utf16_file) {
1759
1760 /* CJK character set ranges
1761 GB GBK 81-FE 40-7E, 80-FE
1762 GB18030 81-FE 30-39 81-FE 30-39
1763 Big5 Big5-HKSCS 87-FE 40-7E, A1-FE
1764 CNS EUC-TW A1-FE A1-FE
1765 8E A1-A7 A1-FE A1-FE
1766
1767 EUC-JP 8E A1-DF
1768 A1-A8 A1-FE
1769 B0-F4 A1-FE
1770 8F A2,A6,A7,A9-AB,B0-ED A1-FE
1771 8F A1-FE A1-FE
1772 EUC-JIS X 0213 8E A1-DF
1773 A1-FE A1-FE
1774 8F A1,A3-A5,A8,AC-AF,EE-FE A1-FE
1775 Shift-JIS A1-DF
1776 81-84, 87-9F 40-7E, 80-FC
1777 E0-EA, ED-EE, FA-FC 40-7E, 80-FC
1778 Shift-JIS X 0213 A1-DF
1779 81-9F 40-7E, 80-FC
1780 E0-FC 40-7E, 80-FC
1781
1782 UHC UHC 81-FE 41-5A, 61-7A, 81-FE
1783 Johab 84-DE 31-7E, 81-FE
1784 E0-F9 31-7E, 81-FE
1785 */
1786
1787 if (last_cjkbyte >= 0x81 && last_cjkbyte <= 0xFE) {
1788
1789 /* Big5 Big5-HKSCS 87-FE 40-7E, A1-FE
1790 */
1791 if (last_cjkbyte >= 0x87 /* && last_cjkbyte <= 0xFE */
1792 && ((curbyte >= 0x40 && curbyte <= 0x7E)
1793 || (curbyte >= 0xA1 && curbyte <= 0xFE)))
1794 {
1795 count_big5 ++;
1796 followbyte = 0;
1797 } else {
1798 /*count_big5 --;*/
1799 }
1800
1801 /* GB GBK 81-FE 40-7E, 80-FE
1802 GB18030 81-FE 30-39 81-FE 30-39
1803 UHC UHC 81-FE 41-5A, 61-7A, 81-FE
1804 */
1805 /* if (last_cjkbyte >= 0x81 && last_cjkbyte <= 0xFE) { */
1806 if ((curbyte >= 0x40 && curbyte <= 0x7E)
1807 || (curbyte >= 0x80 && curbyte <= 0xFE)) {
1808 count_gb ++;
1809 followbyte = 0;
1810
1811 if ((curbyte >= 0x41 && curbyte <= 0x5A)
1812 || (curbyte >= 0x61 && curbyte <= 0x7A)
1813 || (curbyte >= 0x81 /* && curbyte <= 0xFE */))
1814 {
1815 count_uhc ++;
1816 } else {
1817 count_uhc --;
1818 }
1819
1820 } else if (curbyte >= 0x30 && curbyte <= 0x39) {
1821 if (detect_gb18030 == 1) {
1822 count_gb += 5;
1823 } else {
1824 count_gb --;
1825 detect_gb18030 = 3;
1826 }
1827 }
1828 /* } */
1829
1830 /* JIS EUC-JP A1-FE A1-FE
1831 8F A1-FE A1-FE
1832 8E A1-DF
1833 */
1834 if (last_cjkbyte >= 0xA1 /* && last_cjkbyte <= 0xFE */
1835 && curbyte >= 0xA1 && curbyte <= 0xFE)
1836 {
1837 followbyte = 0;
1838 if (last2_cjkbyte == 0x8F) {
1839 if (last_cjkbyte == 0xA1
1840 || (last_cjkbyte >= 0xA3 && last_cjkbyte <= 0xA5)
1841 || last_cjkbyte == 0xA8
1842 || (last_cjkbyte >= 0xAC && last_cjkbyte <= 0xAF)
1843 || last_cjkbyte >= 0xEE
1844 ) {
1845 count_jisx += 3;
1846 count_jp --;
1847 } else {
1848 count_jp += 3;
1849 count_jisx --;
1850 }
1851 } else {
1852 count_jisx ++;
1853 if (last_cjkbyte <= 0xA8 ||
1854 (last_cjkbyte >= 0xB0 && last_cjkbyte <= 0xF4)
1855 ) {
1856 count_jp ++;
1857 } else {
1858 count_jp --;
1859 }
1860 }
1861 }
1862 if (last_cjkbyte == 0x8E
1863 && curbyte >= 0xA1 && curbyte <= 0xDF)
1864 {
1865 followbyte = 0;
1866 count_jisx += 3;
1867 count_jp += 3;
1868 } else {
1869 count_jisx --;
1870 count_jp --;
1871 }
1872
1873 /* JIS Shift-JIS A1-DF
1874 81-9F, E0-EF 40-7E, 80-FC
1875 */
1876 if (last_cjkbyte >= 0xA1 && last_cjkbyte <= 0xDF) {
1877 if (curbyte >= 0xA1 && curbyte <= 0xDF) {
1878 /* two consecutive single-byte SJIS characters */
1879 followbyte = 0;
1880 count_sjis += 1;
1881 count_sjisx += 1;
1882 count_sjis1 ++;
1883 } else {
1884 count_sjis --;
1885 count_sjisx --;
1886 }
1887 } else {
1888 if (curbyte >= 0x40 && curbyte <= 0xFC && curbyte != 0x7F) {
1889 followbyte = 0;
1890 count_sjisx ++;
1891 if (last_cjkbyte <= 0x84
1892 || (last_cjkbyte >= 0x87 && last_cjkbyte <= 0x9F)
1893 || (last_cjkbyte >= 0xE0 && last_cjkbyte <= 0xEA)
1894 || (last_cjkbyte >= 0xED && last_cjkbyte <= 0xEE)
1895 || last_cjkbyte >= 0xFA
1896 ) {
1897 count_sjis ++;
1898 }
1899 } else {
1900 count_sjis --;
1901 }
1902 }
1903
1904 /* Johab Johab 84-DE, E0-F9 31-7E, 81-FE
1905 */
1906 if (((last_cjkbyte >= 0x84 && last_cjkbyte <= 0xDE)
1907 || (last_cjkbyte >= 0xE0 && last_cjkbyte <= 0xF9))
1908 && ((curbyte >= 0x31 && curbyte <= 0x7E)
1909 || (curbyte >= 0x81 && curbyte <= 0xFE)))
1910 {
1911 count_johab ++;
1912 } else {
1913 count_johab --;
1914 }
1915
1916 #ifdef sjis_broken
1917 } /* if (last_cjkbyte >= 0x81 && last_cjkbyte <= 0xFE) */
1918 else {
1919 if (curbyte >= 0x40 && curbyte <= 0xFC && curbyte != 0x7F) {
1920 followbyte = 0;
1921 count_sjis ++;
1922 } else {
1923 count_sjis --;
1924 }
1925 #endif
1926 }
1927 } /* if do_auto_detect */
1928 }
1929
1930 /* shift CJK byte state */
1931 last2_cjkbyte = last_cjkbyte;
1932 last_cjkbyte = followbyte;
1933
1934 /* end character encoding auto-detection */
1935 }
1936
1937 #define dont_debug_read_error
1938
1939 int
get_line(fd,buffer,len,do_auto_detect)1940 get_line (fd, buffer, len, do_auto_detect)
1941 int fd;
1942 char buffer [maxLINElen];
1943 int * len;
1944 FLAG do_auto_detect;
1945 {
1946 register char * cur_pos = current_bufpos;
1947 char * begin = buffer;
1948 char * fini = (char *) (buffer + maxLINElen - 2) /* leave space for '\n\0' */;
1949 int Ulineend_state = 0;
1950 character curbyte = '\0';
1951 int ret = FINE;
1952 got_lineend = lineend_NONE;
1953
1954 #define dont_debug_buffer_overflow
1955 #ifdef debug_buffer_overflow
1956 fini = (char *) (buffer + 20) /* debug overlong line input */;
1957 #endif
1958
1959 /* read one line */
1960 do { /* read one byte */
1961 if (cur_pos == last_bufpos) {
1962 if (utf16_file && consider_transform) {
1963 if (next_byte >= fini_byte) {
1964 do {
1965 interrupted = False;
1966 if (alloc_UTF16buf () == ERRORS) {
1967 return ERRORS;
1968 }
1969 read_bytes = read (fd, UTF16buf, filebuflen);
1970 /* if (read_bytes > 0) {
1971 file_position += read_bytes;
1972 }
1973 */
1974 trace_read (("read utf16 (%d, %X, %d) %d\n", fd, UTF16buf, filebuflen, read_bytes));
1975 } while (
1976 (read_chars == -1 && geterrno () == EINTR)
1977 || (read_chars <= 0 && interrupted)
1978 );
1979
1980 if (read_bytes <= 0) {
1981 read_chars = read_bytes;
1982 break;
1983 }
1984 next_byte = UTF16buf;
1985 fini_byte = & UTF16buf [read_bytes];
1986 }
1987 read_chars = UTF16_transform (utf16_little_endian, filebuf, filebuflen, & next_byte, fini_byte);
1988 if (count_1read_op == 0) {
1989 if (strncmp (filebuf, "", 3) == 0) {
1990 /* already transformed UTF-8 BOM */
1991 BOM = True;
1992 }
1993 count_1read_op = 1;
1994 }
1995 } else {
1996 do {
1997 interrupted = False;
1998 read_chars = read (fd, filebuf, filebuflen);
1999 #ifdef debug_read_error
2000 read_chars = -1;
2001 errno = EIO;
2002 #endif
2003 /* if (read_chars > 0) {
2004 file_position += read_chars;
2005 }
2006 */
2007 trace_read (("read (%d, %X, %d) %d\n", fd, filebuf, filebuflen, read_chars));
2008 } while (
2009 (read_chars == -1 && geterrno () == EINTR)
2010 || (read_chars <= 0 && interrupted)
2011 );
2012
2013 if (read_chars <= 0) {
2014 break;
2015 }
2016
2017 if (ebcdic_file && consider_transform) {
2018 character * epoi = filebuf;
2019 int i;
2020 mapped_text = True; /* enable lookup_encodedchar */
2021 for (i = 0; i < read_chars; i ++) {
2022 * epoi = lookup_encodedchar (* epoi);
2023 epoi ++;
2024 }
2025 mapped_text = False;
2026 }
2027 }
2028 last_bufpos = & filebuf [read_chars];
2029 cur_pos = filebuf;
2030
2031 if (count_1read_op == 0 && consider_transform) {
2032 if (! (utf8_text && utf16_file)
2033 && strncmp (filebuf, "", 3) == 0) {
2034 /* UTF-8 BOM */
2035 BOM = True;
2036 } else if (strncmp (filebuf, "\376\377", 2) == 0
2037 && do_auto_detect) {
2038 /* big endian UTF-16 BOM */
2039 (void) set_text_encoding (":16", ' ', "BOM 16");
2040 BOM = True;
2041 /* strip converted UTF-8 BOM from text */
2042 cur_pos += 3;
2043 } else if (strncmp (filebuf, "\377\376", 2) == 0
2044 && do_auto_detect) {
2045 /* little-endian UTF-16 BOM */
2046 (void) set_text_encoding (":61", ' ', "BOM 61");
2047 BOM = True;
2048 /* strip converted UTF-8 BOM from text */
2049 cur_pos += 3;
2050 } else if (utf8_text && utf16_file) { /* UTF-16 pre-selection */
2051 } else if (do_auto_detect) {
2052 /* UTF-16 auto-detection */
2053 char * sp = filebuf;
2054 int even_0 = 0;
2055 int odd_0 = 0;
2056 FLAG odd = False;
2057 while (sp < last_bufpos) {
2058 if (* sp ++ == '\0') {
2059 if (odd) {
2060 odd_0 ++;
2061 } else {
2062 even_0 ++;
2063 }
2064 }
2065 odd = ! odd;
2066 }
2067 if (even_0 > read_chars / 133
2068 && even_0 > 2 * (odd_0 + 1)) {
2069 /* big endian UTF-16 */
2070 (void) set_text_encoding (":16", ' ', "detect 16");
2071 } else if (odd_0 > read_chars / 133
2072 && odd_0 > 2 * (even_0 + 1)) {
2073 /* little-endian UTF-16 */
2074 (void) set_text_encoding (":61", ' ', "detect 61");
2075 }
2076 }
2077
2078 if (utf16_file) {
2079 /* do_auto_detect = False; */
2080
2081 /* move UTF-16 input to UTF-16 buffer */
2082 if (alloc_UTF16buf () == ERRORS) {
2083 return ERRORS;
2084 }
2085 memcpy (UTF16buf, filebuf, (unsigned int) read_chars);
2086 read_bytes = read_chars;
2087 next_byte = UTF16buf;
2088 fini_byte = & UTF16buf [read_bytes];
2089
2090 /* transform to UTF-8 */
2091 read_chars = UTF16_transform (utf16_little_endian, filebuf, filebuflen, & next_byte, fini_byte);
2092 last_bufpos = & filebuf [read_chars];
2093 }
2094 count_1read_op = 1;
2095 }
2096 }
2097
2098 /* detect if no more lines available */
2099 if (cur_pos == last_bufpos) {
2100 read_chars = 0;
2101 break;
2102 }
2103
2104 curbyte = * cur_pos ++;
2105
2106
2107 auto_detect_byte (curbyte, do_auto_detect);
2108
2109
2110 /* detect lineends */
2111
2112 /* NUL character handling */
2113 if (curbyte == '\0') {
2114 * buffer ++ = '\n';
2115 got_lineend = lineend_NUL;
2116 ret = NUL_LINE;
2117 break;
2118 }
2119
2120 /* handle CR/CRLF lookahead */
2121 if (lineends_detectCR && curbyte != '\n' && buffer != begin && * (buffer - 1) == '\r') {
2122 * (buffer - 1) = '\n';
2123 got_lineend = lineend_CR;
2124 cur_pos --;
2125 count_lineend_CR ++; /* count Mac lines */
2126 if (lineends_CRtoLF) {
2127 set_modified ();
2128 got_lineend = lineend_LF;
2129 if (lineends_LFtoCRLF) {
2130 got_lineend = lineend_CRLF;
2131 }
2132 }
2133 break;
2134 }
2135 if (curbyte == '\n' && buffer != begin && * (buffer - 1) == '\r') {
2136 * (buffer - 1) = '\n';
2137 got_lineend = lineend_CRLF;
2138 count_lineend_CRLF ++; /* count MSDOS lines */
2139 if (lineends_CRLFtoLF) {
2140 set_modified ();
2141 got_lineend = lineend_LF;
2142 }
2143 break;
2144 }
2145
2146 /* handle LF */
2147 if (curbyte == '\n') {
2148 * buffer ++ = '\n';
2149 got_lineend = lineend_LF;
2150 count_lineend_LF ++; /* count Unix lines */
2151 if (lineends_LFtoCRLF) {
2152 set_modified ();
2153 got_lineend = lineend_CRLF;
2154 }
2155 break;
2156 }
2157
2158 /* check multi-byte line ends (other than CRLF) */
2159 if ((loading && utf8_lineends && (do_auto_detect || utf8_text))
2160 || (pasting && (utf8_text || (pastebuf_utf8 && ! cjk_text)))
2161 ) {
2162 if (Ulineend_state == 0 && curbyte == (character) 0xE2) {
2163 Ulineend_state = 1;
2164 } else if (Ulineend_state == 0 && curbyte == (character) 0xC2) {
2165 Ulineend_state = 8;
2166 } else if (Ulineend_state == 8) {
2167 if (curbyte == (character) 0x85) {
2168 Ulineend_state = 9;
2169 /* Unicode NEXT LINE U+0085 detected */
2170 buffer --;
2171 * buffer ++ = '\n';
2172 if (pasting && pastebuf_utf8 && ! utf8_text) {
2173 got_lineend = lineend_NL1;
2174 } else {
2175 got_lineend = lineend_NL2;
2176 }
2177 break;
2178 } else {
2179 Ulineend_state = 0;
2180 }
2181 } else if (Ulineend_state > 0) {
2182 if (Ulineend_state == 1 && curbyte == (character) 0x80) {
2183 Ulineend_state = 2;
2184 } else if (Ulineend_state == 2 && curbyte == (character) 0xA8) {
2185 Ulineend_state = 3;
2186 /* Unicode LS U+2028 detected */
2187 buffer -= 2;
2188 * buffer ++ = '\n';
2189 got_lineend = lineend_LS;
2190 break;
2191 } else if (Ulineend_state == 2 && curbyte == (character) 0xA9) {
2192 Ulineend_state = 3;
2193 /* Unicode PS U+2029 detected */
2194 buffer -= 2;
2195 * buffer ++ = '\n';
2196 got_lineend = lineend_PS;
2197 break;
2198 } else {
2199 Ulineend_state = 0;
2200 }
2201 }
2202 }
2203
2204 /* handle NL */
2205 if (((! do_auto_detect && loading) || pasting_encoded)
2206 && (
2207 (ebcdic_file && curbyte == 0x85)
2208 || (! ebcdic_file && ! utf16_file && ! no_char (code_NL) && curbyte == code_NL)
2209 )
2210 ) {
2211 * buffer ++ = '\n';
2212 got_lineend = lineend_NL1;
2213 count_lineend_NL ++; /* count ISO 8859 lines */
2214 if (lineends_CRtoLF) {
2215 set_modified ();
2216 got_lineend = lineend_LF;
2217 }
2218 break;
2219 }
2220
2221
2222 /* handle if line buffer full */
2223 if (buffer > fini - 6 && * cur_pos != '\n') {
2224 /* try not to split within a multi-byte character sequence */
2225 if (buffer == fini || /* last chance to split! */
2226 (cjk_text && (! do_auto_detect || pasting)
2227 ? charbegin (begin, buffer) == buffer
2228 : (* cur_pos & 0xC0) != 0x80
2229 ))
2230 {
2231 * buffer ++ = '\n';
2232 got_lineend = lineend_NONE;
2233 ret = SPLIT_LINE;
2234 break;
2235 }
2236 }
2237
2238
2239 /* copy byte from input buffer into line buffer */
2240 * buffer ++ = curbyte;
2241
2242 } while (1);
2243
2244 current_bufpos = cur_pos;
2245
2246
2247 /* handle errors and EOF */
2248 if (read_chars < 0) {
2249 return ERRORS;
2250 } else if (read_chars == 0) {
2251 if (buffer == begin) {
2252 return NO_INPUT;
2253 } else {
2254 /* consider incomplete UTF-8 sequence for auto-detection */
2255 if (count_utf_bytes > 0) {
2256 count_bad_utf ++;
2257 }
2258
2259 if (lineends_detectCR && curbyte == '\r') {
2260 * (buffer - 1) = '\n';
2261 got_lineend = lineend_CR;
2262 count_lineend_CR ++; /* count Mac lines */
2263 if (lineends_CRtoLF) {
2264 set_modified ();
2265 got_lineend = lineend_LF;
2266 if (lineends_LFtoCRLF) {
2267 got_lineend = lineend_CRLF;
2268 }
2269 }
2270 } else {
2271 if (loading) {
2272 /* Add '\n' to last line of file, for internal handling */
2273 * buffer ++ = '\n';
2274 }
2275 got_lineend = lineend_NONE;
2276 ret = NO_LINE;
2277 }
2278 }
2279 }
2280
2281 * buffer = '\0';
2282 * len = (int) (buffer - begin);
2283 return ret;
2284 }
2285
2286
2287 /*======================================================================*\
2288 |* File position handling *|
2289 \*======================================================================*/
2290
/* Names of the file info file; get_open_pos tries to open them
   in this order: info_fn, info_dosfn, mark_fn. */
#ifdef vms
#define info_fn ".$mined"
#else
#define info_fn ".@mined"
#endif
#define info_dosfn "@MINED~1"
#define mark_fn "@mined.mar"

/* NOTE(review): presumably the name of the file info file used for
   saving; it is set outside the code visible here - confirm */
static char * mark_file_out;
2300
2301
2302 /*
2303 * get_open_pos and save_open_pos look up and save the current file
2304 position in file info file
2305 For save_open_pos, line_number must be up-to-date
2306 */
/*
   Escape a file name for use as a key in the file info file.
   A newline is written as "\n" (backslash, 'n'); backslash and space
   are preceded by a backslash. A single space is appended after the
   name. fn_escaped must provide room for twice the length of fn
   plus 2 bytes.
 */
static
void
escape_filename (fn_escaped, fn)
  char * fn_escaped;
  char * fn;
{
  char * out = fn_escaped;
  char * in;

  for (in = fn; * in != '\0'; in ++) {
    char c = * in;

    if (c == '\n') {
      * out ++ = '\\';
      * out ++ = 'n';
      continue;
    }
    if (c == '\\' || c == ' ') {
      * out ++ = '\\';
    }
    * out ++ = c;
  }

  /* terminating space, then end of string */
  * out ++ = ' ';
  * out = '\0';
}
2330
2331 static
2332 void
get_open_pos(fn)2333 get_open_pos (fn)
2334 char * fn;
2335 {
2336 char * mark_file_in;
2337 int mark_fd = -1;
2338 FLAG use_unix_fn = True;
2339 #ifdef msdos
2340 if (! is_Windows ()) {
2341 use_unix_fn = False;
2342 }
2343 #endif
2344
2345 if (use_unix_fn) {
2346 mark_file_in = info_fn;
2347 mark_fd = open (mark_file_in, O_RDONLY | O_BINARY, 0);
2348 }
2349 if (mark_fd < 0) {
2350 mark_file_in = info_dosfn;
2351 mark_fd = open (mark_file_in, O_RDONLY | O_BINARY, 0);
2352 }
2353 if (mark_fd < 0) {
2354 mark_file_in = mark_fn;
2355 mark_fd = open (mark_file_in, O_RDONLY | O_BINARY, 0);
2356 }
2357
2358 if (mark_fd >= 0) {
2359 FLAG modif = modified;
2360 int dumlen;
2361 char fn_escaped [maxFILENAMElen * 2 + 1];
2362 int fnlen;
2363 escape_filename (fn_escaped, fn);
2364 fnlen = strlen (fn_escaped);
2365
2366 reset_get_line (False);
2367
2368 while (line_gotten (get_line (mark_fd, text_buffer, & dumlen, False))) {
2369 if (strncmp (fn_escaped, text_buffer, fnlen) == 0) {
2370 char * spoi = text_buffer + fnlen;
2371 int v4, v5, v6 = -1;
2372 int vq = -1;
2373 int vt = -1;
2374 int vtabexp = -1;
2375 open_linum = -1;
2376 open_col = 0;
2377 open_pos = 0;
2378 lines_per_page = 0;
2379 spoi = scan_int (spoi, & open_linum);
2380 spoi = scan_int (spoi, & open_col);
2381 if (open_col < 0) {
2382 /* indicates new character index semantics */
2383 open_pos = - open_col;
2384 }
2385 spoi = scan_int (spoi, & lines_per_page);
2386 spoi = scan_int (spoi, & v4);
2387 if (v4 >= 0) {
2388 JUSlevel = 1;
2389 spoi = scan_int (spoi, & v5);
2390 spoi = scan_int (spoi, & v6);
2391 if (v6 > 0) {
2392 first_left_margin = v4;
2393 next_left_margin = v5;
2394 right_margin = v6;
2395 }
2396 } else {
2397 JUSlevel = 0;
2398 }
2399
2400 /* get quote type */
2401 spoi = scan_int (spoi, & vq);
2402 if (vq >= 0) {
2403 if (smart_quotes != VALID) {
2404 /* French spacing quote style and legacy int entries */
2405 if (vq == 70) {
2406 set_quote_style ("« »");
2407 } else if (vq == 1) {
2408 set_quote_style ("“” ‘’");
2409 } else if (vq == 2) {
2410 set_quote_style ("„“ ‚‘");
2411 } else if (vq == 3) {
2412 set_quote_style ("«» ‹›");
2413 } else if (vq == 4) {
2414 set_quote_style ("»« ›‹");
2415 } else if (vq == 5) {
2416 set_quote_style ("„” ‚’");
2417 } else if (vq == 6) {
2418 set_quote_style ("”” ’’");
2419 } else if (vq == 7) {
2420 set_quote_style ("»» ››");
2421 } else if (vq == 8) {
2422 set_quote_style ("『』 「」");
2423 } else {
2424 set_quote_type (default_quote_type);
2425 }
2426 }
2427 } else {
2428 /* string entries */
2429 char qs [maxMSGlen];
2430 char * qpoi = qs;
2431 /* skip leading space */
2432 while (* spoi == ' ') {
2433 spoi ++;
2434 }
2435 /* scan quote style indication */
2436 #define scan_French_quote_style
2437 #ifdef scan_French_quote_style
2438 qpoi = spoi;
2439 advance_utf8 (& spoi);
2440 if (* spoi == ' ') {
2441 spoi ++;
2442 }
2443 advance_utf8 (& spoi);
2444 if (* spoi == ' ') {
2445 spoi ++;
2446 }
2447 advance_utf8 (& spoi);
2448 if (* spoi == ' ') {
2449 spoi ++;
2450 }
2451 advance_utf8 (& spoi);
2452 * spoi ++ = '\0';
2453 if (smart_quotes != VALID) {
2454 set_quote_style (qpoi);
2455 }
2456 #else
2457 while ((character) * spoi > ' ') {
2458 * qpoi ++ = * spoi ++;
2459 }
2460 if (* spoi == ' ') {
2461 * qpoi ++ = * spoi ++;
2462 }
2463 while ((character) * spoi > ' ') {
2464 * qpoi ++ = * spoi ++;
2465 }
2466 * qpoi = '\0';
2467 if (smart_quotes != VALID) {
2468 set_quote_style (qs);
2469 }
2470 #endif
2471 }
2472
2473 /* get tab size */
2474 spoi = scan_int (spoi, & vt);
2475 if (vt >= 0 && ! tabsize_selected) {
2476 tabsize = vt;
2477 }
2478
2479 /* get keyboard mapping */
2480 /* skip leading space */
2481 while (* spoi == ' ') {
2482 spoi ++;
2483 }
2484 if ((character) * spoi > ' ' && ! explicit_keymap) {
2485 if (* spoi == '-' && * (spoi + 1) == '-') {
2486 spoi += 2;
2487 }
2488 setKEYMAP (spoi);
2489 }
2490 /* skip field */
2491 while ((character) * spoi > ' ') {
2492 spoi ++;
2493 }
2494
2495 /* get tab/space expand flag */
2496 spoi = scan_int (spoi, & vtabexp);
2497 if (vtabexp >= 0 && ! tabsize_selected) {
2498 if (vtabexp > 0) {
2499 expand_tabs = True;
2500 } else {
2501 expand_tabs = False;
2502 }
2503 }
2504 }
2505 }
2506 (void) close (mark_fd);
2507 clear_filebuf ();
2508
2509 /* prevent affecting the modified flag when loading the line number file */
2510 modified = modif;
2511 }
2512 }
2513
2514 /**
2515 Write file position and other information to file info file.
2516 Return False if writing to the file fails.
2517 */
2518 static
2519 FLAG
write_open_pos(fd,fn)2520 write_open_pos (fd, fn)
2521 int fd;
2522 char * fn;
2523 {
2524 int cur_pos = get_cur_pos ();
2525 char marktext [maxPROMPTlen];
2526 char * quote_style_marker;
2527
2528 #ifdef scan_French_quote_style
2529 quote_style_marker = quote_mark (quote_type, 0);
2530 #else
2531 if (spacing_quote_type (quote_type)) {
2532 /* French quote style */
2533 quote_style_marker = "70"; /* ASCII 'F' */
2534 } else {
2535 quote_style_marker = quote_mark (quote_type, 0);
2536 }
2537 #endif
2538
2539 if (JUSlevel > 0) {
2540 build_string (marktext, "%s%d %d %d %d %d %d %s %d %s-%s %d\n",
2541 fn, line_number, - cur_pos, lines_per_page,
2542 first_left_margin, next_left_margin, right_margin,
2543 quote_style_marker,
2544 tabsize,
2545 keyboard_mapping, last_keyboard_mapping,
2546 expand_tabs);
2547 } else {
2548 build_string (marktext, "%s%d %d %d -3 %s %d %s-%s %d\n",
2549 fn, line_number, - cur_pos, lines_per_page,
2550 quote_style_marker,
2551 tabsize,
2552 keyboard_mapping, last_keyboard_mapping,
2553 expand_tabs);
2554 }
2555 if (write (fd, marktext, strlen (marktext)) <= 0) {
2556 return False;
2557 } else {
2558 return True;
2559 }
2560 }
2561
/* Flags controlling housekeeping ("grooming") of the file info file.
   rewrite_open_pos copies groom_info_file into groom_stat and then
   resets groom_info_file, so the check is armed for one rewrite
   at a time.
 */
#ifdef old_marker_creation
FLAG groom_info_files = True;	/* groom once per dir */
FLAG groom_info_file = True;	/* groom once ... */
#else
FLAG groom_info_files = False;	/* groom once per dir */
FLAG groom_info_file = False;	/* groom once ... */
#endif
/* if set, write_marker_list drops entries whose file cannot be stat'ed */
FLAG groom_stat = False;	/* stat files for grooming */

/* One entry of the file info file, kept in a singly linked list
   while the file is being rewritten.
 */
struct marker_entry {
  char * fn;	/* file name (unescaped), allocated */
  char * info;	/* remainder of the info line after the name, allocated */
  struct marker_entry * next;	/* next entry, or 0 at the end of the list */
};

/* entries read by rewrite_open_pos; empty outside of that function */
static struct marker_entry * marker_list = 0;
2578
2579 static
2580 FLAG
put_marker_list(mlpoi,fn,info)2581 put_marker_list (mlpoi, fn, info)
2582 struct marker_entry * * mlpoi;
2583 char * fn;
2584 char * info;
2585 {
2586 if (! * mlpoi) {
2587 struct marker_entry * new = alloc (sizeof (struct marker_entry));
2588 if (new) {
2589 new->fn = dupstr (fn);
2590 new->info = dupstr (info);
2591 if (new->fn && new->info) {
2592 /* append new node */
2593 new->next = * mlpoi;
2594 * mlpoi = new;
2595 return True;
2596 }
2597 }
2598 } else if (streq ((* mlpoi)->fn, fn)) {
2599 char * _info = dupstr (info);
2600 if (_info) {
2601 /* update node with info */
2602 free_space ((* mlpoi)->info);
2603 (* mlpoi)->info = _info;
2604 return True;
2605 }
2606 } else {
2607 return put_marker_list (& (* mlpoi)->next, fn, info);
2608 }
2609 return False;
2610 }
2611
2612 static
2613 FLAG
write_marker_list(fd,ml)2614 write_marker_list (fd, ml)
2615 int fd;
2616 struct marker_entry * ml;
2617 {
2618 #ifndef VAXC
2619 FLAG ret = True;
2620 while (ml) {
2621 struct stat fstat_buf;
2622 if (! groom_stat || stat (ml->fn, & fstat_buf) == 0) {
2623 if (write (fd, ml->fn, strlen (ml->fn)) < 0) {
2624 ret = False;
2625 }
2626 if (write (fd, ml->info, strlen (ml->info)) < 0) {
2627 ret = False;
2628 }
2629 }
2630 ml = ml->next;
2631 }
2632 return ret;
2633 #else
2634 return True;
2635 #endif
2636 }
2637
2638 static
2639 void
clear_marker_list(mlpoi)2640 clear_marker_list (mlpoi)
2641 struct marker_entry * * mlpoi;
2642 {
2643 if (* mlpoi) {
2644 clear_marker_list (& (* mlpoi)->next);
2645 free_space ((* mlpoi)->fn);
2646 free_space ((* mlpoi)->info);
2647 free_space (* mlpoi);
2648 * mlpoi = 0;
2649 }
2650 }
2651
2652 /**
2653 Update file position and other information in file info file,
2654 reading all info and writing back.
2655 Housekeeping: clean up duplicate entries,
2656 also (if groom_stat) drop non-existing files.
2657 Return False if saving to the file fails.
2658 */
2659 static
2660 FLAG
rewrite_open_pos(fn,force)2661 rewrite_open_pos (fn, force)
2662 char * fn;
2663 int force;
2664 {
2665 char * mark_file_in;
2666 int mark_fd = -1;
2667 FLAG ret = True;
2668 FLAG use_unix_fn = True;
2669 #ifdef msdos
2670 if (! is_Windows ()) {
2671 use_unix_fn = False;
2672 }
2673 #endif
2674
2675 clear_marker_list (& marker_list); /* in case of previous error */
2676
2677 if (use_unix_fn) {
2678 mark_file_in = info_fn;
2679 mark_fd = open (mark_file_in, O_RDONLY);
2680 mark_file_out = info_fn;
2681 } else {
2682 mark_file_out = info_dosfn;
2683 }
2684 if (mark_fd < 0) {
2685 mark_file_in = info_dosfn;
2686 mark_fd = open (mark_file_in, O_RDONLY);
2687 }
2688 if (mark_fd < 0) {
2689 mark_file_in = mark_fn;
2690 mark_fd = open (mark_file_in, O_RDONLY);
2691 }
2692
2693 if (mark_fd < 0 && ! force) {
2694 return False;
2695 }
2696
2697 /* read file info */
2698 if (mark_fd >= 0) {
2699 /* scan mark file for old entries for same file and skip them;
2700 copy other entries to output file
2701 */
2702 int dumlen;
2703 int fnlen = strlen (fn);
2704 FLAG memerr = False;
2705 int ret;
2706
2707 reset_get_line (False); /* disable UTF-16 detection */
2708 while (line_gotten (ret = get_line (mark_fd, text_buffer, & dumlen, False))) {
2709 if (strncmp (fn, text_buffer, fnlen) == 0) {
2710 /* skip entry for this filename */
2711 } else {
2712 /* scan file name */
2713 char * mf = text_buffer;
2714 char fn_buffer [maxFILENAMElen];
2715 char * fnpoi = fn_buffer;
2716 int duml = -1;
2717 while (* mf && * mf != '\n' && * mf != ' ') {
2718 if (* mf == '\\') {
2719 mf ++;
2720 }
2721 * fnpoi ++ = * mf ++;
2722 }
2723 * fnpoi = '\0';
2724 /* put marker file line into list
2725 fn_buffer: file name (unescaped)
2726 mf: remainder of info line
2727 */
2728 (void) scan_int (mf, & duml);
2729 if (duml >= 0) {
2730 if (! put_marker_list (& marker_list, fn_buffer, mf)) {
2731 memerr = True;
2732 break;
2733 }
2734 }
2735 }
2736 }
2737 (void) close (mark_fd);
2738 clear_filebuf ();
2739 if (memerr || ret == ERRORS) {
2740 return False;
2741 }
2742 }
2743
2744 /* migrate to new info file, i.e. delete the old one if different */
2745 if (mark_fd >= 0 && mark_file_out != mark_file_in) {
2746 if (delete_file (mark_file_in) < 0) {
2747 /* deletion failed, stay with old file */
2748 mark_file_out = mark_file_in;
2749 }
2750 }
2751
2752 mark_fd = open (mark_file_out, O_WRONLY | O_TRUNC | O_BINARY | O_CREAT, fprot0);
2753 if (mark_fd < 0) {
2754 return False;
2755 }
2756
2757 /* write file info */
2758 ret = write_marker_list (mark_fd, marker_list);
2759 clear_marker_list (& marker_list);
2760
2761 groom_stat = groom_info_file; /* file checking once */
2762 groom_info_file = False; /* ... suppress next time */
2763
2764 if (* fn == ' ') {
2765 /* dummy file name, grooming only, skip writing new entry */
2766 } else {
2767 ret &= write_open_pos (mark_fd, fn);
2768 }
2769 if (close (mark_fd) < 0) {
2770 ret = False;
2771 }
2772 return ret;
2773 }
2774
2775
2776 /**
2777 Save file position and other information in file info file.
2778 Return False if saving to the file fails.
2779 */
2780 static
2781 FLAG
save_open_pos(fn,force)2782 save_open_pos (fn, force)
2783 char * fn;
2784 int force;
2785 {
2786 if (fn [0] != '\0') {
2787 char fn_escaped [maxFILENAMElen * 2 + 1];
2788 escape_filename (fn_escaped, fn);
2789 return rewrite_open_pos (fn_escaped, force);
2790 } else {
2791 return True;
2792 }
2793 }
2794
2795
2796 /*
2797 * Save current file pos
2798 */
2799 void
SAVPOS()2800 SAVPOS ()
2801 {
2802 if (file_name [0] != '\0') {
2803 fstatus ("Remembering file position", -1L, -1L);
2804 if (save_open_pos (file_name, 1) == False) {
2805 error2 ("Error when saving file position to ", mark_file_out);
2806 }
2807 }
2808 }
2809
2810 void
GROOM_INFO()2811 GROOM_INFO ()
2812 {
2813 (void) rewrite_open_pos (" ", True);
2814 }
2815
2816
2817 /*======================================================================*\
2818 |* Determine derived file names *|
2819 \*======================================================================*/
2820
/*
 * get_directory decides which directory to use for a configured
   directory name dir:
   an absolute path is returned as is;
   a relative path is created (mode 0700) if it does not exist yet and
   returned if that succeeds or it existed already;
   otherwise the current directory "." is returned.
 */
static
char *
get_directory (dir)
  char * dir;
{
  if (is_absolute_path (dir)) {
    return dir;
  }
  if (mkdir (dir, 0700) == 0) {
    /* local directory freshly created */
    return dir;
  }
  if (geterrno () == EEXIST) {
    /* local directory OK */
    return dir;
  }
  return ".";
}
2837
2838 static
2839 char *
get_recovery_name(fn)2840 get_recovery_name (fn)
2841 char * fn;
2842 {
2843 #ifdef vms
2844 #define recovery_tag "$"
2845 #else
2846 #define recovery_tag "#"
2847 #endif
2848
2849 static char rn [maxFILENAMElen];
2850 char * dirname;
2851 char * bn = getbasename (fn);
2852
2853 /* "dir/name" -> "${AUTO_SAVE_DIRECTORY:-dir}", bn "name" */
2854 if (recover_directory) {
2855 dirname = get_directory (recover_directory);
2856 } else if (bn == fn) {
2857 dirname = ".";
2858 } else { /* copy and clip dir name from path name */
2859 char * fini = rn + (bn - fn) - 1;
2860 strcpy (rn, fn);
2861 dirname = rn;
2862 #ifdef vms
2863 if (* fini == ']' || * fini == '/' || * fini == ':') {
2864 fini ++;
2865 } else { /* just in case */
2866 fini ++;
2867 * fini = ':';
2868 fini ++;
2869 }
2870 #endif
2871 * fini = '\0';
2872 }
2873
2874 /* compose recovery dir and file name */
2875 if (streq (dirname, ".")) {
2876 #ifdef vms
2877 /* force $...$ to refer to local file
2878 in case it's a logical name
2879 */
2880 strcpy (rn, "[]");
2881 #else
2882 strcpy (rn, "");
2883 #endif
2884 } else {
2885 if (dirname != rn) {
2886 strcpy (rn, dirname);
2887 }
2888 #ifndef vms
2889 strappend (rn, "/", maxFILENAMElen);
2890 #endif
2891 }
2892 strappend (rn, recovery_tag, maxFILENAMElen);
2893 strappend (rn, bn, maxFILENAMElen);
2894 strappend (rn, recovery_tag, maxFILENAMElen);
2895 return rn;
2896 }
2897
2898 #ifndef vms
2899
2900 static
2901 char *
get_backup_name(file_name)2902 get_backup_name (file_name)
2903 char * file_name;
2904 {
2905 static char backup_name [maxFILENAMElen];
2906 char * dirname;
2907 char * prefix;
2908 char newsuffix [30];
2909
2910 if (! backup_mode) {
2911 return NIL_PTR;
2912 }
2913
2914 /* "dir/name" -> "${BACKUP_DIRECTORY:-dir}", prefix "name" */
2915 prefix = getbasename (file_name);
2916 if (backup_directory) {
2917 dirname = get_directory (backup_directory);
2918 } else if (prefix == file_name) {
2919 dirname = ".";
2920 } else { /* copy and clip dir name from path name */
2921 strcpy (backup_name, file_name);
2922 dirname = backup_name;
2923 dirname [prefix - file_name - 1] = '\0';
2924 }
2925
2926 if (backup_mode == 's') { /* simple */
2927 strcpy (newsuffix, "~");
2928 } else {
2929 DIR * dir;
2930 struct dirent * direntry;
2931
2932 int ver_e = 0;
2933 int ver_v = 0;
2934 int maxver;
2935
2936 dir = opendir (dirname);
2937 if (! dir) {
2938 error2 ("Cannot open directory ", dirname);
2939 return NIL_PTR;
2940 }
2941
2942 while ((direntry = readdir (dir)) != 0) {
2943 if (strisprefix (prefix, direntry->d_name)) {
2944 char * suffix = direntry->d_name + strlen (prefix);
2945 int ver = -1;
2946 char * afterver;
2947 if (streq (suffix, "~")) {
2948 /* simple backup file */
2949 } else if (* suffix == ';') {
2950 /* check VMS style numbered backup file */
2951 suffix ++;
2952 afterver = scan_int (suffix, & ver);
2953 if (ver > 0 && * afterver == '\0') {
2954 /* VMS style numbered backup file */
2955 if (ver > ver_v) {
2956 ver_v = ver;
2957 }
2958 }
2959 } else if (* suffix == '.' && * (suffix + 1) == '~') {
2960 /* check emacs style numbered backup file */
2961 suffix += 2;
2962 afterver = scan_int (suffix, & ver);
2963 if (ver > 0 && * afterver == '~' && * (afterver + 1) == '\0') {
2964 /* emacs style numbered backup file */
2965 if (ver > ver_e) {
2966 ver_e = ver;
2967 }
2968 }
2969 }
2970 }
2971 }
2972 closedir (dir);
2973 maxver = ver_e > ver_v ? ver_e : ver_v;
2974
2975 if (backup_mode == 'v' || (ver_v > ver_e && backup_mode != 'e')) {
2976 /* VMS style backup filename */
2977 build_string (newsuffix, ";%d", maxver + 1);
2978 } else if (backup_mode == 'e' || backup_mode == 'n' || ver_e > 0) {
2979 /* emacs style backup filename */
2980 build_string (newsuffix, ".~%d~", maxver + 1);
2981 } else { /* backup_mode == 'a' going simple mode */
2982 /* simple backup filename */
2983 strcpy (newsuffix, "~");
2984 }
2985 }
2986
2987 /* compose backup dir and file name */
2988 if (streq (dirname, ".")) {
2989 strcpy (backup_name, "");
2990 } else {
2991 if (dirname != backup_name) {
2992 strcpy (backup_name, dirname);
2993 }
2994 strappend (backup_name, "/", maxFILENAMElen);
2995 }
2996 strappend (backup_name, prefix, maxFILENAMElen);
2997
2998 /* append backup suffix */
2999 if (strlen (backup_name) + strlen (newsuffix) >= sizeof (backup_name)) {
3000 return NIL_PTR;
3001 } else {
3002 strcat (backup_name, newsuffix);
3003 return backup_name;
3004 }
3005 }
3006
3007 #endif
3008
3009 #ifdef use_locking
3010
3011 static
3012 char *
get_lockfile_name(fn)3013 get_lockfile_name (fn)
3014 char * fn;
3015 {
3016 static char lf [maxFILENAMElen];
3017 char * bn = getbasename (fn);
3018
3019 /* "dir/name" -> "dir", bn "name" */
3020 if (bn == fn) {
3021 #ifdef vms
3022 strcpy (lf, "$$"); /* not used anyway */
3023 #else
3024 strcpy (lf, ".#");
3025 #endif
3026 } else { /* copy and clip dir name from path name */
3027 strcpy (lf, fn);
3028 #ifdef vms
3029 #error fix this, see get_recovery_name
3030 #endif
3031 lf [bn - fn - 1] = '\0';
3032 #ifdef vms
3033 strappend (lf, "$$", maxFILENAMElen); /* not used anyway */
3034 #else
3035 strappend (lf, "/.#", maxFILENAMElen);
3036 #endif
3037 }
3038 strappend (lf, bn, maxFILENAMElen);
3039
3040 return lf;
3041 }
3042
3043 #endif
3044
3045
3046 /*======================================================================*\
3047 |* Handle file locking *|
3048 \*======================================================================*/
3049
3050 #ifdef use_locking
3051
3052 static
3053 int
getsymboliclink(name,target,size)3054 getsymboliclink (name, target, size)
3055 char * name;
3056 char * target;
3057 int size;
3058 {
3059 int ret = readlink (name, target, size - 1);
3060 if (ret >= 0) {
3061 target [ret] = '\0';
3062 } else {
3063 /* try to read cygwin pseudo symbolic link;
3064 on network file systems, cygwin is likely to create a text file
3065 rather than a symbolic link
3066 */
3067 int fd = open (name, O_RDONLY | O_BINARY, 0);
3068 int rd;
3069 char linkbuf [maxLINElen];
3070 if (fd < 0) {
3071 return fd;
3072 }
3073 rd = read (fd, linkbuf, sizeof (linkbuf));
3074 if (rd > 0 && strisprefix ("!<symlink>��", linkbuf)) {
3075 char * linkpoi = linkbuf + 12;
3076 char * linkend = linkbuf + rd;
3077 clear_UTF16_transform ();
3078 rd = UTF16_transform (True, target, size, & linkpoi, linkend);
3079 target [rd] = '\0';
3080 ret = rd;
3081 } else if (rd == 0) {
3082 /* indicate empty pseudo-link (plain file) */
3083 ret = 0;
3084 } else {
3085 ret = -1;
3086 }
3087 (void) close (fd);
3088 }
3089 return ret;
3090 }
3091
3092 static
3093 void
setlocktarget(target)3094 setlocktarget (target)
3095 char * target;
3096 {
3097 char hn [maxFILENAMElen];
3098 if (gethostname (hn, sizeof (hn)) == 0) {
3099 hn [sizeof (hn) - 1] = '\0';
3100 } else {
3101 strcpy (hn, "?");
3102 }
3103 build_string (target, "%s@%s.%d", getusername (), hn, (int) getpid ());
3104 }
3105
3106 #endif
3107
3108 /**
3109 Don't modify? - check whether file is view-only or locked
3110 */
3111 FLAG
dont_modify()3112 dont_modify ()
3113 {
3114 #ifndef use_locking
3115 if (viewonly) {
3116 viewonlyerr ();
3117 return True;
3118 } else {
3119 return False;
3120 }
3121 #else
3122 if (viewonly) {
3123 viewonlyerr ();
3124 return True;
3125 } else if (file_locked != False) {
3126 /* True (locked myself) or NOT_VALID (ignored / file readonly) */
3127 return False;
3128 } else if (file_name [0] == '\0') {
3129 return False;
3130 } else if (writable == False) {
3131 file_locked = NOT_VALID;
3132 return False;
3133 } else {
3134 char * lf = get_lockfile_name (file_name);
3135 char target [maxFILENAMElen];
3136 int ret = getsymboliclink (lf, target, sizeof (target));
3137 if (ret > 0) {
3138 /* non-empty lock file found: notify and set viewonly */
3139 char * dot;
3140 char msg [maxMSGlen];
3141
3142 dot = strchr (target, '.');
3143 if (dot) {
3144 /* shorten lock info display, keep unlock hint visible */
3145 * dot = '\0';
3146 }
3147 viewonly_locked = True;
3148 #ifdef delay_flags
3149 flags_changed = True;
3150 #else
3151 displayflags ();
3152 #endif
3153
3154 build_string (msg, "Notice: File is locked by %s; Unlock from File menu", target);
3155 status_fmt2 ("", msg);
3156 sleep (2); /* give time to see the notice */
3157
3158 return True;
3159 } else if (ret == 0) {
3160 /* empty pseudo (plain) lock file found;
3161 assume unlock workaround */
3162 file_locked = NOT_VALID; /* ? */
3163 return False;
3164 } else {
3165 /* no lock file, create one */
3166 setlocktarget (target);
3167 if (symlink (target, lf) == 0) {
3168 file_locked = True;
3169 } else {
3170 int err = geterrno ();
3171 if (err == EEXIST) {
3172 /* race condition after readlink () */
3173 viewonly_locked = True;
3174 flags_changed = True;
3175
3176 status_fmt2 ("", "Notice: File has just been locked by someone; Unlock from File menu");
3177 sleep (2); /* time to see notice */
3178 return True;
3179 } else if (0
3180 #ifdef ENOSYS
3181 || err == ENOSYS /* not on VMS */
3182 #endif
3183 #ifdef EOPNOTSUPP
3184 || err == EOPNOTSUPP /* ? */
3185 #endif
3186 #ifdef ENOTSUP
3187 || err == ENOTSUP /* ? */
3188 #endif
3189 ) {
3190 /* file system does not support symbolic links */
3191 file_locked = NOT_VALID;
3192 } else {
3193 error ("Cannot lock file");
3194 /* no sleep() here, editing effect would be postponed */
3195 file_locked = NOT_VALID; /* ? */
3196 }
3197 }
3198
3199 return False;
3200 }
3201 }
3202 #endif
3203 }
3204
#ifdef use_locking
/*
 * delete_lockfile removes lock file lf.
   If it cannot be deleted and is not a symbolic link either, it is
   truncated instead; an empty plain lock file indicates 'unlocked'.
 */
static
void
delete_lockfile (lf)
  char * lf;
{
  char target [maxFILENAMElen];

  if (delete_file (lf) >= 0) {
    return;
  }

  /* if the link cannot be deleted although created by mined before
     (unless if grabbing lock), that is likely due to weird
     network file system configuration and the link is likely not
     even a symbolic link but rather a plain file;
     check whether it's indeed a plain file and make it empty
     to indicate 'unlocked'
   */
  if (readlink (lf, target, sizeof (target) - 1) < 0) {
    (void) truncate (lf, 0);
  }
}
#endif
3226
/**
   Release the lock on the current file.
   The lock file is only deleted if this process created the lock
   (file_locked == True) and the lock file still carries the lock
   information of this process, i.e. was not grabbed by somebody else.
   In any case, file_locked is reset to False.
 */
void
unlock_file ()
{
#ifdef use_locking
  if (file_locked == True) {
    char * lf = get_lockfile_name (file_name);
    char target [maxFILENAMElen];
    if (getsymboliclink (lf, target, sizeof (target)) >= 0) {
      char mylocktext [maxFILENAMElen];
      /* compare lock file contents with what this process would write */
      setlocktarget (mylocktext);
      if (streq (target, mylocktext)) {
        delete_lockfile (lf);
      } else {
        /* don't delete if grabbed by somebody else */
      }
#ifdef __CYGWIN__force_unlock_workaround
    } else {
      /* now obsolete by generic handling in getsymboliclink */
      /* on nfs file systems, cygwin is likely to create a
         text file rather than a symbolic link;
         delete anyway, as it is probably "my lock"
         (file_locked == True)
       */
      delete_lockfile (lf);
#endif
    }
  }
#endif
  file_locked = False;
}
3257
3258 void
relock_file()3259 relock_file ()
3260 {
3261 loaded_from_filename = False;
3262 if (modified) {
3263 (void) dont_modify ();
3264 }
3265 }
3266
3267
3268 /**
3269 Grab file lock by other user.
3270 (emacs, joe: "steal")
3271 */
3272 void
grab_lock()3273 grab_lock ()
3274 {
3275 #ifdef use_locking
3276 char * lf = get_lockfile_name (file_name);
3277 char target [maxFILENAMElen];
3278 if (getsymboliclink (lf, target, sizeof (target)) >= 0) {
3279 /* it's really a lock file, maybe a pseudo (plain) lock file,
3280 or at least empty */
3281 delete_lockfile (lf);
3282 }
3283 #endif
3284 file_locked = False;
3285 viewonly_locked = False;
3286 }
3287
3288 /**
3289 Ignore file lock by other user.
3290 (emacs: "proceed", joe: "edit anyway")
3291 */
3292 void
ignore_lock()3293 ignore_lock ()
3294 {
3295 file_locked = NOT_VALID;
3296 viewonly_locked = False;
3297 }
3298
3299
3300 /*======================================================================*\
3301 |* Time check debugging *|
3302 \*======================================================================*/
3303
/* rename to debug_check_modtime to enable tracing of file time checks */
#define dont_debug_check_modtime

#ifdef debug_check_modtime
#undef dont_check_modtime

#include <time.h>

/*
 * trace_modtime prints the time at timepoi (as local time), together
   with tag and file name fn, to standard output and the debug log.
 */
static
void
trace_modtime (timepoi, tag, fn)
  time_t * timepoi;
  char * tag;
  char * fn;
{
  char timbuf [99];
  /* %Y is the calendar year matching %m-%d;
     %G would be the ISO 8601 week-based year,
     which differs around the turn of the year */
  strftime (timbuf, sizeof (timbuf), "%Y-%m-%d %H:%M:%S", localtime (timepoi));
  printf ("%s (%s) %s\n", timbuf, tag, fn);
  debuglog (tag, timbuf, fn);
}

#else
/* tracing disabled: calls expand to nothing */
#define trace_modtime(timepoi, tag, fn)	
#endif
3327
3328
3329 /*======================================================================*\
3330 |* File operations *|
3331 \*======================================================================*/
3332
3333 #define dont_debug_filter
3334
/*
 * is_dev checks by the file status at fsbufpoi whether file fn is a
   character or block device; returns 1 if so, 0 otherwise.
   On cygwin, "/dev/clipboard" is not considered a device.
   With VAXC, always returns 0.
 */
static
int
is_dev (fn, fsbufpoi)
  char * fn;
  struct stat * fsbufpoi;
{
#ifndef VAXC
  int chardev = S_ISCHR (fsbufpoi->st_mode);
  int blockdev = S_ISBLK (fsbufpoi->st_mode);

  if (! chardev && ! blockdev) {
    return 0;
  }
#ifdef __CYGWIN__
  if (streq (fn, "/dev/clipboard")) {
    return 0;
  }
#endif
  return 1;
#else
  return 0;
#endif
}
3353
3354 static
3355 FLAG
file_changed(fn,fstatpoi)3356 file_changed (fn, fstatpoi)
3357 char * fn;
3358 struct stat * fstatpoi;
3359 {
3360 if (filestat.st_mtime) {
3361 if (fstatpoi->st_mtime != filestat.st_mtime
3362 || fstatpoi->st_size != filestat.st_size
3363 #ifndef vms
3364 || fstatpoi->st_dev != filestat.st_dev
3365 || fstatpoi->st_ino != filestat.st_ino
3366 #endif
3367 ) {
3368 /*overwriteOK = False; will ask anyway; this would toggle wrong question */
3369 #ifdef flush_writable
3370 if (access (fn, W_OK) >= 0) {
3371 writable = True;
3372 } else {
3373 writable = False;
3374 }
3375 #else
3376 writable = True; /* may try on new or modified file */
3377 #endif
3378
3379 set_modified ();
3380 relock_file ();
3381
3382 #ifndef vms
3383 if (fstatpoi->st_dev != filestat.st_dev || fstatpoi->st_ino != filestat.st_ino) {
3384 return NOT_VALID; /* file replaced */
3385 }
3386 #endif
3387 return True; /* file modified */
3388 } else {
3389 return False; /* file unchanged */
3390 }
3391 } else {
3392 /* file appeared (if it did not exist before) */
3393 return VALID;
3394 }
3395 }
3396
3397
3398 static
3399 void
check_recovery_file(delay_msg)3400 check_recovery_file (delay_msg)
3401 FLAG delay_msg;
3402 {
3403 #ifndef VAXC
3404 char * recovery_fn = get_recovery_name (file_name);
3405 struct stat fstat_buf;
3406 int statres = stat (recovery_fn, & fstat_buf);
3407
3408 /* does a recovery file exist and is it newer? */
3409 if (statres == 0 && fstat_buf.st_mtime > filestat.st_mtime) {
3410 recovery_exists = True;
3411 if (delay_msg) {
3412 sleep (2) /* give time to see read error msg */;
3413 }
3414 status_fmt2 ("", "Notice: A newer recovery file exists; Recover from File menu");
3415 sleep (2); /* give time to see the notice */
3416 }
3417 #endif
3418 }
3419
3420 static
3421 void
update_file_name(newname,update_display,addtolist)3422 update_file_name (newname, update_display, addtolist)
3423 char * newname;
3424 FLAG update_display;
3425 FLAG addtolist;
3426 {
3427 loaded_from_filename = False;
3428
3429 /* Save file name */
3430 strncpy (file_name, newname, maxFILENAMElen);
3431 file_name [maxFILENAMElen - 1] = '\0';
3432
3433 if (addtolist) {
3434 filelist_add (dupstr (file_name), False);
3435 }
3436
3437 /* configure file name specific preferences */
3438 configure_preferences (False); /* restoring initial preferences first */
3439 /* fix yet unclear filename suffix-specific options... */
3440 if (update_display) { /* after writing */
3441 set_file_type_flags ();
3442 }
3443
3444 if (update_display) {
3445 /* fix syntax state in case highlighting was switched on */
3446 if (mark_HTML /* && not before... */) {
3447 update_syntax_state (header->next);
3448 }
3449
3450 /* could check whether a display-relevant option has been changed
3451 by configure_preferences above, e.g. hide_passwords or tabsize
3452 or ... - but better go the safe way and always:
3453 */
3454 RDwin ();
3455 }
3456 }
3457
3458
3459 /**
3460 called before a file is loaded.
3461 frees allocated space (linked list), initializes header/tail pointer
3462 */
3463 static
3464 void
clear_textbuf()3465 clear_textbuf ()
3466 {
3467 register LINE * line;
3468 register LINE * next_line;
3469
3470 /* Delete the whole list */
3471 for (line = header->next; line != tail; line = next_line) {
3472 next_line = line->next;
3473 if (line->allocated) {
3474 free_space (line->text);
3475 free_header (line);
3476 }
3477 }
3478
3479 /* header and tail should point to itself */
3480 line->next = line->prev = line;
3481 }
3482
3483
/* text encoding tag as it was before auto-detection
   (saved by encoding_auto_detection, for CJK char counting) */
static char prev_encoding_tag;
/* presumably the text encoding name before a change - TODO confirm */
static char * prev_text_encoding;
/* statistics counters, by their names for bytes, characters,
   UTF-8 characters and CJK characters of the loaded text
   (NOTE(review): where they are maintained is not visible here) */
static long nr_of_bytes = 0L;
static long nr_of_chars = 0L;
static long nr_of_utfchars = 0L;
static long nr_of_cjkchars = 0L;
3490
3491 static
3492 void
encoding_auto_detection(empty,do_auto_detect)3493 encoding_auto_detection (empty, do_auto_detect)
3494 FLAG empty;
3495 FLAG do_auto_detect;
3496 {
3497 /* auto-detection stuff */
3498
3499 #ifdef debug_auto_detect
3500 printf ("good CJK %ld, weak CJK %ld, bad CJK %ld, good UTF %ld, bad UTF %ld\n",
3501 count_good_cjk, count_weak_cjk, count_bad_cjk, count_good_utf, count_bad_utf);
3502 printf ("iso %ld, cp1252 %ld, cp850 %ld, mac %ld, viscii %ld, tcvn %ld\n",
3503 count_good_iso, count_good_cp1252, count_good_cp850, count_good_mac, count_good_viscii, count_good_tcvn);
3504 printf ("big5 %ld, gb %ld, jp %ld, jis x %ld, sjis %ld, sjis x %ld, uhc %ld, johab %ld\n",
3505 count_big5, count_gb, count_jp, count_jisx, count_sjis, count_sjisx, count_uhc, count_johab);
3506 #endif
3507
3508 /* consider line-end types */
3509 count_good_cp1252 += 5 * count_lineend_CRLF;
3510 count_good_cp850 += 5 * count_lineend_CRLF;
3511 count_good_mac += 5 * count_lineend_CR;
3512 count_good_iso += count_lineend_LF;
3513 count_good_ebcdic += 5 * count_lineend_NL;
3514 count_sjis += 12 * count_lineend_CRLF;
3515 count_sjisx += 12 * count_lineend_CRLF;
3516
3517 /* heuristic adjustments */
3518 /*
3519 count_good_viscii *= 0.9;
3520 count_big5 *= 2;
3521 count_uhc *= 2;
3522 count_jp *= 3;
3523 count_jisx *= 3;
3524 count_sjis *= 1.5;
3525 count_sjisx *= 1.5;
3526 */
3527 count_sjis += count_sjis1 / 2;
3528 count_sjisx += count_sjis1 / 2;
3529
3530 #ifdef debug_auto_detect
3531 printf ("-> iso %ld, cp1252 %ld, cp850 %ld, mac %ld, viscii %ld, tcvn %ld\n",
3532 count_good_iso, count_good_cp1252, count_good_cp850, count_good_mac, count_good_viscii, count_good_tcvn);
3533 printf ("-> big5 %ld, gb %ld, jp %ld, jis x %ld, sjis %ld, sjis x %ld, uhc %ld, johab %ld\n",
3534 count_big5, count_gb, count_jp, count_jisx, count_sjis, count_sjisx, count_uhc, count_johab);
3535 #endif
3536
3537 /* remember text encoding before auto-detection (for CJK char counting) */
3538 prev_encoding_tag = text_encoding_tag;
3539
3540 /* filter out encodings that shall not be auto-detected */
3541 if (detect_encodings != NIL_PTR && * detect_encodings != '\0') {
3542 if (strchr (detect_encodings, 'G') == NIL_PTR) {
3543 count_gb = 0;
3544 }
3545 if (strchr (detect_encodings, 'B') == NIL_PTR) {
3546 count_big5 = 0;
3547 }
3548 if (strchr (detect_encodings, 'K') == NIL_PTR) {
3549 count_uhc = 0;
3550 }
3551 if (strchr (detect_encodings, 'J') == NIL_PTR) {
3552 count_jp = 0;
3553 }
3554 if (strchr (detect_encodings, 'X') == NIL_PTR) {
3555 count_jisx = 0;
3556 }
3557 if (strchr (detect_encodings, 'S') == NIL_PTR) {
3558 count_sjis = 0;
3559 count_sjis1 = 0;
3560 }
3561 if (strchr (detect_encodings, 'x') == NIL_PTR) {
3562 count_sjisx = 0;
3563 }
3564 if (strchr (detect_encodings, 'C') == NIL_PTR) {
3565 count_cns = 0;
3566 }
3567 if (strchr (detect_encodings, 'H') == NIL_PTR) {
3568 count_johab = 0;
3569 }
3570
3571 if (strpbrk (detect_encodings, "V8") == NIL_PTR) {
3572 count_good_viscii = 0;
3573 }
3574 if (strpbrk (detect_encodings, "N8") == NIL_PTR) {
3575 count_good_tcvn = 0;
3576 }
3577
3578 if (strpbrk (detect_encodings, "L8") == NIL_PTR) {
3579 count_good_iso = 0;
3580 }
3581 if (strpbrk (detect_encodings, "W8") == NIL_PTR) {
3582 count_good_cp1252 = 0;
3583 }
3584 if (strpbrk (detect_encodings, "P8") == NIL_PTR) {
3585 count_good_cp850 = 0;
3586 }
3587 if (strpbrk (detect_encodings, "M8") == NIL_PTR) {
3588 count_good_mac = 0;
3589 }
3590 if (strpbrk (detect_encodings, "E") == NIL_PTR) {
3591 count_good_ebcdic = 0;
3592 }
3593 #ifdef debug_auto_detect
3594 printf ("!> iso %ld, cp1252 %ld, cp850 %ld, mac %ld, viscii %ld, tcvn %ld\n",
3595 count_good_iso, count_good_cp1252, count_good_cp850, count_good_mac, count_good_viscii, count_good_tcvn);
3596 printf ("!> big5 %ld, gb %ld, jp %ld, jis x %ld, sjis %ld, sjis x %ld, uhc %ld, johab %ld\n",
3597 count_big5, count_gb, count_jp, count_jisx, count_sjis, count_sjisx, count_uhc, count_johab);
3598 #endif
3599 /* disable EBCDIC auto-detection after loading */
3600 count_good_ebcdic = 0;
3601 }
3602
3603 count_max_cjk = 0;
3604 max_cjk_tag = ' ';
3605 max_cjk_count (count_cns, 'C');
3606 max_cjk_count (count_johab, 'H');
3607 max_cjk_count (count_sjis, 'S');
3608 max_cjk_count (count_sjisx, 'x');
3609 max_cjk_count (count_jp, 'J');
3610 max_cjk_count (count_jisx, 'X');
3611 max_cjk_count (count_big5, 'B');
3612 max_cjk_count (count_uhc, 'K');
3613 max_cjk_count (count_gb, 'G');
3614
3615 /* Unicode encoding detection */
3616 if (! empty && do_auto_detect && ! utf16_file && ! ebcdic_file) {
3617 if (BOM ||
3618 #ifdef utf_preference_in_utf_term
3619 utf8_screen ? count_good_utf >= count_bad_utf
3620 : count_good_utf > count_bad_utf
3621 #else
3622 count_good_utf >= count_bad_utf
3623 #endif
3624 )
3625 {
3626 nr_of_chars = nr_of_utfchars;
3627 (void) set_text_encoding (NIL_PTR, 'U', "load: U");
3628 } else {
3629 if (strisprefix ("UTF", get_text_encoding ())) {
3630 (void) set_text_encoding (NIL_PTR, 'L', "load: L");
3631 } else {
3632 /* allow fallback to default encoding set on function entry */
3633 }
3634
3635 if (count_good_cjk >
3636 #ifdef poor_tuning_attempt
3637 count_bad_cjk * 2 + count_weak_cjk * 5 / nr_of_cjkchars
3638 #else
3639 count_bad_cjk * 2 + count_weak_cjk / 5
3640 #endif
3641 && count_good_cjk > count_good_iso / 2
3642 /* at least one CJK encoding is enabled for auto-detection */
3643 && max_cjk_tag != ' '
3644 ) {
3645 /* setting cjk_text = True; */
3646 (void) set_text_encoding (":??", ' ', "load: CJK");
3647 nr_of_chars = nr_of_cjkchars;
3648 } else if (
3649 /* count_good_viscii / 2 > count_good_cjk + count_weak_cjk && */
3650 /* count_good_viscii / 3 > count_good_cjk + count_weak_cjk - count_bad_cjk && */
3651 count_good_viscii > count_good_iso
3652 && count_good_viscii > count_good_cp1252
3653 && count_good_viscii > count_good_cp850
3654 && count_good_viscii > count_good_mac
3655 ) {
3656 (void) set_text_encoding ("VISCII", 'V', "detect");
3657 } else if (count_good_ebcdic > count_good_cp1252 &&
3658 count_good_ebcdic > count_good_cp850 &&
3659 count_good_ebcdic > count_good_mac) {
3660 (void) set_text_encoding ("CP1047", ' ', "detect");
3661 } else if (count_good_cp1252 > count_good_iso &&
3662 count_good_cp1252 > count_good_cp850 &&
3663 count_good_cp1252 > count_good_mac) {
3664 (void) set_text_encoding ("CP1252", 'W', "detect");
3665 } else if (count_good_cp850 > count_good_iso &&
3666 count_good_cp850 > count_good_mac) {
3667 (void) set_text_encoding ("CP850", 'P', "detect");
3668 } else if (count_good_mac > count_good_iso) {
3669 (void) set_text_encoding ("MacRoman", 'M', "detect");
3670 } else {
3671 /* leave default/fallback encoding */
3672 }
3673 }
3674 }
3675
3676 /* detect CJK encoding based on counters */
3677 if (! empty && cjk_text && do_auto_detect && ! (utf8_text && utf16_file)) {
3678 if (max_cjk_tag != ' ') {
3679 (void) set_text_encoding (NIL_PTR, max_cjk_tag, "detect CJK");
3680 }
3681 }
3682
3683 /* detect style of quotation marks */
3684 if (quote_type == 0) {
3685 determine_quote_style ();
3686 }
3687
3688 /* if text encoding changed, reset previous one, then toggle */
3689 if (! streq (prev_text_encoding, get_text_encoding ())) {
3690 char * new_text_encoding = get_text_encoding ();
3691 (void) set_text_encoding (prev_text_encoding, ' ', "load: prev");
3692 change_encoding (new_text_encoding);
3693 /* -> ... set_text_encoding (new_text_encoding, ' ', "men: change_encoding"); */
3694 }
3695
3696 /* end auto-detection stuff */
3697 }
3698
3699 static
3700 LINE *
read_file(fd,retpoi,do_auto_detect)3701 read_file (fd, retpoi, do_auto_detect)
3702 int fd;
3703 int * retpoi;
3704 FLAG do_auto_detect;
3705 {
3706 int ret = * retpoi;
3707 int len;
3708 LINE * line = header;
3709
3710 #ifdef debug_timing
3711 long time_get = 0;
3712 long time_check = 0;
3713 long time_line = 0;
3714 long time_count = 0;
3715 #endif
3716 mark_time (time_get);
3717
3718 while (line != NIL_LINE
3719 && line_gotten (ret = get_line (fd, text_buffer, & len, do_auto_detect)))
3720 {
3721 lineend_type new_return_type;
3722
3723 elapsed_mark_time (time_get, time_check);
3724 if (ret == NO_LINE || ret == SPLIT_LINE) {
3725 new_return_type = lineend_NONE;
3726 } else if (ret == NUL_LINE) {
3727 new_return_type = lineend_NUL;
3728 } else {
3729 new_return_type = got_lineend;
3730 }
3731 elapsed_mark_time (time_check, time_line);
3732 line = line_insert_after (line, text_buffer, len, new_return_type);
3733 elapsed_time (time_line);
3734
3735 /* if editing buffer cannot be filled (out of memory),
3736 assure that file is not accidentally overwritten
3737 with truncated buffer version
3738 */
3739 if (line == NIL_LINE) {
3740 set_error ("Out of memory for new lines when reading file");
3741 /* double-prevent incomplete overwriting */
3742 file_name [0] = '\0';
3743 }
3744
3745 mark_time (time_count);
3746 nr_of_bytes += (long) len;
3747
3748 if (do_auto_detect) {
3749 nr_of_chars += char_count (text_buffer);
3750 nr_of_utfchars += utf8_count (text_buffer);
3751 cjk_text = True;
3752 nr_of_cjkchars += char_count (text_buffer);
3753 cjk_text = False;
3754 } else if (utf8_text) {
3755 nr_of_chars += utf8_count (text_buffer);
3756 } else {
3757 nr_of_chars += char_count (text_buffer);
3758 }
3759
3760 if (new_return_type == lineend_NONE) {
3761 nr_of_chars --;
3762 nr_of_utfchars --;
3763 nr_of_cjkchars --;
3764 nr_of_bytes --;
3765 }
3766 elapsed_mark_time (time_count, time_get);
3767 }
3768 elapsed_time (time_get);
3769 #ifdef debug_timing
3770 printf ("get %ld, check %ld, line_insert %ld, count %ld\n", time_get, time_check, time_line, time_count);
3771 #endif
3772
3773 * retpoi = ret;
3774 return line;
3775 }
3776
3777 /*
3778 * Load_file loads the file with given name or the input pipe into memory.
3779 * If the file couldn't be opened, just an empty line is installed.
3780 * Buffer pointers are initialized.
3781 * The current position is set to the last saved position.
3782 *
3783 * Load_file_position loads and positions as follows:
3784 if to_open_linum > 0, the given position is used
3785 if to_open_linum == 0, the last saved position is used
3786 if to_open_linum < 0, no position is set (stays at beginning)
3787 Beware: length of file name argument is not limited!
3788 * aux = False: include in filename list
3789 True: don't "
3790 OPEN: include in filename list, append after current file
3791 */
3792 static
3793 int
load_file_position(file,aux,from_pipe,with_display,to_open_linum,to_open_pos)3794 load_file_position (file, aux, from_pipe, with_display, to_open_linum, to_open_pos)
3795 char * file;
3796 FLAG aux; /* e.g. loading recovery file */
3797 FLAG from_pipe;
3798 FLAG with_display;
3799 int to_open_linum;
3800 int to_open_pos;
3801 {
3802 LINE * line = header;
3803 int fd = -1; /* Filedescriptor for file */
3804 FLAG fcntl_locked = False; /* lock acquired ? */
3805 FLAG do_auto_detect = auto_detect;
3806 FLAG empty = False;
3807 int ret = FINE;
3808 FLAG errshown = False;
3809 FLAG restore_cmd_options = False;
3810
3811 nr_of_bytes = 0L;
3812 nr_of_chars = 0L;
3813 nr_of_utfchars = 0L;
3814 nr_of_cjkchars = 0L;
3815 if (aux == OPEN) {
3816 /* todo: notify/mark filelist add mode */
3817 aux = False;
3818 }
3819 prev_text_encoding = get_text_encoding ();
3820
3821 /* Remove previous file lock (if any) */
3822 unlock_file ();
3823 viewonly_locked = False;
3824
3825 clearscreen ();
3826 flush ();
3827 clear_textbuf ();
3828 /* initialize cursor position */
3829 x = 0;
3830 y = 0;
3831 /* initialize some file mode and status flags */
3832 modified = reading_pipe = from_pipe;
3833 backup_pending = True;
3834 overwriteOK = False;
3835 file_locked = False;
3836 recovery_exists = False;
3837 viewonly_err = False;
3838 total_lines = 0;
3839 total_chars = 0L;
3840
3841 open_linum = to_open_linum;
3842 open_pos = to_open_pos;
3843 open_col = -1;
3844
3845 writable = True;
3846 file_is_dir = False;
3847 file_is_dev = False;
3848 file_is_fifo = False;
3849
3850 fprot1 = fprot0; /* default file properties */
3851 filestat.st_mtime = 0; /* fall-back modification time */
3852 trace_modtime (& filestat.st_mtime, "default", file);
3853
3854 set_quote_type (default_quote_type);
3855 if (preselect_quote_style != NIL_PTR && smart_quotes != VALID) {
3856 set_quote_style (preselect_quote_style);
3857 }
3858
3859 if (file == NIL_PTR) {
3860 /* This is called at most once; otherwise we would have to
3861 consider configure_preferences (False) (see update_file_name)
3862 */
3863 if (reading_pipe == False) {
3864 status_msg ("New file");
3865 empty = True;
3866 } else {
3867 fd = 0;
3868 file = "standard input";
3869 }
3870 file_name [0] = '\0';
3871 } else {
3872 update_file_name (file, False, ! aux);
3873 restore_cmd_options = True;
3874 status_line ("Accessing ", file);
3875
3876 if (! from_pipe) {
3877 /* Determine file type and properties */
3878 #ifndef VAXC
3879 struct stat fstat_buf;
3880 if (stat (file, & fstat_buf) == 0) {
3881 if (S_ISDIR (fstat_buf.st_mode)) {
3882 file_is_dir = True;
3883 }
3884 if (S_ISFIFO (fstat_buf.st_mode)) {
3885 file_is_fifo = True;
3886 }
3887 if (is_dev (file, & fstat_buf)) {
3888 file_is_dev = True;
3889 }
3890
3891 /* Determine file protection in case text
3892 is saved to other file */
3893 # ifdef msdos
3894 /* fstat retrieval does not work properly with djgpp */
3895 # else
3896 fprot1 = fstat_buf.st_mode;
3897 # endif
3898 /* Determine file modification time */
3899 if (! file_is_fifo) {
3900 memcpy (& filestat, & fstat_buf, sizeof (struct stat));
3901 }
3902 trace_modtime (& filestat.st_mtime, "load is", file);
3903 }
3904 #endif
3905 }
3906
3907 if (file_is_dev) {
3908 error ("Cannot edit char/block device file");
3909 ret = ERRORS;
3910 overwriteOK = False;
3911 viewonly_err = True;
3912 empty = True;
3913 } else if (access (file, F_OK) < 0) { /* Cannot access file */
3914 status_line ("New file ", file);
3915 overwriteOK = False;
3916 empty = True;
3917 #ifndef vms
3918 /* On VMS, open with O_RDWR would already change the
3919 'modified' file timestamp and thus spoil the 'modified' checks
3920 */
3921 } else if (! viewonly_mode && ! file_is_fifo
3922 && (fd = open (file, O_RDWR | O_BINARY, 0)) >= 0) {
3923 #define dont_use_fcntl_locking
3924 # ifdef use_fcntl_locking
3925 struct flock wlock;
3926 wlock.l_type = F_WRLCK;
3927 wlock.l_whence = SEEK_SET;
3928 wlock.l_start = 0;
3929 wlock.l_len = 0;
3930 if (fcntl (fd, F_SETLK, & wlock) < 0) {
3931 status_line ("Other program claims lock on ", file);
3932 sleep (2) /* give time to see warning */;
3933 } else {
3934 fcntl_locked = True;
3935 }
3936 # endif
3937 overwriteOK = True;
3938 writable = True;
3939 if (open_linum == 0) {
3940 get_open_pos (file_name);
3941 restore_cmd_options = True;
3942 }
3943 #endif
3944 } else if ((fd = open (file, O_RDONLY | O_BINARY, 0)) >= 0) {
3945 overwriteOK = True;
3946 if (access (file, W_OK) < 0) { /* Cannot write file */
3947 writable = False;
3948 }
3949
3950 if (open_linum == 0) {
3951 get_open_pos (file_name);
3952 restore_cmd_options = True;
3953 }
3954 } else {
3955 error2 ("Cannot open: " /*, file */, serror ());
3956 ret = ERRORS;
3957 viewonly_err = True;
3958 empty = True;
3959 }
3960 }
3961
3962 /* set file type specific options */
3963 set_file_type_flags ();
3964 if (restore_cmd_options) {
3965 /* this call for preferences management is currently
3966 not effective as it would still interfere with
3967 cumulative options
3968 */
3969 configure_preferences (OPEN); /* restore command line options */
3970 }
3971
3972 /* initiate reading file */
3973 loading = True; /* Loading file, so set flag */
3974 loaded_from_filename = False;
3975 reset_get_line (True); /* must be called after get_open_pos ! */
3976
3977 /* restore determined default text encoding */
3978 /* must be set after reset_get_line ! */
3979 (void) set_text_encoding (default_text_encoding, ' ', "load: def");
3980
3981 if (fd >= 0) {
3982 top_line_dirty = True;
3983 splash_logo ();
3984 }
3985
3986 /* display header area already, even if later displayed again
3987 - to show something while loading
3988 -! to make sure proper height of filename tabs is calculated
3989 but do not show filelist yet
3990 */
3991 displaymenuline (False);
3992 flush ();
3993
3994 if (fd >= 0) {
3995 FLAG save_utf8_text = utf8_text;
3996 FLAG save_cjk_text = cjk_text;
3997 FLAG save_mapped_text = mapped_text;
3998 if (do_auto_detect) {
3999 utf8_text = False;
4000 cjk_text = False;
4001 }
4002 /* determine total_chars from here to change_encoding () */
4003
4004 #ifndef __TURBOC__
4005 if (fd > 0 && filtering_read) {
4006 int pfds [2];
4007 int pid;
4008 int status;
4009 int w = -1; /* avoid -Wmaybe-uninitialized */
4010 int waiterr = 0; /* avoid -Wmaybe-uninitialized */
4011
4012 (void) close (fd);
4013 if (pipe (pfds) < 0) {
4014 ret = ERRORS;
4015 pid = -1;
4016 } else {
4017 raw_mode (False);
4018 /* clean-up primary screen */
4019 set_cursor (0, YMAX);
4020 flush ();
4021
4022 pid = fork ();
4023 }
4024 if (ret == ERRORS) { /* pipe error */
4025 set_errno ("Cannot create filter pipe");
4026 } else if (pid < 0) { /* fork error */
4027 raw_mode (True);
4028 set_errno ("Cannot fork filter");
4029 ret = ERRORS;
4030 } else if (pid == 0) { /* child */
4031 (void) close (pfds [0]);
4032 /* attach stdout to pipe */
4033 (void) dup2 (pfds [1], 1);
4034 (void) close (pfds [1]);
4035 /* invoke filter */
4036 if (strchr (filter_read, ' ')) {
4037 /* filter spec includes parameters */
4038 char filter [maxFILENAMElen];
4039 if (strstr (filter_read, "%s")) {
4040 /* filter spec has filename placeholder */
4041 sprintf (filter, filter_read, file_name);
4042 } else {
4043 /* append filename */
4044 sprintf (filter, "%s %s", filter_read, file_name);
4045 }
4046 #ifdef debug_filter
4047 fprintf (stderr, "system ('%s')\n", filter);
4048 #endif
4049 status = system (filter);
4050 if (status >> 8) {
4051 _exit (status >> 8);
4052 } else {
4053 _exit (status);
4054 }
4055 } else {
4056 #ifdef debug_filter
4057 fprintf (stderr, "%s %s\n", filter_read, file_name);
4058 #endif
4059 execlp (filter_read, "filter_read", file_name, NIL_PTR);
4060 #ifdef debug_filter
4061 fprintf (stderr, "_exit (127) [%s]\n", serror ());
4062 #endif
4063 _exit (127);
4064 }
4065 } else { /* pid > 0: parent */
4066 (void) close (pfds [1]);
4067 /* read file contents from pipe */
4068 line = read_file (pfds [0], & ret, do_auto_detect);
4069 /* wait for filter to terminate */
4070 do {
4071 w = wait (& status);
4072 waiterr = geterrno ();
4073 } while (w != pid && (w != -1 || waiterr == EINTR));
4074 }
4075 quit = False;
4076 intr_char = False;
4077 #ifdef debug_filter
4078 printf ("wait status %04X\n", status);
4079 #endif
4080
4081 raw_mode (True);
4082 clearscreen (); /* clear double height splash logo area */
4083
4084 /* check child process errors */
4085 if (ret == ERRORS) {
4086 /* fork failed */
4087 } else if (w == -1) {
4088 status_fmt ("Filter wait error: ", serrorof (waiterr));
4089 errshown = True;
4090 ret = ERRORS;
4091 } else if ((status >> 8) == 127) { /* child could not exec filter */
4092 status_fmt2 (filter_read, ": Failed to start filter");
4093 errshown = True;
4094 ret = ERRORS;
4095 } else if ((status & 0x80) != 0) { /* filter dumped */
4096 status_fmt ("Filter dumped: ", dec_out (status & 0x7F));
4097 errshown = True;
4098 ret = ERRORS;
4099 } else if ((status & 0xFF) != 0) { /* filter aborted */
4100 status_fmt ("Filter aborted: ", dec_out (status & 0x7F));
4101 errshown = True;
4102 ret = ERRORS;
4103 } else if ((status >> 8) != 0) { /* filter reported error */
4104 status_fmt ("Filter error: ", serrorof (status >> 8));
4105 errshown = True;
4106 ret = ERRORS;
4107 }
4108 } else
4109 #endif
4110 {
4111 if (file_name [0] != '\0') {
4112 status_line ("Reading ", file_name);
4113 } else {
4114 status_line ("Reading ", file);
4115 }
4116
4117 line = read_file (fd, & ret, do_auto_detect);
4118
4119 clearscreen (); /* clear double height splash logo area */
4120 }
4121
4122
4123 if (utf16_file) {
4124 /* workaround: skip following restore;
4125 in this case, set_text_encoding has been called
4126 within the save/restore pair
4127 -> refactoring!
4128 */
4129 } else {
4130 utf8_text = save_utf8_text;
4131 cjk_text = save_cjk_text;
4132 mapped_text = save_mapped_text;
4133 }
4134
4135 if (line != NIL_LINE &&
4136 (total_lines == 0 /* empty file */
4137 || line->return_type == lineend_NUL /* NUL-terminated file */
4138 )) {
4139 line = line_insert_after (line, "\n", 1, lineend_NONE);
4140 }
4141 clear_filebuf ();
4142 cur_line = header->next;
4143 line_number = 1;
4144 if (fcntl_locked == False) {
4145 debuglog ("closing", "loaded file", "");
4146 (void) close (fd);
4147 }
4148
4149 if (count_lineend_CRLF > count_lineend_LF) {
4150 default_lineend = lineend_CRLF;
4151 } else {
4152 default_lineend = lineend_LF;
4153 }
4154 } else {
4155 /* Handle empty buffer / new file */
4156
4157 /* Restore default properties if file could not be opened */
4158 fprot1 = fprot0;
4159 filestat.st_mtime = 0;
4160 trace_modtime (& filestat.st_mtime, "loaded none", file);
4161
4162 default_lineend = newfile_lineend;
4163 if (ebcdic_text || ebcdic_file) {
4164 default_lineend = lineend_NL1;
4165 }
4166 if (lineends_LFtoCRLF) { /* +r */
4167 default_lineend = lineend_CRLF;
4168 } else if (lineends_CRLFtoLF) { /* -r */
4169 default_lineend = lineend_LF;
4170 }
4171
4172 /* Just install an empty line */
4173 line = line_insert_after (line, "\n", 1, default_lineend);
4174 if (line == NIL_LINE) {
4175 set_error ("Out of memory for new text buffer");
4176 }
4177 }
4178
4179
4180 encoding_auto_detection (empty, do_auto_detect);
4181 total_chars = nr_of_chars;
4182 total_chars = -1L; /* nr_of_chars counting does not work properly */
4183
4184
4185 /* show header area (with filelist) */
4186 displaymenuline (True);
4187
4188
4189 /* handle errors */
4190 if (line == NIL_LINE) {
4191 set_error ("Out of memory for new lines when loading file");
4192 file_name [0] = '\0'; /* double-prevent incomplete overwriting */
4193
4194 /* insert static emergency line to avoid crash */
4195 line = line_insert_after (tail->prev, "\n", -1, lineend_NONE);
4196
4197 viewonly_err = True;
4198 modified = False;
4199 ret = ERRORS;
4200 }
4201
4202 if (get_line_error != NIL_PTR && ! errshown) {
4203 /* show postponed set_error messages */
4204 show_get_l_errors ();
4205 errshown = True;
4206 ret = ERRORS;
4207 overwriteOK = False;
4208 }
4209
4210 if (fd >= 0) {
4211 if (! errshown && ret != ERRORS && line != NIL_LINE) {
4212 fstatus ("Loaded", nr_of_bytes, nr_of_chars);
4213 }
4214 }
4215
4216 if (fd >= 0 && ret == ERRORS) {
4217 if (! errshown) {
4218 ring_bell ();
4219 if (nr_of_bytes > 0) {
4220 status_fmt ("Reading failed (buffer incomplete): ", serror ());
4221 } else {
4222 status_fmt ("Reading failed (could not read): ", serror ());
4223 }
4224 errshown = True;
4225 overwriteOK = False;
4226 }
4227 file_name [0] = '\0'; /* double-prevent incomplete overwriting */
4228 viewonly_err = True;
4229 modified = False;
4230 }
4231
4232 /* finalize loading, find file and screen position */
4233 reset (header->next, 0); /* Initialize pointers */
4234 move_to (0, 0);
4235 loading = False; /* Stop loading, reset flag */
4236 loaded_from_filename = True; /* indicate relation of text and filename */
4237
4238 if (open_linum > 0) {
4239 LINE * open_line = proceed (header->next, open_linum - 1);
4240 char * cpoi;
4241 int cur_column = 0;
4242
4243 move_to (0, find_y_w_o_RD (open_line));
4244
4245 /* re-position within line */
4246 cpoi = cur_line->text;
4247 if (open_col >= 0) {
4248 char * prev_cpoi = cpoi;
4249 while (* cpoi != '\n' && cur_column <= open_col) {
4250 prev_cpoi = cpoi;
4251 advance_char_scr (& cpoi, & cur_column, cur_line->text);
4252 }
4253 if (cur_column <= open_col) {
4254 prev_cpoi = cpoi;
4255 }
4256 move_address (prev_cpoi, y);
4257 } else {
4258 while (* cpoi != '\n' && open_pos > 0) {
4259 advance_char (& cpoi);
4260 open_pos --;
4261 }
4262 move_address (cpoi, y);
4263 }
4264
4265 if (with_display) {
4266 display (0, top_line, last_y, 0);
4267 }
4268 if (! errshown && ret != ERRORS && line != NIL_LINE) {
4269 if (! cjk_text || prev_encoding_tag == text_encoding_tag) {
4270 fstatus ("Loaded", nr_of_bytes, nr_of_chars);
4271 } else {
4272 fstatus ("Loaded", nr_of_bytes, -1L);
4273 }
4274 }
4275 mark_n (0);
4276 } else if (with_display) {
4277 display (0, top_line, last_y, 0);
4278 move_to (0, 0);
4279 }
4280
4281 #ifdef unix
4282 RD_window_title ();
4283 #endif
4284
4285 /* check recovery file */
4286 if (! reading_pipe && ! aux) {
4287 check_recovery_file (ret == ERRORS);
4288 }
4289
4290 if (viewonly_err) {
4291 top_line_dirty = True;
4292 }
4293
4294 return ret;
4295 }
4296
4297 /**
4298 Load file.
4299 Beware: length of file name argument is not limited!
4300 */
4301 static
4302 int
load_file(file,aux,from_pipe,with_display)4303 load_file (file, aux, from_pipe, with_display)
4304 char * file;
4305 FLAG aux; /* e.g. loading recovery file */
4306 FLAG from_pipe;
4307 FLAG with_display;
4308 {
4309 return load_file_position (file, aux, from_pipe, with_display, 0, 0);
4310 }
4311
4312
4313 /**
4314 Load file name (which may be a wild card name in case of MSDOS).
4315 Beware: length of file name argument is not limited!
4316 */
4317 void
load_wild_file(file,from_pipe,with_display)4318 load_wild_file (file, from_pipe, with_display)
4319 char * file;
4320 FLAG from_pipe;
4321 FLAG with_display;
4322 {
4323 (void) load_file (file, False, from_pipe, with_display);
4324 }
4325
4326
4327 /**
4328 Check if disk file has been modified since loaded.
4329 Issue warning.
4330 Called if window gets mouse focus or after returning from external cmd.
4331 */
4332 void
check_file_modified()4333 check_file_modified ()
4334 {
4335 #ifndef dont_check_modtime
4336 /* VMS does not report the proper time of the existing file;
4337 (maybe because it's looking at a prospective new version??)
4338 fixed: avoiding O_RDWR on VMS
4339 */
4340 #ifndef VAXC
4341 struct stat fstat_buf;
4342 if (stat (file_name, & fstat_buf) == 0) {
4343 FLAG filechanged;
4344 FLAG wasmodified = modified;
4345 trace_modtime (& filestat.st_mtime, "check was", file_name);
4346 trace_modtime (& fstat_buf.st_mtime, "check now", file_name);
4347 filechanged = file_changed (file_name, & fstat_buf);
4348 if (filechanged) {
4349 char * hint = "";
4350 if (! wasmodified) {
4351 hint = " (Reload with ESC #/Alt-F3)";
4352 }
4353 if (filechanged == VALID) {
4354 status_fmt ("Warning: New file appeared on disk", hint);
4355 } else if (filechanged == NOT_VALID) {
4356 status_fmt ("Warning: File was replaced on disk", hint);
4357 } else {
4358 status_fmt ("Warning: File was modified meanwhile", hint);
4359 }
4360 }
4361 }
4362 #endif
4363 #endif
4364 }
4365
4366
4367 /*
4368 * Ask user if named file should be overwritten.
4369 */
4370 FLAG
checkoverwrite(name)4371 checkoverwrite (name)
4372 char * name;
4373 {
4374 character c;
4375 #ifdef use_locking
4376 char target [maxFILENAMElen];
4377 int ret;
4378 #endif
4379 char * ov_prompt = ": OK to overwrite? (y/n/ESC)";
4380
4381 status_line ("Checking ", name);
4382 if (access (name, F_OK) < 0) { /* Cannot access file */
4383 return NOT_VALID; /* no danger of unwanted damage */
4384 }
4385
4386 #ifdef use_locking
4387 ret = getsymboliclink (get_lockfile_name (file_name), target, sizeof (target));
4388 if (ret > 0) {
4389 char mylocktext [maxFILENAMElen];
4390 setlocktarget (mylocktext);
4391 if (! streq (target, mylocktext)) {
4392 ov_prompt = " (locked): OK to overwrite? (y/n/ESC)";
4393 }
4394 } else if (ret == 0) {
4395 /* empty pseudo (plain) lock file found;
4396 assume unlock workaround */
4397 }
4398 #endif
4399 c = status2_prompt ("yn", name [0] ? name : "[unknown file]",
4400 ov_prompt);
4401 clear_status ();
4402 if (c == 'y') {
4403 return True;
4404 } else if (c == 'n') {
4405 return False;
4406 } else {
4407 /* quit = False; abort character has been given */
4408 return False;
4409 }
4410 }
4411
4412 /*
4413 * Attach new file name to buffer
4414 */
4415 static
4416 FLAG
set_NN()4417 set_NN ()
4418 {
4419 char file [maxFILENAMElen]; /* Buffer for new file name */
4420
4421 if (restricted) {
4422 restrictederr ();
4423 return False;
4424 }
4425 if (get_filename ("Enter new file name:", file, False) == ERRORS) {
4426 return False;
4427 }
4428 writing_pipe = False; /* cancel pipe output if explicitly editing file */
4429
4430 /* Remove previous file lock (if any) */
4431 unlock_file ();
4432
4433 /* Clear forced viewonly */
4434 if (viewonly_err && ! streq (file, file_name)) {
4435 viewonly_err = False;
4436 flags_changed = True;
4437 }
4438
4439 overwriteOK = False;
4440 writable = True;
4441 update_file_name (file, True, True);
4442 #ifdef unix
4443 if (modified) {
4444 RD_window_title ();
4445 }
4446 #endif
4447 check_recovery_file (False);
4448
4449 set_modified (); /* referring to different file now */
4450 relock_file ();
4451
4452 clear_status ();
4453 return True;
4454 }
4455
4456 void
NN()4457 NN ()
4458 {
4459 (void) set_NN ();
4460 }
4461
4462
4463 /*======================================================================*\
4464 |* File I/O basics *|
4465 \*======================================================================*/
4466
4467 #define dont_debug_writefile
4468
4469
4470 /*
4471 * Show file write error
4472 */
4473 static
4474 void
msg_write_error(op)4475 msg_write_error (op)
4476 char * op;
4477 {
4478 char msg [maxMSGlen];
4479
4480 ring_bell ();
4481 build_string (msg, "%s failed (File incomplete): %s", op, serror ());
4482 status_fmt2 ("", msg);
4483 }
4484
4485 /*
4486 * Flush the I/O buffer on filedescriptor fd.
4487 */
4488 int
flush_filebuf(fd)4489 flush_filebuf (fd)
4490 int fd;
4491 {
4492 if (filebuf_count == 0) { /* There is nothing to flush */
4493 return FINE;
4494 } else {
4495 char * writepoi = filebuf;
4496 int written = 0;
4497 int none_count = 0;
4498
4499 while (filebuf_count > 0) {
4500 written = write (fd, writepoi, filebuf_count);
4501 #ifdef debug_writefile
4502 printf ("write -> %d: %s\n", written, serror ());
4503 #endif
4504 if (written == -1) {
4505 if (geterrno () == EINTR && winchg) {
4506 /* try again */
4507 } else if (geterrno () == EINTR) {
4508 none_count ++;
4509 if (none_count > 9) {
4510 return ERRORS;
4511 }
4512 /* try again */
4513 } else {
4514 return ERRORS;
4515 }
4516 } else if (written == 0) {
4517 none_count ++;
4518 if (none_count > 9) {
4519 return ERRORS;
4520 }
4521 /* try again */
4522 } else {
4523 none_count = 0;
4524 filebuf_count -= written;
4525 writepoi += written;
4526 }
4527 }
4528 }
4529 filebuf_count = 0;
4530 return FINE;
4531 }
4532
4533 /*
4534 * writechar does a buffered output to file.
4535 */
4536 int
writechar(fd,c)4537 writechar (fd, c)
4538 int fd;
4539 char c;
4540 {
4541 filebuf [filebuf_count ++] = c;
4542 if (filebuf_count == filebuflen) {
4543 return flush_filebuf (fd);
4544 }
4545 return FINE;
4546 }
4547
4548 /*
4549 * writeucs writes a UCS Unicode code in UTF-16
4550 * Return # words written or ERRORS.
4551 */
4552 static
4553 int
writeucs(fd,c)4554 writeucs (fd, c)
4555 int fd;
4556 unsigned long c;
4557 {
4558 int err = FINE;
4559
4560 if (c > (unsigned long) 0x10FFFF) {
4561 return writeucs (fd, 0xFFFD);
4562 } else if (c > (unsigned long) 0xFFFF) {
4563 err = 2;
4564 c -= 0x10000;
4565 err |= writeucs (fd, 0xD800 | (c >> 10));
4566 err |= writeucs (fd, 0xDC00 | (c & 0x03FF));
4567 } else {
4568 err = 1;
4569 if (utf16_little_endian) {
4570 err |= writechar (fd, c & 0xFF);
4571 err |= writechar (fd, c >> 8);
4572 } else {
4573 err |= writechar (fd, c >> 8);
4574 err |= writechar (fd, c & 0xFF);
4575 }
4576 }
4577
4578 return err;
4579 }
4580
4581 /*
4582 * writelechar writes a line-end character to file
4583 Only called by write_lineend.
4584 */
4585 static
4586 int
writelechar(fd,c,handle_utf16)4587 writelechar (fd, c, handle_utf16)
4588 int fd;
4589 character c;
4590 FLAG handle_utf16;
4591 {
4592 if (utf8_text && utf16_file && handle_utf16) {
4593 return writeucs (fd, (character) c);
4594 } else if (ebcdic_file && handle_utf16) {
4595 mapped_text = True;
4596 c = encodedchar (c);
4597 mapped_text = False;
4598 return writechar (fd, c);
4599 } else {
4600 return writechar (fd, c);
4601 }
4602 }
4603
4604 /*
4605 * write_lineend writes a line-end in the respective form to file
4606 Called by write_line, yank_text.
4607 */
4608 int
write_lineend(fd,return_type,handle_utf16)4609 write_lineend (fd, return_type, handle_utf16)
4610 register int fd;
4611 lineend_type return_type;
4612 FLAG handle_utf16;
4613 {
4614 switch (return_type) {
4615 case lineend_NONE:
4616 return 0;
4617 case lineend_NUL:
4618 if (writelechar (fd, '\0', handle_utf16) == ERRORS) {
4619 return ERRORS;
4620 }
4621 return 1;
4622 case lineend_CRLF:
4623 if (writelechar (fd, '\r', handle_utf16) == ERRORS) {
4624 return ERRORS;
4625 }
4626 if (writelechar (fd, ebcdic_text ? code_LF : '\n', handle_utf16) == ERRORS) {
4627 return ERRORS;
4628 }
4629 return 2;
4630 case lineend_CR:
4631 if (writelechar (fd, '\r', handle_utf16) == ERRORS) {
4632 return ERRORS;
4633 }
4634 return 1;
4635 case lineend_NL1: /* ISO 8859/EBCDIC NEXT LINE U+85 */
4636 if (handle_utf16 || pasting_encoded
4637 || (pasting && pastebuf_utf8 && utf8_text)
4638 ) {
4639 if (writelechar (fd, ebcdic_text ? code_NL : (character) 0x85, handle_utf16) == ERRORS) {
4640 return ERRORS;
4641 }
4642 return 1;
4643 } else {
4644 if (writelechar (fd, '\302', handle_utf16) == ERRORS) {
4645 return ERRORS;
4646 }
4647 if (writelechar (fd, '\205', handle_utf16) == ERRORS) {
4648 return ERRORS;
4649 }
4650 return 2;
4651 }
4652 case lineend_NL2: /* Unicode NEXT LINE U+85 */
4653 if (utf8_text && utf16_file && handle_utf16) {
4654 if (writeucs (fd, 0x0085) == ERRORS) {
4655 return ERRORS;
4656 }
4657 return 1;
4658 } else {
4659 if (writelechar (fd, '\302', handle_utf16) == ERRORS) {
4660 return ERRORS;
4661 }
4662 if (writelechar (fd, '\205', handle_utf16) == ERRORS) {
4663 return ERRORS;
4664 }
4665 return 2;
4666 }
4667 case lineend_LS: /* Unicode line separator U+2028 */
4668 if (utf8_text && utf16_file && handle_utf16) {
4669 if (writeucs (fd, 0x2028) == ERRORS) {
4670 return ERRORS;
4671 }
4672 return 1;
4673 } else {
4674 if (writelechar (fd, '\342', handle_utf16) == ERRORS) {
4675 return ERRORS;
4676 }
4677 if (writelechar (fd, '\200', handle_utf16) == ERRORS) {
4678 return ERRORS;
4679 }
4680 if (writelechar (fd, '\250', handle_utf16) == ERRORS) {
4681 return ERRORS;
4682 }
4683 return 3;
4684 }
4685 case lineend_PS: /* Unicode paragraph separator U+2029 */
4686 if (utf8_text && utf16_file && handle_utf16) {
4687 if (writeucs (fd, 0x2029) == ERRORS) {
4688 return ERRORS;
4689 }
4690 return 1;
4691 } else {
4692 if (writelechar (fd, '\342', handle_utf16) == ERRORS) {
4693 return ERRORS;
4694 }
4695 if (writelechar (fd, '\200', handle_utf16) == ERRORS) {
4696 return ERRORS;
4697 }
4698 if (writelechar (fd, '\251', handle_utf16) == ERRORS) {
4699 return ERRORS;
4700 }
4701 return 3;
4702 }
4703 case lineend_LF:
4704 default:
4705 if (writelechar (fd, ebcdic_text ? code_LF : '\n', handle_utf16) == ERRORS) {
4706 return ERRORS;
4707 }
4708 return 1;
4709 }
4710 }
4711
4712 /*
4713 * write_line writes the given string on the given filedescriptor.
4714 * (buffered via writechar via misused screen buffer!)
4715 * return # bytes written or ERRORS.
4716 Only called by write_file, so handle_utf16 is always True.
4717 */
4718 static
4719 int
write_line(fd,text,return_type,handle_utf16)4720 write_line (fd, text, return_type, handle_utf16)
4721 int fd;
4722 char * text;
4723 lineend_type return_type;
4724 FLAG handle_utf16;
4725 {
4726 int len;
4727 int ccount = 0;
4728
4729 while (* text != '\0') {
4730 if (* text == '\n') {
4731 /* handle different line ends */
4732 len = write_lineend (fd, return_type, handle_utf16);
4733 if (len == ERRORS) {
4734 return ERRORS;
4735 }
4736 text ++;
4737 ccount += len;
4738 } else {
4739 if (utf8_text && utf16_file && handle_utf16) {
4740 unsigned long unichar;
4741 int utflen;
4742 utf8_info (text, & utflen, & unichar);
4743 if (UTF8_len (* text) == utflen) {
4744 len = writeucs (fd, unichar);
4745 } else {
4746 len = writeucs (fd, 0xFFFD);
4747 }
4748 if (len == ERRORS) {
4749 return ERRORS;
4750 }
4751 advance_utf8 (& text);
4752 ccount += len;
4753 } else {
4754 character c = * text;
4755
4756 if (ebcdic_file && handle_utf16) {
4757 mapped_text = True;
4758 c = encodedchar (c);
4759 mapped_text = False;
4760 }
4761
4762 if (writechar (fd, c) == ERRORS) {
4763 return ERRORS;
4764 }
4765 text ++;
4766 ccount ++;
4767 }
4768 }
4769 }
4770
4771 if (utf8_text && utf16_file && handle_utf16) {
4772 return ccount * 2;
4773 } else {
4774 return ccount;
4775 }
4776 }
4777
4778
4779 /* Call graph for writing functions:
4780 panic --\------> panicwrite --------------------------\
4781 > QUED ----------\ \
4782 ESC q --/ > ask_save --\ > write_file
4783 ESC e ---> EDIT --> edit_file -/ \ /
4784 ESC v ---> VIEW -/ \ /
4785 ESC w -------------------> WT -------------> write_text
4786 ESC W -------------------> WTU -----------/
4787 ESC z -------------------> SUSP ---------/
4788 ESC ESC -> EXED ---------> EXFILE ------/
4789 \--------> EXMINED ----/
4790 ESC t -------------------> Stag ------/
4791 */
/* Results of the most recent write_file call; both are set to -1L
   when writing, flushing or closing failed.
 */
long write_count;	/* number of bytes written */
long chars_written;	/* number of chars written */
4794
4795 /*
4796 * Write text in memory to file.
4797 */
4798 static
4799 void
write_file(fd)4800 write_file (fd)
4801 int fd;
4802 {
4803 register LINE * line;
4804 int ret = FINE;
4805 static FLAG handle_utf16 = True;
4806
4807 write_count = 0L;
4808 chars_written = 0L;
4809 clear_filebuf ();
4810
4811 if (utf8_text && utf16_file && handle_utf16) {
4812 /* prepend BOM if there was one */
4813 if (BOM && strncmp (header->next->text, "", 3) != 0) {
4814 ret = write_line (fd, "", lineend_NONE, handle_utf16);
4815 if (ret == ERRORS) {
4816 msg_write_error ("Write");
4817 write_count = -1L;
4818 chars_written = -1L;
4819 } else {
4820 ret = FINE;
4821 write_count = 2;
4822 chars_written = 1;
4823 }
4824 }
4825 }
4826
4827 if (ret == FINE) {
4828 for (line = header->next; line != tail; line = line->next) {
4829 ret = write_line (fd, line->text, line->return_type, handle_utf16);
4830 if (ret == ERRORS) {
4831 msg_write_error ("Write");
4832 write_count = -1L;
4833 chars_written = -1L;
4834 break;
4835 }
4836 write_count += (long) ret;
4837 chars_written += (long) char_count (line->text);
4838 if (line->return_type == lineend_NONE) {
4839 chars_written --;
4840 }
4841 }
4842 }
4843
4844 if (write_count > 0L && flush_filebuf (fd) == ERRORS) {
4845 if (ret != ERRORS) {
4846 msg_write_error ("Write");
4847 ret = ERRORS;
4848 }
4849 write_count = -1L;
4850 chars_written = -1L;
4851 }
4852
4853 if (close (fd) == -1) {
4854 #ifdef debug_writefile
4855 printf ("close: %s\n", serror ());
4856 #endif
4857 if (ret != ERRORS) {
4858 msg_write_error ("Close");
4859 ret = ERRORS;
4860 }
4861 write_count = -1L;
4862 chars_written = -1L;
4863 }
4864 }
4865
4866 static
4867 void
write_recovery()4868 write_recovery ()
4869 {
4870 char * recovery_fn = get_recovery_name (file_name);
4871 int fd;
4872
4873 fd = open (recovery_fn, O_CREAT | O_TRUNC | O_WRONLY | O_BINARY, bufprot);
4874 write_file (fd);
4875 }
4876
4877 int
panicwrite()4878 panicwrite ()
4879 {
4880 int fd;
4881
4882 fd = open (panic_file, O_CREAT | O_TRUNC | O_WRONLY | O_BINARY, bufprot);
4883 write_file (fd);
4884
4885 write_recovery ();
4886
4887 if (write_count == -1L) {
4888 return ERRORS;
4889 } else {
4890 return FINE;
4891 }
4892 }
4893
4894
4895 #define use_touch
4896
4897 #if defined (msdos) || defined (__ANDROID__)
4898 #undef use_touch
4899 #endif
4900
4901 #ifdef __MINGW32__
4902 #define system_touch
4903 #endif
4904
/*
 * do_backup copies file fn to its backup file name (as determined by
 * get_backup_name) and tries to give the backup the timestamp of fn.
 * Returns False if the copy failed (or no backup name was available),
 * True otherwise; on VMS it returns True without doing anything.
 * Failures of setting the timestamp are ignored.
 */
FLAG
do_backup (fn)
  char * fn;
{
#ifdef vms
  /* VMS does the backups itself */
  return True;
#else
  FLAG backup_ok = False;
  char * backup_name = get_backup_name (fn);
  if (backup_name) {
    status_line ("Copying to backup file ", backup_name);
    /* copyfile result is compared against False, True and NOT_VALID below */
    backup_ok = copyfile (fn, backup_name);
  }
  if (backup_ok == False) {
    error ("Could not copy to backup file");
    sleep (1);	/* give some time to see the hint */
    return False;
  } else if (backup_ok == True) {
    /* let backup have the original file timestamp */
#ifndef use_touch
#include <time.h>
    /* what a hack!
       djgpp reports EIO and fails
       turbo-c (utime only) reports ENOENT but yet succeeds
     */
    struct stat fstat_buf;
    if (stat (fn, & fstat_buf) == 0) {
# if defined (BSD)
      /* times [0]: access time (now), times [1]: modification time of fn */
      struct timeval times [2];
      times [0].tv_sec = 0;
      times [0].tv_usec = 0;
      (void) gettimeofday (& times [0], 0);
      times [1].tv_sec = fstat_buf.st_mtime;
      times [1].tv_usec = 0;
      (void) utimes (backup_name, times);
# else
#  ifndef __TURBOC__
#  include <utime.h>
#  endif
      struct utimbuf times;
#  ifdef __TURBOC__
      times.actime = 0;
#  else
      /* access time: now (left 0 if gettimeofday fails) */
      struct timeval now;
      now.tv_sec = 0;
      (void) gettimeofday (& now, 0);
      times.actime = now.tv_sec;
#  endif
      times.modtime = fstat_buf.st_mtime;
      (void) utime (backup_name, & times);
# endif
    }
#else /* #ifndef use_touch */
    /* On Unix, could use utime (svr4) or utimes (bsd) anyway,
       but let's avoid the year 2038 problem here...
     */
# ifdef system_touch
    /* NOTE(review): file names are embedded in single quotes in a
       shell command; a name containing a quote would break the
       quoting - to be confirmed whether callers can pass such names */
    char syscommand [maxCMDlen];
    build_string (syscommand, "touch -r '%s' '%s' 2> /dev/null", fn, backup_name);
    (void) system (syscommand);
    /*RDwin ();*/
# else
    /* Avoid crap message "couldn't set locale correctly" on SunOS */
    (void) progcallpp (NIL_PTR, -1, (char * *) 0,
        0,
        "touch",
#  ifdef __ultrix
        /* Ultrix also links with utimes although it's undeclared */
        "-f",
#  else
        "-r",
#  endif
        fn, backup_name, NIL_PTR);
# endif
#endif /* #else: use_touch */
  } else {	/* backup_ok == NOT_VALID */
    /* return True; */
  }
  return True;
#endif
}
4987
4988 /**
4989 Write text to its associated file.
4990 */
4991 static
4992 int
write_text_pos(force_write,force_savepos,keep_screenmode)4993 write_text_pos (force_write, force_savepos, keep_screenmode)
4994 FLAG force_write;
4995 FLAG force_savepos;
4996 FLAG keep_screenmode;
4997 {
4998 int fd; /* Filedescriptor of file */
4999
5000 if (writing_pipe) {
5001 fd = STD_OUT;
5002 status_line ("Writing ", "to standard output");
5003 writing_pipe = False; /* write to pipe only once */
5004
5005 /* avoid screen interference with program following in pipe */
5006 raw_mode (False);
5007 set_cursor (0, YMAX);
5008 flush ();
5009
5010 write_file (fd);
5011
5012 if (keep_screenmode) {
5013 raw_mode (True);
5014 }
5015 } else {
5016 int o_trunc;
5017 #ifndef VAXC
5018 struct stat fstat_buf;
5019 FLAG stat_pending = True;
5020 # ifdef backup_method_depends_nlinks
5021 int nlinks = 0;
5022 # endif
5023 #endif
5024
5025 if (force_write == False && modified == False) {
5026 if (file_name [0] != '\0') {
5027 fstatus ("(Write not necessary)", -1L, -1L);
5028 (void) save_open_pos (file_name, force_savepos | hop_flag);
5029 } else {
5030 status_msg ("Write not necessary.");
5031 }
5032 return FINE;
5033 }
5034 if (force_savepos == True && modified == False) {
5035 if (file_name [0] != '\0') {
5036 (void) save_open_pos (file_name, True);
5037 }
5038 }
5039
5040 /* Check if file_name is valid and if file can be written */
5041 if (file_name [0] == '\0' || writable == False) {
5042 char file_name2 [maxFILENAMElen]; /* Buffer for new file name */
5043 int ret;
5044 overwriteOK = False;
5045 ret = get_filename ("Saving edited text; Enter file name:", file_name2, False);
5046 if (ret != FINE) {
5047 return ret;
5048 }
5049 update_file_name (file_name2, True, True);
5050 #ifdef unix
5051 RD_window_title ();
5052 #endif
5053 check_recovery_file (False);
5054 }
5055
5056 if (overwriteOK == False) {
5057 FLAG ovw = checkoverwrite (file_name);
5058 if (ovw != False) {
5059 overwriteOK = True;
5060 #ifdef backup_only_edited_file
5061 backup_pending = False;
5062 #else
5063 if (ovw == NOT_VALID) {
5064 backup_pending = False;
5065 }
5066 #endif
5067 stat_pending = False;
5068 #ifndef VAXC
5069 if (stat (file_name, & fstat_buf) == 0) {
5070 if (is_dev (file_name, & fstat_buf)) {
5071 error ("Not writing to char/block device file");
5072 return ERRORS;
5073 }
5074 }
5075 #endif
5076 } else {
5077 if (quit == False) {
5078 writable = False;
5079 }
5080 return ERRORS;
5081 }
5082 } else {
5083 #ifndef dont_check_modtime
5084 /* VMS does not report the proper time of the existing file;
5085 (maybe because it's looking at a prospective new version??)
5086 fixed: avoiding O_RDWR on VMS
5087 */
5088 #ifndef VAXC
5089 stat_pending = False;
5090 if (stat (file_name, & fstat_buf) == 0) {
5091 FLAG filechanged;
5092 if (is_dev (file_name, & fstat_buf)) {
5093 overwriteOK = False;
5094 error ("Not writing to char/block device file");
5095 return ERRORS;
5096 }
5097 # ifdef backup_method_depends_nlinks
5098 nlinks = fstat_buf.st_nlink;
5099 # endif
5100 trace_modtime (& filestat.st_mtime, "write was", file_name);
5101 trace_modtime (& fstat_buf.st_mtime, "write now", file_name);
5102 filechanged = file_changed (file_name, & fstat_buf);
5103 if (filechanged) {
5104 character c = status2_prompt ("yn", file_name,
5105 filechanged == VALID
5106 ? ": New file appeared on disk - Overwrite? (y/n/ESC)"
5107 : filechanged == NOT_VALID
5108 ? ": File was replaced on disk - Overwrite? (y/n/ESC)"
5109 : ": File was modified on disk - Overwrite? (y/n/ESC)"
5110 );
5111 clear_status ();
5112 if (c == 'y') {
5113 /* go on */
5114 } else if (c == 'n') {
5115 SAVEAS ();
5116 return FINE;
5117 } else {
5118 return ERRORS;
5119 }
5120 }
5121 }
5122 #endif
5123 #endif
5124 }
5125
5126 if (overwriteOK && backup_mode && backup_pending && ! file_is_fifo) {
5127 #ifndef VAXC
5128 if (stat_pending) {
5129 stat_pending = False;
5130 if (stat (file_name, & fstat_buf) == 0) {
5131 # ifdef backup_method_depends_nlinks
5132 nlinks = fstat_buf.st_nlink;
5133 # endif
5134 }
5135 }
5136 #endif
5137 if (do_backup (file_name)) {
5138 backup_pending = False;
5139 }
5140 }
5141
5142 status_line ("Opening to write ", file_name);
5143 if (filtering_write) {
5144 /* let the filter truncate the file so the content is not lost
5145 if the filter fails to start */
5146 o_trunc = 0;
5147 } else {
5148 o_trunc = O_TRUNC;
5149 }
5150 fd = open (file_name, O_CREAT | o_trunc | O_WRONLY | O_BINARY, fprot1 | ((fprot1 >> 2) & xprot));
5151 #ifdef vms
5152 if (fd < 0) {
5153 char * version = strrchr (file_name, ';');
5154 if (version != NIL_PTR) {
5155 * version = '\0'; /* strip version */
5156 fd = open (file_name, O_CREAT | o_trunc | O_WRONLY | O_BINARY, fprot1 | ((fprot1 >> 2) & xprot));
5157 }
5158 }
5159 #endif
5160 if (fd < 0) { /* Opening for write failed */
5161 if (loaded_from_filename) {
5162 if (access (file_name, F_OK) < 0) {
5163 status_fmt ("File not accessible (retry or Save As...): ", serror ());
5164 } else {
5165 status_fmt ("File not writable (retry or Save As...): ", serror ());
5166 }
5167 } else {
5168 status_fmt ("Cannot create or write (try Save As...): ", serror ());
5169 }
5170 /* don't set writable = False as there might be a
5171 temporary network problem */
5172 return ERRORS;
5173 } else {
5174 writable = True;
5175 }
5176
5177 #ifndef __TURBOC__
5178 if (filtering_write) {
5179 int pfds [2];
5180 int pid;
5181 int status;
5182 int w;
5183 int waiterr;
5184
5185 (void) close (fd);
5186 if (pipe (pfds) < 0) {
5187 error2 ("Cannot create filter pipe: ", serror ());
5188 return ERRORS;
5189 }
5190
5191 raw_mode (False);
5192 /* clean-up primary screen */
5193 set_cursor (0, YMAX);
5194 flush ();
5195
5196 pid = fork ();
5197 if (pid < 0) { /* fork error */
5198 raw_mode (True);
5199 error2 ("Cannot fork filter: ", serror ());
5200 return ERRORS;
5201 } else if (pid == 0) { /* child */
5202 (void) close (pfds [1]);
5203 /* attach stdin to pipe */
5204 (void) dup2 (pfds [0], 0);
5205 (void) close (pfds [0]);
5206 /* invoke filter */
5207 if (strchr (filter_write, ' ')) {
5208 /* filter spec includes parameters */
5209 char filter [maxFILENAMElen];
5210 if (strstr (filter_write, "%s")) {
5211 /* filter spec has filename placeholder */
5212 sprintf (filter, filter_write, file_name);
5213 } else {
5214 /* append filename */
5215 sprintf (filter, "%s %s", filter_write, file_name);
5216 }
5217 #ifdef debug_filter
5218 printf ("system ('%s')\n", filter);
5219 #endif
5220 status = system (filter);
5221 if (status >> 8) {
5222 _exit (status >> 8);
5223 } else {
5224 _exit (status);
5225 }
5226 } else {
5227 #ifdef debug_filter
5228 printf ("%s %s\n", filter_write, file_name);
5229 #endif
5230 execlp (filter_write, "filter_write", file_name, NIL_PTR);
5231 #ifdef debug_filter
5232 printf ("_exit (127) [%s]\n", serror ());
5233 #endif
5234 _exit (127);
5235 }
5236 } else { /* pid > 0: parent */
5237 (void) close (pfds [0]);
5238 /* write file contents to pipe */
5239 write_file (pfds [1]);
5240 /* wait for filter to terminate */
5241 do {
5242 w = wait (& status);
5243 waiterr = geterrno ();
5244 } while (w != pid && (w != -1 || waiterr == EINTR));
5245 }
5246 quit = False;
5247 intr_char = False;
5248 #ifdef debug_filter
5249 printf ("wait status %04X\n", status);
5250 #endif
5251
5252 raw_mode (True);
5253 clear_status ();
5254 RD ();
5255
5256 /* check child process errors */
5257 if (w == -1) {
5258 status_fmt ("Filter wait error: ", serrorof (waiterr));
5259 write_count = -1L; /* indicate error */
5260 } else if ((status >> 8) == 127) { /* child could not exec filter */
5261 status_fmt2 (filter_write, ": Failed to start filter");
5262 write_count = -1L; /* indicate error */
5263 } else if ((status & 0x80) != 0) { /* filter dumped */
5264 status_fmt ("Filter dumped: ", dec_out (status & 0x7F));
5265 write_count = -1L; /* indicate error */
5266 } else if ((status & 0xFF) != 0) { /* filter aborted */
5267 status_fmt ("Filter aborted: ", dec_out (status & 0x7F));
5268 write_count = -1L; /* indicate error */
5269 } else if ((status >> 8) != 0) { /* filter reported error */
5270 status_fmt ("Filter error: ", serrorof (status >> 8));
5271 write_count = -1L; /* indicate error */
5272 }
5273 } else
5274 #endif
5275 {
5276 status_line ("Writing ", file_name);
5277 write_file (fd);
5278 }
5279
5280 #ifndef VAXC
5281 if (stat (file_name, & fstat_buf) == 0) {
5282 memcpy (& filestat, & fstat_buf, sizeof (struct stat));
5283 trace_modtime (& filestat.st_mtime, "write new", file_name);
5284 }
5285 #endif
5286 }
5287
5288 if (write_count == -1L) {
5289 return ERRORS;
5290 }
5291
5292 /*filelist_add (dupstr (file_name), False);*/
5293
5294 modified = False;
5295 unlock_file ();
5296 #ifdef unix
5297 RD_window_title ();
5298 #endif
5299 reading_pipe = False; /* File name is now assigned */
5300
5301 /* Display how many chars (and lines) were written */
5302 fstatus ("Wrote", write_count, chars_written);
5303 /* fstatus ("Wrote", -1L); */
5304 (void) save_open_pos (file_name, hop_flag || ! groom_info_files);
5305 return FINE;
5306 }
5307
5308 int
write_text()5309 write_text ()
5310 {
5311 return write_text_pos (False, False, True);
5312 }
5313
5314 static
5315 int
write_text_defer_screenmode()5316 write_text_defer_screenmode ()
5317 {
5318 return write_text_pos (False, False, False);
5319 }
5320
5321 static
5322 void
restore_screenmode()5323 restore_screenmode ()
5324 {
5325 if (! isscreenmode) {
5326 raw_mode (True);
5327 }
5328 }
5329
5330
5331 /*======================================================================*\
5332 |* File commands *|
5333 \*======================================================================*/
5334
5335 int
save_text_load_file(fn)5336 save_text_load_file (fn)
5337 char * fn;
5338 {
5339 if (modified) {
5340 if (write_text () == ERRORS) {
5341 return ERRORS;
5342 }
5343 }
5344
5345 (void) load_file (fn, OPEN, False, True);
5346 return FINE;
5347 }
5348
5349 void
SAVEAS()5350 SAVEAS ()
5351 {
5352 if (restricted) {
5353 restrictederr ();
5354 return;
5355 }
5356 if (set_NN () == True) {
5357 WT ();
5358 }
5359 }
5360
5361 void
WT()5362 WT ()
5363 {
5364 (void) write_text_pos (False, True, True);
5365 }
5366
5367 void
WTU()5368 WTU ()
5369 {
5370 if (restricted && viewonly) {
5371 restrictederr ();
5372 return;
5373 }
5374 (void) write_text_pos (True, True, True);
5375 }
5376
5377 /*
5378 * Ask the user if he wants to save the file or not.
5379 */
5380 static
5381 int
ask_save_recover_keepscreenmode(do_recover,keep_screenmode)5382 ask_save_recover_keepscreenmode (do_recover, keep_screenmode)
5383 FLAG do_recover;
5384 FLAG keep_screenmode;
5385 {
5386 register character c;
5387
5388 c = status2_prompt (do_recover ? "ynr" : "yn",
5389 file_name [0] ? file_name :
5390 reading_pipe ? "[standard input]"
5391 : "[new file]",
5392 do_recover
5393 ? ": Save modified text? (yes/no/to recover/ESC)"
5394 : ": Save modified text? (yes/no/ESC)");
5395 clear_status ();
5396 if (c == 'y') {
5397 return write_text_pos (False, False, keep_screenmode);
5398 } else if (c == 'r') {
5399 if (do_recover) {
5400 write_recovery ();
5401 }
5402 return FINE;
5403 } else if (c == 'n') {
5404 return FINE;
5405 } else {
5406 quit = False; /* abort character has been given */
5407 return ERRORS;
5408 }
5409 }
5410
5411 static
5412 int
ask_save()5413 ask_save ()
5414 {
5415 return ask_save_recover_keepscreenmode (True, True);
5416 }
5417
5418 static
5419 int
ask_save_no_recover()5420 ask_save_no_recover ()
5421 {
5422 return ask_save_recover_keepscreenmode (False, True);
5423 }
5424
5425 static
5426 int
ask_save_defer_screenmode()5427 ask_save_defer_screenmode ()
5428 {
5429 return ask_save_recover_keepscreenmode (True, False);
5430 }
5431
5432 /*
5433 * Edit/view another file. If the current file has been modified,
5434 * ask whether the user wants to save it.
5435 * (We could allow to switch between edit and view mode without changing
5436 * the file, but we would have to consider carefully the relationship
5437 * between viewonly and modified.)
5438 */
5439 static
5440 void
edit_file(prompt,vomode)5441 edit_file (prompt, vomode)
5442 char * prompt;
5443 FLAG vomode;
5444 {
5445 char new_file [maxFILENAMElen]; /* Buffer to hold new file name */
5446
5447 if (modified && viewonly == False && ask_save () != FINE) {
5448 return;
5449 }
5450
5451 /* Get new file name */
5452 if (get_filename (prompt, new_file, False) == ERRORS) {
5453 return;
5454 }
5455 writing_pipe = False; /* cancel pipe output if explicitly editing file */
5456
5457 viewonly_mode = vomode;
5458
5459 load_wild_file (new_file [0] == '\0' ? NIL_PTR : new_file, False, True);
5460 }
5461
5462 void
RECOVER()5463 RECOVER ()
5464 {
5465 char * rn;
5466 char orig_name [maxFILENAMElen]; /* Name of file being edited */
5467 struct stat orig_filestat;
5468
5469 if (! recovery_exists) {
5470 error ("No recovery file");
5471 return;
5472 }
5473
5474 rn = get_recovery_name (file_name);
5475
5476 if (modified && viewonly == False && ask_save_no_recover () != FINE) {
5477 status_fmt2 ("", "Aborted file recovery");
5478 return;
5479 }
5480
5481 strcpy (orig_name, file_name);
5482 memcpy (& orig_filestat, & filestat, sizeof (struct stat));
5483 if (load_file (rn, True, False, True) != ERRORS) {
5484 set_modified ();
5485 /* to do: postpone deletion until recovered file has been saved? */
5486 (void) delete_file (rn);
5487 } else {
5488 /* avoid overwriting file with incompletely recovered buffer */
5489 modified = False;
5490 overwriteOK = False;
5491 }
5492 strcpy (file_name, orig_name);
5493 memcpy (& filestat, & orig_filestat, sizeof (struct stat));
5494
5495 #ifdef unix
5496 RD_window_title ();
5497 #endif
5498 }
5499
5500 void
EDIT()5501 EDIT ()
5502 {
5503 if (restricted) {
5504 restrictederr ();
5505 return;
5506 }
5507 edit_file ("Edit file:", False);
5508 }
5509
5510 void
VIEW()5511 VIEW ()
5512 {
5513 if (restricted) {
5514 restrictederr ();
5515 return;
5516 }
5517 edit_file ("View file:", True);
5518 }
5519
5520 void
EDITmode()5521 EDITmode ()
5522 {
5523 if (restricted) {
5524 restrictederr ();
5525 return;
5526 }
5527 viewonly_mode = False;
5528 if (viewonly_locked) {
5529 status_fmt2 ("", "File is still view-only because it is locked; Unlock from File menu");
5530 sleep (2);
5531 } else if (viewonly) {
5532 status_fmt2 ("", "File is still view-only after read error");
5533 sleep (2);
5534 }
5535 FSTATUS ();
5536 flags_changed = True;
5537 }
5538
5539 void
VIEWmode()5540 VIEWmode ()
5541 {
5542 if (modified == False) {
5543 viewonly_mode = True;
5544 FSTATUS ();
5545 flags_changed = True;
5546 } else {
5547 error ("Cannot view only - already modified");
5548 }
5549 }
5550
5551 void
toggle_VIEWmode()5552 toggle_VIEWmode ()
5553 {
5554 if (viewonly) {
5555 EDITmode ();
5556 } else {
5557 VIEWmode ();
5558 }
5559 }
5560
5561 void
view_help(helpfile,item)5562 view_help (helpfile, item)
5563 char * helpfile;
5564 char * item;
5565 {
5566 char searchstring [maxPROMPTlen];
5567
5568 /* unless already viewing help, save edited text */
5569 if (viewing_help == False) {
5570 if (modified) {
5571 if (write_text () != FINE) {
5572 return;
5573 }
5574 }
5575
5576 /* save current position */
5577 save_cur_line = line_number;
5578 save_cur_pos = get_cur_pos ();
5579
5580 /* save editing mode and file name */
5581 save_restricted = restricted;
5582 copy_string (save_file_name, file_name);
5583
5584 /* set mode appropriate for viewing online help */
5585 viewonly_err = True;
5586 restricted = True;
5587 viewing_help = True;
5588
5589 /* load online help file */
5590 (void) load_file_position (helpfile, True, False, True, -1, 0);
5591 }
5592
5593 /* position to selected help topic */
5594 BFILE ();
5595 build_string (searchstring, "mined help topic '%s'", item);
5596 search_for (searchstring, FORWARD, True);
5597 }
5598
#ifdef viewing_help_within_session
#warning leaving help not properly implemented
/*
 * end_view_help leaves online help: restores the restricted mode
 * saved by view_help and reloads the previously edited file
 * at the saved position.
 */
static
void
end_view_help ()
{
  viewing_help = False;
  restricted = save_restricted;

  (void) load_file_position (save_file_name, True, False, True, save_cur_line, save_cur_pos);
}
#endif
5611
/**
   final clean-up (temp. files, terminal), exit
   no return from here!
   (Exception: if compiled with viewing_help_within_session and help
   is being viewed, only help is left and the function returns.)
 */
static
void
quit_mined ()
{
#ifdef viewing_help_within_session
  if (viewing_help) {
    end_view_help ();
    return;
  }
#endif

  /* Remove file lock (if any) */
  unlock_file ();

  delete_yank_files ();

  /* leave the cursor on a fresh line at the bottom of the screen */
  clear_status ();
  set_cursor (0, YMAX);
  putchar ('\n');
#ifdef unix
  clear_window_title ();
#endif
#ifdef msdos
  clear_screen ();
#endif
  flush ();

  /* avoid double raw_mode (False) after closing pipe */
  if (isscreenmode) {
    raw_mode (False);
    set_cursor (0, YMAX);
    flush ();
  }

  debuglog (0, 0, "close");
  exit (0);
}
5653
5654
5655 /*======================================================================*\
5656 |* File selector *|
5657 \*======================================================================*/
5658
/* Entry of the File selector list (doubly linked) */
struct fileentry {
  struct fileentry * prev;
  struct fileentry * next;
  char * fn;	/* file name */
  /* screen coordinates, see filelist_set_coord */
  short line;
  int left;
  int right;
};

/* Head of the File selector list; 0 if empty */
static struct fileentry * filelist = 0;

/**
   Count the entries of the File selector list.
 */
int
filelist_count ()
{
  int count = 0;
  struct fileentry * entry;

  for (entry = filelist; entry != 0; entry = entry->next) {
    count ++;
  }
  return count;
}
5681
5682 static struct fileentry * last_fl = 0;
5683
5684 /**
5685 Get i'th filename from File selector list.
5686 */
5687 char *
filelist_get(i)5688 filelist_get (i)
5689 int i;
5690 {
5691 struct fileentry * fl = filelist;
5692 while (i > 0 && fl) {
5693 fl = fl->next;
5694 i --;
5695 }
5696 last_fl = fl;
5697 if (fl) {
5698 return fl->fn;
5699 } else {
5700 return NIL_PTR;
5701 }
5702 }
5703
5704 /**
5705 Set screen coordinates into last delivered file entry.
5706 */
5707 void
filelist_set_coord(line,left,right)5708 filelist_set_coord (line, left, right)
5709 short line;
5710 int left;
5711 int right;
5712 {
5713 if (last_fl) {
5714 last_fl->line = line;
5715 last_fl->left = left;
5716 last_fl->right = right;
5717 }
5718 }
5719
5720 /**
5721 Search filename by screen (mouse) coordinates.
5722 */
5723 char *
filelist_search(line,col)5724 filelist_search (line, col)
5725 short line;
5726 int col;
5727 {
5728 struct fileentry * fl = filelist;
5729 while (fl) {
5730 if (fl->line == line && fl->left <= col && fl->right > col) {
5731 break;
5732 }
5733 fl = fl->next;
5734 }
5735 if (fl) {
5736 return fl->fn;
5737 } else {
5738 return NIL_PTR;
5739 }
5740 }
5741
5742 static
5743 void
filelist_append(flpoi,fn,allowdups,versionbase,prevfl)5744 filelist_append (flpoi, fn, allowdups, versionbase, prevfl)
5745 struct fileentry * * flpoi;
5746 char * fn;
5747 FLAG allowdups;
5748 char * versionbase;
5749 struct fileentry * prevfl;
5750 {
5751 if (* flpoi) {
5752 /* suppress subsequent backup/version names
5753 as generated by command line filename completion
5754 */
5755 if (versionbase && streq ((* flpoi)->fn, versionbase)) {
5756 return;
5757 }
5758
5759 if (allowdups || ! streq ((* flpoi)->fn, fn)) {
5760 filelist_append (& ((* flpoi)->next), fn, allowdups, versionbase, * flpoi);
5761 }
5762 } else {
5763 * flpoi = alloc (sizeof (struct fileentry));
5764 if (* flpoi) {
5765 top_line_dirty = True;
5766 (* flpoi)->fn = fn;
5767 (* flpoi)->line = 0;
5768 (* flpoi)->left = 0;
5769 (* flpoi)->right = 0;
5770 (* flpoi)->next = 0;
5771 (* flpoi)->prev = prevfl;
5772 }
5773 }
5774 }
5775
5776 static
5777 char *
filelist_delete_next(flpoi,fn)5778 filelist_delete_next (flpoi, fn)
5779 struct fileentry * * flpoi;
5780 char * fn;
5781 {
5782 if (* flpoi) {
5783 if (streq ((* flpoi)->fn, fn)) {
5784 top_line_dirty = True;
5785 * flpoi = (* flpoi)->next;
5786 if (* flpoi && (* flpoi)->prev) {
5787 (* flpoi)->prev = (* flpoi)->prev->prev;
5788 }
5789 if (* flpoi) {
5790 return (* flpoi)->fn;
5791 } else {
5792 return NIL_PTR;
5793 }
5794 } else {
5795 return filelist_delete_next (& ((* flpoi)->next), fn);
5796 }
5797 } else {
5798 return NIL_PTR;
5799 }
5800 }
5801
5802 static
5803 char *
filelist_next(fl,fn)5804 filelist_next (fl, fn)
5805 struct fileentry * fl;
5806 char * fn;
5807 {
5808 if (fl) {
5809 if (streq (fl->fn, fn)) {
5810 if (fl->next) {
5811 return fl->next->fn;
5812 } else {
5813 return NIL_PTR;
5814 }
5815 } else {
5816 return filelist_next (fl->next, fn);
5817 }
5818 } else {
5819 return NIL_PTR;
5820 }
5821 }
5822
5823
5824 static
5825 char *
filelist_prev(fl,fn)5826 filelist_prev (fl, fn)
5827 struct fileentry * fl;
5828 char * fn;
5829 {
5830 if (fl) {
5831 if (streq (fl->fn, fn)) {
5832 if (fl->prev) {
5833 return fl->prev->fn;
5834 } else {
5835 return NIL_PTR;
5836 }
5837 } else {
5838 return filelist_prev (fl->next, fn);
5839 }
5840 } else {
5841 return NIL_PTR;
5842 }
5843 }
5844
5845 static
5846 char *
backup_suffix(fn)5847 backup_suffix (fn)
5848 char * fn;
5849 {
5850 char * suffix = fn + strlen (fn) - 1;
5851 if (suffix >= fn && * suffix == '~') {
5852 char * suffe = suffix - 1;
5853 /* check emacs style numbered backup file name x.~N~*/
5854 while (suffe > fn && * suffe >= '0' && * suffe <= '9') {
5855 suffe --;
5856 }
5857 if (suffe < suffix - 1 && * suffe == '~') {
5858 suffe --;
5859 if (suffe >= fn && * suffe == '.') {
5860 return suffe;
5861 }
5862 }
5863 /* simple backup file name x~ */
5864 return suffix;
5865 } else {
5866 /* check VMS style numbered backup file name x;N */
5867 suffix = strrchr (fn, ';');
5868 if (suffix != NIL_PTR) {
5869 int ver = -1;
5870 char * afterver;
5871 suffix ++;
5872 afterver = scan_int (suffix, & ver);
5873 if (ver > 0 && * afterver == '\0') {
5874 suffix --;
5875 return suffix;
5876 }
5877 }
5878 }
5879 return NIL_PTR;
5880 }
5881
5882 /**
5883 Add filename to File selector list.
5884 String must not be volatile.
5885 */
5886 void
filelist_add(fn,allowdups)5887 filelist_add (fn, allowdups)
5888 char * fn;
5889 FLAG allowdups;
5890 {
5891 if (fn) {
5892 char * bs = backup_suffix (fn);
5893 if (allowdups && (bs != NIL_PTR)) {
5894 /* suppress subsequent backup/version names
5895 as generated by command line filename completion;
5896 could be separate parameter but correlates with allowdups
5897 */
5898 char basename [maxFILENAMElen];
5899 strcpy (basename, fn);
5900 basename [bs - fn] = '\0'; /* strip version suffix */
5901 filelist_append (& filelist, fn, allowdups, basename, 0);
5902 } else {
5903 filelist_append (& filelist, fn, allowdups, NIL_PTR, 0);
5904 }
5905 }
5906 }
5907
5908 static
5909 int
select_file()5910 select_file ()
5911 {
5912 int fi = 0;
5913 struct fileentry * fl = filelist;
5914 menuitemtype * filemenu;
5915
5916 if (! filelist) {
5917 error ("No files opened");
5918 return ERRORS;
5919 }
5920
5921 /* allocate menu structure */
5922 filemenu = alloc (filelist_count () * sizeof (menuitemtype));
5923 if (! filemenu) {
5924 error ("Cannot allocate memory for file menu");
5925 return ERRORS;
5926 }
5927
5928 while (fl) {
5929 fill_menuitem (& filemenu [fi ++], fl->fn, NIL_PTR);
5930 fl = fl->next;
5931 }
5932 hop_flag = 0;
5933 fi = popup_menu (filemenu, filelist_count (), 0, 4, "Switch to file", True, False, "*");
5934 if (fi < 0) {
5935 return ERRORS;
5936 }
5937
5938 #ifdef keep_position_on_reload
5939 if (streq (file_name, filemenu [fi].itemname)) {
5940 (void) load_file_position (file_name, False, False, True,
5941 line_number, get_cur_pos ());
5942 } else
5943 #endif
5944 {
5945 Pushmark ();
5946 load_wild_file (filemenu [fi].itemname, False, True);
5947 }
5948
5949 return FINE;
5950 }
5951
5952
5953 void
SELECTFILE()5954 SELECTFILE ()
5955 {
5956 if (modified && ! viewonly) {
5957 #ifdef auto_save
5958 if (write_text () == ERRORS) {
5959 return;
5960 }
5961 #else
5962 if (ask_save () != FINE) {
5963 return;
5964 }
5965 #endif
5966 }
5967
5968 (void) select_file ();
5969 }
5970
5971 void
CLOSEFILE()5972 CLOSEFILE ()
5973 {
5974 char * nextfn;
5975
5976 if (modified && ! viewonly) {
5977 #ifdef auto_save
5978 if (write_text () == ERRORS) {
5979 return;
5980 }
5981 #else
5982 if (ask_save () != FINE) {
5983 return;
5984 }
5985 #endif
5986 }
5987
5988 nextfn = filelist_delete_next (& filelist, file_name);
5989 Pushmark ();
5990 load_wild_file (nextfn, False, True);
5991 }
5992
5993 static
5994 FLAG
nextfile()5995 nextfile ()
5996 {
5997 char * nextfn = filelist_next (filelist, file_name);
5998 if (nextfn) {
5999 restore_screenmode ();
6000 Pushmark ();
6001 load_wild_file (nextfn, False, True);
6002 return True;
6003 } else {
6004 return False;
6005 }
6006 }
6007
6008 static
6009 void
edit_this_file(fn)6010 edit_this_file (fn)
6011 char * fn;
6012 {
6013 if (modified && ! viewonly) {
6014 #ifdef auto_save
6015 if (write_text () == ERRORS) {
6016 return;
6017 }
6018 #else
6019 if (ask_save () != FINE) {
6020 return;
6021 }
6022 #endif
6023 }
6024
6025 Pushmark ();
6026 load_wild_file (fn, False, True);
6027 }
6028
6029 void
NXTFILE()6030 NXTFILE ()
6031 {
6032 char * nextfn;
6033 if (hop_flag > 0) {
6034 nextfn = filelist_get (filelist_count () - 1);
6035 } else {
6036 nextfn = filelist_next (filelist, file_name);
6037 }
6038
6039 if (nextfn) {
6040 edit_this_file (nextfn);
6041 } else {
6042 error ("Already at last file");
6043 }
6044 }
6045
6046 void
PRVFILE()6047 PRVFILE ()
6048 {
6049 char * prevfn;
6050 if (hop_flag > 0) {
6051 prevfn = filelist_get (0);
6052 } else {
6053 prevfn = filelist_prev (filelist, file_name);
6054 }
6055
6056 if (prevfn) {
6057 edit_this_file (prevfn);
6058 } else {
6059 error ("Already at first file");
6060 }
6061 }
6062
6063 void
edit_nth_file(n)6064 edit_nth_file (n)
6065 int n;
6066 {
6067 char * fn = n > 0 ? filelist_get (n - 1) : NIL_PTR;
6068
6069 if (fn) {
6070 edit_this_file (fn);
6071 } else {
6072 error ("No such file");
6073 }
6074 }
6075
6076
6077 /*======================================================================*\
6078 |* Tag search with file change *|
6079 \*======================================================================*/
6080
6081 static
6082 int
get_tagline(idf,filename,search)6083 get_tagline (idf, filename, search)
6084 char * idf;
6085 char * filename;
6086 char * search;
6087 {
6088 int tags_fd = open ("tags", O_RDONLY | O_BINARY, 0);
6089 if (tags_fd >= 0) {
6090 FLAG found = False;
6091 int dumlen;
6092 FLAG modif = modified;
6093 unsigned int len = strlen (idf);
6094
6095 reset_get_line (False);
6096 flush (); /* obsolete?! clear the shared screen/get_line buffer! */
6097 while (/*found != VALID &&*/
6098 line_gotten (get_line (tags_fd, text_buffer, & dumlen, False)))
6099 {
6100 if (strncmp (idf, text_buffer, len) == 0 && text_buffer [len] == '\t') {
6101 char * poi = text_buffer + len + 1;
6102 char * outpoi;
6103 char lastpat = '\0';
6104
6105 found = True;
6106
6107 outpoi = filename;
6108 while (* poi != '\0' && * poi != '\t') {
6109 * outpoi ++ = * poi ++;
6110 }
6111 * outpoi = '\0';
6112
6113 outpoi = search;
6114 poi ++;
6115 if (* poi == '/') {
6116 poi ++;
6117 }
6118 while (* poi != '\0' && (* poi != '/' || lastpat == '\\')) {
6119 if (* poi == '[' || * poi == ']' || * poi == '*') {
6120 * outpoi ++ = '\\';
6121 }
6122 lastpat = * poi ++;
6123 * outpoi ++ = lastpat;
6124 }
6125 * outpoi = '\0';
6126 } else if (found == True) {
6127 found = VALID;
6128 }
6129 }
6130 (void) close (tags_fd);
6131 clear_filebuf ();
6132
6133 modified = modif; /* don't let the tags file affect the modified flag */
6134
6135 if (found == False) {
6136 error2 ("Identifier not found in tags file: ", idf);
6137 return ERRORS;
6138 } else {
6139 return FINE;
6140 }
6141 } else {
6142 error ("No tags file present; apply the ctags command to your source files");
6143 return ERRORS;
6144 }
6145 }
6146
6147 /*
6148 * Stag () opens file and moves to idf, using tags file
6149 */
6150 void
Stag()6151 Stag ()
6152 {
6153 char idf_buf [maxLINElen]; /* identifier to search for */
6154 char new_file [maxFILENAMElen]; /* new file name */
6155 char search [maxLINElen]; /* search expression */
6156 FLAG go_idf = True;
6157
6158 if (hop_flag > 0) {
6159 if (get_string ("Enter identifier (to locate definition):", idf_buf, True, "") != FINE) {
6160 return;
6161 }
6162 } else if (cur_text == cur_line->text &&
6163 (* cur_text == '#' || strisprefix ("include", cur_text))) {
6164 char * cp = cur_text;
6165 if (* cp == '#') {
6166 cp ++;
6167 }
6168 while (white_space (* cp)) {
6169 cp ++;
6170 }
6171 if (strisprefix ("include", cp)) {
6172 char * ep = NIL_PTR;
6173 cp += 7;
6174 while (white_space (* cp)) {
6175 cp ++;
6176 }
6177 if (* cp == '"') {
6178 cp ++;
6179 ep = strchr (cp, '"');
6180 strcpy (new_file, "");
6181 } else if (* cp == '<') {
6182 cp ++;
6183 ep = strchr (cp, '>');
6184 strcpy (new_file, "/usr/include/");
6185 } else {
6186 ep = strchr (cp, '\n');
6187 strcpy (new_file, "");
6188 }
6189 if (ep && ep - cp < maxFILENAMElen - strlen (new_file)) {
6190 strncat (new_file, cp, ep - cp);
6191 strcpy (search, "");
6192 go_idf = False;
6193 } else {
6194 error ("No include file name");
6195 return;
6196 }
6197 }
6198 } else {
6199 if (get_idf (idf_buf, cur_text, cur_line->text) == ERRORS) {
6200 return;
6201 }
6202 }
6203
6204 if (go_idf) {
6205 if (get_tagline (idf_buf, new_file, search) == ERRORS) {
6206 return;
6207 }
6208 }
6209
6210 Pushmark ();
6211
6212 if (! streq (new_file, file_name)) {
6213 FLAG save_lineends_detectCR = lineends_detectCR;
6214 /* force line counting compatible with ctags */
6215 lineends_detectCR = True;
6216 if (save_text_load_file (new_file) == ERRORS) {
6217 lineends_detectCR = save_lineends_detectCR;
6218 return;
6219 }
6220 }
6221
6222 if (* search >= '0' && * search <= '9') {
6223 int lineno;
6224 LINE * line = header->next;
6225
6226 (void) scan_int (search, & lineno);
6227
6228 /* don't call goline for two reaons:
6229 line # mismatch in presence of Mac or Unicode line ends
6230 don't call Pushmark
6231 */
6232 while (lineno > 1 && line != tail) {
6233 if (line->return_type == lineend_LF
6234 || line->return_type == lineend_CR
6235 || line->return_type == lineend_CRLF) {
6236 lineno --;
6237 }
6238 line = line->next;
6239 }
6240 clear_status ();
6241 move_y (find_y (line));
6242 } else {
6243 search_for (search, FORWARD, False);
6244 }
6245 }
6246
6247
6248 /*======================================================================*\
6249 |* Checkin/out *|
6250 \*======================================================================*/
6251
6252 /*
6253 * Checkout (from version managing system).
6254 */
6255 void
checkout()6256 checkout ()
6257 {
6258 int save_cur_pos;
6259 int save_cur_line;
6260 char syscommand [maxCMDlen]; /* Buffer for full system command */
6261 int sysres;
6262
6263 if (modified) {
6264 if (write_text () != FINE) {
6265 return;
6266 }
6267 }
6268
6269 /* save current position */
6270 save_cur_line = line_number;
6271 save_cur_pos = get_cur_pos ();
6272
6273 /* try to check out */
6274 build_string (syscommand, "co %s", file_name);
6275 sysres = systemcall (NIL_PTR, syscommand, 1);
6276 RDwin ();
6277 if (sysres != 0) {
6278 error ("Checkout failed");
6279 }
6280
6281 /* reload file */
6282 (void) load_file_position (file_name, True, False, True, save_cur_line, save_cur_pos);
6283 }
6284
6285 /*
6286 * Checkin (to version managing system).
6287 */
6288 void
checkin()6289 checkin ()
6290 {
6291 char syscommand [maxCMDlen]; /* Buffer for full system command */
6292 int sysres;
6293
6294 if (modified) {
6295 if (write_text () != FINE) {
6296 return;
6297 }
6298 }
6299
6300 /* try to check in */
6301 build_string (syscommand, "ci %s", file_name);
6302 sysres = systemcall (NIL_PTR, syscommand, 1);
6303 RDwin ();
6304 if (sysres != 0) {
6305 error ("Checkin failed");
6306 }
6307 }
6308
6309
6310 /*======================================================================*\
6311 |* Exiting *|
6312 \*======================================================================*/
6313
6314 /*
6315 * Leave editor. If the file has changed, ask if the user wants to save it.
6316 */
6317 void
QUED()6318 QUED ()
6319 {
6320 if (modified && viewonly == False && ask_save_defer_screenmode () != FINE) {
6321 restore_screenmode ();
6322 return;
6323 }
6324
6325 quit_mined ();
6326 restore_screenmode ();
6327 }
6328
6329 /*
6330 * Exit editing current file. If the file has changed, save it.
6331 * Edit next file if there is one.
6332 */
6333 void
EXFILE()6334 EXFILE ()
6335 {
6336 if (modified) {
6337 if (write_text_defer_screenmode () != FINE) {
6338 restore_screenmode ();
6339 return;
6340 }
6341 }
6342
6343 if (hop_flag == 0) {
6344 if (! nextfile ()) {
6345 quit_mined ();
6346 restore_screenmode ();
6347 }
6348 } else {
6349 quit_mined ();
6350 restore_screenmode ();
6351 }
6352 }
6353
6354 /*
6355 * Exit editor. If the file has changed, save it.
6356 */
6357 void
EXMINED()6358 EXMINED ()
6359 {
6360 if (modified) {
6361 if (write_text_defer_screenmode () != FINE) {
6362 restore_screenmode ();
6363 return;
6364 }
6365 }
6366
6367 quit_mined ();
6368 restore_screenmode ();
6369 }
6370
6371 /*
6372 * Exit editing current file.
6373 Either switch to next file or exit editor.
6374 */
6375 void
EXED()6376 EXED ()
6377 {
6378 if (multiexit) {
6379 EXFILE ();
6380 } else {
6381 EXMINED ();
6382 }
6383 }
6384
6385
6386 /*======================================================================*\
6387 |* End *|
6388 \*======================================================================*/
6389