xref: /dragonfly/contrib/file/magic/Magdir/rtf (revision 7d3e9a5b)
1
2#------------------------------------------------------------------------------
3# $File: rtf,v 1.9 2020/12/12 20:01:47 christos Exp $
4# rtf:	file(1) magic for Rich Text Format (RTF)
5#
6# Duncan P. Simpson, D.P.Simpson@dcs.warwick.ac.uk
7# Update:	Joerg Jenderek
8# URL:		https://en.wikipedia.org/wiki/Rich_Text_Format
9# Reference:	http://www.snake.net/software/RTF/RTF-Spec-1.7.rtf
10#		http://www.kleinlercher.at/tools/Windows_Protocols/Word2007RTFSpec9.pdf
# Top-level entry for Rich Text Format: the file must begin with the literal
# text {\rtf. The description and the !:mime/!:apple/!:ext annotations are
# attached to the second guard below, so they apply only when both guards pass.
110	string		{\\rtf
12# skip DROID fmt-355-signature-id-522.rtf by looking for valid version
# (guard 1: the byte at offset 5, right after {\rtf, must not be 0xAB)
13>5	ubyte		!0xAB
14# skip also \ in DROID fmt-50-signature-id-158.rtf by looking for valid version
# (guard 2: the same byte must not be 0x5C, a backslash)
15>>5	ubyte		!0x5C		Rich Text Format data
16!:mime	text/rtf
17!:apple	????RTF
18!:ext	rtf
# Delegate the detail output to the named entry rtf-info, starting at offset 0.
19>>>0	use		rtf-info
20#	display information like version, language and code page of RTF
# Named entry; this file invokes it via "use rtf-info" from the {\rtf, {\urtf
# and {\pwd entries. Offsets below are written for the {\rtf layout, where the
# 5-byte signature occupies offsets 0-4.
210	name		rtf-info
22# 1 mostly, 2 for newer Pocket Word documents, space for test like fdo78502.rtf, { for some urtf
# Print the byte at offset 5 as the version character unless it is { (0x7b).
23>5	ubyte		!0x7b		\b, version %c
24# The word for character set must precede any text or most other control words
25>6	string		\\mac		\b, Apple Macintosh
# \pc prints nothing by itself; the byte that follows it (offset 9) decides
# between \pca (0x61 is 'a') and plain \pc.
26>6	string		\\pc
27# control word \pca
28>>9	ubyte		=0x61		\b, IBM PS/2, code page 850
29>>9	ubyte		!0x61		\b, IBM PC, code page 437
30# unknown character set or ANSI later after control words like
31# \adeflang1025 \info \title \author \category \manager
32# "Burow, Steffanie - Im Tal des Schneeleoparden.rtf"
33#>6	search/105	\\ansi		\b, ANSI
34>6	search/502	\\ansi		\b, ANSI
# NOTE(review): magic(5) describes "default" as matching only when no other
# test at the same continuation level has matched before. The version test at
# offset 5 above is on this same level, so this line may be reachable only
# when the version byte is { -- confirm against file(1) behaviour. The
# language section further down re-tests offset 5 to open a new level before
# using "default", which looks like a workaround for the same rule.
35>6	default		x		\b, unknown character set
36# look for explicit codepage keyword
37# "Burow, Steffanie - Im Tal des Schneeleoparden.rtf"
38#>5	search/110	\\ansicpg
39>5	search/500	\\ansicpg
40# skip unknown or buggy codepage string 0 like in fdo78502.rtf
# (0x30 is the ASCII digit 0)
41>>&0	ubyte		!0x30		\b, code page
42# codepage string: 437~United States IBM, ..., 1252~WesternEuropean, ..., 57011~Punjabi
# %-.3s prints at most the first three characters of the code page string.
43>>>&-1		string	x		%-.3s
44# skip space or \ and display possible 4th digit of code page string
# The >0x2F test followed by the <0x3A test accepts only bytes 0x30-0x39,
# i.e. ASCII digits; the byte is printed only when both tests pass.
45>>>&2		ubyte	>0x2F
46>>>>&-1		ubyte	<0x3A		\b%c
47# possible 5th digit of code page string
48>>>>>&0		ubyte	>0x2F
49>>>>>>&-1	ubyte	<0x3A		\b%c
50# look again at version byte to use default clause
# "x" always matches; this line prints nothing and only serves as the parent
# of the language tests below.
51>5	ubyte		x
52# Default language ID for South Asian/Middle Eastern text
53# language ID: 1025, ..., 1065~Persian, ..., 2057~English_UnitedKingdom, ..., 58380~French_NorthAfrica
54# Readme-0.72-Persian.rtf
55#>6	search/1	\\adeflang	\b, default middle east language ID
56>>6	search/497	\\adeflang	\b, default middle east language ID
57# https://docs.microsoft.com/en-us/openspecs/office_standards/ms-oe376/6c085406-a698-4e12-9d4d-c3b0ee3dbc4a
# %.4s prints at most four characters of the language ID.
58>>>&0	string		x		%.4s
59# skip \ and NL and show possible 5th digit of language string
60>>>&4	ubyte		>0x2F
61>>>>&-1	ubyte		<0x3A		\b%c
62# else look for default language to be used when the \plain control word is encountered
# This "default" is on the same level as the \adeflang search above.
63>>6	default		x
64# "Burow, Steffanie - Im Tal des Schneeleoparden.rtf"
65#>>>6	search/127	\\deflang
66>>>6	search/505	\\deflang
67>>>>&0	string		>0		\b, default language ID %-.4s
68# possible 5th digit of language string
69>>>>&4		ubyte	>0x2F
70>>>>>&-1	ubyte	<0x3A		\b%c
71
72# Reference:	http://latex2rtf.sourceforge.net/rtfspec_63.html
73# Note:		no real world example found
# Top-level entry for the {\urtf signature. It shares the MIME type and
# extension of the {\rtf entry; the !:apple annotation is left commented out.
740	string		{\\urtf		Rich Text Format unicoded data
75!:mime	text/rtf
76#!:apple	????RTF
77!:ext	rtf
# rtf-info is invoked from offset 1 instead of 0: {\urtf is one byte longer
# than {\rtf, and rtf-info's offsets are written for the {\rtf layout.
# NOTE(review): this relies on "use" applying the named entry's offsets
# relative to the given offset -- confirm in magic(5).
78>1	use		rtf-info
79
80# URL:		https://en.wikipedia.org/wiki/Microsoft_Word
81# Reference:	http://fileformats.archiveteam.org/wiki/Microsoft_Word
82# Note:	called by TrID "Pocket Word document"
83#	by PlanMaker "Pocket Word-Handheld PC" for pwd
84#	by PlanMaker "Pocket Word-Pocket PC" for psw
# Top-level entry for Pocket Word: the file must begin with the literal text
# {\pwd.
850	string		{\\pwd		Pocket Word document or template
86# by SoftMaker Office	http://extension.nirsoft.net/pwd
87#!:mime	application/msword
88# https://reposcope.com/mimetype/application/x-pocket-word
89!:mime	application/x-pocket-word
90# PWD for Handheld PC variant and PSW for Pocket PC variant
91# PWT for template
92!:ext	pwd/psw/pwt
# {\pwd has the same length as {\rtf, so rtf-info is invoked from offset 0.
93>0	use		rtf-info
94
95