1 /* fsys_reiserfs.c - an implementation for the ReiserFS filesystem */
2 /*
3  *  GRUB  --  GRand Unified Bootloader
4  *  Copyright (C) 2000, 2001  Free Software Foundation, Inc.
5  *
6  *  This program is free software; you can redistribute it and/or modify
7  *  it under the terms of the GNU General Public License as published by
8  *  the Free Software Foundation; either version 2 of the License, or
9  *  (at your option) any later version.
10  *
11  *  This program is distributed in the hope that it will be useful,
12  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
13  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  *  GNU General Public License for more details.
15  *
16  *  You should have received a copy of the GNU General Public License
17  *  along with this program; if not, write to the Free Software
18  *  Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston,
19  *  MA 02110-1301, USA.
20  */
21 
22 #ifdef FSYS_REISERFS
23 #include "shared.h"
24 #include "filesys.h"
25 
26 #undef REISERDEBUG
27 
28 /* Some parts of this code (mainly the structures and defines) are
29  * from the original reiser fs code, as found in the linux kernel.
30  */
31 
32 /* include/asm-i386/types.h */
33 typedef __signed__ char __s8;
34 typedef unsigned char __u8;
35 typedef __signed__ short __s16;
36 typedef unsigned short __u16;
37 typedef __signed__ int __s32;
38 typedef unsigned int __u32;
39 typedef unsigned long long __u64;
40 
41 /* linux/posix_type.h */
42 typedef long linux_off_t;
43 
44 #include "libc/byteorder.h"
45 
46 /* include/linux/reiser_fs.h */
47 /* This is the new super block of a journaling reiserfs system */
48 struct reiserfs_super_block
49 {
50   __u32 s_block_count;			/* blocks count         */
51   __u32 s_free_blocks;                  /* free blocks count    */
52   __u32 s_root_block;           	/* root block number    */
53   __u32 s_journal_block;           	/* journal block number    */
54   __u32 s_journal_dev;           	/* journal device number  */
55   __u32 s_journal_size; 		/* size of the journal on FS creation.  used to make sure they don't overflow it */
56   __u32 s_journal_trans_max;            /* max number of blocks in a transaction.  */
57   __u32 s_journal_magic;                /* random value made on fs creation */
58   __u32 s_journal_max_batch;            /* max number of blocks to batch into a trans */
59   __u32 s_journal_max_commit_age;       /* in seconds, how old can an async commit be */
60   __u32 s_journal_max_trans_age;        /* in seconds, how old can a transaction be */
61   __u16 s_blocksize;                   	/* block size           */
62   __u16 s_oid_maxsize;			/* max size of object id array  */
63   __u16 s_oid_cursize;			/* current size of object id array */
64   __u16 s_state;                       	/* valid or error       */
65   char s_magic[16];                     /* reiserfs magic string indicates that file system is reiserfs */
66   __u16 s_tree_height;                  /* height of disk tree */
67   __u16 s_bmap_nr;                      /* amount of bitmap blocks needed to address each block of file system */
68   __u16 s_version;
69   char s_unused[128];			/* zero filled by mkreiserfs */
70 };
71 
72 #define REISERFS_MAX_SUPPORTED_VERSION 2
73 #define REISERFS_SUPER_MAGIC_STRING "ReIsErFs"
74 #define REISER2FS_SUPER_MAGIC_STRING "ReIsEr2Fs"
75 
76 #define MAX_HEIGHT 7
77 
78 /* must be correct to keep the desc and commit structs at 4k */
79 #define JOURNAL_TRANS_HALF 1018
80 
81 /* first block written in a commit.  */
82 struct reiserfs_journal_desc {
83   __u32 j_trans_id;			/* id of commit */
84   __u32 j_len;				/* length of commit. len +1 is the commit block */
85   __u32 j_mount_id;			/* mount id of this trans*/
86   __u32 j_realblock[JOURNAL_TRANS_HALF]; /* real locations for the first blocks */
87   char j_magic[12];
88 };
89 
90 /* last block written in a commit */
91 struct reiserfs_journal_commit {
92   __u32 j_trans_id;			/* must match j_trans_id from the desc block */
93   __u32 j_len;			/* ditto */
94   __u32 j_realblock[JOURNAL_TRANS_HALF]; /* real locations for the last blocks */
95   char j_digest[16];			/* md5 sum of all the blocks involved, including desc and commit. not used, kill it */
96 };
97 
98 /* this header block gets written whenever a transaction is considered
99    fully flushed, and is more recent than the last fully flushed
100    transaction.
101    fully flushed means all the log blocks and all the real blocks are
102    on disk, and this transaction does not need to be replayed.
103 */
104 struct reiserfs_journal_header {
105   /* id of last fully flushed transaction */
106   __u32 j_last_flush_trans_id;
107   /* offset in the log of where to start replay after a crash */
108   __u32 j_first_unflushed_offset;
109   /* mount id to detect very old transactions */
110   __u32 j_mount_id;
111 };
112 
113 /* magic string to find desc blocks in the journal */
114 #define JOURNAL_DESC_MAGIC "ReIsErLB"
115 
116 
117 /*
118  * directories use this key as well as old files
119  */
120 struct offset_v1
121 {
122   /*
123    * for regular files this is the offset to the first byte of the
124    * body, contained in the object-item, as measured from the start of
125    * the entire body of the object.
126    *
127    * for directory entries, k_offset consists of hash derived from
128    * hashing the name and using few bits (23 or more) of the resulting
129    * hash, and generation number that allows distinguishing names with
130    * hash collisions. If number of collisions overflows generation
131    * number, we return EEXIST.  High order bit is 0 always
132    */
133   __u32 k_offset;
134   __u32 k_uniqueness;
135 };
136 
137 struct offset_v2
138 {
139   /*
140    * for regular files this is the offset to the first byte of the
141    * body, contained in the object-item, as measured from the start of
142    * the entire body of the object.
143    *
144    * for directory entries, k_offset consists of hash derived from
145    * hashing the name and using few bits (23 or more) of the resulting
146    * hash, and generation number that allows distinguishing names with
147    * hash collisions. If number of collisions overflows generation
148    * number, we return EEXIST.  High order bit is 0 always
149    */
150   __u64 k_offset:60;
151   __u64 k_type: 4;
152 };
153 
154 
155 struct key
156 {
157   /* packing locality: by default parent directory object id */
158   __u32 k_dir_id;
159   /* object identifier */
160   __u32 k_objectid;
161   /* the offset and node type (old and new form) */
162   union
163   {
164     struct offset_v1 v1;
165     struct offset_v2 v2;
166   }
167   u;
168 };
169 
170 #define KEY_SIZE (sizeof (struct key))
171 
172 /* Header of a disk block.  More precisely, header of a formatted leaf
173    or internal node, and not the header of an unformatted node. */
174 struct block_head
175 {
176   __u16 blk_level;        /* Level of a block in the tree. */
177   __u16 blk_nr_item;      /* Number of keys/items in a block. */
178   __u16 blk_free_space;   /* Block free space in bytes. */
179   struct key  blk_right_delim_key; /* Right delimiting key for this block (supported for leaf level nodes
180 				      only) */
181 };
182 #define BLKH_SIZE (sizeof (struct block_head))
183 #define DISK_LEAF_NODE_LEVEL  1 /* Leaf node level.                       */
184 
185 struct item_head
186 {
187   struct key ih_key; 	/* Everything in the tree is found by searching for it based on its key.*/
188 
189   union
190   {
191     __u16 ih_free_space; /* The free space in the last unformatted node of an indirect item if this
192 			    is an indirect item.  This equals 0xFFFF iff this is a direct item or
193 			    stat data item. Note that the key, not this field, is used to determine
194 			    the item type, and thus which field this union contains. */
195     __u16 ih_entry_count; /* Iff this is a directory item, this field equals the number of directory
196 			     entries in the directory item. */
197   }
198   u;
199   __u16 ih_item_len;           /* total size of the item body                  */
200   __u16 ih_item_location;      /* an offset to the item body within the block  */
201   __u16 ih_version;	       /* ITEM_VERSION_1 for all old items,
202 				  ITEM_VERSION_2 for new ones.
203 				  Highest bit is set by fsck
204                                   temporary, cleaned after all done */
205 };
206 /* size of item header     */
207 #define IH_SIZE (sizeof (struct item_head))
208 
209 #define ITEM_VERSION_1 0
210 #define ITEM_VERSION_2 1
211 #define IH_KEY_OFFSET(ih) ((ih)->ih_version == ITEM_VERSION_1 \
212 			   ? (ih)->ih_key.u.v1.k_offset \
213 			   : (ih)->ih_key.u.v2.k_offset)
214 
215 #define IH_KEY_ISTYPE(ih, type) ((ih)->ih_version == ITEM_VERSION_1 \
216 				 ? (ih)->ih_key.u.v1.k_uniqueness == V1_##type \
217 				 : (ih)->ih_key.u.v2.k_type == V2_##type)
218 
219 /* FIXME these types look wrong. */
220 struct disk_child
221 {
222   unsigned long       dc_block_number;              /* Disk child's block number. */
223   unsigned short      dc_size;		            /* Disk child's used space.   */
224 };
225 
226 #define DC_SIZE (sizeof (struct disk_child))
227 
228 /* Stat Data on disk.
229  *
230  * Note that reiserfs has two different forms of stat data.  Luckily
231  * the fields needed by grub are at the same position.
232  */
233 struct stat_data
234 {
235   __u16 sd_mode;	/* file type, permissions */
236   __u16 sd_notused1[3]; /* fields not needed by reiserfs */
237   __u32 sd_size;	/* file size */
238   __u32 sd_size_hi;	/* file size high 32 bits (since version 2) */
239 };
240 
241 struct reiserfs_de_head
242 {
243   __u32 deh_offset;  /* third component of the directory entry key */
244   __u32 deh_dir_id;  /* objectid of the parent directory of the
245 			object, that is referenced by directory entry */
246   __u32 deh_objectid;/* objectid of the object, that is referenced by
247                         directory entry */
248   __u16 deh_location;/* offset of name in the whole item */
249   __u16 deh_state;   /* whether 1) entry contains stat data (for
250 			future), and 2) whether entry is hidden
251 			(unlinked) */
252 };
253 
254 #define DEH_SIZE (sizeof (struct reiserfs_de_head))
255 
256 #define DEH_Statdata (1 << 0)			/* not used now */
257 #define DEH_Visible  (1 << 2)
258 
259 #define SD_OFFSET  0
260 #define SD_UNIQUENESS 0
261 #define DOT_OFFSET 1
262 #define DOT_DOT_OFFSET 2
263 #define DIRENTRY_UNIQUENESS 500
264 
265 #define V1_TYPE_STAT_DATA 0x0
266 #define V1_TYPE_DIRECT 0xffffffff
267 #define V1_TYPE_INDIRECT 0xfffffffe
268 #define V1_TYPE_DIRECTORY_MAX 0xfffffffd
269 #define V2_TYPE_STAT_DATA 0
270 #define V2_TYPE_INDIRECT 1
271 #define V2_TYPE_DIRECT 2
272 #define V2_TYPE_DIRENTRY 3
273 
274 #define REISERFS_ROOT_OBJECTID 2
275 #define REISERFS_ROOT_PARENT_OBJECTID 1
276 #define REISERFS_DISK_OFFSET_IN_BYTES (64 * 1024)
277 /* the spot for the super in versions 3.5 - 3.5.11 (inclusive) */
278 #define REISERFS_OLD_DISK_OFFSET_IN_BYTES (8 * 1024)
279 #define REISERFS_OLD_BLOCKSIZE 4096
280 
281 #define S_ISREG(mode) (((mode) & 0170000) == 0100000)
282 #define S_ISDIR(mode) (((mode) & 0170000) == 0040000)
283 #define S_ISLNK(mode) (((mode) & 0170000) == 0120000)
284 
285 #define PATH_MAX       1024	/* include/linux/limits.h */
286 #define MAX_LINK_COUNT    5	/* number of symbolic links to follow */
287 
288 /* The size of the node cache */
289 #define FSYSREISER_CACHE_SIZE 24*1024
290 #define FSYSREISER_MIN_BLOCKSIZE SECTOR_SIZE
291 #define FSYSREISER_MAX_BLOCKSIZE FSYSREISER_CACHE_SIZE / 3
292 
293 /* Info about currently opened file */
294 struct fsys_reiser_fileinfo
295 {
296   __u32 k_dir_id;
297   __u32 k_objectid;
298 };
299 
300 /* In memory info about the currently mounted filesystem */
301 struct fsys_reiser_info
302 {
303   /* The last read item head */
304   struct item_head *current_ih;
305   /* The last read item */
306   char *current_item;
307   /* The information for the currently opened file */
308   struct fsys_reiser_fileinfo fileinfo;
309   /* The start of the journal */
310   __u32 journal_block;
311   /* The size of the journal */
312   __u32 journal_block_count;
313   /* The first valid descriptor block in journal
314      (relative to journal_block) */
315   __u32 journal_first_desc;
316 
317   /* The ReiserFS version. */
318   __u16 version;
319   /* The current depth of the reiser tree. */
320   __u16 tree_depth;
321   /* SECTOR_SIZE << blocksize_shift == blocksize. */
322   __u8  blocksize_shift;
323   /* 1 << full_blocksize_shift == blocksize. */
324   __u8  fullblocksize_shift;
325   /* The reiserfs block size  (must be a power of 2) */
326   __u16 blocksize;
327   /* The number of cached tree nodes */
328   __u16 cached_slots;
329   /* The number of valid transactions in journal */
330   __u16 journal_transactions;
331 
332   unsigned int blocks[MAX_HEIGHT];
333   unsigned int next_key_nr[MAX_HEIGHT];
334 };
335 
336 /* The cached s+tree blocks in FSYS_BUF,  see below
337  * for a more detailed description.
338  */
339 #define ROOT     ((char *)FSYS_BUF)
340 #define CACHE(i) (ROOT + ((i) << INFO->fullblocksize_shift))
341 #define LEAF     CACHE (DISK_LEAF_NODE_LEVEL)
342 
343 #define BLOCKHEAD(cache) ((struct block_head *) cache)
344 #define ITEMHEAD         ((struct item_head  *) ((char *) LEAF + BLKH_SIZE))
345 #define KEY(cache)       ((struct key        *) ((char *) cache + BLKH_SIZE))
346 #define DC(cache)        ((struct disk_child *) \
347 			  ((char *) cache + BLKH_SIZE + KEY_SIZE * nr_item))
348 /* The fsys_reiser_info block.
349  */
350 #define INFO \
351     ((struct fsys_reiser_info *) ((char *) FSYS_BUF + FSYSREISER_CACHE_SIZE))
352 /*
353  * The journal cache.  For each transaction it contains the number of
354  * blocks followed by the real block numbers of this transaction.
355  *
356  * If the block numbers of some transaction won't fit in this space,
357  * this list is stopped with a 0xffffffff marker and the remaining
358  * uncommitted transactions aren't cached.
359  */
360 #define JOURNAL_START    ((__u32 *) (INFO + 1))
361 #define JOURNAL_END      ((__u32 *) (FSYS_BUF + FSYS_BUFLEN))
362 
363 static __inline__ int
is_power_of_two(unsigned long word)364 is_power_of_two (unsigned long word)
365 {
366   return (word & -word) == word;
367 }
368 
369 static int
journal_read(int block,int len,char * buffer)370 journal_read (int block, int len, char *buffer)
371 {
372   return devread ((INFO->journal_block + block) << INFO->blocksize_shift,
373 		  0, len, buffer);
374 }
375 
376 /* Read a block from ReiserFS file system, taking the journal into
377  * account.  If the block nr is in the journal, the block from the
378  * journal taken.
379  */
380 static int
block_read(int blockNr,int start,int len,char * buffer)381 block_read (int blockNr, int start, int len, char *buffer)
382 {
383   int transactions = INFO->journal_transactions;
384   int desc_block = INFO->journal_first_desc;
385   int journal_mask = INFO->journal_block_count - 1;
386   int translatedNr = blockNr;
387   __u32 *journal_table = JOURNAL_START;
388   while (transactions-- > 0)
389     {
390       int i = 0;
391       int j_len;
392       if (*journal_table != 0xffffffff)
393 	{
394 	  /* Search for the blockNr in cached journal */
395 	  j_len = *journal_table++;
396 	  while (i++ < j_len)
397 	    {
398 	      if (*journal_table++ == blockNr)
399 		{
400 		  journal_table += j_len - i;
401 		  goto found;
402 		}
403 	    }
404 	}
405       else
406 	{
407 	  /* This is the end of cached journal marker.  The remaining
408 	   * transactions are still on disk.
409 	   */
410 	  struct reiserfs_journal_desc   desc;
411 	  struct reiserfs_journal_commit commit;
412 
413 	  if (! journal_read (desc_block, sizeof (desc), (char *) &desc))
414 	    return 0;
415 
416 	  j_len = desc.j_len;
417 	  while (i < j_len && i < JOURNAL_TRANS_HALF)
418 	    if (desc.j_realblock[i++] == blockNr)
419 	      goto found;
420 
421 	  if (j_len >= JOURNAL_TRANS_HALF)
422 	    {
423 	      int commit_block = (desc_block + 1 + j_len) & journal_mask;
424 	      if (! journal_read (commit_block,
425 				  sizeof (commit), (char *) &commit))
426 		return 0;
427 	      while (i < j_len)
428 		if (commit.j_realblock[i++ - JOURNAL_TRANS_HALF] == blockNr)
429 		  goto found;
430 	    }
431 	}
432       goto not_found;
433 
434     found:
435       translatedNr = INFO->journal_block + ((desc_block + i) & journal_mask);
436 #ifdef REISERDEBUG
437       printf ("block_read: block %d is mapped to journal block %d.\n",
438 	      blockNr, translatedNr - INFO->journal_block);
439 #endif
440       /* We must continue the search, as this block may be overwritten
441        * in later transactions.
442        */
443     not_found:
444       desc_block = (desc_block + 2 + j_len) & journal_mask;
445     }
446   return devread (translatedNr << INFO->blocksize_shift, start, len, buffer);
447 }
448 
449 /* Init the journal data structure.  We try to cache as much as
450  * possible in the JOURNAL_START-JOURNAL_END space, but if it is full
451  * we can still read the rest from the disk on demand.
452  *
453  * The first number of valid transactions and the descriptor block of the
454  * first valid transaction are held in INFO.  The transactions are all
455  * adjacent, but we must take care of the journal wrap around.
456  */
457 static int
journal_init(void)458 journal_init (void)
459 {
460   unsigned int block_count = INFO->journal_block_count;
461   unsigned int desc_block;
462   unsigned int commit_block;
463   unsigned int next_trans_id;
464   struct reiserfs_journal_header header;
465   struct reiserfs_journal_desc   desc;
466   struct reiserfs_journal_commit commit;
467   __u32 *journal_table = JOURNAL_START;
468 
469   journal_read (block_count, sizeof (header), (char *) &header);
470   desc_block = header.j_first_unflushed_offset;
471   if (desc_block >= block_count)
472     return 0;
473 
474   INFO->journal_first_desc = desc_block;
475   next_trans_id = header.j_last_flush_trans_id + 1;
476 
477 #ifdef REISERDEBUG
478   printf ("journal_init: last flushed %d\n",
479 	  header.j_last_flush_trans_id);
480 #endif
481 
482   while (1)
483     {
484       journal_read (desc_block, sizeof (desc), (char *) &desc);
485       if (substring (JOURNAL_DESC_MAGIC, desc.j_magic) > 0
486 	  || desc.j_trans_id != next_trans_id
487 	  || desc.j_mount_id != header.j_mount_id)
488 	/* no more valid transactions */
489 	break;
490 
491       commit_block = (desc_block + desc.j_len + 1) & (block_count - 1);
492       journal_read (commit_block, sizeof (commit), (char *) &commit);
493       if (desc.j_trans_id != commit.j_trans_id
494 	  || desc.j_len != commit.j_len)
495 	/* no more valid transactions */
496 	break;
497 
498 #ifdef REISERDEBUG
499       printf ("Found valid transaction %d/%d at %d.\n",
500 	      desc.j_trans_id, desc.j_mount_id, desc_block);
501 #endif
502 
503       next_trans_id++;
504       if (journal_table < JOURNAL_END)
505 	{
506 	  if ((journal_table + 1 + desc.j_len) >= JOURNAL_END)
507 	    {
508 	      /* The table is almost full; mark the end of the cached
509 	       * journal.*/
510 	      *journal_table = 0xffffffff;
511 	      journal_table = JOURNAL_END;
512 	    }
513 	  else
514 	    {
515 	      int i;
516 	      /* Cache the length and the realblock numbers in the table.
517 	       * The block number of descriptor can easily be computed.
518 	       * and need not to be stored here.
519 	       */
520 	      *journal_table++ = desc.j_len;
521 	      for (i = 0; i < desc.j_len && i < JOURNAL_TRANS_HALF; i++)
522 		{
523 		  *journal_table++ = desc.j_realblock[i];
524 #ifdef REISERDEBUG
525 		  printf ("block %d is in journal %d.\n",
526 			  desc.j_realblock[i], desc_block);
527 #endif
528 		}
529 	      for (     ; i < desc.j_len; i++)
530 		{
531 		  *journal_table++ = commit.j_realblock[i-JOURNAL_TRANS_HALF];
532 #ifdef REISERDEBUG
533 		  printf ("block %d is in journal %d.\n",
534 			  commit.j_realblock[i-JOURNAL_TRANS_HALF],
535 			  desc_block);
536 #endif
537 		}
538 	    }
539 	}
540       desc_block = (commit_block + 1) & (block_count - 1);
541     }
542 #ifdef REISERDEBUG
543   printf ("Transaction %d/%d at %d isn't valid.\n",
544 	  desc.j_trans_id, desc.j_mount_id, desc_block);
545 #endif
546 
547   INFO->journal_transactions
548     = next_trans_id - header.j_last_flush_trans_id - 1;
549   return errnum == 0;
550 }
551 
552 /* check filesystem types and read superblock into memory buffer */
553 int
reiserfs_mount(void)554 reiserfs_mount (void)
555 {
556   struct reiserfs_super_block super;
557   int superblock = REISERFS_DISK_OFFSET_IN_BYTES >> SECTOR_BITS;
558 
559   if (part_length < superblock + (sizeof (super) >> SECTOR_BITS)
560       || ! devread (superblock, 0, sizeof (struct reiserfs_super_block),
561 		(char *) &super)
562       || (substring (REISER2FS_SUPER_MAGIC_STRING, super.s_magic) > 0
563 	  && substring (REISERFS_SUPER_MAGIC_STRING, super.s_magic) > 0)
564       || (/* check that this is not a copy inside the journal log */
565 	  super.s_journal_block * super.s_blocksize
566 	  <= REISERFS_DISK_OFFSET_IN_BYTES))
567     {
568       /* Try old super block position */
569       superblock = REISERFS_OLD_DISK_OFFSET_IN_BYTES >> SECTOR_BITS;
570       if (part_length < superblock + (sizeof (super) >> SECTOR_BITS)
571 	  || ! devread (superblock, 0, sizeof (struct reiserfs_super_block),
572 			(char *) &super))
573 	return 0;
574 
575       if (substring (REISER2FS_SUPER_MAGIC_STRING, super.s_magic) > 0
576 	  && substring (REISERFS_SUPER_MAGIC_STRING, super.s_magic) > 0)
577 	{
578 	  /* pre journaling super block ? */
579 	  if (substring (REISERFS_SUPER_MAGIC_STRING,
580 			 (char*) ((char *) &super + 20)) > 0)
581 	    return 0;
582 
583 	  super.s_blocksize = REISERFS_OLD_BLOCKSIZE;
584 	  super.s_journal_block = 0;
585 	  super.s_version = 0;
586 	}
587     }
588 
589   /* check the version number.  */
590   if (super.s_version > REISERFS_MAX_SUPPORTED_VERSION)
591     return 0;
592 
593   INFO->version = super.s_version;
594   INFO->blocksize = super.s_blocksize;
595   INFO->fullblocksize_shift = log2 (super.s_blocksize);
596   INFO->blocksize_shift = INFO->fullblocksize_shift - SECTOR_BITS;
597   INFO->cached_slots =
598     (FSYSREISER_CACHE_SIZE >> INFO->fullblocksize_shift) - 1;
599 
600 #ifdef REISERDEBUG
601   printf ("reiserfs_mount: version=%d, blocksize=%d\n",
602 	  INFO->version, INFO->blocksize);
603 #endif /* REISERDEBUG */
604 
605   /* Clear node cache. */
606   memset (INFO->blocks, 0, sizeof (INFO->blocks));
607 
608   if (super.s_blocksize < FSYSREISER_MIN_BLOCKSIZE
609       || super.s_blocksize > FSYSREISER_MAX_BLOCKSIZE
610       || (SECTOR_SIZE << INFO->blocksize_shift) != super.s_blocksize)
611     return 0;
612 
613   /* Initialize journal code.  If something fails we end with zero
614    * journal_transactions, so we don't access the journal at all.
615    */
616   INFO->journal_transactions = 0;
617   if (super.s_journal_block != 0 && super.s_journal_dev == 0)
618     {
619       INFO->journal_block = super.s_journal_block;
620       INFO->journal_block_count = super.s_journal_size;
621       if (is_power_of_two (INFO->journal_block_count))
622 	journal_init ();
623 
624       /* Read in super block again, maybe it is in the journal */
625       block_read (superblock >> INFO->blocksize_shift,
626 		  0, sizeof (struct reiserfs_super_block), (char *) &super);
627     }
628 
629   if (! block_read (super.s_root_block, 0, INFO->blocksize, (char*) ROOT))
630     return 0;
631 
632   INFO->tree_depth = BLOCKHEAD (ROOT)->blk_level;
633 
634 #ifdef REISERDEBUG
635   printf ("root read_in: block=%d, depth=%d\n",
636 	  super.s_root_block, INFO->tree_depth);
637 #endif /* REISERDEBUG */
638 
639   if (INFO->tree_depth >= MAX_HEIGHT)
640     return 0;
641   if (INFO->tree_depth == DISK_LEAF_NODE_LEVEL)
642     {
643       /* There is only one node in the whole filesystem,
644        * which is simultanously leaf and root */
645       memcpy (LEAF, ROOT, INFO->blocksize);
646     }
647   return 1;
648 }
649 
650 /***************** TREE ACCESSING METHODS *****************************/
651 
652 /* I assume you are familiar with the ReiserFS tree, if not go to
653  * http://www.namesys.com/content_table.html
654  *
655  * My tree node cache is organized as following
656  *   0   ROOT node
657  *   1   LEAF node  (if the ROOT is also a LEAF it is copied here
658  *   2-n other nodes on current path from bottom to top.
659  *       if there is not enough space in the cache, the top most are
660  *       omitted.
661  *
662  * I have only two methods to find a key in the tree:
663  *   search_stat(dir_id, objectid) searches for the stat entry (always
664  *       the first entry) of an object.
665  *   next_key() gets the next key in tree order.
666  *
667  * This means, that I can only sequential reads of files are
668  * efficient, but this really doesn't hurt for grub.
669  */
670 
671 /* Read in the node at the current path and depth into the node cache.
672  * You must set INFO->blocks[depth] before.
673  */
674 static char *
read_tree_node(unsigned int blockNr,int depth)675 read_tree_node (unsigned int blockNr, int depth)
676 {
677   char* cache = CACHE(depth);
678   int num_cached = INFO->cached_slots;
679   if (depth < num_cached)
680     {
681       /* This is the cached part of the path.  Check if same block is
682        * needed.
683        */
684       if (blockNr == INFO->blocks[depth])
685 	return cache;
686     }
687   else
688     cache = CACHE(num_cached);
689 
690 #ifdef REISERDEBUG
691   printf ("  next read_in: block=%d (depth=%d)\n",
692 	  blockNr, depth);
693 #endif /* REISERDEBUG */
694   if (! block_read (blockNr, 0, INFO->blocksize, cache))
695       return NULL;
696   /* Make sure it has the right node level */
697   if (BLOCKHEAD (cache)->blk_level != depth)
698     {
699       errnum = ERR_FSYS_CORRUPT;
700       return NULL;
701     }
702 
703   INFO->blocks[depth] = blockNr;
704   return cache;
705 }
706 
707 /* Get the next key, i.e. the key following the last retrieved key in
708  * tree order.  INFO->current_ih and
709  * INFO->current_info are adapted accordingly.  */
710 static int
next_key(void)711 next_key (void)
712 {
713   int depth;
714   struct item_head *ih = INFO->current_ih + 1;
715   char *cache;
716 
717 #ifdef REISERDEBUG
718   printf ("next_key:\n  old ih: key %d:%d:%d:%d version:%d\n",
719 	  INFO->current_ih->ih_key.k_dir_id,
720 	  INFO->current_ih->ih_key.k_objectid,
721 	  INFO->current_ih->ih_key.u.v1.k_offset,
722 	  INFO->current_ih->ih_key.u.v1.k_uniqueness,
723 	  INFO->current_ih->ih_version);
724 #endif /* REISERDEBUG */
725 
726   if (ih == &ITEMHEAD[BLOCKHEAD (LEAF)->blk_nr_item])
727     {
728       depth = DISK_LEAF_NODE_LEVEL;
729       /* The last item, was the last in the leaf node.
730        * Read in the next block
731        */
732       do
733 	{
734 	  if (depth == INFO->tree_depth)
735 	    {
736 	      /* There are no more keys at all.
737 	       * Return a dummy item with MAX_KEY */
738 	      ih = (struct item_head *) &BLOCKHEAD (LEAF)->blk_right_delim_key;
739 	      goto found;
740 	    }
741 	  depth++;
742 #ifdef REISERDEBUG
743 	  printf ("  depth=%d, i=%d\n", depth, INFO->next_key_nr[depth]);
744 #endif /* REISERDEBUG */
745 	}
746       while (INFO->next_key_nr[depth] == 0);
747 
748       if (depth == INFO->tree_depth)
749 	cache = ROOT;
750       else if (depth <= INFO->cached_slots)
751 	cache = CACHE (depth);
752       else
753 	{
754 	  cache = read_tree_node (INFO->blocks[depth], depth);
755 	  if (! cache)
756 	    return 0;
757 	}
758 
759       do
760 	{
761 	  int nr_item = BLOCKHEAD (cache)->blk_nr_item;
762 	  int key_nr = INFO->next_key_nr[depth]++;
763 #ifdef REISERDEBUG
764 	  printf ("  depth=%d, i=%d/%d\n", depth, key_nr, nr_item);
765 #endif /* REISERDEBUG */
766 	  if (key_nr == nr_item)
767 	    /* This is the last item in this block, set the next_key_nr to 0 */
768 	    INFO->next_key_nr[depth] = 0;
769 
770 	  cache = read_tree_node (DC (cache)[key_nr].dc_block_number, --depth);
771 	  if (! cache)
772 	    return 0;
773 	}
774       while (depth > DISK_LEAF_NODE_LEVEL);
775 
776       ih = ITEMHEAD;
777     }
778  found:
779   INFO->current_ih   = ih;
780   INFO->current_item = &LEAF[ih->ih_item_location];
781 #ifdef REISERDEBUG
782   printf ("  new ih: key %d:%d:%d:%d version:%d\n",
783 	  INFO->current_ih->ih_key.k_dir_id,
784 	  INFO->current_ih->ih_key.k_objectid,
785 	  INFO->current_ih->ih_key.u.v1.k_offset,
786 	  INFO->current_ih->ih_key.u.v1.k_uniqueness,
787 	  INFO->current_ih->ih_version);
788 #endif /* REISERDEBUG */
789   return 1;
790 }
791 
792 /* preconditions: reiserfs_mount already executed, therefore
793  *   INFO block is valid
794  * returns: 0 if error (errnum is set),
795  *   nonzero iff we were able to find the key successfully.
796  * postconditions: on a nonzero return, the current_ih and
797  *   current_item fields describe the key that equals the
798  *   searched key.  INFO->next_key contains the next key after
799  *   the searched key.
800  * side effects: messes around with the cache.
801  */
802 static int
search_stat(__u32 dir_id,__u32 objectid)803 search_stat (__u32 dir_id, __u32 objectid)
804 {
805   char *cache;
806   int depth;
807   int nr_item;
808   int i;
809   struct item_head *ih;
810 #ifdef REISERDEBUG
811   printf ("search_stat:\n  key %d:%d:0:0\n", dir_id, objectid);
812 #endif /* REISERDEBUG */
813 
814   depth = INFO->tree_depth;
815   cache = ROOT;
816 
817   while (depth > DISK_LEAF_NODE_LEVEL)
818     {
819       struct key *key;
820       nr_item = BLOCKHEAD (cache)->blk_nr_item;
821 
822       key = KEY (cache);
823 
824       for (i = 0; i < nr_item; i++)
825 	{
826 	  if (key->k_dir_id > dir_id
827 	      || (key->k_dir_id == dir_id
828 		  && (key->k_objectid > objectid
829 		      || (key->k_objectid == objectid
830 			  && (key->u.v1.k_offset
831 			      | key->u.v1.k_uniqueness) > 0))))
832 	    break;
833 	  key++;
834 	}
835 
836 #ifdef REISERDEBUG
837       printf ("  depth=%d, i=%d/%d\n", depth, i, nr_item);
838 #endif /* REISERDEBUG */
839       INFO->next_key_nr[depth] = (i == nr_item) ? 0 : i+1;
840       cache = read_tree_node (DC (cache)[i].dc_block_number, --depth);
841       if (! cache)
842 	return 0;
843     }
844 
845   /* cache == LEAF */
846   nr_item = BLOCKHEAD (LEAF)->blk_nr_item;
847   ih = ITEMHEAD;
848   for (i = 0; i < nr_item; i++)
849     {
850       if (ih->ih_key.k_dir_id == dir_id
851 	  && ih->ih_key.k_objectid == objectid
852 	  && ih->ih_key.u.v1.k_offset == 0
853 	  && ih->ih_key.u.v1.k_uniqueness == 0)
854 	{
855 #ifdef REISERDEBUG
856 	  printf ("  depth=%d, i=%d/%d\n", depth, i, nr_item);
857 #endif /* REISERDEBUG */
858 	  INFO->current_ih   = ih;
859 	  INFO->current_item = &LEAF[ih->ih_item_location];
860 	  return 1;
861 	}
862       ih++;
863     }
864   errnum = ERR_FSYS_CORRUPT;
865   return 0;
866 }
867 
868 int
reiserfs_read(char * buf,int len)869 reiserfs_read (char *buf, int len)
870 {
871   unsigned int blocksize;
872   unsigned int offset;
873   unsigned int to_read;
874   char *prev_buf = buf;
875 
876 #ifdef REISERDEBUG
877   printf ("reiserfs_read: filepos=%d len=%d, offset=%x:%x\n",
878 	  filepos, len, (__u64) IH_KEY_OFFSET (INFO->current_ih) - 1);
879 #endif /* REISERDEBUG */
880 
881   if (INFO->current_ih->ih_key.k_objectid != INFO->fileinfo.k_objectid
882       || IH_KEY_OFFSET (INFO->current_ih) > filepos + 1)
883     {
884       search_stat (INFO->fileinfo.k_dir_id, INFO->fileinfo.k_objectid);
885       goto get_next_key;
886     }
887 
888   while (! errnum)
889     {
890       if (INFO->current_ih->ih_key.k_objectid != INFO->fileinfo.k_objectid)
891 	break;
892 
893       offset = filepos - IH_KEY_OFFSET (INFO->current_ih) + 1;
894       blocksize = INFO->current_ih->ih_item_len;
895 
896 #ifdef REISERDEBUG
897       printf ("  loop: filepos=%d len=%d, offset=%d blocksize=%d\n",
898 	      filepos, len, offset, blocksize);
899 #endif /* REISERDEBUG */
900 
901       if (IH_KEY_ISTYPE(INFO->current_ih, TYPE_DIRECT)
902 	  && offset < blocksize)
903 	{
904 #ifdef REISERDEBUG
905 	  printf ("direct_read: offset=%d, blocksize=%d\n",
906 		  offset, blocksize);
907 #endif /* REISERDEBUG */
908 	  to_read = blocksize - offset;
909 	  if (to_read > len)
910 	    to_read = len;
911 
912 	  if (disk_read_hook != NULL)
913 	    {
914 	      disk_read_func = disk_read_hook;
915 
916 	      block_read (INFO->blocks[DISK_LEAF_NODE_LEVEL],
917 			  (INFO->current_item - LEAF + offset), to_read, buf);
918 
919 	      disk_read_func = NULL;
920 	    }
921 	  else
922 	    memcpy (buf, INFO->current_item + offset, to_read);
923 	  goto update_buf_len;
924 	}
925       else if (IH_KEY_ISTYPE(INFO->current_ih, TYPE_INDIRECT))
926 	{
927 	  blocksize = (blocksize >> 2) << INFO->fullblocksize_shift;
928 #ifdef REISERDEBUG
929 	  printf ("indirect_read: offset=%d, blocksize=%d\n",
930 		  offset, blocksize);
931 #endif /* REISERDEBUG */
932 
933 	  while (offset < blocksize)
934 	    {
935 	      __u32 blocknr = ((__u32 *) INFO->current_item)
936 		[offset >> INFO->fullblocksize_shift];
937 	      int blk_offset = offset & (INFO->blocksize-1);
938 
939 	      to_read = INFO->blocksize - blk_offset;
940 	      if (to_read > len)
941 		to_read = len;
942 
943 	      disk_read_func = disk_read_hook;
944 
945 	      /* Journal is only for meta data.  Data blocks can be read
946 	       * directly without using block_read
947 	       */
948 	      devread (blocknr << INFO->blocksize_shift,
949 		       blk_offset, to_read, buf);
950 
951 	      disk_read_func = NULL;
952 	    update_buf_len:
953 	      len -= to_read;
954 	      buf += to_read;
955 	      offset += to_read;
956 	      filepos += to_read;
957 	      if (len == 0)
958 		goto done;
959 	    }
960 	}
961     get_next_key:
962       next_key ();
963     }
964  done:
965   return errnum ? 0 : buf - prev_buf;
966 }
967 
968 
969 /* preconditions: reiserfs_mount already executed, therefore
970  *   INFO block is valid
971  * returns: 0 if error, nonzero iff we were able to find the file successfully
972  * postconditions: on a nonzero return, INFO->fileinfo contains the info
973  *   of the file we were trying to look up, filepos is 0 and filemax is
974  *   the size of the file.
975  */
976 int
reiserfs_dir(char * dirname)977 reiserfs_dir (char *dirname)
978 {
979   struct reiserfs_de_head *de_head;
980   char *rest, ch;
981   __u32 dir_id, objectid, parent_dir_id = 0, parent_objectid = 0;
982 #ifndef STAGE1_5
983   int do_possibilities = 0;
984 #endif /* ! STAGE1_5 */
985   char linkbuf[PATH_MAX];	/* buffer for following symbolic links */
986   int link_count = 0;
987   int mode;
988 
989   dir_id = REISERFS_ROOT_PARENT_OBJECTID;
990   objectid = REISERFS_ROOT_OBJECTID;
991 
992   while (1)
993     {
994 #ifdef REISERDEBUG
995       printf ("dirname=%s\n", dirname);
996 #endif /* REISERDEBUG */
997 
998       /* Search for the stat info first. */
999       if (! search_stat (dir_id, objectid))
1000 	return 0;
1001 
1002 #ifdef REISERDEBUG
1003       printf ("sd_mode=%x sd_size=%d\n",
1004 	      ((struct stat_data *) INFO->current_item)->sd_mode,
1005 	      ((struct stat_data *) INFO->current_item)->sd_size);
1006 #endif /* REISERDEBUG */
1007 
1008       mode = ((struct stat_data *) INFO->current_item)->sd_mode;
1009 
1010       /* If we've got a symbolic link, then chase it. */
1011       if (S_ISLNK (mode))
1012 	{
1013 	  int len;
1014 	  if (++link_count > MAX_LINK_COUNT)
1015 	    {
1016 	      errnum = ERR_SYMLINK_LOOP;
1017 	      return 0;
1018 	    }
1019 
1020 	  /* Get the symlink size. */
1021 	  filemax = ((struct stat_data *) INFO->current_item)->sd_size;
1022 
1023 	  /* Find out how long our remaining name is. */
1024 	  len = 0;
1025 	  while (dirname[len] && !isspace (dirname[len]))
1026 	    len++;
1027 
1028 	  if (filemax + len > sizeof (linkbuf) - 1)
1029 	    {
1030 	      errnum = ERR_FILELENGTH;
1031 	      return 0;
1032 	    }
1033 
1034 	  /* Copy the remaining name to the end of the symlink data.
1035 	     Note that DIRNAME and LINKBUF may overlap! */
1036 	  grub_memmove (linkbuf + filemax, dirname, len+1);
1037 
1038 	  INFO->fileinfo.k_dir_id = dir_id;
1039 	  INFO->fileinfo.k_objectid = objectid;
1040   	  filepos = 0;
1041 	  if (! next_key ()
1042 	      || reiserfs_read (linkbuf, filemax) != filemax)
1043 	    {
1044 	      if (! errnum)
1045 		errnum = ERR_FSYS_CORRUPT;
1046 	      return 0;
1047 	    }
1048 
1049 #ifdef REISERDEBUG
1050 	  printf ("symlink=%s\n", linkbuf);
1051 #endif /* REISERDEBUG */
1052 
1053 	  dirname = linkbuf;
1054 	  if (*dirname == '/')
1055 	    {
1056 	      /* It's an absolute link, so look it up in root. */
1057 	      dir_id = REISERFS_ROOT_PARENT_OBJECTID;
1058 	      objectid = REISERFS_ROOT_OBJECTID;
1059 	    }
1060 	  else
1061 	    {
1062 	      /* Relative, so look it up in our parent directory. */
1063 	      dir_id   = parent_dir_id;
1064 	      objectid = parent_objectid;
1065 	    }
1066 
1067 	  /* Now lookup the new name. */
1068 	  continue;
1069 	}
1070 
1071       /* if we have a real file (and we're not just printing possibilities),
1072 	 then this is where we want to exit */
1073 
1074       if (! *dirname || isspace (*dirname))
1075 	{
1076 	  if (! S_ISREG (mode))
1077 	    {
1078 	      errnum = ERR_BAD_FILETYPE;
1079 	      return 0;
1080 	    }
1081 
1082 	  filepos = 0;
1083 	  filemax = ((struct stat_data *) INFO->current_item)->sd_size;
1084 
1085 	  /* If this is a new stat data and size is > 4GB set filemax to
1086 	   * maximum
1087 	   */
1088 	  if (INFO->current_ih->ih_version == ITEM_VERSION_2
1089 	      && ((struct stat_data *) INFO->current_item)->sd_size_hi > 0)
1090 	    filemax = 0xffffffff;
1091 
1092 	  INFO->fileinfo.k_dir_id = dir_id;
1093 	  INFO->fileinfo.k_objectid = objectid;
1094 	  return next_key ();
1095 	}
1096 
1097       /* continue with the file/directory name interpretation */
1098       while (*dirname == '/')
1099 	dirname++;
1100       if (! S_ISDIR (mode))
1101 	{
1102 	  errnum = ERR_BAD_FILETYPE;
1103 	  return 0;
1104 	}
1105       for (rest = dirname; (ch = *rest) && ! isspace (ch) && ch != '/'; rest++);
1106       *rest = 0;
1107 
1108 # ifndef STAGE1_5
1109       if (print_possibilities && ch != '/')
1110 	do_possibilities = 1;
1111 # endif /* ! STAGE1_5 */
1112 
1113       while (1)
1114 	{
1115 	  char *name_end;
1116 	  int num_entries;
1117 
1118 	  if (! next_key ())
1119 	    return 0;
1120 #ifdef REISERDEBUG
1121 	  printf ("ih: key %d:%d:%d:%d version:%d\n",
1122 		  INFO->current_ih->ih_key.k_dir_id,
1123 		  INFO->current_ih->ih_key.k_objectid,
1124 		  INFO->current_ih->ih_key.u.v1.k_offset,
1125 		  INFO->current_ih->ih_key.u.v1.k_uniqueness,
1126 		  INFO->current_ih->ih_version);
1127 #endif /* REISERDEBUG */
1128 
1129 	  if (INFO->current_ih->ih_key.k_objectid != objectid)
1130 	    break;
1131 
1132 	  name_end = INFO->current_item + INFO->current_ih->ih_item_len;
1133 	  de_head = (struct reiserfs_de_head *) INFO->current_item;
1134 	  num_entries = INFO->current_ih->u.ih_entry_count;
1135 	  while (num_entries > 0)
1136 	    {
1137 	      char *filename = INFO->current_item + de_head->deh_location;
1138 	      char  tmp = *name_end;
1139 	      if ((de_head->deh_state & DEH_Visible))
1140 		{
1141 		  int cmp;
1142 		  /* Directory names in ReiserFS are not null
1143 		   * terminated.  We write a temporary 0 behind it.
1144 		   * NOTE: that this may overwrite the first block in
1145 		   * the tree cache.  That doesn't hurt as long as we
1146 		   * don't call next_key () in between.
1147 		   */
1148 		  *name_end = 0;
1149 		  cmp = substring (dirname, filename);
1150 		  *name_end = tmp;
1151 # ifndef STAGE1_5
1152 		  if (do_possibilities)
1153 		    {
1154 		      if (cmp <= 0)
1155 			{
1156 			  if (print_possibilities > 0)
1157 			    print_possibilities = -print_possibilities;
1158 			  *name_end = 0;
1159 			  print_a_completion (filename);
1160 			  *name_end = tmp;
1161 			}
1162 		    }
1163 		  else
1164 # endif /* ! STAGE1_5 */
1165 		    if (cmp == 0)
1166 		      goto found;
1167 		}
1168 	      /* The beginning of this name marks the end of the next name.
1169 	       */
1170 	      name_end = filename;
1171 	      de_head++;
1172 	      num_entries--;
1173 	    }
1174 	}
1175 
1176 # ifndef STAGE1_5
1177       if (print_possibilities < 0)
1178 	return 1;
1179 # endif /* ! STAGE1_5 */
1180 
1181       errnum = ERR_FILE_NOT_FOUND;
1182       *rest = ch;
1183       return 0;
1184 
1185     found:
1186 
1187       *rest = ch;
1188       dirname = rest;
1189 
1190       parent_dir_id = dir_id;
1191       parent_objectid = objectid;
1192       dir_id = de_head->deh_dir_id;
1193       objectid = de_head->deh_objectid;
1194     }
1195 }
1196 
1197 int
reiserfs_embed(int * start_sector,int needed_sectors)1198 reiserfs_embed (int *start_sector, int needed_sectors)
1199 {
1200   struct reiserfs_super_block super;
1201   int num_sectors;
1202 
1203   if (! devread (REISERFS_DISK_OFFSET_IN_BYTES >> SECTOR_BITS, 0,
1204 		 sizeof (struct reiserfs_super_block), (char *) &super))
1205     return 0;
1206 
1207   *start_sector = 1; /* reserve first sector for stage1 */
1208   if ((substring (REISERFS_SUPER_MAGIC_STRING, super.s_magic) <= 0
1209        || substring (REISER2FS_SUPER_MAGIC_STRING, super.s_magic) <= 0)
1210       && (/* check that this is not a super block copy inside
1211 	   * the journal log */
1212 	  super.s_journal_block * super.s_blocksize
1213 	  > REISERFS_DISK_OFFSET_IN_BYTES))
1214     num_sectors = (REISERFS_DISK_OFFSET_IN_BYTES >> SECTOR_BITS) - 1;
1215   else
1216     num_sectors = (REISERFS_OLD_DISK_OFFSET_IN_BYTES >> SECTOR_BITS) - 1;
1217 
1218   return (needed_sectors <= num_sectors);
1219 }
1220 #endif /* FSYS_REISERFS */
1221