view toys/mke2fs.c @ 105:c5b6fb1fe3a4

Calculate block and inode usage in group table and bitmaps.
author Rob Landley <rob@landley.net>
date Mon, 19 Feb 2007 01:44:28 -0500
parents 1c2f6d2e25de
children aadd28817955
line wrap: on
line source

/* vi: set ts=4:
 *
 * mke2fs.c - Create an ext2 filesystem image.
 *
 * Copyright 2006 Rob Landley <rob@landley.net>
 */

#include "toys.h"

#define TT toy.mke2fs

	// b - block size (1024, 2048, 4096)
	// F - force (run on mounted device or non-block device)
	// i - bytes per inode 
	// N - number of inodes
	// m - reserved blocks percentage
	// n - Don't write
	// q - quiet

	// L - volume label
	// M - last mounted path
	// o - creator os
	
	// j - create journal
	// J - journal options (size=1024-102400 blocks,device=)
	//        device=/dev/blah or LABEL=label UUID=uuid

	// E - extended options (stride=stripe-size blocks)
	// O - none,dir_index,filetype,has_journal,journal_dev,sparse_super

#define INODES_RESERVED 10

// Work out how many filesystem blocks a file of "size" bytes occupies:
// the data blocks themselves plus the index blocks needed to reach them.
// The first 12 data blocks need no index block; every index block holds
// TT.blocksize/4 entries.

static uint32_t count_blocks_used(uint64_t size)
{
	uint32_t data, per_index, single = 0, twice = 0, thrice = 0;

	// Data blocks, rounding a partial final block up.
	data = (uint32_t)((size+(TT.blocksize-1))/TT.blocksize);
	per_index = TT.blocksize/4;

	// Small files are covered entirely by the direct block pointers.
	if (data <= 12) return data;

	// Singly indirect index blocks for everything past the direct blocks,
	// then doubly and triply indirect blocks to index the level below.
	single = ((data-13)/per_index)+1;
	if (single > 1) {
		twice = ((single-2)/per_index)+1;
		if (twice > 1) thrice = ((twice-2)/per_index)+1;
	}

	return data + single + twice + thrice;
}

// Walk one sibling list of the directory tree and total up its space needs.
//
// For every entry, the size of a directory entry record plus the name length
// is added to *size.  Entries with children recurse (storing the child
// listing's block count in st_blocks and accumulating its byte size in
// st_size); regular files get st_blocks from count_blocks_used().
//
// Returns the number of blocks needed to hold *size bytes of directory
// entries.  Adds all blocks counted to TT.treeblocks, and adds one to
// TT.treeinodes per call (that is, once per sibling list walked).
// NOTE(review): regular files do not bump TT.treeinodes here - confirm
// whether that is intended.

static long check_treesize(struct dirtree *this, off_t *size)
{
	struct dirtree *node;
	long blocks;

	for (node = this; node; node = node->next) {
		// Space for this entry in the enclosing directory.
		*size += sizeof(struct ext2_dentry) + strlen(node->name);

		if (node->child) {
			node->st.st_blocks = check_treesize(node->child,
				&node->st.st_size);
			continue;
		}
		if (S_ISREG(node->st.st_mode)) {
			node->st.st_blocks = count_blocks_used(node->st.st_size);
			TT.treeblocks += node->st.st_blocks;
		}
	}

	// Blocks for the directory listing itself.
	blocks = count_blocks_used(*size);
	TT.treeblocks += blocks;
	TT.treeinodes++;

	return blocks;
}

// Step to the next node of the tree in pre-order without recursion:
// first child if there is one, otherwise the next sibling, otherwise the
// next sibling of the nearest ancestor that has one (found through the
// parent pointers).  Returns NULL when the walk is finished or when called
// with NULL.
//
// The previous version never followed ->child, so a walk that began at the
// top of the tree only ever saw the top-level siblings.
static struct dirtree *treenext(struct dirtree *this)
{
	// Descend before moving sideways so every node gets visited.
	if (this && this->child) return this->child;

	// No children: climb until some level has a following sibling.
	while (this && !this->next) this = this->parent;
	if (this) this = this->next;

	return this;
}

// Recompute st_nlink for every node that treenext() reaches from TT.dt.
//
// Doing this properly would mean copying and sorting the tree.  Instead each
// non-directory is compared against every node (itself included), which is
// quadratic in the number of nodes but takes very little code.

static void check_treelinks(void)
{
	struct dirtree *node = TT.dt;

	while (node) {
		if (S_ISDIR(node->st.st_mode)) {
			// Directories can't be hardlinked, so their count is fixed.
			node->st.st_nlink = 2;
		} else {
			struct dirtree *other;

			// Count the nodes sharing this inode number and device.
			node->st.st_nlink = 0;
			for (other = TT.dt; other; other = treenext(other)) {
				if (node->st.st_ino != other->st.st_ino) continue;
				if (node->st.st_dev != other->st.st_dev) continue;
				node->st.st_nlink++;
			}
		}
		node = treenext(node);
	}
}

// Fill the 16 byte buffer "uuid" with a randomly generated identifier.
//
// According to http://www.opengroup.org/onlinepubs/9629399/apdxa.htm a uuid
// should be built from a 100 nanosecond clock normalized to the start of the
// gregorian calendar in 1582 plus the eth0 mac address.  Since only a few of
// the 128 bits have a defined value, the rest is simply read from
// /dev/urandom instead.

static void create_uuid(char *uuid)
{
	int fd;

	// Fetch 128 random bits.
	fd = xopen("/dev/urandom", O_RDONLY);
	xreadall(fd, uuid, 16);
	close(fd);

	// Claim to be a DCE format UUID: force the top nibble of byte 6 to 4
	// and the top two bits of byte 8 to binary 10.
	uuid[6] &= 0x0F;
	uuid[6] |= 0x40;
	uuid[8] &= 0x3F;
	uuid[8] |= 0x80;

	// rfc2518 section 6.4.1 suggests setting the mac multicast bit of the
	// node ID when no real macaddr is used, so the result never collides
	// with anybody actually using a macaddr.
	// NOTE(review): the standard layout starts the node ID at byte 10, not
	// byte 11 - confirm which byte was intended.
	uuid[11] |= 128;
}

// Spread "inodes" across TT.groups block groups (rounding up), then round
// the per-group figure up again so it fills a whole number of inode table
// blocks.  Returns the resulting inodes per group.
static uint32_t get_inodespg(uint32_t inodes)
{
	uint32_t per_group, per_block;

	per_group = (inodes + TT.groups - 1) / TT.groups;

	// How many inode structures fit in one block?
	per_block = TT.blocksize/sizeof(struct ext2_inode);

	return ((per_group + per_block - 1)/per_block)*per_block;
}

// Populate the superblock "sb" from the geometry already stored in TT
// (blocksize, blockbits, blocks, reserved_percent, inodespg).  As a side
// effect this sets TT.groups and replaces TT.inodespg with the rounded
// inodes-per-group value.  Exits with "bad blocksize" unless TT.blocksize is
// 1024 shifted left by 0 to 3 bits.

static void init_superblock(struct ext2_superblock *sb)
{
	uint32_t shift, reserved;

	// log_block_size and log_frag_size hold the shift that turns 1024
	// into the block size.

	shift = 0;
	while (shift < 4 && TT.blocksize != 1024<<shift) shift++;
	if (shift==4) error_exit("bad blocksize");
	sb->log_block_size = sb->log_frag_size = SWAP_LE32(shift);

	// blocks_count, r_blocks_count (the reserved percentage of all
	// blocks) and first_data_block.

	sb->blocks_count = SWAP_LE32(TT.blocks);
	reserved = (TT.blocks * (uint64_t)TT.reserved_percent) / 100;
	sb->r_blocks_count = SWAP_LE32(reserved);

	sb->first_data_block = SWAP_LE32(TT.blocksize == 1024 ? 1 : 0);

	// A group spans as many blocks as one bitmap block has bits, so
	// blocks_per_group and frags_per_group are both TT.blockbits.

	sb->blocks_per_group = sb->frags_per_group = SWAP_LE32(TT.blockbits);

	// Number of block groups, rounded up.  (Divide first and test the
	// remainder separately so the rounding can't overflow.)

	TT.groups = (TT.blocks)/TT.blockbits;
	if (TT.blocks & (TT.blockbits-1)) TT.groups++;

	// Inodes per group, rounded up to fill whole inode table blocks.
	// This needs TT.groups, so it has to come after the lines above.

	TT.inodespg = get_inodespg(TT.inodespg);

	sb->inodes_per_group = SWAP_LE32(TT.inodespg);
	sb->inodes_count = SWAP_LE32(TT.inodespg * TT.groups);

	// Remaining fixed fields.
	sb->max_mnt_count=0xFFFF;
	sb->wtime = sb->lastcheck = sb->mkfs_time = SWAP_LE32(time(NULL));
	sb->magic = SWAP_LE32(0xEF53);
	sb->state = sb->errors = SWAP_LE16(1);

	sb->rev_level = SWAP_LE32(1);
	sb->first_ino = SWAP_LE32(INODES_RESERVED+1);
	sb->inode_size = SWAP_LE16(sizeof(struct ext2_inode));
	sb->feature_incompat = SWAP_LE32(EXT2_FEATURE_INCOMPAT_FILETYPE);
	sb->feature_ro_compat = SWAP_LE32(EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER);

	create_uuid(sb->uuid);

	// TODO If we're called as mke3fs or mkfs.ext3, do a journal.

	//if (strchr(toys.which->name,'3'))
	//	sb->feature_compat |= SWAP_LE32(EXT3_FEATURE_COMPAT_HAS_JOURNAL);
}

// Does this group contain a superblock backup (and group descriptor table)?
// Returns 1 for groups 0 and 1 and for any group number that is a power of
// 3, 5, or 7; returns 0 otherwise.
static int is_sb_group(uint32_t group)
{
	int i;

	if (group < 2) return 1;

	for (i=3; i<9; i+=2) {
		// 64 bits so the power can step past any 32 bit group number
		// without overflowing (an int here overflowed for large groups).
		uint64_t j = i;

		while (j<group) j*=i;
		if (j==group) return 1;
	}

	return 0;
}

	
// Number of blocks at the start of "group" taken up by the superblock backup
// and the copy of the group descriptor table.  Zero for groups that carry no
// backup.
static int group_superblock_used(uint32_t group)
{
	int used = 0;

	if (is_sb_group(group)) {
		// How many blocks does the group table take up?  (Rounded up.)
		used = TT.groups * sizeof(struct ext2_group);
		used += TT.blocksize - 1;
		used /= TT.blocksize;

		// One more for the superblock itself.
		used++;

		// Corner case: group 0 gets one extra block when blocks are 1k.
		if (TT.blocksize == 1024 && !group) used++;
	}

	return used;
}

// Total blocks of per-group overhead across all TT.groups groups: each
// group's superblock/descriptor backup (if any), two bitmap blocks, and the
// inode table.  The inode table size is derived by passing TT.inodespg
// through get_inodespg(), which divides its argument across the groups.
static uint32_t get_all_group_blocks(void)
{
	uint32_t group = 0, total = 0, table_blocks;

	// Inodes per group, converted to inode table blocks.
	table_blocks = get_inodespg(TT.inodespg);
	table_blocks /= TT.blocksize/sizeof(struct ext2_inode);

	while (group < TT.groups) {
		total += group_superblock_used(group) + 2 + table_blocks;
		group++;
	}

	return total;
}

// Set "len" consecutive bits in the bitmap "array", starting at bit number
// "start".  Bit n lives in byte n/8 at position n&7 (least significant bit
// first).  Bits that are already set stay set.  A zero or negative len does
// nothing (a negative len used to loop until it ran off the buffer).
static void bits_set(char *array, int start, int len)
{
	while (len > 0) {
		if ((start&7) || len<8) {
			// Unaligned start or short tail: one bit at a time.
			array[start/8]|=(1<<(start&7));
			start++;
			len--;
		} else {
			// Byte aligned with at least 8 bits left: fill a whole byte.
			array[start/8]=255;
			start+=8;
			len-=8;
		}
	}
}

// Advance the output (TT.fsfd) by len bytes of zeroes.  Seeks forward from
// the current position to keep the file sparse; if seeking fails, remembers
// that in TT.noseek and writes real zeroes from toybuf from then on.
static void put_zeroes(int len)
{
	// SEEK_CUR, not SEEK_SET: this skips relative to where the last write
	// ended rather than jumping to absolute offset len.
	if(TT.noseek || -1 == lseek(TT.fsfd, len, SEEK_CUR)) {

		TT.noseek=1;
		memset(toybuf, 0, sizeof(toybuf));
		// Stop on a non-positive len rather than looping on it.
		while (len > 0) {
			int out = len > sizeof(toybuf) ? sizeof(toybuf) : len;
			xwrite(TT.fsfd, toybuf, out);
			len -= out;
		}
	}
}

// Zero the on-disk inode "in" and fill it from the stat data of tree node
// "this": mode, owner and group (each split into low and high 16 bits), the
// low 32 bits of the size, the three timestamps, link count and block count.
// NOTE(review): unlike init_superblock(), the values are stored without any
// SWAP_LE*() conversion - confirm whether byte order is handled elsewhere.
static void fill_inode(struct ext2_inode *in, struct dirtree *this)
{
	const struct stat *st = &this->st;

	memset(in, 0, sizeof(*in));

	// This works on Linux.  S_ISREG/DIR/CHR/BLK/FIFO/LNK/SOCK(m)
	in->mode = st->st_mode;
	in->links_count = st->st_nlink;

	// Ownership: low halves in uid/gid, upper halves in *_high.
	in->uid = st->st_uid & 0xFFFF;
	in->uid_high = st->st_uid >> 16;
	in->gid = st->st_gid & 0xFFFF;
	in->gid_high = st->st_gid >> 16;

	in->size = st->st_size & 0xFFFFFFFF;
	in->blocks = st->st_blocks;

	in->atime = st->st_atime;
	in->ctime = st->st_ctime;
	in->mtime = st->st_mtime;
}

// Command entry point.  Opens (or creates) the target named by the first
// argument, works out the filesystem geometry, sizes the preset file tree,
// initializes the superblock, then writes each block group in turn:
// superblock and group descriptor table (in groups that carry a backup),
// block bitmap, inode bitmap, an empty inode table, and empty data blocks.
// Returns 0; fatal problems exit through error_exit().
//
// Still unfinished in this revision: inodes and file data are not written
// and used_dirs_count is always 0.
int mke2fs_main(void)
{
	int i, temp;
	off_t length;
	uint32_t usedblocks, usedinodes;

	// Handle command line arguments.  A second argument is the block
	// count, and also means the target may be created.

	if (toys.optargs[1]) {
		sscanf(toys.optargs[1], "%u", &TT.blocks);
		temp = O_RDWR|O_CREAT;
	} else temp = O_RDWR;
	if (!TT.reserved_percent) TT.reserved_percent = 5;

	// TODO: Check if filesystem is mounted here

	// For mke?fs, open file.  For gene?fs, create file.
	TT.fsfd = xcreate(*toys.optargs, temp, 0777);
	
	// Determine appropriate block size and block count from file length.
	// (If no length, default to 4k.  They can override it on the cmdline.)

	length = fdlength(TT.fsfd);
	if (!TT.blocksize) TT.blocksize = (length && length < 1<<29) ? 1024 : 4096;
	TT.blockbits = 8*TT.blocksize;
	if (!TT.blocks) TT.blocks = length/TT.blocksize;

	// Figure out how many total inodes we need.

	if (!TT.inodespg) {
		if (!TT.bytes_per_inode) TT.bytes_per_inode = 8192;
		TT.inodespg = (TT.blocks * (uint64_t)TT.blocksize) / TT.bytes_per_inode;
	}

	// Collect gene2fs list or lost+found, calculate requirements.

	if (TT.gendir) {
		// strncpy() leaves the buffer unterminated when the source fills
		// it, so reject over-long paths instead of truncating them.
		if (strlen(TT.gendir) >= sizeof(toybuf)) error_exit("path too long");
		strcpy(toybuf, TT.gendir);
		TT.dt = read_dirtree(toybuf, NULL);
	} else {
		// No source directory: the tree is a lone lost+found directory.
		TT.dt = xzalloc(sizeof(struct dirtree)+11);
		strcpy(TT.dt->name, "lost+found");
		TT.dt->st.st_mode = S_IFDIR|0755;
		TT.dt->st.st_ctime = TT.dt->st.st_mtime = time(NULL);
	}

	// Figure out how much space is used by preset files
	length = 0;
	length = check_treesize(TT.dt, &length);
	check_treelinks(); // Calculate st_nlink for each node in tree.

	if (TT.gendir && !TT.blocks) {
		// Figure out how many blocks of overhead superblock backups and
		// group descriptor tables impose.  Start with a minimal guess,
		// find the overhead for that many groups, and loop until this
		// is enough groups to store this many blocks.
		TT.groups = (TT.treeblocks/TT.blockbits)+1;
		for (;;) {
			TT.blocks = TT.treeblocks + get_all_group_blocks();
			if (TT.blocks <= TT.groups * TT.blockbits) break;
			TT.groups++;
		}
	}

	// TT.blocks is now big enough to initialize superblock structure

	init_superblock(&TT.sb);
	// NOTE(review): init_superblock() has just replaced TT.inodespg with a
	// per-group count, yet get_all_group_blocks() divides it across the
	// groups again - confirm the overhead computed here is what's wanted.
	temp = get_all_group_blocks();
	if (TT.blocks < TT.treeblocks + temp) error_exit("Not enough space.\n");
	TT.sb.free_blocks_count = SWAP_LE32(TT.blocks - TT.treeblocks - temp);

	temp = TT.inodespg*TT.groups - INODES_RESERVED;
	if (temp < TT.treeinodes) error_exit("Not enough inodes.\n");
	TT.sb.free_inodes_count = SWAP_LE32(temp - TT.treeinodes);

	// Skip the first 1k to avoid the boot sector (if any)
	put_zeroes(1024);

	// Loop through block groups, write out each one.  usedblocks and
	// usedinodes track how much of the tree earlier groups have absorbed.
	usedblocks = 0;
	usedinodes = 0;
	for (i=0; i<TT.groups; i++) {
		struct ext2_inode *in = (struct ext2_inode *)toybuf;
		uint32_t start, itable, used, end;
		int j, slot;

		// Where does this group end?  Only the last group can be short.
		end = TT.blockbits;
		if ((i+1)*TT.blockbits > TT.blocks) end = TT.blocks & (TT.blockbits-1);

		// Blocks used by inode table
		itable = (TT.inodespg*sizeof(struct ext2_inode))/TT.blocksize;

		// If a superblock goes here, write it out.
		start = group_superblock_used(i);
		if (start) {
			struct ext2_group *bg = (struct ext2_group *)toybuf;
			int treeblocks = TT.treeblocks, treeinodes = TT.treeinodes;

			TT.sb.block_group_nr = SWAP_LE16(i);

			// Write superblock and pad it up to block size.  In group 0
			// with blocks over 1k, the 1k skipped above shares the block.
			xwrite(TT.fsfd, &TT.sb, sizeof(struct ext2_superblock));
			temp = TT.blocksize - sizeof(struct ext2_superblock);
			if (!i && TT.blocksize > 1024) temp -= 1024;
			memset(toybuf, 0, TT.blocksize);
			xwrite(TT.fsfd, toybuf, temp);

			// Loop through groups to write group descriptor table.
			for(j=0; j<TT.groups; j++) {
				uint32_t gend;

				// Blocks taken by superblock/descriptor backup in group j.
				used = group_superblock_used(j);

				// Size of group j itself (this used to reuse the size of
				// the group currently being written).
				gend = TT.blockbits;
				if ((j+1)*TT.blockbits > TT.blocks)
					gend = TT.blocks & (TT.blockbits-1);

				// Find next array slot in this block (flush block if full).
				slot = j % (TT.blocksize/sizeof(struct ext2_group));
				if (!slot) {
					if (j) xwrite(TT.fsfd, bg, TT.blocksize);
					memset(bg, 0, TT.blocksize);
				}

				// How many free inodes in this group?  The reserved inodes
				// come out of group 0, then tree inodes fill groups in
				// order.
				temp = TT.inodespg;
				if (!j) temp -= INODES_RESERVED;
				if (treeinodes >= temp) {
					// Tree uses the whole group; the rest spills onward.
					treeinodes -= temp;
					temp = 0;
				} else {
					temp -= treeinodes;
					treeinodes = 0;
				}
				bg[slot].free_inodes_count = SWAP_LE16(temp);

				// How many free blocks in this group?  Group size minus
				// backup, two bitmaps and inode table, minus tree blocks.
				temp = TT.inodespg/(TT.blocksize/sizeof(struct ext2_inode)) + 2;
				temp = gend-used-temp;
				if (treeblocks >= temp) {
					treeblocks -= temp;
					temp = 0;
				} else {
					temp -= treeblocks;
					treeblocks = 0;
				}
				bg[slot].free_blocks_count = SWAP_LE32(temp);

				// Fill out rest of group structure.  Bitmaps and inode
				// table follow the backup area back to back.  (No ++ inside
				// SWAP_LE32() in case the macro evaluates its argument more
				// than once.)
				used += j*TT.blockbits;
				bg[slot].block_bitmap = SWAP_LE32(used);
				bg[slot].inode_bitmap = SWAP_LE32(used+1);
				bg[slot].inode_table = SWAP_LE32(used+2);
				bg[slot].used_dirs_count = 0;  // (TODO)
			}
			xwrite(TT.fsfd, bg, TT.blocksize);
		}

		// Now write out stuff that every block group has.

		// Write block usage bitmap: group overhead, anything past the end
		// of a short group, and this group's share of the tree blocks.

		start += 2 + itable;
		memset(toybuf, 0, TT.blocksize);
		bits_set(toybuf, 0, start);
		bits_set(toybuf, end, TT.blockbits-end);
		temp = TT.treeblocks - usedblocks;
		if (temp) {
			// Never mark more than this group's data area can hold.
			if (temp > end-start) temp = end-start;
			bits_set(toybuf, start, temp);
			usedblocks += temp;
		}
		xwrite(TT.fsfd, toybuf, TT.blocksize);

		// Write inode bitmap: reserved inodes (group 0 only), the unused
		// tail of the bitmap block, and this group's share of tree inodes.
		memset(toybuf, 0, TT.blocksize);
		j = 0;
		if (!i) bits_set(toybuf, 0, j = INODES_RESERVED);
		bits_set(toybuf, TT.inodespg, TT.blockbits-TT.inodespg);
		temp = TT.treeinodes - usedinodes;
		if (temp) {
			// Inodes this group can still hand out.
			slot = TT.inodespg - j;
			if (temp > slot) temp = slot;
			bits_set(toybuf, j, temp);
			usedinodes += temp;
		}
		xwrite(TT.fsfd, toybuf, TT.blocksize);

		// Write inode table for this group (TODO).  For now every block
		// of the table is written out zeroed.
		for (j = 0; j<TT.inodespg; j++) {
			slot = j % (TT.blocksize/sizeof(struct ext2_inode));
			if (!slot) {
				if (j) xwrite(TT.fsfd, in, TT.blocksize);
				memset(in, 0, TT.blocksize);
			}
		}
		xwrite(TT.fsfd, in, TT.blocksize);

		// Write empty data blocks
		put_zeroes((end-start) * TT.blocksize);
	}

	return 0;
}