/*
 * mkjournal.c --- make a journal for a filesystem
 *
 * Copyright (C) 2000 Theodore Ts'o.
 *
 * %Begin-Header%
 * This file may be redistributed under the terms of the GNU Library
 * General Public License, version 2.
 * %End-Header%
 */

#include "config.h"
#include <stdio.h>
#include <string.h>
#if HAVE_UNISTD_H
#include <unistd.h>
#endif
#if HAVE_ERRNO_H
#include <errno.h>
#endif
#include <fcntl.h>
#include <time.h>
#if HAVE_SYS_STAT_H
#include <sys/stat.h>
#endif
#if HAVE_SYS_TYPES_H
#include <sys/types.h>
#endif
#if HAVE_SYS_IOCTL_H
#include <sys/ioctl.h>
#endif
#if HAVE_NETINET_IN_H
#include <netinet/in.h>
#endif

#include "ext2_fs.h"
#include "e2p/e2p.h"
#include "ext2fs.h"

#include "kernel-jbd.h"

/*
 * Build a fresh journal superblock for a journal of _num_blocks_ blocks.
 *
 * The superblock is placed in a zero-filled buffer of fs->blocksize
 * bytes obtained from ext2fs_get_mem(); on success that buffer is
 * handed back through _ret_jsb_ (the callers in this file release it
 * with ext2fs_free_mem()).  All multi-byte fields are stored with
 * htonl().
 *
 * flags: EXT2_MKJOURNAL_V1_SUPER selects a V1 superblock block type,
 *	  otherwise a V2 superblock is produced.
 *
 * Returns 0 on success, EXT2_ET_JOURNAL_TOO_SMALL if _num_blocks_ is
 * below JFS_MIN_JOURNAL_BLOCKS, or the error from ext2fs_get_mem().
 */
errcode_t ext2fs_create_journal_superblock(ext2_filsys fs,
					   __u32 num_blocks, int flags,
					   char **ret_jsb)
{
	journal_superblock_t	*sb;
	errcode_t		err;

	if (num_blocks < JFS_MIN_JOURNAL_BLOCKS)
		return EXT2_ET_JOURNAL_TOO_SMALL;

	err = ext2fs_get_mem(fs->blocksize, &sb);
	if (err)
		return err;
	memset(sb, 0, fs->blocksize);

	/* Common journal block header */
	sb->s_header.h_magic = htonl(JFS_MAGIC_NUMBER);
	sb->s_header.h_blocktype =
		htonl((flags & EXT2_MKJOURNAL_V1_SUPER) ?
		      JFS_SUPERBLOCK_V1 : JFS_SUPERBLOCK_V2);

	/* Static journal geometry */
	sb->s_blocksize = htonl(fs->blocksize);
	sb->s_maxlen = htonl(num_blocks);
	sb->s_sequence = htonl(1);
	memcpy(sb->s_uuid, fs->super->s_uuid, sizeof(fs->super->s_uuid));

	if (ext2fs_has_feature_journal_dev(fs->super)) {
		/*
		 * External journal device: it starts out with no users,
		 * and the log begins one block past the location of the
		 * journal superblock on the device.
		 */
		sb->s_nr_users = 0;
		sb->s_first = htonl(ext2fs_journal_sb_start(fs->blocksize) + 1);
	} else {
		/* Internal journal: a single user, log starts at block 1 */
		sb->s_nr_users = htonl(1);
		sb->s_first = htonl(1);
	}

	*ret_jsb = (char *) sb;
	return 0;
}

/*
 * This function writes a journal using POSIX routines.  It is used
 * for creating external journals and creating journals on live
 * filesystems.
*/
/*
 * Parameters:
 *	fs	   - filesystem the journal is being created for
 *	filename   - path of an existing file or device, opened O_WRONLY
 *	num_blocks - total journal length in blocks, superblock included
 *	flags	   - EXT2_MKJOURNAL_* flags; with EXT2_MKJOURNAL_LAZYINIT
 *		     only the superblock is written
 *
 * Returns 0 on success, the error from
 * ext2fs_create_journal_superblock(), errno from open()/write(), or
 * EXT2_ET_SHORT_WRITE when write() stores fewer bytes than one block.
 */
static errcode_t write_journal_file(ext2_filsys fs, char *filename,
				    blk_t num_blocks, int flags)
{
	char		*block = NULL;
	errcode_t	err;
	blk_t		remaining;
	int		jfd, nbytes;

	err = ext2fs_create_journal_superblock(fs, num_blocks, flags, &block);
	if (err)
		return err;

	/* Open the device or journal file */
	jfd = open(filename, O_WRONLY);
	if (jfd < 0) {
		err = errno;
		goto free_block;
	}

	/* The first block of the file carries the journal superblock */
	nbytes = write(jfd, block, fs->blocksize);
	if (nbytes < 0) {
		err = errno;
		goto close_file;
	}
	if (nbytes != (int) fs->blocksize) {
		err = EXT2_ET_SHORT_WRITE;
		goto close_file;
	}

	/* From here on the buffer is reused as a block of zeros */
	memset(block, 0, fs->blocksize);
	err = 0;

	if (!(flags & EXT2_MKJOURNAL_LAZYINIT)) {
		/* Zero-fill the remaining num_blocks - 1 blocks */
		for (remaining = num_blocks; remaining > 1; remaining--) {
			nbytes = write(jfd, block, fs->blocksize);
			if (nbytes < 0) {
				err = errno;
				goto close_file;
			}
			if (nbytes != (int) fs->blocksize) {
				err = EXT2_ET_SHORT_WRITE;
				goto close_file;
			}
		}
	}

close_file:
	close(jfd);
free_block:
	ext2fs_free_mem(&block);
	return err;
}

/*
 * Convenience function which zeros out _num_ blocks starting at
 * _blk_.  In case of an error, the details of the error is returned
 * via _ret_blk_ and _ret_count_ if they are non-NULL pointers.
 * Returns 0 on success, and an error code on an error.
 *
 * As a special case, if the first argument is NULL, then it will
 * attempt to free the static zeroizing buffer.  (This is to keep
 * programs that check for memory leaks happy.)
*/ #define MAX_STRIDE_LENGTH (4194304 / (int) fs->blocksize) errcode_t ext2fs_zero_blocks2(ext2_filsys fs, blk64_t blk, int num, blk64_t *ret_blk, int *ret_count) { int j, count; static void *buf; static int stride_length; errcode_t retval; /* If fs is null, clean up the static buffer and return */ if (!fs) { if (buf) { free(buf); buf = 0; stride_length = 0; } return 0; } /* Deal with zeroing less than 1 block */ if (num <= 0) return 0; /* Try a zero out command, if supported */ retval = io_channel_zeroout(fs->io, blk, num); if (retval == 0) return 0; /* Allocate the zeroizing buffer if necessary */ if (num > stride_length && stride_length < MAX_STRIDE_LENGTH) { void *p; int new_stride = num; if (new_stride > MAX_STRIDE_LENGTH) new_stride = MAX_STRIDE_LENGTH; p = realloc(buf, fs->blocksize * new_stride); if (!p) return EXT2_ET_NO_MEMORY; buf = p; stride_length = new_stride; memset(buf, 0, fs->blocksize * stride_length); } /* OK, do the write loop */ j=0; while (j < num) { if (blk % stride_length) { count = stride_length - (blk % stride_length); if (count > (num - j)) count = num - j; } else { count = num - j; if (count > stride_length) count = stride_length; } retval = io_channel_write_blk64(fs->io, blk, count, buf); if (retval) { if (ret_count) *ret_count = count; if (ret_blk) *ret_blk = blk; return retval; } j += count; blk += count; } return 0; } errcode_t ext2fs_zero_blocks(ext2_filsys fs, blk_t blk, int num, blk_t *ret_blk, int *ret_count) { blk64_t ret_blk2; errcode_t retval; retval = ext2fs_zero_blocks2(fs, blk, num, &ret_blk2, ret_count); if (retval) *ret_blk = (blk_t) ret_blk2; return retval; } /* * Calculate the initial goal block to be roughly at the middle of the * filesystem. Pick a group that has the largest number of free * blocks. 
*/ static blk64_t get_midpoint_journal_block(ext2_filsys fs) { dgrp_t group, start, end, i, log_flex; group = ext2fs_group_of_blk2(fs, (ext2fs_blocks_count(fs->super) - fs->super->s_first_data_block) / 2); log_flex = 1 << fs->super->s_log_groups_per_flex; if (fs->super->s_log_groups_per_flex && (group > log_flex)) { group = group & ~(log_flex - 1); while ((group < fs->group_desc_count) && ext2fs_bg_free_blocks_count(fs, group) == 0) group++; if (group == fs->group_desc_count) group = 0; start = group; } else start = (group > 0) ? group-1 : group; end = ((group+1) < fs->group_desc_count) ? group+1 : group; group = start; for (i = start + 1; i <= end; i++) if (ext2fs_bg_free_blocks_count(fs, i) > ext2fs_bg_free_blocks_count(fs, group)) group = i; return ext2fs_group_first_block2(fs, group); } /* * This function creates a journal using direct I/O routines. */ static errcode_t write_journal_inode(ext2_filsys fs, ext2_ino_t journal_ino, blk_t num_blocks, blk64_t goal, int flags) { char *buf; errcode_t retval; struct ext2_inode inode; unsigned long long inode_size; int falloc_flags = EXT2_FALLOCATE_FORCE_INIT; blk64_t zblk; if ((retval = ext2fs_create_journal_superblock(fs, num_blocks, flags, &buf))) return retval; if ((retval = ext2fs_read_bitmaps(fs))) goto out2; if ((retval = ext2fs_read_inode(fs, journal_ino, &inode))) goto out2; if (inode.i_blocks > 0) { retval = EEXIST; goto out2; } if (goal == ~0ULL) goal = get_midpoint_journal_block(fs); if (ext2fs_has_feature_extents(fs->super)) inode.i_flags |= EXT4_EXTENTS_FL; if (!(flags & EXT2_MKJOURNAL_LAZYINIT)) falloc_flags |= EXT2_FALLOCATE_ZERO_BLOCKS; inode_size = (unsigned long long)fs->blocksize * num_blocks; inode.i_mtime = inode.i_ctime = fs->now ? 
fs->now : time(0); inode.i_links_count = 1; inode.i_mode = LINUX_S_IFREG | 0600; retval = ext2fs_inode_size_set(fs, &inode, inode_size); if (retval) goto out2; retval = ext2fs_fallocate(fs, falloc_flags, journal_ino, &inode, goal, 0, num_blocks); if (retval) goto out2; if ((retval = ext2fs_write_new_inode(fs, journal_ino, &inode))) goto out2; retval = ext2fs_bmap2(fs, journal_ino, &inode, NULL, 0, 0, NULL, &zblk); if (retval) goto out2; retval = io_channel_write_blk64(fs->io, zblk, 1, buf); if (retval) goto out2; memcpy(fs->super->s_jnl_blocks, inode.i_block, EXT2_N_BLOCKS*4); fs->super->s_jnl_blocks[15] = inode.i_size_high; fs->super->s_jnl_blocks[16] = inode.i_size; fs->super->s_jnl_backup_type = EXT3_JNL_BACKUP_BLOCKS; ext2fs_mark_super_dirty(fs); out2: ext2fs_free_mem(&buf); return retval; } /* * Find a reasonable journal file size (in blocks) given the number of blocks * in the filesystem. For very small filesystems, it is not reasonable to * have a journal that fills more than half of the filesystem. * * n.b. 
comments assume 4k blocks */ int ext2fs_default_journal_size(__u64 num_blocks) { if (num_blocks < 2048) return -1; if (num_blocks < 32768) /* 128 MB */ return (1024); /* 4 MB */ if (num_blocks < 256*1024) /* 1 GB */ return (4096); /* 16 MB */ if (num_blocks < 512*1024) /* 2 GB */ return (8192); /* 32 MB */ if (num_blocks < 4096*1024) /* 16 GB */ return (16384); /* 64 MB */ if (num_blocks < 8192*1024) /* 32 GB */ return (32768); /* 128 MB */ if (num_blocks < 16384*1024) /* 64 GB */ return (65536); /* 256 MB */ if (num_blocks < 32768*1024) /* 128 GB */ return (131072); /* 512 MB */ return 262144; /* 1 GB */ } int ext2fs_journal_sb_start(int blocksize) { if (blocksize == EXT2_MIN_BLOCK_SIZE) return 2; return 1; } /* * This function adds a journal device to a filesystem */ errcode_t ext2fs_add_journal_device(ext2_filsys fs, ext2_filsys journal_dev) { struct stat st; errcode_t retval; char buf[SUPERBLOCK_SIZE]; journal_superblock_t *jsb; int start; __u32 i, nr_users; /* Make sure the device exists and is a block device */ if (stat(journal_dev->device_name, &st) < 0) return errno; if (!S_ISBLK(st.st_mode)) return EXT2_ET_JOURNAL_NOT_BLOCK; /* Must be a block device */ /* Get the journal superblock */ start = ext2fs_journal_sb_start(journal_dev->blocksize); if ((retval = io_channel_read_blk64(journal_dev->io, start, -SUPERBLOCK_SIZE, buf))) return retval; jsb = (journal_superblock_t *) buf; if ((jsb->s_header.h_magic != (unsigned) ntohl(JFS_MAGIC_NUMBER)) || (jsb->s_header.h_blocktype != (unsigned) ntohl(JFS_SUPERBLOCK_V2))) return EXT2_ET_NO_JOURNAL_SB; if (ntohl(jsb->s_blocksize) != (unsigned long) fs->blocksize) return EXT2_ET_UNEXPECTED_BLOCK_SIZE; /* Check and see if this filesystem has already been added */ nr_users = ntohl(jsb->s_nr_users); for (i=0; i < nr_users; i++) { if (memcmp(fs->super->s_uuid, &jsb->s_users[i*16], 16) == 0) break; } if (i >= nr_users) { memcpy(&jsb->s_users[nr_users*16], fs->super->s_uuid, 16); jsb->s_nr_users = htonl(nr_users+1); } /* 
Writeback the journal superblock */ if ((retval = io_channel_write_blk64(journal_dev->io, start, -SUPERBLOCK_SIZE, buf))) return retval; fs->super->s_journal_inum = 0; fs->super->s_journal_dev = st.st_rdev; memcpy(fs->super->s_journal_uuid, jsb->s_uuid, sizeof(fs->super->s_journal_uuid)); memset(fs->super->s_jnl_blocks, 0, sizeof(fs->super->s_jnl_blocks)); ext2fs_set_feature_journal(fs->super); ext2fs_mark_super_dirty(fs); return 0; } /* * This function adds a journal inode to a filesystem, using either * POSIX routines if the filesystem is mounted, or using direct I/O * functions if it is not. */ errcode_t ext2fs_add_journal_inode2(ext2_filsys fs, blk_t num_blocks, blk64_t goal, int flags) { errcode_t retval; ext2_ino_t journal_ino; struct stat st; char jfile[1024]; int mount_flags; int fd = -1; if (flags & EXT2_MKJOURNAL_NO_MNT_CHECK) mount_flags = 0; else if ((retval = ext2fs_check_mount_point(fs->device_name, &mount_flags, jfile, sizeof(jfile)-10))) return retval; if (mount_flags & EXT2_MF_MOUNTED) { #if HAVE_EXT2_IOCTLS int f = 0; #endif strcat(jfile, "/.journal"); /* * If .../.journal already exists, make sure any * immutable or append-only flags are cleared. */ #if defined(HAVE_CHFLAGS) && defined(UF_NODUMP) (void) chflags (jfile, 0); #else #if HAVE_EXT2_IOCTLS fd = open(jfile, O_RDONLY); if (fd >= 0) { retval = ioctl(fd, EXT2_IOC_SETFLAGS, &f); close(fd); if (retval) return retval; } #endif #endif /* Create the journal file */ if ((fd = open(jfile, O_CREAT|O_WRONLY, 0600)) < 0) return errno; /* Note that we can't do lazy journal initialization for mounted * filesystems, since the zero writing is also allocating the * journal blocks. We could use fallocate, but not all kernels * support that, and creating a journal on a mounted ext2 * filesystems is extremely rare these days... Ignore it. 
*/ flags &= ~EXT2_MKJOURNAL_LAZYINIT; if ((retval = write_journal_file(fs, jfile, num_blocks, flags))) goto errout; /* Get inode number of the journal file */ if (fstat(fd, &st) < 0) { retval = errno; goto errout; } #if defined(HAVE_CHFLAGS) && defined(UF_NODUMP) retval = fchflags (fd, UF_NODUMP|UF_IMMUTABLE); #else #if HAVE_EXT2_IOCTLS if (ioctl(fd, EXT2_IOC_GETFLAGS, &f) < 0) { retval = errno; goto errout; } f |= EXT2_NODUMP_FL | EXT2_IMMUTABLE_FL; retval = ioctl(fd, EXT2_IOC_SETFLAGS, &f); #endif #endif if (retval) { retval = errno; goto errout; } if (close(fd) < 0) { retval = errno; fd = -1; goto errout; } journal_ino = st.st_ino; memset(fs->super->s_jnl_blocks, 0, sizeof(fs->super->s_jnl_blocks)); } else { if ((mount_flags & EXT2_MF_BUSY) && !(fs->flags & EXT2_FLAG_EXCLUSIVE)) { retval = EBUSY; goto errout; } journal_ino = EXT2_JOURNAL_INO; if ((retval = write_journal_inode(fs, journal_ino, num_blocks, goal, flags))) return retval; } fs->super->s_journal_inum = journal_ino; fs->super->s_journal_dev = 0; memset(fs->super->s_journal_uuid, 0, sizeof(fs->super->s_journal_uuid)); ext2fs_set_feature_journal(fs->super); ext2fs_mark_super_dirty(fs); return 0; errout: if (fd >= 0) close(fd); return retval; } errcode_t ext2fs_add_journal_inode(ext2_filsys fs, blk_t num_blocks, int flags) { return ext2fs_add_journal_inode2(fs, num_blocks, ~0ULL, flags); } #ifdef DEBUG main(int argc, char **argv) { errcode_t retval; char *device_name; ext2_filsys fs; if (argc < 2) { fprintf(stderr, "Usage: %s filesystem\n", argv[0]); exit(1); } device_name = argv[1]; retval = ext2fs_open (device_name, EXT2_FLAG_RW, 0, 0, unix_io_manager, &fs); if (retval) { com_err(argv[0], retval, "while opening %s", device_name); exit(1); } retval = ext2fs_add_journal_inode(fs, JFS_MIN_JOURNAL_BLOCKS, 0); if (retval) { com_err(argv[0], retval, "while adding journal to %s", device_name); exit(1); } retval = ext2fs_flush(fs); if (retval) { printf("Warning, had trouble writing out superblocks.\n"); } 
ext2fs_close_free(&fs); exit(0); } #endif