/*
 * readahead.c -- Prefetch filesystem metadata to speed up fsck.
 *
 * Copyright (C) 2014 Oracle.
 *
 * %Begin-Header%
 * This file may be redistributed under the terms of the GNU Library
 * General Public License, version 2.
 * %End-Header%
 */

#include "config.h"
#include <string.h>

#include "e2fsck.h"

#undef DEBUG

#ifdef DEBUG
# define dbg_printf(f, a...)  do {printf(f, ## a); fflush(stdout); } while (0)
#else
# define dbg_printf(f, a...)
#endif

struct read_dblist {
	errcode_t err;
	blk64_t run_start;
	blk64_t run_len;
	int flags;
};

static int readahead_dir_block(ext2_filsys fs, struct ext2_db_entry2 *db,
			       void *priv_data)
{
	struct read_dblist *pr = priv_data;
	e2_blkcnt_t count = (pr->flags & E2FSCK_RA_DBLIST_IGNORE_BLOCKCNT ?
			     1 : db->blockcnt);

	if (!pr->run_len || db->blk != pr->run_start + pr->run_len) {
		if (pr->run_len) {
			pr->err = io_channel_cache_readahead(fs->io,
							     pr->run_start,
							     pr->run_len);
			dbg_printf("readahead start=%llu len=%llu err=%d\n",
				   pr->run_start, pr->run_len,
				   (int)pr->err);
		}
		pr->run_start = db->blk;
		pr->run_len = 0;
	}
	pr->run_len += count;

	return pr->err ? DBLIST_ABORT : 0;
}

errcode_t e2fsck_readahead_dblist(ext2_filsys fs, int flags,
				  ext2_dblist dblist,
				  unsigned long long start,
				  unsigned long long count)
{
	errcode_t err;
	struct read_dblist pr;

	dbg_printf("%s: flags=0x%x\n", __func__, flags);
	if (flags & ~E2FSCK_RA_DBLIST_ALL_FLAGS)
		return EXT2_ET_INVALID_ARGUMENT;

	memset(&pr, 0, sizeof(pr));
	pr.flags = flags;
	err = ext2fs_dblist_iterate3(dblist, readahead_dir_block, start,
				     count, &pr);
	if (pr.err)
		return pr.err;
	if (err)
		return err;

	if (pr.run_len)
		err = io_channel_cache_readahead(fs->io, pr.run_start,
						 pr.run_len);

	return err;
}

static errcode_t e2fsck_readahead_bitmap(ext2_filsys fs,
					 ext2fs_block_bitmap ra_map)
{
	blk64_t start, end, out;
	errcode_t err;

	start = 1;
	end = ext2fs_blocks_count(fs->super) - 1;

	err = ext2fs_find_first_set_block_bitmap2(ra_map, start, end, &out);
	while (err == 0) {
		start = out;
		err = ext2fs_find_first_zero_block_bitmap2(ra_map, start, end,
							   &out);
		if (err == ENOENT) {
			out = end;
			err = 0;
		} else if (err)
			break;

		err = io_channel_cache_readahead(fs->io, start, out - start);
		if (err)
			break;
		start = out;
		err = ext2fs_find_first_set_block_bitmap2(ra_map, start, end,
							  &out);
	}

	if (err == ENOENT)
		err = 0;

	return err;
}

/* Try not to spew bitmap range errors for readahead */
static errcode_t mark_bmap_range(ext2fs_block_bitmap map,
				 blk64_t blk, unsigned int num)
{
	if (blk >= ext2fs_get_generic_bmap_start(map) &&
	    blk + num <= ext2fs_get_generic_bmap_end(map))
		ext2fs_mark_block_bitmap_range2(map, blk, num);
	else
		return EXT2_ET_INVALID_ARGUMENT;
	return 0;
}

static errcode_t mark_bmap(ext2fs_block_bitmap map, blk64_t blk)
{
	if (blk >= ext2fs_get_generic_bmap_start(map) &&
	    blk <= ext2fs_get_generic_bmap_end(map))
		ext2fs_mark_block_bitmap2(map, blk);
	else
		return EXT2_ET_INVALID_ARGUMENT;
	return 0;
}

errcode_t e2fsck_readahead(ext2_filsys fs, int flags, dgrp_t start,
			   dgrp_t ngroups)
{
	blk64_t		super, old_gdt, new_gdt;
	blk_t		blocks;
	dgrp_t		i;
	ext2fs_block_bitmap		ra_map = NULL;
	dgrp_t		end = start + ngroups;
	errcode_t	err = 0;

	dbg_printf("%s: flags=0x%x start=%d groups=%d\n", __func__, flags,
		   start, ngroups);
	if (flags & ~E2FSCK_READA_ALL_FLAGS)
		return EXT2_ET_INVALID_ARGUMENT;

	if (end > fs->group_desc_count)
		end = fs->group_desc_count;

	if (flags == 0)
		return 0;

	err = ext2fs_allocate_block_bitmap(fs, "readahead bitmap",
					   &ra_map);
	if (err)
		return err;

	for (i = start; i < end; i++) {
		err = ext2fs_super_and_bgd_loc2(fs, i, &super, &old_gdt,
						&new_gdt, &blocks);
		if (err)
			break;

		if (flags & E2FSCK_READA_SUPER) {
			err = mark_bmap(ra_map, super);
			if (err)
				break;
		}

		if (flags & E2FSCK_READA_GDT) {
			err = mark_bmap_range(ra_map,
					      old_gdt ? old_gdt : new_gdt,
					      blocks);
			if (err)
				break;
		}

		if ((flags & E2FSCK_READA_BBITMAP) &&
		    !ext2fs_bg_flags_test(fs, i, EXT2_BG_BLOCK_UNINIT) &&
		    ext2fs_bg_free_blocks_count(fs, i) <
				fs->super->s_blocks_per_group) {
			super = ext2fs_block_bitmap_loc(fs, i);
			err = mark_bmap(ra_map, super);
			if (err)
				break;
		}

		if ((flags & E2FSCK_READA_IBITMAP) &&
		    !ext2fs_bg_flags_test(fs, i, EXT2_BG_INODE_UNINIT) &&
		    ext2fs_bg_free_inodes_count(fs, i) <
				fs->super->s_inodes_per_group) {
			super = ext2fs_inode_bitmap_loc(fs, i);
			err = mark_bmap(ra_map, super);
			if (err)
				break;
		}

		if ((flags & E2FSCK_READA_ITABLE) &&
		    ext2fs_bg_free_inodes_count(fs, i) <
				fs->super->s_inodes_per_group) {
			super = ext2fs_inode_table_loc(fs, i);
			blocks = fs->inode_blocks_per_group -
				 (ext2fs_bg_itable_unused(fs, i) *
				  EXT2_INODE_SIZE(fs->super) / fs->blocksize);
			err = mark_bmap_range(ra_map, super, blocks);
			if (err)
				break;
		}
	}

	if (!err)
		err = e2fsck_readahead_bitmap(fs, ra_map);

	ext2fs_free_block_bitmap(ra_map);
	return err;
}

int e2fsck_can_readahead(ext2_filsys fs)
{
	errcode_t err;

	err = io_channel_cache_readahead(fs->io, 0, 1);
	dbg_printf("%s: supp=%d\n", __func__, err != EXT2_ET_OP_NOT_SUPPORTED);
	return err != EXT2_ET_OP_NOT_SUPPORTED;
}

unsigned long long e2fsck_guess_readahead(ext2_filsys fs)
{
	unsigned long long guess;

	/*
	 * The optimal readahead sizes were experimentally determined by
	 * djwong in August 2014.  Setting the RA size to two block groups'
	 * worth of inode table blocks seems to yield the largest reductions
	 * in e2fsck runtime.
	 */
	guess = 2ULL * fs->blocksize * fs->inode_blocks_per_group;

	/* Disable RA if it'd use more 1/50th of RAM. */
	if (get_memory_size() > (guess * 50))
		return guess / 1024;

	return 0;
}