C++程序  |  544行  |  14.07 KB

/*
 * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it would be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 *
 * Further, this software is distributed without any warranty that it is
 * free of the rightful claim of any third person regarding infringement
 * or the like.  Any license provided herein, whether implied or
 * otherwise, applies only to this software file.  Patent licenses, if
 * any, provided herein do not apply to combinations of this program with
 * other software, or any other product whatsoever.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
 * Mountain View, CA  94043, or:
 *
 * http://www.sgi.com
 *
 * For further information regarding this notice, see:
 *
 * http://oss.sgi.com/projects/GenInfo/NoticeExplan/
 *
 */
/* $Id: rand_lines.c,v 1.5 2002/09/16 15:02:57 nstraz Exp $ */
/**************************************************************
 *
 *    OS Testing - Silicon Graphics, Inc.
 *
 *    TOOL IDENTIFIER   : rand_lines
 *
 *    DESCRIPTION       : prints lines from a file in random order
 *
 *    SYNOPSIS:
 *      rand_line [-hg][-S seed][-l numlines] [files...]
 *
 *    AUTHOR            : Richard Logan
 *
 *    CO-PILOT(s)       :
 *
 *    DATE STARTED      : 05/94
 *
 *    INPUT SPECIFICATIONS
 *     This tool will print lines of a file in random order.
 *     The max line length is 4096.
 *     The options supported are:
 *       -h     This option prints an help message then exits.
 *
 *       -g     This option specifies to count the number of lines
 *		in the file before randomizing.  This option overrides
 *		-l option.  Using this option, will give you the best
 *		randomization, but it requires processing
 *		the file an additional time.
 *
 *       -l numlines : This option specifies to randomize file in
 *		numlines chucks.  The default size is 4096.
 *
 *       -S seed     : sets randomization seed to seed.
 *		The default is time(0).  If seed is zero, time(0) is used.
 *
 *	 file   A readable, seekable filename.  The cmd allows the user
 *	 	to specify multiple files, but each file is dealt with
 *		separately.
 *
 *    DESIGN DESCRIPTION
 *	This tool uses a simple algorithm where the file is read.
 *	The offset to the each line is randomly placed into an
 *	array.  The array is then processed sequentially.  The infile's
 *	line who's offset in the array element is thus reread then printed.
 *	This output will thus be infile's lines in random order.
 *
 *    SPECIAL REQUIREMENTS
 *	None.
 *
 *    UPDATE HISTORY
 *      This should contain the description, author, and date of any
 *      "interesting" modifications (i.e. info should helpful in
 *      maintaining/enhancing this tool).
 *      username     description
 *      ----------------------------------------------------------------
 *	rrl 	    Creatation of program
 *	rrl  06/02  Fixed bug and some cleanup. Changed default chunk
 *	            and line size to 4096 characters.
 *
 *    BUGS/LIMITATIONS
 *	This program can not deal with non-seekable file like
 *	stdin or a pipe.  If more than one file is specified,
 *	each file is randomized one at a time.  The max line
 *	length is 4096 characters.
 *
 **************************************************************/

#include <err.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>

#include "random_range.h"

/*
 * Structure used to hold file line offset.
 */
struct offset_t {
	long used;
	long offset;
};

void usage(FILE * stream);
void help(void);
int rnd_file(FILE * infile, int numlines, long seed);
int get_numlines(FILE * infile);
int rnd_insert(struct offset_t offsets[], long offset, int size);

#define DEF_SIZE	4096	/* default chunk size */
#define MAX_LN_SZ	4096	/* max line size */

#ifndef SEEK_SET
#define SEEK_SET	0
#endif

char *Progname = NULL;

/***********************************************************************
 *  MAIN
 ***********************************************************************/
int main(int argc, char *argv[])
{
	FILE *infile;
	int c;
	long seed = -1;		/* use time as seed */
	int lsize = DEF_SIZE;	/* num lines to randomize */
	int getfilelines = 0;	/* if set, count lines first */

	if ((Progname = strrchr(argv[0], '/')) == NULL)
		Progname = argv[0];
	else
		Progname++;

	while ((c = getopt(argc, argv, "hgS:l:")) != EOF) {
		switch (c) {
		case 'h':
			help();
			exit(0);
			break;
		case 'S':	/* seed */
			if (sscanf(optarg, "%li", &seed) != 1) {
				fprintf(stderr,
					"%s: --S option argument is invalid\n",
					Progname);
				exit(1);
			}
			break;

		case 'l':	/* number of lines */
			if (sscanf(optarg, "%i", &lsize) != 1) {
				fprintf(stderr,
					"%s: --s option argument is invalid\n",
					Progname);
				exit(1);
			}
			break;

		case 'g':
			getfilelines++;
			break;

		case '?':
			usage(stderr);
			exit(1);
			break;
		}
	}

	if (optind + 1 != argc) {
		fprintf(stderr, "%s: Missing argument.\n", Progname);
		usage(stderr);
		exit(1);
	}

	if (seed == -1) {
		seed = time(0);
	}

	if (strcmp(argv[argc - 1], "-") == 0) {
		infile = stdin;
		fprintf(stderr, "%s: Can not support stdin processing.\n",
			Progname);
		exit(2);
	} else {

		if ((infile = fopen(argv[argc - 1], "r")) == NULL) {
			fprintf(stderr, "%s: Unable to open file %s: %s\n",
				Progname, argv[argc - 1], strerror(errno));
			exit(1);
		}

		if (getfilelines) {
			lsize = get_numlines(infile);
		}

		rnd_file(infile, lsize, seed);
	}

	exit(0);
}

/***********************************************************************
 * Print usage message to stream.
 ***********************************************************************/
void usage(FILE * stream)
{
	fprintf(stream,
		"Usage %s [-hg][-S seed][-l numlines] [files...]\n", Progname);

}

/***********************************************************************
 * Print help message to stdout.
 ***********************************************************************/
void help(void)
{
	usage(stdout);
	printf("This tool will print lines in random order (max line len %d).\n\
  -h          : print this help and exit\n\
  -g          : count the number of lines in the file before randomizing\n\
	        This option overrides -l option.\n\
  -l numlines : randoms lines in numlines chuncks (def %d)\n\
  -S seed     : sets seed to seed (def time(0))\n", MAX_LN_SZ, DEF_SIZE);

}

/***********************************************************************
 * counts the number of lines in already open file.
 * Note: File must be seekable (not stdin or a pipe).
 ***********************************************************************/
int get_numlines(FILE *infile)
{
	char line[MAX_LN_SZ];	/* max size of a line */
	int cnt = 0;

	while (fgets(line, MAX_LN_SZ, infile) != NULL) {
		cnt++;
	}

	/* rewind the file */
	fseek(infile, 0, SEEK_SET);

	return cnt;
}

/***********************************************************************
 *
 *  infile must be a fseekable file.  Thus, it can not be stdin.
 * It will read each line in the file, randomly saving the offset
 * of each line in a array of struct offset_t.
 * It will then print each line in the array stored order.
 *
 ***********************************************************************/
int rnd_file(FILE *infile,
	int numlines,	/* can be more or less than num lines in file */
			/* most opt randomized when num lines in files */
			/* or just a bit bigger */
	long seed)
{

	char line[MAX_LN_SZ];	/* max size of a line */
	int cnt;
	long coffset;		/* current line offset */

	struct offset_t *offsets;
	int memsize;

	if (numlines <= 0) {	/*use default */
		numlines = DEF_SIZE;
	}

	/*
	 * Malloc space for numlines copies the offset_t structure.
	 * This is where the randomization takes place.
	 */
	memsize = sizeof(struct offset_t) * numlines;

	if ((offsets = (struct offset_t *)malloc(memsize)) == NULL) {
		fprintf(stderr, "Unable to malloc(%d): errno:%d\n", memsize,
			errno);
		return -1;
	}

	random_range_seed(seed);

	coffset = 0;

	while (!feof(infile)) {

		fseek(infile, coffset, SEEK_SET);
		coffset = ftell(infile);
		memset(offsets, 0, memsize);
		cnt = 0;

		/*
		 * read the file in and place offset of each line randomly
		 * into offsets array.  Only numlines line can be randomized
		 * at a time.
		 */
		while (cnt < numlines && fgets(line, MAX_LN_SZ, infile) != NULL) {

			if (rnd_insert(offsets, coffset, numlines) < 0) {
				fprintf(stderr,
					"%s:%d rnd_insert() returned -1 (fatal error)!\n",
					__FILE__, __LINE__);
				abort();
			}
			cnt++;

			coffset = ftell(infile);
		}

		if (cnt == 0) {
			continue;
		}

		/*
		 * print out lines based on offset.
		 */
		for (cnt = 0; cnt < numlines; cnt++) {

			if (offsets[cnt].used) {
				fseek(infile, offsets[cnt].offset, SEEK_SET);
				if (fgets(line, MAX_LN_SZ, infile) == NULL)
					err(1, "fgets");
				fputs(line, stdout);
			}
		}

	}			/* end of file */

	return 0;
}

/***********************************************************************
 * This function randomly inserts offset information into
 * the offsets array.  The array has a size of size.
 * It will attempt 75 random array indexes before finding the first
 * open array element.
 *
 ***********************************************************************/
int rnd_insert(struct offset_t offsets[], long offset, int size)
{
	int rand_num;
	int quick = 0;
	int ind;

	/*
	 * Loop looking for random unused index.
	 * It will only be attempted 75 times.
	 */
	while (quick < 75) {

		rand_num = random_range(0, size - 1, 1, NULL);

		if (!offsets[rand_num].used) {
			offsets[rand_num].offset = offset;
			offsets[rand_num].used++;
			return rand_num;
		}
		quick++;
	}

	/*
	 * an randomly choosen index was not found, find
	 * first open index and use it.
	 */
	for (ind = 0; ind < size && offsets[ind].used != 0; ind++) ;	/* do nothing */

	if (ind >= size) {
		/*
		 * If called with an array where all offsets are used,
		 * we won't be able to find an open array location.
		 * Thus, return -1 indicating the error.
		 * This should never happen if called correctly.
		 */
		return -1;
	}

	offsets[ind].offset = offset;
	offsets[ind].used++;
	return ind;

}

/***********************************************************************
 *
 * CODE NOT TESTED AT ALL - it must be tested before it is used.
 *
 * This function was written to allow rand_lines to work on non-seekable
 * file (i.e stdin).
 *
 ***********************************************************************/
int rnd_stdin(FILE *infile,
	int space,	/* amount of space to use to read file into memory, */
			/* randomized and print.  randomize in chunks */
	int numlines,	/* can be more or less than num lines in file */
			/* most opt randomized when num lines in files */
			/* or just a bit bigger */
	long seed)
{

	char line[MAX_LN_SZ];	/* max size of a line */
	int cnt;		/* offset printer counter */
	long loffset;		/* last line address */
	char *buffer;		/* malloc space for file reads */
	char *rdbuff;		/* where to start read */
	long stopaddr;		/* end of read space (address) */
	int rdsz;		/* amount read */
	int sztord;
	char *chr;		/* buffer processing pointer */
	char *ptr;		/* printing processing pointer */
	char *lptr;		/* printing processing pointer */
	int loopcntl = 1;	/* main loop control flag */
	struct offset_t *offsets;	/* pointer to offset space */
	int memsize;		/* amount of offset space to malloc */
	int newbuffer = 1;	/* need new buffer */

	if (numlines <= 0) {	/*use default */
		numlines = DEF_SIZE;
	}

	/*
	 * Malloc space for file contents
	 */
	if ((buffer = (char *)malloc(space)) == NULL) {
		fprintf(stderr, "Unable to malloc(%d): errno:%d\n", space,
			errno);
		return -1;
	}

	/*
	 * Malloc space for numlines copies the offset_t structure.
	 * This is where the randomization takes place.
	 */
	memsize = sizeof(struct offset_t) * numlines;

	if ((offsets = (struct offset_t *)malloc(memsize)) == NULL) {
		fprintf(stderr, "Unable to malloc(%d): errno:%d\n", memsize,
			errno);
		return -1;
	}

	random_range_seed(seed);
	rdbuff = buffer;	/* read into start of buffer */
	sztord = space;		/* amount of space left in buffer */

	/*
	 *  Loop until read doesn't read anything
	 *  If last line does not end in newline, it is not printed
	 */
	while (loopcntl) {
		/*
		 *  read in file up to space size
		 *  only works if used as filter.
		 *  The code will randomize one reads worth at a time.
		 *  If typing in lines, read will read only one line - no randomizing.
		 */

		chr = buffer;
		if ((rdsz = fread((void *)rdbuff, sztord, 1, infile)) == 0) {
			fprintf(stderr,
				"input file is empty, done randomizing\n");
			loopcntl = 0;
			return 0;
		}

		stopaddr = ((long)buffer + rdsz);

		loffset = (long)buffer;

		while (!newbuffer) {

			while ((long)chr < stopaddr && *chr != '\n')
				chr++;

			chr++;

			if ((long)chr >= stopaddr) {

				fprintf(stderr, "end of read in buffer\n");

				/*
				 * print out lines based on offset.
				 */
				for (cnt = 0; cnt < numlines; cnt++) {

					if (offsets[cnt].used) {
						ptr =
						    (char *)offsets[cnt].offset;
						/*
						 * copy buffer characters into line for printing
						 */
						lptr = line;
						while (*ptr != '\n')
							*lptr++ = *ptr++;

						printf("%s\n", line);
					}
				}

				/*
				 * move start of partically read line to beginning of buffer
				 * and adjust rdbuff to end of partically read line
				 */
				memcpy((void *)loffset, buffer,
				       (stopaddr - loffset));
				rdbuff = buffer + (stopaddr - loffset);
				sztord = space - (stopaddr - loffset);

				newbuffer++;
			}

			if (rnd_insert(offsets, loffset, numlines) < 0) {
				fprintf(stderr,
					"%s:%d rnd_insert() returned -1 (fatal error)!\n",
					__FILE__, __LINE__);
				abort();
			}

			loffset = (long)chr;
		}
	}

	return 0;

}