/*
    Disk latency benchmarking tool
    Copyright (C) 2006-2018 Darrick Wong

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/

#define PACKAGE "bogoseek"
#include "bogodisk.h"
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <inttypes.h>
#include <time.h>
#include <unistd.h>
#include <stdint.h>
#include <sched.h>
#include <fcntl.h>
#include <errno.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/mount.h>
#include <sys/syscall.h>
#include <asm/unistd.h>
#include <pthread.h>
#include "util.h"

/*
 * Signature of the positional I/O routine used by the benchmark.  pread()
 * matches it directly; pwrite() is cast to it in main().
 */
typedef ssize_t (*io_func_t)(int fd, void *buf, size_t count, off_t offset);

/* Nonzero: try to mlock() the I/O buffer.  Cleared by the -y option. */
static int use_mlock = 1;

/* Nonzero: call bump_priority() before timing.  Cleared by the -i option. */
static int boost_ioprio = 1;

/*
 * Parameters and shared state for a benchmark run.  main() fills this in
 * once and hands the same instance to every time_device() call; in
 * multithreaded mode all worker threads share it.
 */
struct bogoseek_context {
	/* Name of the device currently being tested. */
	const char *dev;

	/*
	 * Test range, I/O size and seek count.  A value of zero means
	 * "not specified"; time_device() substitutes a default for stop,
	 * bufsize and max_seeks.
	 */
	uint64_t start;
	uint64_t stop;
	uint64_t bufsize;
	uint64_t max_seeks;

	/* Stream that receives the per-seek results (stdout unless -o). */
	FILE *report;

	/* Start barrier used only when has_threads is nonzero. */
	pthread_mutex_t lock;
	pthread_cond_t cond;
	/* Number of workers that have reached the barrier; guarded by lock. */
	unsigned int threads;
	/* Nonzero when running more than one worker thread. */
	int has_threads;

	/* Positional I/O routine: pread, or pwrite for the -w write test. */
	io_func_t io_func;
	/* open() flags: access mode (plus O_SYNC with -z) and O_DIRECT or 0. */
	int access_flag;
	int direct_flag;
};

/*
 * Default value for bogoseek_context.direct_flag: O_DIRECT where the
 * platform provides it, otherwise 0 (with a compile-time warning).
 */
#ifdef O_DIRECT
static int default_odirect_flag = O_DIRECT;
#else
/* "#warn" is not a valid directive and would break the build here. */
#warning O_DIRECT not supported!
static int default_odirect_flag = 0;
#endif

/*
 * Print the usage summary and the list of command-line options to stdout.
 * The -d entry is only listed when the platform defines O_DIRECT.
 */
static void print_help(void) {
	static const char *const option_lines[] = {
		" -b	Start test at this location.\n",
		" -c	Use memory buffer of this size.\n",
#ifdef O_DIRECT
		" -d     Do not bypass disk cache via O_DIRECT.\n",
#endif
		" -e	End test after this location.\n",
		" -i	Do not boost IO priority.\n",
		" -n	Run with the specified number of threads.\n",
		" -o	Save output in the file \"report\".\n",
		" -s	Perform no more than this many seeks.\n",
		" -w	Destructive write test.\n",
		" -y	Allow buffers to be swapped.\n",
		" -z	Use O_SYNC.\n",
	};
	const size_t line_count = sizeof(option_lines) / sizeof(option_lines[0]);
	size_t idx;

	printf("Usage: %s [options] device [devices...]\n", PACKAGE);
	fputs("\n", stdout);
	fputs("Options:\n", stdout);

	for (idx = 0; idx < line_count; idx++)
		fputs(option_lines[idx], stdout);
}

/*
 * Join the start barrier used in multithreaded mode: bump the count of
 * ready workers and sleep until main() broadcasts on the condition
 * variable.  Does nothing when the run is single-threaded.
 *
 * NOTE(review): there is no predicate guarding the wait, so a spurious
 * wakeup would release a worker early; fixing that needs a "go" flag in
 * struct bogoseek_context.
 */
static void bogoseek_wait_for_start(struct bogoseek_context *bc)
{
	if (!bc->has_threads)
		return;

	pthread_mutex_lock(&bc->lock);
	bc->threads++;
	pthread_cond_wait(&bc->cond, &bc->lock);
	pthread_mutex_unlock(&bc->lock);
}

/*
 * Run the random-seek latency test against bc->dev.
 *
 * Validates and defaults the range/buffer/seek-count parameters in *bc,
 * allocates an aligned buffer, opens the device, and then performs
 * bc->max_seeks I/Os of bc->bufsize bytes at random aligned offsets,
 * writing one "old offset, new offset, distance, milliseconds" line per
 * I/O to bc->report and a progress line to stdout roughly every 5 seconds.
 *
 * In multithreaded mode every worker reaches the start barrier exactly
 * once, even when setup fails, so main() never waits for a worker that
 * has already given up.
 *
 * NOTE(review): the defaulted parameters are written back into the shared
 * context, so concurrent workers race on those stores (they all store the
 * same values unless start/stop need swapping).
 */
static void time_device(struct bogoseek_context *bc)
{
	uint64_t max_size, pos, newpos, i, oldpos;
	void *buf = NULL;
	struct timespec now, before;
	double nowf, beforef, repf;
	int clock_type = CLOCK_MONOTONIC;
	int fd, err;
	int synced = 0;
	ssize_t ret;
	uint32_t blksz;

	/* Flip backwards params */
	if (bc->start && bc->stop && (bc->start > bc->stop)) {
		max_size = bc->start;
		bc->start = bc->stop;
		bc->stop = max_size;
	}

	/* Check parameters */
	get_size_and_block_size(bc->dev, &max_size, &blksz, NULL);
	if (!max_size) {
		fprintf(stderr, "%s: Size is zero, skipping.\n", bc->dev);
		goto release_barrier;
	}

	if (bc->start > max_size) {
		fprintf(stderr, "%s: Starting at %"PRIu64", which is beyond "
			"the end of the device at %"PRIu64"!\n", bc->dev,
			bc->start, max_size);
		goto release_barrier;
	}

	if (bc->stop > max_size) {
		fprintf(stderr, "%s: Clamping end to %"PRIu64".\n", bc->dev,
			max_size);
		bc->stop = max_size;
	} else if (!bc->stop) {
		bc->stop = max_size;
	}

	if (!bc->bufsize)
		bc->bufsize = blksz;

	/* Default seek count; widen first so blksz * 1000 cannot wrap. */
	if (!bc->max_seeks)
		bc->max_seeks = (bc->stop - bc->start) /
				((uint64_t)blksz * 1000);

	if (bc->report != stdout)
		fprintf(stdout, "%s: Seeking %"PRIu64" times from %"PRIu64
			" to %"PRIu64" in %"PRIu64" byte chunks.\n", bc->dev,
			bc->max_seeks, bc->start, bc->stop, bc->bufsize);
	fprintf(bc->report, "%s: Seeking %"PRIu64" times from %"PRIu64" to %"
		PRIu64" in %"PRIu64" byte chunks.\n", bc->dev, bc->max_seeks,
		bc->start, bc->stop, bc->bufsize);
	fflush(bc->report);
	fflush(stdout);

	/*
	 * grab buffer; posix_memalign() returns the error number instead of
	 * setting errno, so hand it to perror() explicitly.
	 */
	err = posix_memalign(&buf, blksz, bc->bufsize);
	if (err) {
		errno = err;
		perror("posix_memalign");
		goto release_barrier;
	}

	/* lock pages */
#ifdef _POSIX_MEMLOCK_RANGE
	if (use_mlock && mlock(buf, bc->bufsize)) {
		perror("mlock");
		fprintf(stderr, "Can't lock pages; proceeding anyway.\n");
	}
#else
#warning mlock not supported
#endif

	/* open device; open() reports failure with -1, not 0 */
	fd = open(bc->dev, bc->access_flag | bc->direct_flag | O_LARGEFILE);
	if (fd < 0) {
		perror(bc->dev);
		goto unlock_mem;
	}

	/* make sure CLOCK_MONOTONIC is supported */
	if (clock_gettime(clock_type, &now)) {
		clock_type = CLOCK_REALTIME;
	}

	repf = 0.0;

	/* wait until all io threads are ready */
	bogoseek_wait_for_start(bc);
	synced = 1;

	/*
	 * NOW increase io/sched priority.  We have to wait until the last
	 * minute because otherwise the threads won't sync up properly.
	 */
	if (boost_ioprio && bump_priority())
		goto out;

	/* push the disk to a known location */
	newpos = get_randnum_align(bc->start, bc->stop, bc->bufsize);
	ret = bc->io_func(fd, buf, bc->bufsize, newpos);
	if (!ret)
		goto out;
	if (ret < 0) {
		/* Save errno: perror() is allowed to modify it. */
		err = errno;
		perror(bc->dev);
		if (err != EIO)
			goto out;
	}

	/* loop */
	for (i = bc->max_seeks; i > 0; i--) {
		oldpos = newpos;
		newpos = get_randnum_align(bc->start, bc->stop, bc->bufsize);
		clock_gettime(clock_type, &before);
		ret = bc->io_func(fd, buf, bc->bufsize, newpos);
		if (!ret)
			break;
		if (ret < 0) {
			/*
			 * The call returns -1 and puts the reason in errno.
			 * Keep going after a plain I/O error (the seek is
			 * still timed and reported); stop on anything else.
			 */
			err = errno;
			perror(bc->dev);
			if (err != EIO)
				break;
		}
		clock_gettime(clock_type, &now);

		nowf = (now.tv_sec) + ((double)now.tv_nsec / 1000000000);
		beforef = (before.tv_sec) +
			  ((double)before.tv_nsec / 1000000000);
		/* absolute distance between the two offsets */
		if (newpos > oldpos)
			pos = newpos - oldpos;
		else
			pos = oldpos - newpos;
		fprintf(bc->report, "%"PRIu64", %"PRIu64", %"PRIu64", %.3f\n",
			oldpos, newpos, pos, (nowf - beforef) * 1000);
		/* progress update at most once every 5 seconds */
		if ((nowf - repf) > 5) {
			fprintf(stdout, "%s: %"PRIu64" seeks (%.2f%%)"
				"            \r",
				bc->dev, bc->max_seeks - i,
				((double)1.0 - ((double)i / bc->max_seeks)) * 100);
			fflush(stdout);
			repf = nowf;
		}
	}

	fprintf(stdout, "%s: %"PRIu64" seeks (%.2f%%)            \n",
		bc->dev, bc->max_seeks - i,
		((double)1.0 - ((double)i / bc->max_seeks)) * 100);
	fflush(stdout);
	fflush(bc->report);

out:
	/* close device */
	close(fd);

unlock_mem:
	/* unlock pages */
#ifdef _POSIX_MEMLOCK_RANGE
	if (munlock(buf, bc->bufsize)) {
		perror("munlock");
	}
#endif
	free(buf);

release_barrier:
	/*
	 * A worker that bailed out during setup must still check in at the
	 * barrier, otherwise main() waits forever for the ready count.
	 */
	if (!synced)
		bogoseek_wait_for_start(bc);
}

/*
 * pthread start routine: arg is the shared struct bogoseek_context passed
 * to pthread_create() by main().  Runs the benchmark and returns NULL.
 */
static void *time_device_helper(void *arg)
{
	struct bogoseek_context *ctx = arg;

	time_device(ctx);

	return NULL;
}

/*
 * Entry point: parse the command line, then benchmark every device named
 * after the options.
 *
 * With one thread (the default) each device is tested directly.  With
 * -n N, N worker threads are started per device; they are held at a
 * barrier until all of them are ready and then released together.
 *
 * Returns 0 on success, 1 on usage errors, 2 if the report file cannot be
 * opened or seed_random() returns zero, and 3 if the thread array cannot
 * be allocated or a worker thread cannot be created.
 */
int main(int argc, char *argv[])
{
	struct bogoseek_context bc;
	pthread_t *threads;
	unsigned int nthreads = 1, started, ready, j;
	int i;
	int c;
	int err;
	int status = 0;
	/* Must start at 0: it is only set when -z is given. */
	int use_sync = 0;

	bc.report = stdout;
	bc.start = bc.stop = bc.max_seeks = bc.bufsize = 0;
	bc.io_func = pread;
	bc.access_flag = O_RDONLY;
	bc.direct_flag = default_odirect_flag;

	fprintf(stdout, "%s %s, Copyright (C) 2006-2018 Darrick Wong.\n",
		PACKAGE, PACKAGE_VERSION);

	/* parse args */
	while((c = getopt(argc, argv, "wb:e:c:o:s:dn:iyz")) != -1) {
		switch(c) {
			case 'o':
				bc.report = fopen(optarg, "w+");
				if (!bc.report) {
					perror(optarg);
					return 2;
				}
				break;
			case 'b':
				bc.start = get_number(optarg);
				break;
			case 'e':
				bc.stop = get_number(optarg);
				break;
			case 'c':
				bc.bufsize = get_number(optarg);
				break;
			case 's':
				bc.max_seeks = get_number(optarg);
				break;
			case 'd':
				bc.direct_flag = 0;
				break;
			case 'w':
				/* pwrite takes a const buffer, hence the cast */
				bc.io_func = (io_func_t)pwrite;
				bc.access_flag = O_WRONLY;
				break;
			case 'n':
				nthreads = get_number(optarg);
				break;
			case 'y':
				use_mlock = 0;
				break;
			case 'z':
				use_sync = 1;
				break;
			case 'i':
				boost_ioprio = 0;
				break;
			default:
				print_help();
				return 1;
		}
	}

	/* applied after parsing so that -w and -z work in either order */
	if (use_sync)
		bc.access_flag |= O_SYNC;

	if (argc == optind || !nthreads) {
		print_help();
		return 1;
	}

	if (!seed_random())
		return 2;

	bc.has_threads = 0;
	if (nthreads == 1) {
		for (i = optind; i < argc; i++) {
			bc.dev = argv[i];
			time_device(&bc);
		}
		return 0;
	}

	/* multithreaded mode */
	bc.has_threads = 1;
	threads = calloc(nthreads, sizeof (*threads));
	if (!threads) {
		perror("calloc");
		return 3;
	}
	pthread_mutex_init(&bc.lock, NULL);
	pthread_cond_init(&bc.cond, NULL);

	for (i = optind; i < argc; i++) {
		bc.threads = 0;
		bc.dev = argv[i];

		/*
		 * Count only the workers that really started, so the barrier
		 * and the joins below never wait on a thread that does not
		 * exist.
		 */
		started = 0;
		for (j = 0; j < nthreads; j++) {
			err = pthread_create(&threads[started], NULL,
					     time_device_helper, &bc);
			if (err) {
				fprintf(stderr, "pthread_create: %s\n",
					strerror(err));
				status = 3;
				break;
			}
			started++;
		}

		/*
		 * hold all threads until they're ready; the counter is
		 * updated under bc.lock, so read it under the lock too
		 */
		for (;;) {
			pthread_mutex_lock(&bc.lock);
			ready = bc.threads;
			pthread_mutex_unlock(&bc.lock);
			if (ready == started)
				break;
			usleep(10000);
		}
		pthread_mutex_lock(&bc.lock);
		pthread_cond_broadcast(&bc.cond);
		pthread_mutex_unlock(&bc.lock);

		for (j = 0; j < started; j++)
			pthread_join(threads[j], NULL);
	}

	pthread_cond_destroy(&bc.cond);
	pthread_mutex_destroy(&bc.lock);
	free(threads);

	return status;
}
