/*
 * thrulay.c -- network throughput tester (the client part).
 * 
 * Written by Stanislav Shalunov, http://www.internet2.edu/~shalunov/
 *            Huadong Liu, http://www.cs.utk.edu/~hliu/
 * 
 * Copyright 2003, Internet2.
 * Legal conditions are in file LICENSE
 * (MD5 = ecfa50d1b0bfbb81b658c810d0476a52).
 */

#include <stdio.h>
#include <stdlib.h>
#include <signal.h>
#include <time.h>
#include <unistd.h>
#include <string.h>
#include <errno.h>
#include "thrulay.h"
#include "util.h"
#include "rcs.h"

/*
 * Record the RCS revision string of this file through the RCS_ID macro
 * (presumably provided by "rcs.h" -- its exact expansion is not visible
 * here, confirm there).
 */
RCS_ID("@(#) $Id: thrulay.c,v 1.17 2005/08/25 15:52:34 hliu Exp $")

/*
 * The thrulay server uses one day (expressed here in seconds) as an
 * infinite delay.  If a delay value equals DELAY_INF * 1000.0 ms, the
 * client should treat it as infinity.
 *
 * The expansion is parenthesized so that the macro behaves as a single
 * value in any expression (e.g. x / DELAY_INF).
 */
#define DELAY_INF  (24 * 60 * 60)

/*
 * Print the usage of the thrulay client (synopsis followed by a
 * description of every option) to stderr, then terminate the process
 * with exit status 1.  This function does not return.
 */
static void
usage(void)
{
    fprintf(stderr, "Usage: thrulay [-t#] [-i#] [-w#] [-l#] [-p#] [-n#] "
            "[-u#] [-by/n] [-S#] [-D#] host\n");
    fprintf(stderr, "-t#\t\ttest duration, in seconds (default: 60s)\n");
    fprintf(stderr, "-i#\t\treporting interval, in seconds (default: 1s)\n");
    fprintf(stderr, "-w#\t\twindow, in bytes (default: 4194304B)\n");
    fprintf(stderr, "-l#\t\tblock size (default: 8192B)\n");
    fprintf(stderr, "-p#\t\tserver port (default: 5003)\n");
    fprintf(stderr, "-n#\t\tnumber of parallel streams (default: 1)\n");
    fprintf(stderr, "-u#[kMGT]\tUDP mode with given rate (default: off)\n");
    fprintf(stderr, "\t\tIn UDP mode, rate is in bits per second and can be\n"
            "\t\tfollowed by a SI suffix (K/k for 1000, M/m for 1000000,\n"
            "\t\tG/g for 10^9, T/t for 10^12);\n"
            "\t\tdefault packet size, 1500, can be changed with -l;\n"
            "\t\twindow size becomes the UDP send buffer size;\n"
            "\t\treporting interval is ignored.\n");
    fprintf(stderr, "-b[y/n]\t\tbusy wait or not when sending UDP packets "
            "(default: y)\n");
    fprintf(stderr, "-S#\t\tTOS(type-of-service) for outgoing packets. You may\n"
            "\t\tspecify the value in hex with a '0x' prefix, in octal with\n"
            "\t\ta '0' prefix, or in decimal. Valid values are:\n"
            "\t\t0x10(IPTOS_LOWDELAY) for minimize delay;\n"
            "\t\t0x08(IPTOS_THROUGHPUT) for maximize throughput;\n"
            "\t\t0x04(IPTOS_RELIABILITY) for maximize reliability;\n"
            "\t\t0x02(IPTOS_LOWCOST) for minimize cost.\n");
    fprintf(stderr, "-D#\t\tDSCP value for the TOS byte, "
            "mutually exclusive with \"-S\"\n");
    fprintf(stderr, "host\t\tserver to send test data to (no default)\n");

    exit(1);
}

/* 
 * Convert a string like "12k" to a number like 12000.  
 *
 * The string must be a non-negative decimal integer, optionally followed
 * by exactly one SI suffix: k/K (10^3), m/M (10^6), g/G (10^9) or
 * t/T (10^12).  The input string is not modified.
 *
 * Return zero on error: NULL or empty string, unknown suffix, no digits,
 * stray characters, a negative value, or a result that does not fit in
 * 64 bits.
 */
u_int64_t
rate2i(char *s)
{
    const u_int64_t max = ~(u_int64_t)0;
    u_int64_t multiplier;
    size_t len;
    char last;
    char *digits_end;   /* Where the numeric part must stop. */
    char *p;
    long long value;

    if (s == NULL)
        return 0;
    len = strlen(s);
    if (len == 0)       /* Nothing to look at; s[len-1] would be invalid. */
        return 0;

    /* First, set the multiple from the last character. */
    last = s[len - 1];
    digits_end = s + len - 1;
    switch (last) {
    case 'k':
    case 'K':
        multiplier = 1000;
        break;
    case 'm':
    case 'M':
        multiplier = 1000000;
        break;
    case 'g':
    case 'G':
        multiplier = 1000000000ULL;
        break;
    case 't':
    case 'T':
        multiplier = 1000000000000ULL;
        break;
    default:
        if (last < '0' || last > '9')
            return 0;   /* Unknown suffix. */
        multiplier = 1;
        digits_end = s + len;   /* No suffix: digits run to the end. */
    }

    errno = 0;
    value = strtoll(s, &p, 10);
    /* Reject: no digits, stray characters, out of range, negative. */
    if (p == s || p != digits_end || errno == ERANGE || value < 0)
        return 0;
    /* Reject results that would wrap around 64 bits. */
    if ((u_int64_t)value > max / multiplier)
        return 0;

    return (u_int64_t)value * multiplier;
}

/*
 * Parse thrulay client settings from the command line.
 *
 * argc/argv are the arguments as received by main().  On return *traffic
 * is set to THRULAY_UDP when a rate was given with -u and to THRULAY_TCP
 * otherwise.
 *
 * Returns a calloc()ed settings structure.  The caller should cast the
 * return value to either (thrulay_setting_tcp_t *) or
 * (thrulay_setting_udp_t *) after determining the value in *traffic.
 * The server_name member points into argv; the string is not copied.
 *
 * Any invalid option or option value prints a diagnostic to stderr and
 * terminates the process through usage().
 */
static void * 
get_settings(int argc, char *argv[], thrulay_traffic_type_t *traffic)
{
    char *server_name;
    int server_port = 5003;
    /* How long to run, default is 60 seconds. */
    int test_duration = 60;
    /* How often to report, default is 1 second. */
    int report_interval = 1;
    /* Window size for TCP, send buffer size for UDP, in bytes. */
    int buffer_size = 4194304;
    /* Packet size for both TCP and UDP, in bytes; 0 means "not given". */
    int block_size = 0;
    /* UDP packet send rate in b/s,  zero in TCP test. */
    u_int64_t rate = 0;
    /* busywait flag in UDP test, default is busywait. */
    thrulay_wait_type_t wt = THRULAY_BUSYWAIT;
    /* Type-of-service for outgoing packets for both TCP and UDP tests. */
    int tos = 0;
    /* Set once -S or -D has been seen; the two are mutually exclusive. */
    int tos_set = 0;
    /* Number of parallel test streams, default is 1. */
    int num_stream = 1;
    
    thrulay_setting_tcp_t *opt_tcp;
    thrulay_setting_udp_t *opt_udp;
    void *options;
    long tos_arg;
    char *end;
    int ch;

    while ((ch = getopt(argc, argv, "t:i:w:l:p:u:b:n:S:D:")) != -1) {
        switch (ch) {
        case 't':
            test_duration = atoi(optarg);
            if (test_duration <= 0) {
                fprintf(stderr, "test duration must be "
                    "a positive integer (in seconds)\n");
                usage();
            }
            break;
        case 'i':
            report_interval = atoi(optarg);
            if (report_interval <= 0) {
                fprintf(stderr, "reporting interval must be "
                    "a positive integer (in seconds)\n");
                usage();
            }
            break;
        case 'w':
            buffer_size = atoi(optarg);
            if (buffer_size <= 0) {
                fprintf(stderr, "window must be "
                    "a positive integer (in bytes)\n");
                usage();
            }
            break;
        case 'l':
            block_size = atoi(optarg);
            if (block_size <= 0) {
                fprintf(stderr, "block size must be "
                    "a positive integer (in bytes)\n");
                usage();
            }
            break;
        case 'p':
            server_port = atoi(optarg);
            /* Port numbers are 16 bits wide. */
            if (server_port <= 0 || server_port > 65535) {
                fprintf(stderr, "port must be "
                    "an integer between 1 and 65535\n");
                usage();
            }
            break;
        case 'u':
            rate = rate2i(optarg);
            if (rate == 0) {
                fprintf(stderr, "rate must be positive\n");
                usage();
            }
            break;
        case 'b':
            /* The last -b on the command line wins. */
            wt = (optarg[0] == 'n') ? THRULAY_NONBUSYWAIT : THRULAY_BUSYWAIT;
            break;
        case 'n':
            num_stream = atoi(optarg);
            if (num_stream < 1) {
                fprintf(stderr, "number of streams must be positive\n");
                usage();
            }
            break;
        case 'S':
            if (tos_set) {
                fprintf(stderr, "Invalid option '-S'. "
                    "Can only set one '-D' or '-S'\n");
                usage();
            }
            /* Base 0: accepts decimal, 0x-prefixed hex and 0-prefixed octal. */
            tos_arg = strtol(optarg, &end, 0);
            if (end == optarg || *end != '\0' ||
                (tos_arg != 0x10 && tos_arg != 0x08 &&
                 tos_arg != 0x04 && tos_arg != 0x02)) {
                fprintf(stderr, "Invalid IP_TOS value\n");
                usage();
            }
            tos = (int)tos_arg;
            tos_set = 1;
            break;
        case 'D':
            if (tos_set) {
                fprintf(stderr, "Invalid option '-D'. "
                    "Can only set one '-D' or '-S'.\n");
                usage();
            }
            tos_arg = strtol(optarg, &end, 0);
            /* A DSCP value is 6 bits wide. */
            if (end == optarg || *end != '\0' ||
                tos_arg < 0 || tos_arg > 0x3F) {
                fprintf(stderr, "Invalid value for option \'-D\'. "
                    "DSCP value expected\n");
                usage();
            }
            tos = (int)tos_arg << 2;  /* shift for setting TOS */
            tos_set = 1;
            break;
        default:
            usage();
        }
    }
    argc -= optind;
    argv += optind;

    /* Exactly one positional argument: the server host. */
    if (argc != 1)
        usage();
    server_name = argv[0];

    /*
     * Default packet size is 8192 for TCP and 1500 for UDP.  Only apply
     * the default when the user did not give -l.
     */
    if (!block_size)
        block_size = rate ? 1500 : 8192;

    if (rate) {
        *traffic = THRULAY_UDP;
        opt_udp = calloc(1, sizeof *opt_udp);
        if (!opt_udp) {
            error(ERR_FATAL, "could not allocate memory");
        }
        opt_udp->server_name = server_name;
        opt_udp->server_port = server_port;
        opt_udp->test_duration = test_duration;
        opt_udp->udp_buffer_size = buffer_size;
        opt_udp->packet_size = block_size;
        opt_udp->nstream = num_stream;
        opt_udp->tos = tos;
        opt_udp->rate = rate;
        opt_udp->wt = wt;
        options = (void *)opt_udp;
    }
    else {
        /* make sure report_interval <= test_duration */
        if (report_interval > test_duration) {
            fprintf(stderr, "Report interval cannot be greater than "
                    "test duration\n");
            usage();
        }

        *traffic = THRULAY_TCP;
        opt_tcp = calloc(1, sizeof *opt_tcp);
        if (!opt_tcp) {
            error(ERR_FATAL, "could not allocate memory");
        }
        opt_tcp->server_name = server_name;
        opt_tcp->server_port = server_port;
        opt_tcp->test_duration = test_duration;
        opt_tcp->report_interval = report_interval;
        opt_tcp->window_size = buffer_size;
        opt_tcp->block_size = block_size;
        opt_tcp->nstream = num_stream;
        opt_tcp->tos = tos;
        options = (void *)opt_tcp;
    }

    return options;
}

/* 
 * True when mss is plausible for the given mtu.  We assume the MSS lies
 * between the optimistic position and the moderate position, i.e. 
 * MSS = MTU - 20(TCPHDR) - 20(IPHDR) = MTU - 40, and
 * MSS = MTU - 20(TCPHDR) - 60(IPHDR) = MTU - 80
 * as defined in RFC 879. 
 * This also works for IPv6 since there is no option field.
 *
 * Every argument and the whole expansion are parenthesized so the macro
 * can be negated or given expression arguments.  Note that each argument
 * is evaluated twice, so avoid arguments with side effects.
 */
#define check_mss_mtu(mss, mtu) \
    (((mtu) - 40) >= (mss) && (mss) >= ((mtu) - 80))

/*
 * Print one "# ..." line on stdout describing the path MTU and the MSS.
 * A zero mss or mtu means that value is unknown.  When only the MSS is
 * known, the MTU is guessed by checking the MSS against a few well-known
 * MTUs (576, 1500, 4352, 9180); failing that, MSS + 40 is reported.
 */
static void
print_mss_mtu(int mss, int mtu)
{
    /* Neither value is known. */
    if (!mtu && !mss) {
        printf("# Unknown path MTU and MSS\n");
        return;
    }

    /* The MTU is known; the MSS may or may not be. */
    if (mtu) {
        if (mss)
            printf("# Path MTU = %dB, MSS = %dB\n", mtu, mss);
        else
            printf("# Path MTU = %dB, Unknown MSS\n", mtu);
        return;
    }

    /* Only the MSS is known: try the well-known MTUs in increasing order. */
    if (check_mss_mtu(mss, 576))
        printf("# Path MTU = 576B, MSS = %dB, matchs Minimum\n", mss);
    else if (check_mss_mtu(mss, 1500))
        printf("# Path MTU = 1500B, MSS = %dB, matchs Ethernet\n", mss);
    else if (check_mss_mtu(mss, 4352))
        printf("# Path MTU = 4352B, MSS = %dB, matchs FDDI\n", mss);
    else if (check_mss_mtu(mss, 9180))
        printf("# Path MTU = 9180B, MSS = %dB, matchs ATM\n", mss);
    else
        printf("# Path MTU = %dB, MSS = %dB, Unknown interface\n",
               mss + 40, mss);
}

/*
 * Print the header of a TCP report on stdout: window sizes, block size
 * (requested versus actual when they differ), path MTU/MSS, test duration
 * and reporting interval, followed by the column titles.  stdout is
 * flushed before returning.
 *
 * tid is the test id, settings the values the user requested.
 */ 
static void
print_tcp_report_header(int tid, thrulay_setting_tcp_t *settings)
{
    thrulay_setup_tcp_t actual;

    /* 
     * Only stream 0 is queried: every stream is assumed to share the same
     * setup (and mss/mtu) apart from its file descriptors.
     */
    if (thrulay_get_setup(tid, 0, &actual) != 0)
        error(ERR_FATAL, "could not get thrulay setup");

    printf("# local window = %dB; remote window = %dB\n",
           actual.window_size_local, actual.window_size_remote);

    /* Show both sizes only when the actual one differs from the request. */
    if (settings->block_size != actual.block_size)
        printf("# requested block size = %dB; actual block size = %dB\n",
               settings->block_size, actual.block_size);
    else
        printf("# block size = %dB\n", actual.block_size);

    print_mss_mtu(actual.mss, actual.mtu);

    printf("# test duration = %ds; reporting interval = %ds\n",
           settings->test_duration, settings->report_interval);
    printf("SID     begin,s  end,s  Mb/s     RTT,ms: min   avg   max\n");
    fflush(stdout);
}

/*
 * Print the header of a UDP report on stdout: the client buffer size and
 * packet size (taken from stream 0), then the remote UDP port of every
 * stream.
 *
 * tid is the test id, option the values the user requested.
 */ 
static void
print_udp_report_header(int tid, thrulay_setting_udp_t *option)
{
    thrulay_setup_udp_t setup;
    int sid = 0;

    /* 
     * The UDP buffer size is common to all streams, but each stream has
     * its own server UDP port number, so every stream is queried.
     */
    while (sid < option->nstream) {
        if (thrulay_get_setup(tid, sid, &setup) != 0)
            error(ERR_FATAL, "could not get thrulay setup");

        /* Common values are printed once, from the first stream. */
        if (sid == 0) {
            printf("# client buffer size = %dB\n", setup.udp_buffer_size);
            if (option->packet_size != setup.packet_size)
                printf("# requested packet size = %dB; "
                       "actual packet size = %dB\n",
                       option->packet_size, setup.packet_size);
            else
                printf("# packet size = %dB\n", setup.packet_size);
        }

        printf("# (%d) remote UDP port = %d\n", sid, setup.port_udp_server);
        sid++;
    }
}

static void
print_report_udp(thrulay_report_udp_t *rpt)
{
    int i;
    double loss;
    u_int64_t npacket_loss;

    if (rpt->udp_buffer_size > 0) {
        printf("Server UDP buffer size: %dB\n", rpt->udp_buffer_size);
    }
    else {
        printf("Server UDP buffer size unknown\n");
    }
    printf("Packets client proposed to send: %llu\n", rpt->npackets);
    printf("Packets client sent: %llu\n", rpt->packets_sent);
    if (rpt->udp_buffer_size > 0) {
        printf("Packets server received: %llu\n", rpt->packets_received);
        printf("Packets duplicated: %llu\n", rpt->packets_duplicate);

        /* an delay of DELAY_INF*1000.0ms indicates that the packet is lost */
        if (rpt->min_delay != DELAY_INF * 1000.0) {
            printf("0th quantile of delay (ignoring clock offset): %.3lfms\n", 
                    rpt->min_delay);
        }
        else {
            printf("0th quantile of delay (ignoring clock offset): infms\n");
        }
        if (rpt->median_delay != DELAY_INF * 1000.0) {
            printf("50th quantile of delay (ignoring clock offset): %.3lfms\n", 
                    rpt->median_delay);
        }
        else {
            printf("50th quantile of delay (ignoring clock offset): infms\n");
        }
        if (rpt->quantile95_delay != DELAY_INF * 1000.0) {
            printf("95th quantile of delay (ignoring clock offset): %.3lfms\n",
                    rpt->quantile95_delay);
        }
        else {
            printf("95th quantile of delay (ignoring clock offset): infms\n");
        }

        npacket_loss = rpt->packets_sent - rpt->packets_received + 
            rpt->packets_duplicate;
        if (rpt->packets_sent > rpt->packets_received-rpt->packets_duplicate) {
            loss = 100.0*(double)(npacket_loss)/(double)(rpt->packets_sent);
        }
        else {
            loss = 0;
        }
        printf("Total packet loss: %llu\n", npacket_loss);
        printf("Total loss period: %llu\n", rpt->nloss_periods);
        printf("Packet loss rate: %lf%%\n", loss);

        for (i = 0; i < MAX_N && rpt->nreorder[i]; i++) {
            printf("%d-reordering = %f%%\n", i+1, 
                    100.0*rpt->nreorder[i]/(rpt->packets_received-i-1));
        }
        if (i == 0) printf("no reordering\n");
        else if (i < MAX_N) printf("no %d-reordering\n", i+1);
        else printf("only up to %d-reordering is handled\n", MAX_N);
    }
    else {
        printf("Packets server received unknown\n");
        printf("Packets duplicated unknown\n");
        printf("0th quantile of delay (ignoring clock offset) unknown\n"); 
        printf("50th quantile of delay (ignoring clock offset) unknown\n"); 
        printf("95th quantile of delay unknown\n"); 
        printf("Packet loss rate unknown\n");
        printf("n-reording metric unknown\n");
    }

    printf("\n");

    return;
}

/*
 * Print the reports numbered from_idx through to_idx (inclusive) on
 * stdout.  For every report index, one report is fetched and printed per
 * stream (stream ids 0 .. nstream-1).  TCP reports are printed as one
 * line each, with a '#' after the stream id for final reports; any other
 * traffic type is printed through print_report_udp().  stdout is flushed
 * at the end.  The result of thrulay_get_report() is not checked.
 */ 
static void
print_reports(thrulay_traffic_type_t traffic, int tid, int nstream, 
        int from_idx, int to_idx)
{
    thrulay_report_tcp_t tcp;
    thrulay_report_udp_t udp;
    int idx;
    int sid;
    char marker;

    for (idx = from_idx; idx <= to_idx; idx++) {
        for (sid = 0; sid < nstream; sid++) {
            if (traffic != THRULAY_TCP) {
                thrulay_get_report(tid, sid, idx, &udp);
                print_report_udp(&udp);
                continue;
            }

            thrulay_get_report(tid, sid, idx, &tcp);
            /* Final reports are flagged with a '#'. */
            marker = ' ';
            if (tcp.type == THRULAY_FINAL)
                marker = '#';
            printf("(%d)%c%9.3f %8.3f %8.3f %8.3f %8.3f %8.3f\n", sid,
                   marker, tcp.begin, tcp.end, tcp.bandwidth,
                   tcp.min_rtt, tcp.avg_rtt, tcp.max_rtt);
        }
    }
    fflush(stdout);
}

/*
 * Progressively print TCP reports while test tid is running.
 *
 * The test is polled with thrulay_wait(); whenever new reports are
 * available they are printed for all nstream streams and the function
 * sleeps for interval seconds before polling again.
 *
 * Returns 0 once thrulay_wait() returns tid, the negative value returned
 * by thrulay_wait() if polling fails, or the negative status reported for
 * a test that ended with an error.
 */ 
static int
progressive_report(int tid, int nstream, int interval)
{
    int last_printed = 0;   /* Index of the last report already printed. */
    int status;
    int rc;

    sleep(interval); /* first interval. */

    do {
        /* check for available test reports */
        rc = thrulay_wait(tid, &status, THRULAY_POLL);
        if (rc < 0)
            return rc;

        if (rc == 0) {
            if (status == 0) {
                /* We got here a bit early: wait for 1000 useconds. */
                usleep(1000);
            }
            else {
                /* when rc==0, status cannot be less than 0. */
                print_reports(THRULAY_TCP, tid, nstream,
                              last_printed + 1, status);
                last_printed = status;
                sleep(interval);
            }
        }
        else if (status > 0) {
            /* test process exits successfully: print what is left. */
            print_reports(THRULAY_TCP, tid, nstream,
                          last_printed + 1, status);
        }
        else if (status < 0) {
            /* test process exits with error. */
            return status;
        }
        /* rc > 0 with status == 0: nothing to do. */
    } while (rc != tid);

    return 0;
}

/*
 * Entry point of the thrulay client: parse the command line, open and
 * start a test, print the report header, then print TCP reports
 * progressively or UDP reports once the test is done.
 *
 * Returns 0 on success and 1 when progressive TCP reporting failed.  The
 * other failures are reported through error(ERR_FATAL, ...).
 */
int 
main(int argc, char *argv[])
{
    int tid;                            /* unique test id */
    thrulay_traffic_type_t traffic;     /* TCP or UDP test */
    thrulay_setting_tcp_t *setting_tcp = NULL;  /* settings for TCP test */
    thrulay_setting_udp_t *setting_udp = NULL;  /* settings for UDP test */
    void *settings;     /* user settings for thrulay test */
    int nstream;        /* number of parallel tests */
    int status;
    int rc;
    int exit_status = 0;

    /* Ignore SIGPIPE.  Always do in any socket code. */
    if (signal(SIGPIPE, SIG_IGN) == SIG_ERR) {
        perror("signal(SIGPIPE, SIG_IGN)");
        error(ERR_FATAL, "could not ignore SIGPIPE");
    }
    
    rc = thrulay_init();
    if (rc < 0) {
        thrulay_err_msg(rc);
        error(ERR_FATAL, "could not initialize thrulay lib");
    }

    settings = get_settings(argc, argv, &traffic);

    tid = thrulay_open(traffic, settings);
    if (tid < 1) { /* tid starts from 1, not 0. */
        thrulay_err_msg(tid);
        error(ERR_FATAL, "open a thrulay test failed");
    }
    
    /* Print out test report header before any statistics. */
    if (traffic == THRULAY_TCP) {
        setting_tcp = (thrulay_setting_tcp_t *)settings;
        nstream = setting_tcp->nstream;
        print_tcp_report_header(tid, setting_tcp);
    }
    else {
        setting_udp = (thrulay_setting_udp_t *)settings;
        nstream = setting_udp->nstream;
        print_udp_report_header(tid, setting_udp);
    }

    /*
     * Report the value returned by thrulay_start(), not tid: at this point
     * tid is a valid test id, not an error code.
     * NOTE(review): assumes thrulay_start() returns an error code on
     * failure, like thrulay_open() -- confirm against thrulay.h.
     */
    rc = thrulay_start(tid);
    if (rc != tid) {
        thrulay_err_msg(rc);
        error(ERR_FATAL, "start a thrulay test failed");
    }

    /* for TCP tests, report statistics at a given interval. */
    if (traffic == THRULAY_TCP) {
        rc = progressive_report(tid, nstream, setting_tcp->report_interval);
        if (rc != 0) {
            thrulay_err_msg(rc);
            /* Still clean up below, but do not report success. */
            exit_status = 1;
        }
    }
    /* for UDP tests, report statistics when tests are done. */
    else {
        if (tid != thrulay_wait(tid, &status, THRULAY_WAIT)) {
            error(ERR_FATAL, "wait for a thrulay test failed");
        }
        if (status > 0) {
            print_reports(THRULAY_UDP, tid, nstream, 1, status);
        }
        else {
            thrulay_err_msg(status);
            error(ERR_FATAL, "no report produced");
        }
    }

    /* 
     * We do not have to call thrulay_close before exiting because 
     * thrulay_exit will do this for us if the test is not closed.
     */
    if (0 != thrulay_close(tid)) {
        error(ERR_FATAL, "cannot close a thrulay test");
    }
    thrulay_exit();

    /* Since we are exiting, we do not bother to free up memory allocated. */

    return exit_status;
}