//
// make-timetable.cpp
//
// Bill Seymour, 2023-01-2
//
// Copyright Bill Seymour 2023.
//
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)
//
// This is a quick and dirty program to read a schedule file from
// http://dixielandsoftware.net/cgi-bin/getschedule.pl which it uses
// to generate a simple HTML timetable.  Presumably, the output file
// will be loaded into a Web browser.
//
// At present, it can handle only a single train, or a westbound/eastbound
// pair of trains that stop at the same stations.  It can't handle mid-route
// splits like the Empire Builder, the Lake Shore Limited, or the Texas Eagle
// cars that run to Los Angeles coupled to the Sunset Limited.
//

#include <iostream>
#include <fstream>
#include <string>
#include <vector>
#include <utility>   // swap
#include <algorithm> // reverse

#include <cstddef> // size_t
#include <cstdlib> // bsearch, exit, EXIT_SUCCESS, EXIT_FAILURE
#include <cstring> // strcmp

using namespace std; // I told you it's quick and dirty. 8-)

//
// I think I remember reading some RFC that says that newlines in Web pages
// should always be carriage return-line feed.
//
#if defined(_MSC_VER)
  // Windows already does that.
  #define CRLF "\n"
#else
  // Assume some POSIX implementation.
  #define CRLF "\r\n"
#endif

namespace {

//
// One data line of a schedule file:  a station together with
// the scheduled arrival and departure days and times there.
//
struct stop
{
    string code;   // station code copied from the input line
    string name;   // station name looked up from the code
    string tz;     // time zone (may be inherited from the previous stop)
    string ardy;   // scheduled arrival day, or "*"
    string artm;   // scheduled arrival time, or "*"
    string dpdy;   // scheduled departure day, or "*"
    string dptm;   // scheduled departure time, or "*"
    string arfreq; // arrival weekdays; empty when there's no frequency
    string dpfreq; // departure weekdays; empty when there's no frequency

    stop(const string& input, const string& prev_tz, const string& freq);
};
typedef vector<stop> stops;
typedef stops::const_iterator stop_iter;

//
// Everything read from one schedule file.
//
struct train
{
    string nbr;  // train number from the first header line
    string name; // train name from the second header line
    string freq; // frequency (e.g., "MoWeFr"); empty if none was given
    stops stp;   // the stops in the order they were read

    explicit train(const string& fn);
};

//
// For accumulating the data before output (for putting rowspans in <td> cells
// which can't be calculated until we see all the data):
//
struct detail_data
{
    // The text of each cell in one table row, left to right:
    string leftdy;
    string lefttm;
    string code;
    string name;
    string tz;
    string rightdy;
    string righttm;

    // Rowspans for the cells that can be merged with the rows below;
    // zero means that no cell is written for this row.
    int leftdy_rows;
    int tz_rows;
    int rightdy_rows;

    detail_data()
      : leftdy_rows(0),
        tz_rows(0),
        rightdy_rows(0)
    {
    }

    // Writes this row; the bool says whether the read-up columns exist.
    void write(ostream& os, bool both) const;
};
typedef vector<detail_data> details;
typedef details::iterator detiter;
typedef details::const_iterator detciter;

//
// The whole timetable, held until every row has been seen.
//
struct output_data
{
    string left_nbr;   // number of the read-down train
    string right_nbr;  // number of the read-up train; empty if there's none
    string train_name; // name(s) for the page heading
    details dets;      // one entry per table row

    void write(ostream& os);

private:
    void fixup_rowspans(bool both);
    void begin_html(ostream& os, bool both);
    static void end_html(ostream& os);
};
output_data output;  // just make it a global (quick and dirty)

string make_filename(const char*, const char* = nullptr);
void check_stops(const train&, const train&);
void accumulate_data(const train&, const train&);

//
// Print the command-line synopsis to cerr and exit with a failure status.
//
[[noreturn]] void usage()
{
    static const char synopsis[] =
        "Usage:  make-timetable left-train [right-train] "
        "{[-s] | [-o output-filename]}";

    cerr << synopsis;
    cerr << endl;
    exit(EXIT_FAILURE);
}

//
// Print "<why> <what>" to cerr and exit with a failure status.
//
[[noreturn]] void fatal(const char* why, const string& what)
{
    cerr << why << ' ';
    cerr << what;
    cerr << endl;
    exit(EXIT_FAILURE);
}

} // anonymous namespace

//
// Parse the command line, read one or two schedule files, and write
// the HTML timetable to a file or (with -s) to the standard output.
//
// Arguments:  left-train [right-train] {[-s] | [-o output-filename]}
// A train argument is either a train number or a full path
// (see make_filename).
//
int main(int argc, char** argv)
{
    string left_fn, right_fn, out_fn;
    bool get_outfn = false, use_stdout = false;

    //
    // Index upward from 1 instead of counting argc down so that
    // an argc of zero can't run us off the end of argv.
    //
    for (int i = 1; i < argc; ++i)
    {
        char const * const arg = argv[i];

        if (arg[0] == '-' && arg[1] == 'o' && arg[2] == '\0')
        {
            if (get_outfn || !out_fn.empty() || use_stdout)
            {
                usage();
            }
            get_outfn = true; // the next argument is the filename
        }
        else if (arg[0] == '-' && arg[1] == 's' && arg[2] == '\0')
        {
            //
            // -s and -o are mutually exclusive, and that includes
            // a -o that's still waiting for its filename.
            //
            if (use_stdout || get_outfn || !out_fn.empty())
            {
                usage();
            }
            use_stdout = true;
        }
        else if (get_outfn)
        {
            if (!out_fn.empty())
            {
                usage();
            }
            out_fn.assign(arg);
            get_outfn = false;
        }
        else if (left_fn.empty())
        {
            left_fn.assign(make_filename(arg));
        }
        else if (right_fn.empty())
        {
            right_fn.assign(make_filename(arg));
        }
        else
        {
            usage();
        }
    }
    if (left_fn.empty() || get_outfn) // no input, or -o without a filename
    {
        usage();
    }

    train left(left_fn), right(right_fn); // NB:  right_fn could be empty

    if (!right.stp.empty()) // It'll be on the "read-up" side.
    {
        reverse(right.stp.begin(), right.stp.end());
        check_stops(left, right);
    }

    accumulate_data(left, right);

    if (out_fn.empty())
    {
        // Default output name:  left train number + "sked.html"
        out_fn.assign(make_filename(output.left_nbr.c_str(), ".html"));
    }

    ofstream os;
    ostream* osp;
    if (use_stdout)
    {
        osp = &cout;
    }
    else
    {
        os.open(out_fn);
        if (!os)
        {
            fatal("Can't create", out_fn);
        }
        osp = &os;
    }

    output.write(*osp);

    if (!use_stdout)
    {
        // Sample the stream state before close() can change it.
        bool ok = os.good();
        os.close();
        if (!ok)
        {
            fatal("Error writing", out_fn);
        }
    }

    return EXIT_SUCCESS;
}

namespace {

//
// A train number is one to four characters, all of them in '0'..'9'.
//
bool is_train_number(const string& arg)
{
    if (arg.empty() || arg.length() > 4)
    {
        return false;
    }
    return all_of(arg.begin(), arg.end(),
                  [](char ch) { return ch >= '0' && ch <= '9'; });
}

//
// Turn a command-line argument into a filename.  An argument that looks
// like a train number becomes number + "sked" + extension, where the
// extension is the second argument or, if that's null, ".txt".  Anything
// else is taken to be a full path already and is returned unchanged.
//
string make_filename(const char* arg, const char* ext)
{
    const string candidate(arg);

    if (!is_train_number(candidate))
    {
        return candidate; // already a full path
    }

    const char* const suffix = (ext == nullptr) ? ".txt" : ext;
    return candidate + "sked" + suffix;
}

//
// If we have a train on the "read-up" side, it needs to serve
// all the same stations as does its "read-down" counterpart.
//
void check_stops(const train& left, const train& right)
{
    stop_iter lbeg = left.stp.cbegin(),  lend = left.stp.cend(),
              rbeg = right.stp.cbegin(), rend = right.stp.cend();

    for ( ; lbeg != lend && rbeg != rend; ++lbeg, ++rbeg)
    {
        if (lbeg->code != rbeg->code)
        {
            break;
        }
    }
    if (lbeg != lend || rbeg != rend)
    {
        cerr << "The two trains have different stops.\n";
        exit(EXIT_FAILURE);
    }
}

/*
 * A sample input file:
 * They all have the same fixed-length format with spaces, no tabs.

* Schedule For Train 302.           Formatted Data
* Lincoln Service  (PDF Schedule)
* +---------------- Station Code
* |    +----------- Schedule Arrival Day  
* |    |  +-------- Schedule Arrival Time
* |    |  |     +----- Schedule Departure Day
* |    |  |     |  +-- Schedule Departure Time 
* |    |  |     |  |     +------------- Actual Arrival Time
* |    |  |     |  |     |     +------- Actual Departure Time
* |    |  |     |  |     |     |     +- Comments
* V    V  V     V  V     V     V     V
  STL  *  *     1  640A              CT
  ALN  *  *     1  725A              CT
  CRV  *  *     1  752A              CT
  SPI  *  *     1  832A              CT
  LCN  *  *     1  856A              CT
  BNL  *  *     1  930A              CT
  PON  *  *     1  959A              CT
  DWT  *  *     1  1017A             CT
  JOL  *  *     1  1104A             CT
  SMT  *  *     1  1129A             CT
  CHI  1  1205P *  *                 CT

 * Sometimes the time zones are absent,
 * and in Arizona they'll be "MST" instead of "MT".
 *
 * There can also be a frequency (e.g., "MoWeFr")
 * following the time zone.  If it's present,
 * there will be at least one space separating
 * the time zone from the frequency.
 */
//
// Zero-based character positions, and field lengths, in the input lines:
//
constexpr size_t nbrbeg  = 21; // position of train number on the 1st line
constexpr size_t namebeg = 2;  // position of train name on the 2nd line
constexpr size_t codebeg = 2;  // position of the station code
constexpr size_t codelen = 3;  // length of the station code
constexpr size_t ardybeg = 7;  // position of the scheduled arrival day
constexpr size_t artmbeg = 10; // position of the scheduled arrival time
constexpr size_t dpdybeg = 16; // position of the scheduled departure day
constexpr size_t dptmbeg = 19; // position of the scheduled departure time
constexpr size_t tzbeg   = 37; // position of the time zone, if present
constexpr size_t tzmax   = 3;  // most characters a time zone can have

//
// One entry in the station cross-reference, and the comparison
// function that bsearch uses to look up a station code in it.
// The key is a C string; the element is a sta_codes.
//
struct sta_codes
{
    const char* code; // the station code that we search on
    const char* name; // the name that goes with that code
};
int cmp_codes(const void* cd, const void* st)
{
    const char* const wanted = static_cast<const char*>(cd);
    const sta_codes* const entry = static_cast<const sta_codes*>(st);
    return strcmp(wanted, entry->code);
}

//
// Helpers for the stop ctor:
//
//
// Look up a station code and store the matching name in the first
// argument.  An unknown code gets a warning on cerr and an empty name.
//
void get_name(string& name, const string& code)
{
    //
    // The code-to-name table comes from an included file.
    // NB:  bsearch requires that the entries be sorted by code.
    //
    static const sta_codes stations[] =
    {
        #include "station-codes.inc"
    };
    static constexpr size_t entry_size = sizeof stations[0];
    static constexpr size_t entry_count = sizeof stations / entry_size;

    const void* const hit =
        bsearch(code.c_str(), stations, entry_count, entry_size, cmp_codes);

    if (hit == nullptr)
    {
        cerr << "Warning:  unrecognized station code \"" << code << "\"\n";
        name.clear();
        return;
    }

    name.assign(static_cast<const sta_codes*>(hit)->name);
}
//
// Replace tm with the blank-delimited field that starts at input[beg]
// unless tm already begins with '*', in which case it's left alone.
// If no blank follows, the field runs to the end of the line.
//
void get_time(string& tm, const string& input, size_t beg)
{
    if (tm[0] == '*')
    {
        return; // no time here; keep the "*"
    }

    const size_t field_end = input.find(' ', beg);
    tm = input.substr(beg, field_end - beg);
}
//
// Given a two-letter weekday ("Su", "Mo", ...) and a number of days to
// move it by, return the resulting weekday.  The result wraps around
// the week, so any offset is safe, including negative ones.
//
// An unrecognized weekday gets a warning on cerr and a "??" result.
//
string rotate_day(const string& freqday, int stopday)
{
    static char const * const weekdays[] =
    {
        "Su", "Mo", "Tu", "We", "Th", "Fr", "Sa"
    };
    constexpr int ndays = 7;

    for (int pos = 0; pos < ndays; ++pos)
    {
        if (freqday == weekdays[pos])
        {
            //
            // Reduce the offset first so that the sum can't overflow,
            // then add ndays so that a negative offset still yields
            // a non-negative index.
            //
            const int shifted = (pos + stopday % ndays + ndays) % ndays;
            return string(weekdays[shifted]);
        }
    }

    cerr << "Warning:  unrecognized frequency day \"" << freqday << "\"\n";
    return string("??");
}
string rotate_freq(const string& freq, const string& stopday)
{
    string retval;

    int dy = stoi(stopday) - 1; // now 0-based
    if (dy > 0)
    {
        for (size_t pos = 0, end = freq.size(); pos < end; pos += 2)
        {
            retval.append(rotate_day(string(freq, pos, 2), dy));
        }
    }
    else // no change
    {
        retval.assign(freq);
    }

    return retval;
}

/*
    string code, name, tz, ardy, artm, dpdy, dptm, arfreq, dpfreq;
*/
//
// Build a stop from one data line.  prev_tz supplies the time zone
// when the line is too short to have one; freq, if it isn't empty,
// is the train's frequency from which arfreq and dpfreq are made.
//
// The days and times start out as the single character at their column
// positions; get_time then widens any time that isn't "*".  The members
// not named in the initializer list start out empty.
//
stop::stop(const string& input, const string& prev_tz, const string& freq)
  : code(input, codebeg, codelen),
    ardy(1, input[ardybeg]),
    artm(1, input[artmbeg]),
    dpdy(1, input[dpdybeg]),
    dptm(1, input[dptmbeg])
{
    get_name(name, code);

    get_time(artm, input, artmbeg);
    get_time(dptm, input, dptmbeg);

    if (input.size() <= tzbeg)
    {
        tz = prev_tz; // time zone is absent
    }
    else
    {
        tz = input.substr(tzbeg, tzmax);
        if (tz.size() == tzmax && tz.back() == ' ')
        {
            tz.pop_back(); // a two-letter zone followed by a blank
        }
    }

    if (freq.empty())
    {
        return;
    }
    if (ardy[0] != '*')
    {
        arfreq = rotate_freq(freq, ardy);
    }
    if (dpdy[0] != '*')
    {
        dpfreq = rotate_freq(freq, dpdy);
    }
}

//
// A helper for the train ctor:
//
// If the line has a frequency (e.g., "MoWeFr") after the time zone,
// store it in freq; otherwise leave freq alone.  The frequency is
// the last blank-delimited token on the line, but only if that token
// starts to the right of the time zone column.  Without that check,
// a line ending in "MST", or in a time zone followed by trailing
// blanks, would have its time zone mistaken for a frequency.
//
void get_freq(string& freq, const string& input)
{
    if (input.size() <= tzbeg + 2)
    {
        return; // too short to hold anything after a time zone
    }

    const size_t last = input.find_last_not_of(" \t");
    if (last == string::npos)
    {
        return; // nothing but whitespace
    }

    const size_t gap = input.find_last_of(" \t", last);
    const size_t first = (gap == string::npos) ? 0 : gap + 1;
    if (first <= tzbeg)
    {
        return; // the last token is the time zone (or something before it)
    }

    freq.assign(input, first, last - first + 1);
}

//
// Read a whole schedule file.  An empty filename yields an empty train
// (that's how a missing right-hand train is represented).  Any failure
// to open or read the file, a malformed first header line, or fewer
// than two stops, is fatal.
//
// Whether a line was read is tracked by getline's own result, not by
// is.good().  Reading a final line that lacks a newline sets eofbit even
// though the line is valid, so testing good() would drop the last stop.
//
train::train(const string& fn)
{
    if (fn.empty())
    {
        return; // no problem...it won't be used
    }

    ifstream is(fn);
    if (!is)
    {
        fatal("Can't open", fn);
    }

    string input;

    //
    // Skip to the first non-blank line:
    //
    bool have_line = false;
    while ((have_line = static_cast<bool>(getline(is, input))) &&
           input.empty())
        ;

    if (have_line)
    {
        //
        // The first line contains the train number
        // which has a period at the end.
        //
        if (input.size() <= nbrbeg)
        {
            fatal("Error reading", fn); // too short to hold a number
        }
        const size_t nbrend = input.find('.', nbrbeg);
        nbr.assign(input, nbrbeg, nbrend - nbrbeg); // no period: rest of line

        //
        // The train name is on the next line.
        //
        have_line = static_cast<bool>(getline(is, input));
        if (have_line)
        {
            size_t nameend = input.find("(PDF") - 1;
            nameend = input.find_last_not_of(" \t", nameend); // one too small
            if (nameend != string::npos && nameend >= namebeg)
            {
                name.assign(input, namebeg, nameend + 1 - namebeg);
            }

            //
            // Skip to the real data:
            //
            do
            {
                have_line = static_cast<bool>(getline(is, input));
            } while (have_line && input[0] == '*');
        }
    }

    //
    // The first line of the real data might have a frequency at the end.
    //
    string prev_tz;
    for ( ; have_line; have_line = static_cast<bool>(getline(is, input)))
    {
        if (input.empty())
        {
            continue; // a blank line
        }
        if (stp.empty())
        {
            get_freq(freq, input);
        }
        stp.push_back(stop(input, prev_tz, freq));
        prev_tz = stp.back().tz;
    }

    bool oops = is.bad() || !is.eof();

    is.close();

    if (oops || stp.size() < 2)
    {
        fatal("Error reading", fn);
    }
}

//
// Accumulating the data:
//

//
// Convert a schedule time like "640A" or "1205P" to 24-hour "hh:mm".
//
// The input has no hour-minute separator and no leading zero, and it
// ends with 'A' or 'P'; anything not ending in 'A' is treated as PM.
//
string time_24(const string& tm)
{
    // The minutes are the two characters just before the final letter.
    const size_t minute_pos = tm.size() - 3; // 1 or 2

    string hours;

    if (tm.back() == 'A')
    {
        if (minute_pos == 1)
        {
            hours = string(1, '0') + tm[0]; // supply the leading zero
        }
        else
        {
            // A second hour digit of '2' can only be 12 AM.
            hours = (tm[1] == '2') ? string("00") : tm.substr(0, 2);
        }
    }
    else
    {
        int hr = tm[0] - '0';
        if (minute_pos == 2)
        {
            hr = hr * 10 + (tm[1] - '0');
        }
        if (hr != 12) // 12 PM stays 12
        {
            hr += 12;
        }
        hours.push_back(static_cast<char>('0' + hr / 10));
        hours.push_back(static_cast<char>('0' + hr % 10));
    }

    return hours + ':' + tm.substr(minute_pos, 2);
}

//
// Fill in the day and time cells for one stop.  If rev is false, the
// left (read-down) cells are set with arrival before departure; if
// it's true, the right (read-up) cells are set with departure first.
//
// A rotated frequency, when the stop has one, is shown in place of
// the day number.
//
void make_time(detail_data& det, const stop& st, bool rev)
{
    const string& arr_day = st.arfreq.empty() ? st.ardy : st.arfreq;
    const string& dep_day = st.dpfreq.empty() ? st.dpdy : st.dpfreq;

    // "1" is whatever gets listed first, "2" whatever comes second.
    const string& day1  = rev ? dep_day : arr_day;
    const string& day2  = rev ? arr_day : dep_day;
    const string& time1 = rev ? st.dptm : st.artm;
    const string& time2 = rev ? st.artm : st.dptm;

    string& dy = rev ? det.rightdy : det.leftdy;
    string& tm = rev ? det.righttm : det.lefttm;

    // A lone time, or a non-breaking space if there's no time at all.
    const auto lone_time = [](const string& t)
    {
        return t[0] == '*' ? string("&nbsp;") : time_24(t);
    };

    if (day1[0] == '*') // only the second event happens here
    {
        dy = day2;
        tm = lone_time(time2);
        return;
    }
    if (day2[0] == '*') // only the first event happens here
    {
        dy = day1;
        tm = lone_time(time1);
        return;
    }

    // Both events happen here; show two days only if they differ.
    dy = day1;
    if (day1 != day2)
    {
        dy += "<br>";
        dy += day2;
    }

    tm = time_24(time1);
    tm += "<br>";
    tm += time_24(time2);
}

//
// Copy the station code, name and time zone from a stop to a row.
//
void make_station(detail_data& det, const stop& st)
{
    det.code.assign(st.code);
    det.name.assign(st.name);
    det.tz.assign(st.tz);
}

void accumulate_data(const train& left, const train& right)
{
    output.left_nbr.assign(left.nbr);
    output.right_nbr.assign(right.nbr);
    output.train_name.assign(left.name);
    if (!right.name.empty() && left.name != right.name)
    {
        output.train_name.append("<br>and ");
        output.train_name.append(right.name);
    }

    detail_data this_detail;

    stop_iter lbeg = left.stp.cbegin(),  lend = left.stp.cend(),
              rbeg = right.stp.cbegin(), rend = right.stp.cend();

    //
    // If we have a train on the read-up side, we already know
    // that both trains make the same stops, so we can be sure
    // that, at most, one of the following loops will run.
    //
    for ( ; lbeg != lend && rbeg != rend; ++lbeg, ++rbeg)
    {
        make_time(this_detail, *lbeg, false);
        make_station(this_detail, *lbeg);
        make_time(this_detail, *rbeg, true);
        output.dets.push_back(this_detail);
    }
    for ( ; lbeg != lend; ++lbeg)
    {
        make_time(this_detail, *lbeg, false);
        make_station(this_detail, *lbeg);
        output.dets.push_back(this_detail);
    }
}

//
// Writing one detail row:
//

//
// Write a day cell.  Nothing is written when dyrows isn't positive
// (the cell is covered by a rowspan above); a rowspan attribute is
// written only when the cell covers more than one row.
//
void write_day(ostream& os, const string& dy, int dyrows)
{
    if (dyrows <= 0)
    {
        return;
    }

    os << "<td";
    if (dyrows != 1)
    {
        os << " rowspan=" << dyrows;
    }
    os << '>';
    os << dy;
    os << "</td>";
}

//
// Write a time cell; the text goes out exactly as given.
//
void write_time(ostream& os, const string& tm)
{
    os << "<td>";
    os << tm;
    os << "</td>";
}

//
// Write the station code and name cells, and then the time zone cell
// unless tzrows isn't positive (it's covered by a rowspan above).
// The rowspan attribute appears only for more than one row.
//
void write_station(ostream& os,
                   const string& code,
                   const string& name,
                   const string& tz,
                   int tzrows)
{
    os << "<td>" << code << "</td>";
    os << "<td>" << name << "</td>";

    if (tzrows <= 0)
    {
        return;
    }

    os << "<td";
    if (tzrows != 1)
    {
        os << " rowspan=" << tzrows;
    }
    os << '>' << tz << "</td>";
}

//
// Write this row as one <tr>.  The read-up time and day
// cells are included only when there are two trains.
//
void detail_data::write(ostream& os, bool both) const
{
    os << "<tr align=center>";

    write_day(os, leftdy, leftdy_rows);
    write_time(os, lefttm);
    write_station(os, code, name, tz, tz_rows);

    if (!both)
    {
        os << "</tr>" CRLF;
        return;
    }

    // The right side mirrors the left:  time first and then day.
    write_time(os, righttm);
    write_day(os, rightdy, rightdy_rows);
    os << "</tr>" CRLF;
}

//
// Fixing the rowspans for one column:
//
// val selects the column's text and rows selects its rowspan.  For each
// run of adjacent rows with equal text, the first row's rowspan is set
// to the length of the run and the others' are set to zero so that
// their cells won't be written.  An empty range is left alone.
//
void fixup(detiter beg,
           detiter end,
           const string detail_data::*val,
           int detail_data::*rows)
{
    if (beg == end)
    {
        return; // nothing to fix, and nothing we could dereference
    }

    detiter first = beg; // the first row of the current run
    int cnt = 1;

    for (++beg; beg != end; ++beg)
    {
        if ((*first).*val != (*beg).*val)
        {
            // The run has ended; record its length and start another.
            (*first).*rows = cnt;
            first = beg;
            cnt = 1;
        }
        else
        {
            (*beg).*rows = 0;
            ++cnt;
        }
    }
    (*first).*rows = cnt; // the last run
}

//
// Fixing the rowspans for all columns:
//
void output_data::fixup_rowspans(bool both)
{
    detiter beg = dets.begin(), end = dets.end();

    fixup(beg, end, &detail_data::leftdy, &detail_data::leftdy_rows);
    fixup(beg, end, &detail_data::tz, &detail_data::tz_rows);
    if (both)
    {
        fixup(beg, end, &detail_data::rightdy, &detail_data::rightdy_rows);
    }
}

//
// Writing the whole page:
//

//
// Write everything that precedes the detail rows:  the start of the page,
// the title and heading, and the three heading rows of the table.
// If both is true, the text and columns for the read-up train are included.
//
void output_data::begin_html(ostream& os, bool both)
{
    //
    // The page headings:
    //
    os << "<html>" CRLF "<head><title>Timetable for " << left_nbr;
    if (both)
    {
        os << " &amp; " << right_nbr;
    }
    os << "</title></head>" CRLF "<body>" CRLF "<center>" CRLF
          "<h2>Timetable for Train";
    if (both)
    {
        // "Trains" with both numbers ...
        os << "s " << left_nbr << " and " << right_nbr;
    }
    else
    {
        // or just "Train" and the one number.
        os << ' ' << left_nbr;
    }
    os << ",<br>the " << train_name << "</h2>" CRLF;

    //
    // The table headings:
    //
    // The arrows are written as numeric character references.
    //
    static char const * const left_arrow  = "&#x1F820;";
    static char const * const right_arrow = "&#x1F822;";

    //
    // First row:  the train number(s).  The <th> that holds "Train Number"
    // is left open here and closed either by the "if (both)" text
    // or by the "</th></tr>" that follows.
    //
    os << "<div style=\"display:table-cell; vertical-align:middle\">" CRLF
       << "<table border>" CRLF "<tr><th colspan=2>"
       << left_nbr
       << "</th><th colspan=3>" << left_arrow << "&nbsp;Train Number";
    if (both)
    {
        os << "&nbsp;" << right_arrow << "</th><th colspan=2>" << right_nbr;
    }
    os << "</th></tr>" CRLF;

    //
    // Second row:  which way to read each side.
    //
    os << "<tr><th colspan=2>Read Down</th><th colspan=3>Station</th>";
    if (both)
    {
        os << "<th colspan=2>Read Up</th>";
    }
    os << "</tr>" CRLF;

    //
    // Third row:  one heading per column.
    //
    os << "<tr><th>Day</th><th>Time</th><th>Code</th><th>Name</th>"
          "<th>Time<br>Zone</th>";
    if (both)
    {
        os << "<th>Time</th><th>Day</th>";
    }
    os << "</tr>" CRLF;
}

//
// Close everything that begin_html left open, add
// a final blank line, and flush the stream.
//
void output_data::end_html(ostream& os)
{
    os << "</table>" CRLF;
    os << "</div>" CRLF;
    os << "</center>" CRLF;
    os << "</body>" CRLF;
    os << "</html>" CRLF CRLF;
    os.flush();
}

void output_data::write(ostream& os)
{
    bool both = !right_nbr.empty();

    fixup_rowspans(both);
    begin_html(os, both);
    for (detciter beg = dets.cbegin(), end = dets.cend(); beg != end; ++beg)
    {
        beg->write(os, both);
    }
    end_html(os);
}

} // anonymous namespace

// End of make-timetable.cpp
