#include <cassert>
#include <cctype>
#include <list>
#include <map>
#include <string>
#include <vector>

#include "../basic.h"
#include "../declaration.h"
#include "../tokenizer.h"

using std::string;
typedef std::string::size_type strpos;

namespace {
    // returns an iterator into c pointing to the first element whose key
    // begins with str, or c.end() if none exists.  C must be an ordered
    // associative container keyed on strings (e.g. std::map<string, T>).
    template<typename C>
    typename C::const_iterator partial_lookup(const C& c, string str) {
        if(str.empty()) {
            return c.begin();
        }
        typedef typename C::const_iterator iter;
        iter start = c.lower_bound(str);
        ++str[str.size()-1];    // bump the last character to form the
                                // exclusive upper bound of keys prefixed
                                // by the original str
        iter end   = c.lower_bound(str);

        if(start != end) {
            return start;
        }
        else {
            return c.end();
        }
    }

    const char W_SPACE[] = " \n\t\r";
}

namespace lang {
    namespace cpp {

        Tokenizer::Token::Token(Operator o, bool l_h, bool r_h, Assoc a, int p):
            op(o),
            l_hook(l_h),
            r_hook(r_h),
            assoc(a),
            precedence(p)
        {
        }



        bool Tokenizer::Token::operator< (const Token& r) const
        {
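            // a precedence of 0 marks a fully-expanded operand, which
            // outranks any operator; on a precedence tie, right
            // associativity lets the right-hand token bind first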
            if(r.precedence == 0 || precedence < r.precedence) {
                return true;
            }
            else if(precedence == r.precedence) {
                return (assoc == RIGHT);
            }
            else {
                return false;
            }
        }



        bool Tokenizer::Token::operator> (const Token& r) const
        {
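            // mirror of operator<: an operand (precedence 0) outranks any
            // operator, and a precedence tie resolves in favor of the
            // left-hand token when the operator is left-associative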
            if(precedence == 0 || precedence > r.precedence) {
                return true;
            }
            else if(precedence == r.precedence) {
                return (assoc == LEFT);
            }
            else {
                return false;
            }
        }



        void Tokenizer::get_token_list(
                string str,
                std::list<Token>& tokens,
                const Scope& scope)
        {
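            // scan str left to right, one token per iteration; whitespace
            // preceding a token is folded into that token's text, so the
            // token texts concatenate back to the original string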
            tokens.clear();

            Token token;
            token.r_hook = true;    // right-hook implicitly on token "zero"

            strpos ws_begin = 0;
            strpos ws_end;
            string ws;

            while(ws_begin < str.size()) {
                ws_end = str.find_first_not_of(W_SPACE, ws_begin);

                if(ws_end >= str.size()) {
                    // just trailing whitespace
                    if(tokens.size()) {
                        tokens.back().text += str.substr(ws_begin);
                    }
                    break;
                }

                ws = str.substr(ws_begin, ws_end-ws_begin);
                token = get_token(str, ws_end, scope, token);

                if(token.text.size()) {
                    // advance past parsed token
                    ws_begin = ws_end + token.text.size();

                    // add skipped whitespace
                    token.text.insert(0, ws);
                    tokens.push_back(token);
                }
                else {
                    // couldn't get a Token; whole expression is invalid
                    tokens.clear();
                    return;
                }
            }

            if(tokens.size() && tokens.back().r_hook) {
                // last token expected an operand but there was none
                tokens.clear();
                return;
            }
        }



        Tokenizer::Token Tokenizer::get_token(
                string str,
                strpos pos,
                const Scope& scope,
                const Token& last_token)
        {
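            // the previous token's r_hook decides what may follow: a token
            // expecting a right operand must be followed by a left-terminal
            // token (operand or prefix form), anything else by a
            // left-hooked token (binary or postfix form)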
            if(last_token.r_hook) {
                Token token = get_lterminal_token(str, pos, scope);
                // make sure precedence makes sense
                if(last_token < token) {
                    return token;
                }
            }
            else {
                Token token = get_lhooked_token(str, pos, scope);
                // make sure precedence makes sense
                if(last_token > token) {
                    return token;
                }
            }

            // no valid token could be found
            return Token();
        }



        void Tokenizer::consolidate_tokens(std::list<Token>& tokens)
        {
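            // shunting-yard-style reduction: operators are stacked while
            // precedence increases; when an incoming operator binds less
            // tightly than the stack top, the top operator absorbs its
            // neighboring operands via expand_op_token()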
            assert(tokens.size() >= 1);

            typedef std::list<Token>::iterator iter;

            // op_stack contains iterators to operator-tokens that always
            // have increasing precedence from bottom to top
            std::vector<iter> op_stack;

            iter it = tokens.begin();

            while(it != tokens.end()) {
                if(it->op) {
                    while(op_stack.size() && *op_stack.back() > *it) {
                        // back of stack points to an operator surrounded by
                        // operators with lower precedence; let it eat its
                        // operands and pop it

                        if( !expand_op_token(tokens, op_stack.back()) ) {
                            // expansion failed; token list is invalid
                            tokens.clear();
                            return;
                        }
                        op_stack.pop_back();
                    }
                    op_stack.push_back(it);
                }
                ++it;
            }

            // pop the rest of the operator stack
            while(op_stack.size()) {
                if( !expand_op_token(tokens, op_stack.back()) ) {
                    // expansion failed; token list is invalid
                    tokens.clear();
                    return;
                }
                op_stack.pop_back();
            }

            if(tokens.size() != 1) {
                // something went wrong; token-list was invalid
                tokens.clear();
            }
        }



        Tokenizer::Token Tokenizer::get_lterminal_token(
                string str,
                strpos pos,
                const Scope& scope)
        {
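            // a left-terminal token begins a (sub)expression: a
            // parenthesized group or C-style cast, a prefix operator, or a
            // literal/identifier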
            Token token;

            if(pos >= str.size()) {
                return token;
            }

            if(str[pos] == '(') {
                strpos match = find_matching(str, pos);
                if(match != string::npos) {
                    string inner_text = str.substr(pos+1, match-(pos+1));
                    token = get_lterminal_paren_token(inner_text, scope);
                }
            }
            else {
                token = get_token_from_map(left_terminal_ops(), str, pos);

                if(token.text.size()) {
                    // distinguish delete from delete []
                    if(token.op == DELETE) {
                        strpos n = pos + token.text.size();
                        n = str.find_first_not_of(W_SPACE, n);
                        if(n < str.size() && str[n] == '[') {
                            strpos match = find_matching(str, n);
                            if(str.find_first_not_of(W_SPACE, n+1) == match) {
                                token.op = DELETE_ARRAY;
                                token.text = str.substr(pos, (match+1)-pos);
                            }
                        }
                    }
                }
            }

            if(token.text.empty()) {
                token = get_id_token(str, pos);
            }

            return token;
        }



        Tokenizer::Token Tokenizer::get_lhooked_token(
                string str,
                strpos pos,
                const Scope& scope)
        {
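            // a left-hooked token follows a complete operand: a call,
            // subscript, ternary '?', or a binary/postfix operator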
            Token token;

            if(pos >= str.size()) {
                return token;
            }

            // look for left-hooked expressions
            if(str[pos] == '(' || str[pos] == '[' || str[pos] == '?') {
                // function call, subscript, or ternary op
                strpos match = find_matching(str, pos);

                if(match != string::npos) {
                    string inner_text = str.substr(pos+1, match-(pos+1));

                    if(str[pos] == '(') {
                        token = get_function_token(inner_text, scope);
                    }
                    else if(str[pos] == '[') {
                        token = get_subscript_token(inner_text, scope);
                    }
                    else {
                        token = get_ternary_token(inner_text, scope);
                    }
                }
            }
            else {
                token = get_token_from_map(left_hooked_ops(), str, pos);
            }

            return token;
        }



        Tokenizer::Token Tokenizer::get_lterminal_paren_token(
                    string inner_text,
                    const Scope& scope)
        {
            Token token;
            token.text = "(" + inner_text + ")";

            // if the inner text is a valid declaration, it's a c-style cast
            Declaration decl(inner_text, scope);

            if(decl.count_names() == 1) {
                // c-style cast
                token.op = C_CAST;
                token.l_hook = false;
                token.r_hook = true;
                token.assoc = Token::RIGHT;
                token.precedence = 16;

                // add inner (cast) operand
                token.expr.reset(new Expression());
                Expression::operand operand (new Expression());
                operand->text = inner_text;
                operand->op   = NONE;
                token.expr->operands.push_back(operand);
            }
            else {
                // subexpression
                token.expr.reset(new Expression(inner_text, scope));

                // this token is now fully expanded
                token.op = NONE;
                token.precedence = 0;
            }

            return token;
        }



        Tokenizer::Token Tokenizer::get_id_token(string str, strpos pos)
        {
            // try to get a literal
            string id = get_literal(str, pos);

            // if none, try an identifier
            if(id.empty()) {
                id = get_identifier(str, pos);
            }

            if(id.empty()) {
                // no token or identifier here
                return Token();
            }

            Token token;
            token.text = id;

            token.expr.reset(new Expression());
            token.expr->text = id;
            token.expr->op   = NONE;

            // mark token as completed/terminal
            token.op = NONE;
            token.precedence = 0;

            return token;
        }



        Tokenizer::Token Tokenizer::get_function_token(
                string inner_text,
                const Scope& scope)
        {
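            // "(args)" applied to the preceding operand: a call whose
            // operands are the comma-separated argument subexpressions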
            Token token;

            token.text = "(" + inner_text + ")";
            token.op = FUNCTION;
            token.l_hook = true;
            token.r_hook = false;
            token.assoc = Token::LEFT;
            token.precedence = 17;

            std::vector<string> args;
            split_args(inner_text, args);

            token.expr.reset(new Expression());
            for(std::vector<string>::size_type i = 0; i < args.size(); ++i) {
                Expression::operand arg(new Expression(args[i], scope));
                token.expr->operands.push_back(arg);
            }

            return token;
        }



        Tokenizer::Token Tokenizer::get_subscript_token(
                string inner_text,
                const Scope& scope)
        {
            Token token;

            token.text = "[" + inner_text + "]";
            token.op = SUBSCRIPT;
            token.l_hook = true;
            token.r_hook = false;
            token.assoc = Token::LEFT;
            token.precedence = 17;

            token.expr.reset(new Expression());
            Expression::operand arg(new Expression(inner_text, scope));
            token.expr->operands.push_back(arg);

            return token;
        }



        Tokenizer::Token Tokenizer::get_ternary_token(
                string inner_text,
                const Scope& scope)
        {
            Token token;

            token.text = "?" + inner_text + ":";
            token.op = TERNARY;
            token.l_hook = true;
            token.r_hook = true;
            token.assoc = Token::LEFT;
            token.precedence = 4;

            token.expr.reset(new Expression());
            Expression::operand arg(new Expression(inner_text, scope));
            token.expr->operands.push_back(arg);

            return token;
        }



        Tokenizer::Token Tokenizer::get_token_from_map(
                const std::map<string, Token>& map,
                string str,
                strpos pos)
        {
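            // greedy longest-match lookup: grow the candidate text one
            // character at a time while it is still a prefix of some key in
            // the map, remembering the last exact match seen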
            Token token;

            if(pos >= str.size()) {
                return token;
            }

            string sub = str.substr(pos, 1);
            strpos n = pos;
            std::map<string, Token>::const_iterator it;

            it = partial_lookup(map, sub);
            while(it != map.end()) {
                if(it->first == sub) {
                    // full match (not just partial)
                    token = it->second;
                    token.text = sub;
                }
                if(++n >= str.size()) {
                    break;
                }
                sub += str[n];
                it = partial_lookup(map, sub);
            }

            // make sure a keyword operator isn't grabbed from the front of
            // an identifier (e.g. "new" must not match inside "newton");
            // skip the check when no match was found or the match ends the
            // string, so we never index past either string's end
            if(token.text.size() && pos + token.text.size() < str.size()) {
                unsigned char a = token.text[token.text.size()-1];
                unsigned char b = str[pos + token.text.size()];
                using std::isalnum;
                if((isalnum(a) || a == '_') && (isalnum(b) || b == '_')) {
                    // cancel this token
                    token = Token();
                }
            }

            return token;
        }



        // Collapses the operator token at 'it' into a single operand token
        // by absorbing its left/right neighbors (per l_hook/r_hook) into
        // its expression.  Returns false if a required neighbor is missing
        // or is not yet a fully-expanded operand.  The token list must be
        // taken by reference: 'it' points into the caller's list, and the
        // erasures below must act on that same list.
        bool Tokenizer::expand_op_token(
                std::list<Token>& tokens,
                std::list<Token>::iterator it)
        {
            if(!it->expr) {
                it->expr.reset(new Expression());
            }

            // pull left operand in
            if(it->l_hook) {
                if(it != tokens.begin()) {
                    std::list<Token>::iterator prev = it;
                    --prev;
                    if(prev->op == NONE && prev->expr) {
                        it->text.insert(0, prev->text);
                        it->expr->operands.push_front(prev->expr);
                        tokens.erase(prev);
                    }
                    else {
                        return false;
                    }
                }
                else {
                    return false;
                }
            }

            // pull right operand in
            if(it->r_hook) {
                std::list<Token>::iterator next = it;
                ++next;
                if(next != tokens.end() && next->op == NONE && next->expr) {
                    it->text += next->text;
                    it->expr->operands.push_back(next->expr);
                    tokens.erase(next);
                }
                else {
                    return false;
                }
            }

            it->expr->op = it->op;
            it->expr->text = it->text;
            it->op = NONE;
            it->precedence = 0;
            return true;
        }



        // Splits a call's argument list on top-level commas, respecting
        // character/string quotes and nested parentheses.  The output
        // vector must be taken by reference for the results to reach the
        // caller.
        void Tokenizer::split_args(string str, std::vector<string>& args)
        {
            args.clear();
            strpos arg_begin = 0;   // first char of current arg
            strpos arg_end;         // one past last char of current arg

            // loop for each argument
            while(arg_begin < str.size()) {
                arg_end = str.find_first_not_of(W_SPACE, arg_begin);

                if(arg_end >= str.size()) {
                    // just trailing whitespace
                    if(args.size()) {
                        args.back() += str.substr(arg_begin);
                    }
                    return;
                }

                char quote = 0;         // '\'', '"', or 0
                // loop for each remaining character in current arg
                while(arg_end < str.size()) {
                    char n = str[arg_end];
                    if(quote) {
                        if(n == '\\') {
                            ++arg_end;
                        }
                        else if(n == quote) {
                            quote = 0;
                        }
                    }
                    else if(n == '\'' || n == '\"') {
                        quote = n;
                    }
                    else if(n == '(') {
                        arg_end = find_matching(str, arg_end);
                        if(arg_end == string::npos) {
                            // ill-formed; use rest of string as last arg
                            break;
                        }
                    }
                    else if(n == ',') {
                        break;
                    }
                    ++arg_end;
                }

                if(arg_end > str.size()) {
                    arg_end = str.size();
                }
                args.push_back(str.substr(arg_begin, arg_end-arg_begin));
                arg_begin = arg_end + 1;    // skip a char (the comma)
            }
        }



        const std::map<string, Tokenizer::Token>& Tokenizer::left_terminal_ops()
        {
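            // operators that can begin an expression (no left operand);
            // built once on first use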
            static std::map<string, Token> lto;
            if(lto.empty()) {
                lto["::"]  = Token(G_SCOPE,     false, true, Token::RIGHT, 19);

                lto["++"]  = Token(PRE_INC,     false, true, Token::RIGHT, 16);
                lto["--"]  = Token(PRE_DEC,     false, true, Token::RIGHT, 16);
                lto["~"]   = Token(COMPLEMENT,  false, true, Token::RIGHT, 16);
                lto["!"]   = Token(NOT,         false, true, Token::RIGHT, 16);
                lto["-"]   = Token(NEGATIVE,    false, true, Token::RIGHT, 16);
                lto["+"]   = Token(POSITIVE,    false, true, Token::RIGHT, 16);
                lto["&"]   = Token(ADDRESS_OF,  false, true, Token::RIGHT, 16);
                lto["*"]   = Token(DEREFERENCE, false, true, Token::RIGHT, 16);

                lto["sizeof"] = Token(SIZEOF,   false, true, Token::RIGHT, 16);
                lto["new"]    = Token(NEW,      false, true, Token::RIGHT, 16);
                lto["delete"] = Token(DELETE,   false, true, Token::RIGHT, 16);
            }
            return lto;
        }



        const std::map<string, Tokenizer::Token>& Tokenizer::left_hooked_ops()
        {
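            // operators that require a left operand; built once on first
            // use, with precedences descending from scope resolution (18)
            // to the comma operator (1)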
            static std::map<string, Token> lho;
            if(lho.empty()) {
                lho["::"]  = Token(SCOPE,       true, true,  Token::LEFT, 18);

                lho["."]   = Token(MEMBER,      true, true,  Token::LEFT, 17);
                lho["->"]  = Token(P_MEMBER,    true, true,  Token::LEFT, 17);
                lho["++"]  = Token(POST_INC,    true, false, Token::LEFT, 17);
                lho["--"]  = Token(POST_DEC,    true, false, Token::LEFT, 17);

                lho[".*"]  = Token(MEMBER_P,    true, true,  Token::LEFT, 15);
                lho["->*"] = Token(P_MEMBER_P,  true, true,  Token::LEFT, 15);

                lho["*"]   = Token(MULTIPLY,    true, true,  Token::LEFT, 14);
                lho["/"]   = Token(DIVIDE,      true, true,  Token::LEFT, 14);
                lho["%"]   = Token(MODULO,      true, true,  Token::LEFT, 14);

                lho["+"]   = Token(ADD,         true, true,  Token::LEFT, 13);
                lho["-"]   = Token(SUBTRACT,    true, true,  Token::LEFT, 13);

                lho["<<"]  = Token(SHIFT_LEFT,  true, true,  Token::LEFT, 12);
                lho[">>"]  = Token(SHIFT_RIGHT, true, true,  Token::LEFT, 12);

                lho["<"]   = Token(LESS,        true, true,  Token::LEFT, 11);
                lho["<="]  = Token(LESS_EQUAL,  true, true,  Token::LEFT, 11);
                lho[">"]   = Token(GREATER,     true, true,  Token::LEFT, 11);
                lho[">="]  = Token(GREATER_EQUAL, true, true, Token::LEFT, 11);

                lho["=="]  = Token(EQUAL,       true, true,  Token::LEFT, 10);
                lho["!="]  = Token(NOT_EQUAL,   true, true,  Token::LEFT, 10);

                lho["&"]   = Token(BIT_AND,     true, true,  Token::LEFT, 9);

                lho["^"]   = Token(BIT_XOR,     true, true,  Token::LEFT, 8);

                lho["|"]   = Token(BIT_OR,      true, true,  Token::LEFT, 7);

                lho["&&"]  = Token(AND,         true, true,  Token::LEFT, 6);

                lho["||"]  = Token(OR,          true, true,  Token::LEFT, 5);

                lho["="]   = Token(ASSIGN,            true,true,Token::RIGHT,3);
                lho["*="]  = Token(MULTIPLY_ASSIGN,   true,true,Token::RIGHT,3);
                lho["/="]  = Token(DIVIDE_ASSIGN,     true,true,Token::RIGHT,3);
                lho["%="]  = Token(MODULO_ASSIGN,     true,true,Token::RIGHT,3);
                lho["+="]  = Token(ADD_ASSIGN,        true,true,Token::RIGHT,3);
                lho["-="]  = Token(SUBTRACT_ASSIGN,   true,true,Token::RIGHT,3);
                lho["<<="] = Token(SHIFT_LEFT_ASSIGN, true,true,Token::RIGHT,3);
                lho[">>="] = Token(SHIFT_RIGHT_ASSIGN,true,true,Token::RIGHT,3);
                lho["&="]  = Token(BIT_AND_ASSIGN,    true,true,Token::RIGHT,3);
                lho["^="]  = Token(BIT_XOR_ASSIGN,    true,true,Token::RIGHT,3);
                lho["|="]  = Token(BIT_OR_ASSIGN,     true,true,Token::RIGHT,3);

                lho[","]   = Token(COMMA,       true, true,  Token::LEFT, 1);
            }
            return lho;
        }
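
        // Usage sketch: how the routines above fit together.  This is
        // hypothetical driver code; it assumes these routines are callable
        // as static members and should be adjusted to the real interface
        // declared in ../tokenizer.h.
        //
        //     // given a Scope object 'scope' describing the lookup context:
        //     std::list<Tokenizer::Token> tokens;
        //     Tokenizer::get_token_list("a + b * f(c)", tokens, scope);
        //     if(!tokens.empty()) {
        //         Tokenizer::consolidate_tokens(tokens);
        //     }
        //     // on success, tokens holds one fully-expanded token whose
        //     // expr is the parse tree of the whole expression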

    }
}