#include "Search.h"
#include "SysUtil.h"
#include "StrUtil.h"

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
#include <errno.h>

//-----------------------------------------------------------------
/**
  Opens the dictionary file and derives the initial binary-search step
  from its size.
  @param cmp provider of cmp() used to compare the searched text with records
  @param the_filename dictionary file; only the pointer is stored, so the
         string has to outlive this object
  @param vzad how many words findList() lists backwards
  @param vpred how many words findList() lists forwards
  @param encoding preferred input/output encoding, e.g. "UTF-8"
  (the dictionary data is always in ISO-8859-2)
 */
Search::Search(Compare *cmp, const char *the_filename,
        int vzad, int vpred, const char *encoding)
{
    filename = the_filename;
    compare = cmp;

    listVzad = vzad;
    listVpred = vpred;

    // No prefix has been searched for yet. Give both members a defined
    // value so that m_len is never read while still indeterminate.
    m_starts = NULL;
    m_len = 0;

    // preferred encoding -> ISO-8859-2, for text coming from the user
    m_inputCD = iconv_open("ISO-8859-2", encoding);
    if ((iconv_t)-1 == m_inputCD) {
        SysUtil::systemError(encoding);
    }
    // ISO-8859-2 -> preferred encoding, for text read from the dictionary
    m_outputCD = iconv_open(encoding, "ISO-8859-2");
    if ((iconv_t)-1 == m_outputCD) {
        SysUtil::systemError(encoding);
    }

    file = fopen(filename, "rb");
    if (file == NULL) {
        SysUtil::systemError(filename);
    }

    //FIXME: is there a better way to find out the size of the file?
    if (fseek(file, 0L, SEEK_END) != 0) {
        SysUtil::systemError(filename);
    }
    middleset = ftell(file);
    if (middleset < 0) {
        SysUtil::systemError(filename);
    }
    // Half of the number of POLOZKA_W-byte records, rounded up;
    // seekFile() uses it as the first step of the halving.
    middleset = (middleset / POLOZKA_W + 1) >> 1;

    if (middleset == 0) {
        SysUtil::error("Search", "empty vocabulary");
    }
} 

//-----------------------------------------------------------------
/**
  Releases the remembered search prefix, both iconv descriptors
  and the dictionary file.
 */
Search::~Search()
{
    // m_starts is produced by inputEncode(), whose result is documented as
    // "free it" and which complete() releases with free(); handing it to
    // operator delete would mismatch the allocator. free(NULL) is a no-op.
    free(m_starts);

    // NOTE(review): the guards only matter if SysUtil::systemError() returns
    // after a failed iconv_open()/fopen() in the constructor - not visible
    // from this file.
    if (m_inputCD != (iconv_t)-1) {
        iconv_close(m_inputCD);
    }
    if (m_outputCD != (iconv_t)-1) {
        iconv_close(m_outputCD);
    }
    if (file != NULL) {
        fclose(file);
    }
}

//-----------------------------------------------------------------
/**
  Moves by binary halving to the first word that is greater than
  or equal to 'text'.

  On the regular exit the matching record is held in 'buffer' and the file
  position is at the start of that record, so the next fread() returns the
  same record again. The early returns (position outside the file, nothing
  left to read) leave 'buffer' as the last successful read left it.
  @param text searched text, compared with records through compare->cmp()
 */
void
Search::seekFile(const char *text)
{
    if (fseek(file, 0L, SEEK_SET) != 0) {
        SysUtil::systemError(filename);
    }

    // Signed step in records relative to the current position;
    // its magnitude is halved (rounding up) after every comparison.
    long set = middleset;
    int cmp = 0;
    // Bit 1: a step of size <= 1 has seen cmp >= 0,
    // bit 0: a step of size <= 1 has seen cmp <= 0.
    int stav = 0;

    while (true) {
        if (fseek(file, set * POLOZKA_W, SEEK_CUR) != 0) {
            //TEST: file bounds
            SysUtil::log_debug("seek end, set:%li", set);

            if (labs(set) <= 1) {
                // outside the file bounds
                return;
            }
            else {
                if (set > 0) {
                    // to the last word
                    fseek(file, -POLOZKA_W, SEEK_END);
                }
                else {
                    // to the first word
                    fseek(file, 0L, SEEK_SET);
                }
            }
        }

        if (fread(buffer, POLOZKA_W, 1, file) != 1) {
            if (ferror(file)) {
                SysUtil::systemError(filename);
            }

            // We get here when fseek has moved us exactly
            // to the end of the file.
            //TEST: end of file
            SysUtil::log_debug("-end of file-, set:%li", set);

            // labs(), like everywhere else in this function: 'set' is a
            // long and must not be narrowed by the int version of abs().
            if (labs(set) > 1) {
                // go through the last words in the file
                fseek(file,  -(set + 1) * POLOZKA_W, SEEK_CUR);
                continue;
            }
            return;
        }
        // move back to where we were before the read
        fseek(file, -POLOZKA_W, SEEK_CUR);

        cmp = compare->cmp(text, buffer);

        if (labs(set) <= 1) {
            stav |= ((cmp >= 0) << 1);
            stav |= (cmp <= 0);
            if (stav == 3 && cmp <= 0) {
                // done: single steps went both ways and this record
                // does not compare below 'text'
                break;
            }
        }

        if (cmp > 0) {
            set = (labs(set) + 1) >> 1;
        }
        else {
            set = - ((labs(set) + 1) >> 1);
        }
    }

}

//-----------------------------------------------------------------
// iconv() is not declared the same way everywhere: SUSv2 and some other
// systems take the input buffer as 'const char **', whereas glibc 2.2,
// in line with POSIX 1003.1-2001, takes 'char **'.
// Receiving the function pointer as a template argument lets the compiler
// deduce the pointer type in use, so callers always pass a 'char **'.
template<typename InBufPtr>
inline size_t
__iconv_adaptor(
        size_t (*iconv_func)(iconv_t, InBufPtr, size_t *, char **, size_t *),
        iconv_t cd, char **inbuf, size_t *inbytesleft,
        char **outbuf, size_t *outbytesleft)
{
    // convert the caller's pointer to whatever this iconv() expects
    InBufPtr source = (InBufPtr)inbuf;
    return iconv_func(cd, source, inbytesleft, outbuf, outbytesleft);
}
//-----------------------------------------------------------------
/**
 * Converts a string from the preferred encoding to ISO-8859-2,
 * using the input conversion descriptor.
 * @param text NUL-terminated string in the preferred encoding
 * @return newly allocated string; the caller releases it with free()
 */
char *
Search::inputEncode(const char *text)
{
    char *converted = createEncoded(text, m_inputCD);
    return converted;
}
//-----------------------------------------------------------------
/**
 * Converts a string from ISO-8859-2 to the preferred encoding,
 * using the output conversion descriptor.
 * @param text NUL-terminated string in ISO-8859-2
 * @return newly allocated string; the caller releases it with free()
 */
char *
Search::outputEncode(const char *text)
{
    char *converted = createEncoded(text, m_outputCD);
    return converted;
}
//-----------------------------------------------------------------
/**
 * Converts a string with the given conversion descriptor.
 *
 * The result buffer holds two bytes per input byte plus a terminator.
 * When iconv() fails, a warning is issued and as much of the unconverted
 * rest of the input as still fits is appended unchanged (best effort).
 * @param text NUL-terminated input string
 * @param cd conversion descriptor obtained from iconv_open()
 * @return newly allocated, always NUL-terminated string;
 *         the caller releases it with free()
 */
char *
Search::createEncoded(const char *text, iconv_t cd)
{
    size_t inleft = strlen(text);
    const size_t capacity = inleft * 2 + 1;
    char *inbuf = const_cast<char *>(text);
    char *result = (char *)SysUtil::xmalloc(capacity);
    char *outbuf = result;
    memset(result, 0, capacity);

    // Keep the last byte away from iconv(): the zero written by memset()
    // then terminates the result even if the output fills the buffer.
    size_t outleft = capacity - 1;
    if ((size_t)-1 == __iconv_adaptor(iconv, cd, &inbuf, &inleft,
                &outbuf, &outleft))
    {
        SysUtil::warning("iconv", strerror(errno));
        // Copy the unconverted rest, but never more than the space that is
        // left - after E2BIG there may be little or no room at all.
        const size_t rest = (inleft < outleft) ? inleft : outleft;
        memcpy(outbuf, inbuf, rest);
    }
    return result;
}
//-----------------------------------------------------------------
/**
  Finds the first word that starts with 'text'.
  The converted prefix and its length are remembered in m_starts / m_len
  for the following nextComplete() calls.
  @param text input text in the preferred encoding
  @return newly allocated string in the preferred encoding (release it
          with free()), or NULL when nothing was found
 */
char *
Search::complete(const char *text)
{
    // forget the prefix of the previous search
    if (m_starts) {
        free(m_starts);
    }
    m_starts = inputEncode(text);
    // length computed once, reused by nextComplete()
    m_len = strlen(m_starts);

    // A prefix longer than half a record is too long; an empty one
    // would produce too many hits.
    if (m_len > (POLOZKA_W / 2) || m_len < 1) {
        return NULL;
    }

    //TEST: searched string
    SysUtil::log_debug("hledame:%s--", m_starts);

    seekFile(m_starts);

    char *candidate = outputEncode(StrUtil::trimstr(buffer, m_len));
    // cut the buffered record down to the length of the prefix
    buffer[m_len] = '\0';

    if (compare->cmp(m_starts, buffer) != 0) {
        //TEST: mismatch
        SysUtil::log_debug("--%s!=%s--1", m_starts, buffer);

        printf("\n-not found-\n%s", text);
        free(candidate);
        return NULL;
    }
    return candidate;
}
//-----------------------------------------------------------------
/**
  Finds the next word that starts with the prefix remembered by complete().
  Reads one record from the current file position.
  @return newly allocated string in the preferred encoding (release it
          with free()), or NULL when nothing was found, the file ended,
          or complete() has not been called yet
 */
char *
Search::nextComplete()
{
    // Without a preceding complete() there is no prefix to compare with;
    // m_starts is still the NULL set by the constructor.
    if (m_starts == NULL) {
        return NULL;
    }

    if (fread(buffer, POLOZKA_W, 1, file) != 1) {
        if (ferror(file)) {
            SysUtil::systemError(filename);
        }
        return NULL;
    }

    char *result = outputEncode(StrUtil::trimstr(buffer, m_len));
    // cut the buffered record down to the length of the prefix
    buffer[m_len] = '\0';

    if (compare->cmp(m_starts, buffer) == 0) {
        return result;
    }
    else {
        //TEST: mismatch
        SysUtil::log_debug("--%s!=%s--2", m_starts, buffer);

        free(result);
        return NULL;
    }
}

//-----------------------------------------------------------------
/**
  Prints the surroundings of 'text': up to listVzad words before the
  position found by seekFile() and listVpred words from it onwards.
  Each row shows the first and the second half of one record.
  @param text input string in the preferred encoding
 */
void
Search::findList(const char *text)
{
    char *in_text = inputEncode(text);
    seekFile(in_text);
    free(in_text);

    // Step back by no more records than actually lie before the current
    // position. Near the start of the dictionary the preceding words that
    // do exist are then still listed, instead of the whole backward seek
    // failing and none of them being shown.
    long back = listVzad;
    const long pos = ftell(file);
    if (pos >= 0) {
        const long available = pos / static_cast<long>(POLOZKA_W);
        if (back > available) {
            back = available;
        }
    }

    // Rows with an index below listVzad are the preceding words; when
    // fewer of them are available the numbering starts correspondingly
    // higher, so the separator still follows the last preceding word.
    int i = listVzad - static_cast<int>(back);
    if (fseek(file, -back * static_cast<long>(POLOZKA_W), SEEK_CUR) != 0) {
        // the backward seek failed: list the forward words only
        i = listVzad;
    }

    printf("===============================================================\n");
    for (; i < listVzad + listVpred; i++) {
        if (fread(buffer, POLOZKA_W, 1, file) != 1) {
            if (ferror(file)) {
                SysUtil::systemError(filename);
            }
            // end of file
            return;
        }

        char *out_first = outputEncode(StrUtil::trimstr(buffer, 0));
        char *out_second = outputEncode(
                StrUtil::trimstr(buffer + (POLOZKA_W / 2), 0));
        printf("%-38s- %-s\n", out_first, out_second);
        free(out_first);
        free(out_second);

        if (i == listVzad - 1) {
            // separates the preceding words from the found position
            printf("----------------------------------------"
                    "-----------------------\n");
        }
    }
    printf("----------------------------------------"
            "-----------------------\n");
}
//-----------------------------------------------------------------
/**
  Prints the words that are an exact translation of the input:
  starting at the position found by seekFile(), the second half of every
  record is printed for as long as checkMatch() accepts its first half.
  @param text input string in the preferred encoding
 */
void
Search::printMatches(const char *text)
{
    char *needle = inputEncode(text);
    seekFile(needle);

    for (;;) {
        const bool got_record = (fread(buffer, POLOZKA_W, 1, file) == 1);
        if (!got_record) {
            if (ferror(file)) {
                SysUtil::systemError(filename);
            }
            // end of file
            break;
        }

        // the first half of the record is the candidate word
        if (!checkMatch(needle, StrUtil::trimstr(buffer, 0))) {
            break;
        }

        // the second half of the record is what gets printed
        char *translation = outputEncode(
                StrUtil::trimstr(buffer + (POLOZKA_W / 2), 0));
        printf("%s\n", translation);
        free(translation);
    }

    free(needle);
}
//-----------------------------------------------------------------
/**
 * Decides whether a candidate is an exact match.
 * Anything from the first '(' on is cut off the candidate (in place)
 * before the comparison.
 * @param text text for which matches are searched
 * @param match candidate for a match (will be trimmed on the right)
 * @return true if compare->cmp() considers both texts equal
 */
bool
Search::checkMatch(const char *text, char *match)
{
    char *candidate = match;
    if (char *paren = strchr(match, '(')) {
        *paren = '\0';
        candidate = StrUtil::trimstr(match, 0);
    }
    const int order = compare->cmp(text, candidate);
    return order == 0;
}


