/*
 * genv.c - fast and simple interface to morfologik
 * Copyright (C) Bohdan R. Rau 2012 <ethanak@polip.com>
 * 
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 * 
 * You should have received a copy of the GNU Lesser General Public
 * License along with Milena.  If not, write to:
 * 	The Free Software Foundation, Inc.,
 * 	51 Franklin Street, Fifth Floor
 * 	Boston, MA  02110-1301, USA.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>

/*
 * Header of the generated morfologik.bin file.
 *
 * main() sets `magic` itself and fills every following field from
 * semi/header.dat (which therefore holds the struct without its first
 * field); the complete struct is then the first thing written to the
 * output file.
 */
struct header {
    /* File signature; main() stores the constant 0x31EF0190 here. */
    u_int32_t magic;
    /* Not used in this file; presumably the byte size of the string
       table copied from semi/string.dat - TODO confirm. */
    u_int32_t string_size;
    /* Number of struct real_word records read from semi/words.dat and of
       struct bin_word records written to the output. */
    u_int32_t word_count;
    /* Number of leading word records whose `name` goes into the name
       table written right after the header. */
    u_int32_t uniword_count;
    /* Not used in this file; presumably the number of records in
       semi/base.dat - TODO confirm. */
    u_int32_t base_count;
    /* Number of int32_t entries in semi/vector.dat. */
    u_int32_t basevector_count;
} header;

/*
 * NOTE(review): neither this type nor the `real_base` pointer is referenced
 * anywhere else in this file - semi/base.dat is copied byte-for-byte by
 * fcopy(). Presumably this documents the record layout of that file;
 * confirm before relying on it.
 */
struct real_base {
    u_int32_t name;
    u_int32_t vector;
} *real_base;

/*
 * Input word record, as stored in semi/words.dat.
 *
 * main() loads header.word_count of these into `real_words`, then splits
 * them into the name table (`name`) and the struct bin_word array (all
 * other fields except `dummy`).
 */
struct real_word {
    /* Copied into the name table for the first header.uniword_count records. */
    u_int32_t name;
    /* The next four fields and `grama` are copied unchanged into bin_word. */
    u_int32_t writename;
    u_int32_t basename;
    u_int32_t baseword;
    u_int32_t next_word;
    /* Never read here and not written to the output; presumably padding
       that keeps `grama` 8-byte aligned - TODO confirm. */
    u_int32_t dummy;
    u_int64_t grama;
} *real_words;

/* Name table: the `name` field of the first header.uniword_count word
   records. Built, written and freed inside main(). */
u_int32_t *names;

/*
 * Output word record, as written to morfologik.bin.
 *
 * It is struct real_word without the `name` and `dummy` fields. The
 * `bin_words` array must stay allocated while the vector is being sorted,
 * because vvcomp() reads `grama` from it.
 */
struct bin_word {
    u_int32_t writename;
    u_int32_t basename;
    u_int32_t baseword;
    u_int32_t next_word;
    /* Sort key used by vvcomp() when ordering vector entries. */
    u_int64_t grama;
} *bin_words;


/*
 * load - read exactly `size` bytes from the beginning of a file.
 *
 * whereto: destination buffer, at least `size` bytes long.
 * size:    number of bytes that must be read.
 * name:    path of the file to read; also used as the prefix of error
 *          messages.
 *
 * Any failure (file cannot be opened, read error, file shorter than
 * `size`) prints a message to stderr and terminates the program with
 * exit status 1.
 */
void load(void *whereto,size_t size,char *name)
{
    char *dst = whereto;
    size_t done = 0;
    int fd = open(name, O_RDONLY);

    if (fd < 0) {
        perror(name);
        exit(1);
    }
    /* read() may legally return fewer bytes than requested, so keep going
       until the whole buffer is filled. */
    while (done < size) {
        ssize_t got = read(fd, dst + done, size - done);

        if (got < 0) {
            perror(name);
            exit(1);
        }
        if (got == 0) {
            /* End of file is not an errno condition; perror() would print
               a stale or "Success" message here. */
            fprintf(stderr, "%s: unexpected end of file\n", name);
            exit(1);
        }
        done += (size_t)got;
    }
    close(fd);
}


/* File descriptor of the output file; opened by main() on "morfologik.bin"
   and written to by save() and fcopy(). */
int outfd;

/*
 * save - append `size` bytes starting at `memo` to the output file (outfd).
 *
 * A failed write prints a message to stderr and terminates the program
 * with exit status 1.
 */
void save(void *memo,size_t size)
{
    const char *src = memo;
    size_t done = 0;

    /* write() may accept only part of the buffer; a short count is not an
       error by itself, so continue with the remainder. */
    while (done < size) {
        ssize_t put = write(outfd, src + done, size - done);

        if (put < 0) {
            perror("Write error");
            exit(1);
        }
        if (put == 0) {
            /* No progress and no errno: bail out instead of spinning. */
            fprintf(stderr, "Write error: no bytes written\n");
            exit(1);
        }
        done += (size_t)put;
    }
}

/*
 * fcopy - append the entire contents of file `name` to the output file
 * (outfd).
 *
 * Failure to open or read the input, or to write the output, prints a
 * message to stderr and terminates the program with exit status 1.
 */
void fcopy(char *name)
{
    char buffer[10240];
    ssize_t got;
    int fd = open(name, O_RDONLY);

    if (fd < 0) {
        perror(name);
        exit(1);
    }
    while ((got = read(fd, buffer, sizeof buffer)) > 0) {
        ssize_t done = 0;

        /* write() may take only part of the chunk; push the rest. */
        while (done < got) {
            ssize_t put = write(outfd, buffer + done, (size_t)(got - done));

            if (put <= 0) {
                if (put < 0)
                    perror("Write error");
                else
                    fprintf(stderr, "Write error: no bytes written\n");
                exit(1);
            }
            done += put;
        }
    }
    /* A negative count is a read failure, not end of file: stopping
       quietly here would leave a truncated output that looks valid. */
    if (got < 0) {
        perror(name);
        exit(1);
    }
    close(fd);
}

/* Contents of semi/vector.dat (header.basevector_count entries).
   Non-negative entries are used as indices into bin_words by vvcomp();
   a negative entry ends a group (see sort_vector()). */
int32_t *vector;

/*
 * vvcomp - order two vector entries by the `grama` value of the word
 * records they index.
 *
 * v1, v2: pointers to vector entries; each entry is used as an index
 *         into bin_words.
 *
 * Returns -1, 0 or 1 when the first key is respectively smaller than,
 * equal to, or greater than the second.
 */
int vvcomp(int32_t *v1,int32_t *v2)
{
    const u_int64_t left = bin_words[*v1].grama;
    const u_int64_t right = bin_words[*v2].grama;

    /* Each comparison yields 0 or 1, so the difference is -1, 0 or 1. */
    return (left > right) - (left < right);
}

/*
 * Adapter giving vvcomp() the exact comparator type qsort() requires.
 * Calling vvcomp through a pointer cast to a different function type is
 * undefined behaviour, so the conversion is done on the arguments instead.
 */
static int vvcomp_qsort(const void *a, const void *b)
{
    return vvcomp((int32_t *)a, (int32_t *)b);
}

/*
 * sort_vpart - sort one group of vector entries in place, ascending by
 * the key vvcomp() compares.
 *
 * v:   first entry of the group.
 * len: number of entries in the group; must be positive.
 *
 * A group without entries is treated as corrupt input: a message is
 * printed and the program terminates with exit status 1.
 */
void sort_vpart(int32_t *v,int len)
{
    if (len <= 0) {
        printf("Bad vector length\n");
        exit(1);
    }
    if (len == 1) return;
    /* Element size is taken from the pointer so it always matches the
       int32_t entries, whatever the width of int. */
    qsort(v, (size_t)len, sizeof *v, vvcomp_qsort);
}

/*
 * sort_vector - sort every group of the global `vector` in place.
 *
 * A group is a run of consecutive non-negative entries. It ends at the
 * first negative entry (which stays where it is and is skipped) or at
 * the end of the array. Each group is handed to sort_vpart(), which
 * terminates the program when a group turns out to be empty.
 */
void sort_vector()
{
    int pos = 0;

    while (pos < header.basevector_count) {
        int group_start = pos;
        int group_len;

        /* Advance to the terminator of this group or to the array end. */
        while (pos < header.basevector_count && vector[pos] >= 0) {
            pos++;
        }
        group_len = pos - group_start;

        /* Step over the terminator, if one was found. */
        if (pos < header.basevector_count) {
            pos++;
        }
        sort_vpart(vector + group_start, group_len);
    }
}

/* Allocate `count` elements of `size` bytes each; exit(1) when the memory
   cannot be obtained. calloc() also rejects a count*size overflow. */
static void *alloc_array(size_t count, size_t size)
{
    void *mem = calloc(count, size);

    /* A null result for an empty array is not a failure. */
    if (mem == NULL && count != 0) {
        fprintf(stderr, "Out of memory\n");
        exit(1);
    }
    return mem;
}

/*
 * Build morfologik.bin from the intermediate files in semi/.
 *
 * Output layout, in order:
 *   1. struct header (magic + the fields read from semi/header.dat)
 *   2. name table: header.uniword_count 32-bit values
 *   3. header.word_count struct bin_word records
 *   4. semi/base.dat, copied verbatim
 *   5. semi/vector.dat with every group sorted (see sort_vector())
 *   6. semi/string.dat, copied verbatim
 *
 * Prints "OK" and returns 0 on success; every failure terminates the
 * program with exit status 1.
 */
int main(void)
{
    u_int32_t i;

    header.magic=0x31EF0190;
    /* semi/header.dat holds the header without its magic field. */
    load(&header.string_size,sizeof(header)-sizeof(header.magic),"semi/header.dat");

    /* The name table is taken from the first uniword_count word records,
       so a larger count would read past the end of real_words. */
    if (header.uniword_count > header.word_count) {
        fprintf(stderr,"semi/header.dat: uniword_count exceeds word_count\n");
        exit(1);
    }

    real_words = alloc_array(header.word_count, sizeof *real_words);
    load(real_words,sizeof *real_words * header.word_count,"semi/words.dat");
    names = alloc_array(header.uniword_count, sizeof *names);

    /* Opened only after the main inputs were read, so a missing input does
       not truncate an existing output file. */
    outfd=open("morfologik.bin",O_WRONLY | O_CREAT | O_TRUNC, 0644);
    if (outfd < 0) {
        perror("morfologik.bin");
        exit(1);
    }
    save(&header,sizeof(header));

    for (i=0;i<header.uniword_count;i++) names[i]=real_words[i].name;
    save(names,sizeof *names * header.uniword_count);
    free(names);
    names = NULL;

    bin_words = alloc_array(header.word_count, sizeof *bin_words);
    for (i=0;i<header.word_count;i++) {
        bin_words[i].writename=real_words[i].writename;
        bin_words[i].basename=real_words[i].basename;
        bin_words[i].baseword=real_words[i].baseword;
        bin_words[i].next_word=real_words[i].next_word;
        bin_words[i].grama=real_words[i].grama;
    }
    free(real_words);
    real_words = NULL;
    save(bin_words,sizeof *bin_words * header.word_count);
    /* bin_words is kept: sort_vector() compares entries through it. */

    fcopy("semi/base.dat");

    /* The vector is written sorted rather than copied verbatim. */
    vector = alloc_array(header.basevector_count, sizeof *vector);
    load(vector,sizeof *vector * header.basevector_count,"semi/vector.dat");
    sort_vector();
    save(vector,sizeof *vector * header.basevector_count);

    fcopy("semi/string.dat");

    /* Deferred write errors can surface only at close time. */
    if (close(outfd) != 0) {
        perror("morfologik.bin");
        exit(1);
    }
    free(vector);
    free(bin_words);
    printf("OK\n");
    return 0;
}
