/*
 * libmorfologik.c - fast and simple interface to morfologik
 * Copyright (C) Bohdan R. Rau 2012-2014 <ethanak@polip.com>
 * 
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 * 
 * You should have received a copy of the GNU Lesser General Public
 * License along with this program.  If not, write to:
 * 	The Free Software Foundation, Inc.,
 * 	51 Franklin Street, Fifth Floor
 * 	Boston, MA  02110-1301, USA.
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include <errno.h>
#ifndef __WIN32
#include <sys/ipc.h>
#include <sys/shm.h>
#else
#include <stdint.h>
#define u_int32_t uint32_t
#endif


#include "libmorfologik.h"



/*
 * Header found at the start of the dictionary file. It is read verbatim
 * from the file and gives the element counts of the tables that follow,
 * in the order names, bin_words, base_words, vector, strings.
 */
struct header {
    u_int32_t magic;            /* file signature, checked when a file is opened */
    u_int32_t string_size;      /* size in bytes of the string block */
    u_int32_t word_count;       /* number of struct bin_word records */
    u_int32_t uniword_count;    /* number of u_int32_t entries in the names table */
    u_int32_t base_count;       /* number of struct base_word records */
    u_int32_t basevector_count; /* number of u_int32_t entries in the vector table */
};

/* One base (dictionary) form. */
struct base_word {
    u_int32_t name;   /* byte offset of the base form's text inside the string block */
    u_int32_t vector; /* index into the vector table where this base's word list starts */
};

/* One inflected word form as stored in the dictionary file. */
struct bin_word {
    u_int32_t writename; /* byte offset of the spelling inside the string block */
    u_int32_t basename;  /* byte offset of the base form's text inside the string block */
    u_int32_t baseword;  /* value handed to callers as "baseid" */
    u_int32_t next_word; /* index of the next bin_word in the chain; negative once converted to int ends the chain */
    u_int64_t grama;     /* grammatical description bit field (WT_... type plus WM_... flags) */
};

/*
 * Dictionary handle. header.magic tells mf_free() how the tables were
 * obtained: MF_FILE_MAGIC means malloc'ed copies, anything else means
 * attached shared-memory segments.
 */
struct morfologik_data {
    struct header header;
    u_int32_t *names;             /* header.uniword_count string offsets, binary-searched by word */
    struct bin_word *bin_words;   /* header.word_count word records */
    struct base_word *base_words; /* header.base_count base-form records */
    u_int32_t *vector;            /* header.basevector_count bin_words indices */
    char *strings;                /* header.string_size bytes of text */
#ifndef __WIN32
    /* for the owner of the shared block */
    int self_shmid;
    int names_shmid;
    int bin_words_shmid;
    int base_words_shmid;
    int vectors_shmid;
    int strings_shmid;
#endif    
};

/*
 * Release a dictionary handle together with its tables.
 *
 * Handles whose header magic equals MF_FILE_MAGIC own malloc'ed tables,
 * which are freed. Any other handle is treated as a set of attached
 * shared-memory segments, which are detached (non-Windows builds only).
 * The handle itself is always freed. A NULL handle is ignored.
 */
static void mf_free(struct morfologik_data *data)
{
    if (data == NULL) {
        return;
    }

    if (data->header.magic != MF_FILE_MAGIC) {
#ifndef __WIN32
        /* Tables are attached segments: detach whichever ones are present. */
        if (data->strings != NULL) shmdt(data->strings);
        if (data->vector != NULL) shmdt(data->vector);
        if (data->base_words != NULL) shmdt(data->base_words);
        if (data->bin_words != NULL) shmdt(data->bin_words);
        if (data->names != NULL) shmdt(data->names);
#endif
    } else {
        /* Tables are heap copies; free(NULL) is a no-op. */
        free(data->strings);
        free(data->vector);
        free(data->base_words);
        free(data->bin_words);
        free(data->names);
    }

    free(data);
}

/*
 * Public entry point for releasing a handle: forwards to mf_free().
 * A NULL handle is accepted and ignored.
 */
void morfologik_Free(struct morfologik_data *data)
{
    mf_free(data);
}

/*
 * Allocate a buffer of `size` bytes and fill it completely from `fd`.
 *
 * fd   - descriptor to read from
 * mem  - receives the allocated buffer; it is stored even when the read
 *        fails, so the caller remains responsible for freeing it
 * size - number of bytes to read
 *
 * Returns 1 when exactly `size` bytes were read, 0 on allocation failure,
 * read error or premature end of file.
 */
static int __read_it(int fd,void **mem,size_t size)
{
    /* malloc(0) may legitimately return NULL; ask for at least one byte
     * so that NULL always means "out of memory". */
    char *block = malloc(size ? size : 1);
    size_t done = 0;

    *mem = block;
    if (block == NULL) {
        return 0;
    }

    /* read() may return fewer bytes than requested or be interrupted by
     * a signal; keep going until the block is complete. */
    while (done < size) {
        ssize_t got = read(fd, block + done, size - done);

        if (got < 0) {
            if (errno == EINTR) {
                continue;
            }
            return 0;
        }
        if (got == 0) {
            return 0;   /* end of file before the block was complete */
        }
        done += (size_t)got;
    }
    return 1;
}

#ifndef __WIN32

/*
 * Create a new shared-memory segment and fill it from a file.
 *
 * fd     - descriptor to read the data from
 * key    - System V IPC key for the new segment; creation is exclusive,
 *          so an already existing segment makes the call fail
 * id     - receives the segment id on success
 * memout - receives the attached address on success
 * size   - number of bytes to create and read
 *
 * Returns 1 on success. On any failure returns 0, leaves *id and *memout
 * untouched and removes the segment again if it had been created.
 */
static int __shm_read(int fd,u_int32_t key,int *id,void **memout,size_t size)
{
    void *mem;
    size_t done = 0;
    int keyid=shmget(key,size,IPC_CREAT | IPC_EXCL | 0644);

    if (keyid < 0) return 0;
    mem=shmat(keyid,NULL,0);
    /* shmat() reports failure as (void *)-1, never as NULL. */
    if (mem == (void *)-1) {
        shmctl(keyid,IPC_RMID,NULL);
        return 0;
    }
    /* read() may deliver the data in several pieces. */
    while (done < size) {
        ssize_t got = read(fd,(char *)mem + done,size - done);

        if (got < 0 && errno == EINTR) continue;
        if (got <= 0) {
            shmdt(mem);
            shmctl(keyid,IPC_RMID,NULL);
            return 0;
        }
        done += (size_t)got;
    }
    *id=keyid;
    *memout=mem;
    return 1;
}


/*
 * Tear down a dictionary published in shared memory.
 *
 * `data` is the control block returned by morfologik_CreateShm(). Every
 * attached table is detached, every segment whose id was recorded
 * (id >= 0) is marked for removal, and finally the control block itself
 * is detached and removed. A NULL pointer is ignored, matching mf_free().
 */
static void mf_shmdelete(struct morfologik_data *data)
{
    int kid;

    if (!data) return;
    /* Remember our own id: it lives inside the block we are about to detach. */
    kid=data->self_shmid;
    if (data->strings) shmdt(data->strings);
    if (data->vector) shmdt(data->vector);
    if (data->base_words) shmdt(data->base_words);
    if (data->bin_words) shmdt(data->bin_words);
    if (data->names) shmdt(data->names);
    if (data->names_shmid >= 0) shmctl(data->names_shmid,IPC_RMID,NULL);
    if (data->bin_words_shmid >= 0) shmctl(data->bin_words_shmid,IPC_RMID,NULL);
    if (data->base_words_shmid >= 0) shmctl(data->base_words_shmid,IPC_RMID,NULL);
    if (data->vectors_shmid >= 0) shmctl(data->vectors_shmid,IPC_RMID,NULL);
    if (data->strings_shmid >= 0) shmctl(data->strings_shmid,IPC_RMID,NULL);
    shmdt(data);
    shmctl(kid,IPC_RMID,NULL);
}

/*
 * Public entry point for removing the shared-memory copy of the
 * dictionary: forwards to mf_shmdelete(). `data` is expected to be the
 * pointer returned by morfologik_CreateShm().
 */
void morfologik_CloseShm(struct morfologik_data *data)
{
    mf_shmdelete(data);
}

/*
 * Load a dictionary file into freshly created shared-memory segments.
 *
 * fname - path of the dictionary file; when NULL and MORFDIR is defined
 *         at build time, MORFDIR is used instead
 *
 * A control block (keyed MF_SHMAT_MAGIC) plus one segment per table is
 * created exclusively, so the call fails if any of them already exists.
 * The header stored in the control block carries MF_SHMAT_MAGIC instead
 * of the file magic.
 *
 * Returns the attached control block, to be released with
 * morfologik_CloseShm(), or NULL on failure. On failure every segment
 * created so far is removed again.
 */
struct morfologik_data *morfologik_CreateShm(char *fname)
{
    struct morfologik_data *shm_md;
    struct header hdr;
    int md_shmid;
    int fd;

#ifdef MORFDIR
    if (!fname) fname=MORFDIR;
#endif
    if (!fname) {
        errno=EINVAL;
        return NULL;
    }

    fd=open(fname,O_RDONLY);
    if (fd<0) {
        return NULL;
    }
    if (read(fd,&hdr,sizeof(struct header)) != sizeof(struct header)) {
        close(fd);
        return NULL;
    }
    if (hdr.magic != MF_FILE_MAGIC) {
        close(fd);
        return NULL;
    }
    md_shmid=shmget(MF_SHMAT_MAGIC,sizeof(struct morfologik_data),IPC_CREAT | IPC_EXCL | 0644);
    if (md_shmid < 0) {
        close(fd);      /* do not leak the descriptor on this path */
        return NULL;
    }
    shm_md=shmat(md_shmid,NULL,0);
    /* shmat() reports failure as (void *)-1, never as NULL. */
    if (shm_md == (void *)-1) {
        close(fd);
        shmctl(md_shmid,IPC_RMID,NULL);
        return NULL;
    }
    memset(shm_md,0,sizeof(struct morfologik_data));
    shm_md->header=hdr;
    shm_md->header.magic = MF_SHMAT_MAGIC;
    shm_md->self_shmid=md_shmid;
    /* -1 marks "segment not created" for mf_shmdelete(). */
    shm_md->names_shmid=-1;
    shm_md->bin_words_shmid=-1;
    shm_md->base_words_shmid=-1;
    shm_md->vectors_shmid=-1;
    shm_md->strings_shmid=-1;
    /* Tables follow the header in the file in exactly this order.
     * Sizes are computed in size_t so large counts cannot wrap at 32 bits. */
    if (!__shm_read(fd,MF_NAMES_KEY,&shm_md->names_shmid,(void **)&shm_md->names,sizeof(u_int32_t) * hdr.uniword_count)) goto bad;
    if (!__shm_read(fd,MF_BINWORDS_KEY,&shm_md->bin_words_shmid,(void **)&shm_md->bin_words,sizeof(struct bin_word) * hdr.word_count)) goto bad;
    if (!__shm_read(fd,MF_BASEWORDS_KEY,&shm_md->base_words_shmid,(void **)&shm_md->base_words,sizeof(struct base_word) * hdr.base_count)) goto bad;
    if (!__shm_read(fd,MF_VECTORS_KEY,&shm_md->vectors_shmid,(void **)&shm_md->vector,sizeof(u_int32_t) * hdr.basevector_count)) goto bad;
    if (!__shm_read(fd,MF_STRINGS_KEY,&shm_md->strings_shmid,(void **)&shm_md->strings,hdr.string_size)) goto bad;
    close(fd);
    return shm_md;
bad:
    close(fd);
    mf_shmdelete(shm_md);
    return NULL;
}

/*
 * Attach read-only to an existing shared-memory segment.
 *
 * key   - System V IPC key of the segment
 * mem   - receives the attached address on success; left untouched on
 *         failure so a zero-initialised handle field stays NULL
 * count - minimum size in bytes the segment must have
 *
 * Returns 1 on success, 0 when the segment does not exist or cannot be
 * attached.
 */
static int __shm_connect(int key,void **mem,size_t count)
{
    void *addr;
    int kid=shmget(key,count,0);

    if (kid < 0) return 0;
    addr=shmat(kid,NULL,SHM_RDONLY);
    /* shmat() reports failure as (void *)-1, never as NULL. */
    if (addr == (void *)-1) return 0;
    *mem=addr;
    return 1;
}

/*
 * Connect to a dictionary previously published with morfologik_CreateShm().
 *
 * The shared control block is attached only long enough to copy its
 * header; the table segments are then attached read-only through their
 * own keys into a private, heap-allocated handle.
 *
 * Returns a handle to be released with morfologik_Free(), or NULL when
 * the shared dictionary is missing, cannot be attached, or memory is
 * exhausted.
 */
struct morfologik_data *morfologik_InitSHM(void)
{
    struct morfologik_data *md,*shm_md;
    int shm_mid;

    shm_mid=shmget(MF_SHMAT_MAGIC,sizeof(*shm_md),0);
    if (shm_mid < 0) return NULL;
    shm_md=shmat(shm_mid,NULL,SHM_RDONLY);
    /* shmat() reports failure as (void *)-1, never as NULL. */
    if (shm_md == (void *)-1) return NULL;
    md=calloc(1,sizeof(*md));
    if (!md) {
        shmdt(shm_md);
        return NULL;
    }
    md->header=shm_md->header;
    /* Any magic other than MF_FILE_MAGIC makes mf_free() detach instead of free. */
    md->header.magic = MF_SHMAT_MAGIC;
    shmdt(shm_md);
    if (!__shm_connect(MF_NAMES_KEY,(void **)&md->names,sizeof(u_int32_t) * md->header.uniword_count)) goto bad;
    if (!__shm_connect(MF_BINWORDS_KEY,(void **)&md->bin_words,sizeof(struct bin_word) * md->header.word_count)) goto bad;
    if (!__shm_connect(MF_BASEWORDS_KEY,(void **)&md->base_words,sizeof(struct base_word) * md->header.base_count)) goto bad;
    if (!__shm_connect(MF_VECTORS_KEY,(void **)&md->vector,sizeof(u_int32_t) * md->header.basevector_count)) goto bad;
    if (!__shm_connect(MF_STRINGS_KEY,(void **)&md->strings,md->header.string_size)) goto bad;
    return md;
bad:
    mf_free(md);
    return NULL;
}

#endif

struct morfologik_data *morfologik_Init(char *fname)
{
    
#ifdef MORFDIR
    if (!fname) fname=MORFDIR;
#endif
    int fd=open(fname,O_RDONLY
#ifdef __WIN32
    | O_BINARY
#endif
    );
    struct morfologik_data *md;
    if (!fd) {
        return NULL;
    }
    md=malloc(sizeof(*md));
    memset(md,0,sizeof(*md));
    if (read(fd,&md->header,sizeof(struct header)) != sizeof(struct header)) {
bad:    close(fd);
        mf_free(md);
        return NULL;
    }
    if (md->header.magic != 0x31EF0190) {
        close(fd);
        mf_free(md);
        errno=EINVAL;
        return NULL;
    }
    if (!__read_it(fd,(void **)&md->names,4 * md->header.uniword_count)) goto bad;
    if (!__read_it(fd,(void **)&md->bin_words,sizeof(struct bin_word) * md->header.word_count)) goto bad;
    if (!__read_it(fd,(void **)&md->base_words,sizeof(struct base_word) * md->header.base_count)) goto bad;
    if (!__read_it(fd,(void **)&md->vector,4 * md->header.basevector_count)) goto bad;
    if (!__read_it(fd,(void **)&md->strings,md->header.string_size)) goto bad;
    close(fd);
    return md;
}

/*
 * Word ids returned by the identify functions carry a 7-bit "NMID"
 * marker in bits 24..30; the remaining bits are the bin_words index.
 */
#define NMID_SHIFT 24
#define NMID_MASK (0x7f << NMID_SHIFT)
/* Extract the marker from an id. */
#define NMID_GET(a) (((a) & NMID_MASK) >> NMID_SHIFT)
/* Strip the marker, leaving the plain index. */
#define NMID_ID(a) ((a) & ~NMID_MASK)
/* Replace the marker of id `a` with `b`. */
#define NMID_SET(a,b) (((a) & ~NMID_MASK) | ((b) << NMID_SHIFT))

/*
#define NMID_advemi 1
#define NMID_advem 2
#define NMID_impspot 3
#define NMID_pindysmy 4
#define NMID_pindyscie 5
#define NMID_pindybym 6
#define NMID_pindybys 7
#define NMID_pindyby 8
*/

/*
 * Marker values (low bits of the NMID field) naming the spelling
 * transformation that led to a dictionary hit; __filter_nmid() uses
 * them to adjust or reject the grammatical description.
 */
enum {
    NMID_advemi=1,
    NMID_advem,
    NMID_impspot,
    // combinations with a verb
    NMID_pindysmy,
    NMID_pindyscie,
    NMID_pindybym,
    NMID_pindybys,
    NMID_pindyby,
    NMID_pindybysmy,
    NMID_pindybyscie,
    // archaic verb forms
    
    NMID_alim,
    NMID_ista
    };
    
#define NMID_negate 64
#define NMID_super 32
#define NMID_cnt 16

/*
 * Adjust a grammatical description for a word that was found only after
 * a spelling transformation.
 *
 * grama - description of the dictionary entry that was actually found
 * smy   - NMID marker describing the transformation: any combination of
 *         the NMID_negate / NMID_super / NMID_cnt flags, optionally
 *         plus one of the NMID_... enum values
 *
 * Flags are consumed one per call (negate, then super, then cnt) with
 * the remainder handled recursively. Returns the adjusted description,
 * or 0 when the entry is not compatible with the transformation or the
 * marker is not recognised.
 */
static u_int64_t __filter_nmid(u_int64_t grama,int smy)
{
    int cm=WT_GET(grama);

    if (smy & NMID_negate) {
        if (cm != WT_adj && cm != WT_ppas && cm != WT_pact) return 0;
        if (grama & WM_neg) return 0;   /* already negated */
        grama &= ~WM_aff;
        grama |= WM_neg;
        smy &= ~NMID_negate;
        if (!smy) return grama;
        return __filter_nmid(grama,smy);
    }

    if (smy & NMID_super) {
        if (cm != WT_adj && cm != WT_subst && cm != WT_adv) return 0;
        smy &= ~NMID_super;
        grama |= WM_super | WM_cmplx;
        if (!smy) return grama;
        return __filter_nmid(grama,smy);
    }

    if (smy & NMID_cnt) {
        if (cm != WT_adj && cm != WT_subst && cm != WT_adv) return 0;
        smy &= ~NMID_cnt;
        grama |= WM_cnt | WM_cmplx;
        if (!smy) return grama;
        return __filter_nmid(grama,smy);
    }

    if (smy == NMID_ista) {
        if (cm != WT_verb) return 0;
        if (!(grama & (WM_pl))) return 0;
        if (!(grama & (WM_pri | WM_sec))) return 0;
        return grama | WM_depr | WM_nstd;
    }
    if (smy == NMID_alim) {
        if (cm != WT_verb) return 0;
        if ((grama & (WM_pl | WM_praet)) != (WM_pl | WM_praet)) return 0;
        return (grama | WM_depr | WM_nstd) & ~WM_str3;
    }
    if (smy >= NMID_pindysmy && smy <= NMID_pindybyscie) {
        /* A verbal ending glued onto a non-verb. */
        if (cm != WT_subst && cm != WT_adj && cm != WT_adv && cm != WT_advp) {
            return 0;
        }
        if (cm == WT_subst || cm == WT_adj) {
            grama &= ~WM_voc;
            if (smy != NMID_pindyscie) {
                grama &= ~WM_nom;
            }
            /* Reject the entry when no case is left. */
            if (!(grama & WM_CASU_MASK)) return 0;
        }
        if (smy == NMID_pindysmy) grama |= WM_pri | WM_str3;
        else if (smy == NMID_pindyscie) grama |= WM_sec | WM_str3;
        else if (smy == NMID_pindybym) grama |= WM_pri | WM_sgcplx | WM_pot | WM_illegal | WM_str3;
        else if (smy == NMID_pindybys) grama |= WM_sec | WM_sgcplx | WM_pot | WM_illegal | WM_str3;
        else if (smy == NMID_pindyby) grama |= WM_tri | WM_pot | WM_illegal | WM_str3;
        else if (smy == NMID_pindybysmy) grama |= WM_pri | WM_pot | WM_illegal;
        else if (smy == NMID_pindybyscie) grama |= WM_sec | WM_pot | WM_illegal;
        return grama | WM_cmplx;
    }
    if (smy == NMID_impspot) {
        if (cm != WT_verb) return 0;
        if (!(grama & WM_imps)) return 0;
        return grama | WM_pot | WM_depr | WM_illegal;
    }
    if (smy == NMID_advem) {
        /* e.g. "niebieskiem aniełem": keep only singular instrumental */
        if (cm != WT_adj && cm != WT_ppas) return 0;
        if ((grama & (WM_sg | WM_inst)) == (WM_sg | WM_inst)) {
            return (grama & ~((WM_CASU_MASK | WM_NUM_MASK) ^ (WM_inst | WM_sg)))  | WM_depr;
        }
        /* e.g. "niebieskiem aniełom": keep only plural dative */
        if ((grama & (WM_pl | WM_dat)) == (WM_pl | WM_dat)) {
            return (grama & ~((WM_CASU_MASK | WM_NUM_MASK) ^ (WM_dat | WM_pl)))  | WM_depr;
        }
        return 0;
    }
    /* This used to be an assignment ("smy=NMID_advemi"), which made the
     * branch unconditional for every marker that reached it. */
    if (smy == NMID_advemi) {
        /* e.g. "niebieskiemi aniołami": keep only plural instrumental */
        if (cm != WT_adj) return 0;
        if ((grama & (WM_pl | WM_inst)) != (WM_pl | WM_inst)) return 0;
        return  (grama & ~((WM_NUM_MASK | WM_CASU_MASK) ^ (WM_inst | WM_pl))) | WM_depr;
    }
    /* Unknown marker: reject instead of falling off the end. */
    return 0;
}




static int __morfologik_IdentifyNext(struct morfologik_data *md,int id,
                            char **pisownia,
                            char ** baza,
                            int *baseid,
                            u_int64_t *grama)
{
    id=md->bin_words[id].next_word;
    if (id < 0) return id;
    if (pisownia) *pisownia=md->strings+md->bin_words[id].writename;
    if (baza) *baza=md->strings+md->bin_words[id].basename;
    if (baseid) *baseid=md->bin_words[id].baseword;
    if (grama) *grama=md->bin_words[id].grama;
    return id;    
}

/*
 * Continue an identification started by one of the IdentifyWord calls.
 *
 * id is the value returned by the previous call. When it carries no
 * NMID marker the next chain entry is returned as is. Otherwise the
 * chain is walked until an entry survives __filter_nmid() for that
 * marker, and the result is returned with the same marker.
 *
 * Each output pointer may be NULL. Returns the next id, or a negative
 * value when there are no more entries.
 */
int morfologik_IdentifyNext(struct morfologik_data *md,int id,
                            char **pisownia,
                            char ** baza,
                            int *baseid,
                            u_int64_t *grama)
{
    u_int64_t cand_grama;
    char *cand_base;
    char *cand_spelling;
    int cand_baseid;
    int marker = NMID_GET(id);
    int cursor;

    if (marker == 0) {
        /* Plain id: nothing to filter. */
        return __morfologik_IdentifyNext(md, id, pisownia, baza, baseid, grama);
    }

    cursor = NMID_ID(id);
    do {
        cursor = __morfologik_IdentifyNext(md, cursor,
                                           &cand_spelling,
                                           &cand_base,
                                           &cand_baseid,
                                           &cand_grama);
        if (cursor < 0) {
            return -1;
        }
        cand_grama = __filter_nmid(cand_grama, marker);
    } while (!cand_grama);

    if (grama != NULL) *grama = cand_grama;
    if (baseid != NULL) *baseid = cand_baseid;
    if (pisownia != NULL) *pisownia = cand_spelling;
    if (baza != NULL) *baza = cand_base;
    return NMID_SET(cursor, marker);
}

/*
 * Test whether `word` (of byte length `len`) ends with `fin` and has at
 * least two more bytes in front of that ending.
 * Returns 1 when both hold, 0 otherwise.
 */
static int __endswith(char *word,int len,char *fin)
{
    int tail = strlen(fin);

    if (len - 1 <= tail) {
        return 0;
    }
    return strcmp(word + (len - tail), fin) == 0;
}

    
	
    
static int __morfologik_IdentifyWordSimple(struct morfologik_data *md,char *word,
                            char **pisownia,char ** baza,int *baseid,u_int64_t *grama)
{
    int lo,hi,mid,n;
    char *c;
    lo=0;
    hi=md->header.uniword_count-1;
    while (lo <= hi) {
        mid=(hi+lo)/2;
        c=md->strings + md->names[mid];
        n=strcmp(word,c);
        if (n < 0) hi=mid-1;
        else if (n>0) lo=mid+1;
        else {
            if (pisownia) *pisownia=md->strings+md->bin_words[mid].writename;
            if (baza) *baza=md->strings+md->bin_words[mid].basename;
            if (baseid) *baseid=md->bin_words[mid].baseword;
            if (grama) *grama=md->bin_words[mid].grama;
            return mid;
        }
    }
    return -1;
}


/*
 * Return the byte length of `with` when `word` begins with it,
 * 0 otherwise (including when `with` is empty).
 */
static int startswith(char *word,char *with)
{
    int n;
    for (n=0;*with;n++) {
        if (*with++ != *word++) return 0;
    }
    return n;
}


/*
 * Counting prefixes recognised by is_count(), NULL-terminated.
 *
 * is_count() returns the first match, so a prefix that is itself the
 * beginning of a longer one must come AFTER the longer one:
 * "kilkunasto" and "kilkudziesięcio" before "kilku", "półtora" before
 * "pół". With "kilku" listed first the two longer entries could never
 * match.
 */
static char *counters[]={
    "wielo",
    "multi",
    "półtora",
    "pół",
    "ćwierć",
    "tysiąco",
    "kilkunasto",
    "kilkudziesięcio",
    "kilku",
    NULL};

/*
 * Return the byte length of the counting prefix `word` starts with,
 * or 0 when it starts with none of them.
 */
static int is_count(char *word)
{
    int n,i;
    for (i=0;counters[i];i++) {
        if ((n=startswith(word,counters[i]))) {
            return n;
        }
    }
    return 0;
}

static char *setki[]={
    "stu","dwustu","trzystu","czterystu","pięćset","sześćset",
    "siedmiuset","ośmiuset","dziewięciuset",
    "siedenset","osiemset","dziewięćset",NULL};

/* Word prefixes naming units (one .. nine), NULL-terminated. */
static char *jednostki[]={
    "jedno","dwu","trój","trzy","cztero","czworo",
    "pięcio","sześcio","siedmio","ośmio","dziewięcio",NULL};
/* Word prefixes naming ten .. nineteen, NULL-terminated. */
static char *nastki[]={
    "dziesięcio","jedenasto","dwunasto","trzynasto",
    "czternasto","piętnasto","szesnasto","siedemnasto",
    "osiemnasto","dziewiętnasto",NULL};
/* Word prefixes naming tens (twenty .. ninety), NULL-terminated. */
static char *dziestki[]={
    "dwudziesto","trzydziesto","czterdziesto",
    "pięćdziesięcio","sześćdziesięcio","siedemdziesięcio",
    "osiemdziesięcio","dziewięćdziesięcio",NULL};

/* Defined below; needed here because the two functions call each other. */
static int __morfologik_IdentifyWord(struct morfologik_data *md,char *word,char **pisownia,char ** baza,int *baseid,u_int64_t *grama,int flags);

/*
 * Identify a word that starts with a spelled-out number prefix.
 *
 * The numeral is peeled off from the most significant part downwards
 * and `flags` records how far that has progressed:
 *   0 - nothing stripped yet ("tysiąc" may follow)
 *   1 - thousand stripped, hundreds may follow
 *   2 - hundreds stripped, tens / teens / units may follow
 *   3 - tens stripped, only units may follow
 * After a teen or unit prefix, or when nothing more matches but at
 * least one part was stripped, the remainder is looked up: first with
 * "wielo" glued in front, then on its own. Only entries accepted by
 * __filter_nmid(..., NMID_cnt) count as hits.
 *
 * Each output pointer may be NULL. Returns the id of the hit tagged
 * with NMID_cnt, or -1 when the word does not start with a numeral or
 * nothing acceptable is found.
 *
 * NOTE(review): a marker already present on the id returned by
 * __morfologik_IdentifyWord() is replaced by NMID_cnt, not combined
 * with it.
 */
static int __morfologik_IdentifyCount(struct morfologik_data *md,char *word,char **pisownia,char ** baza,int *baseid,u_int64_t *grama,int flags)
{
    int n,i,rc;
    u_int64_t loc_grama;
    char *loc_baza;
    char *loc_pisownia;
    int loc_baseid;
    /* Sized from the unstripped word, so "wielo" + remainder always fits. */
    char buf[strlen(word)+32];

    if (!flags && (n=startswith(word,"tysiąc"))) {
        return __morfologik_IdentifyCount(md,word+n,pisownia,baza,baseid,grama,1);
    }
    if (flags <= 1) { /* hundreds */
        for (i=0;setki[i];i++) if ((n=startswith(word,setki[i]))) {
            return __morfologik_IdentifyCount(md,word+n,pisownia,baza,baseid,grama,2);
        }
    }
    if (flags <= 2) { /* tens and teens */
        for (i=0;dziestki[i];i++) if ((n=startswith(word,dziestki[i]))) {
            return __morfologik_IdentifyCount(md,word+n,pisownia,baza,baseid,grama,3);
        }
        for (i=0;nastki[i];i++) if ((n=startswith(word,nastki[i]))) {
            word += n;
            goto idword;
        }
    }
    if (flags <= 3) { /* units; this also covers the flags <= 2 case */
        for (i=0;jednostki[i];i++) if ((n=startswith(word,jednostki[i]))) {
            word += n;
            goto idword;
        }
    }
    if (!flags) return -1;

idword:
    strcpy(buf,"wielo");
    strcat(buf,word);
    /* The id returned by __morfologik_IdentifyWord() may carry an NMID
     * marker; strip it before using the id as a chain index. */
    for (
        rc= __morfologik_IdentifyWord(md,buf,&loc_pisownia,&loc_baza,&loc_baseid,&loc_grama,MFLAG_CNT);
        rc >= 0;
        rc= __morfologik_IdentifyNext(md,NMID_ID(rc),&loc_pisownia,&loc_baza,&loc_baseid,&loc_grama)) {
            loc_grama=__filter_nmid(loc_grama,NMID_cnt);
            if (loc_grama) break;
    }
    if (rc < 0) for (
        rc= __morfologik_IdentifyWord(md,word,&loc_pisownia,&loc_baza,&loc_baseid,&loc_grama,MFLAG_CNT);
        rc >= 0;
        rc= __morfologik_IdentifyNext(md,NMID_ID(rc),&loc_pisownia,&loc_baza,&loc_baseid,&loc_grama)) {
            loc_grama=__filter_nmid(loc_grama,NMID_cnt);
            if (loc_grama) break;
    }
    if (rc < 0) return -1;
    if (grama)*grama=loc_grama;
    if (baseid) *baseid=loc_baseid;
    if (pisownia) *pisownia=loc_pisownia;
    if (baza) *baza=loc_baza;
    return NMID_SET(rc, NMID_cnt);
}

	    
	    
/*
 * Identify a word, falling back to prefix and suffix heuristics when it
 * is not in the dictionary literally.
 *
 * md       - dictionary handle
 * word     - word to identify
 * pisownia - receives the spelling of the hit (may be NULL)
 * baza     - receives the base form of the hit (may be NULL)
 * baseid   - receives the base-form id of the hit (may be NULL)
 * grama    - receives the grammatical description (may be NULL)
 * flags    - MFLAG_... bits: MFLAG_STANDARD stops after the literal
 *            lookup and the "nie" prefix; MFLAG_NOANCIENT skips the
 *            archaic-ending rules; MFLAG_NEGATED / MFLAG_SUPER /
 *            MFLAG_CNT mark a prefix class as already stripped so it is
 *            not stripped twice
 *
 * Order of attempts: literal lookup, "nie" prefix, "arcy" / "super" /
 * "ultra" prefix, counting prefix or spelled-out number, "-noby" /
 * "-toby", then the archaic and glued-ending rules.
 *
 * Returns the id of the hit (tagged with an NMID marker when a
 * heuristic was used) or -1 when nothing matches. Continue with
 * morfologik_IdentifyNext().
 *
 * NOTE(review): when a prefix is stripped and the remainder is itself
 * found through a heuristic, the inner marker is replaced by the outer
 * one, not combined with it.
 */
static int __morfologik_IdentifyWord(struct morfologik_data *md,char *word,char **pisownia,char ** baza,int *baseid,u_int64_t *grama,int flags)
{
    u_int64_t loc_grama;
    char *loc_baza;
    char *loc_pisownia;
    int loc_baseid;
    int len=strlen(word);
    int rc;
    int pgsg=0;
    /* Rewritten endings can be longer than the original ones ("ś" takes
     * two bytes); the slack also avoids a zero-length array for an
     * empty word. */
    char buf[2*len+8];

    rc=__morfologik_IdentifyWordSimple(md,word,pisownia,baza,baseid,grama);
    if (rc >= 0) {
        return rc;
    }

    if (!(flags & MFLAG_NEGATED) && !strncmp(word,"nie",3)) {
        flags |= MFLAG_NEGATED;
        word += 3;
        pgsg=NMID_negate;
        goto prefixer;
    }

    if (flags & MFLAG_STANDARD) return -1;
    if (!(flags & MFLAG_SUPER) && !strncmp(word,"arcy",4)) {
        flags |= MFLAG_SUPER;
        word += 4;
        pgsg=NMID_super;
        goto prefixer;
    }
    if (!(flags & MFLAG_SUPER) && (!strncmp(word,"super",5) || !strncmp(word,"ultra",5))) {
        flags |= MFLAG_SUPER;
        word += 5;
        pgsg=NMID_super;
        goto prefixer;
    }
    if (!(flags & (MFLAG_CNT))) {
        int n=is_count(word);
        if (n) {
            word += n;
            pgsg=NMID_cnt;
            flags |= MFLAG_CNT;
            goto prefixer;
        }
        rc=__morfologik_IdentifyCount(md,word,pisownia,baza,baseid,grama,0);
        if (rc >= 0) return rc;
    }

    /* "-noby" / "-toby": drop the trailing "by". */
    if (__endswith(word,len,"noby") || __endswith(word,len,"toby")) {
        strcpy(buf,word);
        buf[len-2]=0;
        pgsg=NMID_impspot;
        goto get_emi;
    }

    if (flags & MFLAG_NOANCIENT) return -1;

    /* "-śwa" -> "-śmy" */
    if (__endswith(word,len,"śwa")) {
        strcpy(buf,word);
        strcpy(buf+len-2,"my");
        pgsg=NMID_ista;
        goto get_emi;
    }

    /* "-ta" -> "-cie" */
    if (__endswith(word,len,"ta")) {
        strcpy(buf,word);
        strcpy(buf+len-2,"cie");
        pgsg=NMID_ista;
        goto get_emi;
    }

    /* "-lim" / "-łym" -> "-liśmy" / "-łyśmy" */
    if (__endswith(word,len,"lim") || __endswith(word,len,"łym")) {
        strcpy(buf,word);
        strcpy(buf+len-1,"śmy");
        pgsg=NMID_alim;
        goto get_emi;
    }
    /* "-iemi" -> "-imi", otherwise "-emi" -> "-ymi" */
    if (__endswith(word,len,"emi")) {
        strcpy(buf,word);
        if (buf[len-4] == 'i') {
            strcpy(buf+len-3,"mi");
        }
        else {
            strcpy(buf+len-3,"ymi");
        }
        pgsg=NMID_advemi;
        goto get_emi;
    }
    /* "-iem" -> "-im", otherwise "-em" -> "-ym".
     * The "-iem" case used to produce "-imi", a plural instrumental
     * that the NMID_advem filter always rejects. */
    if (__endswith(word,len,"em")) {
        strcpy(buf,word);
        if (buf[len-3] == 'i') {
            strcpy(buf+len-2,"m");
        }
        else {
            strcpy(buf+len-2,"ym");
        }
        pgsg=NMID_advem;
        goto get_emi;
    }
    /* Glued verbal endings: strip them and look up what is left.
     * Offsets are byte counts; "ś" takes two bytes. */
    if (__endswith(word,len,"śmy")) {
        strcpy(buf,word);
        buf[len-4]=0;
        pgsg=NMID_pindysmy;
        goto get_emi;
    }
    if (__endswith(word,len,"ście")) {
        strcpy(buf,word);
        buf[len-5]=0;
        pgsg=NMID_pindyscie;
        goto get_emi;
    }
    if (__endswith(word,len,"bym")) {
        strcpy(buf,word);
        buf[len-3]=0;
        pgsg=NMID_pindybym;
        goto get_emi;
    }
    if (__endswith(word,len,"byś")) {
        strcpy(buf,word);
        buf[len-4]=0;
        pgsg=NMID_pindybys;
        goto get_emi;
    }
    if (__endswith(word,len,"by")) {
        strcpy(buf,word);
        buf[len-2]=0;
        pgsg=NMID_pindyby;
        goto get_emi;
    }
    return -1;

prefixer:
    /* A prefix was stripped: the remainder goes through the full
     * heuristics again (flags prevent stripping the same class twice). */
    rc=__morfologik_IdentifyWord(md,word,&loc_pisownia,&loc_baza,&loc_baseid,&loc_grama,flags);
    goto pick;

get_emi:
    /* An ending was rewritten: the result must be in the dictionary literally. */
    rc=__morfologik_IdentifyWordSimple(md,buf,&loc_pisownia,&loc_baza,&loc_baseid,&loc_grama);

pick:
    /* Walk the chain until an entry is compatible with the heuristic.
     * rc may carry an NMID marker from the recursive call; strip it
     * before using the id as a chain index. */
    while (rc >= 0) {
        loc_grama=__filter_nmid(loc_grama,pgsg);
        if (loc_grama) break;
        rc=__morfologik_IdentifyNext(md,NMID_ID(rc),
                            &loc_pisownia,
                            &loc_baza,
                            &loc_baseid,
                            &loc_grama);
    }
    if (rc < 0) return -1;
    if (grama)*grama=loc_grama;
    if (baseid) *baseid=loc_baseid;
    if (pisownia) *pisownia=loc_pisownia;
    if (baza) *baza=loc_baza;
    return NMID_SET(rc, pgsg);
}
    
/*
 * Identify `word` with all heuristics enabled (flags = 0).
 * Output pointers may be NULL. Returns an id to pass to
 * morfologik_IdentifyNext(), or a negative value when nothing matches.
 */
int morfologik_IdentifyWord(struct morfologik_data *md,char *word,char **pisownia,char ** baza,int *baseid,u_int64_t *grama)
{
    return __morfologik_IdentifyWord(md,word,pisownia,baza,baseid,grama,0);
}

/*
 * Identify `word` with MFLAG_STANDARD set: only the literal lookup and
 * the "nie" prefix rule are tried.
 * Output pointers may be NULL. Returns an id or a negative value.
 */
int morfologik_IdentifyWordStd(struct morfologik_data *md,char *word,char **pisownia,char ** baza,int *baseid,u_int64_t *grama)
{
    return __morfologik_IdentifyWord(md,word,pisownia,baza,baseid,grama,MFLAG_STANDARD);
}

/*
 * Identify `word` with caller-chosen MFLAG_... bits passed through
 * unchanged to the internal identification routine.
 * Output pointers may be NULL. Returns an id or a negative value.
 */
int morfologik_IdentifyWordWithFlags(struct morfologik_data *md,char *word,char **pisownia,char ** baza,int *baseid,u_int64_t *grama,int flags)
{
    return __morfologik_IdentifyWord(md,word,pisownia,baza,baseid,grama,flags);
}

/*
 * Decide whether a grammatical description satisfies a filter.
 *
 * - A word type in the filter must equal the word type of `grama`.
 * - Only filter bits below WM_SEQ_END are considered after that.
 * - For case bits and for gender bits, sharing at least one bit with
 *   `grama` is enough ("any of").
 * - Every other filter bit must be set in `grama`.
 *
 * Returns 1 when `grama` passes, 0 otherwise.
 */
static int __good_filter(u_int64_t filter,u_int64_t grama)
{
    const u_int64_t any_of[2] = { WM_CASU_MASK, WM_GENR_MASK };
    int wanted_type = WT_GET(filter);
    int k;

    if (wanted_type != 0 && WT_GET(grama) != wanted_type) {
        return 0;
    }

    filter &= (1LL << WM_SEQ_END) -1;
    if (filter == 0) {
        return 1;
    }

    for (k = 0; k < 2; k++) {
        u_int64_t group = any_of[k];

        if (!(filter & group)) {
            continue;
        }
        if (!(filter & group & grama)) {
            return 0;
        }
        /* The group is satisfied: saturate it on both sides so it no
         * longer influences the subset test below. */
        filter |= group;
        grama |= group;
    }

    return (grama & filter) == filter;
}

/*
 * Public wrapper around __good_filter(). Note the argument order:
 * description first, filter second.
 * Returns 1 when `grama` satisfies `filter`, 0 otherwise.
 */
int morfologik_FilterGrama(u_int64_t grama,u_int64_t filter)
{
    return __good_filter(filter,grama);
}

static int __genWord(struct morfologik_data *md,
                       int vid,
                       char **pisownia,
                       u_int64_t *grama,
                       u_int64_t filter)
{
    for (;;) {
        int word=md->vector[vid];
        if (word < 0) return -1;
        vid ++;
        if (filter && !__good_filter(filter,md->bin_words[word].grama)) continue;
        if (pisownia) *pisownia=md->strings+md->bin_words[word].writename;
        if (grama) *grama=md->bin_words[word].grama;
        return vid;
    }
}

/*
 * Continue a generation started by morfologik_GenWord() or
 * morfologik_GenWordById(): `vid` is the value returned by the previous
 * call. Returns the next position, or -1 when the list is exhausted.
 */
int morfologik_GenNext(struct morfologik_data *md,
                       int vid,
                       char **pisownia,
                       u_int64_t *grama,
                       u_int64_t filter)
{
    return __genWord(md,vid,pisownia,grama,filter);
}

/*
 * Start generating the forms of the base word with index `baseid`
 * (the value delivered through the `baseid` output of the identify
 * functions). `baseid` is not range-checked.
 * Returns a position for morfologik_GenNext(), or -1 when no form
 * passes the filter.
 */
int morfologik_GenWordById(struct morfologik_data *md,
                       int baseid,
                       char **pisownia,
                       u_int64_t *grama,
                       u_int64_t filter
                       )
{
    return __genWord(md,md->base_words[baseid].vector,pisownia,grama,filter);
}

/*
 * Start generating the forms of a base word given by its text.
 *
 * The base_words table is binary-searched by name with strcmp().
 * Returns a position for morfologik_GenNext(), or -1 when the base word
 * is unknown or none of its forms passes the filter.
 */
int morfologik_GenWord(struct morfologik_data *md,
                       char *baseword,
                       char **pisownia,
                       u_int64_t *grama,
                       u_int64_t filter
                       )
{
    int low = 0;
    int high = md->header.base_count - 1;

    while (low <= high) {
        int probe = (low + high) / 2;
        int order = strcmp(baseword, md->strings + md->base_words[probe].name);

        if (order == 0) {
            return __genWord(md, md->base_words[probe].vector, pisownia, grama, filter);
        }
        if (order < 0) {
            high = probe - 1;
        } else {
            low = probe + 1;
        }
    }
    return -1;
}

/*
 * Word-type names, NULL-terminated. The entry at index i belongs to
 * word-type code i+1.
 */
static char *wt_markers[]={
    "adj",    "adjp",    "adv",    "conj",
    "num",    "pact",    "pant",    "pcon",
    "ppas",    "ppron12",    "ppron3",    "pred",
    "adjc",    "siebie",    "subst",    "verb",
    "brev",    "interj",    "xxx",    "nie",
    "advp",	"prep",	"comp",
    NULL};

/* Case names; entry i corresponds to the bit WM_nom << i. */
static char *wm_casa[]={"nom","gen","dat","acc","inst","loc","voc"};
/* Degree names; entry i corresponds to the bit WM_pos << i. */
static char *wm_grad[]={"pos","comp","sup"};
/* Person names; entry i corresponds to the bit WM_pri << i. */
static char *wm_pers[]={"pri","sec","ter"};
/*
 * Gender names and, in xm_genr at the same index, the bit(s) each name
 * stands for. The last four names are shorthands covering a whole group.
 */
static char *wm_genr[]={"m1","m2","m3","n1","n2","p1","p2","p3","m","n","f","p"};
static u_int64_t xm_genr[]={WM_m1,WM_m2,WM_m3,WM_n1,WM_n2,WM_p1,WM_p2,WM_p3,
    WM_m1 | WM_m2 | WM_m3,
    WM_n1 | WM_n2,
    WM_f,
    WM_p1 | WM_p2 | WM_p3};

/*
 * Names of the single-bit flags; row r belongs to bit WM_SEQ_FIRST + r.
 * Column 0 is the regular name and the only one morfologik_ParseGrama()
 * accepts. Column 1, when not NULL, is the alternate name that
 * morfologik_DecodeGrama() prints for nouns, verbs and descriptions
 * that have WM_cmplx set.
 */
static char *wm_seq[][2]={
    {"aff",NULL},
    {"neg",NULL},
    {"perf",NULL},
    {"imperf",NULL},
    {"nakc","str2"},
    {"akc","str3"},
    {"praep",NULL},
    {"npraep",NULL},
    {"imps",NULL},
    {"impt",NULL},
    {"inf",NULL},
    {"fin",NULL},
    {"praet",NULL},
    {"pot",NULL},
    {"nstd",NULL},
    {"pun","super"},
    {"npun","cnt"},
    {"rec",NULL},
    {"congr",NULL},
    {"winien",NULL},
    {"bedzie",NULL},
    {"refl",NULL},
    {"nonrefl",NULL},
    {"depr",NULL},
    {"vulgar",NULL},
    {"ill",NULL},
    {"ger",NULL},
    {"wok","str4"},
    {"nwok","sgcplx"},
    {"cplx",NULL}};
    
    
    
/*
 * Parse a textual grammatical description into its bit-field form.
 *
 * The text is a list of tokens separated by '.' or ':'. The first token
 * names the word type and may be empty when more tokens follow; every
 * later token names a number, "pred", a case, a degree, a person, a
 * gender or one of the regular flag names. Parsing stops quietly at the
 * first empty token after the word type.
 *
 * Returns the bit field. Returns 0 with errno = E2BIG when the text is
 * 256 bytes or longer, and 0 with errno = EINVAL when a token is
 * unknown or nothing was specified.
 */
u_int64_t morfologik_ParseGrama(char *str)
{
    char work[256];
    char *tok,*rest;
    u_int64_t result=0;
    int k,matched;

    if (strlen(str) >= 256) {
        errno=E2BIG;
        return 0;
    }
    strcpy(work,str);

    /* First token: the word type. */
    tok=work;
    rest=strpbrk(tok,".:");
    if (rest) *rest++=0;
    if (!*tok) {
        if (!rest) {
            errno=EINVAL;
            return 0;
        }
    }
    else {
        k=0;
        while (wt_markers[k] && strcmp(wt_markers[k],tok)) k++;
        if (!wt_markers[k]) {
            errno=EINVAL;
            return 0;
        }
        result=WT_SET(result,k+1);
        if (!rest) return result;
    }

    /* Remaining tokens: attributes, tried category by category. */
    tok=rest;
    while (tok && *tok) {
        rest=strpbrk(tok,".:");
        if (rest) *rest++=0;

        matched=1;
        if (!strcmp(tok,"sg")) result |= WM_sg;
        else if (!strcmp(tok,"pl")) result |= WM_pl;
        else if (!strcmp(tok,"pred")) result |= WM_pred;
        else matched=0;

        for (k=0;!matched && k<7;k++) {
            if (!strcmp(tok,wm_casa[k])) {
                result |= WM_nom << k;
                matched=1;
            }
        }
        for (k=0;!matched && k<3;k++) {
            if (!strcmp(tok,wm_grad[k])) {
                result |= WM_pos << k;
                matched=1;
            }
        }
        for (k=0;!matched && k<3;k++) {
            if (!strcmp(tok,wm_pers[k])) {
                result |= WM_pri << k;
                matched=1;
            }
        }
        for (k=0;!matched && k<12;k++) {
            if (!strcmp(tok,wm_genr[k])) {
                result |= xm_genr[k];
                matched=1;
            }
        }
        for (k=WM_SEQ_FIRST;!matched && k<WM_SEQ_END;k++) {
            if (!strcmp(tok,wm_seq[k-WM_SEQ_FIRST][0])) {
                result |= 1LL << k;
                matched=1;
            }
        }
        if (!matched) {
            errno=EINVAL;
            return 0;
        }
        tok=rest;
    }
    if (!result) errno=EINVAL;
    return result;
}

/*
 * Append one tag to `buf`, preceded by ':' when it is the first tag of
 * its group (*count == 0) or by '.' otherwise, and bump *count.
 */
static void mf_append_tag(char *buf,int *count,const char *tag)
{
    strcat(buf,*count ? "." : ":");
    strcat(buf,tag);
    (*count)++;
}

/*
 * Render a grammatical description as text.
 *
 * grama - description to render
 * buf   - output buffer; nothing here bounds the output length, so the
 *         caller must supply a buffer large enough for every tag
 *
 * Output is the word-type name followed by ':'-introduced groups whose
 * members are joined with '.', in the order: number, "pred", case,
 * degree, person, gender, then the single-bit flags. For nouns, verbs
 * and descriptions with WM_cmplx set, flags that have an alternate name
 * are printed under it.
 *
 * Returns 0 on success, or -1 (with `buf` set to the empty string) when
 * the word type is out of range.
 */
int morfologik_DecodeGrama(u_int64_t grama,char *buf)
{
    int g=WT_GET(grama);
    int p,i,ext=0;
    buf[0]=0;
    if (g < 1 || g > WT_last) return -1;
    if (g == WT_subst || g == WT_verb || (grama & WM_cmplx)) ext=1;
    strcpy(buf,wt_markers[g-1]);

    if (grama & WM_NUM_MASK) {
        strcat(buf,":");
        p=0;
        if (grama & WM_sg) {
            strcat(buf,"sg");
            p++;
        }
        if (grama & WM_pl) {
            if (p) strcat(buf,".");
            strcat(buf,"pl");
        }
    }
    if (grama & WM_pred) {
        strcat(buf,":pred");
    }
    if (grama & WM_CASU_MASK) {
        for (i=p=0;i<7;i++) {
            if (grama & (WM_nom << i)) mf_append_tag(buf,&p,wm_casa[i]);
        }
    }
    if (grama & WM_GRAD_MASK) {
        for (i=p=0;i<3;i++) {
            if (grama & (WM_pos << i)) mf_append_tag(buf,&p,wm_grad[i]);
        }
    }
    if (grama & WM_PERS_MASK) {
        for (i=p=0;i<3;i++) {
            if (grama & (WM_pri << i)) mf_append_tag(buf,&p,wm_pers[i]);
        }
    }
    if (grama & WM_GENR_MASK) {
        /* Each gender family collapses to its short name when all of
         * its members are set; otherwise the members present are listed. */
        p=0;
        if ((grama & (WM_m1 | WM_m2 | WM_m3)) == (WM_m1 | WM_m2 | WM_m3)) {
            mf_append_tag(buf,&p,"m");
        }
        else {
            for (i=0;i<3;i++) {
                if (grama & (WM_m1 << i)) mf_append_tag(buf,&p,wm_genr[i]);
            }
        }
        if (grama & WM_f) {
            mf_append_tag(buf,&p,"f");
        }
        if ((grama & (WM_n1 | WM_n2)) == (WM_n1 | WM_n2)) {
            mf_append_tag(buf,&p,"n");
        }
        else {
            for (i=0;i<2;i++) {
                if (grama & (WM_n1 << i)) mf_append_tag(buf,&p,wm_genr[i+3]);
            }
        }
        if ((grama & (WM_p1 | WM_p2 | WM_p3)) == (WM_p1 | WM_p2 | WM_p3)) {
            mf_append_tag(buf,&p,"p");
        }
        else {
            for (i=0;i<3;i++) {
                if (grama & (WM_p1 << i)) mf_append_tag(buf,&p,wm_genr[i+5]);
            }
        }
    }
    for (i=WM_SEQ_FIRST;i<WM_SEQ_END;i++) {
        u_int64_t mask = 1LL << i;
        char *c;

        if (!(grama & mask)) continue;
        /* "perf.imperf" is written as one group when both are set. */
        if (mask == WM_imperf && (grama & WM_perf)) strcat(buf,".");
        else strcat(buf,":");
        c=wm_seq[i-WM_SEQ_FIRST][ext];
        if (!c) c=wm_seq[i-WM_SEQ_FIRST][0];
        strcat(buf,c);
    }
    /* The function used to fall off the end without a return value. */
    return 0;
}


#if 0
/*
 * Disabled manual test driver (compiled out by the surrounding #if 0).
 * Usage: <word> [filter]
 * Prints every reading of argv[1] and, for each reading, all forms of
 * its base word that pass the optional filter given in argv[2].
 */
main(int argc,char *argv[])
{
    char *pi,*ba;u_int64_t grama,filter;char buf[256];
    int id,bid,vid;
    struct morfologik_data *md;
    
    
    //md=morfologik_Init("morfologik.bin");
    /* Prefer the shared-memory dictionary, fall back to the file. */
    md=morfologik_InitSHM();
    if (!md) md=morfologik_Init("morfologik.bin");
    if (!md) {
        perror("Chujy");
        exit(1);
    }
    
    if (argc >2) {
        filter=morfologik_ParseGrama(argv[2]);
        if (!filter) {
            perror("Filter");
            exit(1);
        }
        
        morfologik_DecodeGrama(filter,buf);
        printf("%s\n",buf);
    }
    else filter=0;
    
    printf("OK, starting\n");
    id=morfologik_IdentifyWord(md,argv[1],&pi,&ba,&bid,&grama);
    while (id >= 0) {
        morfologik_DecodeGrama(grama,buf);
        printf("%s/%s %s\n",pi,ba,buf);
        printf("Odmianka\n");
        vid=morfologik_GenWordById(md,bid,&pi,&grama,filter);
        while (vid >= 0) {
            morfologik_DecodeGrama(grama,buf);
            printf("    %s %s\n",pi,buf);
            vid=morfologik_GenNext(md,vid,&pi,&grama,filter);
        }
        printf("\n");
        id=morfologik_IdentifyNext(md,id,&pi,&ba,&bid,&grama);
    }
    
}
#endif
