#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define MAX 26
typedef struct TrieNode {
char ch;
int times;
struct TrieNode * next[MAX];
}TrieNode, *Node;
typedef struct Topt {
int times;
char c[MAX];
}Topt, *Topten;
void Init(Node Root, FILE *fp) { //根据Vocabulary文件夹下的vocabulary.txt文件建立字典序
char a[100], b;
int i, j, k, t;
Node p;
Root->ch = '\0';Root->times = 0;
for (i=0;i<MAX;i++) {Root->next[i] = '\0';}
while (1) { //逐个导入单词并插入到字典序中
for (i=0;;i++) { //从文件中取出一个单词存入到数组中
b = fgetc(fp);
if (b==10||b==EOF) break;
else a[i] = b;
}
p = Root;j=0;
while(j<i) { //将单词插入到字典序中
for (k=0;;k++) {
if (p->next[k] == '\0') { //节点指向空字符,说明下面还未有字母经过
p->next[k]=(Node)malloc(sizeof(TrieNode)); //生成一个节点并初始化
p->next[k]->ch=a[j];
if (j==i-1) p->next[k]->times=1;
else p->next[k]->times=0;
for (t=0;t<MAX;t++) p->next[k]->next[t]='\0';
p = p->next[k];
break;
}
else if (p->next[k]->ch == a[j]) { //之前有单词经过该节点且该字母与所检验的字母相同
if (j==i-1) p->next[k]->times++;
p = p->next[k];
break;
}
else continue; //之前有单词经过该节点但该字母与所检验的字母不同
}
j++;
}
if (b==EOF) break; //到达文件结尾,单词全部录入
}
}
void SearchWordInVocabulary(Node Root, FILE *fp, FILE *fq) { //统计给定单词在字典中出现的次数
char a[100] = {'\0'}, b;
int i, j ,k, t=1; //用t来标记给定单词的ID号,若t为负值,则说明该单词不在字典中
Node p;
while(1) {
for (i=0;;i++) { //读取一个给定的单词
b=fgetc(fp);
if(b==10 || b==EOF) break;
else a[i]=b;
}
p = Root; j=0;
while (j<i) { //在字典中查找该单词
for (k=0;k<MAX;k++) {
if (p->next[k]=='\0') {t=-t;break;} //判定该单词不在字典中,标记t未负值
else if (p->next[k]->ch==a[j]) {p=p->next[k];break;} //节点字母符合,继续下一个字母
else continue; //节点字母不符合,继续查找下一个兄弟节点
}
if (t<0) break;
j++;
}
if (b==10 && i==0) continue; //连续多个回车出现的情况
if (b==EOF && i==0) break; //最后一个单词结束后没有回车的情况
if (t<0) { //t为负值,输出单词的ID,并输出“NO”
fprintf(fq,"CASE %d:\nNO\n",-t);
t=(-t)+1;
}
else {
fprintf(fq,"CASE %d:\n%d\n",t,p->times); //输出单词的ID,并输出出现的次数
t++;
}
if (b==EOF) break; //到达文件末尾
}
}
int Traverse (Node p, char *a, int i, FILE *fq) { //按照字母表顺序深度遍历字典序
int k, t;
Node q=p;
if (p->times!=0) { //所在单词出现频率不为零则输出该单词
for (k=0;k<i;k++) fprintf(fq,"%c",a[k]);
fprintf(fq,"\n");
}
if (p->next[0]=='\0') return 0; //已到达字典序叶子节点,返回
else { //未到达字典序叶子节点,继续向下检索
for (t=97;t<123;t++) //根据字母表顺序进行检索
for (k=0;p->next[k]!='\0';k++)
if (p->next[k]->ch==t) {
q=p;
a[i++]=p->next[k]->ch;
p=p->next[k];
Traverse (p,a,i,fq); //递归
p=q;
}
}
return 0;
}
void TotPrefixWord (Node Root, FILE *fp, FILE *fq) { //对给定的前缀进行检索
int id=1, i, j, k, tag;
Node p, q;
char a[MAX] = {'\0'}, b;
while (1) {
for (i=0;i<MAX;i++) { //一次读入文件中给定的前缀
b = fgetc(fp);
if (b==10 || b==EOF) break;
else a[i] = b;
}
if (i!=0) fprintf(fq, "CASE %d:\n",id++); //输出单词的ID号
j=0; p=Root; tag=1;
while(j<i) {
for (k=0;k<MAX;k++) {
if(p->next[k]=='\0') {tag=0;break;}
else if (p->next[k]->ch==a[j]) {q=p;p=p->next[k];break;}
else continue;
}
if (tag==0) break; //没有该前缀的单词
j++;
}
if (tag!=0 && i!=0) Traverse(p,a,i,fq); //在字典中找出所有该前缀的单词
if (b == EOF) break;
}
}
int Traverse_Topten (Node p, char *a, int i, struct Topt * top) {
int k, t;
Node q=p;
if (p->times!=0) { //所在单词出现频率不为零则输出该单词
if (p->times<top[8]->times && p->times>=top[9]->times) {
top[9]->times=p->times;
top[9]->c[]={"\0"};
strncpy(top[9]->c,a,i-1);
}
else
for (k=0;k<9;k++)
if (p->times>=top[k].times) {
for (t=8;t>=k;t--) {
top[t+1]->times=top[t]->times;
top[t+1]->c[]={"\0"};
strcpy(top[t+1]->c,top[t]->c);
}
top[k]->times=p->times;
top[k]->c[]={"\0"};
strncpy(top[k]->c,a,i-1);
}
//for (k=0;k<i;k++) fprintf(fq,"%c",a[k]);
//fprintf(fq,"\n");
}
if (p->next[0]=='\0') return 0; //已到达字典序叶子节点,返回
else //未到达字典序叶子节点,继续向下检索
for (k=0;p->next[k]!='\0';k++) {
q=p;
a[i++]=p->next[k]->ch;
p=p->next[k];
Traverse_Topten (p,a,i,top); //递归
p=q;
}
return 0;
}
void PrefixFrequence (Node Root, FILE *fp, FILE *fq) {
int id=1, i, j, k, t, tag;
Node p, q;
char a[MAX] = {'\0'}, b;
struct Topt * top[10];
for (k=0;k<10;k++) { //初始化数组
top[k]=(Topten)malloc(sizeof(Topt));
top[k]->times=0;
top[k]->char[]={"\0"};
}
while (1) {
for (i=0;i<MAX;i++) { //一次读入文件中给定的前缀
b = fgetc(fp);
if (b==10 || b==EOF) break;
else a[i] = b;
}
if (i!=0) fprintf(fq, "CASE %d:\n",id++); //输出单词的ID号
j=0; p=Root; tag=1;
while(j<i) {
for (k=0;k<MAX;k++) {
if(p->next[k]=='\0') {tag=0;break;}
else if (p->next[k]->ch==a[j]) {q=p;p=p->next[k];break;}
else continue;
}
if (tag==0) break; //没有该前缀的单词
j++;
}
if (tag!=0 && i!=0) { //在字典中找出优先级最高的十个符合前缀的单词
Traverse_Topten(p,a,i,top);
k=0;
while(top[k]->times!=0) {
for (t=0;top[k]->c[t]!='\0';t++) fprintf(fq,"%c",top[k]->c[t]);
fprintf(fq,"\n");
k++;
if (k>9) break;
}
}
if (b == EOF) break;
}
}
int main(void) {
FILE *fp, *fq;
Node Root;
Root = (Node)malloc(sizeof(TrieNode));
fp = fopen("./Vocabulary/vocabulary.txt","r");
printf("根据Vocabulary目录下的vocabulary.txt文件建立字典序...\n");
Init (Root, fp); //建立字典序
fclose(fp);
/* fp = fopen("./SearchWordInVocabulary/SearchWordInVocabulary.txt","r");
fq = fopen("./SearchWordInVocabulary/SearchWordInVocabulary_Result.txt","a+");
printf("根据SearchWordInVocabulary目录下SearchWordInVocabulary.txt文件中的单词进行检索(结果参见同目录下的SearchWordInVocabular_Result.txt文件)...\n");
SearchWordInVocabulary(Root, fp, fq); //统计给定单词出现的次数
fclose(fp);fclose(fq);
fp = fopen("./SearchWordInVocabulary/SmallSearchWordInVocabulary.txt","r");
fq = fopen("./SearchWordInVocabulary/SmallSearchWordInVocabulary_Result.txt","a+");
printf("根据SearchWordInVocabulary目录下SmallSearchWordInVocabulary.txt文件中的单词进行检索(结果参见同目录下的SmallSearchWordInVocabular_Result.txt文件)...\n");
SearchWordInVocabulary(Root, fp, fq); //统计给定单词出现的次数
fclose(fp);fclose(fq);
fp = fopen("./TotPrefixWord/TotPrefixWord.txt","r");
fq = fopen("./TotPrefixWord/TotPrefixWord_Result.txt","a+");
printf("根据TotPrefixWord目录下TotPrefixWord.txt文件中的前缀数据进行检索(结果参见同目录下的TotPrefixWord_Result.txt文件)...\n");
TotPrefixWord (Root, fp, fq); //对给定的前缀进行检索
fclose(fp);fclose(fq);
fp = fopen("./TotPrefixWord/SmallTotPrefixWord.txt","r");
fq = fopen("./TotPrefixWord/SmallTotPrefixWord_Result.txt","a+");
printf("根据TotPrefixWord目录下TotPrefixWord.txt文件中的前缀数据进行检索(结果参见同目录下的TotPrefixWord_Result.txt文件)...\n");
TotPrefixWord (Root, fp, fq); //对给定的前缀进行检索
fclose(fp);fclose(fq); */
fp = fopen("./PrefixFrequence/PrefixFrequence.txt","r");
fq = fopen("./PrefixFrequence/PrefixFrequence_Result.txt","a+");
printf("根据PrefixFrequence目录下PrefixFrequence.txt文件中的单词进行检索(结果参见同目录下的PrefixFrequence_Result.txt文件)...\n");
PrefixFrequence(Root, fp, fq); //检索优先级最高的十个符合前缀的单词
fclose(fp);fclose(fq);
fp = fopen("./PrefixFrequence/SmallPrefixFrequence.txt","r");
fq = fopen("./PrefixFrequence/SmallPrefixFrequence_Result.txt","a+");
printf("根据PrefixFrequence目录下SmallPrefixFrequence.txt文件中的单词进行检索(结果参见同目录下的SmallPrefixFrequence_Result.txt文件)...\n");
PrefixFrequence(Root, fp, fq); //检索优先级最高的十个符合前缀的单词
fclose(fp);fclose(fq);
return 0;
}