codepad
[
create a new paste
]
login
|
about
Language:
C
C++
D
Haskell
Lua
OCaml
PHP
Perl
Plain Text
Python
Ruby
Scheme
Tcl
#include <sys/types.h> #include <sys/stat.h> #include <stdlib.h> #include <string.h> #include <limits.h> #define MAXLINE 1024 typedef struct FieldNode { int field; struct FieldNode* right; }field_node; typedef struct AttributeNode { int key; struct FieldNode* right; struct AttributeNode* down; }attr; /* buffer for first file */ static char buffer1[MAXLINE]; static int occupied1 = 0; /* buffer for second file */ static char buffer2[MAXLINE]; static int occupied2 = 0; static int read_line(char *s, int lim, FILE *fp); static field_node* create_field_node(int field); static attr* create_attribute_node(int key); static attr* create_head_node(); static int read_key(char *s); static attr* read_group(attr *head, FILE *fp); static void free_group_memory(attr **headp); static void join_lines(char *s, attr *group, FILE *fp); static FILE* open_file(char *filename); static int getline(char *s, int lim, FILE *fp); static char* getword(char *s, const char *sep); /* join to files according to matching keys assuming that both files are sorted */ int natural_join(char *inputfile1, char *inputfile2, char *output) { FILE *fp1, *fp2, *fp_out; attr *head; char line[MAXLINE]; /* open files */ if((fp1 = open_file(inputfile1)) == NULL) return 0; if((fp2 = open_file(inputfile2)) == NULL) return 0; if((fp_out = fopen(output, "w")) == NULL) return 0; head = create_head_node(); /* join files*/ for(;;) { int key; char *bufp; /* read a group of lines with equal keys from second file */ head->down = read_group(head, fp2); if(head->down == NULL) /* EOF for second file */ break; /* read line from first file until an equal key or greater key than the group is found */ for(;;) { /* read a line from first file */ if(occupied1) { bufp = &buffer1[0]; occupied1 = 0; } else { if(read_line(line, sizeof(line), fp1) == EOF) /* end of first file, join completed */ break; bufp = &line[0]; } /* read key */ key = read_key(bufp); if(key > head->down->key) { /* free memory for the group of records with equal keys and save the line from first file file in static buffer */ free_group_memory(&head); strcpy(buffer1, bufp); occupied1 = 1; break; } if(key == head->down->key) join_lines(bufp, head->down, fp_out); } } /* free memory */ free_group_memory(&head); /* free head node */ free(head); /* close open files */ fclose(fp1); fclose(fp2); fclose(fp_out); return 1; } static void join_lines(char *s, attr *group, FILE *fp) { char copy[MAXLINE]; char *sep = "\t "; char *p; while(group != NULL) { field_node *f; strcpy(copy, s); for(p = getword(copy, sep); p; p = getword(NULL, sep)) fprintf(fp, "%s ", p); for(f = group->right; f != NULL; f = f->right) fprintf(fp, "%d ", f->field); fprintf(fp, "\n"); group = group->down; } } static attr* read_group(attr *head, FILE *fp) { char line[MAXLINE]; attr *group; line[0] = '\0'; group = head; for(;;) { char *bufp, *sep, *p; int new_key; field_node *fieldp; sep = "\t "; /* read line */ if(occupied2) /* buffer is occupied, read line from buffer */ { bufp = &buffer2[0]; occupied2 = 0; } else /* read line from file */ { if(read_line(line, sizeof(line), fp) == EOF) break; bufp = &line[0]; } /* read key */ new_key = read_key(bufp); /* compare new key with previous key */ if(new_key > group->key) /* this condition will always be false for the first line as initially key is INT_MAX */ { strcpy(buffer2, bufp); occupied2 = 1; break; } /* read key into memory */ group->down = create_attribute_node(atoi(getword(bufp, sep))); if(group->down == NULL) return NULL; /* read fields into memory */ if((p = getword(NULL, sep)) != NULL) { fieldp = create_field_node(atoi(p)); if(fieldp == NULL) return NULL; group->down->right = fieldp; } while((p = getword(NULL, sep)) != NULL) { fieldp->right = create_field_node(atoi(p)); if(fieldp->right == NULL) return NULL; fieldp = fieldp->right; } /* move down one level */ group = group->down; } return head->down; } static void free_group_memory(attr **headp) { attr *d; if((*headp)->down != NULL) { field_node *r; for(d = (*headp)->down; d != NULL;) { attr *dd; for(r = d->right; r != NULL;) { field_node *rr; rr = r->right; free(r); r = rr; } dd = d->down; free(d); d = dd; } } (*headp)->down = NULL; } static int read_key(char *s) { int i, j; char key[MAXLINE]; /* by pass space and tab */ for(i = 0; s[i] == '\t' || s[i] == ' '; i++); /* copy key until space or tab is encountered */ for(j = 0; s[i] != '\t' && s[i] != ' '; i++, j++) key[j] = s[i]; key[j] = '\0'; return atoi(key); } static int read_line(char *s, int lim, FILE *fp) { int len; while(1) { len = getline(s, lim, fp); if(len == 0) return EOF; /* skip empty lines */ if(s[0] != '\n') break; } /* remove newline from line */ s[len-1] = '\0'; return (len-1); } static attr* create_attribute_node(int key) { attr *p; if((p = (attr*)malloc(sizeof(*p))) == NULL) return NULL; p->key = key; p->down = NULL; p->right = NULL; return p; } static field_node* create_field_node(int field) { field_node *p; if((p = (field_node*)malloc(sizeof(*p))) == NULL) return NULL; p->field = field; p->right = NULL; return p; } static attr* create_head_node() { attr *p; if((p = (attr*)malloc(sizeof(*p))) == NULL) return NULL; p->key = INT_MAX; p->down = NULL; p->right = NULL; return p; } FILE* open_file(char *filename) { struct _stat buffer; int status; FILE *fp; /* check file status */ status = _stat(filename, &buffer); /* file doesn't exist */ if(status != 0) return NULL; /* path specifies an ordinary file and the file has read permission */ if((buffer.st_mode & _S_IFREG) && (buffer.st_mode & _S_IREAD)) { /* file is empty */ if(buffer.st_size == 0) return NULL; /* open file in read mode */ fp = fopen(filename, "r"); if(fp == NULL) return NULL; return fp; } return NULL; } int getline(char *s, int lim, FILE *fp) { int i; char c; for(i = 0; i < (lim-1) && (c = getc(fp)) != EOF && c != '\n'; i++) s[i] = c; if(c == '\n') s[i++] = c; s[i] = '\0'; return i; } char* getword(char *s, const char *sep) { char *p, *tokenptr; /* static pointer pointing to the current position in s */ static char *currentp = NULL; /* if s is not NULL, point currentp to starting of NULL */ if(s != NULL) currentp = s; /* when the function is called for the first time and s is NULL, return NULL */ if(s == NULL && currentp == NULL) return NULL; /* bypass the initial seperator characters */ for(p = currentp; *p != '\0'; p++) { const char *sep_p; /* check if the character is seperator */ for(sep_p = sep; *sep_p != '\0'; sep_p++) { if(*sep_p == *p) break; } if(*sep_p == '\0') break; /* if character other then separator is found, break out of the loop */ } /* end of string is reached */ if(*p == '\0') { currentp = NULL; return NULL; } /* find the next seperator character */ for(tokenptr = p; *p != '\0'; p++) { const char *sep_p; for(sep_p = sep; *sep_p != '\0'; sep_p++) { if(*sep_p == *p) break; } if(*sep_p != '\0') break; /* if the character is separator then break out of loop */ } if(*p == '\0') currentp = p; else { currentp = p+1; *p = '\0'; } return tokenptr; }
Private
[
?
]
Run code
Submit