// relation.cpp

// saved from url // http://www-cs.canisius.edu/PL_TUTORIALS/C++/EXAMPLES/MRD/SRC/relation.c --> //---------------------------------- relation.c ---------------------------- #include <stdio.h> #include <string.h> #include <stdlib.h> #include "relation.h" #define min(a,b) ((a)<(b)?(a):(b)); static int contains_blank (char *somestring) { while (*somestring) if (*somestring++ == ' ') return 1; return 0; } void relation::info () { printf ("Relation loaded from file: %s\n", relfilename); printf ("Number of records = %d\n", numrecords); for (int i=0; i<numfields; i++) printf ("%3d. %-15s %4d %c\n", i, dict[i].name, dict[i].printwidth, dict[i].datatype); } int relation::findfield (char *whichfield) { for (int i=0; i<numfields; i++) if (strcmp (whichfield, dict[i].name) == 0) return i; return -1; } void relation::copydict (relation *source, relation *dest) { for (int i=0; i<source->numfields; i++) dest->addfield (source->dict[i].name, source->dict[i].position, source->dict[i].datatype, source->dict[i].printwidth); } // The next version is used only for cross product and ejoin. It deals with // the problem that the two relations may have the same field names. The // solution is to see if the same field name appears in both relations, and // if it does, then the second relation's field is renamed using _fieldname. // This relies on the hope that no one used THAT name in the first relation // or in the second. If they did, the field just can't be found. 
void relation::copydict2 (relation *source1, relation *source2, relation *dest) { // First copy the first relation's fields over for (int i=0; i<source1->numfields; i++) dest->addfield (source1->dict[i].name, source1->dict[i].position, source1->dict[i].datatype, source1->dict[i].printwidth); // Now copy the second's over, but for each one search the dictionary // of the first relation, and if found then rename for (int i=0; i<source2->numfields; i++) { int found = 0; for (int j=0; j<source1->numfields; j++) if (strcmp(source1->dict[j].name, source2->dict[i].name) == 0) { found = 1; break; } if (found) { char temp[60]; strcpy (temp, "_"); strcat (temp, source2->dict[i].name); dest->addfield (temp, source2->dict[i].position + source1->numfields, source2->dict[i].datatype, source2->dict[i].printwidth); } else dest->addfield (source2->dict[i].name, source2->dict[i].position + source1->numfields, source2->dict[i].datatype, source2->dict[i].printwidth); } } int relation::xmatch1c (parseline *x, int pos, char *comparison, char *fieldvalueC) { if (strcmp (comparison, "=") == 0) return (strcmp (x->word(pos), fieldvalueC)==0); if (strcmp (comparison, "!=") == 0) return (strcmp (x->word(pos), fieldvalueC)!=0); if (strcmp (comparison, "<") == 0) return (strcmp (x->word(pos), fieldvalueC)<0); if (strcmp (comparison, "<=") == 0) return (strcmp (x->word(pos), fieldvalueC)<=0); if (strcmp (comparison, ">") == 0) return (strcmp (x->word(pos), fieldvalueC)>0); if (strcmp (comparison, ">=") == 0) return (strcmp (x->word(pos), fieldvalueC)>=0); } int relation::xmatch1i (parseline *x, int pos, char *comparison, char *fieldvalueC) { if (strcmp (comparison, "=") == 0) return atoi(x->word(pos)) == atoi(fieldvalueC); if (strcmp (comparison, "!=") == 0) return atoi(x->word(pos)) != atoi(fieldvalueC); if (strcmp (comparison, "<") == 0) return atoi(x->word(pos)) < atoi(fieldvalueC); if (strcmp (comparison, "<=") == 0) return atoi(x->word(pos)) <= atoi(fieldvalueC); if (strcmp (comparison, 
">") == 0) return atoi(x->word(pos)) > atoi(fieldvalueC); if (strcmp (comparison, ">=") == 0) return atoi(x->word(pos)) >= atoi(fieldvalueC); } int relation::xmatch1d (parseline *x, int pos, char *comparison, char *fieldvalueC) { if (strcmp (comparison, "=") == 0) return atof(x->word(pos)) == atof(fieldvalueC); if (strcmp (comparison, "!=") == 0) return atof(x->word(pos)) != atof(fieldvalueC); if (strcmp (comparison, "<") == 0) return atof(x->word(pos)) < atof(fieldvalueC); if (strcmp (comparison, "<=") == 0) return atof(x->word(pos)) <= atof(fieldvalueC); if (strcmp (comparison, ">") == 0) return atof(x->word(pos)) > atof(fieldvalueC); if (strcmp (comparison, ">=") == 0) return atof(x->word(pos)) >= atof(fieldvalueC); } int relation::match1 (parseline *x, spec comp) { int n = findfield (comp.fieldname); if (n == -1) return 0; int pos = dict[n].position; // See if the given field name even exists switch (dict[n].datatype) { case 'c': return xmatch1c (x, pos, comp.comparison, comp.fieldvalueC); case 'i': return xmatch1i (x, pos, comp.comparison, comp.fieldvalueC); case 'd': return xmatch1d (x, pos, comp.comparison, comp.fieldvalueC); } } int relation::match (parseline *x, speclist comps) { int truth = 0; for (int i=0; i<comps.numspecs; i++) { int val = match1 (x, comps.specs[i]); if (comps.specs[i].relation_to_previous == 'a') truth = truth && val; else if (comps.specs[i].relation_to_previous == 'o') truth = truth || val; else truth = val; } return truth; } relation* relation::select (speclist specs) { relation *newone; newone = new relation(); copydict (this, newone); for (int i=0; i<numrecords; i++) if (match (records[i], specs)) { char line[2000]; records[i]->makeline(line); newone->addrecord (line); } return newone; } void relation::append (relation *other) { // What if the dictionaries do not match???? 
for (int i=0; i<other->numrecords; i++) { char line[2000]; other->records[i]->makeline(line); addrecord (line); } } relation* relation::copy () { relation *newone; newone = new relation(); copydict (this, newone); for (int i=0; i<numrecords; i++) { char line[2000]; records[i]->makeline(line); newone->addrecord (line); } return newone; } void relation::makewidthline (char *line) { line[0] = 0; for (int i=0; i<numfields; i++) { char temp[50]; sprintf (temp, "%d ", dict[i].printwidth); strcat (line, temp); } } void relation::print (FILE *fp) { char field[2000], line[5000]; for (int i = 0; i<numrecords; i++) { strcpy (line, ""); for (int j = 0; j<numfields; j++) { formatvalue (i, j, field, sizeof(field)); strcat (line, field); strcat (line, " "); } fprintf (fp, "%s\n", line); } } void relation::printheader (FILE *fp) { char fmtstring[20]; int width=0, i; for (i=0; i<numfields; i++) { sprintf (fmtstring, "%%-%ds", dict[i].printwidth); width += dict[i].printwidth; fprintf (fp, fmtstring, dict[i].name); fprintf (fp, " "); } fprintf (fp, "\n"); fprintf (fp, "--------------------------------------------------------------------------\n"); } void relation::print () { print (stdout); } relation* relation::project (char *namelist) { relation *newone; char line[1000], fieldnumlist[500]; parseline names(namelist); newone = new relation(); // Put in only the fields that are mentioned. 
for (int j=0; j<names.size(); j++) { int k = findfield(names.word(j)); newone->addfield (names.word(j), j, dict[k].datatype, dict[k].printwidth); } for (int i=0; i<numrecords; i++) { strcpy (line, ""); for (int j=0; j<names.size(); j++) { int m = findfield(names.word(j)); int pos = dict[m].position; if (contains_blank (records[i]->word(pos))) { strcat (line, "\""); strcat (line, records[i]->word(pos)); strcat (line, "\" "); } else { strcat (line, records[i]->word(pos)); strcat (line, " "); } } newone->addrecord(line); } return newone; } // For the default case of adding a record, we just make a copy of the // previous record. void relation::addrecord () { char temp[2000]; records[numrecords-1]->makeline(temp); addrecord (temp); } void relation::addrecord (char *someline) { parseline *newrec = new parseline (someline); if (numrecords >= MAXRECORDS) return; // later set error code numrecords++; // check for consistency in number of fields??? records[numrecords-1] = newrec; } void relation::addfield (char *name, int pos, char type, int width) { if (numfields >= MAXFIELDS) return; strcpy (dict[numfields].name, name); // check to see if already there?? 
dict[numfields].datatype = type; dict[numfields].position = pos; dict[numfields].printwidth = width; numfields++; } void relation::load (char *filename) { FILE *fp; char line[1000]; string fname(filename); if ((fp = fopen (filename, "r")) == NULL) { fname += ".txt"; if ((fp = fopen (fname.data(), "r")) == NULL ) { printf ("Cannot open file %s\n", filename); printf ("Cannot open file %s\n", fname.data()); return; } } numfields = 0; while (fgets(line, sizeof(line), fp) != NULL) { line[strlen(line)-1] = 0; if (line[0] == '#') break; parseline pline(line); addfield (pline.word(0), numfields, pline.word(1)[0], atoi(pline.word(2))); } // Now load the data records numrecords = 0; while (fgets(line, sizeof(line), fp) != NULL) { line[strlen(line)-1] = 0; addrecord(line); } fclose (fp); strcpy (relfilename, fname.data()); } void relation::save (char *filename) { FILE *fp; string fname(filename); fname += ".txt"; if ((fp = fopen (fname.data(), "w")) == NULL) { printf ("Cannot open file %s for saving\n", fname.data()); return; } // Save the data dictionary and make up the width list for saving the // line. 
char widths[500]; widths[0] = 0; for (int i=0; i<numfields; i++) { fprintf (fp, "%s %c %d\n", dict[i].name, dict[i].datatype, dict[i].printwidth); char temp[50]; sprintf (temp, "%d ", dict[i].printwidth); strcat (widths, temp); } fprintf (fp, "#\n"); print (fp); fclose (fp); } relation* relation::ejoin (relation *other, char *whichf1, char *whichf2) { char newline[5000], newline2[2000]; relation *newone; int which1, which2; int n = this->findfield (whichf1); if (n == -1) return (relation *) NULL; which1 = this->dict[n].position; n = other->findfield (whichf2); if (n == -1) return (relation *) NULL; which2 = other->dict[n].position; newone = new relation(); copydict2 (this, other, newone); for (int i=0; i < this->numrecords; i++) for (int j=0; j< other->numrecords; j++) if (strcmp(this->records[i]->word(which1), other->records[j]->word(which2)) == 0) { this->records[i]->makeline(newline); strcat (newline, " "); other->records[j]->makeline(newline2); strcat (newline, newline2); newone->addrecord (newline); } return newone; } relation* relation::cross (relation *other) { char newline[5000], newline2[2000]; relation *newone; newone = new relation(); copydict2 (this, other, newone); for (int i=0; i < this->numrecords; i++) for (int j=0; j < other->numrecords; j++) { this->records[i]->makeline(newline); strcat (newline, " "); other->records[j]->makeline(newline2); strcat (newline, newline2); newone->addrecord (newline); } return newone; } static int greaterthan (char *s1, char *s2, char datatype) { switch (datatype) { case 'c': return strcmp(s1, s2) > 0; case 'i': return atoi(s1) > atoi(s2); case 'd': return atof(s1) > atof(s2); } } void relation::sort (char *whichfield, bool ascending) { int n = findfield(whichfield); if (n == -1) return; int pos = dict[n].position; char datatype = dict[n].datatype; for (int i=0; i<numrecords-1; i++) for (int j=i+1; j<numrecords; j++) { bool temp = greaterthan (records[i]->word(pos), records[j]->word(pos), datatype); if (ascending && 
temp) { parseline *temp = records[i]; records[i] = records[j]; records[j] = temp; } if (!ascending && !temp) { parseline *temp = records[i]; records[i] = records[j]; records[j] = temp; } } } void relation::sort () { for (int i=0; i<numrecords-1; i++) for (int j=i+1; j<numrecords; j++) { char line1[2000], line2[2000]; records[i]->makeline(line1); records[j]->makeline(line2); if (strcmp (line1, line2) > 0) { parseline *temp = records[i]; records[i] = records[j]; records[j] = temp; } } } relation* relation::istats (int pos) { relation *newone; char templine[1000]; int sum=0, n, max, min; double avg, sumsq=0.0; for (int i=0; i<numrecords; i++) { n = atoi(records[i]->word(pos)); sum += n; if (i==0) max = n; if (i==0) min = n; if (n > max) max = n; if (n < min) min = n; } avg = (float)sum/numrecords; for (int i=0; i<numrecords; i++) { n = atoi(records[i]->word(pos)); sumsq += (avg - n) * (avg - n); } sprintf (templine, "%d %d %d %d %.5f %.5f", numrecords, sum, min, max, avg, sumsq); newone = new relation(); newone->addfield ("num", 0, 'i', 9); // number of records in relation newone->addfield ("sum", 1, 'i', 9); // sum of the stat field's values newone->addfield ("min", 2, 'i', 9); // min value of stat field newone->addfield ("max", 3, 'i', 9); // max value of stat field newone->addfield ("avg", 4, 'd', 9); // average (floating point) newone->addfield ("ssq", 5, 'd', 9); // sum of squares newone->addrecord (templine); return newone; } relation* relation::dstats (int pos) { relation *newone; char templine[1000]; double sum=0, n, max, min; double avg, sumsq=0.0; for (int i=0; i<numrecords; i++) { n = atof(records[i]->word(pos)); sum += n; if (i==0) max = n; if (i==0) min = n; if (n > max) max = n; if (n < min) min = n; } avg = sum/numrecords; for (int i=0; i<numrecords; i++) { n = atof(records[i]->word(pos)); sumsq += (avg - n) * (avg - n); } sprintf (templine, "%d %.5f %.5f %.5f %.5f %.5f", numrecords, sum, min, max, avg, sumsq); newone = new relation(); newone->addfield 
("num", 0, 'i', 9); // number of records in relation newone->addfield ("sum", 1, 'd', 9); // sum of the stat field's values newone->addfield ("min", 2, 'd', 9); // min value of stat field newone->addfield ("max", 3, 'd', 9); // max value of stat field newone->addfield ("avg", 4, 'd', 9); // average newone->addfield ("ssq", 5, 'd', 9); // sum of squares newone->addrecord (templine); return newone; } relation* relation::summarystats (char *whichfield) { int n = findfield(whichfield); if (n == -1) return (relation *) NULL; if (dict[n].datatype == 'i') return istats (dict[n].position); else if (dict[n].datatype == 'd') return dstats (dict[n].position); } relation* relation::addseqnums () { relation *newone; char temp[1000]; newone = new relation(); // Make new data dictionary newone->addfield ("seqnum", 0, 'i', 6); for (int i=0; i<numfields; i++) newone->addfield (dict[i].name, i+1, dict[i].datatype, dict[i].printwidth); for (int i=0; i<numrecords; i++) { char line[2000]; records[i]->makeline(line); sprintf (temp, "%d %s", i, line); newone->addrecord (temp); } return newone; } void relation::showdict () { for (int i=0; i<numfields; i++) { printf ("%3d. 
%-20s %3d %3d ", i, dict[i].name, dict[i].position, dict[i].printwidth); switch (dict[i].datatype) { case 'c': printf ("character string\n"); break; case 'd': printf ("floating point\n"); break; case 'i': printf ("integer\n"); break; } } } void relation::print1record (int recnum) { for (int i=0; i<numfields; i++) printf ("%s: %s\n", dict[i], records[recnum]->word(i)); } static int sametuples (parseline *x, parseline *y) { if (x->size() != y->size()) return 0; for (int i=0; i<x->size(); i++) if (strcmp (x->word(i), y->word(i)) != 0) return 0; return 1; } void relation::unique () { if (numrecords < 2) return; sort(); int nums[1000], numdels=0; int i=0, j=1; for (j=1; j<numrecords; j++) if (sametuples(records[i], records[j])) nums[numdels++] = j; else i = j; if (numdels == 0) return; int k=1; int delindex = 0; for (i=1; i<numrecords; i++) if (nums[delindex] == i) delindex++; else records[k++] = records[i]; numrecords = numrecords - numdels; } int relation::findrecord (char *fieldname, char *value) { int n = findfield (fieldname); if (n == -1) return 0; for (int i=0; i<numrecords; i++) if (strcmp(records[i]->word(dict[n].position), value) == 0) return i; } int relation::findrecord (char *fieldname, int value) { int n = findfield (fieldname); if (n == -1) return 0; for (int i=0; i<numrecords; i++) if (atoi(records[i]->word(dict[n].position)) == value) return i; } int relation::findrecord (char *fieldname, double value) { int n = findfield (fieldname); if (n == -1) return 0; for (int i=0; i<numrecords; i++) if (atof(records[i]->word(dict[n].position)) == value) return i; } void relation::setcurrent (int recnum) { if (recnum >= 0 && recnum < numrecords) current = recnum; } void relation::advance () { if (current < numrecords-1) current++; } bool relation::atend () { return (current == numrecords-1); } char* relation::getvalueC (char *fieldname) { int n = findfield (fieldname); if (n == -1) return (char *) NULL; return records[current]->word(dict[n].position); } int 
relation::getvalueI (char *fieldname) { int n = findfield (fieldname); if (n == -1) return 0; return atoi(records[current]->word(dict[n].position)); } double relation::getvalueD (char *fieldname) { int n = findfield (fieldname); if (n == -1) return 0.0; return atof(records[current]->word(dict[n].position)); } void relation::formatvalue (int recnum, int fieldnum, char *result, int resultmaxlen) { char thisvalue[1000]; char format[100]; int cb, len, j; strcpy (thisvalue, records[recnum]->word(dict[fieldnum].position)); int width = dict[fieldnum].printwidth; for (int i=0; i<resultmaxlen; i++) result[i] = 0; switch (dict[fieldnum].datatype) { case 'c': cb = contains_blank (thisvalue); len = min(resultmaxlen, strlen(thisvalue)); j=0; if (cb) { result[0] = '"'; j=1; } for (int i=0; i<len; i++) result[j++] = thisvalue[i]; if (cb) result[j++] = '"'; while (j<width-1) result[j++] = ' '; break; case 'i': sprintf (format, "%%%dd", width); sprintf (result, format, atoi(thisvalue)); break; case 'd': sprintf (format, "%%%d.3lf", width); double x; sscanf (thisvalue, "%lf", &x); sprintf (result, format, x); break; } } void relation::setvalue (char *fieldname, char *newvalueC) { int n = findfield (fieldname); if (n == -1) return; records[current]->setword (dict[n].position, newvalueC); } void relation::setvalue (char *fieldname, int newvalueI) { int n = findfield (fieldname); if (n == -1) return; char newvalueC[100]; sprintf (newvalueC, "%d", newvalueI); records[current]->setword (dict[n].position, newvalueC); } void relation::setvalue (char *fieldname, double newvalueD) { int n = findfield (fieldname); if (n == -1) return; char newvalueC[100]; sprintf (newvalueC, "%f", newvalueD); records[current]->setword (dict[n].position, newvalueC); } void relation::changefieldname (char *fieldname, char *newname) { int n = findfield (fieldname); if (n == -1) return; strcpy (dict[n].name, newname); } void relation::changefieldwidth (char *fieldname, int newwidth) { int n = findfield 
(fieldname); if (n == -1) return; dict[n].printwidth = newwidth; } int relation::getfieldwidth (char *fieldname) { int n = findfield (fieldname); if (n == -1) return -1; return dict[n].printwidth; } void relation::delrecord (int recnum) { if (recnum >= numrecords) return; delete records[recnum]; for (int i=recnum; i<numrecords-1; i++) records[i] = records[i+1]; numrecords--; } void relation::addfieldvalue (char *newfield, int recnum, char *newvalue) { char small[2]; small[0] = ' '; small[1] = 0; char temp[1000]; records[recnum]->makeline(temp); strcat (temp, small); strcat (temp, newvalue); delete records[recnum]; records[recnum] = new parseline (temp); } char relation::getfieldtype (char *fieldname) { int n = findfield (fieldname); if (n == -1) return -1; return dict[n].datatype; } relation* relation::difference (char *field1, relation *other, char *field2) { int fn1, fn2; if ((fn1 = findfield (field1)) == -1) return (relation *) NULL; if ((fn2 = other->findfield (field2)) == -1) return (relation *) NULL; relation *newrel = new relation(); copydict (this, newrel); // Go through our records and try to find a record in other with // same value for "whichfield." If not found, then add to a third // relation. char value[2000]; for (int i=0; i<numrecords; i++) { int found = 0; strcpy (value, records[i]->word(fn1)); for (int j=0; j<other->numrecords; j++) if (strcmp (value, other->records[j]->word(fn2)) == 0) { found = 1; break; } if (!found) { char line[2000]; records[i]->makeline(line); newrel->addrecord (line); } } return newrel; }