#include #include #include "getbingraph.c" #define RNSC 0 #define GS 1 #define VERBOSE 1 #define BY_OVLP 1 #define OBSERVED 0 #define DEBUG 0 #define TRACKING 0 #define STATS 0 #define LARGEST_ONLY 0 #define ALLOC_BIG_CHUNKS 1 #include "maxcliques.c" #define SymMtxSize(n) (((n)*((n)+1))>>1) #define _rowBase(n,i) (((i) * (n*2 - 1 - (i)))>>1) #define SymMtxIndex(n,smaller,larger) (_rowBase ((n), (smaller)) + (larger)) #define ScanSymMtx(m,n,i,j,v,r,c,x) { \ int __mtx_nodes = (n), __mtx_rowbase = 0; \ for (i = 0; i < __mtx_nodes; ++i, __mtx_rowbase += __mtx_nodes-i) { \ r \ for (j = i; j < __mtx_nodes; ++j) { \ v = (m)[__mtx_rowbase+j]; \ c \ } \ x \ } \ } #define SymMtxOp(op,m,n,smaller,larger) \ (op ((m), SymMtxIndex ((n), (smaller), (larger)))) typedef struct AddedEdge *AddedEdge; struct AddedEdge { AddedEdge tail, sub; int ovlp, x1, x2, count; }; static int targetId; int mapToTargetId (const char *s) { return targetId; } #if RNSC ASCIITrie readXlatTbl (const char *fname, ASCIITrie target) { char *start, *c, *be, buf[20]; struct stat st; FILE *f; int i; ASCIITrie source = NULL; c = start = mapFile (f = openFile (fname), &st); be = start + st.st_size; while (c < be) { char *s1 = c; while (*c++ != '\t'); int l1 = c-s1-1; char *s2 = c; while (*c++ != '\n'); int l2 = c-s2-1; strncpy (buf, s2, l2); buf[l2] = '\0'; targetId = trieLookup (target, buf); strncpy (buf, s1, l1); buf[l1] = '\0'; trieAdd (&source, buf, mapToTargetId); } return source; } #endif int countEdges (struct graphInfo *g) { int i, k, n = g->nNodes; for (i = k = 0; i < n; ++i) k += SetSizeBetween (BitMatrixRow (g->graph, n, i), i+1, n); return k; } int countOverlap (struct graphInfo *g, struct graphInfo *g1, int *tr) { int i, j, n = g->nNodes, m = g1->nNodes, overlap = 0; for (i = 0; i < n; ++i) { int i1 = tr[i]; if (i1 >= 0) { IntSet r = BitMatrixRow (g->graph, n, i), r1 = BitMatrixRow (g1->graph, m, i1); for (j = i-1; (j = SetFindNext (r, n, j+1)) < n;) { int j1 = tr[j]; if (j1 >= 0 && Member (r1, j1)) ++overlap; } } } return overlap; } int countNew (struct graphInfo *g, struct graphInfo *g1, int *tr) { int i, j, n = g->nNodes, m = g1->nNodes, overlap = 0; for (i = 0; i < n; ++i) { int i1 = tr[i]; IntSet r = BitMatrixRow (g->graph, n, i); if (i1 >= 0) { IntSet r1 = BitMatrixRow (g1->graph, m, i1); for (j = i; (j = SetFindNext (r, n, j+1)) < n;) { int j1 = tr[j]; if (j1 >= 0 && !Member (r1, j1)) ++overlap; } } else overlap += SetSizeBetween (r, i+1, n); } return overlap; } int *makeXlatTbl (struct graphInfo *g, ASCIITrie t) { int i, j, n = g->nNodes, *tr = (int *) malloc (n * sizeof (int)); int bad = 0; for (i = 0; i < n; ++i) { j = trieLookup (t, g->names[i]); if (j < 0) ++bad; // fprintf (stderr, "%s not found\n", g->names[i]); tr[i] = j; } return tr; } #if GS static int gsPosEdges, gsNegEdges; double printGSoverlap (int pos, int neg) { double lr = (double) (pos * gsNegEdges) / (double) (neg * gsPosEdges); printf ("overlap with the Gold Standard:\n\ %d positive, %d negative, LR = %.2f\n", pos, neg, lr); return lr; } #endif #if BY_OVLP void addEdgeInfo (AddedEdge *added, int i, int s1, int missed1, int missed2) { int d = s1-missed1; AddedEdge *r, e; for (r = &added[i]; (e = *r) != NULL && e->ovlp < d; r = &e->tail); if (e == NULL || e->ovlp > d) goto create; for (r = &e->sub; (e = *r) != NULL && e->x1 < missed1; r = &e->tail); if (e == NULL || e->x1 > missed1) goto create; for (r = &e->sub; (e = *r) != NULL && e->x2 < missed2; r = &e->tail); if (e == NULL || e->x2 > missed2) { create: { AddedEdge n = (AddedEdge) malloc (sizeof (struct AddedEdge)); n->count = 1; n->ovlp = d; n->x1 = missed1; n->x2 = missed2; n->tail = e; n->sub = NULL; *r = n; } } else ++e->count; } #endif void usage (const char *name) { fprintf (stderr, "usage: %s [-r] " #if RNSC " []" #elif GS " []" #endif "\nlist of added edges as space-separated node names on output\n\ -r repeat until no additions\n", name); exit (1); } int main (int argc, const char **argv) { int minCore, maxDiff, i, j, nextArg, nNodes, nAdded = 0, nCliques, maxcliqsz, repeat = 0, round; struct graphInfo g; if (argc < 2) usage (argv[0]); nextArg = 1; if (argv[1][0] == '-') if (argv[1][1] == 'r') { ++nextArg; repeat = 1; } else usage (argv[0]); if (argc - nextArg < #if RNSC || GS 5 #else 3 #endif || sscanf (argv[nextArg++], "%d", &minCore) != 1 || minCore < 2 || sscanf (argv[nextArg++], "%d", &maxDiff) != 1 || maxDiff < 1) usage (argv[0]); getGraph (argv[nextArg++], &g); nNodes = g.nNodes; #if VERBOSE printf ("input graph on %d nodes\n", nNodes); printf ("looking for pairs of cliques with min overlap %d and max exts %d\n", minCore, maxDiff); #endif #if RNSC struct graphInfo RNSCgraph; int nRNSCedges, initialOverlap, *tr; getGraph (argv[nextArg++], &RNSCgraph); nRNSCedges = countEdges (&RNSCgraph); tr = makeXlatTbl (&RNSCgraph, readXlatTbl (argv[nextArg++], g.nodeDict)); initialOverlap = countOverlap (&RNSCgraph, &g, tr); printf ("RNSC predicted %d new edges\n", nRNSCedges - initialOverlap); #elif GS struct graphInfo gsPos, gsKnown; getGraph (argv[nextArg++], &gsKnown); getGraph (argv[nextArg++], &gsPos); int *trPosKnown = makeXlatTbl (&gsPos, gsKnown.nodeDict); gsPosEdges = countEdges (&gsPos); gsNegEdges = countEdges (&gsKnown) - gsPosEdges; printf ("in the Gold Standard: %d positive, %d negative edges, %d bogus\n", gsPosEdges, gsNegEdges, countNew (&gsPos, &gsKnown, trPosKnown)); int *trPos = makeXlatTbl (&g, gsPos.nodeDict), *trKnown = makeXlatTbl (&g, gsKnown.nodeDict), origPos = countOverlap (&g, &gsPos, trPos), origNeg = countOverlap (&g, &gsKnown, trKnown) - origPos, curPos = origPos, curNeg = origNeg; printGSoverlap (origPos, origNeg); int sz = SymMtxSize (nNodes) * sizeof (AddedEdge); AddedEdge *added = (AddedEdge *) malloc (sz); memset ((void *) added, 0, sz); #endif for (round = 1;; ++round) { // maxCliques_traceout = stdout; CliqTree maxcliqs = maxCliques (&g); maxcliqsz = maxCliqueSize; nCliques = 0; #if VERBOSE printf ("round %d:\nmax clique size %d\n", round, maxcliqsz); #endif int nAddedThisRound = 0; CliqTree p1, p2; int *m1 = (int *) malloc (maxcliqsz * sizeof (int)); int *m2 = (int *) malloc (maxcliqsz * sizeof (int)); int *cliq1 = (int *) malloc (maxcliqsz * sizeof (int)); int *missed1Stk = (int *) malloc (maxcliqsz * sizeof (int)); int *missed2Stk = (int *) malloc (maxcliqsz * sizeof (int)); int *cmpNdxStk = (int *) malloc (maxcliqsz * sizeof (int)); int s1 = 0, s2, missed1, missed2, cmpNdx, j1, j2; #if BY_OVLP #define AddEdge \ addEdgeInfo (added, SymMtxIndex (nNodes, i, j), \ s1, missed1, missed2) #else #define AddEdge \ printf ("%s %s\n", g.names[i], g.names[j]) #endif #if BY_OVLP #define RepeatEdge { \ int k = SymMtxIndex (nNodes, i, j); \ int i1 = trKnown[i]; \ int j1 = trKnown[j]; \ if (added[k] != NULL \ || (i1 >= 0 && j1 >= 0 \ && Member (BitMatrixRow (gsKnown.graph, \ gsKnown.nNodes, \ i1), \ j1))) \ addEdgeInfo (added, k, s1, missed1, missed2); \ } #else #define RepeatEdge #endif ScanCliqTree (maxcliqs, p1, {}, { cliq1[s1++] = p1->node; }, { int maxdiff1 = intMin (maxDiff, s1-minCore); ++nCliques; if (maxdiff1 > 0) { for (i = 0; i < s1; ++i) { missed1Stk[i] = missed2Stk[i] = 0; cmpNdxStk[i] = i; } s2 = s1; p2 = p1; goto resumeScan; ScanCliqTree (maxcliqs, p2, {}, { missed1Stk[s2] = missed1; missed2Stk[s2] = missed2; cmpNdxStk[s2] = cmpNdx; ++s2; i = p2->node; while (cmpNdx < s1 && (j = cliq1[cmpNdx]) < i) { if (missed1 >= maxdiff1) goto resumeScan; m1[missed1++] = j; ++cmpNdx; } if (cmpNdx == s1 || j > i) { if (missed2 >= maxDiff) goto resumeScan; m2[missed2++] = i; } else ++cmpNdx; }, { if (missed2 <= s2-minCore && missed1+s1-cmpNdx <= maxdiff1) { while (cmpNdx < s1) m1[missed1++] = cliq1[cmpNdx++]; for (j1 = 0; j1 < missed1; ++j1) { for (j2 = 0; j2 < missed2; ++j2) { i = m1[j1]; j = m2[j2]; if (i > j) { int t = i; i = j; j = t; } if (!Member (BitMatrixRow (g.graph, nNodes, i), j)) { SetAdd (BitMatrixRow (g.graph, nNodes, i), j); SetAdd (BitMatrixRow (g.graph, nNodes, j), i); ++nAddedThisRound; AddEdge; } else RepeatEdge; } } } }, { resumeScan: --s2; cmpNdx = cmpNdxStk[s2]; missed2 = missed2Stk[s2]; missed1 = missed1Stk[s2]; }); } }, { --s1; }); free (cmpNdxStk); free (missed2Stk); free (missed1Stk); free (cliq1); free (m2); free (m1); freeCliqSpace (); fprintf (stderr, "%d max cliques, max size %d; added %d edge%s\n", nCliques, maxcliqsz, nAddedThisRound, nAddedThisRound == 1 ? "" : "s"); if (nAddedThisRound) { #if VERBOSE printf ("found %d maximal cliques, added %d new edge%s\n", nCliques, nAddedThisRound, nAddedThisRound == 1 ? "" : "s"); #endif nAdded += nAddedThisRound; #if GS int *allNew = (int *) calloc (maxcliqsz, sizeof (int)); // pull out... int *posNew = (int *) calloc (maxcliqsz, sizeof (int)); int *negNew = (int *) calloc (maxcliqsz, sizeof (int)); for (i = 0; i < nNodes; ++i) { int ik = trKnown[i], ip = trPos[i]; int ri = _rowBase (nNodes, i); IntSet gski = BitMatrixRow (gsKnown.graph, gsKnown.nNodes, ik), gspi = BitMatrixRow (gsPos.graph, gsPos.nNodes, ip); for (j = i; j < nNodes; ++j) { AddedEdge e = added[ri+j], e1; if (e != NULL) { int maxsz = 0, known = 0, pos = 0; added[ri+j] = NULL; printf ("%s %s", g.names[i], g.names[j]); if (ik >= 0 && ip >= 0) { int jk = trKnown[j], jp = trPos[j]; if (jk >= 0 && jp >= 0 && Member (gski, jk)) { known = 1; if (Member (gspi, jp)) { printf (" +"); pos = 1; } else printf (" -"); } } int k; for (k = 0; e != NULL; e = e1) { printf (" %d[%d]%d (%d)", e->x1, e->ovlp, e->x2, e->count); k += e->count; if (maxsz < e->ovlp) maxsz = e->ovlp; e1 = e->tail; free (e); } printf (" =%d", k); printf ("\n"); ++allNew[maxsz-1]; if (known) ++((pos ? posNew : negNew)[maxsz-1]); } } } int pos = countOverlap (&g, &gsPos, trPos), neg = countOverlap (&g, &gsKnown, trKnown) - pos; printGSoverlap (pos-curPos, neg-curNeg); curPos = pos; curNeg = neg; for (i = 0; i < maxcliqsz; ++i) printf ("%7d", i+1); printf ("\n"); for (i = 0; i < maxcliqsz; ++i) printf ("%7d", posNew[i]); printf ("\n"); for (i = 0; i < maxcliqsz; ++i) printf ("%7d", negNew[i]); printf ("\n"); for (i = 0; i < maxcliqsz; ++i) printf ("%7d", allNew[i]); printf ("\n"); for (i = 0; i < maxcliqsz; ++i) printf ("%7.2f", (double) (posNew[i] * gsNegEdges) / (double) (negNew[i] * gsPosEdges)); printf ("\n"); #endif } else #if VERBOSE || RNSC printf ("no new edges\n"); #endif if (nAddedThisRound == 0 || !repeat) break; } #if VERBOSE || RNSC printf ("total %d predicted new interactions\n", nAdded); #endif #if RNSC int v = countOverlap (&RNSCgraph, &g, tr) - initialOverlap; printf (" %d of which predicted also by RNSC\n", v); #elif GS double v = printGSoverlap (curPos-origPos, curNeg-origNeg); #endif fclose (g.baseFile); #if RNSC || GS if (argc > nextArg) { FILE *f = fopen (argv[nextArg], "a"); if (f != NULL) { fprintf (f, #if RNSC "%3d %3d %5d %3d %2d %5d %5d %5d\n", minCore, maxDiff, nCliques, maxcliqsz, round-1, nRNSCedges - initialOverlap, nAdded, v); #else // if GS "%3d %3d %5d %3d %2d %5d %8.2f\n", minCore, maxDiff, nCliques, maxcliqsz, round-1, nAdded, v); #endif fclose (f); } } #endif }