123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476 |
- #include "filters_common.inc.h"
- #include "ifilter_bitsum.h"
- #ifdef INTFILTER
- # ifdef OMITMASK
- static inline int filter_compare(const void *p1,const void *p2)
- {
- if (((const struct intfilter *)p1)->f < ((const struct intfilter *)p2)->f)
- return -1;
- if (((const struct intfilter *)p1)->f > ((const struct intfilter *)p2)->f)
- return 1;
- return 0;
- }
- # ifdef EXPANDMASK
/*
 * We have two masks whose set bits may sit at arbitrary positions.
 * First we find the bits in which the two masks differ, i.e. where
 * one mask has more bits than the other (the "new" bits).
 * Areas common to both masks must stay unchanged.
 * Gaps present in both masks must stay unchanged.
 * The new bits, however, must be filled with every possible value.
 * Therefore we treat the old gaps and the common areas as filled with 1s:
 * before each add, OR the counter with these 1s,
 * then perform the add. The 1s make the carry ripple through
 * to the next new bit.
 * We already know how many new values we need to generate, so this won't overflow.
 * After the addition, AND the result with the negation of the filler 1s
 * (leaving only the new bits), and OR it with the old value.
 * This produces the proper new value.
 * The 1s need to be re-filled before every add to keep this scheme working.
 */
- int flattened = 0;
- // add expanded set of values
- // allocates space on its own
- static void ifilter_addexpanded(
- struct intfilter *ifltr,
- register IFT newbits,
- register IFT notnewbits,
- register IFT newbitsum)
- {
- flattened = 1;
- size_t i = VEC_LENGTH(filters);
- VEC_ADDN(filters,newbitsum + 1);
- register IFT x = ifltr->f;
- register IFT y = 0;
- for (size_t j = 0;;++j) {
- VEC_BUF(filters,i + j).f = x | y;
- if (j == newbitsum)
- break;
- y = ((y | notnewbits) + 1) & newbits;
- }
- }
- // expand existing stuff
- // allocates needed stuff on its own
- static void ifilter_expand(
- register IFT newbits,
- register IFT notnewbits,
- register IFT newbitsum)
- {
- flattened = 1;
- size_t len = VEC_LENGTH(filters);
- VEC_ADDN(filters,newbitsum * len);
- size_t esz = newbitsum + 1; // size of expanded elements
- for (size_t i = len - 1;;--i) {
- register IFT x = VEC_BUF(filters,i).f;
- register IFT y = 0;
- for (IFT j = 0;;++j) {
- VEC_BUF(filters,i * esz + j).f = x | y;
- if (j == newbitsum)
- break;
- y = ((y | notnewbits) + 1) & newbits;
- }
- if (i == 0)
- break;
- }
- }
- static inline void ifilter_addflatten(struct intfilter *ifltr,IFT mask)
- {
- if (VEC_LENGTH(filters) == 0) {
- // simple
- VEC_ADD(filters,*ifltr);
- ifiltermask = mask;
- return;
- }
- if (ifiltermask == mask) {
- // lucky
- VEC_ADD(filters,*ifltr);
- return;
- }
- IFT newbits = ifiltermask ^ mask;
- IFT notnewbits = ~newbits;
- IFT newbitsum = ifilter_bitsum(newbits);
- if (ifiltermask > mask) {
- // current mask covers more bits
- // expand new filter
- ifilter_addexpanded(ifltr,newbits,notnewbits,newbitsum);
- }
- else {
- // new filter mask covers more bits
- // adjust current mask and expand current filters
- ifiltermask = mask;
- ifilter_expand(newbits,notnewbits,newbitsum);
- VEC_ADD(filters,*ifltr);
- }
- }
- # endif // EXPANDMASK
- # else // OMITMASK
/*
 * struct intfilter layout: filter, then mask.
 * Values are compared in a big-endian way, so memcmp can be used.
 * The filter needs to be compared first;
 * if the filters are equal, the masks need to be compared,
 * and memcmp is applicable there too.
 * Thanks to the struct intfilter layout, both comparisons can be done with a single memcmp call.
 */
- static inline int filter_compare(const void *p1,const void *p2)
- {
- return memcmp(p1,p2,sizeof(struct intfilter));
- }
- # endif // OMITMASK
- static void filter_sort(void)
- {
- size_t len = VEC_LENGTH(filters);
- if (len > 0)
- qsort(&VEC_BUF(filters,0),len,sizeof(struct intfilter),&filter_compare);
- }
- #endif // INTFILTER
- #ifdef BINFILTER
- static inline int filter_compare(const void *p1,const void *p2)
- {
- const struct binfilter *b1 = (const struct binfilter *)p1;
- const struct binfilter *b2 = (const struct binfilter *)p2;
- size_t l = b1->len <= b2->len ? b1->len : b2->len;
- int cmp = memcmp(b1->f,b2->f,l);
- if (cmp != 0)
- return cmp;
- if (b1->len < b2->len)
- return -1;
- if (b1->len > b2->len)
- return +1;
- u8 cmask = b1->mask & b2->mask;
- if ((b1->f[l] & cmask) < (b2->f[l] & cmask))
- return -1;
- if ((b1->f[l] & cmask) > (b2->f[l] & cmask))
- return +1;
- if (b1->mask < b2->mask)
- return -1;
- if (b1->mask > b2->mask)
- return +1;
- return 0;
- }
- static void filter_sort(void)
- {
- size_t len = VEC_LENGTH(filters);
- if (len > 0)
- qsort(&VEC_BUF(filters,0),len,sizeof(struct binfilter),&filter_compare);
- }
- #endif // BINFILTER
- #ifndef PCRE2FILTER
- static inline int filters_a_includes_b(size_t a,size_t b)
- {
- # ifdef INTFILTER
- # ifdef OMITMASK
- return VEC_BUF(filters,a).f == VEC_BUF(filters,b).f;
- # else // OMITMASK
- return VEC_BUF(filters,a).f == (VEC_BUF(filters,b).f & VEC_BUF(filters,a).m);
- # endif // OMITMASK
- # else // INTFILTER
- const struct binfilter *fa = &VEC_BUF(filters,a);
- const struct binfilter *fb = &VEC_BUF(filters,b);
- if (fa->len > fb->len)
- return 0;
- size_t l = fa->len;
- int cmp = memcmp(fa->f,fb->f,l);
- if (cmp != 0)
- return 0;
- if (fa->len < fb->len)
- return 1;
- if (fa->mask > fb->mask)
- return 0;
- return fa->f[l] == (fb->f[l] & fa->mask);
- # endif // INTFILTER
- }
- static void filters_dedup(void)
- {
- size_t last = ~(size_t)0; // index after last matching element
- size_t chk; // element to compare against
- size_t st; // start of area to destroy
- size_t len = VEC_LENGTH(filters);
- for (size_t i = 1;i < len;++i) {
- if (last != i) {
- if (filters_a_includes_b(i - 1,i)) {
- if (last != ~(size_t)0) {
- memmove(&VEC_BUF(filters,st),
- &VEC_BUF(filters,last),
- (i - last) * VEC_ELSIZE(filters));
- st += i - last;
- }
- else
- st = i;
- chk = i - 1;
- last = i + 1;
- }
- }
- else {
- if (filters_a_includes_b(chk,i))
- last = i + 1;
- }
- }
- if (last != ~(size_t)0) {
- memmove(&VEC_BUF(filters,st),
- &VEC_BUF(filters,last),
- (len - last) * VEC_ELSIZE(filters));
- st += len - last;
- VEC_SETLENGTH(filters,st);
- }
- }
- #endif // !PCRE2FILTER
- static void filters_clean(void)
- {
- #ifdef PCRE2FILTER
- for (size_t i = 0;i < VEC_LENGTH(filters);++i) {
- pcre2_code_free(VEC_BUF(filters,i).re);
- free(VEC_BUF(filters,i).str);
- }
- #endif
- VEC_FREE(filters);
- }
- size_t filters_count(void)
- {
- return VEC_LENGTH(filters);
- }
- static void filters_print(void)
- {
- if (quietflag)
- return;
- size_t i,l;
- l = VEC_LENGTH(filters);
- if (l)
- fprintf(stderr,"filters:\n");
- for (i = 0;i < l;++i) {
- #ifdef NEEDBINFILTER
- char buf0[256],buf1[256];
- u8 bufx[128];
- #endif
- if (!verboseflag && i >= 20) {
- size_t notshown = l - i;
- fprintf(stderr,"[another " FSZ " %s not shown]\n",
- notshown,notshown == 1 ? "filter" : "filters");
- break;
- }
- #ifdef INTFILTER
- size_t len = 0;
- u8 *imraw;
- # ifndef OMITMASK
- imraw = (u8 *)&VEC_BUF(filters,i).m;
- # else
- imraw = (u8 *)&ifiltermask;
- # endif
- while (len < sizeof(IFT) && imraw[len] != 0x00) ++len;
- u8 mask = imraw[len-1];
- u8 *ifraw = (u8 *)&VEC_BUF(filters,i).f;
- #endif // INTFILTER
- #ifdef BINFILTER
- size_t len = VEC_BUF(filters,i).len + 1;
- u8 mask = VEC_BUF(filters,i).mask;
- u8 *ifraw = VEC_BUF(filters,i).f;
- #endif // BINFILTER
- #ifdef NEEDBINFILTER
- base32_to(buf0,ifraw,len);
- memcpy(bufx,ifraw,len);
- bufx[len - 1] |= ~mask;
- base32_to(buf1,bufx,len);
- char *a = buf0,*b = buf1;
- while (*a && *a == *b)
- ++a, ++b;
- *a = 0;
- fprintf(stderr,"\t%s\n",buf0);
- #endif // NEEDBINFILTER
- #ifdef PCRE2FILTER
- fprintf(stderr,"\t%s\n",VEC_BUF(filters,i).str);
- #endif // PCRE2FILTER
- }
- fprintf(stderr,"in total, " FSZ " %s\n",l,l == 1 ? "filter" : "filters");
- }
/*
 * Parse one filter string and append it to the filters vector.
 *
 * With NEEDBINFILTER the string is treated as a base32 prefix (a leading
 * '^' is skipped). Invalid or too long input is reported on stderr with a
 * caret under the offending position, and nothing is added. An input that
 * decodes to zero bytes is silently ignored.
 * With PCRE2FILTER the string is compiled as an anchored regular
 * expression; a compile error is reported on stderr and nothing is added.
 * The pattern text is copied so it can be printed later; abort() is called
 * if that allocation fails.
 *
 * filter - NUL-terminated filter text; not modified
 */
void filters_add(const char *filter)
{
#ifdef NEEDBINFILTER
	struct binfilter bf;
	size_t ret;
# ifdef INTFILTER
	union intconv {
		IFT i;
		u8 b[sizeof(IFT)];
	} fc,mc;
# endif

	// skip regex start symbol. we do not support regex tho
	if (*filter == '^')
		++filter;

	memset(&bf,0,sizeof(bf));

	if (!base32_valid(filter,&ret)) {
		fprintf(stderr,"filter \"%s\" is not valid base32 string\n",filter);
		// indent by the width of `filter "` (8 columns) so that the caret
		// lines up with the echoed text; ret is used as the offset inside it
		fprintf(stderr,"        ");
		while (ret--)
			fputc(' ',stderr);
		fprintf(stderr,"^\n");
		return;
	}

	ret = BASE32_FROM_LEN(ret);
	if (!ret)
		return;
# ifdef INTFILTER
	size_t maxsz = sizeof(IFT);
# else
	size_t maxsz = sizeof(bf.f);
# endif
	if (ret > maxsz) {
		fprintf(stderr,"filter \"%s\" is too long\n",filter);
		// same 8 column indent as above; the caret then marks the first
		// character past the longest accepted filter
		fprintf(stderr,"        ");
		maxsz = (maxsz * 8) / 5;
		while (maxsz--)
			fputc(' ',stderr);
		fprintf(stderr,"^\n");
		return;
	}

	base32_from(bf.f,&bf.mask,filter);
	// len counts the fully significant bytes; byte f[len] is covered by mask
	bf.len = ret - 1;
# ifdef INTFILTER
	// build the integer mask: 0xFF for every full byte, bf.mask for the last one
	mc.i = 0;
	for (size_t i = 0;i < bf.len;++i)
		mc.b[i] = 0xFF;
	mc.b[bf.len] = bf.mask;
	memcpy(fc.b,bf.f,sizeof(fc.b));
	fc.i &= mc.i;
	struct intfilter ifltr = {
		.f = fc.i,
#  ifndef OMITMASK
		.m = mc.i,
#  endif
	};
#  ifdef OMITMASK
	ifilter_addflatten(&ifltr,mc.i);
#  else // OMITMASK
	VEC_ADD(filters,ifltr);
#  endif // OMITMASK
# endif // INTFILTER
# ifdef BINFILTER
	VEC_ADD(filters,bf);
# endif // BINFILTER
#endif // NEEDBINFILTER
#ifdef PCRE2FILTER
	int errornum;
	PCRE2_SIZE erroroffset;
	pcre2_code *re;

	re = pcre2_compile((PCRE2_SPTR8)filter,PCRE2_ZERO_TERMINATED,
		PCRE2_NO_UTF_CHECK | PCRE2_ANCHORED,&errornum,&erroroffset,0);
	if (!re) {
		PCRE2_UCHAR buffer[1024];
		pcre2_get_error_message(errornum,buffer,sizeof(buffer));
		fprintf(stderr,"PCRE2 compilation failed at offset " FSZ ": %s\n",
			(size_t)erroroffset,buffer);
		return;
	}

	// attempt to JIT. ignore error
	(void) pcre2_jit_compile(re,PCRE2_JIT_COMPLETE);

	struct pcre2filter f;
	memset(&f,0,sizeof(f));
	f.re = re;
	// keep a private copy of the pattern text, including the terminating NUL
	size_t fl = strlen(filter) + 1;
	f.str = (char *) malloc(fl);
	if (!f.str)
		abort();
	memcpy(f.str,filter,fl);
	VEC_ADD(filters,f);
#endif // PCRE2FILTER
}
- static void filters_prepare(void)
- {
- #ifndef PCRE2FILTER
- if (!quietflag)
- fprintf(stderr,"sorting filters...");
- filter_sort();
- if (wantdedup) {
- if (!quietflag)
- fprintf(stderr," removing duplicates...");
- filters_dedup();
- }
- if (!quietflag)
- fprintf(stderr," done.\n");
- #endif
- }
/*
 * Read filters from a text file, one per line, and pass each of them to
 * filters_add(). Empty lines and lines starting with '#' or "//" are
 * skipped.
 *
 * fname - path of the file to read
 *
 * Returns true on success. Returns false, after printing the reason to
 * stderr, when the file cannot be opened or a read error occurs.
 *
 * NOTE: lines are read into a 128 byte buffer, so a longer line reaches
 * filters_add() in several pieces.
 */
static bool loadfilterfile(const char *fname)
{
	char buf[128];

	FILE *f = fopen(fname,"r");
	if (!f) {
		fprintf(stderr,"failed to load filter file \"%s\": %s\n",fname,strerror(errno));
		return false;
	}

	while (fgets(buf,sizeof(buf),f)) {
		// cut the line at its newline, if there is one
		for (char *p = buf;*p;++p) {
			if (*p == '\n') {
				*p = 0;
				break;
			}
		}
		if (*buf && *buf != '#' && memcmp(buf,"//",2) != 0)
			filters_add(buf);
	}

	// ferror() only tells whether an error happened; the reason is in errno,
	// which has to be saved before fclose() gets a chance to overwrite it
	int read_errno = errno;
	int read_failed = ferror(f);
	fclose(f);
	if (read_failed != 0) {
		fprintf(stderr,"failure while reading filter file \"%s\": %s\n",fname,strerror(read_errno));
		return false;
	}
	return true;
}
|