filters_main.inc.h 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476
  1. #include "filters_common.inc.h"
  2. #include "ifilter_bitsum.h"
  3. #ifdef INTFILTER
  4. # ifdef OMITMASK
  5. static inline int filter_compare(const void *p1,const void *p2)
  6. {
  7. if (((const struct intfilter *)p1)->f < ((const struct intfilter *)p2)->f)
  8. return -1;
  9. if (((const struct intfilter *)p1)->f > ((const struct intfilter *)p2)->f)
  10. return 1;
  11. return 0;
  12. }
  13. # ifdef EXPANDMASK
  14. /*
  15. * so we have 2 masks with basically random bits
  16. * we first gonna find where these masks are common
  17. * then we gonna find where new mask has more bits than old
  18. * common areas must be unchanged
  19. * gaps in both must be unchanged
  20. * but new bits must be filled
  21. * therefore, lets just fill old gaps and common areas with 1s
  22. * before add, OR with these 1s
  23. * then perform add. these 1s have property to push positive bits to 0s
  24. * we already know how much new gaps we need to fill, so this wont overflow
  25. * after this addition, AND result with NEG of combined mask, and OR with old value
  26. * this will produce new proper value
  27. * we need to re-fill 1s before every add to keep structure working
  28. */
  29. int flattened = 0;
  30. // add expanded set of values
  31. // allocates space on its own
  32. static void ifilter_addexpanded(
  33. struct intfilter *ifltr,
  34. register IFT newbits,
  35. register IFT notnewbits,
  36. register IFT newbitsum)
  37. {
  38. flattened = 1;
  39. size_t i = VEC_LENGTH(filters);
  40. VEC_ADDN(filters,newbitsum + 1);
  41. register IFT x = ifltr->f;
  42. register IFT y = 0;
  43. for (size_t j = 0;;++j) {
  44. VEC_BUF(filters,i + j).f = x | y;
  45. if (j == newbitsum)
  46. break;
  47. y = ((y | notnewbits) + 1) & newbits;
  48. }
  49. }
  50. // expand existing stuff
  51. // allocates needed stuff on its own
  52. static void ifilter_expand(
  53. register IFT newbits,
  54. register IFT notnewbits,
  55. register IFT newbitsum)
  56. {
  57. flattened = 1;
  58. size_t len = VEC_LENGTH(filters);
  59. VEC_ADDN(filters,newbitsum * len);
  60. size_t esz = newbitsum + 1; // size of expanded elements
  61. for (size_t i = len - 1;;--i) {
  62. register IFT x = VEC_BUF(filters,i).f;
  63. register IFT y = 0;
  64. for (IFT j = 0;;++j) {
  65. VEC_BUF(filters,i * esz + j).f = x | y;
  66. if (j == newbitsum)
  67. break;
  68. y = ((y | notnewbits) + 1) & newbits;
  69. }
  70. if (i == 0)
  71. break;
  72. }
  73. }
  74. static inline void ifilter_addflatten(struct intfilter *ifltr,IFT mask)
  75. {
  76. if (VEC_LENGTH(filters) == 0) {
  77. // simple
  78. VEC_ADD(filters,*ifltr);
  79. ifiltermask = mask;
  80. return;
  81. }
  82. if (ifiltermask == mask) {
  83. // lucky
  84. VEC_ADD(filters,*ifltr);
  85. return;
  86. }
  87. IFT newbits = ifiltermask ^ mask;
  88. IFT notnewbits = ~newbits;
  89. IFT newbitsum = ifilter_bitsum(newbits);
  90. if (ifiltermask > mask) {
  91. // current mask covers more bits
  92. // expand new filter
  93. ifilter_addexpanded(ifltr,newbits,notnewbits,newbitsum);
  94. }
  95. else {
  96. // new filter mask covers more bits
  97. // adjust current mask and expand current filters
  98. ifiltermask = mask;
  99. ifilter_expand(newbits,notnewbits,newbitsum);
  100. VEC_ADD(filters,*ifltr);
  101. }
  102. }
  103. # endif // EXPANDMASK
  104. # else // OMITMASK
  105. /*
  106. * struct intfilter layout: filter,mask
  107. * stuff is compared in big-endian way, so memcmp
  108. * filter needs to be compared first
  109. * if its equal, mask needs to be compared
  110. * memcmp is aplicable there too
  111. * due to struct intfilter layout, it all can be stuffed into one memcmp call
  112. */
  113. static inline int filter_compare(const void *p1,const void *p2)
  114. {
  115. return memcmp(p1,p2,sizeof(struct intfilter));
  116. }
  117. # endif // OMITMASK
  118. static void filter_sort(void)
  119. {
  120. size_t len = VEC_LENGTH(filters);
  121. if (len > 0)
  122. qsort(&VEC_BUF(filters,0),len,sizeof(struct intfilter),&filter_compare);
  123. }
  124. #endif // INTFILTER
  125. #ifdef BINFILTER
  126. static inline int filter_compare(const void *p1,const void *p2)
  127. {
  128. const struct binfilter *b1 = (const struct binfilter *)p1;
  129. const struct binfilter *b2 = (const struct binfilter *)p2;
  130. size_t l = b1->len <= b2->len ? b1->len : b2->len;
  131. int cmp = memcmp(b1->f,b2->f,l);
  132. if (cmp != 0)
  133. return cmp;
  134. if (b1->len < b2->len)
  135. return -1;
  136. if (b1->len > b2->len)
  137. return +1;
  138. u8 cmask = b1->mask & b2->mask;
  139. if ((b1->f[l] & cmask) < (b2->f[l] & cmask))
  140. return -1;
  141. if ((b1->f[l] & cmask) > (b2->f[l] & cmask))
  142. return +1;
  143. if (b1->mask < b2->mask)
  144. return -1;
  145. if (b1->mask > b2->mask)
  146. return +1;
  147. return 0;
  148. }
  149. static void filter_sort(void)
  150. {
  151. size_t len = VEC_LENGTH(filters);
  152. if (len > 0)
  153. qsort(&VEC_BUF(filters,0),len,sizeof(struct binfilter),&filter_compare);
  154. }
  155. #endif // BINFILTER
  156. #ifndef PCRE2FILTER
  157. static inline int filters_a_includes_b(size_t a,size_t b)
  158. {
  159. # ifdef INTFILTER
  160. # ifdef OMITMASK
  161. return VEC_BUF(filters,a).f == VEC_BUF(filters,b).f;
  162. # else // OMITMASK
  163. return VEC_BUF(filters,a).f == (VEC_BUF(filters,b).f & VEC_BUF(filters,a).m);
  164. # endif // OMITMASK
  165. # else // INTFILTER
  166. const struct binfilter *fa = &VEC_BUF(filters,a);
  167. const struct binfilter *fb = &VEC_BUF(filters,b);
  168. if (fa->len > fb->len)
  169. return 0;
  170. size_t l = fa->len;
  171. int cmp = memcmp(fa->f,fb->f,l);
  172. if (cmp != 0)
  173. return 0;
  174. if (fa->len < fb->len)
  175. return 1;
  176. if (fa->mask > fb->mask)
  177. return 0;
  178. return fa->f[l] == (fb->f[l] & fa->mask);
  179. # endif // INTFILTER
  180. }
  181. static void filters_dedup(void)
  182. {
  183. size_t last = ~(size_t)0; // index after last matching element
  184. size_t chk; // element to compare against
  185. size_t st; // start of area to destroy
  186. size_t len = VEC_LENGTH(filters);
  187. for (size_t i = 1;i < len;++i) {
  188. if (last != i) {
  189. if (filters_a_includes_b(i - 1,i)) {
  190. if (last != ~(size_t)0) {
  191. memmove(&VEC_BUF(filters,st),
  192. &VEC_BUF(filters,last),
  193. (i - last) * VEC_ELSIZE(filters));
  194. st += i - last;
  195. }
  196. else
  197. st = i;
  198. chk = i - 1;
  199. last = i + 1;
  200. }
  201. }
  202. else {
  203. if (filters_a_includes_b(chk,i))
  204. last = i + 1;
  205. }
  206. }
  207. if (last != ~(size_t)0) {
  208. memmove(&VEC_BUF(filters,st),
  209. &VEC_BUF(filters,last),
  210. (len - last) * VEC_ELSIZE(filters));
  211. st += len - last;
  212. VEC_SETLENGTH(filters,st);
  213. }
  214. }
  215. #endif // !PCRE2FILTER
  216. static void filters_clean(void)
  217. {
  218. #ifdef PCRE2FILTER
  219. for (size_t i = 0;i < VEC_LENGTH(filters);++i) {
  220. pcre2_code_free(VEC_BUF(filters,i).re);
  221. free(VEC_BUF(filters,i).str);
  222. }
  223. #endif
  224. VEC_FREE(filters);
  225. }
  226. size_t filters_count(void)
  227. {
  228. return VEC_LENGTH(filters);
  229. }
  230. static void filters_print(void)
  231. {
  232. if (quietflag)
  233. return;
  234. size_t i,l;
  235. l = VEC_LENGTH(filters);
  236. if (l)
  237. fprintf(stderr,"filters:\n");
  238. for (i = 0;i < l;++i) {
  239. #ifdef NEEDBINFILTER
  240. char buf0[256],buf1[256];
  241. u8 bufx[128];
  242. #endif
  243. if (!verboseflag && i >= 20) {
  244. size_t notshown = l - i;
  245. fprintf(stderr,"[another " FSZ " %s not shown]\n",
  246. notshown,notshown == 1 ? "filter" : "filters");
  247. break;
  248. }
  249. #ifdef INTFILTER
  250. size_t len = 0;
  251. u8 *imraw;
  252. # ifndef OMITMASK
  253. imraw = (u8 *)&VEC_BUF(filters,i).m;
  254. # else
  255. imraw = (u8 *)&ifiltermask;
  256. # endif
  257. while (len < sizeof(IFT) && imraw[len] != 0x00) ++len;
  258. u8 mask = imraw[len-1];
  259. u8 *ifraw = (u8 *)&VEC_BUF(filters,i).f;
  260. #endif // INTFILTER
  261. #ifdef BINFILTER
  262. size_t len = VEC_BUF(filters,i).len + 1;
  263. u8 mask = VEC_BUF(filters,i).mask;
  264. u8 *ifraw = VEC_BUF(filters,i).f;
  265. #endif // BINFILTER
  266. #ifdef NEEDBINFILTER
  267. base32_to(buf0,ifraw,len);
  268. memcpy(bufx,ifraw,len);
  269. bufx[len - 1] |= ~mask;
  270. base32_to(buf1,bufx,len);
  271. char *a = buf0,*b = buf1;
  272. while (*a && *a == *b)
  273. ++a, ++b;
  274. *a = 0;
  275. fprintf(stderr,"\t%s\n",buf0);
  276. #endif // NEEDBINFILTER
  277. #ifdef PCRE2FILTER
  278. fprintf(stderr,"\t%s\n",VEC_BUF(filters,i).str);
  279. #endif // PCRE2FILTER
  280. }
  281. fprintf(stderr,"in total, " FSZ " %s\n",l,l == 1 ? "filter" : "filters");
  282. }
  283. void filters_add(const char *filter)
  284. {
  285. #ifdef NEEDBINFILTER
  286. struct binfilter bf;
  287. size_t ret;
  288. # ifdef INTFILTER
  289. union intconv {
  290. IFT i;
  291. u8 b[sizeof(IFT)];
  292. } fc,mc;
  293. # endif
  294. // skip regex start symbol. we do not support regex tho
  295. if (*filter == '^')
  296. ++filter;
  297. memset(&bf,0,sizeof(bf));
  298. if (!base32_valid(filter,&ret)) {
  299. fprintf(stderr,"filter \"%s\" is not valid base32 string\n",filter);
  300. fprintf(stderr," ");
  301. while (ret--)
  302. fputc(' ',stderr);
  303. fprintf(stderr,"^\n");
  304. return;
  305. }
  306. ret = BASE32_FROM_LEN(ret);
  307. if (!ret)
  308. return;
  309. # ifdef INTFILTER
  310. size_t maxsz = sizeof(IFT);
  311. # else
  312. size_t maxsz = sizeof(bf.f);
  313. # endif
  314. if (ret > maxsz) {
  315. fprintf(stderr,"filter \"%s\" is too long\n",filter);
  316. fprintf(stderr," ");
  317. maxsz = (maxsz * 8) / 5;
  318. while (maxsz--)
  319. fputc(' ',stderr);
  320. fprintf(stderr,"^\n");
  321. return;
  322. }
  323. base32_from(bf.f,&bf.mask,filter);
  324. bf.len = ret - 1;
  325. # ifdef INTFILTER
  326. mc.i = 0;
  327. for (size_t i = 0;i < bf.len;++i)
  328. mc.b[i] = 0xFF;
  329. mc.b[bf.len] = bf.mask;
  330. memcpy(fc.b,bf.f,sizeof(fc.b));
  331. fc.i &= mc.i;
  332. struct intfilter ifltr = {
  333. .f = fc.i,
  334. # ifndef OMITMASK
  335. .m = mc.i,
  336. # endif
  337. };
  338. # ifdef OMITMASK
  339. ifilter_addflatten(&ifltr,mc.i);
  340. # else // OMITMASK
  341. VEC_ADD(filters,ifltr);
  342. # endif // OMITMASK
  343. # endif // INTFILTER
  344. # ifdef BINFILTER
  345. VEC_ADD(filters,bf);
  346. # endif // BINFILTER
  347. #endif // NEEDBINFILTER
  348. #ifdef PCRE2FILTER
  349. int errornum;
  350. PCRE2_SIZE erroroffset;
  351. pcre2_code *re;
  352. re = pcre2_compile((PCRE2_SPTR8)filter,PCRE2_ZERO_TERMINATED,
  353. PCRE2_NO_UTF_CHECK | PCRE2_ANCHORED,&errornum,&erroroffset,0);
  354. if (!re) {
  355. PCRE2_UCHAR buffer[1024];
  356. pcre2_get_error_message(errornum,buffer,sizeof(buffer));
  357. fprintf(stderr,"PCRE2 compilation failed at offset " FSZ ": %s\n",
  358. (size_t)erroroffset,buffer);
  359. return;
  360. }
  361. // attempt to JIT. ignore error
  362. (void) pcre2_jit_compile(re,PCRE2_JIT_COMPLETE);
  363. struct pcre2filter f;
  364. memset(&f,0,sizeof(f));
  365. f.re = re;
  366. size_t fl = strlen(filter) + 1;
  367. f.str = (char *) malloc(fl);
  368. if (!f.str)
  369. abort();
  370. memcpy(f.str,filter,fl);
  371. VEC_ADD(filters,f);
  372. #endif // PCRE2FILTER
  373. }
  374. static void filters_prepare(void)
  375. {
  376. #ifndef PCRE2FILTER
  377. if (!quietflag)
  378. fprintf(stderr,"sorting filters...");
  379. filter_sort();
  380. if (wantdedup) {
  381. if (!quietflag)
  382. fprintf(stderr," removing duplicates...");
  383. filters_dedup();
  384. }
  385. if (!quietflag)
  386. fprintf(stderr," done.\n");
  387. #endif
  388. }
  389. static bool loadfilterfile(const char *fname)
  390. {
  391. char buf[128];
  392. FILE *f = fopen(fname,"r");
  393. if (!f) {
  394. fprintf(stderr,"failed to load filter file \"%s\": %s\n",fname,strerror(errno));
  395. return false;
  396. }
  397. while (fgets(buf,sizeof(buf),f)) {
  398. for (char *p = buf;*p;++p) {
  399. if (*p == '\n') {
  400. *p = 0;
  401. break;
  402. }
  403. }
  404. if (*buf && *buf != '#' && memcmp(buf,"//",2) != 0)
  405. filters_add(buf);
  406. }
  407. int fe = ferror(f);
  408. fclose(f);
  409. if (fe != 0) {
  410. fprintf(stderr,"failure while reading filter file \"%s\": %s\n",fname,strerror(fe));
  411. return false;
  412. }
  413. return true;
  414. }