mirror of
				https://github.com/MariaDB/server.git
				synced 2025-10-26 08:28:13 +01:00 
			
		
		
		
	
		
			
				
	
	
		
			841 lines
		
	
	
	
		
			23 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			841 lines
		
	
	
	
		
			23 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
/* -*- c-basic-offset: 2 -*- */
/* Copyright(C) 2009-2014 Brazil

  This library is free software; you can redistribute it and/or
  modify it under the terms of the GNU Lesser General Public
  License version 2.1 as published by the Free Software Foundation.

  This library is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  Lesser General Public License for more details.

  You should have received a copy of the GNU Lesser General Public
  License along with this library; if not, write to the Free Software
  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1335  USA
*/
 | |
| #include "grn.h"
 | |
| #include <string.h>
 | |
| #include <stddef.h>
 | |
| #include "grn_snip.h"
 | |
| #include "grn_ctx.h"
 | |
| 
 | |
/*
 * Two-argument maximum / minimum helpers.  Each is defined only when no
 * macro of that name already exists.  Both evaluate their arguments more
 * than once, so do not pass expressions with side effects.
 */
#ifndef MAX
#  define MAX(a, b) ((a) > (b) ? (a) : (b))
#endif /* MAX */

#ifndef MIN
#  define MIN(a, b) ((a) < (b) ? (a) : (b))
#endif /* MIN */
 | |
| 
 | |
/*
 * Measures the run of bytes >= 0x80 that ends at x[y - 1] and returns the
 * low bit of (run length + 1): 1 when the run length is even (including an
 * empty run or y == 0), 0 when it is odd.
 * grn_snip_find_firstbyte() keeps stepping its offset while this returns 0.
 *
 * x: start of the buffer.
 * y: number of leading bytes of x to examine.
 */
static int
grn_bm_check_euc(const unsigned char *x, const size_t y)
{
  size_t run = 0;

  /* Walk backwards by index: a pointer-based scan would have to step to
     x - 1 when every byte is >= 0x80, which is undefined behavior. */
  while (run < y && x[y - 1 - run] >= 0x80U) {
    run++;
  }
  return (int) ((run + 1) & 1);
}
 | |
| 
 | |
/*
 * Measures the run of bytes in 0x81..0x9f or 0xe0..0xfc that ends at
 * x[y - 1] (presumably the Shift_JIS lead-byte ranges -- confirm) and
 * returns the low bit of (run length + 1): 1 when the run length is even
 * (including an empty run or y == 0), 0 when it is odd.
 *
 * x: start of the buffer.
 * y: number of leading bytes of x to examine.
 */
static int
grn_bm_check_sjis(const unsigned char *x, const size_t y)
{
  size_t run = 0;

  /* Walk backwards by index so that no pointer before x is ever formed. */
  while (run < y) {
    const unsigned char c = x[y - 1 - run];
    if ((c < 0x81U) || (c > 0x9fU && c < 0xe0U) || (c > 0xfcU)) {
      break;
    }
    run++;
  }
  return (int) ((run + 1) & 1);
}
 | |
| 
 | |
/*
Currently disabled (kept for reference):

static void
grn_bm_suffixes(const unsigned char *x, size_t m, size_t *suff)
{
  size_t f, g;
  intptr_t i;
  f = 0;
  suff[m - 1] = m;
  g = m - 1;
  for (i = m - 2; i >= 0; --i) {
    if (i > (intptr_t) g && suff[i + m - 1 - f] < i - g)
      suff[i] = suff[i + m - 1 - f];
    else {
      if (i < (intptr_t) g)
        g = i;
      f = i;
      while (g > 0 && x[g] == x[g + m - 1 - f])
        --g;
      suff[i] = f - g;
    }
  }
}
*/
 | |
| 
 | |
| static void
 | |
| grn_bm_preBmBc(const unsigned char *x, size_t m, size_t *bmBc)
 | |
| {
 | |
|   size_t i;
 | |
|   for (i = 0; i < ASIZE; ++i) {
 | |
|     bmBc[i] = m;
 | |
|   }
 | |
|   for (i = 0; i < m - 1; ++i) {
 | |
|     bmBc[(unsigned int) x[i]] = m - (i + 1);
 | |
|   }
 | |
| }
 | |
| 
 | |
/*
 * GRN_BM_COMPARE -- accept or reject a candidate match that starts at byte
 * `found` of the normalized text.
 *
 * Meant to be expanded inside grn_bm_tunedbm() only: it uses that function's
 * locals by name (cond, flags, found, shift, m, i, string_checks,
 * string_original, string_original_length_in_bytes, string_encoding).
 *
 * When string_checks[found] is 0 the macro does nothing and the caller keeps
 * searching.  Otherwise it sums the positive string_checks[] entries to turn
 * the normalized position into offsets, stores the match in cond and
 * returns from the enclosing function.
 */
#define GRN_BM_COMPARE do { \
  if (string_checks[found]) { \
    size_t real_offset = cond->last_offset; \
    size_t alpha_head = cond->found_alpha_head; \
    /* advance from the previous match up to the new one */ \
    for (i = cond->last_found; i < found; i++) { \
      if (string_checks[i] > 0) { \
        alpha_head = i; \
        real_offset += string_checks[i]; \
      } \
    } \
    /* a negative check means `found` is inside a character: */ \
    /* step back to the head of that character */ \
    if (string_checks[found] < 0) { \
      real_offset -= string_checks[alpha_head]; \
      cond->last_found = alpha_head; \
    } else { \
      cond->last_found = found; \
    } \
    cond->start_offset = cond->last_offset = real_offset; \
    if (flags & GRN_SNIP_SKIP_LEADING_SPACES) { \
      while (cond->start_offset < string_original_length_in_bytes && \
             (i = grn_isspace(string_original + cond->start_offset, \
                              string_encoding))) { \
        cond->start_offset += i; \
      } \
    } \
    /* extend the offset across the m matched bytes to get the end */ \
    for (i = cond->last_found; i < found + m; i++) { \
      if (string_checks[i] > 0) { \
        real_offset += string_checks[i]; \
      } \
    } \
    cond->end_offset = real_offset; \
    cond->found = found + shift; \
    cond->found_alpha_head = alpha_head; \
    return; \
  } \
} while (0)
 | |
| 
 | |
/*
 * GRN_BM_BM_COMPARE -- verify a candidate whose last byte (p[-1]) already
 * matched the last keyword byte.
 *
 * Meant to be expanded inside grn_bm_tunedbm() only; uses its locals p, cp,
 * ck, i, m, y and found.  The second-to-last byte is compared against the
 * cached ck first, then the remaining bytes are compared right to left.
 * If all m bytes agree, `found` is set to the match start and
 * GRN_BM_COMPARE takes over (which may return from the enclosing function).
 */
#define GRN_BM_BM_COMPARE do { \
  if (p[-2] == ck) { \
    i = 3; \
    while (i <= m && p[-(intptr_t)i] == cp[-(intptr_t)i]) { \
      ++i; \
    } \
    if (i > m) { \
      found = p - y - m; \
      GRN_BM_COMPARE; \
    } \
  } \
} while (0)
 | |
| 
 | |
| void
 | |
| grn_bm_tunedbm(grn_ctx *ctx, snip_cond *cond, grn_obj *string, int flags)
 | |
| {
 | |
|   register unsigned char *limit, ck;
 | |
|   register const unsigned char *p, *cp;
 | |
|   register size_t *bmBc, delta1, i;
 | |
| 
 | |
|   const unsigned char *x;
 | |
|   unsigned char *y;
 | |
|   size_t shift, found;
 | |
| 
 | |
|   const char *string_original;
 | |
|   unsigned int string_original_length_in_bytes;
 | |
|   const short *string_checks;
 | |
|   grn_encoding string_encoding;
 | |
|   const char *string_norm, *keyword_norm;
 | |
|   unsigned int n, m;
 | |
| 
 | |
|   grn_string_get_original(ctx, string,
 | |
|                           &string_original, &string_original_length_in_bytes);
 | |
|   string_checks = grn_string_get_checks(ctx, string);
 | |
|   string_encoding = grn_string_get_encoding(ctx, string);
 | |
|   grn_string_get_normalized(ctx, string, &string_norm, &n, NULL);
 | |
|   grn_string_get_normalized(ctx, cond->keyword, &keyword_norm, &m, NULL);
 | |
| 
 | |
|   y = (unsigned char *)string_norm;
 | |
|   if (m == 1) {
 | |
|     if (n > cond->found) {
 | |
|       shift = 1;
 | |
|       p = memchr(y + cond->found, keyword_norm[0], n - cond->found);
 | |
|       if (p != NULL) {
 | |
|         found = p - y;
 | |
|         GRN_BM_COMPARE;
 | |
|       }
 | |
|     }
 | |
|     cond->stopflag = SNIPCOND_STOP;
 | |
|     return;
 | |
|   }
 | |
| 
 | |
|   x = (unsigned char *)keyword_norm;
 | |
|   bmBc = cond->bmBc;
 | |
|   shift = cond->shift;
 | |
| 
 | |
|   /* Restart */
 | |
|   p = y + m + cond->found;
 | |
|   cp = x + m;
 | |
|   ck = cp[-2];
 | |
| 
 | |
|   /* 12 means 1(initial offset) + 10 (in loop) + 1 (shift) */
 | |
|   if (n - cond->found > 12 * m) {
 | |
|     limit = y + n - 11 * m;
 | |
|     while (p <= limit) {
 | |
|       p += bmBc[p[-1]];
 | |
|       if(!(delta1 = bmBc[p[-1]])) {
 | |
|         goto check;
 | |
|       }
 | |
|       p += delta1;
 | |
|       p += bmBc[p[-1]];
 | |
|       p += bmBc[p[-1]];
 | |
|       if(!(delta1 = bmBc[p[-1]])) {
 | |
|         goto check;
 | |
|       }
 | |
|       p += delta1;
 | |
|       p += bmBc[p[-1]];
 | |
|       p += bmBc[p[-1]];
 | |
|       if(!(delta1 = bmBc[p[-1]])) {
 | |
|         goto check;
 | |
|       }
 | |
|       p += delta1;
 | |
|       p += bmBc[p[-1]];
 | |
|       p += bmBc[p[-1]];
 | |
|       continue;
 | |
|     check:
 | |
|       GRN_BM_BM_COMPARE;
 | |
|       p += shift;
 | |
|     }
 | |
|   }
 | |
|   /* limit check + search */
 | |
|   limit = y + n;
 | |
|   while(p <= limit) {
 | |
|     if (!(delta1 = bmBc[p[-1]])) {
 | |
|       GRN_BM_BM_COMPARE;
 | |
|       p += shift;
 | |
|     }
 | |
|     p += delta1;
 | |
|   }
 | |
|   cond->stopflag = SNIPCOND_STOP;
 | |
| }
 | |
| 
 | |
/*
 * Returns the number of bytes the range [str, end) occupies once the four
 * HTML-special characters are replaced by their entities.  Every other byte
 * counts as one.
 *
 * str: first byte of the range.
 * end: one past the last byte of the range.
 */
static size_t
count_mapped_chars(const char *str, const char *end)
{
  size_t total = 0;
  const char *cursor = str;

  while (cursor != end) {
    const char c = *cursor++;
    if (c == '<' || c == '>') {
      total += 4;               /* "&lt;" or "&gt;" */
    } else if (c == '&') {
      total += 5;               /* "&amp;" */
    } else if (c == '"') {
      total += 6;               /* "&quot;" */
    } else {
      total += 1;
    }
  }
  return total;
}
 | |
| 
 | |
| grn_rc
 | |
| grn_snip_cond_close(grn_ctx *ctx, snip_cond *cond)
 | |
| {
 | |
|   if (!cond) {
 | |
|     return GRN_INVALID_ARGUMENT;
 | |
|   }
 | |
|   if (cond->keyword) {
 | |
|     grn_obj_close(ctx, cond->keyword);
 | |
|   }
 | |
|   return GRN_SUCCESS;
 | |
| }
 | |
| 
 | |
| grn_rc
 | |
| grn_snip_cond_init(grn_ctx *ctx, snip_cond *sc, const char *keyword, unsigned int keyword_len,
 | |
|                    grn_encoding enc, grn_obj *normalizer, int flags)
 | |
| {
 | |
|   const char *norm;
 | |
|   unsigned int norm_blen;
 | |
|   int f = GRN_STR_REMOVEBLANK;
 | |
|   memset(sc, 0, sizeof(snip_cond));
 | |
|   if (!(sc->keyword = grn_string_open(ctx, keyword, keyword_len,
 | |
|                                       normalizer, f))) {
 | |
|     GRN_LOG(ctx, GRN_LOG_ALERT,
 | |
|             "grn_string_open on snip_cond_init failed!");
 | |
|     return GRN_NO_MEMORY_AVAILABLE;
 | |
|   }
 | |
|   grn_string_get_normalized(ctx, sc->keyword, &norm, &norm_blen, NULL);
 | |
|   if (!norm_blen) {
 | |
|     grn_snip_cond_close(ctx, sc);
 | |
|     return GRN_INVALID_ARGUMENT;
 | |
|   }
 | |
|   if (norm_blen != 1) {
 | |
|     grn_bm_preBmBc((unsigned char *)norm, norm_blen, sc->bmBc);
 | |
|     sc->shift = sc->bmBc[(unsigned char)norm[norm_blen - 1]];
 | |
|     sc->bmBc[(unsigned char)norm[norm_blen - 1]] = 0;
 | |
|   }
 | |
|   return GRN_SUCCESS;
 | |
| }
 | |
| 
 | |
| void
 | |
| grn_snip_cond_reinit(snip_cond *cond)
 | |
| {
 | |
|   cond->found = 0;
 | |
|   cond->last_found = 0;
 | |
|   cond->last_offset = 0;
 | |
|   cond->start_offset = 0;
 | |
|   cond->end_offset = 0;
 | |
| 
 | |
|   cond->count = 0;
 | |
|   cond->stopflag = SNIPCOND_NONSTOP;
 | |
| }
 | |
| 
 | |
| inline static char *
 | |
| grn_snip_strndup(grn_ctx *ctx, const char *string, unsigned int string_len)
 | |
| {
 | |
|    char *copied_string;
 | |
| 
 | |
|    copied_string = GRN_MALLOC(string_len + 1);
 | |
|    if (!copied_string) {
 | |
|      return NULL;
 | |
|    }
 | |
|    grn_memcpy(copied_string, string, string_len);
 | |
|    copied_string[string_len]= '\0'; /* not required, but for ql use */
 | |
|    return copied_string;
 | |
| }
 | |
| 
 | |
| inline static grn_rc
 | |
| grn_snip_cond_set_tag(grn_ctx *ctx,
 | |
|                       const char **dest_tag, size_t *dest_tag_len,
 | |
|                       const char *tag, unsigned int tag_len,
 | |
|                       const char *default_tag, unsigned int default_tag_len,
 | |
|                       int copy_tag)
 | |
| {
 | |
|   if (tag) {
 | |
|     if (copy_tag) {
 | |
|       char *copied_tag;
 | |
|       copied_tag = grn_snip_strndup(ctx, tag, tag_len);
 | |
|       if (!copied_tag) {
 | |
|         return GRN_NO_MEMORY_AVAILABLE;
 | |
|       }
 | |
|       *dest_tag = copied_tag;
 | |
|     } else {
 | |
|       *dest_tag = tag;
 | |
|     }
 | |
|     *dest_tag_len = tag_len;
 | |
|   } else {
 | |
|     *dest_tag = default_tag;
 | |
|     *dest_tag_len = default_tag_len;
 | |
|   }
 | |
|   return GRN_SUCCESS;
 | |
| }
 | |
| 
 | |
| grn_rc
 | |
| grn_snip_set_normalizer(grn_ctx *ctx, grn_obj *snip,
 | |
|                         grn_obj *normalizer)
 | |
| {
 | |
|   grn_snip *snip_;
 | |
|   if (!snip) {
 | |
|     return GRN_INVALID_ARGUMENT;
 | |
|   }
 | |
| 
 | |
|   snip_ = (grn_snip *)snip;
 | |
|   snip_->normalizer = normalizer;
 | |
|   return GRN_SUCCESS;
 | |
| }
 | |
| 
 | |
| grn_obj *
 | |
| grn_snip_get_normalizer(grn_ctx *ctx, grn_obj *snip)
 | |
| {
 | |
|   grn_snip *snip_;
 | |
| 
 | |
|   if (!snip) {
 | |
|     return NULL;
 | |
|   }
 | |
| 
 | |
|   snip_ = (grn_snip *)snip;
 | |
|   return snip_->normalizer;
 | |
| }
 | |
| 
 | |
| grn_rc
 | |
| grn_snip_add_cond(grn_ctx *ctx, grn_obj *snip,
 | |
|                   const char *keyword, unsigned int keyword_len,
 | |
|                   const char *opentag, unsigned int opentag_len,
 | |
|                   const char *closetag, unsigned int closetag_len)
 | |
| {
 | |
|   grn_rc rc;
 | |
|   int copy_tag;
 | |
|   snip_cond *cond;
 | |
|   unsigned int norm_blen;
 | |
|   grn_snip *snip_;
 | |
| 
 | |
|   snip_ = (grn_snip *)snip;
 | |
|   if (!snip_ || !keyword || !keyword_len || snip_->cond_len >= MAX_SNIP_COND_COUNT) {
 | |
|     return GRN_INVALID_ARGUMENT;
 | |
|   }
 | |
| 
 | |
|   cond = snip_->cond + snip_->cond_len;
 | |
|   if ((rc = grn_snip_cond_init(ctx, cond, keyword, keyword_len,
 | |
|                                snip_->encoding, snip_->normalizer, snip_->flags))) {
 | |
|     return rc;
 | |
|   }
 | |
|   grn_string_get_normalized(ctx, cond->keyword, NULL, &norm_blen, NULL);
 | |
|   if (norm_blen > snip_->width) {
 | |
|     grn_snip_cond_close(ctx, cond);
 | |
|     return GRN_INVALID_ARGUMENT;
 | |
|   }
 | |
| 
 | |
|   copy_tag = snip_->flags & GRN_SNIP_COPY_TAG;
 | |
|   rc = grn_snip_cond_set_tag(ctx,
 | |
|                              &(cond->opentag), &(cond->opentag_len),
 | |
|                              opentag, opentag_len,
 | |
|                              snip_->defaultopentag, snip_->defaultopentag_len,
 | |
|                              copy_tag);
 | |
|   if (rc) {
 | |
|     grn_snip_cond_close(ctx, cond);
 | |
|     return rc;
 | |
|   }
 | |
| 
 | |
|   rc = grn_snip_cond_set_tag(ctx,
 | |
|                              &(cond->closetag), &(cond->closetag_len),
 | |
|                              closetag, closetag_len,
 | |
|                              snip_->defaultclosetag, snip_->defaultclosetag_len,
 | |
|                              copy_tag);
 | |
|   if (rc) {
 | |
|     if (opentag && copy_tag) {
 | |
|       GRN_FREE((void *)cond->opentag);
 | |
|     }
 | |
|     grn_snip_cond_close(ctx, cond);
 | |
|     return rc;
 | |
|   }
 | |
| 
 | |
|   snip_->cond_len++;
 | |
|   return GRN_SUCCESS;
 | |
| }
 | |
| 
 | |
| static size_t
 | |
| grn_snip_find_firstbyte(const char *string, grn_encoding encoding, size_t offset,
 | |
|                         size_t doffset)
 | |
| {
 | |
|   switch (encoding) {
 | |
|   case GRN_ENC_EUC_JP:
 | |
|     while (!(grn_bm_check_euc((unsigned char *) string, offset)))
 | |
|       offset += doffset;
 | |
|     break;
 | |
|   case GRN_ENC_SJIS:
 | |
|     if (!(grn_bm_check_sjis((unsigned char *) string, offset)))
 | |
|       offset += doffset;
 | |
|     break;
 | |
|   case GRN_ENC_UTF8:
 | |
|     while ((signed char)string[offset] <= (signed char)0xc0)
 | |
|       offset += doffset;
 | |
|     break;
 | |
|   default:
 | |
|     break;
 | |
|   }
 | |
|   return offset;
 | |
| }
 | |
| 
 | |
| inline static grn_rc
 | |
| grn_snip_set_default_tag(grn_ctx *ctx,
 | |
|                          const char **dest_tag, size_t *dest_tag_len,
 | |
|                          const char *tag, unsigned int tag_len,
 | |
|                          int copy_tag)
 | |
| {
 | |
|   if (copy_tag && tag) {
 | |
|     char *copied_tag;
 | |
|     copied_tag = grn_snip_strndup(ctx, tag, tag_len);
 | |
|     if (!copied_tag) {
 | |
|       return GRN_NO_MEMORY_AVAILABLE;
 | |
|     }
 | |
|     *dest_tag = copied_tag;
 | |
|   } else {
 | |
|     *dest_tag = tag;
 | |
|   }
 | |
|   *dest_tag_len = tag_len;
 | |
|   return GRN_SUCCESS;
 | |
| }
 | |
| 
 | |
| grn_obj *
 | |
| grn_snip_open(grn_ctx *ctx, int flags, unsigned int width,
 | |
|               unsigned int max_results,
 | |
|               const char *defaultopentag, unsigned int defaultopentag_len,
 | |
|               const char *defaultclosetag, unsigned int defaultclosetag_len,
 | |
|               grn_snip_mapping *mapping)
 | |
| {
 | |
|   int copy_tag;
 | |
|   grn_snip *ret = NULL;
 | |
|   if (!(ret = GRN_MALLOC(sizeof(grn_snip)))) {
 | |
|     GRN_LOG(ctx, GRN_LOG_ALERT, "grn_snip allocation failed on grn_snip_open");
 | |
|     return NULL;
 | |
|   }
 | |
|   if (max_results > MAX_SNIP_RESULT_COUNT || max_results == 0) {
 | |
|     GRN_LOG(ctx, GRN_LOG_WARNING, "max_results is invalid on grn_snip_open");
 | |
|     GRN_FREE(ret);
 | |
|     return NULL;
 | |
|   }
 | |
|   GRN_API_ENTER;
 | |
|   ret->encoding = ctx->encoding;
 | |
|   ret->flags = flags;
 | |
|   ret->width = width;
 | |
|   ret->max_results = max_results;
 | |
|   ret->defaultopentag = NULL;
 | |
|   ret->defaultclosetag = NULL;
 | |
| 
 | |
|   copy_tag = flags & GRN_SNIP_COPY_TAG;
 | |
|   if (grn_snip_set_default_tag(ctx,
 | |
|                                &(ret->defaultopentag),
 | |
|                                &(ret->defaultopentag_len),
 | |
|                                defaultopentag, defaultopentag_len,
 | |
|                                copy_tag)) {
 | |
|     GRN_FREE(ret);
 | |
|     GRN_API_RETURN(NULL);
 | |
|   }
 | |
| 
 | |
|   if (grn_snip_set_default_tag(ctx,
 | |
|                                &(ret->defaultclosetag),
 | |
|                                &(ret->defaultclosetag_len),
 | |
|                                defaultclosetag, defaultclosetag_len,
 | |
|                                copy_tag)) {
 | |
|     if (copy_tag && ret->defaultopentag) {
 | |
|       GRN_FREE((void *)ret->defaultopentag);
 | |
|     }
 | |
|     GRN_FREE(ret);
 | |
|     GRN_API_RETURN(NULL);
 | |
|   }
 | |
| 
 | |
|   ret->cond_len = 0;
 | |
|   ret->mapping = mapping;
 | |
|   ret->nstr = NULL;
 | |
|   ret->tag_count = 0;
 | |
|   ret->snip_count = 0;
 | |
|   if (ret->flags & GRN_SNIP_NORMALIZE) {
 | |
|     ret->normalizer = GRN_NORMALIZER_AUTO;
 | |
|   } else {
 | |
|     ret->normalizer = NULL;
 | |
|   }
 | |
| 
 | |
|   GRN_DB_OBJ_SET_TYPE(ret, GRN_SNIP);
 | |
|   {
 | |
|     grn_obj *db;
 | |
|     grn_id id;
 | |
|     db = grn_ctx_db(ctx);
 | |
|     id = grn_obj_register(ctx, db, NULL, 0);
 | |
|     DB_OBJ(ret)->header.domain = GRN_ID_NIL;
 | |
|     DB_OBJ(ret)->range = GRN_ID_NIL;
 | |
|     grn_db_obj_init(ctx, db, id, DB_OBJ(ret));
 | |
|   }
 | |
| 
 | |
|   GRN_API_RETURN((grn_obj *)ret);
 | |
| }
 | |
| 
 | |
| static grn_rc
 | |
| exec_clean(grn_ctx *ctx, grn_snip *snip)
 | |
| {
 | |
|   snip_cond *cond, *cond_end;
 | |
|   if (snip->nstr) {
 | |
|     grn_obj_close(ctx, snip->nstr);
 | |
|     snip->nstr = NULL;
 | |
|   }
 | |
|   snip->tag_count = 0;
 | |
|   snip->snip_count = 0;
 | |
|   for (cond = snip->cond, cond_end = cond + snip->cond_len;
 | |
|        cond < cond_end; cond++) {
 | |
|     grn_snip_cond_reinit(cond);
 | |
|   }
 | |
|   return GRN_SUCCESS;
 | |
| }
 | |
| 
 | |
| grn_rc
 | |
| grn_snip_close(grn_ctx *ctx, grn_snip *snip)
 | |
| {
 | |
|   snip_cond *cond, *cond_end;
 | |
|   if (!snip) { return GRN_INVALID_ARGUMENT; }
 | |
|   GRN_API_ENTER;
 | |
|   if (snip->flags & GRN_SNIP_COPY_TAG) {
 | |
|     int i;
 | |
|     snip_cond *sc;
 | |
|     const char *dot = snip->defaultopentag, *dct = snip->defaultclosetag;
 | |
|     for (i = snip->cond_len, sc = snip->cond; i; i--, sc++) {
 | |
|       if (sc->opentag != dot) { GRN_FREE((void *)sc->opentag); }
 | |
|       if (sc->closetag != dct) { GRN_FREE((void *)sc->closetag); }
 | |
|     }
 | |
|     if (dot) { GRN_FREE((void *)dot); }
 | |
|     if (dct) { GRN_FREE((void *)dct); }
 | |
|   }
 | |
|   if (snip->nstr) {
 | |
|     grn_obj_close(ctx, snip->nstr);
 | |
|   }
 | |
|   for (cond = snip->cond, cond_end = cond + snip->cond_len;
 | |
|        cond < cond_end; cond++) {
 | |
|     grn_snip_cond_close(ctx, cond);
 | |
|   }
 | |
|   GRN_FREE(snip);
 | |
|   GRN_API_RETURN(GRN_SUCCESS);
 | |
| }
 | |
| 
 | |
| grn_rc
 | |
| grn_snip_exec(grn_ctx *ctx, grn_obj *snip, const char *string, unsigned int string_len,
 | |
|               unsigned int *nresults, unsigned int *max_tagged_len)
 | |
| {
 | |
|   size_t i;
 | |
|   grn_snip *snip_;
 | |
|   int f = GRN_STR_WITH_CHECKS|GRN_STR_REMOVEBLANK;
 | |
|   if (!snip || !string || !nresults || !max_tagged_len) {
 | |
|     return GRN_INVALID_ARGUMENT;
 | |
|   }
 | |
|   GRN_API_ENTER;
 | |
|   snip_ = (grn_snip *)snip;
 | |
|   exec_clean(ctx, snip_);
 | |
|   *nresults = 0;
 | |
|   snip_->nstr = grn_string_open(ctx, string, string_len, snip_->normalizer, f);
 | |
|   if (!snip_->nstr) {
 | |
|     exec_clean(ctx, snip_);
 | |
|     GRN_LOG(ctx, GRN_LOG_ALERT, "grn_string_open on grn_snip_exec failed !");
 | |
|     GRN_API_RETURN(ctx->rc);
 | |
|   }
 | |
|   for (i = 0; i < snip_->cond_len; i++) {
 | |
|     grn_bm_tunedbm(ctx, snip_->cond + i, snip_->nstr, snip_->flags);
 | |
|   }
 | |
| 
 | |
|   {
 | |
|     _snip_tag_result *tag_result = snip_->tag_result;
 | |
|     _snip_result *snip_result = snip_->snip_result;
 | |
|     size_t last_end_offset = 0, last_last_end_offset = 0;
 | |
|     unsigned int unfound_cond_count = snip_->cond_len;
 | |
| 
 | |
|     *max_tagged_len = 0;
 | |
|     while (1) {
 | |
|       size_t tagged_len = 0, last_tag_end = 0;
 | |
|       int_least8_t all_stop = 1, found_cond = 0;
 | |
|       snip_result->tag_count = 0;
 | |
| 
 | |
|       while (1) {
 | |
|         size_t min_start_offset = (size_t) -1;
 | |
|         size_t max_end_offset = 0;
 | |
|         snip_cond *cond = NULL;
 | |
| 
 | |
|         /* get condition which have minimum offset and is not stopped */
 | |
|         for (i = 0; i < snip_->cond_len; i++) {
 | |
|           if (snip_->cond[i].stopflag == SNIPCOND_NONSTOP &&
 | |
|               (min_start_offset > snip_->cond[i].start_offset ||
 | |
|                (min_start_offset == snip_->cond[i].start_offset &&
 | |
|                 max_end_offset < snip_->cond[i].end_offset))) {
 | |
|             min_start_offset = snip_->cond[i].start_offset;
 | |
|             max_end_offset = snip_->cond[i].end_offset;
 | |
|             cond = &snip_->cond[i];
 | |
|           }
 | |
|         }
 | |
|         if (!cond) {
 | |
|           break;
 | |
|         }
 | |
|         /* check whether condtion is the first condition in snippet */
 | |
|         if (snip_result->tag_count == 0) {
 | |
|           /* skip condition if the number of rest snippet field is smaller than */
 | |
|           /* the number of unfound keywords. */
 | |
|           if (snip_->max_results - *nresults <= unfound_cond_count && cond->count > 0) {
 | |
|             int_least8_t exclude_other_cond = 1;
 | |
|             for (i = 0; i < snip_->cond_len; i++) {
 | |
|               if ((snip_->cond + i) != cond
 | |
|                   && snip_->cond[i].end_offset <= cond->start_offset + snip_->width
 | |
|                   && snip_->cond[i].count == 0) {
 | |
|                 exclude_other_cond = 0;
 | |
|               }
 | |
|             }
 | |
|             if (exclude_other_cond) {
 | |
|               grn_bm_tunedbm(ctx, cond, snip_->nstr, snip_->flags);
 | |
|               continue;
 | |
|             }
 | |
|           }
 | |
|           snip_result->start_offset = cond->start_offset;
 | |
|           snip_result->first_tag_result_idx = snip_->tag_count;
 | |
|         } else {
 | |
|           if (cond->start_offset >= snip_result->start_offset + snip_->width) {
 | |
|             break;
 | |
|           }
 | |
|           /* check nesting to make valid HTML */
 | |
|           /* ToDo: allow <test><te>te</te><st>st</st></test> */
 | |
|           if (cond->start_offset < last_tag_end) {
 | |
|             grn_bm_tunedbm(ctx, cond, snip_->nstr, snip_->flags);
 | |
|             continue;
 | |
|           }
 | |
|         }
 | |
|         if (cond->end_offset > snip_result->start_offset + snip_->width) {
 | |
|           /* If a keyword gets across a snippet, */
 | |
|           /* it was skipped and never to be tagged. */
 | |
|           cond->stopflag = SNIPCOND_ACROSS;
 | |
|           grn_bm_tunedbm(ctx, cond, snip_->nstr, snip_->flags);
 | |
|         } else {
 | |
|           found_cond = 1;
 | |
|           if (cond->count == 0) {
 | |
|             unfound_cond_count--;
 | |
|           }
 | |
|           cond->count++;
 | |
|           last_end_offset = cond->end_offset;
 | |
| 
 | |
|           tag_result->cond = cond;
 | |
|           tag_result->start_offset = cond->start_offset;
 | |
|           tag_result->end_offset = last_tag_end = cond->end_offset;
 | |
| 
 | |
|           snip_result->tag_count++;
 | |
|           tag_result++;
 | |
|           tagged_len += cond->opentag_len + cond->closetag_len;
 | |
|           if (++snip_->tag_count >= MAX_SNIP_TAG_COUNT) {
 | |
|             break;
 | |
|           }
 | |
|           grn_bm_tunedbm(ctx, cond, snip_->nstr, snip_->flags);
 | |
|         }
 | |
|       }
 | |
|       if (!found_cond) {
 | |
|         break;
 | |
|       }
 | |
|       if (snip_result->start_offset + last_end_offset < snip_->width) {
 | |
|         snip_result->start_offset = 0;
 | |
|       } else {
 | |
|         snip_result->start_offset =
 | |
|           MAX(MIN
 | |
|               ((snip_result->start_offset + last_end_offset - snip_->width) / 2,
 | |
|                string_len - snip_->width), last_last_end_offset);
 | |
|       }
 | |
|       snip_result->start_offset =
 | |
|         grn_snip_find_firstbyte(string, snip_->encoding, snip_result->start_offset, 1);
 | |
| 
 | |
|       snip_result->end_offset = snip_result->start_offset + snip_->width;
 | |
|       if (snip_result->end_offset < string_len) {
 | |
|         snip_result->end_offset =
 | |
|           grn_snip_find_firstbyte(string, snip_->encoding, snip_result->end_offset, -1);
 | |
|       } else {
 | |
|         snip_result->end_offset = string_len;
 | |
|       }
 | |
|       last_last_end_offset = snip_result->end_offset;
 | |
| 
 | |
|       if (snip_->mapping == (grn_snip_mapping *) -1) {
 | |
|         tagged_len +=
 | |
|           count_mapped_chars(&string[snip_result->start_offset],
 | |
|                              &string[snip_result->end_offset]) + 1;
 | |
|       } else {
 | |
|         tagged_len += snip_result->end_offset - snip_result->start_offset + 1;
 | |
|       }
 | |
| 
 | |
|       *max_tagged_len = MAX(*max_tagged_len, tagged_len);
 | |
| 
 | |
|       snip_result->last_tag_result_idx = snip_->tag_count - 1;
 | |
|       (*nresults)++;
 | |
|       snip_result++;
 | |
| 
 | |
|       if (*nresults == snip_->max_results || snip_->tag_count == MAX_SNIP_TAG_COUNT) {
 | |
|         break;
 | |
|       }
 | |
|       for (i = 0; i < snip_->cond_len; i++) {
 | |
|         if (snip_->cond[i].stopflag != SNIPCOND_STOP) {
 | |
|           all_stop = 0;
 | |
|           snip_->cond[i].stopflag = SNIPCOND_NONSTOP;
 | |
|         }
 | |
|       }
 | |
|       if (all_stop) {
 | |
|         break;
 | |
|       }
 | |
|     }
 | |
|   }
 | |
|   snip_->snip_count = *nresults;
 | |
|   snip_->string = string;
 | |
| 
 | |
|   snip_->max_tagged_len = *max_tagged_len;
 | |
| 
 | |
|   GRN_API_RETURN(ctx->rc);
 | |
| }
 | |
| 
 | |
| grn_rc
 | |
| grn_snip_get_result(grn_ctx *ctx, grn_obj *snip, const unsigned int index, char *result, unsigned int *result_len)
 | |
| {
 | |
|   char *p;
 | |
|   size_t i, j, k;
 | |
|   _snip_result *sres;
 | |
|   grn_snip *snip_;
 | |
| 
 | |
|   snip_ = (grn_snip *)snip;
 | |
|   if (snip_->snip_count <= index || !snip_->nstr) {
 | |
|     return GRN_INVALID_ARGUMENT;
 | |
|   }
 | |
| 
 | |
|   GRN_ASSERT(snip_->snip_count != 0 && snip_->tag_count != 0);
 | |
| 
 | |
|   GRN_API_ENTER;
 | |
|   sres = &snip_->snip_result[index];
 | |
|   j = sres->first_tag_result_idx;
 | |
|   for (p = result, i = sres->start_offset; i < sres->end_offset; i++) {
 | |
|     for (; j <= sres->last_tag_result_idx && snip_->tag_result[j].start_offset == i; j++) {
 | |
|       if (snip_->tag_result[j].end_offset > sres->end_offset) {
 | |
|         continue;
 | |
|       }
 | |
|       grn_memcpy(p,
 | |
|                  snip_->tag_result[j].cond->opentag,
 | |
|                  snip_->tag_result[j].cond->opentag_len);
 | |
|       p += snip_->tag_result[j].cond->opentag_len;
 | |
|     }
 | |
| 
 | |
|     if (snip_->mapping == GRN_SNIP_MAPPING_HTML_ESCAPE) {
 | |
|       switch (snip_->string[i]) {
 | |
|       case '<':
 | |
|         *p++ = '&';
 | |
|         *p++ = 'l';
 | |
|         *p++ = 't';
 | |
|         *p++ = ';';
 | |
|         break;
 | |
|       case '>':
 | |
|         *p++ = '&';
 | |
|         *p++ = 'g';
 | |
|         *p++ = 't';
 | |
|         *p++ = ';';
 | |
|         break;
 | |
|       case '&':
 | |
|         *p++ = '&';
 | |
|         *p++ = 'a';
 | |
|         *p++ = 'm';
 | |
|         *p++ = 'p';
 | |
|         *p++ = ';';
 | |
|         break;
 | |
|       case '"':
 | |
|         *p++ = '&';
 | |
|         *p++ = 'q';
 | |
|         *p++ = 'u';
 | |
|         *p++ = 'o';
 | |
|         *p++ = 't';
 | |
|         *p++ = ';';
 | |
|         break;
 | |
|       default:
 | |
|         *p++ = snip_->string[i];
 | |
|         break;
 | |
|       }
 | |
|     } else {
 | |
|       *p++ = snip_->string[i];
 | |
|     }
 | |
| 
 | |
|     for (k = sres->last_tag_result_idx;
 | |
|          snip_->tag_result[k].end_offset <= sres->end_offset; k--) {
 | |
|       /* TODO: avoid all loop */
 | |
|       if (snip_->tag_result[k].end_offset == i + 1) {
 | |
|         grn_memcpy(p,
 | |
|                    snip_->tag_result[k].cond->closetag,
 | |
|                    snip_->tag_result[k].cond->closetag_len);
 | |
|         p += snip_->tag_result[k].cond->closetag_len;
 | |
|       }
 | |
|       if (k <= sres->first_tag_result_idx) {
 | |
|         break;
 | |
|       }
 | |
|     };
 | |
|   }
 | |
|   *p = '\0';
 | |
| 
 | |
|   if(result_len) { *result_len = (unsigned int)(p - result); }
 | |
|   GRN_ASSERT((unsigned int)(p - result) <= snip_->max_tagged_len);
 | |
| 
 | |
|   GRN_API_RETURN(ctx->rc);
 | |
| }
 | 
