diff --git a/evthr/Makefile b/evthr/Makefile new file mode 100644 index 0000000..860f02c --- /dev/null +++ b/evthr/Makefile @@ -0,0 +1,24 @@ +SRC = evthr.c +OUT = libevthr.a +OBJ = $(SRC:.c=.o) +INCLUDES = -I. +CFLAGS += -Wall -ggdb +LDFLAGS += -ggdb +CC = gcc + +.SUFFIXES: .c + +default: $(OUT) + +.c.o: + $(CC) $(INCLUDES) $(CFLAGS) -c $< -o $@ + +$(OUT): $(OBJ) + ar rcs $(OUT) $(OBJ) + +test: $(OUT) test.c + $(CC) $(INCLUDES) $(CFLAGS) test.c -o test $(OUT) -levent -levent_pthreads -lpthread + +clean: + rm -f $(OBJ) $(OUT) test + diff --git a/evthr/README b/evthr/README new file mode 100644 index 0000000..5ce37cb --- /dev/null +++ b/evthr/README @@ -0,0 +1,53 @@ +Libevthr is an API which manages threads and thread-pools in an event based +manner. This API requires libevent with threading support. + +Libevthr works a bit differently than most thread management systems. Instead of +conditional signalling and some type of pre-thread queue, Libevthr uses a +deferral type mechanism. That is, a thread is always running, abstracted to a +point where you "defer" your function *into* a thread. + +For example you can start up a single thread with a backlog of 10 (a backlog +being the max number of outstanding callbacks to run within the thread), and +execute a function you would like to run inside the thread one or many times. +The act of deferrals is non-blocking. + +Example Code for evthrs: + + evthr_t * thr = evthr_new(10, NULL); + + if (evthr_start(thr) < 0) { + exit(1); + } + + evthr_defer(thr, my_cb_1, NULL); + evthr_defer(thr, my_cb_2, NULL); + evthr_defer(thr, my_cb_3, NULL); + + sleep(n_seconds); + + evthr_stop(thr); + +Libevthr also has the ability to create pools using the same methods that a +single evthr has. 
For example, if you would like to create 10 threads, each +with a backlog of 5: + + evthr_pool_t * thr_pool = evthr_pool_new(10, 5, NULL); + + if (evthr_pool_start(thr_pool) < 0) { + exit(1); + } + + evthr_pool_defer(thr_pool, my_cb_1, NULL); + evthr_pool_defer(thr_pool, my_cb_2, NULL); + evthr_pool_defer(thr_pool, my_cb_3, NULL); + +Your callback functions which you defer must be of type "evthr_cb", or +"void cb_name(void * arg, void * shared)". In this case, the "arg" variable is +the data you passed as the third argument to either evthr_pool_defer, or +evthr_defer. The "shared" variable is the data that was either the second +variable in evthr_new(), or the third variable in evthr_pool_new(). + +The gist of this is to allow a global dataset, along with deferred specific +data. + +See test.c for a quick example. diff --git a/evthr/evthr.c b/evthr/evthr.c new file mode 100644 index 0000000..0c5267b --- /dev/null +++ b/evthr/evthr.c @@ -0,0 +1,468 @@ +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "evthr.h" + +#define _EVTHR_MAGIC 0x4d52 + +typedef struct evthr_cmd evthr_cmd_t; +typedef struct evthr_pool_slist evthr_pool_slist_t; + +struct evthr_cmd { + uint16_t magic; + uint8_t stop; + void * args; + evthr_cb cb; +}; + +TAILQ_HEAD(evthr_pool_slist, evthr); + +struct evthr_pool { + int nthreads; + int nprocs; + evthr_pool_slist_t threads; +}; + +struct evthr { + int cur_backlog; + int proc_to_use; + int rdr; + int wdr; + char err; + ev_t * event; + evbase_t * evbase; + pthread_mutex_t * lock; + pthread_mutex_t * stat_lock; + pthread_mutex_t * rlock; + pthread_t * thr; + void * args; + + TAILQ_ENTRY(evthr) next; +}; + +void +evthr_inc_backlog(evthr_t * evthr) { + __sync_fetch_and_add(&evthr->cur_backlog, 1); +} + +void +evthr_dec_backlog(evthr_t * evthr) { + __sync_fetch_and_sub(&evthr->cur_backlog, 1); +} + +int 
+evthr_get_backlog(evthr_t * evthr) { + return __sync_add_and_fetch(&evthr->cur_backlog, 0); +} + +static void +_evthr_read_cmd(int sock, short which, void * args) { + evthr_t * thread; + evthr_cmd_t cmd; + int avail = 0; + ssize_t recvd; + + if (!(thread = (evthr_t *)args)) { + return; + } + + if (pthread_mutex_trylock(thread->lock) != 0) { + return; + } + + if (ioctl(sock, FIONREAD, &avail) < 0) { + goto error; + } + + if (avail <= 0) { + goto end; + } + + if (avail < (int)sizeof(evthr_cmd_t)) { + goto end; + } + + pthread_mutex_lock(thread->rlock); + + if ((recvd = recv(sock, &cmd, sizeof(evthr_cmd_t), 0)) <= 0) { + pthread_mutex_unlock(thread->rlock); + if (errno == EAGAIN) { + goto end; + } else { + goto error; + } + } + + pthread_mutex_unlock(thread->rlock); + + if (recvd != sizeof(evthr_cmd_t)) { + goto error; + } + + if (cmd.magic != _EVTHR_MAGIC) { + goto error; + } + + if (cmd.stop == 1) { + goto stop; + } + + if (cmd.cb != NULL) { + cmd.cb(thread, cmd.args, thread->args); + goto done; + } else { + goto done; + } + +stop: + event_base_loopbreak(thread->evbase); +done: + evthr_dec_backlog(thread); +end: + pthread_mutex_unlock(thread->lock); + return; +error: + pthread_mutex_lock(thread->stat_lock); + thread->cur_backlog = -1; + thread->err = 1; + pthread_mutex_unlock(thread->stat_lock); + pthread_mutex_unlock(thread->lock); + event_base_loopbreak(thread->evbase); + return; +} /* _evthr_read_cmd */ + +static int +_evthr_get_num_procs(void) { + return sysconf(_SC_NPROCESSORS_ONLN); +} + +static void * +_evthr_loop(void * args) { + evthr_t * thread; + + if (!(thread = (evthr_t *)args)) { + return NULL; + } + + if (thread == NULL || thread->thr == NULL) { + pthread_exit(NULL); + } + + thread->evbase = event_base_new(); + thread->event = event_new(thread->evbase, thread->rdr, + EV_READ | EV_PERSIST, _evthr_read_cmd, args); + + event_add(thread->event, NULL); + event_base_loop(thread->evbase, 0); + + if (thread->err == 1) { + fprintf(stderr, "FATAL ERROR!\n"); + 
} + + evthr_free(thread); + pthread_exit(NULL); +} + +evthr_res +evthr_defer(evthr_t * thread, evthr_cb cb, void * arg) { + int cur_backlog; + evthr_cmd_t cmd = { 0 }; + + cur_backlog = evthr_get_backlog(thread); + + if (cur_backlog == -1) { + return EVTHR_RES_FATAL; + } + + evthr_inc_backlog(thread); + + cmd.magic = _EVTHR_MAGIC; + cmd.cb = cb; + cmd.args = arg; + cmd.stop = 0; + + pthread_mutex_lock(thread->rlock); + + if (send(thread->wdr, &cmd, sizeof(evthr_cmd_t), 0) <= 0) { + pthread_mutex_unlock(thread->rlock); + return EVTHR_RES_RETRY; + } + + pthread_mutex_unlock(thread->rlock); + + return EVTHR_RES_OK; +} + +evthr_res +evthr_stop(evthr_t * thread) { + evthr_cmd_t cmd = { 0 }; + + cmd.magic = _EVTHR_MAGIC; + cmd.cb = NULL; + cmd.args = NULL; + cmd.stop = 1; + + pthread_mutex_lock(thread->rlock); + + if (write(thread->wdr, &cmd, sizeof(evthr_cmd_t)) < 0) { + pthread_mutex_unlock(thread->rlock); + return EVTHR_RES_RETRY; + } + + pthread_mutex_unlock(thread->rlock); + + return EVTHR_RES_OK; +} + +evbase_t * +evthr_get_base(evthr_t * thr) { + return thr->evbase; +} + +evthr_t * +evthr_new(void * args, int proc_to_use) { + evthr_t * thread; + int fds[2]; + + if (socketpair(AF_UNIX, SOCK_STREAM, 0, fds) == -1) { + return NULL; + } + + if (!(thread = calloc(sizeof(evthr_t), sizeof(char)))) { + return NULL; + } + + thread->stat_lock = malloc(sizeof(pthread_mutex_t)); + thread->rlock = malloc(sizeof(pthread_mutex_t)); + thread->lock = malloc(sizeof(pthread_mutex_t)); + thread->thr = malloc(sizeof(pthread_t)); + thread->args = args; + thread->rdr = fds[0]; + thread->wdr = fds[1]; + thread->proc_to_use = proc_to_use; + + if (pthread_mutex_init(thread->lock, NULL)) { + evthr_free(thread); + return NULL; + } + + if (pthread_mutex_init(thread->stat_lock, NULL)) { + evthr_free(thread); + return NULL; + } + + if (pthread_mutex_init(thread->rlock, NULL)) { + evthr_free(thread); + return NULL; + } + + fcntl(thread->rdr, F_SETFL, O_NONBLOCK); + fcntl(thread->wdr, F_SETFL, 
O_NONBLOCK); + + return thread; +} /* evthr_new */ + +int +evthr_start(evthr_t * thread) { + if (thread == NULL || thread->thr == NULL) { + return -1; + } + + if (pthread_create(thread->thr, NULL, _evthr_loop, (void *)thread)) { + return -1; + } + + return pthread_detach(*thread->thr); +} + +void +evthr_free(evthr_t * thread) { + if (thread == NULL) { + return; + } + + if (thread->rdr > 0) { + close(thread->rdr); + } + + if (thread->wdr > 0) { + close(thread->wdr); + } + + if (thread->lock) { + pthread_mutex_destroy(thread->lock); + free(thread->lock); + } + + if (thread->stat_lock) { + pthread_mutex_destroy(thread->stat_lock); + } + + if (thread->rlock) { + pthread_mutex_destroy(thread->rlock); + } + + if (thread->thr) { + free(thread->thr); + } + + if (thread->event) { + event_free(thread->event); + } + + if (thread->evbase) { + event_base_free(thread->evbase); + } + + free(thread); +} + +void +evthr_pool_free(evthr_pool_t * pool) { + evthr_t * thread; + evthr_t * save; + + if (pool == NULL) { + return; + } + + for (thread = TAILQ_FIRST(&pool->threads); thread != NULL; thread = save) { + save = TAILQ_NEXT(thread, next); + + TAILQ_REMOVE(&pool->threads, thread, next); + + evthr_free(thread); + } + + free(pool); +} + +evthr_res +evthr_pool_stop(evthr_pool_t * pool) { + evthr_t * thr; + + if (pool == NULL) { + return EVTHR_RES_FATAL; + } + + TAILQ_FOREACH(thr, &pool->threads, next) { + evthr_stop(thr); + } + + memset(&pool->threads, 0, sizeof(pool->threads)); + + return EVTHR_RES_OK; +} + +evthr_res +evthr_pool_defer(evthr_pool_t * pool, evthr_cb cb, void * arg) { + evthr_t * min_thr = NULL; + evthr_t * thr = NULL; + + if (pool == NULL) { + return EVTHR_RES_FATAL; + } + + if (cb == NULL) { + return EVTHR_RES_NOCB; + } + + /* find the thread with the smallest backlog */ + TAILQ_FOREACH(thr, &pool->threads, next) { + evthr_t * m_save; + evthr_t * t_save; + int thr_backlog = 0; + int min_backlog = 0; + + thr_backlog = evthr_get_backlog(thr); + + if (min_thr) { + 
min_backlog = evthr_get_backlog(min_thr); + } + + m_save = min_thr; + t_save = thr; + + if (min_thr == NULL) { + min_thr = thr; + } else if (thr_backlog == 0) { + min_thr = thr; + } else if (thr_backlog < min_backlog) { + min_thr = thr; + } + + if (evthr_get_backlog(min_thr) == 0) { + break; + } + } + + return evthr_defer(min_thr, cb, arg); +} /* evthr_pool_defer */ + +evthr_pool_t * +evthr_pool_new(int nthreads, void * shared) { + evthr_pool_t * pool; + int i; + + if (nthreads == 0) { + return NULL; + } + + if (!(pool = calloc(sizeof(evthr_pool_t), sizeof(char)))) { + return NULL; + } + + pool->nprocs = _evthr_get_num_procs(); + pool->nthreads = nthreads; + TAILQ_INIT(&pool->threads); + + for (i = 0; i < nthreads; i++) { + evthr_t * thread; + int proc = i % pool->nprocs; + + if (!(thread = evthr_new(shared, proc))) { + evthr_pool_free(pool); + return NULL; + } + + TAILQ_INSERT_TAIL(&pool->threads, thread, next); + } + + return pool; +} + +int +evthr_pool_start(evthr_pool_t * pool) { + evthr_t * evthr = NULL; + + if (pool == NULL) { + return -1; + } + + TAILQ_FOREACH(evthr, &pool->threads, next) { + if (evthr_start(evthr) < 0) { + return -1; + } + + usleep(300); + } + + return 0; +} + diff --git a/evthr/evthr.h b/evthr/evthr.h new file mode 100644 index 0000000..80404b6 --- /dev/null +++ b/evthr/evthr.h @@ -0,0 +1,48 @@ +#define _GNU_SOURCE +#ifndef __EVTHR_H__ +#define __EVTHR_H__ + +#include +#include +#include +#include +#include + +struct evthr_pool; +struct evthr; + +typedef struct event_base evbase_t; +typedef struct event ev_t; + +typedef struct evthr_pool evthr_pool_t; +typedef struct evthr evthr_t; +typedef enum evthr_res evthr_res; + +typedef void (*evthr_cb)(evthr_t * thr, void * cmd_arg, void * shared); + +enum evthr_res { + EVTHR_RES_OK = 0, + EVTHR_RES_BACKLOG, + EVTHR_RES_RETRY, + EVTHR_RES_NOCB, + EVTHR_RES_FATAL +}; + +evthr_t * evthr_new(void * arg, int proc_to_use); +evbase_t * evthr_get_base(evthr_t * thr); +int evthr_start(evthr_t * evthr); 
+evthr_res evthr_stop(evthr_t * evthr); +evthr_res evthr_defer(evthr_t * evthr, evthr_cb cb, void * arg); +void evthr_free(evthr_t * evthr); +void evthr_inc_backlog(evthr_t * evthr); +void evthr_dec_backlog(evthr_t * evthr); +int evthr_get_backlog(evthr_t * evthr); + +evthr_pool_t * evthr_pool_new(int nthreads, void * shared); +int evthr_pool_start(evthr_pool_t * pool); +evthr_res evthr_pool_stop(evthr_pool_t * pool); +evthr_res evthr_pool_defer(evthr_pool_t * pool, evthr_cb cb, void * arg); +void evthr_pool_free(evthr_pool_t * pool); + +#endif /* __EVTHR_H__ */ + diff --git a/evthr/test.c b/evthr/test.c new file mode 100644 index 0000000..17e2b66 --- /dev/null +++ b/evthr/test.c @@ -0,0 +1,49 @@ +#include +#include +#include +#include +#include +#include + +#include + +static void +_test_cb_1(evthr_t * thr, void * cmdarg, void * shared) { + printf("START _test_cb_1 (%u)\n", (unsigned int)pthread_self()); + sleep(1); + printf("END _test_cb_1 (%u)\n", (unsigned int)pthread_self()); +} + +int +main(int argc, char ** argv) { + evthr_pool_t * pool = NULL; + int i = 0; + + evthread_use_pthreads(); + evthread_enable_lock_debuging(); + pool = evthr_pool_new(8, NULL); + + evthr_pool_start(pool); + + while (1) { + if (i++ >= 5) { + break; + } + + printf("Iter %d\n", i); + + printf("%d\n", evthr_pool_defer(pool, _test_cb_1, "derp")); + printf("%d\n", evthr_pool_defer(pool, _test_cb_1, "derp")); + printf("%d\n", evthr_pool_defer(pool, _test_cb_1, "derp")); + printf("%d\n", evthr_pool_defer(pool, _test_cb_1, "derp")); + printf("%d\n", evthr_pool_defer(pool, _test_cb_1, "derp")); + printf("%d\n", evthr_pool_defer(pool, _test_cb_1, "derp")); + + sleep(2); + } + + evthr_pool_stop(pool); + evthr_pool_free(pool); + return 0; +} + diff --git a/http_parser/.gitignore b/http_parser/.gitignore new file mode 100644 index 0000000..73fe6a4 --- /dev/null +++ b/http_parser/.gitignore @@ -0,0 +1,4 @@ +tags +*.o +test +test_g diff --git a/http_parser/CONTRIBUTIONS b/http_parser/CONTRIBUTIONS 
new file mode 100644 index 0000000..11ba31e --- /dev/null +++ b/http_parser/CONTRIBUTIONS @@ -0,0 +1,4 @@ +Contributors must agree to the Contributor License Agreement before patches +can be accepted. + +http://spreadsheets2.google.com/viewform?hl=en&formkey=dDJXOGUwbzlYaWM4cHN1MERwQS1CSnc6MQ diff --git a/http_parser/LICENSE-MIT b/http_parser/LICENSE-MIT new file mode 100644 index 0000000..40ebce2 --- /dev/null +++ b/http_parser/LICENSE-MIT @@ -0,0 +1,19 @@ +Copyright Joyent, Inc. and other Node contributors. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to +deal in the Software without restriction, including without limitation the +rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +sell copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +IN THE SOFTWARE. diff --git a/http_parser/Makefile b/http_parser/Makefile new file mode 100644 index 0000000..2b945c1 --- /dev/null +++ b/http_parser/Makefile @@ -0,0 +1,41 @@ +OPT_DEBUG=-O0 -g -Wall -Wextra -Werror -I. +OPT_FAST=-O3 -DHTTP_PARSER_STRICT=0 -I. 
+ +CC?=gcc + + +test: test_g + ./test_g + +test_g: http_parser_g.o test_g.o + $(CC) $(OPT_DEBUG) http_parser_g.o test_g.o -o $@ + +test_g.o: test.c http_parser.h Makefile + $(CC) $(OPT_DEBUG) -c test.c -o $@ + +test.o: test.c http_parser.h Makefile + $(CC) $(OPT_FAST) -c test.c -o $@ + +http_parser_g.o: http_parser.c http_parser.h Makefile + $(CC) $(OPT_DEBUG) -c http_parser.c -o $@ + +test-valgrind: test_g + valgrind ./test_g + +http_parser.o: http_parser.c http_parser.h Makefile + $(CC) $(OPT_FAST) -c http_parser.c + +test_fast: http_parser.o test.c http_parser.h + $(CC) $(OPT_FAST) http_parser.o test.c -o $@ + +test-run-timed: test_fast + while(true) do time ./test_fast > /dev/null; done + + +tags: http_parser.c http_parser.h test.c + ctags $^ + +clean: + rm -f *.o test test_fast test_g http_parser.tar tags + +.PHONY: clean package test-run test-run-timed test-valgrind diff --git a/http_parser/README.md b/http_parser/README.md new file mode 100644 index 0000000..72332fb --- /dev/null +++ b/http_parser/README.md @@ -0,0 +1,171 @@ +HTTP Parser +=========== + +This is a parser for HTTP messages written in C. It parses both requests and +responses. The parser is designed to be used in performance HTTP +applications. It does not make any syscalls nor allocations, it does not +buffer data, it can be interrupted at anytime. Depending on your +architecture, it only requires about 40 bytes of data per message +stream (in a web server that is per connection). + +Features: + + * No dependencies + * Handles persistent streams (keep-alive). + * Decodes chunked encoding. + * Upgrade support + * Defends against buffer overflow attacks. + +The parser extracts the following information from HTTP messages: + + * Header fields and values + * Content-Length + * Request method + * Response status code + * Transfer-Encoding + * HTTP version + * Request path, query string, fragment + * Message body + + +Usage +----- + +One `http_parser` object is used per TCP connection. 
Initialize the struct +using `http_parser_init()` and set the callbacks. That might look something +like this for a request parser: + + http_parser_settings settings; + settings.on_path = my_path_callback; + settings.on_header_field = my_header_field_callback; + /* ... */ + + http_parser *parser = malloc(sizeof(http_parser)); + http_parser_init(parser, HTTP_REQUEST); + parser->data = my_socket; + +When data is received on the socket, execute the parser and check for errors. + + size_t len = 80*1024, nparsed; + char buf[len]; + ssize_t recved; + + recved = recv(fd, buf, len, 0); + + if (recved < 0) { + /* Handle error. */ + } + + /* Start up / continue the parser. + * Note we pass recved==0 to signal that EOF has been received. + */ + nparsed = http_parser_execute(parser, &settings, buf, recved); + + if (parser->upgrade) { + /* handle new protocol */ + } else if (nparsed != recved) { + /* Handle error. Usually just close the connection. */ + } + +HTTP needs to know where the end of the stream is. For example, sometimes +servers send responses without Content-Length and expect the client to +consume input (for the body) until EOF. To tell http_parser about EOF, give +`0` as the fourth parameter to `http_parser_execute()`. Callbacks and errors +can still be encountered during an EOF, so one must still be prepared +to receive them. + +Scalar-valued message information such as `status_code`, `method`, and the +HTTP version are stored in the parser structure. This data is only +temporarily stored in `http_parser` and gets reset on each new message. If +this information is needed later, copy it out of the structure during the +`headers_complete` callback. + +The parser decodes the transfer-encoding for both requests and responses +transparently. That is, a chunked encoding is decoded before being sent to +the on_body callback. + + +The Special Problem of Upgrade +------------------------------ + +HTTP supports upgrading the connection to a different protocol. 
An +increasingly common example of this is the Web Socket protocol which sends +a request like + + GET /demo HTTP/1.1 + Upgrade: WebSocket + Connection: Upgrade + Host: example.com + Origin: http://example.com + WebSocket-Protocol: sample + +followed by non-HTTP data. + +(See http://tools.ietf.org/html/draft-hixie-thewebsocketprotocol-75 for more +information on the Web Socket protocol.) + +To support this, the parser will treat this as a normal HTTP message without a +body, issuing both on_headers_complete and on_message_complete callbacks. However, +http_parser_execute() will stop parsing at the end of the headers and return. + +The user is expected to check if `parser->upgrade` has been set to 1 after +`http_parser_execute()` returns. Non-HTTP data begins in the supplied buffer +at the offset given by the return value of `http_parser_execute()`. + + +Callbacks +--------- + +During the `http_parser_execute()` call, the callbacks set in +`http_parser_settings` will be executed. The parser maintains state and +never looks behind, so buffering the data is not necessary. If you need to +save certain data for later usage, you can do that from the callbacks. + +There are two types of callbacks: + +* notification `typedef int (*http_cb) (http_parser*);` + Callbacks: on_message_begin, on_headers_complete, on_message_complete. +* data `typedef int (*http_data_cb) (http_parser*, const char *at, size_t length);` + Callbacks: (requests only) on_path, on_query_string, on_uri, on_fragment, + (common) on_header_field, on_header_value, on_body; + +Callbacks must return 0 on success. Returning a non-zero value indicates +an error to the parser, making it exit immediately. + +In case you parse an HTTP message in chunks (i.e. `read()` request line +from socket, parse, read half headers, parse, etc.), your data callbacks +may be called more than once. Http-parser guarantees that the data pointer is +valid only for the lifetime of the callback. 
You can also `read()` into a heap-allocated +buffer to avoid copying memory around if this fits your application. + +Reading headers may be a tricky task if you read/parse headers partially. +Basically, you need to remember whether the last header callback was a field or a value +and apply the following logic: + + (on_header_field and on_header_value shortened to on_h_*) + ------------------------ ------------ -------------------------------------------- + | State (prev. callback) | Callback | Description/action | + ------------------------ ------------ -------------------------------------------- + | nothing (first call) | on_h_field | Allocate new buffer and copy callback data | + | | | into it | + ------------------------ ------------ -------------------------------------------- + | value | on_h_field | New header started. | + | | | Copy current name,value buffers to headers | + | | | list and allocate new buffer for new name | + ------------------------ ------------ -------------------------------------------- + | field | on_h_field | Previous name continues. Reallocate name | + | | | buffer and append callback data to it | + ------------------------ ------------ -------------------------------------------- + | field | on_h_value | Value for current header started. Allocate | + | | | new buffer and copy callback data to it | + ------------------------ ------------ -------------------------------------------- + | value | on_h_value | Value continues. 
Reallocate value buffer | + | | | and append callback data to it | + ------------------------ ------------ -------------------------------------------- + + +See examples of reading in headers: + +* [partial example](http://gist.github.com/155877) in C +* [from http-parser tests](http://github.com/ry/http-parser/blob/37a0ff8928fb0d83cec0d0d8909c5a4abcd221af/test.c#L403) in C +* [from Node library](http://github.com/ry/node/blob/842eaf446d2fdcb33b296c67c911c32a0dabc747/src/http.js#L284) in Javascript diff --git a/http_parser/http_parser.c b/http_parser/http_parser.c new file mode 100644 index 0000000..0fe0e8f --- /dev/null +++ b/http_parser/http_parser.c @@ -0,0 +1,1644 @@ +/* Copyright Joyent, Inc. and other Node contributors. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ +#include +#include +#include + + +#ifndef MIN +# define MIN(a,b) ((a) < (b) ? 
(a) : (b)) +#endif + + +#define CALLBACK2(FOR) \ +do { \ + if (settings->on_##FOR) { \ + if (0 != settings->on_##FOR(parser)) return (p - data); \ + } \ +} while (0) + + +#define MARK(FOR) \ +do { \ + FOR##_mark = p; \ +} while (0) + +#define CALLBACK_NOCLEAR(FOR) \ +do { \ + if (FOR##_mark) { \ + if (settings->on_##FOR) { \ + if (0 != settings->on_##FOR(parser, \ + FOR##_mark, \ + p - FOR##_mark)) \ + { \ + return (p - data); \ + } \ + } \ + } \ +} while (0) + + +#define CALLBACK(FOR) \ +do { \ + CALLBACK_NOCLEAR(FOR); \ + FOR##_mark = NULL; \ +} while (0) + + +#define PROXY_CONNECTION "proxy-connection" +#define CONNECTION "connection" +#define CONTENT_LENGTH "content-length" +#define TRANSFER_ENCODING "transfer-encoding" +#define UPGRADE "upgrade" +#define CHUNKED "chunked" +#define KEEP_ALIVE "keep-alive" +#define CLOSE "close" + + +static const char *method_strings[] = + { "DELETE" + , "GET" + , "HEAD" + , "POST" + , "PUT" + , "CONNECT" + , "OPTIONS" + , "TRACE" + , "COPY" + , "LOCK" + , "MKCOL" + , "MOVE" + , "PROPFIND" + , "PROPPATCH" + , "UNLOCK" + , "REPORT" + , "MKACTIVITY" + , "CHECKOUT" + , "MERGE" + , "M-SEARCH" + , "NOTIFY" + , "SUBSCRIBE" + , "UNSUBSCRIBE" + }; + + +/* Tokens as defined by rfc 2616. Also lowercases them. + * token = 1* + * separators = "(" | ")" | "<" | ">" | "@" + * | "," | ";" | ":" | "\" | <"> + * | "/" | "[" | "]" | "?" | "=" + * | "{" | "}" | SP | HT + */ +static const char tokens[256] = { +/* 0 nul 1 soh 2 stx 3 etx 4 eot 5 enq 6 ack 7 bel */ + 0, 0, 0, 0, 0, 0, 0, 0, +/* 8 bs 9 ht 10 nl 11 vt 12 np 13 cr 14 so 15 si */ + 0, 0, 0, 0, 0, 0, 0, 0, +/* 16 dle 17 dc1 18 dc2 19 dc3 20 dc4 21 nak 22 syn 23 etb */ + 0, 0, 0, 0, 0, 0, 0, 0, +/* 24 can 25 em 26 sub 27 esc 28 fs 29 gs 30 rs 31 us */ + 0, 0, 0, 0, 0, 0, 0, 0, +/* 32 sp 33 ! 34 " 35 # 36 $ 37 % 38 & 39 ' */ + ' ', '!', '"', '#', '$', '%', '&', '\'', +/* 40 ( 41 ) 42 * 43 + 44 , 45 - 46 . 
47 / */ + 0, 0, '*', '+', 0, '-', '.', '/', +/* 48 0 49 1 50 2 51 3 52 4 53 5 54 6 55 7 */ + '0', '1', '2', '3', '4', '5', '6', '7', +/* 56 8 57 9 58 : 59 ; 60 < 61 = 62 > 63 ? */ + '8', '9', 0, 0, 0, 0, 0, 0, +/* 64 @ 65 A 66 B 67 C 68 D 69 E 70 F 71 G */ + 0, 'a', 'b', 'c', 'd', 'e', 'f', 'g', +/* 72 H 73 I 74 J 75 K 76 L 77 M 78 N 79 O */ + 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', +/* 80 P 81 Q 82 R 83 S 84 T 85 U 86 V 87 W */ + 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', +/* 88 X 89 Y 90 Z 91 [ 92 \ 93 ] 94 ^ 95 _ */ + 'x', 'y', 'z', 0, 0, 0, '^', '_', +/* 96 ` 97 a 98 b 99 c 100 d 101 e 102 f 103 g */ + '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g', +/* 104 h 105 i 106 j 107 k 108 l 109 m 110 n 111 o */ + 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', +/* 112 p 113 q 114 r 115 s 116 t 117 u 118 v 119 w */ + 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', +/* 120 x 121 y 122 z 123 { 124 | 125 } 126 ~ 127 del */ + 'x', 'y', 'z', 0, '|', '}', '~', 0 }; + + +static const int8_t unhex[256] = + {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 + ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 + ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 + , 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1 + ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1 + ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 + ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1 + ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 + }; + + +static const uint8_t normal_url_char[256] = { +/* 0 nul 1 soh 2 stx 3 etx 4 eot 5 enq 6 ack 7 bel */ + 0, 0, 0, 0, 0, 0, 0, 0, +/* 8 bs 9 ht 10 nl 11 vt 12 np 13 cr 14 so 15 si */ + 0, 0, 0, 0, 0, 0, 0, 0, +/* 16 dle 17 dc1 18 dc2 19 dc3 20 dc4 21 nak 22 syn 23 etb */ + 0, 0, 0, 0, 0, 0, 0, 0, +/* 24 can 25 em 26 sub 27 esc 28 fs 29 gs 30 rs 31 us */ + 0, 0, 0, 0, 0, 0, 0, 0, +/* 32 sp 33 ! 34 " 35 # 36 $ 37 % 38 & 39 ' */ + 0, 1, 1, 0, 1, 1, 1, 1, +/* 40 ( 41 ) 42 * 43 + 44 , 45 - 46 . 
47 / */ + 1, 1, 1, 1, 1, 1, 1, 1, +/* 48 0 49 1 50 2 51 3 52 4 53 5 54 6 55 7 */ + 1, 1, 1, 1, 1, 1, 1, 1, +/* 56 8 57 9 58 : 59 ; 60 < 61 = 62 > 63 ? */ + 1, 1, 1, 1, 1, 1, 1, 0, +/* 64 @ 65 A 66 B 67 C 68 D 69 E 70 F 71 G */ + 1, 1, 1, 1, 1, 1, 1, 1, +/* 72 H 73 I 74 J 75 K 76 L 77 M 78 N 79 O */ + 1, 1, 1, 1, 1, 1, 1, 1, +/* 80 P 81 Q 82 R 83 S 84 T 85 U 86 V 87 W */ + 1, 1, 1, 1, 1, 1, 1, 1, +/* 88 X 89 Y 90 Z 91 [ 92 \ 93 ] 94 ^ 95 _ */ + 1, 1, 1, 1, 1, 1, 1, 1, +/* 96 ` 97 a 98 b 99 c 100 d 101 e 102 f 103 g */ + 1, 1, 1, 1, 1, 1, 1, 1, +/* 104 h 105 i 106 j 107 k 108 l 109 m 110 n 111 o */ + 1, 1, 1, 1, 1, 1, 1, 1, +/* 112 p 113 q 114 r 115 s 116 t 117 u 118 v 119 w */ + 1, 1, 1, 1, 1, 1, 1, 1, +/* 120 x 121 y 122 z 123 { 124 | 125 } 126 ~ 127 del */ + 1, 1, 1, 1, 1, 1, 1, 0, + +/* Remainder of non-ASCII range are accepted as-is to support implicitly UTF-8 + encoded paths. This is out of spec, but clients generate this and most other + HTTP servers support it. We should, too. */ + + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1 }; + + +enum state + { s_dead = 1 /* important that this is > 0 */ + + , s_start_req_or_res + , s_res_or_resp_H + , s_start_res + , s_res_H + , s_res_HT + , s_res_HTT + , s_res_HTTP + , s_res_first_http_major + , s_res_http_major + , s_res_first_http_minor + , s_res_http_minor + , s_res_first_status_code + , s_res_status_code + , s_res_status + , s_res_line_almost_done + + , s_start_req + + , s_req_method + , s_req_spaces_before_url + , s_req_schema + , s_req_schema_slash + , s_req_schema_slash_slash + , s_req_host + , s_req_port + , s_req_path + , s_req_query_string_start + 
, s_req_query_string + , s_req_fragment_start + , s_req_fragment + , s_req_http_start + , s_req_http_H + , s_req_http_HT + , s_req_http_HTT + , s_req_http_HTTP + , s_req_first_http_major + , s_req_http_major + , s_req_first_http_minor + , s_req_http_minor + , s_req_line_almost_done + + , s_header_field_start + , s_header_field + , s_header_value_start + , s_header_value + + , s_header_almost_done + + , s_chunk_size_start + , s_chunk_size + , s_chunk_parameters + , s_chunk_size_almost_done + + , s_headers_almost_done + /* Important: 's_headers_almost_done' must be the last 'header' state. All + * states beyond this must be 'body' states. It is used for overflow + * checking. See the PARSING_HEADER() macro. + */ + + , s_chunk_data + , s_chunk_data_almost_done + , s_chunk_data_done + + , s_body_identity + , s_body_identity_eof + }; + + +#define PARSING_HEADER(state) (state <= s_headers_almost_done) + + +enum header_states + { h_general = 0 + , h_C + , h_CO + , h_CON + + , h_matching_connection + , h_matching_proxy_connection + , h_matching_content_length + , h_matching_transfer_encoding + , h_matching_upgrade + + , h_connection + , h_content_length + , h_transfer_encoding + , h_upgrade + + , h_matching_transfer_encoding_chunked + , h_matching_connection_keep_alive + , h_matching_connection_close + + , h_transfer_encoding_chunked + , h_connection_keep_alive + , h_connection_close + }; + + +enum flags + { F_CHUNKED = 1 << 0 + , F_CONNECTION_KEEP_ALIVE = 1 << 1 + , F_CONNECTION_CLOSE = 1 << 2 + , F_TRAILING = 1 << 3 + , F_UPGRADE = 1 << 4 + , F_SKIPBODY = 1 << 5 + }; + + +#define CR '\r' +#define LF '\n' +#define LOWER(c) (unsigned char)(c | 0x20) +#define TOKEN(c) tokens[(unsigned char)c] + + +#define start_state (parser->type == HTTP_REQUEST ? s_start_req : s_start_res) + + +#if HTTP_PARSER_STRICT +# define STRICT_CHECK(cond) if (cond) goto error +# define NEW_MESSAGE() (http_should_keep_alive(parser) ? 
start_state : s_dead) +#else +# define STRICT_CHECK(cond) +# define NEW_MESSAGE() start_state +#endif + + +size_t http_parser_execute (http_parser *parser, + const http_parser_settings *settings, + const char *data, + size_t len) +{ + char c, ch; + const char *p = data, *pe; + int64_t to_read; + + enum state state = (enum state) parser->state; + enum header_states header_state = (enum header_states) parser->header_state; + uint64_t index = parser->index; + uint64_t nread = parser->nread; + + if (len == 0) { + switch (state) { + case s_body_identity_eof: + CALLBACK2(message_complete); + return 0; + + case s_dead: + case s_start_req_or_res: + case s_start_res: + case s_start_req: + return 0; + + default: + return 1; // error + } + } + + /* technically we could combine all of these (except for url_mark) into one + variable, saving stack space, but it seems more clear to have them + separated. */ + const char *header_field_mark = 0; + const char *header_value_mark = 0; + const char *fragment_mark = 0; + const char *query_string_mark = 0; + const char *path_mark = 0; + const char *url_mark = 0; + + if (state == s_header_field) + header_field_mark = data; + if (state == s_header_value) + header_value_mark = data; + if (state == s_req_fragment) + fragment_mark = data; + if (state == s_req_query_string) + query_string_mark = data; + if (state == s_req_path) + path_mark = data; + if (state == s_req_path || state == s_req_schema || state == s_req_schema_slash + || state == s_req_schema_slash_slash || state == s_req_port + || state == s_req_query_string_start || state == s_req_query_string + || state == s_req_host + || state == s_req_fragment_start || state == s_req_fragment) + url_mark = data; + + for (p=data, pe=data+len; p != pe; p++) { + ch = *p; + + if (PARSING_HEADER(state)) { + ++nread; + /* Buffer overflow attack */ + if (nread > HTTP_MAX_HEADER_SIZE) goto error; + } + + switch (state) { + + case s_dead: + /* this state is used after a 'Connection: close' message + * 
the parser will error out if it reads another message + */ + goto error; + + case s_start_req_or_res: + { + if (ch == CR || ch == LF) + break; + parser->flags = 0; + parser->content_length = -1; + + CALLBACK2(message_begin); + + if (ch == 'H') + state = s_res_or_resp_H; + else { + parser->type = HTTP_REQUEST; + goto start_req_method_assign; + } + break; + } + + case s_res_or_resp_H: + if (ch == 'T') { + parser->type = HTTP_RESPONSE; + state = s_res_HT; + } else { + if (ch != 'E') goto error; + parser->type = HTTP_REQUEST; + parser->method = HTTP_HEAD; + index = 2; + state = s_req_method; + } + break; + + case s_start_res: + { + parser->flags = 0; + parser->content_length = -1; + + CALLBACK2(message_begin); + + switch (ch) { + case 'H': + state = s_res_H; + break; + + case CR: + case LF: + break; + + default: + goto error; + } + break; + } + + case s_res_H: + STRICT_CHECK(ch != 'T'); + state = s_res_HT; + break; + + case s_res_HT: + STRICT_CHECK(ch != 'T'); + state = s_res_HTT; + break; + + case s_res_HTT: + STRICT_CHECK(ch != 'P'); + state = s_res_HTTP; + break; + + case s_res_HTTP: + STRICT_CHECK(ch != '/'); + state = s_res_first_http_major; + break; + + case s_res_first_http_major: + if (ch < '1' || ch > '9') goto error; + parser->http_major = ch - '0'; + state = s_res_http_major; + break; + + /* major HTTP version or dot */ + case s_res_http_major: + { + if (ch == '.') { + state = s_res_first_http_minor; + break; + } + + if (ch < '0' || ch > '9') goto error; + + parser->http_major *= 10; + parser->http_major += ch - '0'; + + if (parser->http_major > 999) goto error; + break; + } + + /* first digit of minor HTTP version */ + case s_res_first_http_minor: + if (ch < '0' || ch > '9') goto error; + parser->http_minor = ch - '0'; + state = s_res_http_minor; + break; + + /* minor HTTP version or end of request line */ + case s_res_http_minor: + { + if (ch == ' ') { + state = s_res_first_status_code; + break; + } + + if (ch < '0' || ch > '9') goto error; + + 
parser->http_minor *= 10; + parser->http_minor += ch - '0'; + + if (parser->http_minor > 999) goto error; + break; + } + + case s_res_first_status_code: + { + if (ch < '0' || ch > '9') { + if (ch == ' ') { + break; + } + goto error; + } + parser->status_code = ch - '0'; + state = s_res_status_code; + break; + } + + case s_res_status_code: + { + if (ch < '0' || ch > '9') { + switch (ch) { + case ' ': + state = s_res_status; + break; + case CR: + state = s_res_line_almost_done; + break; + case LF: + state = s_header_field_start; + break; + default: + goto error; + } + break; + } + + parser->status_code *= 10; + parser->status_code += ch - '0'; + + if (parser->status_code > 999) goto error; + break; + } + + case s_res_status: + /* the human readable status. e.g. "NOT FOUND" + * we are not humans so just ignore this */ + if (ch == CR) { + state = s_res_line_almost_done; + break; + } + + if (ch == LF) { + state = s_header_field_start; + break; + } + break; + + case s_res_line_almost_done: + STRICT_CHECK(ch != LF); + state = s_header_field_start; + break; + + case s_start_req: + { + if (ch == CR || ch == LF) + break; + parser->flags = 0; + parser->content_length = -1; + + CALLBACK2(message_begin); + + if (ch < 'A' || 'Z' < ch) goto error; + + start_req_method_assign: + parser->method = (enum http_method) 0; + index = 1; + switch (ch) { + case 'C': parser->method = HTTP_CONNECT; /* or COPY, CHECKOUT */ break; + case 'D': parser->method = HTTP_DELETE; break; + case 'G': parser->method = HTTP_GET; break; + case 'H': parser->method = HTTP_HEAD; break; + case 'L': parser->method = HTTP_LOCK; break; + case 'M': parser->method = HTTP_MKCOL; /* or MOVE, MKACTIVITY, MERGE, M-SEARCH */ break; + case 'N': parser->method = HTTP_NOTIFY; break; + case 'O': parser->method = HTTP_OPTIONS; break; + case 'P': parser->method = HTTP_POST; /* or PROPFIND or PROPPATCH or PUT */ break; + case 'R': parser->method = HTTP_REPORT; break; + case 'S': parser->method = HTTP_SUBSCRIBE; break; + case 
'T': parser->method = HTTP_TRACE; break; + case 'U': parser->method = HTTP_UNLOCK; /* or UNSUBSCRIBE */ break; + default: goto error; + } + state = s_req_method; + break; + } + + case s_req_method: + { + if (ch == '\0') + goto error; + + const char *matcher = method_strings[parser->method]; + if (ch == ' ' && matcher[index] == '\0') { + state = s_req_spaces_before_url; + } else if (ch == matcher[index]) { + ; /* nada */ + } else if (parser->method == HTTP_CONNECT) { + if (index == 1 && ch == 'H') { + parser->method = HTTP_CHECKOUT; + } else if (index == 2 && ch == 'P') { + parser->method = HTTP_COPY; + } + } else if (parser->method == HTTP_MKCOL) { + if (index == 1 && ch == 'O') { + parser->method = HTTP_MOVE; + } else if (index == 1 && ch == 'E') { + parser->method = HTTP_MERGE; + } else if (index == 1 && ch == '-') { + parser->method = HTTP_MSEARCH; + } else if (index == 2 && ch == 'A') { + parser->method = HTTP_MKACTIVITY; + } + } else if (index == 1 && parser->method == HTTP_POST && ch == 'R') { + parser->method = HTTP_PROPFIND; /* or HTTP_PROPPATCH */ + } else if (index == 1 && parser->method == HTTP_POST && ch == 'U') { + parser->method = HTTP_PUT; + } else if (index == 2 && parser->method == HTTP_UNLOCK && ch == 'S') { + parser->method = HTTP_UNSUBSCRIBE; + } else if (index == 4 && parser->method == HTTP_PROPFIND && ch == 'P') { + parser->method = HTTP_PROPPATCH; + } else { + goto error; + } + + ++index; + break; + } + case s_req_spaces_before_url: + { + if (ch == ' ') break; + + if (ch == '/' || ch == '*') { + MARK(url); + MARK(path); + state = s_req_path; + break; + } + + c = LOWER(ch); + + if (c >= 'a' && c <= 'z') { + MARK(url); + state = s_req_schema; + break; + } + + goto error; + } + + case s_req_schema: + { + c = LOWER(ch); + + if (c >= 'a' && c <= 'z') break; + + if (ch == ':') { + state = s_req_schema_slash; + break; + } else if (ch == '.') { + state = s_req_host; + break; + } else if ('0' <= ch && ch <= '9') { + state = s_req_host; + break; + } + 
+ goto error; + } + + case s_req_schema_slash: + STRICT_CHECK(ch != '/'); + state = s_req_schema_slash_slash; + break; + + case s_req_schema_slash_slash: + STRICT_CHECK(ch != '/'); + state = s_req_host; + break; + + case s_req_host: + { + c = LOWER(ch); + if (c >= 'a' && c <= 'z') break; + if ((ch >= '0' && ch <= '9') || ch == '.' || ch == '-') break; + switch (ch) { + case ':': + state = s_req_port; + break; + case '/': + MARK(path); + state = s_req_path; + break; + case ' ': + /* The request line looks like: + * "GET http://foo.bar.com HTTP/1.1" + * That is, there is no path. + */ + CALLBACK(url); + state = s_req_http_start; + break; + case '?': + state = s_req_query_string_start; + break; + default: + goto error; + } + break; + } + + case s_req_port: + { + if (ch >= '0' && ch <= '9') break; + switch (ch) { + case '/': + MARK(path); + state = s_req_path; + break; + case ' ': + /* The request line looks like: + * "GET http://foo.bar.com:1234 HTTP/1.1" + * That is, there is no path. + */ + CALLBACK(url); + state = s_req_http_start; + break; + case '?': + state = s_req_query_string_start; + break; + default: + goto error; + } + break; + } + + case s_req_path: + { + if (normal_url_char[(unsigned char)ch]) break; + + switch (ch) { + case ' ': + CALLBACK(url); + CALLBACK(path); + state = s_req_http_start; + break; + case CR: + CALLBACK(url); + CALLBACK(path); + parser->http_major = 0; + parser->http_minor = 9; + state = s_req_line_almost_done; + break; + case LF: + CALLBACK(url); + CALLBACK(path); + parser->http_major = 0; + parser->http_minor = 9; + state = s_header_field_start; + break; + case '?': + CALLBACK(path); + state = s_req_query_string_start; + break; + case '#': + CALLBACK(path); + state = s_req_fragment_start; + break; + default: + goto error; + } + break; + } + + case s_req_query_string_start: + { + if (normal_url_char[(unsigned char)ch]) { + MARK(query_string); + state = s_req_query_string; + break; + } + + switch (ch) { + case '?': + break; /* XXX 
ignore extra '?' ... is this right? */ + case ' ': + CALLBACK(url); + state = s_req_http_start; + break; + case CR: + CALLBACK(url); + parser->http_major = 0; + parser->http_minor = 9; + state = s_req_line_almost_done; + break; + case LF: + CALLBACK(url); + parser->http_major = 0; + parser->http_minor = 9; + state = s_header_field_start; + break; + case '#': + state = s_req_fragment_start; + break; + default: + goto error; + } + break; + } + + case s_req_query_string: + { + if (normal_url_char[(unsigned char)ch]) break; + + switch (ch) { + case '?': + /* allow extra '?' in query string */ + break; + case ' ': + CALLBACK(url); + CALLBACK(query_string); + state = s_req_http_start; + break; + case CR: + CALLBACK(url); + CALLBACK(query_string); + parser->http_major = 0; + parser->http_minor = 9; + state = s_req_line_almost_done; + break; + case LF: + CALLBACK(url); + CALLBACK(query_string); + parser->http_major = 0; + parser->http_minor = 9; + state = s_header_field_start; + break; + case '#': + CALLBACK(query_string); + state = s_req_fragment_start; + break; + default: + goto error; + } + break; + } + + case s_req_fragment_start: + { + if (normal_url_char[(unsigned char)ch]) { + MARK(fragment); + state = s_req_fragment; + break; + } + + switch (ch) { + case ' ': + CALLBACK(url); + state = s_req_http_start; + break; + case CR: + CALLBACK(url); + parser->http_major = 0; + parser->http_minor = 9; + state = s_req_line_almost_done; + break; + case LF: + CALLBACK(url); + parser->http_major = 0; + parser->http_minor = 9; + state = s_header_field_start; + break; + case '?': + MARK(fragment); + state = s_req_fragment; + break; + case '#': + break; + default: + goto error; + } + break; + } + + case s_req_fragment: + { + if (normal_url_char[(unsigned char)ch]) break; + + switch (ch) { + case ' ': + CALLBACK(url); + CALLBACK(fragment); + state = s_req_http_start; + break; + case CR: + CALLBACK(url); + CALLBACK(fragment); + parser->http_major = 0; + parser->http_minor = 9; + state 
= s_req_line_almost_done; + break; + case LF: + CALLBACK(url); + CALLBACK(fragment); + parser->http_major = 0; + parser->http_minor = 9; + state = s_header_field_start; + break; + case '?': + case '#': + break; + default: + goto error; + } + break; + } + + case s_req_http_start: + switch (ch) { + case 'H': + state = s_req_http_H; + break; + case ' ': + break; + default: + goto error; + } + break; + + case s_req_http_H: + STRICT_CHECK(ch != 'T'); + state = s_req_http_HT; + break; + + case s_req_http_HT: + STRICT_CHECK(ch != 'T'); + state = s_req_http_HTT; + break; + + case s_req_http_HTT: + STRICT_CHECK(ch != 'P'); + state = s_req_http_HTTP; + break; + + case s_req_http_HTTP: + STRICT_CHECK(ch != '/'); + state = s_req_first_http_major; + break; + + /* first digit of major HTTP version */ + case s_req_first_http_major: + if (ch < '1' || ch > '9') goto error; + parser->http_major = ch - '0'; + state = s_req_http_major; + break; + + /* major HTTP version or dot */ + case s_req_http_major: + { + if (ch == '.') { + state = s_req_first_http_minor; + break; + } + + if (ch < '0' || ch > '9') goto error; + + parser->http_major *= 10; + parser->http_major += ch - '0'; + + if (parser->http_major > 999) goto error; + break; + } + + /* first digit of minor HTTP version */ + case s_req_first_http_minor: + if (ch < '0' || ch > '9') goto error; + parser->http_minor = ch - '0'; + state = s_req_http_minor; + break; + + /* minor HTTP version or end of request line */ + case s_req_http_minor: + { + if (ch == CR) { + state = s_req_line_almost_done; + break; + } + + if (ch == LF) { + state = s_header_field_start; + break; + } + + /* XXX allow spaces after digit? 
*/ + + if (ch < '0' || ch > '9') goto error; + + parser->http_minor *= 10; + parser->http_minor += ch - '0'; + + if (parser->http_minor > 999) goto error; + break; + } + + /* end of request line */ + case s_req_line_almost_done: + { + if (ch != LF) goto error; + state = s_header_field_start; + break; + } + + case s_header_field_start: + { + if (ch == CR) { + state = s_headers_almost_done; + break; + } + + if (ch == LF) { + /* they might be just sending \n instead of \r\n so this would be + * the second \n to denote the end of headers*/ + state = s_headers_almost_done; + goto headers_almost_done; + } + + c = TOKEN(ch); + + if (!c) goto error; + + MARK(header_field); + + index = 0; + state = s_header_field; + + switch (c) { + case 'c': + header_state = h_C; + break; + + case 'p': + header_state = h_matching_proxy_connection; + break; + + case 't': + header_state = h_matching_transfer_encoding; + break; + + case 'u': + header_state = h_matching_upgrade; + break; + + default: + header_state = h_general; + break; + } + break; + } + + case s_header_field: + { + c = TOKEN(ch); + + if (c) { + switch (header_state) { + case h_general: + break; + + case h_C: + index++; + header_state = (c == 'o' ? h_CO : h_general); + break; + + case h_CO: + index++; + header_state = (c == 'n' ? 
h_CON : h_general); + break; + + case h_CON: + index++; + switch (c) { + case 'n': + header_state = h_matching_connection; + break; + case 't': + header_state = h_matching_content_length; + break; + default: + header_state = h_general; + break; + } + break; + + /* connection */ + + case h_matching_connection: + index++; + if (index > sizeof(CONNECTION)-1 + || c != CONNECTION[index]) { + header_state = h_general; + } else if (index == sizeof(CONNECTION)-2) { + header_state = h_connection; + } + break; + + /* proxy-connection */ + + case h_matching_proxy_connection: + index++; + if (index > sizeof(PROXY_CONNECTION)-1 + || c != PROXY_CONNECTION[index]) { + header_state = h_general; + } else if (index == sizeof(PROXY_CONNECTION)-2) { + header_state = h_connection; + } + break; + + /* content-length */ + + case h_matching_content_length: + index++; + if (index > sizeof(CONTENT_LENGTH)-1 + || c != CONTENT_LENGTH[index]) { + header_state = h_general; + } else if (index == sizeof(CONTENT_LENGTH)-2) { + header_state = h_content_length; + } + break; + + /* transfer-encoding */ + + case h_matching_transfer_encoding: + index++; + if (index > sizeof(TRANSFER_ENCODING)-1 + || c != TRANSFER_ENCODING[index]) { + header_state = h_general; + } else if (index == sizeof(TRANSFER_ENCODING)-2) { + header_state = h_transfer_encoding; + } + break; + + /* upgrade */ + + case h_matching_upgrade: + index++; + if (index > sizeof(UPGRADE)-1 + || c != UPGRADE[index]) { + header_state = h_general; + } else if (index == sizeof(UPGRADE)-2) { + header_state = h_upgrade; + } + break; + + case h_connection: + case h_content_length: + case h_transfer_encoding: + case h_upgrade: + if (ch != ' ') header_state = h_general; + break; + + default: + assert(0 && "Unknown header_state"); + break; + } + break; + } + + if (ch == ':') { + CALLBACK(header_field); + state = s_header_value_start; + break; + } + + if (ch == CR) { + state = s_header_almost_done; + CALLBACK(header_field); + break; + } + + if (ch == 
LF) { + CALLBACK(header_field); + state = s_header_field_start; + break; + } + + goto error; + } + + case s_header_value_start: + { + if (ch == ' ') break; + + MARK(header_value); + + state = s_header_value; + index = 0; + + c = LOWER(ch); + + if (ch == CR) { + CALLBACK(header_value); + header_state = h_general; + state = s_header_almost_done; + break; + } + + if (ch == LF) { + CALLBACK(header_value); + state = s_header_field_start; + break; + } + + switch (header_state) { + case h_upgrade: + parser->flags |= F_UPGRADE; + header_state = h_general; + break; + + case h_transfer_encoding: + /* looking for 'Transfer-Encoding: chunked' */ + if ('c' == c) { + header_state = h_matching_transfer_encoding_chunked; + } else { + header_state = h_general; + } + break; + + case h_content_length: + if (ch < '0' || ch > '9') goto error; + parser->content_length = ch - '0'; + break; + + case h_connection: + /* looking for 'Connection: keep-alive' */ + if (c == 'k') { + header_state = h_matching_connection_keep_alive; + /* looking for 'Connection: close' */ + } else if (c == 'c') { + header_state = h_matching_connection_close; + } else { + header_state = h_general; + } + break; + + default: + header_state = h_general; + break; + } + break; + } + + case s_header_value: + { + c = LOWER(ch); + + if (ch == CR) { + CALLBACK(header_value); + state = s_header_almost_done; + break; + } + + if (ch == LF) { + CALLBACK(header_value); + goto header_almost_done; + } + + switch (header_state) { + case h_general: + break; + + case h_connection: + case h_transfer_encoding: + assert(0 && "Shouldn't get here."); + break; + + case h_content_length: + if (ch == ' ') break; + if (ch < '0' || ch > '9') goto error; + parser->content_length *= 10; + parser->content_length += ch - '0'; + break; + + /* Transfer-Encoding: chunked */ + case h_matching_transfer_encoding_chunked: + index++; + if (index > sizeof(CHUNKED)-1 + || c != CHUNKED[index]) { + header_state = h_general; + } else if (index == 
sizeof(CHUNKED)-2) { + header_state = h_transfer_encoding_chunked; + } + break; + + /* looking for 'Connection: keep-alive' */ + case h_matching_connection_keep_alive: + index++; + if (index > sizeof(KEEP_ALIVE)-1 + || c != KEEP_ALIVE[index]) { + header_state = h_general; + } else if (index == sizeof(KEEP_ALIVE)-2) { + header_state = h_connection_keep_alive; + } + break; + + /* looking for 'Connection: close' */ + case h_matching_connection_close: + index++; + if (index > sizeof(CLOSE)-1 || c != CLOSE[index]) { + header_state = h_general; + } else if (index == sizeof(CLOSE)-2) { + header_state = h_connection_close; + } + break; + + case h_transfer_encoding_chunked: + case h_connection_keep_alive: + case h_connection_close: + if (ch != ' ') header_state = h_general; + break; + + default: + state = s_header_value; + header_state = h_general; + break; + } + break; + } + + case s_header_almost_done: + header_almost_done: + { + STRICT_CHECK(ch != LF); + + state = s_header_field_start; + + switch (header_state) { + case h_connection_keep_alive: + parser->flags |= F_CONNECTION_KEEP_ALIVE; + break; + case h_connection_close: + parser->flags |= F_CONNECTION_CLOSE; + break; + case h_transfer_encoding_chunked: + parser->flags |= F_CHUNKED; + break; + default: + break; + } + break; + } + + case s_headers_almost_done: + headers_almost_done: + { + STRICT_CHECK(ch != LF); + + if (parser->flags & F_TRAILING) { + /* End of a chunked request */ + CALLBACK2(message_complete); + state = NEW_MESSAGE(); + break; + } + + nread = 0; + + if (parser->flags & F_UPGRADE || parser->method == HTTP_CONNECT) { + parser->upgrade = 1; + } + + /* Here we call the headers_complete callback. This is somewhat + * different than other callbacks because if the user returns 1, we + * will interpret that as saying that this message has no body. This + * is needed for the annoying case of recieving a response to a HEAD + * request. 
+ */ + if (settings->on_headers_complete) { + switch (settings->on_headers_complete(parser)) { + case 0: + break; + + case 1: + parser->flags |= F_SKIPBODY; + break; + + default: + parser->state = state; + return p - data; /* Error */ + } + } + + /* Exit, the rest of the connect is in a different protocol. */ + if (parser->upgrade) { + CALLBACK2(message_complete); + return (p - data); + } + + if (parser->flags & F_SKIPBODY) { + CALLBACK2(message_complete); + state = NEW_MESSAGE(); + } else if (parser->flags & F_CHUNKED) { + /* chunked encoding - ignore Content-Length header */ + state = s_chunk_size_start; + } else { + if (parser->content_length == 0) { + /* Content-Length header given but zero: Content-Length: 0\r\n */ + CALLBACK2(message_complete); + state = NEW_MESSAGE(); + } else if (parser->content_length > 0) { + /* Content-Length header given and non-zero */ + state = s_body_identity; + } else { + if (parser->type == HTTP_REQUEST || http_should_keep_alive(parser)) { + /* Assume content-length 0 - read the next */ + CALLBACK2(message_complete); + state = NEW_MESSAGE(); + } else { + /* Read body until EOF */ + state = s_body_identity_eof; + } + } + } + + break; + } + + case s_body_identity: + to_read = MIN(pe - p, (int64_t)parser->content_length); + if (to_read > 0) { + if (settings->on_body) settings->on_body(parser, p, to_read); + p += to_read - 1; + parser->content_length -= to_read; + if (parser->content_length == 0) { + CALLBACK2(message_complete); + state = NEW_MESSAGE(); + } + } + break; + + /* read until EOF */ + case s_body_identity_eof: + to_read = pe - p; + if (to_read > 0) { + if (settings->on_body) settings->on_body(parser, p, to_read); + p += to_read - 1; + } + break; + + case s_chunk_size_start: + { + assert(nread == 1); + assert(parser->flags & F_CHUNKED); + + c = unhex[(unsigned char)ch]; + if (c == -1) goto error; + parser->content_length = c; + state = s_chunk_size; + break; + } + + case s_chunk_size: + { + assert(parser->flags & F_CHUNKED); 
+ + if (ch == CR) { + state = s_chunk_size_almost_done; + break; + } + + c = unhex[(unsigned char)ch]; + + if (c == -1) { + if (ch == ';' || ch == ' ') { + state = s_chunk_parameters; + break; + } + goto error; + } + + parser->content_length *= 16; + parser->content_length += c; + break; + } + + case s_chunk_parameters: + { + assert(parser->flags & F_CHUNKED); + /* just ignore this shit. TODO check for overflow */ + if (ch == CR) { + state = s_chunk_size_almost_done; + break; + } + break; + } + + case s_chunk_size_almost_done: + { + assert(parser->flags & F_CHUNKED); + STRICT_CHECK(ch != LF); + + nread = 0; + + if (parser->content_length == 0) { + parser->flags |= F_TRAILING; + state = s_header_field_start; + } else { + state = s_chunk_data; + } + break; + } + + case s_chunk_data: + { + assert(parser->flags & F_CHUNKED); + + to_read = MIN(pe - p, (int64_t)(parser->content_length)); + + if (to_read > 0) { + if (settings->on_body) settings->on_body(parser, p, to_read); + p += to_read - 1; + } + + if (to_read == parser->content_length) { + state = s_chunk_data_almost_done; + } + + parser->content_length -= to_read; + break; + } + + case s_chunk_data_almost_done: + assert(parser->flags & F_CHUNKED); + STRICT_CHECK(ch != CR); + state = s_chunk_data_done; + break; + + case s_chunk_data_done: + assert(parser->flags & F_CHUNKED); + STRICT_CHECK(ch != LF); + state = s_chunk_size_start; + break; + + default: + assert(0 && "unhandled state"); + goto error; + } + } + + CALLBACK_NOCLEAR(header_field); + CALLBACK_NOCLEAR(header_value); + CALLBACK_NOCLEAR(fragment); + CALLBACK_NOCLEAR(query_string); + CALLBACK_NOCLEAR(path); + CALLBACK_NOCLEAR(url); + + parser->state = state; + parser->header_state = header_state; + parser->index = index; + parser->nread = nread; + + return len; + +error: + parser->state = s_dead; + return (p - data); +} + + +int +http_should_keep_alive (http_parser *parser) +{ + if (parser->http_major > 0 && parser->http_minor > 0) { + /* HTTP/1.1 */ + if 
(parser->flags & F_CONNECTION_CLOSE) { + return 0; + } else { + return 1; + } + } else { + /* HTTP/1.0 or earlier */ + if (parser->flags & F_CONNECTION_KEEP_ALIVE) { + return 1; + } else { + return 0; + } + } +} + + +const char * http_method_str (enum http_method m) +{ + return method_strings[m]; +} + + +void +http_parser_init (http_parser *parser, enum http_parser_type t) +{ + parser->type = t; + parser->state = (t == HTTP_REQUEST ? s_start_req : (t == HTTP_RESPONSE ? s_start_res : s_start_req_or_res)); + parser->nread = 0; + parser->upgrade = 0; + parser->flags = 0; + parser->method = 0; +} diff --git a/http_parser/http_parser.h b/http_parser/http_parser.h new file mode 100644 index 0000000..9c7a26d --- /dev/null +++ b/http_parser/http_parser.h @@ -0,0 +1,183 @@ +/* Copyright Joyent, Inc. and other Node contributors. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ +#ifndef http_parser_h +#define http_parser_h +#ifdef __cplusplus +extern "C" { +#endif + +#define HTTP_PARSER_VERSION_MAJOR 1 +#define HTTP_PARSER_VERSION_MINOR 0 + +#include +#if defined(_WIN32) && !defined(__MINGW32__) +typedef __int8 int8_t; +typedef unsigned __int8 uint8_t; +typedef __int16 int16_t; +typedef unsigned __int16 uint16_t; +typedef __int32 int32_t; +typedef unsigned __int32 uint32_t; +typedef __int64 int64_t; +typedef unsigned __int64 uint64_t; + +typedef unsigned int size_t; +typedef int ssize_t; +#else +#include +#endif + +/* Compile with -DHTTP_PARSER_STRICT=0 to make fewer checks, but run + * faster. NOTE(review): any predefined value, even 1, becomes 0 below - confirm. + */ +#ifndef HTTP_PARSER_STRICT +# define HTTP_PARSER_STRICT 1 +#else +# define HTTP_PARSER_STRICT 0 +#endif + + +/* Maximum header size allowed */ +#define HTTP_MAX_HEADER_SIZE (80*1024) + + +typedef struct http_parser http_parser; +typedef struct http_parser_settings http_parser_settings; + + +/* Callbacks should return non-zero to indicate an error. The parser will + * then halt execution. + * + * The one exception is on_headers_complete. In a HTTP_RESPONSE parser + * returning '1' from on_headers_complete will tell the parser that it + * should not expect a body. This is used when receiving a response to a + * HEAD request which may contain 'Content-Length' or 'Transfer-Encoding: + * chunked' headers that indicate the presence of a body. + * + * http_data_cb does not return data chunks. It will be called arbitrarily + * many times for each string. E.G. you might get 10 callbacks for "on_path" + * each providing just a few characters more data.
+ */ +typedef int (*http_data_cb) (http_parser*, const char *at, size_t length); +typedef int (*http_cb) (http_parser*); + + +/* Request Methods */ +enum http_method + { HTTP_DELETE = 0 + , HTTP_GET + , HTTP_HEAD + , HTTP_POST + , HTTP_PUT + /* pathological */ + , HTTP_CONNECT + , HTTP_OPTIONS + , HTTP_TRACE + /* webdav */ + , HTTP_COPY + , HTTP_LOCK + , HTTP_MKCOL + , HTTP_MOVE + , HTTP_PROPFIND + , HTTP_PROPPATCH + , HTTP_UNLOCK + /* subversion */ + , HTTP_REPORT + , HTTP_MKACTIVITY + , HTTP_CHECKOUT + , HTTP_MERGE + /* upnp */ + , HTTP_MSEARCH + , HTTP_NOTIFY + , HTTP_SUBSCRIBE + , HTTP_UNSUBSCRIBE + }; + + +enum http_parser_type { HTTP_REQUEST, HTTP_RESPONSE, HTTP_BOTH }; + + +struct http_parser { + /** PRIVATE **/ + unsigned char type : 2; + unsigned char flags : 6; + unsigned char state; + unsigned char header_state; + unsigned char index; + + uint32_t nread; + int64_t content_length; + + /** READ-ONLY **/ + unsigned short http_major; + unsigned short http_minor; + unsigned short status_code; /* responses only */ + unsigned char method; /* requests only */ + + /* 1 = Upgrade header was present and the parser has exited because of that. + * 0 = No upgrade header present. + * Should be checked when http_parser_execute() returns in addition to + * error checking. 
+ */ + char upgrade; + + /** PUBLIC **/ + void *data; /* A pointer to get hook to the "connection" or "socket" object */ +}; + + +struct http_parser_settings { + http_cb on_message_begin; + http_data_cb on_path; + http_data_cb on_query_string; + http_data_cb on_url; + http_data_cb on_fragment; + http_data_cb on_header_field; + http_data_cb on_header_value; + http_cb on_headers_complete; + http_data_cb on_body; + http_cb on_message_complete; +}; + + +void http_parser_init(http_parser *parser, enum http_parser_type type); + + +size_t http_parser_execute(http_parser *parser, + const http_parser_settings *settings, + const char *data, + size_t len); + + +/* If http_should_keep_alive() in the on_headers_complete or + * on_message_complete callback returns 0 (false), then this should be + * the last message on the connection. + * If you are the server, respond with the "Connection: close" header. + * If you are the client, close the connection. + */ +int http_should_keep_alive(http_parser *parser); + +/* Returns a string version of the HTTP method. */ +const char *http_method_str(enum http_method); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/http_parser/test.c b/http_parser/test.c new file mode 100644 index 0000000..4a93163 --- /dev/null +++ b/http_parser/test.c @@ -0,0 +1,1952 @@ +/* Copyright Joyent, Inc. and other Node contributors. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ +#include "http_parser.h" +#include +#include +#include +#include /* rand */ +#include +#include + +#undef TRUE +#define TRUE 1 +#undef FALSE +#define FALSE 0 + +#define MAX_HEADERS 13 +#define MAX_ELEMENT_SIZE 500 + +#define MIN(a,b) ((a) < (b) ? (a) : (b)) + +static http_parser *parser; + +struct message { + const char *name; // for debugging purposes + const char *raw; + enum http_parser_type type; + enum http_method method; + int status_code; + char request_path[MAX_ELEMENT_SIZE]; + char request_url[MAX_ELEMENT_SIZE]; + char fragment[MAX_ELEMENT_SIZE]; + char query_string[MAX_ELEMENT_SIZE]; + char body[MAX_ELEMENT_SIZE]; + size_t body_size; + int num_headers; + enum { NONE=0, FIELD, VALUE } last_header_element; + char headers [MAX_HEADERS][2][MAX_ELEMENT_SIZE]; + int should_keep_alive; + + int upgrade; + + unsigned short http_major; + unsigned short http_minor; + + int message_begin_cb_called; + int headers_complete_cb_called; + int message_complete_cb_called; + int message_complete_on_eof; +}; + +static int currently_parsing_eof; + +static struct message messages[5]; +static int num_messages; + +/* * R E Q U E S T S * */ +const struct message requests[] = +#define CURL_GET 0 +{ {.name= "curl get" + ,.type= HTTP_REQUEST + ,.raw= "GET /test HTTP/1.1\r\n" + "User-Agent: curl/7.18.0 (i486-pc-linux-gnu) libcurl/7.18.0 OpenSSL/0.9.8g zlib/1.2.3.3 libidn/1.1\r\n" + "Host: 0.0.0.0=5000\r\n" + "Accept: */*\r\n" + "\r\n" + ,.should_keep_alive= TRUE + ,.message_complete_on_eof= 
FALSE + ,.http_major= 1 + ,.http_minor= 1 + ,.method= HTTP_GET + ,.query_string= "" + ,.fragment= "" + ,.request_path= "/test" + ,.request_url= "/test" + ,.num_headers= 3 + ,.headers= + { { "User-Agent", "curl/7.18.0 (i486-pc-linux-gnu) libcurl/7.18.0 OpenSSL/0.9.8g zlib/1.2.3.3 libidn/1.1" } + , { "Host", "0.0.0.0=5000" } + , { "Accept", "*/*" } + } + ,.body= "" + } + +#define FIREFOX_GET 1 +, {.name= "firefox get" + ,.type= HTTP_REQUEST + ,.raw= "GET /favicon.ico HTTP/1.1\r\n" + "Host: 0.0.0.0=5000\r\n" + "User-Agent: Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9) Gecko/2008061015 Firefox/3.0\r\n" + "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8\r\n" + "Accept-Language: en-us,en;q=0.5\r\n" + "Accept-Encoding: gzip,deflate\r\n" + "Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7\r\n" + "Keep-Alive: 300\r\n" + "Connection: keep-alive\r\n" + "\r\n" + ,.should_keep_alive= TRUE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 1 + ,.method= HTTP_GET + ,.query_string= "" + ,.fragment= "" + ,.request_path= "/favicon.ico" + ,.request_url= "/favicon.ico" + ,.num_headers= 8 + ,.headers= + { { "Host", "0.0.0.0=5000" } + , { "User-Agent", "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9) Gecko/2008061015 Firefox/3.0" } + , { "Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8" } + , { "Accept-Language", "en-us,en;q=0.5" } + , { "Accept-Encoding", "gzip,deflate" } + , { "Accept-Charset", "ISO-8859-1,utf-8;q=0.7,*;q=0.7" } + , { "Keep-Alive", "300" } + , { "Connection", "keep-alive" } + } + ,.body= "" + } + +#define DUMBFUCK 2 +, {.name= "dumbfuck" + ,.type= HTTP_REQUEST + ,.raw= "GET /dumbfuck HTTP/1.1\r\n" + "aaaaaaaaaaaaa:++++++++++\r\n" + "\r\n" + ,.should_keep_alive= TRUE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 1 + ,.method= HTTP_GET + ,.query_string= "" + ,.fragment= "" + ,.request_path= "/dumbfuck" + ,.request_url= "/dumbfuck" + ,.num_headers= 1 + ,.headers= + { { 
"aaaaaaaaaaaaa", "++++++++++" } + } + ,.body= "" + } + +#define FRAGMENT_IN_URI 3 +, {.name= "fragment in url" + ,.type= HTTP_REQUEST + ,.raw= "GET /forums/1/topics/2375?page=1#posts-17408 HTTP/1.1\r\n" + "\r\n" + ,.should_keep_alive= TRUE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 1 + ,.method= HTTP_GET + ,.query_string= "page=1" + ,.fragment= "posts-17408" + ,.request_path= "/forums/1/topics/2375" + /* XXX request url does include fragment? */ + ,.request_url= "/forums/1/topics/2375?page=1#posts-17408" + ,.num_headers= 0 + ,.body= "" + } + +#define GET_NO_HEADERS_NO_BODY 4 +, {.name= "get no headers no body" + ,.type= HTTP_REQUEST + ,.raw= "GET /get_no_headers_no_body/world HTTP/1.1\r\n" + "\r\n" + ,.should_keep_alive= TRUE + ,.message_complete_on_eof= FALSE /* would need Connection: close */ + ,.http_major= 1 + ,.http_minor= 1 + ,.method= HTTP_GET + ,.query_string= "" + ,.fragment= "" + ,.request_path= "/get_no_headers_no_body/world" + ,.request_url= "/get_no_headers_no_body/world" + ,.num_headers= 0 + ,.body= "" + } + +#define GET_ONE_HEADER_NO_BODY 5 +, {.name= "get one header no body" + ,.type= HTTP_REQUEST + ,.raw= "GET /get_one_header_no_body HTTP/1.1\r\n" + "Accept: */*\r\n" + "\r\n" + ,.should_keep_alive= TRUE + ,.message_complete_on_eof= FALSE /* would need Connection: close */ + ,.http_major= 1 + ,.http_minor= 1 + ,.method= HTTP_GET + ,.query_string= "" + ,.fragment= "" + ,.request_path= "/get_one_header_no_body" + ,.request_url= "/get_one_header_no_body" + ,.num_headers= 1 + ,.headers= + { { "Accept" , "*/*" } + } + ,.body= "" + } + +#define GET_FUNKY_CONTENT_LENGTH 6 +, {.name= "get funky content length body hello" + ,.type= HTTP_REQUEST + ,.raw= "GET /get_funky_content_length_body_hello HTTP/1.0\r\n" + "conTENT-Length: 5\r\n" + "\r\n" + "HELLO" + ,.should_keep_alive= FALSE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 0 + ,.method= HTTP_GET + ,.query_string= "" + ,.fragment= "" + ,.request_path= 
"/get_funky_content_length_body_hello" + ,.request_url= "/get_funky_content_length_body_hello" + ,.num_headers= 1 + ,.headers= + { { "conTENT-Length" , "5" } + } + ,.body= "HELLO" + } + +#define POST_IDENTITY_BODY_WORLD 7 +, {.name= "post identity body world" + ,.type= HTTP_REQUEST + ,.raw= "POST /post_identity_body_world?q=search#hey HTTP/1.1\r\n" + "Accept: */*\r\n" + "Transfer-Encoding: identity\r\n" + "Content-Length: 5\r\n" + "\r\n" + "World" + ,.should_keep_alive= TRUE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 1 + ,.method= HTTP_POST + ,.query_string= "q=search" + ,.fragment= "hey" + ,.request_path= "/post_identity_body_world" + ,.request_url= "/post_identity_body_world?q=search#hey" + ,.num_headers= 3 + ,.headers= + { { "Accept", "*/*" } + , { "Transfer-Encoding", "identity" } + , { "Content-Length", "5" } + } + ,.body= "World" + } + +#define POST_CHUNKED_ALL_YOUR_BASE 8 +, {.name= "post - chunked body: all your base are belong to us" + ,.type= HTTP_REQUEST + ,.raw= "POST /post_chunked_all_your_base HTTP/1.1\r\n" + "Transfer-Encoding: chunked\r\n" + "\r\n" + "1e\r\nall your base are belong to us\r\n" + "0\r\n" + "\r\n" + ,.should_keep_alive= TRUE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 1 + ,.method= HTTP_POST + ,.query_string= "" + ,.fragment= "" + ,.request_path= "/post_chunked_all_your_base" + ,.request_url= "/post_chunked_all_your_base" + ,.num_headers= 1 + ,.headers= + { { "Transfer-Encoding" , "chunked" } + } + ,.body= "all your base are belong to us" + } + +#define TWO_CHUNKS_MULT_ZERO_END 9 +, {.name= "two chunks ; triple zero ending" + ,.type= HTTP_REQUEST + ,.raw= "POST /two_chunks_mult_zero_end HTTP/1.1\r\n" + "Transfer-Encoding: chunked\r\n" + "\r\n" + "5\r\nhello\r\n" + "6\r\n world\r\n" + "000\r\n" + "\r\n" + ,.should_keep_alive= TRUE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 1 + ,.method= HTTP_POST + ,.query_string= "" + ,.fragment= "" + ,.request_path= 
"/two_chunks_mult_zero_end" + ,.request_url= "/two_chunks_mult_zero_end" + ,.num_headers= 1 + ,.headers= + { { "Transfer-Encoding", "chunked" } + } + ,.body= "hello world" + } + +#define CHUNKED_W_TRAILING_HEADERS 10 +, {.name= "chunked with trailing headers. blech." + ,.type= HTTP_REQUEST + ,.raw= "POST /chunked_w_trailing_headers HTTP/1.1\r\n" + "Transfer-Encoding: chunked\r\n" + "\r\n" + "5\r\nhello\r\n" + "6\r\n world\r\n" + "0\r\n" + "Vary: *\r\n" + "Content-Type: text/plain\r\n" + "\r\n" + ,.should_keep_alive= TRUE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 1 + ,.method= HTTP_POST + ,.query_string= "" + ,.fragment= "" + ,.request_path= "/chunked_w_trailing_headers" + ,.request_url= "/chunked_w_trailing_headers" + ,.num_headers= 3 + ,.headers= + { { "Transfer-Encoding", "chunked" } + , { "Vary", "*" } + , { "Content-Type", "text/plain" } + } + ,.body= "hello world" + } + +#define CHUNKED_W_BULLSHIT_AFTER_LENGTH 11 +, {.name= "with bullshit after the length" + ,.type= HTTP_REQUEST + ,.raw= "POST /chunked_w_bullshit_after_length HTTP/1.1\r\n" + "Transfer-Encoding: chunked\r\n" + "\r\n" + "5; ihatew3;whatthefuck=aretheseparametersfor\r\nhello\r\n" + "6; blahblah; blah\r\n world\r\n" + "0\r\n" + "\r\n" + ,.should_keep_alive= TRUE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 1 + ,.method= HTTP_POST + ,.query_string= "" + ,.fragment= "" + ,.request_path= "/chunked_w_bullshit_after_length" + ,.request_url= "/chunked_w_bullshit_after_length" + ,.num_headers= 1 + ,.headers= + { { "Transfer-Encoding", "chunked" } + } + ,.body= "hello world" + } + +#define WITH_QUOTES 12 +, {.name= "with quotes" + ,.type= HTTP_REQUEST + ,.raw= "GET /with_\"stupid\"_quotes?foo=\"bar\" HTTP/1.1\r\n\r\n" + ,.should_keep_alive= TRUE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 1 + ,.method= HTTP_GET + ,.query_string= "foo=\"bar\"" + ,.fragment= "" + ,.request_path= "/with_\"stupid\"_quotes" + ,.request_url= 
"/with_\"stupid\"_quotes?foo=\"bar\"" + ,.num_headers= 0 + ,.headers= { } + ,.body= "" + } + +#define APACHEBENCH_GET 13 +/* The server receiving this request SHOULD NOT wait for EOF + * to know that content-length == 0. + * How to represent this in a unit test? message_complete_on_eof + * Compare with NO_CONTENT_LENGTH_RESPONSE. + */ +, {.name = "apachebench get" + ,.type= HTTP_REQUEST + ,.raw= "GET /test HTTP/1.0\r\n" + "Host: 0.0.0.0:5000\r\n" + "User-Agent: ApacheBench/2.3\r\n" + "Accept: */*\r\n\r\n" + ,.should_keep_alive= FALSE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 0 + ,.method= HTTP_GET + ,.query_string= "" + ,.fragment= "" + ,.request_path= "/test" + ,.request_url= "/test" + ,.num_headers= 3 + ,.headers= { { "Host", "0.0.0.0:5000" } + , { "User-Agent", "ApacheBench/2.3" } + , { "Accept", "*/*" } + } + ,.body= "" + } + +#define QUERY_URL_WITH_QUESTION_MARK_GET 14 +/* Some clients include '?' characters in query strings. + */ +, {.name = "query url with question mark" + ,.type= HTTP_REQUEST + ,.raw= "GET /test.cgi?foo=bar?baz HTTP/1.1\r\n\r\n" + ,.should_keep_alive= TRUE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 1 + ,.method= HTTP_GET + ,.query_string= "foo=bar?baz" + ,.fragment= "" + ,.request_path= "/test.cgi" + ,.request_url= "/test.cgi?foo=bar?baz" + ,.num_headers= 0 + ,.headers= {} + ,.body= "" + } + +#define PREFIX_NEWLINE_GET 15 +/* Some clients, especially after a POST in a keep-alive connection, + * will send an extra CRLF before the next request + */ +, {.name = "newline prefix get" + ,.type= HTTP_REQUEST + ,.raw= "\r\nGET /test HTTP/1.1\r\n\r\n" + ,.should_keep_alive= TRUE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 1 + ,.method= HTTP_GET + ,.query_string= "" + ,.fragment= "" + ,.request_path= "/test" + ,.request_url= "/test" + ,.num_headers= 0 + ,.headers= { } + ,.body= "" + } + +#define UPGRADE_REQUEST 16 +, {.name = "upgrade request" + ,.type= HTTP_REQUEST + 
,.raw= "GET /demo HTTP/1.1\r\n" + "Host: example.com\r\n" + "Connection: Upgrade\r\n" + "Sec-WebSocket-Key2: 12998 5 Y3 1 .P00\r\n" + "Sec-WebSocket-Protocol: sample\r\n" + "Upgrade: WebSocket\r\n" + "Sec-WebSocket-Key1: 4 @1 46546xW%0l 1 5\r\n" + "Origin: http://example.com\r\n" + "\r\n" + ,.should_keep_alive= TRUE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 1 + ,.method= HTTP_GET + ,.query_string= "" + ,.fragment= "" + ,.request_path= "/demo" + ,.request_url= "/demo" + ,.num_headers= 7 + ,.upgrade=1 + ,.headers= { { "Host", "example.com" } + , { "Connection", "Upgrade" } + , { "Sec-WebSocket-Key2", "12998 5 Y3 1 .P00" } + , { "Sec-WebSocket-Protocol", "sample" } + , { "Upgrade", "WebSocket" } + , { "Sec-WebSocket-Key1", "4 @1 46546xW%0l 1 5" } + , { "Origin", "http://example.com" } + } + ,.body= "" + } + +#define CONNECT_REQUEST 17 +, {.name = "connect request" + ,.type= HTTP_REQUEST + ,.raw= "CONNECT home0.netscape.com:443 HTTP/1.0\r\n" + "User-agent: Mozilla/1.1N\r\n" + "Proxy-authorization: basic aGVsbG86d29ybGQ=\r\n" + "\r\n" + ,.should_keep_alive= FALSE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 0 + ,.method= HTTP_CONNECT + ,.query_string= "" + ,.fragment= "" + ,.request_path= "" + ,.request_url= "home0.netscape.com:443" + ,.num_headers= 2 + ,.upgrade=1 + ,.headers= { { "User-agent", "Mozilla/1.1N" } + , { "Proxy-authorization", "basic aGVsbG86d29ybGQ=" } + } + ,.body= "" + } + +#define REPORT_REQ 18 +, {.name= "report request" + ,.type= HTTP_REQUEST + ,.raw= "REPORT /test HTTP/1.1\r\n" + "\r\n" + ,.should_keep_alive= TRUE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 1 + ,.method= HTTP_REPORT + ,.query_string= "" + ,.fragment= "" + ,.request_path= "/test" + ,.request_url= "/test" + ,.num_headers= 0 + ,.headers= {} + ,.body= "" + } + +#define NO_HTTP_VERSION 19 +, {.name= "request with no http version" + ,.type= HTTP_REQUEST + ,.raw= "GET /\r\n" + "\r\n" + ,.should_keep_alive= FALSE + 
,.message_complete_on_eof= FALSE + ,.http_major= 0 + ,.http_minor= 9 + ,.method= HTTP_GET + ,.query_string= "" + ,.fragment= "" + ,.request_path= "/" + ,.request_url= "/" + ,.num_headers= 0 + ,.headers= {} + ,.body= "" + } + +#define MSEARCH_REQ 20 +, {.name= "m-search request" + ,.type= HTTP_REQUEST + ,.raw= "M-SEARCH * HTTP/1.1\r\n" + "HOST: 239.255.255.250:1900\r\n" + "MAN: \"ssdp:discover\"\r\n" + "ST: \"ssdp:all\"\r\n" + "\r\n" + ,.should_keep_alive= TRUE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 1 + ,.method= HTTP_MSEARCH + ,.query_string= "" + ,.fragment= "" + ,.request_path= "*" + ,.request_url= "*" + ,.num_headers= 3 + ,.headers= { { "HOST", "239.255.255.250:1900" } + , { "MAN", "\"ssdp:discover\"" } + , { "ST", "\"ssdp:all\"" } + } + ,.body= "" + } + +#define UTF8_PATH_REQ 21 +, {.name= "utf-8 path request" + ,.type= HTTP_REQUEST + ,.raw= "GET /ホエツカ/ホエt/pope?q=1#narf HTTP/1.1\r\n" + "Host: github.com\r\n" + "\r\n" + ,.should_keep_alive= TRUE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 1 + ,.method= HTTP_GET + ,.query_string= "q=1" + ,.fragment= "narf" + ,.request_path= "/ホエツカ/ホエt/pope" + ,.request_url= "/ホエツカ/ホエt/pope?q=1#narf" + ,.num_headers= 1 + ,.headers= { {"Host", "github.com" } + } + ,.body= "" + } + +#define QUERY_TERMINATED_HOST 22 +, {.name= "host terminated by a query string" + ,.type= HTTP_REQUEST + ,.raw= "GET http://hypnotoad.org?hail=all HTTP/1.1\r\n" + "\r\n" + ,.should_keep_alive= TRUE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 1 + ,.method= HTTP_GET + ,.query_string= "hail=all" + ,.fragment= "" + ,.request_path= "" + ,.request_url= "http://hypnotoad.org?hail=all" + ,.num_headers= 0 + ,.headers= { } + ,.body= "" + } + +#define QUERY_TERMINATED_HOSTPORT 23 +, {.name= "host:port terminated by a query string" + ,.type= HTTP_REQUEST + ,.raw= "GET http://hypnotoad.org:1234?hail=all HTTP/1.1\r\n" + "\r\n" + ,.should_keep_alive= TRUE + ,.message_complete_on_eof= FALSE 
+ ,.http_major= 1 + ,.http_minor= 1 + ,.method= HTTP_GET + ,.query_string= "hail=all" + ,.fragment= "" + ,.request_path= "" + ,.request_url= "http://hypnotoad.org:1234?hail=all" + ,.num_headers= 0 + ,.headers= { } + ,.body= "" + } + +#define SPACE_TERMINATED_HOSTPORT 24 +, {.name= "host:port terminated by a space" + ,.type= HTTP_REQUEST + ,.raw= "GET http://hypnotoad.org:1234 HTTP/1.1\r\n" + "\r\n" + ,.should_keep_alive= TRUE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 1 + ,.method= HTTP_GET + ,.query_string= "" + ,.fragment= "" + ,.request_path= "" + ,.request_url= "http://hypnotoad.org:1234" + ,.num_headers= 0 + ,.headers= { } + ,.body= "" + } + +, {.name= NULL } /* sentinel */ +}; + +/* * R E S P O N S E S * */ +const struct message responses[] = +#define GOOGLE_301 0 +{ {.name= "google 301" + ,.type= HTTP_RESPONSE + ,.raw= "HTTP/1.1 301 Moved Permanently\r\n" + "Location: http://www.google.com/\r\n" + "Content-Type: text/html; charset=UTF-8\r\n" + "Date: Sun, 26 Apr 2009 11:11:49 GMT\r\n" + "Expires: Tue, 26 May 2009 11:11:49 GMT\r\n" + "X-$PrototypeBI-Version: 1.6.0.3\r\n" /* $ char in header field */ + "Cache-Control: public, max-age=2592000\r\n" + "Server: gws\r\n" + "Content-Length: 219 \r\n" + "\r\n" + "\n" + "301 Moved\n" + "

301 Moved

\n" + "The document has moved\n" + "here.\r\n" + "\r\n" + ,.should_keep_alive= TRUE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 1 + ,.status_code= 301 + ,.num_headers= 8 + ,.headers= + { { "Location", "http://www.google.com/" } + , { "Content-Type", "text/html; charset=UTF-8" } + , { "Date", "Sun, 26 Apr 2009 11:11:49 GMT" } + , { "Expires", "Tue, 26 May 2009 11:11:49 GMT" } + , { "X-$PrototypeBI-Version", "1.6.0.3" } + , { "Cache-Control", "public, max-age=2592000" } + , { "Server", "gws" } + , { "Content-Length", "219 " } + } + ,.body= "\n" + "301 Moved\n" + "

301 Moved

\n" + "The document has moved\n" + "here.\r\n" + "\r\n" + } + +#define NO_CONTENT_LENGTH_RESPONSE 1 +/* The client should wait for the server's EOF. That is, when content-length + * is not specified, and "Connection: close", the end of body is specified + * by the EOF. + * Compare with APACHEBENCH_GET + */ +, {.name= "no content-length response" + ,.type= HTTP_RESPONSE + ,.raw= "HTTP/1.1 200 OK\r\n" + "Date: Tue, 04 Aug 2009 07:59:32 GMT\r\n" + "Server: Apache\r\n" + "X-Powered-By: Servlet/2.5 JSP/2.1\r\n" + "Content-Type: text/xml; charset=utf-8\r\n" + "Connection: close\r\n" + "\r\n" + "\n" + "\n" + " \n" + " \n" + " SOAP-ENV:Client\n" + " Client Error\n" + " \n" + " \n" + "" + ,.should_keep_alive= FALSE + ,.message_complete_on_eof= TRUE + ,.http_major= 1 + ,.http_minor= 1 + ,.status_code= 200 + ,.num_headers= 5 + ,.headers= + { { "Date", "Tue, 04 Aug 2009 07:59:32 GMT" } + , { "Server", "Apache" } + , { "X-Powered-By", "Servlet/2.5 JSP/2.1" } + , { "Content-Type", "text/xml; charset=utf-8" } + , { "Connection", "close" } + } + ,.body= "\n" + "\n" + " \n" + " \n" + " SOAP-ENV:Client\n" + " Client Error\n" + " \n" + " \n" + "" + } + +#define NO_HEADERS_NO_BODY_404 2 +, {.name= "404 no headers no body" + ,.type= HTTP_RESPONSE + ,.raw= "HTTP/1.1 404 Not Found\r\n\r\n" + ,.should_keep_alive= TRUE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 1 + ,.status_code= 404 + ,.num_headers= 0 + ,.headers= {} + ,.body_size= 0 + ,.body= "" + } + +#define NO_REASON_PHRASE 3 +, {.name= "301 no response phrase" + ,.type= HTTP_RESPONSE + ,.raw= "HTTP/1.1 301\r\n\r\n" + ,.should_keep_alive = TRUE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 1 + ,.status_code= 301 + ,.num_headers= 0 + ,.headers= {} + ,.body= "" + } + +#define TRAILING_SPACE_ON_CHUNKED_BODY 4 +, {.name="200 trailing space on chunked body" + ,.type= HTTP_RESPONSE + ,.raw= "HTTP/1.1 200 OK\r\n" + "Content-Type: text/plain\r\n" + "Transfer-Encoding: chunked\r\n" + "\r\n" + 
"25 \r\n" + "This is the data in the first chunk\r\n" + "\r\n" + "1C\r\n" + "and this is the second one\r\n" + "\r\n" + "0 \r\n" + "\r\n" + ,.should_keep_alive= TRUE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 1 + ,.status_code= 200 + ,.num_headers= 2 + ,.headers= + { {"Content-Type", "text/plain" } + , {"Transfer-Encoding", "chunked" } + } + ,.body_size = 37+28 + ,.body = + "This is the data in the first chunk\r\n" + "and this is the second one\r\n" + + } + +#define NO_CARRIAGE_RET 5 +, {.name="no carriage ret" + ,.type= HTTP_RESPONSE + ,.raw= "HTTP/1.1 200 OK\n" + "Content-Type: text/html; charset=utf-8\n" + "Connection: close\n" + "\n" + "these headers are from http://news.ycombinator.com/" + ,.should_keep_alive= FALSE + ,.message_complete_on_eof= TRUE + ,.http_major= 1 + ,.http_minor= 1 + ,.status_code= 200 + ,.num_headers= 2 + ,.headers= + { {"Content-Type", "text/html; charset=utf-8" } + , {"Connection", "close" } + } + ,.body= "these headers are from http://news.ycombinator.com/" + } + +#define PROXY_CONNECTION 6 +, {.name="proxy connection" + ,.type= HTTP_RESPONSE + ,.raw= "HTTP/1.1 200 OK\r\n" + "Content-Type: text/html; charset=UTF-8\r\n" + "Content-Length: 11\r\n" + "Proxy-Connection: close\r\n" + "Date: Thu, 31 Dec 2009 20:55:48 +0000\r\n" + "\r\n" + "hello world" + ,.should_keep_alive= FALSE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 1 + ,.status_code= 200 + ,.num_headers= 4 + ,.headers= + { {"Content-Type", "text/html; charset=UTF-8" } + , {"Content-Length", "11" } + , {"Proxy-Connection", "close" } + , {"Date", "Thu, 31 Dec 2009 20:55:48 +0000"} + } + ,.body= "hello world" + } + +#define UNDERSTORE_HEADER_KEY 7 + // shown by + // curl -o /dev/null -v "http://ad.doubleclick.net/pfadx/DARTSHELLCONFIGXML;dcmt=text/xml;" +, {.name="underscore header key" + ,.type= HTTP_RESPONSE + ,.raw= "HTTP/1.1 200 OK\r\n" + "Server: DCLK-AdSvr\r\n" + "Content-Type: text/xml\r\n" + "Content-Length: 0\r\n" + "DCLK_imp: 
v7;x;114750856;0-0;0;17820020;0/0;21603567/21621457/1;;~okv=;dcmt=text/xml;;~cs=o\r\n\r\n" + ,.should_keep_alive= TRUE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 1 + ,.status_code= 200 + ,.num_headers= 4 + ,.headers= + { {"Server", "DCLK-AdSvr" } + , {"Content-Type", "text/xml" } + , {"Content-Length", "0" } + , {"DCLK_imp", "v7;x;114750856;0-0;0;17820020;0/0;21603567/21621457/1;;~okv=;dcmt=text/xml;;~cs=o" } + } + ,.body= "" + } + +#define BONJOUR_MADAME_FR 8 +/* The client should not merge two headers fields when the first one doesn't + * have a value. + */ +, {.name= "bonjourmadame.fr" + ,.type= HTTP_RESPONSE + ,.raw= "HTTP/1.0 301 Moved Permanently\r\n" + "Date: Thu, 03 Jun 2010 09:56:32 GMT\r\n" + "Server: Apache/2.2.3 (Red Hat)\r\n" + "Cache-Control: public\r\n" + "Pragma: \r\n" + "Location: http://www.bonjourmadame.fr/\r\n" + "Vary: Accept-Encoding\r\n" + "Content-Length: 0\r\n" + "Content-Type: text/html; charset=UTF-8\r\n" + "Connection: keep-alive\r\n" + "\r\n" + ,.should_keep_alive= TRUE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 0 + ,.status_code= 301 + ,.num_headers= 9 + ,.headers= + { { "Date", "Thu, 03 Jun 2010 09:56:32 GMT" } + , { "Server", "Apache/2.2.3 (Red Hat)" } + , { "Cache-Control", "public" } + , { "Pragma", "" } + , { "Location", "http://www.bonjourmadame.fr/" } + , { "Vary", "Accept-Encoding" } + , { "Content-Length", "0" } + , { "Content-Type", "text/html; charset=UTF-8" } + , { "Connection", "keep-alive" } + } + ,.body= "" + } + +#define SPACE_IN_FIELD_RES 9 +/* Should handle spaces in header fields */ +, {.name= "field space" + ,.type= HTTP_RESPONSE + ,.raw= "HTTP/1.1 200 OK\r\n" + "Server: Microsoft-IIS/6.0\r\n" + "X-Powered-By: ASP.NET\r\n" + "en-US Content-Type: text/xml\r\n" /* this is the problem */ + "Content-Type: text/xml\r\n" + "Content-Length: 16\r\n" + "Date: Fri, 23 Jul 2010 18:45:38 GMT\r\n" + "Connection: keep-alive\r\n" + "\r\n" + "hello" /* fake body */ + 
,.should_keep_alive= TRUE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 1 + ,.status_code= 200 + ,.num_headers= 7 + ,.headers= + { { "Server", "Microsoft-IIS/6.0" } + , { "X-Powered-By", "ASP.NET" } + , { "en-US Content-Type", "text/xml" } + , { "Content-Type", "text/xml" } + , { "Content-Length", "16" } + , { "Date", "Fri, 23 Jul 2010 18:45:38 GMT" } + , { "Connection", "keep-alive" } + } + ,.body= "hello" + } + + +#define RES_FIELD_UNDERSCORE 10 +/* Should handle spaces in header fields */ +, {.name= "field underscore" + ,.type= HTTP_RESPONSE + ,.raw= "HTTP/1.1 200 OK\r\n" + "Date: Tue, 28 Sep 2010 01:14:13 GMT\r\n" + "Server: Apache\r\n" + "Cache-Control: no-cache, must-revalidate\r\n" + "Expires: Mon, 26 Jul 1997 05:00:00 GMT\r\n" + ".et-Cookie: PlaxoCS=1274804622353690521; path=/; domain=.plaxo.com\r\n" + "Vary: Accept-Encoding\r\n" + "_eep-Alive: timeout=45\r\n" /* semantic value ignored */ + "_onnection: Keep-Alive\r\n" /* semantic value ignored */ + "Transfer-Encoding: chunked\r\n" + "Content-Type: text/html\r\n" + "Connection: close\r\n" + "\r\n" + "0\r\n\r\n" + ,.should_keep_alive= FALSE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 1 + ,.status_code= 200 + ,.num_headers= 11 + ,.headers= + { { "Date", "Tue, 28 Sep 2010 01:14:13 GMT" } + , { "Server", "Apache" } + , { "Cache-Control", "no-cache, must-revalidate" } + , { "Expires", "Mon, 26 Jul 1997 05:00:00 GMT" } + , { ".et-Cookie", "PlaxoCS=1274804622353690521; path=/; domain=.plaxo.com" } + , { "Vary", "Accept-Encoding" } + , { "_eep-Alive", "timeout=45" } + , { "_onnection", "Keep-Alive" } + , { "Transfer-Encoding", "chunked" } + , { "Content-Type", "text/html" } + , { "Connection", "close" } + } + ,.body= "" + } + +#define NON_ASCII_IN_STATUS_LINE 11 +/* Should handle non-ASCII in status line */ +, {.name= "non-ASCII in status line" + ,.type= HTTP_RESPONSE + ,.raw= "HTTP/1.1 500 Oriテォntatieprobleem\r\n" + "Date: Fri, 5 Nov 2010 23:07:12 GMT+2\r\n" + 
"Content-Length: 0\r\n" + "Connection: close\r\n" + "\r\n" + ,.should_keep_alive= FALSE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 1 + ,.status_code= 500 + ,.num_headers= 3 + ,.headers= + { { "Date", "Fri, 5 Nov 2010 23:07:12 GMT+2" } + , { "Content-Length", "0" } + , { "Connection", "close" } + } + ,.body= "" + } + + +, {.name= NULL } /* sentinel */ +}; + +int +request_path_cb (http_parser *p, const char *buf, size_t len) +{ + assert(p == parser); + strncat(messages[num_messages].request_path, buf, len); + return 0; +} + +int +request_url_cb (http_parser *p, const char *buf, size_t len) +{ + assert(p == parser); + strncat(messages[num_messages].request_url, buf, len); + return 0; +} + +int +query_string_cb (http_parser *p, const char *buf, size_t len) +{ + assert(p == parser); + strncat(messages[num_messages].query_string, buf, len); + return 0; +} + +int +fragment_cb (http_parser *p, const char *buf, size_t len) +{ + assert(p == parser); + strncat(messages[num_messages].fragment, buf, len); + return 0; +} + +int +header_field_cb (http_parser *p, const char *buf, size_t len) +{ + assert(p == parser); + struct message *m = &messages[num_messages]; + + if (m->last_header_element != FIELD) + m->num_headers++; + + strncat(m->headers[m->num_headers-1][0], buf, len); + + m->last_header_element = FIELD; + + return 0; +} + +int +header_value_cb (http_parser *p, const char *buf, size_t len) +{ + assert(p == parser); + struct message *m = &messages[num_messages]; + + strncat(m->headers[m->num_headers-1][1], buf, len); + + m->last_header_element = VALUE; + + return 0; +} + +int +body_cb (http_parser *p, const char *buf, size_t len) +{ + assert(p == parser); + strncat(messages[num_messages].body, buf, len); + messages[num_messages].body_size += len; + // printf("body_cb: '%s'\n", requests[num_messages].body); + return 0; +} + +int +count_body_cb (http_parser *p, const char *buf, size_t len) +{ + assert(p == parser); + assert(buf); + 
messages[num_messages].body_size += len; + return 0; +} + +int +message_begin_cb (http_parser *p) +{ + assert(p == parser); + messages[num_messages].message_begin_cb_called = TRUE; + return 0; +} + +int +headers_complete_cb (http_parser *p) +{ + assert(p == parser); + messages[num_messages].method = parser->method; + messages[num_messages].status_code = parser->status_code; + messages[num_messages].http_major = parser->http_major; + messages[num_messages].http_minor = parser->http_minor; + messages[num_messages].headers_complete_cb_called = TRUE; + messages[num_messages].should_keep_alive = http_should_keep_alive(parser); + return 0; +} + +int +message_complete_cb (http_parser *p) +{ + assert(p == parser); + if (messages[num_messages].should_keep_alive != http_should_keep_alive(parser)) + { + fprintf(stderr, "\n\n *** Error http_should_keep_alive() should have same " + "value in both on_message_complete and on_headers_complete " + "but it doesn't! ***\n\n"); + assert(0); + exit(1); + } + messages[num_messages].message_complete_cb_called = TRUE; + + messages[num_messages].message_complete_on_eof = currently_parsing_eof; + + num_messages++; + return 0; +} + +static http_parser_settings settings = + {.on_message_begin = message_begin_cb + ,.on_header_field = header_field_cb + ,.on_header_value = header_value_cb + ,.on_path = request_path_cb + ,.on_url = request_url_cb + ,.on_fragment = fragment_cb + ,.on_query_string = query_string_cb + ,.on_body = body_cb + ,.on_headers_complete = headers_complete_cb + ,.on_message_complete = message_complete_cb + }; + +static http_parser_settings settings_count_body = + {.on_message_begin = message_begin_cb + ,.on_header_field = header_field_cb + ,.on_header_value = header_value_cb + ,.on_path = request_path_cb + ,.on_url = request_url_cb + ,.on_fragment = fragment_cb + ,.on_query_string = query_string_cb + ,.on_body = count_body_cb + ,.on_headers_complete = headers_complete_cb + ,.on_message_complete = message_complete_cb + }; + 
+static http_parser_settings settings_null = + {.on_message_begin = 0 + ,.on_header_field = 0 + ,.on_header_value = 0 + ,.on_path = 0 + ,.on_url = 0 + ,.on_fragment = 0 + ,.on_query_string = 0 + ,.on_body = 0 + ,.on_headers_complete = 0 + ,.on_message_complete = 0 + }; + +void +parser_init (enum http_parser_type type) +{ + num_messages = 0; + + assert(parser == NULL); + + parser = malloc(sizeof(http_parser)); + + http_parser_init(parser, type); + + memset(&messages, 0, sizeof messages); + +} + +void +parser_free () +{ + assert(parser); + free(parser); + parser = NULL; +} + +size_t parse (const char *buf, size_t len) +{ + size_t nparsed; + currently_parsing_eof = (len == 0); + nparsed = http_parser_execute(parser, &settings, buf, len); + return nparsed; +} + +size_t parse_count_body (const char *buf, size_t len) +{ + size_t nparsed; + currently_parsing_eof = (len == 0); + nparsed = http_parser_execute(parser, &settings_count_body, buf, len); + return nparsed; +} + +static inline int +check_str_eq (const struct message *m, + const char *prop, + const char *expected, + const char *found) { + if (0 != strcmp(expected, found)) { + printf("\n*** Error: %s in '%s' ***\n\n", prop, m->name); + printf("expected '%s'\n", expected); + printf(" found '%s'\n", found); + return 0; + } + return 1; +} + +static inline int +check_num_eq (const struct message *m, + const char *prop, + int expected, + int found) { + if (expected != found) { + printf("\n*** Error: %s in '%s' ***\n\n", prop, m->name); + printf("expected %d\n", expected); + printf(" found %d\n", found); + return 0; + } + return 1; +} + +#define MESSAGE_CHECK_STR_EQ(expected, found, prop) \ + if (!check_str_eq(expected, #prop, expected->prop, found->prop)) return 0 + +#define MESSAGE_CHECK_NUM_EQ(expected, found, prop) \ + if (!check_num_eq(expected, #prop, expected->prop, found->prop)) return 0 + + +int +message_eq (int index, const struct message *expected) +{ + int i; + struct message *m = &messages[index]; + + 
MESSAGE_CHECK_NUM_EQ(expected, m, http_major); + MESSAGE_CHECK_NUM_EQ(expected, m, http_minor); + + if (expected->type == HTTP_REQUEST) { + MESSAGE_CHECK_NUM_EQ(expected, m, method); + } else { + MESSAGE_CHECK_NUM_EQ(expected, m, status_code); + } + + MESSAGE_CHECK_NUM_EQ(expected, m, should_keep_alive); + MESSAGE_CHECK_NUM_EQ(expected, m, message_complete_on_eof); + + assert(m->message_begin_cb_called); + assert(m->headers_complete_cb_called); + assert(m->message_complete_cb_called); + + + MESSAGE_CHECK_STR_EQ(expected, m, request_path); + MESSAGE_CHECK_STR_EQ(expected, m, query_string); + MESSAGE_CHECK_STR_EQ(expected, m, fragment); + MESSAGE_CHECK_STR_EQ(expected, m, request_url); + if (expected->body_size) { + MESSAGE_CHECK_NUM_EQ(expected, m, body_size); + } else { + MESSAGE_CHECK_STR_EQ(expected, m, body); + } + + MESSAGE_CHECK_NUM_EQ(expected, m, num_headers); + + int r; + for (i = 0; i < m->num_headers; i++) { + r = check_str_eq(expected, "header field", expected->headers[i][0], m->headers[i][0]); + if (!r) return 0; + r = check_str_eq(expected, "header value", expected->headers[i][1], m->headers[i][1]); + if (!r) return 0; + } + + return 1; +} + +static void +print_error (const char *raw, size_t error_location) +{ + fprintf(stderr, "\n*** parse error ***\n\n"); + + int this_line = 0, char_len = 0; + size_t i, j, len = strlen(raw), error_location_line = 0; + for (i = 0; i < len; i++) { + if (i == error_location) this_line = 1; + switch (raw[i]) { + case '\r': + char_len = 2; + fprintf(stderr, "\\r"); + break; + + case '\n': + char_len = 2; + fprintf(stderr, "\\n\n"); + + if (this_line) goto print; + + error_location_line = 0; + continue; + + default: + char_len = 1; + fputc(raw[i], stderr); + break; + } + if (!this_line) error_location_line += char_len; + } + + fprintf(stderr, "[eof]\n"); + + print: + for (j = 0; j < error_location_line; j++) { + fputc(' ', stderr); + } + fprintf(stderr, "^\n\nerror location: %u\n", (unsigned int)error_location); +} + + 
+void +test_message (const struct message *message) +{ + size_t raw_len = strlen(message->raw); + size_t msg1len; + for (msg1len = 0; msg1len < raw_len; msg1len++) { + parser_init(message->type); + + size_t read; + const char *msg1 = message->raw; + const char *msg2 = msg1 + msg1len; + size_t msg2len = raw_len - msg1len; + + if (msg1len) { + read = parse(msg1, msg1len); + + if (message->upgrade && parser->upgrade) goto test; + + if (read != msg1len) { + print_error(msg1, read); + exit(1); + } + } + + + read = parse(msg2, msg2len); + + if (message->upgrade && parser->upgrade) goto test; + + if (read != msg2len) { + print_error(msg2, read); + exit(1); + } + + read = parse(NULL, 0); + + if (message->upgrade && parser->upgrade) goto test; + + if (read != 0) { + print_error(message->raw, read); + exit(1); + } + + test: + + if (num_messages != 1) { + printf("\n*** num_messages != 1 after testing '%s' ***\n\n", message->name); + exit(1); + } + + if(!message_eq(0, message)) exit(1); + + parser_free(); + } +} + +void +test_message_count_body (const struct message *message) +{ + parser_init(message->type); + + size_t read; + size_t l = strlen(message->raw); + size_t i, toread; + size_t chunk = 4024; + + for (i = 0; i < l; i+= chunk) { + toread = MIN(l-i, chunk); + read = parse_count_body(message->raw + i, toread); + if (read != toread) { + print_error(message->raw, read); + exit(1); + } + } + + + read = parse_count_body(NULL, 0); + if (read != 0) { + print_error(message->raw, read); + exit(1); + } + + if (num_messages != 1) { + printf("\n*** num_messages != 1 after testing '%s' ***\n\n", message->name); + exit(1); + } + + if(!message_eq(0, message)) exit(1); + + parser_free(); +} + +void +test_simple (const char *buf, int should_pass) +{ + parser_init(HTTP_REQUEST); + + size_t parsed; + int pass; + parsed = parse(buf, strlen(buf)); + pass = (parsed == strlen(buf)); + parsed = parse(NULL, 0); + pass &= (parsed == 0); + + parser_free(); + + if (pass != should_pass) { + 
fprintf(stderr, "\n*** test_simple expected %s ***\n\n%s", should_pass ? "success" : "error", buf); + exit(1); + } +} + +/* test_header_overflow_error: send a start line, then the same header line up to 10000 times; return as soon as http_parser_execute() consumes fewer bytes than it was given, and exit(1) if every call is accepted. req is used as a truth value: non-zero selects an HTTP_REQUEST parser and a request line, zero selects HTTP_RESPONSE and a status line. NOTE(review): main() passes the HTTP_REQUEST/HTTP_RESPONSE constants here -- confirm their numeric values select the intended variant. */ +void +test_header_overflow_error (int req) +{ + http_parser parser; + http_parser_init(&parser, req ? HTTP_REQUEST : HTTP_RESPONSE); + size_t parsed; + const char *buf; + buf = req ? "GET / HTTP/1.1\r\n" : "HTTP/1.0 200 OK\r\n"; + parsed = http_parser_execute(&parser, &settings_null, buf, strlen(buf)); + assert(parsed == strlen(buf)); + + buf = "header-key: header-value\r\n"; + int i; + for (i = 0; i < 10000; i++) { + if (http_parser_execute(&parser, &settings_null, buf, strlen(buf)) != strlen(buf)) { + //fprintf(stderr, "error found on iter %d\n", i); + return; + } + } + + fprintf(stderr, "\n*** Error expected but none in header overflow test ***\n"); + exit(1); +} + +/* test_no_overflow_long_body: send headers with Connection: Keep-Alive and Content-Length set to length, feed length body bytes one at a time, then send the same headers a second time; every http_parser_execute() call must consume all of its input, otherwise control jumps to err. req is used as a truth value in the same way as in test_header_overflow_error. */ +void +test_no_overflow_long_body (int req, size_t length) +{ + http_parser parser; + http_parser_init(&parser, req ? HTTP_REQUEST : HTTP_RESPONSE); + size_t parsed; + size_t i; + char buf1[3000]; + size_t buf1len = sprintf(buf1, "%s\r\nConnection: Keep-Alive\r\nContent-Length: %zu\r\n\r\n", + req ? "POST / HTTP/1.0" : "HTTP/1.0 200 OK", length); + parsed = http_parser_execute(&parser, &settings_null, buf1, buf1len); + if (parsed != buf1len) + goto err; + + for (i = 0; i < length; i++) { + char foo = 'a'; + parsed = http_parser_execute(&parser, &settings_null, &foo, 1); + if (parsed != 1) + goto err; + } + + parsed = http_parser_execute(&parser, &settings_null, buf1, buf1len); + if (parsed != buf1len) goto err; + return; + + err: + fprintf(stderr, + "\n*** error in test_no_overflow_long_body %s of length %zu ***\n", + req ?
"REQUEST" : "RESPONSE", + length); + exit(1); +} + +/* test_multiple3: concatenate the three raw messages and parse them with one parse() call followed by EOF. message_count is the number of messages expected to be recorded: counting stops after the first of r1, r2 that is marked upgrade. Byte-count checks are skipped once the parser reports an upgrade that was expected. Each expected message is then compared with message_eq(); any failure calls exit(1). NOTE(review): the mismatch message always says 3 messages, even when message_count is 1 or 2. */ +void +test_multiple3 (const struct message *r1, const struct message *r2, const struct message *r3) +{ + int message_count = 1; + if (!r1->upgrade) { + message_count++; + if (!r2->upgrade) message_count++; + } + int has_upgrade = (message_count < 3 || r3->upgrade); + + char total[ strlen(r1->raw) + + strlen(r2->raw) + + strlen(r3->raw) + + 1 + ]; + total[0] = '\0'; + + strcat(total, r1->raw); + strcat(total, r2->raw); + strcat(total, r3->raw); + + parser_init(r1->type); + + size_t read; + + read = parse(total, strlen(total)); + + if (has_upgrade && parser->upgrade) goto test; + + if (read != strlen(total)) { + print_error(total, read); + exit(1); + } + + read = parse(NULL, 0); + + if (has_upgrade && parser->upgrade) goto test; + + if (read != 0) { + print_error(total, read); + exit(1); + } + +test: + + if (message_count != num_messages) { + fprintf(stderr, "\n\n*** Parser didn't see 3 messages only %d *** \n", num_messages); + exit(1); + } + + if (!message_eq(0, r1)) exit(1); + if (message_count > 1) { + if (!message_eq(1, r2)) exit(1); + if (message_count > 2) { + if (!message_eq(2, r3)) exit(1); + } + } + + parser_free(); +} + +/* SCAN through every possible breaking to make sure the + * parser can handle getting the content in any chunks that + * might come from the socket + */ +/* For both parser types (r1->type, then HTTP_BOTH) and every pair of split points 1 <= i < j < total_len, the concatenated input is fed as three pieces and must produce three messages matching r1, r2 and r3. NOTE(review): total and buf1..buf3 are fixed 80*1024-byte arrays filled by unchecked strcat()/strncpy() -- this assumes the three raw messages together stay below that size. */ +void +test_scan (const struct message *r1, const struct message *r2, const struct message *r3) +{ + char total[80*1024] = "\0"; + char buf1[80*1024] = "\0"; + char buf2[80*1024] = "\0"; + char buf3[80*1024] = "\0"; + + strcat(total, r1->raw); + strcat(total, r2->raw); + strcat(total, r3->raw); + + size_t read; + + int total_len = strlen(total); + + int total_ops = 2 * (total_len - 1) * (total_len - 2) / 2; + int ops = 0 ; + + size_t buf1_len, buf2_len, buf3_len; + + int i,j,type_both; + for (type_both = 0; type_both < 2; type_both ++ ) { + for (j = 2; j < total_len; j ++ ) { + for (i = 1; i < j; i ++ ) { + + if (ops % 1000 == 0) { +
printf("\b\b\b\b%3.0f%%", 100 * (float)ops /(float)total_ops); + fflush(stdout); + } + ops += 1; + + parser_init(type_both ? HTTP_BOTH : r1->type); + + buf1_len = i; + strncpy(buf1, total, buf1_len); + buf1[buf1_len] = 0; + + buf2_len = j - i; + strncpy(buf2, total+i, buf2_len); + buf2[buf2_len] = 0; + + buf3_len = total_len - j; + strncpy(buf3, total+j, buf3_len); + buf3[buf3_len] = 0; + + read = parse(buf1, buf1_len); + + if (r3->upgrade && parser->upgrade) goto test; + + if (read != buf1_len) { + print_error(buf1, read); + goto error; + } + + read = parse(buf2, buf2_len); + + if (r3->upgrade && parser->upgrade) goto test; + + if (read != buf2_len) { + print_error(buf2, read); + goto error; + } + + read = parse(buf3, buf3_len); + + if (r3->upgrade && parser->upgrade) goto test; + + if (read != buf3_len) { + print_error(buf3, read); + goto error; + } + + parse(NULL, 0); + +test: + + if (3 != num_messages) { + fprintf(stderr, "\n\nParser didn't see 3 messages only %d\n", num_messages); + goto error; + } + + if (!message_eq(0, r1)) { + fprintf(stderr, "\n\nError matching messages[0] in test_scan.\n"); + goto error; + } + + if (!message_eq(1, r2)) { + fprintf(stderr, "\n\nError matching messages[1] in test_scan.\n"); + goto error; + } + + if (!message_eq(2, r3)) { + fprintf(stderr, "\n\nError matching messages[2] in test_scan.\n"); + goto error; + } + + parser_free(); + } + } + } + puts("\b\b\b\b100%"); + return; + + error: + fprintf(stderr, "i=%d j=%d\n", i, j); + fprintf(stderr, "buf1 (%u) %s\n\n", (unsigned int)buf1_len, buf1); + fprintf(stderr, "buf2 (%u) %s\n\n", (unsigned int)buf2_len , buf2); + fprintf(stderr, "buf3 (%u) %s\n", (unsigned int)buf3_len, buf3); + exit(1); +} + +/* create_large_chunked_message: allocate and return headers followed by body_size_in_kb chunks, each written as "400\r\n", 1024 'C' bytes and "\r\n", and finally "0\r\n\r\n". bufsize reserves 5+1024+2 bytes per chunk plus 6 bytes for the terminator, whose last byte is the string literal's NUL. NOTE(review): the malloc() result is used without a NULL check. */ +// user required to free the result +// string terminated by \0 +char * +create_large_chunked_message (int body_size_in_kb, const char* headers) +{ + int i; + size_t wrote = 0; + size_t headers_len = strlen(headers); + size_t bufsize = headers_len + (5+1024+2)*body_size_in_kb + 6; + char *
buf = malloc(bufsize); + + memcpy(buf, headers, headers_len); + wrote += headers_len; + + for (i = 0; i < body_size_in_kb; i++) { + // write 1kb chunk into the body. + memcpy(buf + wrote, "400\r\n", 5); + wrote += 5; + memset(buf + wrote, 'C', 1024); + wrote += 1024; + strcpy(buf + wrote, "\r\n"); + wrote += 2; + } + + memcpy(buf + wrote, "0\r\n\r\n", 6); + wrote += 6; + assert(wrote == bufsize); + + return buf; +} + + +/* main: count the entries of requests[] and responses[] (each list ends at an entry whose name is null), then run the overflow tests, the response tests and the request tests in that order. The test helpers call exit(1) on failure, so reaching return 0 means none of them reported one. */ +int +main (void) +{ + parser = NULL; + int i, j, k; + int request_count; + int response_count; + + printf("sizeof(http_parser) = %u\n", (unsigned int)sizeof(http_parser)); + + for (request_count = 0; requests[request_count].name; request_count++); + for (response_count = 0; responses[response_count].name; response_count++); + + //// OVERFLOW CONDITIONS + + test_header_overflow_error(HTTP_REQUEST); + test_no_overflow_long_body(HTTP_REQUEST, 1000); + test_no_overflow_long_body(HTTP_REQUEST, 100000); + + test_header_overflow_error(HTTP_RESPONSE); + test_no_overflow_long_body(HTTP_RESPONSE, 1000); + test_no_overflow_long_body(HTTP_RESPONSE, 100000); + + //// RESPONSES + + for (i = 0; i < response_count; i++) { + test_message(&responses[i]); + } + + for (i = 0; i < response_count; i++) { + if (!responses[i].should_keep_alive) continue; + for (j = 0; j < response_count; j++) { + if (!responses[j].should_keep_alive) continue; + for (k = 0; k < response_count; k++) { + test_multiple3(&responses[i], &responses[j], &responses[k]); + } + } + } + + test_message_count_body(&responses[NO_HEADERS_NO_BODY_404]); + test_message_count_body(&responses[TRAILING_SPACE_ON_CHUNKED_BODY]); + + // test very large chunked response + { + char * msg = create_large_chunked_message(31337, + "HTTP/1.0 200 OK\r\n" + "Transfer-Encoding: chunked\r\n" + "Content-Type: text/plain\r\n" + "\r\n"); + struct message large_chunked = + {.name= "large chunked" + ,.type= HTTP_RESPONSE + ,.raw= msg + ,.should_keep_alive= FALSE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 +
,.http_minor= 0 + ,.status_code= 200 + ,.num_headers= 2 + ,.headers= + { { "Transfer-Encoding", "chunked" } + , { "Content-Type", "text/plain" } + } + ,.body_size= 31337*1024 + }; + test_message_count_body(&large_chunked); + free(msg); + } + + + + printf("response scan 1/2 "); + test_scan( &responses[TRAILING_SPACE_ON_CHUNKED_BODY] + , &responses[NO_HEADERS_NO_BODY_404] + , &responses[NO_REASON_PHRASE] + ); + + printf("response scan 2/2 "); + test_scan( &responses[BONJOUR_MADAME_FR] + , &responses[UNDERSTORE_HEADER_KEY] + , &responses[NO_CARRIAGE_RET] + ); + + puts("responses okay"); + + + /// REQUESTS + + test_simple("hello world", 0); + test_simple("GET / HTP/1.1\r\n\r\n", 0); + + + test_simple("ASDF / HTTP/1.1\r\n\r\n", 0); + test_simple("PROPPATCHA / HTTP/1.1\r\n\r\n", 0); + test_simple("GETA / HTTP/1.1\r\n\r\n", 0); + + // Well-formed but incomplete + test_simple("GET / HTTP/1.1\r\n" + "Content-Type: text/plain\r\n" + "Content-Length: 6\r\n" + "\r\n" + "fooba", + 0); + + static const char *all_methods[] = { + "DELETE", + "GET", + "HEAD", + "POST", + "PUT", + //"CONNECT", //CONNECT can't be tested like other methods, it's a tunnel + "OPTIONS", + "TRACE", + "COPY", + "LOCK", + "MKCOL", + "MOVE", + "PROPFIND", + "PROPPATCH", + "UNLOCK", + 0 }; + const char **this_method; + for (this_method = all_methods; *this_method; this_method++) { + char buf[200]; + sprintf(buf, "%s / HTTP/1.1\r\n\r\n", *this_method); + test_simple(buf, 1); + } + + const char *dumbfuck2 = + "GET / HTTP/1.1\r\n" + "X-SSL-Bullshit: -----BEGIN CERTIFICATE-----\r\n" + "\tMIIFbTCCBFWgAwIBAgICH4cwDQYJKoZIhvcNAQEFBQAwcDELMAkGA1UEBhMCVUsx\r\n" + "\tETAPBgNVBAoTCGVTY2llbmNlMRIwEAYDVQQLEwlBdXRob3JpdHkxCzAJBgNVBAMT\r\n" + "\tAkNBMS0wKwYJKoZIhvcNAQkBFh5jYS1vcGVyYXRvckBncmlkLXN1cHBvcnQuYWMu\r\n" + "\tdWswHhcNMDYwNzI3MTQxMzI4WhcNMDcwNzI3MTQxMzI4WjBbMQswCQYDVQQGEwJV\r\n" + "\tSzERMA8GA1UEChMIZVNjaWVuY2UxEzARBgNVBAsTCk1hbmNoZXN0ZXIxCzAJBgNV\r\n" + 
"\tBAcTmrsogriqMWLAk1DMRcwFQYDVQQDEw5taWNoYWVsIHBhcmQYJKoZIhvcNAQEB\r\n" + "\tBQADggEPADCCAQoCggEBANPEQBgl1IaKdSS1TbhF3hEXSl72G9J+WC/1R64fAcEF\r\n" + "\tW51rEyFYiIeZGx/BVzwXbeBoNUK41OK65sxGuflMo5gLflbwJtHBRIEKAfVVp3YR\r\n" + "\tgW7cMA/s/XKgL1GEC7rQw8lIZT8RApukCGqOVHSi/F1SiFlPDxuDfmdiNzL31+sL\r\n" + "\t0iwHDdNkGjy5pyBSB8Y79dsSJtCW/iaLB0/n8Sj7HgvvZJ7x0fr+RQjYOUUfrePP\r\n" + "\tu2MSpFyf+9BbC/aXgaZuiCvSR+8Snv3xApQY+fULK/xY8h8Ua51iXoQ5jrgu2SqR\r\n" + "\twgA7BUi3G8LFzMBl8FRCDYGUDy7M6QaHXx1ZWIPWNKsCAwEAAaOCAiQwggIgMAwG\r\n" + "\tA1UdEwEB/wQCMAAwEQYJYIZIAYb4QgHTTPAQDAgWgMA4GA1UdDwEB/wQEAwID6DAs\r\n" + "\tBglghkgBhvhCAQ0EHxYdVUsgZS1TY2llbmNlIFVzZXIgQ2VydGlmaWNhdGUwHQYD\r\n" + "\tVR0OBBYEFDTt/sf9PeMaZDHkUIldrDYMNTBZMIGaBgNVHSMEgZIwgY+AFAI4qxGj\r\n" + "\tloCLDdMVKwiljjDastqooXSkcjBwMQswCQYDVQQGEwJVSzERMA8GA1UEChMIZVNj\r\n" + "\taWVuY2UxEjAQBgNVBAsTCUF1dGhvcml0eTELMAkGA1UEAxMCQ0ExLTArBgkqhkiG\r\n" + "\t9w0BCQEWHmNhLW9wZXJhdG9yQGdyaWQtc3VwcG9ydC5hYy51a4IBADApBgNVHRIE\r\n" + "\tIjAggR5jYS1vcGVyYXRvckBncmlkLXN1cHBvcnQuYWMudWswGQYDVR0gBBIwEDAO\r\n" + "\tBgwrBgEEAdkvAQEBAQYwPQYJYIZIAYb4QgEEBDAWLmh0dHA6Ly9jYS5ncmlkLXN1\r\n" + "\tcHBvcnQuYWMudmT4sopwqlBWsvcHViL2NybC9jYWNybC5jcmwwPQYJYIZIAYb4QgEDBDAWLmh0\r\n" + "\tdHA6Ly9jYS5ncmlkLXN1cHBvcnQuYWMudWsvcHViL2NybC9jYWNybC5jcmwwPwYD\r\n" + "\tVR0fBDgwNjA0oDKgMIYuaHR0cDovL2NhLmdyaWQt5hYy51ay9wdWIv\r\n" + "\tY3JsL2NhY3JsLmNybDANBgkqhkiG9w0BAQUFAAOCAQEAS/U4iiooBENGW/Hwmmd3\r\n" + "\tXCy6Zrt08YjKCzGNjorT98g8uGsqYjSxv/hmi0qlnlHs+k/3Iobc3LjS5AMYr5L8\r\n" + "\tUO7OSkgFFlLHQyC9JzPfmLCAugvzEbyv4Olnsr8hbxF1MbKZoQxUZtMVu29wjfXk\r\n" + "\thTeApBv7eaKCWpSp7MCbvgzm74izKhu3vlDk9w6qVrxePfGgpKPqfHiOoGhFnbTK\r\n" + "\twTC6o2xq5y0qZ03JonF7OJspEd3I5zKY3E+ov7/ZhW6DqT8UFvsAdjvQbXyhV8Eu\r\n" + "\tYhixw1aKEPzNjNowuIseVogKOLXxWI5vAi5HgXdS0/ES5gDGsABo4fqovUKlgop3\r\n" + "\tRA==\r\n" + "\t-----END CERTIFICATE-----\r\n" + "\r\n"; + test_simple(dumbfuck2, 0); + +#if 0 + // NOTE(Wed Nov 18 11:57:27 CET 2009) this seems okay. we just read body + // until EOF. 
+ // + // no content-length + // error if there is a body without content length + const char *bad_get_no_headers_no_body = "GET /bad_get_no_headers_no_body/world HTTP/1.1\r\n" + "Accept: */*\r\n" + "\r\n" + "HELLO"; + test_simple(bad_get_no_headers_no_body, 0); +#endif + /* TODO sending junk and large headers gets rejected */ + + + /* check to make sure our predefined requests are okay */ + for (i = 0; requests[i].name; i++) { + test_message(&requests[i]); + } + + + + for (i = 0; i < request_count; i++) { + if (!requests[i].should_keep_alive) continue; + for (j = 0; j < request_count; j++) { + if (!requests[j].should_keep_alive) continue; + for (k = 0; k < request_count; k++) { + test_multiple3(&requests[i], &requests[j], &requests[k]); + } + } + } + + printf("request scan 1/4 "); + test_scan( &requests[GET_NO_HEADERS_NO_BODY] + , &requests[GET_ONE_HEADER_NO_BODY] + , &requests[GET_NO_HEADERS_NO_BODY] + ); + + printf("request scan 2/4 "); + test_scan( &requests[POST_CHUNKED_ALL_YOUR_BASE] + , &requests[POST_IDENTITY_BODY_WORLD] + , &requests[GET_FUNKY_CONTENT_LENGTH] + ); + + printf("request scan 3/4 "); + test_scan( &requests[TWO_CHUNKS_MULT_ZERO_END] + , &requests[CHUNKED_W_TRAILING_HEADERS] + , &requests[CHUNKED_W_BULLSHIT_AFTER_LENGTH] + ); + + printf("request scan 4/4 "); + test_scan( &requests[QUERY_URL_WITH_QUESTION_MARK_GET] + , &requests[PREFIX_NEWLINE_GET ] + , &requests[CONNECT_REQUEST] + ); + + puts("requests okay"); + + return 0; +} diff --git a/oniguruma/AUTHORS b/oniguruma/AUTHORS new file mode 100644 index 0000000..93167bd --- /dev/null +++ b/oniguruma/AUTHORS @@ -0,0 +1 @@ +sndgk393 AT ybb DOT ne DOT jp (K.Kosako) diff --git a/oniguruma/CMakeLists.txt b/oniguruma/CMakeLists.txt new file mode 100644 index 0000000..31bb498 --- /dev/null +++ b/oniguruma/CMakeLists.txt @@ -0,0 +1,49 @@ +include_directories(${CMAKE_CURRENT_SOURCE_DIR}) +include_directories(${CMAKE_CURRENT_BINARY_DIR}) + +INCLUDE (CheckFunctionExists) +INCLUDE (CheckIncludeFiles) 
+INCLUDE (CheckTypeSize) + +CHECK_FUNCTION_EXISTS(alloca C_ALLOCA) +CHECK_FUNCTION_EXISTS(memcmp HAVE_MEMCMP) + +CHECK_INCLUDE_FILES(alloca.h HAVE_ALLOCA_H) +CHECK_INCLUDE_FILES(strings.h HAVE_STRINGS_H) +CHECK_INCLUDE_FILES(string.h HAVE_STRING_H) +CHECK_INCLUDE_FILES(stdlib.h HAVE_STDLIB_H) +CHECK_INCLUDE_FILES(sys/time.h HAVE_SYS_TIME_H) +CHECK_INCLUDE_FILES(sys/times.h HAVE_SYS_TIMES_H) +CHECK_INCLUDE_FILES(unistd.h HAVE_UNISTD_H) +CHECK_INCLUDE_FILES(memory.h HAVE_MEMORY_H) +CHECK_INCLUDE_FILES(stdarg.h HAVE_STDARG_PROTOTYPES) + +CHECK_TYPE_SIZE("int" SIZEOF_INT) +CHECK_TYPE_SIZE("long" SIZEOF_LONG) +CHECK_TYPE_SIZE("short" SIZEOF_SHORT) + +CONFIGURE_FILE(${CMAKE_CURRENT_SOURCE_DIR}/config.h.in ${CMAKE_CURRENT_BINARY_DIR}/config.h) + +include_directories(${CMAKE_CURRENT_SOURCE_DIR}) + +set(SOURCES regint.h regparse.h regenc.h st.h +regerror.c regparse.c regext.c regcomp.c regexec.c reggnu.c +regenc.c regsyntax.c regtrav.c regversion.c st.c +regposix.c regposerr.c +enc/unicode.c enc/ascii.c enc/utf8.c +enc/utf16_be.c enc/utf16_le.c +enc/utf32_be.c enc/utf32_le.c +enc/euc_jp.c enc/sjis.c enc/iso8859_1.c +enc/iso8859_2.c enc/iso8859_3.c +enc/iso8859_4.c enc/iso8859_5.c +enc/iso8859_6.c enc/iso8859_7.c +enc/iso8859_8.c enc/iso8859_9.c +enc/iso8859_10.c enc/iso8859_11.c +enc/iso8859_13.c enc/iso8859_14.c +enc/iso8859_15.c enc/iso8859_16.c +enc/euc_tw.c enc/euc_kr.c enc/big5.c +enc/gb18030.c enc/koi8_r.c enc/cp1251.c) + + +add_library(libonig STATIC ${SOURCES}) +set_target_properties(libonig PROPERTIES OUTPUT_NAME "libonig") diff --git a/oniguruma/COPYING b/oniguruma/COPYING new file mode 100644 index 0000000..2cee0bb --- /dev/null +++ b/oniguruma/COPYING @@ -0,0 +1,28 @@ +Oniguruma LICENSE +----------------- + +/*- + * Copyright (c) 2002-2007 K.Kosako + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ diff --git a/oniguruma/HISTORY b/oniguruma/HISTORY new file mode 100644 index 0000000..06f38c2 --- /dev/null +++ b/oniguruma/HISTORY @@ -0,0 +1,2052 @@ +History + +2010/01/09: Version 5.9.2 + +2010/01/05: [bug] fix utf16be_code_to_mbc() and utf16le_code_to_mbc(). +2008/09/16: [bug] fix memory leaks in parse_exp(). +2008/08/01: [bug] fix memory leaks. +2008/06/17: [bug] invalid type of argument was used + in onig_st_lookup_strend(). +2008/06/16: [bug] invalid CaseFoldMap entry in ISO-8859-5. 0xdf -> 0xde +2008/02/19: [new] add: onig_reg_init(). +2008/02/19: [new] add: onig_free_body(). +2008/02/19: [new] add: onig_new_without_alloc(). +2008/02/19: [API] rename onig_alloc_init() to onig_reg_init(), + and argument type changed. +2008/01/31: [impl] move UTF16_IS_SURROGATE_XXX() to regenc.h. +2008/01/30: [bug] (thanks akr) + fix euctw_islead(). 
+2008/01/23: [bug] update enc/koi8.c. + +2007/12/22: Version 5.9.1 + +2007/12/21: [impl] add sprint_byte(). +2007/11/28: [bug] (thanks Andy Armstrong) + don't overwrite error code in fetch_name(). +2007/11/12: [bug] utf8 mbc length of code 0xfe, 0xff are not 1, +2007/10/23: [spec] onig_enc_len() takes three arguments. (not used) +2007/10/15: [impl] (thanks Rui Hirokawa) + add check HAVE_STDARG_H. +2007/09/07: [API] rename enc_len() to onig_enc_len() in oniguruma.h. +2007/09/04: [API] remove ONIGENC_ERR_XXXXX. +2007/09/03: [API] add error ONIGERR_INVALID_CODE_POINT_VALUE. +2007/09/03: [impl] change error message to "invaid code point value" + for ONIGERR_INVALID_WIDE_CHAR_VALUE. +2007/09/03: [bug] xxx_code_to_mbclen() should return + ONIGERR_INVALID_WIDE_CHAR_VALUE for invalid code point. + ex. /[\x{7fffffff}]/ for ASCII encoding. +2007/08/28: [impl] remove "warning: no previous declaration ...". +2007/08/21: [impl] remove warnings in enc/mktable.c. +2007/08/20: [impl] remove "warning: unused parameter" +2007/08/20: [impl] remove "warning: comparison between signed and unsigned". +2007/08/06: [impl] remove clear_not_flag_cclass(). +2007/08/03: [bug] fix the case of undefined USE_NAMED_GROUP. +2007/08/02: [spec] add backref by number. +2007/08/01: [API] add OnigCtype. +2007/07/27: [spec] add USE_CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS. +2007/07/24: [impl] define PLATFORM_UNALIGNED_WORD_ACCESS. +2007/07/23: [dist] fix doc/FAQ.ja. + +2007/07/14: Version 5.9.0 + +2007/07/13: [bug] add check into onig_reduce_nested_quantifier(). +2007/06/26: [spec] (thanks K.Takata) + ONIG_OPTION_SINGLELINE: '$' -> '\Z' (as Perl) +2007/06/26: [dist] (thanks K.Takata) + fix documents API and API.ja. +2007/06/19: [impl] remove IS_NOT_NULL() check before onig_node_free(). +2007/06/18: [bug] (thanks KUBO Takehiro) + WORD_ALIGNMENT_SIZE must be sizeof(OnigCodePoint). +2007/06/18: [impl] rename CClassNode flags. +2007/06/18: [bug] initialization miss. 
+2007/06/13: [impl] change node type reference NXXXX. +2007/06/11: [impl] add node type bit. +2007/06/11: [spec] allow anchor in enclosed repeater. /(\z)*/ +2007/06/11: [impl] rename node types. +2007/06/08: [impl] remove OP_SET_OPTION_PUSH and OP_SET_OPTION from match_at(). +2007/06/07: [impl] use xvsnprintf(). +2007/06/06: [tune] don't set qn->next_head_exact for string first byte is zero. +2007/06/06: [impl] remove unused variables. + +2007/06/04: Version 5.8.0 + +2007/06/04: [impl] add #ifndef vsnprintf into regint.h. +2007/05/31: [dist] add configure option '--enable-crnl-as-line-terminator'. +2007/05/30: [dist] add sample/crnl.c. +2007/05/30: [bug] should check USE_CRNL_AS_LINE_TERMINATOR case + in onig_search(). +2007/05/29: [impl] move USE_CRNL_AS_LINE_TERMINATOR into regenc.h. +2007/05/29: [impl] should check USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE + in forward_search_range() and backward_search_range(). + +2007/04/27: Version 5.7.0 + +2007/04/20: [spec] add config USE_MATCH_RANGE_IS_COMPLETE_RANGE. +2007/04/20: [impl] refactoring in match_at(). + +2007/04/12: Version 5.6.1 + +2007/04/12: [bug] must not use UChar in oniguruma.h. +2007/04/09: [impl] change STATE_CHECK_BUFF_MAX_SIZE value from 0x8000 + to 0x4000. [ruby-core:10883] + +2007/04/04: Version 5.6.0 (mourning for Hideo Takamatsu) + +2007/04/03: [spec] add new notation (?'name'), \k'name', \g'name'. +2007/04/03: [impl] remove unused variable. +2007/03/26: [impl] add 'void' to function declarations. + +2007/03/06: Version 5.5.3 + +2007/03/06: [bug] add #include <malloc.h> for bcc32. + (In bcc32, alloca() is declared in malloc.h.) +2007/03/02: [bug] invalid optimization for semi-end-buf in onig_search(). + ex. /\n\Z/.match("aaaaaaaaaa\n") +2007/03/02: [impl] move range > start check position in end_buf process. + +2007/01/09: Version 5.5.2 + +2007/01/09: [impl] rename USE_EXTERNAL_LOWER_CASE_CONV_TABLE. +2007/01/05: [tune] select_opt_exact_info() didn't work for empty info. + ex.
/.a/ make MAP info instead of EXACT info. +2006/12/28: [impl] add print_enc_string() for ONIG_DEBUG mode. + +2006/12/22: Version 5.5.1 + +2006/12/22: [impl] rename ADD_PAD_TO_SHORT_BYTE_STRING + . to USE_PAD_TO_SHORT_BYTE_CHAR. +2006/12/21: [spec] should check too short multibyte char in parse_exp(). + add ADD_PAD_TO_SHORT_BYTE_STRING. + ex. /\x00/ in UTF16 should be error. + +2006/12/06: Version 5.5.0 + +2006/12/05: [bug] should add unfold-1 codes from folded code into + onigenc_unicode_get_case_fold_codes_by_str(). + (ex. "S" -> "s" -> 0x017f) +2006/12/05: [new] add flag ONIGENC_CASE_FOLD_TURKISH_AZERI and + USE_UNICODE_CASE_FOLD_TURKISH_AZERI. (disabled in default) +2006/12/04: [spec] remove ONIGENC_CASE_FOLD_FULL. +2006/11/30: [impl] remove unnecessary check in xxx_mbc_case_fold(). + +2006/11/29: Version 5.4.0 + +2006/11/28: [spec] INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR is enabled in + default case fold status. +2006/11/28: [spec] rename ONIGENC_CASE_FOLD_MULTI_CHAR to + INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR. +2006/11/28: [impl] remove USE_UNICODE_CASE_FOLD_MULTI_CHAR. +2006/11/28: [impl] remove Fold[123]Table and add FoldTable. +2006/11/27: [impl] change tool/unicode_fc.rb to see CaseFolding.txt. +2006/11/24: [bug] should call callback for to[j] <-> to[k] in + onigenc_unicode_apply_all_case_fold(). + +2006/11/22: Version 5.3.0 + +2006/11/22: [dist] add index_ja.html. +2006/11/22: [impl] undef ONIG_ESCAPE_UCHAR_COLLISION in regint.h and regenc.h. +2006/11/21: [bug] invalid array access. +2006/11/21: [impl] escape UChar collision from config.h. +2006/11/20: [new] add Hiragana/Katakana properties into Shift_JIS. +2006/11/20: [impl] fix CR_Katakana[] values in EUC-JP. +2006/11/17: [impl] declare strend hash table functions in regint.h. +2006/11/17: [impl] move property list functions to regenc.c. +2006/11/17: [new] add Hiragana/Katakana properties into EUC-JP. +2006/11/15: [impl] remove NOT_RUBY from AM_CFLAGS. 
+ +2006/11/14: Version 5.2.0 + +2006/11/14: [impl] remove program codes for Ruby. +2006/11/14: [impl] reduce program codes for Ruby. +2006/11/10: [bug] 0x24, 0x2b, 0x3c, 0x3d, 0x3e, 0x5e, 0x60, 0x7c, 0x7e + should be [:punct:]. +2006/11/09: [new] (thanks Byte) + add new character encoding CP1251. +2006/11/08: [impl] rename QUALIFIER -> QUANTIFIER. + +2006/11/07: Version 5.1.0 + +2006/11/07: [dist] remove test.rb, testconv.rb and testconvu.rb. +2006/11/07: [bug] get_case_fold_codes_by_str() should handle 'Ss' and 'sS' + combination for ess-tsett. +2006/11/07: [impl] apply_all_case_fold() doesn't need to return all + case character combination for multi-character folding. + (ONIGENC_CASE_FOLD_MULTI_CHAR) +2006/11/07: [bug] (thanks Byte) + add { 0xa3, 0xb3 } to CaseFoldMap[] for KOI8-R. +2006/11/06: [spec] change ONIG_OPTION_FIND_LONGEST to search all of + the string range. + add USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE. +2006/11/02: [impl] re-implement expand_case_fold_string() for + ONIGENC_CASE_FOLD_MULTI_CHAR. +2006/10/30: [impl] add NSTR_DONT_GET_OPTINFO flag. +2006/10/30: [impl] (thanks K.Takata) + add THREAD_SYSTEM_INIT and THREAD_SYSTEM_END. +2006/10/30: [bug] (thanks Wolfgang Nadasi-Donner) + invalid offset value was used in STATE_CHECK_BUFF_INIT(). +2006/10/27: [tune] speed up ONIGENC_MBC_CASE_FOLD() for UTF-16, UTF-32. + (ASCII code check) +2006/10/27: [tune] (thanks Kornelius Kalnbach) + String#scan for long string needs long time compare with + old Ruby + by initialization time for combination explosion check + ex. ("test " * 100_000).scan(/\w*\s?/) + change STATE_CHECK_BUFF_MAX_SIZE from 0x8000000 to 0x8000. + reduce initialization area of state_check_buff. +2006/10/25: [impl] add DISABLE_CASE_FOLD_MULTI_CHAR(). + +2006/10/23: Version 5.0.1 + +2006/10/23: [bug] should fold string in expand_case_fold_string(). +2006/10/23: [bug] (thanks Km) + too many case fold/unfold expansion problem. + don't expand and set ambig flag to the string node. 
+ (except ONIGENC_CASE_FOLD_MULTI_CHAR). +2006/10/23: [bug] (thanks K.Takata) + invalid \p{Alnum}, \p{ASCII}, [:alnum:], [:ascii:]. + fix OnigEncAsciiCtypeTable[] etc... +2006/10/23: [spec] (thanks K.Takata) + add [:word:] POSIX bracket. +2006/10/23: [bug] (thanks K.Takata) + \p{Word} doesn't work. +2006/10/20: [impl] don't expand for AMBIG_FLAG string in + expand_case_fold_string(). + +2006/10/19: Version 5.0.0 + +2006/10/18: [bug] ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM should be 13. +2006/10/18: [impl] remove unused functions. +2006/10/18: [dist] update documents. +2006/10/18: [API] move OnigMetaCharTableType to OnigSyntaxType. +2006/10/18: [dev] add too/unicode_fc.rb, unicode_pc.rb. +2006/10/18: [dist] remove MANIFEST-RUBY from distribution. +2006/10/18: [bug] return duplicated code in + onigenc_unicode_get_case_fold_codes_by_str(). +2006/10/18 [API] remove ONIG_SYN_OP2_CHAR_PROPERTY_PREFIX_IS. +2006/10/18: [dev] add tool/19. +2006/10/18: [dist] remove target 19 from Makefile.am. +2006/10/17: [dist] add enc/unicode.c to target 19 of win32/Makefile. +2006/10/17: [impl] change type for escape VC++ warning. +2006/10/17: [API] rename ONIGENC_CASE_FOLD_NONE to ONIGENC_CASE_FOLD_MIN. +2006/10/17: [dist] remove INSTALL-RUBY from distribution. +2006/10/17: [dist] update LTVERSION to "2:0:0". +2006/10/17: [impl] remove warnings for [make CFLAGS="-g -O2 -Wall"] + in the case USE_UNICODE_PROPERTIES and + USE_UNICODE_CASE_FOLD_MULTI_CHAR are undefined. +2006/10/17: [impl] remove warnings for [make CFLAGS="-g -O2 -Wall"]. +2006/10/17: [impl] re-implement onigenc_unicode_apply_all_case_fold(). + multi-char by case folded char-class is treated as + caseless-string (ambig flag on). + enable OP_EXACT1_IC and OP_EXACTN_IC. +2006/10/16: [bug] unfold expand for 1->2, 1->3 folding in + onigenc_unicode_apply_all_case_fold(). + add CaseFoldExpand_12[], CaseFoldExpand_13[]. +2006/10/16: [bug] (thanks Akinori Musha) + first argument of rb_warn() should be format string. 
+2006/10/16: [impl] add msa.state_check_buff_size initialization + in onig_search(). +2006/10/16: [spec] re-implement Unicode Caseless Match codes. +2006/10/10: [bug] should call onig_st_free_table() in + onig_free_shared_cclass_table(). +2006/10/10: [impl] remove OnigCompCaseFoldCodes. +2006/10/10: [impl] remove onigenc_ascii_is_mbc_ambiguous() and + onigenc_mbn_is_mbc_ambiguous(). +2006/10/10: [API] remove is_mbc_ambiguous() member from OnigEncodingType. +2006/10/10: [API] rename onig_set_default_ambig_flag() to + onig_set_default_case_fold_flag(), + onig_get_default_ambig_flag() to + onig_get_default_case_fold_flag(), + onig_get_ambig_flag() to onig_get_case_fold_flag(). +2006/10/10: [API] rename ambig_flag to case_fold_flag. +2006/10/10: [API] rename OnigAmbigType to OnigCaseFoldType. +2006/10/10: [impl] rename ONIGENC_IS_CODE_SB_WORD() to IS_CODE_SB_WORD() + and move to regint.h. +2006/10/10: [impl] remove OP_WORD_SB and OP_WORD_MB. +2006/10/10: [impl] remove OP_EXACT1_IC and OP_EXACTN_IC from match_at(). +2006/10/10: [impl] should free new_str in expand_case_fold_string(). +2006/10/06: [dist] add test entrys to sample/encode.c. +2006/10/06: [impl] re-implement caseless match (case-fold). +2006/10/06: [impl] expand string node by case fold variations. + add expand_case_fold_string(). +2006/10/05: [spec] rename OnigCompAmbigCodeItem to OnigCaseFoldCodeItem. +2006/10/05: [spec] add apply_all_case_fold() and get_case_fold_codes_by_str() + to OnigEncodingType. +2006/10/05: [spec] remove ambig_flag, get_all_pair_ambig_codes() and + get_all_comp_ambig_codes() member from OnigEncodingType. +2006/10/03: [impl] rename mbc_to_normalize() to mbc_case_fold(). +2006/10/03: [spec] rename ONIGENC_AMBIGUOUS_MATCH_XXX + to ONIGENC_CASE_FOLD_XXX. + rename ONIGENC_CASE_FOLD_COMPOUND + to ONIGENC_CASE_FOLD_MULTI_CHAR. +2006/10/02: [impl] remove all ONIG_RUBY_M17N part. +2006/09/29: [impl] initialize state_check_buff_size in STATE_CHECK_BUFF_INIT(). + make valgrind happy. 
+2006/09/22: [impl] remove parse time ctype values (CTYPE_WORD etc...) +2006/09/22: [ruby] enable USE_BACKREF_AT_LEVEL for Ruby mode. +2006/09/22: [spec] (thanks Allan Odgaard) + allow upper case letter as the first character + of group name. + fetch_name() and fetch_name_with_level() +2006/09/21: [impl] convert to ascii for parameter string in + onig_error_code_to_str(). + add enc member into OnigErrorInfo. +2006/09/21: [dist] update documents for Unicode Property. +2006/09/21: [new] add Unicode Properties. (enc/unicode.c) + Any, Assigned, C, Cc, L, Lm, Arabic, Greek etc... +2006/09/21: [impl] add USE_UNICODE_PROPERTIES into regenc.h. +2006/09/21: [impl] remove USE_UNICODE_FULL_RANGE_CTYPE. +2006/09/20: [impl] change ONIGENC_CTYPE_XXXX to sequencial values. + add BIT_CTYPE_XXXX bit flags to regenc.h. + update XXXX_CtypeTable[] for BIT_CTYPE_ALNUM. +2006/09/19: [memo] move from CVS to Subversion (1.3.2). +2006/09/19: [impl] (thanks KOYAMA Tetsuji) + HAVE_STDARG_PROTOTYPES was not defined in Mac OS X + by Xcode 2.4(gcc 4.0.1) problem. [php-dev 1312] etc... +2006/09/15: [bug] (thanks Allan Odgaard) + out of range access in bm_search_notrev(). + (p < s) +2006/09/13: [impl] add ONIGENC_CTYPE_ENC_EXT flag. +2006/09/13: [spec] remove 'Is' prefix check for property name + from fetch_char_property_to_ctype(). +2006/09/13: [API] add property_name_to_ctype member to OnigEncodingType. +2006/09/12: [spec][ruby] add ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY and + ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT to OnigSyntaxRuby. + +2006/09/08: Version 4.4.2 + +2006/09/08: [test] success in ruby 1.9.0 (2006-08-22) [i686-linux]. +2006/09/08: [bug] (thanks K.Takata) + out of range access in bm_search_notrev(). +2006/09/04: [spec] (thanks K.Takata) + allow look-behind in negative look-behind. + ex. /(? (?:a*){n,n}, (?:a+){n,n} +2006/09/21: [impl] reduce (a*){n,m}, (a+){n,m} => (a*){n,n}, (a+){n,n} + if backreference is not used. 
+2006/08/17: [bug] should check scan_env.num_call > 0 for backrefed pattern + in combination explosion check. + +2006/08/17: Version 4.3.0 + +2006/08/17: [test] success in ruby 1.9.0 (2006-07-28) [i686-linux]. +2006/08/17: [new] add config USE_COMBINATION_EXPLOSION_CHECK. + check /(.+)*/, /(\s*foo\s*)*/ etc... + [API] add num_comb_exp_check member in regex_t. + [dist] change LTVERSION value to "1:0:0" in configure.in. +2006/08/15: [bug] OP_REPEAT_INC process in match_at(). + should check repeat-count >= range-upper and + range-upper may be infinite. + +2006/08/11: Version 4.2.3 + +2006/08/11: [test] success in ruby 1.9.0 (2006-07-28) [i686-linux]. +2006/08/10: [impl] remove double call in set_qualifier(). +2006/08/10: [impl] remove by_number member in QualifierNode. +2006/08/09: [impl] remove a comma at the end of enum ReduceType + for escape warning on Mac OS X. +2006/08/07: [impl] remove warning in regcomp.c. +2006/08/07: [spec] move definition of USE_BACKREF_AT_LEVEL into NOT_RUBY. + +2006/08/03: Version 4.2.2 + +2006/08/03: [test] success in ruby 1.9.0 (2006-07-28) [i686-linux]. +2006/08/03: [bug] (thanks Hiroyuki Yamamoto) + segmentation fault in regexec(). (POSIX API) +2006/08/02: [bug] combination of \G in look-ahead/look-behind and other + anchors(\A, \z, \Z) cause invalid result. + ex. /(?!\G)a\z/.match("ba") + start arg. of MATCH_ARG_INIT() should be original + arg. of onig_search(). + +2006/07/31: Version 4.2.1 + +2006/07/31: [test] success in ruby 1.9.0 (2006-07-28) [i686-linux]. +2006/07/31: [bug] (thanks Kimura Minoru) + re-implement bm_search_notrev(). +2006/07/31: [impl] bm_search_notrev() refactoring. +2006/07/31: [bug] (thanks Kimura Minoru) + fix incomplete multibyte string in exact info. +2006/07/31: [impl] (thanks Seiji Masugata) + remove cast in va_init_list() for Intel C Compiler. + +2006/07/18: Version 4.2.0 + +2006/07/18: [test] success in ruby 1.9.0 (2006-03-01) [i686-linux]. 
+2006/07/18: [new] (thanks Wolfgang Nadasi-Donner) + add back reference with nest level. + \k, \k +2006/07/11: [impl] change long to unsigned long for ONIG_OPTION_XXX + and ONIG_SYN_XXX number literals. + +2006/07/03: Version 4.1.2 + +2006/07/03: [test] success in ruby 1.9.0 (2006-03-01) [i686-linux]. +2006/07/03: [spec] (thanks Wolfgang Nadasi-Donner) + allow \G in look-behind. + add ANCHOR_BEGIN_POSITION flag in setup_tree(). +2006/06/12: [impl] (thanks matz) + fix cast from char* to const char* + in onig_snprintf_with_pattern(). + fix cast from char* to const char* + for PopularQStr[] and ReduceQStr[]. + +2006/05/22: Version 4.1.1 + +2006/05/22: [test] success in ruby 1.9.0 (2006-03-01) [i686-linux]. +2006/05/22: [impl] add position string argument to STACK_BASE_CHECK(). +2006/05/22: [bug] (thanks NARUSE, Yui) + add STK_NULL_CHECK_END to IS_TO_VOID_TARGET(). + ex. core dump in + /(?\(([^\(\)]++|\g)*+\))/.match('((a))') + +2006/05/15: Version 4.1.0 + +2006/05/15: [test] success in ruby 1.9.0 (2006-03-01) [i686-linux]. +2006/05/15: [impl] thread atomic changes for onig_end() and + onig_free_node_list(). +2006/05/15: [test] success in ruby 1.9.0 (2006-03-01) [i686-linux]. +2005/05/15: [dist] update API, API.ja, FAQ, FAQ.ja. +2006/05/15: [spec] remove onig_recompile(), onig_recompile_deluxe() + and re_recompile_pattern(). + add config USE_RECOMPILE_API. +2006/05/15: [impl] improved thread safe implementation of onig_search() + and onig_match(). + +2006/05/11: Version 4.0.4 + +2006/05/11: [test] success in ruby 1.9.0 (2006-03-01) [i686-linux]. +2006/05/11: [bug] (thanks Yuji Kaneda) + dead-lock in onig_end(). +2006/05/11: [dist] update index.html. + +2006/05/08: Version 4.0.3 + +2006/05/08: [test] success in ruby 1.9.0 (2006-03-01) [i686-linux]. +2006/05/08: [bug] (thanks Allan Odgaard) + Segmentation fault in backward search. + ex. /^\t.*$/ +2006/04/18: [dist] update index.html. +2006/04/05: [dist] update index.html. +2006/03/24: [dist] update doc/RE, doc/RE.ja. 
+ +2006/03/23: Version 4.0.2 + +2006/03/22: [test] success in ruby 1.9.0 (2006-03-01) [i686-linux]. +2006/03/22: [impl] add both of ONIG_OPTION_DONT_CAPTURE_GROUP + and ONIG_OPTION_CAPTURE_GROUP check. +2006/03/22: [spec] add error code ONIGERR_INVALID_COMBINATION_OF_OPTIONS. +2006/03/22: [impl] remove USE_NAMED_GROUP condition from + ONIG_OPTION_DONT_CAPTURE_GROUP check in parse_effect(). +2006/03/22: [new] add API onig_noname_group_capture_is_active(). +2006/03/01: [spec] rename regex object type from regex_t to OnigRegexType. + add typedef OnigRegexType regex_t + unless ONIG_ESCAPE_REGEX_T_COLLISION is defined. +2006/02/27: [spec] change ONIG_MAX_MULTI_BYTE_RANGES_NUM from 1000 + to 10000. (for docdiff program) +2006/02/17: [dist] change COPYING year 2005 -> 2006. + +2006/02/07: Version 4.0.1 + +2006/02/07: [test] success in ruby 1.9.0 (2005-11-28) [i686-linux]. +2006/02/07: [bug] memory leaks in onig_free_shared_cclass_table(). +2006/02/03: [ruby] add -m 0644 option to install command in "make 19". +2006/02/03: [impl] rename ANCHOR_ANYCHAR_STAR_PL to ANCHOR_ANYCHAR_STAR_ML. + change from IS_POSIXLINE() to IS_MULTILINE() + for ANCHOR_ANYCHAR_START/_ML decision + in optimize_node_left(). +2006/01/26: [dist] update index.html for Oniguruma 2.5.3. +2006/01/25: [dist] update URL in index.html. + +2006/01/24: Version 4.0.0 + +2006/01/24: [test] success in ruby 1.9.0 (2005-11-28) [i386-cygwin]. +2006/01/24: [test] success in ruby 1.9.0 (2005-11-28) [i686-linux]. +2006/01/24: [dist] remove warnings from sample/encode.c. +2006/01/24: [dist] change install description in README(.ja). +2006/01/24: [dist] remove re.c.XXX.patch from distribution and CVS. +2006/01/24: [dist] --- support shared library --- + use GNU libtool/automake. + change configure.in and add Makefile.am, sample/Makefile.am. + add AUTHORS file. +2006/01/24: [dist] test programs return exit code -1 when test fails. +2006/01/24: [bug] (thanks KIMURA Koichi) + invalid syntax definition in ONIG_SYNTAX_GREP. 
+ ONIG_SYN_OP_BRACE_INTERVAL + -> ONIG_SYN_OP_ESC_BRACE_INTERVAL +2006/01/23: [dist] fix configure.in for onig-config. +2006/01/19: [new] add new config USE_UNICODE_ALL_LINE_TERMINATORS. + (U+000d, U+0085, U+2028, U+2029) +2005/12/29: [dist] change pmatch array size to 25 in testconv.rb. +2005/12/26: [dist] fix name in test.rb. +2005/12/26: [dist] update index.html for 2.5.1. + +2005/11/29: Version 3.9.1 + +2005/11/29: [test] success in ruby 1.9.0 (2005-11-28) [i686-linux]. +2005/11/24: [test] success in ruby 1.9.0 (2005-08-09) [i686-linux]. +2005/11/21: [test] success in ruby 1.9.0 (2005-11-20) [i386-cygwin]. +2005/11/21: [bug] (thanks Allan Odgaard) + utf-8 character comments in extended mode leads + invalid result. + ex. /(?x)(?<= # o\n~) / + fix onigenc_unicode_is_code_ctype() and + utf8_is_code_ctype(). +2005/11/20: [bug] (thanks MATSUMOTO Satoshi) (thanks Isao Sonobe) + begin-line anchor and BM search optimization leads + invalid result in UTF-16/32. + fix in set_optimize_exact_info(). + +2005/11/20: Version 3.9.0 + +2005/11/20: [test] success in ruby 1.9.0 (2005-11-20) [i386-cygwin]. +2005/11/20: [test] success in ruby 1.9.0 (2005-10-18) [i386-cygwin]. +2005/11/20: [new] add new config USE_CRNL_AS_LINE_TERMINATOR. + (!!! NO SUPPORT experimental option !!!) +2005/11/15: [bug] (thanks Allan Odgaard) + tok->escape was not cleared in fetch_token_in_cc(). + ex. [\s&&[^\n]] makes wrong result. +2005/10/18: [impl] (thanks nobu) + change sjis_mbc_enc_len() + and node_new_cclass_by_codepoint_range() scope to static. +2005/09/05: [dist] remove link to MultiFind. +2005/09/01: [dist] add link to yagrep. + +2005/08/23: Version 3.8.9 + +2005/08/23: [test] success in ruby 1.9.0 (2005-08-09) [i686-linux]. +2005/08/23: [inst] fix Makefile.in for make ctest/ptest. + +2005/08/23: Version 3.8.8 + +2005/08/23: [test] success in ruby 1.9.0 (2005-08-09) [i686-linux]. +2005/08/23: [impl] split is_code_in_cc() from onig_is_code_in_cc(). 
+2005/08/23: [impl] should check DATA_ENSURE() at OP_CCLASS_NODE in match_at(). +2005/08/23: [impl] (thanks akr) + add ONIG_OPTION_MAXBIT for escape conflict with + Ruby's option. +2005/08/22: [impl] escape GCC 4.0 warnings for testc.c. +2005/08/22: [bug] (thanks nobu, matz) [ruby-dev:26840] + UTF-8 0xFE, 0xFF handling bug in code_is_in_cclass_node(). + abort on /\S*/ =~ "\xfe" +2005/08/22: [impl] escape GCC 4.0 warnings for sample/*.c. +2005/08/22: [impl] fix testconvu.rb. +2005/08/22: [impl] escape GCC 4.0 warnings. + +2005/08/09: Version 3.8.7 + +2005/08/09: [test] success in ruby 1.9.0 (2005-08-09) [i686-linux]. +2005/08/09: [bug] (thanks Allan Odgaard) + should not call enc_len() for s == range + in onig_search(). +2005/08/01: [dist] add mkdir $prefix, mkdir $exec_prefix to make install. + +2005/07/27: Version 3.8.6 + +2005/07/27: [test] success in ruby 1.9.0 (2005-07-26) [i686-linux]. +2005/07/27: [impl] update onig-config.in. +2005/07/26: [new] (thanks Yen-Ju Chen) + add Oniguruma configuration check program. + (onig-config.in) + +2005/07/14: Version 3.8.5 + +2005/07/14: [test] success in ruby 1.9.0 (2005-07-14) [i686-linux]. +2005/07/11: [test] success in ruby 1.9.0 (2005-07-04) [i686-linux]. +2005/07/11: [bug] (thanks nobu) [ruby-dev:26505] + invalid handling for /\c\x/ and /\C-\x/. + fix fetch_escaped_value(). +2005/07/05: [impl] (thanks Alexey Zakhlestine) + escape GCC 4.0 warnings. + +2005/07/01: Version 3.8.4 + +2005/07/01: [test] success in ruby 1.9.0 (2005-07-01) [i686-linux]. +2005/06/30: [test] success in ruby 1.9.0 (2005-06-28) [i686-linux]. +2005/06/30: [dist] add GB 18030 test to sample/encode.c. +2005/06/30: [impl] escape warning of gb18030_left_adjust_char_head(). +2005/06/30: [new] (contributed by KUBO Takehiro) + add new character encoding ONIG_ENCODING_GB18030. +2005/06/30: [bug] invalid ctype check for multibyte encodings. + ("graph", "print") + fix onigenc_mb2/4_is_code_ctype(), + eucjp_is_code_ctype() and sjis_is_code_ctype(). 
+2005/06/30: [bug] invalid conversion from code point to mbc in + onigenc_mb4_code_to_mbc(). + +2005/06/28: Version 3.8.3 + +2005/06/28: [test] success in ruby 1.9.0 (2005-06-28) [i686-linux]. +2005/06/27: [test] success in ruby 1.9.0 (2005-05-31) [i686-linux]. +2005/06/27: [bug] (thanks Wolfgang Nadasi-Donner) + invalid check for never ending recursion. + lower zero quantifier should be treated as + a non-recursive call alternative. + ex. /(?[^()]*(\(\g\)[^()]*)*)/ +2005/06/15: [impl] add divide_ambig_string_node_sub(). +2005/06/15: [dist] add a test to sample/encode.c. +2005/06/10: [new] add ONIG_SYNTAX_PERL_NG. (Perl + named group) + +2005/06/01: Version 3.8.2 + +2005/06/01: [test] success in ruby 1.9.0 (2005-05-31) [i686-linux]. +2005/05/31: [dist] add doc/FAQ and doc/FAQ.ja. +2005/05/31: [impl] minor change in node_new(). +2005/05/30: [test] success in ruby 1.9.0 (2005-05-11) [i686-linux]. +2005/05/30: [bug] (thanks Allan Odgaard) + FreeNodeList null check should be on thread-atomic + in node_new(). + +2005/05/11: Version 3.8.1 + +2005/05/11: [test] success in ruby 1.9.0 (2005-05-11) [i386-mswin32]. +2005/05/11: [dist] update win32/Makefile (make 19). +2005/05/11: [test] success in ruby 1.9.0 (2005-05-11) [i686-linux]. +2005/05/06: [test] success in ruby 1.9.0 (2005-05-06) [i686-linux]. +2005/05/06: [impl] (thanks nobu) [ruby-core:4815] + add #ifdef USE_VARIABLE_META_CHARS to goto label. +2005/04/25: [test] success in ruby 1.9.0 (2005-04-25) [i686-linux]. +2005/04/25: [impl] change DEFAULT_WARN_FUNCTION and DEFAULT_VERB_WARN_FUNCTION + to onig_rb_warn() and onig_rb_warning(). + +2005/04/15: Version 3.8.0 + +2005/04/15: [test] success in ruby 1.9.0 (2005-04-14) [i686-linux]. +2005/04/01: [test] success in ruby 1.9.0 (2005-03-24) [i686-linux]. +2005/04/01: [impl] (thanks Joe Orton) + (thanks Moriyoshi Koizumi) + many const-ification to many *.[ch] files. + +2005/03/25: Version 3.7.2 + +2005/03/25: [test] success in ruby 1.9.0 (2005-03-24) [i686-linux]. 
+2005/03/23: [test] success in ruby 1.9.0 (2005-03-20) [i686-linux]. +2005/03/23: [test] success in ruby 1.9.0 (2005-03-08) [i686-linux]. +2005/03/23: [new] add ONIG_SYNTAX_ASIS. +2005/03/23: [new] add ONIG_SYN_OP2_INEFFECTIVE_ESCAPE. +2005/03/09: [spec] rename MBCTYPE_XXX to RE_MBCTYPE_XXX. (GNU API) +2005/03/08: [test] success in ruby 1.9.0 (2005-03-08) [i686-linux]. +2005/03/08: [impl] (thanks matz) [ruby-dev:25783] + should not allocate memory for key data in st.c. + move st_*_strend() functions from st.c. fixed some + potential memory leaks. + (imported from Ruby 1.9 2005-03-08) + +2005/03/07: Version 3.7.1 + +2005/03/07: [test] success in ruby 1.9.0 (2005-03-07) [i686-linux]. +2005/03/07: [impl] (thanks Rui Hirokawa) + add ONIG_ESCAPE_UCHAR_COLLISION. + rename UChar to OnigUChar in oniguruma.h. +2005/03/07: [impl] remove declarations for Ruby in oniggnu.h. +2005/03/05: [bug] ANCHOR_ANYCHAR_STAR didn't work in onig_search(). +2005/03/01: [dist] remove oniggnu.h from MANIFEST-RUBY. + remove oniggnu.h from make 19. +2005/03/01: [bug] (thanks matz) [ruby-dev:25778] + uninitialized member (OptEnv.backrefed_status) + was used. + +2005/02/19: Version 3.7.0 + +2005/02/19: [test] success in ruby 1.9.0 (2005-02-19) [i386-cygwin]. +2005/02/19: [new] (thanks Minero Aoki) + add onig_region_set(). +2005/02/19: [API] change onig_region_init() to extern. +2005/02/19: [dist] remove reggnu.c from MANIFEST-RUBY. + remove reggnu.c from make 19. +2005/02/19: [dist] update doc/API and doc/API.ja. +2005/02/19: [test] success in ruby 1.9.0 (2005-02-19) [i386-cygwin]. +2005/02/19: [impl] (thanks Alexey Zakhlestine) + change UChar* to const UChar* in oniguruma.h, + regenc.h and regparse.h. +2005/02/13: [impl] change UChar* to const UChar* in oniguruma.h and + onigposix.h and st.h. +2005/02/12: [test] success in ruby 1.9.0 (2005-02-11) [i386-cygwin]. +2005/02/12: [bug] (thanks nobu) [ruby-dev:25676] + type_cclass_hash() fix overrun. 
+2005/02/09: [test] success in ruby 1.9.0 (2005-02-09) [i686-linux]. +2005/02/09: [spec] add RE_OPTION_FIND_NOT_EMPTY etc.. to oniggnu.h. +2005/02/09: [dist] remove hash.c.patch. +2005/02/07: [impl] remove re_mbctab, mbctab_ascii etc... + (USE_COMPATIBILITY_FOR_RUBY_EXTENSION_LIBRARY) + +2005/02/04: Version 3.6.0 + +2005/02/04: [test] success in ruby 1.9.0 (2005-02-04) [i686-linux]. +2005/02/01: [bug] add key_free() call to st_free_table(). +2005/02/01: [new] add onig_get_default_ambig_flag() and + onig_set_default_ambig_flag(). +2005/02/01: [dist] update MANIFEST-RUBY. +2005/01/31: [test] success in ruby 1.9.0 (2005-01-29) [i686-linux]. +2005/01/31: [spec] remove ONIGENC_AMBIGUOUS_MATCH_COMPOUND + from ONIGENC_AMBIGUOUS_MATCH_DEFAULT. +2005/01/31: [dist] update Makefile.in (make 19). +2005/01/29: [memo] (thanks Kazuo Saito) + Oniguruma 3.5.4 was merged to Ruby 1.9.0. +2005/01/28: [impl] (thanks UK-taniyama) + add extern "C" { } directive to oniguruma.h, oniggnu.h + and onigposix.h for C++. +2005/01/25: [impl] remove nested function call for xxx_code_to_mbclen(). + (euc_kr.c, euc_tw.c, big5.c) + +2005/01/19: Version 3.5.4 + +2005/01/19: [test] success in ruby 1.9.0 (2005-01-05) [i686-linux]. +2005/01/19: [bug] (thanks Isao Sonobe) + callback function argument name_end of onig_foreach_name() + was wrong. + name key of name table should be null terminated for + character encoding length. + add strdup_with_null(), rename onig_strdup() to k_strdup(). + use e->name_len in i_names(). +2005/01/17: [impl] (thanks UK-taniyama) + add HAVE_SYS_TYPES_H to config.h.in. + +2005/01/13: Version 3.5.3 + +2005/01/13: [test] success in ruby 1.9.0 (2005-01-05) [i686-linux]. +2005/01/13: [bug] ignore case match bug. + ex. /s+/iu.match("SSSSS") ==> [4..5] + fix OP_EXACT1_IC, OP_EXACTN_IC process. +2005/01/13: [bug] (thanks Isao Sonobe) + ignore case match bug. + ex. /is/iu.match("ss") fail. + fix str_lower_case_match() etc. 
+ +2005/01/05: Version 3.5.2 + +2005/01/05: [test] success in ruby 1.9.0 (2005-01-05) [i686-linux]. +2005/01/05: [test] success in ruby 1.9.0 (2004-12-16) [i686-linux]. +2005/01/05: [bug] (thanks Isao Sonobe) + ignore case match bug. + ex. /s+/iu.match("sssss") ==> [4..5] + fix OP_EXACT1_IC, OP_EXACTN_IC process. +2005/01/05: [bug] (thanks Isao Sonobe) + group name table should be renumbered. + add onig_renumber_name_table(). +2004/12/24: [dist] remove file onigcmpt200.h. + +2004/12/17: Version 3.5.1 + +2004/12/17: [dist] add INSTALL-RUBY to archive. +2004/12/16: [test] success in ruby 1.9.0 (2004-12-16) [i686-linux]. +2004/12/16: [dist] update hash.c.patch. +2004/12/15: [bug] (thanks matz) + char > 127 should be casted to unsigned char. (utf8.c) +2004/12/13: [impl] add HAVE_PROTOTYPES and HAVE_STDARG_PROTOTYPES definition + to oniguruma.h in the case __cplusplus. +2004/12/06: [dist] update doc/RE and doc/RE.ja. +2004/12/03: [impl] (thanks nobu) + st.h fix prototype for C++. + +2004/12/03: Version 3.5.0 + +2004/12/02: [test] success in ruby 1.9.0 (2004-12-02) [i686-linux]. +2004/12/01: [test] success in ruby 1.9.0 (2004-12-01) [i386-mswin32]. +2004/12/01: [dist] add make targets 19 and 19up to win32/Makefile. +2004/12/01: [test] success in ruby 1.9.0 (2004-12-01) [i386-cygwin]. +2004/12/01: [test] success in ruby 1.9.0 (2004-12-01) [i686-linux]. +2004/12/01: [impl] double cast for escape warning in Cygwin. + (HashDataType* )((void* )(&e)) in regparse.c +2004/12/01: [test] success in ruby 1.9.0 (2004-11-30) [i686-linux]. +2004/12/01: [tune] change implementation of clear_opt_map_info(). + (which was 10-16% cost in gprof result for my test program) +2004/12/01: [dist] remove regex.c from distribution files. +2004/11/30: [memo] remove targets 16 and 18 from Makefile.in. +2004/11/30: [test] success in ruby 1.9.0 (2004-11-30) [i686-linux]. +2004/11/30: [inst] add "cp -p st.[ch] st.[ch].ruby_orig" to "make 19". 
+2004/11/30: [tune] map_position_value() return 20 if code is 0 + and minimum enclen > 1. +2004/11/30: [test] success in ruby 1.9.0 (2004-11-29) [i686-linux]. +2004/11/30: [impl] minor changes for multi-thread in regexec.c and regcomp.c. +2004/11/30: [impl] change THREAD_PASS_LIMIT_COUNT value from 10 to 8. +2004/11/30: [impl] add THREAD_ATOMIC_XXX to FreeNodeList access in regparse.c +2004/11/29: [impl] add USE_MULTI_THREAD_SYSTEM. +2004/11/29: [memo] add hash.c.patch to CVS. +2004/11/29: [dist] change mail address to 'sndgk393 AT ...' +2004/11/29: [dist] add -s option (silent mode) to test.rb. +2004/11/29: [tune] change THRESHOLD_RANGE_NUM_FOR_SHARE_CCLASS value + from 20 to 8. +2004/11/29: [inst] add make target "19up". +2004/11/29: [dist] change Oniguruma Home Page URL. +2004/11/29: [impl] remove onig_is_in_code_range_array(). +2004/11/29: [dist] fix doc/RE and RE.ja (character types). +2004/11/26: [dist] fix win32/Makefile. +2004/11/26: [dist] fix doc/RE and RE.ja (multibyte character types). +2004/11/26: [impl] add onig_free_shared_cclass_table(). +2004/11/26: [impl] move definition USE_UNICODE_FULL_RANGE_CTYPE to regenc.h. +2004/11/26: [impl] add opcode OP_CCLASS_NODE. +2004/11/26: [impl] move definition of CClassNode to regint.h. +2004/11/26: [impl] add type PointerType in regint.h. +2004/11/25: [impl] remove ONIGENC_CTYPE_MOD_NOT. +2004/11/25: [impl] rename onig_node_new_cclass_by_codepoint_range to + node_new_cclass_by_codepoint_range. +2004/11/25: [impl] remove get_type_cc_node method from OnigEncodingType. +2004/11/25: [impl] move implementation of shared char-class from enc/*.c + to regparse.c. +2004/11/25: [dist] add hash.c.patch for Ruby 1.9 hash.c change. +2004/11/22: [impl] change utf8_get_type_node(). +2004/11/22: [impl] add ONIGENC_CTYPE_MOD_NOT. +2004/11/22: [bug] (thanks MIYAMUKO Katsuyuki) + ruby make test fail in HP-UX B.11.23 ia64. + should use tok->u.code instead of tok->u.c in + the case of TK_CODE_POINT. 
+2004/11/19: [bug] (thanks Yoshida Masato) + invalid multibyte code causes segmentation fault. + ex. /[\xFF-\xFF]/u +2004/11/19: [bug] (thanks Yoshida Masato) + illegal check in char-class range in UTF-8. + ex. s = "[\xC2\xA0-\xC3\xBE]" + p(Regexp.new(s, nil, "u") =~ "\xC3\xBE") +2004/11/18: [impl] add onig_node_new_cclass_by_codepoint_range(). +2004/11/18: [impl] remove OnigCodePointRange type. (use OnigCodePoint[].) +2004/11/17: [bug] (thanks nobu) + abort in "a".gsub(/a\Z/, "") + fix ONIGENC_STEP_BACK() argument in onig_search(). +2004/11/16: [impl] add key2 member to st_table_entry in st.[ch]. + change API of st for non-null terminated string key. +2004/11/16: [impl] add get_type_cc_node method to OnigEncodingType. +2004/11/15: [impl] add st.h and st.c from Ruby 1.9. + use st-hash always. +2004/11/12: [impl] change member 'not' of CClassNode to 'flags'. + add flags FLAG_CCLASS_NOT and FLAG_CCLASS_SHARE. +2004/11/12: [impl] add onig_is_in_code_range_array() to enc/unicode.c. +2004/11/12: [impl] fix CRWord in enc/unicode.c and MBWord in enc/utf8.c. +2004/11/11: [bug] fix enc/utf8.c. + size 0 array initializer was compile error in VC++. +2004/11/09: [inst] (thanks Hiroki YAGITA) + change installed file mode to 0644. +2004/11/09: [bug] (thanks UK-taniyama) + wrong definitions GET_RELADDR_INC(), GET_ABSADDR_INC() + etc... (NOT PLATFORM_UNALIGNED_WORD_ACCESS) +2004/11/09: [impl] type cast in regexec() for remove compile time warning. + (WIN32, regposix.c) +2004/11/08: [spec] fix Unicode character types. + 0x00ad (soft hyphen) should be [:cntrl:] and [:space:] type. + [0x0009..0x000d], 0x0085 should be [:print:] type. + 0x00ad should not be [:punct:] type. +2004/11/08: [inst] fix Makefile.in. (for make ctest/ptest/testcu) +2004/11/06: [impl] (thanks Kazuo Saito) + too many alternatives pattern causes core dump. + change implementation of onig_node_free(). +2004/11/05: [spec] rename ONIGERR_END_PATTERN_AT_BACKSLASH to + ONIGERR_END_PATTERN_AT_ESCAPE.
+2004/11/05: [impl] (thanks matz) + escape compile time warnings for x86-64 Linux. + StackIndex type int -> long +2004/11/05: [memo] (thanks Kazuo Saito) + Oniguruma 3.4.0 was merged to Ruby 1.9.0. + +2004/10/30: Version 3.4.0 + +2004/10/30: [test] success in ruby 1.9.0 (2004-09-24) [i686-linux]. +2004/10/30: [new] add hexadecimal digit char type. (\h, \H) + syntax: ONIG_SYN_OP2_ESC_H_XDIGIT +2004/10/30: [bug] (thanks Guy Decoux) + reluctant infinite repeat bug. + ex. /^[a-z]{2,}?$/.match("aaa") fail. + fix OP_REPEAT_INC_NG process in match_at(). + +2004/10/18: Version 3.3.1 + +2004/10/18: [test] success in ruby 1.9.0 (2004-09-24) [i686-linux]. +2004/10/18: [impl] (thanks Imai Yasumasa) + enclose #include by #ifndef __BORLANDC__. +2004/10/18: [bug] (thanks Imai Yasumasa) + memory access violation in select_opt_exact_info(). +2004/09/25: [dist] fix doc/API and doc/API.ja. +2004/09/25: [bug] fix OP_SEMI_END_BUF process in match_at() for + the case USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE + is not defined. + +2004/09/17: Version 3.3.0 + +2004/09/17: [dist] add COPYING to program source files. +2004/09/17: [test] success in ruby 1.9.0 (2004-07-23) [i686-linux]. +2004/09/17: [bug] (thanks Isao Sonobe) + memory access violations in xxx_mbc_enc_len(), + and xxx_mbc_to_normalize() and + xxx_left_adjust_char_head(). + add string range check in match_at() and onig_search(). +2004/09/08: [dist] change mail address format.(kosako AT sofnec ...) + +2004/09/04: Version 3.2.9 + +2004/09/04: [test] success in ruby 1.9.0 (2004-07-23) [i686-linux]. +2004/09/04: [bug] (thanks Bob Kerstetter and Richard Koch) + search fail in ignore case mode. + fix str_lower_case_match(). +2004/09/04: [inst] (thanks Isao Sonobe) + clear sample directory in 'make clean'. +2004/09/04: [bug] fix ONIGENC_AMBIGUOUS_MATCH_COMPOUND/ASCII/NONASCII + meanings in XXXXX_mbc_to_normalize() and + XXXXX_is_mbc_ambiguous().
+2004/08/28: [bug] fix ONIGENC_AMBIGUOUS_MATCH_COMPOUND/ASCII/NONASCII + meanings in iso_8859_XX_mbc_to_normalize() and + iso_8859_XX_is_mbc_ambiguous(). + +2004/08/24: Version 3.2.8 + +2004/08/24: [test] success in ruby 1.9.0 (2004-07-23) [i686-linux]. +2004/08/24: [spec] add ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY. + /a{n}?/ == /(?:a{n})?/ +2004/08/24: [dist] fix doc/RE and doc/RE.ja. +2004/08/24: [bug] (thanks starfish) + memory leak in set_optimize_exact_info(). + +2004/08/21: Version 3.2.7 + +2004/08/21: [test] success in ruby 1.8.2 (2004-07-28) [i686-linux]. + (1.8.2 preview2) +2004/08/21: [test] success in ruby 1.9.0 (2004-07-23) [i686-linux]. +2004/08/21: [bug] (thanks Isao Sonobe) (thanks kage) + memory access violation in bm_search_notrev(). + (forgotten to merge from 2.X) + +2004/07/24: Version 3.2.6 + +2004/07/24: [test] success in ruby 1.9.0 (2004-07-23) [i686-linux]. +2004/07/24: [test] success in ruby 1.8.2 (2004-07-16) [i686-linux]. +2004/07/24: [bug] fix warnings for regexec.c. (gcc 2.91.66) +2004/07/24: [memo] change version control system from Subversion + to CVS 1.11.17. +2004/07/20: [bug] (thanks Isao Sonobe) + illegal result in negative character class in ignore case + mode. fix pair-ambig-codes process in parse_exp(). + ex. /[^a]/i.match("A") +2004/07/20: [bug] (thanks Isao Sonobe) + undefined bytecode error happens in UTF-16BE etc.. + compile_length_cclass_node() was not consistent with + compile_cclass_node(). + +2004/07/01: Version 3.2.5 + +2004/07/01: [test] success in ruby 1.8.2 (2004-06-23) [i686-linux]. +2004/07/01: [new] add onig_get_syntax_{op,op2,behavior,options}. +2004/07/01: [bug] (thanks Isao Sonobe) + invalid result in onig_capture_tree_traverse(). + fix make_capture_history_tree(). + +2004/06/29: Version 3.2.4 + +2004/06/29: [test] success in ruby 1.8.2 (2004-06-23) [i686-linux]. +2004/06/29: [new] (thanks Isao Sonobe) + add onig_number_of_captures(). 
+ +2004/06/25: Version 3.2.3 + +2004/06/25: [test] success in ruby 1.8.2 (2004-06-23) [i686-linux]. +2004/06/25: [bug] (thanks Isao Sonobe) + invalid result in onig_capture_tree_traverse(). + fix make_capture_history_tree(). + +2004/06/24: Version 3.2.2 + +2004/06/24: [test] success in ruby 1.8.0 (2003-08-08) [i386-cygwin]. +2004/06/24: [test] success in ruby 1.8.0 (2003-08-08) [i386-mswin32]. +2004/06/24: [test] success in ruby 1.8.2 (2004-06-23) [i686-linux]. +2004/06/24: [new] (thanks Isao Sonobe) + add onig_number_of_capture_histories(). +2004/06/24: [bug] (thanks Isao Sonobe) + invalid char position match in UTF-16 and UTF-32. + add onigenc_always_false_is_allowed_reverse_match(). + +2004/06/17: Version 3.2.1 + +2004/06/17: [test] success in ruby 1.8.0 (2003-08-08) [i386-cygwin]. +2004/06/17: [test] success in ruby 1.8.0 (2003-08-08) [i386-mswin32]. +2004/06/17: [test] success in ruby 1.8.2 (2004-05-18) [i686-linux]. +2004/06/17: [impl] should not use OP_REPEAT for (...)? even if target size + is long. +2004/06/17: [bug] (thanks nobu) [ruby-dev:23703] + should use STACK_AT() instead of stkp in OP_REPEAT_INC. + add IN_VAR_REPEAT flag in setup_tree(). +2004/06/16: [impl] change select_opt_exact_info() to use ByteValTable[]. +2004/06/16: [impl] change map_position_value() table values. +2004/06/14: [impl] (thanks John Carter) + RelAddrType, AbsAddrType and LengthType change + from short int to int type for the very long string match. +2004/06/14: [bug] (thanks Greg A. Woods) + fix nmatch argument of regexec() is smaller than + reg->num_mem + 1 case. (POSIX API) +2004/06/14: [spec] (thanks Greg A. Woods) + set pmatch to NULL if nmatch is 0 in regexec(). (POSIX API) + +2004/06/10: Version 3.2.0 + +2004/06/10: [test] success in ruby 1.8.0 (2003-08-08) [i386-cygwin]. +2004/06/10: [test] success in ruby 1.9.0 (2004-05-27) [i386-mswin32]. +2004/06/10: [test] success in ruby 1.8.2 (2004-05-18) [i686-linux]. +2004/06/10: [dist] add README.ja. 
+2004/06/10: [new] add onig_copy_encoding(). +2004/06/10: [API] add encoding argument to onig_set_meta_char(). + add meta_char_table member to OnigEncodingType. +2004/06/08: [dist] add doc/API.ja. +2004/06/07: [API] add num_of_elements member to OnigCompileInfo. +2004/05/29: [memo] (thanks Kazuo Saito) + Oniguruma 3.1.0 was merged to Ruby 1.9.0. +2004/05/26: [impl] rename NST_SIMPLE_REPEAT to NST_STOP_BT_SIMPLE_REPEAT. +2004/05/26: [impl] doesn't need to check that target's simple repeat-ness + for EFFECT_MEMORY type node in setup_tree(). + +2004/05/25: Version 3.1.0 + +2004/05/25: [test] success in ruby 1.8.0 (2003-08-08) [i386-mswin32]. +2004/05/25: [test] success in ruby 1.8.0 (2003-08-08) [i386-cygwin]. +2004/05/25: [test] success in ruby 1.9.0 (2004-05-23) [i686-linux]. +2004/05/25: [test] success in ruby 1.8.2 (2004-05-18) [i686-linux]. +2004/05/25: [bug] (thanks Masahiro Sakai) [ruby-dev:23560] + ruby -ruri -ve 'URI::ABS_URI =~ + "http://example.org/Andr\xC3\xA9"' + nested STK_REPEAT type stack can't backtrack repeat_stk[]. + add OP_REPEAT_INC_SG and OP_REPEAT_INC_NG_SG. +2004/05/25: [new] support UTF-32LE. (ONIG_ENCODING_UTF32_LE) +2004/05/25: [new] support UTF-32BE. (ONIG_ENCODING_UTF32_BE) +2004/05/24: [impl] divide enc/utf16.c to utf16_be.c and utf16_le.c. +2004/05/24: [impl] add enc/unicode.c. +2004/05/24: [API] change calling sequences of onig_new_deluxe() and + onig_recompile_deluxe(). + define OnigCompileInfo type. +2004/05/21: [impl] perform ensure process for rb_trap_exec() in match_at(). + add onig_exec_trap() and CHECK_INTERRUPT_IN_MATCH_AT. +2004/05/21: [impl] add regex status check to onig_match(). +2004/05/21: [new] add onig_get_capture_tree() and + onig_capture_tree_traverse(). +2004/05/20: [spec] (thanks Isao Sonobe) + capture history return capture data tree. + (see sample/listcap.c) +2004/05/19: [bug] (thanks Simon Strandgaard) + Control-C does not work in matching process on Ruby. + add calling of CHECK_INTERRUPT into match_at(). + ex. 
/<(?:[^">]+|"[^"]*")+>/.match('') +2004/05/19: [bug] (thanks Simon Strandgaard) + define virtual codepoint values for invalid encoding + byte 0xfe and 0xff in UTF-8. + ex. /\w+/u.match("%a\xffb\xfec%") ==> "a" +2004/05/19: [spec] (thanks Simon Strandgaard) + too big backref number should be treated as a sequence of + an octal char and number digits. + ex. /b\3777\c/.match("b\3777\c") +2004/05/17: [spec] rename encoding names "UTF-16 BE" and "UTF-16 LE" + to "UTF-16BE" and "UTF-16LE". +2004/05/17: [impl] move ismbchar() and mbclen() from oniguruma.h to oniggnu.h. +2004/05/17: [impl] rename onigenc_single_byte_is_allowed_reverse_match() to + onigenc_always_true_is_allowed_reverse_match(). + +2004/05/14: Version 3.0.0 + +2004/05/14: [test] success in ruby 1.8.0 (2003-08-08) [i386-cygwin]. +2004/05/14: [test] success in ruby 1.9.0 (2004-05-14) [i686-linux]. +2004/05/14: [test] success in ruby 1.8.0 (2003-08-08) [i386-mswin32]. + (* need to edit parse.y: + register int c; ---> int c; in yylex()) +2004/05/14: [impl] add regext.c. +2004/05/14: [spec] KOI8 is not included in library archive by default setup. +2004/05/14: [impl] implementation changes are completed for all encoding files. +2004/05/12: [impl] add divide_ambig_string_node(). + ambiguous string is divided and normalized before + optimization and compilation process. +2004/05/11: [dist] remove INSTALL-RUBY from distribution. +2004/04/28: [memo] (thanks Kazuo Saito) + Oniguruma 2.2.8 was merged to Ruby 1.9.0. +2004/04/26: [spec] change value DEFAULT_MATCH_STACK_LIMIT_SIZE = 0 : unlimited +2004/04/26: [new] add onig_get_match_stack_limit_size() and + onig_set_match_stack_limit_size(). +2004/04/26: [bug] add error check to re.c.181.patch and re.c.168.patch. +2004/04/23: [impl] remove ctype_support_level from OnigEncodingType. +2004/04/22: [spec] allow the range from single byte char to multibyte char in + character class for implementation reason. + ex. /[a-\xbb\xcc]/ in EUC-JP encoding. 
+2004/04/21: [impl] remove max_enc_len_by_first_byte() from OnigEncodingType. +2004/04/20: [new] add onig_copyright(). +2004/04/20: [impl] add regversion.c. +2004/04/15: [new] add onig_get_ambig_flag(). +2004/04/14: [bug] (thanks Isao Sonobe) + undefined bytecode error happens if ONIG_OPTION_FIND_LONGEST + is set. + should finish matching process if find-condition + is fail at OP_END in match_at(). +2004/04/12: [impl] add ambig_flag to regex_t. +2004/04/09: [impl] move onig_set_meta_char() to regsyntax.c. +2004/04/09: [bug] (thanks HIROSE Masaaki) fix onig_version(). +2004/04/08: [impl] add regsyntax.c. +2004/04/07: [new] support UTF-16 LE. (ONIG_ENCODING_UTF16_LE) +2004/04/05: [impl] add ONIGENC_CTYPE_NEWLINE. +2004/04/05: [memo] (thanks Kazuo Saito) + Oniguruma 2.2.6 was merged to Ruby 1.9.0. +2004/04/02: [memo] Version 2.2.6 was released. +2004/03/26: [new] support UTF-16 BE. (ONIG_ENCODING_UTF16_BE) +2004/03/25: [spec] support non 8-bit encodings. +2004/03/16: [memo] 2.X branch for 8-bit encodings only. + +2004/03/16: Version 2.2.5 + +2004/03/16: [test] success in ruby 1.8.0 (2003-08-08) [i386-mswin32]. +2004/03/16: [test] success in ruby 1.9.0 (2004-02-24) [i686-linux]. +2004/03/16: [impl] add property name to error message of + ONIGERR_INVALID_CHAR_PROPERTY_NAME. +2004/03/16: [spec] allow prefix 'Is' for \p{...} in ONIG_SYNTAX_PERL. + add syntax op. ONIG_SYN_OP2_CHAR_PROPERTY_PREFIX_IS. +2004/03/15: [dist] add sample/syntax.c. +2004/03/15: [spec] support NOT op. in char property. \p{^...}, \P{^...}. + add syntax op. ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT. +2004/03/15: [spec] rename ONIG_SYN_OP2_ESC_P_CHAR_PROPERTY to + ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY. +2004/03/10: [impl] move ONIGERR_XXX from regenc.h to oniguruma.h, + rename ONIGERR_XXX to ONIGENCERR_XXX in regenc.h. +2004/03/08: [impl] (thanks eban) + replace defined(__CYGWIN__) to defined(__GNUC__). +2004/03/08: [bug] (thanks eban) [ruby-dev:23172] + need to separate initialization for bcc32.
+2004/03/06: [memo] (thanks Kazuo Saito) + Oniguruma 2.2.4 was merged to Ruby 1.9.0. +2004/03/05: [API] change second argument type of onig_set_meta_char() + from unsigned int to OnigCodePoint. +2004/03/05: [dist] (thanks Kazuo Saito) + add MANIFEST-RUBY. + +2004/03/04: Version 2.2.4 + +2004/03/04: [impl] (thanks Moriyoshi Koizumi) + fix many warnings in Win32 VC++ with /W3 option. + +2004/03/02: Version 2.2.3 + +2004/03/02: [bug] (thanks Isao Sonobe) + return invalid capture region value if capture history + is used. (OP_MEMORY_END_PUSH_REC bug) + ex. /\g

(?@

\(\g\)){0}(?(?:\g

)*|){0}/ + .match("((())())") +2004/03/02: [impl] (thanks Kazuo Saito) + add :nodoc: to onig_stat_print() for RDoc. +2004/03/02: [impl] don't use ONIG_SOURCE_IS_WRAPPED. + +2004/02/27: Version 2.2.2 + +2004/02/27: [impl] fix the position of onig_stat_print(). +2004/02/27: [impl] define ONIG_RUBY_DEFINE_GLOBAL_FUNCTION() in regint.h + for ignored by RDoc. + +2004/02/26: Version 2.2.1 + +2004/02/26: [bug] [bugs.php.net:#26677] (thanks behrens) + invalid definition at onig_error_code_to_str() + in the case of NOT HAVE_STDARG_PROTOTYPES. + +2004/02/25: Version 2.2.0 + +2004/02/25: [test] success in ruby 1.8.0 (2003-08-08) [i386-mswin32]. +2004/02/24: [test] success in ruby 1.9.0 (2004-02-24) [i686-linux]. +2004/02/24: [bug] undefined IS_BLANK() and IS_GRAPH() was used in + onigenc_is_code_ctype() in the case of Ruby M17N. +2004/02/24: [new] support ISO-8859-16. (ONIG_ENCODING_ISO_8859_16) +2004/02/24: [bug] should not fold match for 0xdf in iso8859_6.c. +2004/02/24: [new] support ISO-8859-14. (ONIG_ENCODING_ISO_8859_14) +2004/02/23: [new] support ISO-8859-13. (ONIG_ENCODING_ISO_8859_13) +2004/02/23: [new] support ISO-8859-10. (ONIG_ENCODING_ISO_8859_10) +2004/02/20: [bug] fix iso_8859_4_mbc_is_case_ambig(). +2004/02/20: [new] support ISO-8859-9. (ONIG_ENCODING_ISO_8859_9) +2004/02/19: [bug] correct ctype tables for ISO-8859-3, ISO-8859-4, + ISO-8859-6, ISO-8859-7, ISO-8859-8, KOI8_R. +2004/02/18: [bug] wrong replaced name OnigSyntaxGnuOnigex. +2004/02/17: [spec] check capture status for empty infinite loop. + [ruby-dev:20224] etc... + ex. /(?:\1a|())*/.match("a"), + /(?:()|()|()|(x)|()|())*\2b\5/.match("b") + add USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK. + add OP_NULL_CHECK_END_MEMST, OP_NULL_CHECK_END_MEMST_PUSH. + add stack type STK_NULL_CHECK_END. +2004/02/13: [impl] add OnigEncodingEUC_CN to enc/euc_kr.c. +2004/02/13: [bug] (thanks Simon Strandgaard) + parsing of nested repeat was invalid. + ex. 
/ab{2,3}*/ was /(?:a(?:b{2,3}))*/, + should be /a(?:b{2,3}*)/ +2004/02/12: [bug] (thanks Simon Strandgaard) + OP_REPEAT_INC_NG process in match_at() is wrong. + ex. bad match /a.{0,2}?a/ =~ "0aXXXa0" +2004/02/12: [bug] (thanks Simon Strandgaard) + wrong fetch after (?x) option. ex. "(?x)\ta .\n+b" +2004/02/12: [bug] (thanks Simon Strandgaard) + [\^] is not a empty char class. +2004/02/09: [new] add onig_set_syntax_op(), onig_set_syntax_op2(), + onig_set_syntax_behavior(), onig_set_syntax_options(). +2004/02/06: [dist] add a new target 'site' to Makefile.in. +2004/02/06: [dist] add index.html. +2004/02/03: [bug] oniggnu.h was not installed by 'make install'. + +2004/02/02: Version 2.1.0 + +2004/02/02: [test] success in ruby 1.9.0 (2004-02-02) [i686-linux]. +2004/02/02: [test] success in ruby 1.8.0 (2003-08-08) [i386-cygwin]. +2004/02/02: [test] success in ruby 1.8.0 (2003-08-08) [i386-mswin32]. +2004/02/02: [new] support ISO-8859-11. (ONIG_ENCODING_ISO_8859_11) +2004/02/02: [new] support ISO-8859-5. (ONIG_ENCODING_ISO_8859_5) +2004/02/02: [impl] should check single byte encoding or not in and_cclass() + and or_cclass(). +2004/01/30: [dist] add oniggnu.h. +2004/01/30: [bug] ISO-8859-7 0xb7 (middle dot) is Punct type. +2004/01/30: [new] support ISO-8859-8. (ONIG_ENCODING_ISO_8859_8) +2004/01/29: [new] support ISO-8859-7. (ONIG_ENCODING_ISO_8859_7) +2004/01/29: [new] support ISO-8859-6. (ONIG_ENCODING_ISO_8859_6) +2004/01/28: [new] support KOI8-R. (ONIG_ENCODING_KOI8_R) +2004/01/28: [new] support KOI8. (ONIG_ENCODING_KOI8) +2004/01/27: [dist] rename enc/isotable.c to enc/mktable.c. +2004/01/27: [new] support ISO-8859-4. (ONIG_ENCODING_ISO_8859_4) +2004/01/26: [new] support ISO-8859-3. (ONIG_ENCODING_ISO_8859_3) +2004/01/26: [bug] EncISO_8859_{1,15}_CtypeTable[256] was wrong. + (0x80 - 0xff is not ASCII) +2004/01/23: [new] support ISO-8859-2. (ONIG_ENCODING_ISO_8859_2) +2004/01/23: [dist] add enc/isotable.c. +2004/01/22: [new] support EUC-TW. 
(ONIG_ENCODING_EUC_TW) +2004/01/22: [bug] definition of GET_ALIGNMENT_PAD_SIZE() and + ALIGNMENT_RIGHT() was wrong. + type casting should be unsigned int, not int. +2004/01/22: [impl] add defined(__x86_64) || defined(__x86_64__) + to unaligned word access condition. (AMD64 ?) +2004/01/21: [dist] rename enc/eucjp.c to enc/euc_jp.c. +2004/01/21: [new] support EUC-KR. (ONIG_ENCODING_EUC_KR) +2004/01/20: [test] success in ruby 1.8.0 (2003-08-08) [i386-cygwin]. +2004/01/20: [dist] change Makefile.in. +2004/01/20: [spec] add \p{...}, \P{...} in char class. +2004/01/20: [new] character property operators \p{...}, \P{...}. + supported in ONIG_SYNTAX_JAVA and ONIG_SYNTAX_PERL. +2004/01/19: [spec] allow /a{,n}/ as /a{0,n}/. (but don't allow /a{,}/) +2004/01/19: [dist] rename onigcomp200.h to onigcmpt200.h. +2004/01/19: [dist] update re.c.168.patch. svn add re.c.181.patch. +2004/01/16: [dist] update sample/*.c for new API. +2004/01/16: [dist] add onigcomp200.h. (for old API compatibility) +2004/01/16: [dist] update documents API, RE and RE.ja. +2004/01/16: [spec] change prefix REG_ -> ONIG_, regex_ onig_, + ENC_ -> ONIGENC, enc_ -> onigenc_. +2004/01/15: [impl] rename ENC_IS_MBC_E_WORD() to ENC_IS_MBC_WORD(). + rename ENC_CTYPE_SUPPORT_LEVEL_SB_ONLY to + ENC_CTYPE_SUPPORT_LEVEL_SB. +2004/01/14: [impl] rename UNALIGNED_WORD_ACCESS to + PLATFORM_UNALIGNED_WORD_ACCESS. +2004/01/14: [impl] change MATCH_STACK_LIMIT_SIZE value from 200000 to 500000. +2004/01/13: [impl] remove ENC_CODE_TO_MBC_FIRST(enc,code) in regenc.h. + remove code_to_mbc_first member in RegCharEncodingType. +2004/01/13: [impl] remove head byte bitset information in cclass->mbuf. +2003/12/26: [impl] change macro name ismb_xxxx() in enc/*.c for + escape conflict. + +2003/12/24: Version 2.0.0 + +2003/12/24: [spec] ignore case option is effective to numbered char. + ex. /\x61/i =~ "A" +2003/12/24: [test] success in ruby 1.8.1 (2003-12-24) [i686-linux]. 
+2003/12/24: [test] success in ruby 1.8.0 (2003-08-08) [i386-cygwin]. +2003/12/24: [test] success in ruby 1.8.0 (2003-08-08) [i386-mswin32]. +2003/12/24: [test] success in regex.c compile test on ruby-m17n. + (but can't make miniruby because re.c patch fail.) +2003/12/24: [bug] (thanks H.Miyamoto) /[\W]/ was wrong in 1.9.5. +2003/12/22: [spec] implement fold match on UTF-8 encoding. +2003/12/19: [impl] add ctype_support_level and ctype_add_codes() member to + RegCharEncoding type. +2003/12/19: [impl] add add_ctype_to_cc() in regparse.c. +2003/12/19: [impl] add enc_is_code_ctype() in REG_RUBY_M17N case. +2003/12/19: [impl] change ENC_CODE_TO_MBC() interface. +2003/12/18: [new] implement fold match. (variable number of char + match in ignore case mode.) + ex. German alphabet ess-tsett(U+00DF) match "SS" and "ss". +2003/12/17: [impl] refactoring of encoding system. +2003/12/17: [impl] add enc_init() in regenc.c. +2003/12/17: [new] support Big5. (REG_ENCODING_BIG5) +2003/12/16: [impl] change CodePoint from unsigned int to unsigned long. +2003/12/16: [new] support ISO 8859-15. (REG_ENCODING_ISO_8859_15) +2003/12/16: [impl] change P_() macro definition condition for Win32. +2003/12/16: [dist] add sample/encode.c +2003/12/16: [new] support ISO 8859-1. (REG_ENCODING_ISO_8859_1) +2003/12/15: [impl] rename IS_ENC_XXXX to ENC_IS_XXXX. +2003/12/15: [impl] rename RegDefaultCharEncoding to EncDefaultCharEncoding. +2003/12/15: [impl] divide encoding files. (enc/ascii.c, enc/utf8.c etc...) +2003/12/15: [bug] unexpected infinite loop in regex_snprintf_with_pattern(). + change local var. type char* to UChar*. +2003/12/15: [impl] remove REG_MBLEN_TABLE[]. +2003/12/15: [spec] rename function prefix regex_get_prev_char_head(), + regex_get_left_adjust_char_head() and + regex_get_right_adjust_char_head() to enc_xxxxxx(). +2003/12/15: [impl] rename function prefixes in regenc.h from regex_ to enc_. +2003/12/12: [impl] remove USE_SBMB_CLASS. 
+2003/12/12: [impl] rename mb -> mbc, mblen() to enc_len(). +2003/12/12: [impl] rename WCINT to CodePoint. +2003/12/11: [impl] delete IS_XXXX() ctype macros from regint.h. +2003/12/11: [impl] add enc->wc_is_ctype() and RegAsciiCtypeTable[256]. +2003/12/11: [impl] remove RegAsciiCaseAmbigTable. +2003/12/10: [impl] use ENC_TO_LOWER() for ignore case comparison. +2003/12/08: [impl] *** re-defined RegCharEncoding in oniguruma.h. *** +2003/12/08: [impl] add USE_POSIX_REGION_OPTION to regint.h. +2003/12/08: [impl] add IS_ENC_WORD() to regenc.h. +2003/12/05: [impl] rename IS_CODE_XXXX() to IS_ENC_XXXX(). +2003/12/05: [impl] delete IS_CODE_WORD() from regenc.h. +2003/12/04: [spec] rename REG_SYN_OP_BACK_REF to REG_SYN_OP_DECIMAL_BACKREF. +2003/12/04: [spec] add (REG_SYN_OP_ESC_W_WORD | REG_SYN_OP_ESC_B_WORD_BOUND | + REG_SYN_OP_ESC_LTGT_WORD_BEGIN_END | REG_SYN_OP_BACK_REF) + to RegSyntaxGrep. +2003/12/04: [spec] remove REG_ENCODING_DEFAULT and REGCODE_DEFAULT. +2003/12/04: [spec] move declarations of regex_get_default_encoding() and + regex_set_default_encoding() from oniguruma.h to regenc.h. +2003/12/03: [new] add regex_get_default_encoding() and + regex_set_default_encoding(). +2003/12/03: [spec] REG_ENCODING_DEFAULT meaning is changed. + (current default value, not initial default value.) +2003/12/03: [spec] REGCODE_XXX is obsoleted. use REG_ENCODING_XXX. +2003/12/02: [memo] alias svnst='svn status | grep -v "^\?"' +2003/12/02: [spec] move regex_set_default_trans_table() declaration + from oniguruma.h to regenc.h. (obsoleted API) +2003/12/02: [impl] move variables RegDefaultCharEncoding, DefaultTransTable and + AmbiguityTable to regenc.c. +2003/12/01: [impl] add regex_continuous_sbmb() to regenc.c. +2003/12/01: [dist] add regenc.h and regenc.c. +2003/11/18: [dist] change testconv.rb. +2003/11/18: [bug] (thanks Masaru Tsuda) + memory leak in parse_subexp(). +2003/11/18: [bug] (thanks Masaru Tsuda) + memory leak in names_clear() and parse_char_class(). 
+2003/11/17: [bug] memory leak in parse_char_class(). +2003/11/17: [bug] (thanks Masaru Tsuda) + OptExactInfo length should not over OPT_EXACT_MAXLEN. + (concat_opt_exact_info_str()) + +2003/11/12: Version 1.9.5 + +2003/11/12: [test] success in ruby 1.8.0 (2003-08-08) [i386-cygwin]. +2003/11/12: [test] success in ruby 1.8.1 (2003-11-11) [i686-linux]. +2003/11/12: [spec] add definition of REG_INEFFECTIVE_META_CHAR. +2003/11/11: [dist] add a sample program sample/sql.c. +2003/11/11: [new] add variable meta character. + regex_set_meta_char() +2003/11/11: [spec] add syntax op. REG_SYN_OP_VARIABLE_META_CHARS. +2003/11/11: [spec] rename REG_SYN_OP_ESC_CAPITAL_Q_QUOTE to + REG_SYN_OP2_ESC_CAPITAL_Q_QUOTE, + REG_SYN_OP_QMARK_GROUP_EFFECT to + REG_SYN_OP2_QMARK_GROUP_EFFECT. +2003/11/06: [impl] define THREAD_PASS as rb_thread_schedule() in Ruby mode. +2003/11/05: [spec] add syntax behavior REG_SYN_WARN_REDUNDANT_NESTED_REPEAT. +2003/11/05: [spec] rename REG_SYN_WARN_FOR_CC_OP_NOT_ESCAPED to + REG_SYN_WARN_CC_OP_NOT_ESCAPED. +2003/11/04: [new] add regex_set_warn_func() and regex_set_verb_warn_func(). +2003/10/30: [new] add regex_name_to_backref_number(). + (for multiplex definition name, see sample/names.c) +2003/10/30: [spec] add name_end and reg argument to callback function of + regex_foreach_name(). (see sample/names.c) +2003/10/29: [spec] add syntax behavior REG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME. + add error code REGERR_MULTIPLEX_DEFINED_NAME. +2003/10/14: [dist] modify sample/simple.c. +2003/10/03: [bug] (thanks nobu) [ruby-dev:21472] + sub-anchor of optimization map info was wrong + in concat_left_node_opt_info(). + ex. /^(x?y)/ = "xy" fail. + +2003/09/17: Version 1.9.4 + +2003/09/17: [spec] change specification of char-class range in ignore case mode + follows with Ruby 1.8(2003-09-17). + ex. /[H-c]/i ==> (H-Z, 0x5b-0x60, a-c)/i + ==> H-Z, h-z, 0x5b-0x60, a-c, A-C +2003/09/16: [bug] (thanks Guy Decoux) + remove env->option == option check in parse_effect(). 
+ change env->option for dynamic option in parse_exp(). + (ex. bad match /(?i)(?-i)a/ =~ "A") +2003/09/12: [spec] rename REG_SYN_ALLOW_RANGE_OP_IN_CC to + REG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC, + REG_SYN_ESCAPE_IN_CC to REG_SYN_BACKSLASH_ESCAPE_IN_CC. +2003/09/11: [bug] change to IS_SYNTAX_OP2 at REG_SYN_OP2_ESC_GNU_BUF_ANCHOR. +2003/09/09: [spec] rename REG_SYN_OP2_ESC_M_BAR_META to + REG_SYN_OP2_ESC_CAPITAL_M_BAR_META, + REG_SYN_OP_ESC_Q_QUOTE to REG_SYN_OP_ESC_CAPITAL_Q_QUOTE, + REG_SYN_OP_ESC_SUBEXP to REG_SYN_OP_ESC_LPAREN_SUBEXP, + REG_SYN_OP_ESC_BUF_ANCHOR to REG_SYN_OP_ESC_AZ_BUF_ANCHOR, + REG_SYN_OP_ESC_GNU_BUF_ANCHOR to + REG_SYN_OP2_ESC_GNU_BUF_ANCHOR, + REG_SYN_OP_ESC_CONTROL_CHAR to REG_SYN_OP_ESC_CONTROL_CHARS, + REG_SYN_OP_ESC_WORD to REG_SYN_OP_ESC_W_WORD, + REG_SYN_OP_ESC_WORD_BEGIN_END to + REG_SYN_OP_ESC_LTGT_WORD_BEGIN_END, + REG_SYN_OP_ESC_WORD_BOUND to REG_SYN_OP_ESC_B_WORD_BOUND, + REG_SYN_OP_ESC_WHITE_SPACE to REG_SYN_OP_ESC_S_WHITE_SPACE, + REG_SYN_OP_ESC_DIGIT to REG_SYN_OP_ESC_D_DIGIT, + REG_SYN_OP_CC to REG_SYN_OP_BRACKET_CC, + REG_SYN_OP2_CCLASS_SET to REG_SYN_OP2_CCLASS_SET_OP, + REG_SYN_CONTEXT_INDEP_OPS to + REG_SYN_CONTEXT_INDEP_REPEAT_OPS, + REG_SYN_CONTEXT_INVALID_REPEAT_OPS to + REG_SYN_CONTEXT_INVALID_REPEAT_OPS. + add REG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR. 
+2003/09/08: [spec] rename REG_SYN_OP_ANYCHAR to REG_SYN_OP_DOT_ANYCHAR, + REG_SYN_OP_0INF to REG_SYN_OP_ASTERISK_ZERO_INF, + REG_SYN_OP_ESC_0INF to REG_SYN_OP_ESC_ASTERISK_ZERO_INF, + REG_SYN_OP_1INF to REG_SYN_OP_PLUS_ONE_INF, + REG_SYN_OP_ESC_1INF to REG_SYN_OP_ESC_PLUS_ONE_INF, + REG_SYN_OP_0INF to REG_SYN_OP_QMARK_ZERO_ONE, + REG_SYN_OP_ESC_0INF to REG_SYN_OP_ESC_QMARK_ZERO_ONE, + REG_SYN_OP_INTERVAL to REG_SYN_OP_BRACE_INTERVAL, + REG_SYN_OP_ESC_INTERVAL to REG_SYN_OP_ESC_BRACE_INTERVAL, + REG_SYN_OP_SUBEXP to REG_SYN_OP_LPAREN_SUBEXP, + REG_SYN_OP_ALT to REG_SYN_OP_VBAR_ALT, + REG_SYN_OP_ESC_ALT to REG_SYN_OP_ESC_VBAR_ALT, + REG_SYN_OP_NON_GREEDY to REG_SYN_OP_QMARK_NON_GREEDY, + REG_SYN_OP_SUBEXP_EFFECT to REG_SYN_OP_QMARK_GROUP_EFFECT, + REG_SYN_OP2_POSSESSIVE_{REPEAT,INTERVAL} to + REG_SYN_OP2_PLUS_POSSESSIVE_{REPEAT,INTERVAL}, + REG_SYN_OP2_SUBEXP_CALL to REG_SYN_OP2_ESC_G_SUBEXP_CALL, + REG_SYN_OP2_NAMED_GROUP to REG_SYN_OP2_QMARK_LT_NAMED_GROUP + and REG_SYN_OP2_ESC_K_NAMED_BACKREF. +2003/09/02: [tune] call reduce_nested_qualifier() after disabling capture for + no-name group in noname_disable_map(). + ex. /(a+)*(?...)/ +2003/09/02: [impl] include is forgotten to erase in regcomp.c. +2003/09/01: [dist] update doc/RE and doc/RE.ja. +2003/08/26: [bug] (thanks Guy Decoux) + should not double free node at the case TK_CC_CC_OPEN + in parse_char_class(). + +2003/08/19: Version 1.9.3 + +2003/08/19: [inst] change re.c.180.patch. +2003/08/19: [impl] rename 'list of captures' to 'capture history'. +2003/08/19: [dist] add doc/RE.ja. (Japanese) +2003/08/19: [new] add regex_copy_syntax(). +2003/08/19: [spec] rename REG_SYN_OP2_ATMARK_LIST_OF_CAPTURES to + REG_SYN_OP2_ATMARK_CAPTURE_HISTORY. +2003/08/18: [spec] (thanks nobu) + don't use IMPORT in oniguruma.h and onigposix.h. +2003/08/18: [impl] (thanks nobu) change error output to stdout in testconv.rb. +2003/08/18: [inst] (thanks nobu) lacked $(srcdir) in Makefile.in. 
+2003/08/18: [bug] REG_MBLEN_TABLE[SJIS][0xFD-0xFF] should be 1. +2003/08/18: [bug] (thanks nobu) mbctab_sjis[0x80] should be 0. +2003/08/18: [bug] (thanks nobu) + single/multi-byte decision was wrong in parse_char_class(). + add regex_wc2mblen(). + should not set fetched to 1 in TK_RAW_BYTE case. +2003/08/18: [bug] should update BitSet in the case inc_n >= 0 + in add_wc_range_to_buf(). +2003/08/13: [bug] change re.c.180.patch for fix rb_reg_to_s() in re.c. +2003/08/11: [bug] should clear region->list in regex_region_resize(). + +2003/08/08: Version 1.9.2 + +2003/08/08: [test] success in ruby 1.8.0 (2003-08-08) on Windows 2000 + VC++ 6.0 and Cygwin. +2003/08/08: [impl] don't define macro vsnprintf for WIN32 platform, + because definition is added in win32\win32.h. +2003/08/08: [test] success in ruby 1.8.0 and ruby 1.6.8(2003-08-03) on Linux. +2003/08/08: [dist] change re.c.180.patch and re.c.168.patch. +2003/08/08: [new] (thanks akr) + implemented list of captures. (?@...), (?@...) +2003/08/07: [dist] add sample/listcap.c. +2003/08/06: [bug] OP_MEMORY_END_PUSH_REC case in match_at(). + renewal of mem_start_stk[] should be after + STACK_PUSH_MEM_END() call. +2003/07/29: [new] add regex_get_encoding(), regex_get_options() and + regex_get_syntax(). +2003/07/25: [spec] (thanks akr) + change group(...) to shy-group(?:...) if named group is + used in the pattern. + add REG_SYN_CAPTURE_ONLY_NAMED_GROUP. +2003/07/24: [spec] rename REG_OPTION_CAPTURE_ONLY_NAMED_GROUP to + REG_OPTION_DONT_CAPTURE_GROUP. + add REG_OPTION_CAPTURE_GROUP. +2003/07/17: [spec] rename REG_SYN_OP2_NAMED_SUBEXP to REG_SYN_OP2_NAMED_GROUP. +2003/07/17: [spec] add REGERR_EMPTY_GROUP_NAME. +2003/07/17: [spec] rename REGERR_INVALID_SUBEXP_NAME + to REGERR_INVALID_CHAR_IN_GROUP_NAME. +2003/07/17: [spec] restrict usable chars of group name to alphabet, digit, + '_' or multibyte-char in fetch_name(). [ruby-dev:20706] +2003/07/16: [impl] minor change of sample/names.c. 
+2003/07/14: [impl] rename USE_NAMED_SUBEXP to USE_NAMED_GROUP. +2003/07/14: [bug] add fetch_name() for USE_NAMED_SUBEXP off case. +2003/07/14: [API] add regex_number_of_names(). +2003/07/08: [impl] change error message for undefined group number call. + 'undefined group reference: /(a)\g<2>/' + --> 'undefined group <2> reference: /(a)\g<2>/' +2003/07/08: [dist] modify doc/RE. +2003/07/07: [impl] OP_SET_OPTION is not needed in compiled code. + add IS_DYNAMIC_OPTION() to regint.h. +2003/07/07: [spec] called group should not ignore outside option (?i:...). + ex. /(?i:(?(a)\2)){0}\g/.match("aA") + add opcode OP_BACKREFN_IC and OP_BACKREF_MULTI_IC. + set option status to effect memory in optimize_node_left(). +2003/07/07: [impl] add opcode OP_ANYCHAR_ML, OP_ANYCHAR_ML_STAR and + OP_ANYCHAR_ML_START_PEEK_NEXT. +2003/07/07: [bug] (thanks nobu) REG_MBLEN_TABLE[SJIS][0x80] should be 1. +2003/07/07: [spec] rename REG_SYN_OP_QUOTE to REG_SYN_OP_ESC_Q_QUOTE. + +2003/07/04: Version 1.9.1 + +2003/07/04: [new] add REG_OPTION_CAPTURE_ONLY_NAMED_GROUP. (thanks .NET) +2003/07/04: [spec] check mbuf member in the case of + REG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC in parse_char_class(). +2003/07/04: [spec] typo REG_SYN_WARN_FOR_CC_OP_NOT_ESCAPEED. + should be REG_SYN_WARN_FOR_CC_OP_NOT_ESCAPED. +2003/07/04: [bug] conflict values on REG_SYN_WARN_FOR_CC_OP_NOT_ESCAPEED and + REG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC. (thanks nobu) +2003/07/03: [spec] add REG_SYN_OP_ESC_CONTROL_CHAR flag. +2003/07/03: [spec] remove REG_SYN_OP_ESC_OCTAL3 and REG_SYN_OP_ESC_X_HEX2 + flag from RegSyntaxGnuRegex. +2003/07/03: [spec] remove REG_SYN_OP_NON_GREEDY flag from RegSyntaxGnuRegex. +2003/07/02: [dist] fix doc/RE. +2003/07/01: [impl] add config flag USE_VARIABLE_SYNTAX. + (turn off variable syntax on Ruby) +2003/07/01: [spec] add syntax behavior REG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND. +2003/06/30: [spec] allow different length top-level alternatives + in look-behind. ex. (?<=abc|abcd), (? 
(?>a*)b + (thanks Jeffrey E. F. Friedl) +2003/06/02: [impl] remove multibyte-BitSet for OP_CCLASS_MB/OP_CCLASS_MB_NOT. +2003/05/30: [new] char class intersection operator &&[...] like Java(TM). + (thanks akr) +2003/05/30: [bug] should use bbuf_free() for CClassNode in regex_node_free(). +2003/05/29: [bug] wrong usage of syntax REG_SYN_ALLOW_EMPTY_RANGE_IN_CC. + /[d-a]/ should be error. +2003/05/28: [impl] optimize stop-backtrack compiled code. + (/(?>a*)/, /(?>\w+)/ etc...) + add OP_POP opcode. +2003/05/28: [new] possessive repeat operator. (?+, *+, ++, {n,m}+) +2003/05/27: [spec] '-' at beginning of char-class should be warn only if + it is start of range. (ex. /[--a]/) +2003/05/27: [spec] should not warn for right bracket at beginning of pattern. + ex. /]aaa/ +2003/05/27: [spec] change CCEND_ESC_WARN() from VERB_WARNING() to WARNING(). +2003/05/27: [spec] /[]aaa/ should be empty char-class error. + /[]aaa]/ should be warn for 'without backslash'. + (add char_exist_check() in regparse.c) +2003/05/26: [bug] OP_REPEAT in recursive subexp call. + ex. /(?(a|b\gc){3,5})/.match("baaaaca") => "baaaaca" + was wrong result. (should be "aaaa") +2003/05/26: [impl] add num_call member to regex_t. +2003/05/26: [impl] add repeat_range member to regex_t. + (for delete upper,lower members from StackType.u.repeat) +2003/05/26: [bug] change print_names() to external regex_print_names(). +2003/05/26: [tune] change OP_NULL_CHECK_END process in match_at(). +2003/05/26: [spec] change CCEND_ESC_WARN() from WARNING() to VERB_WARNING(). +2003/05/26: [spec] remove POSIXLINE option. (?p:...) + (be made the same as Ruby.) +2003/05/22: [spec] use OP_NULL_CHECK_XXX only if repeat is infinite. + prev. /(?:()|()){0,10}\1\2/ =~ "" ==> FAIL + now /(?:()|()){0,10}\1\2/ =~ "" ==> MATCH + +2003/05/22: [impl] change target_empty setting condition in setup_tree(). +2003/05/19: [impl] avoid zero length repeat optimization. (thanks matz) + /()*/ ==> /()?/, /()+/ ==> /()/ etc... 
+2003/05/19: [impl] minor changes for gcc -Wall. (-DREG_DEBUG_STATISTICS case) +2003/05/19: [spec] rename regex_foreach_names() to regex_foreach_name(). +2003/05/16: [new] add --with-statistics option to configure. +2003/05/16: [bug] move RegOpInfo[] definition to regint.h. +2003/05/16: [new] add regex_version(). + +2003/05/14: Version 1.8.6 + +2003/05/14: [bug] use _vsnprintf() on Win32. +2003/05/14: [spec] define USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE. + (/\n$/ =~ "\n", /\n\Z/ =~ "\n") [ruby-dev:20125] +2003/05/14: [impl] minor changes for gcc -Wall. +2003/05/14: [impl] add string.h check in AC_CHECK_HEADERS(). +2003/05/13: [impl] minor changes for gcc -Wall. +2003/05/13: [impl] add regex_snprintf_with_pattern(). +2003/05/13: [spec] add warning for char class meta character without escape + in Ruby mode ('[', '-', ']'). +2003/05/13: [impl] define WARNING() and VERB_WARNING() in regint.h. +2003/05/13: [bug] correct is_code_ascii() for /[[:ascii:]]/. +2003/05/12: [dist] add regular expression document (doc/RE). +2003/05/12: [spec] specification of $(END_LINE) was made the same as Ruby 1.8. + [ruby-dev:20130] (thanks matz) +2003/05/12: [memo] shifted to Subversion(version 0.21.0) from CVS. + +2003/03/19: Version 1.8.5 + +2003/03/19: [impl] change REG_EXTERN definition. (thanks nobu) +2003/03/19: [impl] abbreviation for long error_par in regex_error_code_to_str(). +2003/03/18: [dist] change re.c.XXX.patch for GNU regex API changes. +2003/03/18: [spec] change API regex_new(), regex_recompile() and + regex_error_code_to_str(). + change API re_compile_pattern() and re_recompile_pattern(). +2003/03/18: [spec] replace REGERR_END_PATTERN_AT_GROUP_{COMMENT|OPTION} to + REGERR_END_PATTERN_IN_GROUP. +2003/03/17: [impl] should free err_arg. +2003/03/17: [bug] mistake(high -> to) in add_wc_range_to_buf(). +2003/03/17: [spec] add err_arg argument to regex_new() and regex_recompile(). + for detail error message. 
(thanks akr) + +2003/03/12: Version 1.8.4 + +2003/03/12: [tune] use cached value of effect node in get_min_match_length(). +2003/03/12: [bug] escaped alphabet should be TK_RAW_BYTE + in fetch_token() and fetch_token_in_cc(). +2003/03/12: [spec] change named backref and subexp call format. + backref: \k, call: \g (thanks akr) +2003/03/11: [inst] add regparse.[ch] in win32/Makefile. +2003/03/11: [bug] if UNALIGNED_WORD_ACCESS isn't set + then compile error in unset_addr_list_fix(). (thanks knu) +2003/03/10: [impl] divide regcomp.c into regcomp.c, regparse.c and regparse.h. +2003/03/10: [bug] should handle multi-byte code name in fetch_name(). +2003/03/10: [spec] remove REGERR_TABLE_FOR_IGNORE_CASE_IS_NOT_SETTED. +2003/03/10: [spec] support POSIX API option REG_NOSUB. + add comp_options member to POSIX API regex_t. + +2003/03/10: Version 1.8.3 + +2003/03/10: [bug] can not compile with Ruby 1.6.8. + (inconsistent st.h with 1.6 and 1.8) + use hash table on Ruby 1.8 only. +2003/03/10: [spec] forbid to use '\' in group name. +2003/03/08: [impl] remove check_backref_number(). +2003/03/08: [bug] called group in 0-repeat should not be eliminated from + compile code. ex. /(?*n)(?){0}/ (thanks akr) + add is_refered member to QualifierNode. +2003/03/07: [impl] use hash table(st.[ch]) for implementation of name table. + (enable on Ruby in default) +2003/03/07: [new] add regex_foreach_names(). +2003/03/06: [impl] add member reg->stack_pop_level. +2003/03/06: [impl] add operator OP_MEMORY_START and member reg->backtrack_mem. +2003/03/06: [bug] if REG_OPTION_FIND_LONGEST or REG_OPTION_NOT_EMPTY, + should handle backtrack of MEM_END. + add OP_MEMORY_END_PUSH and OP_MEMORY_END_PUSH_REC. +2003/03/06: [impl] rename OP_MEMORY_END_PUSH to OP_MEMORY_END_MARK. +2003/03/06: [spec] change error messages. +2003/03/06: [tune] add tiny_pop check in STACK_POP. + +2003/03/05: Version 1.8.2 + +2003/03/05: [impl] use cache info in EFFECT_MEMORY case + in optimize_node_info().
+2003/03/05: [impl] add EFFECT_MEMORY node reference count check + in optimize_node_left(). +2003/03/05: [impl] add min-len, max-len, char-len cache in EffectNode. +2003/03/05: [spec] allow to call in look behind. ex. /(?<=(?*a))/ +2003/03/05: [bug] forgotten N_ANCHOR case in check_backref_number(), + subexp_inf_recursive_check_trav() etc... +2003/03/05: [impl] rename USE_ONIGURUMA_EXTENSION to USE_SBMB_CLASS. +2003/03/04: [impl] add CALL-node info in optimize_node_left(). +2003/03/04: [spec] prohibit left recursion of subexp call. ex. (?|(?*n)a) + add subexp_inf_recursive_check_trav(). +2003/03/04: [spec] rename REG_SYN_STRICT_CHECK_BACKREF_NUMBER + to REG_SYN_STRICT_CHECK_BACKREF +2003/03/03: [bug] /(?a(?*n)|)/ isn't infinite recursion. + fix N_LIST case in subexp_recursive_check(). (thanks akr) +2003/03/03: [bug] /(?|(?*n))+/ segmentation fault. + should re-allocate in unset_addr_list_add(). (thanks akr) + +2003/03/01: Version 1.8.1 + +2003/03/01: [bug] change STACK_GET_MEM_START() and STACK_PUSH_MEM_END(). +2003/03/01: [new] add reg_name_to_group_numbers() to POSIX API. +2003/03/01: [impl] use OP_MEMORY_END_PUSH in callable subexp compiled code + only if subexp is recursive. +2003/03/01: [spec] rename regex_name_to_backrefs() to + regex_name_to_group_numbers(). +2003/02/28: [impl] use function stack_double() instead of macro. +2003/02/28: [new] subexp call. (?*name) (thanks akr) +2003/02/28: [spec] add match stack limit check. (MATCH_STACK_LIMIT_SIZE) +2003/02/28: [impl] check recursive subexp call. +2003/02/28: [impl] add opcode OP_MEMORY_END_PUSH for callable subexp. +2003/02/28: [impl] add opcode OP_CALL, OP_RETURN. + add stack type STK_CALL_FRAME, STK_RETURN, STK_MEM_END. +2003/02/26: [spec] add new syntax behavior REG_SYN_STRICT_CHECK_BACKREF_NUMBER. + if it is setted, then error /(\1)/, /\1(..)/ etc... +2003/02/26: [spec] if backref number is greater than max group number, + then return compile error. 
(REGERR_INVALID_BACKREF_NUMBER) +2003/02/26: [tune] bad implemented N_ALT case in get_min_match_length(). +2003/02/26: [dist] auto update testc.c and win32/testc.c in dist target. +2003/02/26: [impl] add -win option to testconv.rb. +2003/02/25: [spec] allow to assign same name to different group. + add OP_BACKREF_MULTI. +2003/02/24: [impl] reduce redundant repeat of empty target. + ex. /()*/ ==> /()?/, /()+/ ==> /()/, /(?:)+/ ==> // +2003/02/24: [impl] change condition in regex_is_allow_reverse_match(). +2003/02/24: [impl] convert i(/../, ...) functions in testconv.rb. +2003/02/24: [impl] change name table struct. + +2003/02/22: Version 1.8.0 + +2003/02/22: [new] named subexp, named back reference. (thanks akr) + define: (?...), back-ref: \g +2003/02/22: [impl] use str_node_can_be_split(). +2003/02/21: [dist] add sample/posix.c +2003/02/21: [spec] rename some error code symbols. +2003/02/21: [spec] max number of multibyte ranges(255) is small. + 255 --> 1000. (thanks MoonWolf) +2003/02/20: [new] supported Basic Regular Expression(BRE) in POSIX API. + (REG_EXTENDED option: Extended RE) +2003/02/20: [new] variable syntax. + +2003/02/12: Version 1.7.2 + +2003/02/12: [bug] mismatch /\?a/i.match('?A'). + check raw value in scan_make_node() and scan_backslash(). + (thanks Nobu) +2003/02/12: [impl] rename 'max_mem' to 'num_mem' in regex_t. +2003/02/12: [impl] rename 'code' to 'enc' in regex_t. +2003/02/12: [spec] remove transtable argument in regex_new and regex_recompile. + remove transtable member in regex_t. +2003/02/10: [inst] change backup file suffix name from '.orig' to '.ruby_orig'. + (win32/Makefile) +2003/02/10: [spec] number check in scan_char_class() ignore-case mode. + ex. /[\x58-\x64]/i +2003/02/10: [impl] don't use OP_MEMORY_END_PUSH (and STK_MEM_END). +2003/02/10: [impl] lift up head_exact value from child qualifier node to parent. +2003/02/10: [tune] change stack type values. +2003/02/10: [dist] add HISTORY. +2003/02/08: [tune] change stack type values. 
+2003/02/08: [tune] add STACK_BASE_CHECK(). +2003/02/08: [tune] add STACK_PUSH_ENSURED(). +2003/02/08: [dist] change contents of doc/API. +2003/02/07: [inst] change backup file suffix name from '.orig' to '.ruby_orig'. +2003/02/07: [spec] range in char-class should be same spec. with Ruby + in ignore-case mode. (ex. /[A-c]/i == /[a-c]/i) + (thanks MoonWolf) +2003/02/07: [spec] [!--] should be allowed. (thanks MoonWolf) +2003/02/07: [dist] refresh re.c.180.patch for re.c (2003-02-06). + +2003/02/07: Version 1.7.1 + +2003/02/07: [impl] check first byte of string in ignore-case mode. + (get_head_exact_node()) +2003/02/07: [impl] remove redundant statements in setup_tree(). +2003/02/06: [new] create Win32 DLL. +2003/02/06: [impl] use P_() macro for function prototype. +2003/02/06: [impl] add HAVE_PROTOTYPE, HAVE_STDARG_PROTOTYPES in + configure.in and config.h.in. +2003/02/06: [spec] /[0-9-a]/ is allowed as usual char '-' and 'a' in Ruby. + add USE_BETTER_COMPATIBILITY_FOR_ORIGINAL_REGEX in + regint.h. (thanks MoonWolf) +2003/02/06: [spec] rename REG_MBCTYPE_XXXX to REG_ENCODING_XXXX in onigposix.h. +2003/02/05: [spec] rename MBCTYPE_XXXX to REG_MBCTYPE_XXXX in onigposix.h. +2003/02/05: [spec] add POSIX API error REG_EONIG_THREAD to onigposix.h. +2003/02/05: [dist] add .cvsignore file. + +2003/02/04: Version 1.7 + +2003/02/04: [bug] typo miss in regex_region_copy(). +2003/02/04: [impl] change THREAD_PASS macro. (regint.h) +2003/02/04: [dist] add API document file doc/API. +2003/02/04: [tune] if sub_anchor has ANCHOR_BEGIN_LINE then + set REG_OPTIMIZE_EXACT_BM in set_optimize_exact_info(). +2003/02/04: [spec] reimplement regex_clone() and it is obsoleted. +2003/02/04: [bug] add REGERR_OVER_THREAD_PASS_LIMIT_COUNT + to regerror.c regposix.c. +2003/02/03: [bug] Hankaku-Kana may be second byte in Shift_JIS + regex_is_allow_reverse_match(). +2003/02/03: [impl] add optimization type REG_OPTIMIZE_EXACT_BM_NOT_REV. + remove exact_allow_reverse_match member in regex_t. 
+2003/02/03: [impl] add exact_allow_reverse_match member in regex_t. +2003/02/03: [impl] compile-search conflict in regex_search() is handled. +2003/02/01: [tune] decrease regex_region_clear() calling from regex_search(). +2003/02/01: [tune] remove region argument from match_at(). +2003/01/31: [tune] don't use strlen() in regexec() and regcomp(). +2003/01/31: [tune] decrease regex_reduce_chain() calling in regex_search(). +2003/01/31: [bug] STRING_CMP() in regexec.c was wrong in ignore-case. +2003/01/31: [impl] convert to lower-case char at string compile time. + change SBTRANSCMP() in regexec.c. +2003/01/31: [impl] rename TTRANS() to TOLOWER(). +2003/01/30: [bug] .c.o --> .c.obj in win32\Makefile. +2003/01/30: [impl] add -DNOT_RUBY to Makefile.in. + NOT_RUBY is referred to in regint.h to avoid double + inclusion of config.h. +2003/01/30: [impl] when string hasn't case ambiguity, don't compile + to ignore case opcode. +2003/01/29: [impl] add SJIS, UTF-8 test_sb() test. +2003/01/29: [dist] add INSTALL-RUBY file. +2003/01/28: [test] success in Cygwin, Ruby 1.8.0 (2003-01-27). +2003/01/24: [inst] add rback target to Makefile.in. +2003/01/24: [impl] change SBCMP() -> IS_NEWLINE() in match_at(). +2003/01/23: [impl] add encoding arg to scan_xxxx_number(). +2003/01/23: [impl] rename WCInt to WCINT. +2003/01/22: [bug] POSIX API regexec() was not thread safe. + remove region member from POSIX regex_t. + [new] add search time option REG_OPTION_POSIX_REGION. + (region argument is treated as regmatch_t[] type) + speed up regexec(). +2003/01/22: [memo] start CVS entry in my box. + +2003/01/21: Version 1.6 + +2003/01/21: [test] Mac OS X 10.1, Ruby 1.8.0 (2003-01-20) +2003/01/20: [impl] add UTF-8 check to test.rb. (thanks UENO Katsuhiro) +2003/01/18: [impl] change REGION_NOTPOS to REG_REGION_NOTPOS in regex.h. +2003/01/17: [dist] add sample/simple.c. +2003/01/17: [inst] add configure option --with-rubydir. +2003/01/17: [bug] badly implemented POSIX API options.
+ default: /./ not match "\n", anchor not match "\n" + REG_NEWLINE: /./ not match "\n", anchor match "\n" +2003/01/16: [impl] rewrite POSIX API regexec() for speed up. +2003/01/16: [impl] add region member to POSIX regex_t struct. +2003/01/16: [inst] rename library file from 'libregex.a' to 'libonig.a'. +2003/01/15: [dist] add testc.c to distribution file. +2003/01/15: [test] success in 'make rtest/ctest/ptest' on Windows 2000. +2003/01/15: [bug] change '/' to \' in win32/Makefile. +2003/01/14: [test] success in Ruby make test on Windows 2000. + VC++6.0, Ruby 1.6.8 (2003-01-12) +2003/01/14: [inst] change Makefile.in and win32/Makefile. +2003/01/11: [inst] changes for Win32 platform. (regint.h, reggnu.c, regcomp.c) +2003/01/11: [dist] add win32 directory. (config.h, Makefile, testc.c) +2003/01/10: [inst] add onigposix.h to install target. (Makefile.in) +2003/01/10: [bug] lacked a comma in ESTRING[]. (regposerr.c) +2003/01/10: [bug] local variable name was wrong. buf -> tbuf (regerror()) +2003/01/10: [spec] remove REG_RUBY_M17N case from onigposix.h and regposix.c. + +2003/01/09: Version 1.5 + +2003/01/09: [inst] replace Ruby re.c.XXX.patch files. (166 -> 168, 172 -> 180) +2003/01/09: [new] implement POSIX API. (thanks knu) + (onigposix.h, regposix.c, regposerr.c) +2003/01/08: [spec] remove REGERR_END_PATTERN_AFTER_BACKSLASH in regex.h. +2003/01/08: [spec] region arg can be NULL in regex_search() and regex_match(). + +2003/01/08: Version 1.4 + +2003/01/08: [inst] add test program converter (test.rb -> testc.c). +2003/01/08: [bug] move GET_WCINT() from regcomp.c to regint.h. +2003/01/07: [inst] add new test script (test.rb). +2002/12/30: [bug] wrong merge in multibyte mode (alt_merge_opt_exact_info()). +2002/12/28: [inst] add rtest target to Makefile.in. +2002/12/28: [bug] /\xfe/.match("\xfe") mismatch in multibyte mode. + add "raw" flag arg to concat_opt_exact_info_str(). +2002/12/25: [bug] check condition was wrong in alt_merge_opt_map_info(). 
+2002/12/25: [impl] add threshold_len check in regex_search(). +2002/12/23: [bug] prec-read in alternative (/a|(?=z).f/.match("zf") => nil) +2002/12/23: [bug] \G in alternative (/a|\Gz/.match("bza") => "z"). + add start member in MatchArg. (regexec.c) +2002/12/21: [impl] **** rewrite all optimization process. **** +2002/12/16: [impl] remove node subtype EFFECT_EMPTY. +2002/12/12: [impl] reconstruct node types. (regcomp.c) +2002/12/11: [impl] add regerror.c +2002/12/10: [bug] [ruby-dev:19042] (thanks Nobu) + anchor(\G etc...) influenced outside of "|". (/a|\Gb/) +2002/11/30: [bug] [ruby-dev:18966] (thanks Nobu) + char-class(\S, [^\s] etc...) optimize map-info was wrong. +2002/11/29: [bug] infinite loop on NULL-pointer str search (regex_search()). + (thanks matz) +2002/11/29: [bug] change static -> extern (regex_chain_reduce()). +2002/11/29: [bug] change encoding to RegDefaultCharEncoding + in re_recompile_pattern(). (adapt to re.c) +2002/04/24: [spec] USE_ONIGURUMA_EXTENSION is disabled in default. +2002/04/24: [new] add searching time option: REG_OPTION_NOTBOL/NOTEOL. + add searching time option argument to regex_search() and + regex_match(). (prepare for POSIX API) +2002/04/20: [impl] divide regex.c file into regcomp.c, regexec.c, reggnu.c + and regint.h. +2002/04/09: [impl] move IS_MULTILINE() to outside of loop in OP_ANYCHAR_STAR. +2002/04/08: [impl] don't use OP_REPEAT operator for '??'. +2002/04/06: [impl] reduce redundant nested repeat operators(?,*,+,??,*?,+?). + ex. (?:a*)?, (?:a??)* etc.. +2002/04/06: [spec] should not warn for /(?:a?)+?/. +2002/04/04: [spec] should allow fixed length alternative and repeat pattern + in look-behind. ex. /(?<=(a|b){3})/ (thanks Guy Decoux) +2002/04/02: [spec] should warn for /(?:a+)?/ and /(?:a*)??/. (thanks akr) + +2002/04/01: Version 1.3 + +2002/04/01: [dist] add COPYING. +2002/03/30: [spec] warn redundant nested repeat operator + in Ruby verbose mode. ex. (?:a*)? 
+2002/03/30: [spec] nested repeat operator error check should be + same with GNU regex. (thanks Guy Decoux) +2002/03/30: [new] add \x{hexadecimal-wide-char}. (thanks matz) +2002/03/27: [bug] MBCTYPE_XXX symbol values should be same with GNU regex. +2002/03/27: [impl] add THREAD_ATOMIC to regex_clone(), regex_init(), regex_end(). +2002/03/25: [spec] if encoding is utf-8, allow combination of singlebyte and + multibyte code range in char class. + (cancelled 2002/04/01: for M17N compatibility) +2002/03/25: [dist] description of the license condition is added to README. +2002/03/23: [bug] should set all bits of reg->mem_stats, + if REG_OPTION_FIND_LONGEST or REG_OPTION_NOT_EMPTY. +2002/03/23: [new] add a new option REG_OPTION_NOT_EMPTY. +2002/03/20: [spec] allow incomplete left brace as a usual char. + ex. /{/, /({)/, /a{2,3/ etc... +2002/03/20: [impl] serialize integer in bytecode. + (switch by UNALIGNED_WORD_ACCESS in regex.c) +2002/03/20: [impl] change re_mbcinit() for REG_RUBY_M17N. +2002/03/19: [impl] word alignment of char class multi-byte code ranges. +2002/03/19: [impl] replace OP_EXACTMB4N with OP_EXACTMB3N. +2002/03/19: [bug] OP_CCLASS_MB_NOT process in matchAt() is wrong. +2002/03/19: [new] add re_mbctab[] for Ruby extension library compatibility. +2002/03/19: [spec] allow nested repeat operator, if operator is {n,m} type. +2002/03/19: [new] add REG_IS_PATTERN_ERROR(ecode) in regex.h +2002/03/18: [spec] /[a-b-c]/ should be error. +2002/03/18: [bug] /[\w-a]/ should be error. (thanks Guy Decoux) +2002/03/18: [bug] /[\]/ should be error. (thanks Guy Decoux) +2002/03/18: [bug] /()*/ etc.. should not be error. (thanks Guy Decoux) +2002/03/18: [spec] /a{1}*/ should not be error. (thanks Guy Decoux) +2002/03/18: [bug] ab{2}{3} was interpreted as (?:a(?:b{2})){3} + (thanks Guy Decoux) +2002/03/18: [bug] abort /(?i)*a/ etc... (thanks Guy Decoux) +2002/03/18: [bug] abort /a|*/,/a|{1}/ etc...
(thanks Guy Decoux) + +2002/03/13: Version 1.2 + +2002/03/13: [test] success in rubicon/builtin/AllBuiltinTests.rb. + (thanks rubicon) +2002/03/13: [bug] OP_EXACTMBN process in matchAt() is wrong. +2002/03/13: [bug] start argument of BackwardSearchRange() is wrong. +2002/03/12: [spec] change function name style from CamelCase + to underline_separation. (includes API) +2002/03/12: [bug] if pattern has nested null-check, cause infinite loop. + correct STACK_NULL_CHECK() macro. (thanks Guy Decoux) +2002/03/11: [bug] it is wrong that four numbers to continue as + an octal value in scanBackSlash(). ex. /\0111/ + (thanks matz) +2002/03/11: [new] \k (single-byte word char), \K (multi-byte char). +2002/03/09: [inst] add two targets to Makefile.in (166 and 172). +2002/03/09: [spec] decrease REG_MAX_BACKREF_NUM, REG_MAX_REPEAT_NUM + values. +2002/03/08: [spec] allow use of "\A"(begin-buf) in look-behind. +2002/03/08: [impl] add a new opcode OP_PUSH_IF_PEEK_NEXT. +2002/03/08: [impl] add a new opcode OP_ANYCHAR_STAR_PEEK_NEXT. +2002/03/07: [spec] prohibit use of capture group "(...)" + in negative look-behind. +2002/03/07: [inst] add configure.in, config.h.in, Makefile.in. +2002/03/07: [impl] call Init_REGEX_STAT() in RegexInit(). +2002/03/07: [spec] less length string match with negative look-behind. + ex. /(? +svn copy file:///home/kosako/svnreps/svnrep_onig/trunk file:///home/kosako/svnreps/svnrep_onig/tags/5.0.0 -m "ADD TAG: 5.0.0" + + +svn propset svn:ignore -F .cvsignore . +svn commit -m "..." + + + +cvs history -T + + +cvs rtag "VERSION_X_X_X" oniguruma + + + +* write Makefile.am and configure.in. +> aclocal +> libtoolize or glibtoolize +> automake --foreign --add-missing +> autoconf +> configure --with-rubydir=... 
CFLAGS="-O2 -Wall" + + + + + VERSION = current:revision:age + + current: interface number (from 0) + revision: implementation number of same interface (from 0) + age: number of supported previous interfaces + (if current only supported then age == 0) + +//END diff --git a/oniguruma/INSTALL b/oniguruma/INSTALL new file mode 100644 index 0000000..56b077d --- /dev/null +++ b/oniguruma/INSTALL @@ -0,0 +1,236 @@ +Installation Instructions +************************* + +Copyright (C) 1994, 1995, 1996, 1999, 2000, 2001, 2002, 2004, 2005 Free +Software Foundation, Inc. + +This file is free documentation; the Free Software Foundation gives +unlimited permission to copy, distribute and modify it. + +Basic Installation +================== + +These are generic installation instructions. + + The `configure' shell script attempts to guess correct values for +various system-dependent variables used during compilation. It uses +those values to create a `Makefile' in each directory of the package. +It may also create one or more `.h' files containing system-dependent +definitions. Finally, it creates a shell script `config.status' that +you can run in the future to recreate the current configuration, and a +file `config.log' containing compiler output (useful mainly for +debugging `configure'). + + It can also use an optional file (typically called `config.cache' +and enabled with `--cache-file=config.cache' or simply `-C') that saves +the results of its tests to speed up reconfiguring. (Caching is +disabled by default to prevent problems with accidental use of stale +cache files.) + + If you need to do unusual things to compile the package, please try +to figure out how `configure' could check whether to do them, and mail +diffs or instructions to the address given in the `README' so they can +be considered for the next release. If you are using the cache, and at +some point `config.cache' contains results you don't want to keep, you +may remove or edit it. 
+ + The file `configure.ac' (or `configure.in') is used to create +`configure' by a program called `autoconf'. You only need +`configure.ac' if you want to change it or regenerate `configure' using +a newer version of `autoconf'. + +The simplest way to compile this package is: + + 1. `cd' to the directory containing the package's source code and type + `./configure' to configure the package for your system. If you're + using `csh' on an old version of System V, you might need to type + `sh ./configure' instead to prevent `csh' from trying to execute + `configure' itself. + + Running `configure' takes awhile. While running, it prints some + messages telling which features it is checking for. + + 2. Type `make' to compile the package. + + 3. Optionally, type `make check' to run any self-tests that come with + the package. + + 4. Type `make install' to install the programs and any data files and + documentation. + + 5. You can remove the program binaries and object files from the + source code directory by typing `make clean'. To also remove the + files that `configure' created (so you can compile the package for + a different kind of computer), type `make distclean'. There is + also a `make maintainer-clean' target, but that is intended mainly + for the package's developers. If you use it, you may have to get + all sorts of other programs in order to regenerate files that came + with the distribution. + +Compilers and Options +===================== + +Some systems require unusual options for compilation or linking that the +`configure' script does not know about. Run `./configure --help' for +details on some of the pertinent environment variables. + + You can give `configure' initial values for configuration parameters +by setting variables in the command line or in the environment. Here +is an example: + + ./configure CC=c89 CFLAGS=-O2 LIBS=-lposix + + *Note Defining Variables::, for more details. 
+ +Compiling For Multiple Architectures +==================================== + +You can compile the package for more than one kind of computer at the +same time, by placing the object files for each architecture in their +own directory. To do this, you must use a version of `make' that +supports the `VPATH' variable, such as GNU `make'. `cd' to the +directory where you want the object files and executables to go and run +the `configure' script. `configure' automatically checks for the +source code in the directory that `configure' is in and in `..'. + + If you have to use a `make' that does not support the `VPATH' +variable, you have to compile the package for one architecture at a +time in the source code directory. After you have installed the +package for one architecture, use `make distclean' before reconfiguring +for another architecture. + +Installation Names +================== + +By default, `make install' will install the package's files in +`/usr/local/bin', `/usr/local/man', etc. You can specify an +installation prefix other than `/usr/local' by giving `configure' the +option `--prefix=PREFIX'. + + You can specify separate installation prefixes for +architecture-specific files and architecture-independent files. If you +give `configure' the option `--exec-prefix=PREFIX', the package will +use PREFIX as the prefix for installing programs and libraries. +Documentation and other data files will still use the regular prefix. + + In addition, if you use an unusual directory layout you can give +options like `--bindir=DIR' to specify different values for particular +kinds of files. Run `configure --help' for a list of the directories +you can set and what kinds of files go in them. + + If the package supports it, you can cause programs to be installed +with an extra prefix or suffix on their names by giving `configure' the +option `--program-prefix=PREFIX' or `--program-suffix=SUFFIX'. 
+ +Optional Features +================= + +Some packages pay attention to `--enable-FEATURE' options to +`configure', where FEATURE indicates an optional part of the package. +They may also pay attention to `--with-PACKAGE' options, where PACKAGE +is something like `gnu-as' or `x' (for the X Window System). The +`README' should mention any `--enable-' and `--with-' options that the +package recognizes. + + For packages that use the X Window System, `configure' can usually +find the X include and library files automatically, but if it doesn't, +you can use the `configure' options `--x-includes=DIR' and +`--x-libraries=DIR' to specify their locations. + +Specifying the System Type +========================== + +There may be some features `configure' cannot figure out automatically, +but needs to determine by the type of machine the package will run on. +Usually, assuming the package is built to be run on the _same_ +architectures, `configure' can figure that out, but if it prints a +message saying it cannot guess the machine type, give it the +`--build=TYPE' option. TYPE can either be a short name for the system +type, such as `sun4', or a canonical name which has the form: + + CPU-COMPANY-SYSTEM + +where SYSTEM can have one of these forms: + + OS KERNEL-OS + + See the file `config.sub' for the possible values of each field. If +`config.sub' isn't included in this package, then this package doesn't +need to know the machine type. + + If you are _building_ compiler tools for cross-compiling, you should +use the `--target=TYPE' option to select the type of system they will +produce code for. + + If you want to _use_ a cross compiler, that generates code for a +platform different from the build platform, you should specify the +"host" platform (i.e., that on which the generated programs will +eventually be run) with `--host=TYPE'. 
+ +Sharing Defaults +================ + +If you want to set default values for `configure' scripts to share, you +can create a site shell script called `config.site' that gives default +values for variables like `CC', `cache_file', and `prefix'. +`configure' looks for `PREFIX/share/config.site' if it exists, then +`PREFIX/etc/config.site' if it exists. Or, you can set the +`CONFIG_SITE' environment variable to the location of the site script. +A warning: not all `configure' scripts look for a site script. + +Defining Variables +================== + +Variables not defined in a site shell script can be set in the +environment passed to `configure'. However, some packages may run +configure again during the build, and the customized values of these +variables may be lost. In order to avoid this problem, you should set +them in the `configure' command line, using `VAR=value'. For example: + + ./configure CC=/usr/local2/bin/gcc + +causes the specified `gcc' to be used as the C compiler (unless it is +overridden in the site shell script). Here is another example: + + /bin/bash ./configure CONFIG_SHELL=/bin/bash + +Here the `CONFIG_SHELL=/bin/bash' operand causes subsequent +configuration-related scripts to be executed by `/bin/bash'. + +`configure' Invocation +====================== + +`configure' recognizes the following options to control how it operates. + +`--help' +`-h' + Print a summary of the options to `configure', and exit. + +`--version' +`-V' + Print the version of Autoconf used to generate the `configure' + script, and exit. + +`--cache-file=FILE' + Enable the cache: use and save the results of the tests in FILE, + traditionally `config.cache'. FILE defaults to `/dev/null' to + disable caching. + +`--config-cache' +`-C' + Alias for `--cache-file=config.cache'. + +`--quiet' +`--silent' +`-q' + Do not print messages saying which checks are being made. To + suppress all normal output, redirect it to `/dev/null' (any error + messages will still be shown).
+ +`--srcdir=DIR' + Look for the package's source code in directory DIR. Usually + `configure' can determine that directory automatically. + +`configure' also accepts some other, not widely useful, options. Run +`configure --help' for more details. + diff --git a/oniguruma/Makefile.am b/oniguruma/Makefile.am new file mode 100644 index 0000000..4f34df1 --- /dev/null +++ b/oniguruma/Makefile.am @@ -0,0 +1,94 @@ +## Makefile.am for Oniguruma +encdir = $(top_srcdir)/enc +sampledir = $(top_srcdir)/sample +libname = libonig.la + +#AM_CFLAGS = -DNOT_RUBY +AM_CFLAGS = +INCLUDES = -I$(top_srcdir) -I$(includedir) + +SUBDIRS = . sample + +include_HEADERS = oniguruma.h oniggnu.h onigposix.h +lib_LTLIBRARIES = $(libname) + +libonig_la_SOURCES = regint.h regparse.h regenc.h st.h \ + regerror.c regparse.c regext.c regcomp.c regexec.c reggnu.c \ + regenc.c regsyntax.c regtrav.c regversion.c st.c \ + regposix.c regposerr.c \ + $(encdir)/unicode.c $(encdir)/ascii.c $(encdir)/utf8.c \ + $(encdir)/utf16_be.c $(encdir)/utf16_le.c \ + $(encdir)/utf32_be.c $(encdir)/utf32_le.c \ + $(encdir)/euc_jp.c $(encdir)/sjis.c $(encdir)/iso8859_1.c \ + $(encdir)/iso8859_2.c $(encdir)/iso8859_3.c \ + $(encdir)/iso8859_4.c $(encdir)/iso8859_5.c \ + $(encdir)/iso8859_6.c $(encdir)/iso8859_7.c \ + $(encdir)/iso8859_8.c $(encdir)/iso8859_9.c \ + $(encdir)/iso8859_10.c $(encdir)/iso8859_11.c \ + $(encdir)/iso8859_13.c $(encdir)/iso8859_14.c \ + $(encdir)/iso8859_15.c $(encdir)/iso8859_16.c \ + $(encdir)/euc_tw.c $(encdir)/euc_kr.c $(encdir)/big5.c \ + $(encdir)/gb18030.c $(encdir)/koi8_r.c $(encdir)/cp1251.c + +libonig_la_LDFLAGS = -version-info $(LTVERSION) + +EXTRA_DIST = HISTORY README.ja index.html index_ja.html \ + doc/API doc/API.ja doc/RE doc/RE.ja doc/FAQ doc/FAQ.ja \ + win32/Makefile win32/config.h win32/testc.c \ + $(encdir)/koi8.c $(encdir)/mktable.c \ + $(sampledir)/encode.c $(sampledir)/listcap.c $(sampledir)/names.c \ + $(sampledir)/posix.c $(sampledir)/simple.c $(sampledir)/sql.c \ + 
$(sampledir)/syntax.c + +bin_SCRIPTS = onig-config + +onig-config: onig-config.in + +dll: + $(CXX) -shared -Wl,--output-def,libonig.def -o libonig.dll *.o \ + $(LIBS) + strip libonig.dll + +# Ruby TEST +rtest: + $(RUBYDIR)/ruby -w -Ke $(srcdir)/test.rb + +# character-types-table source generator +mktable: $(encdir)/mktable.c $(srcdir)/regenc.h + $(CC) -I$(top_srcdir) -o mktable $(encdir)/mktable.c + + +# TEST +TESTS = testc testp testcu + +check_PROGRAMS = testc testp testcu + +atest: testc testp testcu + @echo "[Oniguruma API, ASCII/EUC-JP check]" + @$(top_builddir)/testc | grep RESULT + @echo "[POSIX API, ASCII/EUC-JP check]" + @$(top_builddir)/testp | grep RESULT + @echo "[Oniguruma API, UTF-16 check]" + @$(top_builddir)/testcu | grep RESULT + +testc_SOURCES = testc.c +testc_LDADD = libonig.la + +testp_SOURCES = testc.c +testp_LDADD = libonig.la +testp_CFLAGS = -DPOSIX_TEST + +testcu_SOURCES = testu.c +testcu_LDADD = libonig.la + + +#testc.c: $(srcdir)/test.rb $(srcdir)/testconv.rb +# ruby -Ke $(srcdir)/testconv.rb < $(srcdir)/test.rb > $@ + +#testu.c: $(srcdir)/test.rb $(srcdir)/testconvu.rb +# ruby -Ke $(srcdir)/testconvu.rb $(srcdir)/test.rb > $@ + +#win32/testc.c: $(srcdir)/test.rb $(srcdir)/testconv.rb +# ruby -Ke $(srcdir)/testconv.rb -win < $(srcdir)/test.rb | nkf -cs > $@ + +## END OF FILE diff --git a/oniguruma/Makefile.in b/oniguruma/Makefile.in new file mode 100644 index 0000000..1c488d7 --- /dev/null +++ b/oniguruma/Makefile.in @@ -0,0 +1,1238 @@ +# Makefile.in generated by automake 1.10 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, +# 2003, 2004, 2005, 2006 Free Software Foundation, Inc. +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. 
+ +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + + + +VPATH = @srcdir@ +pkgdatadir = $(datadir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +TESTS = testc$(EXEEXT) testp$(EXEEXT) testcu$(EXEEXT) +check_PROGRAMS = testc$(EXEEXT) testp$(EXEEXT) testcu$(EXEEXT) +subdir = . +DIST_COMMON = README $(am__configure_deps) $(include_HEADERS) \ + $(srcdir)/Makefile.am $(srcdir)/Makefile.in \ + $(srcdir)/config.h.in $(srcdir)/onig-config.in \ + $(top_srcdir)/configure AUTHORS COPYING INSTALL config.guess \ + config.sub depcomp install-sh ltmain.sh missing +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/configure.in +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +am__CONFIG_DISTCLEAN_FILES = config.status config.cache config.log \ + configure.lineno config.status.lineno +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = config.h +CONFIG_CLEAN_FILES = onig-config +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ + *) f=$$p;; \ + esac; +am__strip_dir = `echo $$p | sed -e 's|^.*/||'`; +am__installdirs = "$(DESTDIR)$(libdir)" "$(DESTDIR)$(bindir)" \ + "$(DESTDIR)$(includedir)" +libLTLIBRARIES_INSTALL = $(INSTALL) +LTLIBRARIES = $(lib_LTLIBRARIES) 
+libonig_la_LIBADD = +am_libonig_la_OBJECTS = regerror.lo regparse.lo regext.lo regcomp.lo \ + regexec.lo reggnu.lo regenc.lo regsyntax.lo regtrav.lo \ + regversion.lo st.lo regposix.lo regposerr.lo unicode.lo \ + ascii.lo utf8.lo utf16_be.lo utf16_le.lo utf32_be.lo \ + utf32_le.lo euc_jp.lo sjis.lo iso8859_1.lo iso8859_2.lo \ + iso8859_3.lo iso8859_4.lo iso8859_5.lo iso8859_6.lo \ + iso8859_7.lo iso8859_8.lo iso8859_9.lo iso8859_10.lo \ + iso8859_11.lo iso8859_13.lo iso8859_14.lo iso8859_15.lo \ + iso8859_16.lo euc_tw.lo euc_kr.lo big5.lo gb18030.lo koi8_r.lo \ + cp1251.lo +libonig_la_OBJECTS = $(am_libonig_la_OBJECTS) +libonig_la_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(libonig_la_LDFLAGS) $(LDFLAGS) -o $@ +am_testc_OBJECTS = testc.$(OBJEXT) +testc_OBJECTS = $(am_testc_OBJECTS) +testc_DEPENDENCIES = libonig.la +am_testcu_OBJECTS = testu.$(OBJEXT) +testcu_OBJECTS = $(am_testcu_OBJECTS) +testcu_DEPENDENCIES = libonig.la +am_testp_OBJECTS = testp-testc.$(OBJEXT) +testp_OBJECTS = $(am_testp_OBJECTS) +testp_DEPENDENCIES = libonig.la +testp_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ + --mode=link $(CCLD) $(testp_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \ + $(LDFLAGS) -o $@ +binSCRIPT_INSTALL = $(INSTALL_SCRIPT) +SCRIPTS = $(bin_SCRIPTS) +DEFAULT_INCLUDES = -I.@am__isrc@ +depcomp = $(SHELL) $(top_srcdir)/depcomp +am__depfiles_maybe = depfiles +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ + --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ + $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +CCLD = $(CC) +LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ + --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \ + $(LDFLAGS) -o $@ +SOURCES = $(libonig_la_SOURCES) $(testc_SOURCES) $(testcu_SOURCES) \ + $(testp_SOURCES) 
+DIST_SOURCES = $(libonig_la_SOURCES) $(testc_SOURCES) \ + $(testcu_SOURCES) $(testp_SOURCES) +RECURSIVE_TARGETS = all-recursive check-recursive dvi-recursive \ + html-recursive info-recursive install-data-recursive \ + install-dvi-recursive install-exec-recursive \ + install-html-recursive install-info-recursive \ + install-pdf-recursive install-ps-recursive install-recursive \ + installcheck-recursive installdirs-recursive pdf-recursive \ + ps-recursive uninstall-recursive +includeHEADERS_INSTALL = $(INSTALL_HEADER) +HEADERS = $(include_HEADERS) +RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \ + distclean-recursive maintainer-clean-recursive +ETAGS = etags +CTAGS = ctags +DIST_SUBDIRS = $(SUBDIRS) +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +distdir = $(PACKAGE)-$(VERSION) +top_distdir = $(distdir) +am__remove_distdir = \ + { test ! -d $(distdir) \ + || { find $(distdir) -type d ! -perm -200 -exec chmod u+w {} ';' \ + && rm -fr $(distdir); }; } +DIST_ARCHIVES = $(distdir).tar.gz +GZIP_ENV = --best +distuninstallcheck_listfiles = find . -type f -print +distcleancheck_listfiles = find . 
-type f -print +ACLOCAL = @ACLOCAL@ +ALLOCA = @ALLOCA@ +AMTAR = @AMTAR@ +AR = @AR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +FGREP = @FGREP@ +GREP = @GREP@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBTOOL = @LIBTOOL@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +LTVERSION = @LTVERSION@ +MAKEINFO = @MAKEINFO@ +MKDIR_P = @MKDIR_P@ +NM = @NM@ +NMEDIT = @NMEDIT@ +OBJEXT = @OBJEXT@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +RANLIB = @RANLIB@ +RUBYDIR = @RUBYDIR@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +STATISTICS = @STATISTICS@ +STRIP = @STRIP@ +VERSION = @VERSION@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = 
@host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +lt_ECHO = @lt_ECHO@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target_alias = @target_alias@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +encdir = $(top_srcdir)/enc +sampledir = $(top_srcdir)/sample +libname = libonig.la + +#AM_CFLAGS = -DNOT_RUBY +AM_CFLAGS = +INCLUDES = -I$(top_srcdir) -I$(includedir) +SUBDIRS = . sample +include_HEADERS = oniguruma.h oniggnu.h onigposix.h +lib_LTLIBRARIES = $(libname) +libonig_la_SOURCES = regint.h regparse.h regenc.h st.h \ + regerror.c regparse.c regext.c regcomp.c regexec.c reggnu.c \ + regenc.c regsyntax.c regtrav.c regversion.c st.c \ + regposix.c regposerr.c \ + $(encdir)/unicode.c $(encdir)/ascii.c $(encdir)/utf8.c \ + $(encdir)/utf16_be.c $(encdir)/utf16_le.c \ + $(encdir)/utf32_be.c $(encdir)/utf32_le.c \ + $(encdir)/euc_jp.c $(encdir)/sjis.c $(encdir)/iso8859_1.c \ + $(encdir)/iso8859_2.c $(encdir)/iso8859_3.c \ + $(encdir)/iso8859_4.c $(encdir)/iso8859_5.c \ + $(encdir)/iso8859_6.c $(encdir)/iso8859_7.c \ + $(encdir)/iso8859_8.c $(encdir)/iso8859_9.c \ + $(encdir)/iso8859_10.c $(encdir)/iso8859_11.c \ + $(encdir)/iso8859_13.c $(encdir)/iso8859_14.c \ + $(encdir)/iso8859_15.c $(encdir)/iso8859_16.c \ + $(encdir)/euc_tw.c $(encdir)/euc_kr.c $(encdir)/big5.c \ + $(encdir)/gb18030.c $(encdir)/koi8_r.c $(encdir)/cp1251.c + +libonig_la_LDFLAGS = -version-info $(LTVERSION) +EXTRA_DIST = HISTORY README.ja index.html index_ja.html \ + doc/API doc/API.ja doc/RE doc/RE.ja doc/FAQ doc/FAQ.ja \ + 
win32/Makefile win32/config.h win32/testc.c \ + $(encdir)/koi8.c $(encdir)/mktable.c \ + $(sampledir)/encode.c $(sampledir)/listcap.c $(sampledir)/names.c \ + $(sampledir)/posix.c $(sampledir)/simple.c $(sampledir)/sql.c \ + $(sampledir)/syntax.c + +bin_SCRIPTS = onig-config +testc_SOURCES = testc.c +testc_LDADD = libonig.la +testp_SOURCES = testc.c +testp_LDADD = libonig.la +testp_CFLAGS = -DPOSIX_TEST +testcu_SOURCES = testu.c +testcu_LDADD = libonig.la +all: config.h + $(MAKE) $(AM_MAKEFLAGS) all-recursive + +.SUFFIXES: +.SUFFIXES: .c .lo .o .obj +am--refresh: + @: +$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + echo ' cd $(srcdir) && $(AUTOMAKE) --foreign '; \ + cd $(srcdir) && $(AUTOMAKE) --foreign \ + && exit 0; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign Makefile'; \ + cd $(top_srcdir) && \ + $(AUTOMAKE) --foreign Makefile +.PRECIOUS: Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' in \ + *config.status*) \ + echo ' $(SHELL) ./config.status'; \ + $(SHELL) ./config.status;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $@ $(am__depfiles_maybe)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $@ $(am__depfiles_maybe);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + $(SHELL) ./config.status --recheck + +$(top_srcdir)/configure: $(am__configure_deps) + cd $(srcdir) && $(AUTOCONF) +$(ACLOCAL_M4): $(am__aclocal_m4_deps) + cd $(srcdir) && $(ACLOCAL) $(ACLOCAL_AMFLAGS) + +config.h: stamp-h1 + @if test ! 
-f $@; then \ + rm -f stamp-h1; \ + $(MAKE) $(AM_MAKEFLAGS) stamp-h1; \ + else :; fi + +stamp-h1: $(srcdir)/config.h.in $(top_builddir)/config.status + @rm -f stamp-h1 + cd $(top_builddir) && $(SHELL) ./config.status config.h +$(srcdir)/config.h.in: $(am__configure_deps) + cd $(top_srcdir) && $(AUTOHEADER) + rm -f stamp-h1 + touch $@ + +distclean-hdr: + -rm -f config.h stamp-h1 +onig-config: $(top_builddir)/config.status $(srcdir)/onig-config.in + cd $(top_builddir) && $(SHELL) ./config.status $@ +install-libLTLIBRARIES: $(lib_LTLIBRARIES) + @$(NORMAL_INSTALL) + test -z "$(libdir)" || $(MKDIR_P) "$(DESTDIR)$(libdir)" + @list='$(lib_LTLIBRARIES)'; for p in $$list; do \ + if test -f $$p; then \ + f=$(am__strip_dir) \ + echo " $(LIBTOOL) --mode=install $(libLTLIBRARIES_INSTALL) $(INSTALL_STRIP_FLAG) '$$p' '$(DESTDIR)$(libdir)/$$f'"; \ + $(LIBTOOL) --mode=install $(libLTLIBRARIES_INSTALL) $(INSTALL_STRIP_FLAG) "$$p" "$(DESTDIR)$(libdir)/$$f"; \ + else :; fi; \ + done + +uninstall-libLTLIBRARIES: + @$(NORMAL_UNINSTALL) + @list='$(lib_LTLIBRARIES)'; for p in $$list; do \ + p=$(am__strip_dir) \ + echo " $(LIBTOOL) --mode=uninstall rm -f '$(DESTDIR)$(libdir)/$$p'"; \ + $(LIBTOOL) --mode=uninstall rm -f "$(DESTDIR)$(libdir)/$$p"; \ + done + +clean-libLTLIBRARIES: + -test -z "$(lib_LTLIBRARIES)" || rm -f $(lib_LTLIBRARIES) + @list='$(lib_LTLIBRARIES)'; for p in $$list; do \ + dir="`echo $$p | sed -e 's|/[^/]*$$||'`"; \ + test "$$dir" != "$$p" || dir=.; \ + echo "rm -f \"$${dir}/so_locations\""; \ + rm -f "$${dir}/so_locations"; \ + done +libonig.la: $(libonig_la_OBJECTS) $(libonig_la_DEPENDENCIES) + $(libonig_la_LINK) -rpath $(libdir) $(libonig_la_OBJECTS) $(libonig_la_LIBADD) $(LIBS) + +clean-checkPROGRAMS: + @list='$(check_PROGRAMS)'; for p in $$list; do \ + f=`echo $$p|sed 's/$(EXEEXT)$$//'`; \ + echo " rm -f $$p $$f"; \ + rm -f $$p $$f ; \ + done +testc$(EXEEXT): $(testc_OBJECTS) $(testc_DEPENDENCIES) + @rm -f testc$(EXEEXT) + $(LINK) $(testc_OBJECTS) $(testc_LDADD) 
$(LIBS) +testcu$(EXEEXT): $(testcu_OBJECTS) $(testcu_DEPENDENCIES) + @rm -f testcu$(EXEEXT) + $(LINK) $(testcu_OBJECTS) $(testcu_LDADD) $(LIBS) +testp$(EXEEXT): $(testp_OBJECTS) $(testp_DEPENDENCIES) + @rm -f testp$(EXEEXT) + $(testp_LINK) $(testp_OBJECTS) $(testp_LDADD) $(LIBS) +install-binSCRIPTS: $(bin_SCRIPTS) + @$(NORMAL_INSTALL) + test -z "$(bindir)" || $(MKDIR_P) "$(DESTDIR)$(bindir)" + @list='$(bin_SCRIPTS)'; for p in $$list; do \ + if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ + if test -f $$d$$p; then \ + f=`echo "$$p" | sed 's|^.*/||;$(transform)'`; \ + echo " $(binSCRIPT_INSTALL) '$$d$$p' '$(DESTDIR)$(bindir)/$$f'"; \ + $(binSCRIPT_INSTALL) "$$d$$p" "$(DESTDIR)$(bindir)/$$f"; \ + else :; fi; \ + done + +uninstall-binSCRIPTS: + @$(NORMAL_UNINSTALL) + @list='$(bin_SCRIPTS)'; for p in $$list; do \ + f=`echo "$$p" | sed 's|^.*/||;$(transform)'`; \ + echo " rm -f '$(DESTDIR)$(bindir)/$$f'"; \ + rm -f "$(DESTDIR)$(bindir)/$$f"; \ + done + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ascii.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/big5.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cp1251.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/euc_jp.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/euc_kr.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/euc_tw.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gb18030.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/iso8859_1.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/iso8859_10.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/iso8859_11.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/iso8859_13.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/iso8859_14.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ 
@am__quote@./$(DEPDIR)/iso8859_15.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/iso8859_16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/iso8859_2.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/iso8859_3.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/iso8859_4.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/iso8859_5.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/iso8859_6.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/iso8859_7.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/iso8859_8.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/iso8859_9.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/koi8_r.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/regcomp.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/regenc.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/regerror.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/regexec.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/regext.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/reggnu.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/regparse.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/regposerr.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/regposix.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/regsyntax.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/regtrav.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/regversion.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sjis.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/st.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/testc.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/testp-testc.Po@am__quote@ +@AMDEP_TRUE@@am__include@ 
@am__quote@./$(DEPDIR)/testu.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/unicode.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/utf16_be.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/utf16_le.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/utf32_be.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/utf32_le.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/utf8.Plo@am__quote@ + +.c.o: +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(COMPILE) -c $< + +.c.obj: +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'` +@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(COMPILE) -c `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(LTCOMPILE) -c -o $@ $< + +unicode.lo: $(encdir)/unicode.c +@am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT unicode.lo -MD -MP -MF $(DEPDIR)/unicode.Tpo -c -o unicode.lo `test -f '$(encdir)/unicode.c' || echo 
'$(srcdir)/'`$(encdir)/unicode.c +@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/unicode.Tpo $(DEPDIR)/unicode.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(encdir)/unicode.c' object='unicode.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o unicode.lo `test -f '$(encdir)/unicode.c' || echo '$(srcdir)/'`$(encdir)/unicode.c + +ascii.lo: $(encdir)/ascii.c +@am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT ascii.lo -MD -MP -MF $(DEPDIR)/ascii.Tpo -c -o ascii.lo `test -f '$(encdir)/ascii.c' || echo '$(srcdir)/'`$(encdir)/ascii.c +@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/ascii.Tpo $(DEPDIR)/ascii.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(encdir)/ascii.c' object='ascii.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o ascii.lo `test -f '$(encdir)/ascii.c' || echo '$(srcdir)/'`$(encdir)/ascii.c + +utf8.lo: $(encdir)/utf8.c +@am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT utf8.lo -MD -MP -MF $(DEPDIR)/utf8.Tpo -c -o utf8.lo `test -f '$(encdir)/utf8.c' || echo '$(srcdir)/'`$(encdir)/utf8.c +@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/utf8.Tpo $(DEPDIR)/utf8.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(encdir)/utf8.c' object='utf8.lo' libtool=yes @AMDEPBACKSLASH@ 
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o utf8.lo `test -f '$(encdir)/utf8.c' || echo '$(srcdir)/'`$(encdir)/utf8.c + +utf16_be.lo: $(encdir)/utf16_be.c +@am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT utf16_be.lo -MD -MP -MF $(DEPDIR)/utf16_be.Tpo -c -o utf16_be.lo `test -f '$(encdir)/utf16_be.c' || echo '$(srcdir)/'`$(encdir)/utf16_be.c +@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/utf16_be.Tpo $(DEPDIR)/utf16_be.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(encdir)/utf16_be.c' object='utf16_be.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o utf16_be.lo `test -f '$(encdir)/utf16_be.c' || echo '$(srcdir)/'`$(encdir)/utf16_be.c + +utf16_le.lo: $(encdir)/utf16_le.c +@am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT utf16_le.lo -MD -MP -MF $(DEPDIR)/utf16_le.Tpo -c -o utf16_le.lo `test -f '$(encdir)/utf16_le.c' || echo '$(srcdir)/'`$(encdir)/utf16_le.c +@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/utf16_le.Tpo $(DEPDIR)/utf16_le.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(encdir)/utf16_le.c' object='utf16_le.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC 
$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o utf16_le.lo `test -f '$(encdir)/utf16_le.c' || echo '$(srcdir)/'`$(encdir)/utf16_le.c + +utf32_be.lo: $(encdir)/utf32_be.c +@am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT utf32_be.lo -MD -MP -MF $(DEPDIR)/utf32_be.Tpo -c -o utf32_be.lo `test -f '$(encdir)/utf32_be.c' || echo '$(srcdir)/'`$(encdir)/utf32_be.c +@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/utf32_be.Tpo $(DEPDIR)/utf32_be.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(encdir)/utf32_be.c' object='utf32_be.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o utf32_be.lo `test -f '$(encdir)/utf32_be.c' || echo '$(srcdir)/'`$(encdir)/utf32_be.c + +utf32_le.lo: $(encdir)/utf32_le.c +@am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT utf32_le.lo -MD -MP -MF $(DEPDIR)/utf32_le.Tpo -c -o utf32_le.lo `test -f '$(encdir)/utf32_le.c' || echo '$(srcdir)/'`$(encdir)/utf32_le.c +@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/utf32_le.Tpo $(DEPDIR)/utf32_le.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(encdir)/utf32_le.c' object='utf32_le.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) 
$(CFLAGS) -c -o utf32_le.lo `test -f '$(encdir)/utf32_le.c' || echo '$(srcdir)/'`$(encdir)/utf32_le.c + +euc_jp.lo: $(encdir)/euc_jp.c +@am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT euc_jp.lo -MD -MP -MF $(DEPDIR)/euc_jp.Tpo -c -o euc_jp.lo `test -f '$(encdir)/euc_jp.c' || echo '$(srcdir)/'`$(encdir)/euc_jp.c +@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/euc_jp.Tpo $(DEPDIR)/euc_jp.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(encdir)/euc_jp.c' object='euc_jp.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o euc_jp.lo `test -f '$(encdir)/euc_jp.c' || echo '$(srcdir)/'`$(encdir)/euc_jp.c + +sjis.lo: $(encdir)/sjis.c +@am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT sjis.lo -MD -MP -MF $(DEPDIR)/sjis.Tpo -c -o sjis.lo `test -f '$(encdir)/sjis.c' || echo '$(srcdir)/'`$(encdir)/sjis.c +@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/sjis.Tpo $(DEPDIR)/sjis.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(encdir)/sjis.c' object='sjis.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o sjis.lo `test -f '$(encdir)/sjis.c' || echo '$(srcdir)/'`$(encdir)/sjis.c + +iso8859_1.lo: $(encdir)/iso8859_1.c +@am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) 
--mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT iso8859_1.lo -MD -MP -MF $(DEPDIR)/iso8859_1.Tpo -c -o iso8859_1.lo `test -f '$(encdir)/iso8859_1.c' || echo '$(srcdir)/'`$(encdir)/iso8859_1.c +@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/iso8859_1.Tpo $(DEPDIR)/iso8859_1.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(encdir)/iso8859_1.c' object='iso8859_1.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o iso8859_1.lo `test -f '$(encdir)/iso8859_1.c' || echo '$(srcdir)/'`$(encdir)/iso8859_1.c + +iso8859_2.lo: $(encdir)/iso8859_2.c +@am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT iso8859_2.lo -MD -MP -MF $(DEPDIR)/iso8859_2.Tpo -c -o iso8859_2.lo `test -f '$(encdir)/iso8859_2.c' || echo '$(srcdir)/'`$(encdir)/iso8859_2.c +@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/iso8859_2.Tpo $(DEPDIR)/iso8859_2.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(encdir)/iso8859_2.c' object='iso8859_2.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o iso8859_2.lo `test -f '$(encdir)/iso8859_2.c' || echo '$(srcdir)/'`$(encdir)/iso8859_2.c + +iso8859_3.lo: $(encdir)/iso8859_3.c +@am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) 
$(CFLAGS) -MT iso8859_3.lo -MD -MP -MF $(DEPDIR)/iso8859_3.Tpo -c -o iso8859_3.lo `test -f '$(encdir)/iso8859_3.c' || echo '$(srcdir)/'`$(encdir)/iso8859_3.c +@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/iso8859_3.Tpo $(DEPDIR)/iso8859_3.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(encdir)/iso8859_3.c' object='iso8859_3.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o iso8859_3.lo `test -f '$(encdir)/iso8859_3.c' || echo '$(srcdir)/'`$(encdir)/iso8859_3.c + +iso8859_4.lo: $(encdir)/iso8859_4.c +@am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT iso8859_4.lo -MD -MP -MF $(DEPDIR)/iso8859_4.Tpo -c -o iso8859_4.lo `test -f '$(encdir)/iso8859_4.c' || echo '$(srcdir)/'`$(encdir)/iso8859_4.c +@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/iso8859_4.Tpo $(DEPDIR)/iso8859_4.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(encdir)/iso8859_4.c' object='iso8859_4.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o iso8859_4.lo `test -f '$(encdir)/iso8859_4.c' || echo '$(srcdir)/'`$(encdir)/iso8859_4.c + +iso8859_5.lo: $(encdir)/iso8859_5.c +@am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT iso8859_5.lo -MD -MP -MF $(DEPDIR)/iso8859_5.Tpo -c -o iso8859_5.lo `test -f 
'$(encdir)/iso8859_5.c' || echo '$(srcdir)/'`$(encdir)/iso8859_5.c +@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/iso8859_5.Tpo $(DEPDIR)/iso8859_5.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(encdir)/iso8859_5.c' object='iso8859_5.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o iso8859_5.lo `test -f '$(encdir)/iso8859_5.c' || echo '$(srcdir)/'`$(encdir)/iso8859_5.c + +iso8859_6.lo: $(encdir)/iso8859_6.c +@am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT iso8859_6.lo -MD -MP -MF $(DEPDIR)/iso8859_6.Tpo -c -o iso8859_6.lo `test -f '$(encdir)/iso8859_6.c' || echo '$(srcdir)/'`$(encdir)/iso8859_6.c +@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/iso8859_6.Tpo $(DEPDIR)/iso8859_6.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(encdir)/iso8859_6.c' object='iso8859_6.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o iso8859_6.lo `test -f '$(encdir)/iso8859_6.c' || echo '$(srcdir)/'`$(encdir)/iso8859_6.c + +iso8859_7.lo: $(encdir)/iso8859_7.c +@am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT iso8859_7.lo -MD -MP -MF $(DEPDIR)/iso8859_7.Tpo -c -o iso8859_7.lo `test -f '$(encdir)/iso8859_7.c' || echo '$(srcdir)/'`$(encdir)/iso8859_7.c +@am__fastdepCC_TRUE@ mv -f 
$(DEPDIR)/iso8859_7.Tpo $(DEPDIR)/iso8859_7.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(encdir)/iso8859_7.c' object='iso8859_7.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o iso8859_7.lo `test -f '$(encdir)/iso8859_7.c' || echo '$(srcdir)/'`$(encdir)/iso8859_7.c + +iso8859_8.lo: $(encdir)/iso8859_8.c +@am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT iso8859_8.lo -MD -MP -MF $(DEPDIR)/iso8859_8.Tpo -c -o iso8859_8.lo `test -f '$(encdir)/iso8859_8.c' || echo '$(srcdir)/'`$(encdir)/iso8859_8.c +@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/iso8859_8.Tpo $(DEPDIR)/iso8859_8.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(encdir)/iso8859_8.c' object='iso8859_8.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o iso8859_8.lo `test -f '$(encdir)/iso8859_8.c' || echo '$(srcdir)/'`$(encdir)/iso8859_8.c + +iso8859_9.lo: $(encdir)/iso8859_9.c +@am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT iso8859_9.lo -MD -MP -MF $(DEPDIR)/iso8859_9.Tpo -c -o iso8859_9.lo `test -f '$(encdir)/iso8859_9.c' || echo '$(srcdir)/'`$(encdir)/iso8859_9.c +@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/iso8859_9.Tpo $(DEPDIR)/iso8859_9.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ 
source='$(encdir)/iso8859_9.c' object='iso8859_9.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o iso8859_9.lo `test -f '$(encdir)/iso8859_9.c' || echo '$(srcdir)/'`$(encdir)/iso8859_9.c + +iso8859_10.lo: $(encdir)/iso8859_10.c +@am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT iso8859_10.lo -MD -MP -MF $(DEPDIR)/iso8859_10.Tpo -c -o iso8859_10.lo `test -f '$(encdir)/iso8859_10.c' || echo '$(srcdir)/'`$(encdir)/iso8859_10.c +@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/iso8859_10.Tpo $(DEPDIR)/iso8859_10.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(encdir)/iso8859_10.c' object='iso8859_10.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o iso8859_10.lo `test -f '$(encdir)/iso8859_10.c' || echo '$(srcdir)/'`$(encdir)/iso8859_10.c + +iso8859_11.lo: $(encdir)/iso8859_11.c +@am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT iso8859_11.lo -MD -MP -MF $(DEPDIR)/iso8859_11.Tpo -c -o iso8859_11.lo `test -f '$(encdir)/iso8859_11.c' || echo '$(srcdir)/'`$(encdir)/iso8859_11.c +@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/iso8859_11.Tpo $(DEPDIR)/iso8859_11.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(encdir)/iso8859_11.c' object='iso8859_11.lo' libtool=yes @AMDEPBACKSLASH@ 
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o iso8859_11.lo `test -f '$(encdir)/iso8859_11.c' || echo '$(srcdir)/'`$(encdir)/iso8859_11.c + +iso8859_13.lo: $(encdir)/iso8859_13.c +@am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT iso8859_13.lo -MD -MP -MF $(DEPDIR)/iso8859_13.Tpo -c -o iso8859_13.lo `test -f '$(encdir)/iso8859_13.c' || echo '$(srcdir)/'`$(encdir)/iso8859_13.c +@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/iso8859_13.Tpo $(DEPDIR)/iso8859_13.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(encdir)/iso8859_13.c' object='iso8859_13.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o iso8859_13.lo `test -f '$(encdir)/iso8859_13.c' || echo '$(srcdir)/'`$(encdir)/iso8859_13.c + +iso8859_14.lo: $(encdir)/iso8859_14.c +@am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT iso8859_14.lo -MD -MP -MF $(DEPDIR)/iso8859_14.Tpo -c -o iso8859_14.lo `test -f '$(encdir)/iso8859_14.c' || echo '$(srcdir)/'`$(encdir)/iso8859_14.c +@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/iso8859_14.Tpo $(DEPDIR)/iso8859_14.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(encdir)/iso8859_14.c' object='iso8859_14.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) 
@AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o iso8859_14.lo `test -f '$(encdir)/iso8859_14.c' || echo '$(srcdir)/'`$(encdir)/iso8859_14.c + +iso8859_15.lo: $(encdir)/iso8859_15.c +@am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT iso8859_15.lo -MD -MP -MF $(DEPDIR)/iso8859_15.Tpo -c -o iso8859_15.lo `test -f '$(encdir)/iso8859_15.c' || echo '$(srcdir)/'`$(encdir)/iso8859_15.c +@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/iso8859_15.Tpo $(DEPDIR)/iso8859_15.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(encdir)/iso8859_15.c' object='iso8859_15.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o iso8859_15.lo `test -f '$(encdir)/iso8859_15.c' || echo '$(srcdir)/'`$(encdir)/iso8859_15.c + +iso8859_16.lo: $(encdir)/iso8859_16.c +@am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT iso8859_16.lo -MD -MP -MF $(DEPDIR)/iso8859_16.Tpo -c -o iso8859_16.lo `test -f '$(encdir)/iso8859_16.c' || echo '$(srcdir)/'`$(encdir)/iso8859_16.c +@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/iso8859_16.Tpo $(DEPDIR)/iso8859_16.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(encdir)/iso8859_16.c' object='iso8859_16.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) 
$(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o iso8859_16.lo `test -f '$(encdir)/iso8859_16.c' || echo '$(srcdir)/'`$(encdir)/iso8859_16.c + +euc_tw.lo: $(encdir)/euc_tw.c +@am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT euc_tw.lo -MD -MP -MF $(DEPDIR)/euc_tw.Tpo -c -o euc_tw.lo `test -f '$(encdir)/euc_tw.c' || echo '$(srcdir)/'`$(encdir)/euc_tw.c +@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/euc_tw.Tpo $(DEPDIR)/euc_tw.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(encdir)/euc_tw.c' object='euc_tw.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o euc_tw.lo `test -f '$(encdir)/euc_tw.c' || echo '$(srcdir)/'`$(encdir)/euc_tw.c + +euc_kr.lo: $(encdir)/euc_kr.c +@am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT euc_kr.lo -MD -MP -MF $(DEPDIR)/euc_kr.Tpo -c -o euc_kr.lo `test -f '$(encdir)/euc_kr.c' || echo '$(srcdir)/'`$(encdir)/euc_kr.c +@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/euc_kr.Tpo $(DEPDIR)/euc_kr.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(encdir)/euc_kr.c' object='euc_kr.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o euc_kr.lo `test -f '$(encdir)/euc_kr.c' || echo 
'$(srcdir)/'`$(encdir)/euc_kr.c + +big5.lo: $(encdir)/big5.c +@am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT big5.lo -MD -MP -MF $(DEPDIR)/big5.Tpo -c -o big5.lo `test -f '$(encdir)/big5.c' || echo '$(srcdir)/'`$(encdir)/big5.c +@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/big5.Tpo $(DEPDIR)/big5.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(encdir)/big5.c' object='big5.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o big5.lo `test -f '$(encdir)/big5.c' || echo '$(srcdir)/'`$(encdir)/big5.c + +gb18030.lo: $(encdir)/gb18030.c +@am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT gb18030.lo -MD -MP -MF $(DEPDIR)/gb18030.Tpo -c -o gb18030.lo `test -f '$(encdir)/gb18030.c' || echo '$(srcdir)/'`$(encdir)/gb18030.c +@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/gb18030.Tpo $(DEPDIR)/gb18030.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(encdir)/gb18030.c' object='gb18030.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o gb18030.lo `test -f '$(encdir)/gb18030.c' || echo '$(srcdir)/'`$(encdir)/gb18030.c + +koi8_r.lo: $(encdir)/koi8_r.c +@am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) 
$(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT koi8_r.lo -MD -MP -MF $(DEPDIR)/koi8_r.Tpo -c -o koi8_r.lo `test -f '$(encdir)/koi8_r.c' || echo '$(srcdir)/'`$(encdir)/koi8_r.c +@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/koi8_r.Tpo $(DEPDIR)/koi8_r.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(encdir)/koi8_r.c' object='koi8_r.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o koi8_r.lo `test -f '$(encdir)/koi8_r.c' || echo '$(srcdir)/'`$(encdir)/koi8_r.c + +cp1251.lo: $(encdir)/cp1251.c +@am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT cp1251.lo -MD -MP -MF $(DEPDIR)/cp1251.Tpo -c -o cp1251.lo `test -f '$(encdir)/cp1251.c' || echo '$(srcdir)/'`$(encdir)/cp1251.c +@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/cp1251.Tpo $(DEPDIR)/cp1251.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$(encdir)/cp1251.c' object='cp1251.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o cp1251.lo `test -f '$(encdir)/cp1251.c' || echo '$(srcdir)/'`$(encdir)/cp1251.c + +testp-testc.o: testc.c +@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(testp_CFLAGS) $(CFLAGS) -MT testp-testc.o -MD -MP -MF $(DEPDIR)/testp-testc.Tpo -c -o testp-testc.o `test -f 'testc.c' || echo '$(srcdir)/'`testc.c +@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/testp-testc.Tpo $(DEPDIR)/testp-testc.Po 
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='testc.c' object='testp-testc.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(testp_CFLAGS) $(CFLAGS) -c -o testp-testc.o `test -f 'testc.c' || echo '$(srcdir)/'`testc.c + +testp-testc.obj: testc.c +@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(testp_CFLAGS) $(CFLAGS) -MT testp-testc.obj -MD -MP -MF $(DEPDIR)/testp-testc.Tpo -c -o testp-testc.obj `if test -f 'testc.c'; then $(CYGPATH_W) 'testc.c'; else $(CYGPATH_W) '$(srcdir)/testc.c'; fi` +@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/testp-testc.Tpo $(DEPDIR)/testp-testc.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='testc.c' object='testp-testc.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(testp_CFLAGS) $(CFLAGS) -c -o testp-testc.obj `if test -f 'testc.c'; then $(CYGPATH_W) 'testc.c'; else $(CYGPATH_W) '$(srcdir)/testc.c'; fi` + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + +distclean-libtool: + -rm -f libtool +install-includeHEADERS: $(include_HEADERS) + @$(NORMAL_INSTALL) + test -z "$(includedir)" || $(MKDIR_P) "$(DESTDIR)$(includedir)" + @list='$(include_HEADERS)'; for p in $$list; do \ + if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ + f=$(am__strip_dir) \ + echo " $(includeHEADERS_INSTALL) '$$d$$p' '$(DESTDIR)$(includedir)/$$f'"; \ + $(includeHEADERS_INSTALL) "$$d$$p" "$(DESTDIR)$(includedir)/$$f"; \ + done + +uninstall-includeHEADERS: + @$(NORMAL_UNINSTALL) + @list='$(include_HEADERS)'; for p in $$list; do \ + f=$(am__strip_dir) \ + echo " rm -f '$(DESTDIR)$(includedir)/$$f'"; \ + rm -f "$(DESTDIR)$(includedir)/$$f"; \ + done + +# This 
directory's subdirectories are mostly independent; you can cd +# into them and run `make' without going through this Makefile. +# To change the values of `make' variables: instead of editing Makefiles, +# (1) if the variable is set in `config.status', edit `config.status' +# (which will cause the Makefiles to be regenerated when you run `make'); +# (2) otherwise, pass the desired values on the `make' command line. +$(RECURSIVE_TARGETS): + @failcom='exit 1'; \ + for f in x $$MAKEFLAGS; do \ + case $$f in \ + *=* | --[!k]*);; \ + *k*) failcom='fail=yes';; \ + esac; \ + done; \ + dot_seen=no; \ + target=`echo $@ | sed s/-recursive//`; \ + list='$(SUBDIRS)'; for subdir in $$list; do \ + echo "Making $$target in $$subdir"; \ + if test "$$subdir" = "."; then \ + dot_seen=yes; \ + local_target="$$target-am"; \ + else \ + local_target="$$target"; \ + fi; \ + (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ + || eval $$failcom; \ + done; \ + if test "$$dot_seen" = "no"; then \ + $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ + fi; test -z "$$fail" + +$(RECURSIVE_CLEAN_TARGETS): + @failcom='exit 1'; \ + for f in x $$MAKEFLAGS; do \ + case $$f in \ + *=* | --[!k]*);; \ + *k*) failcom='fail=yes';; \ + esac; \ + done; \ + dot_seen=no; \ + case "$@" in \ + distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \ + *) list='$(SUBDIRS)' ;; \ + esac; \ + rev=''; for subdir in $$list; do \ + if test "$$subdir" = "."; then :; else \ + rev="$$subdir $$rev"; \ + fi; \ + done; \ + rev="$$rev ."; \ + target=`echo $@ | sed s/-recursive//`; \ + for subdir in $$rev; do \ + echo "Making $$target in $$subdir"; \ + if test "$$subdir" = "."; then \ + local_target="$$target-am"; \ + else \ + local_target="$$target"; \ + fi; \ + (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ + || eval $$failcom; \ + done && test -z "$$fail" +tags-recursive: + list='$(SUBDIRS)'; for subdir in $$list; do \ + test "$$subdir" = . 
|| (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) tags); \ + done +ctags-recursive: + list='$(SUBDIRS)'; for subdir in $$list; do \ + test "$$subdir" = . || (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) ctags); \ + done + +ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES) + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) ' { files[$$0] = 1; } \ + END { for (i in files) print i; }'`; \ + mkid -fID $$unique +tags: TAGS + +TAGS: tags-recursive $(HEADERS) $(SOURCES) config.h.in $(TAGS_DEPENDENCIES) \ + $(TAGS_FILES) $(LISP) + tags=; \ + here=`pwd`; \ + if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \ + include_option=--etags-include; \ + empty_fix=.; \ + else \ + include_option=--include; \ + empty_fix=; \ + fi; \ + list='$(SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + test ! -f $$subdir/TAGS || \ + tags="$$tags $$include_option=$$here/$$subdir/TAGS"; \ + fi; \ + done; \ + list='$(SOURCES) $(HEADERS) config.h.in $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) ' { files[$$0] = 1; } \ + END { for (i in files) print i; }'`; \ + if test -z "$(ETAGS_ARGS)$$tags$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$tags $$unique; \ + fi +ctags: CTAGS +CTAGS: ctags-recursive $(HEADERS) $(SOURCES) config.h.in $(TAGS_DEPENDENCIES) \ + $(TAGS_FILES) $(LISP) + tags=; \ + here=`pwd`; \ + list='$(SOURCES) $(HEADERS) config.h.in $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) ' { files[$$0] = 1; } \ + END { for (i in files) print i; }'`; \ + test -z "$(CTAGS_ARGS)$$tags$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$tags $$unique + 
+GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && cd $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) $$here + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +check-TESTS: $(TESTS) + @failed=0; all=0; xfail=0; xpass=0; skip=0; ws='[ ]'; \ + srcdir=$(srcdir); export srcdir; \ + list=' $(TESTS) '; \ + if test -n "$$list"; then \ + for tst in $$list; do \ + if test -f ./$$tst; then dir=./; \ + elif test -f $$tst; then dir=; \ + else dir="$(srcdir)/"; fi; \ + if $(TESTS_ENVIRONMENT) $${dir}$$tst; then \ + all=`expr $$all + 1`; \ + case " $(XFAIL_TESTS) " in \ + *$$ws$$tst$$ws*) \ + xpass=`expr $$xpass + 1`; \ + failed=`expr $$failed + 1`; \ + echo "XPASS: $$tst"; \ + ;; \ + *) \ + echo "PASS: $$tst"; \ + ;; \ + esac; \ + elif test $$? -ne 77; then \ + all=`expr $$all + 1`; \ + case " $(XFAIL_TESTS) " in \ + *$$ws$$tst$$ws*) \ + xfail=`expr $$xfail + 1`; \ + echo "XFAIL: $$tst"; \ + ;; \ + *) \ + failed=`expr $$failed + 1`; \ + echo "FAIL: $$tst"; \ + ;; \ + esac; \ + else \ + skip=`expr $$skip + 1`; \ + echo "SKIP: $$tst"; \ + fi; \ + done; \ + if test "$$failed" -eq 0; then \ + if test "$$xfail" -eq 0; then \ + banner="All $$all tests passed"; \ + else \ + banner="All $$all tests behaved as expected ($$xfail expected failures)"; \ + fi; \ + else \ + if test "$$xpass" -eq 0; then \ + banner="$$failed of $$all tests failed"; \ + else \ + banner="$$failed of $$all tests did not behave as expected ($$xpass unexpected passes)"; \ + fi; \ + fi; \ + dashes="$$banner"; \ + skipped=""; \ + if test "$$skip" -ne 0; then \ + skipped="($$skip tests were not run)"; \ + test `echo "$$skipped" | wc -c` -le `echo "$$banner" | wc -c` || \ + dashes="$$skipped"; \ + fi; \ + report=""; \ + if test "$$failed" -ne 0 && test -n "$(PACKAGE_BUGREPORT)"; then \ + report="Please report to $(PACKAGE_BUGREPORT)"; \ + test `echo "$$report" | wc -c` -le `echo "$$banner" | wc -c` || \ + dashes="$$report"; \ + fi; \ + dashes=`echo "$$dashes" | sed s/./=/g`; \ + echo "$$dashes"; \ 
+ echo "$$banner"; \ + test -z "$$skipped" || echo "$$skipped"; \ + test -z "$$report" || echo "$$report"; \ + echo "$$dashes"; \ + test "$$failed" -eq 0; \ + else :; fi + +distdir: $(DISTFILES) + $(am__remove_distdir) + test -d $(distdir) || mkdir $(distdir) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -pR $(srcdir)/$$file $(distdir)$$dir || exit 1; \ + fi; \ + cp -pR $$d/$$file $(distdir)$$dir || exit 1; \ + else \ + test -f $(distdir)/$$file \ + || cp -p $$d/$$file $(distdir)/$$file \ + || exit 1; \ + fi; \ + done + list='$(DIST_SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + test -d "$(distdir)/$$subdir" \ + || $(MKDIR_P) "$(distdir)/$$subdir" \ + || exit 1; \ + distdir=`$(am__cd) $(distdir) && pwd`; \ + top_distdir=`$(am__cd) $(top_distdir) && pwd`; \ + (cd $$subdir && \ + $(MAKE) $(AM_MAKEFLAGS) \ + top_distdir="$$top_distdir" \ + distdir="$$distdir/$$subdir" \ + am__remove_distdir=: \ + am__skip_length_check=: \ + distdir) \ + || exit 1; \ + fi; \ + done + -find $(distdir) -type d ! -perm -777 -exec chmod a+rwx {} \; -o \ + ! -type d ! -perm -444 -links 1 -exec chmod a+r {} \; -o \ + ! -type d ! -perm -400 -exec chmod a+r {} \; -o \ + ! -type d ! 
-perm -444 -exec $(install_sh) -c -m a+r {} {} \; \ + || chmod -R a+r $(distdir) +dist-gzip: distdir + tardir=$(distdir) && $(am__tar) | GZIP=$(GZIP_ENV) gzip -c >$(distdir).tar.gz + $(am__remove_distdir) + +dist-bzip2: distdir + tardir=$(distdir) && $(am__tar) | bzip2 -9 -c >$(distdir).tar.bz2 + $(am__remove_distdir) + +dist-tarZ: distdir + tardir=$(distdir) && $(am__tar) | compress -c >$(distdir).tar.Z + $(am__remove_distdir) + +dist-shar: distdir + shar $(distdir) | GZIP=$(GZIP_ENV) gzip -c >$(distdir).shar.gz + $(am__remove_distdir) + +dist-zip: distdir + -rm -f $(distdir).zip + zip -rq $(distdir).zip $(distdir) + $(am__remove_distdir) + +dist dist-all: distdir + tardir=$(distdir) && $(am__tar) | GZIP=$(GZIP_ENV) gzip -c >$(distdir).tar.gz + $(am__remove_distdir) + +# This target untars the dist file and tries a VPATH configuration. Then +# it guarantees that the distribution is self-contained by making another +# tarfile. +distcheck: dist + case '$(DIST_ARCHIVES)' in \ + *.tar.gz*) \ + GZIP=$(GZIP_ENV) gunzip -c $(distdir).tar.gz | $(am__untar) ;;\ + *.tar.bz2*) \ + bunzip2 -c $(distdir).tar.bz2 | $(am__untar) ;;\ + *.tar.Z*) \ + uncompress -c $(distdir).tar.Z | $(am__untar) ;;\ + *.shar.gz*) \ + GZIP=$(GZIP_ENV) gunzip -c $(distdir).shar.gz | unshar ;;\ + *.zip*) \ + unzip $(distdir).zip ;;\ + esac + chmod -R a-w $(distdir); chmod a+w $(distdir) + mkdir $(distdir)/_build + mkdir $(distdir)/_inst + chmod a-w $(distdir) + dc_install_base=`$(am__cd) $(distdir)/_inst && pwd | sed -e 's,^[^:\\/]:[\\/],/,'` \ + && dc_destdir="$${TMPDIR-/tmp}/am-dc-$$$$/" \ + && cd $(distdir)/_build \ + && ../configure --srcdir=.. 
--prefix="$$dc_install_base" \ + $(DISTCHECK_CONFIGURE_FLAGS) \ + && $(MAKE) $(AM_MAKEFLAGS) \ + && $(MAKE) $(AM_MAKEFLAGS) dvi \ + && $(MAKE) $(AM_MAKEFLAGS) check \ + && $(MAKE) $(AM_MAKEFLAGS) install \ + && $(MAKE) $(AM_MAKEFLAGS) installcheck \ + && $(MAKE) $(AM_MAKEFLAGS) uninstall \ + && $(MAKE) $(AM_MAKEFLAGS) distuninstallcheck_dir="$$dc_install_base" \ + distuninstallcheck \ + && chmod -R a-w "$$dc_install_base" \ + && ({ \ + (cd ../.. && umask 077 && mkdir "$$dc_destdir") \ + && $(MAKE) $(AM_MAKEFLAGS) DESTDIR="$$dc_destdir" install \ + && $(MAKE) $(AM_MAKEFLAGS) DESTDIR="$$dc_destdir" uninstall \ + && $(MAKE) $(AM_MAKEFLAGS) DESTDIR="$$dc_destdir" \ + distuninstallcheck_dir="$$dc_destdir" distuninstallcheck; \ + } || { rm -rf "$$dc_destdir"; exit 1; }) \ + && rm -rf "$$dc_destdir" \ + && $(MAKE) $(AM_MAKEFLAGS) dist \ + && rm -rf $(DIST_ARCHIVES) \ + && $(MAKE) $(AM_MAKEFLAGS) distcleancheck + $(am__remove_distdir) + @(echo "$(distdir) archives ready for distribution: "; \ + list='$(DIST_ARCHIVES)'; for i in $$list; do echo $$i; done) | \ + sed -e 1h -e 1s/./=/g -e 1p -e 1x -e '$$p' -e '$$x' +distuninstallcheck: + @cd $(distuninstallcheck_dir) \ + && test `$(distuninstallcheck_listfiles) | wc -l` -le 1 \ + || { echo "ERROR: files left after uninstall:" ; \ + if test -n "$(DESTDIR)"; then \ + echo " (check DESTDIR support)"; \ + fi ; \ + $(distuninstallcheck_listfiles) ; \ + exit 1; } >&2 +distcleancheck: distclean + @if test '$(srcdir)' = . 
; then \ + echo "ERROR: distcleancheck can only run from a VPATH build" ; \ + exit 1 ; \ + fi + @test `$(distcleancheck_listfiles) | wc -l` -eq 0 \ + || { echo "ERROR: files left in build directory after distclean:" ; \ + $(distcleancheck_listfiles) ; \ + exit 1; } >&2 +check-am: all-am + $(MAKE) $(AM_MAKEFLAGS) $(check_PROGRAMS) + $(MAKE) $(AM_MAKEFLAGS) check-TESTS +check: check-recursive +all-am: Makefile $(LTLIBRARIES) $(SCRIPTS) $(HEADERS) config.h +installdirs: installdirs-recursive +installdirs-am: + for dir in "$(DESTDIR)$(libdir)" "$(DESTDIR)$(bindir)" "$(DESTDIR)$(includedir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: install-recursive +install-exec: install-exec-recursive +install-data: install-data-recursive +uninstall: uninstall-recursive + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-recursive +install-strip: + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + `test -z '$(STRIP)' || \ + echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." 
+clean: clean-recursive + +clean-am: clean-checkPROGRAMS clean-generic clean-libLTLIBRARIES \ + clean-libtool mostlyclean-am + +distclean: distclean-recursive + -rm -f $(am__CONFIG_DISTCLEAN_FILES) + -rm -rf ./$(DEPDIR) + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-hdr distclean-libtool distclean-tags + +dvi: dvi-recursive + +dvi-am: + +html: html-recursive + +info: info-recursive + +info-am: + +install-data-am: install-includeHEADERS + +install-dvi: install-dvi-recursive + +install-exec-am: install-binSCRIPTS install-libLTLIBRARIES + +install-html: install-html-recursive + +install-info: install-info-recursive + +install-man: + +install-pdf: install-pdf-recursive + +install-ps: install-ps-recursive + +installcheck-am: + +maintainer-clean: maintainer-clean-recursive + -rm -f $(am__CONFIG_DISTCLEAN_FILES) + -rm -rf $(top_srcdir)/autom4te.cache + -rm -rf ./$(DEPDIR) + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-recursive + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-recursive + +pdf-am: + +ps: ps-recursive + +ps-am: + +uninstall-am: uninstall-binSCRIPTS uninstall-includeHEADERS \ + uninstall-libLTLIBRARIES + +.MAKE: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) install-am \ + install-strip + +.PHONY: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) CTAGS GTAGS \ + all all-am am--refresh check check-TESTS check-am clean \ + clean-checkPROGRAMS clean-generic clean-libLTLIBRARIES \ + clean-libtool ctags ctags-recursive dist dist-all dist-bzip2 \ + dist-gzip dist-shar dist-tarZ dist-zip distcheck distclean \ + distclean-compile distclean-generic distclean-hdr \ + distclean-libtool distclean-tags distcleancheck distdir \ + distuninstallcheck dvi dvi-am html html-am info info-am \ + install install-am install-binSCRIPTS install-data \ + install-data-am install-dvi install-dvi-am install-exec \ + install-exec-am 
install-html install-html-am \ + install-includeHEADERS install-info install-info-am \ + install-libLTLIBRARIES install-man install-pdf install-pdf-am \ + install-ps install-ps-am install-strip installcheck \ + installcheck-am installdirs installdirs-am maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-compile \ + mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ + tags tags-recursive uninstall uninstall-am \ + uninstall-binSCRIPTS uninstall-includeHEADERS \ + uninstall-libLTLIBRARIES + + +onig-config: onig-config.in + +dll: + $(CXX) -shared -Wl,--output-def,libonig.def -o libonig.dll *.o \ + $(LIBS) + strip libonig.dll + +# Ruby TEST +rtest: + $(RUBYDIR)/ruby -w -Ke $(srcdir)/test.rb + +# character-types-table source generator +mktable: $(encdir)/mktable.c $(srcdir)/regenc.h + $(CC) -I$(top_srcdir) -o mktable $(encdir)/mktable.c + +atest: testc testp testcu + @echo "[Oniguruma API, ASCII/EUC-JP check]" + @$(top_builddir)/testc | grep RESULT + @echo "[POSIX API, ASCII/EUC-JP check]" + @$(top_builddir)/testp | grep RESULT + @echo "[Oniguruma API, UTF-16 check]" + @$(top_builddir)/testcu | grep RESULT + +#testc.c: $(srcdir)/test.rb $(srcdir)/testconv.rb +# ruby -Ke $(srcdir)/testconv.rb < $(srcdir)/test.rb > $@ + +#testu.c: $(srcdir)/test.rb $(srcdir)/testconvu.rb +# ruby -Ke $(srcdir)/testconvu.rb $(srcdir)/test.rb > $@ + +#win32/testc.c: $(srcdir)/test.rb $(srcdir)/testconv.rb +# ruby -Ke $(srcdir)/testconv.rb -win < $(srcdir)/test.rb | nkf -cs > $@ +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/oniguruma/README b/oniguruma/README new file mode 100644 index 0000000..8390afd --- /dev/null +++ b/oniguruma/README @@ -0,0 +1,189 @@ +README 2007/05/31 + +Oniguruma ---- (C) K.Kosako + +http://www.geocities.jp/kosako3/oniguruma/ + +Oniguruma is a regular expressions library. 
+The distinguishing characteristic of this library is that a different character +encoding can be specified for every regular expression object. + +Supported character encodings: + + ASCII, UTF-8, UTF-16BE, UTF-16LE, UTF-32BE, UTF-32LE, + EUC-JP, EUC-TW, EUC-KR, EUC-CN, + Shift_JIS, Big5, GB18030, KOI8-R, CP1251, + ISO-8859-1, ISO-8859-2, ISO-8859-3, ISO-8859-4, ISO-8859-5, + ISO-8859-6, ISO-8859-7, ISO-8859-8, ISO-8859-9, ISO-8859-10, + ISO-8859-11, ISO-8859-13, ISO-8859-14, ISO-8859-15, ISO-8859-16 + +* GB18030: contributed by KUBO Takehiro +* CP1251: contributed by Byte +------------------------------------------------------------ + +License + + BSD license. + + +Install + + Case 1: Unix and Cygwin platform + + 1. ./configure + 2. make + 3. make install + + * uninstall + + make uninstall + + * test (ASCII/EUC-JP) + + make atest + + * configuration check + + onig-config --cflags + onig-config --libs + onig-config --prefix + onig-config --exec-prefix + + + + Case 2: Win32 platform (VC++) + + 1. copy win32\Makefile Makefile + 2. copy win32\config.h config.h + 3. nmake + + onig_s.lib: static link library + onig.dll: dynamic link library + + * test (ASCII/Shift_JIS) + 4. copy win32\testc.c testc.c + 5. nmake ctest + + + +Regular Expressions + + See doc/RE (or doc/RE.ja for Japanese). + + +Usage + + Include oniguruma.h in your program. (Oniguruma API) + See doc/API for Oniguruma API. + + If you want to disable UChar type (== unsigned char) definition + in oniguruma.h, define ONIG_ESCAPE_UCHAR_COLLISION and then + include oniguruma.h. + + If you want to disable regex_t type definition in oniguruma.h, + define ONIG_ESCAPE_REGEX_T_COLLISION and then include oniguruma.h. + + Example of the compiling/linking command line in Unix or Cygwin, + (prefix == /usr/local case) + + cc sample.c -L/usr/local/lib -lonig + + + If you want to use the static link library (onig_s.lib) in Win32, + add the option -DONIG_EXTERN=extern to the C compiler.
+ + + +Sample Programs + + sample/simple.c example of the minimum (Oniguruma API) + sample/names.c example of the named group callback. + sample/encode.c example of some encodings. + sample/listcap.c example of the capture history. + sample/posix.c POSIX API sample. + sample/sql.c example of the variable meta characters. + (SQL-like pattern matching) + +Test Programs + sample/syntax.c Perl, Java and ASIS syntax test. + sample/crnl.c --enable-crnl-as-line-terminator test + + +Source Files + + oniguruma.h Oniguruma API header file. (public) + onig-config.in configuration check program template. + + regenc.h character encodings framework header file. + regint.h internal definitions + regparse.h internal definitions for regparse.c and regcomp.c + regcomp.c compiling and optimization functions + regenc.c character encodings framework. + regerror.c error message function + regext.c extended API functions. (deluxe version API) + regexec.c search and match functions + regparse.c parsing functions. + regsyntax.c pattern syntax functions and built-in syntax definitions. + regtrav.c capture history tree data traverse functions. + regversion.c version info function. + st.h hash table functions header file + st.c hash table functions + + oniggnu.h GNU regex API header file. (public) + reggnu.c GNU regex API functions + + onigposix.h POSIX API header file. (public) + regposerr.c POSIX error message function. + regposix.c POSIX API functions. + + enc/mktable.c character type table generator. + enc/ascii.c ASCII encoding. + enc/euc_jp.c EUC-JP encoding. + enc/euc_tw.c EUC-TW encoding. + enc/euc_kr.c EUC-KR, EUC-CN encoding. + enc/sjis.c Shift_JIS encoding. + enc/big5.c Big5 encoding. + enc/gb18030.c GB18030 encoding. + enc/koi8.c KOI8 encoding. + enc/koi8_r.c KOI8-R encoding. + enc/cp1251.c CP1251 encoding. + enc/iso8859_1.c ISO-8859-1 encoding. (Latin-1) + enc/iso8859_2.c ISO-8859-2 encoding. (Latin-2) + enc/iso8859_3.c ISO-8859-3 encoding. 
(Latin-3) + enc/iso8859_4.c ISO-8859-4 encoding. (Latin-4) + enc/iso8859_5.c ISO-8859-5 encoding. (Cyrillic) + enc/iso8859_6.c ISO-8859-6 encoding. (Arabic) + enc/iso8859_7.c ISO-8859-7 encoding. (Greek) + enc/iso8859_8.c ISO-8859-8 encoding. (Hebrew) + enc/iso8859_9.c ISO-8859-9 encoding. (Latin-5 or Turkish) + enc/iso8859_10.c ISO-8859-10 encoding. (Latin-6 or Nordic) + enc/iso8859_11.c ISO-8859-11 encoding. (Thai) + enc/iso8859_13.c ISO-8859-13 encoding. (Latin-7 or Baltic Rim) + enc/iso8859_14.c ISO-8859-14 encoding. (Latin-8 or Celtic) + enc/iso8859_15.c ISO-8859-15 encoding. (Latin-9 or West European with Euro) + enc/iso8859_16.c ISO-8859-16 encoding. + (Latin-10 or South-Eastern European with Euro) + enc/utf8.c UTF-8 encoding. + enc/utf16_be.c UTF-16BE encoding. + enc/utf16_le.c UTF-16LE encoding. + enc/utf32_be.c UTF-32BE encoding. + enc/utf32_le.c UTF-32LE encoding. + enc/unicode.c Unicode information data. + + win32/Makefile Makefile for Win32 (VC++) + win32/config.h config.h for Win32 + + + +ToDo + + ? case fold flag: Katakana <-> Hiragana. + ? add ONIG_OPTION_NOTBOS/NOTEOS. (\A, \z, \Z) + ?? \X (== \PM\pM*) + ?? implement syntax behavior ONIG_SYN_CONTEXT_INDEP_ANCHORS. + ?? transmission stopper. (return ONIG_STOP from match_at()) + +and I'm thankful to Akinori MUSHA. 
+ + +Mail Address: K.Kosako diff --git a/oniguruma/README.ja b/oniguruma/README.ja new file mode 100644 index 0000000..b14822c --- /dev/null +++ b/oniguruma/README.ja @@ -0,0 +1,195 @@ +README.ja 2007/05/31 + +オエシヨ ---- (C) K.Kosako + +http://www.geocities.jp/kosako3/oniguruma/ + +オエシヨ、マタオオャノスクス・鬣、・ヨ・鬣熙ヌ、「、。」 +、ウ、ホ・鬣、・ヨ・鬣熙ホニテトケ、マ。「、ス、、セ、、ホタオオャノスクス・ェ・ヨ・ク・ァ・ッ・ネ、エ、ネ、ヒ +ハクサ・ィ・・ウ。シ・ヌ・」・・ー、サリト熙ヌ、ュ、、ウ、ネ、ヌ、「、。」 + +・オ・ン。シ・ネ、キ、ニ、、、ハクサ・ィ・・ウ。シ・ヌ・」・・ー: + + ASCII, UTF-8, UTF-16BE, UTF-16LE, UTF-32BE, UTF-32LE, + EUC-JP, EUC-TW, EUC-KR, EUC-CN, + Shift_JIS, Big5, GB18030, KOI8-R, CP1251, + ISO-8859-1, ISO-8859-2, ISO-8859-3, ISO-8859-4, ISO-8859-5, + ISO-8859-6, ISO-8859-7, ISO-8859-8, ISO-8859-9, ISO-8859-10, + ISO-8859-11, ISO-8859-13, ISO-8859-14, ISO-8859-15, ISO-8859-16 + +* GB18030: オラハンキヘホサ眛カ。 +* CP1251: Byteサ眛カ。 +------------------------------------------------------------ + +・鬣、・サ・・ケ + + BSD・鬣、・サ・・ケ、ヒスセ、ヲ。」 + + +・、・・ケ・ネ。シ・ + + ・ア。シ・ケ」ア: Unix、ネCygwinエトカュ + + 1. ./configure + 2. make + 3. make install + + ・「・・、・・ケ・ネ。シ・ + + make uninstall + + ニーコ・ニ・ケ・ネ (ASCII/EUC-JP) + + make atest + + + ケスタョウホヌァ + + onig-config --cflags + onig-config --libs + onig-config --prefix + onig-config --exec-prefix + + + + ・ア。シ・ケ」イ: Win32(VC++)エトカュ + + 1. copy win32\Makefile Makefile + 2. copy win32\config.h config.h + 3. nmake + + onig_s.lib: static link library + onig.dll: dynamic link library + + * ニーコ・ニ・ケ・ネ (ASCII/Shift_JIS) + 4. copy win32\testc.c testc.c + 5. 
nmake ctest + + + +タオオャノスクス + + doc/RE.ja、サイセネ + + +サネヘムハヒ。 + + サネヘム、ケ、・ラ・・ー・鬣爨ヌ。「oniguruma.h、・、・・ッ・。シ・ノ、ケ、(Oniguruma API、ホセケ)。」 + Oniguruma API、ヒ、ト、、、ニ、マ。「doc/API.ja、サイセネ。」 + + oniguruma.h、ヌトオチ、オ、、ニ、、、キソフセUChar(== unsigned char)、フオク、ヒ、キ、ソ、、セケ + 、ヒ、マ。「ONIG_ESCAPE_UCHAR_COLLISION、define、キ、ニ、ォ、駮niguruma.h、・、・・ッ・。シ・ノ + 、ケ、、ウ、ネ。」、ウ、ホ、ネ、ュ、ヒ、マUChar、マトオチ、オ、、コ。「OnigUChar、ネ、、、ヲフセチー、ホトオチ、ホ、゚、ャ + ヘュク、ヒ、ハ、。」 + + oniguruma.h、ヌトオチ、オ、、ニ、、、キソフセregex_t、フオク、ヒ、キ、ソ、、セケ遉ヒ、マ。「 + ONIG_ESCAPE_REGEX_T_COLLISION、define、キ、ニ、ォ、駮niguruma.h、・、・・ッ・。シ・ノ + 、ケ、、ウ、ネ。」、ウ、ホ、ネ、ュ、ヒ、マregex_t、マトオチ、オ、、コ。「OnigRegexType, OnigRegex、ネ、、、ヲ + フセチー、ホトオチ、ホ、゚、ャヘュク、ヒ、ハ、。」 + + Unix/Cygwinセ螟ヌ・ウ・・ム・、・。「・・・ッ、ケ、セケ遉ホホ罍ァ + (prefix、ャ/usr/local、ホ、ネ、ュ) + cc sample.c -L/usr/local/lib -lonig + + GNU libtool、サネヘム、キ、ニ、、、、ホ、ヌ。「・ラ・鬣テ・ネ・ユ・ゥ。シ・爨ャカヲヘュ・鬣、・ヨ・鬣熙・オ・ン。シ・ネ、キ、ニ + 、、、、ミ。「サネヘム、ヌ、ュ、、隍ヲ、ヒ、ハ、テ、ニ、、、。」 + タナナェ・鬣、・ヨ・鬣熙ネカヲヘュ・鬣、・ヨ・鬣熙ホ、ノ、チ、鬢サネヘム、ケ、、ォ、サリト熙ケ、ハヒ。。「シツケヤサナタ、ヌ、ホ + エトカュタ゚トハヒ。、ヒ、ト、、、ニ、マ。「シォハャ、ヌトエ、ル、ニイシ、オ、、。」 + + + Win32、ヌ・ケ・ソ・ニ・」・テ・ッ・・・ッ・鬣、・ヨ・鬣(onig_s.lib)、・・・ッ、ケ、セケ遉ヒ、マ。「 + ・ウ・・ム・、・、ケ、、ネ、ュ、ヒ -DONIG_EXTERN=extern 、・ウ・・ム・、・ーソ、ヒトノイテ、ケ、、ウ、ネ。」 + + +サネヘムホ罕ラ・・ー・鬣 + + sample/simple.c コヌセョホ (Oniguruma API) + sample/names.c フセチーノユ、ュ・ー・。シ・ラ・ウ。シ・・ミ・テ・ッサネヘムホ + sample/encode.c エ、ト、ォ、ホハクサ・ィ・・ウ。シ・ヌ・」・・ーサネヘムホ + sample/listcap.c ハ盖ヘヘホオ。ヌス、ホサネヘムホ + sample/posix.c POSIX APIサネヘムホ + sample/sql.c イトハム・皈ソハクサオ。ヌスサネヘムホ (SQL-like ・ム・ソ。シ・) + +・ニ・ケ・ネ・ラ・・ー・鬣 + sample/syntax.c Perl。「Java。「ASISハクヒ。、ホ・ニ・ケ・ネ + sample/crnl.c --enable-crnl-as-line-terminator ・ニ・ケ・ネ + + +・ス。シ・ケ・ユ・。・、・ + + oniguruma.h オエシヨAPI・リ・テ・タ (クウォ) + onig-config.in onig-config・ラ・・ー・鬣 ・ニ・・ラ・。シ・ネ + + regenc.h ハクサ・ィ・・ウ。シ・ヌ・」・・ーマネチネ、゚・リ・テ・タ + regint.h ニ篷タクタ + regparse.h regparse.c、ネregcomp.c、ホ、ソ、皃ホニ篷タクタ + regcomp.c ・ウ・・ム・、・。「コヌナャイスエリソ + regenc.c ハクサ・ィ・・ウ。シ・ヌ・」・・ーマネチネ、゚ + regerror.c ・ィ・鬘シ・皈テ・サ。シ・クエリソ + regext.c ウネト・APIエリソ + regexec.c ク。コ。「セネケ邏リソ + regparse.c タオオャノスクス・ム・ソ。シ・イタマエリソ + regsyntax.c タオオャノスクス・ム・ソ。シ・ハクヒ。エリソ。「チネケ、゚ハクヒ。トオチ + regtrav.c ハ盖ヘヘホフレス茣エリソ + regversion.c ネヌセハエリソ + st.h 
・マ・テ・キ・螂ニ。シ・ヨ・エリソタクタ + st.c ・マ・テ・キ・螂ニ。シ・ヨ・エリソ + + oniggnu.h GNU regex API・リ・テ・タ (クウォ) + reggnu.c GNU regex APIエリソ + + onigposix.h POSIX API・リ・テ・タ (クウォ) + regposerr.c POSIX API・ィ・鬘シ・皈テ・サ。シ・クエリソ + regposix.c POSIX APIエリソ + + enc/mktable.c ハクサ・ソ・、・ラ・ニ。シ・ヨ・タクタョ・ラ・・ー・鬣 + enc/ascii.c ASCII ・ィ・・ウ。シ・ヌ・」・・ー + enc/euc_jp.c EUC-JP ・ィ・・ウ。シ・ヌ・」・・ー + enc/euc_tw.c EUC-TW ・ィ・・ウ。シ・ヌ・」・・ー + enc/euc_kr.c EUC-KR, EUC-CN ・ィ・・ウ。シ・ヌ・」・・ー + enc/sjis.c Shift_JIS ・ィ・・ウ。シ・ヌ・」・・ー + enc/big5.c Big5 ・ィ・・ウ。シ・ヌ・」・・ー + enc/gb18030.c GB18030 ・ィ・・ウ。シ・ヌ・」・・ー + enc/koi8.c KOI8 ・ィ・・ウ。シ・ヌ・」・・ー + enc/koi8_r.c KOI8-R ・ィ・・ウ。シ・ヌ・」・・ー + enc/cp1251.c CP1251 ・ィ・・ウ。シ・ヌ・」・・ー + enc/iso8859_1.c ISO-8859-1 (Latin-1) + enc/iso8859_2.c ISO-8859-2 (Latin-2) + enc/iso8859_3.c ISO-8859-3 (Latin-3) + enc/iso8859_4.c ISO-8859-4 (Latin-4) + enc/iso8859_5.c ISO-8859-5 (Cyrillic) + enc/iso8859_6.c ISO-8859-6 (Arabic) + enc/iso8859_7.c ISO-8859-7 (Greek) + enc/iso8859_8.c ISO-8859-8 (Hebrew) + enc/iso8859_9.c ISO-8859-9 (Latin-5 、゙、ソ、マ Turkish) + enc/iso8859_10.c ISO-8859-10 (Latin-6 、゙、ソ、マ Nordic) + enc/iso8859_11.c ISO-8859-11 (Thai) + enc/iso8859_13.c ISO-8859-13 (Latin-7 、゙、ソ、マ Baltic Rim) + enc/iso8859_14.c ISO-8859-14 (Latin-8 、゙、ソ、マ Celtic) + enc/iso8859_15.c ISO-8859-15 (Latin-9 、゙、ソ、マ West European with Euro) + enc/iso8859_16.c ISO-8859-16 + (Latin-10 、゙、ソ、マ South-Eastern European with Euro) + enc/utf8.c UTF-8 ・ィ・・ウ。シ・ヌ・」・・ー + enc/utf16_be.c UTF-16BE ・ィ・・ウ。シ・ヌ・」・・ー + enc/utf16_le.c UTF-16LE ・ィ・・ウ。シ・ヌ・」・・ー + enc/utf32_be.c UTF-32BE ・ィ・・ウ。シ・ヌ・」・・ー + enc/utf32_le.c UTF-32LE ・ィ・・ウ。シ・ヌ・」・・ー + enc/unicode.c Unicodeセハ + + win32/Makefile Win32ヘム Makefile (for VC++) + win32/config.h Win32ヘム config.h + + + +サトキ + + ? case fold flag: Katakana <-> Hiragana + ? ONIG_OPTION_NOTBOS/NOTEOSトノイテ (\A, \z, \Z) + ?? \X (== \PM\pM*) + ?? ハクヒ。ヘラチヌ ONIG_SYN_CONTEXT_INDEP_ANCHORS、ホシツチ + ?? ク。コーフテヨーワニート莉゚ア鮟ササメ (match_at()、ォ、餔NIG_STOP、ハヨ、ケ) + +and I'm thankful to Akinori MUSHA. 
+ + +・「・ノ・・ケ: K.Kosako diff --git a/oniguruma/config.h.in b/oniguruma/config.h.in new file mode 100644 index 0000000..1f44719 --- /dev/null +++ b/oniguruma/config.h.in @@ -0,0 +1,34 @@ +#cmakedefine CRAY_STACKSEG_END 1 +#cmakedefine C_ALLOCA 1 +#cmakedefine HAVE_ALLOCA 1 +#cmakedefine HAVE_ALLOCA_H 1 +#cmakedefine HAVE_DLFCN_H 1 +#cmakedefine HAVE_INTTYPES_H 1 +#cmakedefine HAVE_MEMORY_H 1 +#cmakedefine HAVE_PROTOTYPES 1 +#cmakedefine HAVE_STDARG_PROTOTYPES 1 +#cmakedefine HAVE_STDINT_H 1 +#cmakedefine HAVE_STDLIB_H 1 +#cmakedefine HAVE_STRINGS_H 1 +#cmakedefine HAVE_STRING_H 1 +#cmakedefine HAVE_SYS_STAT_H 1 +#cmakedefine HAVE_SYS_TIMES_H 1 +#cmakedefine HAVE_SYS_TIME_H 1 +#cmakedefine HAVE_SYS_TYPES_H 1 +#cmakedefine HAVE_UNISTD_H 1 +#cmakedefine LT_OBJDIR 1 +#cmakedefine PACKAGE 1 +#cmakedefine PACKAGE_BUGREPORT 1 +#cmakedefine PACKAGE_NAME 1 +#cmakedefine PACKAGE_STRING 1 +#cmakedefine PACKAGE_TARNAME 1 +#cmakedefine PACKAGE_VERSION 1 +#cmakedefine SIZEOF_INT 1 +#cmakedefine SIZEOF_LONG 1 +#cmakedefine SIZEOF_SHORT 1 +#cmakedefine STACK_DIRECTION 1 +#cmakedefine STDC_HEADERS 1 +#cmakedefine TIME_WITH_SYS_TIME 1 +#cmakedefine USE_COMBINATION_EXPLOSION_CHECK 1 +#cmakedefine USE_CRNL_AS_LINE_TERMINATOR 1 +#cmakedefine VERSION 1 diff --git a/oniguruma/enc/ascii.c b/oniguruma/enc/ascii.c new file mode 100644 index 0000000..c2715f4 --- /dev/null +++ b/oniguruma/enc/ascii.c @@ -0,0 +1,58 @@ +/********************************************************************** + ascii.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2006 K.Kosako + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "regenc.h" +/* Returns ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype) when code < 128; every other code point yields FALSE. */ +static int +ascii_is_code_ctype(OnigCodePoint code, unsigned int ctype) +{ + if (code < 128) + return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype); + else + return FALSE; +} +/* US-ASCII encoding descriptor: max and min byte length are both 1, and the ctype test is ascii_is_code_ctype above. The remaining entries are onigenc_* helpers not defined in this file (presumably declared by regenc.h - confirm). */ +OnigEncodingType OnigEncodingASCII = { + onigenc_single_byte_mbc_enc_len, + "US-ASCII", /* name */ + 1, /* max byte length */ + 1, /* min byte length */ + onigenc_is_mbc_newline_0x0a, + onigenc_single_byte_mbc_to_code, + onigenc_single_byte_code_to_mbclen, + onigenc_single_byte_code_to_mbc, + onigenc_ascii_mbc_case_fold, + onigenc_ascii_apply_all_case_fold, + onigenc_ascii_get_case_fold_codes_by_str, + onigenc_minimum_property_name_to_ctype, + ascii_is_code_ctype, + onigenc_not_support_get_ctype_code_range, + onigenc_single_byte_left_adjust_char_head, + onigenc_always_true_is_allowed_reverse_match +}; diff --git a/oniguruma/enc/big5.c b/oniguruma/enc/big5.c new file mode 100644 index 0000000..ca1e01b --- /dev/null +++ b/oniguruma/enc/big5.c @@ -0,0 +1,162 @@
+/********************************************************************** + big5.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2007 K.Kosako + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */
+
+#include "regenc.h"
+/* Byte length of a character, indexed by its first byte: 2 for 0xa1-0xfe, 1 for every other value. */
+static const int EncLen_BIG5[] = {
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1
+};
+/* Length in bytes of the character starting at p, looked up from its first byte. */
+static int
+big5_mbc_enc_len(const UChar* p)
+{
+  return EncLen_BIG5[*p];
+}
+/* Decode the character at p; delegates to onigenc_mbn_mbc_to_code with the Big5 encoding. */
+static OnigCodePoint
+big5_mbc_to_code(const UChar* p, const UChar* end)
+{
+  return onigenc_mbn_mbc_to_code(ONIG_ENCODING_BIG5, p, end);
+}
+/* Encode code into buf; delegates to onigenc_mb2_code_to_mbc with the Big5 encoding. */
+static int
+big5_code_to_mbc(OnigCodePoint code, UChar *buf)
+{
+  return onigenc_mb2_code_to_mbc(ONIG_ENCODING_BIG5, code, buf);
+}
+/* Case-fold the character at *pp into lower; delegates to onigenc_mbn_mbc_case_fold. */
+static int
+big5_mbc_case_fold(OnigCaseFoldType flag, const UChar** pp, const UChar* end,
+                   UChar* lower)
+{
+  return onigenc_mbn_mbc_case_fold(ONIG_ENCODING_BIG5, flag,
+                                   pp, end, lower);
+}
+/* The ambiguity test below is compiled out by the #if 0 guard. */
+#if 0
+static int
+big5_is_mbc_ambiguous(OnigCaseFoldType flag,
+                      const UChar** pp, const UChar* end)
+{
+  return onigenc_mbn_is_mbc_ambiguous(ONIG_ENCODING_BIG5, flag, pp, end);
+}
+#endif
+/* Character-class test; delegates to onigenc_mb2_is_code_ctype with the Big5 encoding. */
+static int
+big5_is_code_ctype(OnigCodePoint code, unsigned int ctype)
+{
+  return onigenc_mb2_is_code_ctype(ONIG_ENCODING_BIG5, code, ctype);
+}
+/* Flag table indexed by byte value; per its name, nonzero marks bytes that can be a trail byte. */
+static const char BIG5_CAN_BE_TRAIL_TABLE[256] = {
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0
+};
+/* FIRST: the byte starts a multi-byte character (table length above 1). TRAIL: the byte is flagged in the trail table. */
+#define BIG5_ISMB_FIRST(byte)  (EncLen_BIG5[byte] > 1)
+#define BIG5_ISMB_TRAIL(byte)  BIG5_CAN_BE_TRAIL_TABLE[(byte)]
+/* Return the head of the character that contains s, never moving before start. */
+static UChar*
+big5_left_adjust_char_head(const UChar* start, const UChar* s)
+{
+  const UChar *p;
+  int len;
+
+  if (s <= start) return (UChar* )s;
+  p = s;
+  /* If *s may be a trail byte, walk left across the run of possible lead bytes that precedes it. */
+  if (BIG5_ISMB_TRAIL(*p)) {
+    while (p > start) {
+      if (! BIG5_ISMB_FIRST(*--p)) {
+	p++;
+	break;
+      }
+    }
+  }
+  len = enclen(ONIG_ENCODING_BIG5, p);
+  if (p + len > s) return (UChar* )p;
+  p += len;
+  return (UChar* )(p + ((s - p) & ~1));
+}
+/* Reverse matching may start at s only when *s is not flagged as a possible trail byte. */
+static int
+big5_is_allowed_reverse_match(const UChar* s, const UChar* end ARG_UNUSED)
+{
+  const UChar c = *s;
+
+  return (BIG5_ISMB_TRAIL(c) ?
FALSE : TRUE);
+}
+/* Encoding descriptor for Big5 (1 to 2 bytes per character): big5_* callbacks plus shared onigenc_* helpers. */
+OnigEncodingType OnigEncodingBIG5 = {
+  big5_mbc_enc_len,
+  "Big5",     /* name */
+  2,          /* max enc length */
+  1,          /* min enc length */
+  onigenc_is_mbc_newline_0x0a,
+  big5_mbc_to_code,
+  onigenc_mb2_code_to_mbclen,
+  big5_code_to_mbc,
+  big5_mbc_case_fold,
+  onigenc_ascii_apply_all_case_fold,
+  onigenc_ascii_get_case_fold_codes_by_str,
+  onigenc_minimum_property_name_to_ctype,
+  big5_is_code_ctype,
+  onigenc_not_support_get_ctype_code_range,
+  big5_left_adjust_char_head,
+  big5_is_allowed_reverse_match
+};
diff --git a/oniguruma/enc/cp1251.c b/oniguruma/enc/cp1251.c
new file mode 100644
index 0000000..63e58d2
--- /dev/null
+++ b/oniguruma/enc/cp1251.c
@@ -0,0 +1,200 @@
+/**********************************************************************
+  cp1251.c -  Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2006-2007  Byte
+ *                          K.Kosako
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "regenc.h"
+/* Table accessors: lower-case mapping of a byte, and a bit test of a byte against one ctype. */
+#define ENC_CP1251_TO_LOWER_CASE(c) EncCP1251_ToLowerCaseTable[c]
+#define ENC_IS_CP1251_CTYPE(code,ctype) \
+  ((EncCP1251_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0)
+/* Lower-case form of each byte value; entries without a case mapping hold their own value. */
+static const UChar EncCP1251_ToLowerCaseTable[256] = {
+  '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
+  '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
+  '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
+  '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
+  '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
+  '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
+  '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
+  '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
+  '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
+  '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
+  '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
+  '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
+  '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
+  '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
+  '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
+  '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
+  '\220', '\203', '\202', '\203', '\204', '\205', '\206', '\207',
+  '\210', '\211', '\232', '\213', '\234', '\235', '\236', '\237',
+  '\220', '\221',
'\222', '\223', '\224', '\225', '\226', '\227',
+  '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
+  '\240', '\242', '\242', '\274', '\244', '\264', '\246', '\247',
+  '\270', '\251', '\272', '\253', '\254', '\255', '\256', '\277',
+  '\260', '\261', '\263', '\263', '\264', '\265', '\266', '\267',
+  '\270', '\271', '\272', '\273', '\274', '\276', '\276', '\277',
+  '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
+  '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
+  '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
+  '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377',
+  '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
+  '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
+  '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
+  '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
+};
+/* Per-byte bitmask of character classes; read through ENC_IS_CP1251_CTYPE with CTYPE_TO_BIT(ctype). */
+static const unsigned short EncCP1251_CtypeTable[256] = {
+  0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+  0x4008, 0x428c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
+  0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+  0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+  0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+  0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+  0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
+  0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+  0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
+  0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
+  0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
+  0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
+  0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
+  0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
+  0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
+  0x70e2, 0x70e2, 0x70e2, 0x41a0,
0x41a0, 0x41a0, 0x41a0, 0x4008,
+  0x34a2, 0x34a2, 0x01a0, 0x30e2, 0x01a0, 0x01a0, 0x01a0, 0x01a0,
+  0x0000, 0x01a0, 0x34a2, 0x01a0, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+  0x30e2, 0x01a0, 0x01a0, 0x01a0, 0x01a0, 0x01a0, 0x01a0, 0x01a0,
+  0x0008, 0x0000, 0x30e2, 0x01a0, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+  0x0280, 0x34a2, 0x30e2, 0x34a2, 0x01a0, 0x34a2, 0x01a0, 0x01a0,
+  0x34a2, 0x01a0, 0x34a2, 0x01a0, 0x01a0, 0x01a0, 0x01a0, 0x34a2,
+  0x01a0, 0x01a0, 0x34a2, 0x30e2, 0x30e2, 0x31e2, 0x01a0, 0x01a0,
+  0x30e2, 0x0000, 0x30e2, 0x01a0, 0x30e2, 0x34a2, 0x30e2, 0x30e2,
+  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2
+};
+/* Fold the single byte at *pp through the lower-case table, advance *pp by one, and return 1 (bytes written). */
+static int
+cp1251_mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED,
+                     const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower)
+{
+  const UChar* p = *pp;
+
+  *lower = ENC_CP1251_TO_LOWER_CASE(*p);
+  (*pp)++;
+  return 1;
+}
+/* Class test: codes below 256 are checked against the ctype bitmask table, larger codes are in no class. */
+static int
+cp1251_is_code_ctype(OnigCodePoint code, unsigned int ctype)
+{
+  if (code < 256)
+    return ENC_IS_CP1251_CTYPE(code, ctype);
+  else
+    return FALSE;
+}
+/* Case pairs as { lower-case byte, upper-case byte }, consistent with the lower-case table. */
+static const OnigPairCaseFoldCodes CaseFoldMap[] = {
+  { 0xb8, 0xa8 },
+
+  { 0xe0, 0xc0 },
+  { 0xe1, 0xc1 },
+  { 0xe2, 0xc2 },
+  { 0xe3, 0xc3 },
+  { 0xe4, 0xc4 },
+  { 0xe5, 0xc5 },
+  { 0xe6, 0xc6 },
+  { 0xe7, 0xc7 },
+  { 0xe8, 0xc8 },
+  { 0xe9, 0xc9 },
+  { 0xea, 0xca },
+  { 0xeb, 0xcb },
+  { 0xec, 0xcc },
+  { 0xed, 0xcd },
+  { 0xee, 0xce },
+  { 0xef, 0xcf },
+
+  { 0xf0, 0xd0 },
+  { 0xf1, 0xd1 },
+  { 0xf2, 0xd2 },
+  { 0xf3, 0xd3 },
+  { 0xf4, 0xd4 },
+  { 0xf5, 0xd5 },
+  { 0xf6, 0xd6 },
+  { 0xf7, 0xd7 },
+  {
0xf8, 0xd8 },
+  { 0xf9, 0xd9 },
+  { 0xfa, 0xda },
+  { 0xfb, 0xdb },
+  { 0xfc, 0xdc },
+  { 0xfd, 0xdd },
+  { 0xfe, 0xde },
+  { 0xff, 0xdf }
+};
+/* Enumerate all case-fold pairs by passing CaseFoldMap and its element count to the shared map-driven helper. */
+static int
+cp1251_apply_all_case_fold(OnigCaseFoldType flag,
+                           OnigApplyAllCaseFoldFunc f, void* arg)
+{
+  return onigenc_apply_all_case_fold_with_map(
+             sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0,
+             flag, f, arg);
+}
+/* Collect the case-fold alternatives for the character at p, again driven by CaseFoldMap. */
+static int
+cp1251_get_case_fold_codes_by_str(OnigCaseFoldType flag,
+    const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])
+{
+  return onigenc_get_case_fold_codes_by_str_with_map(
+             sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0,
+             flag, p, end, items);
+}
+/* Encoding descriptor for CP1251 (one byte per character): cp1251_* callbacks plus shared single-byte helpers. */
+OnigEncodingType OnigEncodingCP1251 = {
+  onigenc_single_byte_mbc_enc_len,
+  "CP1251",      /* name */
+  1,             /* max enc length */
+  1,             /* min enc length */
+  onigenc_is_mbc_newline_0x0a,
+  onigenc_single_byte_mbc_to_code,
+  onigenc_single_byte_code_to_mbclen,
+  onigenc_single_byte_code_to_mbc,
+  cp1251_mbc_case_fold,
+  cp1251_apply_all_case_fold,
+  cp1251_get_case_fold_codes_by_str,
+  onigenc_minimum_property_name_to_ctype,
+  cp1251_is_code_ctype,
+  onigenc_not_support_get_ctype_code_range,
+  onigenc_single_byte_left_adjust_char_head,
+  onigenc_always_true_is_allowed_reverse_match
+};
diff --git a/oniguruma/enc/euc_jp.c b/oniguruma/enc/euc_jp.c
new file mode 100644
index 0000000..f605297
--- /dev/null
+++ b/oniguruma/enc/euc_jp.c
@@ -0,0 +1,285 @@
+/**********************************************************************
+  euc_jp.c -  Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2002-2008  K.Kosako
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1.
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */
+
+#include "regint.h"
+/* True when c lies outside 0xa1..0xfe; the cast to UChar makes values below 0xa1 wrap to large numbers. */
+#define eucjp_islead(c)    ((UChar )((c) - 0xa1) > 0xfe - 0xa1)
+/* Byte length of a character, indexed by its first byte: 0x8e is 2, 0x8f is 3, 0xa1-0xfe are 2, all others 1. */
+static const int EncLen_EUCJP[] = {
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 3,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1
+};
+/* Length in bytes of the character starting at p, looked up from its first byte. */
+static int
+mbc_enc_len(const UChar* p)
+{
+  return EncLen_EUCJP[*p];
+}
+/* Pack the bytes of the character at p into one code point, first byte most significant; stops early at end. */
+static OnigCodePoint
+mbc_to_code(const UChar* p, const UChar* end)
+{
+  int c, i, len;
+  OnigCodePoint n;
+
+  len = enclen(ONIG_ENCODING_EUC_JP, p);
+  n = (OnigCodePoint )*p++;
+  if (len == 1) return n;
+
+  for (i = 1; i < len; i++) {
+    if (p >= end) break;
+    c = *p++;
+    n <<= 8;  n += c;
+  }
+  return n;
+}
+/* Bytes needed for code: 1 if ASCII, 3 if bits 16-23 are set, 2 if bits 8-15 are set, else an error code. */
+static int
+code_to_mbclen(OnigCodePoint code)
+{
+  if (ONIGENC_IS_CODE_ASCII(code)) return 1;
+  else if ((code & 0xff0000) != 0) return 3;
+  else if ((code &   0xff00) != 0) return 2;
+  else
+    return ONIGERR_INVALID_CODE_POINT_VALUE;
+}
+/* The helper below is compiled out by the #if 0 guard. */
+#if 0
+static int
+code_to_mbc_first(OnigCodePoint code)
+{
+  int first;
+
+  if ((code & 0xff0000) != 0) {
+    first = (code >> 16) & 0xff;
+  }
+  else if ((code & 0xff00) != 0) {
+    first = (code >> 8) & 0xff;
+  }
+  else {
+    return (int )code;
+  }
+  return first;
+}
+#endif
+/* Write code into buf most significant byte first, skipping zero high bytes; returns the byte count or an error. */
+static int
+code_to_mbc(OnigCodePoint code, UChar *buf)
+{
+  UChar *p = buf;
+
+  if ((code & 0xff0000) != 0) *p++ = (UChar )(((code >> 16) & 0xff));
+  if ((code
&   0xff00) != 0) *p++ = (UChar )(((code >>  8) & 0xff));
+  *p++ = (UChar )(code & 0xff);
+/* Reject a code whose written byte count disagrees with the length implied by its first byte. */
+#if 1
+  if (enclen(ONIG_ENCODING_EUC_JP, buf) != (p - buf))
+    return ONIGERR_INVALID_CODE_POINT_VALUE;
+#endif
+  return p - buf;
+}
+/* Fold one character: an ASCII byte is lower-cased, a multi-byte character is copied unchanged. Advances *pp. */
+static int
+mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED,
+              const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower)
+{
+  int len;
+  const UChar* p = *pp;
+
+  if (ONIGENC_IS_MBC_ASCII(p)) {
+    *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
+    (*pp)++;
+    return 1;
+  }
+  else {
+    int i;
+
+    len = enclen(ONIG_ENCODING_EUC_JP, p);
+    for (i = 0; i < len; i++) {
+      *lower++ = *p++;
+    }
+    (*pp) += len;
+    return len; /* return byte length of converted char to lower */
+  }
+}
+/* Return the head of the character that contains s, never moving before start. */
+static UChar*
+left_adjust_char_head(const UChar* start, const UChar* s)
+{
+  /* In this encoding
+     mb-trail bytes don't mix with single bytes.
+  */
+  const UChar *p;
+  int len;
+
+  if (s <= start) return (UChar* )s;
+  p = s;
+  /* Step left until a byte outside 0xa1..0xfe (or start), then re-sync forward in whole characters. */
+  while (!eucjp_islead(*p) && p > start) p--;
+  len = enclen(ONIG_ENCODING_EUC_JP, p);
+  if (p + len > s) return (UChar* )p;
+  p += len;
+  return (UChar* )(p + ((s - p) & ~1));
+}
+/* Reverse matching may start at s when *s is at most 0x7e or is one of the prefix bytes 0x8e / 0x8f. */
+static int
+is_allowed_reverse_match(const UChar* s, const UChar* end ARG_UNUSED)
+{
+  const UChar c = *s;
+  if (c <= 0x7e || c == 0x8e || c == 0x8f)
+    return TRUE;
+  else
+    return FALSE;
+}
+
+/* File-level state for named properties; presumably maintained by the PROPERTY_LIST_* macros - TODO confirm. */
+static int PropertyInited = 0;
+static const OnigCodePoint** PropertyList;
+static int PropertyListNum;
+static int PropertyListSize;
+static hash_table_type* PropertyNameTable;
+/* Code range lists: first element is the number of ranges, followed by (from, to) pairs. */
+static const OnigCodePoint CR_Hiragana[] = {
+  1,
+  0xa4a1, 0xa4f3
+}; /* CR_Hiragana */
+
+static const OnigCodePoint CR_Katakana[] = {
+  3,
+  0xa5a1, 0xa5f6,
+  0xaaa6, 0xaaaf,
+  0xaab1, 0xaadd
+}; /* CR_Katakana */
+/* Register the two ranges by name and set PropertyInited. NOTE(review): r and the end label look macro-driven - confirm. */
+static int
+init_property_list(void)
+{
+  int r;
+
+  PROPERTY_LIST_ADD_PROP("Hiragana", CR_Hiragana);
+  PROPERTY_LIST_ADD_PROP("Katakana", CR_Katakana);
+  PropertyInited = 1;
+
+ end:
+  return r;
+}
+/* Map a property name to a ctype: looks in PropertyNameTable first, else falls back to the minimum name set. */
+static int
+property_name_to_ctype(OnigEncoding enc, UChar* p, UChar* end)
+{
+
hash_data_type ctype;
+
+  PROPERTY_LIST_INIT_CHECK;
+  /* A lookup result of 0 is treated as not found; fall back to the generic property names. */
+  if (onig_st_lookup_strend(PropertyNameTable, p, end, &ctype) == 0) {
+    return onigenc_minimum_property_name_to_ctype(enc, p, end);
+  }
+
+  return (int )ctype;
+}
+/* Class test. Standard ctypes: ASCII macro below 128, word/graph/print for any multi-byte code. Others index PropertyList. */
+static int
+is_code_ctype(OnigCodePoint code, unsigned int ctype)
+{
+  if (ctype <= ONIGENC_MAX_STD_CTYPE) {
+    if (code < 128)
+      return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
+    else {
+      if (CTYPE_IS_WORD_GRAPH_PRINT(ctype)) {
+	return (code_to_mbclen(code) > 1 ? TRUE : FALSE);
+      }
+    }
+  }
+  else {
+    PROPERTY_LIST_INIT_CHECK;
+    /* Turn the extended ctype into a zero-based index into PropertyList. */
+    ctype -= (ONIGENC_MAX_STD_CTYPE + 1);
+    if (ctype >= (unsigned int )PropertyListNum)
+      return ONIGERR_TYPE_BUG;
+
+    return onig_is_in_code_range((UChar* )PropertyList[ctype], code);
+  }
+
+  return FALSE;
+}
+/* Range lookup for property ctypes only: sets *sb_out to 0x80 and *ranges to the list; standard ctypes are unsupported. */
+static int
+get_ctype_code_range(OnigCtype ctype, OnigCodePoint* sb_out,
+                     const OnigCodePoint* ranges[])
+{
+  if (ctype <= ONIGENC_MAX_STD_CTYPE) {
+    return ONIG_NO_SUPPORT_CONFIG;
+  }
+  else {
+    *sb_out = 0x80;
+
+    PROPERTY_LIST_INIT_CHECK;
+
+    ctype -= (ONIGENC_MAX_STD_CTYPE + 1);
+    if (ctype >= (OnigCtype )PropertyListNum)
+      return ONIGERR_TYPE_BUG;
+
+    *ranges = PropertyList[ctype];
+    return 0;
+  }
+}
+
+/* Encoding descriptor for EUC-JP (1 to 3 bytes per character), built mostly from the static callbacks in this file. */
+OnigEncodingType OnigEncodingEUC_JP = {
+  mbc_enc_len,
+  "EUC-JP",   /* name */
+  3,          /* max enc length */
+  1,          /* min enc length */
+  onigenc_is_mbc_newline_0x0a,
+  mbc_to_code,
+  code_to_mbclen,
+  code_to_mbc,
+  mbc_case_fold,
+  onigenc_ascii_apply_all_case_fold,
+  onigenc_ascii_get_case_fold_codes_by_str,
+  property_name_to_ctype,
+  is_code_ctype,
+  get_ctype_code_range,
+  left_adjust_char_head,
+  is_allowed_reverse_match
+};
diff --git a/oniguruma/enc/euc_kr.c b/oniguruma/enc/euc_kr.c
new file mode 100644
index 0000000..1beef09
--- /dev/null
+++ b/oniguruma/enc/euc_kr.c
@@ -0,0 +1,158 @@
+/**********************************************************************
+  euc_kr.c -  Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c)
2002-2007 K.Kosako + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */
+
+#include "regenc.h"
+/* Byte length of a character, indexed by its first byte: 2 for 0xa1-0xfe, 1 for every other value. */
+static const int EncLen_EUCKR[] = {
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1
+};
+/* Length in bytes of the character starting at p, looked up from its first byte. */
+static int
+euckr_mbc_enc_len(const UChar* p)
+{
+  return EncLen_EUCKR[*p];
+}
+/* Decode the character at p; delegates to onigenc_mbn_mbc_to_code with the EUC-KR encoding. */
+static OnigCodePoint
+euckr_mbc_to_code(const UChar* p, const UChar* end)
+{
+  return onigenc_mbn_mbc_to_code(ONIG_ENCODING_EUC_KR, p, end);
+}
+/* Encode code into buf; delegates to onigenc_mb2_code_to_mbc with the EUC-KR encoding. */
+static int
+euckr_code_to_mbc(OnigCodePoint code, UChar *buf)
+{
+  return onigenc_mb2_code_to_mbc(ONIG_ENCODING_EUC_KR, code, buf);
+}
+/* Case-fold the character at *pp into lower; delegates to onigenc_mbn_mbc_case_fold. */
+static int
+euckr_mbc_case_fold(OnigCaseFoldType flag, const UChar** pp, const UChar* end,
+                    UChar* lower)
+{
+  return onigenc_mbn_mbc_case_fold(ONIG_ENCODING_EUC_KR, flag,
+                                   pp, end, lower);
+}
+/* The ambiguity test below is compiled out by the #if 0 guard. */
+#if 0
+static int
+euckr_is_mbc_ambiguous(OnigCaseFoldType flag,
+                       const UChar** pp, const UChar* end)
+{
+  return onigenc_mbn_is_mbc_ambiguous(ONIG_ENCODING_EUC_KR, flag, pp, end);
+}
+#endif
+/* Character-class test; delegates to onigenc_mb2_is_code_ctype with the EUC-KR encoding. */
+static int
+euckr_is_code_ctype(OnigCodePoint code, unsigned int ctype)
+{
+  return onigenc_mb2_is_code_ctype(ONIG_ENCODING_EUC_KR, code, ctype);
+}
+/* True when c is below 0xa1 or equals 0xff, i.e. outside the 0xa1..0xfe multi-byte range. */
+#define euckr_islead(c)    ((c) < 0xa1 || (c) == 0xff)
+
+static UChar*
+euckr_left_adjust_char_head(const UChar* start, const UChar* s)
+{
+  /* Assumed in this encoding,
+     mb-trail bytes don't mix with single bytes.
+  */
+  const UChar *p;
+  int len;
+
+  if (s <= start) return (UChar* )s;
+  p = s;
+  /* Step left until a byte outside 0xa1..0xfe (or start), then re-sync forward in whole characters. */
+  while (!euckr_islead(*p) && p > start) p--;
+  len = enclen(ONIG_ENCODING_EUC_KR, p);
+  if (p + len > s) return (UChar* )p;
+  p += len;
+  return (UChar* )(p + ((s - p) & ~1));
+}
+/* Reverse matching may start at s only when *s is at most 0x7e. */
+static int
+euckr_is_allowed_reverse_match(const UChar* s, const UChar* end ARG_UNUSED)
+{
+  const UChar c = *s;
+  if (c <= 0x7e) return TRUE;
+  else           return FALSE;
+}
+/* Encoding descriptor for EUC-KR (1 to 2 bytes per character): euckr_* callbacks plus shared onigenc_* helpers. */
+OnigEncodingType OnigEncodingEUC_KR = {
+  euckr_mbc_enc_len,
+  "EUC-KR",   /* name */
+  2,          /* max enc length */
+  1,          /* min enc length */
+  onigenc_is_mbc_newline_0x0a,
+  euckr_mbc_to_code,
+  onigenc_mb2_code_to_mbclen,
+  euckr_code_to_mbc,
+  euckr_mbc_case_fold,
+  onigenc_ascii_apply_all_case_fold,
+  onigenc_ascii_get_case_fold_codes_by_str,
+  onigenc_minimum_property_name_to_ctype,
+  euckr_is_code_ctype,
+  onigenc_not_support_get_ctype_code_range,
+  euckr_left_adjust_char_head,
+  euckr_is_allowed_reverse_match
+};
+
+/* Same as OnigEncodingEUC_KR except for the name */
+OnigEncodingType OnigEncodingEUC_CN = {
+  euckr_mbc_enc_len,
+  "EUC-CN",   /* name */
+  2,          /* max enc length */
+  1,          /* min enc length */
+  onigenc_is_mbc_newline_0x0a,
+  euckr_mbc_to_code,
+  onigenc_mb2_code_to_mbclen,
+  euckr_code_to_mbc,
+  euckr_mbc_case_fold,
+  onigenc_ascii_apply_all_case_fold,
+  onigenc_ascii_get_case_fold_codes_by_str,
+  onigenc_minimum_property_name_to_ctype,
+  euckr_is_code_ctype,
+  onigenc_not_support_get_ctype_code_range,
+  euckr_left_adjust_char_head,
+  euckr_is_allowed_reverse_match
+};
diff --git a/oniguruma/enc/euc_tw.c b/oniguruma/enc/euc_tw.c
new file mode 100644
index 0000000..2ddeb93
--- /dev/null
+++ b/oniguruma/enc/euc_tw.c
@@ -0,0 +1,129 @@
+/**********************************************************************
+  euc_tw.c -  Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2002-2008  K.Kosako
+ * All rights reserved.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */
+
+#include "regenc.h"
+/* Byte length of a character, indexed by its first byte: 0x8e is 4, 0xa1-0xfe are 2, all others 1. */
+static const int EncLen_EUCTW[] = {
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1
+};
+/* Length in bytes of the character starting at p, looked up from its first byte. */
+static int
+euctw_mbc_enc_len(const UChar* p)
+{
+  return EncLen_EUCTW[*p];
+}
+/* Decode the character at p; delegates to onigenc_mbn_mbc_to_code with the EUC-TW encoding. */
+static OnigCodePoint
+euctw_mbc_to_code(const UChar* p, const UChar* end)
+{
+  return onigenc_mbn_mbc_to_code(ONIG_ENCODING_EUC_TW, p, end);
+}
+/* Encode code into buf; delegates to onigenc_mb4_code_to_mbc with the EUC-TW encoding. */
+static int
+euctw_code_to_mbc(OnigCodePoint code, UChar *buf)
+{
+  return onigenc_mb4_code_to_mbc(ONIG_ENCODING_EUC_TW, code, buf);
+}
+/* Case-fold the character at *pp into lower; delegates to onigenc_mbn_mbc_case_fold. */
+static int
+euctw_mbc_case_fold(OnigCaseFoldType flag, const UChar** pp, const UChar* end,
+                    UChar* lower)
+{
+  return onigenc_mbn_mbc_case_fold(ONIG_ENCODING_EUC_TW, flag,
+                                   pp, end, lower);
+}
+/* Character-class test; delegates to onigenc_mb4_is_code_ctype with the EUC-TW encoding. */
+static int
+euctw_is_code_ctype(OnigCodePoint code, unsigned int ctype)
+{
+  return onigenc_mb4_is_code_ctype(ONIG_ENCODING_EUC_TW, code, ctype);
+}
+/* True when c lies outside 0xa1..0xfe; the cast to UChar makes values below 0xa1 wrap to large numbers. */
+#define euctw_islead(c)    ((UChar )((c) - 0xa1) > 0xfe - 0xa1)
+
+static UChar*
+euctw_left_adjust_char_head(const UChar* start, const UChar* s)
+{
+  /* Assumed in this encoding,
+     mb-trail bytes don't mix with single bytes.
+  */
+  const UChar *p;
+  int len;
+
+  if (s <= start) return (UChar* )s;
+  p = s;
+  /* Step left until a byte outside 0xa1..0xfe (or start), then re-sync forward in whole characters. */
+  while (!euctw_islead(*p) && p > start) p--;
+  len = enclen(ONIG_ENCODING_EUC_TW, p);
+  if (p + len > s) return (UChar* )p;
+  p += len;
+  return (UChar* )(p + ((s - p) & ~1));
+}
+/* Reverse matching may start at s only when *s is at most 0x7e. */
+static int
+euctw_is_allowed_reverse_match(const UChar* s, const UChar* end ARG_UNUSED)
+{
+  const UChar c = *s;
+  if (c <= 0x7e) return TRUE;
+  else           return FALSE;
+}
+/* Encoding descriptor for EUC-TW (1 to 4 bytes per character): euctw_* callbacks plus shared onigenc_* helpers. */
+OnigEncodingType OnigEncodingEUC_TW = {
+  euctw_mbc_enc_len,
+  "EUC-TW",   /* name */
+  4,          /* max enc length */
+  1,          /* min enc length */
+  onigenc_is_mbc_newline_0x0a,
+  euctw_mbc_to_code,
+  onigenc_mb4_code_to_mbclen,
+  euctw_code_to_mbc,
+  euctw_mbc_case_fold,
+  onigenc_ascii_apply_all_case_fold,
+  onigenc_ascii_get_case_fold_codes_by_str,
+  onigenc_minimum_property_name_to_ctype,
+  euctw_is_code_ctype,
+  onigenc_not_support_get_ctype_code_range,
+  euctw_left_adjust_char_head,
+  euctw_is_allowed_reverse_match
+};
diff --git a/oniguruma/enc/gb18030.c b/oniguruma/enc/gb18030.c
new file mode 100644
index 0000000..6bbd109
--- /dev/null
+++ b/oniguruma/enc/gb18030.c
@@ -0,0 +1,495 @@
+/**********************************************************************
+  gb18030.c -  Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2005-2007  KUBO Takehiro
+ *                          K.Kosako
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "regenc.h"
+/* Debug tracing switch: with #if 1 the macro expands to nothing; flip to 0 to route its argument to printf. */
+#if 1
+#define DEBUG_GB18030(arg)
+#else
+#define DEBUG_GB18030(arg) printf arg
+#endif
+/* Byte classes stored in GB18030_MAP; each enumerator describes the positions a byte may occupy. */
+enum {
+  C1, /* one-byte char */
+  C2, /* one-byte or second of two-byte char */
+  C4, /* one-byte or second or fourth of four-byte char */
+  CM  /* first of two- or four-byte char or second of two-byte char */
+};
+/* Byte class for every byte value: 0x30-0x39 are C4, 0x40-0x7e and 0x80 are C2, 0x81-0xfe are CM, the rest C1. */
+static const char GB18030_MAP[] = {
+  C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1,
+  C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1,
+  C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1,
+  C4, C4, C4, C4, C4, C4, C4, C4, C4, C4, C1, C1, C1, C1, C1, C1,
+  C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2,
+  C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2,
+  C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2,
+  C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C1,
+  C2, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM,
+  CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM,
+  CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM,
+  CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM,
+  CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM,
CM, CM, CM, CM, CM, + CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, + CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, + CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, C1 +}; + +static int +gb18030_mbc_enc_len(const UChar* p) +{ + if (GB18030_MAP[*p] != CM) + return 1; + p++; + if (GB18030_MAP[*p] == C4) + return 4; + if (GB18030_MAP[*p] == C1) + return 1; /* illegal sequence */ + return 2; +} + +static OnigCodePoint +gb18030_mbc_to_code(const UChar* p, const UChar* end) +{ + return onigenc_mbn_mbc_to_code(ONIG_ENCODING_GB18030, p, end); +} + +static int +gb18030_code_to_mbc(OnigCodePoint code, UChar *buf) +{ + return onigenc_mb4_code_to_mbc(ONIG_ENCODING_GB18030, code, buf); +} + +static int +gb18030_mbc_case_fold(OnigCaseFoldType flag, const UChar** pp, const UChar* end, + UChar* lower) +{ + return onigenc_mbn_mbc_case_fold(ONIG_ENCODING_GB18030, flag, + pp, end, lower); +} + +#if 0 +static int +gb18030_is_mbc_ambiguous(OnigCaseFoldType flag, + const UChar** pp, const UChar* end) +{ + return onigenc_mbn_is_mbc_ambiguous(ONIG_ENCODING_GB18030, flag, pp, end); +} +#endif + +static int +gb18030_is_code_ctype(OnigCodePoint code, unsigned int ctype) +{ + return onigenc_mb4_is_code_ctype(ONIG_ENCODING_GB18030, code, ctype); +} + +enum state { + S_START, + S_one_C2, + S_one_C4, + S_one_CM, + + S_odd_CM_one_CX, + S_even_CM_one_CX, + + /* CMC4 : pair of "CM C4" */ + S_one_CMC4, + S_odd_CMC4, + S_one_C4_odd_CMC4, + S_even_CMC4, + S_one_C4_even_CMC4, + + S_odd_CM_odd_CMC4, + S_even_CM_odd_CMC4, + + S_odd_CM_even_CMC4, + S_even_CM_even_CMC4, + + /* C4CM : pair of "C4 CM" */ + S_odd_C4CM, + S_one_CM_odd_C4CM, + S_even_C4CM, + S_one_CM_even_C4CM, + + S_even_CM_odd_C4CM, + S_odd_CM_odd_C4CM, + S_even_CM_even_C4CM, + S_odd_CM_even_C4CM, +}; + +static UChar* +gb18030_left_adjust_char_head(const UChar* start, const UChar* s) +{ + const UChar *p; + enum state state = S_START; + + DEBUG_GB18030(("----------------\n")); + for (p = s; p >= 
start; p--) { + DEBUG_GB18030(("state %d --(%02x)-->\n", state, *p)); + switch (state) { + case S_START: + switch (GB18030_MAP[*p]) { + case C1: + return (UChar *)s; + case C2: + state = S_one_C2; /* C2 */ + break; + case C4: + state = S_one_C4; /* C4 */ + break; + case CM: + state = S_one_CM; /* CM */ + break; + } + break; + case S_one_C2: /* C2 */ + switch (GB18030_MAP[*p]) { + case C1: + case C2: + case C4: + return (UChar *)s; + case CM: + state = S_odd_CM_one_CX; /* CM C2 */ + break; + } + break; + case S_one_C4: /* C4 */ + switch (GB18030_MAP[*p]) { + case C1: + case C2: + case C4: + return (UChar *)s; + case CM: + state = S_one_CMC4; + break; + } + break; + case S_one_CM: /* CM */ + switch (GB18030_MAP[*p]) { + case C1: + case C2: + return (UChar *)s; + case C4: + state = S_odd_C4CM; + break; + case CM: + state = S_odd_CM_one_CX; /* CM CM */ + break; + } + break; + + case S_odd_CM_one_CX: /* CM C2 */ /* CM CM */ /* CM CM CM C4 */ + switch (GB18030_MAP[*p]) { + case C1: + case C2: + case C4: + return (UChar *)(s - 1); + case CM: + state = S_even_CM_one_CX; + break; + } + break; + case S_even_CM_one_CX: /* CM CM C2 */ /* CM CM CM */ /* CM CM C4 */ + switch (GB18030_MAP[*p]) { + case C1: + case C2: + case C4: + return (UChar *)s; + case CM: + state = S_odd_CM_one_CX; + break; + } + break; + + case S_one_CMC4: /* CM C4 */ + switch (GB18030_MAP[*p]) { + case C1: + case C2: + return (UChar *)(s - 1); + case C4: + state = S_one_C4_odd_CMC4; /* C4 CM C4 */ + break; + case CM: + state = S_even_CM_one_CX; /* CM CM C4 */ + break; + } + break; + case S_odd_CMC4: /* CM C4 CM C4 CM C4 */ + switch (GB18030_MAP[*p]) { + case C1: + case C2: + return (UChar *)(s - 1); + case C4: + state = S_one_C4_odd_CMC4; + break; + case CM: + state = S_odd_CM_odd_CMC4; + break; + } + break; + case S_one_C4_odd_CMC4: /* C4 CM C4 */ + switch (GB18030_MAP[*p]) { + case C1: + case C2: + case C4: + return (UChar *)(s - 1); + case CM: + state = S_even_CMC4; /* CM C4 CM C4 */ + break; + } + 
break; + case S_even_CMC4: /* CM C4 CM C4 */ + switch (GB18030_MAP[*p]) { + case C1: + case C2: + return (UChar *)(s - 3); + case C4: + state = S_one_C4_even_CMC4; + break; + case CM: + state = S_odd_CM_even_CMC4; + break; + } + break; + case S_one_C4_even_CMC4: /* C4 CM C4 CM C4 */ + switch (GB18030_MAP[*p]) { + case C1: + case C2: + case C4: + return (UChar *)(s - 3); + case CM: + state = S_odd_CMC4; + break; + } + break; + + case S_odd_CM_odd_CMC4: /* CM CM C4 CM C4 CM C4 */ + switch (GB18030_MAP[*p]) { + case C1: + case C2: + case C4: + return (UChar *)(s - 3); + case CM: + state = S_even_CM_odd_CMC4; + break; + } + break; + case S_even_CM_odd_CMC4: /* CM CM CM C4 CM C4 CM C4 */ + switch (GB18030_MAP[*p]) { + case C1: + case C2: + case C4: + return (UChar *)(s - 1); + case CM: + state = S_odd_CM_odd_CMC4; + break; + } + break; + + case S_odd_CM_even_CMC4: /* CM CM C4 CM C4 */ + switch (GB18030_MAP[*p]) { + case C1: + case C2: + case C4: + return (UChar *)(s - 1); + case CM: + state = S_even_CM_even_CMC4; + break; + } + break; + case S_even_CM_even_CMC4: /* CM CM CM C4 CM C4 */ + switch (GB18030_MAP[*p]) { + case C1: + case C2: + case C4: + return (UChar *)(s - 3); + case CM: + state = S_odd_CM_even_CMC4; + break; + } + break; + + case S_odd_C4CM: /* C4 CM */ /* C4 CM C4 CM C4 CM*/ + switch (GB18030_MAP[*p]) { + case C1: + case C2: + case C4: + return (UChar *)s; + case CM: + state = S_one_CM_odd_C4CM; /* CM C4 CM */ + break; + } + break; + case S_one_CM_odd_C4CM: /* CM C4 CM */ /* CM C4 CM C4 CM C4 CM */ + switch (GB18030_MAP[*p]) { + case C1: + case C2: + return (UChar *)(s - 2); /* |CM C4 CM */ + case C4: + state = S_even_C4CM; + break; + case CM: + state = S_even_CM_odd_C4CM; + break; + } + break; + case S_even_C4CM: /* C4 CM C4 CM */ + switch (GB18030_MAP[*p]) { + case C1: + case C2: + case C4: + return (UChar *)(s - 2); /* C4|CM C4 CM */ + case CM: + state = S_one_CM_even_C4CM; + break; + } + break; + case S_one_CM_even_C4CM: /* CM C4 CM C4 CM */ + switch 
(GB18030_MAP[*p]) { + case C1: + case C2: + return (UChar *)(s - 0); /*|CM C4 CM C4|CM */ + case C4: + state = S_odd_C4CM; + break; + case CM: + state = S_even_CM_even_C4CM; + break; + } + break; + + case S_even_CM_odd_C4CM: /* CM CM C4 CM */ + switch (GB18030_MAP[*p]) { + case C1: + case C2: + case C4: + return (UChar *)(s - 0); /* |CM CM|C4|CM */ + case CM: + state = S_odd_CM_odd_C4CM; + break; + } + break; + case S_odd_CM_odd_C4CM: /* CM CM CM C4 CM */ + switch (GB18030_MAP[*p]) { + case C1: + case C2: + case C4: + return (UChar *)(s - 2); /* |CM CM|CM C4 CM */ + case CM: + state = S_even_CM_odd_C4CM; + break; + } + break; + + case S_even_CM_even_C4CM: /* CM CM C4 CM C4 CM */ + switch (GB18030_MAP[*p]) { + case C1: + case C2: + case C4: + return (UChar *)(s - 2); /* |CM CM|C4|CM C4 CM */ + case CM: + state = S_odd_CM_even_C4CM; + break; + } + break; + case S_odd_CM_even_C4CM: /* CM CM CM C4 CM C4 CM */ + switch (GB18030_MAP[*p]) { + case C1: + case C2: + case C4: + return (UChar *)(s - 0); /* |CM CM|CM C4 CM C4|CM */ + case CM: + state = S_even_CM_even_C4CM; + break; + } + break; + } + } + + DEBUG_GB18030(("state %d\n", state)); + switch (state) { + case S_START: return (UChar *)(s - 0); + case S_one_C2: return (UChar *)(s - 0); + case S_one_C4: return (UChar *)(s - 0); + case S_one_CM: return (UChar *)(s - 0); + + case S_odd_CM_one_CX: return (UChar *)(s - 1); + case S_even_CM_one_CX: return (UChar *)(s - 0); + + case S_one_CMC4: return (UChar *)(s - 1); + case S_odd_CMC4: return (UChar *)(s - 1); + case S_one_C4_odd_CMC4: return (UChar *)(s - 1); + case S_even_CMC4: return (UChar *)(s - 3); + case S_one_C4_even_CMC4: return (UChar *)(s - 3); + + case S_odd_CM_odd_CMC4: return (UChar *)(s - 3); + case S_even_CM_odd_CMC4: return (UChar *)(s - 1); + + case S_odd_CM_even_CMC4: return (UChar *)(s - 1); + case S_even_CM_even_CMC4: return (UChar *)(s - 3); + + case S_odd_C4CM: return (UChar *)(s - 0); + case S_one_CM_odd_C4CM: return (UChar *)(s - 2); + case 
S_even_C4CM: return (UChar *)(s - 2); + case S_one_CM_even_C4CM: return (UChar *)(s - 0); + + case S_even_CM_odd_C4CM: return (UChar *)(s - 0); + case S_odd_CM_odd_C4CM: return (UChar *)(s - 2); + case S_even_CM_even_C4CM: return (UChar *)(s - 2); + case S_odd_CM_even_C4CM: return (UChar *)(s - 0); + } + + return (UChar* )s; /* never come here. (escape warning) */ +} + +static int +gb18030_is_allowed_reverse_match(const UChar* s, const UChar* end ARG_UNUSED) +{ + return GB18030_MAP[*s] == C1 ? TRUE : FALSE; +} + +OnigEncodingType OnigEncodingGB18030 = { + gb18030_mbc_enc_len, + "GB18030", /* name */ + 4, /* max enc length */ + 1, /* min enc length */ + onigenc_is_mbc_newline_0x0a, + gb18030_mbc_to_code, + onigenc_mb4_code_to_mbclen, + gb18030_code_to_mbc, + gb18030_mbc_case_fold, + onigenc_ascii_apply_all_case_fold, + onigenc_ascii_get_case_fold_codes_by_str, + onigenc_minimum_property_name_to_ctype, + gb18030_is_code_ctype, + onigenc_not_support_get_ctype_code_range, + gb18030_left_adjust_char_head, + gb18030_is_allowed_reverse_match +}; diff --git a/oniguruma/enc/iso8859_1.c b/oniguruma/enc/iso8859_1.c new file mode 100644 index 0000000..174b97f --- /dev/null +++ b/oniguruma/enc/iso8859_1.c @@ -0,0 +1,272 @@ +/********************************************************************** + iso8859_1.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2007 K.Kosako + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "regenc.h" + +#define ENC_IS_ISO_8859_1_CTYPE(code,ctype) \ + ((EncISO_8859_1_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0) + +static const unsigned short EncISO_8859_1_CtypeTable[256] = { + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, + 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0, + 
0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0284, 0x01a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, + 0x00a0, 0x00a0, 0x30e2, 0x01a0, 0x00a0, 0x01a0, 0x00a0, 0x00a0, + 0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x00a0, 0x30e2, 0x00a0, 0x01a0, + 0x00a0, 0x10a0, 0x30e2, 0x01a0, 0x10a0, 0x10a0, 0x10a0, 0x01a0, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x00a0, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2 +}; + +static const OnigPairCaseFoldCodes CaseFoldMap[] = { + { 0xc0, 0xe0 }, + { 0xc1, 0xe1 }, + { 0xc2, 0xe2 }, + { 0xc3, 0xe3 }, + { 0xc4, 0xe4 }, + { 0xc5, 0xe5 }, + { 0xc6, 0xe6 }, + { 0xc7, 0xe7 }, + { 0xc8, 0xe8 }, + { 0xc9, 0xe9 }, + { 0xca, 0xea }, + { 0xcb, 0xeb }, + { 0xcc, 0xec }, + { 0xcd, 0xed }, + { 0xce, 0xee }, + { 0xcf, 0xef }, + + { 0xd0, 0xf0 }, + { 0xd1, 0xf1 }, + { 0xd2, 0xf2 }, + { 0xd3, 0xf3 }, + { 0xd4, 0xf4 }, + { 0xd5, 0xf5 }, + { 0xd6, 0xf6 }, + { 0xd8, 0xf8 }, + { 0xd9, 0xf9 }, + { 0xda, 0xfa }, + { 0xdb, 0xfb }, + { 0xdc, 0xfc }, + { 0xdd, 0xfd }, + { 0xde, 0xfe } +}; + +static int +apply_all_case_fold(OnigCaseFoldType flag, + 
OnigApplyAllCaseFoldFunc f, void* arg) +{ + return onigenc_apply_all_case_fold_with_map( + sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1, + flag, f, arg); +} + +static int +get_case_fold_codes_by_str(OnigCaseFoldType flag ARG_UNUSED, + const OnigUChar* p, const OnigUChar* end, + OnigCaseFoldCodeItem items[]) +{ + if (0x41 <= *p && *p <= 0x5a) { + items[0].byte_len = 1; + items[0].code_len = 1; + items[0].code[0] = (OnigCodePoint )(*p + 0x20); + if (*p == 0x53 && end > p + 1 + && (*(p+1) == 0x53 || *(p+1) == 0x73)) { /* SS */ + items[1].byte_len = 2; + items[1].code_len = 1; + items[1].code[0] = (OnigCodePoint )0xdf; + return 2; + } + else + return 1; + } + else if (0x61 <= *p && *p <= 0x7a) { + items[0].byte_len = 1; + items[0].code_len = 1; + items[0].code[0] = (OnigCodePoint )(*p - 0x20); + if (*p == 0x73 && end > p + 1 + && (*(p+1) == 0x73 || *(p+1) == 0x53)) { /* ss */ + items[1].byte_len = 2; + items[1].code_len = 1; + items[1].code[0] = (OnigCodePoint )0xdf; + return 2; + } + else + return 1; + } + else if (0xc0 <= *p && *p <= 0xcf) { + items[0].byte_len = 1; + items[0].code_len = 1; + items[0].code[0] = (OnigCodePoint )(*p + 0x20); + return 1; + } + else if (0xd0 <= *p && *p <= 0xdf) { + if (*p == 0xdf) { + items[0].byte_len = 1; + items[0].code_len = 2; + items[0].code[0] = (OnigCodePoint )'s'; + items[0].code[1] = (OnigCodePoint )'s'; + + items[1].byte_len = 1; + items[1].code_len = 2; + items[1].code[0] = (OnigCodePoint )'S'; + items[1].code[1] = (OnigCodePoint )'S'; + + items[2].byte_len = 1; + items[2].code_len = 2; + items[2].code[0] = (OnigCodePoint )'s'; + items[2].code[1] = (OnigCodePoint )'S'; + + items[3].byte_len = 1; + items[3].code_len = 2; + items[3].code[0] = (OnigCodePoint )'S'; + items[3].code[1] = (OnigCodePoint )'s'; + + return 4; + } + else if (*p != 0xd7) { + items[0].byte_len = 1; + items[0].code_len = 1; + items[0].code[0] = (OnigCodePoint )(*p + 0x20); + return 1; + } + } + else if (0xe0 <= *p && *p <= 0xef) { + 
items[0].byte_len = 1; + items[0].code_len = 1; + items[0].code[0] = (OnigCodePoint )(*p - 0x20); + return 1; + } + else if (0xf0 <= *p && *p <= 0xfe) { + if (*p != 0xf7) { + items[0].byte_len = 1; + items[0].code_len = 1; + items[0].code[0] = (OnigCodePoint )(*p - 0x20); + return 1; + } + } + + return 0; +} + +static int +mbc_case_fold(OnigCaseFoldType flag, const UChar** pp, + const UChar* end ARG_UNUSED, UChar* lower) +{ + const UChar* p = *pp; + + if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { + *lower++ = 's'; + *lower = 's'; + (*pp)++; + return 2; + } + + *lower = ONIGENC_ISO_8859_1_TO_LOWER_CASE(*p); + (*pp)++; + return 1; +} + +#if 0 +static int +is_mbc_ambiguous(OnigCaseFoldType flag, + const UChar** pp, const UChar* end) +{ + int v; + const UChar* p = *pp; + + if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { + (*pp)++; + return TRUE; + } + + (*pp)++; + v = (EncISO_8859_1_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER)); + if ((v | BIT_CTYPE_LOWER) != 0) { + /* 0xdf, 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */ + if (*p >= 0xaa && *p <= 0xba) + return FALSE; + else + return TRUE; + } + + return (v != 0 ? 
TRUE : FALSE); +} +#endif + +static int +is_code_ctype(OnigCodePoint code, unsigned int ctype) +{ + if (code < 256) + return ENC_IS_ISO_8859_1_CTYPE(code, ctype); + else + return FALSE; +} + +OnigEncodingType OnigEncodingISO_8859_1 = { + onigenc_single_byte_mbc_enc_len, + "ISO-8859-1", /* name */ + 1, /* max enc length */ + 1, /* min enc length */ + onigenc_is_mbc_newline_0x0a, + onigenc_single_byte_mbc_to_code, + onigenc_single_byte_code_to_mbclen, + onigenc_single_byte_code_to_mbc, + mbc_case_fold, + apply_all_case_fold, + get_case_fold_codes_by_str, + onigenc_minimum_property_name_to_ctype, + is_code_ctype, + onigenc_not_support_get_ctype_code_range, + onigenc_single_byte_left_adjust_char_head, + onigenc_always_true_is_allowed_reverse_match +}; diff --git a/oniguruma/enc/iso8859_10.c b/oniguruma/enc/iso8859_10.c new file mode 100644 index 0000000..e35c19d --- /dev/null +++ b/oniguruma/enc/iso8859_10.c @@ -0,0 +1,239 @@ +/********************************************************************** + iso8859_10.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2007 K.Kosako + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "regenc.h" + +#define ENC_ISO_8859_10_TO_LOWER_CASE(c) EncISO_8859_10_ToLowerCaseTable[c] +#define ENC_IS_ISO_8859_10_CTYPE(code,ctype) \ + ((EncISO_8859_10_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0) + +static const UChar EncISO_8859_10_ToLowerCaseTable[256] = { + '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', + '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', + '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', + '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', + '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', + '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', + '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', + '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', + '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137', + '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177', + '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', + '\210', '\211', '\212', '\213', '\214', '\215', '\216', 
'\217', + '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227', + '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', + '\240', '\261', '\262', '\263', '\264', '\265', '\266', '\247', + '\270', '\271', '\272', '\273', '\274', '\255', '\276', '\277', + '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267', + '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277', + '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', + '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', + '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', + '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337', + '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', + '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', + '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', + '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377' +}; + +static const unsigned short EncISO_8859_10_CtypeTable[256] = { + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, + 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0, + 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 
0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0284, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x00a0, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x01a0, 0x34a2, 0x34a2, + 0x00a0, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x01a0, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x01a0, 0x30e2, 0x30e2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2 +}; + +static int +mbc_case_fold(OnigCaseFoldType flag, + const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower) +{ + const UChar* p = *pp; + + if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { + *lower++ = 's'; + *lower = 's'; + (*pp)++; + return 2; + } + + *lower = ENC_ISO_8859_10_TO_LOWER_CASE(*p); + (*pp)++; + return 1; +} + +#if 0 +static int +is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end) +{ + int v; + const UChar* p = *pp; + + if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { + (*pp)++; + return TRUE; + } + + (*pp)++; + v = (EncISO_8859_10_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER)); + if ((v | BIT_CTYPE_LOWER) != 0) { + return TRUE; + } + + return (v != 0 ? 
TRUE : FALSE); +} +#endif + +static int +is_code_ctype(OnigCodePoint code, unsigned int ctype) +{ + if (code < 256) + return ENC_IS_ISO_8859_10_CTYPE(code, ctype); + else + return FALSE; +} + +static const OnigPairCaseFoldCodes CaseFoldMap[] = { + { 0xa1, 0xb1 }, + { 0xa2, 0xb2 }, + { 0xa3, 0xb3 }, + { 0xa4, 0xb4 }, + { 0xa5, 0xb5 }, + { 0xa6, 0xb6 }, + { 0xa8, 0xb8 }, + { 0xa9, 0xb9 }, + { 0xaa, 0xba }, + { 0xab, 0xbb }, + { 0xac, 0xbc }, + { 0xae, 0xbe }, + { 0xaf, 0xbf }, + + { 0xc0, 0xe0 }, + { 0xc1, 0xe1 }, + { 0xc2, 0xe2 }, + { 0xc3, 0xe3 }, + { 0xc4, 0xe4 }, + { 0xc5, 0xe5 }, + { 0xc6, 0xe6 }, + { 0xc7, 0xe7 }, + { 0xc8, 0xe8 }, + { 0xc9, 0xe9 }, + { 0xca, 0xea }, + { 0xcb, 0xeb }, + { 0xcc, 0xec }, + { 0xcd, 0xed }, + { 0xce, 0xee }, + { 0xcf, 0xef }, + + { 0xd0, 0xf0 }, + { 0xd1, 0xf1 }, + { 0xd2, 0xf2 }, + { 0xd3, 0xf3 }, + { 0xd4, 0xf4 }, + { 0xd5, 0xf5 }, + { 0xd6, 0xf6 }, + { 0xd7, 0xf7 }, + { 0xd8, 0xf8 }, + { 0xd9, 0xf9 }, + { 0xda, 0xfa }, + { 0xdb, 0xfb }, + { 0xdc, 0xfc }, + { 0xdd, 0xfd }, + { 0xde, 0xfe } +}; + +static int +apply_all_case_fold(OnigCaseFoldType flag, + OnigApplyAllCaseFoldFunc f, void* arg) +{ + return onigenc_apply_all_case_fold_with_map( + sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1, + flag, f, arg); +} + +static int +get_case_fold_codes_by_str(OnigCaseFoldType flag, + const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]) +{ + return onigenc_get_case_fold_codes_by_str_with_map( + sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1, + flag, p, end, items); +} + +OnigEncodingType OnigEncodingISO_8859_10 = { + onigenc_single_byte_mbc_enc_len, + "ISO-8859-10", /* name */ + 1, /* max enc length */ + 1, /* min enc length */ + onigenc_is_mbc_newline_0x0a, + onigenc_single_byte_mbc_to_code, + onigenc_single_byte_code_to_mbclen, + onigenc_single_byte_code_to_mbc, + mbc_case_fold, + apply_all_case_fold, + get_case_fold_codes_by_str, + onigenc_minimum_property_name_to_ctype, + 
is_code_ctype, + onigenc_not_support_get_ctype_code_range, + onigenc_single_byte_left_adjust_char_head, + onigenc_always_true_is_allowed_reverse_match +}; diff --git a/oniguruma/enc/iso8859_11.c b/oniguruma/enc/iso8859_11.c new file mode 100644 index 0000000..8a460a3 --- /dev/null +++ b/oniguruma/enc/iso8859_11.c @@ -0,0 +1,96 @@ +/********************************************************************** + iso8859_11.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2007 K.Kosako + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include "regenc.h" + +#define ENC_IS_ISO_8859_11_CTYPE(code,ctype) \ + ((EncISO_8859_11_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0) + +static const unsigned short EncISO_8859_11_CtypeTable[256] = { + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, + 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0, + 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0284, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, + 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, + 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, + 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, + 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, + 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, + 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, + 
0x30a2, 0x30a2, 0x30a2, 0x0000, 0x0000, 0x0000, 0x0000, 0x30a2, + 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, + 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, + 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, + 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x0000, 0x0000, 0x0000, 0x0000 +}; + +static int +is_code_ctype(OnigCodePoint code, unsigned int ctype) +{ + if (code < 256) + return ENC_IS_ISO_8859_11_CTYPE(code, ctype); + else + return FALSE; +} + +OnigEncodingType OnigEncodingISO_8859_11 = { + onigenc_single_byte_mbc_enc_len, + "ISO-8859-11", /* name */ + 1, /* max enc length */ + 1, /* min enc length */ + onigenc_is_mbc_newline_0x0a, + onigenc_single_byte_mbc_to_code, + onigenc_single_byte_code_to_mbclen, + onigenc_single_byte_code_to_mbc, + onigenc_ascii_mbc_case_fold, + onigenc_ascii_apply_all_case_fold, + onigenc_ascii_get_case_fold_codes_by_str, + onigenc_minimum_property_name_to_ctype, + is_code_ctype, + onigenc_not_support_get_ctype_code_range, + onigenc_single_byte_left_adjust_char_head, + onigenc_always_true_is_allowed_reverse_match +}; diff --git a/oniguruma/enc/iso8859_13.c b/oniguruma/enc/iso8859_13.c new file mode 100644 index 0000000..3670d92 --- /dev/null +++ b/oniguruma/enc/iso8859_13.c @@ -0,0 +1,228 @@ +/********************************************************************** + iso8859_13.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2007 K.Kosako + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "regenc.h" + +#define ENC_ISO_8859_13_TO_LOWER_CASE(c) EncISO_8859_13_ToLowerCaseTable[c] +#define ENC_IS_ISO_8859_13_CTYPE(code,ctype) \ + ((EncISO_8859_13_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0) + +static const UChar EncISO_8859_13_ToLowerCaseTable[256] = { + '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', + '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', + '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', + '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', + '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', + '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', + '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', + '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', + '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', 
'\167', + '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137', + '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177', + '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', + '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217', + '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227', + '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', + '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247', + '\270', '\251', '\272', '\253', '\254', '\255', '\256', '\277', + '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267', + '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277', + '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', + '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', + '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\327', + '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337', + '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', + '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', + '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', + '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377' +}; + +static const unsigned short EncISO_8859_13_CtypeTable[256] = { + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, + 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 
0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0, + 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0284, 0x01a0, 0x00a0, 0x00a0, 0x00a0, 0x01a0, 0x00a0, 0x00a0, + 0x34a2, 0x00a0, 0x34a2, 0x01a0, 0x00a0, 0x01a0, 0x00a0, 0x34a2, + 0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x01a0, 0x30e2, 0x00a0, 0x01a0, + 0x30e2, 0x10a0, 0x30e2, 0x01a0, 0x10a0, 0x10a0, 0x10a0, 0x30e2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x00a0, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x01a0 +}; + +/* Case-fold the byte at *pp into `lower` and advance *pp by one byte. When that byte is 0xdf and `flag` has INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR set, the two bytes 's','s' are stored and 2 is returned; otherwise the EncISO_8859_13_ToLowerCaseTable entry is stored and 1 is returned. `end` is never read. */ +static int +mbc_case_fold(OnigCaseFoldType flag, + const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower) +{ + const UChar* p = *pp; + + if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { + *lower++ = 's'; + *lower = 's'; + (*pp)++; + return 2; + } + + *lower = ENC_ISO_8859_13_TO_LOWER_CASE(*p); + (*pp)++; + return 1; +} + +#if 0 +static int +is_mbc_ambiguous(OnigCaseFoldType 
flag, const UChar** pp, const UChar* end) +{ + int v; + const UChar* p = *pp; + + if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { + (*pp)++; + return TRUE; + } + + (*pp)++; + v = (EncISO_8859_13_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER)); + /* NOTE(review): `v | BIT_CTYPE_LOWER` is non-zero whenever BIT_CTYPE_LOWER is, so this test looks always true; `&` was presumably intended - confirm before re-enabling this block. */ + if ((v | BIT_CTYPE_LOWER) != 0) { + /* 0xdf, 0xb5 are lower case letter, but can't convert. */ + if (*p == 0xb5) + return FALSE; + else + return TRUE; + } + + return (v != 0 ? TRUE : FALSE); +} +#endif + +/* Test whether code point `code` carries character type `ctype`: codes below 256 are looked up in EncISO_8859_13_CtypeTable, any larger code yields FALSE. */ +static int +is_code_ctype(OnigCodePoint code, unsigned int ctype) +{ + if (code < 256) + return ENC_IS_ISO_8859_13_CTYPE(code, ctype); + else + return FALSE; +} + +/* Byte pairs that fold to each other; the second byte of every pair is the EncISO_8859_13_ToLowerCaseTable entry of the first. The 0xa8, 0xaa and 0xaf pairs cover the three folds that table defines below 0xc0, so the map-driven callbacks agree with mbc_case_fold. */ +static const OnigPairCaseFoldCodes CaseFoldMap[] = { + { 0xa8, 0xb8 }, + { 0xaa, 0xba }, + { 0xaf, 0xbf }, + + { 0xc0, 0xe0 }, + { 0xc1, 0xe1 }, + { 0xc2, 0xe2 }, + { 0xc3, 0xe3 }, + { 0xc4, 0xe4 }, + { 0xc5, 0xe5 }, + { 0xc6, 0xe6 }, + { 0xc7, 0xe7 }, + { 0xc8, 0xe8 }, + { 0xc9, 0xe9 }, + { 0xca, 0xea }, + { 0xcb, 0xeb }, + { 0xcc, 0xec }, + { 0xcd, 0xed }, + { 0xce, 0xee }, + { 0xcf, 0xef }, + + { 0xd0, 0xf0 }, + { 0xd1, 0xf1 }, + { 0xd2, 0xf2 }, + { 0xd3, 0xf3 }, + { 0xd4, 0xf4 }, + { 0xd5, 0xf5 }, + { 0xd6, 0xf6 }, + { 0xd8, 0xf8 }, + { 0xd9, 0xf9 }, + { 0xda, 0xfa }, + { 0xdb, 0xfb }, + { 0xdc, 0xfc }, + { 0xdd, 0xfd }, + { 0xde, 0xfe } +}; + +/* Hands CaseFoldMap, its element count, the literal 1, `flag`, `f` and `arg` to onigenc_apply_all_case_fold_with_map and returns that call's result. NOTE(review): the literal 1 is presumably the helper's 0xdf (ess-tsett) switch - confirm against regenc.h. */ +static int +apply_all_case_fold(OnigCaseFoldType flag, + OnigApplyAllCaseFoldFunc f, void* arg) +{ + return onigenc_apply_all_case_fold_with_map( + sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1, + flag, f, arg); +} + +/* Hands CaseFoldMap, its element count, the literal 1, `flag`, `p`, `end` and `items` to onigenc_get_case_fold_codes_by_str_with_map and returns that call's result. NOTE(review): the literal 1 is presumably the helper's 0xdf (ess-tsett) switch - confirm against regenc.h. */ +static int +get_case_fold_codes_by_str(OnigCaseFoldType flag, + const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]) +{ + return onigenc_get_case_fold_codes_by_str_with_map( + sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1, + flag, p, end, items); +} + +OnigEncodingType OnigEncodingISO_8859_13 = { + onigenc_single_byte_mbc_enc_len, + "ISO-8859-13", /* name */ + 1, /* max enc length */ + 1, /* min enc length */ + onigenc_is_mbc_newline_0x0a, + 
onigenc_single_byte_mbc_to_code, + onigenc_single_byte_code_to_mbclen, + onigenc_single_byte_code_to_mbc, + mbc_case_fold, + apply_all_case_fold, + get_case_fold_codes_by_str, + onigenc_minimum_property_name_to_ctype, + is_code_ctype, + onigenc_not_support_get_ctype_code_range, + onigenc_single_byte_left_adjust_char_head, + onigenc_always_true_is_allowed_reverse_match +}; diff --git a/oniguruma/enc/iso8859_14.c b/oniguruma/enc/iso8859_14.c new file mode 100644 index 0000000..3596d44 --- /dev/null +++ b/oniguruma/enc/iso8859_14.c @@ -0,0 +1,241 @@ +/********************************************************************** + iso8859_14.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2007 K.Kosako + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "regenc.h" + +#define ENC_ISO_8859_14_TO_LOWER_CASE(c) EncISO_8859_14_ToLowerCaseTable[c] +#define ENC_IS_ISO_8859_14_CTYPE(code,ctype) \ + ((EncISO_8859_14_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0) + +static const UChar EncISO_8859_14_ToLowerCaseTable[256] = { + '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', + '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', + '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', + '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', + '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', + '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', + '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', + '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', + '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137', + '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177', + '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', + '\210', '\211', '\212', '\213', '\214', '\215', '\216', 
'\217', + '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227', + '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', + '\240', '\242', '\242', '\243', '\245', '\245', '\253', '\247', + '\270', '\251', '\272', '\253', '\274', '\255', '\256', '\377', + '\261', '\261', '\263', '\263', '\265', '\265', '\266', '\271', + '\270', '\271', '\272', '\277', '\274', '\276', '\276', '\277', + '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', + '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', + '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', + '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337', + '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', + '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', + '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', + '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377' +}; + +static const unsigned short EncISO_8859_14_CtypeTable[256] = { + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, + 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0, + 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 
0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0284, 0x34a2, 0x30e2, 0x00a0, 0x34a2, 0x30e2, 0x34a2, 0x00a0, + 0x34a2, 0x00a0, 0x34a2, 0x30e2, 0x34a2, 0x01a0, 0x00a0, 0x34a2, + 0x34a2, 0x30e2, 0x34a2, 0x30e2, 0x34a2, 0x30e2, 0x00a0, 0x34a2, + 0x30e2, 0x30e2, 0x30e2, 0x34a2, 0x30e2, 0x34a2, 0x30e2, 0x30e2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2 +}; + +/* Case-fold the byte at *pp into `lower` and advance *pp by one byte. When that byte is 0xdf and `flag` has INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR set, the two bytes 's','s' are stored and 2 is returned; otherwise the EncISO_8859_14_ToLowerCaseTable entry is stored and 1 is returned. `end` is never read. */ +static int +mbc_case_fold(OnigCaseFoldType flag, + const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower) +{ + const UChar* p = *pp; + + if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { + *lower++ = 's'; + *lower = 's'; + (*pp)++; + return 2; + } + + *lower = ENC_ISO_8859_14_TO_LOWER_CASE(*p); + (*pp)++; + return 1; /* return byte length of converted char to lower */ +} + +#if 0 +static int +is_mbc_ambiguous(OnigCaseFoldType flag, + const UChar** pp, const UChar* end) +{ + int v; + const UChar* p = *pp; + + if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { + (*pp)++; + return TRUE; + } + + (*pp)++; + v = (EncISO_8859_14_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER)); + /* NOTE(review): `v | BIT_CTYPE_LOWER` is non-zero whenever BIT_CTYPE_LOWER is, so this test looks always true; `&` was presumably intended - confirm before re-enabling this block. */ + if ((v | BIT_CTYPE_LOWER) != 0) { + return TRUE; + } + + return (v != 0 ? 
TRUE : FALSE); +} +#endif + +/* Test whether code point `code` carries character type `ctype`: codes below 256 are looked up in EncISO_8859_14_CtypeTable, any larger code yields FALSE. */ +static int +is_code_ctype(OnigCodePoint code, unsigned int ctype) +{ + if (code < 256) + return ENC_IS_ISO_8859_14_CTYPE(code, ctype); + else + return FALSE; +} + +/* Byte pairs that fold to each other; the second byte of every pair is the EncISO_8859_14_ToLowerCaseTable entry of the first. */ +static const OnigPairCaseFoldCodes CaseFoldMap[] = { + { 0xa1, 0xa2 }, + { 0xa4, 0xa5 }, + { 0xa6, 0xab }, + { 0xa8, 0xb8 }, + { 0xaa, 0xba }, + { 0xac, 0xbc }, + { 0xaf, 0xff }, + + { 0xb0, 0xb1 }, + { 0xb2, 0xb3 }, + { 0xb4, 0xb5 }, + { 0xb7, 0xb9 }, + { 0xbb, 0xbf }, + { 0xbd, 0xbe }, + + { 0xc0, 0xe0 }, + { 0xc1, 0xe1 }, + { 0xc2, 0xe2 }, + { 0xc3, 0xe3 }, + { 0xc4, 0xe4 }, + { 0xc5, 0xe5 }, + { 0xc6, 0xe6 }, + { 0xc7, 0xe7 }, + { 0xc8, 0xe8 }, + { 0xc9, 0xe9 }, + { 0xca, 0xea }, + { 0xcb, 0xeb }, + { 0xcc, 0xec }, + { 0xcd, 0xed }, + { 0xce, 0xee }, + { 0xcf, 0xef }, + + { 0xd0, 0xf0 }, + { 0xd1, 0xf1 }, + { 0xd2, 0xf2 }, + { 0xd3, 0xf3 }, + { 0xd4, 0xf4 }, + { 0xd5, 0xf5 }, + { 0xd6, 0xf6 }, + { 0xd7, 0xf7 }, + { 0xd8, 0xf8 }, + { 0xd9, 0xf9 }, + { 0xda, 0xfa }, + { 0xdb, 0xfb }, + { 0xdc, 0xfc }, + { 0xdd, 0xfd }, + { 0xde, 0xfe } +}; + +/* Hands CaseFoldMap, its element count, the literal 1, `flag`, `f` and `arg` to onigenc_apply_all_case_fold_with_map and returns that call's result. NOTE(review): the literal 1 is presumably the helper's 0xdf (ess-tsett) switch - confirm against regenc.h. */ +static int +apply_all_case_fold(OnigCaseFoldType flag, + OnigApplyAllCaseFoldFunc f, void* arg) +{ + return onigenc_apply_all_case_fold_with_map( + sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1, + flag, f, arg); +} + +/* Hands CaseFoldMap, its element count, the literal 1, `flag`, `p`, `end` and `items` to onigenc_get_case_fold_codes_by_str_with_map and returns that call's result. NOTE(review): the literal 1 is presumably the helper's 0xdf (ess-tsett) switch - confirm against regenc.h. */ +static int +get_case_fold_codes_by_str(OnigCaseFoldType flag, + const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]) +{ + return onigenc_get_case_fold_codes_by_str_with_map( + sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1, + flag, p, end, items); +} + +OnigEncodingType OnigEncodingISO_8859_14 = { + onigenc_single_byte_mbc_enc_len, + "ISO-8859-14", /* name */ + 1, /* max enc length */ + 1, /* min enc length */ + onigenc_is_mbc_newline_0x0a, + onigenc_single_byte_mbc_to_code, + onigenc_single_byte_code_to_mbclen, + onigenc_single_byte_code_to_mbc, + mbc_case_fold, + apply_all_case_fold, + get_case_fold_codes_by_str, + onigenc_minimum_property_name_to_ctype, + 
is_code_ctype, + onigenc_not_support_get_ctype_code_range, + onigenc_single_byte_left_adjust_char_head, + onigenc_always_true_is_allowed_reverse_match +}; diff --git a/oniguruma/enc/iso8859_15.c b/oniguruma/enc/iso8859_15.c new file mode 100644 index 0000000..08492fb --- /dev/null +++ b/oniguruma/enc/iso8859_15.c @@ -0,0 +1,235 @@ +/********************************************************************** + iso8859_15.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2007 K.Kosako + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include "regenc.h" + +#define ENC_ISO_8859_15_TO_LOWER_CASE(c) EncISO_8859_15_ToLowerCaseTable[c] +#define ENC_IS_ISO_8859_15_CTYPE(code,ctype) \ + ((EncISO_8859_15_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0) + +static const UChar EncISO_8859_15_ToLowerCaseTable[256] = { + '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', + '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', + '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', + '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', + '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', + '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', + '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', + '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', + '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137', + '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177', + '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', + '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217', + '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227', + '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', + '\240', '\241', '\242', '\243', '\244', '\245', '\250', '\247', + '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257', + '\260', '\261', '\262', '\263', '\270', '\265', '\266', '\267', + '\270', '\271', '\272', '\273', '\275', '\275', '\377', '\277', + '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', + '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', 
+ '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\327', + '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337', + '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', + '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', + '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', + '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377' +}; + +static const unsigned short EncISO_8859_15_CtypeTable[256] = { + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, + 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0, + 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0284, 0x01a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x34a2, 0x00a0, + 0x30e2, 0x00a0, 0x30e2, 0x01a0, 0x00a0, 0x01a0, 0x00a0, 0x00a0, + 0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x34a2, 0x30e2, 0x00a0, 0x01a0, + 0x30e2, 
0x10a0, 0x30e2, 0x01a0, 0x34a2, 0x30e2, 0x34a2, 0x01a0, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x00a0, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2 +}; + +/* Case-fold the byte at *pp into `lower` and advance *pp by one byte. When that byte is 0xdf and `flag` has INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR set, the two bytes 's','s' are stored and 2 is returned; otherwise the EncISO_8859_15_ToLowerCaseTable entry is stored and 1 is returned. `end` is never read. */ +static int +mbc_case_fold(OnigCaseFoldType flag, + const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower) +{ + const UChar* p = *pp; + + if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { + *lower++ = 's'; + *lower = 's'; + (*pp)++; + return 2; + } + + *lower = ENC_ISO_8859_15_TO_LOWER_CASE(*p); + (*pp)++; + return 1; /* return byte length of converted char to lower */ +} + +#if 0 +static int +is_mbc_ambiguous(OnigCaseFoldType flag, + const UChar** pp, const UChar* end) +{ + int v; + const UChar* p = *pp; + + if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { + (*pp)++; + return TRUE; + } + + (*pp)++; + v = (EncISO_8859_15_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER)); + /* NOTE(review): `v | BIT_CTYPE_LOWER` is non-zero whenever BIT_CTYPE_LOWER is, so this test looks always true; `&` was presumably intended - confirm before re-enabling this block. */ + if ((v | BIT_CTYPE_LOWER) != 0) { + /* 0xdf etc.. are lower case letter, but can't convert. */ + if (*p == 0xaa || *p == 0xb5 || *p == 0xba) + return FALSE; + else + return TRUE; + } + + return (v != 0 ? 
TRUE : FALSE); +} +#endif + +/* Test whether code point `code` carries character type `ctype`: codes below 256 are looked up in EncISO_8859_15_CtypeTable, any larger code yields FALSE. */ +static int +is_code_ctype(OnigCodePoint code, unsigned int ctype) +{ + if (code < 256) + return ENC_IS_ISO_8859_15_CTYPE(code, ctype); + else + return FALSE; +} + +/* Byte pairs that fold to each other; the second byte of every pair is the EncISO_8859_15_ToLowerCaseTable entry of the first. */ +static const OnigPairCaseFoldCodes CaseFoldMap[] = { + { 0xa6, 0xa8 }, + + { 0xb4, 0xb8 }, + { 0xbc, 0xbd }, + { 0xbe, 0xff }, + + { 0xc0, 0xe0 }, + { 0xc1, 0xe1 }, + { 0xc2, 0xe2 }, + { 0xc3, 0xe3 }, + { 0xc4, 0xe4 }, + { 0xc5, 0xe5 }, + { 0xc6, 0xe6 }, + { 0xc7, 0xe7 }, + { 0xc8, 0xe8 }, + { 0xc9, 0xe9 }, + { 0xca, 0xea }, + { 0xcb, 0xeb }, + { 0xcc, 0xec }, + { 0xcd, 0xed }, + { 0xce, 0xee }, + { 0xcf, 0xef }, + + { 0xd0, 0xf0 }, + { 0xd1, 0xf1 }, + { 0xd2, 0xf2 }, + { 0xd3, 0xf3 }, + { 0xd4, 0xf4 }, + { 0xd5, 0xf5 }, + { 0xd6, 0xf6 }, + { 0xd8, 0xf8 }, + { 0xd9, 0xf9 }, + { 0xda, 0xfa }, + { 0xdb, 0xfb }, + { 0xdc, 0xfc }, + { 0xdd, 0xfd }, + { 0xde, 0xfe } +}; + +/* Hands CaseFoldMap, its element count, the literal 1, `flag`, `f` and `arg` to onigenc_apply_all_case_fold_with_map and returns that call's result. NOTE(review): the literal 1 is presumably the helper's 0xdf (ess-tsett) switch - confirm against regenc.h. */ +static int +apply_all_case_fold(OnigCaseFoldType flag, + OnigApplyAllCaseFoldFunc f, void* arg) +{ + return onigenc_apply_all_case_fold_with_map( + sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1, + flag, f, arg); +} + +/* Hands CaseFoldMap, its element count, the literal 1, `flag`, `p`, `end` and `items` to onigenc_get_case_fold_codes_by_str_with_map and returns that call's result. NOTE(review): the literal 1 is presumably the helper's 0xdf (ess-tsett) switch - confirm against regenc.h. */ +static int +get_case_fold_codes_by_str(OnigCaseFoldType flag, + const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]) +{ + return onigenc_get_case_fold_codes_by_str_with_map( + sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1, + flag, p, end, items); +} + +/* Encoding descriptor for ISO-8859-15: minimum and maximum encoded length are both 1, and the initializer lists this file's mbc_case_fold, apply_all_case_fold, get_case_fold_codes_by_str and is_code_ctype next to onigenc_* helpers. */ +OnigEncodingType OnigEncodingISO_8859_15 = { + onigenc_single_byte_mbc_enc_len, + "ISO-8859-15", /* name */ + 1, /* max enc length */ + 1, /* min enc length */ + onigenc_is_mbc_newline_0x0a, + onigenc_single_byte_mbc_to_code, + onigenc_single_byte_code_to_mbclen, + onigenc_single_byte_code_to_mbc, + mbc_case_fold, + apply_all_case_fold, + get_case_fold_codes_by_str, + onigenc_minimum_property_name_to_ctype, + is_code_ctype, + onigenc_not_support_get_ctype_code_range, + onigenc_single_byte_left_adjust_char_head, + onigenc_always_true_is_allowed_reverse_match +}; diff --git 
a/oniguruma/enc/iso8859_16.c b/oniguruma/enc/iso8859_16.c new file mode 100644 index 0000000..8b39c58 --- /dev/null +++ b/oniguruma/enc/iso8859_16.c @@ -0,0 +1,237 @@ +/********************************************************************** + iso8859_16.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2007 K.Kosako + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include "regenc.h" + +#define ENC_ISO_8859_16_TO_LOWER_CASE(c) EncISO_8859_16_ToLowerCaseTable[c] +#define ENC_IS_ISO_8859_16_CTYPE(code,ctype) \ + ((EncISO_8859_16_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0) + +/* Lower-case lookup read through ENC_ISO_8859_16_TO_LOWER_CASE: entry i is the byte that input byte i folds to, and a byte without a lower-case partner maps to itself. */ +static const UChar EncISO_8859_16_ToLowerCaseTable[256] = { + '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', + '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', + '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', + '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', + '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', + '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', + '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', + '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', + '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137', + '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177', + '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', + '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217', + '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227', + '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', + '\240', '\242', '\242', '\263', '\244', '\245', '\250', '\247', /* 0xa4 and 0xa5 are caseless (euro sign, low double quote): each maps to itself */ + '\250', '\251', '\272', '\253', '\256', '\255', '\256', '\277', + '\260', '\261', '\271', '\263', '\270', '\265', '\266', '\267', + '\270', '\271', '\272', '\273', '\275', '\275', '\377', '\277', + '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', + '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', 
+ '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', + '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337', + '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', + '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', + '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', + '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377' +}; + +static const unsigned short EncISO_8859_16_CtypeTable[256] = { + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, + 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0, + 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0284, 0x34a2, 0x30e2, 0x34a2, 0x00a0, 0x01a0, 0x34a2, 0x00a0, + 0x30e2, 0x00a0, 0x34a2, 0x01a0, 0x34a2, 0x01a0, 0x30e2, 0x34a2, + 0x00a0, 0x00a0, 0x34a2, 0x30e2, 0x34a2, 0x01a0, 0x00a0, 0x01a0, + 0x30e2, 
0x30e2, 0x30e2, 0x01a0, 0x34a2, 0x30e2, 0x34a2, 0x30e2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2 +}; + +static int +mbc_case_fold(OnigCaseFoldType flag, + const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower) +{ + const UChar* p = *pp; + + if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { + *lower++ = 's'; + *lower = 's'; + (*pp)++; + return 2; + } + + *lower = ENC_ISO_8859_16_TO_LOWER_CASE(*p); + (*pp)++; + return 1; /* return byte length of converted char to lower */ +} + +#if 0 +static int +is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end) +{ + int v; + const UChar* p = *pp; + + if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { + (*pp)++; + return TRUE; + } + + (*pp)++; + v = (EncISO_8859_16_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER)); + if ((v | BIT_CTYPE_LOWER) != 0) { + return TRUE; + } + + return (v != 0 ? 
TRUE : FALSE); +} +#endif + +static int +is_code_ctype(OnigCodePoint code, unsigned int ctype) +{ + if (code < 256) + return ENC_IS_ISO_8859_16_CTYPE(code, ctype); + else + return FALSE; +} + +static const OnigPairCaseFoldCodes CaseFoldMap[] = { + { 0xa1, 0xa2 }, + { 0xa3, 0xb3 }, + { 0xa6, 0xa8 }, + { 0xaa, 0xba }, + { 0xac, 0xae }, + { 0xaf, 0xbf }, + + { 0xb2, 0xb9 }, + { 0xb4, 0xb8 }, + { 0xbc, 0xbd }, + { 0xbe, 0xff }, + + { 0xc0, 0xe0 }, + { 0xc1, 0xe1 }, + { 0xc2, 0xe2 }, + { 0xc3, 0xe3 }, + { 0xc4, 0xe4 }, + { 0xc5, 0xe5 }, + { 0xc6, 0xe6 }, + { 0xc7, 0xe7 }, + { 0xc8, 0xe8 }, + { 0xc9, 0xe9 }, + { 0xca, 0xea }, + { 0xcb, 0xeb }, + { 0xcc, 0xec }, + { 0xcd, 0xed }, + { 0xce, 0xee }, + { 0xcf, 0xef }, + + { 0xd0, 0xf0 }, + { 0xd1, 0xf1 }, + { 0xd2, 0xf2 }, + { 0xd3, 0xf3 }, + { 0xd4, 0xf4 }, + { 0xd5, 0xf5 }, + { 0xd6, 0xf6 }, + { 0xd7, 0xf7 }, + { 0xd8, 0xf8 }, + { 0xd9, 0xf9 }, + { 0xda, 0xfa }, + { 0xdb, 0xfb }, + { 0xdc, 0xfc }, + { 0xdd, 0xfd }, + { 0xde, 0xfe } +}; + +static int +apply_all_case_fold(OnigCaseFoldType flag, + OnigApplyAllCaseFoldFunc f, void* arg) +{ + return onigenc_apply_all_case_fold_with_map( + sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1, + flag, f, arg); +} + +static int +get_case_fold_codes_by_str(OnigCaseFoldType flag, + const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]) +{ + return onigenc_get_case_fold_codes_by_str_with_map( + sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1, + flag, p, end, items); +} + +OnigEncodingType OnigEncodingISO_8859_16 = { + onigenc_single_byte_mbc_enc_len, + "ISO-8859-16", /* name */ + 1, /* max enc length */ + 1, /* min enc length */ + onigenc_is_mbc_newline_0x0a, + onigenc_single_byte_mbc_to_code, + onigenc_single_byte_code_to_mbclen, + onigenc_single_byte_code_to_mbc, + mbc_case_fold, + apply_all_case_fold, + get_case_fold_codes_by_str, + onigenc_minimum_property_name_to_ctype, + is_code_ctype, + onigenc_not_support_get_ctype_code_range, + 
onigenc_single_byte_left_adjust_char_head, + onigenc_always_true_is_allowed_reverse_match +}; diff --git a/oniguruma/enc/iso8859_2.c b/oniguruma/enc/iso8859_2.c new file mode 100644 index 0000000..80b93ba --- /dev/null +++ b/oniguruma/enc/iso8859_2.c @@ -0,0 +1,235 @@ +/********************************************************************** + iso8859_2.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2007 K.Kosako + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include "regenc.h" + +#define ENC_ISO_8859_2_TO_LOWER_CASE(c) EncISO_8859_2_ToLowerCaseTable[c] +#define ENC_IS_ISO_8859_2_CTYPE(code,ctype) \ + ((EncISO_8859_2_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0) + +static const UChar EncISO_8859_2_ToLowerCaseTable[256] = { + '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', + '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', + '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', + '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', + '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', + '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', + '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', + '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', + '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137', + '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177', + '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', + '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217', + '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227', + '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', + '\240', '\261', '\242', '\263', '\244', '\265', '\266', '\247', + '\250', '\271', '\272', '\273', '\274', '\255', '\276', '\277', + '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267', + '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277', + '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', + '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', + 
'\360', '\361', '\362', '\363', '\364', '\365', '\366', '\327', + '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337', + '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', + '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', + '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', + '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377' +}; + +static const unsigned short EncISO_8859_2_CtypeTable[256] = { + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, + 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0, + 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0284, 0x34a2, 0x00a0, 0x34a2, 0x00a0, 0x34a2, 0x34a2, 0x00a0, + 0x00a0, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x01a0, 0x34a2, 0x34a2, + 0x00a0, 0x30e2, 0x00a0, 0x30e2, 0x00a0, 0x30e2, 0x30e2, 0x00a0, + 0x00a0, 0x30e2, 
0x30e2, 0x30e2, 0x30e2, 0x00a0, 0x30e2, 0x30e2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x00a0, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0 +}; + +static int +mbc_case_fold(OnigCaseFoldType flag, + const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower) +{ + const UChar* p = *pp; + + if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { + *lower++ = 's'; + *lower = 's'; + (*pp)++; + return 2; + } + + *lower = ENC_ISO_8859_2_TO_LOWER_CASE(*p); + (*pp)++; + return 1; /* return byte length of converted char to lower */ +} + +#if 0 +static int +is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end) +{ + int v; + const UChar* p = *pp; + + if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { + (*pp)++; + return TRUE; + } + + (*pp)++; + v = (EncISO_8859_2_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER)); + if ((v | BIT_CTYPE_LOWER) != 0) { + return TRUE; + } + + return (v != 0 ? 
TRUE : FALSE); +} +#endif + +static const OnigPairCaseFoldCodes CaseFoldMap[] = { + { 0xa1, 0xb1 }, + { 0xa3, 0xb3 }, + { 0xa5, 0xb5 }, + { 0xa6, 0xb6 }, + { 0xa9, 0xb9 }, + { 0xaa, 0xba }, + { 0xab, 0xbb }, + { 0xac, 0xbc }, + { 0xae, 0xbe }, + { 0xaf, 0xbf }, + + { 0xc0, 0xe0 }, + { 0xc1, 0xe1 }, + { 0xc2, 0xe2 }, + { 0xc3, 0xe3 }, + { 0xc4, 0xe4 }, + { 0xc5, 0xe5 }, + { 0xc6, 0xe6 }, + { 0xc7, 0xe7 }, + { 0xc8, 0xe8 }, + { 0xc9, 0xe9 }, + { 0xca, 0xea }, + { 0xcb, 0xeb }, + { 0xcc, 0xec }, + { 0xcd, 0xed }, + { 0xce, 0xee }, + { 0xcf, 0xef }, + + { 0xd0, 0xf0 }, + { 0xd1, 0xf1 }, + { 0xd2, 0xf2 }, + { 0xd3, 0xf3 }, + { 0xd4, 0xf4 }, + { 0xd5, 0xf5 }, + { 0xd6, 0xf6 }, + { 0xd8, 0xf8 }, + { 0xd9, 0xf9 }, + { 0xda, 0xfa }, + { 0xdb, 0xfb }, + { 0xdc, 0xfc }, + { 0xdd, 0xfd }, + { 0xde, 0xfe } +}; + +static int +apply_all_case_fold(OnigCaseFoldType flag, + OnigApplyAllCaseFoldFunc f, void* arg) +{ + return onigenc_apply_all_case_fold_with_map( + sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1, + flag, f, arg); +} + +static int +get_case_fold_codes_by_str(OnigCaseFoldType flag, + const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]) +{ + return onigenc_get_case_fold_codes_by_str_with_map( + sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1, + flag, p, end, items); +} + +static int +is_code_ctype(OnigCodePoint code, unsigned int ctype) +{ + if (code < 256) + return ENC_IS_ISO_8859_2_CTYPE(code, ctype); + else + return FALSE; +} + +OnigEncodingType OnigEncodingISO_8859_2 = { + onigenc_single_byte_mbc_enc_len, + "ISO-8859-2", /* name */ + 1, /* max enc length */ + 1, /* min enc length */ + onigenc_is_mbc_newline_0x0a, + onigenc_single_byte_mbc_to_code, + onigenc_single_byte_code_to_mbclen, + onigenc_single_byte_code_to_mbc, + mbc_case_fold, + apply_all_case_fold, + get_case_fold_codes_by_str, + onigenc_minimum_property_name_to_ctype, + is_code_ctype, + onigenc_not_support_get_ctype_code_range, + 
onigenc_single_byte_left_adjust_char_head, + onigenc_always_true_is_allowed_reverse_match +}; diff --git a/oniguruma/enc/iso8859_3.c b/oniguruma/enc/iso8859_3.c new file mode 100644 index 0000000..fd1168c --- /dev/null +++ b/oniguruma/enc/iso8859_3.c @@ -0,0 +1,235 @@ +/********************************************************************** + iso8859_3.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2007 K.Kosako + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include "regenc.h" + +#define ENC_ISO_8859_3_TO_LOWER_CASE(c) EncISO_8859_3_ToLowerCaseTable[c] +#define ENC_IS_ISO_8859_3_CTYPE(code,ctype) \ + ((EncISO_8859_3_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0) + +static const UChar EncISO_8859_3_ToLowerCaseTable[256] = { + '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', + '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', + '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', + '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', + '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', + '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', + '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', + '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', + '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137', + '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177', + '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', + '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217', + '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227', + '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', + '\240', '\261', '\242', '\243', '\244', '\245', '\266', '\247', + '\250', '\271', '\272', '\273', '\274', '\255', '\256', '\277', + '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267', + '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277', + '\340', '\341', '\342', '\303', '\344', '\345', '\346', '\347', + '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', + 
'\320', '\361', '\362', '\363', '\364', '\365', '\366', '\327', + '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337', + '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', + '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', + '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', + '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377' +}; + +static const unsigned short EncISO_8859_3_CtypeTable[256] = { + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, + 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0, + 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0284, 0x34a2, 0x00a0, 0x00a0, 0x00a0, 0x0000, 0x34a2, 0x00a0, + 0x00a0, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x01a0, 0x0000, 0x34a2, + 0x00a0, 0x30e2, 0x10a0, 0x10a0, 0x00a0, 0x30e2, 0x30e2, 0x01a0, + 0x00a0, 0x30e2, 
0x30e2, 0x30e2, 0x30e2, 0x11a0, 0x0000, 0x30e2, + 0x34a2, 0x34a2, 0x34a2, 0x0000, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x0000, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x00a0, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x0000, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x0000, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0 +}; + +static int +mbc_case_fold(OnigCaseFoldType flag, const UChar** pp, + const UChar* end ARG_UNUSED, UChar* lower) +{ + const UChar* p = *pp; + + if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { + *lower++ = 's'; + *lower = 's'; + (*pp)++; + return 2; + } + + *lower = ENC_ISO_8859_3_TO_LOWER_CASE(*p); + (*pp)++; + return 1; +} + +#if 0 +static int +is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end) +{ + int v; + const UChar* p = *pp; + + if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { + (*pp)++; + return TRUE; + } + + (*pp)++; + v = (EncISO_8859_3_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER)); + if ((v | BIT_CTYPE_LOWER) != 0) { + /* 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */ + if (*p == 0xb5) + return FALSE; + else + return TRUE; + } + + return (v != 0 ? 
TRUE : FALSE); +} +#endif + +static int +is_code_ctype(OnigCodePoint code, unsigned int ctype) +{ + if (code < 256) + return ENC_IS_ISO_8859_3_CTYPE(code, ctype); + else + return FALSE; +} + +static const OnigPairCaseFoldCodes CaseFoldMap[] = { + { 0xa1, 0xb1 }, + { 0xa6, 0xb6 }, + { 0xa9, 0xb9 }, + { 0xaa, 0xba }, + { 0xab, 0xbb }, + { 0xac, 0xbc }, + { 0xaf, 0xbf }, + + { 0xc0, 0xe0 }, + { 0xc1, 0xe1 }, + { 0xc2, 0xe2 }, + { 0xc4, 0xe4 }, + { 0xc5, 0xe5 }, + { 0xc6, 0xe6 }, + { 0xc7, 0xe7 }, + { 0xc8, 0xe8 }, + { 0xc9, 0xe9 }, + { 0xca, 0xea }, + { 0xcb, 0xeb }, + { 0xcc, 0xec }, + { 0xcd, 0xed }, + { 0xce, 0xee }, + { 0xcf, 0xef }, + + { 0xd1, 0xf1 }, + { 0xd2, 0xf2 }, + { 0xd3, 0xf3 }, + { 0xd4, 0xf4 }, + { 0xd5, 0xf5 }, + { 0xd6, 0xf6 }, + { 0xd8, 0xf8 }, + { 0xd9, 0xf9 }, + { 0xda, 0xfa }, + { 0xdb, 0xfb }, + { 0xdc, 0xfc }, + { 0xdd, 0xfd }, + { 0xde, 0xfe } +}; + + +static int +apply_all_case_fold(OnigCaseFoldType flag, + OnigApplyAllCaseFoldFunc f, void* arg) +{ + return onigenc_apply_all_case_fold_with_map( + sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1, + flag, f, arg); +} + +static int +get_case_fold_codes_by_str(OnigCaseFoldType flag, + const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]) +{ + return onigenc_get_case_fold_codes_by_str_with_map( + sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1, + flag, p, end, items); +} + +OnigEncodingType OnigEncodingISO_8859_3 = { + onigenc_single_byte_mbc_enc_len, + "ISO-8859-3", /* name */ + 1, /* max enc length */ + 1, /* min enc length */ + onigenc_is_mbc_newline_0x0a, + onigenc_single_byte_mbc_to_code, + onigenc_single_byte_code_to_mbclen, + onigenc_single_byte_code_to_mbc, + mbc_case_fold, + apply_all_case_fold, + get_case_fold_codes_by_str, + onigenc_minimum_property_name_to_ctype, + is_code_ctype, + onigenc_not_support_get_ctype_code_range, + onigenc_single_byte_left_adjust_char_head, + onigenc_always_true_is_allowed_reverse_match +}; diff --git 
a/oniguruma/enc/iso8859_4.c b/oniguruma/enc/iso8859_4.c new file mode 100644 index 0000000..c124f56 --- /dev/null +++ b/oniguruma/enc/iso8859_4.c @@ -0,0 +1,237 @@ +/********************************************************************** + iso8859_4.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2007 K.Kosako + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include "regenc.h" + +#define ENC_ISO_8859_4_TO_LOWER_CASE(c) EncISO_8859_4_ToLowerCaseTable[c] +#define ENC_IS_ISO_8859_4_CTYPE(code,ctype) \ + ((EncISO_8859_4_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0) + +static const UChar EncISO_8859_4_ToLowerCaseTable[256] = { + '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', + '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', + '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', + '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', + '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', + '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', + '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', + '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', + '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137', + '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177', + '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', + '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217', + '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227', + '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', + '\240', '\261', '\242', '\263', '\244', '\265', '\266', '\247', + '\250', '\271', '\272', '\273', '\274', '\255', '\276', '\257', + '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267', + '\270', '\271', '\272', '\273', '\274', '\277', '\276', '\277', + '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', + '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', + 
'\360', '\361', '\362', '\363', '\364', '\365', '\366', '\327', + '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337', + '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', + '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', + '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', + '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377' +}; + +static const unsigned short EncISO_8859_4_CtypeTable[256] = { + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, + 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0, + 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0284, 0x34a2, 0x30e2, 0x34a2, 0x00a0, 0x34a2, 0x34a2, 0x00a0, + 0x00a0, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x01a0, 0x34a2, 0x00a0, + 0x00a0, 0x30e2, 0x00a0, 0x30e2, 0x00a0, 0x30e2, 0x30e2, 0x00a0, + 0x00a0, 0x30e2, 
0x30e2, 0x30e2, 0x30e2, 0x34a2, 0x30e2, 0x30e2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x00a0, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0 +}; + +static int +mbc_case_fold(OnigCaseFoldType flag, + const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower) +{ + const UChar* p = *pp; + + if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { + *lower++ = 's'; + *lower = 's'; + (*pp)++; + return 2; + } + + *lower = ENC_ISO_8859_4_TO_LOWER_CASE(*p); + (*pp)++; + return 1; /* return byte length of converted char to lower */ +} + +#if 0 +static int +is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end) +{ + int v; + const UChar* p = *pp; + + if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { + (*pp)++; + return TRUE; + } + + (*pp)++; + v = (EncISO_8859_4_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER)); + if ((v | BIT_CTYPE_LOWER) != 0) { + if (*p == 0xa2) + return FALSE; + else + return TRUE; + } + + return (v != 0 ? 
TRUE : FALSE); +} +#endif + +static int +is_code_ctype(OnigCodePoint code, unsigned int ctype) +{ + if (code < 256) + return ENC_IS_ISO_8859_4_CTYPE(code, ctype); + else + return FALSE; +} + +static const OnigPairCaseFoldCodes CaseFoldMap[] = { + { 0xa1, 0xb1 }, + { 0xa3, 0xb3 }, + { 0xa5, 0xb5 }, + { 0xa6, 0xb6 }, + { 0xa9, 0xb9 }, + { 0xaa, 0xba }, + { 0xab, 0xbb }, + { 0xac, 0xbc }, + { 0xae, 0xbe }, + + { 0xc0, 0xe0 }, + { 0xc1, 0xe1 }, + { 0xc2, 0xe2 }, + { 0xc3, 0xe3 }, + { 0xc4, 0xe4 }, + { 0xc5, 0xe5 }, + { 0xc6, 0xe6 }, + { 0xc7, 0xe7 }, + { 0xc8, 0xe8 }, + { 0xc9, 0xe9 }, + { 0xca, 0xea }, + { 0xcb, 0xeb }, + { 0xcc, 0xec }, + { 0xcd, 0xed }, + { 0xce, 0xee }, + { 0xcf, 0xef }, + + { 0xd0, 0xf0 }, + { 0xd1, 0xf1 }, + { 0xd2, 0xf2 }, + { 0xd3, 0xf3 }, + { 0xd4, 0xf4 }, + { 0xd5, 0xf5 }, + { 0xd6, 0xf6 }, + { 0xd8, 0xf8 }, + { 0xd9, 0xf9 }, + { 0xda, 0xfa }, + { 0xdb, 0xfb }, + { 0xdc, 0xfc }, + { 0xdd, 0xfd }, + { 0xde, 0xfe } +}; + +static int +apply_all_case_fold(OnigCaseFoldType flag, + OnigApplyAllCaseFoldFunc f, void* arg) +{ + return onigenc_apply_all_case_fold_with_map( + sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1, + flag, f, arg); +} + +static int +get_case_fold_codes_by_str(OnigCaseFoldType flag, + const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]) +{ + return onigenc_get_case_fold_codes_by_str_with_map( + sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1, + flag, p, end, items); +} + +OnigEncodingType OnigEncodingISO_8859_4 = { + onigenc_single_byte_mbc_enc_len, + "ISO-8859-4", /* name */ + 1, /* max enc length */ + 1, /* min enc length */ + onigenc_is_mbc_newline_0x0a, + onigenc_single_byte_mbc_to_code, + onigenc_single_byte_code_to_mbclen, + onigenc_single_byte_code_to_mbc, + mbc_case_fold, + apply_all_case_fold, + get_case_fold_codes_by_str, + onigenc_minimum_property_name_to_ctype, + is_code_ctype, + onigenc_not_support_get_ctype_code_range, + 
onigenc_single_byte_left_adjust_char_head, + onigenc_always_true_is_allowed_reverse_match +}; diff --git a/oniguruma/enc/iso8859_5.c b/oniguruma/enc/iso8859_5.c new file mode 100644 index 0000000..1ca67e7 --- /dev/null +++ b/oniguruma/enc/iso8859_5.c @@ -0,0 +1,226 @@ +/********************************************************************** + iso8859_5.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2008 K.Kosako + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include "regenc.h" + +#define ENC_ISO_8859_5_TO_LOWER_CASE(c) EncISO_8859_5_ToLowerCaseTable[c] +#define ENC_IS_ISO_8859_5_CTYPE(code,ctype) \ + ((EncISO_8859_5_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0) + +static const UChar EncISO_8859_5_ToLowerCaseTable[256] = { + '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', + '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', + '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', + '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', + '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', + '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', + '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', + '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', + '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137', + '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177', + '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', + '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217', + '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227', + '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', + '\240', '\361', '\362', '\363', '\364', '\365', '\366', '\367', + '\370', '\371', '\372', '\373', '\374', '\255', '\376', '\377', + '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327', + '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337', + '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', + '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', + 
'\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327', + '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337', + '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', + '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', + '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', + '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377' +}; + +static const unsigned short EncISO_8859_5_CtypeTable[256] = { + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, + 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0, + 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0284, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x01a0, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x00a0, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0, 0x30e2, 0x30e2 +}; + +static int +mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED, + const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower) +{ + const UChar* p = *pp; + + *lower = ENC_ISO_8859_5_TO_LOWER_CASE(*p); + (*pp)++; + return 1; +} + +#if 0 +static int +is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end) +{ + int v; + const UChar* p = *pp; + + (*pp)++; + v = (EncISO_8859_5_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER)); + return (v != 0 ? 
TRUE : FALSE); +} +#endif + +static int +is_code_ctype(OnigCodePoint code, unsigned int ctype) +{ + if (code < 256) + return ENC_IS_ISO_8859_5_CTYPE(code, ctype); + else + return FALSE; +} + +static const OnigPairCaseFoldCodes CaseFoldMap[] = { + { 0xa1, 0xf1 }, + { 0xa2, 0xf2 }, + { 0xa3, 0xf3 }, + { 0xa4, 0xf4 }, + { 0xa5, 0xf5 }, + { 0xa6, 0xf6 }, + { 0xa7, 0xf7 }, + { 0xa8, 0xf8 }, + { 0xa9, 0xf9 }, + { 0xaa, 0xfa }, + { 0xab, 0xfb }, + { 0xac, 0xfc }, + { 0xae, 0xfe }, + { 0xaf, 0xff }, + + { 0xb0, 0xd0 }, + { 0xb1, 0xd1 }, + { 0xb2, 0xd2 }, + { 0xb3, 0xd3 }, + { 0xb4, 0xd4 }, + { 0xb5, 0xd5 }, + { 0xb6, 0xd6 }, + { 0xb7, 0xd7 }, + { 0xb8, 0xd8 }, + { 0xb9, 0xd9 }, + { 0xba, 0xda }, + { 0xbb, 0xdb }, + { 0xbc, 0xdc }, + { 0xbd, 0xdd }, + { 0xbe, 0xde }, + { 0xbf, 0xdf }, + + { 0xc0, 0xe0 }, + { 0xc1, 0xe1 }, + { 0xc2, 0xe2 }, + { 0xc3, 0xe3 }, + { 0xc4, 0xe4 }, + { 0xc5, 0xe5 }, + { 0xc6, 0xe6 }, + { 0xc7, 0xe7 }, + { 0xc8, 0xe8 }, + { 0xc9, 0xe9 }, + { 0xca, 0xea }, + { 0xcb, 0xeb }, + { 0xcc, 0xec }, + { 0xcd, 0xed }, + { 0xce, 0xee }, + { 0xcf, 0xef } +}; + +static int +apply_all_case_fold(OnigCaseFoldType flag, + OnigApplyAllCaseFoldFunc f, void* arg) +{ + return onigenc_apply_all_case_fold_with_map( + sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0, + flag, f, arg); +} + +static int +get_case_fold_codes_by_str(OnigCaseFoldType flag, + const OnigUChar* p, const OnigUChar* end, + OnigCaseFoldCodeItem items[]) +{ + return onigenc_get_case_fold_codes_by_str_with_map( + sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0, + flag, p, end, items); +} + +OnigEncodingType OnigEncodingISO_8859_5 = { + onigenc_single_byte_mbc_enc_len, + "ISO-8859-5", /* name */ + 1, /* max enc length */ + 1, /* min enc length */ + onigenc_is_mbc_newline_0x0a, + onigenc_single_byte_mbc_to_code, + onigenc_single_byte_code_to_mbclen, + onigenc_single_byte_code_to_mbc, + mbc_case_fold, + apply_all_case_fold, + get_case_fold_codes_by_str, + 
onigenc_minimum_property_name_to_ctype, + is_code_ctype, + onigenc_not_support_get_ctype_code_range, + onigenc_single_byte_left_adjust_char_head, + onigenc_always_true_is_allowed_reverse_match +}; diff --git a/oniguruma/enc/iso8859_6.c b/oniguruma/enc/iso8859_6.c new file mode 100644 index 0000000..ab42eee --- /dev/null +++ b/oniguruma/enc/iso8859_6.c @@ -0,0 +1,96 @@ +/********************************************************************** + iso8859_6.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2007 K.Kosako + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include "regenc.h" + +#define ENC_IS_ISO_8859_6_CTYPE(code,ctype) \ + ((EncISO_8859_6_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0) + +static const unsigned short EncISO_8859_6_CtypeTable[256] = { + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, + 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0, + 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0284, 0x0000, 0x0000, 0x0000, 0x00a0, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x01a0, 0x01a0, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x01a0, 0x0000, 0x0000, 0x0000, 0x01a0, + 0x0000, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, + 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, + 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, + 0x30a2, 
0x30a2, 0x30a2, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, + 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, + 0x30a2, 0x30a2, 0x30a2, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 +}; + +static int +is_code_ctype(OnigCodePoint code, unsigned int ctype) +{ + if (code < 256) + return ENC_IS_ISO_8859_6_CTYPE(code, ctype); + else + return FALSE; +} + +OnigEncodingType OnigEncodingISO_8859_6 = { + onigenc_single_byte_mbc_enc_len, + "ISO-8859-6", /* name */ + 1, /* max enc length */ + 1, /* min enc length */ + onigenc_is_mbc_newline_0x0a, + onigenc_single_byte_mbc_to_code, + onigenc_single_byte_code_to_mbclen, + onigenc_single_byte_code_to_mbc, + onigenc_ascii_mbc_case_fold, + onigenc_ascii_apply_all_case_fold, + onigenc_ascii_get_case_fold_codes_by_str, + onigenc_minimum_property_name_to_ctype, + is_code_ctype, + onigenc_not_support_get_ctype_code_range, + onigenc_single_byte_left_adjust_char_head, + onigenc_always_true_is_allowed_reverse_match +}; diff --git a/oniguruma/enc/iso8859_7.c b/oniguruma/enc/iso8859_7.c new file mode 100644 index 0000000..1090064 --- /dev/null +++ b/oniguruma/enc/iso8859_7.c @@ -0,0 +1,222 @@ +/********************************************************************** + iso8859_7.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2007 K.Kosako + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "regenc.h" + +#define ENC_ISO_8859_7_TO_LOWER_CASE(c) EncISO_8859_7_ToLowerCaseTable[c] +#define ENC_IS_ISO_8859_7_CTYPE(code,ctype) \ + ((EncISO_8859_7_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0) + +static const UChar EncISO_8859_7_ToLowerCaseTable[256] = { + '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', + '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', + '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', + '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', + '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', + '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', + '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', + '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', + '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', 
+ '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137', + '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177', + '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', + '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217', + '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227', + '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', + '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247', + '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257', + '\260', '\261', '\262', '\263', '\264', '\265', '\334', '\267', + '\335', '\336', '\337', '\273', '\374', '\275', '\375', '\376', + '\300', '\341', '\342', '\343', '\344', '\345', '\346', '\347', + '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', + '\360', '\361', '\322', '\363', '\364', '\365', '\366', '\367', + '\370', '\371', '\372', '\373', '\334', '\335', '\336', '\337', + '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', + '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', + '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', + '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377' +}; + +static const unsigned short EncISO_8859_7_CtypeTable[256] = { + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, + 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x7ca2, 
0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0, + 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0284, 0x01a0, 0x01a0, 0x00a0, 0x0000, 0x0000, 0x00a0, 0x00a0, + 0x00a0, 0x00a0, 0x0000, 0x01a0, 0x00a0, 0x01a0, 0x0000, 0x01a0, + 0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x00a0, 0x00a0, 0x34a2, 0x01a0, + 0x34a2, 0x34a2, 0x34a2, 0x01a0, 0x34a2, 0x10a0, 0x34a2, 0x34a2, + 0x30e2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x0000, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x0000 +}; + +static int +mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED, + const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower) +{ + const UChar* p = *pp; + + *lower = ENC_ISO_8859_7_TO_LOWER_CASE(*p); + (*pp)++; + return 1; +} + +#if 0 +static int +is_mbc_ambiguous(OnigCaseFoldType flag, + const UChar** pp, const UChar* end) +{ + int v; + const UChar* p = *pp; + + (*pp)++; + v = (EncISO_8859_7_CtypeTable[*p] & (BIT_CTYPE_UPPER 
| BIT_CTYPE_LOWER)); + if ((v | BIT_CTYPE_LOWER) != 0) { + if (*p == 0xc0 || *p == 0xe0) + return FALSE; + else + return TRUE; + } + + return (v != 0 ? TRUE : FALSE); +} +#endif + +static int +is_code_ctype(OnigCodePoint code, unsigned int ctype) +{ + if (code < 256) + return ENC_IS_ISO_8859_7_CTYPE(code, ctype); + else + return FALSE; +} + +static const OnigPairCaseFoldCodes CaseFoldMap[] = { + { 0xb6, 0xdc }, + { 0xb8, 0xdd }, + { 0xb9, 0xde }, + { 0xba, 0xdf }, + { 0xbc, 0xfc }, + { 0xbe, 0xfd }, + { 0xbf, 0xfe }, + + { 0xc1, 0xe1 }, + { 0xc2, 0xe2 }, + { 0xc3, 0xe3 }, + { 0xc4, 0xe4 }, + { 0xc5, 0xe5 }, + { 0xc6, 0xe6 }, + { 0xc7, 0xe7 }, + { 0xc8, 0xe8 }, + { 0xc9, 0xe9 }, + { 0xca, 0xea }, + { 0xcb, 0xeb }, + { 0xcc, 0xec }, + { 0xcd, 0xed }, + { 0xce, 0xee }, + { 0xcf, 0xef }, + + { 0xd0, 0xf0 }, + { 0xd1, 0xf1 }, + { 0xd2, 0xf2 }, + { 0xd3, 0xf3 }, + { 0xd4, 0xf4 }, + { 0xd5, 0xf5 }, + { 0xd6, 0xf6 }, + { 0xd7, 0xf7 }, + { 0xd8, 0xf8 }, + { 0xd9, 0xf9 }, + { 0xda, 0xfa }, + { 0xdb, 0xfb } +}; + +static int +apply_all_case_fold(OnigCaseFoldType flag, + OnigApplyAllCaseFoldFunc f, void* arg) +{ + return onigenc_apply_all_case_fold_with_map( + sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0, + flag, f, arg); +} + +static int +get_case_fold_codes_by_str(OnigCaseFoldType flag, + const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]) +{ + return onigenc_get_case_fold_codes_by_str_with_map( + sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0, + flag, p, end, items); +} + + +OnigEncodingType OnigEncodingISO_8859_7 = { + onigenc_single_byte_mbc_enc_len, + "ISO-8859-7", /* name */ + 1, /* max enc length */ + 1, /* min enc length */ + onigenc_is_mbc_newline_0x0a, + onigenc_single_byte_mbc_to_code, + onigenc_single_byte_code_to_mbclen, + onigenc_single_byte_code_to_mbc, + mbc_case_fold, + apply_all_case_fold, + get_case_fold_codes_by_str, + onigenc_minimum_property_name_to_ctype, + is_code_ctype, + 
onigenc_not_support_get_ctype_code_range, + onigenc_single_byte_left_adjust_char_head, + onigenc_always_true_is_allowed_reverse_match +}; diff --git a/oniguruma/enc/iso8859_8.c b/oniguruma/enc/iso8859_8.c new file mode 100644 index 0000000..fb9846f --- /dev/null +++ b/oniguruma/enc/iso8859_8.c @@ -0,0 +1,96 @@ +/********************************************************************** + iso8859_8.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2007 K.Kosako + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include "regenc.h" + +#define ENC_IS_ISO_8859_8_CTYPE(code,ctype) \ + ((EncISO_8859_8_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0) + +static const unsigned short EncISO_8859_8_CtypeTable[256] = { + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, + 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0, + 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0284, 0x0000, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, + 0x00a0, 0x00a0, 0x00a0, 0x01a0, 0x00a0, 0x01a0, 0x00a0, 0x00a0, + 0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x00a0, 0x30e2, 0x00a0, 0x01a0, + 0x00a0, 0x10a0, 0x00a0, 0x01a0, 0x10a0, 0x10a0, 0x10a0, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x01a0, + 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, + 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, + 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, + 0x30a2, 0x30a2, 0x30a2, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 +}; + +static int +is_code_ctype(OnigCodePoint code, unsigned int ctype) +{ + if (code < 256) + return ENC_IS_ISO_8859_8_CTYPE(code, ctype); + else + return FALSE; +} + +OnigEncodingType OnigEncodingISO_8859_8 = { + onigenc_single_byte_mbc_enc_len, + "ISO-8859-8", /* name */ + 1, /* max enc length */ + 1, /* min enc length */ + onigenc_is_mbc_newline_0x0a, + onigenc_single_byte_mbc_to_code, + onigenc_single_byte_code_to_mbclen, + onigenc_single_byte_code_to_mbc, + onigenc_ascii_mbc_case_fold, + onigenc_ascii_apply_all_case_fold, + onigenc_ascii_get_case_fold_codes_by_str, + onigenc_minimum_property_name_to_ctype, + is_code_ctype, + onigenc_not_support_get_ctype_code_range, + onigenc_single_byte_left_adjust_char_head, + onigenc_always_true_is_allowed_reverse_match +}; diff --git a/oniguruma/enc/iso8859_9.c b/oniguruma/enc/iso8859_9.c new file mode 100644 index 0000000..079d681 --- /dev/null +++ b/oniguruma/enc/iso8859_9.c @@ -0,0 +1,228 @@ +/********************************************************************** + iso8859_9.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2007 K.Kosako + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "regenc.h" + +#define ENC_ISO_8859_9_TO_LOWER_CASE(c) EncISO_8859_9_ToLowerCaseTable[c] +#define ENC_IS_ISO_8859_9_CTYPE(code,ctype) \ + ((EncISO_8859_9_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0) + +static const UChar EncISO_8859_9_ToLowerCaseTable[256] = { + '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', + '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', + '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', + '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', + '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', + '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', + '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', + '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', + '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', 
+ '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137', + '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177', + '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', + '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217', + '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227', + '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', + '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247', + '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257', + '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267', + '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277', + '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', + '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', + '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\327', + '\370', '\371', '\372', '\373', '\374', '\335', '\376', '\337', + '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', + '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', + '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', + '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377' +}; + +static const unsigned short EncISO_8859_9_CtypeTable[256] = { + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, + 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x7ca2, 
0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0, + 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0284, 0x01a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, + 0x00a0, 0x00a0, 0x30e2, 0x01a0, 0x00a0, 0x01a0, 0x00a0, 0x00a0, + 0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x00a0, 0x30e2, 0x00a0, 0x01a0, + 0x00a0, 0x10a0, 0x30e2, 0x01a0, 0x10a0, 0x10a0, 0x10a0, 0x01a0, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x00a0, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2 +}; + +static int +mbc_case_fold(OnigCaseFoldType flag, + const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower) +{ + const UChar* p = *pp; + + if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { + *lower++ = 's'; + *lower = 's'; + (*pp)++; + return 2; + } + + *lower = ENC_ISO_8859_9_TO_LOWER_CASE(*p); + (*pp)++; + return 1; +} + +#if 0 +static int +is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** 
/*
 * Byte pairs handed to onigenc_apply_all_case_fold_with_map() and
 * onigenc_get_case_fold_codes_by_str_with_map() by the two wrappers below.
 *
 * Each entry pairs a byte in 0xc0..0xde with the byte 0x20 above it.
 * 0xd7 has no entry, and 0xdf is not listed here (mbc_case_fold() treats
 * 0xdf specially when multi-char folding is requested).
 *
 * NOTE(review): { 0xdd, 0xfd } is listed as a pair, but the lower-case
 * table used by mbc_case_fold() maps 0xdd to itself ('\335').  Presumably
 * these are the dotted capital I / dotless small i of ISO-8859-9, which are
 * not case counterparts of each other -- confirm whether this entry is
 * intended.
 */
static const OnigPairCaseFoldCodes CaseFoldMap[] = {
  { 0xc0, 0xe0 },
  { 0xc1, 0xe1 },
  { 0xc2, 0xe2 },
  { 0xc3, 0xe3 },
  { 0xc4, 0xe4 },
  { 0xc5, 0xe5 },
  { 0xc6, 0xe6 },
  { 0xc7, 0xe7 },
  { 0xc8, 0xe8 },
  { 0xc9, 0xe9 },
  { 0xca, 0xea },
  { 0xcb, 0xeb },
  { 0xcc, 0xec },
  { 0xcd, 0xed },
  { 0xce, 0xee },
  { 0xcf, 0xef },

  { 0xd0, 0xf0 },
  { 0xd1, 0xf1 },
  { 0xd2, 0xf2 },
  { 0xd3, 0xf3 },
  { 0xd4, 0xf4 },
  { 0xd5, 0xf5 },
  { 0xd6, 0xf6 },
  /* no entry for 0xd7 */
  { 0xd8, 0xf8 },
  { 0xd9, 0xf9 },
  { 0xda, 0xfa },
  { 0xdb, 0xfb },
  { 0xdc, 0xfc },
  { 0xdd, 0xfd },
  { 0xde, 0xfe }
};
onigenc_single_byte_code_to_mbclen, + onigenc_single_byte_code_to_mbc, + mbc_case_fold, + apply_all_case_fold, + get_case_fold_codes_by_str, + onigenc_minimum_property_name_to_ctype, + is_code_ctype, + onigenc_not_support_get_ctype_code_range, + onigenc_single_byte_left_adjust_char_head, + onigenc_always_true_is_allowed_reverse_match +}; diff --git a/oniguruma/enc/koi8.c b/oniguruma/enc/koi8.c new file mode 100644 index 0000000..c664957 --- /dev/null +++ b/oniguruma/enc/koi8.c @@ -0,0 +1,250 @@ +/********************************************************************** + koi8.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2008 K.Kosako + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "regenc.h" + +#define ENC_KOI8_TO_LOWER_CASE(c) EncKOI8_ToLowerCaseTable[c] +#define ENC_IS_KOI8_CTYPE(code,ctype) \ + ((EncKOI8_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0) + +static const UChar EncKOI8_ToLowerCaseTable[256] = { + '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', + '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', + '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', + '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', + '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', + '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', + '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', + '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', + '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137', + '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177', + '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', + '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217', + '\220', '\221', '\222', 
'\223', '\224', '\225', '\226', '\227', + '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', + '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247', + '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257', + '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267', + '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277', + '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307', + '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317', + '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327', + '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337', + '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307', + '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317', + '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327', + '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337' +}; + +static const unsigned short EncKOI8_CtypeTable[256] = { + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, + 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0, + 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 
0x41a0, 0x4008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0284, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2 +}; + + +static int +koi8_mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED, + const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower) +{ + const UChar* p = *pp; + + *lower = ENC_KOI8_TO_LOWER_CASE(*p); + (*pp)++; + return 1; +} + +#if 0 +static int +koi8_is_mbc_ambiguous(OnigAmbigType flag, const OnigUChar** pp, const OnigUChar* end) +{ + const OnigUChar* p = *pp; + + (*pp)++; + if (((flag & ONIGENC_CASE_FOLD_ASCII_CASE) != 0 && + ONIGENC_IS_MBC_ASCII(p)) || + ((flag & ONIGENC_CASE_FOLD_NONASCII_CASE) != 0 && + !ONIGENC_IS_MBC_ASCII(p))) { + int v = (EncKOI8_CtypeTable[*p] & + (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER)); + return (v != 0 ? 
TRUE : FALSE); + } + return FALSE; +} +#endif + +static int +koi8_is_code_ctype(OnigCodePoint code, unsigned int ctype) +{ + if (code < 256) + return ENC_IS_KOI8_CTYPE(code, ctype); + else + return FALSE; +} + +static const OnigPairCaseFoldCodes CaseFoldMap[] = { + { 0xc0, 0xe0 }, + { 0xc1, 0xe1 }, + { 0xc2, 0xe2 }, + { 0xc3, 0xe3 }, + { 0xc4, 0xe4 }, + { 0xc5, 0xe5 }, + { 0xc6, 0xe6 }, + { 0xc7, 0xe7 }, + { 0xc8, 0xe8 }, + { 0xc9, 0xe9 }, + { 0xca, 0xea }, + { 0xcb, 0xeb }, + { 0xcc, 0xec }, + { 0xcd, 0xed }, + { 0xce, 0xee }, + { 0xcf, 0xef }, + + { 0xd0, 0xf0 }, + { 0xd1, 0xf1 }, + { 0xd2, 0xf2 }, + { 0xd3, 0xf3 }, + { 0xd4, 0xf4 }, + { 0xd5, 0xf5 }, + { 0xd6, 0xf6 }, + { 0xd7, 0xf7 }, + { 0xd8, 0xf8 }, + { 0xd9, 0xf9 }, + { 0xda, 0xfa }, + { 0xdb, 0xfb }, + { 0xdc, 0xfc }, + { 0xdd, 0xfd }, + { 0xde, 0xfe }, + { 0xdf, 0xff }, + + { 0xe0, 0xc0 }, + { 0xe1, 0xc1 }, + { 0xe2, 0xc2 }, + { 0xe3, 0xc3 }, + { 0xe4, 0xc4 }, + { 0xe5, 0xc5 }, + { 0xe6, 0xc6 }, + { 0xe7, 0xc7 }, + { 0xe8, 0xc8 }, + { 0xe9, 0xc9 }, + { 0xea, 0xca }, + { 0xeb, 0xcb }, + { 0xec, 0xcc }, + { 0xed, 0xcd }, + { 0xee, 0xce }, + { 0xef, 0xcf }, + + { 0xf0, 0xd0 }, + { 0xf1, 0xd1 }, + { 0xf2, 0xd2 }, + { 0xf3, 0xd3 }, + { 0xf4, 0xd4 }, + { 0xf5, 0xd5 }, + { 0xf6, 0xd6 }, + { 0xf7, 0xd7 }, + { 0xf8, 0xd8 }, + { 0xf9, 0xd9 }, + { 0xfa, 0xda }, + { 0xfb, 0xdb }, + { 0xfc, 0xdc }, + { 0xfe, 0xde }, + { 0xff, 0xdf } +}; + +static int +koi8_apply_all_case_fold(OnigCaseFoldType flag, + OnigApplyAllCaseFoldFunc f, void* arg) +{ + return onigenc_apply_all_case_fold_with_map( + sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0, + flag, f, arg); +} + +static int +koi8_get_case_fold_codes_by_str(OnigCaseFoldType flag, + const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]) +{ + return onigenc_get_case_fold_codes_by_str_with_map( + sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0, + flag, p, end, items); +} + +OnigEncodingType OnigEncodingKOI8 = { + 
onigenc_single_byte_mbc_enc_len, + "KOI8", /* name */ + 1, /* max enc length */ + 1, /* min enc length */ + onigenc_is_mbc_newline_0x0a, + onigenc_single_byte_mbc_to_code, + onigenc_single_byte_code_to_mbclen, + onigenc_single_byte_code_to_mbc, + koi8_mbc_case_fold, + koi8_apply_all_case_fold, + koi8_get_case_fold_codes_by_str, + onigenc_minimum_property_name_to_ctype, + koi8_is_code_ctype, + onigenc_not_support_get_ctype_code_range, + onigenc_single_byte_left_adjust_char_head, + onigenc_always_true_is_allowed_reverse_match +}; diff --git a/oniguruma/enc/koi8_r.c b/oniguruma/enc/koi8_r.c new file mode 100644 index 0000000..364dda1 --- /dev/null +++ b/oniguruma/enc/koi8_r.c @@ -0,0 +1,212 @@ +/********************************************************************** + koi8_r.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2007 K.Kosako + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "regenc.h" + +#define ENC_KOI8_R_TO_LOWER_CASE(c) EncKOI8_R_ToLowerCaseTable[c] +#define ENC_IS_KOI8_R_CTYPE(code,ctype) \ + ((EncKOI8_R_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0) + +static const UChar EncKOI8_R_ToLowerCaseTable[256] = { + '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', + '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', + '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', + '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', + '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', + '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', + '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', + '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', + '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137', + '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177', + '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', + '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217', + '\220', '\221', 
'\222', '\223', '\224', '\225', '\226', '\227', + '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', + '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247', + '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257', + '\260', '\261', '\262', '\243', '\264', '\265', '\266', '\267', + '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277', + '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307', + '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317', + '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327', + '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337', + '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307', + '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317', + '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327', + '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337' +}; + +static const unsigned short EncKOI8_R_CtypeTable[256] = { + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, + 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0, + 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x41a0, 
0x41a0, 0x41a0, 0x41a0, 0x4008, + 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, + 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, + 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, + 0x00a0, 0x00a0, 0x0284, 0x00a0, 0x00a0, 0x10a0, 0x01a0, 0x00a0, + 0x00a0, 0x00a0, 0x00a0, 0x30e2, 0x00a0, 0x00a0, 0x00a0, 0x00a0, + 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, + 0x00a0, 0x00a0, 0x00a0, 0x34a2, 0x00a0, 0x00a0, 0x00a0, 0x00a0, + 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2 +}; + +static int +koi8_r_mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED, + const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower) +{ + const UChar* p = *pp; + + *lower = ENC_KOI8_R_TO_LOWER_CASE(*p); + (*pp)++; + return 1; +} + +#if 0 +static int +koi8_r_is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end) +{ + int v; + const UChar* p = *pp; + + (*pp)++; + v = (EncKOI8_R_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER)); + return (v != 0 ? 
TRUE : FALSE); +} +#endif + +static int +koi8_r_is_code_ctype(OnigCodePoint code, unsigned int ctype) +{ + if (code < 256) + return ENC_IS_KOI8_R_CTYPE(code, ctype); + else + return FALSE; +} + +static const OnigPairCaseFoldCodes CaseFoldMap[] = { + { 0xa3, 0xb3 }, + + { 0xc0, 0xe0 }, + { 0xc1, 0xe1 }, + { 0xc2, 0xe2 }, + { 0xc3, 0xe3 }, + { 0xc4, 0xe4 }, + { 0xc5, 0xe5 }, + { 0xc6, 0xe6 }, + { 0xc7, 0xe7 }, + { 0xc8, 0xe8 }, + { 0xc9, 0xe9 }, + { 0xca, 0xea }, + { 0xcb, 0xeb }, + { 0xcc, 0xec }, + { 0xcd, 0xed }, + { 0xce, 0xee }, + { 0xcf, 0xef }, + + { 0xd0, 0xf0 }, + { 0xd1, 0xf1 }, + { 0xd2, 0xf2 }, + { 0xd3, 0xf3 }, + { 0xd4, 0xf4 }, + { 0xd5, 0xf5 }, + { 0xd6, 0xf6 }, + { 0xd7, 0xf7 }, + { 0xd8, 0xf8 }, + { 0xd9, 0xf9 }, + { 0xda, 0xfa }, + { 0xdb, 0xfb }, + { 0xdc, 0xfc }, + { 0xdd, 0xfd }, + { 0xde, 0xfe }, + { 0xdf, 0xff } +}; + +static int +koi8_r_apply_all_case_fold(OnigCaseFoldType flag, + OnigApplyAllCaseFoldFunc f, void* arg) +{ + return onigenc_apply_all_case_fold_with_map( + sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0, + flag, f, arg); +} + +static int +koi8_r_get_case_fold_codes_by_str(OnigCaseFoldType flag, + const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]) +{ + return onigenc_get_case_fold_codes_by_str_with_map( + sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0, + flag, p, end, items); +} + +OnigEncodingType OnigEncodingKOI8_R = { + onigenc_single_byte_mbc_enc_len, + "KOI8-R", /* name */ + 1, /* max enc length */ + 1, /* min enc length */ + onigenc_is_mbc_newline_0x0a, + onigenc_single_byte_mbc_to_code, + onigenc_single_byte_code_to_mbclen, + onigenc_single_byte_code_to_mbc, + koi8_r_mbc_case_fold, + koi8_r_apply_all_case_fold, + koi8_r_get_case_fold_codes_by_str, + onigenc_minimum_property_name_to_ctype, + koi8_r_is_code_ctype, + onigenc_not_support_get_ctype_code_range, + onigenc_single_byte_left_adjust_char_head, + onigenc_always_true_is_allowed_reverse_match +}; diff --git 
a/oniguruma/enc/mktable.c b/oniguruma/enc/mktable.c new file mode 100644 index 0000000..285216e --- /dev/null +++ b/oniguruma/enc/mktable.c @@ -0,0 +1,1162 @@ +/********************************************************************** + mktable.c +**********************************************************************/ +/*- + * Copyright (c) 2002-2007 K.Kosako + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
/*
 * Return non-zero if byte value `c` is classified as alphabetic in the
 * encoding identified by `enc` (one of the ASCII / ISO_8859_* / KOI8*
 * constants defined at the top of this file), otherwise 0.
 *
 * - For ASCII the raw result of isalpha(c) is returned.
 * - For every other encoding the ASCII letters 0x41..0x5a and 0x61..0x7a
 *   are accepted first, then the per-encoding ranges in the switch.
 * - An `enc` value not handled by the switch terminates the process with
 *   exit(-1).
 */
static int IsAlpha(int enc, int c)
{
  if (enc == ASCII)
    return isalpha(c);

  /* ASCII letters are alphabetic in all of the remaining encodings. */
  if (c >= 0x41 && c <= 0x5a) return 1;
  if (c >= 0x61 && c <= 0x7a) return 1;

  switch (enc) {
  case UNICODE_ISO_8859_1:
  case ISO_8859_1:
  case ISO_8859_9:
    if (c == 0xaa) return 1;
    if (c == 0xb5) return 1;
    if (c == 0xba) return 1;
    if (c >= 0xc0 && c <= 0xd6) return 1;
    if (c >= 0xd8 && c <= 0xf6) return 1;
    if (c >= 0xf8 && c <= 0xff) return 1;
    break;

  case ISO_8859_2:
    if (c == 0xa1 || c == 0xa3) return 1;
    if (c == 0xa5 || c == 0xa6) return 1;
    if (c >= 0xa9 && c <= 0xac) return 1;
    if (c >= 0xae && c <= 0xaf) return 1;
    if (c == 0xb1 || c == 0xb3) return 1;
    if (c == 0xb5 || c == 0xb6) return 1;
    if (c >= 0xb9 && c <= 0xbc) return 1;
    if (c >= 0xbe && c <= 0xbf) return 1;
    if (c >= 0xc0 && c <= 0xd6) return 1;
    if (c >= 0xd8 && c <= 0xf6) return 1;
    if (c >= 0xf8 && c <= 0xfe) return 1;
    break;

  case ISO_8859_3:
    if (c == 0xa1) return 1;
    if (c == 0xa6) return 1;
    if (c >= 0xa9 && c <= 0xac) return 1;
    if (c == 0xaf) return 1;
    if (c == 0xb1) return 1;
    if (c == 0xb5 || c == 0xb6) return 1;
    if (c >= 0xb9 && c <= 0xbc) return 1;
    if (c == 0xbf) return 1;
    if (c >= 0xc0 && c <= 0xc2) return 1;
    if (c >= 0xc4 && c <= 0xcf) return 1;
    if (c >= 0xd1 && c <= 0xd6) return 1;
    if (c >= 0xd8 && c <= 0xe2) return 1;
    if (c >= 0xe4 && c <= 0xef) return 1;
    if (c >= 0xf1 && c <= 0xf6) return 1;
    if (c >= 0xf8 && c <= 0xfe) return 1;
    break;

  case ISO_8859_4:
    if (c >= 0xa1 && c <= 0xa3) return 1;
    if (c == 0xa5 || c == 0xa6) return 1;
    if (c >= 0xa9 && c <= 0xac) return 1;
    if (c == 0xae) return 1;
    if (c == 0xb1 || c == 0xb3) return 1;
    if (c == 0xb5 || c == 0xb6) return 1;
    if (c >= 0xb9 && c <= 0xbf) return 1;
    if (c >= 0xc0 && c <= 0xd6) return 1;
    if (c >= 0xd8 && c <= 0xf6) return 1;
    if (c >= 0xf8 && c <= 0xfe) return 1;
    break;

  case ISO_8859_5:
    if (c >= 0xa1 && c <= 0xcf && c != 0xad) return 1;
    if (c >= 0xd0 && c <= 0xff && c != 0xf0 && c != 0xfd) return 1;
    break;

  case ISO_8859_6:
    if (c >= 0xc1 && c <= 0xda) return 1;
    if (c >= 0xe0 && c <= 0xf2) return 1;
    break;

  case ISO_8859_7:
    if (c == 0xb6) return 1;
    if (c >= 0xb8 && c <= 0xba) return 1;
    if (c == 0xbc) return 1;
    if (c >= 0xbe && c <= 0xbf) return 1;
    if (c == 0xc0) return 1;
    if (c >= 0xc1 && c <= 0xdb && c != 0xd2) return 1;
    if (c >= 0xdc && c <= 0xfe) return 1;
    break;

  case ISO_8859_8:
    if (c == 0xb5) return 1;
    if (c >= 0xe0 && c <= 0xfa) return 1;
    break;

  case ISO_8859_10:
    if (c >= 0xa1 && c <= 0xa6) return 1;
    if (c >= 0xa8 && c <= 0xac) return 1;
    if (c == 0xae || c == 0xaf) return 1;
    if (c >= 0xb1 && c <= 0xb6) return 1;
    if (c >= 0xb8 && c <= 0xbc) return 1;
    if (c >= 0xbe && c <= 0xff) return 1;
    break;

  case ISO_8859_11:
    if (c >= 0xa1 && c <= 0xda) return 1;
    if (c >= 0xdf && c <= 0xfb) return 1;
    break;

  case ISO_8859_13:
    if (c == 0xa8) return 1;
    if (c == 0xaa) return 1;
    if (c == 0xaf) return 1;
    if (c == 0xb5) return 1;
    if (c == 0xb8) return 1;
    if (c == 0xba) return 1;
    if (c >= 0xbf && c <= 0xd6) return 1;
    if (c >= 0xd8 && c <= 0xf6) return 1;
    if (c >= 0xf8 && c <= 0xfe) return 1;
    break;

  case ISO_8859_14:
    if (c == 0xa1 || c == 0xa2) return 1;
    if (c == 0xa4 || c == 0xa5) return 1;
    if (c == 0xa6 || c == 0xa8) return 1;
    if (c >= 0xaa && c <= 0xac) return 1;
    if (c >= 0xaf && c <= 0xb5) return 1;
    if (c >= 0xb7 && c <= 0xff) return 1;
    break;

  case ISO_8859_15:
    if (c == 0xaa) return 1;
    if (c == 0xb5) return 1;
    if (c == 0xba) return 1;
    if (c >= 0xc0 && c <= 0xd6) return 1;
    if (c >= 0xd8 && c <= 0xf6) return 1;
    if (c >= 0xf8 && c <= 0xff) return 1;
    if (c == 0xa6) return 1;
    if (c == 0xa8) return 1;
    if (c == 0xb4) return 1;
    if (c == 0xb8) return 1;
    if (c == 0xbc) return 1;
    if (c == 0xbd) return 1;
    if (c == 0xbe) return 1;
    break;

  case ISO_8859_16:
    if (c == 0xa1) return 1;
    if (c == 0xa2) return 1;
    if (c == 0xa3) return 1;
    if (c == 0xa6) return 1;
    if (c == 0xa8) return 1;
    if (c == 0xaa) return 1;
    if (c == 0xac) return 1;
    if (c == 0xae) return 1;
    if (c == 0xaf) return 1;
    if (c == 0xb2) return 1;
    if (c == 0xb3) return 1;
    if (c == 0xb4) return 1;
    if (c >= 0xb8 && c <= 0xba) return 1;
    if (c == 0xbc) return 1;
    if (c == 0xbd) return 1;
    if (c == 0xbe) return 1;
    if (c == 0xbf) return 1;
    if (c >= 0xc0 && c <= 0xde) return 1;
    if (c >= 0xdf && c <= 0xff) return 1;
    break;

  case KOI8_R:
    /* KOI8-R adds 0xa3 and 0xb3, then shares the KOI8 range below. */
    if (c == 0xa3 || c == 0xb3) return 1;
    /* fall */
  case KOI8:
    if (c >= 0xc0 && c <= 0xff) return 1;
    break;

  default:
    /* Unknown encoding id: abort the table generator. */
    exit(-1);
  }

  return 0;
}
/*
 * Return 1 if `c` is one of the byte values 0x30..0x39, otherwise 0.
 * The answer is the same for every encoding, so `enc` is ignored.
 */
static int IsDigit(int enc, int c)
{
  (void)enc;  /* the digit range does not depend on the encoding */

  return (c < 0x30 || c > 0x39) ? 0 : 1;
}
/*
 * Return non-zero if byte value `c` is classified as a lower-case letter in
 * the encoding identified by `enc`, otherwise 0.
 *
 * - For ASCII the raw result of islower(c) is returned.
 * - For every other encoding 0x61..0x7a is accepted first, then the
 *   per-encoding values in the switch.  ISO_8859_6 and ISO_8859_11 add
 *   nothing beyond that ASCII range.
 * - An `enc` value not handled by the switch terminates the process with
 *   exit(-1).
 */
static int IsLower(int enc, int c)
{
  if (enc == ASCII)
    return islower(c);

  /* ASCII lower-case letters are lower case in all remaining encodings. */
  if (c >= 0x61 && c <= 0x7a) return 1;

  switch (enc) {
  case UNICODE_ISO_8859_1:
  case ISO_8859_1:
  case ISO_8859_9:
    if (c == 0xaa) return 1;
    if (c == 0xb5) return 1;
    if (c == 0xba) return 1;
    if (c >= 0xdf && c <= 0xf6) return 1;
    if (c >= 0xf8 && c <= 0xff) return 1;
    break;

  case ISO_8859_2:
    if (c == 0xb1 || c == 0xb3) return 1;
    if (c == 0xb5 || c == 0xb6) return 1;
    if (c >= 0xb9 && c <= 0xbc) return 1;
    if (c >= 0xbe && c <= 0xbf) return 1;
    if (c >= 0xdf && c <= 0xf6) return 1;
    if (c >= 0xf8 && c <= 0xfe) return 1;
    break;

  case ISO_8859_3:
    if (c == 0xb1) return 1;
    if (c == 0xb5 || c == 0xb6) return 1;
    if (c >= 0xb9 && c <= 0xbc) return 1;
    if (c == 0xbf) return 1;
    if (c == 0xdf) return 1;
    if (c >= 0xe0 && c <= 0xe2) return 1;
    if (c >= 0xe4 && c <= 0xef) return 1;
    if (c >= 0xf1 && c <= 0xf6) return 1;
    if (c >= 0xf8 && c <= 0xfe) return 1;
    break;

  case ISO_8859_4:
    if (c == 0xa2) return 1;
    if (c == 0xb1 || c == 0xb3) return 1;
    if (c == 0xb5 || c == 0xb6) return 1;
    if (c >= 0xb9 && c <= 0xbc) return 1;
    if (c >= 0xbe && c <= 0xbf) return 1;
    if (c == 0xdf) return 1;
    if (c >= 0xe0 && c <= 0xf6) return 1;
    if (c >= 0xf8 && c <= 0xfe) return 1;
    break;

  case ISO_8859_5:
    if (c >= 0xd0 && c <= 0xff && c != 0xf0 && c != 0xfd) return 1;
    break;

  case ISO_8859_6:
    /* no additional lower-case values */
    break;

  case ISO_8859_7:
    if (c == 0xc0) return 1;
    if (c >= 0xdc && c <= 0xfe) return 1;
    break;

  case ISO_8859_8:
    if (c == 0xb5) return 1;
    break;

  case ISO_8859_10:
    if (c >= 0xb1 && c <= 0xb6) return 1;
    if (c >= 0xb8 && c <= 0xbc) return 1;
    if (c == 0xbe || c == 0xbf) return 1;
    if (c >= 0xdf && c <= 0xff) return 1;
    break;

  case ISO_8859_11:
    /* no additional lower-case values */
    break;

  case ISO_8859_13:
    if (c == 0xb5) return 1;
    if (c == 0xb8) return 1;
    if (c == 0xba) return 1;
    if (c == 0xbf) return 1;
    if (c >= 0xdf && c <= 0xf6) return 1;
    if (c >= 0xf8 && c <= 0xfe) return 1;
    break;

  case ISO_8859_14:
    if (c == 0xa2) return 1;
    if (c == 0xa5) return 1;
    if (c == 0xab) return 1;
    if (c == 0xb1 || c == 0xb3 || c == 0xb5) return 1;
    if (c >= 0xb8 && c <= 0xba) return 1;
    if (c == 0xbc) return 1;
    if (c == 0xbe || c == 0xbf) return 1;
    if (c >= 0xdf && c <= 0xff) return 1;
    break;

  case ISO_8859_15:
    if (c == 0xaa) return 1;
    if (c == 0xb5) return 1;
    if (c == 0xba) return 1;
    if (c >= 0xdf && c <= 0xf6) return 1;
    if (c >= 0xf8 && c <= 0xff) return 1;
    if (c == 0xa8) return 1;
    if (c == 0xb8) return 1;
    if (c == 0xbd) return 1;
    break;

  case ISO_8859_16:
    if (c == 0xa2) return 1;
    if (c == 0xa8) return 1;
    if (c == 0xae) return 1;
    if (c == 0xb3) return 1;
    if (c >= 0xb8 && c <= 0xba) return 1;
    if (c == 0xbd) return 1;
    if (c == 0xbf) return 1;
    if (c >= 0xdf && c <= 0xff) return 1;
    break;

  case KOI8_R:
    /* KOI8-R adds 0xa3, then shares the KOI8 range below. */
    if (c == 0xa3) return 1;
    /* fall */
  case KOI8:
    if (c >= 0xc0 && c <= 0xdf) return 1;
    break;

  default:
    /* Unknown encoding id: abort the table generator. */
    exit(-1);
  }

  return 0;
}
ISO_8859_2: + case ISO_8859_4: + case ISO_8859_5: + case ISO_8859_9: + case ISO_8859_10: + case ISO_8859_13: + case ISO_8859_14: + case ISO_8859_15: + case ISO_8859_16: + if (c >= 0xa0 && c <= 0xff) return 1; + break; + + case ISO_8859_3: + if (c >= 0xa0) { + if (c == 0xa5 || c == 0xae || c == 0xbe || c == 0xc3 || c == 0xd0 || + c == 0xe3 || c == 0xf0) + return 0; + else + return 1; + } + break; + + case ISO_8859_6: + if (c == 0xa0) return 1; + if (c == 0xa4 || c == 0xac || c == 0xad || c == 0xbb || c == 0xbf) + return 1; + if (c >= 0xc1 && c <= 0xda) return 1; + if (c >= 0xe0 && c <= 0xf2) return 1; + break; + + case ISO_8859_7: + if (c >= 0xa0 && c <= 0xfe && + c != 0xa4 && c != 0xa5 && c != 0xaa && + c != 0xae && c != 0xd2) return 1; + break; + + case ISO_8859_8: + if (c >= 0xa0 && c <= 0xfa) { + if (c >= 0xbf && c <= 0xde) return 0; + if (c == 0xa1) return 0; + return 1; + } + break; + + case ISO_8859_11: + if (c >= 0xa0 && c <= 0xda) return 1; + if (c >= 0xdf && c <= 0xfb) return 1; + break; + + case KOI8: + if (c == 0xa0) return 1; + if (c >= 0xc0 && c <= 0xff) return 1; + break; + + case KOI8_R: + if (c >= 0x80 && c <= 0xff) return 1; + break; + + default: + exit(-1); + } + + return 0; +} + +static int IsPunct(int enc, int c) +{ + if (enc == ASCII) + return ispunct(c); + + if (enc == UNICODE_ISO_8859_1) { + if (c == 0x24 || c == 0x2b || c == 0x5e || c == 0x60 || + c == 0x7c || c == 0x7e) return 1; + if (c >= 0x3c && c <= 0x3e) return 1; + } + + if (c >= 0x21 && c <= 0x2f) return 1; + if (c >= 0x3a && c <= 0x40) return 1; + if (c >= 0x5b && c <= 0x60) return 1; + if (c >= 0x7b && c <= 0x7e) return 1; + + switch (enc) { + case ISO_8859_1: + case ISO_8859_9: + case ISO_8859_15: + if (c == 0xad) return 1; + /* fall */ + case UNICODE_ISO_8859_1: + if (c == 0xa1) return 1; + if (c == 0xab) return 1; + if (c == 0xb7) return 1; + if (c == 0xbb) return 1; + if (c == 0xbf) return 1; + break; + + case ISO_8859_2: + case ISO_8859_4: + case ISO_8859_5: + case 
ISO_8859_14: + if (c == 0xad) return 1; + break; + + case ISO_8859_3: + case ISO_8859_10: + if (c == 0xad) return 1; + if (c == 0xb7) return 1; + if (c == 0xbd) return 1; + break; + + case ISO_8859_6: + if (c == 0xac) return 1; + if (c == 0xad) return 1; + if (c == 0xbb) return 1; + if (c == 0xbf) return 1; + break; + + case ISO_8859_7: + if (c == 0xa1 || c == 0xa2) return 1; + if (c == 0xab) return 1; + if (c == 0xaf) return 1; + if (c == 0xad) return 1; + if (c == 0xb7 || c == 0xbb) return 1; + break; + + case ISO_8859_8: + if (c == 0xab) return 1; + if (c == 0xad) return 1; + if (c == 0xb7) return 1; + if (c == 0xbb) return 1; + if (c == 0xdf) return 1; + break; + + case ISO_8859_13: + if (c == 0xa1 || c == 0xa5) return 1; + if (c == 0xab || c == 0xad) return 1; + if (c == 0xb4 || c == 0xb7) return 1; + if (c == 0xbb) return 1; + if (c == 0xff) return 1; + break; + + case ISO_8859_16: + if (c == 0xa5) return 1; + if (c == 0xab) return 1; + if (c == 0xad) return 1; + if (c == 0xb5) return 1; + if (c == 0xb7) return 1; + if (c == 0xbb) return 1; + break; + + case KOI8_R: + if (c == 0x9e) return 1; + break; + + case ISO_8859_11: + case KOI8: + break; + + default: + exit(-1); + } + + return 0; +} + +static int IsSpace(int enc, int c) +{ + if (enc == ASCII) + return isspace(c); + + if (c >= 0x09 && c <= 0x0d) return 1; + if (c == 0x20) return 1; + + switch (enc) { + case UNICODE_ISO_8859_1: + if (c == 0x85) return 1; + /* fall */ + case ISO_8859_1: + case ISO_8859_2: + case ISO_8859_3: + case ISO_8859_4: + case ISO_8859_5: + case ISO_8859_6: + case ISO_8859_7: + case ISO_8859_8: + case ISO_8859_9: + case ISO_8859_10: + case ISO_8859_11: + case ISO_8859_13: + case ISO_8859_14: + case ISO_8859_15: + case ISO_8859_16: + case KOI8: + if (c == 0xa0) return 1; + break; + + case KOI8_R: + if (c == 0x9a) return 1; + break; + + default: + exit(-1); + } + + return 0; +} + +static int IsUpper(int enc, int c) +{ + if (enc == ASCII) + return isupper(c); + + if (c >= 0x41 && c <= 
0x5a) return 1; + + switch (enc) { + case UNICODE_ISO_8859_1: + case ISO_8859_1: + case ISO_8859_9: + if (c >= 0xc0 && c <= 0xd6) return 1; + if (c >= 0xd8 && c <= 0xde) return 1; + break; + + case ISO_8859_2: + if (c == 0xa1 || c == 0xa3) return 1; + if (c == 0xa5 || c == 0xa6) return 1; + if (c >= 0xa9 && c <= 0xac) return 1; + if (c >= 0xae && c <= 0xaf) return 1; + if (c >= 0xc0 && c <= 0xd6) return 1; + if (c >= 0xd8 && c <= 0xde) return 1; + break; + + case ISO_8859_3: + if (c == 0xa1) return 1; + if (c == 0xa6) return 1; + if (c >= 0xa9 && c <= 0xac) return 1; + if (c == 0xaf) return 1; + if (c >= 0xc0 && c <= 0xc2) return 1; + if (c >= 0xc4 && c <= 0xcf) return 1; + if (c >= 0xd1 && c <= 0xd6) return 1; + if (c >= 0xd8 && c <= 0xde) return 1; + break; + + case ISO_8859_4: + if (c == 0xa1 || c == 0xa3) return 1; + if (c == 0xa5 || c == 0xa6) return 1; + if (c >= 0xa9 && c <= 0xac) return 1; + if (c == 0xae) return 1; + if (c == 0xbd) return 1; + if (c >= 0xc0 && c <= 0xd6) return 1; + if (c >= 0xd8 && c <= 0xde) return 1; + break; + + case ISO_8859_5: + if (c >= 0xa1 && c <= 0xcf && c != 0xad) return 1; + break; + + case ISO_8859_6: + break; + + case ISO_8859_7: + if (c == 0xb6) return 1; + if (c >= 0xb8 && c <= 0xba) return 1; + if (c == 0xbc) return 1; + if (c >= 0xbe && c <= 0xbf) return 1; + if (c >= 0xc1 && c <= 0xdb && c != 0xd2) return 1; + break; + + case ISO_8859_8: + case ISO_8859_11: + break; + + case ISO_8859_10: + if (c >= 0xa1 && c <= 0xa6) return 1; + if (c >= 0xa8 && c <= 0xac) return 1; + if (c == 0xae || c == 0xaf) return 1; + if (c >= 0xc0 && c <= 0xde) return 1; + break; + + case ISO_8859_13: + if (c == 0xa8) return 1; + if (c == 0xaa) return 1; + if (c == 0xaf) return 1; + if (c >= 0xc0 && c <= 0xd6) return 1; + if (c >= 0xd8 && c <= 0xde) return 1; + break; + + case ISO_8859_14: + if (c == 0xa1) return 1; + if (c == 0xa4 || c == 0xa6) return 1; + if (c == 0xa8) return 1; + if (c == 0xaa || c == 0xac) return 1; + if (c == 0xaf || c == 
0xb0) return 1; + if (c == 0xb2 || c == 0xb4 || c == 0xb7) return 1; + if (c == 0xbb || c == 0xbd) return 1; + if (c >= 0xc0 && c <= 0xde) return 1; + break; + + case ISO_8859_15: + if (c >= 0xc0 && c <= 0xd6) return 1; + if (c >= 0xd8 && c <= 0xde) return 1; + if (c == 0xa6) return 1; + if (c == 0xb4) return 1; + if (c == 0xbc) return 1; + if (c == 0xbe) return 1; + break; + + case ISO_8859_16: + if (c == 0xa1) return 1; + if (c == 0xa3) return 1; + if (c == 0xa6) return 1; + if (c == 0xaa) return 1; + if (c == 0xac) return 1; + if (c == 0xaf) return 1; + if (c == 0xb2) return 1; + if (c == 0xb4) return 1; + if (c == 0xbc) return 1; + if (c == 0xbe) return 1; + if (c >= 0xc0 && c <= 0xde) return 1; + break; + + case KOI8_R: + if (c == 0xb3) return 1; + /* fall */ + case KOI8: + if (c >= 0xe0 && c <= 0xff) return 1; + break; + + default: + exit(-1); + } + + return 0; +} + +static int IsXDigit(int enc, int c) +{ + if (enc == ASCII) + return isxdigit(c); + + if (c >= 0x30 && c <= 0x39) return 1; + if (c >= 0x41 && c <= 0x46) return 1; + if (c >= 0x61 && c <= 0x66) return 1; + return 0; +} + +static int IsWord(int enc, int c) +{ + if (enc == ASCII) { + return (isalpha(c) || isdigit(c) || c == 0x5f); + } + + if (c >= 0x30 && c <= 0x39) return 1; + if (c >= 0x41 && c <= 0x5a) return 1; + if (c == 0x5f) return 1; + if (c >= 0x61 && c <= 0x7a) return 1; + + switch (enc) { + case UNICODE_ISO_8859_1: + case ISO_8859_1: + case ISO_8859_9: + if (c == 0xaa) return 1; + if (c >= 0xb2 && c <= 0xb3) return 1; + if (c == 0xb5) return 1; + if (c >= 0xb9 && c <= 0xba) return 1; + if (c >= 0xbc && c <= 0xbe) return 1; + if (c >= 0xc0 && c <= 0xd6) return 1; + if (c >= 0xd8 && c <= 0xf6) return 1; + if (c >= 0xf8 && c <= 0xff) return 1; + break; + + case ISO_8859_2: + if (c == 0xa1 || c == 0xa3) return 1; + if (c == 0xa5 || c == 0xa6) return 1; + if (c >= 0xa9 && c <= 0xac) return 1; + if (c >= 0xae && c <= 0xaf) return 1; + if (c == 0xb1 || c == 0xb3) return 1; + if (c == 0xb5 || c 
== 0xb6) return 1; + if (c >= 0xb9 && c <= 0xbc) return 1; + if (c >= 0xbe && c <= 0xbf) return 1; + if (c >= 0xc0 && c <= 0xd6) return 1; + if (c >= 0xd8 && c <= 0xf6) return 1; + if (c >= 0xf8 && c <= 0xfe) return 1; + break; + + case ISO_8859_3: + if (c == 0xa1) return 1; + if (c == 0xa6) return 1; + if (c >= 0xa9 && c <= 0xac) return 1; + if (c == 0xaf) return 1; + if (c >= 0xb1 && c <= 0xb3) return 1; + if (c == 0xb5 || c == 0xb6) return 1; + if (c >= 0xb9 && c <= 0xbd) return 1; + if (c == 0xbf) return 1; + if (c >= 0xc0 && c <= 0xc2) return 1; + if (c >= 0xc4 && c <= 0xcf) return 1; + if (c >= 0xd1 && c <= 0xd6) return 1; + if (c >= 0xd8 && c <= 0xe2) return 1; + if (c >= 0xe4 && c <= 0xef) return 1; + if (c >= 0xf1 && c <= 0xf6) return 1; + if (c >= 0xf8 && c <= 0xfe) return 1; + break; + + case ISO_8859_4: + if (c >= 0xa1 && c <= 0xa3) return 1; + if (c == 0xa5 || c == 0xa6) return 1; + if (c >= 0xa9 && c <= 0xac) return 1; + if (c == 0xae) return 1; + if (c == 0xb1 || c == 0xb3) return 1; + if (c == 0xb5 || c == 0xb6) return 1; + if (c >= 0xb9 && c <= 0xbf) return 1; + if (c >= 0xc0 && c <= 0xd6) return 1; + if (c >= 0xd8 && c <= 0xf6) return 1; + if (c >= 0xf8 && c <= 0xfe) return 1; + break; + + case ISO_8859_5: + if (c >= 0xa1 && c <= 0xcf && c != 0xad) return 1; + if (c >= 0xd0 && c <= 0xff && c != 0xf0 && c != 0xfd) return 1; + break; + + case ISO_8859_6: + if (c >= 0xc1 && c <= 0xda) return 1; + if (c >= 0xe0 && c <= 0xea) return 1; + if (c >= 0xeb && c <= 0xf2) return 1; + break; + + case ISO_8859_7: + if (c == 0xb2 || c == 0xb3) return 1; + if (c == 0xb6) return 1; + if (c >= 0xb8 && c <= 0xba) return 1; + if (c >= 0xbc && c <= 0xbf) return 1; + if (c == 0xc0) return 1; + if (c >= 0xc1 && c <= 0xdb && c != 0xd2) return 1; + if (c >= 0xdc && c <= 0xfe) return 1; + break; + + case ISO_8859_8: + if (c == 0xb2 || c == 0xb3 || c == 0xb5 || c == 0xb9) return 1; + if (c >= 0xbc && c <= 0xbe) return 1; + if (c >= 0xe0 && c <= 0xfa) return 1; + break; + + 
case ISO_8859_10: + if (c >= 0xa1 && c <= 0xff) { + if (c != 0xa7 && c != 0xad && c != 0xb0 && c != 0xb7 && c != 0xbd) + return 1; + } + break; + + case ISO_8859_11: + if (c >= 0xa1 && c <= 0xda) return 1; + if (c >= 0xdf && c <= 0xfb) return 1; + break; + + case ISO_8859_13: + if (c == 0xa8) return 1; + if (c == 0xaa) return 1; + if (c == 0xaf) return 1; + if (c == 0xb2 || c == 0xb3 || c == 0xb5 || c == 0xb9) return 1; + if (c >= 0xbc && c <= 0xbe) return 1; + if (c == 0xb8) return 1; + if (c == 0xba) return 1; + if (c >= 0xbf && c <= 0xd6) return 1; + if (c >= 0xd8 && c <= 0xf6) return 1; + if (c >= 0xf8 && c <= 0xfe) return 1; + break; + + case ISO_8859_14: + if (c >= 0xa1 && c <= 0xff) { + if (c == 0xa3 || c == 0xa7 || c == 0xa9 || c == 0xad || c == 0xae || + c == 0xb6) return 0; + return 1; + } + break; + + case ISO_8859_15: + if (c == 0xaa) return 1; + if (c >= 0xb2 && c <= 0xb3) return 1; + if (c == 0xb5) return 1; + if (c >= 0xb9 && c <= 0xba) return 1; + if (c >= 0xbc && c <= 0xbe) return 1; + if (c >= 0xc0 && c <= 0xd6) return 1; + if (c >= 0xd8 && c <= 0xf6) return 1; + if (c >= 0xf8 && c <= 0xff) return 1; + if (c == 0xa6) return 1; + if (c == 0xa8) return 1; + if (c == 0xb4) return 1; + if (c == 0xb8) return 1; + break; + + case ISO_8859_16: + if (c == 0xa1) return 1; + if (c == 0xa2) return 1; + if (c == 0xa3) return 1; + if (c == 0xa6) return 1; + if (c == 0xa8) return 1; + if (c == 0xaa) return 1; + if (c == 0xac) return 1; + if (c == 0xae) return 1; + if (c == 0xaf) return 1; + if (c == 0xb2) return 1; + if (c == 0xb3) return 1; + if (c == 0xb4) return 1; + if (c >= 0xb8 && c <= 0xba) return 1; + if (c == 0xbc) return 1; + if (c == 0xbd) return 1; + if (c == 0xbe) return 1; + if (c == 0xbf) return 1; + if (c >= 0xc0 && c <= 0xde) return 1; + if (c >= 0xdf && c <= 0xff) return 1; + break; + + case KOI8_R: + if (c == 0x9d) return 1; + if (c == 0xa3 || c == 0xb3) return 1; + /* fall */ + case KOI8: + if (c >= 0xc0 && c <= 0xff) return 1; + break; + + 
default: + exit(-1); + } + + return 0; +} + +static int IsAscii(int enc ARG_UNUSED, int c) +{ + if (c >= 0x00 && c <= 0x7f) return 1; + return 0; +} + +static int IsNewline(int enc ARG_UNUSED, int c) +{ + if (c == 0x0a) return 1; + return 0; +} + +static int exec(FILE* fp, ENC_INFO* einfo) +{ +#define NCOL 8 + + int c, val, enc; + + enc = einfo->num; + + fprintf(fp, "static const unsigned short Enc%s_CtypeTable[256] = {\n", + einfo->name); + + for (c = 0; c < 256; c++) { + val = 0; + if (IsNewline(enc, c)) val |= BIT_CTYPE_NEWLINE; + if (IsAlpha (enc, c)) val |= (BIT_CTYPE_ALPHA | BIT_CTYPE_ALNUM); + if (IsBlank (enc, c)) val |= BIT_CTYPE_BLANK; + if (IsCntrl (enc, c)) val |= BIT_CTYPE_CNTRL; + if (IsDigit (enc, c)) val |= (BIT_CTYPE_DIGIT | BIT_CTYPE_ALNUM); + if (IsGraph (enc, c)) val |= BIT_CTYPE_GRAPH; + if (IsLower (enc, c)) val |= BIT_CTYPE_LOWER; + if (IsPrint (enc, c)) val |= BIT_CTYPE_PRINT; + if (IsPunct (enc, c)) val |= BIT_CTYPE_PUNCT; + if (IsSpace (enc, c)) val |= BIT_CTYPE_SPACE; + if (IsUpper (enc, c)) val |= BIT_CTYPE_UPPER; + if (IsXDigit(enc, c)) val |= BIT_CTYPE_XDIGIT; + if (IsWord (enc, c)) val |= BIT_CTYPE_WORD; + if (IsAscii (enc, c)) val |= BIT_CTYPE_ASCII; + + if (c % NCOL == 0) fputs(" ", fp); + fprintf(fp, "0x%04x", val); + if (c != 255) fputs(",", fp); + if (c != 0 && c % NCOL == (NCOL-1)) + fputs("\n", fp); + else + fputs(" ", fp); + } + fprintf(fp, "};\n"); + return 0; +} + +extern int main(int argc ARG_UNUSED, char* argv[] ARG_UNUSED) +{ + int i; + FILE* fp = stdout; + + setlocale(LC_ALL, "C"); + /* setlocale(LC_ALL, "POSIX"); */ + /* setlocale(LC_ALL, "en_GB.iso88591"); */ + /* setlocale(LC_ALL, "de_BE.iso88591"); */ + /* setlocale(LC_ALL, "fr_FR.iso88591"); */ + + for (i = 0; i < (int )(sizeof(Info)/sizeof(ENC_INFO)); i++) { + exec(fp, &Info[i]); + } + + return 0; +} diff --git a/oniguruma/enc/sjis.c b/oniguruma/enc/sjis.c new file mode 100644 index 0000000..7a54c9f --- /dev/null +++ b/oniguruma/enc/sjis.c @@ -0,0 +1,318 @@ 
+/********************************************************************** + sjis.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2008 K.Kosako + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include "regint.h" + +static const int EncLen_SJIS[] = { + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1 +}; + +static const char SJIS_CAN_BE_TRAIL_TABLE[256] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0 +}; + +#define SJIS_ISMB_FIRST(byte) (EncLen_SJIS[byte] > 1) +#define SJIS_ISMB_TRAIL(byte) SJIS_CAN_BE_TRAIL_TABLE[(byte)] + +static int +mbc_enc_len(const UChar* p) +{ + return EncLen_SJIS[*p]; +} + +static int +code_to_mbclen(OnigCodePoint code) +{ + if (code < 256) { + if 
(EncLen_SJIS[(int )code] == 1) + return 1; + else + return 0; + } + else if (code <= 0xffff) { + return 2; + } + else + return ONIGERR_INVALID_CODE_POINT_VALUE; +} + +static OnigCodePoint +mbc_to_code(const UChar* p, const UChar* end) +{ + int c, i, len; + OnigCodePoint n; + + len = enclen(ONIG_ENCODING_SJIS, p); + c = *p++; + n = c; + if (len == 1) return n; + + for (i = 1; i < len; i++) { + if (p >= end) break; + c = *p++; + n <<= 8; n += c; + } + return n; +} + +static int +code_to_mbc(OnigCodePoint code, UChar *buf) +{ + UChar *p = buf; + + if ((code & 0xff00) != 0) *p++ = (UChar )(((code >> 8) & 0xff)); + *p++ = (UChar )(code & 0xff); + +#if 0 + if (enclen(ONIG_ENCODING_SJIS, buf) != (p - buf)) + return REGERR_INVALID_CODE_POINT_VALUE; +#endif + return p - buf; +} + +static int +mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED, + const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower) +{ + const UChar* p = *pp; + + if (ONIGENC_IS_MBC_ASCII(p)) { + *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p); + (*pp)++; + return 1; + } + else { + int i; + int len = enclen(ONIG_ENCODING_SJIS, p); + + for (i = 0; i < len; i++) { + *lower++ = *p++; + } + (*pp) += len; + return len; /* return byte length of converted char to lower */ + } +} + +#if 0 +static int +is_mbc_ambiguous(OnigCaseFoldType flag, + const UChar** pp, const UChar* end) +{ + return onigenc_mbn_is_mbc_ambiguous(ONIG_ENCODING_SJIS, flag, pp, end); + +} +#endif + +#if 0 +static int +is_code_ctype(OnigCodePoint code, unsigned int ctype) +{ + if (code < 128) + return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype); + else { + if (CTYPE_IS_WORD_GRAPH_PRINT(ctype)) { + return (code_to_mbclen(code) > 1 ? TRUE : FALSE); + } + } + + return FALSE; +} +#endif + +static UChar* +left_adjust_char_head(const UChar* start, const UChar* s) +{ + const UChar *p; + int len; + + if (s <= start) return (UChar* )s; + p = s; + + if (SJIS_ISMB_TRAIL(*p)) { + while (p > start) { + if (! 
SJIS_ISMB_FIRST(*--p)) { + p++; + break; + } + } + } + len = enclen(ONIG_ENCODING_SJIS, p); + if (p + len > s) return (UChar* )p; + p += len; + return (UChar* )(p + ((s - p) & ~1)); +} + +static int +is_allowed_reverse_match(const UChar* s, const UChar* end ARG_UNUSED) +{ + const UChar c = *s; + return (SJIS_ISMB_TRAIL(c) ? FALSE : TRUE); +} + + +static int PropertyInited = 0; +static const OnigCodePoint** PropertyList; +static int PropertyListNum; +static int PropertyListSize; +static hash_table_type* PropertyNameTable; + +static const OnigCodePoint CR_Hiragana[] = { + 1, + 0x829f, 0x82f1 +}; /* CR_Hiragana */ + +static const OnigCodePoint CR_Katakana[] = { + 4, + 0x00a6, 0x00af, + 0x00b1, 0x00dd, + 0x8340, 0x837e, + 0x8380, 0x8396, +}; /* CR_Katakana */ + +static int +init_property_list(void) +{ + int r; + + PROPERTY_LIST_ADD_PROP("Hiragana", CR_Hiragana); + PROPERTY_LIST_ADD_PROP("Katakana", CR_Katakana); + PropertyInited = 1; + + end: + return r; +} + +static int +property_name_to_ctype(OnigEncoding enc, UChar* p, UChar* end) +{ + hash_data_type ctype; + + PROPERTY_LIST_INIT_CHECK; + + if (onig_st_lookup_strend(PropertyNameTable, p, end, &ctype) == 0) { + return onigenc_minimum_property_name_to_ctype(enc, p, end); + } + + return (int )ctype; +} + +static int +is_code_ctype(OnigCodePoint code, unsigned int ctype) +{ + if (ctype <= ONIGENC_MAX_STD_CTYPE) { + if (code < 128) + return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype); + else { + if (CTYPE_IS_WORD_GRAPH_PRINT(ctype)) { + return (code_to_mbclen(code) > 1 ? 
TRUE : FALSE); + } + } + } + else { + PROPERTY_LIST_INIT_CHECK; + + ctype -= (ONIGENC_MAX_STD_CTYPE + 1); + if (ctype >= (unsigned int )PropertyListNum) + return ONIGERR_TYPE_BUG; + + return onig_is_in_code_range((UChar* )PropertyList[ctype], code); + } + + return FALSE; +} + +static int +get_ctype_code_range(OnigCtype ctype, OnigCodePoint* sb_out, + const OnigCodePoint* ranges[]) +{ + if (ctype <= ONIGENC_MAX_STD_CTYPE) { + return ONIG_NO_SUPPORT_CONFIG; + } + else { + *sb_out = 0x80; + + PROPERTY_LIST_INIT_CHECK; + + ctype -= (ONIGENC_MAX_STD_CTYPE + 1); + if (ctype >= (OnigCtype )PropertyListNum) + return ONIGERR_TYPE_BUG; + + *ranges = PropertyList[ctype]; + return 0; + } +} + +OnigEncodingType OnigEncodingSJIS = { + mbc_enc_len, + "Shift_JIS", /* name */ + 2, /* max byte length */ + 1, /* min byte length */ + onigenc_is_mbc_newline_0x0a, + mbc_to_code, + code_to_mbclen, + code_to_mbc, + mbc_case_fold, + onigenc_ascii_apply_all_case_fold, + onigenc_ascii_get_case_fold_codes_by_str, + property_name_to_ctype, + is_code_ctype, + get_ctype_code_range, + left_adjust_char_head, + is_allowed_reverse_match +}; diff --git a/oniguruma/enc/unicode.c b/oniguruma/enc/unicode.c new file mode 100644 index 0000000..af7a86e --- /dev/null +++ b/oniguruma/enc/unicode.c @@ -0,0 +1,11356 @@ +/********************************************************************** + unicode.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2008 K.Kosako + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "regint.h" + +#define ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(code,ctype) \ + ((EncUNICODE_ISO_8859_1_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0) +#if 0 +#define ONIGENC_IS_UNICODE_ISO_8859_1_BIT_CTYPE(code,cbit) \ + ((EncUNICODE_ISO_8859_1_CtypeTable[code] & (cbit)) != 0) +#endif + +static const unsigned short EncUNICODE_ISO_8859_1_CtypeTable[256] = { + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x428c, 0x4289, 0x4288, 0x4288, 0x4288, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, + 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 
0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0, + 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0288, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, + 0x0284, 0x01a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, + 0x00a0, 0x00a0, 0x30e2, 0x01a0, 0x00a0, 0x00a8, 0x00a0, 0x00a0, + 0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x00a0, 0x30e2, 0x00a0, 0x01a0, + 0x00a0, 0x10a0, 0x30e2, 0x01a0, 0x10a0, 0x10a0, 0x10a0, 0x01a0, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x00a0, + 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0, + 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2 +}; + +/* 'NEWLINE' */ +static const OnigCodePoint CR_NEWLINE[] = { + 1, + 0x000a, 0x000a +}; /* CR_NEWLINE */ + +/* 'Alpha': [[:Alpha:]] */ +static const OnigCodePoint CR_Alpha[] = { + 418, + 0x0041, 0x005a, + 0x0061, 0x007a, + 0x00aa, 0x00aa, + 0x00b5, 0x00b5, + 0x00ba, 0x00ba, + 0x00c0, 0x00d6, + 0x00d8, 0x00f6, + 0x00f8, 0x0241, + 0x0250, 0x02c1, + 0x02c6, 0x02d1, + 0x02e0, 0x02e4, + 0x02ee, 0x02ee, + 0x0300, 0x036f, + 0x037a, 0x037a, + 0x0386, 0x0386, + 0x0388, 0x038a, + 0x038c, 0x038c, + 0x038e, 0x03a1, + 
0x03a3, 0x03ce, + 0x03d0, 0x03f5, + 0x03f7, 0x0481, + 0x0483, 0x0486, + 0x0488, 0x04ce, + 0x04d0, 0x04f9, + 0x0500, 0x050f, + 0x0531, 0x0556, + 0x0559, 0x0559, + 0x0561, 0x0587, + 0x0591, 0x05b9, + 0x05bb, 0x05bd, + 0x05bf, 0x05bf, + 0x05c1, 0x05c2, + 0x05c4, 0x05c5, + 0x05c7, 0x05c7, + 0x05d0, 0x05ea, + 0x05f0, 0x05f2, + 0x0610, 0x0615, + 0x0621, 0x063a, + 0x0640, 0x065e, + 0x066e, 0x06d3, + 0x06d5, 0x06dc, + 0x06de, 0x06e8, + 0x06ea, 0x06ef, + 0x06fa, 0x06fc, + 0x06ff, 0x06ff, + 0x0710, 0x074a, + 0x074d, 0x076d, + 0x0780, 0x07b1, + 0x0901, 0x0939, + 0x093c, 0x094d, + 0x0950, 0x0954, + 0x0958, 0x0963, + 0x097d, 0x097d, + 0x0981, 0x0983, + 0x0985, 0x098c, + 0x098f, 0x0990, + 0x0993, 0x09a8, + 0x09aa, 0x09b0, + 0x09b2, 0x09b2, + 0x09b6, 0x09b9, + 0x09bc, 0x09c4, + 0x09c7, 0x09c8, + 0x09cb, 0x09ce, + 0x09d7, 0x09d7, + 0x09dc, 0x09dd, + 0x09df, 0x09e3, + 0x09f0, 0x09f1, + 0x0a01, 0x0a03, + 0x0a05, 0x0a0a, + 0x0a0f, 0x0a10, + 0x0a13, 0x0a28, + 0x0a2a, 0x0a30, + 0x0a32, 0x0a33, + 0x0a35, 0x0a36, + 0x0a38, 0x0a39, + 0x0a3c, 0x0a3c, + 0x0a3e, 0x0a42, + 0x0a47, 0x0a48, + 0x0a4b, 0x0a4d, + 0x0a59, 0x0a5c, + 0x0a5e, 0x0a5e, + 0x0a70, 0x0a74, + 0x0a81, 0x0a83, + 0x0a85, 0x0a8d, + 0x0a8f, 0x0a91, + 0x0a93, 0x0aa8, + 0x0aaa, 0x0ab0, + 0x0ab2, 0x0ab3, + 0x0ab5, 0x0ab9, + 0x0abc, 0x0ac5, + 0x0ac7, 0x0ac9, + 0x0acb, 0x0acd, + 0x0ad0, 0x0ad0, + 0x0ae0, 0x0ae3, + 0x0b01, 0x0b03, + 0x0b05, 0x0b0c, + 0x0b0f, 0x0b10, + 0x0b13, 0x0b28, + 0x0b2a, 0x0b30, + 0x0b32, 0x0b33, + 0x0b35, 0x0b39, + 0x0b3c, 0x0b43, + 0x0b47, 0x0b48, + 0x0b4b, 0x0b4d, + 0x0b56, 0x0b57, + 0x0b5c, 0x0b5d, + 0x0b5f, 0x0b61, + 0x0b71, 0x0b71, + 0x0b82, 0x0b83, + 0x0b85, 0x0b8a, + 0x0b8e, 0x0b90, + 0x0b92, 0x0b95, + 0x0b99, 0x0b9a, + 0x0b9c, 0x0b9c, + 0x0b9e, 0x0b9f, + 0x0ba3, 0x0ba4, + 0x0ba8, 0x0baa, + 0x0bae, 0x0bb9, + 0x0bbe, 0x0bc2, + 0x0bc6, 0x0bc8, + 0x0bca, 0x0bcd, + 0x0bd7, 0x0bd7, + 0x0c01, 0x0c03, + 0x0c05, 0x0c0c, + 0x0c0e, 0x0c10, + 0x0c12, 0x0c28, + 0x0c2a, 0x0c33, + 0x0c35, 0x0c39, + 0x0c3e, 0x0c44, + 
0x0c46, 0x0c48, + 0x0c4a, 0x0c4d, + 0x0c55, 0x0c56, + 0x0c60, 0x0c61, + 0x0c82, 0x0c83, + 0x0c85, 0x0c8c, + 0x0c8e, 0x0c90, + 0x0c92, 0x0ca8, + 0x0caa, 0x0cb3, + 0x0cb5, 0x0cb9, + 0x0cbc, 0x0cc4, + 0x0cc6, 0x0cc8, + 0x0cca, 0x0ccd, + 0x0cd5, 0x0cd6, + 0x0cde, 0x0cde, + 0x0ce0, 0x0ce1, + 0x0d02, 0x0d03, + 0x0d05, 0x0d0c, + 0x0d0e, 0x0d10, + 0x0d12, 0x0d28, + 0x0d2a, 0x0d39, + 0x0d3e, 0x0d43, + 0x0d46, 0x0d48, + 0x0d4a, 0x0d4d, + 0x0d57, 0x0d57, + 0x0d60, 0x0d61, + 0x0d82, 0x0d83, + 0x0d85, 0x0d96, + 0x0d9a, 0x0db1, + 0x0db3, 0x0dbb, + 0x0dbd, 0x0dbd, + 0x0dc0, 0x0dc6, + 0x0dca, 0x0dca, + 0x0dcf, 0x0dd4, + 0x0dd6, 0x0dd6, + 0x0dd8, 0x0ddf, + 0x0df2, 0x0df3, + 0x0e01, 0x0e3a, + 0x0e40, 0x0e4e, + 0x0e81, 0x0e82, + 0x0e84, 0x0e84, + 0x0e87, 0x0e88, + 0x0e8a, 0x0e8a, + 0x0e8d, 0x0e8d, + 0x0e94, 0x0e97, + 0x0e99, 0x0e9f, + 0x0ea1, 0x0ea3, + 0x0ea5, 0x0ea5, + 0x0ea7, 0x0ea7, + 0x0eaa, 0x0eab, + 0x0ead, 0x0eb9, + 0x0ebb, 0x0ebd, + 0x0ec0, 0x0ec4, + 0x0ec6, 0x0ec6, + 0x0ec8, 0x0ecd, + 0x0edc, 0x0edd, + 0x0f00, 0x0f00, + 0x0f18, 0x0f19, + 0x0f35, 0x0f35, + 0x0f37, 0x0f37, + 0x0f39, 0x0f39, + 0x0f3e, 0x0f47, + 0x0f49, 0x0f6a, + 0x0f71, 0x0f84, + 0x0f86, 0x0f8b, + 0x0f90, 0x0f97, + 0x0f99, 0x0fbc, + 0x0fc6, 0x0fc6, + 0x1000, 0x1021, + 0x1023, 0x1027, + 0x1029, 0x102a, + 0x102c, 0x1032, + 0x1036, 0x1039, + 0x1050, 0x1059, + 0x10a0, 0x10c5, + 0x10d0, 0x10fa, + 0x10fc, 0x10fc, + 0x1100, 0x1159, + 0x115f, 0x11a2, + 0x11a8, 0x11f9, + 0x1200, 0x1248, + 0x124a, 0x124d, + 0x1250, 0x1256, + 0x1258, 0x1258, + 0x125a, 0x125d, + 0x1260, 0x1288, + 0x128a, 0x128d, + 0x1290, 0x12b0, + 0x12b2, 0x12b5, + 0x12b8, 0x12be, + 0x12c0, 0x12c0, + 0x12c2, 0x12c5, + 0x12c8, 0x12d6, + 0x12d8, 0x1310, + 0x1312, 0x1315, + 0x1318, 0x135a, + 0x135f, 0x135f, + 0x1380, 0x138f, + 0x13a0, 0x13f4, + 0x1401, 0x166c, + 0x166f, 0x1676, + 0x1681, 0x169a, + 0x16a0, 0x16ea, + 0x1700, 0x170c, + 0x170e, 0x1714, + 0x1720, 0x1734, + 0x1740, 0x1753, + 0x1760, 0x176c, + 0x176e, 0x1770, + 0x1772, 0x1773, + 0x1780, 0x17b3, + 
0x17b6, 0x17d3, + 0x17d7, 0x17d7, + 0x17dc, 0x17dd, + 0x180b, 0x180d, + 0x1820, 0x1877, + 0x1880, 0x18a9, + 0x1900, 0x191c, + 0x1920, 0x192b, + 0x1930, 0x193b, + 0x1950, 0x196d, + 0x1970, 0x1974, + 0x1980, 0x19a9, + 0x19b0, 0x19c9, + 0x1a00, 0x1a1b, + 0x1d00, 0x1dc3, + 0x1e00, 0x1e9b, + 0x1ea0, 0x1ef9, + 0x1f00, 0x1f15, + 0x1f18, 0x1f1d, + 0x1f20, 0x1f45, + 0x1f48, 0x1f4d, + 0x1f50, 0x1f57, + 0x1f59, 0x1f59, + 0x1f5b, 0x1f5b, + 0x1f5d, 0x1f5d, + 0x1f5f, 0x1f7d, + 0x1f80, 0x1fb4, + 0x1fb6, 0x1fbc, + 0x1fbe, 0x1fbe, + 0x1fc2, 0x1fc4, + 0x1fc6, 0x1fcc, + 0x1fd0, 0x1fd3, + 0x1fd6, 0x1fdb, + 0x1fe0, 0x1fec, + 0x1ff2, 0x1ff4, + 0x1ff6, 0x1ffc, + 0x2071, 0x2071, + 0x207f, 0x207f, + 0x2090, 0x2094, + 0x20d0, 0x20eb, + 0x2102, 0x2102, + 0x2107, 0x2107, + 0x210a, 0x2113, + 0x2115, 0x2115, + 0x2119, 0x211d, + 0x2124, 0x2124, + 0x2126, 0x2126, + 0x2128, 0x2128, + 0x212a, 0x212d, + 0x212f, 0x2131, + 0x2133, 0x2139, + 0x213c, 0x213f, + 0x2145, 0x2149, + 0x2c00, 0x2c2e, + 0x2c30, 0x2c5e, + 0x2c80, 0x2ce4, + 0x2d00, 0x2d25, + 0x2d30, 0x2d65, + 0x2d6f, 0x2d6f, + 0x2d80, 0x2d96, + 0x2da0, 0x2da6, + 0x2da8, 0x2dae, + 0x2db0, 0x2db6, + 0x2db8, 0x2dbe, + 0x2dc0, 0x2dc6, + 0x2dc8, 0x2dce, + 0x2dd0, 0x2dd6, + 0x2dd8, 0x2dde, + 0x3005, 0x3006, + 0x302a, 0x302f, + 0x3031, 0x3035, + 0x303b, 0x303c, + 0x3041, 0x3096, + 0x3099, 0x309a, + 0x309d, 0x309f, + 0x30a1, 0x30fa, + 0x30fc, 0x30ff, + 0x3105, 0x312c, + 0x3131, 0x318e, + 0x31a0, 0x31b7, + 0x31f0, 0x31ff, + 0x3400, 0x4db5, + 0x4e00, 0x9fbb, + 0xa000, 0xa48c, + 0xa800, 0xa827, + 0xac00, 0xd7a3, + 0xf900, 0xfa2d, + 0xfa30, 0xfa6a, + 0xfa70, 0xfad9, + 0xfb00, 0xfb06, + 0xfb13, 0xfb17, + 0xfb1d, 0xfb28, + 0xfb2a, 0xfb36, + 0xfb38, 0xfb3c, + 0xfb3e, 0xfb3e, + 0xfb40, 0xfb41, + 0xfb43, 0xfb44, + 0xfb46, 0xfbb1, + 0xfbd3, 0xfd3d, + 0xfd50, 0xfd8f, + 0xfd92, 0xfdc7, + 0xfdf0, 0xfdfb, + 0xfe00, 0xfe0f, + 0xfe20, 0xfe23, + 0xfe70, 0xfe74, + 0xfe76, 0xfefc, + 0xff21, 0xff3a, + 0xff41, 0xff5a, + 0xff66, 0xffbe, + 0xffc2, 0xffc7, + 0xffca, 0xffcf, + 
0xffd2, 0xffd7, + 0xffda, 0xffdc, + 0x10000, 0x1000b, + 0x1000d, 0x10026, + 0x10028, 0x1003a, + 0x1003c, 0x1003d, + 0x1003f, 0x1004d, + 0x10050, 0x1005d, + 0x10080, 0x100fa, + 0x10300, 0x1031e, + 0x10330, 0x10349, + 0x10380, 0x1039d, + 0x103a0, 0x103c3, + 0x103c8, 0x103cf, + 0x10400, 0x1049d, + 0x10800, 0x10805, + 0x10808, 0x10808, + 0x1080a, 0x10835, + 0x10837, 0x10838, + 0x1083c, 0x1083c, + 0x1083f, 0x1083f, + 0x10a00, 0x10a03, + 0x10a05, 0x10a06, + 0x10a0c, 0x10a13, + 0x10a15, 0x10a17, + 0x10a19, 0x10a33, + 0x10a38, 0x10a3a, + 0x10a3f, 0x10a3f, + 0x1d165, 0x1d169, + 0x1d16d, 0x1d172, + 0x1d17b, 0x1d182, + 0x1d185, 0x1d18b, + 0x1d1aa, 0x1d1ad, + 0x1d242, 0x1d244, + 0x1d400, 0x1d454, + 0x1d456, 0x1d49c, + 0x1d49e, 0x1d49f, + 0x1d4a2, 0x1d4a2, + 0x1d4a5, 0x1d4a6, + 0x1d4a9, 0x1d4ac, + 0x1d4ae, 0x1d4b9, + 0x1d4bb, 0x1d4bb, + 0x1d4bd, 0x1d4c3, + 0x1d4c5, 0x1d505, + 0x1d507, 0x1d50a, + 0x1d50d, 0x1d514, + 0x1d516, 0x1d51c, + 0x1d51e, 0x1d539, + 0x1d53b, 0x1d53e, + 0x1d540, 0x1d544, + 0x1d546, 0x1d546, + 0x1d54a, 0x1d550, + 0x1d552, 0x1d6a5, + 0x1d6a8, 0x1d6c0, + 0x1d6c2, 0x1d6da, + 0x1d6dc, 0x1d6fa, + 0x1d6fc, 0x1d714, + 0x1d716, 0x1d734, + 0x1d736, 0x1d74e, + 0x1d750, 0x1d76e, + 0x1d770, 0x1d788, + 0x1d78a, 0x1d7a8, + 0x1d7aa, 0x1d7c2, + 0x1d7c4, 0x1d7c9, + 0x20000, 0x2a6d6, + 0x2f800, 0x2fa1d, + 0xe0100, 0xe01ef +}; /* CR_Alpha */ + +/* 'Blank': [[:Blank:]] */ +static const OnigCodePoint CR_Blank[] = { + 9, + 0x0009, 0x0009, + 0x0020, 0x0020, + 0x00a0, 0x00a0, + 0x1680, 0x1680, + 0x180e, 0x180e, + 0x2000, 0x200a, + 0x202f, 0x202f, + 0x205f, 0x205f, + 0x3000, 0x3000 +}; /* CR_Blank */ + +/* 'Cntrl': [[:Cntrl:]] */ +static const OnigCodePoint CR_Cntrl[] = { + 19, + 0x0000, 0x001f, + 0x007f, 0x009f, + 0x00ad, 0x00ad, + 0x0600, 0x0603, + 0x06dd, 0x06dd, + 0x070f, 0x070f, + 0x17b4, 0x17b5, + 0x200b, 0x200f, + 0x202a, 0x202e, + 0x2060, 0x2063, + 0x206a, 0x206f, + 0xd800, 0xf8ff, + 0xfeff, 0xfeff, + 0xfff9, 0xfffb, + 0x1d173, 0x1d17a, + 0xe0001, 0xe0001, + 0xe0020, 
0xe007f, + 0xf0000, 0xffffd, + 0x100000, 0x10fffd +}; /* CR_Cntrl */ + +/* 'Digit': [[:Digit:]] */ +static const OnigCodePoint CR_Digit[] = { + 23, + 0x0030, 0x0039, + 0x0660, 0x0669, + 0x06f0, 0x06f9, + 0x0966, 0x096f, + 0x09e6, 0x09ef, + 0x0a66, 0x0a6f, + 0x0ae6, 0x0aef, + 0x0b66, 0x0b6f, + 0x0be6, 0x0bef, + 0x0c66, 0x0c6f, + 0x0ce6, 0x0cef, + 0x0d66, 0x0d6f, + 0x0e50, 0x0e59, + 0x0ed0, 0x0ed9, + 0x0f20, 0x0f29, + 0x1040, 0x1049, + 0x17e0, 0x17e9, + 0x1810, 0x1819, + 0x1946, 0x194f, + 0x19d0, 0x19d9, + 0xff10, 0xff19, + 0x104a0, 0x104a9, + 0x1d7ce, 0x1d7ff +}; /* CR_Digit */ + +/* 'Graph': [[:Graph:]] */ +static const OnigCodePoint CR_Graph[] = { + 424, + 0x0021, 0x007e, + 0x00a1, 0x0241, + 0x0250, 0x036f, + 0x0374, 0x0375, + 0x037a, 0x037a, + 0x037e, 0x037e, + 0x0384, 0x038a, + 0x038c, 0x038c, + 0x038e, 0x03a1, + 0x03a3, 0x03ce, + 0x03d0, 0x0486, + 0x0488, 0x04ce, + 0x04d0, 0x04f9, + 0x0500, 0x050f, + 0x0531, 0x0556, + 0x0559, 0x055f, + 0x0561, 0x0587, + 0x0589, 0x058a, + 0x0591, 0x05b9, + 0x05bb, 0x05c7, + 0x05d0, 0x05ea, + 0x05f0, 0x05f4, + 0x0600, 0x0603, + 0x060b, 0x0615, + 0x061b, 0x061b, + 0x061e, 0x061f, + 0x0621, 0x063a, + 0x0640, 0x065e, + 0x0660, 0x070d, + 0x070f, 0x074a, + 0x074d, 0x076d, + 0x0780, 0x07b1, + 0x0901, 0x0939, + 0x093c, 0x094d, + 0x0950, 0x0954, + 0x0958, 0x0970, + 0x097d, 0x097d, + 0x0981, 0x0983, + 0x0985, 0x098c, + 0x098f, 0x0990, + 0x0993, 0x09a8, + 0x09aa, 0x09b0, + 0x09b2, 0x09b2, + 0x09b6, 0x09b9, + 0x09bc, 0x09c4, + 0x09c7, 0x09c8, + 0x09cb, 0x09ce, + 0x09d7, 0x09d7, + 0x09dc, 0x09dd, + 0x09df, 0x09e3, + 0x09e6, 0x09fa, + 0x0a01, 0x0a03, + 0x0a05, 0x0a0a, + 0x0a0f, 0x0a10, + 0x0a13, 0x0a28, + 0x0a2a, 0x0a30, + 0x0a32, 0x0a33, + 0x0a35, 0x0a36, + 0x0a38, 0x0a39, + 0x0a3c, 0x0a3c, + 0x0a3e, 0x0a42, + 0x0a47, 0x0a48, + 0x0a4b, 0x0a4d, + 0x0a59, 0x0a5c, + 0x0a5e, 0x0a5e, + 0x0a66, 0x0a74, + 0x0a81, 0x0a83, + 0x0a85, 0x0a8d, + 0x0a8f, 0x0a91, + 0x0a93, 0x0aa8, + 0x0aaa, 0x0ab0, + 0x0ab2, 0x0ab3, + 0x0ab5, 0x0ab9, + 0x0abc, 0x0ac5, + 
0x0ac7, 0x0ac9, + 0x0acb, 0x0acd, + 0x0ad0, 0x0ad0, + 0x0ae0, 0x0ae3, + 0x0ae6, 0x0aef, + 0x0af1, 0x0af1, + 0x0b01, 0x0b03, + 0x0b05, 0x0b0c, + 0x0b0f, 0x0b10, + 0x0b13, 0x0b28, + 0x0b2a, 0x0b30, + 0x0b32, 0x0b33, + 0x0b35, 0x0b39, + 0x0b3c, 0x0b43, + 0x0b47, 0x0b48, + 0x0b4b, 0x0b4d, + 0x0b56, 0x0b57, + 0x0b5c, 0x0b5d, + 0x0b5f, 0x0b61, + 0x0b66, 0x0b71, + 0x0b82, 0x0b83, + 0x0b85, 0x0b8a, + 0x0b8e, 0x0b90, + 0x0b92, 0x0b95, + 0x0b99, 0x0b9a, + 0x0b9c, 0x0b9c, + 0x0b9e, 0x0b9f, + 0x0ba3, 0x0ba4, + 0x0ba8, 0x0baa, + 0x0bae, 0x0bb9, + 0x0bbe, 0x0bc2, + 0x0bc6, 0x0bc8, + 0x0bca, 0x0bcd, + 0x0bd7, 0x0bd7, + 0x0be6, 0x0bfa, + 0x0c01, 0x0c03, + 0x0c05, 0x0c0c, + 0x0c0e, 0x0c10, + 0x0c12, 0x0c28, + 0x0c2a, 0x0c33, + 0x0c35, 0x0c39, + 0x0c3e, 0x0c44, + 0x0c46, 0x0c48, + 0x0c4a, 0x0c4d, + 0x0c55, 0x0c56, + 0x0c60, 0x0c61, + 0x0c66, 0x0c6f, + 0x0c82, 0x0c83, + 0x0c85, 0x0c8c, + 0x0c8e, 0x0c90, + 0x0c92, 0x0ca8, + 0x0caa, 0x0cb3, + 0x0cb5, 0x0cb9, + 0x0cbc, 0x0cc4, + 0x0cc6, 0x0cc8, + 0x0cca, 0x0ccd, + 0x0cd5, 0x0cd6, + 0x0cde, 0x0cde, + 0x0ce0, 0x0ce1, + 0x0ce6, 0x0cef, + 0x0d02, 0x0d03, + 0x0d05, 0x0d0c, + 0x0d0e, 0x0d10, + 0x0d12, 0x0d28, + 0x0d2a, 0x0d39, + 0x0d3e, 0x0d43, + 0x0d46, 0x0d48, + 0x0d4a, 0x0d4d, + 0x0d57, 0x0d57, + 0x0d60, 0x0d61, + 0x0d66, 0x0d6f, + 0x0d82, 0x0d83, + 0x0d85, 0x0d96, + 0x0d9a, 0x0db1, + 0x0db3, 0x0dbb, + 0x0dbd, 0x0dbd, + 0x0dc0, 0x0dc6, + 0x0dca, 0x0dca, + 0x0dcf, 0x0dd4, + 0x0dd6, 0x0dd6, + 0x0dd8, 0x0ddf, + 0x0df2, 0x0df4, + 0x0e01, 0x0e3a, + 0x0e3f, 0x0e5b, + 0x0e81, 0x0e82, + 0x0e84, 0x0e84, + 0x0e87, 0x0e88, + 0x0e8a, 0x0e8a, + 0x0e8d, 0x0e8d, + 0x0e94, 0x0e97, + 0x0e99, 0x0e9f, + 0x0ea1, 0x0ea3, + 0x0ea5, 0x0ea5, + 0x0ea7, 0x0ea7, + 0x0eaa, 0x0eab, + 0x0ead, 0x0eb9, + 0x0ebb, 0x0ebd, + 0x0ec0, 0x0ec4, + 0x0ec6, 0x0ec6, + 0x0ec8, 0x0ecd, + 0x0ed0, 0x0ed9, + 0x0edc, 0x0edd, + 0x0f00, 0x0f47, + 0x0f49, 0x0f6a, + 0x0f71, 0x0f8b, + 0x0f90, 0x0f97, + 0x0f99, 0x0fbc, + 0x0fbe, 0x0fcc, + 0x0fcf, 0x0fd1, + 0x1000, 0x1021, + 0x1023, 0x1027, + 
0x1029, 0x102a, + 0x102c, 0x1032, + 0x1036, 0x1039, + 0x1040, 0x1059, + 0x10a0, 0x10c5, + 0x10d0, 0x10fc, + 0x1100, 0x1159, + 0x115f, 0x11a2, + 0x11a8, 0x11f9, + 0x1200, 0x1248, + 0x124a, 0x124d, + 0x1250, 0x1256, + 0x1258, 0x1258, + 0x125a, 0x125d, + 0x1260, 0x1288, + 0x128a, 0x128d, + 0x1290, 0x12b0, + 0x12b2, 0x12b5, + 0x12b8, 0x12be, + 0x12c0, 0x12c0, + 0x12c2, 0x12c5, + 0x12c8, 0x12d6, + 0x12d8, 0x1310, + 0x1312, 0x1315, + 0x1318, 0x135a, + 0x135f, 0x137c, + 0x1380, 0x1399, + 0x13a0, 0x13f4, + 0x1401, 0x1676, + 0x1681, 0x169c, + 0x16a0, 0x16f0, + 0x1700, 0x170c, + 0x170e, 0x1714, + 0x1720, 0x1736, + 0x1740, 0x1753, + 0x1760, 0x176c, + 0x176e, 0x1770, + 0x1772, 0x1773, + 0x1780, 0x17dd, + 0x17e0, 0x17e9, + 0x17f0, 0x17f9, + 0x1800, 0x180d, + 0x1810, 0x1819, + 0x1820, 0x1877, + 0x1880, 0x18a9, + 0x1900, 0x191c, + 0x1920, 0x192b, + 0x1930, 0x193b, + 0x1940, 0x1940, + 0x1944, 0x196d, + 0x1970, 0x1974, + 0x1980, 0x19a9, + 0x19b0, 0x19c9, + 0x19d0, 0x19d9, + 0x19de, 0x1a1b, + 0x1a1e, 0x1a1f, + 0x1d00, 0x1dc3, + 0x1e00, 0x1e9b, + 0x1ea0, 0x1ef9, + 0x1f00, 0x1f15, + 0x1f18, 0x1f1d, + 0x1f20, 0x1f45, + 0x1f48, 0x1f4d, + 0x1f50, 0x1f57, + 0x1f59, 0x1f59, + 0x1f5b, 0x1f5b, + 0x1f5d, 0x1f5d, + 0x1f5f, 0x1f7d, + 0x1f80, 0x1fb4, + 0x1fb6, 0x1fc4, + 0x1fc6, 0x1fd3, + 0x1fd6, 0x1fdb, + 0x1fdd, 0x1fef, + 0x1ff2, 0x1ff4, + 0x1ff6, 0x1ffe, + 0x200b, 0x2027, + 0x202a, 0x202e, + 0x2030, 0x205e, + 0x2060, 0x2063, + 0x206a, 0x2071, + 0x2074, 0x208e, + 0x2090, 0x2094, + 0x20a0, 0x20b5, + 0x20d0, 0x20eb, + 0x2100, 0x214c, + 0x2153, 0x2183, + 0x2190, 0x23db, + 0x2400, 0x2426, + 0x2440, 0x244a, + 0x2460, 0x269c, + 0x26a0, 0x26b1, + 0x2701, 0x2704, + 0x2706, 0x2709, + 0x270c, 0x2727, + 0x2729, 0x274b, + 0x274d, 0x274d, + 0x274f, 0x2752, + 0x2756, 0x2756, + 0x2758, 0x275e, + 0x2761, 0x2794, + 0x2798, 0x27af, + 0x27b1, 0x27be, + 0x27c0, 0x27c6, + 0x27d0, 0x27eb, + 0x27f0, 0x2b13, + 0x2c00, 0x2c2e, + 0x2c30, 0x2c5e, + 0x2c80, 0x2cea, + 0x2cf9, 0x2d25, + 0x2d30, 0x2d65, + 0x2d6f, 0x2d6f, + 
0x2d80, 0x2d96, + 0x2da0, 0x2da6, + 0x2da8, 0x2dae, + 0x2db0, 0x2db6, + 0x2db8, 0x2dbe, + 0x2dc0, 0x2dc6, + 0x2dc8, 0x2dce, + 0x2dd0, 0x2dd6, + 0x2dd8, 0x2dde, + 0x2e00, 0x2e17, + 0x2e1c, 0x2e1d, + 0x2e80, 0x2e99, + 0x2e9b, 0x2ef3, + 0x2f00, 0x2fd5, + 0x2ff0, 0x2ffb, + 0x3001, 0x303f, + 0x3041, 0x3096, + 0x3099, 0x30ff, + 0x3105, 0x312c, + 0x3131, 0x318e, + 0x3190, 0x31b7, + 0x31c0, 0x31cf, + 0x31f0, 0x321e, + 0x3220, 0x3243, + 0x3250, 0x32fe, + 0x3300, 0x4db5, + 0x4dc0, 0x9fbb, + 0xa000, 0xa48c, + 0xa490, 0xa4c6, + 0xa700, 0xa716, + 0xa800, 0xa82b, + 0xac00, 0xd7a3, + 0xe000, 0xfa2d, + 0xfa30, 0xfa6a, + 0xfa70, 0xfad9, + 0xfb00, 0xfb06, + 0xfb13, 0xfb17, + 0xfb1d, 0xfb36, + 0xfb38, 0xfb3c, + 0xfb3e, 0xfb3e, + 0xfb40, 0xfb41, + 0xfb43, 0xfb44, + 0xfb46, 0xfbb1, + 0xfbd3, 0xfd3f, + 0xfd50, 0xfd8f, + 0xfd92, 0xfdc7, + 0xfdf0, 0xfdfd, + 0xfe00, 0xfe19, + 0xfe20, 0xfe23, + 0xfe30, 0xfe52, + 0xfe54, 0xfe66, + 0xfe68, 0xfe6b, + 0xfe70, 0xfe74, + 0xfe76, 0xfefc, + 0xfeff, 0xfeff, + 0xff01, 0xffbe, + 0xffc2, 0xffc7, + 0xffca, 0xffcf, + 0xffd2, 0xffd7, + 0xffda, 0xffdc, + 0xffe0, 0xffe6, + 0xffe8, 0xffee, + 0xfff9, 0xfffd, + 0x10000, 0x1000b, + 0x1000d, 0x10026, + 0x10028, 0x1003a, + 0x1003c, 0x1003d, + 0x1003f, 0x1004d, + 0x10050, 0x1005d, + 0x10080, 0x100fa, + 0x10100, 0x10102, + 0x10107, 0x10133, + 0x10137, 0x1018a, + 0x10300, 0x1031e, + 0x10320, 0x10323, + 0x10330, 0x1034a, + 0x10380, 0x1039d, + 0x1039f, 0x103c3, + 0x103c8, 0x103d5, + 0x10400, 0x1049d, + 0x104a0, 0x104a9, + 0x10800, 0x10805, + 0x10808, 0x10808, + 0x1080a, 0x10835, + 0x10837, 0x10838, + 0x1083c, 0x1083c, + 0x1083f, 0x1083f, + 0x10a00, 0x10a03, + 0x10a05, 0x10a06, + 0x10a0c, 0x10a13, + 0x10a15, 0x10a17, + 0x10a19, 0x10a33, + 0x10a38, 0x10a3a, + 0x10a3f, 0x10a47, + 0x10a50, 0x10a58, + 0x1d000, 0x1d0f5, + 0x1d100, 0x1d126, + 0x1d12a, 0x1d1dd, + 0x1d200, 0x1d245, + 0x1d300, 0x1d356, + 0x1d400, 0x1d454, + 0x1d456, 0x1d49c, + 0x1d49e, 0x1d49f, + 0x1d4a2, 0x1d4a2, + 0x1d4a5, 0x1d4a6, + 0x1d4a9, 0x1d4ac, + 
0x1d4ae, 0x1d4b9, + 0x1d4bb, 0x1d4bb, + 0x1d4bd, 0x1d4c3, + 0x1d4c5, 0x1d505, + 0x1d507, 0x1d50a, + 0x1d50d, 0x1d514, + 0x1d516, 0x1d51c, + 0x1d51e, 0x1d539, + 0x1d53b, 0x1d53e, + 0x1d540, 0x1d544, + 0x1d546, 0x1d546, + 0x1d54a, 0x1d550, + 0x1d552, 0x1d6a5, + 0x1d6a8, 0x1d7c9, + 0x1d7ce, 0x1d7ff, + 0x20000, 0x2a6d6, + 0x2f800, 0x2fa1d, + 0xe0001, 0xe0001, + 0xe0020, 0xe007f, + 0xe0100, 0xe01ef, + 0xf0000, 0xffffd, + 0x100000, 0x10fffd +}; /* CR_Graph */ + +/* 'Lower': [[:Lower:]] */ +static const OnigCodePoint CR_Lower[] = { + 480, + 0x0061, 0x007a, + 0x00aa, 0x00aa, + 0x00b5, 0x00b5, + 0x00ba, 0x00ba, + 0x00df, 0x00f6, + 0x00f8, 0x00ff, + 0x0101, 0x0101, + 0x0103, 0x0103, + 0x0105, 0x0105, + 0x0107, 0x0107, + 0x0109, 0x0109, + 0x010b, 0x010b, + 0x010d, 0x010d, + 0x010f, 0x010f, + 0x0111, 0x0111, + 0x0113, 0x0113, + 0x0115, 0x0115, + 0x0117, 0x0117, + 0x0119, 0x0119, + 0x011b, 0x011b, + 0x011d, 0x011d, + 0x011f, 0x011f, + 0x0121, 0x0121, + 0x0123, 0x0123, + 0x0125, 0x0125, + 0x0127, 0x0127, + 0x0129, 0x0129, + 0x012b, 0x012b, + 0x012d, 0x012d, + 0x012f, 0x012f, + 0x0131, 0x0131, + 0x0133, 0x0133, + 0x0135, 0x0135, + 0x0137, 0x0138, + 0x013a, 0x013a, + 0x013c, 0x013c, + 0x013e, 0x013e, + 0x0140, 0x0140, + 0x0142, 0x0142, + 0x0144, 0x0144, + 0x0146, 0x0146, + 0x0148, 0x0149, + 0x014b, 0x014b, + 0x014d, 0x014d, + 0x014f, 0x014f, + 0x0151, 0x0151, + 0x0153, 0x0153, + 0x0155, 0x0155, + 0x0157, 0x0157, + 0x0159, 0x0159, + 0x015b, 0x015b, + 0x015d, 0x015d, + 0x015f, 0x015f, + 0x0161, 0x0161, + 0x0163, 0x0163, + 0x0165, 0x0165, + 0x0167, 0x0167, + 0x0169, 0x0169, + 0x016b, 0x016b, + 0x016d, 0x016d, + 0x016f, 0x016f, + 0x0171, 0x0171, + 0x0173, 0x0173, + 0x0175, 0x0175, + 0x0177, 0x0177, + 0x017a, 0x017a, + 0x017c, 0x017c, + 0x017e, 0x0180, + 0x0183, 0x0183, + 0x0185, 0x0185, + 0x0188, 0x0188, + 0x018c, 0x018d, + 0x0192, 0x0192, + 0x0195, 0x0195, + 0x0199, 0x019b, + 0x019e, 0x019e, + 0x01a1, 0x01a1, + 0x01a3, 0x01a3, + 0x01a5, 0x01a5, + 0x01a8, 0x01a8, + 0x01aa, 0x01ab, + 
0x01ad, 0x01ad, + 0x01b0, 0x01b0, + 0x01b4, 0x01b4, + 0x01b6, 0x01b6, + 0x01b9, 0x01ba, + 0x01bd, 0x01bf, + 0x01c6, 0x01c6, + 0x01c9, 0x01c9, + 0x01cc, 0x01cc, + 0x01ce, 0x01ce, + 0x01d0, 0x01d0, + 0x01d2, 0x01d2, + 0x01d4, 0x01d4, + 0x01d6, 0x01d6, + 0x01d8, 0x01d8, + 0x01da, 0x01da, + 0x01dc, 0x01dd, + 0x01df, 0x01df, + 0x01e1, 0x01e1, + 0x01e3, 0x01e3, + 0x01e5, 0x01e5, + 0x01e7, 0x01e7, + 0x01e9, 0x01e9, + 0x01eb, 0x01eb, + 0x01ed, 0x01ed, + 0x01ef, 0x01f0, + 0x01f3, 0x01f3, + 0x01f5, 0x01f5, + 0x01f9, 0x01f9, + 0x01fb, 0x01fb, + 0x01fd, 0x01fd, + 0x01ff, 0x01ff, + 0x0201, 0x0201, + 0x0203, 0x0203, + 0x0205, 0x0205, + 0x0207, 0x0207, + 0x0209, 0x0209, + 0x020b, 0x020b, + 0x020d, 0x020d, + 0x020f, 0x020f, + 0x0211, 0x0211, + 0x0213, 0x0213, + 0x0215, 0x0215, + 0x0217, 0x0217, + 0x0219, 0x0219, + 0x021b, 0x021b, + 0x021d, 0x021d, + 0x021f, 0x021f, + 0x0221, 0x0221, + 0x0223, 0x0223, + 0x0225, 0x0225, + 0x0227, 0x0227, + 0x0229, 0x0229, + 0x022b, 0x022b, + 0x022d, 0x022d, + 0x022f, 0x022f, + 0x0231, 0x0231, + 0x0233, 0x0239, + 0x023c, 0x023c, + 0x023f, 0x0240, + 0x0250, 0x02af, + 0x0390, 0x0390, + 0x03ac, 0x03ce, + 0x03d0, 0x03d1, + 0x03d5, 0x03d7, + 0x03d9, 0x03d9, + 0x03db, 0x03db, + 0x03dd, 0x03dd, + 0x03df, 0x03df, + 0x03e1, 0x03e1, + 0x03e3, 0x03e3, + 0x03e5, 0x03e5, + 0x03e7, 0x03e7, + 0x03e9, 0x03e9, + 0x03eb, 0x03eb, + 0x03ed, 0x03ed, + 0x03ef, 0x03f3, + 0x03f5, 0x03f5, + 0x03f8, 0x03f8, + 0x03fb, 0x03fc, + 0x0430, 0x045f, + 0x0461, 0x0461, + 0x0463, 0x0463, + 0x0465, 0x0465, + 0x0467, 0x0467, + 0x0469, 0x0469, + 0x046b, 0x046b, + 0x046d, 0x046d, + 0x046f, 0x046f, + 0x0471, 0x0471, + 0x0473, 0x0473, + 0x0475, 0x0475, + 0x0477, 0x0477, + 0x0479, 0x0479, + 0x047b, 0x047b, + 0x047d, 0x047d, + 0x047f, 0x047f, + 0x0481, 0x0481, + 0x048b, 0x048b, + 0x048d, 0x048d, + 0x048f, 0x048f, + 0x0491, 0x0491, + 0x0493, 0x0493, + 0x0495, 0x0495, + 0x0497, 0x0497, + 0x0499, 0x0499, + 0x049b, 0x049b, + 0x049d, 0x049d, + 0x049f, 0x049f, + 0x04a1, 0x04a1, + 0x04a3, 0x04a3, + 
0x04a5, 0x04a5, + 0x04a7, 0x04a7, + 0x04a9, 0x04a9, + 0x04ab, 0x04ab, + 0x04ad, 0x04ad, + 0x04af, 0x04af, + 0x04b1, 0x04b1, + 0x04b3, 0x04b3, + 0x04b5, 0x04b5, + 0x04b7, 0x04b7, + 0x04b9, 0x04b9, + 0x04bb, 0x04bb, + 0x04bd, 0x04bd, + 0x04bf, 0x04bf, + 0x04c2, 0x04c2, + 0x04c4, 0x04c4, + 0x04c6, 0x04c6, + 0x04c8, 0x04c8, + 0x04ca, 0x04ca, + 0x04cc, 0x04cc, + 0x04ce, 0x04ce, + 0x04d1, 0x04d1, + 0x04d3, 0x04d3, + 0x04d5, 0x04d5, + 0x04d7, 0x04d7, + 0x04d9, 0x04d9, + 0x04db, 0x04db, + 0x04dd, 0x04dd, + 0x04df, 0x04df, + 0x04e1, 0x04e1, + 0x04e3, 0x04e3, + 0x04e5, 0x04e5, + 0x04e7, 0x04e7, + 0x04e9, 0x04e9, + 0x04eb, 0x04eb, + 0x04ed, 0x04ed, + 0x04ef, 0x04ef, + 0x04f1, 0x04f1, + 0x04f3, 0x04f3, + 0x04f5, 0x04f5, + 0x04f7, 0x04f7, + 0x04f9, 0x04f9, + 0x0501, 0x0501, + 0x0503, 0x0503, + 0x0505, 0x0505, + 0x0507, 0x0507, + 0x0509, 0x0509, + 0x050b, 0x050b, + 0x050d, 0x050d, + 0x050f, 0x050f, + 0x0561, 0x0587, + 0x1d00, 0x1d2b, + 0x1d62, 0x1d77, + 0x1d79, 0x1d9a, + 0x1e01, 0x1e01, + 0x1e03, 0x1e03, + 0x1e05, 0x1e05, + 0x1e07, 0x1e07, + 0x1e09, 0x1e09, + 0x1e0b, 0x1e0b, + 0x1e0d, 0x1e0d, + 0x1e0f, 0x1e0f, + 0x1e11, 0x1e11, + 0x1e13, 0x1e13, + 0x1e15, 0x1e15, + 0x1e17, 0x1e17, + 0x1e19, 0x1e19, + 0x1e1b, 0x1e1b, + 0x1e1d, 0x1e1d, + 0x1e1f, 0x1e1f, + 0x1e21, 0x1e21, + 0x1e23, 0x1e23, + 0x1e25, 0x1e25, + 0x1e27, 0x1e27, + 0x1e29, 0x1e29, + 0x1e2b, 0x1e2b, + 0x1e2d, 0x1e2d, + 0x1e2f, 0x1e2f, + 0x1e31, 0x1e31, + 0x1e33, 0x1e33, + 0x1e35, 0x1e35, + 0x1e37, 0x1e37, + 0x1e39, 0x1e39, + 0x1e3b, 0x1e3b, + 0x1e3d, 0x1e3d, + 0x1e3f, 0x1e3f, + 0x1e41, 0x1e41, + 0x1e43, 0x1e43, + 0x1e45, 0x1e45, + 0x1e47, 0x1e47, + 0x1e49, 0x1e49, + 0x1e4b, 0x1e4b, + 0x1e4d, 0x1e4d, + 0x1e4f, 0x1e4f, + 0x1e51, 0x1e51, + 0x1e53, 0x1e53, + 0x1e55, 0x1e55, + 0x1e57, 0x1e57, + 0x1e59, 0x1e59, + 0x1e5b, 0x1e5b, + 0x1e5d, 0x1e5d, + 0x1e5f, 0x1e5f, + 0x1e61, 0x1e61, + 0x1e63, 0x1e63, + 0x1e65, 0x1e65, + 0x1e67, 0x1e67, + 0x1e69, 0x1e69, + 0x1e6b, 0x1e6b, + 0x1e6d, 0x1e6d, + 0x1e6f, 0x1e6f, + 0x1e71, 0x1e71, + 
0x1e73, 0x1e73, + 0x1e75, 0x1e75, + 0x1e77, 0x1e77, + 0x1e79, 0x1e79, + 0x1e7b, 0x1e7b, + 0x1e7d, 0x1e7d, + 0x1e7f, 0x1e7f, + 0x1e81, 0x1e81, + 0x1e83, 0x1e83, + 0x1e85, 0x1e85, + 0x1e87, 0x1e87, + 0x1e89, 0x1e89, + 0x1e8b, 0x1e8b, + 0x1e8d, 0x1e8d, + 0x1e8f, 0x1e8f, + 0x1e91, 0x1e91, + 0x1e93, 0x1e93, + 0x1e95, 0x1e9b, + 0x1ea1, 0x1ea1, + 0x1ea3, 0x1ea3, + 0x1ea5, 0x1ea5, + 0x1ea7, 0x1ea7, + 0x1ea9, 0x1ea9, + 0x1eab, 0x1eab, + 0x1ead, 0x1ead, + 0x1eaf, 0x1eaf, + 0x1eb1, 0x1eb1, + 0x1eb3, 0x1eb3, + 0x1eb5, 0x1eb5, + 0x1eb7, 0x1eb7, + 0x1eb9, 0x1eb9, + 0x1ebb, 0x1ebb, + 0x1ebd, 0x1ebd, + 0x1ebf, 0x1ebf, + 0x1ec1, 0x1ec1, + 0x1ec3, 0x1ec3, + 0x1ec5, 0x1ec5, + 0x1ec7, 0x1ec7, + 0x1ec9, 0x1ec9, + 0x1ecb, 0x1ecb, + 0x1ecd, 0x1ecd, + 0x1ecf, 0x1ecf, + 0x1ed1, 0x1ed1, + 0x1ed3, 0x1ed3, + 0x1ed5, 0x1ed5, + 0x1ed7, 0x1ed7, + 0x1ed9, 0x1ed9, + 0x1edb, 0x1edb, + 0x1edd, 0x1edd, + 0x1edf, 0x1edf, + 0x1ee1, 0x1ee1, + 0x1ee3, 0x1ee3, + 0x1ee5, 0x1ee5, + 0x1ee7, 0x1ee7, + 0x1ee9, 0x1ee9, + 0x1eeb, 0x1eeb, + 0x1eed, 0x1eed, + 0x1eef, 0x1eef, + 0x1ef1, 0x1ef1, + 0x1ef3, 0x1ef3, + 0x1ef5, 0x1ef5, + 0x1ef7, 0x1ef7, + 0x1ef9, 0x1ef9, + 0x1f00, 0x1f07, + 0x1f10, 0x1f15, + 0x1f20, 0x1f27, + 0x1f30, 0x1f37, + 0x1f40, 0x1f45, + 0x1f50, 0x1f57, + 0x1f60, 0x1f67, + 0x1f70, 0x1f7d, + 0x1f80, 0x1f87, + 0x1f90, 0x1f97, + 0x1fa0, 0x1fa7, + 0x1fb0, 0x1fb4, + 0x1fb6, 0x1fb7, + 0x1fbe, 0x1fbe, + 0x1fc2, 0x1fc4, + 0x1fc6, 0x1fc7, + 0x1fd0, 0x1fd3, + 0x1fd6, 0x1fd7, + 0x1fe0, 0x1fe7, + 0x1ff2, 0x1ff4, + 0x1ff6, 0x1ff7, + 0x2071, 0x2071, + 0x207f, 0x207f, + 0x210a, 0x210a, + 0x210e, 0x210f, + 0x2113, 0x2113, + 0x212f, 0x212f, + 0x2134, 0x2134, + 0x2139, 0x2139, + 0x213c, 0x213d, + 0x2146, 0x2149, + 0x2c30, 0x2c5e, + 0x2c81, 0x2c81, + 0x2c83, 0x2c83, + 0x2c85, 0x2c85, + 0x2c87, 0x2c87, + 0x2c89, 0x2c89, + 0x2c8b, 0x2c8b, + 0x2c8d, 0x2c8d, + 0x2c8f, 0x2c8f, + 0x2c91, 0x2c91, + 0x2c93, 0x2c93, + 0x2c95, 0x2c95, + 0x2c97, 0x2c97, + 0x2c99, 0x2c99, + 0x2c9b, 0x2c9b, + 0x2c9d, 0x2c9d, + 0x2c9f, 0x2c9f, + 
0x2ca1, 0x2ca1, + 0x2ca3, 0x2ca3, + 0x2ca5, 0x2ca5, + 0x2ca7, 0x2ca7, + 0x2ca9, 0x2ca9, + 0x2cab, 0x2cab, + 0x2cad, 0x2cad, + 0x2caf, 0x2caf, + 0x2cb1, 0x2cb1, + 0x2cb3, 0x2cb3, + 0x2cb5, 0x2cb5, + 0x2cb7, 0x2cb7, + 0x2cb9, 0x2cb9, + 0x2cbb, 0x2cbb, + 0x2cbd, 0x2cbd, + 0x2cbf, 0x2cbf, + 0x2cc1, 0x2cc1, + 0x2cc3, 0x2cc3, + 0x2cc5, 0x2cc5, + 0x2cc7, 0x2cc7, + 0x2cc9, 0x2cc9, + 0x2ccb, 0x2ccb, + 0x2ccd, 0x2ccd, + 0x2ccf, 0x2ccf, + 0x2cd1, 0x2cd1, + 0x2cd3, 0x2cd3, + 0x2cd5, 0x2cd5, + 0x2cd7, 0x2cd7, + 0x2cd9, 0x2cd9, + 0x2cdb, 0x2cdb, + 0x2cdd, 0x2cdd, + 0x2cdf, 0x2cdf, + 0x2ce1, 0x2ce1, + 0x2ce3, 0x2ce4, + 0x2d00, 0x2d25, + 0xfb00, 0xfb06, + 0xfb13, 0xfb17, + 0xff41, 0xff5a, + 0x10428, 0x1044f, + 0x1d41a, 0x1d433, + 0x1d44e, 0x1d454, + 0x1d456, 0x1d467, + 0x1d482, 0x1d49b, + 0x1d4b6, 0x1d4b9, + 0x1d4bb, 0x1d4bb, + 0x1d4bd, 0x1d4c3, + 0x1d4c5, 0x1d4cf, + 0x1d4ea, 0x1d503, + 0x1d51e, 0x1d537, + 0x1d552, 0x1d56b, + 0x1d586, 0x1d59f, + 0x1d5ba, 0x1d5d3, + 0x1d5ee, 0x1d607, + 0x1d622, 0x1d63b, + 0x1d656, 0x1d66f, + 0x1d68a, 0x1d6a5, + 0x1d6c2, 0x1d6da, + 0x1d6dc, 0x1d6e1, + 0x1d6fc, 0x1d714, + 0x1d716, 0x1d71b, + 0x1d736, 0x1d74e, + 0x1d750, 0x1d755, + 0x1d770, 0x1d788, + 0x1d78a, 0x1d78f, + 0x1d7aa, 0x1d7c2, + 0x1d7c4, 0x1d7c9 +}; /* CR_Lower */ + +/* 'Print': [[:Print:]] */ +static const OnigCodePoint CR_Print[] = { + 423, + 0x0009, 0x000d, + 0x0020, 0x007e, + 0x0085, 0x0085, + 0x00a0, 0x0241, + 0x0250, 0x036f, + 0x0374, 0x0375, + 0x037a, 0x037a, + 0x037e, 0x037e, + 0x0384, 0x038a, + 0x038c, 0x038c, + 0x038e, 0x03a1, + 0x03a3, 0x03ce, + 0x03d0, 0x0486, + 0x0488, 0x04ce, + 0x04d0, 0x04f9, + 0x0500, 0x050f, + 0x0531, 0x0556, + 0x0559, 0x055f, + 0x0561, 0x0587, + 0x0589, 0x058a, + 0x0591, 0x05b9, + 0x05bb, 0x05c7, + 0x05d0, 0x05ea, + 0x05f0, 0x05f4, + 0x0600, 0x0603, + 0x060b, 0x0615, + 0x061b, 0x061b, + 0x061e, 0x061f, + 0x0621, 0x063a, + 0x0640, 0x065e, + 0x0660, 0x070d, + 0x070f, 0x074a, + 0x074d, 0x076d, + 0x0780, 0x07b1, + 0x0901, 0x0939, + 0x093c, 0x094d, + 0x0950, 
0x0954, + 0x0958, 0x0970, + 0x097d, 0x097d, + 0x0981, 0x0983, + 0x0985, 0x098c, + 0x098f, 0x0990, + 0x0993, 0x09a8, + 0x09aa, 0x09b0, + 0x09b2, 0x09b2, + 0x09b6, 0x09b9, + 0x09bc, 0x09c4, + 0x09c7, 0x09c8, + 0x09cb, 0x09ce, + 0x09d7, 0x09d7, + 0x09dc, 0x09dd, + 0x09df, 0x09e3, + 0x09e6, 0x09fa, + 0x0a01, 0x0a03, + 0x0a05, 0x0a0a, + 0x0a0f, 0x0a10, + 0x0a13, 0x0a28, + 0x0a2a, 0x0a30, + 0x0a32, 0x0a33, + 0x0a35, 0x0a36, + 0x0a38, 0x0a39, + 0x0a3c, 0x0a3c, + 0x0a3e, 0x0a42, + 0x0a47, 0x0a48, + 0x0a4b, 0x0a4d, + 0x0a59, 0x0a5c, + 0x0a5e, 0x0a5e, + 0x0a66, 0x0a74, + 0x0a81, 0x0a83, + 0x0a85, 0x0a8d, + 0x0a8f, 0x0a91, + 0x0a93, 0x0aa8, + 0x0aaa, 0x0ab0, + 0x0ab2, 0x0ab3, + 0x0ab5, 0x0ab9, + 0x0abc, 0x0ac5, + 0x0ac7, 0x0ac9, + 0x0acb, 0x0acd, + 0x0ad0, 0x0ad0, + 0x0ae0, 0x0ae3, + 0x0ae6, 0x0aef, + 0x0af1, 0x0af1, + 0x0b01, 0x0b03, + 0x0b05, 0x0b0c, + 0x0b0f, 0x0b10, + 0x0b13, 0x0b28, + 0x0b2a, 0x0b30, + 0x0b32, 0x0b33, + 0x0b35, 0x0b39, + 0x0b3c, 0x0b43, + 0x0b47, 0x0b48, + 0x0b4b, 0x0b4d, + 0x0b56, 0x0b57, + 0x0b5c, 0x0b5d, + 0x0b5f, 0x0b61, + 0x0b66, 0x0b71, + 0x0b82, 0x0b83, + 0x0b85, 0x0b8a, + 0x0b8e, 0x0b90, + 0x0b92, 0x0b95, + 0x0b99, 0x0b9a, + 0x0b9c, 0x0b9c, + 0x0b9e, 0x0b9f, + 0x0ba3, 0x0ba4, + 0x0ba8, 0x0baa, + 0x0bae, 0x0bb9, + 0x0bbe, 0x0bc2, + 0x0bc6, 0x0bc8, + 0x0bca, 0x0bcd, + 0x0bd7, 0x0bd7, + 0x0be6, 0x0bfa, + 0x0c01, 0x0c03, + 0x0c05, 0x0c0c, + 0x0c0e, 0x0c10, + 0x0c12, 0x0c28, + 0x0c2a, 0x0c33, + 0x0c35, 0x0c39, + 0x0c3e, 0x0c44, + 0x0c46, 0x0c48, + 0x0c4a, 0x0c4d, + 0x0c55, 0x0c56, + 0x0c60, 0x0c61, + 0x0c66, 0x0c6f, + 0x0c82, 0x0c83, + 0x0c85, 0x0c8c, + 0x0c8e, 0x0c90, + 0x0c92, 0x0ca8, + 0x0caa, 0x0cb3, + 0x0cb5, 0x0cb9, + 0x0cbc, 0x0cc4, + 0x0cc6, 0x0cc8, + 0x0cca, 0x0ccd, + 0x0cd5, 0x0cd6, + 0x0cde, 0x0cde, + 0x0ce0, 0x0ce1, + 0x0ce6, 0x0cef, + 0x0d02, 0x0d03, + 0x0d05, 0x0d0c, + 0x0d0e, 0x0d10, + 0x0d12, 0x0d28, + 0x0d2a, 0x0d39, + 0x0d3e, 0x0d43, + 0x0d46, 0x0d48, + 0x0d4a, 0x0d4d, + 0x0d57, 0x0d57, + 0x0d60, 0x0d61, + 0x0d66, 0x0d6f, + 0x0d82, 
0x0d83, + 0x0d85, 0x0d96, + 0x0d9a, 0x0db1, + 0x0db3, 0x0dbb, + 0x0dbd, 0x0dbd, + 0x0dc0, 0x0dc6, + 0x0dca, 0x0dca, + 0x0dcf, 0x0dd4, + 0x0dd6, 0x0dd6, + 0x0dd8, 0x0ddf, + 0x0df2, 0x0df4, + 0x0e01, 0x0e3a, + 0x0e3f, 0x0e5b, + 0x0e81, 0x0e82, + 0x0e84, 0x0e84, + 0x0e87, 0x0e88, + 0x0e8a, 0x0e8a, + 0x0e8d, 0x0e8d, + 0x0e94, 0x0e97, + 0x0e99, 0x0e9f, + 0x0ea1, 0x0ea3, + 0x0ea5, 0x0ea5, + 0x0ea7, 0x0ea7, + 0x0eaa, 0x0eab, + 0x0ead, 0x0eb9, + 0x0ebb, 0x0ebd, + 0x0ec0, 0x0ec4, + 0x0ec6, 0x0ec6, + 0x0ec8, 0x0ecd, + 0x0ed0, 0x0ed9, + 0x0edc, 0x0edd, + 0x0f00, 0x0f47, + 0x0f49, 0x0f6a, + 0x0f71, 0x0f8b, + 0x0f90, 0x0f97, + 0x0f99, 0x0fbc, + 0x0fbe, 0x0fcc, + 0x0fcf, 0x0fd1, + 0x1000, 0x1021, + 0x1023, 0x1027, + 0x1029, 0x102a, + 0x102c, 0x1032, + 0x1036, 0x1039, + 0x1040, 0x1059, + 0x10a0, 0x10c5, + 0x10d0, 0x10fc, + 0x1100, 0x1159, + 0x115f, 0x11a2, + 0x11a8, 0x11f9, + 0x1200, 0x1248, + 0x124a, 0x124d, + 0x1250, 0x1256, + 0x1258, 0x1258, + 0x125a, 0x125d, + 0x1260, 0x1288, + 0x128a, 0x128d, + 0x1290, 0x12b0, + 0x12b2, 0x12b5, + 0x12b8, 0x12be, + 0x12c0, 0x12c0, + 0x12c2, 0x12c5, + 0x12c8, 0x12d6, + 0x12d8, 0x1310, + 0x1312, 0x1315, + 0x1318, 0x135a, + 0x135f, 0x137c, + 0x1380, 0x1399, + 0x13a0, 0x13f4, + 0x1401, 0x1676, + 0x1680, 0x169c, + 0x16a0, 0x16f0, + 0x1700, 0x170c, + 0x170e, 0x1714, + 0x1720, 0x1736, + 0x1740, 0x1753, + 0x1760, 0x176c, + 0x176e, 0x1770, + 0x1772, 0x1773, + 0x1780, 0x17dd, + 0x17e0, 0x17e9, + 0x17f0, 0x17f9, + 0x1800, 0x180e, + 0x1810, 0x1819, + 0x1820, 0x1877, + 0x1880, 0x18a9, + 0x1900, 0x191c, + 0x1920, 0x192b, + 0x1930, 0x193b, + 0x1940, 0x1940, + 0x1944, 0x196d, + 0x1970, 0x1974, + 0x1980, 0x19a9, + 0x19b0, 0x19c9, + 0x19d0, 0x19d9, + 0x19de, 0x1a1b, + 0x1a1e, 0x1a1f, + 0x1d00, 0x1dc3, + 0x1e00, 0x1e9b, + 0x1ea0, 0x1ef9, + 0x1f00, 0x1f15, + 0x1f18, 0x1f1d, + 0x1f20, 0x1f45, + 0x1f48, 0x1f4d, + 0x1f50, 0x1f57, + 0x1f59, 0x1f59, + 0x1f5b, 0x1f5b, + 0x1f5d, 0x1f5d, + 0x1f5f, 0x1f7d, + 0x1f80, 0x1fb4, + 0x1fb6, 0x1fc4, + 0x1fc6, 0x1fd3, + 0x1fd6, 
0x1fdb, + 0x1fdd, 0x1fef, + 0x1ff2, 0x1ff4, + 0x1ff6, 0x1ffe, + 0x2000, 0x2063, + 0x206a, 0x2071, + 0x2074, 0x208e, + 0x2090, 0x2094, + 0x20a0, 0x20b5, + 0x20d0, 0x20eb, + 0x2100, 0x214c, + 0x2153, 0x2183, + 0x2190, 0x23db, + 0x2400, 0x2426, + 0x2440, 0x244a, + 0x2460, 0x269c, + 0x26a0, 0x26b1, + 0x2701, 0x2704, + 0x2706, 0x2709, + 0x270c, 0x2727, + 0x2729, 0x274b, + 0x274d, 0x274d, + 0x274f, 0x2752, + 0x2756, 0x2756, + 0x2758, 0x275e, + 0x2761, 0x2794, + 0x2798, 0x27af, + 0x27b1, 0x27be, + 0x27c0, 0x27c6, + 0x27d0, 0x27eb, + 0x27f0, 0x2b13, + 0x2c00, 0x2c2e, + 0x2c30, 0x2c5e, + 0x2c80, 0x2cea, + 0x2cf9, 0x2d25, + 0x2d30, 0x2d65, + 0x2d6f, 0x2d6f, + 0x2d80, 0x2d96, + 0x2da0, 0x2da6, + 0x2da8, 0x2dae, + 0x2db0, 0x2db6, + 0x2db8, 0x2dbe, + 0x2dc0, 0x2dc6, + 0x2dc8, 0x2dce, + 0x2dd0, 0x2dd6, + 0x2dd8, 0x2dde, + 0x2e00, 0x2e17, + 0x2e1c, 0x2e1d, + 0x2e80, 0x2e99, + 0x2e9b, 0x2ef3, + 0x2f00, 0x2fd5, + 0x2ff0, 0x2ffb, + 0x3000, 0x303f, + 0x3041, 0x3096, + 0x3099, 0x30ff, + 0x3105, 0x312c, + 0x3131, 0x318e, + 0x3190, 0x31b7, + 0x31c0, 0x31cf, + 0x31f0, 0x321e, + 0x3220, 0x3243, + 0x3250, 0x32fe, + 0x3300, 0x4db5, + 0x4dc0, 0x9fbb, + 0xa000, 0xa48c, + 0xa490, 0xa4c6, + 0xa700, 0xa716, + 0xa800, 0xa82b, + 0xac00, 0xd7a3, + 0xe000, 0xfa2d, + 0xfa30, 0xfa6a, + 0xfa70, 0xfad9, + 0xfb00, 0xfb06, + 0xfb13, 0xfb17, + 0xfb1d, 0xfb36, + 0xfb38, 0xfb3c, + 0xfb3e, 0xfb3e, + 0xfb40, 0xfb41, + 0xfb43, 0xfb44, + 0xfb46, 0xfbb1, + 0xfbd3, 0xfd3f, + 0xfd50, 0xfd8f, + 0xfd92, 0xfdc7, + 0xfdf0, 0xfdfd, + 0xfe00, 0xfe19, + 0xfe20, 0xfe23, + 0xfe30, 0xfe52, + 0xfe54, 0xfe66, + 0xfe68, 0xfe6b, + 0xfe70, 0xfe74, + 0xfe76, 0xfefc, + 0xfeff, 0xfeff, + 0xff01, 0xffbe, + 0xffc2, 0xffc7, + 0xffca, 0xffcf, + 0xffd2, 0xffd7, + 0xffda, 0xffdc, + 0xffe0, 0xffe6, + 0xffe8, 0xffee, + 0xfff9, 0xfffd, + 0x10000, 0x1000b, + 0x1000d, 0x10026, + 0x10028, 0x1003a, + 0x1003c, 0x1003d, + 0x1003f, 0x1004d, + 0x10050, 0x1005d, + 0x10080, 0x100fa, + 0x10100, 0x10102, + 0x10107, 0x10133, + 0x10137, 0x1018a, + 
0x10300, 0x1031e, + 0x10320, 0x10323, + 0x10330, 0x1034a, + 0x10380, 0x1039d, + 0x1039f, 0x103c3, + 0x103c8, 0x103d5, + 0x10400, 0x1049d, + 0x104a0, 0x104a9, + 0x10800, 0x10805, + 0x10808, 0x10808, + 0x1080a, 0x10835, + 0x10837, 0x10838, + 0x1083c, 0x1083c, + 0x1083f, 0x1083f, + 0x10a00, 0x10a03, + 0x10a05, 0x10a06, + 0x10a0c, 0x10a13, + 0x10a15, 0x10a17, + 0x10a19, 0x10a33, + 0x10a38, 0x10a3a, + 0x10a3f, 0x10a47, + 0x10a50, 0x10a58, + 0x1d000, 0x1d0f5, + 0x1d100, 0x1d126, + 0x1d12a, 0x1d1dd, + 0x1d200, 0x1d245, + 0x1d300, 0x1d356, + 0x1d400, 0x1d454, + 0x1d456, 0x1d49c, + 0x1d49e, 0x1d49f, + 0x1d4a2, 0x1d4a2, + 0x1d4a5, 0x1d4a6, + 0x1d4a9, 0x1d4ac, + 0x1d4ae, 0x1d4b9, + 0x1d4bb, 0x1d4bb, + 0x1d4bd, 0x1d4c3, + 0x1d4c5, 0x1d505, + 0x1d507, 0x1d50a, + 0x1d50d, 0x1d514, + 0x1d516, 0x1d51c, + 0x1d51e, 0x1d539, + 0x1d53b, 0x1d53e, + 0x1d540, 0x1d544, + 0x1d546, 0x1d546, + 0x1d54a, 0x1d550, + 0x1d552, 0x1d6a5, + 0x1d6a8, 0x1d7c9, + 0x1d7ce, 0x1d7ff, + 0x20000, 0x2a6d6, + 0x2f800, 0x2fa1d, + 0xe0001, 0xe0001, + 0xe0020, 0xe007f, + 0xe0100, 0xe01ef, + 0xf0000, 0xffffd, + 0x100000, 0x10fffd +}; /* CR_Print */ + +/* 'Punct': [[:Punct:]] */ +static const OnigCodePoint CR_Punct[] = { + 96, + 0x0021, 0x0023, + 0x0025, 0x002a, + 0x002c, 0x002f, + 0x003a, 0x003b, + 0x003f, 0x0040, + 0x005b, 0x005d, + 0x005f, 0x005f, + 0x007b, 0x007b, + 0x007d, 0x007d, + 0x00a1, 0x00a1, + 0x00ab, 0x00ab, + 0x00b7, 0x00b7, + 0x00bb, 0x00bb, + 0x00bf, 0x00bf, + 0x037e, 0x037e, + 0x0387, 0x0387, + 0x055a, 0x055f, + 0x0589, 0x058a, + 0x05be, 0x05be, + 0x05c0, 0x05c0, + 0x05c3, 0x05c3, + 0x05c6, 0x05c6, + 0x05f3, 0x05f4, + 0x060c, 0x060d, + 0x061b, 0x061b, + 0x061e, 0x061f, + 0x066a, 0x066d, + 0x06d4, 0x06d4, + 0x0700, 0x070d, + 0x0964, 0x0965, + 0x0970, 0x0970, + 0x0df4, 0x0df4, + 0x0e4f, 0x0e4f, + 0x0e5a, 0x0e5b, + 0x0f04, 0x0f12, + 0x0f3a, 0x0f3d, + 0x0f85, 0x0f85, + 0x0fd0, 0x0fd1, + 0x104a, 0x104f, + 0x10fb, 0x10fb, + 0x1361, 0x1368, + 0x166d, 0x166e, + 0x169b, 0x169c, + 0x16eb, 0x16ed, + 0x1735, 
0x1736, + 0x17d4, 0x17d6, + 0x17d8, 0x17da, + 0x1800, 0x180a, + 0x1944, 0x1945, + 0x19de, 0x19df, + 0x1a1e, 0x1a1f, + 0x2010, 0x2027, + 0x2030, 0x2043, + 0x2045, 0x2051, + 0x2053, 0x205e, + 0x207d, 0x207e, + 0x208d, 0x208e, + 0x2329, 0x232a, + 0x23b4, 0x23b6, + 0x2768, 0x2775, + 0x27c5, 0x27c6, + 0x27e6, 0x27eb, + 0x2983, 0x2998, + 0x29d8, 0x29db, + 0x29fc, 0x29fd, + 0x2cf9, 0x2cfc, + 0x2cfe, 0x2cff, + 0x2e00, 0x2e17, + 0x2e1c, 0x2e1d, + 0x3001, 0x3003, + 0x3008, 0x3011, + 0x3014, 0x301f, + 0x3030, 0x3030, + 0x303d, 0x303d, + 0x30a0, 0x30a0, + 0x30fb, 0x30fb, + 0xfd3e, 0xfd3f, + 0xfe10, 0xfe19, + 0xfe30, 0xfe52, + 0xfe54, 0xfe61, + 0xfe63, 0xfe63, + 0xfe68, 0xfe68, + 0xfe6a, 0xfe6b, + 0xff01, 0xff03, + 0xff05, 0xff0a, + 0xff0c, 0xff0f, + 0xff1a, 0xff1b, + 0xff1f, 0xff20, + 0xff3b, 0xff3d, + 0xff3f, 0xff3f, + 0xff5b, 0xff5b, + 0xff5d, 0xff5d, + 0xff5f, 0xff65, + 0x10100, 0x10101, + 0x1039f, 0x1039f, + 0x10a50, 0x10a58 +}; /* CR_Punct */ + +/* 'Space': [[:Space:]] */ +static const OnigCodePoint CR_Space[] = { + 11, + 0x0009, 0x000d, + 0x0020, 0x0020, + 0x0085, 0x0085, + 0x00a0, 0x00a0, + 0x1680, 0x1680, + 0x180e, 0x180e, + 0x2000, 0x200a, + 0x2028, 0x2029, + 0x202f, 0x202f, + 0x205f, 0x205f, + 0x3000, 0x3000 +}; /* CR_Space */ + +/* 'Upper': [[:Upper:]] */ +static const OnigCodePoint CR_Upper[] = { + 476, + 0x0041, 0x005a, + 0x00c0, 0x00d6, + 0x00d8, 0x00de, + 0x0100, 0x0100, + 0x0102, 0x0102, + 0x0104, 0x0104, + 0x0106, 0x0106, + 0x0108, 0x0108, + 0x010a, 0x010a, + 0x010c, 0x010c, + 0x010e, 0x010e, + 0x0110, 0x0110, + 0x0112, 0x0112, + 0x0114, 0x0114, + 0x0116, 0x0116, + 0x0118, 0x0118, + 0x011a, 0x011a, + 0x011c, 0x011c, + 0x011e, 0x011e, + 0x0120, 0x0120, + 0x0122, 0x0122, + 0x0124, 0x0124, + 0x0126, 0x0126, + 0x0128, 0x0128, + 0x012a, 0x012a, + 0x012c, 0x012c, + 0x012e, 0x012e, + 0x0130, 0x0130, + 0x0132, 0x0132, + 0x0134, 0x0134, + 0x0136, 0x0136, + 0x0139, 0x0139, + 0x013b, 0x013b, + 0x013d, 0x013d, + 0x013f, 0x013f, + 0x0141, 0x0141, + 0x0143, 0x0143, + 
0x0145, 0x0145, + 0x0147, 0x0147, + 0x014a, 0x014a, + 0x014c, 0x014c, + 0x014e, 0x014e, + 0x0150, 0x0150, + 0x0152, 0x0152, + 0x0154, 0x0154, + 0x0156, 0x0156, + 0x0158, 0x0158, + 0x015a, 0x015a, + 0x015c, 0x015c, + 0x015e, 0x015e, + 0x0160, 0x0160, + 0x0162, 0x0162, + 0x0164, 0x0164, + 0x0166, 0x0166, + 0x0168, 0x0168, + 0x016a, 0x016a, + 0x016c, 0x016c, + 0x016e, 0x016e, + 0x0170, 0x0170, + 0x0172, 0x0172, + 0x0174, 0x0174, + 0x0176, 0x0176, + 0x0178, 0x0179, + 0x017b, 0x017b, + 0x017d, 0x017d, + 0x0181, 0x0182, + 0x0184, 0x0184, + 0x0186, 0x0187, + 0x0189, 0x018b, + 0x018e, 0x0191, + 0x0193, 0x0194, + 0x0196, 0x0198, + 0x019c, 0x019d, + 0x019f, 0x01a0, + 0x01a2, 0x01a2, + 0x01a4, 0x01a4, + 0x01a6, 0x01a7, + 0x01a9, 0x01a9, + 0x01ac, 0x01ac, + 0x01ae, 0x01af, + 0x01b1, 0x01b3, + 0x01b5, 0x01b5, + 0x01b7, 0x01b8, + 0x01bc, 0x01bc, + 0x01c4, 0x01c4, + 0x01c7, 0x01c7, + 0x01ca, 0x01ca, + 0x01cd, 0x01cd, + 0x01cf, 0x01cf, + 0x01d1, 0x01d1, + 0x01d3, 0x01d3, + 0x01d5, 0x01d5, + 0x01d7, 0x01d7, + 0x01d9, 0x01d9, + 0x01db, 0x01db, + 0x01de, 0x01de, + 0x01e0, 0x01e0, + 0x01e2, 0x01e2, + 0x01e4, 0x01e4, + 0x01e6, 0x01e6, + 0x01e8, 0x01e8, + 0x01ea, 0x01ea, + 0x01ec, 0x01ec, + 0x01ee, 0x01ee, + 0x01f1, 0x01f1, + 0x01f4, 0x01f4, + 0x01f6, 0x01f8, + 0x01fa, 0x01fa, + 0x01fc, 0x01fc, + 0x01fe, 0x01fe, + 0x0200, 0x0200, + 0x0202, 0x0202, + 0x0204, 0x0204, + 0x0206, 0x0206, + 0x0208, 0x0208, + 0x020a, 0x020a, + 0x020c, 0x020c, + 0x020e, 0x020e, + 0x0210, 0x0210, + 0x0212, 0x0212, + 0x0214, 0x0214, + 0x0216, 0x0216, + 0x0218, 0x0218, + 0x021a, 0x021a, + 0x021c, 0x021c, + 0x021e, 0x021e, + 0x0220, 0x0220, + 0x0222, 0x0222, + 0x0224, 0x0224, + 0x0226, 0x0226, + 0x0228, 0x0228, + 0x022a, 0x022a, + 0x022c, 0x022c, + 0x022e, 0x022e, + 0x0230, 0x0230, + 0x0232, 0x0232, + 0x023a, 0x023b, + 0x023d, 0x023e, + 0x0241, 0x0241, + 0x0386, 0x0386, + 0x0388, 0x038a, + 0x038c, 0x038c, + 0x038e, 0x038f, + 0x0391, 0x03a1, + 0x03a3, 0x03ab, + 0x03d2, 0x03d4, + 0x03d8, 0x03d8, + 0x03da, 0x03da, + 
0x03dc, 0x03dc, + 0x03de, 0x03de, + 0x03e0, 0x03e0, + 0x03e2, 0x03e2, + 0x03e4, 0x03e4, + 0x03e6, 0x03e6, + 0x03e8, 0x03e8, + 0x03ea, 0x03ea, + 0x03ec, 0x03ec, + 0x03ee, 0x03ee, + 0x03f4, 0x03f4, + 0x03f7, 0x03f7, + 0x03f9, 0x03fa, + 0x03fd, 0x042f, + 0x0460, 0x0460, + 0x0462, 0x0462, + 0x0464, 0x0464, + 0x0466, 0x0466, + 0x0468, 0x0468, + 0x046a, 0x046a, + 0x046c, 0x046c, + 0x046e, 0x046e, + 0x0470, 0x0470, + 0x0472, 0x0472, + 0x0474, 0x0474, + 0x0476, 0x0476, + 0x0478, 0x0478, + 0x047a, 0x047a, + 0x047c, 0x047c, + 0x047e, 0x047e, + 0x0480, 0x0480, + 0x048a, 0x048a, + 0x048c, 0x048c, + 0x048e, 0x048e, + 0x0490, 0x0490, + 0x0492, 0x0492, + 0x0494, 0x0494, + 0x0496, 0x0496, + 0x0498, 0x0498, + 0x049a, 0x049a, + 0x049c, 0x049c, + 0x049e, 0x049e, + 0x04a0, 0x04a0, + 0x04a2, 0x04a2, + 0x04a4, 0x04a4, + 0x04a6, 0x04a6, + 0x04a8, 0x04a8, + 0x04aa, 0x04aa, + 0x04ac, 0x04ac, + 0x04ae, 0x04ae, + 0x04b0, 0x04b0, + 0x04b2, 0x04b2, + 0x04b4, 0x04b4, + 0x04b6, 0x04b6, + 0x04b8, 0x04b8, + 0x04ba, 0x04ba, + 0x04bc, 0x04bc, + 0x04be, 0x04be, + 0x04c0, 0x04c1, + 0x04c3, 0x04c3, + 0x04c5, 0x04c5, + 0x04c7, 0x04c7, + 0x04c9, 0x04c9, + 0x04cb, 0x04cb, + 0x04cd, 0x04cd, + 0x04d0, 0x04d0, + 0x04d2, 0x04d2, + 0x04d4, 0x04d4, + 0x04d6, 0x04d6, + 0x04d8, 0x04d8, + 0x04da, 0x04da, + 0x04dc, 0x04dc, + 0x04de, 0x04de, + 0x04e0, 0x04e0, + 0x04e2, 0x04e2, + 0x04e4, 0x04e4, + 0x04e6, 0x04e6, + 0x04e8, 0x04e8, + 0x04ea, 0x04ea, + 0x04ec, 0x04ec, + 0x04ee, 0x04ee, + 0x04f0, 0x04f0, + 0x04f2, 0x04f2, + 0x04f4, 0x04f4, + 0x04f6, 0x04f6, + 0x04f8, 0x04f8, + 0x0500, 0x0500, + 0x0502, 0x0502, + 0x0504, 0x0504, + 0x0506, 0x0506, + 0x0508, 0x0508, + 0x050a, 0x050a, + 0x050c, 0x050c, + 0x050e, 0x050e, + 0x0531, 0x0556, + 0x10a0, 0x10c5, + 0x1e00, 0x1e00, + 0x1e02, 0x1e02, + 0x1e04, 0x1e04, + 0x1e06, 0x1e06, + 0x1e08, 0x1e08, + 0x1e0a, 0x1e0a, + 0x1e0c, 0x1e0c, + 0x1e0e, 0x1e0e, + 0x1e10, 0x1e10, + 0x1e12, 0x1e12, + 0x1e14, 0x1e14, + 0x1e16, 0x1e16, + 0x1e18, 0x1e18, + 0x1e1a, 0x1e1a, + 0x1e1c, 0x1e1c, + 
0x1e1e, 0x1e1e, + 0x1e20, 0x1e20, + 0x1e22, 0x1e22, + 0x1e24, 0x1e24, + 0x1e26, 0x1e26, + 0x1e28, 0x1e28, + 0x1e2a, 0x1e2a, + 0x1e2c, 0x1e2c, + 0x1e2e, 0x1e2e, + 0x1e30, 0x1e30, + 0x1e32, 0x1e32, + 0x1e34, 0x1e34, + 0x1e36, 0x1e36, + 0x1e38, 0x1e38, + 0x1e3a, 0x1e3a, + 0x1e3c, 0x1e3c, + 0x1e3e, 0x1e3e, + 0x1e40, 0x1e40, + 0x1e42, 0x1e42, + 0x1e44, 0x1e44, + 0x1e46, 0x1e46, + 0x1e48, 0x1e48, + 0x1e4a, 0x1e4a, + 0x1e4c, 0x1e4c, + 0x1e4e, 0x1e4e, + 0x1e50, 0x1e50, + 0x1e52, 0x1e52, + 0x1e54, 0x1e54, + 0x1e56, 0x1e56, + 0x1e58, 0x1e58, + 0x1e5a, 0x1e5a, + 0x1e5c, 0x1e5c, + 0x1e5e, 0x1e5e, + 0x1e60, 0x1e60, + 0x1e62, 0x1e62, + 0x1e64, 0x1e64, + 0x1e66, 0x1e66, + 0x1e68, 0x1e68, + 0x1e6a, 0x1e6a, + 0x1e6c, 0x1e6c, + 0x1e6e, 0x1e6e, + 0x1e70, 0x1e70, + 0x1e72, 0x1e72, + 0x1e74, 0x1e74, + 0x1e76, 0x1e76, + 0x1e78, 0x1e78, + 0x1e7a, 0x1e7a, + 0x1e7c, 0x1e7c, + 0x1e7e, 0x1e7e, + 0x1e80, 0x1e80, + 0x1e82, 0x1e82, + 0x1e84, 0x1e84, + 0x1e86, 0x1e86, + 0x1e88, 0x1e88, + 0x1e8a, 0x1e8a, + 0x1e8c, 0x1e8c, + 0x1e8e, 0x1e8e, + 0x1e90, 0x1e90, + 0x1e92, 0x1e92, + 0x1e94, 0x1e94, + 0x1ea0, 0x1ea0, + 0x1ea2, 0x1ea2, + 0x1ea4, 0x1ea4, + 0x1ea6, 0x1ea6, + 0x1ea8, 0x1ea8, + 0x1eaa, 0x1eaa, + 0x1eac, 0x1eac, + 0x1eae, 0x1eae, + 0x1eb0, 0x1eb0, + 0x1eb2, 0x1eb2, + 0x1eb4, 0x1eb4, + 0x1eb6, 0x1eb6, + 0x1eb8, 0x1eb8, + 0x1eba, 0x1eba, + 0x1ebc, 0x1ebc, + 0x1ebe, 0x1ebe, + 0x1ec0, 0x1ec0, + 0x1ec2, 0x1ec2, + 0x1ec4, 0x1ec4, + 0x1ec6, 0x1ec6, + 0x1ec8, 0x1ec8, + 0x1eca, 0x1eca, + 0x1ecc, 0x1ecc, + 0x1ece, 0x1ece, + 0x1ed0, 0x1ed0, + 0x1ed2, 0x1ed2, + 0x1ed4, 0x1ed4, + 0x1ed6, 0x1ed6, + 0x1ed8, 0x1ed8, + 0x1eda, 0x1eda, + 0x1edc, 0x1edc, + 0x1ede, 0x1ede, + 0x1ee0, 0x1ee0, + 0x1ee2, 0x1ee2, + 0x1ee4, 0x1ee4, + 0x1ee6, 0x1ee6, + 0x1ee8, 0x1ee8, + 0x1eea, 0x1eea, + 0x1eec, 0x1eec, + 0x1eee, 0x1eee, + 0x1ef0, 0x1ef0, + 0x1ef2, 0x1ef2, + 0x1ef4, 0x1ef4, + 0x1ef6, 0x1ef6, + 0x1ef8, 0x1ef8, + 0x1f08, 0x1f0f, + 0x1f18, 0x1f1d, + 0x1f28, 0x1f2f, + 0x1f38, 0x1f3f, + 0x1f48, 0x1f4d, + 0x1f59, 0x1f59, + 
0x1f5b, 0x1f5b, + 0x1f5d, 0x1f5d, + 0x1f5f, 0x1f5f, + 0x1f68, 0x1f6f, + 0x1fb8, 0x1fbb, + 0x1fc8, 0x1fcb, + 0x1fd8, 0x1fdb, + 0x1fe8, 0x1fec, + 0x1ff8, 0x1ffb, + 0x2102, 0x2102, + 0x2107, 0x2107, + 0x210b, 0x210d, + 0x2110, 0x2112, + 0x2115, 0x2115, + 0x2119, 0x211d, + 0x2124, 0x2124, + 0x2126, 0x2126, + 0x2128, 0x2128, + 0x212a, 0x212d, + 0x2130, 0x2131, + 0x2133, 0x2133, + 0x213e, 0x213f, + 0x2145, 0x2145, + 0x2c00, 0x2c2e, + 0x2c80, 0x2c80, + 0x2c82, 0x2c82, + 0x2c84, 0x2c84, + 0x2c86, 0x2c86, + 0x2c88, 0x2c88, + 0x2c8a, 0x2c8a, + 0x2c8c, 0x2c8c, + 0x2c8e, 0x2c8e, + 0x2c90, 0x2c90, + 0x2c92, 0x2c92, + 0x2c94, 0x2c94, + 0x2c96, 0x2c96, + 0x2c98, 0x2c98, + 0x2c9a, 0x2c9a, + 0x2c9c, 0x2c9c, + 0x2c9e, 0x2c9e, + 0x2ca0, 0x2ca0, + 0x2ca2, 0x2ca2, + 0x2ca4, 0x2ca4, + 0x2ca6, 0x2ca6, + 0x2ca8, 0x2ca8, + 0x2caa, 0x2caa, + 0x2cac, 0x2cac, + 0x2cae, 0x2cae, + 0x2cb0, 0x2cb0, + 0x2cb2, 0x2cb2, + 0x2cb4, 0x2cb4, + 0x2cb6, 0x2cb6, + 0x2cb8, 0x2cb8, + 0x2cba, 0x2cba, + 0x2cbc, 0x2cbc, + 0x2cbe, 0x2cbe, + 0x2cc0, 0x2cc0, + 0x2cc2, 0x2cc2, + 0x2cc4, 0x2cc4, + 0x2cc6, 0x2cc6, + 0x2cc8, 0x2cc8, + 0x2cca, 0x2cca, + 0x2ccc, 0x2ccc, + 0x2cce, 0x2cce, + 0x2cd0, 0x2cd0, + 0x2cd2, 0x2cd2, + 0x2cd4, 0x2cd4, + 0x2cd6, 0x2cd6, + 0x2cd8, 0x2cd8, + 0x2cda, 0x2cda, + 0x2cdc, 0x2cdc, + 0x2cde, 0x2cde, + 0x2ce0, 0x2ce0, + 0x2ce2, 0x2ce2, + 0xff21, 0xff3a, + 0x10400, 0x10427, + 0x1d400, 0x1d419, + 0x1d434, 0x1d44d, + 0x1d468, 0x1d481, + 0x1d49c, 0x1d49c, + 0x1d49e, 0x1d49f, + 0x1d4a2, 0x1d4a2, + 0x1d4a5, 0x1d4a6, + 0x1d4a9, 0x1d4ac, + 0x1d4ae, 0x1d4b5, + 0x1d4d0, 0x1d4e9, + 0x1d504, 0x1d505, + 0x1d507, 0x1d50a, + 0x1d50d, 0x1d514, + 0x1d516, 0x1d51c, + 0x1d538, 0x1d539, + 0x1d53b, 0x1d53e, + 0x1d540, 0x1d544, + 0x1d546, 0x1d546, + 0x1d54a, 0x1d550, + 0x1d56c, 0x1d585, + 0x1d5a0, 0x1d5b9, + 0x1d5d4, 0x1d5ed, + 0x1d608, 0x1d621, + 0x1d63c, 0x1d655, + 0x1d670, 0x1d689, + 0x1d6a8, 0x1d6c0, + 0x1d6e2, 0x1d6fa, + 0x1d71c, 0x1d734, + 0x1d756, 0x1d76e, + 0x1d790, 0x1d7a8 +}; /* CR_Upper */ + +/* 
'XDigit': [[:XDigit:]] */ +static const OnigCodePoint CR_XDigit[] = { + 3, + 0x0030, 0x0039, + 0x0041, 0x0046, + 0x0061, 0x0066 +}; /* CR_XDigit */ + +/* 'Word': [[:Word:]] */ +static const OnigCodePoint CR_Word[] = { + 464, + 0x0030, 0x0039, + 0x0041, 0x005a, + 0x005f, 0x005f, + 0x0061, 0x007a, + 0x00aa, 0x00aa, + 0x00b2, 0x00b3, + 0x00b5, 0x00b5, + 0x00b9, 0x00ba, + 0x00bc, 0x00be, + 0x00c0, 0x00d6, + 0x00d8, 0x00f6, + 0x00f8, 0x0241, + 0x0250, 0x02c1, + 0x02c6, 0x02d1, + 0x02e0, 0x02e4, + 0x02ee, 0x02ee, + 0x0300, 0x036f, + 0x037a, 0x037a, + 0x0386, 0x0386, + 0x0388, 0x038a, + 0x038c, 0x038c, + 0x038e, 0x03a1, + 0x03a3, 0x03ce, + 0x03d0, 0x03f5, + 0x03f7, 0x0481, + 0x0483, 0x0486, + 0x0488, 0x04ce, + 0x04d0, 0x04f9, + 0x0500, 0x050f, + 0x0531, 0x0556, + 0x0559, 0x0559, + 0x0561, 0x0587, + 0x0591, 0x05b9, + 0x05bb, 0x05bd, + 0x05bf, 0x05bf, + 0x05c1, 0x05c2, + 0x05c4, 0x05c5, + 0x05c7, 0x05c7, + 0x05d0, 0x05ea, + 0x05f0, 0x05f2, + 0x0610, 0x0615, + 0x0621, 0x063a, + 0x0640, 0x065e, + 0x0660, 0x0669, + 0x066e, 0x06d3, + 0x06d5, 0x06dc, + 0x06de, 0x06e8, + 0x06ea, 0x06fc, + 0x06ff, 0x06ff, + 0x0710, 0x074a, + 0x074d, 0x076d, + 0x0780, 0x07b1, + 0x0901, 0x0939, + 0x093c, 0x094d, + 0x0950, 0x0954, + 0x0958, 0x0963, + 0x0966, 0x096f, + 0x097d, 0x097d, + 0x0981, 0x0983, + 0x0985, 0x098c, + 0x098f, 0x0990, + 0x0993, 0x09a8, + 0x09aa, 0x09b0, + 0x09b2, 0x09b2, + 0x09b6, 0x09b9, + 0x09bc, 0x09c4, + 0x09c7, 0x09c8, + 0x09cb, 0x09ce, + 0x09d7, 0x09d7, + 0x09dc, 0x09dd, + 0x09df, 0x09e3, + 0x09e6, 0x09f1, + 0x09f4, 0x09f9, + 0x0a01, 0x0a03, + 0x0a05, 0x0a0a, + 0x0a0f, 0x0a10, + 0x0a13, 0x0a28, + 0x0a2a, 0x0a30, + 0x0a32, 0x0a33, + 0x0a35, 0x0a36, + 0x0a38, 0x0a39, + 0x0a3c, 0x0a3c, + 0x0a3e, 0x0a42, + 0x0a47, 0x0a48, + 0x0a4b, 0x0a4d, + 0x0a59, 0x0a5c, + 0x0a5e, 0x0a5e, + 0x0a66, 0x0a74, + 0x0a81, 0x0a83, + 0x0a85, 0x0a8d, + 0x0a8f, 0x0a91, + 0x0a93, 0x0aa8, + 0x0aaa, 0x0ab0, + 0x0ab2, 0x0ab3, + 0x0ab5, 0x0ab9, + 0x0abc, 0x0ac5, + 0x0ac7, 0x0ac9, + 0x0acb, 0x0acd, + 0x0ad0, 
0x0ad0, + 0x0ae0, 0x0ae3, + 0x0ae6, 0x0aef, + 0x0b01, 0x0b03, + 0x0b05, 0x0b0c, + 0x0b0f, 0x0b10, + 0x0b13, 0x0b28, + 0x0b2a, 0x0b30, + 0x0b32, 0x0b33, + 0x0b35, 0x0b39, + 0x0b3c, 0x0b43, + 0x0b47, 0x0b48, + 0x0b4b, 0x0b4d, + 0x0b56, 0x0b57, + 0x0b5c, 0x0b5d, + 0x0b5f, 0x0b61, + 0x0b66, 0x0b6f, + 0x0b71, 0x0b71, + 0x0b82, 0x0b83, + 0x0b85, 0x0b8a, + 0x0b8e, 0x0b90, + 0x0b92, 0x0b95, + 0x0b99, 0x0b9a, + 0x0b9c, 0x0b9c, + 0x0b9e, 0x0b9f, + 0x0ba3, 0x0ba4, + 0x0ba8, 0x0baa, + 0x0bae, 0x0bb9, + 0x0bbe, 0x0bc2, + 0x0bc6, 0x0bc8, + 0x0bca, 0x0bcd, + 0x0bd7, 0x0bd7, + 0x0be6, 0x0bf2, + 0x0c01, 0x0c03, + 0x0c05, 0x0c0c, + 0x0c0e, 0x0c10, + 0x0c12, 0x0c28, + 0x0c2a, 0x0c33, + 0x0c35, 0x0c39, + 0x0c3e, 0x0c44, + 0x0c46, 0x0c48, + 0x0c4a, 0x0c4d, + 0x0c55, 0x0c56, + 0x0c60, 0x0c61, + 0x0c66, 0x0c6f, + 0x0c82, 0x0c83, + 0x0c85, 0x0c8c, + 0x0c8e, 0x0c90, + 0x0c92, 0x0ca8, + 0x0caa, 0x0cb3, + 0x0cb5, 0x0cb9, + 0x0cbc, 0x0cc4, + 0x0cc6, 0x0cc8, + 0x0cca, 0x0ccd, + 0x0cd5, 0x0cd6, + 0x0cde, 0x0cde, + 0x0ce0, 0x0ce1, + 0x0ce6, 0x0cef, + 0x0d02, 0x0d03, + 0x0d05, 0x0d0c, + 0x0d0e, 0x0d10, + 0x0d12, 0x0d28, + 0x0d2a, 0x0d39, + 0x0d3e, 0x0d43, + 0x0d46, 0x0d48, + 0x0d4a, 0x0d4d, + 0x0d57, 0x0d57, + 0x0d60, 0x0d61, + 0x0d66, 0x0d6f, + 0x0d82, 0x0d83, + 0x0d85, 0x0d96, + 0x0d9a, 0x0db1, + 0x0db3, 0x0dbb, + 0x0dbd, 0x0dbd, + 0x0dc0, 0x0dc6, + 0x0dca, 0x0dca, + 0x0dcf, 0x0dd4, + 0x0dd6, 0x0dd6, + 0x0dd8, 0x0ddf, + 0x0df2, 0x0df3, + 0x0e01, 0x0e3a, + 0x0e40, 0x0e4e, + 0x0e50, 0x0e59, + 0x0e81, 0x0e82, + 0x0e84, 0x0e84, + 0x0e87, 0x0e88, + 0x0e8a, 0x0e8a, + 0x0e8d, 0x0e8d, + 0x0e94, 0x0e97, + 0x0e99, 0x0e9f, + 0x0ea1, 0x0ea3, + 0x0ea5, 0x0ea5, + 0x0ea7, 0x0ea7, + 0x0eaa, 0x0eab, + 0x0ead, 0x0eb9, + 0x0ebb, 0x0ebd, + 0x0ec0, 0x0ec4, + 0x0ec6, 0x0ec6, + 0x0ec8, 0x0ecd, + 0x0ed0, 0x0ed9, + 0x0edc, 0x0edd, + 0x0f00, 0x0f00, + 0x0f18, 0x0f19, + 0x0f20, 0x0f33, + 0x0f35, 0x0f35, + 0x0f37, 0x0f37, + 0x0f39, 0x0f39, + 0x0f3e, 0x0f47, + 0x0f49, 0x0f6a, + 0x0f71, 0x0f84, + 0x0f86, 0x0f8b, + 0x0f90, 
0x0f97, + 0x0f99, 0x0fbc, + 0x0fc6, 0x0fc6, + 0x1000, 0x1021, + 0x1023, 0x1027, + 0x1029, 0x102a, + 0x102c, 0x1032, + 0x1036, 0x1039, + 0x1040, 0x1049, + 0x1050, 0x1059, + 0x10a0, 0x10c5, + 0x10d0, 0x10fa, + 0x10fc, 0x10fc, + 0x1100, 0x1159, + 0x115f, 0x11a2, + 0x11a8, 0x11f9, + 0x1200, 0x1248, + 0x124a, 0x124d, + 0x1250, 0x1256, + 0x1258, 0x1258, + 0x125a, 0x125d, + 0x1260, 0x1288, + 0x128a, 0x128d, + 0x1290, 0x12b0, + 0x12b2, 0x12b5, + 0x12b8, 0x12be, + 0x12c0, 0x12c0, + 0x12c2, 0x12c5, + 0x12c8, 0x12d6, + 0x12d8, 0x1310, + 0x1312, 0x1315, + 0x1318, 0x135a, + 0x135f, 0x135f, + 0x1369, 0x137c, + 0x1380, 0x138f, + 0x13a0, 0x13f4, + 0x1401, 0x166c, + 0x166f, 0x1676, + 0x1681, 0x169a, + 0x16a0, 0x16ea, + 0x16ee, 0x16f0, + 0x1700, 0x170c, + 0x170e, 0x1714, + 0x1720, 0x1734, + 0x1740, 0x1753, + 0x1760, 0x176c, + 0x176e, 0x1770, + 0x1772, 0x1773, + 0x1780, 0x17b3, + 0x17b6, 0x17d3, + 0x17d7, 0x17d7, + 0x17dc, 0x17dd, + 0x17e0, 0x17e9, + 0x17f0, 0x17f9, + 0x180b, 0x180d, + 0x1810, 0x1819, + 0x1820, 0x1877, + 0x1880, 0x18a9, + 0x1900, 0x191c, + 0x1920, 0x192b, + 0x1930, 0x193b, + 0x1946, 0x196d, + 0x1970, 0x1974, + 0x1980, 0x19a9, + 0x19b0, 0x19c9, + 0x19d0, 0x19d9, + 0x1a00, 0x1a1b, + 0x1d00, 0x1dc3, + 0x1e00, 0x1e9b, + 0x1ea0, 0x1ef9, + 0x1f00, 0x1f15, + 0x1f18, 0x1f1d, + 0x1f20, 0x1f45, + 0x1f48, 0x1f4d, + 0x1f50, 0x1f57, + 0x1f59, 0x1f59, + 0x1f5b, 0x1f5b, + 0x1f5d, 0x1f5d, + 0x1f5f, 0x1f7d, + 0x1f80, 0x1fb4, + 0x1fb6, 0x1fbc, + 0x1fbe, 0x1fbe, + 0x1fc2, 0x1fc4, + 0x1fc6, 0x1fcc, + 0x1fd0, 0x1fd3, + 0x1fd6, 0x1fdb, + 0x1fe0, 0x1fec, + 0x1ff2, 0x1ff4, + 0x1ff6, 0x1ffc, + 0x203f, 0x2040, + 0x2054, 0x2054, + 0x2070, 0x2071, + 0x2074, 0x2079, + 0x207f, 0x2089, + 0x2090, 0x2094, + 0x20d0, 0x20eb, + 0x2102, 0x2102, + 0x2107, 0x2107, + 0x210a, 0x2113, + 0x2115, 0x2115, + 0x2119, 0x211d, + 0x2124, 0x2124, + 0x2126, 0x2126, + 0x2128, 0x2128, + 0x212a, 0x212d, + 0x212f, 0x2131, + 0x2133, 0x2139, + 0x213c, 0x213f, + 0x2145, 0x2149, + 0x2153, 0x2183, + 0x2460, 0x249b, + 0x24ea, 
0x24ff, + 0x2776, 0x2793, + 0x2c00, 0x2c2e, + 0x2c30, 0x2c5e, + 0x2c80, 0x2ce4, + 0x2cfd, 0x2cfd, + 0x2d00, 0x2d25, + 0x2d30, 0x2d65, + 0x2d6f, 0x2d6f, + 0x2d80, 0x2d96, + 0x2da0, 0x2da6, + 0x2da8, 0x2dae, + 0x2db0, 0x2db6, + 0x2db8, 0x2dbe, + 0x2dc0, 0x2dc6, + 0x2dc8, 0x2dce, + 0x2dd0, 0x2dd6, + 0x2dd8, 0x2dde, + 0x3005, 0x3007, + 0x3021, 0x302f, + 0x3031, 0x3035, + 0x3038, 0x303c, + 0x3041, 0x3096, + 0x3099, 0x309a, + 0x309d, 0x309f, + 0x30a1, 0x30fa, + 0x30fc, 0x30ff, + 0x3105, 0x312c, + 0x3131, 0x318e, + 0x3192, 0x3195, + 0x31a0, 0x31b7, + 0x31f0, 0x31ff, + 0x3220, 0x3229, + 0x3251, 0x325f, + 0x3280, 0x3289, + 0x32b1, 0x32bf, + 0x3400, 0x4db5, + 0x4e00, 0x9fbb, + 0xa000, 0xa48c, + 0xa800, 0xa827, + 0xac00, 0xd7a3, + 0xf900, 0xfa2d, + 0xfa30, 0xfa6a, + 0xfa70, 0xfad9, + 0xfb00, 0xfb06, + 0xfb13, 0xfb17, + 0xfb1d, 0xfb28, + 0xfb2a, 0xfb36, + 0xfb38, 0xfb3c, + 0xfb3e, 0xfb3e, + 0xfb40, 0xfb41, + 0xfb43, 0xfb44, + 0xfb46, 0xfbb1, + 0xfbd3, 0xfd3d, + 0xfd50, 0xfd8f, + 0xfd92, 0xfdc7, + 0xfdf0, 0xfdfb, + 0xfe00, 0xfe0f, + 0xfe20, 0xfe23, + 0xfe33, 0xfe34, + 0xfe4d, 0xfe4f, + 0xfe70, 0xfe74, + 0xfe76, 0xfefc, + 0xff10, 0xff19, + 0xff21, 0xff3a, + 0xff3f, 0xff3f, + 0xff41, 0xff5a, + 0xff66, 0xffbe, + 0xffc2, 0xffc7, + 0xffca, 0xffcf, + 0xffd2, 0xffd7, + 0xffda, 0xffdc, + 0x10000, 0x1000b, + 0x1000d, 0x10026, + 0x10028, 0x1003a, + 0x1003c, 0x1003d, + 0x1003f, 0x1004d, + 0x10050, 0x1005d, + 0x10080, 0x100fa, + 0x10107, 0x10133, + 0x10140, 0x10178, + 0x1018a, 0x1018a, + 0x10300, 0x1031e, + 0x10320, 0x10323, + 0x10330, 0x1034a, + 0x10380, 0x1039d, + 0x103a0, 0x103c3, + 0x103c8, 0x103cf, + 0x103d1, 0x103d5, + 0x10400, 0x1049d, + 0x104a0, 0x104a9, + 0x10800, 0x10805, + 0x10808, 0x10808, + 0x1080a, 0x10835, + 0x10837, 0x10838, + 0x1083c, 0x1083c, + 0x1083f, 0x1083f, + 0x10a00, 0x10a03, + 0x10a05, 0x10a06, + 0x10a0c, 0x10a13, + 0x10a15, 0x10a17, + 0x10a19, 0x10a33, + 0x10a38, 0x10a3a, + 0x10a3f, 0x10a47, + 0x1d165, 0x1d169, + 0x1d16d, 0x1d172, + 0x1d17b, 0x1d182, + 0x1d185, 
0x1d18b, + 0x1d1aa, 0x1d1ad, + 0x1d242, 0x1d244, + 0x1d400, 0x1d454, + 0x1d456, 0x1d49c, + 0x1d49e, 0x1d49f, + 0x1d4a2, 0x1d4a2, + 0x1d4a5, 0x1d4a6, + 0x1d4a9, 0x1d4ac, + 0x1d4ae, 0x1d4b9, + 0x1d4bb, 0x1d4bb, + 0x1d4bd, 0x1d4c3, + 0x1d4c5, 0x1d505, + 0x1d507, 0x1d50a, + 0x1d50d, 0x1d514, + 0x1d516, 0x1d51c, + 0x1d51e, 0x1d539, + 0x1d53b, 0x1d53e, + 0x1d540, 0x1d544, + 0x1d546, 0x1d546, + 0x1d54a, 0x1d550, + 0x1d552, 0x1d6a5, + 0x1d6a8, 0x1d6c0, + 0x1d6c2, 0x1d6da, + 0x1d6dc, 0x1d6fa, + 0x1d6fc, 0x1d714, + 0x1d716, 0x1d734, + 0x1d736, 0x1d74e, + 0x1d750, 0x1d76e, + 0x1d770, 0x1d788, + 0x1d78a, 0x1d7a8, + 0x1d7aa, 0x1d7c2, + 0x1d7c4, 0x1d7c9, + 0x1d7ce, 0x1d7ff, + 0x20000, 0x2a6d6, + 0x2f800, 0x2fa1d, + 0xe0100, 0xe01ef +}; /* CR_Word */ + +/* 'Alnum': [[:Alnum:]] */ +static const OnigCodePoint CR_Alnum[] = { + 436, + 0x0030, 0x0039, + 0x0041, 0x005a, + 0x0061, 0x007a, + 0x00aa, 0x00aa, + 0x00b5, 0x00b5, + 0x00ba, 0x00ba, + 0x00c0, 0x00d6, + 0x00d8, 0x00f6, + 0x00f8, 0x0241, + 0x0250, 0x02c1, + 0x02c6, 0x02d1, + 0x02e0, 0x02e4, + 0x02ee, 0x02ee, + 0x0300, 0x036f, + 0x037a, 0x037a, + 0x0386, 0x0386, + 0x0388, 0x038a, + 0x038c, 0x038c, + 0x038e, 0x03a1, + 0x03a3, 0x03ce, + 0x03d0, 0x03f5, + 0x03f7, 0x0481, + 0x0483, 0x0486, + 0x0488, 0x04ce, + 0x04d0, 0x04f9, + 0x0500, 0x050f, + 0x0531, 0x0556, + 0x0559, 0x0559, + 0x0561, 0x0587, + 0x0591, 0x05b9, + 0x05bb, 0x05bd, + 0x05bf, 0x05bf, + 0x05c1, 0x05c2, + 0x05c4, 0x05c5, + 0x05c7, 0x05c7, + 0x05d0, 0x05ea, + 0x05f0, 0x05f2, + 0x0610, 0x0615, + 0x0621, 0x063a, + 0x0640, 0x065e, + 0x0660, 0x0669, + 0x066e, 0x06d3, + 0x06d5, 0x06dc, + 0x06de, 0x06e8, + 0x06ea, 0x06fc, + 0x06ff, 0x06ff, + 0x0710, 0x074a, + 0x074d, 0x076d, + 0x0780, 0x07b1, + 0x0901, 0x0939, + 0x093c, 0x094d, + 0x0950, 0x0954, + 0x0958, 0x0963, + 0x0966, 0x096f, + 0x097d, 0x097d, + 0x0981, 0x0983, + 0x0985, 0x098c, + 0x098f, 0x0990, + 0x0993, 0x09a8, + 0x09aa, 0x09b0, + 0x09b2, 0x09b2, + 0x09b6, 0x09b9, + 0x09bc, 0x09c4, + 0x09c7, 0x09c8, + 0x09cb, 0x09ce, + 
0x09d7, 0x09d7, + 0x09dc, 0x09dd, + 0x09df, 0x09e3, + 0x09e6, 0x09f1, + 0x0a01, 0x0a03, + 0x0a05, 0x0a0a, + 0x0a0f, 0x0a10, + 0x0a13, 0x0a28, + 0x0a2a, 0x0a30, + 0x0a32, 0x0a33, + 0x0a35, 0x0a36, + 0x0a38, 0x0a39, + 0x0a3c, 0x0a3c, + 0x0a3e, 0x0a42, + 0x0a47, 0x0a48, + 0x0a4b, 0x0a4d, + 0x0a59, 0x0a5c, + 0x0a5e, 0x0a5e, + 0x0a66, 0x0a74, + 0x0a81, 0x0a83, + 0x0a85, 0x0a8d, + 0x0a8f, 0x0a91, + 0x0a93, 0x0aa8, + 0x0aaa, 0x0ab0, + 0x0ab2, 0x0ab3, + 0x0ab5, 0x0ab9, + 0x0abc, 0x0ac5, + 0x0ac7, 0x0ac9, + 0x0acb, 0x0acd, + 0x0ad0, 0x0ad0, + 0x0ae0, 0x0ae3, + 0x0ae6, 0x0aef, + 0x0b01, 0x0b03, + 0x0b05, 0x0b0c, + 0x0b0f, 0x0b10, + 0x0b13, 0x0b28, + 0x0b2a, 0x0b30, + 0x0b32, 0x0b33, + 0x0b35, 0x0b39, + 0x0b3c, 0x0b43, + 0x0b47, 0x0b48, + 0x0b4b, 0x0b4d, + 0x0b56, 0x0b57, + 0x0b5c, 0x0b5d, + 0x0b5f, 0x0b61, + 0x0b66, 0x0b6f, + 0x0b71, 0x0b71, + 0x0b82, 0x0b83, + 0x0b85, 0x0b8a, + 0x0b8e, 0x0b90, + 0x0b92, 0x0b95, + 0x0b99, 0x0b9a, + 0x0b9c, 0x0b9c, + 0x0b9e, 0x0b9f, + 0x0ba3, 0x0ba4, + 0x0ba8, 0x0baa, + 0x0bae, 0x0bb9, + 0x0bbe, 0x0bc2, + 0x0bc6, 0x0bc8, + 0x0bca, 0x0bcd, + 0x0bd7, 0x0bd7, + 0x0be6, 0x0bef, + 0x0c01, 0x0c03, + 0x0c05, 0x0c0c, + 0x0c0e, 0x0c10, + 0x0c12, 0x0c28, + 0x0c2a, 0x0c33, + 0x0c35, 0x0c39, + 0x0c3e, 0x0c44, + 0x0c46, 0x0c48, + 0x0c4a, 0x0c4d, + 0x0c55, 0x0c56, + 0x0c60, 0x0c61, + 0x0c66, 0x0c6f, + 0x0c82, 0x0c83, + 0x0c85, 0x0c8c, + 0x0c8e, 0x0c90, + 0x0c92, 0x0ca8, + 0x0caa, 0x0cb3, + 0x0cb5, 0x0cb9, + 0x0cbc, 0x0cc4, + 0x0cc6, 0x0cc8, + 0x0cca, 0x0ccd, + 0x0cd5, 0x0cd6, + 0x0cde, 0x0cde, + 0x0ce0, 0x0ce1, + 0x0ce6, 0x0cef, + 0x0d02, 0x0d03, + 0x0d05, 0x0d0c, + 0x0d0e, 0x0d10, + 0x0d12, 0x0d28, + 0x0d2a, 0x0d39, + 0x0d3e, 0x0d43, + 0x0d46, 0x0d48, + 0x0d4a, 0x0d4d, + 0x0d57, 0x0d57, + 0x0d60, 0x0d61, + 0x0d66, 0x0d6f, + 0x0d82, 0x0d83, + 0x0d85, 0x0d96, + 0x0d9a, 0x0db1, + 0x0db3, 0x0dbb, + 0x0dbd, 0x0dbd, + 0x0dc0, 0x0dc6, + 0x0dca, 0x0dca, + 0x0dcf, 0x0dd4, + 0x0dd6, 0x0dd6, + 0x0dd8, 0x0ddf, + 0x0df2, 0x0df3, + 0x0e01, 0x0e3a, + 0x0e40, 0x0e4e, + 
0x0e50, 0x0e59, + 0x0e81, 0x0e82, + 0x0e84, 0x0e84, + 0x0e87, 0x0e88, + 0x0e8a, 0x0e8a, + 0x0e8d, 0x0e8d, + 0x0e94, 0x0e97, + 0x0e99, 0x0e9f, + 0x0ea1, 0x0ea3, + 0x0ea5, 0x0ea5, + 0x0ea7, 0x0ea7, + 0x0eaa, 0x0eab, + 0x0ead, 0x0eb9, + 0x0ebb, 0x0ebd, + 0x0ec0, 0x0ec4, + 0x0ec6, 0x0ec6, + 0x0ec8, 0x0ecd, + 0x0ed0, 0x0ed9, + 0x0edc, 0x0edd, + 0x0f00, 0x0f00, + 0x0f18, 0x0f19, + 0x0f20, 0x0f29, + 0x0f35, 0x0f35, + 0x0f37, 0x0f37, + 0x0f39, 0x0f39, + 0x0f3e, 0x0f47, + 0x0f49, 0x0f6a, + 0x0f71, 0x0f84, + 0x0f86, 0x0f8b, + 0x0f90, 0x0f97, + 0x0f99, 0x0fbc, + 0x0fc6, 0x0fc6, + 0x1000, 0x1021, + 0x1023, 0x1027, + 0x1029, 0x102a, + 0x102c, 0x1032, + 0x1036, 0x1039, + 0x1040, 0x1049, + 0x1050, 0x1059, + 0x10a0, 0x10c5, + 0x10d0, 0x10fa, + 0x10fc, 0x10fc, + 0x1100, 0x1159, + 0x115f, 0x11a2, + 0x11a8, 0x11f9, + 0x1200, 0x1248, + 0x124a, 0x124d, + 0x1250, 0x1256, + 0x1258, 0x1258, + 0x125a, 0x125d, + 0x1260, 0x1288, + 0x128a, 0x128d, + 0x1290, 0x12b0, + 0x12b2, 0x12b5, + 0x12b8, 0x12be, + 0x12c0, 0x12c0, + 0x12c2, 0x12c5, + 0x12c8, 0x12d6, + 0x12d8, 0x1310, + 0x1312, 0x1315, + 0x1318, 0x135a, + 0x135f, 0x135f, + 0x1380, 0x138f, + 0x13a0, 0x13f4, + 0x1401, 0x166c, + 0x166f, 0x1676, + 0x1681, 0x169a, + 0x16a0, 0x16ea, + 0x1700, 0x170c, + 0x170e, 0x1714, + 0x1720, 0x1734, + 0x1740, 0x1753, + 0x1760, 0x176c, + 0x176e, 0x1770, + 0x1772, 0x1773, + 0x1780, 0x17b3, + 0x17b6, 0x17d3, + 0x17d7, 0x17d7, + 0x17dc, 0x17dd, + 0x17e0, 0x17e9, + 0x180b, 0x180d, + 0x1810, 0x1819, + 0x1820, 0x1877, + 0x1880, 0x18a9, + 0x1900, 0x191c, + 0x1920, 0x192b, + 0x1930, 0x193b, + 0x1946, 0x196d, + 0x1970, 0x1974, + 0x1980, 0x19a9, + 0x19b0, 0x19c9, + 0x19d0, 0x19d9, + 0x1a00, 0x1a1b, + 0x1d00, 0x1dc3, + 0x1e00, 0x1e9b, + 0x1ea0, 0x1ef9, + 0x1f00, 0x1f15, + 0x1f18, 0x1f1d, + 0x1f20, 0x1f45, + 0x1f48, 0x1f4d, + 0x1f50, 0x1f57, + 0x1f59, 0x1f59, + 0x1f5b, 0x1f5b, + 0x1f5d, 0x1f5d, + 0x1f5f, 0x1f7d, + 0x1f80, 0x1fb4, + 0x1fb6, 0x1fbc, + 0x1fbe, 0x1fbe, + 0x1fc2, 0x1fc4, + 0x1fc6, 0x1fcc, + 0x1fd0, 0x1fd3, + 
0x1fd6, 0x1fdb, + 0x1fe0, 0x1fec, + 0x1ff2, 0x1ff4, + 0x1ff6, 0x1ffc, + 0x2071, 0x2071, + 0x207f, 0x207f, + 0x2090, 0x2094, + 0x20d0, 0x20eb, + 0x2102, 0x2102, + 0x2107, 0x2107, + 0x210a, 0x2113, + 0x2115, 0x2115, + 0x2119, 0x211d, + 0x2124, 0x2124, + 0x2126, 0x2126, + 0x2128, 0x2128, + 0x212a, 0x212d, + 0x212f, 0x2131, + 0x2133, 0x2139, + 0x213c, 0x213f, + 0x2145, 0x2149, + 0x2c00, 0x2c2e, + 0x2c30, 0x2c5e, + 0x2c80, 0x2ce4, + 0x2d00, 0x2d25, + 0x2d30, 0x2d65, + 0x2d6f, 0x2d6f, + 0x2d80, 0x2d96, + 0x2da0, 0x2da6, + 0x2da8, 0x2dae, + 0x2db0, 0x2db6, + 0x2db8, 0x2dbe, + 0x2dc0, 0x2dc6, + 0x2dc8, 0x2dce, + 0x2dd0, 0x2dd6, + 0x2dd8, 0x2dde, + 0x3005, 0x3006, + 0x302a, 0x302f, + 0x3031, 0x3035, + 0x303b, 0x303c, + 0x3041, 0x3096, + 0x3099, 0x309a, + 0x309d, 0x309f, + 0x30a1, 0x30fa, + 0x30fc, 0x30ff, + 0x3105, 0x312c, + 0x3131, 0x318e, + 0x31a0, 0x31b7, + 0x31f0, 0x31ff, + 0x3400, 0x4db5, + 0x4e00, 0x9fbb, + 0xa000, 0xa48c, + 0xa800, 0xa827, + 0xac00, 0xd7a3, + 0xf900, 0xfa2d, + 0xfa30, 0xfa6a, + 0xfa70, 0xfad9, + 0xfb00, 0xfb06, + 0xfb13, 0xfb17, + 0xfb1d, 0xfb28, + 0xfb2a, 0xfb36, + 0xfb38, 0xfb3c, + 0xfb3e, 0xfb3e, + 0xfb40, 0xfb41, + 0xfb43, 0xfb44, + 0xfb46, 0xfbb1, + 0xfbd3, 0xfd3d, + 0xfd50, 0xfd8f, + 0xfd92, 0xfdc7, + 0xfdf0, 0xfdfb, + 0xfe00, 0xfe0f, + 0xfe20, 0xfe23, + 0xfe70, 0xfe74, + 0xfe76, 0xfefc, + 0xff10, 0xff19, + 0xff21, 0xff3a, + 0xff41, 0xff5a, + 0xff66, 0xffbe, + 0xffc2, 0xffc7, + 0xffca, 0xffcf, + 0xffd2, 0xffd7, + 0xffda, 0xffdc, + 0x10000, 0x1000b, + 0x1000d, 0x10026, + 0x10028, 0x1003a, + 0x1003c, 0x1003d, + 0x1003f, 0x1004d, + 0x10050, 0x1005d, + 0x10080, 0x100fa, + 0x10300, 0x1031e, + 0x10330, 0x10349, + 0x10380, 0x1039d, + 0x103a0, 0x103c3, + 0x103c8, 0x103cf, + 0x10400, 0x1049d, + 0x104a0, 0x104a9, + 0x10800, 0x10805, + 0x10808, 0x10808, + 0x1080a, 0x10835, + 0x10837, 0x10838, + 0x1083c, 0x1083c, + 0x1083f, 0x1083f, + 0x10a00, 0x10a03, + 0x10a05, 0x10a06, + 0x10a0c, 0x10a13, + 0x10a15, 0x10a17, + 0x10a19, 0x10a33, + 0x10a38, 0x10a3a, + 
0x10a3f, 0x10a3f, + 0x1d165, 0x1d169, + 0x1d16d, 0x1d172, + 0x1d17b, 0x1d182, + 0x1d185, 0x1d18b, + 0x1d1aa, 0x1d1ad, + 0x1d242, 0x1d244, + 0x1d400, 0x1d454, + 0x1d456, 0x1d49c, + 0x1d49e, 0x1d49f, + 0x1d4a2, 0x1d4a2, + 0x1d4a5, 0x1d4a6, + 0x1d4a9, 0x1d4ac, + 0x1d4ae, 0x1d4b9, + 0x1d4bb, 0x1d4bb, + 0x1d4bd, 0x1d4c3, + 0x1d4c5, 0x1d505, + 0x1d507, 0x1d50a, + 0x1d50d, 0x1d514, + 0x1d516, 0x1d51c, + 0x1d51e, 0x1d539, + 0x1d53b, 0x1d53e, + 0x1d540, 0x1d544, + 0x1d546, 0x1d546, + 0x1d54a, 0x1d550, + 0x1d552, 0x1d6a5, + 0x1d6a8, 0x1d6c0, + 0x1d6c2, 0x1d6da, + 0x1d6dc, 0x1d6fa, + 0x1d6fc, 0x1d714, + 0x1d716, 0x1d734, + 0x1d736, 0x1d74e, + 0x1d750, 0x1d76e, + 0x1d770, 0x1d788, + 0x1d78a, 0x1d7a8, + 0x1d7aa, 0x1d7c2, + 0x1d7c4, 0x1d7c9, + 0x1d7ce, 0x1d7ff, + 0x20000, 0x2a6d6, + 0x2f800, 0x2fa1d, + 0xe0100, 0xe01ef +}; /* CR_Alnum */ + +/* 'ASCII': [[:ASCII:]] */ +static const OnigCodePoint CR_ASCII[] = { + 1, + 0x0000, 0x007f +}; /* CR_ASCII */ + +#ifdef USE_UNICODE_PROPERTIES + +/* 'Any': - */ +static const OnigCodePoint CR_Any[] = { + 1, + 0x0000, 0x10ffff +}; /* CR_Any */ + +/* 'Assigned': - */ +static const OnigCodePoint CR_Assigned[] = { + 420, + 0x0000, 0x0241, + 0x0250, 0x036f, + 0x0374, 0x0375, + 0x037a, 0x037a, + 0x037e, 0x037e, + 0x0384, 0x038a, + 0x038c, 0x038c, + 0x038e, 0x03a1, + 0x03a3, 0x03ce, + 0x03d0, 0x0486, + 0x0488, 0x04ce, + 0x04d0, 0x04f9, + 0x0500, 0x050f, + 0x0531, 0x0556, + 0x0559, 0x055f, + 0x0561, 0x0587, + 0x0589, 0x058a, + 0x0591, 0x05b9, + 0x05bb, 0x05c7, + 0x05d0, 0x05ea, + 0x05f0, 0x05f4, + 0x0600, 0x0603, + 0x060b, 0x0615, + 0x061b, 0x061b, + 0x061e, 0x061f, + 0x0621, 0x063a, + 0x0640, 0x065e, + 0x0660, 0x070d, + 0x070f, 0x074a, + 0x074d, 0x076d, + 0x0780, 0x07b1, + 0x0901, 0x0939, + 0x093c, 0x094d, + 0x0950, 0x0954, + 0x0958, 0x0970, + 0x097d, 0x097d, + 0x0981, 0x0983, + 0x0985, 0x098c, + 0x098f, 0x0990, + 0x0993, 0x09a8, + 0x09aa, 0x09b0, + 0x09b2, 0x09b2, + 0x09b6, 0x09b9, + 0x09bc, 0x09c4, + 0x09c7, 0x09c8, + 0x09cb, 0x09ce, + 0x09d7, 
0x09d7, + 0x09dc, 0x09dd, + 0x09df, 0x09e3, + 0x09e6, 0x09fa, + 0x0a01, 0x0a03, + 0x0a05, 0x0a0a, + 0x0a0f, 0x0a10, + 0x0a13, 0x0a28, + 0x0a2a, 0x0a30, + 0x0a32, 0x0a33, + 0x0a35, 0x0a36, + 0x0a38, 0x0a39, + 0x0a3c, 0x0a3c, + 0x0a3e, 0x0a42, + 0x0a47, 0x0a48, + 0x0a4b, 0x0a4d, + 0x0a59, 0x0a5c, + 0x0a5e, 0x0a5e, + 0x0a66, 0x0a74, + 0x0a81, 0x0a83, + 0x0a85, 0x0a8d, + 0x0a8f, 0x0a91, + 0x0a93, 0x0aa8, + 0x0aaa, 0x0ab0, + 0x0ab2, 0x0ab3, + 0x0ab5, 0x0ab9, + 0x0abc, 0x0ac5, + 0x0ac7, 0x0ac9, + 0x0acb, 0x0acd, + 0x0ad0, 0x0ad0, + 0x0ae0, 0x0ae3, + 0x0ae6, 0x0aef, + 0x0af1, 0x0af1, + 0x0b01, 0x0b03, + 0x0b05, 0x0b0c, + 0x0b0f, 0x0b10, + 0x0b13, 0x0b28, + 0x0b2a, 0x0b30, + 0x0b32, 0x0b33, + 0x0b35, 0x0b39, + 0x0b3c, 0x0b43, + 0x0b47, 0x0b48, + 0x0b4b, 0x0b4d, + 0x0b56, 0x0b57, + 0x0b5c, 0x0b5d, + 0x0b5f, 0x0b61, + 0x0b66, 0x0b71, + 0x0b82, 0x0b83, + 0x0b85, 0x0b8a, + 0x0b8e, 0x0b90, + 0x0b92, 0x0b95, + 0x0b99, 0x0b9a, + 0x0b9c, 0x0b9c, + 0x0b9e, 0x0b9f, + 0x0ba3, 0x0ba4, + 0x0ba8, 0x0baa, + 0x0bae, 0x0bb9, + 0x0bbe, 0x0bc2, + 0x0bc6, 0x0bc8, + 0x0bca, 0x0bcd, + 0x0bd7, 0x0bd7, + 0x0be6, 0x0bfa, + 0x0c01, 0x0c03, + 0x0c05, 0x0c0c, + 0x0c0e, 0x0c10, + 0x0c12, 0x0c28, + 0x0c2a, 0x0c33, + 0x0c35, 0x0c39, + 0x0c3e, 0x0c44, + 0x0c46, 0x0c48, + 0x0c4a, 0x0c4d, + 0x0c55, 0x0c56, + 0x0c60, 0x0c61, + 0x0c66, 0x0c6f, + 0x0c82, 0x0c83, + 0x0c85, 0x0c8c, + 0x0c8e, 0x0c90, + 0x0c92, 0x0ca8, + 0x0caa, 0x0cb3, + 0x0cb5, 0x0cb9, + 0x0cbc, 0x0cc4, + 0x0cc6, 0x0cc8, + 0x0cca, 0x0ccd, + 0x0cd5, 0x0cd6, + 0x0cde, 0x0cde, + 0x0ce0, 0x0ce1, + 0x0ce6, 0x0cef, + 0x0d02, 0x0d03, + 0x0d05, 0x0d0c, + 0x0d0e, 0x0d10, + 0x0d12, 0x0d28, + 0x0d2a, 0x0d39, + 0x0d3e, 0x0d43, + 0x0d46, 0x0d48, + 0x0d4a, 0x0d4d, + 0x0d57, 0x0d57, + 0x0d60, 0x0d61, + 0x0d66, 0x0d6f, + 0x0d82, 0x0d83, + 0x0d85, 0x0d96, + 0x0d9a, 0x0db1, + 0x0db3, 0x0dbb, + 0x0dbd, 0x0dbd, + 0x0dc0, 0x0dc6, + 0x0dca, 0x0dca, + 0x0dcf, 0x0dd4, + 0x0dd6, 0x0dd6, + 0x0dd8, 0x0ddf, + 0x0df2, 0x0df4, + 0x0e01, 0x0e3a, + 0x0e3f, 0x0e5b, + 0x0e81, 
0x0e82, + 0x0e84, 0x0e84, + 0x0e87, 0x0e88, + 0x0e8a, 0x0e8a, + 0x0e8d, 0x0e8d, + 0x0e94, 0x0e97, + 0x0e99, 0x0e9f, + 0x0ea1, 0x0ea3, + 0x0ea5, 0x0ea5, + 0x0ea7, 0x0ea7, + 0x0eaa, 0x0eab, + 0x0ead, 0x0eb9, + 0x0ebb, 0x0ebd, + 0x0ec0, 0x0ec4, + 0x0ec6, 0x0ec6, + 0x0ec8, 0x0ecd, + 0x0ed0, 0x0ed9, + 0x0edc, 0x0edd, + 0x0f00, 0x0f47, + 0x0f49, 0x0f6a, + 0x0f71, 0x0f8b, + 0x0f90, 0x0f97, + 0x0f99, 0x0fbc, + 0x0fbe, 0x0fcc, + 0x0fcf, 0x0fd1, + 0x1000, 0x1021, + 0x1023, 0x1027, + 0x1029, 0x102a, + 0x102c, 0x1032, + 0x1036, 0x1039, + 0x1040, 0x1059, + 0x10a0, 0x10c5, + 0x10d0, 0x10fc, + 0x1100, 0x1159, + 0x115f, 0x11a2, + 0x11a8, 0x11f9, + 0x1200, 0x1248, + 0x124a, 0x124d, + 0x1250, 0x1256, + 0x1258, 0x1258, + 0x125a, 0x125d, + 0x1260, 0x1288, + 0x128a, 0x128d, + 0x1290, 0x12b0, + 0x12b2, 0x12b5, + 0x12b8, 0x12be, + 0x12c0, 0x12c0, + 0x12c2, 0x12c5, + 0x12c8, 0x12d6, + 0x12d8, 0x1310, + 0x1312, 0x1315, + 0x1318, 0x135a, + 0x135f, 0x137c, + 0x1380, 0x1399, + 0x13a0, 0x13f4, + 0x1401, 0x1676, + 0x1680, 0x169c, + 0x16a0, 0x16f0, + 0x1700, 0x170c, + 0x170e, 0x1714, + 0x1720, 0x1736, + 0x1740, 0x1753, + 0x1760, 0x176c, + 0x176e, 0x1770, + 0x1772, 0x1773, + 0x1780, 0x17dd, + 0x17e0, 0x17e9, + 0x17f0, 0x17f9, + 0x1800, 0x180e, + 0x1810, 0x1819, + 0x1820, 0x1877, + 0x1880, 0x18a9, + 0x1900, 0x191c, + 0x1920, 0x192b, + 0x1930, 0x193b, + 0x1940, 0x1940, + 0x1944, 0x196d, + 0x1970, 0x1974, + 0x1980, 0x19a9, + 0x19b0, 0x19c9, + 0x19d0, 0x19d9, + 0x19de, 0x1a1b, + 0x1a1e, 0x1a1f, + 0x1d00, 0x1dc3, + 0x1e00, 0x1e9b, + 0x1ea0, 0x1ef9, + 0x1f00, 0x1f15, + 0x1f18, 0x1f1d, + 0x1f20, 0x1f45, + 0x1f48, 0x1f4d, + 0x1f50, 0x1f57, + 0x1f59, 0x1f59, + 0x1f5b, 0x1f5b, + 0x1f5d, 0x1f5d, + 0x1f5f, 0x1f7d, + 0x1f80, 0x1fb4, + 0x1fb6, 0x1fc4, + 0x1fc6, 0x1fd3, + 0x1fd6, 0x1fdb, + 0x1fdd, 0x1fef, + 0x1ff2, 0x1ff4, + 0x1ff6, 0x1ffe, + 0x2000, 0x2063, + 0x206a, 0x2071, + 0x2074, 0x208e, + 0x2090, 0x2094, + 0x20a0, 0x20b5, + 0x20d0, 0x20eb, + 0x2100, 0x214c, + 0x2153, 0x2183, + 0x2190, 0x23db, + 0x2400, 
0x2426, + 0x2440, 0x244a, + 0x2460, 0x269c, + 0x26a0, 0x26b1, + 0x2701, 0x2704, + 0x2706, 0x2709, + 0x270c, 0x2727, + 0x2729, 0x274b, + 0x274d, 0x274d, + 0x274f, 0x2752, + 0x2756, 0x2756, + 0x2758, 0x275e, + 0x2761, 0x2794, + 0x2798, 0x27af, + 0x27b1, 0x27be, + 0x27c0, 0x27c6, + 0x27d0, 0x27eb, + 0x27f0, 0x2b13, + 0x2c00, 0x2c2e, + 0x2c30, 0x2c5e, + 0x2c80, 0x2cea, + 0x2cf9, 0x2d25, + 0x2d30, 0x2d65, + 0x2d6f, 0x2d6f, + 0x2d80, 0x2d96, + 0x2da0, 0x2da6, + 0x2da8, 0x2dae, + 0x2db0, 0x2db6, + 0x2db8, 0x2dbe, + 0x2dc0, 0x2dc6, + 0x2dc8, 0x2dce, + 0x2dd0, 0x2dd6, + 0x2dd8, 0x2dde, + 0x2e00, 0x2e17, + 0x2e1c, 0x2e1d, + 0x2e80, 0x2e99, + 0x2e9b, 0x2ef3, + 0x2f00, 0x2fd5, + 0x2ff0, 0x2ffb, + 0x3000, 0x303f, + 0x3041, 0x3096, + 0x3099, 0x30ff, + 0x3105, 0x312c, + 0x3131, 0x318e, + 0x3190, 0x31b7, + 0x31c0, 0x31cf, + 0x31f0, 0x321e, + 0x3220, 0x3243, + 0x3250, 0x32fe, + 0x3300, 0x4db5, + 0x4dc0, 0x9fbb, + 0xa000, 0xa48c, + 0xa490, 0xa4c6, + 0xa700, 0xa716, + 0xa800, 0xa82b, + 0xac00, 0xd7a3, + 0xd800, 0xfa2d, + 0xfa30, 0xfa6a, + 0xfa70, 0xfad9, + 0xfb00, 0xfb06, + 0xfb13, 0xfb17, + 0xfb1d, 0xfb36, + 0xfb38, 0xfb3c, + 0xfb3e, 0xfb3e, + 0xfb40, 0xfb41, + 0xfb43, 0xfb44, + 0xfb46, 0xfbb1, + 0xfbd3, 0xfd3f, + 0xfd50, 0xfd8f, + 0xfd92, 0xfdc7, + 0xfdf0, 0xfdfd, + 0xfe00, 0xfe19, + 0xfe20, 0xfe23, + 0xfe30, 0xfe52, + 0xfe54, 0xfe66, + 0xfe68, 0xfe6b, + 0xfe70, 0xfe74, + 0xfe76, 0xfefc, + 0xfeff, 0xfeff, + 0xff01, 0xffbe, + 0xffc2, 0xffc7, + 0xffca, 0xffcf, + 0xffd2, 0xffd7, + 0xffda, 0xffdc, + 0xffe0, 0xffe6, + 0xffe8, 0xffee, + 0xfff9, 0xfffd, + 0x10000, 0x1000b, + 0x1000d, 0x10026, + 0x10028, 0x1003a, + 0x1003c, 0x1003d, + 0x1003f, 0x1004d, + 0x10050, 0x1005d, + 0x10080, 0x100fa, + 0x10100, 0x10102, + 0x10107, 0x10133, + 0x10137, 0x1018a, + 0x10300, 0x1031e, + 0x10320, 0x10323, + 0x10330, 0x1034a, + 0x10380, 0x1039d, + 0x1039f, 0x103c3, + 0x103c8, 0x103d5, + 0x10400, 0x1049d, + 0x104a0, 0x104a9, + 0x10800, 0x10805, + 0x10808, 0x10808, + 0x1080a, 0x10835, + 0x10837, 0x10838, + 
0x1083c, 0x1083c, + 0x1083f, 0x1083f, + 0x10a00, 0x10a03, + 0x10a05, 0x10a06, + 0x10a0c, 0x10a13, + 0x10a15, 0x10a17, + 0x10a19, 0x10a33, + 0x10a38, 0x10a3a, + 0x10a3f, 0x10a47, + 0x10a50, 0x10a58, + 0x1d000, 0x1d0f5, + 0x1d100, 0x1d126, + 0x1d12a, 0x1d1dd, + 0x1d200, 0x1d245, + 0x1d300, 0x1d356, + 0x1d400, 0x1d454, + 0x1d456, 0x1d49c, + 0x1d49e, 0x1d49f, + 0x1d4a2, 0x1d4a2, + 0x1d4a5, 0x1d4a6, + 0x1d4a9, 0x1d4ac, + 0x1d4ae, 0x1d4b9, + 0x1d4bb, 0x1d4bb, + 0x1d4bd, 0x1d4c3, + 0x1d4c5, 0x1d505, + 0x1d507, 0x1d50a, + 0x1d50d, 0x1d514, + 0x1d516, 0x1d51c, + 0x1d51e, 0x1d539, + 0x1d53b, 0x1d53e, + 0x1d540, 0x1d544, + 0x1d546, 0x1d546, + 0x1d54a, 0x1d550, + 0x1d552, 0x1d6a5, + 0x1d6a8, 0x1d7c9, + 0x1d7ce, 0x1d7ff, + 0x20000, 0x2a6d6, + 0x2f800, 0x2fa1d, + 0xe0001, 0xe0001, + 0xe0020, 0xe007f, + 0xe0100, 0xe01ef, + 0xf0000, 0xffffd, + 0x100000, 0x10fffd +}; /* CR_Assigned */ + +/* 'C': Major Category */ +static const OnigCodePoint CR_C[] = { + 422, + 0x0000, 0x001f, + 0x007f, 0x009f, + 0x00ad, 0x00ad, + 0x0242, 0x024f, + 0x0370, 0x0373, + 0x0376, 0x0379, + 0x037b, 0x037d, + 0x037f, 0x0383, + 0x038b, 0x038b, + 0x038d, 0x038d, + 0x03a2, 0x03a2, + 0x03cf, 0x03cf, + 0x0487, 0x0487, + 0x04cf, 0x04cf, + 0x04fa, 0x04ff, + 0x0510, 0x0530, + 0x0557, 0x0558, + 0x0560, 0x0560, + 0x0588, 0x0588, + 0x058b, 0x0590, + 0x05ba, 0x05ba, + 0x05c8, 0x05cf, + 0x05eb, 0x05ef, + 0x05f5, 0x060a, + 0x0616, 0x061a, + 0x061c, 0x061d, + 0x0620, 0x0620, + 0x063b, 0x063f, + 0x065f, 0x065f, + 0x06dd, 0x06dd, + 0x070e, 0x070f, + 0x074b, 0x074c, + 0x076e, 0x077f, + 0x07b2, 0x0900, + 0x093a, 0x093b, + 0x094e, 0x094f, + 0x0955, 0x0957, + 0x0971, 0x097c, + 0x097e, 0x0980, + 0x0984, 0x0984, + 0x098d, 0x098e, + 0x0991, 0x0992, + 0x09a9, 0x09a9, + 0x09b1, 0x09b1, + 0x09b3, 0x09b5, + 0x09ba, 0x09bb, + 0x09c5, 0x09c6, + 0x09c9, 0x09ca, + 0x09cf, 0x09d6, + 0x09d8, 0x09db, + 0x09de, 0x09de, + 0x09e4, 0x09e5, + 0x09fb, 0x0a00, + 0x0a04, 0x0a04, + 0x0a0b, 0x0a0e, + 0x0a11, 0x0a12, + 0x0a29, 0x0a29, + 0x0a31, 0x0a31, 
+ 0x0a34, 0x0a34, + 0x0a37, 0x0a37, + 0x0a3a, 0x0a3b, + 0x0a3d, 0x0a3d, + 0x0a43, 0x0a46, + 0x0a49, 0x0a4a, + 0x0a4e, 0x0a58, + 0x0a5d, 0x0a5d, + 0x0a5f, 0x0a65, + 0x0a75, 0x0a80, + 0x0a84, 0x0a84, + 0x0a8e, 0x0a8e, + 0x0a92, 0x0a92, + 0x0aa9, 0x0aa9, + 0x0ab1, 0x0ab1, + 0x0ab4, 0x0ab4, + 0x0aba, 0x0abb, + 0x0ac6, 0x0ac6, + 0x0aca, 0x0aca, + 0x0ace, 0x0acf, + 0x0ad1, 0x0adf, + 0x0ae4, 0x0ae5, + 0x0af0, 0x0af0, + 0x0af2, 0x0b00, + 0x0b04, 0x0b04, + 0x0b0d, 0x0b0e, + 0x0b11, 0x0b12, + 0x0b29, 0x0b29, + 0x0b31, 0x0b31, + 0x0b34, 0x0b34, + 0x0b3a, 0x0b3b, + 0x0b44, 0x0b46, + 0x0b49, 0x0b4a, + 0x0b4e, 0x0b55, + 0x0b58, 0x0b5b, + 0x0b5e, 0x0b5e, + 0x0b62, 0x0b65, + 0x0b72, 0x0b81, + 0x0b84, 0x0b84, + 0x0b8b, 0x0b8d, + 0x0b91, 0x0b91, + 0x0b96, 0x0b98, + 0x0b9b, 0x0b9b, + 0x0b9d, 0x0b9d, + 0x0ba0, 0x0ba2, + 0x0ba5, 0x0ba7, + 0x0bab, 0x0bad, + 0x0bba, 0x0bbd, + 0x0bc3, 0x0bc5, + 0x0bc9, 0x0bc9, + 0x0bce, 0x0bd6, + 0x0bd8, 0x0be5, + 0x0bfb, 0x0c00, + 0x0c04, 0x0c04, + 0x0c0d, 0x0c0d, + 0x0c11, 0x0c11, + 0x0c29, 0x0c29, + 0x0c34, 0x0c34, + 0x0c3a, 0x0c3d, + 0x0c45, 0x0c45, + 0x0c49, 0x0c49, + 0x0c4e, 0x0c54, + 0x0c57, 0x0c5f, + 0x0c62, 0x0c65, + 0x0c70, 0x0c81, + 0x0c84, 0x0c84, + 0x0c8d, 0x0c8d, + 0x0c91, 0x0c91, + 0x0ca9, 0x0ca9, + 0x0cb4, 0x0cb4, + 0x0cba, 0x0cbb, + 0x0cc5, 0x0cc5, + 0x0cc9, 0x0cc9, + 0x0cce, 0x0cd4, + 0x0cd7, 0x0cdd, + 0x0cdf, 0x0cdf, + 0x0ce2, 0x0ce5, + 0x0cf0, 0x0d01, + 0x0d04, 0x0d04, + 0x0d0d, 0x0d0d, + 0x0d11, 0x0d11, + 0x0d29, 0x0d29, + 0x0d3a, 0x0d3d, + 0x0d44, 0x0d45, + 0x0d49, 0x0d49, + 0x0d4e, 0x0d56, + 0x0d58, 0x0d5f, + 0x0d62, 0x0d65, + 0x0d70, 0x0d81, + 0x0d84, 0x0d84, + 0x0d97, 0x0d99, + 0x0db2, 0x0db2, + 0x0dbc, 0x0dbc, + 0x0dbe, 0x0dbf, + 0x0dc7, 0x0dc9, + 0x0dcb, 0x0dce, + 0x0dd5, 0x0dd5, + 0x0dd7, 0x0dd7, + 0x0de0, 0x0df1, + 0x0df5, 0x0e00, + 0x0e3b, 0x0e3e, + 0x0e5c, 0x0e80, + 0x0e83, 0x0e83, + 0x0e85, 0x0e86, + 0x0e89, 0x0e89, + 0x0e8b, 0x0e8c, + 0x0e8e, 0x0e93, + 0x0e98, 0x0e98, + 0x0ea0, 0x0ea0, + 0x0ea4, 0x0ea4, + 0x0ea6, 0x0ea6, + 
0x0ea8, 0x0ea9, + 0x0eac, 0x0eac, + 0x0eba, 0x0eba, + 0x0ebe, 0x0ebf, + 0x0ec5, 0x0ec5, + 0x0ec7, 0x0ec7, + 0x0ece, 0x0ecf, + 0x0eda, 0x0edb, + 0x0ede, 0x0eff, + 0x0f48, 0x0f48, + 0x0f6b, 0x0f70, + 0x0f8c, 0x0f8f, + 0x0f98, 0x0f98, + 0x0fbd, 0x0fbd, + 0x0fcd, 0x0fce, + 0x0fd2, 0x0fff, + 0x1022, 0x1022, + 0x1028, 0x1028, + 0x102b, 0x102b, + 0x1033, 0x1035, + 0x103a, 0x103f, + 0x105a, 0x109f, + 0x10c6, 0x10cf, + 0x10fd, 0x10ff, + 0x115a, 0x115e, + 0x11a3, 0x11a7, + 0x11fa, 0x11ff, + 0x1249, 0x1249, + 0x124e, 0x124f, + 0x1257, 0x1257, + 0x1259, 0x1259, + 0x125e, 0x125f, + 0x1289, 0x1289, + 0x128e, 0x128f, + 0x12b1, 0x12b1, + 0x12b6, 0x12b7, + 0x12bf, 0x12bf, + 0x12c1, 0x12c1, + 0x12c6, 0x12c7, + 0x12d7, 0x12d7, + 0x1311, 0x1311, + 0x1316, 0x1317, + 0x135b, 0x135e, + 0x137d, 0x137f, + 0x139a, 0x139f, + 0x13f5, 0x1400, + 0x1677, 0x167f, + 0x169d, 0x169f, + 0x16f1, 0x16ff, + 0x170d, 0x170d, + 0x1715, 0x171f, + 0x1737, 0x173f, + 0x1754, 0x175f, + 0x176d, 0x176d, + 0x1771, 0x1771, + 0x1774, 0x177f, + 0x17b4, 0x17b5, + 0x17de, 0x17df, + 0x17ea, 0x17ef, + 0x17fa, 0x17ff, + 0x180f, 0x180f, + 0x181a, 0x181f, + 0x1878, 0x187f, + 0x18aa, 0x18ff, + 0x191d, 0x191f, + 0x192c, 0x192f, + 0x193c, 0x193f, + 0x1941, 0x1943, + 0x196e, 0x196f, + 0x1975, 0x197f, + 0x19aa, 0x19af, + 0x19ca, 0x19cf, + 0x19da, 0x19dd, + 0x1a1c, 0x1a1d, + 0x1a20, 0x1cff, + 0x1dc4, 0x1dff, + 0x1e9c, 0x1e9f, + 0x1efa, 0x1eff, + 0x1f16, 0x1f17, + 0x1f1e, 0x1f1f, + 0x1f46, 0x1f47, + 0x1f4e, 0x1f4f, + 0x1f58, 0x1f58, + 0x1f5a, 0x1f5a, + 0x1f5c, 0x1f5c, + 0x1f5e, 0x1f5e, + 0x1f7e, 0x1f7f, + 0x1fb5, 0x1fb5, + 0x1fc5, 0x1fc5, + 0x1fd4, 0x1fd5, + 0x1fdc, 0x1fdc, + 0x1ff0, 0x1ff1, + 0x1ff5, 0x1ff5, + 0x1fff, 0x1fff, + 0x200b, 0x200f, + 0x202a, 0x202e, + 0x2060, 0x206f, + 0x2072, 0x2073, + 0x208f, 0x208f, + 0x2095, 0x209f, + 0x20b6, 0x20cf, + 0x20ec, 0x20ff, + 0x214d, 0x2152, + 0x2184, 0x218f, + 0x23dc, 0x23ff, + 0x2427, 0x243f, + 0x244b, 0x245f, + 0x269d, 0x269f, + 0x26b2, 0x2700, + 0x2705, 0x2705, + 0x270a, 0x270b, + 
0x2728, 0x2728, + 0x274c, 0x274c, + 0x274e, 0x274e, + 0x2753, 0x2755, + 0x2757, 0x2757, + 0x275f, 0x2760, + 0x2795, 0x2797, + 0x27b0, 0x27b0, + 0x27bf, 0x27bf, + 0x27c7, 0x27cf, + 0x27ec, 0x27ef, + 0x2b14, 0x2bff, + 0x2c2f, 0x2c2f, + 0x2c5f, 0x2c7f, + 0x2ceb, 0x2cf8, + 0x2d26, 0x2d2f, + 0x2d66, 0x2d6e, + 0x2d70, 0x2d7f, + 0x2d97, 0x2d9f, + 0x2da7, 0x2da7, + 0x2daf, 0x2daf, + 0x2db7, 0x2db7, + 0x2dbf, 0x2dbf, + 0x2dc7, 0x2dc7, + 0x2dcf, 0x2dcf, + 0x2dd7, 0x2dd7, + 0x2ddf, 0x2dff, + 0x2e18, 0x2e1b, + 0x2e1e, 0x2e7f, + 0x2e9a, 0x2e9a, + 0x2ef4, 0x2eff, + 0x2fd6, 0x2fef, + 0x2ffc, 0x2fff, + 0x3040, 0x3040, + 0x3097, 0x3098, + 0x3100, 0x3104, + 0x312d, 0x3130, + 0x318f, 0x318f, + 0x31b8, 0x31bf, + 0x31d0, 0x31ef, + 0x321f, 0x321f, + 0x3244, 0x324f, + 0x32ff, 0x32ff, + 0x4db6, 0x4dbf, + 0x9fbc, 0x9fff, + 0xa48d, 0xa48f, + 0xa4c7, 0xa6ff, + 0xa717, 0xa7ff, + 0xa82c, 0xabff, + 0xd7a4, 0xf8ff, + 0xfa2e, 0xfa2f, + 0xfa6b, 0xfa6f, + 0xfada, 0xfaff, + 0xfb07, 0xfb12, + 0xfb18, 0xfb1c, + 0xfb37, 0xfb37, + 0xfb3d, 0xfb3d, + 0xfb3f, 0xfb3f, + 0xfb42, 0xfb42, + 0xfb45, 0xfb45, + 0xfbb2, 0xfbd2, + 0xfd40, 0xfd4f, + 0xfd90, 0xfd91, + 0xfdc8, 0xfdef, + 0xfdfe, 0xfdff, + 0xfe1a, 0xfe1f, + 0xfe24, 0xfe2f, + 0xfe53, 0xfe53, + 0xfe67, 0xfe67, + 0xfe6c, 0xfe6f, + 0xfe75, 0xfe75, + 0xfefd, 0xff00, + 0xffbf, 0xffc1, + 0xffc8, 0xffc9, + 0xffd0, 0xffd1, + 0xffd8, 0xffd9, + 0xffdd, 0xffdf, + 0xffe7, 0xffe7, + 0xffef, 0xfffb, + 0xfffe, 0xffff, + 0x1000c, 0x1000c, + 0x10027, 0x10027, + 0x1003b, 0x1003b, + 0x1003e, 0x1003e, + 0x1004e, 0x1004f, + 0x1005e, 0x1007f, + 0x100fb, 0x100ff, + 0x10103, 0x10106, + 0x10134, 0x10136, + 0x1018b, 0x102ff, + 0x1031f, 0x1031f, + 0x10324, 0x1032f, + 0x1034b, 0x1037f, + 0x1039e, 0x1039e, + 0x103c4, 0x103c7, + 0x103d6, 0x103ff, + 0x1049e, 0x1049f, + 0x104aa, 0x107ff, + 0x10806, 0x10807, + 0x10809, 0x10809, + 0x10836, 0x10836, + 0x10839, 0x1083b, + 0x1083d, 0x1083e, + 0x10840, 0x109ff, + 0x10a04, 0x10a04, + 0x10a07, 0x10a0b, + 0x10a14, 0x10a14, + 0x10a18, 0x10a18, + 
0x10a34, 0x10a37, + 0x10a3b, 0x10a3e, + 0x10a48, 0x10a4f, + 0x10a59, 0x1cfff, + 0x1d0f6, 0x1d0ff, + 0x1d127, 0x1d129, + 0x1d173, 0x1d17a, + 0x1d1de, 0x1d1ff, + 0x1d246, 0x1d2ff, + 0x1d357, 0x1d3ff, + 0x1d455, 0x1d455, + 0x1d49d, 0x1d49d, + 0x1d4a0, 0x1d4a1, + 0x1d4a3, 0x1d4a4, + 0x1d4a7, 0x1d4a8, + 0x1d4ad, 0x1d4ad, + 0x1d4ba, 0x1d4ba, + 0x1d4bc, 0x1d4bc, + 0x1d4c4, 0x1d4c4, + 0x1d506, 0x1d506, + 0x1d50b, 0x1d50c, + 0x1d515, 0x1d515, + 0x1d51d, 0x1d51d, + 0x1d53a, 0x1d53a, + 0x1d53f, 0x1d53f, + 0x1d545, 0x1d545, + 0x1d547, 0x1d549, + 0x1d551, 0x1d551, + 0x1d6a6, 0x1d6a7, + 0x1d7ca, 0x1d7cd, + 0x1d800, 0x1ffff, + 0x2a6d7, 0x2f7ff, + 0x2fa1e, 0xe00ff, + 0xe01f0, 0x10ffff +}; /* CR_C */ + +/* 'Cc': General Category */ +static const OnigCodePoint CR_Cc[] = { + 2, + 0x0000, 0x001f, + 0x007f, 0x009f +}; /* CR_Cc */ + +/* 'Cf': General Category */ +static const OnigCodePoint CR_Cf[] = { + 14, + 0x00ad, 0x00ad, + 0x0600, 0x0603, + 0x06dd, 0x06dd, + 0x070f, 0x070f, + 0x17b4, 0x17b5, + 0x200b, 0x200f, + 0x202a, 0x202e, + 0x2060, 0x2063, + 0x206a, 0x206f, + 0xfeff, 0xfeff, + 0xfff9, 0xfffb, + 0x1d173, 0x1d17a, + 0xe0001, 0xe0001, + 0xe0020, 0xe007f +}; /* CR_Cf */ + +/* 'Cn': General Category */ +static const OnigCodePoint CR_Cn[] = { + 420, + 0x0242, 0x024f, + 0x0370, 0x0373, + 0x0376, 0x0379, + 0x037b, 0x037d, + 0x037f, 0x0383, + 0x038b, 0x038b, + 0x038d, 0x038d, + 0x03a2, 0x03a2, + 0x03cf, 0x03cf, + 0x0487, 0x0487, + 0x04cf, 0x04cf, + 0x04fa, 0x04ff, + 0x0510, 0x0530, + 0x0557, 0x0558, + 0x0560, 0x0560, + 0x0588, 0x0588, + 0x058b, 0x0590, + 0x05ba, 0x05ba, + 0x05c8, 0x05cf, + 0x05eb, 0x05ef, + 0x05f5, 0x05ff, + 0x0604, 0x060a, + 0x0616, 0x061a, + 0x061c, 0x061d, + 0x0620, 0x0620, + 0x063b, 0x063f, + 0x065f, 0x065f, + 0x070e, 0x070e, + 0x074b, 0x074c, + 0x076e, 0x077f, + 0x07b2, 0x0900, + 0x093a, 0x093b, + 0x094e, 0x094f, + 0x0955, 0x0957, + 0x0971, 0x097c, + 0x097e, 0x0980, + 0x0984, 0x0984, + 0x098d, 0x098e, + 0x0991, 0x0992, + 0x09a9, 0x09a9, + 0x09b1, 0x09b1, + 0x09b3, 
0x09b5, + 0x09ba, 0x09bb, + 0x09c5, 0x09c6, + 0x09c9, 0x09ca, + 0x09cf, 0x09d6, + 0x09d8, 0x09db, + 0x09de, 0x09de, + 0x09e4, 0x09e5, + 0x09fb, 0x0a00, + 0x0a04, 0x0a04, + 0x0a0b, 0x0a0e, + 0x0a11, 0x0a12, + 0x0a29, 0x0a29, + 0x0a31, 0x0a31, + 0x0a34, 0x0a34, + 0x0a37, 0x0a37, + 0x0a3a, 0x0a3b, + 0x0a3d, 0x0a3d, + 0x0a43, 0x0a46, + 0x0a49, 0x0a4a, + 0x0a4e, 0x0a58, + 0x0a5d, 0x0a5d, + 0x0a5f, 0x0a65, + 0x0a75, 0x0a80, + 0x0a84, 0x0a84, + 0x0a8e, 0x0a8e, + 0x0a92, 0x0a92, + 0x0aa9, 0x0aa9, + 0x0ab1, 0x0ab1, + 0x0ab4, 0x0ab4, + 0x0aba, 0x0abb, + 0x0ac6, 0x0ac6, + 0x0aca, 0x0aca, + 0x0ace, 0x0acf, + 0x0ad1, 0x0adf, + 0x0ae4, 0x0ae5, + 0x0af0, 0x0af0, + 0x0af2, 0x0b00, + 0x0b04, 0x0b04, + 0x0b0d, 0x0b0e, + 0x0b11, 0x0b12, + 0x0b29, 0x0b29, + 0x0b31, 0x0b31, + 0x0b34, 0x0b34, + 0x0b3a, 0x0b3b, + 0x0b44, 0x0b46, + 0x0b49, 0x0b4a, + 0x0b4e, 0x0b55, + 0x0b58, 0x0b5b, + 0x0b5e, 0x0b5e, + 0x0b62, 0x0b65, + 0x0b72, 0x0b81, + 0x0b84, 0x0b84, + 0x0b8b, 0x0b8d, + 0x0b91, 0x0b91, + 0x0b96, 0x0b98, + 0x0b9b, 0x0b9b, + 0x0b9d, 0x0b9d, + 0x0ba0, 0x0ba2, + 0x0ba5, 0x0ba7, + 0x0bab, 0x0bad, + 0x0bba, 0x0bbd, + 0x0bc3, 0x0bc5, + 0x0bc9, 0x0bc9, + 0x0bce, 0x0bd6, + 0x0bd8, 0x0be5, + 0x0bfb, 0x0c00, + 0x0c04, 0x0c04, + 0x0c0d, 0x0c0d, + 0x0c11, 0x0c11, + 0x0c29, 0x0c29, + 0x0c34, 0x0c34, + 0x0c3a, 0x0c3d, + 0x0c45, 0x0c45, + 0x0c49, 0x0c49, + 0x0c4e, 0x0c54, + 0x0c57, 0x0c5f, + 0x0c62, 0x0c65, + 0x0c70, 0x0c81, + 0x0c84, 0x0c84, + 0x0c8d, 0x0c8d, + 0x0c91, 0x0c91, + 0x0ca9, 0x0ca9, + 0x0cb4, 0x0cb4, + 0x0cba, 0x0cbb, + 0x0cc5, 0x0cc5, + 0x0cc9, 0x0cc9, + 0x0cce, 0x0cd4, + 0x0cd7, 0x0cdd, + 0x0cdf, 0x0cdf, + 0x0ce2, 0x0ce5, + 0x0cf0, 0x0d01, + 0x0d04, 0x0d04, + 0x0d0d, 0x0d0d, + 0x0d11, 0x0d11, + 0x0d29, 0x0d29, + 0x0d3a, 0x0d3d, + 0x0d44, 0x0d45, + 0x0d49, 0x0d49, + 0x0d4e, 0x0d56, + 0x0d58, 0x0d5f, + 0x0d62, 0x0d65, + 0x0d70, 0x0d81, + 0x0d84, 0x0d84, + 0x0d97, 0x0d99, + 0x0db2, 0x0db2, + 0x0dbc, 0x0dbc, + 0x0dbe, 0x0dbf, + 0x0dc7, 0x0dc9, + 0x0dcb, 0x0dce, + 0x0dd5, 0x0dd5, + 0x0dd7, 
0x0dd7, + 0x0de0, 0x0df1, + 0x0df5, 0x0e00, + 0x0e3b, 0x0e3e, + 0x0e5c, 0x0e80, + 0x0e83, 0x0e83, + 0x0e85, 0x0e86, + 0x0e89, 0x0e89, + 0x0e8b, 0x0e8c, + 0x0e8e, 0x0e93, + 0x0e98, 0x0e98, + 0x0ea0, 0x0ea0, + 0x0ea4, 0x0ea4, + 0x0ea6, 0x0ea6, + 0x0ea8, 0x0ea9, + 0x0eac, 0x0eac, + 0x0eba, 0x0eba, + 0x0ebe, 0x0ebf, + 0x0ec5, 0x0ec5, + 0x0ec7, 0x0ec7, + 0x0ece, 0x0ecf, + 0x0eda, 0x0edb, + 0x0ede, 0x0eff, + 0x0f48, 0x0f48, + 0x0f6b, 0x0f70, + 0x0f8c, 0x0f8f, + 0x0f98, 0x0f98, + 0x0fbd, 0x0fbd, + 0x0fcd, 0x0fce, + 0x0fd2, 0x0fff, + 0x1022, 0x1022, + 0x1028, 0x1028, + 0x102b, 0x102b, + 0x1033, 0x1035, + 0x103a, 0x103f, + 0x105a, 0x109f, + 0x10c6, 0x10cf, + 0x10fd, 0x10ff, + 0x115a, 0x115e, + 0x11a3, 0x11a7, + 0x11fa, 0x11ff, + 0x1249, 0x1249, + 0x124e, 0x124f, + 0x1257, 0x1257, + 0x1259, 0x1259, + 0x125e, 0x125f, + 0x1289, 0x1289, + 0x128e, 0x128f, + 0x12b1, 0x12b1, + 0x12b6, 0x12b7, + 0x12bf, 0x12bf, + 0x12c1, 0x12c1, + 0x12c6, 0x12c7, + 0x12d7, 0x12d7, + 0x1311, 0x1311, + 0x1316, 0x1317, + 0x135b, 0x135e, + 0x137d, 0x137f, + 0x139a, 0x139f, + 0x13f5, 0x1400, + 0x1677, 0x167f, + 0x169d, 0x169f, + 0x16f1, 0x16ff, + 0x170d, 0x170d, + 0x1715, 0x171f, + 0x1737, 0x173f, + 0x1754, 0x175f, + 0x176d, 0x176d, + 0x1771, 0x1771, + 0x1774, 0x177f, + 0x17de, 0x17df, + 0x17ea, 0x17ef, + 0x17fa, 0x17ff, + 0x180f, 0x180f, + 0x181a, 0x181f, + 0x1878, 0x187f, + 0x18aa, 0x18ff, + 0x191d, 0x191f, + 0x192c, 0x192f, + 0x193c, 0x193f, + 0x1941, 0x1943, + 0x196e, 0x196f, + 0x1975, 0x197f, + 0x19aa, 0x19af, + 0x19ca, 0x19cf, + 0x19da, 0x19dd, + 0x1a1c, 0x1a1d, + 0x1a20, 0x1cff, + 0x1dc4, 0x1dff, + 0x1e9c, 0x1e9f, + 0x1efa, 0x1eff, + 0x1f16, 0x1f17, + 0x1f1e, 0x1f1f, + 0x1f46, 0x1f47, + 0x1f4e, 0x1f4f, + 0x1f58, 0x1f58, + 0x1f5a, 0x1f5a, + 0x1f5c, 0x1f5c, + 0x1f5e, 0x1f5e, + 0x1f7e, 0x1f7f, + 0x1fb5, 0x1fb5, + 0x1fc5, 0x1fc5, + 0x1fd4, 0x1fd5, + 0x1fdc, 0x1fdc, + 0x1ff0, 0x1ff1, + 0x1ff5, 0x1ff5, + 0x1fff, 0x1fff, + 0x2064, 0x2069, + 0x2072, 0x2073, + 0x208f, 0x208f, + 0x2095, 0x209f, + 0x20b6, 
0x20cf, + 0x20ec, 0x20ff, + 0x214d, 0x2152, + 0x2184, 0x218f, + 0x23dc, 0x23ff, + 0x2427, 0x243f, + 0x244b, 0x245f, + 0x269d, 0x269f, + 0x26b2, 0x2700, + 0x2705, 0x2705, + 0x270a, 0x270b, + 0x2728, 0x2728, + 0x274c, 0x274c, + 0x274e, 0x274e, + 0x2753, 0x2755, + 0x2757, 0x2757, + 0x275f, 0x2760, + 0x2795, 0x2797, + 0x27b0, 0x27b0, + 0x27bf, 0x27bf, + 0x27c7, 0x27cf, + 0x27ec, 0x27ef, + 0x2b14, 0x2bff, + 0x2c2f, 0x2c2f, + 0x2c5f, 0x2c7f, + 0x2ceb, 0x2cf8, + 0x2d26, 0x2d2f, + 0x2d66, 0x2d6e, + 0x2d70, 0x2d7f, + 0x2d97, 0x2d9f, + 0x2da7, 0x2da7, + 0x2daf, 0x2daf, + 0x2db7, 0x2db7, + 0x2dbf, 0x2dbf, + 0x2dc7, 0x2dc7, + 0x2dcf, 0x2dcf, + 0x2dd7, 0x2dd7, + 0x2ddf, 0x2dff, + 0x2e18, 0x2e1b, + 0x2e1e, 0x2e7f, + 0x2e9a, 0x2e9a, + 0x2ef4, 0x2eff, + 0x2fd6, 0x2fef, + 0x2ffc, 0x2fff, + 0x3040, 0x3040, + 0x3097, 0x3098, + 0x3100, 0x3104, + 0x312d, 0x3130, + 0x318f, 0x318f, + 0x31b8, 0x31bf, + 0x31d0, 0x31ef, + 0x321f, 0x321f, + 0x3244, 0x324f, + 0x32ff, 0x32ff, + 0x4db6, 0x4dbf, + 0x9fbc, 0x9fff, + 0xa48d, 0xa48f, + 0xa4c7, 0xa6ff, + 0xa717, 0xa7ff, + 0xa82c, 0xabff, + 0xd7a4, 0xd7ff, + 0xfa2e, 0xfa2f, + 0xfa6b, 0xfa6f, + 0xfada, 0xfaff, + 0xfb07, 0xfb12, + 0xfb18, 0xfb1c, + 0xfb37, 0xfb37, + 0xfb3d, 0xfb3d, + 0xfb3f, 0xfb3f, + 0xfb42, 0xfb42, + 0xfb45, 0xfb45, + 0xfbb2, 0xfbd2, + 0xfd40, 0xfd4f, + 0xfd90, 0xfd91, + 0xfdc8, 0xfdef, + 0xfdfe, 0xfdff, + 0xfe1a, 0xfe1f, + 0xfe24, 0xfe2f, + 0xfe53, 0xfe53, + 0xfe67, 0xfe67, + 0xfe6c, 0xfe6f, + 0xfe75, 0xfe75, + 0xfefd, 0xfefe, + 0xff00, 0xff00, + 0xffbf, 0xffc1, + 0xffc8, 0xffc9, + 0xffd0, 0xffd1, + 0xffd8, 0xffd9, + 0xffdd, 0xffdf, + 0xffe7, 0xffe7, + 0xffef, 0xfff8, + 0xfffe, 0xffff, + 0x1000c, 0x1000c, + 0x10027, 0x10027, + 0x1003b, 0x1003b, + 0x1003e, 0x1003e, + 0x1004e, 0x1004f, + 0x1005e, 0x1007f, + 0x100fb, 0x100ff, + 0x10103, 0x10106, + 0x10134, 0x10136, + 0x1018b, 0x102ff, + 0x1031f, 0x1031f, + 0x10324, 0x1032f, + 0x1034b, 0x1037f, + 0x1039e, 0x1039e, + 0x103c4, 0x103c7, + 0x103d6, 0x103ff, + 0x1049e, 0x1049f, + 0x104aa, 
0x107ff, + 0x10806, 0x10807, + 0x10809, 0x10809, + 0x10836, 0x10836, + 0x10839, 0x1083b, + 0x1083d, 0x1083e, + 0x10840, 0x109ff, + 0x10a04, 0x10a04, + 0x10a07, 0x10a0b, + 0x10a14, 0x10a14, + 0x10a18, 0x10a18, + 0x10a34, 0x10a37, + 0x10a3b, 0x10a3e, + 0x10a48, 0x10a4f, + 0x10a59, 0x1cfff, + 0x1d0f6, 0x1d0ff, + 0x1d127, 0x1d129, + 0x1d1de, 0x1d1ff, + 0x1d246, 0x1d2ff, + 0x1d357, 0x1d3ff, + 0x1d455, 0x1d455, + 0x1d49d, 0x1d49d, + 0x1d4a0, 0x1d4a1, + 0x1d4a3, 0x1d4a4, + 0x1d4a7, 0x1d4a8, + 0x1d4ad, 0x1d4ad, + 0x1d4ba, 0x1d4ba, + 0x1d4bc, 0x1d4bc, + 0x1d4c4, 0x1d4c4, + 0x1d506, 0x1d506, + 0x1d50b, 0x1d50c, + 0x1d515, 0x1d515, + 0x1d51d, 0x1d51d, + 0x1d53a, 0x1d53a, + 0x1d53f, 0x1d53f, + 0x1d545, 0x1d545, + 0x1d547, 0x1d549, + 0x1d551, 0x1d551, + 0x1d6a6, 0x1d6a7, + 0x1d7ca, 0x1d7cd, + 0x1d800, 0x1ffff, + 0x2a6d7, 0x2f7ff, + 0x2fa1e, 0xe0000, + 0xe0002, 0xe001f, + 0xe0080, 0xe00ff, + 0xe01f0, 0xeffff, + 0xffffe, 0xfffff, + 0x10fffe, 0x10ffff +}; /* CR_Cn */ + +/* 'Co': General Category */ +static const OnigCodePoint CR_Co[] = { + 3, + 0xe000, 0xf8ff, + 0xf0000, 0xffffd, + 0x100000, 0x10fffd +}; /* CR_Co */ + +/* 'Cs': General Category */ +static const OnigCodePoint CR_Cs[] = { + 1, + 0xd800, 0xdfff +}; /* CR_Cs */ + +/* 'L': Major Category */ +static const OnigCodePoint CR_L[] = { + 347, + 0x0041, 0x005a, + 0x0061, 0x007a, + 0x00aa, 0x00aa, + 0x00b5, 0x00b5, + 0x00ba, 0x00ba, + 0x00c0, 0x00d6, + 0x00d8, 0x00f6, + 0x00f8, 0x0241, + 0x0250, 0x02c1, + 0x02c6, 0x02d1, + 0x02e0, 0x02e4, + 0x02ee, 0x02ee, + 0x037a, 0x037a, + 0x0386, 0x0386, + 0x0388, 0x038a, + 0x038c, 0x038c, + 0x038e, 0x03a1, + 0x03a3, 0x03ce, + 0x03d0, 0x03f5, + 0x03f7, 0x0481, + 0x048a, 0x04ce, + 0x04d0, 0x04f9, + 0x0500, 0x050f, + 0x0531, 0x0556, + 0x0559, 0x0559, + 0x0561, 0x0587, + 0x05d0, 0x05ea, + 0x05f0, 0x05f2, + 0x0621, 0x063a, + 0x0640, 0x064a, + 0x066e, 0x066f, + 0x0671, 0x06d3, + 0x06d5, 0x06d5, + 0x06e5, 0x06e6, + 0x06ee, 0x06ef, + 0x06fa, 0x06fc, + 0x06ff, 0x06ff, + 0x0710, 0x0710, + 0x0712, 
0x072f, + 0x074d, 0x076d, + 0x0780, 0x07a5, + 0x07b1, 0x07b1, + 0x0904, 0x0939, + 0x093d, 0x093d, + 0x0950, 0x0950, + 0x0958, 0x0961, + 0x097d, 0x097d, + 0x0985, 0x098c, + 0x098f, 0x0990, + 0x0993, 0x09a8, + 0x09aa, 0x09b0, + 0x09b2, 0x09b2, + 0x09b6, 0x09b9, + 0x09bd, 0x09bd, + 0x09ce, 0x09ce, + 0x09dc, 0x09dd, + 0x09df, 0x09e1, + 0x09f0, 0x09f1, + 0x0a05, 0x0a0a, + 0x0a0f, 0x0a10, + 0x0a13, 0x0a28, + 0x0a2a, 0x0a30, + 0x0a32, 0x0a33, + 0x0a35, 0x0a36, + 0x0a38, 0x0a39, + 0x0a59, 0x0a5c, + 0x0a5e, 0x0a5e, + 0x0a72, 0x0a74, + 0x0a85, 0x0a8d, + 0x0a8f, 0x0a91, + 0x0a93, 0x0aa8, + 0x0aaa, 0x0ab0, + 0x0ab2, 0x0ab3, + 0x0ab5, 0x0ab9, + 0x0abd, 0x0abd, + 0x0ad0, 0x0ad0, + 0x0ae0, 0x0ae1, + 0x0b05, 0x0b0c, + 0x0b0f, 0x0b10, + 0x0b13, 0x0b28, + 0x0b2a, 0x0b30, + 0x0b32, 0x0b33, + 0x0b35, 0x0b39, + 0x0b3d, 0x0b3d, + 0x0b5c, 0x0b5d, + 0x0b5f, 0x0b61, + 0x0b71, 0x0b71, + 0x0b83, 0x0b83, + 0x0b85, 0x0b8a, + 0x0b8e, 0x0b90, + 0x0b92, 0x0b95, + 0x0b99, 0x0b9a, + 0x0b9c, 0x0b9c, + 0x0b9e, 0x0b9f, + 0x0ba3, 0x0ba4, + 0x0ba8, 0x0baa, + 0x0bae, 0x0bb9, + 0x0c05, 0x0c0c, + 0x0c0e, 0x0c10, + 0x0c12, 0x0c28, + 0x0c2a, 0x0c33, + 0x0c35, 0x0c39, + 0x0c60, 0x0c61, + 0x0c85, 0x0c8c, + 0x0c8e, 0x0c90, + 0x0c92, 0x0ca8, + 0x0caa, 0x0cb3, + 0x0cb5, 0x0cb9, + 0x0cbd, 0x0cbd, + 0x0cde, 0x0cde, + 0x0ce0, 0x0ce1, + 0x0d05, 0x0d0c, + 0x0d0e, 0x0d10, + 0x0d12, 0x0d28, + 0x0d2a, 0x0d39, + 0x0d60, 0x0d61, + 0x0d85, 0x0d96, + 0x0d9a, 0x0db1, + 0x0db3, 0x0dbb, + 0x0dbd, 0x0dbd, + 0x0dc0, 0x0dc6, + 0x0e01, 0x0e30, + 0x0e32, 0x0e33, + 0x0e40, 0x0e46, + 0x0e81, 0x0e82, + 0x0e84, 0x0e84, + 0x0e87, 0x0e88, + 0x0e8a, 0x0e8a, + 0x0e8d, 0x0e8d, + 0x0e94, 0x0e97, + 0x0e99, 0x0e9f, + 0x0ea1, 0x0ea3, + 0x0ea5, 0x0ea5, + 0x0ea7, 0x0ea7, + 0x0eaa, 0x0eab, + 0x0ead, 0x0eb0, + 0x0eb2, 0x0eb3, + 0x0ebd, 0x0ebd, + 0x0ec0, 0x0ec4, + 0x0ec6, 0x0ec6, + 0x0edc, 0x0edd, + 0x0f00, 0x0f00, + 0x0f40, 0x0f47, + 0x0f49, 0x0f6a, + 0x0f88, 0x0f8b, + 0x1000, 0x1021, + 0x1023, 0x1027, + 0x1029, 0x102a, + 0x1050, 0x1055, + 0x10a0, 
0x10c5, + 0x10d0, 0x10fa, + 0x10fc, 0x10fc, + 0x1100, 0x1159, + 0x115f, 0x11a2, + 0x11a8, 0x11f9, + 0x1200, 0x1248, + 0x124a, 0x124d, + 0x1250, 0x1256, + 0x1258, 0x1258, + 0x125a, 0x125d, + 0x1260, 0x1288, + 0x128a, 0x128d, + 0x1290, 0x12b0, + 0x12b2, 0x12b5, + 0x12b8, 0x12be, + 0x12c0, 0x12c0, + 0x12c2, 0x12c5, + 0x12c8, 0x12d6, + 0x12d8, 0x1310, + 0x1312, 0x1315, + 0x1318, 0x135a, + 0x1380, 0x138f, + 0x13a0, 0x13f4, + 0x1401, 0x166c, + 0x166f, 0x1676, + 0x1681, 0x169a, + 0x16a0, 0x16ea, + 0x1700, 0x170c, + 0x170e, 0x1711, + 0x1720, 0x1731, + 0x1740, 0x1751, + 0x1760, 0x176c, + 0x176e, 0x1770, + 0x1780, 0x17b3, + 0x17d7, 0x17d7, + 0x17dc, 0x17dc, + 0x1820, 0x1877, + 0x1880, 0x18a8, + 0x1900, 0x191c, + 0x1950, 0x196d, + 0x1970, 0x1974, + 0x1980, 0x19a9, + 0x19c1, 0x19c7, + 0x1a00, 0x1a16, + 0x1d00, 0x1dbf, + 0x1e00, 0x1e9b, + 0x1ea0, 0x1ef9, + 0x1f00, 0x1f15, + 0x1f18, 0x1f1d, + 0x1f20, 0x1f45, + 0x1f48, 0x1f4d, + 0x1f50, 0x1f57, + 0x1f59, 0x1f59, + 0x1f5b, 0x1f5b, + 0x1f5d, 0x1f5d, + 0x1f5f, 0x1f7d, + 0x1f80, 0x1fb4, + 0x1fb6, 0x1fbc, + 0x1fbe, 0x1fbe, + 0x1fc2, 0x1fc4, + 0x1fc6, 0x1fcc, + 0x1fd0, 0x1fd3, + 0x1fd6, 0x1fdb, + 0x1fe0, 0x1fec, + 0x1ff2, 0x1ff4, + 0x1ff6, 0x1ffc, + 0x2071, 0x2071, + 0x207f, 0x207f, + 0x2090, 0x2094, + 0x2102, 0x2102, + 0x2107, 0x2107, + 0x210a, 0x2113, + 0x2115, 0x2115, + 0x2119, 0x211d, + 0x2124, 0x2124, + 0x2126, 0x2126, + 0x2128, 0x2128, + 0x212a, 0x212d, + 0x212f, 0x2131, + 0x2133, 0x2139, + 0x213c, 0x213f, + 0x2145, 0x2149, + 0x2c00, 0x2c2e, + 0x2c30, 0x2c5e, + 0x2c80, 0x2ce4, + 0x2d00, 0x2d25, + 0x2d30, 0x2d65, + 0x2d6f, 0x2d6f, + 0x2d80, 0x2d96, + 0x2da0, 0x2da6, + 0x2da8, 0x2dae, + 0x2db0, 0x2db6, + 0x2db8, 0x2dbe, + 0x2dc0, 0x2dc6, + 0x2dc8, 0x2dce, + 0x2dd0, 0x2dd6, + 0x2dd8, 0x2dde, + 0x3005, 0x3006, + 0x3031, 0x3035, + 0x303b, 0x303c, + 0x3041, 0x3096, + 0x309d, 0x309f, + 0x30a1, 0x30fa, + 0x30fc, 0x30ff, + 0x3105, 0x312c, + 0x3131, 0x318e, + 0x31a0, 0x31b7, + 0x31f0, 0x31ff, + 0x3400, 0x4db5, + 0x4e00, 0x9fbb, + 0xa000, 
0xa48c, + 0xa800, 0xa801, + 0xa803, 0xa805, + 0xa807, 0xa80a, + 0xa80c, 0xa822, + 0xac00, 0xd7a3, + 0xf900, 0xfa2d, + 0xfa30, 0xfa6a, + 0xfa70, 0xfad9, + 0xfb00, 0xfb06, + 0xfb13, 0xfb17, + 0xfb1d, 0xfb1d, + 0xfb1f, 0xfb28, + 0xfb2a, 0xfb36, + 0xfb38, 0xfb3c, + 0xfb3e, 0xfb3e, + 0xfb40, 0xfb41, + 0xfb43, 0xfb44, + 0xfb46, 0xfbb1, + 0xfbd3, 0xfd3d, + 0xfd50, 0xfd8f, + 0xfd92, 0xfdc7, + 0xfdf0, 0xfdfb, + 0xfe70, 0xfe74, + 0xfe76, 0xfefc, + 0xff21, 0xff3a, + 0xff41, 0xff5a, + 0xff66, 0xffbe, + 0xffc2, 0xffc7, + 0xffca, 0xffcf, + 0xffd2, 0xffd7, + 0xffda, 0xffdc, + 0x10000, 0x1000b, + 0x1000d, 0x10026, + 0x10028, 0x1003a, + 0x1003c, 0x1003d, + 0x1003f, 0x1004d, + 0x10050, 0x1005d, + 0x10080, 0x100fa, + 0x10300, 0x1031e, + 0x10330, 0x10349, + 0x10380, 0x1039d, + 0x103a0, 0x103c3, + 0x103c8, 0x103cf, + 0x10400, 0x1049d, + 0x10800, 0x10805, + 0x10808, 0x10808, + 0x1080a, 0x10835, + 0x10837, 0x10838, + 0x1083c, 0x1083c, + 0x1083f, 0x1083f, + 0x10a00, 0x10a00, + 0x10a10, 0x10a13, + 0x10a15, 0x10a17, + 0x10a19, 0x10a33, + 0x1d400, 0x1d454, + 0x1d456, 0x1d49c, + 0x1d49e, 0x1d49f, + 0x1d4a2, 0x1d4a2, + 0x1d4a5, 0x1d4a6, + 0x1d4a9, 0x1d4ac, + 0x1d4ae, 0x1d4b9, + 0x1d4bb, 0x1d4bb, + 0x1d4bd, 0x1d4c3, + 0x1d4c5, 0x1d505, + 0x1d507, 0x1d50a, + 0x1d50d, 0x1d514, + 0x1d516, 0x1d51c, + 0x1d51e, 0x1d539, + 0x1d53b, 0x1d53e, + 0x1d540, 0x1d544, + 0x1d546, 0x1d546, + 0x1d54a, 0x1d550, + 0x1d552, 0x1d6a5, + 0x1d6a8, 0x1d6c0, + 0x1d6c2, 0x1d6da, + 0x1d6dc, 0x1d6fa, + 0x1d6fc, 0x1d714, + 0x1d716, 0x1d734, + 0x1d736, 0x1d74e, + 0x1d750, 0x1d76e, + 0x1d770, 0x1d788, + 0x1d78a, 0x1d7a8, + 0x1d7aa, 0x1d7c2, + 0x1d7c4, 0x1d7c9, + 0x20000, 0x2a6d6, + 0x2f800, 0x2fa1d +}; /* CR_L */ + +/* 'Ll': General Category */ +static const OnigCodePoint CR_Ll[] = { + 480, + 0x0061, 0x007a, + 0x00aa, 0x00aa, + 0x00b5, 0x00b5, + 0x00ba, 0x00ba, + 0x00df, 0x00f6, + 0x00f8, 0x00ff, + 0x0101, 0x0101, + 0x0103, 0x0103, + 0x0105, 0x0105, + 0x0107, 0x0107, + 0x0109, 0x0109, + 0x010b, 0x010b, + 0x010d, 0x010d, + 
0x010f, 0x010f, + 0x0111, 0x0111, + 0x0113, 0x0113, + 0x0115, 0x0115, + 0x0117, 0x0117, + 0x0119, 0x0119, + 0x011b, 0x011b, + 0x011d, 0x011d, + 0x011f, 0x011f, + 0x0121, 0x0121, + 0x0123, 0x0123, + 0x0125, 0x0125, + 0x0127, 0x0127, + 0x0129, 0x0129, + 0x012b, 0x012b, + 0x012d, 0x012d, + 0x012f, 0x012f, + 0x0131, 0x0131, + 0x0133, 0x0133, + 0x0135, 0x0135, + 0x0137, 0x0138, + 0x013a, 0x013a, + 0x013c, 0x013c, + 0x013e, 0x013e, + 0x0140, 0x0140, + 0x0142, 0x0142, + 0x0144, 0x0144, + 0x0146, 0x0146, + 0x0148, 0x0149, + 0x014b, 0x014b, + 0x014d, 0x014d, + 0x014f, 0x014f, + 0x0151, 0x0151, + 0x0153, 0x0153, + 0x0155, 0x0155, + 0x0157, 0x0157, + 0x0159, 0x0159, + 0x015b, 0x015b, + 0x015d, 0x015d, + 0x015f, 0x015f, + 0x0161, 0x0161, + 0x0163, 0x0163, + 0x0165, 0x0165, + 0x0167, 0x0167, + 0x0169, 0x0169, + 0x016b, 0x016b, + 0x016d, 0x016d, + 0x016f, 0x016f, + 0x0171, 0x0171, + 0x0173, 0x0173, + 0x0175, 0x0175, + 0x0177, 0x0177, + 0x017a, 0x017a, + 0x017c, 0x017c, + 0x017e, 0x0180, + 0x0183, 0x0183, + 0x0185, 0x0185, + 0x0188, 0x0188, + 0x018c, 0x018d, + 0x0192, 0x0192, + 0x0195, 0x0195, + 0x0199, 0x019b, + 0x019e, 0x019e, + 0x01a1, 0x01a1, + 0x01a3, 0x01a3, + 0x01a5, 0x01a5, + 0x01a8, 0x01a8, + 0x01aa, 0x01ab, + 0x01ad, 0x01ad, + 0x01b0, 0x01b0, + 0x01b4, 0x01b4, + 0x01b6, 0x01b6, + 0x01b9, 0x01ba, + 0x01bd, 0x01bf, + 0x01c6, 0x01c6, + 0x01c9, 0x01c9, + 0x01cc, 0x01cc, + 0x01ce, 0x01ce, + 0x01d0, 0x01d0, + 0x01d2, 0x01d2, + 0x01d4, 0x01d4, + 0x01d6, 0x01d6, + 0x01d8, 0x01d8, + 0x01da, 0x01da, + 0x01dc, 0x01dd, + 0x01df, 0x01df, + 0x01e1, 0x01e1, + 0x01e3, 0x01e3, + 0x01e5, 0x01e5, + 0x01e7, 0x01e7, + 0x01e9, 0x01e9, + 0x01eb, 0x01eb, + 0x01ed, 0x01ed, + 0x01ef, 0x01f0, + 0x01f3, 0x01f3, + 0x01f5, 0x01f5, + 0x01f9, 0x01f9, + 0x01fb, 0x01fb, + 0x01fd, 0x01fd, + 0x01ff, 0x01ff, + 0x0201, 0x0201, + 0x0203, 0x0203, + 0x0205, 0x0205, + 0x0207, 0x0207, + 0x0209, 0x0209, + 0x020b, 0x020b, + 0x020d, 0x020d, + 0x020f, 0x020f, + 0x0211, 0x0211, + 0x0213, 0x0213, + 0x0215, 0x0215, + 
0x0217, 0x0217, + 0x0219, 0x0219, + 0x021b, 0x021b, + 0x021d, 0x021d, + 0x021f, 0x021f, + 0x0221, 0x0221, + 0x0223, 0x0223, + 0x0225, 0x0225, + 0x0227, 0x0227, + 0x0229, 0x0229, + 0x022b, 0x022b, + 0x022d, 0x022d, + 0x022f, 0x022f, + 0x0231, 0x0231, + 0x0233, 0x0239, + 0x023c, 0x023c, + 0x023f, 0x0240, + 0x0250, 0x02af, + 0x0390, 0x0390, + 0x03ac, 0x03ce, + 0x03d0, 0x03d1, + 0x03d5, 0x03d7, + 0x03d9, 0x03d9, + 0x03db, 0x03db, + 0x03dd, 0x03dd, + 0x03df, 0x03df, + 0x03e1, 0x03e1, + 0x03e3, 0x03e3, + 0x03e5, 0x03e5, + 0x03e7, 0x03e7, + 0x03e9, 0x03e9, + 0x03eb, 0x03eb, + 0x03ed, 0x03ed, + 0x03ef, 0x03f3, + 0x03f5, 0x03f5, + 0x03f8, 0x03f8, + 0x03fb, 0x03fc, + 0x0430, 0x045f, + 0x0461, 0x0461, + 0x0463, 0x0463, + 0x0465, 0x0465, + 0x0467, 0x0467, + 0x0469, 0x0469, + 0x046b, 0x046b, + 0x046d, 0x046d, + 0x046f, 0x046f, + 0x0471, 0x0471, + 0x0473, 0x0473, + 0x0475, 0x0475, + 0x0477, 0x0477, + 0x0479, 0x0479, + 0x047b, 0x047b, + 0x047d, 0x047d, + 0x047f, 0x047f, + 0x0481, 0x0481, + 0x048b, 0x048b, + 0x048d, 0x048d, + 0x048f, 0x048f, + 0x0491, 0x0491, + 0x0493, 0x0493, + 0x0495, 0x0495, + 0x0497, 0x0497, + 0x0499, 0x0499, + 0x049b, 0x049b, + 0x049d, 0x049d, + 0x049f, 0x049f, + 0x04a1, 0x04a1, + 0x04a3, 0x04a3, + 0x04a5, 0x04a5, + 0x04a7, 0x04a7, + 0x04a9, 0x04a9, + 0x04ab, 0x04ab, + 0x04ad, 0x04ad, + 0x04af, 0x04af, + 0x04b1, 0x04b1, + 0x04b3, 0x04b3, + 0x04b5, 0x04b5, + 0x04b7, 0x04b7, + 0x04b9, 0x04b9, + 0x04bb, 0x04bb, + 0x04bd, 0x04bd, + 0x04bf, 0x04bf, + 0x04c2, 0x04c2, + 0x04c4, 0x04c4, + 0x04c6, 0x04c6, + 0x04c8, 0x04c8, + 0x04ca, 0x04ca, + 0x04cc, 0x04cc, + 0x04ce, 0x04ce, + 0x04d1, 0x04d1, + 0x04d3, 0x04d3, + 0x04d5, 0x04d5, + 0x04d7, 0x04d7, + 0x04d9, 0x04d9, + 0x04db, 0x04db, + 0x04dd, 0x04dd, + 0x04df, 0x04df, + 0x04e1, 0x04e1, + 0x04e3, 0x04e3, + 0x04e5, 0x04e5, + 0x04e7, 0x04e7, + 0x04e9, 0x04e9, + 0x04eb, 0x04eb, + 0x04ed, 0x04ed, + 0x04ef, 0x04ef, + 0x04f1, 0x04f1, + 0x04f3, 0x04f3, + 0x04f5, 0x04f5, + 0x04f7, 0x04f7, + 0x04f9, 0x04f9, + 0x0501, 0x0501, + 
0x0503, 0x0503, + 0x0505, 0x0505, + 0x0507, 0x0507, + 0x0509, 0x0509, + 0x050b, 0x050b, + 0x050d, 0x050d, + 0x050f, 0x050f, + 0x0561, 0x0587, + 0x1d00, 0x1d2b, + 0x1d62, 0x1d77, + 0x1d79, 0x1d9a, + 0x1e01, 0x1e01, + 0x1e03, 0x1e03, + 0x1e05, 0x1e05, + 0x1e07, 0x1e07, + 0x1e09, 0x1e09, + 0x1e0b, 0x1e0b, + 0x1e0d, 0x1e0d, + 0x1e0f, 0x1e0f, + 0x1e11, 0x1e11, + 0x1e13, 0x1e13, + 0x1e15, 0x1e15, + 0x1e17, 0x1e17, + 0x1e19, 0x1e19, + 0x1e1b, 0x1e1b, + 0x1e1d, 0x1e1d, + 0x1e1f, 0x1e1f, + 0x1e21, 0x1e21, + 0x1e23, 0x1e23, + 0x1e25, 0x1e25, + 0x1e27, 0x1e27, + 0x1e29, 0x1e29, + 0x1e2b, 0x1e2b, + 0x1e2d, 0x1e2d, + 0x1e2f, 0x1e2f, + 0x1e31, 0x1e31, + 0x1e33, 0x1e33, + 0x1e35, 0x1e35, + 0x1e37, 0x1e37, + 0x1e39, 0x1e39, + 0x1e3b, 0x1e3b, + 0x1e3d, 0x1e3d, + 0x1e3f, 0x1e3f, + 0x1e41, 0x1e41, + 0x1e43, 0x1e43, + 0x1e45, 0x1e45, + 0x1e47, 0x1e47, + 0x1e49, 0x1e49, + 0x1e4b, 0x1e4b, + 0x1e4d, 0x1e4d, + 0x1e4f, 0x1e4f, + 0x1e51, 0x1e51, + 0x1e53, 0x1e53, + 0x1e55, 0x1e55, + 0x1e57, 0x1e57, + 0x1e59, 0x1e59, + 0x1e5b, 0x1e5b, + 0x1e5d, 0x1e5d, + 0x1e5f, 0x1e5f, + 0x1e61, 0x1e61, + 0x1e63, 0x1e63, + 0x1e65, 0x1e65, + 0x1e67, 0x1e67, + 0x1e69, 0x1e69, + 0x1e6b, 0x1e6b, + 0x1e6d, 0x1e6d, + 0x1e6f, 0x1e6f, + 0x1e71, 0x1e71, + 0x1e73, 0x1e73, + 0x1e75, 0x1e75, + 0x1e77, 0x1e77, + 0x1e79, 0x1e79, + 0x1e7b, 0x1e7b, + 0x1e7d, 0x1e7d, + 0x1e7f, 0x1e7f, + 0x1e81, 0x1e81, + 0x1e83, 0x1e83, + 0x1e85, 0x1e85, + 0x1e87, 0x1e87, + 0x1e89, 0x1e89, + 0x1e8b, 0x1e8b, + 0x1e8d, 0x1e8d, + 0x1e8f, 0x1e8f, + 0x1e91, 0x1e91, + 0x1e93, 0x1e93, + 0x1e95, 0x1e9b, + 0x1ea1, 0x1ea1, + 0x1ea3, 0x1ea3, + 0x1ea5, 0x1ea5, + 0x1ea7, 0x1ea7, + 0x1ea9, 0x1ea9, + 0x1eab, 0x1eab, + 0x1ead, 0x1ead, + 0x1eaf, 0x1eaf, + 0x1eb1, 0x1eb1, + 0x1eb3, 0x1eb3, + 0x1eb5, 0x1eb5, + 0x1eb7, 0x1eb7, + 0x1eb9, 0x1eb9, + 0x1ebb, 0x1ebb, + 0x1ebd, 0x1ebd, + 0x1ebf, 0x1ebf, + 0x1ec1, 0x1ec1, + 0x1ec3, 0x1ec3, + 0x1ec5, 0x1ec5, + 0x1ec7, 0x1ec7, + 0x1ec9, 0x1ec9, + 0x1ecb, 0x1ecb, + 0x1ecd, 0x1ecd, + 0x1ecf, 0x1ecf, + 0x1ed1, 0x1ed1, + 
0x1ed3, 0x1ed3, + 0x1ed5, 0x1ed5, + 0x1ed7, 0x1ed7, + 0x1ed9, 0x1ed9, + 0x1edb, 0x1edb, + 0x1edd, 0x1edd, + 0x1edf, 0x1edf, + 0x1ee1, 0x1ee1, + 0x1ee3, 0x1ee3, + 0x1ee5, 0x1ee5, + 0x1ee7, 0x1ee7, + 0x1ee9, 0x1ee9, + 0x1eeb, 0x1eeb, + 0x1eed, 0x1eed, + 0x1eef, 0x1eef, + 0x1ef1, 0x1ef1, + 0x1ef3, 0x1ef3, + 0x1ef5, 0x1ef5, + 0x1ef7, 0x1ef7, + 0x1ef9, 0x1ef9, + 0x1f00, 0x1f07, + 0x1f10, 0x1f15, + 0x1f20, 0x1f27, + 0x1f30, 0x1f37, + 0x1f40, 0x1f45, + 0x1f50, 0x1f57, + 0x1f60, 0x1f67, + 0x1f70, 0x1f7d, + 0x1f80, 0x1f87, + 0x1f90, 0x1f97, + 0x1fa0, 0x1fa7, + 0x1fb0, 0x1fb4, + 0x1fb6, 0x1fb7, + 0x1fbe, 0x1fbe, + 0x1fc2, 0x1fc4, + 0x1fc6, 0x1fc7, + 0x1fd0, 0x1fd3, + 0x1fd6, 0x1fd7, + 0x1fe0, 0x1fe7, + 0x1ff2, 0x1ff4, + 0x1ff6, 0x1ff7, + 0x2071, 0x2071, + 0x207f, 0x207f, + 0x210a, 0x210a, + 0x210e, 0x210f, + 0x2113, 0x2113, + 0x212f, 0x212f, + 0x2134, 0x2134, + 0x2139, 0x2139, + 0x213c, 0x213d, + 0x2146, 0x2149, + 0x2c30, 0x2c5e, + 0x2c81, 0x2c81, + 0x2c83, 0x2c83, + 0x2c85, 0x2c85, + 0x2c87, 0x2c87, + 0x2c89, 0x2c89, + 0x2c8b, 0x2c8b, + 0x2c8d, 0x2c8d, + 0x2c8f, 0x2c8f, + 0x2c91, 0x2c91, + 0x2c93, 0x2c93, + 0x2c95, 0x2c95, + 0x2c97, 0x2c97, + 0x2c99, 0x2c99, + 0x2c9b, 0x2c9b, + 0x2c9d, 0x2c9d, + 0x2c9f, 0x2c9f, + 0x2ca1, 0x2ca1, + 0x2ca3, 0x2ca3, + 0x2ca5, 0x2ca5, + 0x2ca7, 0x2ca7, + 0x2ca9, 0x2ca9, + 0x2cab, 0x2cab, + 0x2cad, 0x2cad, + 0x2caf, 0x2caf, + 0x2cb1, 0x2cb1, + 0x2cb3, 0x2cb3, + 0x2cb5, 0x2cb5, + 0x2cb7, 0x2cb7, + 0x2cb9, 0x2cb9, + 0x2cbb, 0x2cbb, + 0x2cbd, 0x2cbd, + 0x2cbf, 0x2cbf, + 0x2cc1, 0x2cc1, + 0x2cc3, 0x2cc3, + 0x2cc5, 0x2cc5, + 0x2cc7, 0x2cc7, + 0x2cc9, 0x2cc9, + 0x2ccb, 0x2ccb, + 0x2ccd, 0x2ccd, + 0x2ccf, 0x2ccf, + 0x2cd1, 0x2cd1, + 0x2cd3, 0x2cd3, + 0x2cd5, 0x2cd5, + 0x2cd7, 0x2cd7, + 0x2cd9, 0x2cd9, + 0x2cdb, 0x2cdb, + 0x2cdd, 0x2cdd, + 0x2cdf, 0x2cdf, + 0x2ce1, 0x2ce1, + 0x2ce3, 0x2ce4, + 0x2d00, 0x2d25, + 0xfb00, 0xfb06, + 0xfb13, 0xfb17, + 0xff41, 0xff5a, + 0x10428, 0x1044f, + 0x1d41a, 0x1d433, + 0x1d44e, 0x1d454, + 0x1d456, 0x1d467, + 0x1d482, 
0x1d49b, + 0x1d4b6, 0x1d4b9, + 0x1d4bb, 0x1d4bb, + 0x1d4bd, 0x1d4c3, + 0x1d4c5, 0x1d4cf, + 0x1d4ea, 0x1d503, + 0x1d51e, 0x1d537, + 0x1d552, 0x1d56b, + 0x1d586, 0x1d59f, + 0x1d5ba, 0x1d5d3, + 0x1d5ee, 0x1d607, + 0x1d622, 0x1d63b, + 0x1d656, 0x1d66f, + 0x1d68a, 0x1d6a5, + 0x1d6c2, 0x1d6da, + 0x1d6dc, 0x1d6e1, + 0x1d6fc, 0x1d714, + 0x1d716, 0x1d71b, + 0x1d736, 0x1d74e, + 0x1d750, 0x1d755, + 0x1d770, 0x1d788, + 0x1d78a, 0x1d78f, + 0x1d7aa, 0x1d7c2, + 0x1d7c4, 0x1d7c9 +}; /* CR_Ll */ + +/* 'Lm': General Category */ +static const OnigCodePoint CR_Lm[] = { + 26, + 0x02b0, 0x02c1, + 0x02c6, 0x02d1, + 0x02e0, 0x02e4, + 0x02ee, 0x02ee, + 0x037a, 0x037a, + 0x0559, 0x0559, + 0x0640, 0x0640, + 0x06e5, 0x06e6, + 0x0e46, 0x0e46, + 0x0ec6, 0x0ec6, + 0x10fc, 0x10fc, + 0x17d7, 0x17d7, + 0x1843, 0x1843, + 0x1d2c, 0x1d61, + 0x1d78, 0x1d78, + 0x1d9b, 0x1dbf, + 0x2090, 0x2094, + 0x2d6f, 0x2d6f, + 0x3005, 0x3005, + 0x3031, 0x3035, + 0x303b, 0x303b, + 0x309d, 0x309e, + 0x30fc, 0x30fe, + 0xa015, 0xa015, + 0xff70, 0xff70, + 0xff9e, 0xff9f +}; /* CR_Lm */ + +/* 'Lo': General Category */ +static const OnigCodePoint CR_Lo[] = { + 245, + 0x01bb, 0x01bb, + 0x01c0, 0x01c3, + 0x05d0, 0x05ea, + 0x05f0, 0x05f2, + 0x0621, 0x063a, + 0x0641, 0x064a, + 0x066e, 0x066f, + 0x0671, 0x06d3, + 0x06d5, 0x06d5, + 0x06ee, 0x06ef, + 0x06fa, 0x06fc, + 0x06ff, 0x06ff, + 0x0710, 0x0710, + 0x0712, 0x072f, + 0x074d, 0x076d, + 0x0780, 0x07a5, + 0x07b1, 0x07b1, + 0x0904, 0x0939, + 0x093d, 0x093d, + 0x0950, 0x0950, + 0x0958, 0x0961, + 0x097d, 0x097d, + 0x0985, 0x098c, + 0x098f, 0x0990, + 0x0993, 0x09a8, + 0x09aa, 0x09b0, + 0x09b2, 0x09b2, + 0x09b6, 0x09b9, + 0x09bd, 0x09bd, + 0x09ce, 0x09ce, + 0x09dc, 0x09dd, + 0x09df, 0x09e1, + 0x09f0, 0x09f1, + 0x0a05, 0x0a0a, + 0x0a0f, 0x0a10, + 0x0a13, 0x0a28, + 0x0a2a, 0x0a30, + 0x0a32, 0x0a33, + 0x0a35, 0x0a36, + 0x0a38, 0x0a39, + 0x0a59, 0x0a5c, + 0x0a5e, 0x0a5e, + 0x0a72, 0x0a74, + 0x0a85, 0x0a8d, + 0x0a8f, 0x0a91, + 0x0a93, 0x0aa8, + 0x0aaa, 0x0ab0, + 0x0ab2, 0x0ab3, + 0x0ab5, 
0x0ab9, + 0x0abd, 0x0abd, + 0x0ad0, 0x0ad0, + 0x0ae0, 0x0ae1, + 0x0b05, 0x0b0c, + 0x0b0f, 0x0b10, + 0x0b13, 0x0b28, + 0x0b2a, 0x0b30, + 0x0b32, 0x0b33, + 0x0b35, 0x0b39, + 0x0b3d, 0x0b3d, + 0x0b5c, 0x0b5d, + 0x0b5f, 0x0b61, + 0x0b71, 0x0b71, + 0x0b83, 0x0b83, + 0x0b85, 0x0b8a, + 0x0b8e, 0x0b90, + 0x0b92, 0x0b95, + 0x0b99, 0x0b9a, + 0x0b9c, 0x0b9c, + 0x0b9e, 0x0b9f, + 0x0ba3, 0x0ba4, + 0x0ba8, 0x0baa, + 0x0bae, 0x0bb9, + 0x0c05, 0x0c0c, + 0x0c0e, 0x0c10, + 0x0c12, 0x0c28, + 0x0c2a, 0x0c33, + 0x0c35, 0x0c39, + 0x0c60, 0x0c61, + 0x0c85, 0x0c8c, + 0x0c8e, 0x0c90, + 0x0c92, 0x0ca8, + 0x0caa, 0x0cb3, + 0x0cb5, 0x0cb9, + 0x0cbd, 0x0cbd, + 0x0cde, 0x0cde, + 0x0ce0, 0x0ce1, + 0x0d05, 0x0d0c, + 0x0d0e, 0x0d10, + 0x0d12, 0x0d28, + 0x0d2a, 0x0d39, + 0x0d60, 0x0d61, + 0x0d85, 0x0d96, + 0x0d9a, 0x0db1, + 0x0db3, 0x0dbb, + 0x0dbd, 0x0dbd, + 0x0dc0, 0x0dc6, + 0x0e01, 0x0e30, + 0x0e32, 0x0e33, + 0x0e40, 0x0e45, + 0x0e81, 0x0e82, + 0x0e84, 0x0e84, + 0x0e87, 0x0e88, + 0x0e8a, 0x0e8a, + 0x0e8d, 0x0e8d, + 0x0e94, 0x0e97, + 0x0e99, 0x0e9f, + 0x0ea1, 0x0ea3, + 0x0ea5, 0x0ea5, + 0x0ea7, 0x0ea7, + 0x0eaa, 0x0eab, + 0x0ead, 0x0eb0, + 0x0eb2, 0x0eb3, + 0x0ebd, 0x0ebd, + 0x0ec0, 0x0ec4, + 0x0edc, 0x0edd, + 0x0f00, 0x0f00, + 0x0f40, 0x0f47, + 0x0f49, 0x0f6a, + 0x0f88, 0x0f8b, + 0x1000, 0x1021, + 0x1023, 0x1027, + 0x1029, 0x102a, + 0x1050, 0x1055, + 0x10d0, 0x10fa, + 0x1100, 0x1159, + 0x115f, 0x11a2, + 0x11a8, 0x11f9, + 0x1200, 0x1248, + 0x124a, 0x124d, + 0x1250, 0x1256, + 0x1258, 0x1258, + 0x125a, 0x125d, + 0x1260, 0x1288, + 0x128a, 0x128d, + 0x1290, 0x12b0, + 0x12b2, 0x12b5, + 0x12b8, 0x12be, + 0x12c0, 0x12c0, + 0x12c2, 0x12c5, + 0x12c8, 0x12d6, + 0x12d8, 0x1310, + 0x1312, 0x1315, + 0x1318, 0x135a, + 0x1380, 0x138f, + 0x13a0, 0x13f4, + 0x1401, 0x166c, + 0x166f, 0x1676, + 0x1681, 0x169a, + 0x16a0, 0x16ea, + 0x1700, 0x170c, + 0x170e, 0x1711, + 0x1720, 0x1731, + 0x1740, 0x1751, + 0x1760, 0x176c, + 0x176e, 0x1770, + 0x1780, 0x17b3, + 0x17dc, 0x17dc, + 0x1820, 0x1842, + 0x1844, 0x1877, + 0x1880, 
0x18a8, + 0x1900, 0x191c, + 0x1950, 0x196d, + 0x1970, 0x1974, + 0x1980, 0x19a9, + 0x19c1, 0x19c7, + 0x1a00, 0x1a16, + 0x2135, 0x2138, + 0x2d30, 0x2d65, + 0x2d80, 0x2d96, + 0x2da0, 0x2da6, + 0x2da8, 0x2dae, + 0x2db0, 0x2db6, + 0x2db8, 0x2dbe, + 0x2dc0, 0x2dc6, + 0x2dc8, 0x2dce, + 0x2dd0, 0x2dd6, + 0x2dd8, 0x2dde, + 0x3006, 0x3006, + 0x303c, 0x303c, + 0x3041, 0x3096, + 0x309f, 0x309f, + 0x30a1, 0x30fa, + 0x30ff, 0x30ff, + 0x3105, 0x312c, + 0x3131, 0x318e, + 0x31a0, 0x31b7, + 0x31f0, 0x31ff, + 0x3400, 0x4db5, + 0x4e00, 0x9fbb, + 0xa000, 0xa014, + 0xa016, 0xa48c, + 0xa800, 0xa801, + 0xa803, 0xa805, + 0xa807, 0xa80a, + 0xa80c, 0xa822, + 0xac00, 0xd7a3, + 0xf900, 0xfa2d, + 0xfa30, 0xfa6a, + 0xfa70, 0xfad9, + 0xfb1d, 0xfb1d, + 0xfb1f, 0xfb28, + 0xfb2a, 0xfb36, + 0xfb38, 0xfb3c, + 0xfb3e, 0xfb3e, + 0xfb40, 0xfb41, + 0xfb43, 0xfb44, + 0xfb46, 0xfbb1, + 0xfbd3, 0xfd3d, + 0xfd50, 0xfd8f, + 0xfd92, 0xfdc7, + 0xfdf0, 0xfdfb, + 0xfe70, 0xfe74, + 0xfe76, 0xfefc, + 0xff66, 0xff6f, + 0xff71, 0xff9d, + 0xffa0, 0xffbe, + 0xffc2, 0xffc7, + 0xffca, 0xffcf, + 0xffd2, 0xffd7, + 0xffda, 0xffdc, + 0x10000, 0x1000b, + 0x1000d, 0x10026, + 0x10028, 0x1003a, + 0x1003c, 0x1003d, + 0x1003f, 0x1004d, + 0x10050, 0x1005d, + 0x10080, 0x100fa, + 0x10300, 0x1031e, + 0x10330, 0x10349, + 0x10380, 0x1039d, + 0x103a0, 0x103c3, + 0x103c8, 0x103cf, + 0x10450, 0x1049d, + 0x10800, 0x10805, + 0x10808, 0x10808, + 0x1080a, 0x10835, + 0x10837, 0x10838, + 0x1083c, 0x1083c, + 0x1083f, 0x1083f, + 0x10a00, 0x10a00, + 0x10a10, 0x10a13, + 0x10a15, 0x10a17, + 0x10a19, 0x10a33, + 0x20000, 0x2a6d6, + 0x2f800, 0x2fa1d +}; /* CR_Lo */ + +/* 'Lt': General Category */ +static const OnigCodePoint CR_Lt[] = { + 10, + 0x01c5, 0x01c5, + 0x01c8, 0x01c8, + 0x01cb, 0x01cb, + 0x01f2, 0x01f2, + 0x1f88, 0x1f8f, + 0x1f98, 0x1f9f, + 0x1fa8, 0x1faf, + 0x1fbc, 0x1fbc, + 0x1fcc, 0x1fcc, + 0x1ffc, 0x1ffc +}; /* CR_Lt */ + +/* 'Lu': General Category */ +static const OnigCodePoint CR_Lu[] = { + 476, + 0x0041, 0x005a, + 0x00c0, 0x00d6, + 
0x00d8, 0x00de, + 0x0100, 0x0100, + 0x0102, 0x0102, + 0x0104, 0x0104, + 0x0106, 0x0106, + 0x0108, 0x0108, + 0x010a, 0x010a, + 0x010c, 0x010c, + 0x010e, 0x010e, + 0x0110, 0x0110, + 0x0112, 0x0112, + 0x0114, 0x0114, + 0x0116, 0x0116, + 0x0118, 0x0118, + 0x011a, 0x011a, + 0x011c, 0x011c, + 0x011e, 0x011e, + 0x0120, 0x0120, + 0x0122, 0x0122, + 0x0124, 0x0124, + 0x0126, 0x0126, + 0x0128, 0x0128, + 0x012a, 0x012a, + 0x012c, 0x012c, + 0x012e, 0x012e, + 0x0130, 0x0130, + 0x0132, 0x0132, + 0x0134, 0x0134, + 0x0136, 0x0136, + 0x0139, 0x0139, + 0x013b, 0x013b, + 0x013d, 0x013d, + 0x013f, 0x013f, + 0x0141, 0x0141, + 0x0143, 0x0143, + 0x0145, 0x0145, + 0x0147, 0x0147, + 0x014a, 0x014a, + 0x014c, 0x014c, + 0x014e, 0x014e, + 0x0150, 0x0150, + 0x0152, 0x0152, + 0x0154, 0x0154, + 0x0156, 0x0156, + 0x0158, 0x0158, + 0x015a, 0x015a, + 0x015c, 0x015c, + 0x015e, 0x015e, + 0x0160, 0x0160, + 0x0162, 0x0162, + 0x0164, 0x0164, + 0x0166, 0x0166, + 0x0168, 0x0168, + 0x016a, 0x016a, + 0x016c, 0x016c, + 0x016e, 0x016e, + 0x0170, 0x0170, + 0x0172, 0x0172, + 0x0174, 0x0174, + 0x0176, 0x0176, + 0x0178, 0x0179, + 0x017b, 0x017b, + 0x017d, 0x017d, + 0x0181, 0x0182, + 0x0184, 0x0184, + 0x0186, 0x0187, + 0x0189, 0x018b, + 0x018e, 0x0191, + 0x0193, 0x0194, + 0x0196, 0x0198, + 0x019c, 0x019d, + 0x019f, 0x01a0, + 0x01a2, 0x01a2, + 0x01a4, 0x01a4, + 0x01a6, 0x01a7, + 0x01a9, 0x01a9, + 0x01ac, 0x01ac, + 0x01ae, 0x01af, + 0x01b1, 0x01b3, + 0x01b5, 0x01b5, + 0x01b7, 0x01b8, + 0x01bc, 0x01bc, + 0x01c4, 0x01c4, + 0x01c7, 0x01c7, + 0x01ca, 0x01ca, + 0x01cd, 0x01cd, + 0x01cf, 0x01cf, + 0x01d1, 0x01d1, + 0x01d3, 0x01d3, + 0x01d5, 0x01d5, + 0x01d7, 0x01d7, + 0x01d9, 0x01d9, + 0x01db, 0x01db, + 0x01de, 0x01de, + 0x01e0, 0x01e0, + 0x01e2, 0x01e2, + 0x01e4, 0x01e4, + 0x01e6, 0x01e6, + 0x01e8, 0x01e8, + 0x01ea, 0x01ea, + 0x01ec, 0x01ec, + 0x01ee, 0x01ee, + 0x01f1, 0x01f1, + 0x01f4, 0x01f4, + 0x01f6, 0x01f8, + 0x01fa, 0x01fa, + 0x01fc, 0x01fc, + 0x01fe, 0x01fe, + 0x0200, 0x0200, + 0x0202, 0x0202, + 0x0204, 0x0204, + 
0x0206, 0x0206, + 0x0208, 0x0208, + 0x020a, 0x020a, + 0x020c, 0x020c, + 0x020e, 0x020e, + 0x0210, 0x0210, + 0x0212, 0x0212, + 0x0214, 0x0214, + 0x0216, 0x0216, + 0x0218, 0x0218, + 0x021a, 0x021a, + 0x021c, 0x021c, + 0x021e, 0x021e, + 0x0220, 0x0220, + 0x0222, 0x0222, + 0x0224, 0x0224, + 0x0226, 0x0226, + 0x0228, 0x0228, + 0x022a, 0x022a, + 0x022c, 0x022c, + 0x022e, 0x022e, + 0x0230, 0x0230, + 0x0232, 0x0232, + 0x023a, 0x023b, + 0x023d, 0x023e, + 0x0241, 0x0241, + 0x0386, 0x0386, + 0x0388, 0x038a, + 0x038c, 0x038c, + 0x038e, 0x038f, + 0x0391, 0x03a1, + 0x03a3, 0x03ab, + 0x03d2, 0x03d4, + 0x03d8, 0x03d8, + 0x03da, 0x03da, + 0x03dc, 0x03dc, + 0x03de, 0x03de, + 0x03e0, 0x03e0, + 0x03e2, 0x03e2, + 0x03e4, 0x03e4, + 0x03e6, 0x03e6, + 0x03e8, 0x03e8, + 0x03ea, 0x03ea, + 0x03ec, 0x03ec, + 0x03ee, 0x03ee, + 0x03f4, 0x03f4, + 0x03f7, 0x03f7, + 0x03f9, 0x03fa, + 0x03fd, 0x042f, + 0x0460, 0x0460, + 0x0462, 0x0462, + 0x0464, 0x0464, + 0x0466, 0x0466, + 0x0468, 0x0468, + 0x046a, 0x046a, + 0x046c, 0x046c, + 0x046e, 0x046e, + 0x0470, 0x0470, + 0x0472, 0x0472, + 0x0474, 0x0474, + 0x0476, 0x0476, + 0x0478, 0x0478, + 0x047a, 0x047a, + 0x047c, 0x047c, + 0x047e, 0x047e, + 0x0480, 0x0480, + 0x048a, 0x048a, + 0x048c, 0x048c, + 0x048e, 0x048e, + 0x0490, 0x0490, + 0x0492, 0x0492, + 0x0494, 0x0494, + 0x0496, 0x0496, + 0x0498, 0x0498, + 0x049a, 0x049a, + 0x049c, 0x049c, + 0x049e, 0x049e, + 0x04a0, 0x04a0, + 0x04a2, 0x04a2, + 0x04a4, 0x04a4, + 0x04a6, 0x04a6, + 0x04a8, 0x04a8, + 0x04aa, 0x04aa, + 0x04ac, 0x04ac, + 0x04ae, 0x04ae, + 0x04b0, 0x04b0, + 0x04b2, 0x04b2, + 0x04b4, 0x04b4, + 0x04b6, 0x04b6, + 0x04b8, 0x04b8, + 0x04ba, 0x04ba, + 0x04bc, 0x04bc, + 0x04be, 0x04be, + 0x04c0, 0x04c1, + 0x04c3, 0x04c3, + 0x04c5, 0x04c5, + 0x04c7, 0x04c7, + 0x04c9, 0x04c9, + 0x04cb, 0x04cb, + 0x04cd, 0x04cd, + 0x04d0, 0x04d0, + 0x04d2, 0x04d2, + 0x04d4, 0x04d4, + 0x04d6, 0x04d6, + 0x04d8, 0x04d8, + 0x04da, 0x04da, + 0x04dc, 0x04dc, + 0x04de, 0x04de, + 0x04e0, 0x04e0, + 0x04e2, 0x04e2, + 0x04e4, 0x04e4, + 
0x04e6, 0x04e6, + 0x04e8, 0x04e8, + 0x04ea, 0x04ea, + 0x04ec, 0x04ec, + 0x04ee, 0x04ee, + 0x04f0, 0x04f0, + 0x04f2, 0x04f2, + 0x04f4, 0x04f4, + 0x04f6, 0x04f6, + 0x04f8, 0x04f8, + 0x0500, 0x0500, + 0x0502, 0x0502, + 0x0504, 0x0504, + 0x0506, 0x0506, + 0x0508, 0x0508, + 0x050a, 0x050a, + 0x050c, 0x050c, + 0x050e, 0x050e, + 0x0531, 0x0556, + 0x10a0, 0x10c5, + 0x1e00, 0x1e00, + 0x1e02, 0x1e02, + 0x1e04, 0x1e04, + 0x1e06, 0x1e06, + 0x1e08, 0x1e08, + 0x1e0a, 0x1e0a, + 0x1e0c, 0x1e0c, + 0x1e0e, 0x1e0e, + 0x1e10, 0x1e10, + 0x1e12, 0x1e12, + 0x1e14, 0x1e14, + 0x1e16, 0x1e16, + 0x1e18, 0x1e18, + 0x1e1a, 0x1e1a, + 0x1e1c, 0x1e1c, + 0x1e1e, 0x1e1e, + 0x1e20, 0x1e20, + 0x1e22, 0x1e22, + 0x1e24, 0x1e24, + 0x1e26, 0x1e26, + 0x1e28, 0x1e28, + 0x1e2a, 0x1e2a, + 0x1e2c, 0x1e2c, + 0x1e2e, 0x1e2e, + 0x1e30, 0x1e30, + 0x1e32, 0x1e32, + 0x1e34, 0x1e34, + 0x1e36, 0x1e36, + 0x1e38, 0x1e38, + 0x1e3a, 0x1e3a, + 0x1e3c, 0x1e3c, + 0x1e3e, 0x1e3e, + 0x1e40, 0x1e40, + 0x1e42, 0x1e42, + 0x1e44, 0x1e44, + 0x1e46, 0x1e46, + 0x1e48, 0x1e48, + 0x1e4a, 0x1e4a, + 0x1e4c, 0x1e4c, + 0x1e4e, 0x1e4e, + 0x1e50, 0x1e50, + 0x1e52, 0x1e52, + 0x1e54, 0x1e54, + 0x1e56, 0x1e56, + 0x1e58, 0x1e58, + 0x1e5a, 0x1e5a, + 0x1e5c, 0x1e5c, + 0x1e5e, 0x1e5e, + 0x1e60, 0x1e60, + 0x1e62, 0x1e62, + 0x1e64, 0x1e64, + 0x1e66, 0x1e66, + 0x1e68, 0x1e68, + 0x1e6a, 0x1e6a, + 0x1e6c, 0x1e6c, + 0x1e6e, 0x1e6e, + 0x1e70, 0x1e70, + 0x1e72, 0x1e72, + 0x1e74, 0x1e74, + 0x1e76, 0x1e76, + 0x1e78, 0x1e78, + 0x1e7a, 0x1e7a, + 0x1e7c, 0x1e7c, + 0x1e7e, 0x1e7e, + 0x1e80, 0x1e80, + 0x1e82, 0x1e82, + 0x1e84, 0x1e84, + 0x1e86, 0x1e86, + 0x1e88, 0x1e88, + 0x1e8a, 0x1e8a, + 0x1e8c, 0x1e8c, + 0x1e8e, 0x1e8e, + 0x1e90, 0x1e90, + 0x1e92, 0x1e92, + 0x1e94, 0x1e94, + 0x1ea0, 0x1ea0, + 0x1ea2, 0x1ea2, + 0x1ea4, 0x1ea4, + 0x1ea6, 0x1ea6, + 0x1ea8, 0x1ea8, + 0x1eaa, 0x1eaa, + 0x1eac, 0x1eac, + 0x1eae, 0x1eae, + 0x1eb0, 0x1eb0, + 0x1eb2, 0x1eb2, + 0x1eb4, 0x1eb4, + 0x1eb6, 0x1eb6, + 0x1eb8, 0x1eb8, + 0x1eba, 0x1eba, + 0x1ebc, 0x1ebc, + 0x1ebe, 0x1ebe, + 
0x1ec0, 0x1ec0, + 0x1ec2, 0x1ec2, + 0x1ec4, 0x1ec4, + 0x1ec6, 0x1ec6, + 0x1ec8, 0x1ec8, + 0x1eca, 0x1eca, + 0x1ecc, 0x1ecc, + 0x1ece, 0x1ece, + 0x1ed0, 0x1ed0, + 0x1ed2, 0x1ed2, + 0x1ed4, 0x1ed4, + 0x1ed6, 0x1ed6, + 0x1ed8, 0x1ed8, + 0x1eda, 0x1eda, + 0x1edc, 0x1edc, + 0x1ede, 0x1ede, + 0x1ee0, 0x1ee0, + 0x1ee2, 0x1ee2, + 0x1ee4, 0x1ee4, + 0x1ee6, 0x1ee6, + 0x1ee8, 0x1ee8, + 0x1eea, 0x1eea, + 0x1eec, 0x1eec, + 0x1eee, 0x1eee, + 0x1ef0, 0x1ef0, + 0x1ef2, 0x1ef2, + 0x1ef4, 0x1ef4, + 0x1ef6, 0x1ef6, + 0x1ef8, 0x1ef8, + 0x1f08, 0x1f0f, + 0x1f18, 0x1f1d, + 0x1f28, 0x1f2f, + 0x1f38, 0x1f3f, + 0x1f48, 0x1f4d, + 0x1f59, 0x1f59, + 0x1f5b, 0x1f5b, + 0x1f5d, 0x1f5d, + 0x1f5f, 0x1f5f, + 0x1f68, 0x1f6f, + 0x1fb8, 0x1fbb, + 0x1fc8, 0x1fcb, + 0x1fd8, 0x1fdb, + 0x1fe8, 0x1fec, + 0x1ff8, 0x1ffb, + 0x2102, 0x2102, + 0x2107, 0x2107, + 0x210b, 0x210d, + 0x2110, 0x2112, + 0x2115, 0x2115, + 0x2119, 0x211d, + 0x2124, 0x2124, + 0x2126, 0x2126, + 0x2128, 0x2128, + 0x212a, 0x212d, + 0x2130, 0x2131, + 0x2133, 0x2133, + 0x213e, 0x213f, + 0x2145, 0x2145, + 0x2c00, 0x2c2e, + 0x2c80, 0x2c80, + 0x2c82, 0x2c82, + 0x2c84, 0x2c84, + 0x2c86, 0x2c86, + 0x2c88, 0x2c88, + 0x2c8a, 0x2c8a, + 0x2c8c, 0x2c8c, + 0x2c8e, 0x2c8e, + 0x2c90, 0x2c90, + 0x2c92, 0x2c92, + 0x2c94, 0x2c94, + 0x2c96, 0x2c96, + 0x2c98, 0x2c98, + 0x2c9a, 0x2c9a, + 0x2c9c, 0x2c9c, + 0x2c9e, 0x2c9e, + 0x2ca0, 0x2ca0, + 0x2ca2, 0x2ca2, + 0x2ca4, 0x2ca4, + 0x2ca6, 0x2ca6, + 0x2ca8, 0x2ca8, + 0x2caa, 0x2caa, + 0x2cac, 0x2cac, + 0x2cae, 0x2cae, + 0x2cb0, 0x2cb0, + 0x2cb2, 0x2cb2, + 0x2cb4, 0x2cb4, + 0x2cb6, 0x2cb6, + 0x2cb8, 0x2cb8, + 0x2cba, 0x2cba, + 0x2cbc, 0x2cbc, + 0x2cbe, 0x2cbe, + 0x2cc0, 0x2cc0, + 0x2cc2, 0x2cc2, + 0x2cc4, 0x2cc4, + 0x2cc6, 0x2cc6, + 0x2cc8, 0x2cc8, + 0x2cca, 0x2cca, + 0x2ccc, 0x2ccc, + 0x2cce, 0x2cce, + 0x2cd0, 0x2cd0, + 0x2cd2, 0x2cd2, + 0x2cd4, 0x2cd4, + 0x2cd6, 0x2cd6, + 0x2cd8, 0x2cd8, + 0x2cda, 0x2cda, + 0x2cdc, 0x2cdc, + 0x2cde, 0x2cde, + 0x2ce0, 0x2ce0, + 0x2ce2, 0x2ce2, + 0xff21, 0xff3a, + 0x10400, 0x10427, + 
0x1d400, 0x1d419, + 0x1d434, 0x1d44d, + 0x1d468, 0x1d481, + 0x1d49c, 0x1d49c, + 0x1d49e, 0x1d49f, + 0x1d4a2, 0x1d4a2, + 0x1d4a5, 0x1d4a6, + 0x1d4a9, 0x1d4ac, + 0x1d4ae, 0x1d4b5, + 0x1d4d0, 0x1d4e9, + 0x1d504, 0x1d505, + 0x1d507, 0x1d50a, + 0x1d50d, 0x1d514, + 0x1d516, 0x1d51c, + 0x1d538, 0x1d539, + 0x1d53b, 0x1d53e, + 0x1d540, 0x1d544, + 0x1d546, 0x1d546, + 0x1d54a, 0x1d550, + 0x1d56c, 0x1d585, + 0x1d5a0, 0x1d5b9, + 0x1d5d4, 0x1d5ed, + 0x1d608, 0x1d621, + 0x1d63c, 0x1d655, + 0x1d670, 0x1d689, + 0x1d6a8, 0x1d6c0, + 0x1d6e2, 0x1d6fa, + 0x1d71c, 0x1d734, + 0x1d756, 0x1d76e, + 0x1d790, 0x1d7a8 +}; /* CR_Lu */ + +/* 'M': Major Category */ +static const OnigCodePoint CR_M[] = { + 133, + 0x0300, 0x036f, + 0x0483, 0x0486, + 0x0488, 0x0489, + 0x0591, 0x05b9, + 0x05bb, 0x05bd, + 0x05bf, 0x05bf, + 0x05c1, 0x05c2, + 0x05c4, 0x05c5, + 0x05c7, 0x05c7, + 0x0610, 0x0615, + 0x064b, 0x065e, + 0x0670, 0x0670, + 0x06d6, 0x06dc, + 0x06de, 0x06e4, + 0x06e7, 0x06e8, + 0x06ea, 0x06ed, + 0x0711, 0x0711, + 0x0730, 0x074a, + 0x07a6, 0x07b0, + 0x0901, 0x0903, + 0x093c, 0x093c, + 0x093e, 0x094d, + 0x0951, 0x0954, + 0x0962, 0x0963, + 0x0981, 0x0983, + 0x09bc, 0x09bc, + 0x09be, 0x09c4, + 0x09c7, 0x09c8, + 0x09cb, 0x09cd, + 0x09d7, 0x09d7, + 0x09e2, 0x09e3, + 0x0a01, 0x0a03, + 0x0a3c, 0x0a3c, + 0x0a3e, 0x0a42, + 0x0a47, 0x0a48, + 0x0a4b, 0x0a4d, + 0x0a70, 0x0a71, + 0x0a81, 0x0a83, + 0x0abc, 0x0abc, + 0x0abe, 0x0ac5, + 0x0ac7, 0x0ac9, + 0x0acb, 0x0acd, + 0x0ae2, 0x0ae3, + 0x0b01, 0x0b03, + 0x0b3c, 0x0b3c, + 0x0b3e, 0x0b43, + 0x0b47, 0x0b48, + 0x0b4b, 0x0b4d, + 0x0b56, 0x0b57, + 0x0b82, 0x0b82, + 0x0bbe, 0x0bc2, + 0x0bc6, 0x0bc8, + 0x0bca, 0x0bcd, + 0x0bd7, 0x0bd7, + 0x0c01, 0x0c03, + 0x0c3e, 0x0c44, + 0x0c46, 0x0c48, + 0x0c4a, 0x0c4d, + 0x0c55, 0x0c56, + 0x0c82, 0x0c83, + 0x0cbc, 0x0cbc, + 0x0cbe, 0x0cc4, + 0x0cc6, 0x0cc8, + 0x0cca, 0x0ccd, + 0x0cd5, 0x0cd6, + 0x0d02, 0x0d03, + 0x0d3e, 0x0d43, + 0x0d46, 0x0d48, + 0x0d4a, 0x0d4d, + 0x0d57, 0x0d57, + 0x0d82, 0x0d83, + 0x0dca, 0x0dca, + 0x0dcf, 
0x0dd4, + 0x0dd6, 0x0dd6, + 0x0dd8, 0x0ddf, + 0x0df2, 0x0df3, + 0x0e31, 0x0e31, + 0x0e34, 0x0e3a, + 0x0e47, 0x0e4e, + 0x0eb1, 0x0eb1, + 0x0eb4, 0x0eb9, + 0x0ebb, 0x0ebc, + 0x0ec8, 0x0ecd, + 0x0f18, 0x0f19, + 0x0f35, 0x0f35, + 0x0f37, 0x0f37, + 0x0f39, 0x0f39, + 0x0f3e, 0x0f3f, + 0x0f71, 0x0f84, + 0x0f86, 0x0f87, + 0x0f90, 0x0f97, + 0x0f99, 0x0fbc, + 0x0fc6, 0x0fc6, + 0x102c, 0x1032, + 0x1036, 0x1039, + 0x1056, 0x1059, + 0x135f, 0x135f, + 0x1712, 0x1714, + 0x1732, 0x1734, + 0x1752, 0x1753, + 0x1772, 0x1773, + 0x17b6, 0x17d3, + 0x17dd, 0x17dd, + 0x180b, 0x180d, + 0x18a9, 0x18a9, + 0x1920, 0x192b, + 0x1930, 0x193b, + 0x19b0, 0x19c0, + 0x19c8, 0x19c9, + 0x1a17, 0x1a1b, + 0x1dc0, 0x1dc3, + 0x20d0, 0x20eb, + 0x302a, 0x302f, + 0x3099, 0x309a, + 0xa802, 0xa802, + 0xa806, 0xa806, + 0xa80b, 0xa80b, + 0xa823, 0xa827, + 0xfb1e, 0xfb1e, + 0xfe00, 0xfe0f, + 0xfe20, 0xfe23, + 0x10a01, 0x10a03, + 0x10a05, 0x10a06, + 0x10a0c, 0x10a0f, + 0x10a38, 0x10a3a, + 0x10a3f, 0x10a3f, + 0x1d165, 0x1d169, + 0x1d16d, 0x1d172, + 0x1d17b, 0x1d182, + 0x1d185, 0x1d18b, + 0x1d1aa, 0x1d1ad, + 0x1d242, 0x1d244, + 0xe0100, 0xe01ef +}; /* CR_M */ + +/* 'Mc': General Category */ +static const OnigCodePoint CR_Mc[] = { + 63, + 0x0903, 0x0903, + 0x093e, 0x0940, + 0x0949, 0x094c, + 0x0982, 0x0983, + 0x09be, 0x09c0, + 0x09c7, 0x09c8, + 0x09cb, 0x09cc, + 0x09d7, 0x09d7, + 0x0a03, 0x0a03, + 0x0a3e, 0x0a40, + 0x0a83, 0x0a83, + 0x0abe, 0x0ac0, + 0x0ac9, 0x0ac9, + 0x0acb, 0x0acc, + 0x0b02, 0x0b03, + 0x0b3e, 0x0b3e, + 0x0b40, 0x0b40, + 0x0b47, 0x0b48, + 0x0b4b, 0x0b4c, + 0x0b57, 0x0b57, + 0x0bbe, 0x0bbf, + 0x0bc1, 0x0bc2, + 0x0bc6, 0x0bc8, + 0x0bca, 0x0bcc, + 0x0bd7, 0x0bd7, + 0x0c01, 0x0c03, + 0x0c41, 0x0c44, + 0x0c82, 0x0c83, + 0x0cbe, 0x0cbe, + 0x0cc0, 0x0cc4, + 0x0cc7, 0x0cc8, + 0x0cca, 0x0ccb, + 0x0cd5, 0x0cd6, + 0x0d02, 0x0d03, + 0x0d3e, 0x0d40, + 0x0d46, 0x0d48, + 0x0d4a, 0x0d4c, + 0x0d57, 0x0d57, + 0x0d82, 0x0d83, + 0x0dcf, 0x0dd1, + 0x0dd8, 0x0ddf, + 0x0df2, 0x0df3, + 0x0f3e, 0x0f3f, + 0x0f7f, 0x0f7f, + 
0x102c, 0x102c, + 0x1031, 0x1031, + 0x1038, 0x1038, + 0x1056, 0x1057, + 0x17b6, 0x17b6, + 0x17be, 0x17c5, + 0x17c7, 0x17c8, + 0x1923, 0x1926, + 0x1929, 0x192b, + 0x1930, 0x1931, + 0x1933, 0x1938, + 0x19b0, 0x19c0, + 0x19c8, 0x19c9, + 0x1a19, 0x1a1b, + 0xa802, 0xa802, + 0xa823, 0xa824, + 0xa827, 0xa827, + 0x1d165, 0x1d166, + 0x1d16d, 0x1d172 +}; /* CR_Mc */ + +/* 'Me': General Category */ +static const OnigCodePoint CR_Me[] = { + 4, + 0x0488, 0x0489, + 0x06de, 0x06de, + 0x20dd, 0x20e0, + 0x20e2, 0x20e4 +}; /* CR_Me */ + +/* 'Mn': General Category */ +static const OnigCodePoint CR_Mn[] = { + 124, + 0x0300, 0x036f, + 0x0483, 0x0486, + 0x0591, 0x05b9, + 0x05bb, 0x05bd, + 0x05bf, 0x05bf, + 0x05c1, 0x05c2, + 0x05c4, 0x05c5, + 0x05c7, 0x05c7, + 0x0610, 0x0615, + 0x064b, 0x065e, + 0x0670, 0x0670, + 0x06d6, 0x06dc, + 0x06df, 0x06e4, + 0x06e7, 0x06e8, + 0x06ea, 0x06ed, + 0x0711, 0x0711, + 0x0730, 0x074a, + 0x07a6, 0x07b0, + 0x0901, 0x0902, + 0x093c, 0x093c, + 0x0941, 0x0948, + 0x094d, 0x094d, + 0x0951, 0x0954, + 0x0962, 0x0963, + 0x0981, 0x0981, + 0x09bc, 0x09bc, + 0x09c1, 0x09c4, + 0x09cd, 0x09cd, + 0x09e2, 0x09e3, + 0x0a01, 0x0a02, + 0x0a3c, 0x0a3c, + 0x0a41, 0x0a42, + 0x0a47, 0x0a48, + 0x0a4b, 0x0a4d, + 0x0a70, 0x0a71, + 0x0a81, 0x0a82, + 0x0abc, 0x0abc, + 0x0ac1, 0x0ac5, + 0x0ac7, 0x0ac8, + 0x0acd, 0x0acd, + 0x0ae2, 0x0ae3, + 0x0b01, 0x0b01, + 0x0b3c, 0x0b3c, + 0x0b3f, 0x0b3f, + 0x0b41, 0x0b43, + 0x0b4d, 0x0b4d, + 0x0b56, 0x0b56, + 0x0b82, 0x0b82, + 0x0bc0, 0x0bc0, + 0x0bcd, 0x0bcd, + 0x0c3e, 0x0c40, + 0x0c46, 0x0c48, + 0x0c4a, 0x0c4d, + 0x0c55, 0x0c56, + 0x0cbc, 0x0cbc, + 0x0cbf, 0x0cbf, + 0x0cc6, 0x0cc6, + 0x0ccc, 0x0ccd, + 0x0d41, 0x0d43, + 0x0d4d, 0x0d4d, + 0x0dca, 0x0dca, + 0x0dd2, 0x0dd4, + 0x0dd6, 0x0dd6, + 0x0e31, 0x0e31, + 0x0e34, 0x0e3a, + 0x0e47, 0x0e4e, + 0x0eb1, 0x0eb1, + 0x0eb4, 0x0eb9, + 0x0ebb, 0x0ebc, + 0x0ec8, 0x0ecd, + 0x0f18, 0x0f19, + 0x0f35, 0x0f35, + 0x0f37, 0x0f37, + 0x0f39, 0x0f39, + 0x0f71, 0x0f7e, + 0x0f80, 0x0f84, + 0x0f86, 0x0f87, + 0x0f90, 
0x0f97, + 0x0f99, 0x0fbc, + 0x0fc6, 0x0fc6, + 0x102d, 0x1030, + 0x1032, 0x1032, + 0x1036, 0x1037, + 0x1039, 0x1039, + 0x1058, 0x1059, + 0x135f, 0x135f, + 0x1712, 0x1714, + 0x1732, 0x1734, + 0x1752, 0x1753, + 0x1772, 0x1773, + 0x17b7, 0x17bd, + 0x17c6, 0x17c6, + 0x17c9, 0x17d3, + 0x17dd, 0x17dd, + 0x180b, 0x180d, + 0x18a9, 0x18a9, + 0x1920, 0x1922, + 0x1927, 0x1928, + 0x1932, 0x1932, + 0x1939, 0x193b, + 0x1a17, 0x1a18, + 0x1dc0, 0x1dc3, + 0x20d0, 0x20dc, + 0x20e1, 0x20e1, + 0x20e5, 0x20eb, + 0x302a, 0x302f, + 0x3099, 0x309a, + 0xa806, 0xa806, + 0xa80b, 0xa80b, + 0xa825, 0xa826, + 0xfb1e, 0xfb1e, + 0xfe00, 0xfe0f, + 0xfe20, 0xfe23, + 0x10a01, 0x10a03, + 0x10a05, 0x10a06, + 0x10a0c, 0x10a0f, + 0x10a38, 0x10a3a, + 0x10a3f, 0x10a3f, + 0x1d167, 0x1d169, + 0x1d17b, 0x1d182, + 0x1d185, 0x1d18b, + 0x1d1aa, 0x1d1ad, + 0x1d242, 0x1d244, + 0xe0100, 0xe01ef +}; /* CR_Mn */ + +/* 'N': Major Category */ +static const OnigCodePoint CR_N[] = { + 53, + 0x0030, 0x0039, + 0x00b2, 0x00b3, + 0x00b9, 0x00b9, + 0x00bc, 0x00be, + 0x0660, 0x0669, + 0x06f0, 0x06f9, + 0x0966, 0x096f, + 0x09e6, 0x09ef, + 0x09f4, 0x09f9, + 0x0a66, 0x0a6f, + 0x0ae6, 0x0aef, + 0x0b66, 0x0b6f, + 0x0be6, 0x0bf2, + 0x0c66, 0x0c6f, + 0x0ce6, 0x0cef, + 0x0d66, 0x0d6f, + 0x0e50, 0x0e59, + 0x0ed0, 0x0ed9, + 0x0f20, 0x0f33, + 0x1040, 0x1049, + 0x1369, 0x137c, + 0x16ee, 0x16f0, + 0x17e0, 0x17e9, + 0x17f0, 0x17f9, + 0x1810, 0x1819, + 0x1946, 0x194f, + 0x19d0, 0x19d9, + 0x2070, 0x2070, + 0x2074, 0x2079, + 0x2080, 0x2089, + 0x2153, 0x2183, + 0x2460, 0x249b, + 0x24ea, 0x24ff, + 0x2776, 0x2793, + 0x2cfd, 0x2cfd, + 0x3007, 0x3007, + 0x3021, 0x3029, + 0x3038, 0x303a, + 0x3192, 0x3195, + 0x3220, 0x3229, + 0x3251, 0x325f, + 0x3280, 0x3289, + 0x32b1, 0x32bf, + 0xff10, 0xff19, + 0x10107, 0x10133, + 0x10140, 0x10178, + 0x1018a, 0x1018a, + 0x10320, 0x10323, + 0x1034a, 0x1034a, + 0x103d1, 0x103d5, + 0x104a0, 0x104a9, + 0x10a40, 0x10a47, + 0x1d7ce, 0x1d7ff +}; /* CR_N */ + +/* 'Nd': General Category */ +static const OnigCodePoint 
CR_Nd[] = { + 23, + 0x0030, 0x0039, + 0x0660, 0x0669, + 0x06f0, 0x06f9, + 0x0966, 0x096f, + 0x09e6, 0x09ef, + 0x0a66, 0x0a6f, + 0x0ae6, 0x0aef, + 0x0b66, 0x0b6f, + 0x0be6, 0x0bef, + 0x0c66, 0x0c6f, + 0x0ce6, 0x0cef, + 0x0d66, 0x0d6f, + 0x0e50, 0x0e59, + 0x0ed0, 0x0ed9, + 0x0f20, 0x0f29, + 0x1040, 0x1049, + 0x17e0, 0x17e9, + 0x1810, 0x1819, + 0x1946, 0x194f, + 0x19d0, 0x19d9, + 0xff10, 0xff19, + 0x104a0, 0x104a9, + 0x1d7ce, 0x1d7ff +}; /* CR_Nd */ + +/* 'Nl': General Category */ +static const OnigCodePoint CR_Nl[] = { + 8, + 0x16ee, 0x16f0, + 0x2160, 0x2183, + 0x3007, 0x3007, + 0x3021, 0x3029, + 0x3038, 0x303a, + 0x10140, 0x10174, + 0x1034a, 0x1034a, + 0x103d1, 0x103d5 +}; /* CR_Nl */ + +/* 'No': General Category */ +static const OnigCodePoint CR_No[] = { + 26, + 0x00b2, 0x00b3, + 0x00b9, 0x00b9, + 0x00bc, 0x00be, + 0x09f4, 0x09f9, + 0x0bf0, 0x0bf2, + 0x0f2a, 0x0f33, + 0x1369, 0x137c, + 0x17f0, 0x17f9, + 0x2070, 0x2070, + 0x2074, 0x2079, + 0x2080, 0x2089, + 0x2153, 0x215f, + 0x2460, 0x249b, + 0x24ea, 0x24ff, + 0x2776, 0x2793, + 0x2cfd, 0x2cfd, + 0x3192, 0x3195, + 0x3220, 0x3229, + 0x3251, 0x325f, + 0x3280, 0x3289, + 0x32b1, 0x32bf, + 0x10107, 0x10133, + 0x10175, 0x10178, + 0x1018a, 0x1018a, + 0x10320, 0x10323, + 0x10a40, 0x10a47 +}; /* CR_No */ + +/* 'P': Major Category */ +static const OnigCodePoint CR_P[] = { + 96, + 0x0021, 0x0023, + 0x0025, 0x002a, + 0x002c, 0x002f, + 0x003a, 0x003b, + 0x003f, 0x0040, + 0x005b, 0x005d, + 0x005f, 0x005f, + 0x007b, 0x007b, + 0x007d, 0x007d, + 0x00a1, 0x00a1, + 0x00ab, 0x00ab, + 0x00b7, 0x00b7, + 0x00bb, 0x00bb, + 0x00bf, 0x00bf, + 0x037e, 0x037e, + 0x0387, 0x0387, + 0x055a, 0x055f, + 0x0589, 0x058a, + 0x05be, 0x05be, + 0x05c0, 0x05c0, + 0x05c3, 0x05c3, + 0x05c6, 0x05c6, + 0x05f3, 0x05f4, + 0x060c, 0x060d, + 0x061b, 0x061b, + 0x061e, 0x061f, + 0x066a, 0x066d, + 0x06d4, 0x06d4, + 0x0700, 0x070d, + 0x0964, 0x0965, + 0x0970, 0x0970, + 0x0df4, 0x0df4, + 0x0e4f, 0x0e4f, + 0x0e5a, 0x0e5b, + 0x0f04, 0x0f12, + 0x0f3a, 0x0f3d, + 0x0f85, 
0x0f85, + 0x0fd0, 0x0fd1, + 0x104a, 0x104f, + 0x10fb, 0x10fb, + 0x1361, 0x1368, + 0x166d, 0x166e, + 0x169b, 0x169c, + 0x16eb, 0x16ed, + 0x1735, 0x1736, + 0x17d4, 0x17d6, + 0x17d8, 0x17da, + 0x1800, 0x180a, + 0x1944, 0x1945, + 0x19de, 0x19df, + 0x1a1e, 0x1a1f, + 0x2010, 0x2027, + 0x2030, 0x2043, + 0x2045, 0x2051, + 0x2053, 0x205e, + 0x207d, 0x207e, + 0x208d, 0x208e, + 0x2329, 0x232a, + 0x23b4, 0x23b6, + 0x2768, 0x2775, + 0x27c5, 0x27c6, + 0x27e6, 0x27eb, + 0x2983, 0x2998, + 0x29d8, 0x29db, + 0x29fc, 0x29fd, + 0x2cf9, 0x2cfc, + 0x2cfe, 0x2cff, + 0x2e00, 0x2e17, + 0x2e1c, 0x2e1d, + 0x3001, 0x3003, + 0x3008, 0x3011, + 0x3014, 0x301f, + 0x3030, 0x3030, + 0x303d, 0x303d, + 0x30a0, 0x30a0, + 0x30fb, 0x30fb, + 0xfd3e, 0xfd3f, + 0xfe10, 0xfe19, + 0xfe30, 0xfe52, + 0xfe54, 0xfe61, + 0xfe63, 0xfe63, + 0xfe68, 0xfe68, + 0xfe6a, 0xfe6b, + 0xff01, 0xff03, + 0xff05, 0xff0a, + 0xff0c, 0xff0f, + 0xff1a, 0xff1b, + 0xff1f, 0xff20, + 0xff3b, 0xff3d, + 0xff3f, 0xff3f, + 0xff5b, 0xff5b, + 0xff5d, 0xff5d, + 0xff5f, 0xff65, + 0x10100, 0x10101, + 0x1039f, 0x1039f, + 0x10a50, 0x10a58 +}; /* CR_P */ + +/* 'Pc': General Category */ +static const OnigCodePoint CR_Pc[] = { + 6, + 0x005f, 0x005f, + 0x203f, 0x2040, + 0x2054, 0x2054, + 0xfe33, 0xfe34, + 0xfe4d, 0xfe4f, + 0xff3f, 0xff3f +}; /* CR_Pc */ + +/* 'Pd': General Category */ +static const OnigCodePoint CR_Pd[] = { + 12, + 0x002d, 0x002d, + 0x058a, 0x058a, + 0x1806, 0x1806, + 0x2010, 0x2015, + 0x2e17, 0x2e17, + 0x301c, 0x301c, + 0x3030, 0x3030, + 0x30a0, 0x30a0, + 0xfe31, 0xfe32, + 0xfe58, 0xfe58, + 0xfe63, 0xfe63, + 0xff0d, 0xff0d +}; /* CR_Pd */ + +/* 'Pe': General Category */ +static const OnigCodePoint CR_Pe[] = { + 65, + 0x0029, 0x0029, + 0x005d, 0x005d, + 0x007d, 0x007d, + 0x0f3b, 0x0f3b, + 0x0f3d, 0x0f3d, + 0x169c, 0x169c, + 0x2046, 0x2046, + 0x207e, 0x207e, + 0x208e, 0x208e, + 0x232a, 0x232a, + 0x23b5, 0x23b5, + 0x2769, 0x2769, + 0x276b, 0x276b, + 0x276d, 0x276d, + 0x276f, 0x276f, + 0x2771, 0x2771, + 0x2773, 0x2773, + 0x2775, 
0x2775, + 0x27c6, 0x27c6, + 0x27e7, 0x27e7, + 0x27e9, 0x27e9, + 0x27eb, 0x27eb, + 0x2984, 0x2984, + 0x2986, 0x2986, + 0x2988, 0x2988, + 0x298a, 0x298a, + 0x298c, 0x298c, + 0x298e, 0x298e, + 0x2990, 0x2990, + 0x2992, 0x2992, + 0x2994, 0x2994, + 0x2996, 0x2996, + 0x2998, 0x2998, + 0x29d9, 0x29d9, + 0x29db, 0x29db, + 0x29fd, 0x29fd, + 0x3009, 0x3009, + 0x300b, 0x300b, + 0x300d, 0x300d, + 0x300f, 0x300f, + 0x3011, 0x3011, + 0x3015, 0x3015, + 0x3017, 0x3017, + 0x3019, 0x3019, + 0x301b, 0x301b, + 0x301e, 0x301f, + 0xfd3f, 0xfd3f, + 0xfe18, 0xfe18, + 0xfe36, 0xfe36, + 0xfe38, 0xfe38, + 0xfe3a, 0xfe3a, + 0xfe3c, 0xfe3c, + 0xfe3e, 0xfe3e, + 0xfe40, 0xfe40, + 0xfe42, 0xfe42, + 0xfe44, 0xfe44, + 0xfe48, 0xfe48, + 0xfe5a, 0xfe5a, + 0xfe5c, 0xfe5c, + 0xfe5e, 0xfe5e, + 0xff09, 0xff09, + 0xff3d, 0xff3d, + 0xff5d, 0xff5d, + 0xff60, 0xff60, + 0xff63, 0xff63 +}; /* CR_Pe */ + +/* 'Pf': General Category */ +static const OnigCodePoint CR_Pf[] = { + 9, + 0x00bb, 0x00bb, + 0x2019, 0x2019, + 0x201d, 0x201d, + 0x203a, 0x203a, + 0x2e03, 0x2e03, + 0x2e05, 0x2e05, + 0x2e0a, 0x2e0a, + 0x2e0d, 0x2e0d, + 0x2e1d, 0x2e1d +}; /* CR_Pf */ + +/* 'Pi': General Category */ +static const OnigCodePoint CR_Pi[] = { + 10, + 0x00ab, 0x00ab, + 0x2018, 0x2018, + 0x201b, 0x201c, + 0x201f, 0x201f, + 0x2039, 0x2039, + 0x2e02, 0x2e02, + 0x2e04, 0x2e04, + 0x2e09, 0x2e09, + 0x2e0c, 0x2e0c, + 0x2e1c, 0x2e1c +}; /* CR_Pi */ + +/* 'Po': General Category */ +static const OnigCodePoint CR_Po[] = { + 88, + 0x0021, 0x0023, + 0x0025, 0x0027, + 0x002a, 0x002a, + 0x002c, 0x002c, + 0x002e, 0x002f, + 0x003a, 0x003b, + 0x003f, 0x0040, + 0x005c, 0x005c, + 0x00a1, 0x00a1, + 0x00b7, 0x00b7, + 0x00bf, 0x00bf, + 0x037e, 0x037e, + 0x0387, 0x0387, + 0x055a, 0x055f, + 0x0589, 0x0589, + 0x05be, 0x05be, + 0x05c0, 0x05c0, + 0x05c3, 0x05c3, + 0x05c6, 0x05c6, + 0x05f3, 0x05f4, + 0x060c, 0x060d, + 0x061b, 0x061b, + 0x061e, 0x061f, + 0x066a, 0x066d, + 0x06d4, 0x06d4, + 0x0700, 0x070d, + 0x0964, 0x0965, + 0x0970, 0x0970, + 0x0df4, 0x0df4, + 
0x0e4f, 0x0e4f, + 0x0e5a, 0x0e5b, + 0x0f04, 0x0f12, + 0x0f85, 0x0f85, + 0x0fd0, 0x0fd1, + 0x104a, 0x104f, + 0x10fb, 0x10fb, + 0x1361, 0x1368, + 0x166d, 0x166e, + 0x16eb, 0x16ed, + 0x1735, 0x1736, + 0x17d4, 0x17d6, + 0x17d8, 0x17da, + 0x1800, 0x1805, + 0x1807, 0x180a, + 0x1944, 0x1945, + 0x19de, 0x19df, + 0x1a1e, 0x1a1f, + 0x2016, 0x2017, + 0x2020, 0x2027, + 0x2030, 0x2038, + 0x203b, 0x203e, + 0x2041, 0x2043, + 0x2047, 0x2051, + 0x2053, 0x2053, + 0x2055, 0x205e, + 0x23b6, 0x23b6, + 0x2cf9, 0x2cfc, + 0x2cfe, 0x2cff, + 0x2e00, 0x2e01, + 0x2e06, 0x2e08, + 0x2e0b, 0x2e0b, + 0x2e0e, 0x2e16, + 0x3001, 0x3003, + 0x303d, 0x303d, + 0x30fb, 0x30fb, + 0xfe10, 0xfe16, + 0xfe19, 0xfe19, + 0xfe30, 0xfe30, + 0xfe45, 0xfe46, + 0xfe49, 0xfe4c, + 0xfe50, 0xfe52, + 0xfe54, 0xfe57, + 0xfe5f, 0xfe61, + 0xfe68, 0xfe68, + 0xfe6a, 0xfe6b, + 0xff01, 0xff03, + 0xff05, 0xff07, + 0xff0a, 0xff0a, + 0xff0c, 0xff0c, + 0xff0e, 0xff0f, + 0xff1a, 0xff1b, + 0xff1f, 0xff20, + 0xff3c, 0xff3c, + 0xff61, 0xff61, + 0xff64, 0xff65, + 0x10100, 0x10101, + 0x1039f, 0x1039f, + 0x10a50, 0x10a58 +}; /* CR_Po */ + +/* 'Ps': General Category */ +static const OnigCodePoint CR_Ps[] = { + 67, + 0x0028, 0x0028, + 0x005b, 0x005b, + 0x007b, 0x007b, + 0x0f3a, 0x0f3a, + 0x0f3c, 0x0f3c, + 0x169b, 0x169b, + 0x201a, 0x201a, + 0x201e, 0x201e, + 0x2045, 0x2045, + 0x207d, 0x207d, + 0x208d, 0x208d, + 0x2329, 0x2329, + 0x23b4, 0x23b4, + 0x2768, 0x2768, + 0x276a, 0x276a, + 0x276c, 0x276c, + 0x276e, 0x276e, + 0x2770, 0x2770, + 0x2772, 0x2772, + 0x2774, 0x2774, + 0x27c5, 0x27c5, + 0x27e6, 0x27e6, + 0x27e8, 0x27e8, + 0x27ea, 0x27ea, + 0x2983, 0x2983, + 0x2985, 0x2985, + 0x2987, 0x2987, + 0x2989, 0x2989, + 0x298b, 0x298b, + 0x298d, 0x298d, + 0x298f, 0x298f, + 0x2991, 0x2991, + 0x2993, 0x2993, + 0x2995, 0x2995, + 0x2997, 0x2997, + 0x29d8, 0x29d8, + 0x29da, 0x29da, + 0x29fc, 0x29fc, + 0x3008, 0x3008, + 0x300a, 0x300a, + 0x300c, 0x300c, + 0x300e, 0x300e, + 0x3010, 0x3010, + 0x3014, 0x3014, + 0x3016, 0x3016, + 0x3018, 0x3018, + 0x301a, 
0x301a, + 0x301d, 0x301d, + 0xfd3e, 0xfd3e, + 0xfe17, 0xfe17, + 0xfe35, 0xfe35, + 0xfe37, 0xfe37, + 0xfe39, 0xfe39, + 0xfe3b, 0xfe3b, + 0xfe3d, 0xfe3d, + 0xfe3f, 0xfe3f, + 0xfe41, 0xfe41, + 0xfe43, 0xfe43, + 0xfe47, 0xfe47, + 0xfe59, 0xfe59, + 0xfe5b, 0xfe5b, + 0xfe5d, 0xfe5d, + 0xff08, 0xff08, + 0xff3b, 0xff3b, + 0xff5b, 0xff5b, + 0xff5f, 0xff5f, + 0xff62, 0xff62 +}; /* CR_Ps */ + +/* 'S': Major Category */ +static const OnigCodePoint CR_S[] = { + 162, + 0x0024, 0x0024, + 0x002b, 0x002b, + 0x003c, 0x003e, + 0x005e, 0x005e, + 0x0060, 0x0060, + 0x007c, 0x007c, + 0x007e, 0x007e, + 0x00a2, 0x00a9, + 0x00ac, 0x00ac, + 0x00ae, 0x00b1, + 0x00b4, 0x00b4, + 0x00b6, 0x00b6, + 0x00b8, 0x00b8, + 0x00d7, 0x00d7, + 0x00f7, 0x00f7, + 0x02c2, 0x02c5, + 0x02d2, 0x02df, + 0x02e5, 0x02ed, + 0x02ef, 0x02ff, + 0x0374, 0x0375, + 0x0384, 0x0385, + 0x03f6, 0x03f6, + 0x0482, 0x0482, + 0x060b, 0x060b, + 0x060e, 0x060f, + 0x06e9, 0x06e9, + 0x06fd, 0x06fe, + 0x09f2, 0x09f3, + 0x09fa, 0x09fa, + 0x0af1, 0x0af1, + 0x0b70, 0x0b70, + 0x0bf3, 0x0bfa, + 0x0e3f, 0x0e3f, + 0x0f01, 0x0f03, + 0x0f13, 0x0f17, + 0x0f1a, 0x0f1f, + 0x0f34, 0x0f34, + 0x0f36, 0x0f36, + 0x0f38, 0x0f38, + 0x0fbe, 0x0fc5, + 0x0fc7, 0x0fcc, + 0x0fcf, 0x0fcf, + 0x1360, 0x1360, + 0x1390, 0x1399, + 0x17db, 0x17db, + 0x1940, 0x1940, + 0x19e0, 0x19ff, + 0x1fbd, 0x1fbd, + 0x1fbf, 0x1fc1, + 0x1fcd, 0x1fcf, + 0x1fdd, 0x1fdf, + 0x1fed, 0x1fef, + 0x1ffd, 0x1ffe, + 0x2044, 0x2044, + 0x2052, 0x2052, + 0x207a, 0x207c, + 0x208a, 0x208c, + 0x20a0, 0x20b5, + 0x2100, 0x2101, + 0x2103, 0x2106, + 0x2108, 0x2109, + 0x2114, 0x2114, + 0x2116, 0x2118, + 0x211e, 0x2123, + 0x2125, 0x2125, + 0x2127, 0x2127, + 0x2129, 0x2129, + 0x212e, 0x212e, + 0x2132, 0x2132, + 0x213a, 0x213b, + 0x2140, 0x2144, + 0x214a, 0x214c, + 0x2190, 0x2328, + 0x232b, 0x23b3, + 0x23b7, 0x23db, + 0x2400, 0x2426, + 0x2440, 0x244a, + 0x249c, 0x24e9, + 0x2500, 0x269c, + 0x26a0, 0x26b1, + 0x2701, 0x2704, + 0x2706, 0x2709, + 0x270c, 0x2727, + 0x2729, 0x274b, + 0x274d, 0x274d, + 0x274f, 
0x2752, + 0x2756, 0x2756, + 0x2758, 0x275e, + 0x2761, 0x2767, + 0x2794, 0x2794, + 0x2798, 0x27af, + 0x27b1, 0x27be, + 0x27c0, 0x27c4, + 0x27d0, 0x27e5, + 0x27f0, 0x2982, + 0x2999, 0x29d7, + 0x29dc, 0x29fb, + 0x29fe, 0x2b13, + 0x2ce5, 0x2cea, + 0x2e80, 0x2e99, + 0x2e9b, 0x2ef3, + 0x2f00, 0x2fd5, + 0x2ff0, 0x2ffb, + 0x3004, 0x3004, + 0x3012, 0x3013, + 0x3020, 0x3020, + 0x3036, 0x3037, + 0x303e, 0x303f, + 0x309b, 0x309c, + 0x3190, 0x3191, + 0x3196, 0x319f, + 0x31c0, 0x31cf, + 0x3200, 0x321e, + 0x322a, 0x3243, + 0x3250, 0x3250, + 0x3260, 0x327f, + 0x328a, 0x32b0, + 0x32c0, 0x32fe, + 0x3300, 0x33ff, + 0x4dc0, 0x4dff, + 0xa490, 0xa4c6, + 0xa700, 0xa716, + 0xa828, 0xa82b, + 0xfb29, 0xfb29, + 0xfdfc, 0xfdfd, + 0xfe62, 0xfe62, + 0xfe64, 0xfe66, + 0xfe69, 0xfe69, + 0xff04, 0xff04, + 0xff0b, 0xff0b, + 0xff1c, 0xff1e, + 0xff3e, 0xff3e, + 0xff40, 0xff40, + 0xff5c, 0xff5c, + 0xff5e, 0xff5e, + 0xffe0, 0xffe6, + 0xffe8, 0xffee, + 0xfffc, 0xfffd, + 0x10102, 0x10102, + 0x10137, 0x1013f, + 0x10179, 0x10189, + 0x103d0, 0x103d0, + 0x1d000, 0x1d0f5, + 0x1d100, 0x1d126, + 0x1d12a, 0x1d164, + 0x1d16a, 0x1d16c, + 0x1d183, 0x1d184, + 0x1d18c, 0x1d1a9, + 0x1d1ae, 0x1d1dd, + 0x1d200, 0x1d241, + 0x1d245, 0x1d245, + 0x1d300, 0x1d356, + 0x1d6c1, 0x1d6c1, + 0x1d6db, 0x1d6db, + 0x1d6fb, 0x1d6fb, + 0x1d715, 0x1d715, + 0x1d735, 0x1d735, + 0x1d74f, 0x1d74f, + 0x1d76f, 0x1d76f, + 0x1d789, 0x1d789, + 0x1d7a9, 0x1d7a9, + 0x1d7c3, 0x1d7c3 +}; /* CR_S */ + +/* 'Sc': General Category */ +static const OnigCodePoint CR_Sc[] = { + 14, + 0x0024, 0x0024, + 0x00a2, 0x00a5, + 0x060b, 0x060b, + 0x09f2, 0x09f3, + 0x0af1, 0x0af1, + 0x0bf9, 0x0bf9, + 0x0e3f, 0x0e3f, + 0x17db, 0x17db, + 0x20a0, 0x20b5, + 0xfdfc, 0xfdfc, + 0xfe69, 0xfe69, + 0xff04, 0xff04, + 0xffe0, 0xffe1, + 0xffe5, 0xffe6 +}; /* CR_Sc */ + +/* 'Sk': General Category */ +static const OnigCodePoint CR_Sk[] = { + 23, + 0x005e, 0x005e, + 0x0060, 0x0060, + 0x00a8, 0x00a8, + 0x00af, 0x00af, + 0x00b4, 0x00b4, + 0x00b8, 0x00b8, + 0x02c2, 0x02c5, + 0x02d2, 
0x02df, + 0x02e5, 0x02ed, + 0x02ef, 0x02ff, + 0x0374, 0x0375, + 0x0384, 0x0385, + 0x1fbd, 0x1fbd, + 0x1fbf, 0x1fc1, + 0x1fcd, 0x1fcf, + 0x1fdd, 0x1fdf, + 0x1fed, 0x1fef, + 0x1ffd, 0x1ffe, + 0x309b, 0x309c, + 0xa700, 0xa716, + 0xff3e, 0xff3e, + 0xff40, 0xff40, + 0xffe3, 0xffe3 +}; /* CR_Sk */ + +/* 'Sm': General Category */ +static const OnigCodePoint CR_Sm[] = { + 59, + 0x002b, 0x002b, + 0x003c, 0x003e, + 0x007c, 0x007c, + 0x007e, 0x007e, + 0x00ac, 0x00ac, + 0x00b1, 0x00b1, + 0x00d7, 0x00d7, + 0x00f7, 0x00f7, + 0x03f6, 0x03f6, + 0x2044, 0x2044, + 0x2052, 0x2052, + 0x207a, 0x207c, + 0x208a, 0x208c, + 0x2140, 0x2144, + 0x214b, 0x214b, + 0x2190, 0x2194, + 0x219a, 0x219b, + 0x21a0, 0x21a0, + 0x21a3, 0x21a3, + 0x21a6, 0x21a6, + 0x21ae, 0x21ae, + 0x21ce, 0x21cf, + 0x21d2, 0x21d2, + 0x21d4, 0x21d4, + 0x21f4, 0x22ff, + 0x2308, 0x230b, + 0x2320, 0x2321, + 0x237c, 0x237c, + 0x239b, 0x23b3, + 0x25b7, 0x25b7, + 0x25c1, 0x25c1, + 0x25f8, 0x25ff, + 0x266f, 0x266f, + 0x27c0, 0x27c4, + 0x27d0, 0x27e5, + 0x27f0, 0x27ff, + 0x2900, 0x2982, + 0x2999, 0x29d7, + 0x29dc, 0x29fb, + 0x29fe, 0x2aff, + 0xfb29, 0xfb29, + 0xfe62, 0xfe62, + 0xfe64, 0xfe66, + 0xff0b, 0xff0b, + 0xff1c, 0xff1e, + 0xff5c, 0xff5c, + 0xff5e, 0xff5e, + 0xffe2, 0xffe2, + 0xffe9, 0xffec, + 0x1d6c1, 0x1d6c1, + 0x1d6db, 0x1d6db, + 0x1d6fb, 0x1d6fb, + 0x1d715, 0x1d715, + 0x1d735, 0x1d735, + 0x1d74f, 0x1d74f, + 0x1d76f, 0x1d76f, + 0x1d789, 0x1d789, + 0x1d7a9, 0x1d7a9, + 0x1d7c3, 0x1d7c3 +}; /* CR_Sm */ + +/* 'So': General Category */ +static const OnigCodePoint CR_So[] = { + 120, + 0x00a6, 0x00a7, + 0x00a9, 0x00a9, + 0x00ae, 0x00ae, + 0x00b0, 0x00b0, + 0x00b6, 0x00b6, + 0x0482, 0x0482, + 0x060e, 0x060f, + 0x06e9, 0x06e9, + 0x06fd, 0x06fe, + 0x09fa, 0x09fa, + 0x0b70, 0x0b70, + 0x0bf3, 0x0bf8, + 0x0bfa, 0x0bfa, + 0x0f01, 0x0f03, + 0x0f13, 0x0f17, + 0x0f1a, 0x0f1f, + 0x0f34, 0x0f34, + 0x0f36, 0x0f36, + 0x0f38, 0x0f38, + 0x0fbe, 0x0fc5, + 0x0fc7, 0x0fcc, + 0x0fcf, 0x0fcf, + 0x1360, 0x1360, + 0x1390, 0x1399, + 0x1940, 0x1940, + 
0x19e0, 0x19ff, + 0x2100, 0x2101, + 0x2103, 0x2106, + 0x2108, 0x2109, + 0x2114, 0x2114, + 0x2116, 0x2118, + 0x211e, 0x2123, + 0x2125, 0x2125, + 0x2127, 0x2127, + 0x2129, 0x2129, + 0x212e, 0x212e, + 0x2132, 0x2132, + 0x213a, 0x213b, + 0x214a, 0x214a, + 0x214c, 0x214c, + 0x2195, 0x2199, + 0x219c, 0x219f, + 0x21a1, 0x21a2, + 0x21a4, 0x21a5, + 0x21a7, 0x21ad, + 0x21af, 0x21cd, + 0x21d0, 0x21d1, + 0x21d3, 0x21d3, + 0x21d5, 0x21f3, + 0x2300, 0x2307, + 0x230c, 0x231f, + 0x2322, 0x2328, + 0x232b, 0x237b, + 0x237d, 0x239a, + 0x23b7, 0x23db, + 0x2400, 0x2426, + 0x2440, 0x244a, + 0x249c, 0x24e9, + 0x2500, 0x25b6, + 0x25b8, 0x25c0, + 0x25c2, 0x25f7, + 0x2600, 0x266e, + 0x2670, 0x269c, + 0x26a0, 0x26b1, + 0x2701, 0x2704, + 0x2706, 0x2709, + 0x270c, 0x2727, + 0x2729, 0x274b, + 0x274d, 0x274d, + 0x274f, 0x2752, + 0x2756, 0x2756, + 0x2758, 0x275e, + 0x2761, 0x2767, + 0x2794, 0x2794, + 0x2798, 0x27af, + 0x27b1, 0x27be, + 0x2800, 0x28ff, + 0x2b00, 0x2b13, + 0x2ce5, 0x2cea, + 0x2e80, 0x2e99, + 0x2e9b, 0x2ef3, + 0x2f00, 0x2fd5, + 0x2ff0, 0x2ffb, + 0x3004, 0x3004, + 0x3012, 0x3013, + 0x3020, 0x3020, + 0x3036, 0x3037, + 0x303e, 0x303f, + 0x3190, 0x3191, + 0x3196, 0x319f, + 0x31c0, 0x31cf, + 0x3200, 0x321e, + 0x322a, 0x3243, + 0x3250, 0x3250, + 0x3260, 0x327f, + 0x328a, 0x32b0, + 0x32c0, 0x32fe, + 0x3300, 0x33ff, + 0x4dc0, 0x4dff, + 0xa490, 0xa4c6, + 0xa828, 0xa82b, + 0xfdfd, 0xfdfd, + 0xffe4, 0xffe4, + 0xffe8, 0xffe8, + 0xffed, 0xffee, + 0xfffc, 0xfffd, + 0x10102, 0x10102, + 0x10137, 0x1013f, + 0x10179, 0x10189, + 0x103d0, 0x103d0, + 0x1d000, 0x1d0f5, + 0x1d100, 0x1d126, + 0x1d12a, 0x1d164, + 0x1d16a, 0x1d16c, + 0x1d183, 0x1d184, + 0x1d18c, 0x1d1a9, + 0x1d1ae, 0x1d1dd, + 0x1d200, 0x1d241, + 0x1d245, 0x1d245, + 0x1d300, 0x1d356 +}; /* CR_So */ + +/* 'Z': Major Category */ +static const OnigCodePoint CR_Z[] = { + 9, + 0x0020, 0x0020, + 0x00a0, 0x00a0, + 0x1680, 0x1680, + 0x180e, 0x180e, + 0x2000, 0x200a, + 0x2028, 0x2029, + 0x202f, 0x202f, + 0x205f, 0x205f, + 0x3000, 0x3000 +}; /* CR_Z */ 
+ +/* 'Zl': General Category */ +static const OnigCodePoint CR_Zl[] = { + 1, + 0x2028, 0x2028 +}; /* CR_Zl */ + +/* 'Zp': General Category */ +static const OnigCodePoint CR_Zp[] = { + 1, + 0x2029, 0x2029 +}; /* CR_Zp */ + +/* 'Zs': General Category */ +static const OnigCodePoint CR_Zs[] = { + 8, + 0x0020, 0x0020, + 0x00a0, 0x00a0, + 0x1680, 0x1680, + 0x180e, 0x180e, + 0x2000, 0x200a, + 0x202f, 0x202f, + 0x205f, 0x205f, + 0x3000, 0x3000 +}; /* CR_Zs */ + +/* 'Arabic': Script */ +static const OnigCodePoint CR_Arabic[] = { + 17, + 0x060b, 0x060b, + 0x060d, 0x0615, + 0x061e, 0x061e, + 0x0621, 0x063a, + 0x0641, 0x064a, + 0x0656, 0x065e, + 0x066a, 0x066f, + 0x0671, 0x06dc, + 0x06de, 0x06ff, + 0x0750, 0x076d, + 0xfb50, 0xfbb1, + 0xfbd3, 0xfd3d, + 0xfd50, 0xfd8f, + 0xfd92, 0xfdc7, + 0xfdf0, 0xfdfc, + 0xfe70, 0xfe74, + 0xfe76, 0xfefc +}; /* CR_Arabic */ + +/* 'Armenian': Script */ +static const OnigCodePoint CR_Armenian[] = { + 5, + 0x0531, 0x0556, + 0x0559, 0x055f, + 0x0561, 0x0587, + 0x058a, 0x058a, + 0xfb13, 0xfb17 +}; /* CR_Armenian */ + +/* 'Bengali': Script */ +static const OnigCodePoint CR_Bengali[] = { + 14, + 0x0981, 0x0983, + 0x0985, 0x098c, + 0x098f, 0x0990, + 0x0993, 0x09a8, + 0x09aa, 0x09b0, + 0x09b2, 0x09b2, + 0x09b6, 0x09b9, + 0x09bc, 0x09c4, + 0x09c7, 0x09c8, + 0x09cb, 0x09ce, + 0x09d7, 0x09d7, + 0x09dc, 0x09dd, + 0x09df, 0x09e3, + 0x09e6, 0x09fa +}; /* CR_Bengali */ + +/* 'Bopomofo': Script */ +static const OnigCodePoint CR_Bopomofo[] = { + 2, + 0x3105, 0x312c, + 0x31a0, 0x31b7 +}; /* CR_Bopomofo */ + +/* 'Braille': Script */ +static const OnigCodePoint CR_Braille[] = { + 1, + 0x2800, 0x28ff +}; /* CR_Braille */ + +/* 'Buginese': Script */ +static const OnigCodePoint CR_Buginese[] = { + 2, + 0x1a00, 0x1a1b, + 0x1a1e, 0x1a1f +}; /* CR_Buginese */ + +/* 'Buhid': Script */ +static const OnigCodePoint CR_Buhid[] = { + 1, + 0x1740, 0x1753 +}; /* CR_Buhid */ + +/* 'Canadian_Aboriginal': Script */ +static const OnigCodePoint CR_Canadian_Aboriginal[] = { + 1, + 
0x1401, 0x1676 +}; /* CR_Canadian_Aboriginal */ + +/* 'Cherokee': Script */ +static const OnigCodePoint CR_Cherokee[] = { + 1, + 0x13a0, 0x13f4 +}; /* CR_Cherokee */ + +/* 'Common': Script */ +static const OnigCodePoint CR_Common[] = { + 126, + 0x0000, 0x0040, + 0x005b, 0x0060, + 0x007b, 0x00a9, + 0x00ab, 0x00b9, + 0x00bb, 0x00bf, + 0x00d7, 0x00d7, + 0x00f7, 0x00f7, + 0x02b9, 0x02df, + 0x02e5, 0x02ff, + 0x037e, 0x037e, + 0x0387, 0x0387, + 0x0589, 0x0589, + 0x0600, 0x0603, + 0x060c, 0x060c, + 0x061b, 0x061b, + 0x061f, 0x061f, + 0x0640, 0x0640, + 0x0660, 0x0669, + 0x06dd, 0x06dd, + 0x0964, 0x0965, + 0x0970, 0x0970, + 0x0e3f, 0x0e3f, + 0x10fb, 0x10fb, + 0x16eb, 0x16ed, + 0x1735, 0x1736, + 0x2000, 0x200b, + 0x200e, 0x2063, + 0x206a, 0x2070, + 0x2074, 0x207e, + 0x2080, 0x208e, + 0x20a0, 0x20b5, + 0x2100, 0x2125, + 0x2127, 0x2129, + 0x212c, 0x214c, + 0x2153, 0x2183, + 0x2190, 0x23db, + 0x2400, 0x2426, + 0x2440, 0x244a, + 0x2460, 0x269c, + 0x26a0, 0x26b1, + 0x2701, 0x2704, + 0x2706, 0x2709, + 0x270c, 0x2727, + 0x2729, 0x274b, + 0x274d, 0x274d, + 0x274f, 0x2752, + 0x2756, 0x2756, + 0x2758, 0x275e, + 0x2761, 0x2794, + 0x2798, 0x27af, + 0x27b1, 0x27be, + 0x27c0, 0x27c6, + 0x27d0, 0x27eb, + 0x27f0, 0x27ff, + 0x2900, 0x2b13, + 0x2e00, 0x2e17, + 0x2e1c, 0x2e1d, + 0x2ff0, 0x2ffb, + 0x3000, 0x3004, + 0x3006, 0x3006, + 0x3008, 0x3020, + 0x3030, 0x3037, + 0x303c, 0x303f, + 0x309b, 0x309c, + 0x30a0, 0x30a0, + 0x30fb, 0x30fc, + 0x3190, 0x319f, + 0x31c0, 0x31cf, + 0x3220, 0x3243, + 0x3250, 0x325f, + 0x327e, 0x32fe, + 0x3300, 0x33ff, + 0x4dc0, 0x4dff, + 0xa700, 0xa716, + 0xe000, 0xf8ff, + 0xfd3e, 0xfd3f, + 0xfdfd, 0xfdfd, + 0xfe10, 0xfe19, + 0xfe30, 0xfe52, + 0xfe54, 0xfe66, + 0xfe68, 0xfe6b, + 0xfeff, 0xfeff, + 0xff01, 0xff20, + 0xff3b, 0xff40, + 0xff5b, 0xff65, + 0xff70, 0xff70, + 0xff9e, 0xff9f, + 0xffe0, 0xffe6, + 0xffe8, 0xffee, + 0xfff9, 0xfffd, + 0x10100, 0x10102, + 0x10107, 0x10133, + 0x10137, 0x1013f, + 0x1d000, 0x1d0f5, + 0x1d100, 0x1d126, + 0x1d12a, 0x1d166, + 0x1d16a, 
0x1d17a, + 0x1d183, 0x1d184, + 0x1d18c, 0x1d1a9, + 0x1d1ae, 0x1d1dd, + 0x1d300, 0x1d356, + 0x1d400, 0x1d454, + 0x1d456, 0x1d49c, + 0x1d49e, 0x1d49f, + 0x1d4a2, 0x1d4a2, + 0x1d4a5, 0x1d4a6, + 0x1d4a9, 0x1d4ac, + 0x1d4ae, 0x1d4b9, + 0x1d4bb, 0x1d4bb, + 0x1d4bd, 0x1d4c3, + 0x1d4c5, 0x1d505, + 0x1d507, 0x1d50a, + 0x1d50d, 0x1d514, + 0x1d516, 0x1d51c, + 0x1d51e, 0x1d539, + 0x1d53b, 0x1d53e, + 0x1d540, 0x1d544, + 0x1d546, 0x1d546, + 0x1d54a, 0x1d550, + 0x1d552, 0x1d6a5, + 0x1d6a8, 0x1d7c9, + 0x1d7ce, 0x1d7ff, + 0xe0001, 0xe0001, + 0xe0020, 0xe007f, + 0xf0000, 0xffffd, + 0x100000, 0x10fffd +}; /* CR_Common */ + +/* 'Coptic': Script */ +static const OnigCodePoint CR_Coptic[] = { + 3, + 0x03e2, 0x03ef, + 0x2c80, 0x2cea, + 0x2cf9, 0x2cff +}; /* CR_Coptic */ + +/* 'Cypriot': Script */ +static const OnigCodePoint CR_Cypriot[] = { + 6, + 0x10800, 0x10805, + 0x10808, 0x10808, + 0x1080a, 0x10835, + 0x10837, 0x10838, + 0x1083c, 0x1083c, + 0x1083f, 0x1083f +}; /* CR_Cypriot */ + +/* 'Cyrillic': Script */ +static const OnigCodePoint CR_Cyrillic[] = { + 6, + 0x0400, 0x0486, + 0x0488, 0x04ce, + 0x04d0, 0x04f9, + 0x0500, 0x050f, + 0x1d2b, 0x1d2b, + 0x1d78, 0x1d78 +}; /* CR_Cyrillic */ + +/* 'Deseret': Script */ +static const OnigCodePoint CR_Deseret[] = { + 1, + 0x10400, 0x1044f +}; /* CR_Deseret */ + +/* 'Devanagari': Script */ +static const OnigCodePoint CR_Devanagari[] = { + 6, + 0x0901, 0x0939, + 0x093c, 0x094d, + 0x0950, 0x0954, + 0x0958, 0x0963, + 0x0966, 0x096f, + 0x097d, 0x097d +}; /* CR_Devanagari */ + +/* 'Ethiopic': Script */ +static const OnigCodePoint CR_Ethiopic[] = { + 27, + 0x1200, 0x1248, + 0x124a, 0x124d, + 0x1250, 0x1256, + 0x1258, 0x1258, + 0x125a, 0x125d, + 0x1260, 0x1288, + 0x128a, 0x128d, + 0x1290, 0x12b0, + 0x12b2, 0x12b5, + 0x12b8, 0x12be, + 0x12c0, 0x12c0, + 0x12c2, 0x12c5, + 0x12c8, 0x12d6, + 0x12d8, 0x1310, + 0x1312, 0x1315, + 0x1318, 0x135a, + 0x135f, 0x137c, + 0x1380, 0x1399, + 0x2d80, 0x2d96, + 0x2da0, 0x2da6, + 0x2da8, 0x2dae, + 0x2db0, 0x2db6, + 0x2db8, 
0x2dbe, + 0x2dc0, 0x2dc6, + 0x2dc8, 0x2dce, + 0x2dd0, 0x2dd6, + 0x2dd8, 0x2dde +}; /* CR_Ethiopic */ + +/* 'Georgian': Script */ +static const OnigCodePoint CR_Georgian[] = { + 4, + 0x10a0, 0x10c5, + 0x10d0, 0x10fa, + 0x10fc, 0x10fc, + 0x2d00, 0x2d25 +}; /* CR_Georgian */ + +/* 'Glagolitic': Script */ +static const OnigCodePoint CR_Glagolitic[] = { + 2, + 0x2c00, 0x2c2e, + 0x2c30, 0x2c5e +}; /* CR_Glagolitic */ + +/* 'Gothic': Script */ +static const OnigCodePoint CR_Gothic[] = { + 1, + 0x10330, 0x1034a +}; /* CR_Gothic */ + +/* 'Greek': Script */ +static const OnigCodePoint CR_Greek[] = { + 31, + 0x0374, 0x0375, + 0x037a, 0x037a, + 0x0384, 0x0386, + 0x0388, 0x038a, + 0x038c, 0x038c, + 0x038e, 0x03a1, + 0x03a3, 0x03ce, + 0x03d0, 0x03e1, + 0x03f0, 0x03ff, + 0x1d26, 0x1d2a, + 0x1d5d, 0x1d61, + 0x1d66, 0x1d6a, + 0x1f00, 0x1f15, + 0x1f18, 0x1f1d, + 0x1f20, 0x1f45, + 0x1f48, 0x1f4d, + 0x1f50, 0x1f57, + 0x1f59, 0x1f59, + 0x1f5b, 0x1f5b, + 0x1f5d, 0x1f5d, + 0x1f5f, 0x1f7d, + 0x1f80, 0x1fb4, + 0x1fb6, 0x1fc4, + 0x1fc6, 0x1fd3, + 0x1fd6, 0x1fdb, + 0x1fdd, 0x1fef, + 0x1ff2, 0x1ff4, + 0x1ff6, 0x1ffe, + 0x2126, 0x2126, + 0x10140, 0x1018a, + 0x1d200, 0x1d245 +}; /* CR_Greek */ + +/* 'Gujarati': Script */ +static const OnigCodePoint CR_Gujarati[] = { + 14, + 0x0a81, 0x0a83, + 0x0a85, 0x0a8d, + 0x0a8f, 0x0a91, + 0x0a93, 0x0aa8, + 0x0aaa, 0x0ab0, + 0x0ab2, 0x0ab3, + 0x0ab5, 0x0ab9, + 0x0abc, 0x0ac5, + 0x0ac7, 0x0ac9, + 0x0acb, 0x0acd, + 0x0ad0, 0x0ad0, + 0x0ae0, 0x0ae3, + 0x0ae6, 0x0aef, + 0x0af1, 0x0af1 +}; /* CR_Gujarati */ + +/* 'Gurmukhi': Script */ +static const OnigCodePoint CR_Gurmukhi[] = { + 15, + 0x0a01, 0x0a03, + 0x0a05, 0x0a0a, + 0x0a0f, 0x0a10, + 0x0a13, 0x0a28, + 0x0a2a, 0x0a30, + 0x0a32, 0x0a33, + 0x0a35, 0x0a36, + 0x0a38, 0x0a39, + 0x0a3c, 0x0a3c, + 0x0a3e, 0x0a42, + 0x0a47, 0x0a48, + 0x0a4b, 0x0a4d, + 0x0a59, 0x0a5c, + 0x0a5e, 0x0a5e, + 0x0a66, 0x0a74 +}; /* CR_Gurmukhi */ + +/* 'Han': Script */ +static const OnigCodePoint CR_Han[] = { + 14, + 0x2e80, 0x2e99, + 
0x2e9b, 0x2ef3, + 0x2f00, 0x2fd5, + 0x3005, 0x3005, + 0x3007, 0x3007, + 0x3021, 0x3029, + 0x3038, 0x303b, + 0x3400, 0x4db5, + 0x4e00, 0x9fbb, + 0xf900, 0xfa2d, + 0xfa30, 0xfa6a, + 0xfa70, 0xfad9, + 0x20000, 0x2a6d6, + 0x2f800, 0x2fa1d +}; /* CR_Han */ + +/* 'Hangul': Script */ +static const OnigCodePoint CR_Hangul[] = { + 12, + 0x1100, 0x1159, + 0x115f, 0x11a2, + 0x11a8, 0x11f9, + 0x3131, 0x318e, + 0x3200, 0x321e, + 0x3260, 0x327d, + 0xac00, 0xd7a3, + 0xffa0, 0xffbe, + 0xffc2, 0xffc7, + 0xffca, 0xffcf, + 0xffd2, 0xffd7, + 0xffda, 0xffdc +}; /* CR_Hangul */ + +/* 'Hanunoo': Script */ +static const OnigCodePoint CR_Hanunoo[] = { + 1, + 0x1720, 0x1734 +}; /* CR_Hanunoo */ + +/* 'Hebrew': Script */ +static const OnigCodePoint CR_Hebrew[] = { + 10, + 0x0591, 0x05b9, + 0x05bb, 0x05c7, + 0x05d0, 0x05ea, + 0x05f0, 0x05f4, + 0xfb1d, 0xfb36, + 0xfb38, 0xfb3c, + 0xfb3e, 0xfb3e, + 0xfb40, 0xfb41, + 0xfb43, 0xfb44, + 0xfb46, 0xfb4f +}; /* CR_Hebrew */ + +/* 'Hiragana': Script */ +static const OnigCodePoint CR_Hiragana[] = { + 2, + 0x3041, 0x3096, + 0x309d, 0x309f +}; /* CR_Hiragana */ + +/* 'Inherited': Script */ +static const OnigCodePoint CR_Inherited[] = { + 15, + 0x0300, 0x036f, + 0x064b, 0x0655, + 0x0670, 0x0670, + 0x1dc0, 0x1dc3, + 0x200c, 0x200d, + 0x20d0, 0x20eb, + 0x302a, 0x302f, + 0x3099, 0x309a, + 0xfe00, 0xfe0f, + 0xfe20, 0xfe23, + 0x1d167, 0x1d169, + 0x1d17b, 0x1d182, + 0x1d185, 0x1d18b, + 0x1d1aa, 0x1d1ad, + 0xe0100, 0xe01ef +}; /* CR_Inherited */ + +/* 'Kannada': Script */ +static const OnigCodePoint CR_Kannada[] = { + 13, + 0x0c82, 0x0c83, + 0x0c85, 0x0c8c, + 0x0c8e, 0x0c90, + 0x0c92, 0x0ca8, + 0x0caa, 0x0cb3, + 0x0cb5, 0x0cb9, + 0x0cbc, 0x0cc4, + 0x0cc6, 0x0cc8, + 0x0cca, 0x0ccd, + 0x0cd5, 0x0cd6, + 0x0cde, 0x0cde, + 0x0ce0, 0x0ce1, + 0x0ce6, 0x0cef +}; /* CR_Kannada */ + +/* 'Katakana': Script */ +static const OnigCodePoint CR_Katakana[] = { + 5, + 0x30a1, 0x30fa, + 0x30fd, 0x30ff, + 0x31f0, 0x31ff, + 0xff66, 0xff6f, + 0xff71, 0xff9d +}; /* CR_Katakana */ + 
+/* 'Kharoshthi': Script */ +static const OnigCodePoint CR_Kharoshthi[] = { + 8, + 0x10a00, 0x10a03, + 0x10a05, 0x10a06, + 0x10a0c, 0x10a13, + 0x10a15, 0x10a17, + 0x10a19, 0x10a33, + 0x10a38, 0x10a3a, + 0x10a3f, 0x10a47, + 0x10a50, 0x10a58 +}; /* CR_Kharoshthi */ + +/* 'Khmer': Script */ +static const OnigCodePoint CR_Khmer[] = { + 4, + 0x1780, 0x17dd, + 0x17e0, 0x17e9, + 0x17f0, 0x17f9, + 0x19e0, 0x19ff +}; /* CR_Khmer */ + +/* 'Lao': Script */ +static const OnigCodePoint CR_Lao[] = { + 18, + 0x0e81, 0x0e82, + 0x0e84, 0x0e84, + 0x0e87, 0x0e88, + 0x0e8a, 0x0e8a, + 0x0e8d, 0x0e8d, + 0x0e94, 0x0e97, + 0x0e99, 0x0e9f, + 0x0ea1, 0x0ea3, + 0x0ea5, 0x0ea5, + 0x0ea7, 0x0ea7, + 0x0eaa, 0x0eab, + 0x0ead, 0x0eb9, + 0x0ebb, 0x0ebd, + 0x0ec0, 0x0ec4, + 0x0ec6, 0x0ec6, + 0x0ec8, 0x0ecd, + 0x0ed0, 0x0ed9, + 0x0edc, 0x0edd +}; /* CR_Lao */ + +/* 'Latin': Script */ +static const OnigCodePoint CR_Latin[] = { + 23, + 0x0041, 0x005a, + 0x0061, 0x007a, + 0x00aa, 0x00aa, + 0x00ba, 0x00ba, + 0x00c0, 0x00d6, + 0x00d8, 0x00f6, + 0x00f8, 0x0241, + 0x0250, 0x02b8, + 0x02e0, 0x02e4, + 0x1d00, 0x1d25, + 0x1d2c, 0x1d5c, + 0x1d62, 0x1d65, + 0x1d6b, 0x1d77, + 0x1d79, 0x1dbf, + 0x1e00, 0x1e9b, + 0x1ea0, 0x1ef9, + 0x2071, 0x2071, + 0x207f, 0x207f, + 0x2090, 0x2094, + 0x212a, 0x212b, + 0xfb00, 0xfb06, + 0xff21, 0xff3a, + 0xff41, 0xff5a +}; /* CR_Latin */ + +/* 'Limbu': Script */ +static const OnigCodePoint CR_Limbu[] = { + 5, + 0x1900, 0x191c, + 0x1920, 0x192b, + 0x1930, 0x193b, + 0x1940, 0x1940, + 0x1944, 0x194f +}; /* CR_Limbu */ + +/* 'Linear_B': Script */ +static const OnigCodePoint CR_Linear_B[] = { + 7, + 0x10000, 0x1000b, + 0x1000d, 0x10026, + 0x10028, 0x1003a, + 0x1003c, 0x1003d, + 0x1003f, 0x1004d, + 0x10050, 0x1005d, + 0x10080, 0x100fa +}; /* CR_Linear_B */ + +/* 'Malayalam': Script */ +static const OnigCodePoint CR_Malayalam[] = { + 11, + 0x0d02, 0x0d03, + 0x0d05, 0x0d0c, + 0x0d0e, 0x0d10, + 0x0d12, 0x0d28, + 0x0d2a, 0x0d39, + 0x0d3e, 0x0d43, + 0x0d46, 0x0d48, + 0x0d4a, 0x0d4d, + 0x0d57, 
0x0d57, + 0x0d60, 0x0d61, + 0x0d66, 0x0d6f +}; /* CR_Malayalam */ + +/* 'Mongolian': Script */ +static const OnigCodePoint CR_Mongolian[] = { + 4, + 0x1800, 0x180e, + 0x1810, 0x1819, + 0x1820, 0x1877, + 0x1880, 0x18a9 +}; /* CR_Mongolian */ + +/* 'Myanmar': Script */ +static const OnigCodePoint CR_Myanmar[] = { + 6, + 0x1000, 0x1021, + 0x1023, 0x1027, + 0x1029, 0x102a, + 0x102c, 0x1032, + 0x1036, 0x1039, + 0x1040, 0x1059 +}; /* CR_Myanmar */ + +/* 'New_Tai_Lue': Script */ +static const OnigCodePoint CR_New_Tai_Lue[] = { + 4, + 0x1980, 0x19a9, + 0x19b0, 0x19c9, + 0x19d0, 0x19d9, + 0x19de, 0x19df +}; /* CR_New_Tai_Lue */ + +/* 'Ogham': Script */ +static const OnigCodePoint CR_Ogham[] = { + 1, + 0x1680, 0x169c +}; /* CR_Ogham */ + +/* 'Old_Italic': Script */ +static const OnigCodePoint CR_Old_Italic[] = { + 2, + 0x10300, 0x1031e, + 0x10320, 0x10323 +}; /* CR_Old_Italic */ + +/* 'Old_Persian': Script */ +static const OnigCodePoint CR_Old_Persian[] = { + 2, + 0x103a0, 0x103c3, + 0x103c8, 0x103d5 +}; /* CR_Old_Persian */ + +/* 'Oriya': Script */ +static const OnigCodePoint CR_Oriya[] = { + 14, + 0x0b01, 0x0b03, + 0x0b05, 0x0b0c, + 0x0b0f, 0x0b10, + 0x0b13, 0x0b28, + 0x0b2a, 0x0b30, + 0x0b32, 0x0b33, + 0x0b35, 0x0b39, + 0x0b3c, 0x0b43, + 0x0b47, 0x0b48, + 0x0b4b, 0x0b4d, + 0x0b56, 0x0b57, + 0x0b5c, 0x0b5d, + 0x0b5f, 0x0b61, + 0x0b66, 0x0b71 +}; /* CR_Oriya */ + +/* 'Osmanya': Script */ +static const OnigCodePoint CR_Osmanya[] = { + 2, + 0x10480, 0x1049d, + 0x104a0, 0x104a9 +}; /* CR_Osmanya */ + +/* 'Runic': Script */ +static const OnigCodePoint CR_Runic[] = { + 2, + 0x16a0, 0x16ea, + 0x16ee, 0x16f0 +}; /* CR_Runic */ + +/* 'Shavian': Script */ +static const OnigCodePoint CR_Shavian[] = { + 1, + 0x10450, 0x1047f +}; /* CR_Shavian */ + +/* 'Sinhala': Script */ +static const OnigCodePoint CR_Sinhala[] = { + 11, + 0x0d82, 0x0d83, + 0x0d85, 0x0d96, + 0x0d9a, 0x0db1, + 0x0db3, 0x0dbb, + 0x0dbd, 0x0dbd, + 0x0dc0, 0x0dc6, + 0x0dca, 0x0dca, + 0x0dcf, 0x0dd4, + 0x0dd6, 0x0dd6, + 
0x0dd8, 0x0ddf, + 0x0df2, 0x0df4 +}; /* CR_Sinhala */ + +/* 'Syloti_Nagri': Script */ +static const OnigCodePoint CR_Syloti_Nagri[] = { + 1, + 0xa800, 0xa82b +}; /* CR_Syloti_Nagri */ + +/* 'Syriac': Script */ +static const OnigCodePoint CR_Syriac[] = { + 3, + 0x0700, 0x070d, + 0x070f, 0x074a, + 0x074d, 0x074f +}; /* CR_Syriac */ + +/* 'Tagalog': Script */ +static const OnigCodePoint CR_Tagalog[] = { + 2, + 0x1700, 0x170c, + 0x170e, 0x1714 +}; /* CR_Tagalog */ + +/* 'Tagbanwa': Script */ +static const OnigCodePoint CR_Tagbanwa[] = { + 3, + 0x1760, 0x176c, + 0x176e, 0x1770, + 0x1772, 0x1773 +}; /* CR_Tagbanwa */ + +/* 'Tai_Le': Script */ +static const OnigCodePoint CR_Tai_Le[] = { + 2, + 0x1950, 0x196d, + 0x1970, 0x1974 +}; /* CR_Tai_Le */ + +/* 'Tamil': Script */ +static const OnigCodePoint CR_Tamil[] = { + 15, + 0x0b82, 0x0b83, + 0x0b85, 0x0b8a, + 0x0b8e, 0x0b90, + 0x0b92, 0x0b95, + 0x0b99, 0x0b9a, + 0x0b9c, 0x0b9c, + 0x0b9e, 0x0b9f, + 0x0ba3, 0x0ba4, + 0x0ba8, 0x0baa, + 0x0bae, 0x0bb9, + 0x0bbe, 0x0bc2, + 0x0bc6, 0x0bc8, + 0x0bca, 0x0bcd, + 0x0bd7, 0x0bd7, + 0x0be6, 0x0bfa +}; /* CR_Tamil */ + +/* 'Telugu': Script */ +static const OnigCodePoint CR_Telugu[] = { + 12, + 0x0c01, 0x0c03, + 0x0c05, 0x0c0c, + 0x0c0e, 0x0c10, + 0x0c12, 0x0c28, + 0x0c2a, 0x0c33, + 0x0c35, 0x0c39, + 0x0c3e, 0x0c44, + 0x0c46, 0x0c48, + 0x0c4a, 0x0c4d, + 0x0c55, 0x0c56, + 0x0c60, 0x0c61, + 0x0c66, 0x0c6f +}; /* CR_Telugu */ + +/* 'Thaana': Script */ +static const OnigCodePoint CR_Thaana[] = { + 1, + 0x0780, 0x07b1 +}; /* CR_Thaana */ + +/* 'Thai': Script */ +static const OnigCodePoint CR_Thai[] = { + 2, + 0x0e01, 0x0e3a, + 0x0e40, 0x0e5b +}; /* CR_Thai */ + +/* 'Tibetan': Script */ +static const OnigCodePoint CR_Tibetan[] = { + 7, + 0x0f00, 0x0f47, + 0x0f49, 0x0f6a, + 0x0f71, 0x0f8b, + 0x0f90, 0x0f97, + 0x0f99, 0x0fbc, + 0x0fbe, 0x0fcc, + 0x0fcf, 0x0fd1 +}; /* CR_Tibetan */ + +/* 'Tifinagh': Script */ +static const OnigCodePoint CR_Tifinagh[] = { + 2, + 0x2d30, 0x2d65, + 0x2d6f, 0x2d6f +}; 
/* CR_Tifinagh */ + +/* 'Ugaritic': Script */ +static const OnigCodePoint CR_Ugaritic[] = { + 2, + 0x10380, 0x1039d, + 0x1039f, 0x1039f +}; /* CR_Ugaritic */ + +/* 'Yi': Script */ +static const OnigCodePoint CR_Yi[] = { + 2, + 0xa000, 0xa48c, + 0xa490, 0xa4c6 +}; /* CR_Yi */ + + +#endif /* USE_UNICODE_PROPERTIES */ + + +typedef struct { + int n; + OnigCodePoint code[3]; +} CodePointList3; + +typedef struct { + OnigCodePoint from; + CodePointList3 to; +} CaseFold_11_Type; + +typedef struct { + OnigCodePoint from; + CodePointList3 to; +} CaseUnfold_11_Type; + +typedef struct { + int n; + OnigCodePoint code[2]; +} CodePointList2; + +typedef struct { + OnigCodePoint from[2]; + CodePointList2 to; +} CaseUnfold_12_Type; + +typedef struct { + OnigCodePoint from[3]; + CodePointList2 to; +} CaseUnfold_13_Type; + +static const CaseFold_11_Type CaseFold[] = { + { 0x0041, {1, {0x0061}}}, + { 0x0042, {1, {0x0062}}}, + { 0x0043, {1, {0x0063}}}, + { 0x0044, {1, {0x0064}}}, + { 0x0045, {1, {0x0065}}}, + { 0x0046, {1, {0x0066}}}, + { 0x0047, {1, {0x0067}}}, + { 0x0048, {1, {0x0068}}}, + { 0x004a, {1, {0x006a}}}, + { 0x004b, {1, {0x006b}}}, + { 0x004c, {1, {0x006c}}}, + { 0x004d, {1, {0x006d}}}, + { 0x004e, {1, {0x006e}}}, + { 0x004f, {1, {0x006f}}}, + { 0x0050, {1, {0x0070}}}, + { 0x0051, {1, {0x0071}}}, + { 0x0052, {1, {0x0072}}}, + { 0x0053, {1, {0x0073}}}, + { 0x0054, {1, {0x0074}}}, + { 0x0055, {1, {0x0075}}}, + { 0x0056, {1, {0x0076}}}, + { 0x0057, {1, {0x0077}}}, + { 0x0058, {1, {0x0078}}}, + { 0x0059, {1, {0x0079}}}, + { 0x005a, {1, {0x007a}}}, + { 0x00b5, {1, {0x03bc}}}, + { 0x00c0, {1, {0x00e0}}}, + { 0x00c1, {1, {0x00e1}}}, + { 0x00c2, {1, {0x00e2}}}, + { 0x00c3, {1, {0x00e3}}}, + { 0x00c4, {1, {0x00e4}}}, + { 0x00c5, {1, {0x00e5}}}, + { 0x00c6, {1, {0x00e6}}}, + { 0x00c7, {1, {0x00e7}}}, + { 0x00c8, {1, {0x00e8}}}, + { 0x00c9, {1, {0x00e9}}}, + { 0x00ca, {1, {0x00ea}}}, + { 0x00cb, {1, {0x00eb}}}, + { 0x00cc, {1, {0x00ec}}}, + { 0x00cd, {1, {0x00ed}}}, + { 0x00ce, {1, 
{0x00ee}}}, + { 0x00cf, {1, {0x00ef}}}, + { 0x00d0, {1, {0x00f0}}}, + { 0x00d1, {1, {0x00f1}}}, + { 0x00d2, {1, {0x00f2}}}, + { 0x00d3, {1, {0x00f3}}}, + { 0x00d4, {1, {0x00f4}}}, + { 0x00d5, {1, {0x00f5}}}, + { 0x00d6, {1, {0x00f6}}}, + { 0x00d8, {1, {0x00f8}}}, + { 0x00d9, {1, {0x00f9}}}, + { 0x00da, {1, {0x00fa}}}, + { 0x00db, {1, {0x00fb}}}, + { 0x00dc, {1, {0x00fc}}}, + { 0x00dd, {1, {0x00fd}}}, + { 0x00de, {1, {0x00fe}}}, + { 0x00df, {2, {0x0073, 0x0073}}}, + { 0x0100, {1, {0x0101}}}, + { 0x0102, {1, {0x0103}}}, + { 0x0104, {1, {0x0105}}}, + { 0x0106, {1, {0x0107}}}, + { 0x0108, {1, {0x0109}}}, + { 0x010a, {1, {0x010b}}}, + { 0x010c, {1, {0x010d}}}, + { 0x010e, {1, {0x010f}}}, + { 0x0110, {1, {0x0111}}}, + { 0x0112, {1, {0x0113}}}, + { 0x0114, {1, {0x0115}}}, + { 0x0116, {1, {0x0117}}}, + { 0x0118, {1, {0x0119}}}, + { 0x011a, {1, {0x011b}}}, + { 0x011c, {1, {0x011d}}}, + { 0x011e, {1, {0x011f}}}, + { 0x0120, {1, {0x0121}}}, + { 0x0122, {1, {0x0123}}}, + { 0x0124, {1, {0x0125}}}, + { 0x0126, {1, {0x0127}}}, + { 0x0128, {1, {0x0129}}}, + { 0x012a, {1, {0x012b}}}, + { 0x012c, {1, {0x012d}}}, + { 0x012e, {1, {0x012f}}}, + { 0x0132, {1, {0x0133}}}, + { 0x0134, {1, {0x0135}}}, + { 0x0136, {1, {0x0137}}}, + { 0x0139, {1, {0x013a}}}, + { 0x013b, {1, {0x013c}}}, + { 0x013d, {1, {0x013e}}}, + { 0x013f, {1, {0x0140}}}, + { 0x0141, {1, {0x0142}}}, + { 0x0143, {1, {0x0144}}}, + { 0x0145, {1, {0x0146}}}, + { 0x0147, {1, {0x0148}}}, + { 0x0149, {2, {0x02bc, 0x006e}}}, + { 0x014a, {1, {0x014b}}}, + { 0x014c, {1, {0x014d}}}, + { 0x014e, {1, {0x014f}}}, + { 0x0150, {1, {0x0151}}}, + { 0x0152, {1, {0x0153}}}, + { 0x0154, {1, {0x0155}}}, + { 0x0156, {1, {0x0157}}}, + { 0x0158, {1, {0x0159}}}, + { 0x015a, {1, {0x015b}}}, + { 0x015c, {1, {0x015d}}}, + { 0x015e, {1, {0x015f}}}, + { 0x0160, {1, {0x0161}}}, + { 0x0162, {1, {0x0163}}}, + { 0x0164, {1, {0x0165}}}, + { 0x0166, {1, {0x0167}}}, + { 0x0168, {1, {0x0169}}}, + { 0x016a, {1, {0x016b}}}, + { 0x016c, {1, {0x016d}}}, + { 0x016e, 
{1, {0x016f}}}, + { 0x0170, {1, {0x0171}}}, + { 0x0172, {1, {0x0173}}}, + { 0x0174, {1, {0x0175}}}, + { 0x0176, {1, {0x0177}}}, + { 0x0178, {1, {0x00ff}}}, + { 0x0179, {1, {0x017a}}}, + { 0x017b, {1, {0x017c}}}, + { 0x017d, {1, {0x017e}}}, + { 0x017f, {1, {0x0073}}}, + { 0x0181, {1, {0x0253}}}, + { 0x0182, {1, {0x0183}}}, + { 0x0184, {1, {0x0185}}}, + { 0x0186, {1, {0x0254}}}, + { 0x0187, {1, {0x0188}}}, + { 0x0189, {1, {0x0256}}}, + { 0x018a, {1, {0x0257}}}, + { 0x018b, {1, {0x018c}}}, + { 0x018e, {1, {0x01dd}}}, + { 0x018f, {1, {0x0259}}}, + { 0x0190, {1, {0x025b}}}, + { 0x0191, {1, {0x0192}}}, + { 0x0193, {1, {0x0260}}}, + { 0x0194, {1, {0x0263}}}, + { 0x0196, {1, {0x0269}}}, + { 0x0197, {1, {0x0268}}}, + { 0x0198, {1, {0x0199}}}, + { 0x019c, {1, {0x026f}}}, + { 0x019d, {1, {0x0272}}}, + { 0x019f, {1, {0x0275}}}, + { 0x01a0, {1, {0x01a1}}}, + { 0x01a2, {1, {0x01a3}}}, + { 0x01a4, {1, {0x01a5}}}, + { 0x01a6, {1, {0x0280}}}, + { 0x01a7, {1, {0x01a8}}}, + { 0x01a9, {1, {0x0283}}}, + { 0x01ac, {1, {0x01ad}}}, + { 0x01ae, {1, {0x0288}}}, + { 0x01af, {1, {0x01b0}}}, + { 0x01b1, {1, {0x028a}}}, + { 0x01b2, {1, {0x028b}}}, + { 0x01b3, {1, {0x01b4}}}, + { 0x01b5, {1, {0x01b6}}}, + { 0x01b7, {1, {0x0292}}}, + { 0x01b8, {1, {0x01b9}}}, + { 0x01bc, {1, {0x01bd}}}, + { 0x01c4, {1, {0x01c6}}}, + { 0x01c5, {1, {0x01c6}}}, + { 0x01c7, {1, {0x01c9}}}, + { 0x01c8, {1, {0x01c9}}}, + { 0x01ca, {1, {0x01cc}}}, + { 0x01cb, {1, {0x01cc}}}, + { 0x01cd, {1, {0x01ce}}}, + { 0x01cf, {1, {0x01d0}}}, + { 0x01d1, {1, {0x01d2}}}, + { 0x01d3, {1, {0x01d4}}}, + { 0x01d5, {1, {0x01d6}}}, + { 0x01d7, {1, {0x01d8}}}, + { 0x01d9, {1, {0x01da}}}, + { 0x01db, {1, {0x01dc}}}, + { 0x01de, {1, {0x01df}}}, + { 0x01e0, {1, {0x01e1}}}, + { 0x01e2, {1, {0x01e3}}}, + { 0x01e4, {1, {0x01e5}}}, + { 0x01e6, {1, {0x01e7}}}, + { 0x01e8, {1, {0x01e9}}}, + { 0x01ea, {1, {0x01eb}}}, + { 0x01ec, {1, {0x01ed}}}, + { 0x01ee, {1, {0x01ef}}}, + { 0x01f0, {2, {0x006a, 0x030c}}}, + { 0x01f1, {1, {0x01f3}}}, + { 0x01f2, {1, 
{0x01f3}}}, + { 0x01f4, {1, {0x01f5}}}, + { 0x01f6, {1, {0x0195}}}, + { 0x01f7, {1, {0x01bf}}}, + { 0x01f8, {1, {0x01f9}}}, + { 0x01fa, {1, {0x01fb}}}, + { 0x01fc, {1, {0x01fd}}}, + { 0x01fe, {1, {0x01ff}}}, + { 0x0200, {1, {0x0201}}}, + { 0x0202, {1, {0x0203}}}, + { 0x0204, {1, {0x0205}}}, + { 0x0206, {1, {0x0207}}}, + { 0x0208, {1, {0x0209}}}, + { 0x020a, {1, {0x020b}}}, + { 0x020c, {1, {0x020d}}}, + { 0x020e, {1, {0x020f}}}, + { 0x0210, {1, {0x0211}}}, + { 0x0212, {1, {0x0213}}}, + { 0x0214, {1, {0x0215}}}, + { 0x0216, {1, {0x0217}}}, + { 0x0218, {1, {0x0219}}}, + { 0x021a, {1, {0x021b}}}, + { 0x021c, {1, {0x021d}}}, + { 0x021e, {1, {0x021f}}}, + { 0x0220, {1, {0x019e}}}, + { 0x0222, {1, {0x0223}}}, + { 0x0224, {1, {0x0225}}}, + { 0x0226, {1, {0x0227}}}, + { 0x0228, {1, {0x0229}}}, + { 0x022a, {1, {0x022b}}}, + { 0x022c, {1, {0x022d}}}, + { 0x022e, {1, {0x022f}}}, + { 0x0230, {1, {0x0231}}}, + { 0x0232, {1, {0x0233}}}, + { 0x023b, {1, {0x023c}}}, + { 0x023d, {1, {0x019a}}}, + { 0x0241, {1, {0x0294}}}, + { 0x0345, {1, {0x03b9}}}, + { 0x0386, {1, {0x03ac}}}, + { 0x0388, {1, {0x03ad}}}, + { 0x0389, {1, {0x03ae}}}, + { 0x038a, {1, {0x03af}}}, + { 0x038c, {1, {0x03cc}}}, + { 0x038e, {1, {0x03cd}}}, + { 0x038f, {1, {0x03ce}}}, + { 0x0390, {3, {0x03b9, 0x0308, 0x0301}}}, + { 0x0391, {1, {0x03b1}}}, + { 0x0392, {1, {0x03b2}}}, + { 0x0393, {1, {0x03b3}}}, + { 0x0394, {1, {0x03b4}}}, + { 0x0395, {1, {0x03b5}}}, + { 0x0396, {1, {0x03b6}}}, + { 0x0397, {1, {0x03b7}}}, + { 0x0398, {1, {0x03b8}}}, + { 0x0399, {1, {0x03b9}}}, + { 0x039a, {1, {0x03ba}}}, + { 0x039b, {1, {0x03bb}}}, + { 0x039c, {1, {0x03bc}}}, + { 0x039d, {1, {0x03bd}}}, + { 0x039e, {1, {0x03be}}}, + { 0x039f, {1, {0x03bf}}}, + { 0x03a0, {1, {0x03c0}}}, + { 0x03a1, {1, {0x03c1}}}, + { 0x03a3, {1, {0x03c3}}}, + { 0x03a4, {1, {0x03c4}}}, + { 0x03a5, {1, {0x03c5}}}, + { 0x03a6, {1, {0x03c6}}}, + { 0x03a7, {1, {0x03c7}}}, + { 0x03a8, {1, {0x03c8}}}, + { 0x03a9, {1, {0x03c9}}}, + { 0x03aa, {1, {0x03ca}}}, + { 0x03ab, 
{1, {0x03cb}}}, + { 0x03b0, {3, {0x03c5, 0x0308, 0x0301}}}, + { 0x03c2, {1, {0x03c3}}}, + { 0x03d0, {1, {0x03b2}}}, + { 0x03d1, {1, {0x03b8}}}, + { 0x03d5, {1, {0x03c6}}}, + { 0x03d6, {1, {0x03c0}}}, + { 0x03d8, {1, {0x03d9}}}, + { 0x03da, {1, {0x03db}}}, + { 0x03dc, {1, {0x03dd}}}, + { 0x03de, {1, {0x03df}}}, + { 0x03e0, {1, {0x03e1}}}, + { 0x03e2, {1, {0x03e3}}}, + { 0x03e4, {1, {0x03e5}}}, + { 0x03e6, {1, {0x03e7}}}, + { 0x03e8, {1, {0x03e9}}}, + { 0x03ea, {1, {0x03eb}}}, + { 0x03ec, {1, {0x03ed}}}, + { 0x03ee, {1, {0x03ef}}}, + { 0x03f0, {1, {0x03ba}}}, + { 0x03f1, {1, {0x03c1}}}, + { 0x03f4, {1, {0x03b8}}}, + { 0x03f5, {1, {0x03b5}}}, + { 0x03f7, {1, {0x03f8}}}, + { 0x03f9, {1, {0x03f2}}}, + { 0x03fa, {1, {0x03fb}}}, + { 0x0400, {1, {0x0450}}}, + { 0x0401, {1, {0x0451}}}, + { 0x0402, {1, {0x0452}}}, + { 0x0403, {1, {0x0453}}}, + { 0x0404, {1, {0x0454}}}, + { 0x0405, {1, {0x0455}}}, + { 0x0406, {1, {0x0456}}}, + { 0x0407, {1, {0x0457}}}, + { 0x0408, {1, {0x0458}}}, + { 0x0409, {1, {0x0459}}}, + { 0x040a, {1, {0x045a}}}, + { 0x040b, {1, {0x045b}}}, + { 0x040c, {1, {0x045c}}}, + { 0x040d, {1, {0x045d}}}, + { 0x040e, {1, {0x045e}}}, + { 0x040f, {1, {0x045f}}}, + { 0x0410, {1, {0x0430}}}, + { 0x0411, {1, {0x0431}}}, + { 0x0412, {1, {0x0432}}}, + { 0x0413, {1, {0x0433}}}, + { 0x0414, {1, {0x0434}}}, + { 0x0415, {1, {0x0435}}}, + { 0x0416, {1, {0x0436}}}, + { 0x0417, {1, {0x0437}}}, + { 0x0418, {1, {0x0438}}}, + { 0x0419, {1, {0x0439}}}, + { 0x041a, {1, {0x043a}}}, + { 0x041b, {1, {0x043b}}}, + { 0x041c, {1, {0x043c}}}, + { 0x041d, {1, {0x043d}}}, + { 0x041e, {1, {0x043e}}}, + { 0x041f, {1, {0x043f}}}, + { 0x0420, {1, {0x0440}}}, + { 0x0421, {1, {0x0441}}}, + { 0x0422, {1, {0x0442}}}, + { 0x0423, {1, {0x0443}}}, + { 0x0424, {1, {0x0444}}}, + { 0x0425, {1, {0x0445}}}, + { 0x0426, {1, {0x0446}}}, + { 0x0427, {1, {0x0447}}}, + { 0x0428, {1, {0x0448}}}, + { 0x0429, {1, {0x0449}}}, + { 0x042a, {1, {0x044a}}}, + { 0x042b, {1, {0x044b}}}, + { 0x042c, {1, {0x044c}}}, + { 
0x042d, {1, {0x044d}}}, + { 0x042e, {1, {0x044e}}}, + { 0x042f, {1, {0x044f}}}, + { 0x0460, {1, {0x0461}}}, + { 0x0462, {1, {0x0463}}}, + { 0x0464, {1, {0x0465}}}, + { 0x0466, {1, {0x0467}}}, + { 0x0468, {1, {0x0469}}}, + { 0x046a, {1, {0x046b}}}, + { 0x046c, {1, {0x046d}}}, + { 0x046e, {1, {0x046f}}}, + { 0x0470, {1, {0x0471}}}, + { 0x0472, {1, {0x0473}}}, + { 0x0474, {1, {0x0475}}}, + { 0x0476, {1, {0x0477}}}, + { 0x0478, {1, {0x0479}}}, + { 0x047a, {1, {0x047b}}}, + { 0x047c, {1, {0x047d}}}, + { 0x047e, {1, {0x047f}}}, + { 0x0480, {1, {0x0481}}}, + { 0x048a, {1, {0x048b}}}, + { 0x048c, {1, {0x048d}}}, + { 0x048e, {1, {0x048f}}}, + { 0x0490, {1, {0x0491}}}, + { 0x0492, {1, {0x0493}}}, + { 0x0494, {1, {0x0495}}}, + { 0x0496, {1, {0x0497}}}, + { 0x0498, {1, {0x0499}}}, + { 0x049a, {1, {0x049b}}}, + { 0x049c, {1, {0x049d}}}, + { 0x049e, {1, {0x049f}}}, + { 0x04a0, {1, {0x04a1}}}, + { 0x04a2, {1, {0x04a3}}}, + { 0x04a4, {1, {0x04a5}}}, + { 0x04a6, {1, {0x04a7}}}, + { 0x04a8, {1, {0x04a9}}}, + { 0x04aa, {1, {0x04ab}}}, + { 0x04ac, {1, {0x04ad}}}, + { 0x04ae, {1, {0x04af}}}, + { 0x04b0, {1, {0x04b1}}}, + { 0x04b2, {1, {0x04b3}}}, + { 0x04b4, {1, {0x04b5}}}, + { 0x04b6, {1, {0x04b7}}}, + { 0x04b8, {1, {0x04b9}}}, + { 0x04ba, {1, {0x04bb}}}, + { 0x04bc, {1, {0x04bd}}}, + { 0x04be, {1, {0x04bf}}}, + { 0x04c1, {1, {0x04c2}}}, + { 0x04c3, {1, {0x04c4}}}, + { 0x04c5, {1, {0x04c6}}}, + { 0x04c7, {1, {0x04c8}}}, + { 0x04c9, {1, {0x04ca}}}, + { 0x04cb, {1, {0x04cc}}}, + { 0x04cd, {1, {0x04ce}}}, + { 0x04d0, {1, {0x04d1}}}, + { 0x04d2, {1, {0x04d3}}}, + { 0x04d4, {1, {0x04d5}}}, + { 0x04d6, {1, {0x04d7}}}, + { 0x04d8, {1, {0x04d9}}}, + { 0x04da, {1, {0x04db}}}, + { 0x04dc, {1, {0x04dd}}}, + { 0x04de, {1, {0x04df}}}, + { 0x04e0, {1, {0x04e1}}}, + { 0x04e2, {1, {0x04e3}}}, + { 0x04e4, {1, {0x04e5}}}, + { 0x04e6, {1, {0x04e7}}}, + { 0x04e8, {1, {0x04e9}}}, + { 0x04ea, {1, {0x04eb}}}, + { 0x04ec, {1, {0x04ed}}}, + { 0x04ee, {1, {0x04ef}}}, + { 0x04f0, {1, {0x04f1}}}, + { 0x04f2, {1, 
{0x04f3}}}, + { 0x04f4, {1, {0x04f5}}}, + { 0x04f6, {1, {0x04f7}}}, + { 0x04f8, {1, {0x04f9}}}, + { 0x0500, {1, {0x0501}}}, + { 0x0502, {1, {0x0503}}}, + { 0x0504, {1, {0x0505}}}, + { 0x0506, {1, {0x0507}}}, + { 0x0508, {1, {0x0509}}}, + { 0x050a, {1, {0x050b}}}, + { 0x050c, {1, {0x050d}}}, + { 0x050e, {1, {0x050f}}}, + { 0x0531, {1, {0x0561}}}, + { 0x0532, {1, {0x0562}}}, + { 0x0533, {1, {0x0563}}}, + { 0x0534, {1, {0x0564}}}, + { 0x0535, {1, {0x0565}}}, + { 0x0536, {1, {0x0566}}}, + { 0x0537, {1, {0x0567}}}, + { 0x0538, {1, {0x0568}}}, + { 0x0539, {1, {0x0569}}}, + { 0x053a, {1, {0x056a}}}, + { 0x053b, {1, {0x056b}}}, + { 0x053c, {1, {0x056c}}}, + { 0x053d, {1, {0x056d}}}, + { 0x053e, {1, {0x056e}}}, + { 0x053f, {1, {0x056f}}}, + { 0x0540, {1, {0x0570}}}, + { 0x0541, {1, {0x0571}}}, + { 0x0542, {1, {0x0572}}}, + { 0x0543, {1, {0x0573}}}, + { 0x0544, {1, {0x0574}}}, + { 0x0545, {1, {0x0575}}}, + { 0x0546, {1, {0x0576}}}, + { 0x0547, {1, {0x0577}}}, + { 0x0548, {1, {0x0578}}}, + { 0x0549, {1, {0x0579}}}, + { 0x054a, {1, {0x057a}}}, + { 0x054b, {1, {0x057b}}}, + { 0x054c, {1, {0x057c}}}, + { 0x054d, {1, {0x057d}}}, + { 0x054e, {1, {0x057e}}}, + { 0x054f, {1, {0x057f}}}, + { 0x0550, {1, {0x0580}}}, + { 0x0551, {1, {0x0581}}}, + { 0x0552, {1, {0x0582}}}, + { 0x0553, {1, {0x0583}}}, + { 0x0554, {1, {0x0584}}}, + { 0x0555, {1, {0x0585}}}, + { 0x0556, {1, {0x0586}}}, + { 0x0587, {2, {0x0565, 0x0582}}}, + { 0x10a0, {1, {0x2d00}}}, + { 0x10a1, {1, {0x2d01}}}, + { 0x10a2, {1, {0x2d02}}}, + { 0x10a3, {1, {0x2d03}}}, + { 0x10a4, {1, {0x2d04}}}, + { 0x10a5, {1, {0x2d05}}}, + { 0x10a6, {1, {0x2d06}}}, + { 0x10a7, {1, {0x2d07}}}, + { 0x10a8, {1, {0x2d08}}}, + { 0x10a9, {1, {0x2d09}}}, + { 0x10aa, {1, {0x2d0a}}}, + { 0x10ab, {1, {0x2d0b}}}, + { 0x10ac, {1, {0x2d0c}}}, + { 0x10ad, {1, {0x2d0d}}}, + { 0x10ae, {1, {0x2d0e}}}, + { 0x10af, {1, {0x2d0f}}}, + { 0x10b0, {1, {0x2d10}}}, + { 0x10b1, {1, {0x2d11}}}, + { 0x10b2, {1, {0x2d12}}}, + { 0x10b3, {1, {0x2d13}}}, + { 0x10b4, {1, 
{0x2d14}}}, + { 0x10b5, {1, {0x2d15}}}, + { 0x10b6, {1, {0x2d16}}}, + { 0x10b7, {1, {0x2d17}}}, + { 0x10b8, {1, {0x2d18}}}, + { 0x10b9, {1, {0x2d19}}}, + { 0x10ba, {1, {0x2d1a}}}, + { 0x10bb, {1, {0x2d1b}}}, + { 0x10bc, {1, {0x2d1c}}}, + { 0x10bd, {1, {0x2d1d}}}, + { 0x10be, {1, {0x2d1e}}}, + { 0x10bf, {1, {0x2d1f}}}, + { 0x10c0, {1, {0x2d20}}}, + { 0x10c1, {1, {0x2d21}}}, + { 0x10c2, {1, {0x2d22}}}, + { 0x10c3, {1, {0x2d23}}}, + { 0x10c4, {1, {0x2d24}}}, + { 0x10c5, {1, {0x2d25}}}, + { 0x1e00, {1, {0x1e01}}}, + { 0x1e02, {1, {0x1e03}}}, + { 0x1e04, {1, {0x1e05}}}, + { 0x1e06, {1, {0x1e07}}}, + { 0x1e08, {1, {0x1e09}}}, + { 0x1e0a, {1, {0x1e0b}}}, + { 0x1e0c, {1, {0x1e0d}}}, + { 0x1e0e, {1, {0x1e0f}}}, + { 0x1e10, {1, {0x1e11}}}, + { 0x1e12, {1, {0x1e13}}}, + { 0x1e14, {1, {0x1e15}}}, + { 0x1e16, {1, {0x1e17}}}, + { 0x1e18, {1, {0x1e19}}}, + { 0x1e1a, {1, {0x1e1b}}}, + { 0x1e1c, {1, {0x1e1d}}}, + { 0x1e1e, {1, {0x1e1f}}}, + { 0x1e20, {1, {0x1e21}}}, + { 0x1e22, {1, {0x1e23}}}, + { 0x1e24, {1, {0x1e25}}}, + { 0x1e26, {1, {0x1e27}}}, + { 0x1e28, {1, {0x1e29}}}, + { 0x1e2a, {1, {0x1e2b}}}, + { 0x1e2c, {1, {0x1e2d}}}, + { 0x1e2e, {1, {0x1e2f}}}, + { 0x1e30, {1, {0x1e31}}}, + { 0x1e32, {1, {0x1e33}}}, + { 0x1e34, {1, {0x1e35}}}, + { 0x1e36, {1, {0x1e37}}}, + { 0x1e38, {1, {0x1e39}}}, + { 0x1e3a, {1, {0x1e3b}}}, + { 0x1e3c, {1, {0x1e3d}}}, + { 0x1e3e, {1, {0x1e3f}}}, + { 0x1e40, {1, {0x1e41}}}, + { 0x1e42, {1, {0x1e43}}}, + { 0x1e44, {1, {0x1e45}}}, + { 0x1e46, {1, {0x1e47}}}, + { 0x1e48, {1, {0x1e49}}}, + { 0x1e4a, {1, {0x1e4b}}}, + { 0x1e4c, {1, {0x1e4d}}}, + { 0x1e4e, {1, {0x1e4f}}}, + { 0x1e50, {1, {0x1e51}}}, + { 0x1e52, {1, {0x1e53}}}, + { 0x1e54, {1, {0x1e55}}}, + { 0x1e56, {1, {0x1e57}}}, + { 0x1e58, {1, {0x1e59}}}, + { 0x1e5a, {1, {0x1e5b}}}, + { 0x1e5c, {1, {0x1e5d}}}, + { 0x1e5e, {1, {0x1e5f}}}, + { 0x1e60, {1, {0x1e61}}}, + { 0x1e62, {1, {0x1e63}}}, + { 0x1e64, {1, {0x1e65}}}, + { 0x1e66, {1, {0x1e67}}}, + { 0x1e68, {1, {0x1e69}}}, + { 0x1e6a, {1, {0x1e6b}}}, 
+ { 0x1e6c, {1, {0x1e6d}}}, + { 0x1e6e, {1, {0x1e6f}}}, + { 0x1e70, {1, {0x1e71}}}, + { 0x1e72, {1, {0x1e73}}}, + { 0x1e74, {1, {0x1e75}}}, + { 0x1e76, {1, {0x1e77}}}, + { 0x1e78, {1, {0x1e79}}}, + { 0x1e7a, {1, {0x1e7b}}}, + { 0x1e7c, {1, {0x1e7d}}}, + { 0x1e7e, {1, {0x1e7f}}}, + { 0x1e80, {1, {0x1e81}}}, + { 0x1e82, {1, {0x1e83}}}, + { 0x1e84, {1, {0x1e85}}}, + { 0x1e86, {1, {0x1e87}}}, + { 0x1e88, {1, {0x1e89}}}, + { 0x1e8a, {1, {0x1e8b}}}, + { 0x1e8c, {1, {0x1e8d}}}, + { 0x1e8e, {1, {0x1e8f}}}, + { 0x1e90, {1, {0x1e91}}}, + { 0x1e92, {1, {0x1e93}}}, + { 0x1e94, {1, {0x1e95}}}, + { 0x1e96, {2, {0x0068, 0x0331}}}, + { 0x1e97, {2, {0x0074, 0x0308}}}, + { 0x1e98, {2, {0x0077, 0x030a}}}, + { 0x1e99, {2, {0x0079, 0x030a}}}, + { 0x1e9a, {2, {0x0061, 0x02be}}}, + { 0x1e9b, {1, {0x1e61}}}, + { 0x1ea0, {1, {0x1ea1}}}, + { 0x1ea2, {1, {0x1ea3}}}, + { 0x1ea4, {1, {0x1ea5}}}, + { 0x1ea6, {1, {0x1ea7}}}, + { 0x1ea8, {1, {0x1ea9}}}, + { 0x1eaa, {1, {0x1eab}}}, + { 0x1eac, {1, {0x1ead}}}, + { 0x1eae, {1, {0x1eaf}}}, + { 0x1eb0, {1, {0x1eb1}}}, + { 0x1eb2, {1, {0x1eb3}}}, + { 0x1eb4, {1, {0x1eb5}}}, + { 0x1eb6, {1, {0x1eb7}}}, + { 0x1eb8, {1, {0x1eb9}}}, + { 0x1eba, {1, {0x1ebb}}}, + { 0x1ebc, {1, {0x1ebd}}}, + { 0x1ebe, {1, {0x1ebf}}}, + { 0x1ec0, {1, {0x1ec1}}}, + { 0x1ec2, {1, {0x1ec3}}}, + { 0x1ec4, {1, {0x1ec5}}}, + { 0x1ec6, {1, {0x1ec7}}}, + { 0x1ec8, {1, {0x1ec9}}}, + { 0x1eca, {1, {0x1ecb}}}, + { 0x1ecc, {1, {0x1ecd}}}, + { 0x1ece, {1, {0x1ecf}}}, + { 0x1ed0, {1, {0x1ed1}}}, + { 0x1ed2, {1, {0x1ed3}}}, + { 0x1ed4, {1, {0x1ed5}}}, + { 0x1ed6, {1, {0x1ed7}}}, + { 0x1ed8, {1, {0x1ed9}}}, + { 0x1eda, {1, {0x1edb}}}, + { 0x1edc, {1, {0x1edd}}}, + { 0x1ede, {1, {0x1edf}}}, + { 0x1ee0, {1, {0x1ee1}}}, + { 0x1ee2, {1, {0x1ee3}}}, + { 0x1ee4, {1, {0x1ee5}}}, + { 0x1ee6, {1, {0x1ee7}}}, + { 0x1ee8, {1, {0x1ee9}}}, + { 0x1eea, {1, {0x1eeb}}}, + { 0x1eec, {1, {0x1eed}}}, + { 0x1eee, {1, {0x1eef}}}, + { 0x1ef0, {1, {0x1ef1}}}, + { 0x1ef2, {1, {0x1ef3}}}, + { 0x1ef4, {1, {0x1ef5}}}, 
+ { 0x1ef6, {1, {0x1ef7}}}, + { 0x1ef8, {1, {0x1ef9}}}, + { 0x1f08, {1, {0x1f00}}}, + { 0x1f09, {1, {0x1f01}}}, + { 0x1f0a, {1, {0x1f02}}}, + { 0x1f0b, {1, {0x1f03}}}, + { 0x1f0c, {1, {0x1f04}}}, + { 0x1f0d, {1, {0x1f05}}}, + { 0x1f0e, {1, {0x1f06}}}, + { 0x1f0f, {1, {0x1f07}}}, + { 0x1f18, {1, {0x1f10}}}, + { 0x1f19, {1, {0x1f11}}}, + { 0x1f1a, {1, {0x1f12}}}, + { 0x1f1b, {1, {0x1f13}}}, + { 0x1f1c, {1, {0x1f14}}}, + { 0x1f1d, {1, {0x1f15}}}, + { 0x1f28, {1, {0x1f20}}}, + { 0x1f29, {1, {0x1f21}}}, + { 0x1f2a, {1, {0x1f22}}}, + { 0x1f2b, {1, {0x1f23}}}, + { 0x1f2c, {1, {0x1f24}}}, + { 0x1f2d, {1, {0x1f25}}}, + { 0x1f2e, {1, {0x1f26}}}, + { 0x1f2f, {1, {0x1f27}}}, + { 0x1f38, {1, {0x1f30}}}, + { 0x1f39, {1, {0x1f31}}}, + { 0x1f3a, {1, {0x1f32}}}, + { 0x1f3b, {1, {0x1f33}}}, + { 0x1f3c, {1, {0x1f34}}}, + { 0x1f3d, {1, {0x1f35}}}, + { 0x1f3e, {1, {0x1f36}}}, + { 0x1f3f, {1, {0x1f37}}}, + { 0x1f48, {1, {0x1f40}}}, + { 0x1f49, {1, {0x1f41}}}, + { 0x1f4a, {1, {0x1f42}}}, + { 0x1f4b, {1, {0x1f43}}}, + { 0x1f4c, {1, {0x1f44}}}, + { 0x1f4d, {1, {0x1f45}}}, + { 0x1f50, {2, {0x03c5, 0x0313}}}, + { 0x1f52, {3, {0x03c5, 0x0313, 0x0300}}}, + { 0x1f54, {3, {0x03c5, 0x0313, 0x0301}}}, + { 0x1f56, {3, {0x03c5, 0x0313, 0x0342}}}, + { 0x1f59, {1, {0x1f51}}}, + { 0x1f5b, {1, {0x1f53}}}, + { 0x1f5d, {1, {0x1f55}}}, + { 0x1f5f, {1, {0x1f57}}}, + { 0x1f68, {1, {0x1f60}}}, + { 0x1f69, {1, {0x1f61}}}, + { 0x1f6a, {1, {0x1f62}}}, + { 0x1f6b, {1, {0x1f63}}}, + { 0x1f6c, {1, {0x1f64}}}, + { 0x1f6d, {1, {0x1f65}}}, + { 0x1f6e, {1, {0x1f66}}}, + { 0x1f6f, {1, {0x1f67}}}, + { 0x1f80, {2, {0x1f00, 0x03b9}}}, + { 0x1f81, {2, {0x1f01, 0x03b9}}}, + { 0x1f82, {2, {0x1f02, 0x03b9}}}, + { 0x1f83, {2, {0x1f03, 0x03b9}}}, + { 0x1f84, {2, {0x1f04, 0x03b9}}}, + { 0x1f85, {2, {0x1f05, 0x03b9}}}, + { 0x1f86, {2, {0x1f06, 0x03b9}}}, + { 0x1f87, {2, {0x1f07, 0x03b9}}}, + { 0x1f88, {2, {0x1f00, 0x03b9}}}, + { 0x1f89, {2, {0x1f01, 0x03b9}}}, + { 0x1f8a, {2, {0x1f02, 0x03b9}}}, + { 0x1f8b, {2, {0x1f03, 0x03b9}}}, 
+ { 0x1f8c, {2, {0x1f04, 0x03b9}}}, + { 0x1f8d, {2, {0x1f05, 0x03b9}}}, + { 0x1f8e, {2, {0x1f06, 0x03b9}}}, + { 0x1f8f, {2, {0x1f07, 0x03b9}}}, + { 0x1f90, {2, {0x1f20, 0x03b9}}}, + { 0x1f91, {2, {0x1f21, 0x03b9}}}, + { 0x1f92, {2, {0x1f22, 0x03b9}}}, + { 0x1f93, {2, {0x1f23, 0x03b9}}}, + { 0x1f94, {2, {0x1f24, 0x03b9}}}, + { 0x1f95, {2, {0x1f25, 0x03b9}}}, + { 0x1f96, {2, {0x1f26, 0x03b9}}}, + { 0x1f97, {2, {0x1f27, 0x03b9}}}, + { 0x1f98, {2, {0x1f20, 0x03b9}}}, + { 0x1f99, {2, {0x1f21, 0x03b9}}}, + { 0x1f9a, {2, {0x1f22, 0x03b9}}}, + { 0x1f9b, {2, {0x1f23, 0x03b9}}}, + { 0x1f9c, {2, {0x1f24, 0x03b9}}}, + { 0x1f9d, {2, {0x1f25, 0x03b9}}}, + { 0x1f9e, {2, {0x1f26, 0x03b9}}}, + { 0x1f9f, {2, {0x1f27, 0x03b9}}}, + { 0x1fa0, {2, {0x1f60, 0x03b9}}}, + { 0x1fa1, {2, {0x1f61, 0x03b9}}}, + { 0x1fa2, {2, {0x1f62, 0x03b9}}}, + { 0x1fa3, {2, {0x1f63, 0x03b9}}}, + { 0x1fa4, {2, {0x1f64, 0x03b9}}}, + { 0x1fa5, {2, {0x1f65, 0x03b9}}}, + { 0x1fa6, {2, {0x1f66, 0x03b9}}}, + { 0x1fa7, {2, {0x1f67, 0x03b9}}}, + { 0x1fa8, {2, {0x1f60, 0x03b9}}}, + { 0x1fa9, {2, {0x1f61, 0x03b9}}}, + { 0x1faa, {2, {0x1f62, 0x03b9}}}, + { 0x1fab, {2, {0x1f63, 0x03b9}}}, + { 0x1fac, {2, {0x1f64, 0x03b9}}}, + { 0x1fad, {2, {0x1f65, 0x03b9}}}, + { 0x1fae, {2, {0x1f66, 0x03b9}}}, + { 0x1faf, {2, {0x1f67, 0x03b9}}}, + { 0x1fb2, {2, {0x1f70, 0x03b9}}}, + { 0x1fb3, {2, {0x03b1, 0x03b9}}}, + { 0x1fb4, {2, {0x03ac, 0x03b9}}}, + { 0x1fb6, {2, {0x03b1, 0x0342}}}, + { 0x1fb7, {3, {0x03b1, 0x0342, 0x03b9}}}, + { 0x1fb8, {1, {0x1fb0}}}, + { 0x1fb9, {1, {0x1fb1}}}, + { 0x1fba, {1, {0x1f70}}}, + { 0x1fbb, {1, {0x1f71}}}, + { 0x1fbc, {2, {0x03b1, 0x03b9}}}, + { 0x1fbe, {1, {0x03b9}}}, + { 0x1fc2, {2, {0x1f74, 0x03b9}}}, + { 0x1fc3, {2, {0x03b7, 0x03b9}}}, + { 0x1fc4, {2, {0x03ae, 0x03b9}}}, + { 0x1fc6, {2, {0x03b7, 0x0342}}}, + { 0x1fc7, {3, {0x03b7, 0x0342, 0x03b9}}}, + { 0x1fc8, {1, {0x1f72}}}, + { 0x1fc9, {1, {0x1f73}}}, + { 0x1fca, {1, {0x1f74}}}, + { 0x1fcb, {1, {0x1f75}}}, + { 0x1fcc, {2, {0x03b7, 0x03b9}}}, + { 
0x1fd2, {3, {0x03b9, 0x0308, 0x0300}}}, + { 0x1fd3, {3, {0x03b9, 0x0308, 0x0301}}}, + { 0x1fd6, {2, {0x03b9, 0x0342}}}, + { 0x1fd7, {3, {0x03b9, 0x0308, 0x0342}}}, + { 0x1fd8, {1, {0x1fd0}}}, + { 0x1fd9, {1, {0x1fd1}}}, + { 0x1fda, {1, {0x1f76}}}, + { 0x1fdb, {1, {0x1f77}}}, + { 0x1fe2, {3, {0x03c5, 0x0308, 0x0300}}}, + { 0x1fe3, {3, {0x03c5, 0x0308, 0x0301}}}, + { 0x1fe4, {2, {0x03c1, 0x0313}}}, + { 0x1fe6, {2, {0x03c5, 0x0342}}}, + { 0x1fe7, {3, {0x03c5, 0x0308, 0x0342}}}, + { 0x1fe8, {1, {0x1fe0}}}, + { 0x1fe9, {1, {0x1fe1}}}, + { 0x1fea, {1, {0x1f7a}}}, + { 0x1feb, {1, {0x1f7b}}}, + { 0x1fec, {1, {0x1fe5}}}, + { 0x1ff2, {2, {0x1f7c, 0x03b9}}}, + { 0x1ff3, {2, {0x03c9, 0x03b9}}}, + { 0x1ff4, {2, {0x03ce, 0x03b9}}}, + { 0x1ff6, {2, {0x03c9, 0x0342}}}, + { 0x1ff7, {3, {0x03c9, 0x0342, 0x03b9}}}, + { 0x1ff8, {1, {0x1f78}}}, + { 0x1ff9, {1, {0x1f79}}}, + { 0x1ffa, {1, {0x1f7c}}}, + { 0x1ffb, {1, {0x1f7d}}}, + { 0x1ffc, {2, {0x03c9, 0x03b9}}}, + { 0x2126, {1, {0x03c9}}}, + { 0x212a, {1, {0x006b}}}, + { 0x212b, {1, {0x00e5}}}, + { 0x2160, {1, {0x2170}}}, + { 0x2161, {1, {0x2171}}}, + { 0x2162, {1, {0x2172}}}, + { 0x2163, {1, {0x2173}}}, + { 0x2164, {1, {0x2174}}}, + { 0x2165, {1, {0x2175}}}, + { 0x2166, {1, {0x2176}}}, + { 0x2167, {1, {0x2177}}}, + { 0x2168, {1, {0x2178}}}, + { 0x2169, {1, {0x2179}}}, + { 0x216a, {1, {0x217a}}}, + { 0x216b, {1, {0x217b}}}, + { 0x216c, {1, {0x217c}}}, + { 0x216d, {1, {0x217d}}}, + { 0x216e, {1, {0x217e}}}, + { 0x216f, {1, {0x217f}}}, + { 0x24b6, {1, {0x24d0}}}, + { 0x24b7, {1, {0x24d1}}}, + { 0x24b8, {1, {0x24d2}}}, + { 0x24b9, {1, {0x24d3}}}, + { 0x24ba, {1, {0x24d4}}}, + { 0x24bb, {1, {0x24d5}}}, + { 0x24bc, {1, {0x24d6}}}, + { 0x24bd, {1, {0x24d7}}}, + { 0x24be, {1, {0x24d8}}}, + { 0x24bf, {1, {0x24d9}}}, + { 0x24c0, {1, {0x24da}}}, + { 0x24c1, {1, {0x24db}}}, + { 0x24c2, {1, {0x24dc}}}, + { 0x24c3, {1, {0x24dd}}}, + { 0x24c4, {1, {0x24de}}}, + { 0x24c5, {1, {0x24df}}}, + { 0x24c6, {1, {0x24e0}}}, + { 0x24c7, {1, {0x24e1}}}, + { 
0x24c8, {1, {0x24e2}}}, + { 0x24c9, {1, {0x24e3}}}, + { 0x24ca, {1, {0x24e4}}}, + { 0x24cb, {1, {0x24e5}}}, + { 0x24cc, {1, {0x24e6}}}, + { 0x24cd, {1, {0x24e7}}}, + { 0x24ce, {1, {0x24e8}}}, + { 0x24cf, {1, {0x24e9}}}, + { 0x2c00, {1, {0x2c30}}}, + { 0x2c01, {1, {0x2c31}}}, + { 0x2c02, {1, {0x2c32}}}, + { 0x2c03, {1, {0x2c33}}}, + { 0x2c04, {1, {0x2c34}}}, + { 0x2c05, {1, {0x2c35}}}, + { 0x2c06, {1, {0x2c36}}}, + { 0x2c07, {1, {0x2c37}}}, + { 0x2c08, {1, {0x2c38}}}, + { 0x2c09, {1, {0x2c39}}}, + { 0x2c0a, {1, {0x2c3a}}}, + { 0x2c0b, {1, {0x2c3b}}}, + { 0x2c0c, {1, {0x2c3c}}}, + { 0x2c0d, {1, {0x2c3d}}}, + { 0x2c0e, {1, {0x2c3e}}}, + { 0x2c0f, {1, {0x2c3f}}}, + { 0x2c10, {1, {0x2c40}}}, + { 0x2c11, {1, {0x2c41}}}, + { 0x2c12, {1, {0x2c42}}}, + { 0x2c13, {1, {0x2c43}}}, + { 0x2c14, {1, {0x2c44}}}, + { 0x2c15, {1, {0x2c45}}}, + { 0x2c16, {1, {0x2c46}}}, + { 0x2c17, {1, {0x2c47}}}, + { 0x2c18, {1, {0x2c48}}}, + { 0x2c19, {1, {0x2c49}}}, + { 0x2c1a, {1, {0x2c4a}}}, + { 0x2c1b, {1, {0x2c4b}}}, + { 0x2c1c, {1, {0x2c4c}}}, + { 0x2c1d, {1, {0x2c4d}}}, + { 0x2c1e, {1, {0x2c4e}}}, + { 0x2c1f, {1, {0x2c4f}}}, + { 0x2c20, {1, {0x2c50}}}, + { 0x2c21, {1, {0x2c51}}}, + { 0x2c22, {1, {0x2c52}}}, + { 0x2c23, {1, {0x2c53}}}, + { 0x2c24, {1, {0x2c54}}}, + { 0x2c25, {1, {0x2c55}}}, + { 0x2c26, {1, {0x2c56}}}, + { 0x2c27, {1, {0x2c57}}}, + { 0x2c28, {1, {0x2c58}}}, + { 0x2c29, {1, {0x2c59}}}, + { 0x2c2a, {1, {0x2c5a}}}, + { 0x2c2b, {1, {0x2c5b}}}, + { 0x2c2c, {1, {0x2c5c}}}, + { 0x2c2d, {1, {0x2c5d}}}, + { 0x2c2e, {1, {0x2c5e}}}, + { 0x2c80, {1, {0x2c81}}}, + { 0x2c82, {1, {0x2c83}}}, + { 0x2c84, {1, {0x2c85}}}, + { 0x2c86, {1, {0x2c87}}}, + { 0x2c88, {1, {0x2c89}}}, + { 0x2c8a, {1, {0x2c8b}}}, + { 0x2c8c, {1, {0x2c8d}}}, + { 0x2c8e, {1, {0x2c8f}}}, + { 0x2c90, {1, {0x2c91}}}, + { 0x2c92, {1, {0x2c93}}}, + { 0x2c94, {1, {0x2c95}}}, + { 0x2c96, {1, {0x2c97}}}, + { 0x2c98, {1, {0x2c99}}}, + { 0x2c9a, {1, {0x2c9b}}}, + { 0x2c9c, {1, {0x2c9d}}}, + { 0x2c9e, {1, {0x2c9f}}}, + { 0x2ca0, {1, 
{0x2ca1}}}, + { 0x2ca2, {1, {0x2ca3}}}, + { 0x2ca4, {1, {0x2ca5}}}, + { 0x2ca6, {1, {0x2ca7}}}, + { 0x2ca8, {1, {0x2ca9}}}, + { 0x2caa, {1, {0x2cab}}}, + { 0x2cac, {1, {0x2cad}}}, + { 0x2cae, {1, {0x2caf}}}, + { 0x2cb0, {1, {0x2cb1}}}, + { 0x2cb2, {1, {0x2cb3}}}, + { 0x2cb4, {1, {0x2cb5}}}, + { 0x2cb6, {1, {0x2cb7}}}, + { 0x2cb8, {1, {0x2cb9}}}, + { 0x2cba, {1, {0x2cbb}}}, + { 0x2cbc, {1, {0x2cbd}}}, + { 0x2cbe, {1, {0x2cbf}}}, + { 0x2cc0, {1, {0x2cc1}}}, + { 0x2cc2, {1, {0x2cc3}}}, + { 0x2cc4, {1, {0x2cc5}}}, + { 0x2cc6, {1, {0x2cc7}}}, + { 0x2cc8, {1, {0x2cc9}}}, + { 0x2cca, {1, {0x2ccb}}}, + { 0x2ccc, {1, {0x2ccd}}}, + { 0x2cce, {1, {0x2ccf}}}, + { 0x2cd0, {1, {0x2cd1}}}, + { 0x2cd2, {1, {0x2cd3}}}, + { 0x2cd4, {1, {0x2cd5}}}, + { 0x2cd6, {1, {0x2cd7}}}, + { 0x2cd8, {1, {0x2cd9}}}, + { 0x2cda, {1, {0x2cdb}}}, + { 0x2cdc, {1, {0x2cdd}}}, + { 0x2cde, {1, {0x2cdf}}}, + { 0x2ce0, {1, {0x2ce1}}}, + { 0x2ce2, {1, {0x2ce3}}}, + { 0xfb00, {2, {0x0066, 0x0066}}}, + { 0xfb01, {2, {0x0066, 0x0069}}}, + { 0xfb02, {2, {0x0066, 0x006c}}}, + { 0xfb03, {3, {0x0066, 0x0066, 0x0069}}}, + { 0xfb04, {3, {0x0066, 0x0066, 0x006c}}}, + { 0xfb05, {2, {0x0073, 0x0074}}}, + { 0xfb06, {2, {0x0073, 0x0074}}}, + { 0xfb13, {2, {0x0574, 0x0576}}}, + { 0xfb14, {2, {0x0574, 0x0565}}}, + { 0xfb15, {2, {0x0574, 0x056b}}}, + { 0xfb16, {2, {0x057e, 0x0576}}}, + { 0xfb17, {2, {0x0574, 0x056d}}}, + { 0xff21, {1, {0xff41}}}, + { 0xff22, {1, {0xff42}}}, + { 0xff23, {1, {0xff43}}}, + { 0xff24, {1, {0xff44}}}, + { 0xff25, {1, {0xff45}}}, + { 0xff26, {1, {0xff46}}}, + { 0xff27, {1, {0xff47}}}, + { 0xff28, {1, {0xff48}}}, + { 0xff29, {1, {0xff49}}}, + { 0xff2a, {1, {0xff4a}}}, + { 0xff2b, {1, {0xff4b}}}, + { 0xff2c, {1, {0xff4c}}}, + { 0xff2d, {1, {0xff4d}}}, + { 0xff2e, {1, {0xff4e}}}, + { 0xff2f, {1, {0xff4f}}}, + { 0xff30, {1, {0xff50}}}, + { 0xff31, {1, {0xff51}}}, + { 0xff32, {1, {0xff52}}}, + { 0xff33, {1, {0xff53}}}, + { 0xff34, {1, {0xff54}}}, + { 0xff35, {1, {0xff55}}}, + { 0xff36, {1, {0xff56}}}, 
+ { 0xff37, {1, {0xff57}}}, + { 0xff38, {1, {0xff58}}}, + { 0xff39, {1, {0xff59}}}, + { 0xff3a, {1, {0xff5a}}}, + { 0x10400, {1, {0x10428}}}, + { 0x10401, {1, {0x10429}}}, + { 0x10402, {1, {0x1042a}}}, + { 0x10403, {1, {0x1042b}}}, + { 0x10404, {1, {0x1042c}}}, + { 0x10405, {1, {0x1042d}}}, + { 0x10406, {1, {0x1042e}}}, + { 0x10407, {1, {0x1042f}}}, + { 0x10408, {1, {0x10430}}}, + { 0x10409, {1, {0x10431}}}, + { 0x1040a, {1, {0x10432}}}, + { 0x1040b, {1, {0x10433}}}, + { 0x1040c, {1, {0x10434}}}, + { 0x1040d, {1, {0x10435}}}, + { 0x1040e, {1, {0x10436}}}, + { 0x1040f, {1, {0x10437}}}, + { 0x10410, {1, {0x10438}}}, + { 0x10411, {1, {0x10439}}}, + { 0x10412, {1, {0x1043a}}}, + { 0x10413, {1, {0x1043b}}}, + { 0x10414, {1, {0x1043c}}}, + { 0x10415, {1, {0x1043d}}}, + { 0x10416, {1, {0x1043e}}}, + { 0x10417, {1, {0x1043f}}}, + { 0x10418, {1, {0x10440}}}, + { 0x10419, {1, {0x10441}}}, + { 0x1041a, {1, {0x10442}}}, + { 0x1041b, {1, {0x10443}}}, + { 0x1041c, {1, {0x10444}}}, + { 0x1041d, {1, {0x10445}}}, + { 0x1041e, {1, {0x10446}}}, + { 0x1041f, {1, {0x10447}}}, + { 0x10420, {1, {0x10448}}}, + { 0x10421, {1, {0x10449}}}, + { 0x10422, {1, {0x1044a}}}, + { 0x10423, {1, {0x1044b}}}, + { 0x10424, {1, {0x1044c}}}, + { 0x10425, {1, {0x1044d}}}, + { 0x10426, {1, {0x1044e}}}, + { 0x10427, {1, {0x1044f}}} +}; + +static const CaseFold_11_Type CaseFold_Locale[] = { + { 0x0049, {1, {0x0069}}}, + { 0x0130, {2, {0x0069, 0x0307}}} +}; + +static const CaseUnfold_11_Type CaseUnfold_11[] = { + { 0x0061, {1, {0x0041 }}}, + { 0x0062, {1, {0x0042 }}}, + { 0x0063, {1, {0x0043 }}}, + { 0x0064, {1, {0x0044 }}}, + { 0x0065, {1, {0x0045 }}}, + { 0x0066, {1, {0x0046 }}}, + { 0x0067, {1, {0x0047 }}}, + { 0x0068, {1, {0x0048 }}}, + { 0x006a, {1, {0x004a }}}, + { 0x006b, {2, {0x212a, 0x004b }}}, + { 0x006c, {1, {0x004c }}}, + { 0x006d, {1, {0x004d }}}, + { 0x006e, {1, {0x004e }}}, + { 0x006f, {1, {0x004f }}}, + { 0x0070, {1, {0x0050 }}}, + { 0x0071, {1, {0x0051 }}}, + { 0x0072, {1, {0x0052 }}}, + { 
0x0073, {2, {0x0053, 0x017f }}}, + { 0x0074, {1, {0x0054 }}}, + { 0x0075, {1, {0x0055 }}}, + { 0x0076, {1, {0x0056 }}}, + { 0x0077, {1, {0x0057 }}}, + { 0x0078, {1, {0x0058 }}}, + { 0x0079, {1, {0x0059 }}}, + { 0x007a, {1, {0x005a }}}, + { 0x00e0, {1, {0x00c0 }}}, + { 0x00e1, {1, {0x00c1 }}}, + { 0x00e2, {1, {0x00c2 }}}, + { 0x00e3, {1, {0x00c3 }}}, + { 0x00e4, {1, {0x00c4 }}}, + { 0x00e5, {2, {0x212b, 0x00c5 }}}, + { 0x00e6, {1, {0x00c6 }}}, + { 0x00e7, {1, {0x00c7 }}}, + { 0x00e8, {1, {0x00c8 }}}, + { 0x00e9, {1, {0x00c9 }}}, + { 0x00ea, {1, {0x00ca }}}, + { 0x00eb, {1, {0x00cb }}}, + { 0x00ec, {1, {0x00cc }}}, + { 0x00ed, {1, {0x00cd }}}, + { 0x00ee, {1, {0x00ce }}}, + { 0x00ef, {1, {0x00cf }}}, + { 0x00f0, {1, {0x00d0 }}}, + { 0x00f1, {1, {0x00d1 }}}, + { 0x00f2, {1, {0x00d2 }}}, + { 0x00f3, {1, {0x00d3 }}}, + { 0x00f4, {1, {0x00d4 }}}, + { 0x00f5, {1, {0x00d5 }}}, + { 0x00f6, {1, {0x00d6 }}}, + { 0x00f8, {1, {0x00d8 }}}, + { 0x00f9, {1, {0x00d9 }}}, + { 0x00fa, {1, {0x00da }}}, + { 0x00fb, {1, {0x00db }}}, + { 0x00fc, {1, {0x00dc }}}, + { 0x00fd, {1, {0x00dd }}}, + { 0x00fe, {1, {0x00de }}}, + { 0x00ff, {1, {0x0178 }}}, + { 0x0101, {1, {0x0100 }}}, + { 0x0103, {1, {0x0102 }}}, + { 0x0105, {1, {0x0104 }}}, + { 0x0107, {1, {0x0106 }}}, + { 0x0109, {1, {0x0108 }}}, + { 0x010b, {1, {0x010a }}}, + { 0x010d, {1, {0x010c }}}, + { 0x010f, {1, {0x010e }}}, + { 0x0111, {1, {0x0110 }}}, + { 0x0113, {1, {0x0112 }}}, + { 0x0115, {1, {0x0114 }}}, + { 0x0117, {1, {0x0116 }}}, + { 0x0119, {1, {0x0118 }}}, + { 0x011b, {1, {0x011a }}}, + { 0x011d, {1, {0x011c }}}, + { 0x011f, {1, {0x011e }}}, + { 0x0121, {1, {0x0120 }}}, + { 0x0123, {1, {0x0122 }}}, + { 0x0125, {1, {0x0124 }}}, + { 0x0127, {1, {0x0126 }}}, + { 0x0129, {1, {0x0128 }}}, + { 0x012b, {1, {0x012a }}}, + { 0x012d, {1, {0x012c }}}, + { 0x012f, {1, {0x012e }}}, + { 0x0133, {1, {0x0132 }}}, + { 0x0135, {1, {0x0134 }}}, + { 0x0137, {1, {0x0136 }}}, + { 0x013a, {1, {0x0139 }}}, + { 0x013c, {1, {0x013b }}}, + { 0x013e, {1, 
{0x013d }}}, + { 0x0140, {1, {0x013f }}}, + { 0x0142, {1, {0x0141 }}}, + { 0x0144, {1, {0x0143 }}}, + { 0x0146, {1, {0x0145 }}}, + { 0x0148, {1, {0x0147 }}}, + { 0x014b, {1, {0x014a }}}, + { 0x014d, {1, {0x014c }}}, + { 0x014f, {1, {0x014e }}}, + { 0x0151, {1, {0x0150 }}}, + { 0x0153, {1, {0x0152 }}}, + { 0x0155, {1, {0x0154 }}}, + { 0x0157, {1, {0x0156 }}}, + { 0x0159, {1, {0x0158 }}}, + { 0x015b, {1, {0x015a }}}, + { 0x015d, {1, {0x015c }}}, + { 0x015f, {1, {0x015e }}}, + { 0x0161, {1, {0x0160 }}}, + { 0x0163, {1, {0x0162 }}}, + { 0x0165, {1, {0x0164 }}}, + { 0x0167, {1, {0x0166 }}}, + { 0x0169, {1, {0x0168 }}}, + { 0x016b, {1, {0x016a }}}, + { 0x016d, {1, {0x016c }}}, + { 0x016f, {1, {0x016e }}}, + { 0x0171, {1, {0x0170 }}}, + { 0x0173, {1, {0x0172 }}}, + { 0x0175, {1, {0x0174 }}}, + { 0x0177, {1, {0x0176 }}}, + { 0x017a, {1, {0x0179 }}}, + { 0x017c, {1, {0x017b }}}, + { 0x017e, {1, {0x017d }}}, + { 0x0183, {1, {0x0182 }}}, + { 0x0185, {1, {0x0184 }}}, + { 0x0188, {1, {0x0187 }}}, + { 0x018c, {1, {0x018b }}}, + { 0x0192, {1, {0x0191 }}}, + { 0x0195, {1, {0x01f6 }}}, + { 0x0199, {1, {0x0198 }}}, + { 0x019a, {1, {0x023d }}}, + { 0x019e, {1, {0x0220 }}}, + { 0x01a1, {1, {0x01a0 }}}, + { 0x01a3, {1, {0x01a2 }}}, + { 0x01a5, {1, {0x01a4 }}}, + { 0x01a8, {1, {0x01a7 }}}, + { 0x01ad, {1, {0x01ac }}}, + { 0x01b0, {1, {0x01af }}}, + { 0x01b4, {1, {0x01b3 }}}, + { 0x01b6, {1, {0x01b5 }}}, + { 0x01b9, {1, {0x01b8 }}}, + { 0x01bd, {1, {0x01bc }}}, + { 0x01bf, {1, {0x01f7 }}}, + { 0x01c6, {2, {0x01c4, 0x01c5 }}}, + { 0x01c9, {2, {0x01c7, 0x01c8 }}}, + { 0x01cc, {2, {0x01ca, 0x01cb }}}, + { 0x01ce, {1, {0x01cd }}}, + { 0x01d0, {1, {0x01cf }}}, + { 0x01d2, {1, {0x01d1 }}}, + { 0x01d4, {1, {0x01d3 }}}, + { 0x01d6, {1, {0x01d5 }}}, + { 0x01d8, {1, {0x01d7 }}}, + { 0x01da, {1, {0x01d9 }}}, + { 0x01dc, {1, {0x01db }}}, + { 0x01dd, {1, {0x018e }}}, + { 0x01df, {1, {0x01de }}}, + { 0x01e1, {1, {0x01e0 }}}, + { 0x01e3, {1, {0x01e2 }}}, + { 0x01e5, {1, {0x01e4 }}}, + { 0x01e7, {1, 
{0x01e6 }}}, + { 0x01e9, {1, {0x01e8 }}}, + { 0x01eb, {1, {0x01ea }}}, + { 0x01ed, {1, {0x01ec }}}, + { 0x01ef, {1, {0x01ee }}}, + { 0x01f3, {2, {0x01f1, 0x01f2 }}}, + { 0x01f5, {1, {0x01f4 }}}, + { 0x01f9, {1, {0x01f8 }}}, + { 0x01fb, {1, {0x01fa }}}, + { 0x01fd, {1, {0x01fc }}}, + { 0x01ff, {1, {0x01fe }}}, + { 0x0201, {1, {0x0200 }}}, + { 0x0203, {1, {0x0202 }}}, + { 0x0205, {1, {0x0204 }}}, + { 0x0207, {1, {0x0206 }}}, + { 0x0209, {1, {0x0208 }}}, + { 0x020b, {1, {0x020a }}}, + { 0x020d, {1, {0x020c }}}, + { 0x020f, {1, {0x020e }}}, + { 0x0211, {1, {0x0210 }}}, + { 0x0213, {1, {0x0212 }}}, + { 0x0215, {1, {0x0214 }}}, + { 0x0217, {1, {0x0216 }}}, + { 0x0219, {1, {0x0218 }}}, + { 0x021b, {1, {0x021a }}}, + { 0x021d, {1, {0x021c }}}, + { 0x021f, {1, {0x021e }}}, + { 0x0223, {1, {0x0222 }}}, + { 0x0225, {1, {0x0224 }}}, + { 0x0227, {1, {0x0226 }}}, + { 0x0229, {1, {0x0228 }}}, + { 0x022b, {1, {0x022a }}}, + { 0x022d, {1, {0x022c }}}, + { 0x022f, {1, {0x022e }}}, + { 0x0231, {1, {0x0230 }}}, + { 0x0233, {1, {0x0232 }}}, + { 0x023c, {1, {0x023b }}}, + { 0x0253, {1, {0x0181 }}}, + { 0x0254, {1, {0x0186 }}}, + { 0x0256, {1, {0x0189 }}}, + { 0x0257, {1, {0x018a }}}, + { 0x0259, {1, {0x018f }}}, + { 0x025b, {1, {0x0190 }}}, + { 0x0260, {1, {0x0193 }}}, + { 0x0263, {1, {0x0194 }}}, + { 0x0268, {1, {0x0197 }}}, + { 0x0269, {1, {0x0196 }}}, + { 0x026f, {1, {0x019c }}}, + { 0x0272, {1, {0x019d }}}, + { 0x0275, {1, {0x019f }}}, + { 0x0280, {1, {0x01a6 }}}, + { 0x0283, {1, {0x01a9 }}}, + { 0x0288, {1, {0x01ae }}}, + { 0x028a, {1, {0x01b1 }}}, + { 0x028b, {1, {0x01b2 }}}, + { 0x0292, {1, {0x01b7 }}}, + { 0x0294, {1, {0x0241 }}}, + { 0x03ac, {1, {0x0386 }}}, + { 0x03ad, {1, {0x0388 }}}, + { 0x03ae, {1, {0x0389 }}}, + { 0x03af, {1, {0x038a }}}, + { 0x03b1, {1, {0x0391 }}}, + { 0x03b2, {2, {0x0392, 0x03d0 }}}, + { 0x03b3, {1, {0x0393 }}}, + { 0x03b4, {1, {0x0394 }}}, + { 0x03b5, {2, {0x03f5, 0x0395 }}}, + { 0x03b6, {1, {0x0396 }}}, + { 0x03b7, {1, {0x0397 }}}, + { 0x03b8, {3, 
{0x03f4, 0x0398, 0x03d1 }}}, + { 0x03b9, {3, {0x1fbe, 0x0399, 0x0345 }}}, + { 0x03ba, {2, {0x03f0, 0x039a }}}, + { 0x03bb, {1, {0x039b }}}, + { 0x03bc, {2, {0x00b5, 0x039c }}}, + { 0x03bd, {1, {0x039d }}}, + { 0x03be, {1, {0x039e }}}, + { 0x03bf, {1, {0x039f }}}, + { 0x03c0, {2, {0x03a0, 0x03d6 }}}, + { 0x03c1, {2, {0x03f1, 0x03a1 }}}, + { 0x03c3, {2, {0x03a3, 0x03c2 }}}, + { 0x03c4, {1, {0x03a4 }}}, + { 0x03c5, {1, {0x03a5 }}}, + { 0x03c6, {2, {0x03a6, 0x03d5 }}}, + { 0x03c7, {1, {0x03a7 }}}, + { 0x03c8, {1, {0x03a8 }}}, + { 0x03c9, {2, {0x03a9, 0x2126 }}}, + { 0x03ca, {1, {0x03aa }}}, + { 0x03cb, {1, {0x03ab }}}, + { 0x03cc, {1, {0x038c }}}, + { 0x03cd, {1, {0x038e }}}, + { 0x03ce, {1, {0x038f }}}, + { 0x03d9, {1, {0x03d8 }}}, + { 0x03db, {1, {0x03da }}}, + { 0x03dd, {1, {0x03dc }}}, + { 0x03df, {1, {0x03de }}}, + { 0x03e1, {1, {0x03e0 }}}, + { 0x03e3, {1, {0x03e2 }}}, + { 0x03e5, {1, {0x03e4 }}}, + { 0x03e7, {1, {0x03e6 }}}, + { 0x03e9, {1, {0x03e8 }}}, + { 0x03eb, {1, {0x03ea }}}, + { 0x03ed, {1, {0x03ec }}}, + { 0x03ef, {1, {0x03ee }}}, + { 0x03f2, {1, {0x03f9 }}}, + { 0x03f8, {1, {0x03f7 }}}, + { 0x03fb, {1, {0x03fa }}}, + { 0x0430, {1, {0x0410 }}}, + { 0x0431, {1, {0x0411 }}}, + { 0x0432, {1, {0x0412 }}}, + { 0x0433, {1, {0x0413 }}}, + { 0x0434, {1, {0x0414 }}}, + { 0x0435, {1, {0x0415 }}}, + { 0x0436, {1, {0x0416 }}}, + { 0x0437, {1, {0x0417 }}}, + { 0x0438, {1, {0x0418 }}}, + { 0x0439, {1, {0x0419 }}}, + { 0x043a, {1, {0x041a }}}, + { 0x043b, {1, {0x041b }}}, + { 0x043c, {1, {0x041c }}}, + { 0x043d, {1, {0x041d }}}, + { 0x043e, {1, {0x041e }}}, + { 0x043f, {1, {0x041f }}}, + { 0x0440, {1, {0x0420 }}}, + { 0x0441, {1, {0x0421 }}}, + { 0x0442, {1, {0x0422 }}}, + { 0x0443, {1, {0x0423 }}}, + { 0x0444, {1, {0x0424 }}}, + { 0x0445, {1, {0x0425 }}}, + { 0x0446, {1, {0x0426 }}}, + { 0x0447, {1, {0x0427 }}}, + { 0x0448, {1, {0x0428 }}}, + { 0x0449, {1, {0x0429 }}}, + { 0x044a, {1, {0x042a }}}, + { 0x044b, {1, {0x042b }}}, + { 0x044c, {1, {0x042c }}}, + { 0x044d, 
{1, {0x042d }}}, + { 0x044e, {1, {0x042e }}}, + { 0x044f, {1, {0x042f }}}, + { 0x0450, {1, {0x0400 }}}, + { 0x0451, {1, {0x0401 }}}, + { 0x0452, {1, {0x0402 }}}, + { 0x0453, {1, {0x0403 }}}, + { 0x0454, {1, {0x0404 }}}, + { 0x0455, {1, {0x0405 }}}, + { 0x0456, {1, {0x0406 }}}, + { 0x0457, {1, {0x0407 }}}, + { 0x0458, {1, {0x0408 }}}, + { 0x0459, {1, {0x0409 }}}, + { 0x045a, {1, {0x040a }}}, + { 0x045b, {1, {0x040b }}}, + { 0x045c, {1, {0x040c }}}, + { 0x045d, {1, {0x040d }}}, + { 0x045e, {1, {0x040e }}}, + { 0x045f, {1, {0x040f }}}, + { 0x0461, {1, {0x0460 }}}, + { 0x0463, {1, {0x0462 }}}, + { 0x0465, {1, {0x0464 }}}, + { 0x0467, {1, {0x0466 }}}, + { 0x0469, {1, {0x0468 }}}, + { 0x046b, {1, {0x046a }}}, + { 0x046d, {1, {0x046c }}}, + { 0x046f, {1, {0x046e }}}, + { 0x0471, {1, {0x0470 }}}, + { 0x0473, {1, {0x0472 }}}, + { 0x0475, {1, {0x0474 }}}, + { 0x0477, {1, {0x0476 }}}, + { 0x0479, {1, {0x0478 }}}, + { 0x047b, {1, {0x047a }}}, + { 0x047d, {1, {0x047c }}}, + { 0x047f, {1, {0x047e }}}, + { 0x0481, {1, {0x0480 }}}, + { 0x048b, {1, {0x048a }}}, + { 0x048d, {1, {0x048c }}}, + { 0x048f, {1, {0x048e }}}, + { 0x0491, {1, {0x0490 }}}, + { 0x0493, {1, {0x0492 }}}, + { 0x0495, {1, {0x0494 }}}, + { 0x0497, {1, {0x0496 }}}, + { 0x0499, {1, {0x0498 }}}, + { 0x049b, {1, {0x049a }}}, + { 0x049d, {1, {0x049c }}}, + { 0x049f, {1, {0x049e }}}, + { 0x04a1, {1, {0x04a0 }}}, + { 0x04a3, {1, {0x04a2 }}}, + { 0x04a5, {1, {0x04a4 }}}, + { 0x04a7, {1, {0x04a6 }}}, + { 0x04a9, {1, {0x04a8 }}}, + { 0x04ab, {1, {0x04aa }}}, + { 0x04ad, {1, {0x04ac }}}, + { 0x04af, {1, {0x04ae }}}, + { 0x04b1, {1, {0x04b0 }}}, + { 0x04b3, {1, {0x04b2 }}}, + { 0x04b5, {1, {0x04b4 }}}, + { 0x04b7, {1, {0x04b6 }}}, + { 0x04b9, {1, {0x04b8 }}}, + { 0x04bb, {1, {0x04ba }}}, + { 0x04bd, {1, {0x04bc }}}, + { 0x04bf, {1, {0x04be }}}, + { 0x04c2, {1, {0x04c1 }}}, + { 0x04c4, {1, {0x04c3 }}}, + { 0x04c6, {1, {0x04c5 }}}, + { 0x04c8, {1, {0x04c7 }}}, + { 0x04ca, {1, {0x04c9 }}}, + { 0x04cc, {1, {0x04cb }}}, + { 
0x04ce, {1, {0x04cd }}}, + { 0x04d1, {1, {0x04d0 }}}, + { 0x04d3, {1, {0x04d2 }}}, + { 0x04d5, {1, {0x04d4 }}}, + { 0x04d7, {1, {0x04d6 }}}, + { 0x04d9, {1, {0x04d8 }}}, + { 0x04db, {1, {0x04da }}}, + { 0x04dd, {1, {0x04dc }}}, + { 0x04df, {1, {0x04de }}}, + { 0x04e1, {1, {0x04e0 }}}, + { 0x04e3, {1, {0x04e2 }}}, + { 0x04e5, {1, {0x04e4 }}}, + { 0x04e7, {1, {0x04e6 }}}, + { 0x04e9, {1, {0x04e8 }}}, + { 0x04eb, {1, {0x04ea }}}, + { 0x04ed, {1, {0x04ec }}}, + { 0x04ef, {1, {0x04ee }}}, + { 0x04f1, {1, {0x04f0 }}}, + { 0x04f3, {1, {0x04f2 }}}, + { 0x04f5, {1, {0x04f4 }}}, + { 0x04f7, {1, {0x04f6 }}}, + { 0x04f9, {1, {0x04f8 }}}, + { 0x0501, {1, {0x0500 }}}, + { 0x0503, {1, {0x0502 }}}, + { 0x0505, {1, {0x0504 }}}, + { 0x0507, {1, {0x0506 }}}, + { 0x0509, {1, {0x0508 }}}, + { 0x050b, {1, {0x050a }}}, + { 0x050d, {1, {0x050c }}}, + { 0x050f, {1, {0x050e }}}, + { 0x0561, {1, {0x0531 }}}, + { 0x0562, {1, {0x0532 }}}, + { 0x0563, {1, {0x0533 }}}, + { 0x0564, {1, {0x0534 }}}, + { 0x0565, {1, {0x0535 }}}, + { 0x0566, {1, {0x0536 }}}, + { 0x0567, {1, {0x0537 }}}, + { 0x0568, {1, {0x0538 }}}, + { 0x0569, {1, {0x0539 }}}, + { 0x056a, {1, {0x053a }}}, + { 0x056b, {1, {0x053b }}}, + { 0x056c, {1, {0x053c }}}, + { 0x056d, {1, {0x053d }}}, + { 0x056e, {1, {0x053e }}}, + { 0x056f, {1, {0x053f }}}, + { 0x0570, {1, {0x0540 }}}, + { 0x0571, {1, {0x0541 }}}, + { 0x0572, {1, {0x0542 }}}, + { 0x0573, {1, {0x0543 }}}, + { 0x0574, {1, {0x0544 }}}, + { 0x0575, {1, {0x0545 }}}, + { 0x0576, {1, {0x0546 }}}, + { 0x0577, {1, {0x0547 }}}, + { 0x0578, {1, {0x0548 }}}, + { 0x0579, {1, {0x0549 }}}, + { 0x057a, {1, {0x054a }}}, + { 0x057b, {1, {0x054b }}}, + { 0x057c, {1, {0x054c }}}, + { 0x057d, {1, {0x054d }}}, + { 0x057e, {1, {0x054e }}}, + { 0x057f, {1, {0x054f }}}, + { 0x0580, {1, {0x0550 }}}, + { 0x0581, {1, {0x0551 }}}, + { 0x0582, {1, {0x0552 }}}, + { 0x0583, {1, {0x0553 }}}, + { 0x0584, {1, {0x0554 }}}, + { 0x0585, {1, {0x0555 }}}, + { 0x0586, {1, {0x0556 }}}, + { 0x1e01, {1, {0x1e00 }}}, + 
{ 0x1e03, {1, {0x1e02 }}}, + { 0x1e05, {1, {0x1e04 }}}, + { 0x1e07, {1, {0x1e06 }}}, + { 0x1e09, {1, {0x1e08 }}}, + { 0x1e0b, {1, {0x1e0a }}}, + { 0x1e0d, {1, {0x1e0c }}}, + { 0x1e0f, {1, {0x1e0e }}}, + { 0x1e11, {1, {0x1e10 }}}, + { 0x1e13, {1, {0x1e12 }}}, + { 0x1e15, {1, {0x1e14 }}}, + { 0x1e17, {1, {0x1e16 }}}, + { 0x1e19, {1, {0x1e18 }}}, + { 0x1e1b, {1, {0x1e1a }}}, + { 0x1e1d, {1, {0x1e1c }}}, + { 0x1e1f, {1, {0x1e1e }}}, + { 0x1e21, {1, {0x1e20 }}}, + { 0x1e23, {1, {0x1e22 }}}, + { 0x1e25, {1, {0x1e24 }}}, + { 0x1e27, {1, {0x1e26 }}}, + { 0x1e29, {1, {0x1e28 }}}, + { 0x1e2b, {1, {0x1e2a }}}, + { 0x1e2d, {1, {0x1e2c }}}, + { 0x1e2f, {1, {0x1e2e }}}, + { 0x1e31, {1, {0x1e30 }}}, + { 0x1e33, {1, {0x1e32 }}}, + { 0x1e35, {1, {0x1e34 }}}, + { 0x1e37, {1, {0x1e36 }}}, + { 0x1e39, {1, {0x1e38 }}}, + { 0x1e3b, {1, {0x1e3a }}}, + { 0x1e3d, {1, {0x1e3c }}}, + { 0x1e3f, {1, {0x1e3e }}}, + { 0x1e41, {1, {0x1e40 }}}, + { 0x1e43, {1, {0x1e42 }}}, + { 0x1e45, {1, {0x1e44 }}}, + { 0x1e47, {1, {0x1e46 }}}, + { 0x1e49, {1, {0x1e48 }}}, + { 0x1e4b, {1, {0x1e4a }}}, + { 0x1e4d, {1, {0x1e4c }}}, + { 0x1e4f, {1, {0x1e4e }}}, + { 0x1e51, {1, {0x1e50 }}}, + { 0x1e53, {1, {0x1e52 }}}, + { 0x1e55, {1, {0x1e54 }}}, + { 0x1e57, {1, {0x1e56 }}}, + { 0x1e59, {1, {0x1e58 }}}, + { 0x1e5b, {1, {0x1e5a }}}, + { 0x1e5d, {1, {0x1e5c }}}, + { 0x1e5f, {1, {0x1e5e }}}, + { 0x1e61, {2, {0x1e9b, 0x1e60 }}}, + { 0x1e63, {1, {0x1e62 }}}, + { 0x1e65, {1, {0x1e64 }}}, + { 0x1e67, {1, {0x1e66 }}}, + { 0x1e69, {1, {0x1e68 }}}, + { 0x1e6b, {1, {0x1e6a }}}, + { 0x1e6d, {1, {0x1e6c }}}, + { 0x1e6f, {1, {0x1e6e }}}, + { 0x1e71, {1, {0x1e70 }}}, + { 0x1e73, {1, {0x1e72 }}}, + { 0x1e75, {1, {0x1e74 }}}, + { 0x1e77, {1, {0x1e76 }}}, + { 0x1e79, {1, {0x1e78 }}}, + { 0x1e7b, {1, {0x1e7a }}}, + { 0x1e7d, {1, {0x1e7c }}}, + { 0x1e7f, {1, {0x1e7e }}}, + { 0x1e81, {1, {0x1e80 }}}, + { 0x1e83, {1, {0x1e82 }}}, + { 0x1e85, {1, {0x1e84 }}}, + { 0x1e87, {1, {0x1e86 }}}, + { 0x1e89, {1, {0x1e88 }}}, + { 0x1e8b, {1, 
{0x1e8a }}}, + { 0x1e8d, {1, {0x1e8c }}}, + { 0x1e8f, {1, {0x1e8e }}}, + { 0x1e91, {1, {0x1e90 }}}, + { 0x1e93, {1, {0x1e92 }}}, + { 0x1e95, {1, {0x1e94 }}}, + { 0x1ea1, {1, {0x1ea0 }}}, + { 0x1ea3, {1, {0x1ea2 }}}, + { 0x1ea5, {1, {0x1ea4 }}}, + { 0x1ea7, {1, {0x1ea6 }}}, + { 0x1ea9, {1, {0x1ea8 }}}, + { 0x1eab, {1, {0x1eaa }}}, + { 0x1ead, {1, {0x1eac }}}, + { 0x1eaf, {1, {0x1eae }}}, + { 0x1eb1, {1, {0x1eb0 }}}, + { 0x1eb3, {1, {0x1eb2 }}}, + { 0x1eb5, {1, {0x1eb4 }}}, + { 0x1eb7, {1, {0x1eb6 }}}, + { 0x1eb9, {1, {0x1eb8 }}}, + { 0x1ebb, {1, {0x1eba }}}, + { 0x1ebd, {1, {0x1ebc }}}, + { 0x1ebf, {1, {0x1ebe }}}, + { 0x1ec1, {1, {0x1ec0 }}}, + { 0x1ec3, {1, {0x1ec2 }}}, + { 0x1ec5, {1, {0x1ec4 }}}, + { 0x1ec7, {1, {0x1ec6 }}}, + { 0x1ec9, {1, {0x1ec8 }}}, + { 0x1ecb, {1, {0x1eca }}}, + { 0x1ecd, {1, {0x1ecc }}}, + { 0x1ecf, {1, {0x1ece }}}, + { 0x1ed1, {1, {0x1ed0 }}}, + { 0x1ed3, {1, {0x1ed2 }}}, + { 0x1ed5, {1, {0x1ed4 }}}, + { 0x1ed7, {1, {0x1ed6 }}}, + { 0x1ed9, {1, {0x1ed8 }}}, + { 0x1edb, {1, {0x1eda }}}, + { 0x1edd, {1, {0x1edc }}}, + { 0x1edf, {1, {0x1ede }}}, + { 0x1ee1, {1, {0x1ee0 }}}, + { 0x1ee3, {1, {0x1ee2 }}}, + { 0x1ee5, {1, {0x1ee4 }}}, + { 0x1ee7, {1, {0x1ee6 }}}, + { 0x1ee9, {1, {0x1ee8 }}}, + { 0x1eeb, {1, {0x1eea }}}, + { 0x1eed, {1, {0x1eec }}}, + { 0x1eef, {1, {0x1eee }}}, + { 0x1ef1, {1, {0x1ef0 }}}, + { 0x1ef3, {1, {0x1ef2 }}}, + { 0x1ef5, {1, {0x1ef4 }}}, + { 0x1ef7, {1, {0x1ef6 }}}, + { 0x1ef9, {1, {0x1ef8 }}}, + { 0x1f00, {1, {0x1f08 }}}, + { 0x1f01, {1, {0x1f09 }}}, + { 0x1f02, {1, {0x1f0a }}}, + { 0x1f03, {1, {0x1f0b }}}, + { 0x1f04, {1, {0x1f0c }}}, + { 0x1f05, {1, {0x1f0d }}}, + { 0x1f06, {1, {0x1f0e }}}, + { 0x1f07, {1, {0x1f0f }}}, + { 0x1f10, {1, {0x1f18 }}}, + { 0x1f11, {1, {0x1f19 }}}, + { 0x1f12, {1, {0x1f1a }}}, + { 0x1f13, {1, {0x1f1b }}}, + { 0x1f14, {1, {0x1f1c }}}, + { 0x1f15, {1, {0x1f1d }}}, + { 0x1f20, {1, {0x1f28 }}}, + { 0x1f21, {1, {0x1f29 }}}, + { 0x1f22, {1, {0x1f2a }}}, + { 0x1f23, {1, {0x1f2b }}}, + { 0x1f24, 
{1, {0x1f2c }}}, + { 0x1f25, {1, {0x1f2d }}}, + { 0x1f26, {1, {0x1f2e }}}, + { 0x1f27, {1, {0x1f2f }}}, + { 0x1f30, {1, {0x1f38 }}}, + { 0x1f31, {1, {0x1f39 }}}, + { 0x1f32, {1, {0x1f3a }}}, + { 0x1f33, {1, {0x1f3b }}}, + { 0x1f34, {1, {0x1f3c }}}, + { 0x1f35, {1, {0x1f3d }}}, + { 0x1f36, {1, {0x1f3e }}}, + { 0x1f37, {1, {0x1f3f }}}, + { 0x1f40, {1, {0x1f48 }}}, + { 0x1f41, {1, {0x1f49 }}}, + { 0x1f42, {1, {0x1f4a }}}, + { 0x1f43, {1, {0x1f4b }}}, + { 0x1f44, {1, {0x1f4c }}}, + { 0x1f45, {1, {0x1f4d }}}, + { 0x1f51, {1, {0x1f59 }}}, + { 0x1f53, {1, {0x1f5b }}}, + { 0x1f55, {1, {0x1f5d }}}, + { 0x1f57, {1, {0x1f5f }}}, + { 0x1f60, {1, {0x1f68 }}}, + { 0x1f61, {1, {0x1f69 }}}, + { 0x1f62, {1, {0x1f6a }}}, + { 0x1f63, {1, {0x1f6b }}}, + { 0x1f64, {1, {0x1f6c }}}, + { 0x1f65, {1, {0x1f6d }}}, + { 0x1f66, {1, {0x1f6e }}}, + { 0x1f67, {1, {0x1f6f }}}, + { 0x1f70, {1, {0x1fba }}}, + { 0x1f71, {1, {0x1fbb }}}, + { 0x1f72, {1, {0x1fc8 }}}, + { 0x1f73, {1, {0x1fc9 }}}, + { 0x1f74, {1, {0x1fca }}}, + { 0x1f75, {1, {0x1fcb }}}, + { 0x1f76, {1, {0x1fda }}}, + { 0x1f77, {1, {0x1fdb }}}, + { 0x1f78, {1, {0x1ff8 }}}, + { 0x1f79, {1, {0x1ff9 }}}, + { 0x1f7a, {1, {0x1fea }}}, + { 0x1f7b, {1, {0x1feb }}}, + { 0x1f7c, {1, {0x1ffa }}}, + { 0x1f7d, {1, {0x1ffb }}}, + { 0x1fb0, {1, {0x1fb8 }}}, + { 0x1fb1, {1, {0x1fb9 }}}, + { 0x1fd0, {1, {0x1fd8 }}}, + { 0x1fd1, {1, {0x1fd9 }}}, + { 0x1fe0, {1, {0x1fe8 }}}, + { 0x1fe1, {1, {0x1fe9 }}}, + { 0x1fe5, {1, {0x1fec }}}, + { 0x2170, {1, {0x2160 }}}, + { 0x2171, {1, {0x2161 }}}, + { 0x2172, {1, {0x2162 }}}, + { 0x2173, {1, {0x2163 }}}, + { 0x2174, {1, {0x2164 }}}, + { 0x2175, {1, {0x2165 }}}, + { 0x2176, {1, {0x2166 }}}, + { 0x2177, {1, {0x2167 }}}, + { 0x2178, {1, {0x2168 }}}, + { 0x2179, {1, {0x2169 }}}, + { 0x217a, {1, {0x216a }}}, + { 0x217b, {1, {0x216b }}}, + { 0x217c, {1, {0x216c }}}, + { 0x217d, {1, {0x216d }}}, + { 0x217e, {1, {0x216e }}}, + { 0x217f, {1, {0x216f }}}, + { 0x24d0, {1, {0x24b6 }}}, + { 0x24d1, {1, {0x24b7 }}}, + { 
0x24d2, {1, {0x24b8 }}}, + { 0x24d3, {1, {0x24b9 }}}, + { 0x24d4, {1, {0x24ba }}}, + { 0x24d5, {1, {0x24bb }}}, + { 0x24d6, {1, {0x24bc }}}, + { 0x24d7, {1, {0x24bd }}}, + { 0x24d8, {1, {0x24be }}}, + { 0x24d9, {1, {0x24bf }}}, + { 0x24da, {1, {0x24c0 }}}, + { 0x24db, {1, {0x24c1 }}}, + { 0x24dc, {1, {0x24c2 }}}, + { 0x24dd, {1, {0x24c3 }}}, + { 0x24de, {1, {0x24c4 }}}, + { 0x24df, {1, {0x24c5 }}}, + { 0x24e0, {1, {0x24c6 }}}, + { 0x24e1, {1, {0x24c7 }}}, + { 0x24e2, {1, {0x24c8 }}}, + { 0x24e3, {1, {0x24c9 }}}, + { 0x24e4, {1, {0x24ca }}}, + { 0x24e5, {1, {0x24cb }}}, + { 0x24e6, {1, {0x24cc }}}, + { 0x24e7, {1, {0x24cd }}}, + { 0x24e8, {1, {0x24ce }}}, + { 0x24e9, {1, {0x24cf }}}, + { 0x2c30, {1, {0x2c00 }}}, + { 0x2c31, {1, {0x2c01 }}}, + { 0x2c32, {1, {0x2c02 }}}, + { 0x2c33, {1, {0x2c03 }}}, + { 0x2c34, {1, {0x2c04 }}}, + { 0x2c35, {1, {0x2c05 }}}, + { 0x2c36, {1, {0x2c06 }}}, + { 0x2c37, {1, {0x2c07 }}}, + { 0x2c38, {1, {0x2c08 }}}, + { 0x2c39, {1, {0x2c09 }}}, + { 0x2c3a, {1, {0x2c0a }}}, + { 0x2c3b, {1, {0x2c0b }}}, + { 0x2c3c, {1, {0x2c0c }}}, + { 0x2c3d, {1, {0x2c0d }}}, + { 0x2c3e, {1, {0x2c0e }}}, + { 0x2c3f, {1, {0x2c0f }}}, + { 0x2c40, {1, {0x2c10 }}}, + { 0x2c41, {1, {0x2c11 }}}, + { 0x2c42, {1, {0x2c12 }}}, + { 0x2c43, {1, {0x2c13 }}}, + { 0x2c44, {1, {0x2c14 }}}, + { 0x2c45, {1, {0x2c15 }}}, + { 0x2c46, {1, {0x2c16 }}}, + { 0x2c47, {1, {0x2c17 }}}, + { 0x2c48, {1, {0x2c18 }}}, + { 0x2c49, {1, {0x2c19 }}}, + { 0x2c4a, {1, {0x2c1a }}}, + { 0x2c4b, {1, {0x2c1b }}}, + { 0x2c4c, {1, {0x2c1c }}}, + { 0x2c4d, {1, {0x2c1d }}}, + { 0x2c4e, {1, {0x2c1e }}}, + { 0x2c4f, {1, {0x2c1f }}}, + { 0x2c50, {1, {0x2c20 }}}, + { 0x2c51, {1, {0x2c21 }}}, + { 0x2c52, {1, {0x2c22 }}}, + { 0x2c53, {1, {0x2c23 }}}, + { 0x2c54, {1, {0x2c24 }}}, + { 0x2c55, {1, {0x2c25 }}}, + { 0x2c56, {1, {0x2c26 }}}, + { 0x2c57, {1, {0x2c27 }}}, + { 0x2c58, {1, {0x2c28 }}}, + { 0x2c59, {1, {0x2c29 }}}, + { 0x2c5a, {1, {0x2c2a }}}, + { 0x2c5b, {1, {0x2c2b }}}, + { 0x2c5c, {1, {0x2c2c }}}, + 
{ 0x2c5d, {1, {0x2c2d }}}, + { 0x2c5e, {1, {0x2c2e }}}, + { 0x2c81, {1, {0x2c80 }}}, + { 0x2c83, {1, {0x2c82 }}}, + { 0x2c85, {1, {0x2c84 }}}, + { 0x2c87, {1, {0x2c86 }}}, + { 0x2c89, {1, {0x2c88 }}}, + { 0x2c8b, {1, {0x2c8a }}}, + { 0x2c8d, {1, {0x2c8c }}}, + { 0x2c8f, {1, {0x2c8e }}}, + { 0x2c91, {1, {0x2c90 }}}, + { 0x2c93, {1, {0x2c92 }}}, + { 0x2c95, {1, {0x2c94 }}}, + { 0x2c97, {1, {0x2c96 }}}, + { 0x2c99, {1, {0x2c98 }}}, + { 0x2c9b, {1, {0x2c9a }}}, + { 0x2c9d, {1, {0x2c9c }}}, + { 0x2c9f, {1, {0x2c9e }}}, + { 0x2ca1, {1, {0x2ca0 }}}, + { 0x2ca3, {1, {0x2ca2 }}}, + { 0x2ca5, {1, {0x2ca4 }}}, + { 0x2ca7, {1, {0x2ca6 }}}, + { 0x2ca9, {1, {0x2ca8 }}}, + { 0x2cab, {1, {0x2caa }}}, + { 0x2cad, {1, {0x2cac }}}, + { 0x2caf, {1, {0x2cae }}}, + { 0x2cb1, {1, {0x2cb0 }}}, + { 0x2cb3, {1, {0x2cb2 }}}, + { 0x2cb5, {1, {0x2cb4 }}}, + { 0x2cb7, {1, {0x2cb6 }}}, + { 0x2cb9, {1, {0x2cb8 }}}, + { 0x2cbb, {1, {0x2cba }}}, + { 0x2cbd, {1, {0x2cbc }}}, + { 0x2cbf, {1, {0x2cbe }}}, + { 0x2cc1, {1, {0x2cc0 }}}, + { 0x2cc3, {1, {0x2cc2 }}}, + { 0x2cc5, {1, {0x2cc4 }}}, + { 0x2cc7, {1, {0x2cc6 }}}, + { 0x2cc9, {1, {0x2cc8 }}}, + { 0x2ccb, {1, {0x2cca }}}, + { 0x2ccd, {1, {0x2ccc }}}, + { 0x2ccf, {1, {0x2cce }}}, + { 0x2cd1, {1, {0x2cd0 }}}, + { 0x2cd3, {1, {0x2cd2 }}}, + { 0x2cd5, {1, {0x2cd4 }}}, + { 0x2cd7, {1, {0x2cd6 }}}, + { 0x2cd9, {1, {0x2cd8 }}}, + { 0x2cdb, {1, {0x2cda }}}, + { 0x2cdd, {1, {0x2cdc }}}, + { 0x2cdf, {1, {0x2cde }}}, + { 0x2ce1, {1, {0x2ce0 }}}, + { 0x2ce3, {1, {0x2ce2 }}}, + { 0x2d00, {1, {0x10a0 }}}, + { 0x2d01, {1, {0x10a1 }}}, + { 0x2d02, {1, {0x10a2 }}}, + { 0x2d03, {1, {0x10a3 }}}, + { 0x2d04, {1, {0x10a4 }}}, + { 0x2d05, {1, {0x10a5 }}}, + { 0x2d06, {1, {0x10a6 }}}, + { 0x2d07, {1, {0x10a7 }}}, + { 0x2d08, {1, {0x10a8 }}}, + { 0x2d09, {1, {0x10a9 }}}, + { 0x2d0a, {1, {0x10aa }}}, + { 0x2d0b, {1, {0x10ab }}}, + { 0x2d0c, {1, {0x10ac }}}, + { 0x2d0d, {1, {0x10ad }}}, + { 0x2d0e, {1, {0x10ae }}}, + { 0x2d0f, {1, {0x10af }}}, + { 0x2d10, {1, {0x10b0 }}}, 
+ { 0x2d11, {1, {0x10b1 }}}, + { 0x2d12, {1, {0x10b2 }}}, + { 0x2d13, {1, {0x10b3 }}}, + { 0x2d14, {1, {0x10b4 }}}, + { 0x2d15, {1, {0x10b5 }}}, + { 0x2d16, {1, {0x10b6 }}}, + { 0x2d17, {1, {0x10b7 }}}, + { 0x2d18, {1, {0x10b8 }}}, + { 0x2d19, {1, {0x10b9 }}}, + { 0x2d1a, {1, {0x10ba }}}, + { 0x2d1b, {1, {0x10bb }}}, + { 0x2d1c, {1, {0x10bc }}}, + { 0x2d1d, {1, {0x10bd }}}, + { 0x2d1e, {1, {0x10be }}}, + { 0x2d1f, {1, {0x10bf }}}, + { 0x2d20, {1, {0x10c0 }}}, + { 0x2d21, {1, {0x10c1 }}}, + { 0x2d22, {1, {0x10c2 }}}, + { 0x2d23, {1, {0x10c3 }}}, + { 0x2d24, {1, {0x10c4 }}}, + { 0x2d25, {1, {0x10c5 }}}, + { 0xff41, {1, {0xff21 }}}, + { 0xff42, {1, {0xff22 }}}, + { 0xff43, {1, {0xff23 }}}, + { 0xff44, {1, {0xff24 }}}, + { 0xff45, {1, {0xff25 }}}, + { 0xff46, {1, {0xff26 }}}, + { 0xff47, {1, {0xff27 }}}, + { 0xff48, {1, {0xff28 }}}, + { 0xff49, {1, {0xff29 }}}, + { 0xff4a, {1, {0xff2a }}}, + { 0xff4b, {1, {0xff2b }}}, + { 0xff4c, {1, {0xff2c }}}, + { 0xff4d, {1, {0xff2d }}}, + { 0xff4e, {1, {0xff2e }}}, + { 0xff4f, {1, {0xff2f }}}, + { 0xff50, {1, {0xff30 }}}, + { 0xff51, {1, {0xff31 }}}, + { 0xff52, {1, {0xff32 }}}, + { 0xff53, {1, {0xff33 }}}, + { 0xff54, {1, {0xff34 }}}, + { 0xff55, {1, {0xff35 }}}, + { 0xff56, {1, {0xff36 }}}, + { 0xff57, {1, {0xff37 }}}, + { 0xff58, {1, {0xff38 }}}, + { 0xff59, {1, {0xff39 }}}, + { 0xff5a, {1, {0xff3a }}}, + { 0x10428, {1, {0x10400 }}}, + { 0x10429, {1, {0x10401 }}}, + { 0x1042a, {1, {0x10402 }}}, + { 0x1042b, {1, {0x10403 }}}, + { 0x1042c, {1, {0x10404 }}}, + { 0x1042d, {1, {0x10405 }}}, + { 0x1042e, {1, {0x10406 }}}, + { 0x1042f, {1, {0x10407 }}}, + { 0x10430, {1, {0x10408 }}}, + { 0x10431, {1, {0x10409 }}}, + { 0x10432, {1, {0x1040a }}}, + { 0x10433, {1, {0x1040b }}}, + { 0x10434, {1, {0x1040c }}}, + { 0x10435, {1, {0x1040d }}}, + { 0x10436, {1, {0x1040e }}}, + { 0x10437, {1, {0x1040f }}}, + { 0x10438, {1, {0x10410 }}}, + { 0x10439, {1, {0x10411 }}}, + { 0x1043a, {1, {0x10412 }}}, + { 0x1043b, {1, {0x10413 }}}, + { 0x1043c, {1, 
{0x10414 }}}, + { 0x1043d, {1, {0x10415 }}}, + { 0x1043e, {1, {0x10416 }}}, + { 0x1043f, {1, {0x10417 }}}, + { 0x10440, {1, {0x10418 }}}, + { 0x10441, {1, {0x10419 }}}, + { 0x10442, {1, {0x1041a }}}, + { 0x10443, {1, {0x1041b }}}, + { 0x10444, {1, {0x1041c }}}, + { 0x10445, {1, {0x1041d }}}, + { 0x10446, {1, {0x1041e }}}, + { 0x10447, {1, {0x1041f }}}, + { 0x10448, {1, {0x10420 }}}, + { 0x10449, {1, {0x10421 }}}, + { 0x1044a, {1, {0x10422 }}}, + { 0x1044b, {1, {0x10423 }}}, + { 0x1044c, {1, {0x10424 }}}, + { 0x1044d, {1, {0x10425 }}}, + { 0x1044e, {1, {0x10426 }}}, + { 0x1044f, {1, {0x10427 }}} +}; + +static const CaseUnfold_11_Type CaseUnfold_11_Locale[] = { + { 0x0069, {1, {0x0049 }}} +}; + +static const CaseUnfold_12_Type CaseUnfold_12[] = { + { {0x0061, 0x02be}, {1, {0x1e9a }}}, + { {0x0066, 0x0066}, {1, {0xfb00 }}}, + { {0x0066, 0x0069}, {1, {0xfb01 }}}, + { {0x0066, 0x006c}, {1, {0xfb02 }}}, + { {0x0068, 0x0331}, {1, {0x1e96 }}}, + { {0x006a, 0x030c}, {1, {0x01f0 }}}, + { {0x0073, 0x0073}, {1, {0x00df }}}, + { {0x0073, 0x0074}, {2, {0xfb05, 0xfb06 }}}, + { {0x0074, 0x0308}, {1, {0x1e97 }}}, + { {0x0077, 0x030a}, {1, {0x1e98 }}}, + { {0x0079, 0x030a}, {1, {0x1e99 }}}, + { {0x02bc, 0x006e}, {1, {0x0149 }}}, + { {0x03ac, 0x03b9}, {1, {0x1fb4 }}}, + { {0x03ae, 0x03b9}, {1, {0x1fc4 }}}, + { {0x03b1, 0x0342}, {1, {0x1fb6 }}}, + { {0x03b1, 0x03b9}, {2, {0x1fb3, 0x1fbc }}}, + { {0x03b7, 0x0342}, {1, {0x1fc6 }}}, + { {0x03b7, 0x03b9}, {2, {0x1fc3, 0x1fcc }}}, + { {0x03b9, 0x0342}, {1, {0x1fd6 }}}, + { {0x03c1, 0x0313}, {1, {0x1fe4 }}}, + { {0x03c5, 0x0313}, {1, {0x1f50 }}}, + { {0x03c5, 0x0342}, {1, {0x1fe6 }}}, + { {0x03c9, 0x0342}, {1, {0x1ff6 }}}, + { {0x03c9, 0x03b9}, {2, {0x1ff3, 0x1ffc }}}, + { {0x03ce, 0x03b9}, {1, {0x1ff4 }}}, + { {0x0565, 0x0582}, {1, {0x0587 }}}, + { {0x0574, 0x0565}, {1, {0xfb14 }}}, + { {0x0574, 0x056b}, {1, {0xfb15 }}}, + { {0x0574, 0x056d}, {1, {0xfb17 }}}, + { {0x0574, 0x0576}, {1, {0xfb13 }}}, + { {0x057e, 0x0576}, {1, {0xfb16 }}}, + { 
{0x1f00, 0x03b9}, {2, {0x1f88, 0x1f80 }}}, + { {0x1f01, 0x03b9}, {2, {0x1f81, 0x1f89 }}}, + { {0x1f02, 0x03b9}, {2, {0x1f82, 0x1f8a }}}, + { {0x1f03, 0x03b9}, {2, {0x1f83, 0x1f8b }}}, + { {0x1f04, 0x03b9}, {2, {0x1f84, 0x1f8c }}}, + { {0x1f05, 0x03b9}, {2, {0x1f85, 0x1f8d }}}, + { {0x1f06, 0x03b9}, {2, {0x1f86, 0x1f8e }}}, + { {0x1f07, 0x03b9}, {2, {0x1f87, 0x1f8f }}}, + { {0x1f20, 0x03b9}, {2, {0x1f90, 0x1f98 }}}, + { {0x1f21, 0x03b9}, {2, {0x1f91, 0x1f99 }}}, + { {0x1f22, 0x03b9}, {2, {0x1f92, 0x1f9a }}}, + { {0x1f23, 0x03b9}, {2, {0x1f93, 0x1f9b }}}, + { {0x1f24, 0x03b9}, {2, {0x1f94, 0x1f9c }}}, + { {0x1f25, 0x03b9}, {2, {0x1f95, 0x1f9d }}}, + { {0x1f26, 0x03b9}, {2, {0x1f96, 0x1f9e }}}, + { {0x1f27, 0x03b9}, {2, {0x1f97, 0x1f9f }}}, + { {0x1f60, 0x03b9}, {2, {0x1fa0, 0x1fa8 }}}, + { {0x1f61, 0x03b9}, {2, {0x1fa1, 0x1fa9 }}}, + { {0x1f62, 0x03b9}, {2, {0x1fa2, 0x1faa }}}, + { {0x1f63, 0x03b9}, {2, {0x1fa3, 0x1fab }}}, + { {0x1f64, 0x03b9}, {2, {0x1fa4, 0x1fac }}}, + { {0x1f65, 0x03b9}, {2, {0x1fa5, 0x1fad }}}, + { {0x1f66, 0x03b9}, {2, {0x1fa6, 0x1fae }}}, + { {0x1f67, 0x03b9}, {2, {0x1fa7, 0x1faf }}}, + { {0x1f70, 0x03b9}, {1, {0x1fb2 }}}, + { {0x1f74, 0x03b9}, {1, {0x1fc2 }}}, + { {0x1f7c, 0x03b9}, {1, {0x1ff2 }}} +}; + +static const CaseUnfold_12_Type CaseUnfold_12_Locale[] = { + { {0x0069, 0x0307}, {1, {0x0130 }}} +}; + +static const CaseUnfold_13_Type CaseUnfold_13[] = { + { {0x0066, 0x0066, 0x0069}, {1, {0xfb03 }}}, + { {0x0066, 0x0066, 0x006c}, {1, {0xfb04 }}}, + { {0x03b1, 0x0342, 0x03b9}, {1, {0x1fb7 }}}, + { {0x03b7, 0x0342, 0x03b9}, {1, {0x1fc7 }}}, + { {0x03b9, 0x0308, 0x0300}, {1, {0x1fd2 }}}, + { {0x03b9, 0x0308, 0x0301}, {2, {0x0390, 0x1fd3 }}}, + { {0x03b9, 0x0308, 0x0342}, {1, {0x1fd7 }}}, + { {0x03c5, 0x0308, 0x0300}, {1, {0x1fe2 }}}, + { {0x03c5, 0x0308, 0x0301}, {2, {0x03b0, 0x1fe3 }}}, + { {0x03c5, 0x0308, 0x0342}, {1, {0x1fe7 }}}, + { {0x03c5, 0x0313, 0x0300}, {1, {0x1f52 }}}, + { {0x03c5, 0x0313, 0x0301}, {1, {0x1f54 }}}, + { {0x03c5, 
0x0313, 0x0342}, {1, {0x1f56 }}}, + { {0x03c9, 0x0342, 0x03b9}, {1, {0x1ff7 }}} +}; + + +static PosixBracketEntryType HashEntryData[] = { + { (UChar* )"NEWLINE", 0, 7 }, + { (UChar* )"Alpha", 1, 5 }, + { (UChar* )"Blank", 2, 5 }, + { (UChar* )"Cntrl", 3, 5 }, + { (UChar* )"Digit", 4, 5 }, + { (UChar* )"Graph", 5, 5 }, + { (UChar* )"Lower", 6, 5 }, + { (UChar* )"Print", 7, 5 }, + { (UChar* )"Punct", 8, 5 }, + { (UChar* )"Space", 9, 5 }, + { (UChar* )"Upper", 10, 5 }, + { (UChar* )"XDigit", 11, 6 }, + { (UChar* )"Word", 12, 4 }, + { (UChar* )"Alnum", 13, 5 }, + { (UChar* )"ASCII", 14, 5 }, + +#ifdef USE_UNICODE_PROPERTIES + { (UChar* )"Any", 15, 3 }, + { (UChar* )"Assigned", 16, 8 }, + { (UChar* )"C", 17, 1 }, + { (UChar* )"Cc", 18, 2 }, + { (UChar* )"Cf", 19, 2 }, + { (UChar* )"Cn", 20, 2 }, + { (UChar* )"Co", 21, 2 }, + { (UChar* )"Cs", 22, 2 }, + { (UChar* )"L", 23, 1 }, + { (UChar* )"Ll", 24, 2 }, + { (UChar* )"Lm", 25, 2 }, + { (UChar* )"Lo", 26, 2 }, + { (UChar* )"Lt", 27, 2 }, + { (UChar* )"Lu", 28, 2 }, + { (UChar* )"M", 29, 1 }, + { (UChar* )"Mc", 30, 2 }, + { (UChar* )"Me", 31, 2 }, + { (UChar* )"Mn", 32, 2 }, + { (UChar* )"N", 33, 1 }, + { (UChar* )"Nd", 34, 2 }, + { (UChar* )"Nl", 35, 2 }, + { (UChar* )"No", 36, 2 }, + { (UChar* )"P", 37, 1 }, + { (UChar* )"Pc", 38, 2 }, + { (UChar* )"Pd", 39, 2 }, + { (UChar* )"Pe", 40, 2 }, + { (UChar* )"Pf", 41, 2 }, + { (UChar* )"Pi", 42, 2 }, + { (UChar* )"Po", 43, 2 }, + { (UChar* )"Ps", 44, 2 }, + { (UChar* )"S", 45, 1 }, + { (UChar* )"Sc", 46, 2 }, + { (UChar* )"Sk", 47, 2 }, + { (UChar* )"Sm", 48, 2 }, + { (UChar* )"So", 49, 2 }, + { (UChar* )"Z", 50, 1 }, + { (UChar* )"Zl", 51, 2 }, + { (UChar* )"Zp", 52, 2 }, + { (UChar* )"Zs", 53, 2 }, + { (UChar* )"Arabic", 54, 6 }, + { (UChar* )"Armenian", 55, 8 }, + { (UChar* )"Bengali", 56, 7 }, + { (UChar* )"Bopomofo", 57, 8 }, + { (UChar* )"Braille", 58, 7 }, + { (UChar* )"Buginese", 59, 8 }, + { (UChar* )"Buhid", 60, 5 }, + { (UChar* )"Canadian_Aboriginal", 61, 19 }, + 
{ (UChar* )"Cherokee", 62, 8 }, + { (UChar* )"Common", 63, 6 }, + { (UChar* )"Coptic", 64, 6 }, + { (UChar* )"Cypriot", 65, 7 }, + { (UChar* )"Cyrillic", 66, 8 }, + { (UChar* )"Deseret", 67, 7 }, + { (UChar* )"Devanagari", 68, 10 }, + { (UChar* )"Ethiopic", 69, 8 }, + { (UChar* )"Georgian", 70, 8 }, + { (UChar* )"Glagolitic", 71, 10 }, + { (UChar* )"Gothic", 72, 6 }, + { (UChar* )"Greek", 73, 5 }, + { (UChar* )"Gujarati", 74, 8 }, + { (UChar* )"Gurmukhi", 75, 8 }, + { (UChar* )"Han", 76, 3 }, + { (UChar* )"Hangul", 77, 6 }, + { (UChar* )"Hanunoo", 78, 7 }, + { (UChar* )"Hebrew", 79, 6 }, + { (UChar* )"Hiragana", 80, 8 }, + { (UChar* )"Inherited", 81, 9 }, + { (UChar* )"Kannada", 82, 7 }, + { (UChar* )"Katakana", 83, 8 }, + { (UChar* )"Kharoshthi", 84, 10 }, + { (UChar* )"Khmer", 85, 5 }, + { (UChar* )"Lao", 86, 3 }, + { (UChar* )"Latin", 87, 5 }, + { (UChar* )"Limbu", 88, 5 }, + { (UChar* )"Linear_B", 89, 8 }, + { (UChar* )"Malayalam", 90, 9 }, + { (UChar* )"Mongolian", 91, 9 }, + { (UChar* )"Myanmar", 92, 7 }, + { (UChar* )"New_Tai_Lue", 93, 11 }, + { (UChar* )"Ogham", 94, 5 }, + { (UChar* )"Old_Italic", 95, 10 }, + { (UChar* )"Old_Persian", 96, 11 }, + { (UChar* )"Oriya", 97, 5 }, + { (UChar* )"Osmanya", 98, 7 }, + { (UChar* )"Runic", 99, 5 }, + { (UChar* )"Shavian", 100, 7 }, + { (UChar* )"Sinhala", 101, 7 }, + { (UChar* )"Syloti_Nagri", 102, 12 }, + { (UChar* )"Syriac", 103, 6 }, + { (UChar* )"Tagalog", 104, 7 }, + { (UChar* )"Tagbanwa", 105, 8 }, + { (UChar* )"Tai_Le", 106, 6 }, + { (UChar* )"Tamil", 107, 5 }, + { (UChar* )"Telugu", 108, 6 }, + { (UChar* )"Thaana", 109, 6 }, + { (UChar* )"Thai", 110, 4 }, + { (UChar* )"Tibetan", 111, 7 }, + { (UChar* )"Tifinagh", 112, 8 }, + { (UChar* )"Ugaritic", 113, 8 }, + { (UChar* )"Yi", 114, 2 }, +#endif /* USE_UNICODE_PROPERTIES */ + { (UChar* )NULL, -1, 0 } +}; + +#ifdef USE_UNICODE_PROPERTIES +#define CODE_RANGES_NUM 115 +#else +#define CODE_RANGES_NUM 15 +#endif + +static const OnigCodePoint* 
CodeRanges[CODE_RANGES_NUM]; /* one code-range table pointer per ctype number; filled in by init_code_range_array() */ +static int CodeRangeTableInited = 0; /* set to 1 at the end of init_code_range_array() */ + /* Fill CodeRanges[] so that slot i points at the CR_* table whose name carries ctype number i in HashEntryData. Slots 0..14 are always assigned; slots 15..114 only when USE_UNICODE_PROPERTIES is defined. The assignments are bracketed by THREAD_ATOMIC_START / THREAD_ATOMIC_END. */ +static void init_code_range_array(void) { + THREAD_ATOMIC_START; + + CodeRanges[0] = CR_NEWLINE; + CodeRanges[1] = CR_Alpha; + CodeRanges[2] = CR_Blank; + CodeRanges[3] = CR_Cntrl; + CodeRanges[4] = CR_Digit; + CodeRanges[5] = CR_Graph; + CodeRanges[6] = CR_Lower; + CodeRanges[7] = CR_Print; + CodeRanges[8] = CR_Punct; + CodeRanges[9] = CR_Space; + CodeRanges[10] = CR_Upper; + CodeRanges[11] = CR_XDigit; + CodeRanges[12] = CR_Word; + CodeRanges[13] = CR_Alnum; + CodeRanges[14] = CR_ASCII; + +#ifdef USE_UNICODE_PROPERTIES + CodeRanges[15] = CR_Any; + CodeRanges[16] = CR_Assigned; + CodeRanges[17] = CR_C; + CodeRanges[18] = CR_Cc; + CodeRanges[19] = CR_Cf; + CodeRanges[20] = CR_Cn; + CodeRanges[21] = CR_Co; + CodeRanges[22] = CR_Cs; + CodeRanges[23] = CR_L; + CodeRanges[24] = CR_Ll; + CodeRanges[25] = CR_Lm; + CodeRanges[26] = CR_Lo; + CodeRanges[27] = CR_Lt; + CodeRanges[28] = CR_Lu; + CodeRanges[29] = CR_M; + CodeRanges[30] = CR_Mc; + CodeRanges[31] = CR_Me; + CodeRanges[32] = CR_Mn; + CodeRanges[33] = CR_N; + CodeRanges[34] = CR_Nd; + CodeRanges[35] = CR_Nl; + CodeRanges[36] = CR_No; + CodeRanges[37] = CR_P; + CodeRanges[38] = CR_Pc; + CodeRanges[39] = CR_Pd; + CodeRanges[40] = CR_Pe; + CodeRanges[41] = CR_Pf; + CodeRanges[42] = CR_Pi; + CodeRanges[43] = CR_Po; + CodeRanges[44] = CR_Ps; + CodeRanges[45] = CR_S; + CodeRanges[46] = CR_Sc; + CodeRanges[47] = CR_Sk; + CodeRanges[48] = CR_Sm; + CodeRanges[49] = CR_So; + CodeRanges[50] = CR_Z; + CodeRanges[51] = CR_Zl; + CodeRanges[52] = CR_Zp; + CodeRanges[53] = CR_Zs; + CodeRanges[54] = CR_Arabic; + CodeRanges[55] = CR_Armenian; + CodeRanges[56] = CR_Bengali; + CodeRanges[57] = CR_Bopomofo; + CodeRanges[58] = CR_Braille; + CodeRanges[59] = CR_Buginese; + CodeRanges[60] = CR_Buhid; + CodeRanges[61] = CR_Canadian_Aboriginal; + CodeRanges[62] = CR_Cherokee; + CodeRanges[63] = CR_Common; + CodeRanges[64] = CR_Coptic; + CodeRanges[65] = CR_Cypriot; +
CodeRanges[66] = CR_Cyrillic; + CodeRanges[67] = CR_Deseret; + CodeRanges[68] = CR_Devanagari; + CodeRanges[69] = CR_Ethiopic; + CodeRanges[70] = CR_Georgian; + CodeRanges[71] = CR_Glagolitic; + CodeRanges[72] = CR_Gothic; + CodeRanges[73] = CR_Greek; + CodeRanges[74] = CR_Gujarati; + CodeRanges[75] = CR_Gurmukhi; + CodeRanges[76] = CR_Han; + CodeRanges[77] = CR_Hangul; + CodeRanges[78] = CR_Hanunoo; + CodeRanges[79] = CR_Hebrew; + CodeRanges[80] = CR_Hiragana; + CodeRanges[81] = CR_Inherited; + CodeRanges[82] = CR_Kannada; + CodeRanges[83] = CR_Katakana; + CodeRanges[84] = CR_Kharoshthi; + CodeRanges[85] = CR_Khmer; + CodeRanges[86] = CR_Lao; + CodeRanges[87] = CR_Latin; + CodeRanges[88] = CR_Limbu; + CodeRanges[89] = CR_Linear_B; + CodeRanges[90] = CR_Malayalam; + CodeRanges[91] = CR_Mongolian; + CodeRanges[92] = CR_Myanmar; + CodeRanges[93] = CR_New_Tai_Lue; + CodeRanges[94] = CR_Ogham; + CodeRanges[95] = CR_Old_Italic; + CodeRanges[96] = CR_Old_Persian; + CodeRanges[97] = CR_Oriya; + CodeRanges[98] = CR_Osmanya; + CodeRanges[99] = CR_Runic; + CodeRanges[100] = CR_Shavian; + CodeRanges[101] = CR_Sinhala; + CodeRanges[102] = CR_Syloti_Nagri; + CodeRanges[103] = CR_Syriac; + CodeRanges[104] = CR_Tagalog; + CodeRanges[105] = CR_Tagbanwa; + CodeRanges[106] = CR_Tai_Le; + CodeRanges[107] = CR_Tamil; + CodeRanges[108] = CR_Telugu; + CodeRanges[109] = CR_Thaana; + CodeRanges[110] = CR_Thai; + CodeRanges[111] = CR_Tibetan; + CodeRanges[112] = CR_Tifinagh; + CodeRanges[113] = CR_Ugaritic; + CodeRanges[114] = CR_Yi; +#endif /* USE_UNICODE_PROPERTIES */ + + CodeRangeTableInited = 1; + THREAD_ATOMIC_END; +} + /* Test whether code point `code` belongs to character type `ctype`. Code points below 256 (and, when USE_UNICODE_PROPERTIES is defined, only for ctype <= ONIGENC_MAX_STD_CTYPE) are answered by ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE. Otherwise ctype >= CODE_RANGES_NUM yields ONIGERR_TYPE_BUG, and any other ctype is looked up in CodeRanges[ctype] (initialised on first use) through onig_is_in_code_range(). */ +extern int +onigenc_unicode_is_code_ctype(OnigCodePoint code, unsigned int ctype) +{ + if ( +#ifdef USE_UNICODE_PROPERTIES + ctype <= ONIGENC_MAX_STD_CTYPE && +#endif + code < 256) { + return ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(code, ctype); + } + + if (ctype >= CODE_RANGES_NUM) { + return ONIGERR_TYPE_BUG; + } + + if (CodeRangeTableInited == 0)
init_code_range_array(); + + return onig_is_in_code_range((UChar* )CodeRanges[ctype], code); +} + + +extern int +onigenc_unicode_ctype_code_range(int ctype, const OnigCodePoint* ranges[]) +{ + if (ctype >= CODE_RANGES_NUM) { + return ONIGERR_TYPE_BUG; + } + + if (CodeRangeTableInited == 0) init_code_range_array(); + + *ranges = CodeRanges[ctype]; + + return 0; +} + +extern int +onigenc_utf16_32_get_ctype_code_range(OnigCtype ctype, OnigCodePoint* sb_out, + const OnigCodePoint* ranges[]) +{ + *sb_out = 0x00; + return onigenc_unicode_ctype_code_range(ctype, ranges); +} + +#include "st.h" + +#define PROPERTY_NAME_MAX_SIZE 20 + +static st_table* NameCtypeTable; +static int NameTableInited = 0; + +static int init_name_ctype_table(void) +{ + PosixBracketEntryType *pb; + + THREAD_ATOMIC_START; + + NameCtypeTable = onig_st_init_strend_table_with_size(100); + if (ONIG_IS_NULL(NameCtypeTable)) return ONIGERR_MEMORY; + + for (pb = HashEntryData; ONIG_IS_NOT_NULL(pb->name); pb++) { + onig_st_insert_strend(NameCtypeTable, pb->name, pb->name + pb->len, + (st_data_t )pb->ctype); + } + + NameTableInited = 1; + THREAD_ATOMIC_END; + return 0; +} + +extern int +onigenc_unicode_property_name_to_ctype(OnigEncoding enc, UChar* name, UChar* end) +{ + int len; + hash_data_type ctype; + UChar buf[PROPERTY_NAME_MAX_SIZE]; + UChar *p; + OnigCodePoint code; + + p = name; + len = 0; + while (p < end) { + code = ONIGENC_MBC_TO_CODE(enc, p, end); + if (code >= 0x80) + return ONIGERR_INVALID_CHAR_PROPERTY_NAME; + + buf[len++] = (UChar )code; + if (len >= PROPERTY_NAME_MAX_SIZE) + return ONIGERR_INVALID_CHAR_PROPERTY_NAME; + + p += enclen(enc, p); + } + + buf[len] = 0; + + if (NameTableInited == 0) init_name_ctype_table(); + + if (onig_st_lookup_strend(NameCtypeTable, buf, buf + len, &ctype) == 0) { + return ONIGERR_INVALID_CHAR_PROPERTY_NAME; + } + + return (int )ctype; +} + + +static int +code2_cmp(OnigCodePoint* x, OnigCodePoint* y) +{ + if (x[0] == y[0] && x[1] == y[1]) return 0; + return 1; 
+} + +static int +code2_hash(OnigCodePoint* x) +{ + return (int )(x[0] + x[1]); +} + +static struct st_hash_type type_code2_hash = { + code2_cmp, + code2_hash, +}; + +static int +code3_cmp(OnigCodePoint* x, OnigCodePoint* y) +{ + if (x[0] == y[0] && x[1] == y[1] && x[2] == y[2]) return 0; + return 1; +} + +static int +code3_hash(OnigCodePoint* x) +{ + return (int )(x[0] + x[1] + x[2]); +} + +static struct st_hash_type type_code3_hash = { + code3_cmp, + code3_hash, +}; + + +static st_table* FoldTable; /* fold-1, fold-2, fold-3 */ +static st_table* Unfold1Table; +static st_table* Unfold2Table; +static st_table* Unfold3Table; +static int CaseFoldInited = 0; + +static int init_case_fold_table(void) +{ + const CaseFold_11_Type *p; + const CaseUnfold_11_Type *p1; + const CaseUnfold_12_Type *p2; + const CaseUnfold_13_Type *p3; + int i; + + THREAD_ATOMIC_START; + + FoldTable = st_init_numtable_with_size(1200); + if (ONIG_IS_NULL(FoldTable)) return ONIGERR_MEMORY; + for (i = 0; i < (int )(sizeof(CaseFold)/sizeof(CaseFold_11_Type)); i++) { + p = &CaseFold[i]; + st_add_direct(FoldTable, (st_data_t )p->from, (st_data_t )&(p->to)); + } + for (i = 0; i < (int )(sizeof(CaseFold_Locale)/sizeof(CaseFold_11_Type)); + i++) { + p = &CaseFold_Locale[i]; + st_add_direct(FoldTable, (st_data_t )p->from, (st_data_t )&(p->to)); + } + + Unfold1Table = st_init_numtable_with_size(1000); + if (ONIG_IS_NULL(Unfold1Table)) return ONIGERR_MEMORY; + + for (i = 0; i < (int )(sizeof(CaseUnfold_11)/sizeof(CaseUnfold_11_Type)); + i++) { + p1 = &CaseUnfold_11[i]; + st_add_direct(Unfold1Table, (st_data_t )p1->from, (st_data_t )&(p1->to)); + } + for (i = 0; + i < (int )(sizeof(CaseUnfold_11_Locale)/sizeof(CaseUnfold_11_Type)); + i++) { + p1 = &CaseUnfold_11_Locale[i]; + st_add_direct(Unfold1Table, (st_data_t )p1->from, (st_data_t )&(p1->to)); + } + + Unfold2Table = st_init_table_with_size(&type_code2_hash, 200); + if (ONIG_IS_NULL(Unfold2Table)) return ONIGERR_MEMORY; + + for (i = 0; i < (int 
)(sizeof(CaseUnfold_12)/sizeof(CaseUnfold_12_Type)); + i++) { + p2 = &CaseUnfold_12[i]; + st_add_direct(Unfold2Table, (st_data_t )p2->from, (st_data_t )(&p2->to)); + } + for (i = 0; + i < (int )(sizeof(CaseUnfold_12_Locale)/sizeof(CaseUnfold_12_Type)); + i++) { + p2 = &CaseUnfold_12_Locale[i]; + st_add_direct(Unfold2Table, (st_data_t )p2->from, (st_data_t )(&p2->to)); + } + + Unfold3Table = st_init_table_with_size(&type_code3_hash, 30); + if (ONIG_IS_NULL(Unfold3Table)) return ONIGERR_MEMORY; + + for (i = 0; i < (int )(sizeof(CaseUnfold_13)/sizeof(CaseUnfold_13_Type)); + i++) { + p3 = &CaseUnfold_13[i]; + st_add_direct(Unfold3Table, (st_data_t )p3->from, (st_data_t )(&p3->to)); + } + + CaseFoldInited = 1; + THREAD_ATOMIC_END; + return 0; +} + +extern int +onigenc_unicode_mbc_case_fold(OnigEncoding enc, + OnigCaseFoldType flag ARG_UNUSED, const UChar** pp, const UChar* end, + UChar* fold) +{ + CodePointList3 *to; + OnigCodePoint code; + int i, len, rlen; + const UChar *p = *pp; + + if (CaseFoldInited == 0) init_case_fold_table(); + + code = ONIGENC_MBC_TO_CODE(enc, p, end); + len = enclen(enc, p); + *pp += len; + +#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI + if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) { + if (code == 0x0049) { + return ONIGENC_CODE_TO_MBC(enc, 0x0131, fold); + } + else if (code == 0x0130) { + return ONIGENC_CODE_TO_MBC(enc, 0x0069, fold); + } + } +#endif + + if (onig_st_lookup(FoldTable, (st_data_t )code, (void* )&to) != 0) { + if (to->n == 1) { + return ONIGENC_CODE_TO_MBC(enc, to->code[0], fold); + } +#if 0 + /* NO NEEDS TO CHECK */ + else if ((flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { +#else + else { +#endif + rlen = 0; + for (i = 0; i < to->n; i++) { + len = ONIGENC_CODE_TO_MBC(enc, to->code[i], fold); + fold += len; + rlen += len; + } + return rlen; + } + } + + for (i = 0; i < len; i++) { + *fold++ = *p++; + } + return len; +} + +extern int +onigenc_unicode_apply_all_case_fold(OnigCaseFoldType flag, + OnigApplyAllCaseFoldFunc 
f, void* arg) +{ /* Walk the static CaseUnfold tables and call f(code, target_codes, target_count, arg) for every case-fold relation found there. The first non-zero value returned by f stops the walk and is returned; otherwise 0 is returned. The hash tables are not used here (the lazy-init call below is commented out). */ + const CaseUnfold_11_Type* p11; + OnigCodePoint code; + int i, j, k, r; + + /* if (CaseFoldInited == 0) init_case_fold_table(); */ + /* single code point relations: each unfolded code <-> its folded code, plus every pair of unfolded codes sharing that fold */ + for (i = 0; i < (int )(sizeof(CaseUnfold_11)/sizeof(CaseUnfold_11_Type)); + i++) { + p11 = &CaseUnfold_11[i]; + for (j = 0; j < p11->to.n; j++) { + code = p11->from; + r = (*f)(p11->to.code[j], &code, 1, arg); + if (r != 0) return r; + + code = p11->to.code[j]; + r = (*f)(p11->from, &code, 1, arg); + if (r != 0) return r; + + for (k = 0; k < j; k++) { + r = (*f)(p11->to.code[j], (OnigCodePoint* )(&p11->to.code[k]), 1, arg); + if (r != 0) return r; + + r = (*f)(p11->to.code[k], (OnigCodePoint* )(&p11->to.code[j]), 1, arg); + if (r != 0) return r; + } + } + } + /* with ONIGENC_CASE_FOLD_TURKISH_AZERI set, report 0x0049 <-> 0x0131 and 0x0069 <-> 0x0130 instead of the CaseUnfold_11_Locale entries */ +#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI + if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) { + code = 0x0131; + r = (*f)(0x0049, &code, 1, arg); + if (r != 0) return r; + code = 0x0049; + r = (*f)(0x0131, &code, 1, arg); + if (r != 0) return r; + + code = 0x0130; + r = (*f)(0x0069, &code, 1, arg); + if (r != 0) return r; + code = 0x0069; + r = (*f)(0x0130, &code, 1, arg); + if (r != 0) return r; + } + else { +#endif + for (i = 0; + i < (int )(sizeof(CaseUnfold_11_Locale)/sizeof(CaseUnfold_11_Type)); + i++) { + p11 = &CaseUnfold_11_Locale[i]; + for (j = 0; j < p11->to.n; j++) { + code = p11->from; + r = (*f)(p11->to.code[j], &code, 1, arg); + if (r != 0) return r; + + code = p11->to.code[j]; + r = (*f)(p11->from, &code, 1, arg); + if (r != 0) return r; + + for (k = 0; k < j; k++) { + r = (*f)(p11->to.code[j], (OnigCodePoint* )(&p11->to.code[k]), + 1, arg); + if (r != 0) return r; + + r = (*f)(p11->to.code[k], (OnigCodePoint* )(&p11->to.code[j]), + 1, arg); + if (r != 0) return r; + } + } + } +#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI + } +#endif + /* multi-character relations, only when INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR is set: each unfolded code is reported against its 2- or 3-code folded sequence and against every other unfolded code of the same entry */ + if ((flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { + for (i = 0; i < (int )(sizeof(CaseUnfold_12)/sizeof(CaseUnfold_12_Type)); + i++) { + for (j = 0; j < CaseUnfold_12[i].to.n; j++) { + r =
(*f)(CaseUnfold_12[i].to.code[j], + (OnigCodePoint* )CaseUnfold_12[i].from, 2, arg); + if (r != 0) return r; + + for (k = 0; k < CaseUnfold_12[i].to.n; k++) { + if (k == j) continue; + + r = (*f)(CaseUnfold_12[i].to.code[j], + (OnigCodePoint* )(&CaseUnfold_12[i].to.code[k]), 1, arg); + if (r != 0) return r; + } + } + } + +#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI + if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) == 0) { +#endif + for (i = 0; + i < (int )(sizeof(CaseUnfold_12_Locale)/sizeof(CaseUnfold_12_Type)); + i++) { + for (j = 0; j < CaseUnfold_12_Locale[i].to.n; j++) { + r = (*f)(CaseUnfold_12_Locale[i].to.code[j], + (OnigCodePoint* )CaseUnfold_12_Locale[i].from, 2, arg); + if (r != 0) return r; + + for (k = 0; k < CaseUnfold_12_Locale[i].to.n; k++) { + if (k == j) continue; + + r = (*f)(CaseUnfold_12_Locale[i].to.code[j], + (OnigCodePoint* )(&CaseUnfold_12_Locale[i].to.code[k]), + 1, arg); + if (r != 0) return r; + } + } + } +#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI + } +#endif + + for (i = 0; i < (int )(sizeof(CaseUnfold_13)/sizeof(CaseUnfold_13_Type)); + i++) { + for (j = 0; j < CaseUnfold_13[i].to.n; j++) { + r = (*f)(CaseUnfold_13[i].to.code[j], + (OnigCodePoint* )CaseUnfold_13[i].from, 3, arg); + if (r != 0) return r; + + for (k = 0; k < CaseUnfold_13[i].to.n; k++) { + if (k == j) continue; + + r = (*f)(CaseUnfold_13[i].to.code[j], + (OnigCodePoint* )(&CaseUnfold_13[i].to.code[k]), 1, arg); + if (r != 0) return r; + } + } + } + } + + return 0; +} + /* Collect into items[] the case-fold alternatives for the text starting at p (encoded in enc, ending at end) and return how many items were written. Each item records byte_len (bytes of input it stands for), code_len and code[] (the alternative code point sequence). NOTE(review): no capacity check on items[] is visible here, so the caller presumably supplies an array large enough for the worst case -- confirm against the callers. The return value of init_case_fold_table() is not checked. */ +extern int +onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc, + OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, + OnigCaseFoldCodeItem items[]) +{ + int n, i, j, k, len; + OnigCodePoint code, codes[3]; + CodePointList3 *to, *z3; + CodePointList2 *z2; + + if (CaseFoldInited == 0) init_case_fold_table(); + + n = 0; + + code = ONIGENC_MBC_TO_CODE(enc, p, end); + len = enclen(enc, p); + /* Turkish/Azeri mode: the four dotted/dotless i code points each yield exactly one item and return at once */ +#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI + if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) { +
if (code == 0x0049) { + items[0].byte_len = len; + items[0].code_len = 1; + items[0].code[0] = 0x0131; + return 1; + } + else if (code == 0x0130) { + items[0].byte_len = len; + items[0].code_len = 1; + items[0].code[0] = 0x0069; + return 1; + } + else if (code == 0x0131) { + items[0].byte_len = len; + items[0].code_len = 1; + items[0].code[0] = 0x0049; + return 1; + } + else if (code == 0x0069) { + items[0].byte_len = len; + items[0].code_len = 1; + items[0].code[0] = 0x0130; + return 1; + } + } +#endif + /* the first character has a fold entry: a single-code fold yields the folded code plus the other codes that unfold from it (excluding the original); from here on `code` holds the folded code */ + if (onig_st_lookup(FoldTable, (st_data_t )code, (void* )&to) != 0) { + if (to->n == 1) { + OnigCodePoint orig_code = code; + + items[0].byte_len = len; + items[0].code_len = 1; + items[0].code[0] = to->code[0]; + n++; + + code = to->code[0]; + if (onig_st_lookup(Unfold1Table, (st_data_t )code, (void* )&to) != 0) { + for (i = 0; i < to->n; i++) { + if (to->code[i] != orig_code) { + items[n].byte_len = len; + items[n].code_len = 1; + items[n].code[0] = to->code[i]; + n++; + } + } + } + } /* multi-code fold (2 or 3 codes), only with the MULTI_CHAR flag: cs[fn] lists folded code fn followed by its Unfold1Table alternatives, ncs[fn] counts them; every combination is emitted, followed by the single codes that Unfold2Table / Unfold3Table map from the folded sequence (skipping the input code itself) */ + else if ((flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { + OnigCodePoint cs[3][4]; + int fn, ncs[3]; + + for (fn = 0; fn < to->n; fn++) { + cs[fn][0] = to->code[fn]; + if (onig_st_lookup(Unfold1Table, (st_data_t )cs[fn][0], + (void* )&z3) != 0) { + for (i = 0; i < z3->n; i++) { + cs[fn][i+1] = z3->code[i]; + } + ncs[fn] = z3->n + 1; + } + else + ncs[fn] = 1; + } + + if (fn == 2) { + for (i = 0; i < ncs[0]; i++) { + for (j = 0; j < ncs[1]; j++) { + items[n].byte_len = len; + items[n].code_len = 2; + items[n].code[0] = cs[0][i]; + items[n].code[1] = cs[1][j]; + n++; + } + } + + if (onig_st_lookup(Unfold2Table, (st_data_t )to->code, + (void* )&z2) != 0) { + for (i = 0; i < z2->n; i++) { + if (z2->code[i] == code) continue; + + items[n].byte_len = len; + items[n].code_len = 1; + items[n].code[0] = z2->code[i]; + n++; + } + } + } + else { + for (i = 0; i < ncs[0]; i++) { + for (j = 0; j < ncs[1]; j++) { + for (k = 0; k < ncs[2]; k++) { + items[n].byte_len = len; +
items[n].code_len = 3; + items[n].code[0] = cs[0][i]; + items[n].code[1] = cs[1][j]; + items[n].code[2] = cs[2][k]; + n++; + } + } + } + + if (onig_st_lookup(Unfold3Table, (st_data_t )to->code, + (void* )&z2) != 0) { + for (i = 0; i < z2->n; i++) { + if (z2->code[i] == code) continue; + + items[n].byte_len = len; + items[n].code_len = 1; + items[n].code[0] = z2->code[i]; + n++; + } + } + } + + /* multi char folded code is not head of another folded multi char */ + flag = 0; /* DISABLE_CASE_FOLD_MULTI_CHAR(flag); */ + } + } + else { /* no fold entry: the character may itself be a folded form, so emit the codes that unfold from it */ + if (onig_st_lookup(Unfold1Table, (st_data_t )code, (void* )&to) != 0) { + for (i = 0; i < to->n; i++) { + items[n].byte_len = len; + items[n].code_len = 1; + items[n].code[0] = to->code[i]; + n++; + } + } + } + + /* look-ahead: fold the next one and then two characters (single-code folds only) and check whether the 2- or 3-code sequence in codes[] is the folded form of a single code point; such items get a byte_len covering all characters consumed */ + if ((flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { + p += len; + if (p < end) { + int clen; + + codes[0] = code; + code = ONIGENC_MBC_TO_CODE(enc, p, end); + if (onig_st_lookup(FoldTable, (st_data_t )code, (void* )&to) != 0 + && to->n == 1) { + codes[1] = to->code[0]; + } + else + codes[1] = code; + + clen = enclen(enc, p); + len += clen; + if (onig_st_lookup(Unfold2Table, (st_data_t )codes, (void* )&z2) != 0) { + for (i = 0; i < z2->n; i++) { + items[n].byte_len = len; + items[n].code_len = 1; + items[n].code[0] = z2->code[i]; + n++; + } + } + + p += clen; + if (p < end) { + code = ONIGENC_MBC_TO_CODE(enc, p, end); + if (onig_st_lookup(FoldTable, (st_data_t )code, (void* )&to) != 0 + && to->n == 1) { + codes[2] = to->code[0]; + } + else + codes[2] = code; + + clen = enclen(enc, p); + len += clen; + if (onig_st_lookup(Unfold3Table, (st_data_t )codes, + (void* )&z2) != 0) { + for (i = 0; i < z2->n; i++) { + items[n].byte_len = len; + items[n].code_len = 1; + items[n].code[0] = z2->code[i]; + n++; + } + } + } + } + } + + return n; +} diff --git a/oniguruma/enc/utf16_be.c b/oniguruma/enc/utf16_be.c new file mode 100644 index 0000000..1e909eb --- /dev/null +++ b/oniguruma/enc/utf16_be.c @@ -0,0 +1,225 @@
+/********************************************************************** + utf16_be.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2008 K.Kosako + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */
+
+#include "regenc.h"
+
+/* Encoded length in bytes of a UTF-16 character, indexed by the high byte
+   of its first 16-bit unit: 4 for 0xd8..0xdb (first surrogate), else 2. */
+static const int EncLen_UTF16[] = {
+  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+  2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 2, 2, 2, 2,
+  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
+};
+
+/* Byte length of the character at p; in big-endian order the first byte
+   is the high byte, so it selects the table entry. */
+static int
+utf16be_mbc_enc_len(const UChar* p)
+{
+  return EncLen_UTF16[*p];
+}
+
+/*
+ * Return 1 when the 16-bit unit at p is a line terminator, else 0.
+ * U+000A always counts.  With USE_UNICODE_ALL_LINE_TERMINATORS, U+0085,
+ * U+2028 and U+2029 count too, and U+000D unless
+ * USE_CRNL_AS_LINE_TERMINATOR is defined.  Needs two readable bytes.
+ */
+static int
+utf16be_is_mbc_newline(const UChar* p, const UChar* end)
+{
+  if (p + 1 < end) {
+    if (*(p+1) == 0x0a && *p == 0x00)
+      return 1;
+#ifdef USE_UNICODE_ALL_LINE_TERMINATORS
+    if ((
+#ifndef USE_CRNL_AS_LINE_TERMINATOR
+         *(p+1) == 0x0d ||
+#endif
+         *(p+1) == 0x85) && *p == 0x00)
+      return 1;
+    if (*p == 0x20 && (*(p+1) == 0x29 || *(p+1) == 0x28))
+      return 1;
+#endif
+  }
+  return 0;
+}
+
+/*
+ * Decode the UTF-16BE character at p into a code point.
+ *
+ * A first surrogate starts a four-byte sequence.  If fewer than four bytes
+ * remain before `end`, 0 is returned instead of reading past the buffer.
+ */
+static OnigCodePoint
+utf16be_mbc_to_code(const UChar* p, const UChar* end)
+{
+  OnigCodePoint code;
+
+  if (UTF16_IS_SURROGATE_FIRST(*p)) {
+    /* truncated surrogate pair: p[2] and p[3] would be out of bounds */
+    if (end - p < 4) return 0;
+
+    code = ((((p[0] - 0xd8) << 2) + ((p[1] & 0xc0) >> 6) + 1) << 16)
+         + ((((p[1] & 0x3f) << 2) + (p[2] - 0xdc)) << 8)
+         + p[3];
+  }
+  else {
+    code = p[0] * 256 + p[1];
+  }
+  return code;
+}
+
+static int
+utf16be_code_to_mbclen(OnigCodePoint code)
+{
+  return (code > 0xffff ?
4 : 2);
+}
+
+/*
+ * Encode `code` as UTF-16BE into buf and return the number of bytes
+ * written: 4 (a surrogate pair) for code points above 0xffff, else 2.
+ */
+static int
+utf16be_code_to_mbc(OnigCodePoint code, UChar *buf)
+{
+  if (code <= 0xffff) {
+    buf[0] = (UChar )((code & 0xff00) >> 8);
+    buf[1] = (UChar )(code & 0xff);
+    return 2;
+  }
+  else {
+    unsigned int plane = (code >> 16) - 1;
+    unsigned int high = (code & 0xff00) >> 8;
+
+    buf[0] = (plane >> 2) + 0xd8;
+    buf[1] = ((plane & 0x03) << 6) + (high >> 2);
+    buf[2] = (high & 0x03) + 0xdc;
+    buf[3] = (UChar )(code & 0xff);
+    return 4;
+  }
+}
+
+/*
+ * Case-fold the character at *pp into `fold`, advance *pp past it and
+ * return the number of bytes written.  ASCII characters are lowered here;
+ * with the Turkish/Azeri flag (when compiled in) 'I' folds to U+0131.
+ * Everything else goes to onigenc_unicode_mbc_case_fold().
+ */
+static int
+utf16be_mbc_case_fold(OnigCaseFoldType flag,
+                      const UChar** pp, const UChar* end, UChar* fold)
+{
+  const UChar* src = *pp;
+
+  /* not a 00xx unit with an ASCII low byte: use the generic Unicode fold */
+  if (!ONIGENC_IS_ASCII_CODE(src[1]) || src[0] != 0)
+    return onigenc_unicode_mbc_case_fold(ONIG_ENCODING_UTF16_BE, flag,
+                                         pp, end, fold);
+
+#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
+  if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0 && src[1] == 0x49) {
+    fold[0] = 0x01;
+    fold[1] = 0x31;
+    (*pp) += 2;
+    return 2;
+  }
+#endif
+
+  fold[0] = 0;
+  fold[1] = ONIGENC_ASCII_CODE_TO_LOWER_CASE(src[1]);
+  *pp += 2;
+  return 2;
+}
+
+#if 0
+static int
+utf16be_is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end)
+{
+  const UChar* p = *pp;
+
+  (*pp) += EncLen_UTF16[*p];
+
+  if (*p == 0) {
+    int c, v;
+
+    p++;
+    if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
+      return TRUE;
+    }
+
+    c = *p;
+    v = ONIGENC_IS_UNICODE_ISO_8859_1_BIT_CTYPE(c,
+                (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER));
+
+    if ((v | BIT_CTYPE_LOWER) != 0) {
+      /* 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */
+      if (c >= 0xaa && c <= 0xba)
+        return FALSE;
+      else
+        return TRUE;
+    }
+    return (v != 0 ?
TRUE : FALSE); + } + + return FALSE; +} +#endif + +static UChar* +utf16be_left_adjust_char_head(const UChar* start, const UChar* s) +{ + if (s <= start) return (UChar* )s; + + if ((s - start) % 2 == 1) { + s--; + } + + if (UTF16_IS_SURROGATE_SECOND(*s) && s > start + 1) + s -= 2; + + return (UChar* )s; +} + +static int +utf16be_get_case_fold_codes_by_str(OnigCaseFoldType flag, + const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]) +{ + return onigenc_unicode_get_case_fold_codes_by_str(ONIG_ENCODING_UTF16_BE, + flag, p, end, items); +} + +OnigEncodingType OnigEncodingUTF16_BE = { + utf16be_mbc_enc_len, + "UTF-16BE", /* name */ + 4, /* max byte length */ + 2, /* min byte length */ + utf16be_is_mbc_newline, + utf16be_mbc_to_code, + utf16be_code_to_mbclen, + utf16be_code_to_mbc, + utf16be_mbc_case_fold, + onigenc_unicode_apply_all_case_fold, + utf16be_get_case_fold_codes_by_str, + onigenc_unicode_property_name_to_ctype, + onigenc_unicode_is_code_ctype, + onigenc_utf16_32_get_ctype_code_range, + utf16be_left_adjust_char_head, + onigenc_always_false_is_allowed_reverse_match +}; diff --git a/oniguruma/enc/utf16_le.c b/oniguruma/enc/utf16_le.c new file mode 100644 index 0000000..5cc0759 --- /dev/null +++ b/oniguruma/enc/utf16_le.c @@ -0,0 +1,226 @@ +/********************************************************************** + utf16_le.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2008 K.Kosako + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "regenc.h" + +static const int EncLen_UTF16[] = { + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 +}; + +static int +utf16le_code_to_mbclen(OnigCodePoint code) +{ + return (code > 0xffff ? 
4 : 2); +} + +static int +utf16le_mbc_enc_len(const UChar* p) +{ + return EncLen_UTF16[*(p+1)]; +} + +static int +utf16le_is_mbc_newline(const UChar* p, const UChar* end) +{ + if (p + 1 < end) { + if (*p == 0x0a && *(p+1) == 0x00) + return 1; +#ifdef USE_UNICODE_ALL_LINE_TERMINATORS + if (( +#ifndef USE_CRNL_AS_LINE_TERMINATOR + *p == 0x0d || +#endif + *p == 0x85) && *(p+1) == 0x00) + return 1; + if (*(p+1) == 0x20 && (*p == 0x29 || *p == 0x28)) + return 1; +#endif + } + return 0; +} + +static OnigCodePoint +utf16le_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED) +{ + OnigCodePoint code; + UChar c0 = *p; + UChar c1 = *(p+1); + + if (UTF16_IS_SURROGATE_FIRST(c1)) { + code = ((((c1 - 0xd8) << 2) + ((c0 & 0xc0) >> 6) + 1) << 16) + + ((((c0 & 0x3f) << 2) + (p[3] - 0xdc)) << 8) + + p[2]; + } + else { + code = c1 * 256 + p[0]; + } + return code; +} + +static int +utf16le_code_to_mbc(OnigCodePoint code, UChar *buf) +{ + UChar* p = buf; + + if (code > 0xffff) { + unsigned int plane, high; + + plane = (code >> 16) - 1; + high = (code & 0xff00) >> 8; + + *p++ = ((plane & 0x03) << 6) + (high >> 2); + *p++ = (plane >> 2) + 0xd8; + *p++ = (UChar )(code & 0xff); + *p = (high & 0x03) + 0xdc; + return 4; + } + else { + *p++ = (UChar )(code & 0xff); + *p++ = (UChar )((code & 0xff00) >> 8); + return 2; + } +} + +static int +utf16le_mbc_case_fold(OnigCaseFoldType flag, + const UChar** pp, const UChar* end, UChar* fold) +{ + const UChar* p = *pp; + + if (ONIGENC_IS_ASCII_CODE(*p) && *(p+1) == 0) { +#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI + if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) { + if (*p == 0x49) { + *fold++ = 0x31; + *fold = 0x01; + (*pp) += 2; + return 2; + } + } +#endif + + *fold++ = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p); + *fold = 0; + *pp += 2; + return 2; + } + else + return onigenc_unicode_mbc_case_fold(ONIG_ENCODING_UTF16_LE, flag, pp, end, + fold); +} + +#if 0 +static int +utf16le_is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, + const UChar* 
end) +{ + const UChar* p = *pp; + + (*pp) += EncLen_UTF16[*(p+1)]; + + if (*(p+1) == 0) { + int c, v; + + if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { + return TRUE; + } + + c = *p; + v = ONIGENC_IS_UNICODE_ISO_8859_1_BIT_CTYPE(c, + (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER)); + if ((v | BIT_CTYPE_LOWER) != 0) { + /* 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */ + if (c >= 0xaa && c <= 0xba) + return FALSE; + else + return TRUE; + } + return (v != 0 ? TRUE : FALSE); + } + + return FALSE; +} +#endif + +static UChar* +utf16le_left_adjust_char_head(const UChar* start, const UChar* s) +{ + if (s <= start) return (UChar* )s; + + if ((s - start) % 2 == 1) { + s--; + } + + if (UTF16_IS_SURROGATE_SECOND(*(s+1)) && s > start + 1) + s -= 2; + + return (UChar* )s; +} + +static int +utf16le_get_case_fold_codes_by_str(OnigCaseFoldType flag, + const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]) +{ + return onigenc_unicode_get_case_fold_codes_by_str(ONIG_ENCODING_UTF16_LE, + flag, p, end, items); +} + +OnigEncodingType OnigEncodingUTF16_LE = { + utf16le_mbc_enc_len, + "UTF-16LE", /* name */ + 4, /* max byte length */ + 2, /* min byte length */ + utf16le_is_mbc_newline, + utf16le_mbc_to_code, + utf16le_code_to_mbclen, + utf16le_code_to_mbc, + utf16le_mbc_case_fold, + onigenc_unicode_apply_all_case_fold, + utf16le_get_case_fold_codes_by_str, + onigenc_unicode_property_name_to_ctype, + onigenc_unicode_is_code_ctype, + onigenc_utf16_32_get_ctype_code_range, + utf16le_left_adjust_char_head, + onigenc_always_false_is_allowed_reverse_match +}; diff --git a/oniguruma/enc/utf32_be.c b/oniguruma/enc/utf32_be.c new file mode 100644 index 0000000..b4f8226 --- /dev/null +++ b/oniguruma/enc/utf32_be.c @@ -0,0 +1,184 @@ +/********************************************************************** + utf32_be.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 
2002-2007 K.Kosako + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include "regenc.h" + +static int +utf32be_mbc_enc_len(const UChar* p ARG_UNUSED) +{ + return 4; +} + +static int +utf32be_is_mbc_newline(const UChar* p, const UChar* end) +{ + if (p + 3 < end) { + if (*(p+3) == 0x0a && *(p+2) == 0 && *(p+1) == 0 && *p == 0) + return 1; +#ifdef USE_UNICODE_ALL_LINE_TERMINATORS + if (( +#ifndef USE_CRNL_AS_LINE_TERMINATOR + *(p+3) == 0x0d || +#endif + *(p+3) == 0x85) + && *(p+2) == 0 && *(p+1) == 0 && *p == 0x00) + return 1; + if (*(p+2) == 0x20 && (*(p+3) == 0x29 || *(p+3) == 0x28) + && *(p+1) == 0 && *p == 0) + return 1; +#endif + } + return 0; +} + +static OnigCodePoint +utf32be_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED) +{ + return (OnigCodePoint )(((p[0] * 256 + p[1]) * 256 + p[2]) * 256 + p[3]); +} + +static int +utf32be_code_to_mbclen(OnigCodePoint code ARG_UNUSED) +{ + return 4; +} + +static int +utf32be_code_to_mbc(OnigCodePoint code, UChar *buf) +{ + UChar* p = buf; + + *p++ = (UChar )((code & 0xff000000) >>24); + *p++ = (UChar )((code & 0xff0000) >>16); + *p++ = (UChar )((code & 0xff00) >> 8); + *p++ = (UChar ) (code & 0xff); + return 4; +} + +static int +utf32be_mbc_case_fold(OnigCaseFoldType flag, + const UChar** pp, const UChar* end, UChar* fold) +{ + const UChar* p = *pp; + + if (ONIGENC_IS_ASCII_CODE(*(p+3)) && *(p+2) == 0 && *(p+1) == 0 && *p == 0) { + *fold++ = 0; + *fold++ = 0; + +#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI + if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) { + if (*(p+3) == 0x49) { + *fold++ = 0x01; + *fold = 0x31; + (*pp) += 4; + return 4; + } + } +#endif + + *fold++ = 0; + *fold = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*(p+3)); + *pp += 4; + return 4; + } + else + return onigenc_unicode_mbc_case_fold(ONIG_ENCODING_UTF32_BE, flag, pp, end, + fold); +} + +#if 0 +static int +utf32be_is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end) +{ + const UChar* p = *pp; + + (*pp) += 4; + + if (*(p+2) == 0 && *(p+1) == 0 && *p == 0) { + int c, v; + + p += 3; + if (*p 
== 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { + return TRUE; + } + + c = *p; + v = ONIGENC_IS_UNICODE_ISO_8859_1_BIT_CTYPE(c, + (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER)); + if ((v | BIT_CTYPE_LOWER) != 0) { + /* 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */ + if (c >= 0xaa && c <= 0xba) + return FALSE; + else + return TRUE; + } + return (v != 0 ? TRUE : FALSE); + } + + return FALSE; +} +#endif + +static UChar* +utf32be_left_adjust_char_head(const UChar* start, const UChar* s) +{ + int rem; + + if (s <= start) return (UChar* )s; + + rem = (s - start) % 4; + return (UChar* )(s - rem); +} + +static int +utf32be_get_case_fold_codes_by_str(OnigCaseFoldType flag, + const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]) +{ + return onigenc_unicode_get_case_fold_codes_by_str(ONIG_ENCODING_UTF32_BE, + flag, p, end, items); +} + +OnigEncodingType OnigEncodingUTF32_BE = { + utf32be_mbc_enc_len, + "UTF-32BE", /* name */ + 4, /* max byte length */ + 4, /* min byte length */ + utf32be_is_mbc_newline, + utf32be_mbc_to_code, + utf32be_code_to_mbclen, + utf32be_code_to_mbc, + utf32be_mbc_case_fold, + onigenc_unicode_apply_all_case_fold, + utf32be_get_case_fold_codes_by_str, + onigenc_unicode_property_name_to_ctype, + onigenc_unicode_is_code_ctype, + onigenc_utf16_32_get_ctype_code_range, + utf32be_left_adjust_char_head, + onigenc_always_false_is_allowed_reverse_match +}; diff --git a/oniguruma/enc/utf32_le.c b/oniguruma/enc/utf32_le.c new file mode 100644 index 0000000..8f413bf --- /dev/null +++ b/oniguruma/enc/utf32_le.c @@ -0,0 +1,184 @@ +/********************************************************************** + utf32_le.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2007 K.Kosako + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include "regenc.h" + +static int +utf32le_mbc_enc_len(const UChar* p ARG_UNUSED) +{ + return 4; +} + +static int +utf32le_is_mbc_newline(const UChar* p, const UChar* end) +{ + if (p + 3 < end) { + if (*p == 0x0a && *(p+1) == 0 && *(p+2) == 0 && *(p+3) == 0) + return 1; +#ifdef USE_UNICODE_ALL_LINE_TERMINATORS + if (( +#ifndef USE_CRNL_AS_LINE_TERMINATOR + *p == 0x0d || +#endif + *p == 0x85) + && *(p+1) == 0x00 && (p+2) == 0x00 && *(p+3) == 0x00) + return 1; + if (*(p+1) == 0x20 && (*p == 0x29 || *p == 0x28) + && *(p+2) == 0x00 && *(p+3) == 0x00) + return 1; +#endif + } + return 0; +} + +static OnigCodePoint +utf32le_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED) +{ + return (OnigCodePoint )(((p[3] * 256 + p[2]) * 256 + p[1]) * 256 + p[0]); +} + +static int +utf32le_code_to_mbclen(OnigCodePoint code ARG_UNUSED) +{ + return 4; +} + +static int +utf32le_code_to_mbc(OnigCodePoint code, UChar *buf) +{ + UChar* p = buf; + + *p++ = (UChar ) (code & 0xff); + *p++ = (UChar )((code & 0xff00) >> 8); + *p++ = (UChar )((code & 0xff0000) >>16); + *p++ = (UChar )((code & 0xff000000) >>24); + return 4; +} + +static int +utf32le_mbc_case_fold(OnigCaseFoldType flag, + const UChar** pp, const UChar* end, UChar* fold) +{ + const UChar* p = *pp; + + if (ONIGENC_IS_ASCII_CODE(*p) && *(p+1) == 0 && *(p+2) == 0 && *(p+3) == 0) { +#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI + if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) { + if (*p == 0x49) { + *fold++ = 0x31; + *fold++ = 0x01; + } + } + else { +#endif + *fold++ = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p); + *fold++ = 0; +#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI + } +#endif + + *fold++ = 0; + *fold = 0; + *pp += 4; + return 4; + } + else + return onigenc_unicode_mbc_case_fold(ONIG_ENCODING_UTF32_LE, flag, pp, end, + fold); +} + +#if 0 +static int +utf32le_is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end) +{ + const UChar* p = *pp; + + (*pp) += 4; + + if (*(p+1) == 0 && *(p+2) == 0 && *(p+3) == 
0) { + int c, v; + + if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { + return TRUE; + } + + c = *p; + v = ONIGENC_IS_UNICODE_ISO_8859_1_BIT_CTYPE(c, + (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER)); + if ((v | BIT_CTYPE_LOWER) != 0) { + /* 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */ + if (c >= 0xaa && c <= 0xba) + return FALSE; + else + return TRUE; + } + return (v != 0 ? TRUE : FALSE); + } + + return FALSE; +} +#endif + +static UChar* +utf32le_left_adjust_char_head(const UChar* start, const UChar* s) +{ + int rem; + + if (s <= start) return (UChar* )s; + + rem = (s - start) % 4; + return (UChar* )(s - rem); +} + +static int +utf32le_get_case_fold_codes_by_str(OnigCaseFoldType flag, + const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]) +{ + return onigenc_unicode_get_case_fold_codes_by_str(ONIG_ENCODING_UTF32_LE, + flag, p, end, items); +} + +OnigEncodingType OnigEncodingUTF32_LE = { + utf32le_mbc_enc_len, + "UTF-32LE", /* name */ + 4, /* max byte length */ + 4, /* min byte length */ + utf32le_is_mbc_newline, + utf32le_mbc_to_code, + utf32le_code_to_mbclen, + utf32le_code_to_mbc, + utf32le_mbc_case_fold, + onigenc_unicode_apply_all_case_fold, + utf32le_get_case_fold_codes_by_str, + onigenc_unicode_property_name_to_ctype, + onigenc_unicode_is_code_ctype, + onigenc_utf16_32_get_ctype_code_range, + utf32le_left_adjust_char_head, + onigenc_always_false_is_allowed_reverse_match +}; diff --git a/oniguruma/enc/utf8.c b/oniguruma/enc/utf8.c new file mode 100644 index 0000000..5e2c172 --- /dev/null +++ b/oniguruma/enc/utf8.c @@ -0,0 +1,305 @@ +/********************************************************************** + utf8.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2007 K.Kosako + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include "regenc.h" + +#define USE_INVALID_CODE_SCHEME + +#ifdef USE_INVALID_CODE_SCHEME +/* virtual codepoint values for invalid encoding byte 0xfe and 0xff */ +#define INVALID_CODE_FE 0xfffffffe +#define INVALID_CODE_FF 0xffffffff +#define VALID_CODE_LIMIT 0x7fffffff +#endif + +#define utf8_islead(c) ((UChar )((c) & 0xc0) != 0x80) + +static const int EncLen_UTF8[] = { + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 1, 1 +}; + +static int +mbc_enc_len(const UChar* p) +{ + return EncLen_UTF8[*p]; +} + +static int +is_mbc_newline(const UChar* p, const UChar* end) +{ + if (p < end) { + if (*p == 0x0a) return 1; + +#ifdef USE_UNICODE_ALL_LINE_TERMINATORS +#ifndef USE_CRNL_AS_LINE_TERMINATOR + if (*p == 0x0d) return 1; +#endif + if (p + 1 < end) { + if (*(p+1) == 0x85 && *p == 0xc2) /* U+0085 */ + return 1; + if (p + 2 < end) { + if ((*(p+2) == 0xa8 || *(p+2) == 0xa9) + && *(p+1) == 0x80 && *p == 0xe2) /* U+2028, U+2029 */ + return 1; + } + } +#endif + } + + return 0; +} + +static OnigCodePoint +mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED) +{ + int c, len; + OnigCodePoint n; + + len = enclen(ONIG_ENCODING_UTF8, p); + c = *p++; + if (len > 1) { + len--; + n = c & ((1 << (6 - len)) - 1); + while (len--) { + c = *p++; + 
n = (n << 6) | (c & ((1 << 6) - 1)); + } + return n; + } + else { +#ifdef USE_INVALID_CODE_SCHEME + if (c > 0xfd) { + return ((c == 0xfe) ? INVALID_CODE_FE : INVALID_CODE_FF); + } +#endif + return (OnigCodePoint )c; + } +} + +static int +code_to_mbclen(OnigCodePoint code) +{ + if ((code & 0xffffff80) == 0) return 1; + else if ((code & 0xfffff800) == 0) return 2; + else if ((code & 0xffff0000) == 0) return 3; + else if ((code & 0xffe00000) == 0) return 4; + else if ((code & 0xfc000000) == 0) return 5; + else if ((code & 0x80000000) == 0) return 6; +#ifdef USE_INVALID_CODE_SCHEME + else if (code == INVALID_CODE_FE) return 1; + else if (code == INVALID_CODE_FF) return 1; +#endif + else + return ONIGERR_INVALID_CODE_POINT_VALUE; +} + +static int +code_to_mbc(OnigCodePoint code, UChar *buf) +{ +#define UTF8_TRAILS(code, shift) (UChar )((((code) >> (shift)) & 0x3f) | 0x80) +#define UTF8_TRAIL0(code) (UChar )(((code) & 0x3f) | 0x80) + + if ((code & 0xffffff80) == 0) { + *buf = (UChar )code; + return 1; + } + else { + UChar *p = buf; + + if ((code & 0xfffff800) == 0) { + *p++ = (UChar )(((code>>6)& 0x1f) | 0xc0); + } + else if ((code & 0xffff0000) == 0) { + *p++ = (UChar )(((code>>12) & 0x0f) | 0xe0); + *p++ = UTF8_TRAILS(code, 6); + } + else if ((code & 0xffe00000) == 0) { + *p++ = (UChar )(((code>>18) & 0x07) | 0xf0); + *p++ = UTF8_TRAILS(code, 12); + *p++ = UTF8_TRAILS(code, 6); + } + else if ((code & 0xfc000000) == 0) { + *p++ = (UChar )(((code>>24) & 0x03) | 0xf8); + *p++ = UTF8_TRAILS(code, 18); + *p++ = UTF8_TRAILS(code, 12); + *p++ = UTF8_TRAILS(code, 6); + } + else if ((code & 0x80000000) == 0) { + *p++ = (UChar )(((code>>30) & 0x01) | 0xfc); + *p++ = UTF8_TRAILS(code, 24); + *p++ = UTF8_TRAILS(code, 18); + *p++ = UTF8_TRAILS(code, 12); + *p++ = UTF8_TRAILS(code, 6); + } +#ifdef USE_INVALID_CODE_SCHEME + else if (code == INVALID_CODE_FE) { + *p = 0xfe; + return 1; + } + else if (code == INVALID_CODE_FF) { + *p = 0xff; + return 1; + } +#endif + else { + return 
ONIGERR_TOO_BIG_WIDE_CHAR_VALUE; + } + + *p++ = UTF8_TRAIL0(code); + return p - buf; + } +} + +static int +mbc_case_fold(OnigCaseFoldType flag, const UChar** pp, + const UChar* end, UChar* fold) +{ + const UChar* p = *pp; + + if (ONIGENC_IS_MBC_ASCII(p)) { +#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI + if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) { + if (*p == 0x49) { + *fold++ = 0xc4; + *fold = 0xb1; + (*pp)++; + return 2; + } + } +#endif + + *fold = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p); + (*pp)++; + return 1; /* return byte length of converted char to lower */ + } + else { + return onigenc_unicode_mbc_case_fold(ONIG_ENCODING_UTF8, flag, + pp, end, fold); + } +} + +#if 0 +static int +is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end) +{ + const UChar* p = *pp; + + if (ONIGENC_IS_MBC_ASCII(p)) { + (*pp)++; + return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p); + } + else { + (*pp) += enclen(ONIG_ENCODING_UTF8, p); + + if (*p == 0xc3) { + int c = *(p + 1); + if (c >= 0x80) { + if (c <= (UChar )0x9e) { /* upper */ + if (c == (UChar )0x97) return FALSE; + return TRUE; + } + else if (c >= (UChar )0xa0 && c <= (UChar )0xbe) { /* lower */ + if (c == (UChar )'\267') return FALSE; + return TRUE; + } + else if (c == (UChar )0x9f && + (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { + return TRUE; + } + } + } + } + + return FALSE; +} +#endif + + +static int +get_ctype_code_range(OnigCtype ctype, OnigCodePoint *sb_out, + const OnigCodePoint* ranges[]) +{ + *sb_out = 0x80; + return onigenc_unicode_ctype_code_range(ctype, ranges); +} + + +static UChar* +left_adjust_char_head(const UChar* start, const UChar* s) +{ + const UChar *p; + + if (s <= start) return (UChar* )s; + p = s; + + while (!utf8_islead(*p) && p > start) p--; + return (UChar* )p; +} + +static int +get_case_fold_codes_by_str(OnigCaseFoldType flag, + const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]) +{ + return 
onigenc_unicode_get_case_fold_codes_by_str(ONIG_ENCODING_UTF8, + flag, p, end, items); +} + +OnigEncodingType OnigEncodingUTF8 = { + mbc_enc_len, + "UTF-8", /* name */ + 6, /* max byte length */ + 1, /* min byte length */ + is_mbc_newline, + mbc_to_code, + code_to_mbclen, + code_to_mbc, + mbc_case_fold, + onigenc_unicode_apply_all_case_fold, + get_case_fold_codes_by_str, + onigenc_unicode_property_name_to_ctype, + onigenc_unicode_is_code_ctype, + get_ctype_code_range, + left_adjust_char_head, + onigenc_always_true_is_allowed_reverse_match +}; diff --git a/oniguruma/onig-config.in b/oniguruma/onig-config.in new file mode 100644 index 0000000..eca7179 --- /dev/null +++ b/oniguruma/onig-config.in @@ -0,0 +1,78 @@ +#!/bin/sh +# Copyright (C) 2006 K.Kosako (sndgk393 AT ybb DOT ne DOT jp) + +ONIG_VERSION=@PACKAGE_VERSION@ + +show_usage() +{ + cat < + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "oniguruma.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define RE_MBCTYPE_ASCII 0 +#define RE_MBCTYPE_EUC 1 +#define RE_MBCTYPE_SJIS 2 +#define RE_MBCTYPE_UTF8 3 + +/* GNU regex options */ +#ifndef RE_NREGS +#define RE_NREGS ONIG_NREGION +#endif + +#define RE_OPTION_IGNORECASE ONIG_OPTION_IGNORECASE +#define RE_OPTION_EXTENDED ONIG_OPTION_EXTEND +#define RE_OPTION_MULTILINE ONIG_OPTION_MULTILINE +#define RE_OPTION_SINGLELINE ONIG_OPTION_SINGLELINE +#define RE_OPTION_LONGEST ONIG_OPTION_FIND_LONGEST +#define RE_OPTION_POSIXLINE (RE_OPTION_MULTILINE|RE_OPTION_SINGLELINE) +#define RE_OPTION_FIND_NOT_EMPTY ONIG_OPTION_FIND_NOT_EMPTY +#define RE_OPTION_NEGATE_SINGLELINE ONIG_OPTION_NEGATE_SINGLELINE +#define RE_OPTION_DONT_CAPTURE_GROUP ONIG_OPTION_DONT_CAPTURE_GROUP +#define RE_OPTION_CAPTURE_GROUP ONIG_OPTION_CAPTURE_GROUP + + +ONIG_EXTERN +void re_mbcinit P_((int)); +ONIG_EXTERN +int re_compile_pattern P_((const char*, int, struct re_pattern_buffer*, char* err_buf)); +ONIG_EXTERN +int re_recompile_pattern P_((const char*, int, struct re_pattern_buffer*, char* err_buf)); +ONIG_EXTERN +void re_free_pattern P_((struct re_pattern_buffer*)); +ONIG_EXTERN +int re_adjust_startpos P_((struct re_pattern_buffer*, const char*, int, int, int)); +ONIG_EXTERN +int re_search P_((struct re_pattern_buffer*, const char*, int, int, int, struct re_registers*)); +ONIG_EXTERN +int re_match P_((struct re_pattern_buffer*, const char *, int, int, 
struct re_registers*)); +ONIG_EXTERN +void re_set_casetable P_((const char*)); +ONIG_EXTERN +void re_free_registers P_((struct re_registers*)); +ONIG_EXTERN +int re_alloc_pattern P_((struct re_pattern_buffer**)); /* added */ + +#ifdef __cplusplus +} +#endif + +#endif /* ONIGGNU_H */ diff --git a/oniguruma/onigposix.h b/oniguruma/onigposix.h new file mode 100644 index 0000000..f1cb35f --- /dev/null +++ b/oniguruma/onigposix.h @@ -0,0 +1,169 @@ +#ifndef ONIGPOSIX_H +#define ONIGPOSIX_H +/********************************************************************** + onigposix.h - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2005 K.Kosako + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ +#include <stdlib.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* options */ +#define REG_ICASE (1<<0) +#define REG_NEWLINE (1<<1) +#define REG_NOTBOL (1<<2) +#define REG_NOTEOL (1<<3) +#define REG_EXTENDED (1<<4) /* if not set, Basic Onigular Expression */ +#define REG_NOSUB (1<<5) + +/* POSIX error codes */ +#define REG_NOMATCH 1 +#define REG_BADPAT 2 +#define REG_ECOLLATE 3 +#define REG_ECTYPE 4 +#define REG_EESCAPE 5 +#define REG_ESUBREG 6 +#define REG_EBRACK 7 +#define REG_EPAREN 8 +#define REG_EBRACE 9 +#define REG_BADBR 10 +#define REG_ERANGE 11 +#define REG_ESPACE 12 +#define REG_BADRPT 13 + +/* extended error codes */ +#define REG_EONIG_INTERNAL 14 +#define REG_EONIG_BADWC 15 +#define REG_EONIG_BADARG 16 +#define REG_EONIG_THREAD 17 + +/* character encodings (for reg_set_encoding()) */ +#define REG_POSIX_ENCODING_ASCII 0 +#define REG_POSIX_ENCODING_EUC_JP 1 +#define REG_POSIX_ENCODING_SJIS 2 +#define REG_POSIX_ENCODING_UTF8 3 +#define REG_POSIX_ENCODING_UTF16_BE 4 +#define REG_POSIX_ENCODING_UTF16_LE 5 + + +typedef int regoff_t; + +typedef struct { + regoff_t rm_so; + regoff_t rm_eo; +} regmatch_t; + +/* POSIX regex_t */ +typedef struct { + void* onig; /* Oniguruma regex_t* */ + size_t re_nsub; + int comp_options; +} regex_t; + + +#ifndef P_ +#if defined(__STDC__) || defined(_WIN32) +# define P_(args) args +#else +# define P_(args) () +#endif +#endif + +#ifndef ONIG_EXTERN +#if defined(_WIN32) && !defined(__GNUC__) +#if 
defined(EXPORT) +#define ONIG_EXTERN extern __declspec(dllexport) +#else +#define ONIG_EXTERN extern __declspec(dllimport) +#endif +#endif +#endif + +#ifndef ONIG_EXTERN +#define ONIG_EXTERN extern +#endif + +#ifndef ONIGURUMA_H +typedef unsigned int OnigOptionType; + +/* syntax */ +typedef struct { + unsigned int op; + unsigned int op2; + unsigned int behavior; + OnigOptionType options; /* default option */ +} OnigSyntaxType; + +ONIG_EXTERN OnigSyntaxType OnigSyntaxPosixBasic; +ONIG_EXTERN OnigSyntaxType OnigSyntaxPosixExtended; +ONIG_EXTERN OnigSyntaxType OnigSyntaxEmacs; +ONIG_EXTERN OnigSyntaxType OnigSyntaxGrep; +ONIG_EXTERN OnigSyntaxType OnigSyntaxGnuRegex; +ONIG_EXTERN OnigSyntaxType OnigSyntaxJava; +ONIG_EXTERN OnigSyntaxType OnigSyntaxPerl; +ONIG_EXTERN OnigSyntaxType OnigSyntaxRuby; + +/* predefined syntaxes (see regsyntax.c) */ +#define ONIG_SYNTAX_POSIX_BASIC (&OnigSyntaxPosixBasic) +#define ONIG_SYNTAX_POSIX_EXTENDED (&OnigSyntaxPosixExtended) +#define ONIG_SYNTAX_EMACS (&OnigSyntaxEmacs) +#define ONIG_SYNTAX_GREP (&OnigSyntaxGrep) +#define ONIG_SYNTAX_GNU_REGEX (&OnigSyntaxGnuRegex) +#define ONIG_SYNTAX_JAVA (&OnigSyntaxJava) +#define ONIG_SYNTAX_PERL (&OnigSyntaxPerl) +#define ONIG_SYNTAX_RUBY (&OnigSyntaxRuby) +/* default syntax */ +#define ONIG_SYNTAX_DEFAULT OnigDefaultSyntax + +ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax; + +ONIG_EXTERN int onig_set_default_syntax P_((OnigSyntaxType* syntax)); +ONIG_EXTERN void onig_copy_syntax P_((OnigSyntaxType* to, OnigSyntaxType* from)); +ONIG_EXTERN const char* onig_version P_((void)); +ONIG_EXTERN const char* onig_copyright P_((void)); + +#endif /* ONIGURUMA_H */ + + +ONIG_EXTERN int regcomp P_((regex_t* reg, const char* pat, int options)); +ONIG_EXTERN int regexec P_((regex_t* reg, const char* str, size_t nmatch, regmatch_t* matches, int options)); +ONIG_EXTERN void regfree P_((regex_t* reg)); +ONIG_EXTERN size_t regerror P_((int code, const regex_t* reg, char* buf, size_t size)); + +/* extended API */ 
+ONIG_EXTERN void reg_set_encoding P_((int enc)); +ONIG_EXTERN int reg_name_to_group_numbers P_((regex_t* reg, const unsigned char* name, const unsigned char* name_end, int** nums)); +ONIG_EXTERN int reg_foreach_name P_((regex_t* reg, int (*func)(const unsigned char*, const unsigned char*,int,int*,regex_t*,void*), void* arg)); +ONIG_EXTERN int reg_number_of_names P_((regex_t* reg)); + +#ifdef __cplusplus +} +#endif + +#endif /* ONIGPOSIX_H */ diff --git a/oniguruma/oniguruma.h b/oniguruma/oniguruma.h new file mode 100644 index 0000000..3b55763 --- /dev/null +++ b/oniguruma/oniguruma.h @@ -0,0 +1,822 @@ +#ifndef ONIGURUMA_H +#define ONIGURUMA_H +/********************************************************************** + oniguruma.h - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2009 K.Kosako + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#define ONIGURUMA +#define ONIGURUMA_VERSION_MAJOR 5 +#define ONIGURUMA_VERSION_MINOR 9 +#define ONIGURUMA_VERSION_TEENY 2 + +#ifdef __cplusplus +# ifndef HAVE_PROTOTYPES +# define HAVE_PROTOTYPES 1 +# endif +# ifndef HAVE_STDARG_PROTOTYPES +# define HAVE_STDARG_PROTOTYPES 1 +# endif +#endif + +/* escape Mac OS X/Xcode 2.4/gcc 4.0.1 problem */ +#if defined(__APPLE__) && defined(__GNUC__) && __GNUC__ >= 4 +# ifndef HAVE_STDARG_PROTOTYPES +# define HAVE_STDARG_PROTOTYPES 1 +# endif +#endif + +#ifdef HAVE_STDARG_H +# ifndef HAVE_STDARG_PROTOTYPES +# define HAVE_STDARG_PROTOTYPES 1 +# endif +#endif + +#ifndef P_ +#if defined(__STDC__) || defined(_WIN32) +# define P_(args) args +#else +# define P_(args) () +#endif +#endif + +#ifndef PV_ +#ifdef HAVE_STDARG_PROTOTYPES +# define PV_(args) args +#else +# define PV_(args) () +#endif +#endif + +#ifndef ONIG_EXTERN +#if defined(_WIN32) && !defined(__GNUC__) +#if defined(EXPORT) || defined(RUBY_EXPORT) +#define ONIG_EXTERN extern __declspec(dllexport) +#else +#define ONIG_EXTERN extern __declspec(dllimport) +#endif +#endif +#endif + +#ifndef ONIG_EXTERN +#define ONIG_EXTERN extern +#endif + +/* PART: character encoding */ + +#ifndef ONIG_ESCAPE_UCHAR_COLLISION +#define UChar OnigUChar +#endif + +typedef unsigned char OnigUChar; +typedef unsigned long OnigCodePoint; +typedef unsigned int OnigCtype; +typedef unsigned int OnigDistance; + +#define 
ONIG_INFINITE_DISTANCE ~((OnigDistance )0) + +typedef unsigned int OnigCaseFoldType; /* case fold flag */ + +ONIG_EXTERN OnigCaseFoldType OnigDefaultCaseFoldFlag; + +/* #define ONIGENC_CASE_FOLD_HIRAGANA_KATAKANA (1<<1) */ +/* #define ONIGENC_CASE_FOLD_KATAKANA_WIDTH (1<<2) */ +#define ONIGENC_CASE_FOLD_TURKISH_AZERI (1<<20) +#define INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR (1<<30) + +#define ONIGENC_CASE_FOLD_MIN INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR +#define ONIGENC_CASE_FOLD_DEFAULT OnigDefaultCaseFoldFlag + + +#define ONIGENC_MAX_COMP_CASE_FOLD_CODE_LEN 3 +#define ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM 13 +/* 13 => Unicode:0x1ffc */ + +/* code range */ +#define ONIGENC_CODE_RANGE_NUM(range) ((int )range[0]) +#define ONIGENC_CODE_RANGE_FROM(range,i) range[((i)*2) + 1] +#define ONIGENC_CODE_RANGE_TO(range,i) range[((i)*2) + 2] + +typedef struct { + int byte_len; /* argument(original) character(s) byte length */ + int code_len; /* number of code */ + OnigCodePoint code[ONIGENC_MAX_COMP_CASE_FOLD_CODE_LEN]; +} OnigCaseFoldCodeItem; + +typedef struct { + OnigCodePoint esc; + OnigCodePoint anychar; + OnigCodePoint anytime; + OnigCodePoint zero_or_one_time; + OnigCodePoint one_or_more_time; + OnigCodePoint anychar_anytime; +} OnigMetaCharTableType; + +typedef int (*OnigApplyAllCaseFoldFunc)(OnigCodePoint from, OnigCodePoint* to, int to_len, void* arg); + +typedef struct OnigEncodingTypeST { + int (*mbc_enc_len)(const OnigUChar* p); + const char* name; + int max_enc_len; + int min_enc_len; + int (*is_mbc_newline)(const OnigUChar* p, const OnigUChar* end); + OnigCodePoint (*mbc_to_code)(const OnigUChar* p, const OnigUChar* end); + int (*code_to_mbclen)(OnigCodePoint code); + int (*code_to_mbc)(OnigCodePoint code, OnigUChar *buf); + int (*mbc_case_fold)(OnigCaseFoldType flag, const OnigUChar** pp, const OnigUChar* end, OnigUChar* to); + int (*apply_all_case_fold)(OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg); + int (*get_case_fold_codes_by_str)(OnigCaseFoldType 
flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem acs[]); + int (*property_name_to_ctype)(struct OnigEncodingTypeST* enc, OnigUChar* p, OnigUChar* end); + int (*is_code_ctype)(OnigCodePoint code, OnigCtype ctype); + int (*get_ctype_code_range)(OnigCtype ctype, OnigCodePoint* sb_out, const OnigCodePoint* ranges[]); + OnigUChar* (*left_adjust_char_head)(const OnigUChar* start, const OnigUChar* p); + int (*is_allowed_reverse_match)(const OnigUChar* p, const OnigUChar* end); +} OnigEncodingType; + +typedef OnigEncodingType* OnigEncoding; + +ONIG_EXTERN OnigEncodingType OnigEncodingASCII; +ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_1; +ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_2; +ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_3; +ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_4; +ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_5; +ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_6; +ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_7; +ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_8; +ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_9; +ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_10; +ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_11; +ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_13; +ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_14; +ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_15; +ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_16; +ONIG_EXTERN OnigEncodingType OnigEncodingUTF8; +ONIG_EXTERN OnigEncodingType OnigEncodingUTF16_BE; +ONIG_EXTERN OnigEncodingType OnigEncodingUTF16_LE; +ONIG_EXTERN OnigEncodingType OnigEncodingUTF32_BE; +ONIG_EXTERN OnigEncodingType OnigEncodingUTF32_LE; +ONIG_EXTERN OnigEncodingType OnigEncodingEUC_JP; +ONIG_EXTERN OnigEncodingType OnigEncodingEUC_TW; +ONIG_EXTERN OnigEncodingType OnigEncodingEUC_KR; +ONIG_EXTERN OnigEncodingType OnigEncodingEUC_CN; +ONIG_EXTERN OnigEncodingType OnigEncodingSJIS; +ONIG_EXTERN OnigEncodingType OnigEncodingKOI8; +ONIG_EXTERN 
OnigEncodingType OnigEncodingKOI8_R; +ONIG_EXTERN OnigEncodingType OnigEncodingCP1251; +ONIG_EXTERN OnigEncodingType OnigEncodingBIG5; +ONIG_EXTERN OnigEncodingType OnigEncodingGB18030; + +#define ONIG_ENCODING_ASCII (&OnigEncodingASCII) +#define ONIG_ENCODING_ISO_8859_1 (&OnigEncodingISO_8859_1) +#define ONIG_ENCODING_ISO_8859_2 (&OnigEncodingISO_8859_2) +#define ONIG_ENCODING_ISO_8859_3 (&OnigEncodingISO_8859_3) +#define ONIG_ENCODING_ISO_8859_4 (&OnigEncodingISO_8859_4) +#define ONIG_ENCODING_ISO_8859_5 (&OnigEncodingISO_8859_5) +#define ONIG_ENCODING_ISO_8859_6 (&OnigEncodingISO_8859_6) +#define ONIG_ENCODING_ISO_8859_7 (&OnigEncodingISO_8859_7) +#define ONIG_ENCODING_ISO_8859_8 (&OnigEncodingISO_8859_8) +#define ONIG_ENCODING_ISO_8859_9 (&OnigEncodingISO_8859_9) +#define ONIG_ENCODING_ISO_8859_10 (&OnigEncodingISO_8859_10) +#define ONIG_ENCODING_ISO_8859_11 (&OnigEncodingISO_8859_11) +#define ONIG_ENCODING_ISO_8859_13 (&OnigEncodingISO_8859_13) +#define ONIG_ENCODING_ISO_8859_14 (&OnigEncodingISO_8859_14) +#define ONIG_ENCODING_ISO_8859_15 (&OnigEncodingISO_8859_15) +#define ONIG_ENCODING_ISO_8859_16 (&OnigEncodingISO_8859_16) +#define ONIG_ENCODING_UTF8 (&OnigEncodingUTF8) +#define ONIG_ENCODING_UTF16_BE (&OnigEncodingUTF16_BE) +#define ONIG_ENCODING_UTF16_LE (&OnigEncodingUTF16_LE) +#define ONIG_ENCODING_UTF32_BE (&OnigEncodingUTF32_BE) +#define ONIG_ENCODING_UTF32_LE (&OnigEncodingUTF32_LE) +#define ONIG_ENCODING_EUC_JP (&OnigEncodingEUC_JP) +#define ONIG_ENCODING_EUC_TW (&OnigEncodingEUC_TW) +#define ONIG_ENCODING_EUC_KR (&OnigEncodingEUC_KR) +#define ONIG_ENCODING_EUC_CN (&OnigEncodingEUC_CN) +#define ONIG_ENCODING_SJIS (&OnigEncodingSJIS) +#define ONIG_ENCODING_KOI8 (&OnigEncodingKOI8) +#define ONIG_ENCODING_KOI8_R (&OnigEncodingKOI8_R) +#define ONIG_ENCODING_CP1251 (&OnigEncodingCP1251) +#define ONIG_ENCODING_BIG5 (&OnigEncodingBIG5) +#define ONIG_ENCODING_GB18030 (&OnigEncodingGB18030) + +#define ONIG_ENCODING_UNDEF ((OnigEncoding )0) + + +/* work size 
*/ +#define ONIGENC_CODE_TO_MBC_MAXLEN 7 +#define ONIGENC_MBC_CASE_FOLD_MAXLEN 18 +/* 18: 6(max-byte) * 3(case-fold chars) */ + +/* character types */ +#define ONIGENC_CTYPE_NEWLINE 0 +#define ONIGENC_CTYPE_ALPHA 1 +#define ONIGENC_CTYPE_BLANK 2 +#define ONIGENC_CTYPE_CNTRL 3 +#define ONIGENC_CTYPE_DIGIT 4 +#define ONIGENC_CTYPE_GRAPH 5 +#define ONIGENC_CTYPE_LOWER 6 +#define ONIGENC_CTYPE_PRINT 7 +#define ONIGENC_CTYPE_PUNCT 8 +#define ONIGENC_CTYPE_SPACE 9 +#define ONIGENC_CTYPE_UPPER 10 +#define ONIGENC_CTYPE_XDIGIT 11 +#define ONIGENC_CTYPE_WORD 12 +#define ONIGENC_CTYPE_ALNUM 13 /* alpha || digit */ +#define ONIGENC_CTYPE_ASCII 14 +#define ONIGENC_MAX_STD_CTYPE ONIGENC_CTYPE_ASCII + + +#define onig_enc_len(enc,p,end) ONIGENC_MBC_ENC_LEN(enc,p) + +#define ONIGENC_IS_UNDEF(enc) ((enc) == ONIG_ENCODING_UNDEF) +#define ONIGENC_IS_SINGLEBYTE(enc) (ONIGENC_MBC_MAXLEN(enc) == 1) +#define ONIGENC_IS_MBC_HEAD(enc,p) (ONIGENC_MBC_ENC_LEN(enc,p) != 1) +#define ONIGENC_IS_MBC_ASCII(p) (*(p) < 128) +#define ONIGENC_IS_CODE_ASCII(code) ((code) < 128) +#define ONIGENC_IS_MBC_WORD(enc,s,end) \ + ONIGENC_IS_CODE_WORD(enc,ONIGENC_MBC_TO_CODE(enc,s,end)) + + +#define ONIGENC_NAME(enc) ((enc)->name) + +#define ONIGENC_MBC_CASE_FOLD(enc,flag,pp,end,buf) \ + (enc)->mbc_case_fold(flag,(const OnigUChar** )pp,end,buf) +#define ONIGENC_IS_ALLOWED_REVERSE_MATCH(enc,s,end) \ + (enc)->is_allowed_reverse_match(s,end) +#define ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc,start,s) \ + (enc)->left_adjust_char_head(start, s) +#define ONIGENC_APPLY_ALL_CASE_FOLD(enc,case_fold_flag,f,arg) \ + (enc)->apply_all_case_fold(case_fold_flag,f,arg) +#define ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc,case_fold_flag,p,end,acs) \ + (enc)->get_case_fold_codes_by_str(case_fold_flag,p,end,acs) +#define ONIGENC_STEP_BACK(enc,start,s,n) \ + onigenc_step_back((enc),(start),(s),(n)) + +#define ONIGENC_MBC_ENC_LEN(enc,p) (enc)->mbc_enc_len(p) +#define ONIGENC_MBC_MAXLEN(enc) ((enc)->max_enc_len) +#define 
ONIGENC_MBC_MAXLEN_DIST(enc) ONIGENC_MBC_MAXLEN(enc) +#define ONIGENC_MBC_MINLEN(enc) ((enc)->min_enc_len) +#define ONIGENC_IS_MBC_NEWLINE(enc,p,end) (enc)->is_mbc_newline((p),(end)) +#define ONIGENC_MBC_TO_CODE(enc,p,end) (enc)->mbc_to_code((p),(end)) +#define ONIGENC_CODE_TO_MBCLEN(enc,code) (enc)->code_to_mbclen(code) +#define ONIGENC_CODE_TO_MBC(enc,code,buf) (enc)->code_to_mbc(code,buf) +#define ONIGENC_PROPERTY_NAME_TO_CTYPE(enc,p,end) \ + (enc)->property_name_to_ctype(enc,p,end) + +#define ONIGENC_IS_CODE_CTYPE(enc,code,ctype) (enc)->is_code_ctype(code,ctype) + +#define ONIGENC_IS_CODE_NEWLINE(enc,code) \ + ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_NEWLINE) +#define ONIGENC_IS_CODE_GRAPH(enc,code) \ + ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_GRAPH) +#define ONIGENC_IS_CODE_PRINT(enc,code) \ + ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_PRINT) +#define ONIGENC_IS_CODE_ALNUM(enc,code) \ + ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_ALNUM) +#define ONIGENC_IS_CODE_ALPHA(enc,code) \ + ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_ALPHA) +#define ONIGENC_IS_CODE_LOWER(enc,code) \ + ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_LOWER) +#define ONIGENC_IS_CODE_UPPER(enc,code) \ + ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_UPPER) +#define ONIGENC_IS_CODE_CNTRL(enc,code) \ + ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_CNTRL) +#define ONIGENC_IS_CODE_PUNCT(enc,code) \ + ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_PUNCT) +#define ONIGENC_IS_CODE_SPACE(enc,code) \ + ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_SPACE) +#define ONIGENC_IS_CODE_BLANK(enc,code) \ + ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_BLANK) +#define ONIGENC_IS_CODE_DIGIT(enc,code) \ + ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_DIGIT) +#define ONIGENC_IS_CODE_XDIGIT(enc,code) \ + ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_XDIGIT) +#define ONIGENC_IS_CODE_WORD(enc,code) \ + ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_WORD) + +#define 
ONIGENC_GET_CTYPE_CODE_RANGE(enc,ctype,sbout,ranges) \ + (enc)->get_ctype_code_range(ctype,sbout,ranges) + +ONIG_EXTERN +OnigUChar* onigenc_step_back P_((OnigEncoding enc, const OnigUChar* start, const OnigUChar* s, int n)); + + +/* encoding API */ +ONIG_EXTERN +int onigenc_init P_((void)); +ONIG_EXTERN +int onigenc_set_default_encoding P_((OnigEncoding enc)); +ONIG_EXTERN +OnigEncoding onigenc_get_default_encoding P_((void)); +ONIG_EXTERN +void onigenc_set_default_caseconv_table P_((const OnigUChar* table)); +ONIG_EXTERN +OnigUChar* onigenc_get_right_adjust_char_head_with_prev P_((OnigEncoding enc, const OnigUChar* start, const OnigUChar* s, const OnigUChar** prev)); +ONIG_EXTERN +OnigUChar* onigenc_get_prev_char_head P_((OnigEncoding enc, const OnigUChar* start, const OnigUChar* s)); +ONIG_EXTERN +OnigUChar* onigenc_get_left_adjust_char_head P_((OnigEncoding enc, const OnigUChar* start, const OnigUChar* s)); +ONIG_EXTERN +OnigUChar* onigenc_get_right_adjust_char_head P_((OnigEncoding enc, const OnigUChar* start, const OnigUChar* s)); +ONIG_EXTERN +int onigenc_strlen P_((OnigEncoding enc, const OnigUChar* p, const OnigUChar* end)); +ONIG_EXTERN +int onigenc_strlen_null P_((OnigEncoding enc, const OnigUChar* p)); +ONIG_EXTERN +int onigenc_str_bytelen_null P_((OnigEncoding enc, const OnigUChar* p)); + + + +/* PART: regular expression */ + +/* config parameters */ +#define ONIG_NREGION 10 +#define ONIG_MAX_BACKREF_NUM 1000 +#define ONIG_MAX_REPEAT_NUM 100000 +#define ONIG_MAX_MULTI_BYTE_RANGES_NUM 10000 +/* constants */ +#define ONIG_MAX_ERROR_MESSAGE_LEN 90 + +typedef unsigned int OnigOptionType; + +#define ONIG_OPTION_DEFAULT ONIG_OPTION_NONE + +/* options */ +#define ONIG_OPTION_NONE 0U +#define ONIG_OPTION_IGNORECASE 1U +#define ONIG_OPTION_EXTEND (ONIG_OPTION_IGNORECASE << 1) +#define ONIG_OPTION_MULTILINE (ONIG_OPTION_EXTEND << 1) +#define ONIG_OPTION_SINGLELINE (ONIG_OPTION_MULTILINE << 1) +#define ONIG_OPTION_FIND_LONGEST (ONIG_OPTION_SINGLELINE << 1) 
+#define ONIG_OPTION_FIND_NOT_EMPTY (ONIG_OPTION_FIND_LONGEST << 1) /* == 1U<<5; each option is the previous one shifted left by one bit */ +#define ONIG_OPTION_NEGATE_SINGLELINE (ONIG_OPTION_FIND_NOT_EMPTY << 1) /* == 1U<<6 */ +#define ONIG_OPTION_DONT_CAPTURE_GROUP (ONIG_OPTION_NEGATE_SINGLELINE << 1) /* == 1U<<7 */ +#define ONIG_OPTION_CAPTURE_GROUP (ONIG_OPTION_DONT_CAPTURE_GROUP << 1) /* == 1U<<8 */ +/* options (search time) */ +#define ONIG_OPTION_NOTBOL (ONIG_OPTION_CAPTURE_GROUP << 1) /* == 1U<<9 */ +#define ONIG_OPTION_NOTEOL (ONIG_OPTION_NOTBOL << 1) /* == 1U<<10 */ +#define ONIG_OPTION_POSIX_REGION (ONIG_OPTION_NOTEOL << 1) /* == 1U<<11 */ +#define ONIG_OPTION_MAXBIT ONIG_OPTION_POSIX_REGION /* limit: alias of the highest option bit defined above */ + +#define ONIG_OPTION_ON(options,regopt) ((options) |= (regopt)) /* ORs regopt into options; options is assigned to, so it must be an lvalue */ +#define ONIG_OPTION_OFF(options,regopt) ((options) &= ~(regopt)) /* clears the regopt bits in options; options must be an lvalue */ +#define ONIG_IS_OPTION_ON(options,option) ((options) & (option)) /* nonzero when any bit of option is set in options (not normalized to 0/1) */ + +/* syntax */ +typedef struct { + unsigned int op; /* presumably a mask of ONIG_SYN_OP_* bits -- TODO confirm in regsyntax.c */ + unsigned int op2; /* presumably a mask of ONIG_SYN_OP2_* bits -- TODO confirm in regsyntax.c */ + unsigned int behavior; /* presumably a mask of the "syntax (behavior)" ONIG_SYN_* bits -- TODO confirm */ + OnigOptionType options; /* default option */ + OnigMetaCharTableType meta_char_table; /* code points for esc, anychar, anytime, etc. */ +} OnigSyntaxType; + +ONIG_EXTERN OnigSyntaxType OnigSyntaxASIS; +ONIG_EXTERN OnigSyntaxType OnigSyntaxPosixBasic; +ONIG_EXTERN OnigSyntaxType OnigSyntaxPosixExtended; +ONIG_EXTERN OnigSyntaxType OnigSyntaxEmacs; +ONIG_EXTERN OnigSyntaxType OnigSyntaxGrep; +ONIG_EXTERN OnigSyntaxType OnigSyntaxGnuRegex; +ONIG_EXTERN OnigSyntaxType OnigSyntaxJava; +ONIG_EXTERN OnigSyntaxType OnigSyntaxPerl; +ONIG_EXTERN OnigSyntaxType OnigSyntaxPerl_NG; +ONIG_EXTERN OnigSyntaxType OnigSyntaxRuby; + +/* predefined syntaxes (see regsyntax.c); each macro is the address of the matching OnigSyntax* object declared above */ +#define ONIG_SYNTAX_ASIS (&OnigSyntaxASIS) +#define ONIG_SYNTAX_POSIX_BASIC (&OnigSyntaxPosixBasic) +#define ONIG_SYNTAX_POSIX_EXTENDED (&OnigSyntaxPosixExtended) +#define ONIG_SYNTAX_EMACS (&OnigSyntaxEmacs) +#define ONIG_SYNTAX_GREP (&OnigSyntaxGrep) +#define ONIG_SYNTAX_GNU_REGEX (&OnigSyntaxGnuRegex) +#define ONIG_SYNTAX_JAVA (&OnigSyntaxJava) +#define ONIG_SYNTAX_PERL (&OnigSyntaxPerl) +#define ONIG_SYNTAX_PERL_NG (&OnigSyntaxPerl_NG) +#define ONIG_SYNTAX_RUBY (&OnigSyntaxRuby) + +/* default syntax */ 
+ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax; +#define ONIG_SYNTAX_DEFAULT OnigDefaultSyntax + +/* syntax (operators) */ +#define ONIG_SYN_OP_VARIABLE_META_CHARACTERS (1U<<0) +#define ONIG_SYN_OP_DOT_ANYCHAR (1U<<1) /* . */ +#define ONIG_SYN_OP_ASTERISK_ZERO_INF (1U<<2) /* * */ +#define ONIG_SYN_OP_ESC_ASTERISK_ZERO_INF (1U<<3) +#define ONIG_SYN_OP_PLUS_ONE_INF (1U<<4) /* + */ +#define ONIG_SYN_OP_ESC_PLUS_ONE_INF (1U<<5) +#define ONIG_SYN_OP_QMARK_ZERO_ONE (1U<<6) /* ? */ +#define ONIG_SYN_OP_ESC_QMARK_ZERO_ONE (1U<<7) +#define ONIG_SYN_OP_BRACE_INTERVAL (1U<<8) /* {lower,upper} */ +#define ONIG_SYN_OP_ESC_BRACE_INTERVAL (1U<<9) /* \{lower,upper\} */ +#define ONIG_SYN_OP_VBAR_ALT (1U<<10) /* | */ +#define ONIG_SYN_OP_ESC_VBAR_ALT (1U<<11) /* \| */ +#define ONIG_SYN_OP_LPAREN_SUBEXP (1U<<12) /* (...) */ +#define ONIG_SYN_OP_ESC_LPAREN_SUBEXP (1U<<13) /* \(...\) */ +#define ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR (1U<<14) /* \A, \Z, \z */ +#define ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR (1U<<15) /* \G */ +#define ONIG_SYN_OP_DECIMAL_BACKREF (1U<<16) /* \num */ +#define ONIG_SYN_OP_BRACKET_CC (1U<<17) /* [...] */ +#define ONIG_SYN_OP_ESC_W_WORD (1U<<18) /* \w, \W */ +#define ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END (1U<<19) /* \<. \> */ +#define ONIG_SYN_OP_ESC_B_WORD_BOUND (1U<<20) /* \b, \B */ +#define ONIG_SYN_OP_ESC_S_WHITE_SPACE (1U<<21) /* \s, \S */ +#define ONIG_SYN_OP_ESC_D_DIGIT (1U<<22) /* \d, \D */ +#define ONIG_SYN_OP_LINE_ANCHOR (1U<<23) /* ^, $ */ +#define ONIG_SYN_OP_POSIX_BRACKET (1U<<24) /* [:xxxx:] */ +#define ONIG_SYN_OP_QMARK_NON_GREEDY (1U<<25) /* ??,*?,+?,{n,m}? */ +#define ONIG_SYN_OP_ESC_CONTROL_CHARS (1U<<26) /* \n,\r,\t,\a ... 
*/ +#define ONIG_SYN_OP_ESC_C_CONTROL (1U<<27) /* \cx */ +#define ONIG_SYN_OP_ESC_OCTAL3 (1U<<28) /* \OOO */ +#define ONIG_SYN_OP_ESC_X_HEX2 (1U<<29) /* \xHH */ +#define ONIG_SYN_OP_ESC_X_BRACE_HEX8 (1U<<30) /* \x{7HHHHHHH} */ + +#define ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE (1U<<0) /* \Q...\E */ +#define ONIG_SYN_OP2_QMARK_GROUP_EFFECT (1U<<1) /* (?...) */ +#define ONIG_SYN_OP2_OPTION_PERL (1U<<2) /* (?imsx),(?-imsx) */ +#define ONIG_SYN_OP2_OPTION_RUBY (1U<<3) /* (?imx), (?-imx) */ +#define ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT (1U<<4) /* ?+,*+,++ */ +#define ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL (1U<<5) /* {n,m}+ */ +#define ONIG_SYN_OP2_CCLASS_SET_OP (1U<<6) /* [...&&..[..]..] */ +#define ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP (1U<<7) /* (?<name>...) */ +#define ONIG_SYN_OP2_ESC_K_NAMED_BACKREF (1U<<8) /* \k<name> */ +#define ONIG_SYN_OP2_ESC_G_SUBEXP_CALL (1U<<9) /* \g<name>, \g<n> */ +#define ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY (1U<<10) /* (?@..),(?@<x>..) */ +#define ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL (1U<<11) /* \C-x */ +#define ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META (1U<<12) /* \M-x */ +#define ONIG_SYN_OP2_ESC_V_VTAB (1U<<13) /* \v as VTAB */ +#define ONIG_SYN_OP2_ESC_U_HEX4 (1U<<14) /* \uHHHH */ +#define ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR (1U<<15) /* \`, \' */ +#define ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY (1U<<16) /* \p{...}, \P{...} */ +#define ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT (1U<<17) /* \p{^..}, \P{^..} */ +/* #define ONIG_SYN_OP2_CHAR_PROPERTY_PREFIX_IS (1U<<18) */ +#define ONIG_SYN_OP2_ESC_H_XDIGIT (1U<<19) /* \h, \H */ +#define ONIG_SYN_OP2_INEFFECTIVE_ESCAPE (1U<<20) /* \ */ + +/* syntax (behavior) */ +#define ONIG_SYN_CONTEXT_INDEP_ANCHORS (1U<<31) /* not implemented */ +#define ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS (1U<<0) /* ?, *, +, {n,m} */ +#define ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS (1U<<1) /* error or ignore */ +#define ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP (1U<<2) /* ...)... */ +#define ONIG_SYN_ALLOW_INVALID_INTERVAL (1U<<3) /* {??? 
*/ +#define ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV (1U<<4) /* {,n} => {0,n} */ +#define ONIG_SYN_STRICT_CHECK_BACKREF (1U<<5) /* /(\1)/,/\1()/ ..*/ +#define ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND (1U<<6) /* (?<=a|bc) */ +#define ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP (1U<<7) /* see doc/RE */ +#define ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME (1U<<8) /* (?<x>)(?<x>) */ +#define ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY (1U<<9) /* a{n}?=(?:a{n})? */ + +/* syntax (behavior) in char class [...] */ +#define ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC (1U<<20) /* [^...] */ +#define ONIG_SYN_BACKSLASH_ESCAPE_IN_CC (1U<<21) /* [..\w..] etc.. */ +#define ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC (1U<<22) +#define ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC (1U<<23) /* [0-9-a]=[0-9\-a] */ +/* syntax (behavior) warning */ +#define ONIG_SYN_WARN_CC_OP_NOT_ESCAPED (1U<<24) /* [,-,] */ +#define ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT (1U<<25) /* (?:a*)+ */ + +/* meta character specifiers (onig_set_meta_char()) */ +#define ONIG_META_CHAR_ESCAPE 0 +#define ONIG_META_CHAR_ANYCHAR 1 +#define ONIG_META_CHAR_ANYTIME 2 +#define ONIG_META_CHAR_ZERO_OR_ONE_TIME 3 +#define ONIG_META_CHAR_ONE_OR_MORE_TIME 4 +#define ONIG_META_CHAR_ANYCHAR_ANYTIME 5 + +#define ONIG_INEFFECTIVE_META_CHAR 0 + +/* error codes */ +#define ONIG_IS_PATTERN_ERROR(ecode) ((ecode) <= -100 && (ecode) > -1000) +/* normal return */ +#define ONIG_NORMAL 0 +#define ONIG_MISMATCH -1 +#define ONIG_NO_SUPPORT_CONFIG -2 + +/* internal error */ +#define ONIGERR_MEMORY -5 +#define ONIGERR_TYPE_BUG -6 +#define ONIGERR_PARSER_BUG -11 +#define ONIGERR_STACK_BUG -12 +#define ONIGERR_UNDEFINED_BYTECODE -13 +#define ONIGERR_UNEXPECTED_BYTECODE -14 +#define ONIGERR_MATCH_STACK_LIMIT_OVER -15 +#define ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED -21 +#define ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR -22 +/* general error */ +#define ONIGERR_INVALID_ARGUMENT -30 +/* syntax error */ +#define ONIGERR_END_PATTERN_AT_LEFT_BRACE -100 +#define ONIGERR_END_PATTERN_AT_LEFT_BRACKET 
-101 +#define ONIGERR_EMPTY_CHAR_CLASS -102 +#define ONIGERR_PREMATURE_END_OF_CHAR_CLASS -103 +#define ONIGERR_END_PATTERN_AT_ESCAPE -104 +#define ONIGERR_END_PATTERN_AT_META -105 +#define ONIGERR_END_PATTERN_AT_CONTROL -106 +#define ONIGERR_META_CODE_SYNTAX -108 +#define ONIGERR_CONTROL_CODE_SYNTAX -109 +#define ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE -110 +#define ONIGERR_CHAR_CLASS_VALUE_AT_START_OF_RANGE -111 +#define ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS -112 +#define ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED -113 +#define ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID -114 +#define ONIGERR_NESTED_REPEAT_OPERATOR -115 +#define ONIGERR_UNMATCHED_CLOSE_PARENTHESIS -116 +#define ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS -117 +#define ONIGERR_END_PATTERN_IN_GROUP -118 +#define ONIGERR_UNDEFINED_GROUP_OPTION -119 +#define ONIGERR_INVALID_POSIX_BRACKET_TYPE -121 +#define ONIGERR_INVALID_LOOK_BEHIND_PATTERN -122 +#define ONIGERR_INVALID_REPEAT_RANGE_PATTERN -123 +/* values error (syntax error) */ +#define ONIGERR_TOO_BIG_NUMBER -200 +#define ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE -201 +#define ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE -202 +#define ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS -203 +#define ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE -204 +#define ONIGERR_TOO_MANY_MULTI_BYTE_RANGES -205 +#define ONIGERR_TOO_SHORT_MULTI_BYTE_STRING -206 +#define ONIGERR_TOO_BIG_BACKREF_NUMBER -207 +#define ONIGERR_INVALID_BACKREF -208 +#define ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED -209 +#define ONIGERR_TOO_LONG_WIDE_CHAR_VALUE -212 +#define ONIGERR_EMPTY_GROUP_NAME -214 +#define ONIGERR_INVALID_GROUP_NAME -215 +#define ONIGERR_INVALID_CHAR_IN_GROUP_NAME -216 +#define ONIGERR_UNDEFINED_NAME_REFERENCE -217 +#define ONIGERR_UNDEFINED_GROUP_REFERENCE -218 +#define ONIGERR_MULTIPLEX_DEFINED_NAME -219 +#define ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL -220 +#define ONIGERR_NEVER_ENDING_RECURSION -221 +#define ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY 
-222 +#define ONIGERR_INVALID_CHAR_PROPERTY_NAME -223 +#define ONIGERR_INVALID_CODE_POINT_VALUE -400 +#define ONIGERR_INVALID_WIDE_CHAR_VALUE -400 +#define ONIGERR_TOO_BIG_WIDE_CHAR_VALUE -401 +#define ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION -402 +#define ONIGERR_INVALID_COMBINATION_OF_OPTIONS -403 + +/* errors related to thread */ +#define ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT -1001 + + +/* must be smaller than BIT_STATUS_BITS_NUM (unsigned int * 8) */ +#define ONIG_MAX_CAPTURE_HISTORY_GROUP 31 +#define ONIG_IS_CAPTURE_HISTORY_GROUP(r, i) \ + ((i) <= ONIG_MAX_CAPTURE_HISTORY_GROUP && (r)->list && (r)->list[i]) + +typedef struct OnigCaptureTreeNodeStruct { + int group; /* group number */ + int beg; + int end; + int allocated; + int num_childs; + struct OnigCaptureTreeNodeStruct** childs; +} OnigCaptureTreeNode; + +/* match result region type */ +struct re_registers { + int allocated; + int num_regs; + int* beg; + int* end; + /* extended */ + OnigCaptureTreeNode* history_root; /* capture history tree root */ +}; + +/* capture tree traverse */ +#define ONIG_TRAVERSE_CALLBACK_AT_FIRST 1 +#define ONIG_TRAVERSE_CALLBACK_AT_LAST 2 +#define ONIG_TRAVERSE_CALLBACK_AT_BOTH \ + ( ONIG_TRAVERSE_CALLBACK_AT_FIRST | ONIG_TRAVERSE_CALLBACK_AT_LAST ) + + +#define ONIG_REGION_NOTPOS -1 + +typedef struct re_registers OnigRegion; + +typedef struct { + OnigEncoding enc; + OnigUChar* par; + OnigUChar* par_end; +} OnigErrorInfo; + +typedef struct { + int lower; + int upper; +} OnigRepeatRange; + +typedef void (*OnigWarnFunc) P_((const char* s)); +extern void onig_null_warn P_((const char* s)); +#define ONIG_NULL_WARN onig_null_warn + +#define ONIG_CHAR_TABLE_SIZE 256 + +/* regex_t state */ +#define ONIG_STATE_NORMAL 0 +#define ONIG_STATE_SEARCHING 1 +#define ONIG_STATE_COMPILING -1 +#define ONIG_STATE_MODIFY -2 + +#define ONIG_STATE(reg) \ + ((reg)->state > 0 ? 
ONIG_STATE_SEARCHING : (reg)->state) + +typedef struct re_pattern_buffer { + /* common members of BBuf(bytes-buffer) */ + unsigned char* p; /* compiled pattern */ + unsigned int used; /* used space for p */ + unsigned int alloc; /* allocated space for p */ + + int state; /* normal, searching, compiling */ + int num_mem; /* used memory(...) num counted from 1 */ + int num_repeat; /* OP_REPEAT/OP_REPEAT_NG id-counter */ + int num_null_check; /* OP_NULL_CHECK_START/END id counter */ + int num_comb_exp_check; /* combination explosion check */ + int num_call; /* number of subexp call */ + unsigned int capture_history; /* (?@...) flag (1-31) */ + unsigned int bt_mem_start; /* need backtrack flag */ + unsigned int bt_mem_end; /* need backtrack flag */ + int stack_pop_level; + int repeat_range_alloc; + OnigRepeatRange* repeat_range; + + OnigEncoding enc; + OnigOptionType options; + OnigSyntaxType* syntax; + OnigCaseFoldType case_fold_flag; + void* name_table; + + /* optimization info (string search, char-map and anchors) */ + int optimize; /* optimize flag */ + int threshold_len; /* search str-length for apply optimize */ + int anchor; /* BEGIN_BUF, BEGIN_POS, (SEMI_)END_BUF */ + OnigDistance anchor_dmin; /* (SEMI_)END_BUF anchor distance */ + OnigDistance anchor_dmax; /* (SEMI_)END_BUF anchor distance */ + int sub_anchor; /* start-anchor for exact or map */ + unsigned char *exact; + unsigned char *exact_end; + unsigned char map[ONIG_CHAR_TABLE_SIZE]; /* used as BM skip or char-map */ + int *int_map; /* BM skip for exact_len > 255 */ + int *int_map_backward; /* BM skip for backward search */ + OnigDistance dmin; /* min-distance of exact or map */ + OnigDistance dmax; /* max-distance of exact or map */ + + /* regex_t link chain */ + struct re_pattern_buffer* chain; /* escape compile-conflict */ +} OnigRegexType; + +typedef OnigRegexType* OnigRegex; + +#ifndef ONIG_ESCAPE_REGEX_T_COLLISION + typedef OnigRegexType regex_t; +#endif + + +typedef struct { + int num_of_elements; 
+ OnigEncoding pattern_enc; + OnigEncoding target_enc; + OnigSyntaxType* syntax; + OnigOptionType option; + OnigCaseFoldType case_fold_flag; +} OnigCompileInfo; + +/* Oniguruma Native API */ +ONIG_EXTERN +int onig_init P_((void)); +ONIG_EXTERN +int onig_error_code_to_str PV_((OnigUChar* s, int err_code, ...)); +ONIG_EXTERN +void onig_set_warn_func P_((OnigWarnFunc f)); +ONIG_EXTERN +void onig_set_verb_warn_func P_((OnigWarnFunc f)); +ONIG_EXTERN +int onig_new P_((OnigRegex*, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* einfo)); +ONIG_EXTERN +int onig_reg_init P_((regex_t* reg, OnigOptionType option, OnigCaseFoldType case_fold_flag, OnigEncoding enc, OnigSyntaxType* syntax)); +int onig_new_without_alloc P_((OnigRegex, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* einfo)); +ONIG_EXTERN +int onig_new_deluxe P_((OnigRegex* reg, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigCompileInfo* ci, OnigErrorInfo* einfo)); +ONIG_EXTERN +void onig_free P_((OnigRegex)); +ONIG_EXTERN +void onig_free_body P_((OnigRegex)); +ONIG_EXTERN +int onig_recompile P_((OnigRegex, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* einfo)); +ONIG_EXTERN +int onig_recompile_deluxe P_((OnigRegex reg, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigCompileInfo* ci, OnigErrorInfo* einfo)); +ONIG_EXTERN +int onig_search P_((OnigRegex, const OnigUChar* str, const OnigUChar* end, const OnigUChar* start, const OnigUChar* range, OnigRegion* region, OnigOptionType option)); +ONIG_EXTERN +int onig_match P_((OnigRegex, const OnigUChar* str, const OnigUChar* end, const OnigUChar* at, OnigRegion* region, OnigOptionType option)); +ONIG_EXTERN +OnigRegion* onig_region_new P_((void)); +ONIG_EXTERN +void onig_region_init 
P_((OnigRegion* region)); +ONIG_EXTERN +void onig_region_free P_((OnigRegion* region, int free_self)); +ONIG_EXTERN +void onig_region_copy P_((OnigRegion* to, OnigRegion* from)); +ONIG_EXTERN +void onig_region_clear P_((OnigRegion* region)); +ONIG_EXTERN +int onig_region_resize P_((OnigRegion* region, int n)); +ONIG_EXTERN +int onig_region_set P_((OnigRegion* region, int at, int beg, int end)); +ONIG_EXTERN +int onig_name_to_group_numbers P_((OnigRegex reg, const OnigUChar* name, const OnigUChar* name_end, int** nums)); +ONIG_EXTERN +int onig_name_to_backref_number P_((OnigRegex reg, const OnigUChar* name, const OnigUChar* name_end, OnigRegion *region)); +ONIG_EXTERN +int onig_foreach_name P_((OnigRegex reg, int (*func)(const OnigUChar*, const OnigUChar*,int,int*,OnigRegex,void*), void* arg)); +ONIG_EXTERN +int onig_number_of_names P_((OnigRegex reg)); +ONIG_EXTERN +int onig_number_of_captures P_((OnigRegex reg)); +ONIG_EXTERN +int onig_number_of_capture_histories P_((OnigRegex reg)); +ONIG_EXTERN +OnigCaptureTreeNode* onig_get_capture_tree P_((OnigRegion* region)); +ONIG_EXTERN +int onig_capture_tree_traverse P_((OnigRegion* region, int at, int(*callback_func)(int,int,int,int,int,void*), void* arg)); +ONIG_EXTERN +int onig_noname_group_capture_is_active P_((OnigRegex reg)); +ONIG_EXTERN +OnigEncoding onig_get_encoding P_((OnigRegex reg)); +ONIG_EXTERN +OnigOptionType onig_get_options P_((OnigRegex reg)); +ONIG_EXTERN +OnigCaseFoldType onig_get_case_fold_flag P_((OnigRegex reg)); +ONIG_EXTERN +OnigSyntaxType* onig_get_syntax P_((OnigRegex reg)); +ONIG_EXTERN +int onig_set_default_syntax P_((OnigSyntaxType* syntax)); +ONIG_EXTERN +void onig_copy_syntax P_((OnigSyntaxType* to, OnigSyntaxType* from)); +ONIG_EXTERN +unsigned int onig_get_syntax_op P_((OnigSyntaxType* syntax)); +ONIG_EXTERN +unsigned int onig_get_syntax_op2 P_((OnigSyntaxType* syntax)); +ONIG_EXTERN +unsigned int onig_get_syntax_behavior P_((OnigSyntaxType* syntax)); +ONIG_EXTERN +OnigOptionType 
onig_get_syntax_options P_((OnigSyntaxType* syntax)); +ONIG_EXTERN +void onig_set_syntax_op P_((OnigSyntaxType* syntax, unsigned int op)); +ONIG_EXTERN +void onig_set_syntax_op2 P_((OnigSyntaxType* syntax, unsigned int op2)); +ONIG_EXTERN +void onig_set_syntax_behavior P_((OnigSyntaxType* syntax, unsigned int behavior)); +ONIG_EXTERN +void onig_set_syntax_options P_((OnigSyntaxType* syntax, OnigOptionType options)); +ONIG_EXTERN +int onig_set_meta_char P_((OnigSyntaxType* syntax, unsigned int what, OnigCodePoint code)); +ONIG_EXTERN +void onig_copy_encoding P_((OnigEncoding to, OnigEncoding from)); +ONIG_EXTERN +OnigCaseFoldType onig_get_default_case_fold_flag P_((void)); +ONIG_EXTERN +int onig_set_default_case_fold_flag P_((OnigCaseFoldType case_fold_flag)); +ONIG_EXTERN +unsigned int onig_get_match_stack_limit_size P_((void)); +ONIG_EXTERN +int onig_set_match_stack_limit_size P_((unsigned int size)); +ONIG_EXTERN +int onig_end P_((void)); +ONIG_EXTERN +const char* onig_version P_((void)); +ONIG_EXTERN +const char* onig_copyright P_((void)); + +#ifdef __cplusplus +} +#endif + +#endif /* ONIGURUMA_H */ diff --git a/oniguruma/regcomp.c b/oniguruma/regcomp.c new file mode 100644 index 0000000..f9d9956 --- /dev/null +++ b/oniguruma/regcomp.c @@ -0,0 +1,6254 @@ +/********************************************************************** + regcomp.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2008 K.Kosako + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "regparse.h" + +OnigCaseFoldType OnigDefaultCaseFoldFlag = ONIGENC_CASE_FOLD_MIN; + +extern OnigCaseFoldType +onig_get_default_case_fold_flag(void) +{ + return OnigDefaultCaseFoldFlag; +} + +extern int +onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag) +{ + OnigDefaultCaseFoldFlag = case_fold_flag; + return 0; +} + + +#ifndef PLATFORM_UNALIGNED_WORD_ACCESS +static unsigned char PadBuf[WORD_ALIGNMENT_SIZE]; +#endif + +static UChar* +str_dup(UChar* s, UChar* end) +{ + int len = end - s; + + if (len > 0) { + UChar* r = (UChar* )xmalloc(len + 1); + CHECK_NULL_RETURN(r); + xmemcpy(r, s, len); + r[len] = (UChar )0; + return r; + } + else return NULL; +} + +static void +swap_node(Node* a, Node* b) +{ + Node c; + c = *a; *a = *b; *b = c; + + if (NTYPE(a) == NT_STR) { + StrNode* sn = NSTR(a); + if (sn->capa == 0) { + int len = sn->end - sn->s; + sn->s = sn->buf; + sn->end = sn->s + len; + } + } + + if (NTYPE(b) == NT_STR) { + StrNode* sn = NSTR(b); + if (sn->capa == 
0) { + int len = sn->end - sn->s; + sn->s = sn->buf; + sn->end = sn->s + len; + } + } +} + +static OnigDistance +distance_add(OnigDistance d1, OnigDistance d2) +{ + if (d1 == ONIG_INFINITE_DISTANCE || d2 == ONIG_INFINITE_DISTANCE) + return ONIG_INFINITE_DISTANCE; + else { + if (d1 <= ONIG_INFINITE_DISTANCE - d2) return d1 + d2; + else return ONIG_INFINITE_DISTANCE; + } +} + +static OnigDistance +distance_multiply(OnigDistance d, int m) +{ + if (m == 0) return 0; + + if (d < ONIG_INFINITE_DISTANCE / m) + return d * m; + else + return ONIG_INFINITE_DISTANCE; +} + +static int +bitset_is_empty(BitSetRef bs) +{ + int i; + for (i = 0; i < (int )BITSET_SIZE; i++) { + if (bs[i] != 0) return 0; + } + return 1; +} + +#ifdef ONIG_DEBUG +static int +bitset_on_num(BitSetRef bs) +{ + int i, n; + + n = 0; + for (i = 0; i < SINGLE_BYTE_SIZE; i++) { + if (BITSET_AT(bs, i)) n++; + } + return n; +} +#endif + +extern int +onig_bbuf_init(BBuf* buf, int size) +{ + if (size <= 0) { + size = 0; + buf->p = NULL; + } + else { + buf->p = (UChar* )xmalloc(size); + if (IS_NULL(buf->p)) return(ONIGERR_MEMORY); + } + + buf->alloc = size; + buf->used = 0; + return 0; +} + + +#ifdef USE_SUBEXP_CALL + +static int +unset_addr_list_init(UnsetAddrList* uslist, int size) +{ + UnsetAddr* p; + + p = (UnsetAddr* )xmalloc(sizeof(UnsetAddr)* size); + CHECK_NULL_RETURN_MEMERR(p); + uslist->num = 0; + uslist->alloc = size; + uslist->us = p; + return 0; +} + +static void +unset_addr_list_end(UnsetAddrList* uslist) +{ + if (IS_NOT_NULL(uslist->us)) + xfree(uslist->us); +} + +static int +unset_addr_list_add(UnsetAddrList* uslist, int offset, struct _Node* node) +{ + UnsetAddr* p; + int size; + + if (uslist->num >= uslist->alloc) { + size = uslist->alloc * 2; + p = (UnsetAddr* )xrealloc(uslist->us, sizeof(UnsetAddr) * size); + CHECK_NULL_RETURN_MEMERR(p); + uslist->alloc = size; + uslist->us = p; + } + + uslist->us[uslist->num].offset = offset; + uslist->us[uslist->num].target = node; + uslist->num++; + return 0; 
+} +#endif /* USE_SUBEXP_CALL */ + + +static int +add_opcode(regex_t* reg, int opcode) +{ + BBUF_ADD1(reg, opcode); + return 0; +} + +#ifdef USE_COMBINATION_EXPLOSION_CHECK +static int +add_state_check_num(regex_t* reg, int num) +{ + StateCheckNumType n = (StateCheckNumType )num; + + BBUF_ADD(reg, &n, SIZE_STATE_CHECK_NUM); + return 0; +} +#endif + +static int +add_rel_addr(regex_t* reg, int addr) +{ + RelAddrType ra = (RelAddrType )addr; + + BBUF_ADD(reg, &ra, SIZE_RELADDR); + return 0; +} + +static int +add_abs_addr(regex_t* reg, int addr) +{ + AbsAddrType ra = (AbsAddrType )addr; + + BBUF_ADD(reg, &ra, SIZE_ABSADDR); + return 0; +} + +static int +add_length(regex_t* reg, int len) +{ + LengthType l = (LengthType )len; + + BBUF_ADD(reg, &l, SIZE_LENGTH); + return 0; +} + +static int +add_mem_num(regex_t* reg, int num) +{ + MemNumType n = (MemNumType )num; + + BBUF_ADD(reg, &n, SIZE_MEMNUM); + return 0; +} + +static int +add_pointer(regex_t* reg, void* addr) +{ + PointerType ptr = (PointerType )addr; + + BBUF_ADD(reg, &ptr, SIZE_POINTER); + return 0; +} + +static int +add_option(regex_t* reg, OnigOptionType option) +{ + BBUF_ADD(reg, &option, SIZE_OPTION); + return 0; +} + +static int +add_opcode_rel_addr(regex_t* reg, int opcode, int addr) +{ + int r; + + r = add_opcode(reg, opcode); + if (r) return r; + r = add_rel_addr(reg, addr); + return r; +} + +static int +add_bytes(regex_t* reg, UChar* bytes, int len) +{ + BBUF_ADD(reg, bytes, len); + return 0; +} + +static int +add_bitset(regex_t* reg, BitSetRef bs) +{ + BBUF_ADD(reg, bs, SIZE_BITSET); + return 0; +} + +static int +add_opcode_option(regex_t* reg, int opcode, OnigOptionType option) +{ + int r; + + r = add_opcode(reg, opcode); + if (r) return r; + r = add_option(reg, option); + return r; +} + +static int compile_length_tree(Node* node, regex_t* reg); +static int compile_tree(Node* node, regex_t* reg); + + +#define IS_NEED_STR_LEN_OP_EXACT(op) \ + ((op) == OP_EXACTN || (op) == OP_EXACTMB2N ||\ + (op) == 
OP_EXACTMB3N || (op) == OP_EXACTMBN || (op) == OP_EXACTN_IC) + +static int +select_str_opcode(int mb_len, int str_len, int ignore_case) +{ + int op; + + if (ignore_case) { + switch (str_len) { + case 1: op = OP_EXACT1_IC; break; + default: op = OP_EXACTN_IC; break; + } + } + else { + switch (mb_len) { + case 1: + switch (str_len) { + case 1: op = OP_EXACT1; break; + case 2: op = OP_EXACT2; break; + case 3: op = OP_EXACT3; break; + case 4: op = OP_EXACT4; break; + case 5: op = OP_EXACT5; break; + default: op = OP_EXACTN; break; + } + break; + + case 2: + switch (str_len) { + case 1: op = OP_EXACTMB2N1; break; + case 2: op = OP_EXACTMB2N2; break; + case 3: op = OP_EXACTMB2N3; break; + default: op = OP_EXACTMB2N; break; + } + break; + + case 3: + op = OP_EXACTMB3N; + break; + + default: + op = OP_EXACTMBN; + break; + } + } + return op; +} + +static int +compile_tree_empty_check(Node* node, regex_t* reg, int empty_info) +{ + int r; + int saved_num_null_check = reg->num_null_check; + + if (empty_info != 0) { + r = add_opcode(reg, OP_NULL_CHECK_START); + if (r) return r; + r = add_mem_num(reg, reg->num_null_check); /* NULL CHECK ID */ + if (r) return r; + reg->num_null_check++; + } + + r = compile_tree(node, reg); + if (r) return r; + + if (empty_info != 0) { + if (empty_info == NQ_TARGET_IS_EMPTY) + r = add_opcode(reg, OP_NULL_CHECK_END); + else if (empty_info == NQ_TARGET_IS_EMPTY_MEM) + r = add_opcode(reg, OP_NULL_CHECK_END_MEMST); + else if (empty_info == NQ_TARGET_IS_EMPTY_REC) + r = add_opcode(reg, OP_NULL_CHECK_END_MEMST_PUSH); + + if (r) return r; + r = add_mem_num(reg, saved_num_null_check); /* NULL CHECK ID */ + } + return r; +} + +#ifdef USE_SUBEXP_CALL +static int +compile_call(CallNode* node, regex_t* reg) +{ + int r; + + r = add_opcode(reg, OP_CALL); + if (r) return r; + r = unset_addr_list_add(node->unset_addr_list, BBUF_GET_OFFSET_POS(reg), + node->target); + if (r) return r; + r = add_abs_addr(reg, 0 /*dummy addr.*/); + return r; +} +#endif + +static int 
+compile_tree_n_times(Node* node, int n, regex_t* reg) +{ + int i, r; + + for (i = 0; i < n; i++) { + r = compile_tree(node, reg); + if (r) return r; + } + return 0; +} + +static int +add_compile_string_length(UChar* s ARG_UNUSED, int mb_len, int str_len, + regex_t* reg ARG_UNUSED, int ignore_case) +{ + int len; + int op = select_str_opcode(mb_len, str_len, ignore_case); + + len = SIZE_OPCODE; + + if (op == OP_EXACTMBN) len += SIZE_LENGTH; + if (IS_NEED_STR_LEN_OP_EXACT(op)) + len += SIZE_LENGTH; + + len += mb_len * str_len; + return len; +} + +static int +add_compile_string(UChar* s, int mb_len, int str_len, + regex_t* reg, int ignore_case) +{ + int op = select_str_opcode(mb_len, str_len, ignore_case); + add_opcode(reg, op); + + if (op == OP_EXACTMBN) + add_length(reg, mb_len); + + if (IS_NEED_STR_LEN_OP_EXACT(op)) { + if (op == OP_EXACTN_IC) + add_length(reg, mb_len * str_len); + else + add_length(reg, str_len); + } + + add_bytes(reg, s, mb_len * str_len); + return 0; +} + + +static int +compile_length_string_node(Node* node, regex_t* reg) +{ + int rlen, r, len, prev_len, slen, ambig; + OnigEncoding enc = reg->enc; + UChar *p, *prev; + StrNode* sn; + + sn = NSTR(node); + if (sn->end <= sn->s) + return 0; + + ambig = NSTRING_IS_AMBIG(node); + + p = prev = sn->s; + prev_len = enclen(enc, p); + p += prev_len; + slen = 1; + rlen = 0; + + for (; p < sn->end; ) { + len = enclen(enc, p); + if (len == prev_len) { + slen++; + } + else { + r = add_compile_string_length(prev, prev_len, slen, reg, ambig); + rlen += r; + prev = p; + slen = 1; + prev_len = len; + } + p += len; + } + r = add_compile_string_length(prev, prev_len, slen, reg, ambig); + rlen += r; + return rlen; +} + +static int +compile_length_string_raw_node(StrNode* sn, regex_t* reg) +{ + if (sn->end <= sn->s) + return 0; + + return add_compile_string_length(sn->s, 1 /* sb */, sn->end - sn->s, reg, 0); +} + +static int +compile_string_node(Node* node, regex_t* reg) +{ + int r, len, prev_len, slen, ambig; + 
OnigEncoding enc = reg->enc; + UChar *p, *prev, *end; + StrNode* sn; + + sn = NSTR(node); + if (sn->end <= sn->s) + return 0; + + end = sn->end; + ambig = NSTRING_IS_AMBIG(node); + + p = prev = sn->s; + prev_len = enclen(enc, p); + p += prev_len; + slen = 1; + + for (; p < end; ) { + len = enclen(enc, p); + if (len == prev_len) { + slen++; + } + else { + r = add_compile_string(prev, prev_len, slen, reg, ambig); + if (r) return r; + + prev = p; + slen = 1; + prev_len = len; + } + + p += len; + } + return add_compile_string(prev, prev_len, slen, reg, ambig); +} + +static int +compile_string_raw_node(StrNode* sn, regex_t* reg) +{ + if (sn->end <= sn->s) + return 0; + + return add_compile_string(sn->s, 1 /* sb */, sn->end - sn->s, reg, 0); +} + +static int +add_multi_byte_cclass(BBuf* mbuf, regex_t* reg) +{ +#ifdef PLATFORM_UNALIGNED_WORD_ACCESS + add_length(reg, mbuf->used); + return add_bytes(reg, mbuf->p, mbuf->used); +#else + int r, pad_size; + UChar* p = BBUF_GET_ADD_ADDRESS(reg) + SIZE_LENGTH; + + GET_ALIGNMENT_PAD_SIZE(p, pad_size); + add_length(reg, mbuf->used + (WORD_ALIGNMENT_SIZE - 1)); + if (pad_size != 0) add_bytes(reg, PadBuf, pad_size); + + r = add_bytes(reg, mbuf->p, mbuf->used); + + /* padding for return value from compile_length_cclass_node() to be fix. 
*/ + pad_size = (WORD_ALIGNMENT_SIZE - 1) - pad_size; + if (pad_size != 0) add_bytes(reg, PadBuf, pad_size); + return r; +#endif +} + +static int +compile_length_cclass_node(CClassNode* cc, regex_t* reg) +{ + int len; + + if (IS_NCCLASS_SHARE(cc)) { + len = SIZE_OPCODE + SIZE_POINTER; + return len; + } + + if (IS_NULL(cc->mbuf)) { + len = SIZE_OPCODE + SIZE_BITSET; + } + else { + if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) { + len = SIZE_OPCODE; + } + else { + len = SIZE_OPCODE + SIZE_BITSET; + } +#ifdef PLATFORM_UNALIGNED_WORD_ACCESS + len += SIZE_LENGTH + cc->mbuf->used; +#else + len += SIZE_LENGTH + cc->mbuf->used + (WORD_ALIGNMENT_SIZE - 1); +#endif + } + + return len; +} + +static int +compile_cclass_node(CClassNode* cc, regex_t* reg) +{ + int r; + + if (IS_NCCLASS_SHARE(cc)) { + add_opcode(reg, OP_CCLASS_NODE); + r = add_pointer(reg, cc); + return r; + } + + if (IS_NULL(cc->mbuf)) { + if (IS_NCCLASS_NOT(cc)) + add_opcode(reg, OP_CCLASS_NOT); + else + add_opcode(reg, OP_CCLASS); + + r = add_bitset(reg, cc->bs); + } + else { + if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) { + if (IS_NCCLASS_NOT(cc)) + add_opcode(reg, OP_CCLASS_MB_NOT); + else + add_opcode(reg, OP_CCLASS_MB); + + r = add_multi_byte_cclass(cc->mbuf, reg); + } + else { + if (IS_NCCLASS_NOT(cc)) + add_opcode(reg, OP_CCLASS_MIX_NOT); + else + add_opcode(reg, OP_CCLASS_MIX); + + r = add_bitset(reg, cc->bs); + if (r) return r; + r = add_multi_byte_cclass(cc->mbuf, reg); + } + } + + return r; +} + +static int +entry_repeat_range(regex_t* reg, int id, int lower, int upper) +{ +#define REPEAT_RANGE_ALLOC 4 + + OnigRepeatRange* p; + + if (reg->repeat_range_alloc == 0) { + p = (OnigRepeatRange* )xmalloc(sizeof(OnigRepeatRange) * REPEAT_RANGE_ALLOC); + CHECK_NULL_RETURN_MEMERR(p); + reg->repeat_range = p; + reg->repeat_range_alloc = REPEAT_RANGE_ALLOC; + } + else if (reg->repeat_range_alloc <= id) { + int n; + n = reg->repeat_range_alloc + REPEAT_RANGE_ALLOC; + p = 
(OnigRepeatRange* )xrealloc(reg->repeat_range, + sizeof(OnigRepeatRange) * n); + CHECK_NULL_RETURN_MEMERR(p); + reg->repeat_range = p; + reg->repeat_range_alloc = n; + } + else { + p = reg->repeat_range; + } + + p[id].lower = lower; + p[id].upper = (IS_REPEAT_INFINITE(upper) ? 0x7fffffff : upper); + return 0; +} + +static int +compile_range_repeat_node(QtfrNode* qn, int target_len, int empty_info, + regex_t* reg) +{ + int r; + int num_repeat = reg->num_repeat; + + r = add_opcode(reg, qn->greedy ? OP_REPEAT : OP_REPEAT_NG); + if (r) return r; + r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */ + reg->num_repeat++; + if (r) return r; + r = add_rel_addr(reg, target_len + SIZE_OP_REPEAT_INC); + if (r) return r; + + r = entry_repeat_range(reg, num_repeat, qn->lower, qn->upper); + if (r) return r; + + r = compile_tree_empty_check(qn->target, reg, empty_info); + if (r) return r; + + if ( +#ifdef USE_SUBEXP_CALL + reg->num_call > 0 || +#endif + IS_QUANTIFIER_IN_REPEAT(qn)) { + r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC_SG : OP_REPEAT_INC_NG_SG); + } + else { + r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC : OP_REPEAT_INC_NG); + } + if (r) return r; + r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */ + return r; +} + +static int +is_anychar_star_quantifier(QtfrNode* qn) +{ + if (qn->greedy && IS_REPEAT_INFINITE(qn->upper) && + NTYPE(qn->target) == NT_CANY) + return 1; + else + return 0; +} + +#define QUANTIFIER_EXPAND_LIMIT_SIZE 50 +#define CKN_ON (ckn > 0) + +#ifdef USE_COMBINATION_EXPLOSION_CHECK + +static int +compile_length_quantifier_node(QtfrNode* qn, regex_t* reg) +{ + int len, mod_tlen, cklen; + int ckn; + int infinite = IS_REPEAT_INFINITE(qn->upper); + int empty_info = qn->target_empty_info; + int tlen = compile_length_tree(qn->target, reg); + + if (tlen < 0) return tlen; + + ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0); + + cklen = (CKN_ON ? 
SIZE_STATE_CHECK_NUM: 0); + + /* anychar repeat */ + if (NTYPE(qn->target) == NT_CANY) { + if (qn->greedy && infinite) { + if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON) + return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower + cklen; + else + return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower + cklen; + } + } + + if (empty_info != 0) + mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END); + else + mod_tlen = tlen; + + if (infinite && qn->lower <= 1) { + if (qn->greedy) { + if (qn->lower == 1) + len = SIZE_OP_JUMP; + else + len = 0; + + len += SIZE_OP_PUSH + cklen + mod_tlen + SIZE_OP_JUMP; + } + else { + if (qn->lower == 0) + len = SIZE_OP_JUMP; + else + len = 0; + + len += mod_tlen + SIZE_OP_PUSH + cklen; + } + } + else if (qn->upper == 0) { + if (qn->is_refered != 0) /* /(?..){0}/ */ + len = SIZE_OP_JUMP + tlen; + else + len = 0; + } + else if (qn->upper == 1 && qn->greedy) { + if (qn->lower == 0) { + if (CKN_ON) { + len = SIZE_OP_STATE_CHECK_PUSH + tlen; + } + else { + len = SIZE_OP_PUSH + tlen; + } + } + else { + len = tlen; + } + } + else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */ + len = SIZE_OP_PUSH + cklen + SIZE_OP_JUMP + tlen; + } + else { + len = SIZE_OP_REPEAT_INC + + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM; + if (CKN_ON) + len += SIZE_OP_STATE_CHECK; + } + + return len; +} + +static int +compile_quantifier_node(QtfrNode* qn, regex_t* reg) +{ + int r, mod_tlen; + int ckn; + int infinite = IS_REPEAT_INFINITE(qn->upper); + int empty_info = qn->target_empty_info; + int tlen = compile_length_tree(qn->target, reg); + + if (tlen < 0) return tlen; + + ckn = ((reg->num_comb_exp_check > 0) ? 
qn->comb_exp_check_num : 0); + + if (is_anychar_star_quantifier(qn)) { + r = compile_tree_n_times(qn->target, qn->lower, reg); + if (r) return r; + if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON) { + if (IS_MULTILINE(reg->options)) + r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT); + else + r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT); + if (r) return r; + if (CKN_ON) { + r = add_state_check_num(reg, ckn); + if (r) return r; + } + + return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1); + } + else { + if (IS_MULTILINE(reg->options)) { + r = add_opcode(reg, (CKN_ON ? + OP_STATE_CHECK_ANYCHAR_ML_STAR + : OP_ANYCHAR_ML_STAR)); + } + else { + r = add_opcode(reg, (CKN_ON ? + OP_STATE_CHECK_ANYCHAR_STAR + : OP_ANYCHAR_STAR)); + } + if (r) return r; + if (CKN_ON) + r = add_state_check_num(reg, ckn); + + return r; + } + } + + if (empty_info != 0) + mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END); + else + mod_tlen = tlen; + + if (infinite && qn->lower <= 1) { + if (qn->greedy) { + if (qn->lower == 1) { + r = add_opcode_rel_addr(reg, OP_JUMP, + (CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH)); + if (r) return r; + } + + if (CKN_ON) { + r = add_opcode(reg, OP_STATE_CHECK_PUSH); + if (r) return r; + r = add_state_check_num(reg, ckn); + if (r) return r; + r = add_rel_addr(reg, mod_tlen + SIZE_OP_JUMP); + } + else { + r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP); + } + if (r) return r; + r = compile_tree_empty_check(qn->target, reg, empty_info); + if (r) return r; + r = add_opcode_rel_addr(reg, OP_JUMP, + -(mod_tlen + (int )SIZE_OP_JUMP + + (int )(CKN_ON ? 
SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH))); + } + else { + if (qn->lower == 0) { + r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen); + if (r) return r; + } + r = compile_tree_empty_check(qn->target, reg, empty_info); + if (r) return r; + if (CKN_ON) { + r = add_opcode(reg, OP_STATE_CHECK_PUSH_OR_JUMP); + if (r) return r; + r = add_state_check_num(reg, ckn); + if (r) return r; + r = add_rel_addr(reg, + -(mod_tlen + (int )SIZE_OP_STATE_CHECK_PUSH_OR_JUMP)); + } + else + r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH)); + } + } + else if (qn->upper == 0) { + if (qn->is_refered != 0) { /* /(?..){0}/ */ + r = add_opcode_rel_addr(reg, OP_JUMP, tlen); + if (r) return r; + r = compile_tree(qn->target, reg); + } + else + r = 0; + } + else if (qn->upper == 1 && qn->greedy) { + if (qn->lower == 0) { + if (CKN_ON) { + r = add_opcode(reg, OP_STATE_CHECK_PUSH); + if (r) return r; + r = add_state_check_num(reg, ckn); + if (r) return r; + r = add_rel_addr(reg, tlen); + } + else { + r = add_opcode_rel_addr(reg, OP_PUSH, tlen); + } + if (r) return r; + } + + r = compile_tree(qn->target, reg); + } + else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' 
*/ + if (CKN_ON) { + r = add_opcode(reg, OP_STATE_CHECK_PUSH); + if (r) return r; + r = add_state_check_num(reg, ckn); + if (r) return r; + r = add_rel_addr(reg, SIZE_OP_JUMP); + } + else { + r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP); + } + + if (r) return r; + r = add_opcode_rel_addr(reg, OP_JUMP, tlen); + if (r) return r; + r = compile_tree(qn->target, reg); + } + else { + r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg); + if (CKN_ON) { + if (r) return r; + r = add_opcode(reg, OP_STATE_CHECK); + if (r) return r; + r = add_state_check_num(reg, ckn); + } + } + return r; +} + +#else /* USE_COMBINATION_EXPLOSION_CHECK */ + +static int +compile_length_quantifier_node(QtfrNode* qn, regex_t* reg) +{ + int len, mod_tlen; + int infinite = IS_REPEAT_INFINITE(qn->upper); + int empty_info = qn->target_empty_info; + int tlen = compile_length_tree(qn->target, reg); + + if (tlen < 0) return tlen; + + /* anychar repeat */ + if (NTYPE(qn->target) == NT_CANY) { + if (qn->greedy && infinite) { + if (IS_NOT_NULL(qn->next_head_exact)) + return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower; + else + return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower; + } + } + + if (empty_info != 0) + mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END); + else + mod_tlen = tlen; + + if (infinite && + (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) { + if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) { + len = SIZE_OP_JUMP; + } + else { + len = tlen * qn->lower; + } + + if (qn->greedy) { + if (IS_NOT_NULL(qn->head_exact)) + len += SIZE_OP_PUSH_OR_JUMP_EXACT1 + mod_tlen + SIZE_OP_JUMP; + else if (IS_NOT_NULL(qn->next_head_exact)) + len += SIZE_OP_PUSH_IF_PEEK_NEXT + mod_tlen + SIZE_OP_JUMP; + else + len += SIZE_OP_PUSH + mod_tlen + SIZE_OP_JUMP; + } + else + len += SIZE_OP_JUMP + mod_tlen + SIZE_OP_PUSH; + } + else if (qn->upper == 0 && qn->is_refered != 0) { /* /(?..){0}/ */ + len = SIZE_OP_JUMP + tlen; + } + else if (!infinite && 
qn->greedy && + (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper + <= QUANTIFIER_EXPAND_LIMIT_SIZE)) { + len = tlen * qn->lower; + len += (SIZE_OP_PUSH + tlen) * (qn->upper - qn->lower); + } + else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */ + len = SIZE_OP_PUSH + SIZE_OP_JUMP + tlen; + } + else { + len = SIZE_OP_REPEAT_INC + + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM; + } + + return len; +} + +static int +compile_quantifier_node(QtfrNode* qn, regex_t* reg) +{ + int i, r, mod_tlen; + int infinite = IS_REPEAT_INFINITE(qn->upper); + int empty_info = qn->target_empty_info; + int tlen = compile_length_tree(qn->target, reg); + + if (tlen < 0) return tlen; + + if (is_anychar_star_quantifier(qn)) { + r = compile_tree_n_times(qn->target, qn->lower, reg); + if (r) return r; + if (IS_NOT_NULL(qn->next_head_exact)) { + if (IS_MULTILINE(reg->options)) + r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT); + else + r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT); + if (r) return r; + return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1); + } + else { + if (IS_MULTILINE(reg->options)) + return add_opcode(reg, OP_ANYCHAR_ML_STAR); + else + return add_opcode(reg, OP_ANYCHAR_STAR); + } + } + + if (empty_info != 0) + mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END); + else + mod_tlen = tlen; + + if (infinite && + (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) { + if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) { + if (qn->greedy) { + if (IS_NOT_NULL(qn->head_exact)) + r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_OR_JUMP_EXACT1); + else if (IS_NOT_NULL(qn->next_head_exact)) + r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_IF_PEEK_NEXT); + else + r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH); + } + else { + r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_JUMP); + } + if (r) return r; + } + else { + r = compile_tree_n_times(qn->target, qn->lower, reg); + if (r) return r; + } + + if 
(qn->greedy) { + if (IS_NOT_NULL(qn->head_exact)) { + r = add_opcode_rel_addr(reg, OP_PUSH_OR_JUMP_EXACT1, + mod_tlen + SIZE_OP_JUMP); + if (r) return r; + add_bytes(reg, NSTR(qn->head_exact)->s, 1); + r = compile_tree_empty_check(qn->target, reg, empty_info); + if (r) return r; + r = add_opcode_rel_addr(reg, OP_JUMP, + -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_OR_JUMP_EXACT1)); + } + else if (IS_NOT_NULL(qn->next_head_exact)) { + r = add_opcode_rel_addr(reg, OP_PUSH_IF_PEEK_NEXT, + mod_tlen + SIZE_OP_JUMP); + if (r) return r; + add_bytes(reg, NSTR(qn->next_head_exact)->s, 1); + r = compile_tree_empty_check(qn->target, reg, empty_info); + if (r) return r; + r = add_opcode_rel_addr(reg, OP_JUMP, + -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_IF_PEEK_NEXT)); + } + else { + r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP); + if (r) return r; + r = compile_tree_empty_check(qn->target, reg, empty_info); + if (r) return r; + r = add_opcode_rel_addr(reg, OP_JUMP, + -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH)); + } + } + else { + r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen); + if (r) return r; + r = compile_tree_empty_check(qn->target, reg, empty_info); + if (r) return r; + r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH)); + } + } + else if (qn->upper == 0 && qn->is_refered != 0) { /* /(?..){0}/ */ + r = add_opcode_rel_addr(reg, OP_JUMP, tlen); + if (r) return r; + r = compile_tree(qn->target, reg); + } + else if (!infinite && qn->greedy && + (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper + <= QUANTIFIER_EXPAND_LIMIT_SIZE)) { + int n = qn->upper - qn->lower; + + r = compile_tree_n_times(qn->target, qn->lower, reg); + if (r) return r; + + for (i = 0; i < n; i++) { + r = add_opcode_rel_addr(reg, OP_PUSH, + (n - i) * tlen + (n - i - 1) * SIZE_OP_PUSH); + if (r) return r; + r = compile_tree(qn->target, reg); + if (r) return r; + } + } + else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' 
*/ + r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP); + if (r) return r; + r = add_opcode_rel_addr(reg, OP_JUMP, tlen); + if (r) return r; + r = compile_tree(qn->target, reg); + } + else { + r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg); + } + return r; +} +#endif /* USE_COMBINATION_EXPLOSION_CHECK */ +/* Length of the code compile_option_node() emits: the target's length, measured with reg->options temporarily set to node->option, plus the option set/restore opcodes when a dynamic option bit differs from the previous options. A negative target length is returned unchanged. */ +static int +compile_length_option_node(EncloseNode* node, regex_t* reg) +{ + int tlen; + OnigOptionType prev = reg->options; + + reg->options = node->option; + tlen = compile_length_tree(node->target, reg); + reg->options = prev; + + if (tlen < 0) return tlen; + + if (IS_DYNAMIC_OPTION(prev ^ node->option)) { + return SIZE_OP_SET_OPTION_PUSH + SIZE_OP_SET_OPTION + SIZE_OP_FAIL + + tlen + SIZE_OP_SET_OPTION; + } + else + return tlen; +} +/* Emit code for an option group. When a dynamic option bit changes, OP_SET_OPTION_PUSH(node->option), OP_SET_OPTION(prev) and OP_FAIL are emitted before the target and OP_SET_OPTION(prev) after it. reg->options is set back to prev right after the target is compiled, whether or not that succeeded. */ +static int +compile_option_node(EncloseNode* node, regex_t* reg) +{ + int r; + OnigOptionType prev = reg->options; + + if (IS_DYNAMIC_OPTION(prev ^ node->option)) { + r = add_opcode_option(reg, OP_SET_OPTION_PUSH, node->option); + if (r) return r; + r = add_opcode_option(reg, OP_SET_OPTION, prev); + if (r) return r; + r = add_opcode(reg, OP_FAIL); + if (r) return r; + } + + reg->options = node->option; + r = compile_tree(node->target, reg); + reg->options = prev; + + if (IS_DYNAMIC_OPTION(prev ^ node->option)) { + if (r) return r; + r = add_opcode_option(reg, OP_SET_OPTION, prev); + } + return r; +} +/* Length of the code compile_enclose_node() emits for an enclose node. Option nodes are delegated to compile_length_option_node(); a negative target length is returned unchanged and an unknown node type yields ONIGERR_TYPE_BUG. */ +static int +compile_length_enclose_node(EncloseNode* node, regex_t* reg) +{ + int len; + int tlen; + + if (node->type == ENCLOSE_OPTION) + return compile_length_option_node(node, reg); + + if (node->target) { + tlen = compile_length_tree(node->target, reg); + if (tlen < 0) return tlen; + } + else + tlen = 0; + + switch (node->type) { + case ENCLOSE_MEMORY: +#ifdef USE_SUBEXP_CALL + if (IS_ENCLOSE_CALLED(node)) { + len = SIZE_OP_MEMORY_START_PUSH + tlen + + SIZE_OP_CALL + SIZE_OP_JUMP + SIZE_OP_RETURN; + if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)) + len += (IS_ENCLOSE_RECURSION(node) + ? 
SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH); + else + len += (IS_ENCLOSE_RECURSION(node) + ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END); + } + else +#endif + { + if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum)) + len = SIZE_OP_MEMORY_START_PUSH; + else + len = SIZE_OP_MEMORY_START; + + len += tlen + (BIT_STATUS_AT(reg->bt_mem_end, node->regnum) + ? SIZE_OP_MEMORY_END_PUSH : SIZE_OP_MEMORY_END); + } + break; + + case ENCLOSE_STOP_BACKTRACK: + if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) { + QtfrNode* qn = NQTFR(node->target); + tlen = compile_length_tree(qn->target, reg); /* re-measure: the loop body is the quantifier's target, not the quantifier */ + if (tlen < 0) return tlen; + + len = tlen * qn->lower + + SIZE_OP_PUSH + tlen + SIZE_OP_POP + SIZE_OP_JUMP; + } + else { + len = SIZE_OP_PUSH_STOP_BT + tlen + SIZE_OP_POP_STOP_BT; + } + break; + + default: + return ONIGERR_TYPE_BUG; + break; + } + + return len; +} +/* Forward declaration: compile_anchor_node() calls this before its definition. */ +static int get_char_length_tree(Node* node, regex_t* reg, int* len); +/* Emit code for an enclose node. Option groups go to compile_option_node(). Memory groups get a MEMORY_START opcode, the target and a MEMORY_END opcode; under USE_SUBEXP_CALL a called group is additionally preceded by OP_CALL and OP_JUMP and terminated by OP_RETURN. Stop-backtrack groups are wrapped in PUSH_STOP_BT/POP_STOP_BT or, for the simple-repeat form, compiled as a PUSH/POP/JUMP loop. Returns the status of the last emit call, or ONIGERR_TYPE_BUG for an unknown type. */ +static int +compile_enclose_node(EncloseNode* node, regex_t* reg) +{ + int r, len; + + if (node->type == ENCLOSE_OPTION) + return compile_option_node(node, reg); + + switch (node->type) { + case ENCLOSE_MEMORY: +#ifdef USE_SUBEXP_CALL + if (IS_ENCLOSE_CALLED(node)) { + r = add_opcode(reg, OP_CALL); + if (r) return r; + node->call_addr = BBUF_GET_OFFSET_POS(reg) + SIZE_ABSADDR + SIZE_OP_JUMP; + node->state |= NST_ADDR_FIXED; + r = add_abs_addr(reg, (int )node->call_addr); + if (r) return r; + len = compile_length_tree(node->target, reg); /* NOTE(review): unlike the stop-backtrack branch of this function, a negative (error) len is not checked here before it is used as a jump offset */ + len += (SIZE_OP_MEMORY_START_PUSH + SIZE_OP_RETURN); + if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)) + len += (IS_ENCLOSE_RECURSION(node) + ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH); + else + len += (IS_ENCLOSE_RECURSION(node) + ? 
SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END); + + r = add_opcode_rel_addr(reg, OP_JUMP, len); /* jump over the group body that follows */ + if (r) return r; + } +#endif + if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum)) + r = add_opcode(reg, OP_MEMORY_START_PUSH); + else + r = add_opcode(reg, OP_MEMORY_START); + if (r) return r; + r = add_mem_num(reg, node->regnum); + if (r) return r; + r = compile_tree(node->target, reg); + if (r) return r; +#ifdef USE_SUBEXP_CALL + if (IS_ENCLOSE_CALLED(node)) { + if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)) + r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node) + ? OP_MEMORY_END_PUSH_REC : OP_MEMORY_END_PUSH)); + else + r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node) + ? OP_MEMORY_END_REC : OP_MEMORY_END)); + + if (r) return r; + r = add_mem_num(reg, node->regnum); + if (r) return r; + r = add_opcode(reg, OP_RETURN); + } + else +#endif + { + if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)) + r = add_opcode(reg, OP_MEMORY_END_PUSH); + else + r = add_opcode(reg, OP_MEMORY_END); + if (r) return r; + r = add_mem_num(reg, node->regnum); + } + break; + + case ENCLOSE_STOP_BACKTRACK: + if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) { + QtfrNode* qn = NQTFR(node->target); + r = compile_tree_n_times(qn->target, qn->lower, reg); /* mandatory repetitions first */ + if (r) return r; + + len = compile_length_tree(qn->target, reg); + if (len < 0) return len; + + r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_POP + SIZE_OP_JUMP); + if (r) return r; + r = compile_tree(qn->target, reg); + if (r) return r; + r = add_opcode(reg, OP_POP); + if (r) return r; + r = add_opcode_rel_addr(reg, OP_JUMP, /* back to the OP_PUSH emitted above */ + -((int )SIZE_OP_PUSH + len + (int )SIZE_OP_POP + (int )SIZE_OP_JUMP)); + } + else { + r = add_opcode(reg, OP_PUSH_STOP_BT); + if (r) return r; + r = compile_tree(node->target, reg); + if (r) return r; + r = add_opcode(reg, OP_POP_STOP_BT); + } + break; + + default: + return ONIGERR_TYPE_BUG; + break; + } + + return r; +} +/* Length of the code compile_anchor_node() emits. The look-ahead and look-behind anchors add their own opcode sizes to the target's length; every other anchor type counts as a single opcode (SIZE_OPCODE). A negative target length is returned unchanged. */ +static int +compile_length_anchor_node(AnchorNode* node, regex_t* reg) +{ + int len; + int tlen = 0; + 
+ if (node->target) { + tlen = compile_length_tree(node->target, reg); + if (tlen < 0) return tlen; + } + + switch (node->type) { + case ANCHOR_PREC_READ: + len = SIZE_OP_PUSH_POS + tlen + SIZE_OP_POP_POS; + break; + case ANCHOR_PREC_READ_NOT: + len = SIZE_OP_PUSH_POS_NOT + tlen + SIZE_OP_FAIL_POS; + break; + case ANCHOR_LOOK_BEHIND: + len = SIZE_OP_LOOK_BEHIND + tlen; + break; + case ANCHOR_LOOK_BEHIND_NOT: + len = SIZE_OP_PUSH_LOOK_BEHIND_NOT + tlen + SIZE_OP_FAIL_LOOK_BEHIND_NOT; + break; + + default: + len = SIZE_OPCODE; + break; + } + + return len; +} +/* Emit code for an anchor node. Simple anchors map to one opcode each; the look-ahead and look-behind variants wrap the compiled target. For look-behind anchors a negative node->char_len means the character length is computed here with get_char_length_tree(), and any failure there is reported as ONIGERR_INVALID_LOOK_BEHIND_PATTERN. Unknown anchor types return ONIGERR_TYPE_BUG. */ +static int +compile_anchor_node(AnchorNode* node, regex_t* reg) +{ + int r, len; + + switch (node->type) { + case ANCHOR_BEGIN_BUF: r = add_opcode(reg, OP_BEGIN_BUF); break; + case ANCHOR_END_BUF: r = add_opcode(reg, OP_END_BUF); break; + case ANCHOR_BEGIN_LINE: r = add_opcode(reg, OP_BEGIN_LINE); break; + case ANCHOR_END_LINE: r = add_opcode(reg, OP_END_LINE); break; + case ANCHOR_SEMI_END_BUF: r = add_opcode(reg, OP_SEMI_END_BUF); break; + case ANCHOR_BEGIN_POSITION: r = add_opcode(reg, OP_BEGIN_POSITION); break; + + case ANCHOR_WORD_BOUND: r = add_opcode(reg, OP_WORD_BOUND); break; + case ANCHOR_NOT_WORD_BOUND: r = add_opcode(reg, OP_NOT_WORD_BOUND); break; +#ifdef USE_WORD_BEGIN_END + case ANCHOR_WORD_BEGIN: r = add_opcode(reg, OP_WORD_BEGIN); break; + case ANCHOR_WORD_END: r = add_opcode(reg, OP_WORD_END); break; +#endif + + case ANCHOR_PREC_READ: + r = add_opcode(reg, OP_PUSH_POS); + if (r) return r; + r = compile_tree(node->target, reg); + if (r) return r; + r = add_opcode(reg, OP_POP_POS); + break; + + case ANCHOR_PREC_READ_NOT: + len = compile_length_tree(node->target, reg); + if (len < 0) return len; + r = add_opcode_rel_addr(reg, OP_PUSH_POS_NOT, len + SIZE_OP_FAIL_POS); + if (r) return r; + r = compile_tree(node->target, reg); + if (r) return r; + r = add_opcode(reg, OP_FAIL_POS); + break; + + case ANCHOR_LOOK_BEHIND: + { + int n; + r = add_opcode(reg, OP_LOOK_BEHIND); + if (r) return r; + if 
(node->char_len < 0) { + r = get_char_length_tree(node->target, reg, &n); + if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN; + } + else + n = node->char_len; + r = add_length(reg, n); + if (r) return r; + r = compile_tree(node->target, reg); + } + break; + + case ANCHOR_LOOK_BEHIND_NOT: + { + int n; + len = compile_length_tree(node->target, reg); /* NOTE(review): no check for a negative (error) len here, unlike the ANCHOR_PREC_READ_NOT case of this function */ + r = add_opcode_rel_addr(reg, OP_PUSH_LOOK_BEHIND_NOT, + len + SIZE_OP_FAIL_LOOK_BEHIND_NOT); + if (r) return r; + if (node->char_len < 0) { + r = get_char_length_tree(node->target, reg, &n); + if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN; + } + else + n = node->char_len; + r = add_length(reg, n); + if (r) return r; + r = compile_tree(node->target, reg); + if (r) return r; + r = add_opcode(reg, OP_FAIL_LOOK_BEHIND_NOT); + } + break; + + default: + return ONIGERR_TYPE_BUG; + break; + } + + return r; +} +/* Return the length of the code compile_tree() will emit for node, computed by dispatching on the node type. Errors from the per-type helpers are passed back as their (negative) return value; an unknown node type yields ONIGERR_TYPE_BUG. */ +static int +compile_length_tree(Node* node, regex_t* reg) +{ + int len, type, r; + + type = NTYPE(node); + switch (type) { + case NT_LIST: + len = 0; + do { + r = compile_length_tree(NCAR(node), reg); + if (r < 0) return r; + len += r; + } while (IS_NOT_NULL(node = NCDR(node))); + r = len; + break; + + case NT_ALT: + { + int n; + + n = r = 0; + do { + r += compile_length_tree(NCAR(node), reg); /* NOTE(review): an error return is summed into r unchecked, unlike the NT_LIST case */ + n++; + } while (IS_NOT_NULL(node = NCDR(node))); + r += (SIZE_OP_PUSH + SIZE_OP_JUMP) * (n - 1); /* one PUSH/JUMP pair between consecutive alternatives */ + } + break; + + case NT_STR: + if (NSTRING_IS_RAW(node)) + r = compile_length_string_raw_node(NSTR(node), reg); + else + r = compile_length_string_node(node, reg); + break; + + case NT_CCLASS: + r = compile_length_cclass_node(NCCLASS(node), reg); + break; + + case NT_CTYPE: + case NT_CANY: + r = SIZE_OPCODE; + break; + + case NT_BREF: + { + BRefNode* br = NBREF(node); + +#ifdef USE_BACKREF_WITH_LEVEL + if (IS_BACKREF_NEST_LEVEL(br)) { + r = SIZE_OPCODE + SIZE_OPTION + SIZE_LENGTH + + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num); + } + else +#endif + if (br->back_num == 1) { + r = ((!IS_IGNORECASE(reg->options) && 
br->back_static[0] <= 2) + ? SIZE_OPCODE : (SIZE_OPCODE + SIZE_MEMNUM)); /* compile_tree() emits OP_BACKREF1/OP_BACKREF2 without a group-number operand */ + } + else { + r = SIZE_OPCODE + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num); + } + } + break; + +#ifdef USE_SUBEXP_CALL + case NT_CALL: + r = SIZE_OP_CALL; + break; +#endif + + case NT_QTFR: + r = compile_length_quantifier_node(NQTFR(node), reg); + break; + + case NT_ENCLOSE: + r = compile_length_enclose_node(NENCLOSE(node), reg); + break; + + case NT_ANCHOR: + r = compile_length_anchor_node(NANCHOR(node), reg); + break; + + default: + return ONIGERR_TYPE_BUG; + break; + } + + return r; +} +/* Emit the code for node into reg, dispatching on the node type. Returns 0 on success or the first non-zero status reported by an emit helper. */ +static int +compile_tree(Node* node, regex_t* reg) +{ + int n, type, len, pos, r = 0; + + type = NTYPE(node); + switch (type) { + case NT_LIST: + do { + r = compile_tree(NCAR(node), reg); + } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); + break; + + case NT_ALT: + { + Node* x = node; + len = 0; + do { + len += compile_length_tree(NCAR(x), reg); /* NOTE(review): error returns are not checked in this length pre-pass */ + if (NCDR(x) != NULL) { + len += SIZE_OP_PUSH + SIZE_OP_JUMP; + } + } while (IS_NOT_NULL(x = NCDR(x))); + pos = reg->used + len; /* goal position */ + + do { + len = compile_length_tree(NCAR(node), reg); + if (IS_NOT_NULL(NCDR(node))) { + r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_JUMP); + if (r) break; + } + r = compile_tree(NCAR(node), reg); + if (r) break; + if (IS_NOT_NULL(NCDR(node))) { + len = pos - (reg->used + SIZE_OP_JUMP); /* distance from the end of this JUMP to the goal position */ + r = add_opcode_rel_addr(reg, OP_JUMP, len); + if (r) break; + } + } while (IS_NOT_NULL(node = NCDR(node))); + } + break; + + case NT_STR: + if (NSTRING_IS_RAW(node)) + r = compile_string_raw_node(NSTR(node), reg); + else + r = compile_string_node(node, reg); + break; + + case NT_CCLASS: + r = compile_cclass_node(NCCLASS(node), reg); + break; + + case NT_CTYPE: + { + int op; + + switch (NCTYPE(node)->ctype) { + case ONIGENC_CTYPE_WORD: + if (NCTYPE(node)->not != 0) op = OP_NOT_WORD; + else op = OP_WORD; + break; + default: + return ONIGERR_TYPE_BUG; + break; + } + r = add_opcode(reg, op); + } + break; + + case NT_CANY: + 
if (IS_MULTILINE(reg->options)) + r = add_opcode(reg, OP_ANYCHAR_ML); + else + r = add_opcode(reg, OP_ANYCHAR); + break; + + case NT_BREF: + { + BRefNode* br = NBREF(node); + +#ifdef USE_BACKREF_WITH_LEVEL + if (IS_BACKREF_NEST_LEVEL(br)) { + r = add_opcode(reg, OP_BACKREF_WITH_LEVEL); + if (r) return r; + r = add_option(reg, (reg->options & ONIG_OPTION_IGNORECASE)); + if (r) return r; + r = add_length(reg, br->nest_level); + if (r) return r; + + goto add_bacref_mems; /* share the group-number list emission with the multi-backref branch */ + } + else +#endif + if (br->back_num == 1) { + n = br->back_static[0]; + if (IS_IGNORECASE(reg->options)) { + r = add_opcode(reg, OP_BACKREFN_IC); + if (r) return r; + r = add_mem_num(reg, n); + } + else { + switch (n) { + case 1: r = add_opcode(reg, OP_BACKREF1); break; + case 2: r = add_opcode(reg, OP_BACKREF2); break; + default: + r = add_opcode(reg, OP_BACKREFN); + if (r) return r; + r = add_mem_num(reg, n); + break; + } + } + } + else { + int i; + int* p; + + if (IS_IGNORECASE(reg->options)) { + r = add_opcode(reg, OP_BACKREF_MULTI_IC); + } + else { + r = add_opcode(reg, OP_BACKREF_MULTI); + } + if (r) return r; + +#ifdef USE_BACKREF_WITH_LEVEL + add_bacref_mems: +#endif + r = add_length(reg, br->back_num); + if (r) return r; + p = BACKREFS_P(br); + for (i = br->back_num - 1; i >= 0; i--) { /* group numbers are written last-to-first */ + r = add_mem_num(reg, p[i]); + if (r) return r; + } + } + } + break; + +#ifdef USE_SUBEXP_CALL + case NT_CALL: + r = compile_call(NCALL(node), reg); + break; +#endif + + case NT_QTFR: + r = compile_quantifier_node(NQTFR(node), reg); + break; + + case NT_ENCLOSE: + r = compile_enclose_node(NENCLOSE(node), reg); + break; + + case NT_ANCHOR: + r = compile_anchor_node(NANCHOR(node), reg); + break; + + default: +#ifdef ONIG_DEBUG + fprintf(stderr, "compile_tree: undefined node type %d\n", NTYPE(node)); +#endif + break; /* NOTE(review): an unknown node type leaves r at 0 (success) here, whereas compile_length_tree() returns ONIGERR_TYPE_BUG */ + } + + return r; +} + +#ifdef USE_NAMED_GROUP +/* Walk the tree rooted at *plink and renumber capture groups: each named memory group gets the next consecutive number, recorded in map[old_regnum].new_val, while an unnamed memory group is unlinked (replaced in its parent link by its target) and freed. *counter holds the last number assigned. Returns 0 or the first non-zero status from a recursive call. */ +static int +noname_disable_map(Node** plink, GroupNumRemap* map, int* counter) +{ + int r = 0; + Node* node = *plink; + + switch (NTYPE(node)) { + 
case NT_LIST: + case NT_ALT: + do { + r = noname_disable_map(&(NCAR(node)), map, counter); + } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); + break; + + case NT_QTFR: + { + Node** ptarget = &(NQTFR(node)->target); + Node* old = *ptarget; + r = noname_disable_map(ptarget, map, counter); + if (*ptarget != old && NTYPE(*ptarget) == NT_QTFR) { /* the target was replaced and is now itself a quantifier */ + onig_reduce_nested_quantifier(node, *ptarget); + } + } + break; + + case NT_ENCLOSE: + { + EncloseNode* en = NENCLOSE(node); + if (en->type == ENCLOSE_MEMORY) { + if (IS_ENCLOSE_NAMED_GROUP(en)) { + (*counter)++; + map[en->regnum].new_val = *counter; + en->regnum = *counter; + r = noname_disable_map(&(en->target), map, counter); + } + else { /* unnamed group: splice its target into the parent link, then free the group node */ + *plink = en->target; + en->target = NULL_NODE; /* detach first so onig_node_free() is not handed the spliced target */ + onig_node_free(node); + r = noname_disable_map(plink, map, counter); + } + } + else + r = noname_disable_map(&(en->target), map, counter); + } + break; + + default: + break; + } + + return r; +} +/* Rewrite the group numbers of one back-reference node through map, keeping only entries whose new_val is positive and shrinking back_num accordingly. Only name references are accepted; anything else returns ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED. */ +static int +renumber_node_backref(Node* node, GroupNumRemap* map) +{ + int i, pos, n, old_num; + int *backs; + BRefNode* bn = NBREF(node); + + if (! 
IS_BACKREF_NAME_REF(bn)) + return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED; + + old_num = bn->back_num; + if (IS_NULL(bn->back_dynamic)) + backs = bn->back_static; + else + backs = bn->back_dynamic; + + for (i = 0, pos = 0; i < old_num; i++) { /* compact in place: pos never runs ahead of i */ + n = map[backs[i]].new_val; + if (n > 0) { + backs[pos] = n; + pos++; + } + } + + bn->back_num = pos; + return 0; +} +/* Apply renumber_node_backref() to every back-reference reachable from node through list, alternation, quantifier and enclose nodes, stopping at the first error. NOTE(review): anchor targets (NT_ANCHOR) are not visited by this switch; confirm that is intended. */ +static int +renumber_by_map(Node* node, GroupNumRemap* map) +{ + int r = 0; + + switch (NTYPE(node)) { + case NT_LIST: + case NT_ALT: + do { + r = renumber_by_map(NCAR(node), map); + } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); + break; + case NT_QTFR: + r = renumber_by_map(NQTFR(node)->target, map); + break; + case NT_ENCLOSE: + r = renumber_by_map(NENCLOSE(node)->target, map); + break; + + case NT_BREF: + r = renumber_node_backref(node, map); + break; + + default: + break; + } + + return r; +} +/* Return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED if a back-reference that is not a name reference is reachable from node through list, alternation, quantifier and enclose nodes; otherwise 0. */ +static int +numbered_ref_check(Node* node) +{ + int r = 0; + + switch (NTYPE(node)) { + case NT_LIST: + case NT_ALT: + do { + r = numbered_ref_check(NCAR(node)); + } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); + break; + case NT_QTFR: + r = numbered_ref_check(NQTFR(node)->target); + break; + case NT_ENCLOSE: + r = numbered_ref_check(NENCLOSE(node)->target); + break; + + case NT_BREF: + if (! 
IS_BACKREF_NAME_REF(NBREF(node))) + return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED; + break; + + default: + break; + } + + return r; +} +/* Make only named groups capture. Builds an old-number to new-number map (env->num_mem + 1 entries from xalloca), removes unnamed groups with noname_disable_map(), rewrites back-references with renumber_by_map(), compacts SCANENV_MEM_NODES(env), remaps the capture-history bits, sets num_mem to env->num_named in both env and reg, and finally renumbers the name table. Returns the first non-zero status encountered. */ +static int +disable_noname_group_capture(Node** root, regex_t* reg, ScanEnv* env) +{ + int r, i, pos, counter; + BitStatusType loc; + GroupNumRemap* map; + + map = (GroupNumRemap* )xalloca(sizeof(GroupNumRemap) * (env->num_mem + 1)); + CHECK_NULL_RETURN_MEMERR(map); + for (i = 1; i <= env->num_mem; i++) { /* group numbers are 1-based; entry 0 is left unset */ + map[i].new_val = 0; + } + counter = 0; + r = noname_disable_map(root, map, &counter); + if (r != 0) return r; + + r = renumber_by_map(*root, map); + if (r != 0) return r; + + for (i = 1, pos = 1; i <= env->num_mem; i++) { + if (map[i].new_val > 0) { + SCANENV_MEM_NODES(env)[pos] = SCANENV_MEM_NODES(env)[i]; + pos++; + } + } + + loc = env->capture_history; + BIT_STATUS_CLEAR(env->capture_history); + for (i = 1; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) { /* NOTE(review): map has only num_mem + 1 entries; presumably a set history bit implies i <= num_mem, verify */ + if (BIT_STATUS_AT(loc, i)) { + BIT_STATUS_ON_AT_SIMPLE(env->capture_history, map[i].new_val); + } + } + + env->num_mem = env->num_named; + reg->num_mem = env->num_named; + + return onig_renumber_name_table(reg, map); +} +#endif /* USE_NAMED_GROUP */ +/* unset_addr_list_fix: for each entry of uslist, write the call_addr of the entry's target group at the entry's recorded offset in reg's code buffer. Returns ONIGERR_PARSER_BUG if a target's address has not been fixed, else 0. */ +#ifdef USE_SUBEXP_CALL +static int +unset_addr_list_fix(UnsetAddrList* uslist, regex_t* reg) +{ + int i, offset; + EncloseNode* en; + AbsAddrType addr; + + for (i = 0; i < uslist->num; i++) { + en = NENCLOSE(uslist->us[i].target); + if (! 
IS_ENCLOSE_ADDR_FIXED(en)) return ONIGERR_PARSER_BUG; + addr = en->call_addr; + offset = uslist->us[i].offset; + + BBUF_WRITE(reg, offset, &addr, SIZE_ABSADDR); + } + return 0; +} +#endif +/* quantifiers_memory_node_info: classify what a quantifier target contains. Returns NQ_TARGET_IS_EMPTY_MEM as soon as a memory (capture) group is found, NQ_TARGET_IS_EMPTY_REC for a recursive call, otherwise the largest value found among the children (0 when none applies). */ +#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT +static int +quantifiers_memory_node_info(Node* node) +{ + int r = 0; + + switch (NTYPE(node)) { + case NT_LIST: + case NT_ALT: + { + int v; + do { + v = quantifiers_memory_node_info(NCAR(node)); + if (v > r) r = v; + } while (v >= 0 && IS_NOT_NULL(node = NCDR(node))); + } + break; + +#ifdef USE_SUBEXP_CALL + case NT_CALL: + if (IS_CALL_RECURSION(NCALL(node))) { + return NQ_TARGET_IS_EMPTY_REC; /* tiny version */ + } + else + r = quantifiers_memory_node_info(NCALL(node)->target); + break; +#endif + + case NT_QTFR: + { + QtfrNode* qn = NQTFR(node); + if (qn->upper != 0) { /* a {0} quantifier's target is not inspected */ + r = quantifiers_memory_node_info(qn->target); + } + } + break; + + case NT_ENCLOSE: + { + EncloseNode* en = NENCLOSE(node); + switch (en->type) { + case ENCLOSE_MEMORY: + return NQ_TARGET_IS_EMPTY_MEM; + break; + + case ENCLOSE_OPTION: + case ENCLOSE_STOP_BACKTRACK: + r = quantifiers_memory_node_info(en->target); + break; + default: + break; + } + } + break; + + case NT_BREF: + case NT_STR: + case NT_CTYPE: + case NT_CCLASS: + case NT_CANY: + case NT_ANCHOR: + default: + break; + } + + return r; +} +#endif /* USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT */ +/* Store in *min the minimum match length of node; *min stays 0 for anchors and unknown node types. Returns 0, or ONIGERR_INVALID_BACKREF when a back-reference number exceeds env->num_mem. Under USE_SUBEXP_CALL the value computed for a memory group is cached in en->min_len and flagged with NST_MIN_FIXED. */ +static int +get_min_match_length(Node* node, OnigDistance *min, ScanEnv* env) +{ + OnigDistance tmin; + int r = 0; + + *min = 0; + switch (NTYPE(node)) { + case NT_BREF: + { + int i; + int* backs; + Node** nodes = SCANENV_MEM_NODES(env); + BRefNode* br = NBREF(node); + if (br->state & NST_RECURSION) break; + + backs = BACKREFS_P(br); + if (backs[0] > env->num_mem) return ONIGERR_INVALID_BACKREF; /* assumes br->back_num >= 1 - TODO confirm */ + r = get_min_match_length(nodes[backs[0]], min, env); + if (r != 0) break; + for (i = 1; i < br->back_num; i++) { + if (backs[i] > env->num_mem) return ONIGERR_INVALID_BACKREF; + r = 
get_min_match_length(nodes[backs[i]], &tmin, env); + if (r != 0) break; + if (*min > tmin) *min = tmin; /* keep the smallest minimum over all referenced groups */ + } + } + break; + +#ifdef USE_SUBEXP_CALL + case NT_CALL: + if (IS_CALL_RECURSION(NCALL(node))) { + EncloseNode* en = NENCLOSE(NCALL(node)->target); + if (IS_ENCLOSE_MIN_FIXED(en)) + *min = en->min_len; + } + else + r = get_min_match_length(NCALL(node)->target, min, env); + break; +#endif + + case NT_LIST: + do { + r = get_min_match_length(NCAR(node), &tmin, env); + if (r == 0) *min += tmin; /* NOTE(review): plain addition here, whereas get_max_match_length() uses distance_add() */ + } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); + break; + + case NT_ALT: + { + Node *x, *y; + y = node; + do { + x = NCAR(y); + r = get_min_match_length(x, &tmin, env); + if (r != 0) break; + if (y == node) *min = tmin; /* first alternative seeds the minimum */ + else if (*min > tmin) *min = tmin; + } while (r == 0 && IS_NOT_NULL(y = NCDR(y))); + } + break; + + case NT_STR: + { + StrNode* sn = NSTR(node); + *min = sn->end - sn->s; + } + break; + + case NT_CTYPE: + *min = 1; + break; + + case NT_CCLASS: + case NT_CANY: + *min = 1; + break; + + case NT_QTFR: + { + QtfrNode* qn = NQTFR(node); + + if (qn->lower > 0) { + r = get_min_match_length(qn->target, min, env); + if (r == 0) + *min = distance_multiply(*min, qn->lower); + } + } + break; + + case NT_ENCLOSE: + { + EncloseNode* en = NENCLOSE(node); + switch (en->type) { + case ENCLOSE_MEMORY: +#ifdef USE_SUBEXP_CALL + if (IS_ENCLOSE_MIN_FIXED(en)) + *min = en->min_len; + else { + r = get_min_match_length(en->target, min, env); + if (r == 0) { + en->min_len = *min; + SET_ENCLOSE_STATUS(node, NST_MIN_FIXED); + } + } + break; +#endif /* without USE_SUBEXP_CALL, ENCLOSE_MEMORY falls through to the cases below */ + case ENCLOSE_OPTION: + case ENCLOSE_STOP_BACKTRACK: + r = get_min_match_length(en->target, min, env); + break; + } + } + break; + + case NT_ANCHOR: + default: + break; + } + + return r; +} +/* Store in *max the maximum match length of node; ONIG_INFINITE_DISTANCE is used for recursive references and calls and for quantifiers with an infinite upper bound, and *max stays 0 for anchors and unknown node types. Returns 0, or ONIGERR_INVALID_BACKREF when a back-reference number exceeds env->num_mem. Under USE_SUBEXP_CALL the value computed for a memory group is cached in en->max_len and flagged with NST_MAX_FIXED. */ +static int +get_max_match_length(Node* node, OnigDistance *max, ScanEnv* env) +{ + OnigDistance tmax; + int r = 0; + + *max = 0; + switch (NTYPE(node)) { + case NT_LIST: + do { + r = get_max_match_length(NCAR(node), &tmax, env); + if (r == 0) 
+ *max = distance_add(*max, tmax); + } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); + break; + + case NT_ALT: + do { + r = get_max_match_length(NCAR(node), &tmax, env); + if (r == 0 && *max < tmax) *max = tmax; /* longest alternative wins */ + } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); + break; + + case NT_STR: + { + StrNode* sn = NSTR(node); + *max = sn->end - sn->s; + } + break; + + case NT_CTYPE: + *max = ONIGENC_MBC_MAXLEN_DIST(env->enc); + break; + + case NT_CCLASS: + case NT_CANY: + *max = ONIGENC_MBC_MAXLEN_DIST(env->enc); + break; + + case NT_BREF: + { + int i; + int* backs; + Node** nodes = SCANENV_MEM_NODES(env); + BRefNode* br = NBREF(node); + if (br->state & NST_RECURSION) { + *max = ONIG_INFINITE_DISTANCE; /* recursive reference: no upper bound */ + break; + } + backs = BACKREFS_P(br); + for (i = 0; i < br->back_num; i++) { + if (backs[i] > env->num_mem) return ONIGERR_INVALID_BACKREF; + r = get_max_match_length(nodes[backs[i]], &tmax, env); + if (r != 0) break; + if (*max < tmax) *max = tmax; + } + } + break; + +#ifdef USE_SUBEXP_CALL + case NT_CALL: + if (! IS_CALL_RECURSION(NCALL(node))) + r = get_max_match_length(NCALL(node)->target, max, env); + else + *max = ONIG_INFINITE_DISTANCE; + break; +#endif + + case NT_QTFR: + { + QtfrNode* qn = NQTFR(node); + + if (qn->upper != 0) { + r = get_max_match_length(qn->target, max, env); + if (r == 0 && *max != 0) { /* a zero-length target stays zero-length however often it repeats */ + if (! 
IS_REPEAT_INFINITE(qn->upper)) + *max = distance_multiply(*max, qn->upper); + else + *max = ONIG_INFINITE_DISTANCE; + } + } + } + break; + + case NT_ENCLOSE: + { + EncloseNode* en = NENCLOSE(node); + switch (en->type) { + case ENCLOSE_MEMORY: +#ifdef USE_SUBEXP_CALL + if (IS_ENCLOSE_MAX_FIXED(en)) + *max = en->max_len; + else { + r = get_max_match_length(en->target, max, env); + if (r == 0) { + en->max_len = *max; + SET_ENCLOSE_STATUS(node, NST_MAX_FIXED); + } + } + break; +#endif /* without USE_SUBEXP_CALL, ENCLOSE_MEMORY falls through to the cases below */ + case ENCLOSE_OPTION: + case ENCLOSE_STOP_BACKTRACK: + r = get_max_match_length(en->target, max, env); + break; + } + } + break; + + case NT_ANCHOR: + default: + break; + } + + return r; +} +/* Non-zero results of get_char_length_tree1(): the node does not have a fixed character length. */ +#define GET_CHAR_LEN_VARLEN -1 +#define GET_CHAR_LEN_TOP_ALT_VARLEN -2 + +/* Compute in *len the character length of node, which is expected to match a fixed number of characters. Returns 0 on success, GET_CHAR_LEN_TOP_ALT_VARLEN when the alternatives of an alternation visited at level 1 differ in length, and GET_CHAR_LEN_VARLEN for any other variable-length construct. level is the recursion depth, incremented on entry. */ +static int +get_char_length_tree1(Node* node, regex_t* reg, int* len, int level) +{ + int tlen; + int r = 0; + + level++; + *len = 0; + switch (NTYPE(node)) { + case NT_LIST: + do { + r = get_char_length_tree1(NCAR(node), reg, &tlen, level); + if (r == 0) + *len = distance_add(*len, tlen); + } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); + break; + + case NT_ALT: + { + int tlen2; + int varlen = 0; + + r = get_char_length_tree1(NCAR(node), reg, &tlen, level); + while (r == 0 && IS_NOT_NULL(node = NCDR(node))) { + r = get_char_length_tree1(NCAR(node), reg, &tlen2, level); + if (r == 0) { + if (tlen != tlen2) + varlen = 1; + } + } + if (r == 0) { + if (varlen != 0) { + if (level == 1) + r = GET_CHAR_LEN_TOP_ALT_VARLEN; + else + r = GET_CHAR_LEN_VARLEN; + } + else + *len = tlen; + } + } + break; + + case NT_STR: + { + StrNode* sn = NSTR(node); + UChar *s = sn->s; + while (s < sn->end) { /* count characters, not bytes */ + s += enclen(reg->enc, s); + (*len)++; + } + } + break; + + case NT_QTFR: + { + QtfrNode* qn = NQTFR(node); + if (qn->lower == qn->upper) { + r = get_char_length_tree1(qn->target, reg, &tlen, level); + if (r == 0) + *len = distance_multiply(tlen, qn->lower); + } + else + r = GET_CHAR_LEN_VARLEN; + } + 
break; + +#ifdef USE_SUBEXP_CALL + case NT_CALL: + if (! IS_CALL_RECURSION(NCALL(node))) + r = get_char_length_tree1(NCALL(node)->target, reg, len, level); + else + r = GET_CHAR_LEN_VARLEN; + break; +#endif + + case NT_CTYPE: + *len = 1; + break; + + case NT_CCLASS: + case NT_CANY: + *len = 1; + break; + + case NT_ENCLOSE: + { + EncloseNode* en = NENCLOSE(node); + switch (en->type) { + case ENCLOSE_MEMORY: +#ifdef USE_SUBEXP_CALL + if (IS_ENCLOSE_CLEN_FIXED(en)) + *len = en->char_len; + else { + r = get_char_length_tree1(en->target, reg, len, level); + if (r == 0) { + en->char_len = *len; + SET_ENCLOSE_STATUS(node, NST_CLEN_FIXED); + } + } + break; +#endif /* without USE_SUBEXP_CALL, ENCLOSE_MEMORY falls through to the cases below */ + case ENCLOSE_OPTION: + case ENCLOSE_STOP_BACKTRACK: + r = get_char_length_tree1(en->target, reg, len, level); + break; + default: + break; + } + } + break; + + case NT_ANCHOR: + break; + + default: + r = GET_CHAR_LEN_VARLEN; + break; + } + + return r; +} +/* Entry point for get_char_length_tree1(), starting the walk at level 0. */ +static int +get_char_length_tree(Node* node, regex_t* reg, int* len) +{ + return get_char_length_tree1(node, reg, len, 0); +} + +/* Returns 1 when x is not included in y, otherwise 0. Only the CTYPE, CCLASS and STR node types are compared (the operands are swapped internally so each pairing is handled once); every other combination returns 0. */ +static int +is_not_included(Node* x, Node* y, regex_t* reg) +{ + int i, len; + OnigCodePoint code; + UChar *p, c; + int ytype; + + retry: + ytype = NTYPE(y); + switch (NTYPE(x)) { + case NT_CTYPE: + { + switch (ytype) { + case NT_CTYPE: + if (NCTYPE(y)->ctype == NCTYPE(x)->ctype && + NCTYPE(y)->not != NCTYPE(x)->not) + return 1; + else + return 0; + break; + + case NT_CCLASS: + swap: + { + Node* tmp; + tmp = x; x = y; y = tmp; + goto retry; + } + break; + + case NT_STR: + goto swap; + break; + + default: + break; + } + } + break; + + case NT_CCLASS: + { + CClassNode* xc = NCCLASS(x); + switch (ytype) { + case NT_CTYPE: + switch (NCTYPE(y)->ctype) { + case ONIGENC_CTYPE_WORD: + if (NCTYPE(y)->not == 0) { + if (IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) { + for (i = 0; i < SINGLE_BYTE_SIZE; i++) { + if (BITSET_AT(xc->bs, i)) { + if (IS_CODE_SB_WORD(reg->enc, i)) return 0; + } + } + return 1; + } + 
return 0; + } + else { + for (i = 0; i < SINGLE_BYTE_SIZE; i++) { + if (! IS_CODE_SB_WORD(reg->enc, i)) { + if (!IS_NCCLASS_NOT(xc)) { + if (BITSET_AT(xc->bs, i)) + return 0; + } + else { + if (! BITSET_AT(xc->bs, i)) + return 0; + } + } + } + return 1; + } + break; + + default: + break; + } + break; + + case NT_CCLASS: + { + int v; + CClassNode* yc = NCCLASS(y); + + for (i = 0; i < SINGLE_BYTE_SIZE; i++) { + v = BITSET_AT(xc->bs, i); + if ((v != 0 && !IS_NCCLASS_NOT(xc)) || + (v == 0 && IS_NCCLASS_NOT(xc))) { + v = BITSET_AT(yc->bs, i); + if ((v != 0 && !IS_NCCLASS_NOT(yc)) || + (v == 0 && IS_NCCLASS_NOT(yc))) + return 0; /* a single-byte code accepted by both classes */ + } + } + if ((IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) || + (IS_NULL(yc->mbuf) && !IS_NCCLASS_NOT(yc))) + return 1; + return 0; + } + break; + + case NT_STR: + goto swap; + break; + + default: + break; + } + } + break; + + case NT_STR: + { + StrNode* xs = NSTR(x); + if (NSTRING_LEN(x) == 0) + break; + + c = *(xs->s); /* NOTE(review): c is not read again in the visible code */ + switch (ytype) { + case NT_CTYPE: + switch (NCTYPE(y)->ctype) { + case ONIGENC_CTYPE_WORD: + if (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end)) + return NCTYPE(y)->not; + else + return !(NCTYPE(y)->not); + break; + default: + break; + } + break; + + case NT_CCLASS: + { + CClassNode* cc = NCCLASS(y); + + code = ONIGENC_MBC_TO_CODE(reg->enc, xs->s, + xs->s + ONIGENC_MBC_MAXLEN(reg->enc)); /* NOTE(review): the end bound is s + MAXLEN rather than xs->end; verify this cannot read past a shorter string */ + return (onig_is_code_in_cc(reg->enc, code, cc) != 0 ? 
0 : 1); + } + break; + + case NT_STR: + { + UChar *q; + StrNode* ys = NSTR(y); + len = NSTRING_LEN(x); + if (len > NSTRING_LEN(y)) len = NSTRING_LEN(y); /* compare only the common prefix length */ + if (NSTRING_IS_AMBIG(x) || NSTRING_IS_AMBIG(y)) { + /* tiny version */ + return 0; + } + else { + for (i = 0, p = ys->s, q = xs->s; i < len; i++, p++, q++) { + if (*p != *q) return 1; + } + } + } + break; + + default: + break; + } + } + break; + + default: + break; + } + + return 0; +} +/* Return the node that determines the first value matched by node: a non-empty string node, or (only when exact == 0) a ctype or cclass node, found by descending through lists, quantifiers with lower > 0, enclose nodes and look-ahead anchors. Returns NULL_NODE when no such node is found. With exact != 0, a non-raw string under IGNORECASE is not returned. reg->options is temporarily replaced while descending into an option group. */ +static Node* +get_head_value_node(Node* node, int exact, regex_t* reg) +{ + Node* n = NULL_NODE; + + switch (NTYPE(node)) { + case NT_BREF: + case NT_ALT: + case NT_CANY: +#ifdef USE_SUBEXP_CALL + case NT_CALL: +#endif + break; + + case NT_CTYPE: + case NT_CCLASS: + if (exact == 0) { + n = node; + } + break; + + case NT_LIST: + n = get_head_value_node(NCAR(node), exact, reg); + break; + + case NT_STR: + { + StrNode* sn = NSTR(node); + + if (sn->end <= sn->s) + break; + + if (exact != 0 && + !NSTRING_IS_RAW(node) && IS_IGNORECASE(reg->options)) { /* intentionally empty: n stays NULL_NODE */ + } + else { + n = node; + } + } + break; + + case NT_QTFR: + { + QtfrNode* qn = NQTFR(node); + if (qn->lower > 0) { + if (IS_NOT_NULL(qn->head_exact)) + n = qn->head_exact; + else + n = get_head_value_node(qn->target, exact, reg); + } + } + break; + + case NT_ENCLOSE: + { + EncloseNode* en = NENCLOSE(node); + switch (en->type) { + case ENCLOSE_OPTION: + { + OnigOptionType options = reg->options; + + reg->options = NENCLOSE(node)->option; + n = get_head_value_node(NENCLOSE(node)->target, exact, reg); + reg->options = options; + } + break; + + case ENCLOSE_MEMORY: + case ENCLOSE_STOP_BACKTRACK: + n = get_head_value_node(en->target, exact, reg); + break; + } + } + break; + + case NT_ANCHOR: + if (NANCHOR(node)->type == ANCHOR_PREC_READ) + n = get_head_value_node(NANCHOR(node)->target, exact, reg); + break; + + default: + break; + } + + return n; +} +/* Return 0 if every node visited under node has its type bit in type_mask and, for enclose and anchor nodes, a type matching enclose_mask or anchor_mask respectively; return 1 at the first node that does not. */ +static int +check_type_tree(Node* node, int type_mask, int enclose_mask, int anchor_mask) +{ + int type, r = 0; + + type = NTYPE(node); + if 
((NTYPE2BIT(type) & type_mask) == 0) + return 1; + + switch (type) { + case NT_LIST: + case NT_ALT: + do { + r = check_type_tree(NCAR(node), type_mask, enclose_mask, + anchor_mask); + } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); + break; + + case NT_QTFR: + r = check_type_tree(NQTFR(node)->target, type_mask, enclose_mask, + anchor_mask); + break; + + case NT_ENCLOSE: + { + EncloseNode* en = NENCLOSE(node); + if ((en->type & enclose_mask) == 0) + return 1; + + r = check_type_tree(en->target, type_mask, enclose_mask, anchor_mask); + } + break; + + case NT_ANCHOR: + type = NANCHOR(node)->type; /* type is reused here for the anchor subtype */ + if ((type & anchor_mask) == 0) + return 1; + + if (NANCHOR(node)->target) + r = check_type_tree(NANCHOR(node)->target, + type_mask, enclose_mask, anchor_mask); + break; + + default: + break; + } + return r; +} + +#ifdef USE_SUBEXP_CALL +/* Positive results of subexp_inf_recursive_check(); they are combined with | and & there, so they must stay distinct bits. */ +#define RECURSION_EXIST 1 +#define RECURSION_INFINITE 2 +/* Search node for a path back into the group currently marked NST_MARK1. Returns RECURSION_INFINITE when that group is reached while head is still non-zero, RECURSION_EXIST when it is reached after head has been cleared, 0 when it is not reached, or an error code. In a list, head is cleared once an element with a non-zero minimum match length has been passed. */ +static int +subexp_inf_recursive_check(Node* node, ScanEnv* env, int head) +{ + int type; + int r = 0; + + type = NTYPE(node); + switch (type) { + case NT_LIST: + { + Node *x; + OnigDistance min; + int ret; + + x = node; + do { + ret = subexp_inf_recursive_check(NCAR(x), env, head); + if (ret < 0 || ret == RECURSION_INFINITE) return ret; + r |= ret; + if (head) { + ret = get_min_match_length(NCAR(x), &min, env); + if (ret != 0) return ret; + if (min != 0) head = 0; + } + } while (IS_NOT_NULL(x = NCDR(x))); + } + break; + + case NT_ALT: + { + int ret; + r = RECURSION_EXIST; + do { + ret = subexp_inf_recursive_check(NCAR(node), env, head); + if (ret < 0 || ret == RECURSION_INFINITE) return ret; + r &= ret; /* stays RECURSION_EXIST only if every alternative recurses */ + } while (IS_NOT_NULL(node = NCDR(node))); + } + break; + + case NT_QTFR: + r = subexp_inf_recursive_check(NQTFR(node)->target, env, head); + if (r == RECURSION_EXIST) { + if (NQTFR(node)->lower == 0) r = 0; /* the target may be skipped entirely */ + } + break; + + case NT_ANCHOR: + { + AnchorNode* an = NANCHOR(node); + switch (an->type) { + case ANCHOR_PREC_READ: + case ANCHOR_PREC_READ_NOT: + case 
ANCHOR_LOOK_BEHIND: + case ANCHOR_LOOK_BEHIND_NOT: + r = subexp_inf_recursive_check(an->target, env, head); + break; + } + } + break; + + case NT_CALL: + r = subexp_inf_recursive_check(NCALL(node)->target, env, head); + break; + + case NT_ENCLOSE: + if (IS_ENCLOSE_MARK2(NENCLOSE(node))) + return 0; + else if (IS_ENCLOSE_MARK1(NENCLOSE(node))) + return (head == 0 ? RECURSION_EXIST : RECURSION_INFINITE); + else { + SET_ENCLOSE_STATUS(node, NST_MARK2); + r = subexp_inf_recursive_check(NENCLOSE(node)->target, env, head); + CLEAR_ENCLOSE_STATUS(node, NST_MARK2); + } + break; + + default: + break; + } + + return r; +} + +static int +subexp_inf_recursive_check_trav(Node* node, ScanEnv* env) +{ + int type; + int r = 0; + + type = NTYPE(node); + switch (type) { + case NT_LIST: + case NT_ALT: + do { + r = subexp_inf_recursive_check_trav(NCAR(node), env); + } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); + break; + + case NT_QTFR: + r = subexp_inf_recursive_check_trav(NQTFR(node)->target, env); + break; + + case NT_ANCHOR: + { + AnchorNode* an = NANCHOR(node); + switch (an->type) { + case ANCHOR_PREC_READ: + case ANCHOR_PREC_READ_NOT: + case ANCHOR_LOOK_BEHIND: + case ANCHOR_LOOK_BEHIND_NOT: + r = subexp_inf_recursive_check_trav(an->target, env); + break; + } + } + break; + + case NT_ENCLOSE: + { + EncloseNode* en = NENCLOSE(node); + + if (IS_ENCLOSE_RECURSION(en)) { + SET_ENCLOSE_STATUS(node, NST_MARK1); + r = subexp_inf_recursive_check(en->target, env, 1); + if (r > 0) return ONIGERR_NEVER_ENDING_RECURSION; + CLEAR_ENCLOSE_STATUS(node, NST_MARK1); + } + r = subexp_inf_recursive_check_trav(en->target, env); + } + + break; + + default: + break; + } + + return r; +} + +static int +subexp_recursive_check(Node* node) +{ + int r = 0; + + switch (NTYPE(node)) { + case NT_LIST: + case NT_ALT: + do { + r |= subexp_recursive_check(NCAR(node)); + } while (IS_NOT_NULL(node = NCDR(node))); + break; + + case NT_QTFR: + r = subexp_recursive_check(NQTFR(node)->target); + break; + + 
case NT_ANCHOR: + { + AnchorNode* an = NANCHOR(node); + switch (an->type) { + case ANCHOR_PREC_READ: + case ANCHOR_PREC_READ_NOT: + case ANCHOR_LOOK_BEHIND: + case ANCHOR_LOOK_BEHIND_NOT: + r = subexp_recursive_check(an->target); + break; + } + } + break; + + case NT_CALL: + r = subexp_recursive_check(NCALL(node)->target); + if (r != 0) SET_CALL_RECURSION(node); + break; + + case NT_ENCLOSE: + if (IS_ENCLOSE_MARK2(NENCLOSE(node))) + return 0; + else if (IS_ENCLOSE_MARK1(NENCLOSE(node))) + return 1; /* recursion */ + else { + SET_ENCLOSE_STATUS(node, NST_MARK2); + r = subexp_recursive_check(NENCLOSE(node)->target); + CLEAR_ENCLOSE_STATUS(node, NST_MARK2); + } + break; + + default: + break; + } + + return r; +} + + +static int +subexp_recursive_check_trav(Node* node, ScanEnv* env) +{ +#define FOUND_CALLED_NODE 1 + + int type; + int r = 0; + + type = NTYPE(node); + switch (type) { + case NT_LIST: + case NT_ALT: + { + int ret; + do { + ret = subexp_recursive_check_trav(NCAR(node), env); + if (ret == FOUND_CALLED_NODE) r = FOUND_CALLED_NODE; + else if (ret < 0) return ret; + } while (IS_NOT_NULL(node = NCDR(node))); + } + break; + + case NT_QTFR: + r = subexp_recursive_check_trav(NQTFR(node)->target, env); + if (NQTFR(node)->upper == 0) { + if (r == FOUND_CALLED_NODE) + NQTFR(node)->is_refered = 1; + } + break; + + case NT_ANCHOR: + { + AnchorNode* an = NANCHOR(node); + switch (an->type) { + case ANCHOR_PREC_READ: + case ANCHOR_PREC_READ_NOT: + case ANCHOR_LOOK_BEHIND: + case ANCHOR_LOOK_BEHIND_NOT: + r = subexp_recursive_check_trav(an->target, env); + break; + } + } + break; + + case NT_ENCLOSE: + { + EncloseNode* en = NENCLOSE(node); + + if (! 
IS_ENCLOSE_RECURSION(en)) { + if (IS_ENCLOSE_CALLED(en)) { + SET_ENCLOSE_STATUS(node, NST_MARK1); + r = subexp_recursive_check(en->target); + if (r != 0) SET_ENCLOSE_STATUS(node, NST_RECURSION); + CLEAR_ENCLOSE_STATUS(node, NST_MARK1); + } + } + r = subexp_recursive_check_trav(en->target, env); + if (IS_ENCLOSE_CALLED(en)) + r |= FOUND_CALLED_NODE; + } + break; + + default: + break; + } + + return r; +} + +static int +setup_subexp_call(Node* node, ScanEnv* env) +{ + int type; + int r = 0; + + type = NTYPE(node); + switch (type) { + case NT_LIST: + do { + r = setup_subexp_call(NCAR(node), env); + } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); + break; + + case NT_ALT: + do { + r = setup_subexp_call(NCAR(node), env); + } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); + break; + + case NT_QTFR: + r = setup_subexp_call(NQTFR(node)->target, env); + break; + case NT_ENCLOSE: + r = setup_subexp_call(NENCLOSE(node)->target, env); + break; + + case NT_CALL: + { + CallNode* cn = NCALL(node); + Node** nodes = SCANENV_MEM_NODES(env); + + if (cn->group_num != 0) { + int gnum = cn->group_num; + +#ifdef USE_NAMED_GROUP + if (env->num_named > 0 && + IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) && + !ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_CAPTURE_GROUP)) { + return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED; + } +#endif + if (gnum > env->num_mem) { + onig_scan_env_set_error_string(env, + ONIGERR_UNDEFINED_GROUP_REFERENCE, cn->name, cn->name_end); + return ONIGERR_UNDEFINED_GROUP_REFERENCE; + } + +#ifdef USE_NAMED_GROUP + set_call_attr: +#endif + cn->target = nodes[cn->group_num]; + if (IS_NULL(cn->target)) { + onig_scan_env_set_error_string(env, + ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end); + return ONIGERR_UNDEFINED_NAME_REFERENCE; + } + SET_ENCLOSE_STATUS(cn->target, NST_CALLED); + BIT_STATUS_ON_AT(env->bt_mem_start, cn->group_num); + cn->unset_addr_list = env->unset_addr_list; + } +#ifdef USE_NAMED_GROUP + else { + int *refs; + + int 
n = onig_name_to_group_numbers(env->reg, cn->name, cn->name_end, + &refs); + if (n <= 0) { + onig_scan_env_set_error_string(env, + ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end); + return ONIGERR_UNDEFINED_NAME_REFERENCE; + } + else if (n > 1) { + onig_scan_env_set_error_string(env, + ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL, cn->name, cn->name_end); + return ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL; + } + else { + cn->group_num = refs[0]; + goto set_call_attr; + } + } +#endif + } + break; + + case NT_ANCHOR: + { + AnchorNode* an = NANCHOR(node); + + switch (an->type) { + case ANCHOR_PREC_READ: + case ANCHOR_PREC_READ_NOT: + case ANCHOR_LOOK_BEHIND: + case ANCHOR_LOOK_BEHIND_NOT: + r = setup_subexp_call(an->target, env); + break; + } + } + break; + + default: + break; + } + + return r; +} +#endif + +/* divide different length alternatives in look-behind. + (?<=A|B) ==> (?<=A)|(?<=B) + (? (?type; + + head = an->target; + np = NCAR(head); + swap_node(node, head); + NCAR(node) = head; + NANCHOR(head)->target = np; + + np = node; + while ((np = NCDR(np)) != NULL_NODE) { + insert_node = onig_node_new_anchor(anc_type); + CHECK_NULL_RETURN_MEMERR(insert_node); + NANCHOR(insert_node)->target = NCAR(np); + NCAR(np) = insert_node; + } + + if (anc_type == ANCHOR_LOOK_BEHIND_NOT) { + np = node; + do { + SET_NTYPE(np, NT_LIST); /* alt -> list */ + } while ((np = NCDR(np)) != NULL_NODE); + } + return 0; +} + +static int +setup_look_behind(Node* node, regex_t* reg, ScanEnv* env) +{ + int r, len; + AnchorNode* an = NANCHOR(node); + + r = get_char_length_tree(an->target, reg, &len); + if (r == 0) + an->char_len = len; + else if (r == GET_CHAR_LEN_VARLEN) + r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN; + else if (r == GET_CHAR_LEN_TOP_ALT_VARLEN) { + if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND)) + r = divide_look_behind_alternatives(node); + else + r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN; + } + + return r; +} + +static int +next_setup(Node* node, Node* 
next_node, regex_t* reg) +{ + int type; + + retry: + type = NTYPE(node); + if (type == NT_QTFR) { + QtfrNode* qn = NQTFR(node); + if (qn->greedy && IS_REPEAT_INFINITE(qn->upper)) { +#ifdef USE_QTFR_PEEK_NEXT + Node* n = get_head_value_node(next_node, 1, reg); + /* '\0': for UTF-16BE etc... */ + if (IS_NOT_NULL(n) && NSTR(n)->s[0] != '\0') { + qn->next_head_exact = n; + } +#endif + /* automatic posseivation a*b ==> (?>a*)b */ + if (qn->lower <= 1) { + int ttype = NTYPE(qn->target); + if (IS_NODE_TYPE_SIMPLE(ttype)) { + Node *x, *y; + x = get_head_value_node(qn->target, 0, reg); + if (IS_NOT_NULL(x)) { + y = get_head_value_node(next_node, 0, reg); + if (IS_NOT_NULL(y) && is_not_included(x, y, reg)) { + Node* en = onig_node_new_enclose(ENCLOSE_STOP_BACKTRACK); + CHECK_NULL_RETURN_MEMERR(en); + SET_ENCLOSE_STATUS(en, NST_STOP_BT_SIMPLE_REPEAT); + swap_node(node, en); + NENCLOSE(node)->target = en; + } + } + } + } + } + } + else if (type == NT_ENCLOSE) { + EncloseNode* en = NENCLOSE(node); + if (en->type == ENCLOSE_MEMORY) { + node = en->target; + goto retry; + } + } + return 0; +} + + +static int +update_string_node_case_fold(regex_t* reg, Node *node) +{ + UChar *p, *q, *end, buf[ONIGENC_MBC_CASE_FOLD_MAXLEN]; + UChar *sbuf, *ebuf, *sp; + int r, i, len, sbuf_size; + StrNode* sn = NSTR(node); + + end = sn->end; + sbuf_size = (end - sn->s) * 2; + sbuf = (UChar* )xmalloc(sbuf_size); + CHECK_NULL_RETURN_MEMERR(sbuf); + ebuf = sbuf + sbuf_size; + + sp = sbuf; + p = sn->s; + while (p < end) { + len = ONIGENC_MBC_CASE_FOLD(reg->enc, reg->case_fold_flag, &p, end, buf); + q = buf; + for (i = 0; i < len; i++) { + if (sp >= ebuf) { + sbuf = (UChar* )xrealloc(sbuf, sbuf_size * 2); + CHECK_NULL_RETURN_MEMERR(sbuf); + sp = sbuf + sbuf_size; + sbuf_size *= 2; + ebuf = sbuf + sbuf_size; + } + + *sp++ = buf[i]; + } + } + + r = onig_node_str_set(node, sbuf, sp); + if (r != 0) { + xfree(sbuf); + return r; + } + + xfree(sbuf); + return 0; +} + +static int 
+expand_case_fold_make_rem_string(Node** rnode, UChar *s, UChar *end, + regex_t* reg) +{ + int r; + Node *node; + + node = onig_node_new_str(s, end); + if (IS_NULL(node)) return ONIGERR_MEMORY; + + r = update_string_node_case_fold(reg, node); + if (r != 0) { + onig_node_free(node); + return r; + } + + NSTRING_SET_AMBIG(node); + NSTRING_SET_DONT_GET_OPT_INFO(node); + *rnode = node; + return 0; +} + +static int +expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[], + UChar *p, int slen, UChar *end, + regex_t* reg, Node **rnode) +{ + int r, i, j, len, varlen; + Node *anode, *var_anode, *snode, *xnode, *an; + UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN]; + + *rnode = var_anode = NULL_NODE; + + varlen = 0; + for (i = 0; i < item_num; i++) { + if (items[i].byte_len != slen) { + varlen = 1; + break; + } + } + + if (varlen != 0) { + *rnode = var_anode = onig_node_new_alt(NULL_NODE, NULL_NODE); + if (IS_NULL(var_anode)) return ONIGERR_MEMORY; + + xnode = onig_node_new_list(NULL, NULL); + if (IS_NULL(xnode)) goto mem_err; + NCAR(var_anode) = xnode; + + anode = onig_node_new_alt(NULL_NODE, NULL_NODE); + if (IS_NULL(anode)) goto mem_err; + NCAR(xnode) = anode; + } + else { + *rnode = anode = onig_node_new_alt(NULL_NODE, NULL_NODE); + if (IS_NULL(anode)) return ONIGERR_MEMORY; + } + + snode = onig_node_new_str(p, p + slen); + if (IS_NULL(snode)) goto mem_err; + + NCAR(anode) = snode; + + for (i = 0; i < item_num; i++) { + snode = onig_node_new_str(NULL, NULL); + if (IS_NULL(snode)) goto mem_err; + + for (j = 0; j < items[i].code_len; j++) { + len = ONIGENC_CODE_TO_MBC(reg->enc, items[i].code[j], buf); + if (len < 0) { + r = len; + goto mem_err2; + } + + r = onig_node_str_cat(snode, buf, buf + len); + if (r != 0) goto mem_err2; + } + + an = onig_node_new_alt(NULL_NODE, NULL_NODE); + if (IS_NULL(an)) { + goto mem_err2; + } + + if (items[i].byte_len != slen) { + Node *rem; + UChar *q = p + items[i].byte_len; + + if (q < end) { + r = 
expand_case_fold_make_rem_string(&rem, q, end, reg); + if (r != 0) { + onig_node_free(an); + goto mem_err2; + } + + xnode = onig_node_list_add(NULL_NODE, snode); + if (IS_NULL(xnode)) { + onig_node_free(an); + onig_node_free(rem); + goto mem_err2; + } + if (IS_NULL(onig_node_list_add(xnode, rem))) { + onig_node_free(an); + onig_node_free(xnode); + onig_node_free(rem); + goto mem_err; + } + + NCAR(an) = xnode; + } + else { + NCAR(an) = snode; + } + + NCDR(var_anode) = an; + var_anode = an; + } + else { + NCAR(an) = snode; + NCDR(anode) = an; + anode = an; + } + } + + return varlen; + + mem_err2: + onig_node_free(snode); + + mem_err: + onig_node_free(*rnode); + + return ONIGERR_MEMORY; +} + +static int +expand_case_fold_string(Node* node, regex_t* reg) +{ +#define THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION 8 + + int r, n, len, alt_num; + UChar *start, *end, *p; + Node *top_root, *root, *snode, *prev_node; + OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM]; + StrNode* sn = NSTR(node); + + if (NSTRING_IS_AMBIG(node)) return 0; + + start = sn->s; + end = sn->end; + if (start >= end) return 0; + + r = 0; + top_root = root = prev_node = snode = NULL_NODE; + alt_num = 1; + p = start; + while (p < end) { + n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(reg->enc, reg->case_fold_flag, + p, end, items); + if (n < 0) { + r = n; + goto err; + } + + len = enclen(reg->enc, p); + + if (n == 0) { + if (IS_NULL(snode)) { + if (IS_NULL(root) && IS_NOT_NULL(prev_node)) { + top_root = root = onig_node_list_add(NULL_NODE, prev_node); + if (IS_NULL(root)) { + onig_node_free(prev_node); + goto mem_err; + } + } + + prev_node = snode = onig_node_new_str(NULL, NULL); + if (IS_NULL(snode)) goto mem_err; + if (IS_NOT_NULL(root)) { + if (IS_NULL(onig_node_list_add(root, snode))) { + onig_node_free(snode); + goto mem_err; + } + } + } + + r = onig_node_str_cat(snode, p, p + len); + if (r != 0) goto err; + } + else { + alt_num *= (n + 1); + if (alt_num > THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION) 
break; + + if (IS_NULL(root) && IS_NOT_NULL(prev_node)) { + top_root = root = onig_node_list_add(NULL_NODE, prev_node); + if (IS_NULL(root)) { + onig_node_free(prev_node); + goto mem_err; + } + } + + r = expand_case_fold_string_alt(n, items, p, len, end, reg, &prev_node); + if (r < 0) goto mem_err; + if (r == 1) { + if (IS_NULL(root)) { + top_root = prev_node; + } + else { + if (IS_NULL(onig_node_list_add(root, prev_node))) { + onig_node_free(prev_node); + goto mem_err; + } + } + + root = NCAR(prev_node); + } + else { /* r == 0 */ + if (IS_NOT_NULL(root)) { + if (IS_NULL(onig_node_list_add(root, prev_node))) { + onig_node_free(prev_node); + goto mem_err; + } + } + } + + snode = NULL_NODE; + } + + p += len; + } + + if (p < end) { + Node *srem; + + r = expand_case_fold_make_rem_string(&srem, p, end, reg); + if (r != 0) goto mem_err; + + if (IS_NOT_NULL(prev_node) && IS_NULL(root)) { + top_root = root = onig_node_list_add(NULL_NODE, prev_node); + if (IS_NULL(root)) { + onig_node_free(srem); + onig_node_free(prev_node); + goto mem_err; + } + } + + if (IS_NULL(root)) { + prev_node = srem; + } + else { + if (IS_NULL(onig_node_list_add(root, srem))) { + onig_node_free(srem); + goto mem_err; + } + } + } + + /* ending */ + top_root = (IS_NOT_NULL(top_root) ? 
top_root : prev_node); + swap_node(node, top_root); + onig_node_free(top_root); + return 0; + + mem_err: + r = ONIGERR_MEMORY; + + err: + onig_node_free(top_root); + return r; +} + + +#ifdef USE_COMBINATION_EXPLOSION_CHECK + +#define CEC_THRES_NUM_BIG_REPEAT 512 +#define CEC_INFINITE_NUM 0x7fffffff + +#define CEC_IN_INFINITE_REPEAT (1<<0) +#define CEC_IN_FINITE_REPEAT (1<<1) +#define CEC_CONT_BIG_REPEAT (1<<2) + +static int +setup_comb_exp_check(Node* node, int state, ScanEnv* env) +{ + int type; + int r = state; + + type = NTYPE(node); + switch (type) { + case NT_LIST: + { + Node* prev = NULL_NODE; + do { + r = setup_comb_exp_check(NCAR(node), r, env); + prev = NCAR(node); + } while (r >= 0 && IS_NOT_NULL(node = NCDR(node))); + } + break; + + case NT_ALT: + { + int ret; + do { + ret = setup_comb_exp_check(NCAR(node), state, env); + r |= ret; + } while (ret >= 0 && IS_NOT_NULL(node = NCDR(node))); + } + break; + + case NT_QTFR: + { + int child_state = state; + int add_state = 0; + QtfrNode* qn = NQTFR(node); + Node* target = qn->target; + int var_num; + + if (! IS_REPEAT_INFINITE(qn->upper)) { + if (qn->upper > 1) { + /* {0,1}, {1,1} are allowed */ + child_state |= CEC_IN_FINITE_REPEAT; + + /* check (a*){n,m}, (a+){n,m} => (a*){n,n}, (a+){n,n} */ + if (env->backrefed_mem == 0) { + if (NTYPE(qn->target) == NT_ENCLOSE) { + EncloseNode* en = NENCLOSE(qn->target); + if (en->type == ENCLOSE_MEMORY) { + if (NTYPE(en->target) == NT_QTFR) { + QtfrNode* q = NQTFR(en->target); + if (IS_REPEAT_INFINITE(q->upper) + && q->greedy == qn->greedy) { + qn->upper = (qn->lower == 0 ? 
1 : qn->lower); + if (qn->upper == 1) + child_state = state; + } + } + } + } + } + } + } + + if (state & CEC_IN_FINITE_REPEAT) { + qn->comb_exp_check_num = -1; + } + else { + if (IS_REPEAT_INFINITE(qn->upper)) { + var_num = CEC_INFINITE_NUM; + child_state |= CEC_IN_INFINITE_REPEAT; + } + else { + var_num = qn->upper - qn->lower; + } + + if (var_num >= CEC_THRES_NUM_BIG_REPEAT) + add_state |= CEC_CONT_BIG_REPEAT; + + if (((state & CEC_IN_INFINITE_REPEAT) != 0 && var_num != 0) || + ((state & CEC_CONT_BIG_REPEAT) != 0 && + var_num >= CEC_THRES_NUM_BIG_REPEAT)) { + if (qn->comb_exp_check_num == 0) { + env->num_comb_exp_check++; + qn->comb_exp_check_num = env->num_comb_exp_check; + if (env->curr_max_regnum > env->comb_exp_max_regnum) + env->comb_exp_max_regnum = env->curr_max_regnum; + } + } + } + + r = setup_comb_exp_check(target, child_state, env); + r |= add_state; + } + break; + + case NT_ENCLOSE: + { + EncloseNode* en = NENCLOSE(node); + + switch (en->type) { + case ENCLOSE_MEMORY: + { + if (env->curr_max_regnum < en->regnum) + env->curr_max_regnum = en->regnum; + + r = setup_comb_exp_check(en->target, state, env); + } + break; + + default: + r = setup_comb_exp_check(en->target, state, env); + break; + } + } + break; + +#ifdef USE_SUBEXP_CALL + case NT_CALL: + if (IS_CALL_RECURSION(NCALL(node))) + env->has_recursion = 1; + else + r = setup_comb_exp_check(NCALL(node)->target, state, env); + break; +#endif + + default: + break; + } + + return r; +} +#endif + +#define IN_ALT (1<<0) +#define IN_NOT (1<<1) +#define IN_REPEAT (1<<2) +#define IN_VAR_REPEAT (1<<3) + +/* setup_tree does the following work. + 1. check empty loop. (set qn->target_empty_info) + 2. expand ignore-case in char class. + 3. set memory status bit flags. (reg->mem_stats) + 4. set qn->head_exact for [push, exact] -> [push_or_jump_exact1, exact]. + 5. find invalid patterns in look-behind. + 6. expand repeated string. 
+ */ +static int +setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) +{ + int type; + int r = 0; + + type = NTYPE(node); + switch (type) { + case NT_LIST: + { + Node* prev = NULL_NODE; + do { + r = setup_tree(NCAR(node), reg, state, env); + if (IS_NOT_NULL(prev) && r == 0) { + r = next_setup(prev, NCAR(node), reg); + } + prev = NCAR(node); + } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); + } + break; + + case NT_ALT: + do { + r = setup_tree(NCAR(node), reg, (state | IN_ALT), env); + } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); + break; + + case NT_CCLASS: + break; + + case NT_STR: + if (IS_IGNORECASE(reg->options) && !NSTRING_IS_RAW(node)) { + r = expand_case_fold_string(node, reg); + } + break; + + case NT_CTYPE: + case NT_CANY: + break; + +#ifdef USE_SUBEXP_CALL + case NT_CALL: + break; +#endif + + case NT_BREF: + { + int i; + int* p; + Node** nodes = SCANENV_MEM_NODES(env); + BRefNode* br = NBREF(node); + p = BACKREFS_P(br); + for (i = 0; i < br->back_num; i++) { + if (p[i] > env->num_mem) return ONIGERR_INVALID_BACKREF; + BIT_STATUS_ON_AT(env->backrefed_mem, p[i]); + BIT_STATUS_ON_AT(env->bt_mem_start, p[i]); +#ifdef USE_BACKREF_WITH_LEVEL + if (IS_BACKREF_NEST_LEVEL(br)) { + BIT_STATUS_ON_AT(env->bt_mem_end, p[i]); + } +#endif + SET_ENCLOSE_STATUS(nodes[p[i]], NST_MEM_BACKREFED); + } + } + break; + + case NT_QTFR: + { + OnigDistance d; + QtfrNode* qn = NQTFR(node); + Node* target = qn->target; + + if ((state & IN_REPEAT) != 0) { + qn->state |= NST_IN_REPEAT; + } + + if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 1) { + r = get_min_match_length(target, &d, env); + if (r) break; + if (d == 0) { + qn->target_empty_info = NQ_TARGET_IS_EMPTY; +#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT + r = quantifiers_memory_node_info(target); + if (r < 0) break; + if (r > 0) { + qn->target_empty_info = r; + } +#endif +#if 0 + r = get_max_match_length(target, &d, env); + if (r == 0 && d == 0) { + /* ()* ==> ()?, ()+ ==> () */ + qn->upper = 1; 
+ if (qn->lower > 1) qn->lower = 1; + if (NTYPE(target) == NT_STR) { + qn->upper = qn->lower = 0; /* /(?:)+/ ==> // */ + } + } +#endif + } + } + + state |= IN_REPEAT; + if (qn->lower != qn->upper) + state |= IN_VAR_REPEAT; + r = setup_tree(target, reg, state, env); + if (r) break; + + /* expand string */ +#define EXPAND_STRING_MAX_LENGTH 100 + if (NTYPE(target) == NT_STR) { + if (!IS_REPEAT_INFINITE(qn->lower) && qn->lower == qn->upper && + qn->lower > 1 && qn->lower <= EXPAND_STRING_MAX_LENGTH) { + int len = NSTRING_LEN(target); + StrNode* sn = NSTR(target); + + if (len * qn->lower <= EXPAND_STRING_MAX_LENGTH) { + int i, n = qn->lower; + onig_node_conv_to_str_node(node, NSTR(target)->flag); + for (i = 0; i < n; i++) { + r = onig_node_str_cat(node, sn->s, sn->end); + if (r) break; + } + onig_node_free(target); + break; /* break case NT_QTFR: */ + } + } + } + +#ifdef USE_OP_PUSH_OR_JUMP_EXACT + if (qn->greedy && (qn->target_empty_info != 0)) { + if (NTYPE(target) == NT_QTFR) { + QtfrNode* tqn = NQTFR(target); + if (IS_NOT_NULL(tqn->head_exact)) { + qn->head_exact = tqn->head_exact; + tqn->head_exact = NULL; + } + } + else { + qn->head_exact = get_head_value_node(qn->target, 1, reg); + } + } +#endif + } + break; + + case NT_ENCLOSE: + { + EncloseNode* en = NENCLOSE(node); + + switch (en->type) { + case ENCLOSE_OPTION: + { + OnigOptionType options = reg->options; + reg->options = NENCLOSE(node)->option; + r = setup_tree(NENCLOSE(node)->target, reg, state, env); + reg->options = options; + } + break; + + case ENCLOSE_MEMORY: + if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT)) != 0) { + BIT_STATUS_ON_AT(env->bt_mem_start, en->regnum); + /* SET_ENCLOSE_STATUS(node, NST_MEM_IN_ALT_NOT); */ + } + r = setup_tree(en->target, reg, state, env); + break; + + case ENCLOSE_STOP_BACKTRACK: + { + Node* target = en->target; + r = setup_tree(target, reg, state, env); + if (NTYPE(target) == NT_QTFR) { + QtfrNode* tqn = NQTFR(target); + if (IS_REPEAT_INFINITE(tqn->upper) && tqn->lower <= 
1 && + tqn->greedy != 0) { /* (?>a*), a*+ etc... */ + int qtype = NTYPE(tqn->target); + if (IS_NODE_TYPE_SIMPLE(qtype)) + SET_ENCLOSE_STATUS(node, NST_STOP_BT_SIMPLE_REPEAT); + } + } + } + break; + } + } + break; + + case NT_ANCHOR: + { + AnchorNode* an = NANCHOR(node); + + switch (an->type) { + case ANCHOR_PREC_READ: + r = setup_tree(an->target, reg, state, env); + break; + case ANCHOR_PREC_READ_NOT: + r = setup_tree(an->target, reg, (state | IN_NOT), env); + break; + +/* allowed node types in look-behind */ +#define ALLOWED_TYPE_IN_LB \ + ( BIT_NT_LIST | BIT_NT_ALT | BIT_NT_STR | BIT_NT_CCLASS | BIT_NT_CTYPE | \ + BIT_NT_CANY | BIT_NT_ANCHOR | BIT_NT_ENCLOSE | BIT_NT_QTFR | BIT_NT_CALL ) + +#define ALLOWED_ENCLOSE_IN_LB ( ENCLOSE_MEMORY ) +#define ALLOWED_ENCLOSE_IN_LB_NOT 0 + +#define ALLOWED_ANCHOR_IN_LB \ +( ANCHOR_LOOK_BEHIND | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION ) +#define ALLOWED_ANCHOR_IN_LB_NOT \ +( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION ) + + case ANCHOR_LOOK_BEHIND: + { + r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB, + ALLOWED_ENCLOSE_IN_LB, ALLOWED_ANCHOR_IN_LB); + if (r < 0) return r; + if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN; + r = setup_look_behind(node, reg, env); + if (r != 0) return r; + r = setup_tree(an->target, reg, state, env); + } + break; + + case ANCHOR_LOOK_BEHIND_NOT: + { + r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB, + ALLOWED_ENCLOSE_IN_LB_NOT, ALLOWED_ANCHOR_IN_LB_NOT); + if (r < 0) return r; + if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN; + r = setup_look_behind(node, reg, env); + if (r != 0) return r; + r = setup_tree(an->target, reg, (state | IN_NOT), env); + } + break; + } + } + break; + + default: + break; + } + + return r; +} + +/* set skip map for Boyer-Moor search */ +static int +set_bm_skip(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED, + UChar skip[], int** 
int_skip) +{ + int i, len; + + len = end - s; + if (len < ONIG_CHAR_TABLE_SIZE) { + for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) skip[i] = len; + + for (i = 0; i < len - 1; i++) + skip[s[i]] = len - 1 - i; + } + else { + if (IS_NULL(*int_skip)) { + *int_skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE); + if (IS_NULL(*int_skip)) return ONIGERR_MEMORY; + } + for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) (*int_skip)[i] = len; + + for (i = 0; i < len - 1; i++) + (*int_skip)[s[i]] = len - 1 - i; + } + return 0; +} + +#define OPT_EXACT_MAXLEN 24 + +typedef struct { + OnigDistance min; /* min byte length */ + OnigDistance max; /* max byte length */ +} MinMaxLen; + +typedef struct { + MinMaxLen mmd; + OnigEncoding enc; + OnigOptionType options; + OnigCaseFoldType case_fold_flag; + ScanEnv* scan_env; +} OptEnv; + +typedef struct { + int left_anchor; + int right_anchor; +} OptAncInfo; + +typedef struct { + MinMaxLen mmd; /* info position */ + OptAncInfo anc; + + int reach_end; + int ignore_case; + int len; + UChar s[OPT_EXACT_MAXLEN]; +} OptExactInfo; + +typedef struct { + MinMaxLen mmd; /* info position */ + OptAncInfo anc; + + int value; /* weighted value */ + UChar map[ONIG_CHAR_TABLE_SIZE]; +} OptMapInfo; + +typedef struct { + MinMaxLen len; + + OptAncInfo anc; + OptExactInfo exb; /* boundary */ + OptExactInfo exm; /* middle */ + OptExactInfo expr; /* prec read (?=...) 
*/ + + OptMapInfo map; /* boundary */ +} NodeOptInfo; + + +static int +map_position_value(OnigEncoding enc, int i) +{ + static const short int ByteValTable[] = { + 5, 1, 1, 1, 1, 1, 1, 1, 1, 10, 10, 1, 1, 10, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 12, 4, 7, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5, + 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 6, 5, 5, 5, + 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 1 + }; + + if (i < (int )(sizeof(ByteValTable)/sizeof(ByteValTable[0]))) { + if (i == 0 && ONIGENC_MBC_MINLEN(enc) > 1) + return 20; + else + return (int )ByteValTable[i]; + } + else + return 4; /* Take it easy. */ +} + +static int +distance_value(MinMaxLen* mm) +{ + /* 1000 / (min-max-dist + 1) */ + static const short int dist_vals[] = { + 1000, 500, 333, 250, 200, 167, 143, 125, 111, 100, + 91, 83, 77, 71, 67, 63, 59, 56, 53, 50, + 48, 45, 43, 42, 40, 38, 37, 36, 34, 33, + 32, 31, 30, 29, 29, 28, 27, 26, 26, 25, + 24, 24, 23, 23, 22, 22, 21, 21, 20, 20, + 20, 19, 19, 19, 18, 18, 18, 17, 17, 17, + 16, 16, 16, 16, 15, 15, 15, 15, 14, 14, + 14, 14, 14, 14, 13, 13, 13, 13, 13, 13, + 12, 12, 12, 12, 12, 12, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 10, 10, 10, 10, 10 + }; + + int d; + + if (mm->max == ONIG_INFINITE_DISTANCE) return 0; + + d = mm->max - mm->min; + if (d < (int )(sizeof(dist_vals)/sizeof(dist_vals[0]))) + /* return dist_vals[d] * 16 / (mm->min + 12); */ + return (int )dist_vals[d]; + else + return 1; +} + +static int +comp_distance_value(MinMaxLen* d1, MinMaxLen* d2, int v1, int v2) +{ + if (v2 <= 0) return -1; + if (v1 <= 0) return 1; + + v1 *= distance_value(d1); + v2 *= distance_value(d2); + + if (v2 > v1) return 1; + if (v2 < v1) return -1; + + if (d2->min < d1->min) return 1; + if (d2->min > d1->min) return -1; + return 0; +} + +static int +is_equal_mml(MinMaxLen* a, MinMaxLen* b) +{ + return (a->min 
== b->min && a->max == b->max) ? 1 : 0; +} + + +static void +set_mml(MinMaxLen* mml, OnigDistance min, OnigDistance max) +{ + mml->min = min; + mml->max = max; +} + +static void +clear_mml(MinMaxLen* mml) +{ + mml->min = mml->max = 0; +} + +static void +copy_mml(MinMaxLen* to, MinMaxLen* from) +{ + to->min = from->min; + to->max = from->max; +} + +static void +add_mml(MinMaxLen* to, MinMaxLen* from) +{ + to->min = distance_add(to->min, from->min); + to->max = distance_add(to->max, from->max); +} + +#if 0 +static void +add_len_mml(MinMaxLen* to, OnigDistance len) +{ + to->min = distance_add(to->min, len); + to->max = distance_add(to->max, len); +} +#endif + +static void +alt_merge_mml(MinMaxLen* to, MinMaxLen* from) +{ + if (to->min > from->min) to->min = from->min; + if (to->max < from->max) to->max = from->max; +} + +static void +copy_opt_env(OptEnv* to, OptEnv* from) +{ + *to = *from; +} + +static void +clear_opt_anc_info(OptAncInfo* anc) +{ + anc->left_anchor = 0; + anc->right_anchor = 0; +} + +static void +copy_opt_anc_info(OptAncInfo* to, OptAncInfo* from) +{ + *to = *from; +} + +static void +concat_opt_anc_info(OptAncInfo* to, OptAncInfo* left, OptAncInfo* right, + OnigDistance left_len, OnigDistance right_len) +{ + clear_opt_anc_info(to); + + to->left_anchor = left->left_anchor; + if (left_len == 0) { + to->left_anchor |= right->left_anchor; + } + + to->right_anchor = right->right_anchor; + if (right_len == 0) { + to->right_anchor |= left->right_anchor; + } +} + +static int +is_left_anchor(int anc) +{ + if (anc == ANCHOR_END_BUF || anc == ANCHOR_SEMI_END_BUF || + anc == ANCHOR_END_LINE || anc == ANCHOR_PREC_READ || + anc == ANCHOR_PREC_READ_NOT) + return 0; + + return 1; +} + +static int +is_set_opt_anc_info(OptAncInfo* to, int anc) +{ + if ((to->left_anchor & anc) != 0) return 1; + + return ((to->right_anchor & anc) != 0 ? 
1 : 0); +} + +static void +add_opt_anc_info(OptAncInfo* to, int anc) +{ + if (is_left_anchor(anc)) + to->left_anchor |= anc; + else + to->right_anchor |= anc; +} + +static void +remove_opt_anc_info(OptAncInfo* to, int anc) +{ + if (is_left_anchor(anc)) + to->left_anchor &= ~anc; + else + to->right_anchor &= ~anc; +} + +static void +alt_merge_opt_anc_info(OptAncInfo* to, OptAncInfo* add) +{ + to->left_anchor &= add->left_anchor; + to->right_anchor &= add->right_anchor; +} + +static int +is_full_opt_exact_info(OptExactInfo* ex) +{ + return (ex->len >= OPT_EXACT_MAXLEN ? 1 : 0); +} + +static void +clear_opt_exact_info(OptExactInfo* ex) +{ + clear_mml(&ex->mmd); + clear_opt_anc_info(&ex->anc); + ex->reach_end = 0; + ex->ignore_case = 0; + ex->len = 0; + ex->s[0] = '\0'; +} + +static void +copy_opt_exact_info(OptExactInfo* to, OptExactInfo* from) +{ + *to = *from; +} + +static void +concat_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OnigEncoding enc) +{ + int i, j, len; + UChar *p, *end; + OptAncInfo tanc; + + if (! to->ignore_case && add->ignore_case) { + if (to->len >= add->len) return ; /* avoid */ + + to->ignore_case = 1; + } + + p = add->s; + end = p + add->len; + for (i = to->len; p < end; ) { + len = enclen(enc, p); + if (i + len > OPT_EXACT_MAXLEN) break; + for (j = 0; j < len && p < end; j++) + to->s[i++] = *p++; + } + + to->len = i; + to->reach_end = (p == end ? add->reach_end : 0); + + concat_opt_anc_info(&tanc, &to->anc, &add->anc, 1, 1); + if (! 
to->reach_end) tanc.right_anchor = 0; + copy_opt_anc_info(&to->anc, &tanc); +} + +static void +concat_opt_exact_info_str(OptExactInfo* to, UChar* s, UChar* end, + int raw ARG_UNUSED, OnigEncoding enc) +{ + int i, j, len; + UChar *p; + + for (i = to->len, p = s; p < end && i < OPT_EXACT_MAXLEN; ) { + len = enclen(enc, p); + if (i + len > OPT_EXACT_MAXLEN) break; + for (j = 0; j < len && p < end; j++) + to->s[i++] = *p++; + } + + to->len = i; +} + +static void +alt_merge_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OptEnv* env) +{ + int i, j, len; + + if (add->len == 0 || to->len == 0) { + clear_opt_exact_info(to); + return ; + } + + if (! is_equal_mml(&to->mmd, &add->mmd)) { + clear_opt_exact_info(to); + return ; + } + + for (i = 0; i < to->len && i < add->len; ) { + if (to->s[i] != add->s[i]) break; + len = enclen(env->enc, to->s + i); + + for (j = 1; j < len; j++) { + if (to->s[i+j] != add->s[i+j]) break; + } + if (j < len) break; + i += len; + } + + if (! add->reach_end || i < add->len || i < to->len) { + to->reach_end = 0; + } + to->len = i; + to->ignore_case |= add->ignore_case; + + alt_merge_opt_anc_info(&to->anc, &add->anc); + if (! 
to->reach_end) to->anc.right_anchor = 0; +} + +static void +select_opt_exact_info(OnigEncoding enc, OptExactInfo* now, OptExactInfo* alt) +{ + int v1, v2; + + v1 = now->len; + v2 = alt->len; + + if (v2 == 0) { + return ; + } + else if (v1 == 0) { + copy_opt_exact_info(now, alt); + return ; + } + else if (v1 <= 2 && v2 <= 2) { + /* ByteValTable[x] is big value --> low price */ + v2 = map_position_value(enc, now->s[0]); + v1 = map_position_value(enc, alt->s[0]); + + if (now->len > 1) v1 += 5; + if (alt->len > 1) v2 += 5; + } + + if (now->ignore_case == 0) v1 *= 2; + if (alt->ignore_case == 0) v2 *= 2; + + if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0) + copy_opt_exact_info(now, alt); +} + +static void +clear_opt_map_info(OptMapInfo* map) +{ + static const OptMapInfo clean_info = { + {0, 0}, {0, 0}, 0, + { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + } + }; + + xmemcpy(map, &clean_info, sizeof(OptMapInfo)); +} + +static void +copy_opt_map_info(OptMapInfo* to, OptMapInfo* from) +{ + *to = *from; +} + +static void +add_char_opt_map_info(OptMapInfo* map, UChar c, OnigEncoding enc) +{ + if (map->map[c] == 0) { + map->map[c] = 1; + map->value += map_position_value(enc, c); + } +} + +static int 
+add_char_amb_opt_map_info(OptMapInfo* map, UChar* p, UChar* end, + OnigEncoding enc, OnigCaseFoldType case_fold_flag) +{ + OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM]; + UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN]; + int i, n; + + add_char_opt_map_info(map, p[0], enc); + + case_fold_flag = DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag); + n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, case_fold_flag, p, end, items); + if (n < 0) return n; + + for (i = 0; i < n; i++) { + ONIGENC_CODE_TO_MBC(enc, items[i].code[0], buf); + add_char_opt_map_info(map, buf[0], enc); + } + + return 0; +} + +static void +select_opt_map_info(OptMapInfo* now, OptMapInfo* alt) +{ + static int z = 1<<15; /* 32768: something big value */ + + int v1, v2; + + if (alt->value == 0) return ; + if (now->value == 0) { + copy_opt_map_info(now, alt); + return ; + } + + v1 = z / now->value; + v2 = z / alt->value; + if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0) + copy_opt_map_info(now, alt); +} + +static int +comp_opt_exact_or_map_info(OptExactInfo* e, OptMapInfo* m) +{ +#define COMP_EM_BASE 20 + int ve, vm; + + if (m->value <= 0) return -1; + + ve = COMP_EM_BASE * e->len * (e->ignore_case ? 1 : 2); + vm = COMP_EM_BASE * 5 * 2 / m->value; + return comp_distance_value(&e->mmd, &m->mmd, ve, vm); +} + +static void +alt_merge_opt_map_info(OnigEncoding enc, OptMapInfo* to, OptMapInfo* add) +{ + int i, val; + + /* if (! 
is_equal_mml(&to->mmd, &add->mmd)) return ; */ + if (to->value == 0) return ; + if (add->value == 0 || to->mmd.max < add->mmd.min) { + clear_opt_map_info(to); + return ; + } + + alt_merge_mml(&to->mmd, &add->mmd); + + val = 0; + for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) { + if (add->map[i]) + to->map[i] = 1; + + if (to->map[i]) + val += map_position_value(enc, i); + } + to->value = val; + + alt_merge_opt_anc_info(&to->anc, &add->anc); +} + +static void +set_bound_node_opt_info(NodeOptInfo* opt, MinMaxLen* mmd) +{ + copy_mml(&(opt->exb.mmd), mmd); + copy_mml(&(opt->expr.mmd), mmd); + copy_mml(&(opt->map.mmd), mmd); +} + +static void +clear_node_opt_info(NodeOptInfo* opt) +{ + clear_mml(&opt->len); + clear_opt_anc_info(&opt->anc); + clear_opt_exact_info(&opt->exb); + clear_opt_exact_info(&opt->exm); + clear_opt_exact_info(&opt->expr); + clear_opt_map_info(&opt->map); +} + +static void +copy_node_opt_info(NodeOptInfo* to, NodeOptInfo* from) +{ + *to = *from; +} + +static void +concat_left_node_opt_info(OnigEncoding enc, NodeOptInfo* to, NodeOptInfo* add) +{ + int exb_reach, exm_reach; + OptAncInfo tanc; + + concat_opt_anc_info(&tanc, &to->anc, &add->anc, to->len.max, add->len.max); + copy_opt_anc_info(&to->anc, &tanc); + + if (add->exb.len > 0 && to->len.max == 0) { + concat_opt_anc_info(&tanc, &to->anc, &add->exb.anc, + to->len.max, add->len.max); + copy_opt_anc_info(&add->exb.anc, &tanc); + } + + if (add->map.value > 0 && to->len.max == 0) { + if (add->map.mmd.max == 0) + add->map.anc.left_anchor |= to->anc.left_anchor; + } + + exb_reach = to->exb.reach_end; + exm_reach = to->exm.reach_end; + + if (add->len.max != 0) + to->exb.reach_end = to->exm.reach_end = 0; + + if (add->exb.len > 0) { + if (exb_reach) { + concat_opt_exact_info(&to->exb, &add->exb, enc); + clear_opt_exact_info(&add->exb); + } + else if (exm_reach) { + concat_opt_exact_info(&to->exm, &add->exb, enc); + clear_opt_exact_info(&add->exb); + } + } + select_opt_exact_info(enc, &to->exm, &add->exb); + 
select_opt_exact_info(enc, &to->exm, &add->exm); + + if (to->expr.len > 0) { + if (add->len.max > 0) { + if (to->expr.len > (int )add->len.max) + to->expr.len = add->len.max; + + if (to->expr.mmd.max == 0) + select_opt_exact_info(enc, &to->exb, &to->expr); + else + select_opt_exact_info(enc, &to->exm, &to->expr); + } + } + else if (add->expr.len > 0) { + copy_opt_exact_info(&to->expr, &add->expr); + } + + select_opt_map_info(&to->map, &add->map); + + add_mml(&to->len, &add->len); +} + +static void +alt_merge_node_opt_info(NodeOptInfo* to, NodeOptInfo* add, OptEnv* env) +{ + alt_merge_opt_anc_info (&to->anc, &add->anc); + alt_merge_opt_exact_info(&to->exb, &add->exb, env); + alt_merge_opt_exact_info(&to->exm, &add->exm, env); + alt_merge_opt_exact_info(&to->expr, &add->expr, env); + alt_merge_opt_map_info(env->enc, &to->map, &add->map); + + alt_merge_mml(&to->len, &add->len); +} + + +#define MAX_NODE_OPT_INFO_REF_COUNT 5 + +static int +optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) +{ + int type; + int r = 0; + + clear_node_opt_info(opt); + set_bound_node_opt_info(opt, &env->mmd); + + type = NTYPE(node); + switch (type) { + case NT_LIST: + { + OptEnv nenv; + NodeOptInfo nopt; + Node* nd = node; + + copy_opt_env(&nenv, env); + do { + r = optimize_node_left(NCAR(nd), &nopt, &nenv); + if (r == 0) { + add_mml(&nenv.mmd, &nopt.len); + concat_left_node_opt_info(env->enc, opt, &nopt); + } + } while (r == 0 && IS_NOT_NULL(nd = NCDR(nd))); + } + break; + + case NT_ALT: + { + NodeOptInfo nopt; + Node* nd = node; + + do { + r = optimize_node_left(NCAR(nd), &nopt, env); + if (r == 0) { + if (nd == node) copy_node_opt_info(opt, &nopt); + else alt_merge_node_opt_info(opt, &nopt, env); + } + } while ((r == 0) && IS_NOT_NULL(nd = NCDR(nd))); + } + break; + + case NT_STR: + { + StrNode* sn = NSTR(node); + int slen = sn->end - sn->s; + int is_raw = NSTRING_IS_RAW(node); + + if (! 
NSTRING_IS_AMBIG(node)) { + concat_opt_exact_info_str(&opt->exb, sn->s, sn->end, + NSTRING_IS_RAW(node), env->enc); + if (slen > 0) { + add_char_opt_map_info(&opt->map, *(sn->s), env->enc); + } + set_mml(&opt->len, slen, slen); + } + else { + int max; + + if (NSTRING_IS_DONT_GET_OPT_INFO(node)) { + int n = onigenc_strlen(env->enc, sn->s, sn->end); + max = ONIGENC_MBC_MAXLEN_DIST(env->enc) * n; + } + else { + concat_opt_exact_info_str(&opt->exb, sn->s, sn->end, + is_raw, env->enc); + opt->exb.ignore_case = 1; + + if (slen > 0) { + r = add_char_amb_opt_map_info(&opt->map, sn->s, sn->end, + env->enc, env->case_fold_flag); + if (r != 0) break; + } + + max = slen; + } + + set_mml(&opt->len, slen, max); + } + + if (opt->exb.len == slen) + opt->exb.reach_end = 1; + } + break; + + case NT_CCLASS: + { + int i, z; + CClassNode* cc = NCCLASS(node); + + /* no need to check ignore case. (setted in setup_tree()) */ + + if (IS_NOT_NULL(cc->mbuf) || IS_NCCLASS_NOT(cc)) { + OnigDistance min = ONIGENC_MBC_MINLEN(env->enc); + OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc); + + set_mml(&opt->len, min, max); + } + else { + for (i = 0; i < SINGLE_BYTE_SIZE; i++) { + z = BITSET_AT(cc->bs, i); + if ((z && !IS_NCCLASS_NOT(cc)) || (!z && IS_NCCLASS_NOT(cc))) { + add_char_opt_map_info(&opt->map, (UChar )i, env->enc); + } + } + set_mml(&opt->len, 1, 1); + } + } + break; + + case NT_CTYPE: + { + int i, min, max; + + max = ONIGENC_MBC_MAXLEN_DIST(env->enc); + + if (max == 1) { + min = 1; + + switch (NCTYPE(node)->ctype) { + case ONIGENC_CTYPE_WORD: + if (NCTYPE(node)->not != 0) { + for (i = 0; i < SINGLE_BYTE_SIZE; i++) { + if (! 
ONIGENC_IS_CODE_WORD(env->enc, i)) { + add_char_opt_map_info(&opt->map, (UChar )i, env->enc); + } + } + } + else { + for (i = 0; i < SINGLE_BYTE_SIZE; i++) { + if (ONIGENC_IS_CODE_WORD(env->enc, i)) { + add_char_opt_map_info(&opt->map, (UChar )i, env->enc); + } + } + } + break; + } + } + else { + min = ONIGENC_MBC_MINLEN(env->enc); + } + set_mml(&opt->len, min, max); + } + break; + + case NT_CANY: + { + OnigDistance min = ONIGENC_MBC_MINLEN(env->enc); + OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc); + set_mml(&opt->len, min, max); + } + break; + + case NT_ANCHOR: + switch (NANCHOR(node)->type) { + case ANCHOR_BEGIN_BUF: + case ANCHOR_BEGIN_POSITION: + case ANCHOR_BEGIN_LINE: + case ANCHOR_END_BUF: + case ANCHOR_SEMI_END_BUF: + case ANCHOR_END_LINE: + add_opt_anc_info(&opt->anc, NANCHOR(node)->type); + break; + + case ANCHOR_PREC_READ: + { + NodeOptInfo nopt; + + r = optimize_node_left(NANCHOR(node)->target, &nopt, env); + if (r == 0) { + if (nopt.exb.len > 0) + copy_opt_exact_info(&opt->expr, &nopt.exb); + else if (nopt.exm.len > 0) + copy_opt_exact_info(&opt->expr, &nopt.exm); + + opt->expr.reach_end = 0; + + if (nopt.map.value > 0) + copy_opt_map_info(&opt->map, &nopt.map); + } + } + break; + + case ANCHOR_PREC_READ_NOT: + case ANCHOR_LOOK_BEHIND: /* Sorry, I can't make use of it. 
*/ + case ANCHOR_LOOK_BEHIND_NOT: + break; + } + break; + + case NT_BREF: + { + int i; + int* backs; + OnigDistance min, max, tmin, tmax; + Node** nodes = SCANENV_MEM_NODES(env->scan_env); + BRefNode* br = NBREF(node); + + if (br->state & NST_RECURSION) { + set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE); + break; + } + backs = BACKREFS_P(br); + r = get_min_match_length(nodes[backs[0]], &min, env->scan_env); + if (r != 0) break; + r = get_max_match_length(nodes[backs[0]], &max, env->scan_env); + if (r != 0) break; + for (i = 1; i < br->back_num; i++) { + r = get_min_match_length(nodes[backs[i]], &tmin, env->scan_env); + if (r != 0) break; + r = get_max_match_length(nodes[backs[i]], &tmax, env->scan_env); + if (r != 0) break; + if (min > tmin) min = tmin; + if (max < tmax) max = tmax; + } + if (r == 0) set_mml(&opt->len, min, max); + } + break; + +#ifdef USE_SUBEXP_CALL + case NT_CALL: + if (IS_CALL_RECURSION(NCALL(node))) + set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE); + else { + OnigOptionType save = env->options; + env->options = NENCLOSE(NCALL(node)->target)->option; + r = optimize_node_left(NCALL(node)->target, opt, env); + env->options = save; + } + break; +#endif + + case NT_QTFR: + { + int i; + OnigDistance min, max; + NodeOptInfo nopt; + QtfrNode* qn = NQTFR(node); + + r = optimize_node_left(qn->target, &nopt, env); + if (r) break; + + if (qn->lower == 0 && IS_REPEAT_INFINITE(qn->upper)) { + if (env->mmd.max == 0 && + NTYPE(qn->target) == NT_CANY && qn->greedy) { + if (IS_MULTILINE(env->options)) + add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_ML); + else + add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR); + } + } + else { + if (qn->lower > 0) { + copy_node_opt_info(opt, &nopt); + if (nopt.exb.len > 0) { + if (nopt.exb.reach_end) { + for (i = 2; i <= qn->lower && + ! 
is_full_opt_exact_info(&opt->exb); i++) { + concat_opt_exact_info(&opt->exb, &nopt.exb, env->enc); + } + if (i < qn->lower) { + opt->exb.reach_end = 0; + } + } + } + + if (qn->lower != qn->upper) { + opt->exb.reach_end = 0; + opt->exm.reach_end = 0; + } + if (qn->lower > 1) + opt->exm.reach_end = 0; + } + } + + min = distance_multiply(nopt.len.min, qn->lower); + if (IS_REPEAT_INFINITE(qn->upper)) + max = (nopt.len.max > 0 ? ONIG_INFINITE_DISTANCE : 0); + else + max = distance_multiply(nopt.len.max, qn->upper); + + set_mml(&opt->len, min, max); + } + break; + + case NT_ENCLOSE: + { + EncloseNode* en = NENCLOSE(node); + + switch (en->type) { + case ENCLOSE_OPTION: + { + OnigOptionType save = env->options; + + env->options = en->option; + r = optimize_node_left(en->target, opt, env); + env->options = save; + } + break; + + case ENCLOSE_MEMORY: +#ifdef USE_SUBEXP_CALL + en->opt_count++; + if (en->opt_count > MAX_NODE_OPT_INFO_REF_COUNT) { + OnigDistance min, max; + + min = 0; + max = ONIG_INFINITE_DISTANCE; + if (IS_ENCLOSE_MIN_FIXED(en)) min = en->min_len; + if (IS_ENCLOSE_MAX_FIXED(en)) max = en->max_len; + set_mml(&opt->len, min, max); + } + else +#endif + { + r = optimize_node_left(en->target, opt, env); + + if (is_set_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK)) { + if (BIT_STATUS_AT(env->scan_env->backrefed_mem, en->regnum)) + remove_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK); + } + } + break; + + case ENCLOSE_STOP_BACKTRACK: + r = optimize_node_left(en->target, opt, env); + break; + } + } + break; + + default: +#ifdef ONIG_DEBUG + fprintf(stderr, "optimize_node_left: undefined node type %d\n", + NTYPE(node)); +#endif + r = ONIGERR_TYPE_BUG; + break; + } + + return r; +} + +static int +set_optimize_exact_info(regex_t* reg, OptExactInfo* e) +{ + int r; + + if (e->len == 0) return 0; + + if (e->ignore_case) { + reg->exact = (UChar* )xmalloc(e->len); + CHECK_NULL_RETURN_MEMERR(reg->exact); + xmemcpy(reg->exact, e->s, e->len); + reg->exact_end = 
reg->exact + e->len; + reg->optimize = ONIG_OPTIMIZE_EXACT_IC; + } + else { + int allow_reverse; + + reg->exact = str_dup(e->s, e->s + e->len); + CHECK_NULL_RETURN_MEMERR(reg->exact); + reg->exact_end = reg->exact + e->len; + + allow_reverse = + ONIGENC_IS_ALLOWED_REVERSE_MATCH(reg->enc, reg->exact, reg->exact_end); + + if (e->len >= 3 || (e->len >= 2 && allow_reverse)) { + r = set_bm_skip(reg->exact, reg->exact_end, reg->enc, + reg->map, &(reg->int_map)); + if (r) return r; + + reg->optimize = (allow_reverse != 0 + ? ONIG_OPTIMIZE_EXACT_BM : ONIG_OPTIMIZE_EXACT_BM_NOT_REV); + } + else { + reg->optimize = ONIG_OPTIMIZE_EXACT; + } + } + + reg->dmin = e->mmd.min; + reg->dmax = e->mmd.max; + + if (reg->dmin != ONIG_INFINITE_DISTANCE) { + reg->threshold_len = reg->dmin + (reg->exact_end - reg->exact); + } + + return 0; +} + +static void +set_optimize_map_info(regex_t* reg, OptMapInfo* m) +{ + int i; + + for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) + reg->map[i] = m->map[i]; + + reg->optimize = ONIG_OPTIMIZE_MAP; + reg->dmin = m->mmd.min; + reg->dmax = m->mmd.max; + + if (reg->dmin != ONIG_INFINITE_DISTANCE) { + reg->threshold_len = reg->dmin + 1; + } +} + +static void +set_sub_anchor(regex_t* reg, OptAncInfo* anc) +{ + reg->sub_anchor |= anc->left_anchor & ANCHOR_BEGIN_LINE; + reg->sub_anchor |= anc->right_anchor & ANCHOR_END_LINE; +} + +#ifdef ONIG_DEBUG +static void print_optimize_info(FILE* f, regex_t* reg); +#endif + +static int +set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env) +{ + + int r; + NodeOptInfo opt; + OptEnv env; + + env.enc = reg->enc; + env.options = reg->options; + env.case_fold_flag = reg->case_fold_flag; + env.scan_env = scan_env; + clear_mml(&env.mmd); + + r = optimize_node_left(node, &opt, &env); + if (r) return r; + + reg->anchor = opt.anc.left_anchor & (ANCHOR_BEGIN_BUF | + ANCHOR_BEGIN_POSITION | ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML); + + reg->anchor |= opt.anc.right_anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF); 
+ + if (reg->anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)) { + reg->anchor_dmin = opt.len.min; + reg->anchor_dmax = opt.len.max; + } + + if (opt.exb.len > 0 || opt.exm.len > 0) { + select_opt_exact_info(reg->enc, &opt.exb, &opt.exm); + if (opt.map.value > 0 && + comp_opt_exact_or_map_info(&opt.exb, &opt.map) > 0) { + goto set_map; + } + else { + r = set_optimize_exact_info(reg, &opt.exb); + set_sub_anchor(reg, &opt.exb.anc); + } + } + else if (opt.map.value > 0) { + set_map: + set_optimize_map_info(reg, &opt.map); + set_sub_anchor(reg, &opt.map.anc); + } + else { + reg->sub_anchor |= opt.anc.left_anchor & ANCHOR_BEGIN_LINE; + if (opt.len.max == 0) + reg->sub_anchor |= opt.anc.right_anchor & ANCHOR_END_LINE; + } + +#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH) + print_optimize_info(stderr, reg); +#endif + return r; +} + +static void +clear_optimize_info(regex_t* reg) +{ + reg->optimize = ONIG_OPTIMIZE_NONE; + reg->anchor = 0; + reg->anchor_dmin = 0; + reg->anchor_dmax = 0; + reg->sub_anchor = 0; + reg->exact_end = (UChar* )NULL; + reg->threshold_len = 0; + if (IS_NOT_NULL(reg->exact)) { + xfree(reg->exact); + reg->exact = (UChar* )NULL; + } +} + +#ifdef ONIG_DEBUG + +static void print_enc_string(FILE* fp, OnigEncoding enc, + const UChar *s, const UChar *end) +{ + fprintf(fp, "\nPATTERN: /"); + + if (ONIGENC_MBC_MINLEN(enc) > 1) { + const UChar *p; + OnigCodePoint code; + + p = s; + while (p < end) { + code = ONIGENC_MBC_TO_CODE(enc, p, end); + if (code >= 0x80) { + fprintf(fp, " 0x%04x ", (int )code); + } + else { + fputc((int )code, fp); + } + + p += enclen(enc, p); + } + } + else { + while (s < end) { + fputc((int )*s, fp); + s++; + } + } + + fprintf(fp, "/\n"); +} + +static void +print_distance_range(FILE* f, OnigDistance a, OnigDistance b) +{ + if (a == ONIG_INFINITE_DISTANCE) + fputs("inf", f); + else + fprintf(f, "(%u)", a); + + fputs("-", f); + + if (b == ONIG_INFINITE_DISTANCE) + fputs("inf", f); + else + fprintf(f, "(%u)", b); +} + +static 
void +print_anchor(FILE* f, int anchor) +{ + int q = 0; + + fprintf(f, "["); + + if (anchor & ANCHOR_BEGIN_BUF) { + fprintf(f, "begin-buf"); + q = 1; + } + if (anchor & ANCHOR_BEGIN_LINE) { + if (q) fprintf(f, ", "); + q = 1; + fprintf(f, "begin-line"); + } + if (anchor & ANCHOR_BEGIN_POSITION) { + if (q) fprintf(f, ", "); + q = 1; + fprintf(f, "begin-pos"); + } + if (anchor & ANCHOR_END_BUF) { + if (q) fprintf(f, ", "); + q = 1; + fprintf(f, "end-buf"); + } + if (anchor & ANCHOR_SEMI_END_BUF) { + if (q) fprintf(f, ", "); + q = 1; + fprintf(f, "semi-end-buf"); + } + if (anchor & ANCHOR_END_LINE) { + if (q) fprintf(f, ", "); + q = 1; + fprintf(f, "end-line"); + } + if (anchor & ANCHOR_ANYCHAR_STAR) { + if (q) fprintf(f, ", "); + q = 1; + fprintf(f, "anychar-star"); + } + if (anchor & ANCHOR_ANYCHAR_STAR_ML) { + if (q) fprintf(f, ", "); + fprintf(f, "anychar-star-pl"); + } + + fprintf(f, "]"); +} + +static void +print_optimize_info(FILE* f, regex_t* reg) +{ + static const char* on[] = { "NONE", "EXACT", "EXACT_BM", "EXACT_BM_NOT_REV", + "EXACT_IC", "MAP" }; + + fprintf(f, "optimize: %s\n", on[reg->optimize]); + fprintf(f, " anchor: "); print_anchor(f, reg->anchor); + if ((reg->anchor & ANCHOR_END_BUF_MASK) != 0) + print_distance_range(f, reg->anchor_dmin, reg->anchor_dmax); + fprintf(f, "\n"); + + if (reg->optimize) { + fprintf(f, " sub anchor: "); print_anchor(f, reg->sub_anchor); + fprintf(f, "\n"); + } + fprintf(f, "\n"); + + if (reg->exact) { + UChar *p; + fprintf(f, "exact: ["); + for (p = reg->exact; p < reg->exact_end; p++) { + fputc(*p, f); + } + fprintf(f, "]: length: %d\n", (reg->exact_end - reg->exact)); + } + else if (reg->optimize & ONIG_OPTIMIZE_MAP) { + int c, i, n = 0; + + for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) + if (reg->map[i]) n++; + + fprintf(f, "map: n=%d\n", n); + if (n > 0) { + c = 0; + fputc('[', f); + for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) { + if (reg->map[i] != 0) { + if (c > 0) fputs(", ", f); + c++; + if 
(ONIGENC_MBC_MAXLEN(reg->enc) == 1 && + ONIGENC_IS_CODE_PRINT(reg->enc, (OnigCodePoint )i)) + fputc(i, f); + else + fprintf(f, "%d", i); + } + } + fprintf(f, "]\n"); + } + } +} +#endif /* ONIG_DEBUG */ + + +extern void +onig_free_body(regex_t* reg) +{ + if (IS_NOT_NULL(reg)) { + if (IS_NOT_NULL(reg->p)) xfree(reg->p); + if (IS_NOT_NULL(reg->exact)) xfree(reg->exact); + if (IS_NOT_NULL(reg->int_map)) xfree(reg->int_map); + if (IS_NOT_NULL(reg->int_map_backward)) xfree(reg->int_map_backward); + if (IS_NOT_NULL(reg->repeat_range)) xfree(reg->repeat_range); + if (IS_NOT_NULL(reg->chain)) onig_free(reg->chain); + +#ifdef USE_NAMED_GROUP + onig_names_free(reg); +#endif + } +} + +extern void +onig_free(regex_t* reg) +{ + if (IS_NOT_NULL(reg)) { + onig_free_body(reg); + xfree(reg); + } +} + +#define REGEX_TRANSFER(to,from) do {\ + (to)->state = ONIG_STATE_MODIFY;\ + onig_free_body(to);\ + xmemcpy(to, from, sizeof(regex_t));\ + xfree(from);\ +} while (0) + +extern void +onig_transfer(regex_t* to, regex_t* from) +{ + THREAD_ATOMIC_START; + REGEX_TRANSFER(to, from); + THREAD_ATOMIC_END; +} + +#define REGEX_CHAIN_HEAD(reg) do {\ + while (IS_NOT_NULL((reg)->chain)) {\ + (reg) = (reg)->chain;\ + }\ +} while (0) + +extern void +onig_chain_link_add(regex_t* to, regex_t* add) +{ + THREAD_ATOMIC_START; + REGEX_CHAIN_HEAD(to); + to->chain = add; + THREAD_ATOMIC_END; +} + +extern void +onig_chain_reduce(regex_t* reg) +{ + regex_t *head, *prev; + + prev = reg; + head = prev->chain; + if (IS_NOT_NULL(head)) { + reg->state = ONIG_STATE_MODIFY; + while (IS_NOT_NULL(head->chain)) { + prev = head; + head = head->chain; + } + prev->chain = (regex_t* )NULL; + REGEX_TRANSFER(reg, head); + } +} + +#ifdef ONIG_DEBUG +static void print_compiled_byte_code_list P_((FILE* f, regex_t* reg)); +#endif +#ifdef ONIG_DEBUG_PARSE_TREE +static void print_tree P_((FILE* f, Node* node)); +#endif + +extern int +onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end, + OnigErrorInfo* einfo) 
+{ +#define COMPILE_INIT_SIZE 20 + + int r, init_size; + Node* root; + ScanEnv scan_env; +#ifdef USE_SUBEXP_CALL + UnsetAddrList uslist; +#endif + + if (IS_NOT_NULL(einfo)) einfo->par = (UChar* )NULL; + + reg->state = ONIG_STATE_COMPILING; + +#ifdef ONIG_DEBUG + print_enc_string(stderr, reg->enc, pattern, pattern_end); +#endif + + if (reg->alloc == 0) { + init_size = (pattern_end - pattern) * 2; + if (init_size <= 0) init_size = COMPILE_INIT_SIZE; + r = BBUF_INIT(reg, init_size); + if (r != 0) goto end; + } + else + reg->used = 0; + + reg->num_mem = 0; + reg->num_repeat = 0; + reg->num_null_check = 0; + reg->repeat_range_alloc = 0; + reg->repeat_range = (OnigRepeatRange* )NULL; +#ifdef USE_COMBINATION_EXPLOSION_CHECK + reg->num_comb_exp_check = 0; +#endif + + r = onig_parse_make_tree(&root, pattern, pattern_end, reg, &scan_env); + if (r != 0) goto err; + +#ifdef USE_NAMED_GROUP + /* mixed use named group and no-named group */ + if (scan_env.num_named > 0 && + IS_SYNTAX_BV(scan_env.syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) && + !ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP)) { + if (scan_env.num_named != scan_env.num_mem) + r = disable_noname_group_capture(&root, reg, &scan_env); + else + r = numbered_ref_check(root); + + if (r != 0) goto err; + } +#endif + +#ifdef USE_SUBEXP_CALL + if (scan_env.num_call > 0) { + r = unset_addr_list_init(&uslist, scan_env.num_call); + if (r != 0) goto err; + scan_env.unset_addr_list = &uslist; + r = setup_subexp_call(root, &scan_env); + if (r != 0) goto err_unset; + r = subexp_recursive_check_trav(root, &scan_env); + if (r < 0) goto err_unset; + r = subexp_inf_recursive_check_trav(root, &scan_env); + if (r != 0) goto err_unset; + + reg->num_call = scan_env.num_call; + } + else + reg->num_call = 0; +#endif + + r = setup_tree(root, reg, 0, &scan_env); + if (r != 0) goto err_unset; + +#ifdef ONIG_DEBUG_PARSE_TREE + print_tree(stderr, root); +#endif + + reg->capture_history = scan_env.capture_history; + reg->bt_mem_start = 
scan_env.bt_mem_start; + reg->bt_mem_start |= reg->capture_history; + if (IS_FIND_CONDITION(reg->options)) + BIT_STATUS_ON_ALL(reg->bt_mem_end); + else { + reg->bt_mem_end = scan_env.bt_mem_end; + reg->bt_mem_end |= reg->capture_history; + } + +#ifdef USE_COMBINATION_EXPLOSION_CHECK + if (scan_env.backrefed_mem == 0 +#ifdef USE_SUBEXP_CALL + || scan_env.num_call == 0 +#endif + ) { + setup_comb_exp_check(root, 0, &scan_env); +#ifdef USE_SUBEXP_CALL + if (scan_env.has_recursion != 0) { + scan_env.num_comb_exp_check = 0; + } + else +#endif + if (scan_env.comb_exp_max_regnum > 0) { + int i; + for (i = 1; i <= scan_env.comb_exp_max_regnum; i++) { + if (BIT_STATUS_AT(scan_env.backrefed_mem, i) != 0) { + scan_env.num_comb_exp_check = 0; + break; + } + } + } + } + + reg->num_comb_exp_check = scan_env.num_comb_exp_check; +#endif + + clear_optimize_info(reg); +#ifndef ONIG_DONT_OPTIMIZE + r = set_optimize_info_from_tree(root, reg, &scan_env); + if (r != 0) goto err_unset; +#endif + + if (IS_NOT_NULL(scan_env.mem_nodes_dynamic)) { + xfree(scan_env.mem_nodes_dynamic); + scan_env.mem_nodes_dynamic = (Node** )NULL; + } + + r = compile_tree(root, reg); + if (r == 0) { + r = add_opcode(reg, OP_END); +#ifdef USE_SUBEXP_CALL + if (scan_env.num_call > 0) { + r = unset_addr_list_fix(&uslist, reg); + unset_addr_list_end(&uslist); + if (r) goto err; + } +#endif + + if ((reg->num_repeat != 0) || (reg->bt_mem_end != 0)) + reg->stack_pop_level = STACK_POP_LEVEL_ALL; + else { + if (reg->bt_mem_start != 0) + reg->stack_pop_level = STACK_POP_LEVEL_MEM_START; + else + reg->stack_pop_level = STACK_POP_LEVEL_FREE; + } + } +#ifdef USE_SUBEXP_CALL + else if (scan_env.num_call > 0) { + unset_addr_list_end(&uslist); + } +#endif + onig_node_free(root); + +#ifdef ONIG_DEBUG_COMPILE +#ifdef USE_NAMED_GROUP + onig_print_names(stderr, reg); +#endif + print_compiled_byte_code_list(stderr, reg); +#endif + + end: + reg->state = ONIG_STATE_NORMAL; + return r; + + err_unset: +#ifdef USE_SUBEXP_CALL + if 
(scan_env.num_call > 0) { + unset_addr_list_end(&uslist); + } +#endif + err: + if (IS_NOT_NULL(scan_env.error)) { + if (IS_NOT_NULL(einfo)) { + einfo->enc = scan_env.enc; + einfo->par = scan_env.error; + einfo->par_end = scan_env.error_end; + } + } + + onig_node_free(root); + if (IS_NOT_NULL(scan_env.mem_nodes_dynamic)) + xfree(scan_env.mem_nodes_dynamic); + return r; +} + +#ifdef USE_RECOMPILE_API +extern int +onig_recompile(regex_t* reg, const UChar* pattern, const UChar* pattern_end, + OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, + OnigErrorInfo* einfo) +{ + int r; + regex_t *new_reg; + + r = onig_new(&new_reg, pattern, pattern_end, option, enc, syntax, einfo); + if (r) return r; + if (ONIG_STATE(reg) == ONIG_STATE_NORMAL) { + onig_transfer(reg, new_reg); + } + else { + onig_chain_link_add(reg, new_reg); + } + return 0; +} +#endif + +static int onig_inited = 0; + +extern int +onig_reg_init(regex_t* reg, OnigOptionType option, + OnigCaseFoldType case_fold_flag, + OnigEncoding enc, OnigSyntaxType* syntax) +{ + if (! 
onig_inited) + onig_init(); + + if (IS_NULL(reg)) + return ONIGERR_INVALID_ARGUMENT; + + if (ONIGENC_IS_UNDEF(enc)) + return ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED; + + if ((option & (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP)) + == (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP)) { + return ONIGERR_INVALID_COMBINATION_OF_OPTIONS; + } + + (reg)->state = ONIG_STATE_MODIFY; + + if ((option & ONIG_OPTION_NEGATE_SINGLELINE) != 0) { + option |= syntax->options; + option &= ~ONIG_OPTION_SINGLELINE; + } + else + option |= syntax->options; + + (reg)->enc = enc; + (reg)->options = option; + (reg)->syntax = syntax; + (reg)->optimize = 0; + (reg)->exact = (UChar* )NULL; + (reg)->int_map = (int* )NULL; + (reg)->int_map_backward = (int* )NULL; + (reg)->chain = (regex_t* )NULL; + + (reg)->p = (UChar* )NULL; + (reg)->alloc = 0; + (reg)->used = 0; + (reg)->name_table = (void* )NULL; + + (reg)->case_fold_flag = case_fold_flag; + return 0; +} + +extern int +onig_new_without_alloc(regex_t* reg, const UChar* pattern, + const UChar* pattern_end, OnigOptionType option, OnigEncoding enc, + OnigSyntaxType* syntax, OnigErrorInfo* einfo) +{ + int r; + + r = onig_reg_init(reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax); + if (r) return r; + + r = onig_compile(reg, pattern, pattern_end, einfo); + return r; +} + +extern int +onig_new(regex_t** reg, const UChar* pattern, const UChar* pattern_end, + OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, + OnigErrorInfo* einfo) +{ + int r; + + *reg = (regex_t* )xmalloc(sizeof(regex_t)); + if (IS_NULL(*reg)) return ONIGERR_MEMORY; + + r = onig_reg_init(*reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax); + if (r) goto err; + + r = onig_compile(*reg, pattern, pattern_end, einfo); + if (r) { + err: + onig_free(*reg); + *reg = NULL; + } + return r; +} + + +extern int +onig_init(void) +{ + if (onig_inited != 0) + return 0; + + THREAD_SYSTEM_INIT; + THREAD_ATOMIC_START; + + onig_inited = 1; + + onigenc_init(); 
+ /* onigenc_set_default_caseconv_table((UChar* )0); */ + +#ifdef ONIG_DEBUG_STATISTICS + onig_statistics_init(); +#endif + + THREAD_ATOMIC_END; + return 0; +} + + +extern int +onig_end(void) +{ + THREAD_ATOMIC_START; + +#ifdef ONIG_DEBUG_STATISTICS + onig_print_statistics(stderr); +#endif + +#ifdef USE_SHARED_CCLASS_TABLE + onig_free_shared_cclass_table(); +#endif + +#ifdef USE_PARSE_TREE_NODE_RECYCLE + onig_free_node_list(); +#endif + + onig_inited = 0; + + THREAD_ATOMIC_END; + THREAD_SYSTEM_END; + return 0; +} + +extern int +onig_is_in_code_range(const UChar* p, OnigCodePoint code) +{ + OnigCodePoint n, *data; + OnigCodePoint low, high, x; + + GET_CODE_POINT(n, p); + data = (OnigCodePoint* )p; + data++; + + for (low = 0, high = n; low < high; ) { + x = (low + high) >> 1; + if (code > data[x * 2 + 1]) + low = x + 1; + else + high = x; + } + + return ((low < n && code >= data[low * 2]) ? 1 : 0); +} + +extern int +onig_is_code_in_cc_len(int elen, OnigCodePoint code, CClassNode* cc) +{ + int found; + + if (elen > 1 || (code >= SINGLE_BYTE_SIZE)) { + if (IS_NULL(cc->mbuf)) { + found = 0; + } + else { + found = (onig_is_in_code_range(cc->mbuf->p, code) != 0 ? 1 : 0); + } + } + else { + found = (BITSET_AT(cc->bs, code) == 0 ? 
0 : 1); + } + + if (IS_NCCLASS_NOT(cc)) + return !found; + else + return found; +} + +extern int +onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc) +{ + int len; + + if (ONIGENC_MBC_MINLEN(enc) > 1) { + len = 2; + } + else { + len = ONIGENC_CODE_TO_MBCLEN(enc, code); + } + return onig_is_code_in_cc_len(len, code, cc); +} + + +#ifdef ONIG_DEBUG + +/* arguments type */ +#define ARG_SPECIAL -1 +#define ARG_NON 0 +#define ARG_RELADDR 1 +#define ARG_ABSADDR 2 +#define ARG_LENGTH 3 +#define ARG_MEMNUM 4 +#define ARG_OPTION 5 +#define ARG_STATE_CHECK 6 + +OnigOpInfoType OnigOpInfo[] = { + { OP_FINISH, "finish", ARG_NON }, + { OP_END, "end", ARG_NON }, + { OP_EXACT1, "exact1", ARG_SPECIAL }, + { OP_EXACT2, "exact2", ARG_SPECIAL }, + { OP_EXACT3, "exact3", ARG_SPECIAL }, + { OP_EXACT4, "exact4", ARG_SPECIAL }, + { OP_EXACT5, "exact5", ARG_SPECIAL }, + { OP_EXACTN, "exactn", ARG_SPECIAL }, + { OP_EXACTMB2N1, "exactmb2-n1", ARG_SPECIAL }, + { OP_EXACTMB2N2, "exactmb2-n2", ARG_SPECIAL }, + { OP_EXACTMB2N3, "exactmb2-n3", ARG_SPECIAL }, + { OP_EXACTMB2N, "exactmb2-n", ARG_SPECIAL }, + { OP_EXACTMB3N, "exactmb3n" , ARG_SPECIAL }, + { OP_EXACTMBN, "exactmbn", ARG_SPECIAL }, + { OP_EXACT1_IC, "exact1-ic", ARG_SPECIAL }, + { OP_EXACTN_IC, "exactn-ic", ARG_SPECIAL }, + { OP_CCLASS, "cclass", ARG_SPECIAL }, + { OP_CCLASS_MB, "cclass-mb", ARG_SPECIAL }, + { OP_CCLASS_MIX, "cclass-mix", ARG_SPECIAL }, + { OP_CCLASS_NOT, "cclass-not", ARG_SPECIAL }, + { OP_CCLASS_MB_NOT, "cclass-mb-not", ARG_SPECIAL }, + { OP_CCLASS_MIX_NOT, "cclass-mix-not", ARG_SPECIAL }, + { OP_CCLASS_NODE, "cclass-node", ARG_SPECIAL }, + { OP_ANYCHAR, "anychar", ARG_NON }, + { OP_ANYCHAR_ML, "anychar-ml", ARG_NON }, + { OP_ANYCHAR_STAR, "anychar*", ARG_NON }, + { OP_ANYCHAR_ML_STAR, "anychar-ml*", ARG_NON }, + { OP_ANYCHAR_STAR_PEEK_NEXT, "anychar*-peek-next", ARG_SPECIAL }, + { OP_ANYCHAR_ML_STAR_PEEK_NEXT, "anychar-ml*-peek-next", ARG_SPECIAL }, + { OP_WORD, "word", ARG_NON }, + { 
OP_NOT_WORD, "not-word", ARG_NON }, + { OP_WORD_BOUND, "word-bound", ARG_NON }, + { OP_NOT_WORD_BOUND, "not-word-bound", ARG_NON }, + { OP_WORD_BEGIN, "word-begin", ARG_NON }, + { OP_WORD_END, "word-end", ARG_NON }, + { OP_BEGIN_BUF, "begin-buf", ARG_NON }, + { OP_END_BUF, "end-buf", ARG_NON }, + { OP_BEGIN_LINE, "begin-line", ARG_NON }, + { OP_END_LINE, "end-line", ARG_NON }, + { OP_SEMI_END_BUF, "semi-end-buf", ARG_NON }, + { OP_BEGIN_POSITION, "begin-position", ARG_NON }, + { OP_BACKREF1, "backref1", ARG_NON }, + { OP_BACKREF2, "backref2", ARG_NON }, + { OP_BACKREFN, "backrefn", ARG_MEMNUM }, + { OP_BACKREFN_IC, "backrefn-ic", ARG_SPECIAL }, + { OP_BACKREF_MULTI, "backref_multi", ARG_SPECIAL }, + { OP_BACKREF_MULTI_IC, "backref_multi-ic", ARG_SPECIAL }, + { OP_BACKREF_WITH_LEVEL, "backref_at_level", ARG_SPECIAL }, + { OP_MEMORY_START_PUSH, "mem-start-push", ARG_MEMNUM }, + { OP_MEMORY_START, "mem-start", ARG_MEMNUM }, + { OP_MEMORY_END_PUSH, "mem-end-push", ARG_MEMNUM }, + { OP_MEMORY_END_PUSH_REC, "mem-end-push-rec", ARG_MEMNUM }, + { OP_MEMORY_END, "mem-end", ARG_MEMNUM }, + { OP_MEMORY_END_REC, "mem-end-rec", ARG_MEMNUM }, + { OP_SET_OPTION_PUSH, "set-option-push", ARG_OPTION }, + { OP_SET_OPTION, "set-option", ARG_OPTION }, + { OP_FAIL, "fail", ARG_NON }, + { OP_JUMP, "jump", ARG_RELADDR }, + { OP_PUSH, "push", ARG_RELADDR }, + { OP_POP, "pop", ARG_NON }, + { OP_PUSH_OR_JUMP_EXACT1, "push-or-jump-e1", ARG_SPECIAL }, + { OP_PUSH_IF_PEEK_NEXT, "push-if-peek-next", ARG_SPECIAL }, + { OP_REPEAT, "repeat", ARG_SPECIAL }, + { OP_REPEAT_NG, "repeat-ng", ARG_SPECIAL }, + { OP_REPEAT_INC, "repeat-inc", ARG_MEMNUM }, + { OP_REPEAT_INC_NG, "repeat-inc-ng", ARG_MEMNUM }, + { OP_REPEAT_INC_SG, "repeat-inc-sg", ARG_MEMNUM }, + { OP_REPEAT_INC_NG_SG, "repeat-inc-ng-sg", ARG_MEMNUM }, + { OP_NULL_CHECK_START, "null-check-start", ARG_MEMNUM }, + { OP_NULL_CHECK_END, "null-check-end", ARG_MEMNUM }, + { OP_NULL_CHECK_END_MEMST,"null-check-end-memst", ARG_MEMNUM }, + { 
OP_NULL_CHECK_END_MEMST_PUSH,"null-check-end-memst-push", ARG_MEMNUM }, + { OP_PUSH_POS, "push-pos", ARG_NON }, + { OP_POP_POS, "pop-pos", ARG_NON }, + { OP_PUSH_POS_NOT, "push-pos-not", ARG_RELADDR }, + { OP_FAIL_POS, "fail-pos", ARG_NON }, + { OP_PUSH_STOP_BT, "push-stop-bt", ARG_NON }, + { OP_POP_STOP_BT, "pop-stop-bt", ARG_NON }, + { OP_LOOK_BEHIND, "look-behind", ARG_SPECIAL }, + { OP_PUSH_LOOK_BEHIND_NOT, "push-look-behind-not", ARG_SPECIAL }, + { OP_FAIL_LOOK_BEHIND_NOT, "fail-look-behind-not", ARG_NON }, + { OP_CALL, "call", ARG_ABSADDR }, + { OP_RETURN, "return", ARG_NON }, + { OP_STATE_CHECK_PUSH, "state-check-push", ARG_SPECIAL }, + { OP_STATE_CHECK_PUSH_OR_JUMP, "state-check-push-or-jump", ARG_SPECIAL }, + { OP_STATE_CHECK, "state-check", ARG_STATE_CHECK }, + { OP_STATE_CHECK_ANYCHAR_STAR, "state-check-anychar*", ARG_STATE_CHECK }, + { OP_STATE_CHECK_ANYCHAR_ML_STAR, + "state-check-anychar-ml*", ARG_STATE_CHECK }, + { -1, "", ARG_NON } +}; + +static char* +op2name(int opcode) +{ + int i; + + for (i = 0; OnigOpInfo[i].opcode >= 0; i++) { + if (opcode == OnigOpInfo[i].opcode) + return OnigOpInfo[i].name; + } + return ""; +} + +static int +op2arg_type(int opcode) +{ + int i; + + for (i = 0; OnigOpInfo[i].opcode >= 0; i++) { + if (opcode == OnigOpInfo[i].opcode) + return OnigOpInfo[i].arg_type; + } + return ARG_SPECIAL; +} + +static void +Indent(FILE* f, int indent) +{ + int i; + for (i = 0; i < indent; i++) putc(' ', f); +} + +static void +p_string(FILE* f, int len, UChar* s) +{ + fputs(":", f); + while (len-- > 0) { fputc(*s++, f); } +} + +static void +p_len_string(FILE* f, LengthType len, int mb_len, UChar* s) +{ + int x = len * mb_len; + + fprintf(f, ":%d:", len); + while (x-- > 0) { fputc(*s++, f); } +} + +extern void +onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp, + OnigEncoding enc) +{ + int i, n, arg_type; + RelAddrType addr; + LengthType len; + MemNumType mem; + StateCheckNumType scn; + OnigCodePoint code; + UChar *q; + + 
fprintf(f, "[%s", op2name(*bp)); + arg_type = op2arg_type(*bp); + if (arg_type != ARG_SPECIAL) { + bp++; + switch (arg_type) { + case ARG_NON: + break; + case ARG_RELADDR: + GET_RELADDR_INC(addr, bp); + fprintf(f, ":(%d)", addr); + break; + case ARG_ABSADDR: + GET_ABSADDR_INC(addr, bp); + fprintf(f, ":(%d)", addr); + break; + case ARG_LENGTH: + GET_LENGTH_INC(len, bp); + fprintf(f, ":%d", len); + break; + case ARG_MEMNUM: + mem = *((MemNumType* )bp); + bp += SIZE_MEMNUM; + fprintf(f, ":%d", mem); + break; + case ARG_OPTION: + { + OnigOptionType option = *((OnigOptionType* )bp); + bp += SIZE_OPTION; + fprintf(f, ":%d", option); + } + break; + + case ARG_STATE_CHECK: + scn = *((StateCheckNumType* )bp); + bp += SIZE_STATE_CHECK_NUM; + fprintf(f, ":%d", scn); + break; + } + } + else { + switch (*bp++) { + case OP_EXACT1: + case OP_ANYCHAR_STAR_PEEK_NEXT: + case OP_ANYCHAR_ML_STAR_PEEK_NEXT: + p_string(f, 1, bp++); break; + case OP_EXACT2: + p_string(f, 2, bp); bp += 2; break; + case OP_EXACT3: + p_string(f, 3, bp); bp += 3; break; + case OP_EXACT4: + p_string(f, 4, bp); bp += 4; break; + case OP_EXACT5: + p_string(f, 5, bp); bp += 5; break; + case OP_EXACTN: + GET_LENGTH_INC(len, bp); + p_len_string(f, len, 1, bp); + bp += len; + break; + + case OP_EXACTMB2N1: + p_string(f, 2, bp); bp += 2; break; + case OP_EXACTMB2N2: + p_string(f, 4, bp); bp += 4; break; + case OP_EXACTMB2N3: + p_string(f, 6, bp); bp += 6; break; + case OP_EXACTMB2N: + GET_LENGTH_INC(len, bp); + p_len_string(f, len, 2, bp); + bp += len * 2; + break; + case OP_EXACTMB3N: + GET_LENGTH_INC(len, bp); + p_len_string(f, len, 3, bp); + bp += len * 3; + break; + case OP_EXACTMBN: + { + int mb_len; + + GET_LENGTH_INC(mb_len, bp); + GET_LENGTH_INC(len, bp); + fprintf(f, ":%d:%d:", mb_len, len); + n = len * mb_len; + while (n-- > 0) { fputc(*bp++, f); } + } + break; + + case OP_EXACT1_IC: + len = enclen(enc, bp); + p_string(f, len, bp); + bp += len; + break; + case OP_EXACTN_IC: + GET_LENGTH_INC(len, bp); + 
p_len_string(f, len, 1, bp); + bp += len; + break; + + case OP_CCLASS: + n = bitset_on_num((BitSetRef )bp); + bp += SIZE_BITSET; + fprintf(f, ":%d", n); + break; + + case OP_CCLASS_NOT: + n = bitset_on_num((BitSetRef )bp); + bp += SIZE_BITSET; + fprintf(f, ":%d", n); + break; + + case OP_CCLASS_MB: + case OP_CCLASS_MB_NOT: + GET_LENGTH_INC(len, bp); + q = bp; +#ifndef PLATFORM_UNALIGNED_WORD_ACCESS + ALIGNMENT_RIGHT(q); +#endif + GET_CODE_POINT(code, q); + bp += len; + fprintf(f, ":%d:%d", (int )code, len); + break; + + case OP_CCLASS_MIX: + case OP_CCLASS_MIX_NOT: + n = bitset_on_num((BitSetRef )bp); + bp += SIZE_BITSET; + GET_LENGTH_INC(len, bp); + q = bp; +#ifndef PLATFORM_UNALIGNED_WORD_ACCESS + ALIGNMENT_RIGHT(q); +#endif + GET_CODE_POINT(code, q); + bp += len; + fprintf(f, ":%d:%d:%d", n, (int )code, len); + break; + + case OP_CCLASS_NODE: + { + CClassNode *cc; + + GET_POINTER_INC(cc, bp); + n = bitset_on_num(cc->bs); + fprintf(f, ":%u:%d", (unsigned int )cc, n); + } + break; + + case OP_BACKREFN_IC: + mem = *((MemNumType* )bp); + bp += SIZE_MEMNUM; + fprintf(f, ":%d", mem); + break; + + case OP_BACKREF_MULTI_IC: + case OP_BACKREF_MULTI: + fputs(" ", f); + GET_LENGTH_INC(len, bp); + for (i = 0; i < len; i++) { + GET_MEMNUM_INC(mem, bp); + if (i > 0) fputs(", ", f); + fprintf(f, "%d", mem); + } + break; + + case OP_BACKREF_WITH_LEVEL: + { + OnigOptionType option; + LengthType level; + + GET_OPTION_INC(option, bp); + fprintf(f, ":%d", option); + GET_LENGTH_INC(level, bp); + fprintf(f, ":%d", level); + + fputs(" ", f); + GET_LENGTH_INC(len, bp); + for (i = 0; i < len; i++) { + GET_MEMNUM_INC(mem, bp); + if (i > 0) fputs(", ", f); + fprintf(f, "%d", mem); + } + } + break; + + case OP_REPEAT: + case OP_REPEAT_NG: + { + mem = *((MemNumType* )bp); + bp += SIZE_MEMNUM; + addr = *((RelAddrType* )bp); + bp += SIZE_RELADDR; + fprintf(f, ":%d:%d", mem, addr); + } + break; + + case OP_PUSH_OR_JUMP_EXACT1: + case OP_PUSH_IF_PEEK_NEXT: + addr = *((RelAddrType* )bp); + bp += 
SIZE_RELADDR; + fprintf(f, ":(%d)", addr); + p_string(f, 1, bp); + bp += 1; + break; + + case OP_LOOK_BEHIND: + GET_LENGTH_INC(len, bp); + fprintf(f, ":%d", len); + break; + + case OP_PUSH_LOOK_BEHIND_NOT: + GET_RELADDR_INC(addr, bp); + GET_LENGTH_INC(len, bp); + fprintf(f, ":%d:(%d)", len, addr); + break; + + case OP_STATE_CHECK_PUSH: + case OP_STATE_CHECK_PUSH_OR_JUMP: + scn = *((StateCheckNumType* )bp); + bp += SIZE_STATE_CHECK_NUM; + addr = *((RelAddrType* )bp); + bp += SIZE_RELADDR; + fprintf(f, ":%d:(%d)", scn, addr); + break; + + default: + fprintf(stderr, "onig_print_compiled_byte_code: undefined code %d\n", + *--bp); + } + } + fputs("]", f); + if (nextp) *nextp = bp; +} + +static void +print_compiled_byte_code_list(FILE* f, regex_t* reg) +{ + int ncode; + UChar* bp = reg->p; + UChar* end = reg->p + reg->used; + + fprintf(f, "code length: %d\n", reg->used); + + ncode = 0; + while (bp < end) { + ncode++; + if (bp > reg->p) { + if (ncode % 5 == 0) + fprintf(f, "\n"); + else + fputs(" ", f); + } + onig_print_compiled_byte_code(f, bp, &bp, reg->enc); + } + + fprintf(f, "\n"); +} + +static void +print_indent_tree(FILE* f, Node* node, int indent) +{ + int i, type; + int add = 3; + UChar* p; + + Indent(f, indent); + if (IS_NULL(node)) { + fprintf(f, "ERROR: null node!!!\n"); + exit (0); + } + + type = NTYPE(node); + switch (type) { + case NT_LIST: + case NT_ALT: + if (NTYPE(node) == NT_LIST) + fprintf(f, "\n", (int )node); + else + fprintf(f, "\n", (int )node); + + print_indent_tree(f, NCAR(node), indent + add); + while (IS_NOT_NULL(node = NCDR(node))) { + if (NTYPE(node) != type) { + fprintf(f, "ERROR: list/alt right is not a cons. %d\n", NTYPE(node)); + exit(0); + } + print_indent_tree(f, NCAR(node), indent + add); + } + break; + + case NT_STR: + fprintf(f, "", + (NSTRING_IS_RAW(node) ? 
"-raw" : ""), (int )node); + for (p = NSTR(node)->s; p < NSTR(node)->end; p++) { + if (*p >= 0x20 && *p < 0x7f) + fputc(*p, f); + else { + fprintf(f, " 0x%02x", *p); + } + } + break; + + case NT_CCLASS: + fprintf(f, "", (int )node); + if (IS_NCCLASS_NOT(NCCLASS(node))) fputs(" not", f); + if (NCCLASS(node)->mbuf) { + BBuf* bbuf = NCCLASS(node)->mbuf; + for (i = 0; i < bbuf->used; i++) { + if (i > 0) fprintf(f, ","); + fprintf(f, "%0x", bbuf->p[i]); + } + } + break; + + case NT_CTYPE: + fprintf(f, " ", (int )node); + switch (NCTYPE(node)->ctype) { + case ONIGENC_CTYPE_WORD: + if (NCTYPE(node)->not != 0) + fputs("not word", f); + else + fputs("word", f); + break; + + default: + fprintf(f, "ERROR: undefined ctype.\n"); + exit(0); + } + break; + + case NT_CANY: + fprintf(f, "", (int )node); + break; + + case NT_ANCHOR: + fprintf(f, " ", (int )node); + switch (NANCHOR(node)->type) { + case ANCHOR_BEGIN_BUF: fputs("begin buf", f); break; + case ANCHOR_END_BUF: fputs("end buf", f); break; + case ANCHOR_BEGIN_LINE: fputs("begin line", f); break; + case ANCHOR_END_LINE: fputs("end line", f); break; + case ANCHOR_SEMI_END_BUF: fputs("semi end buf", f); break; + case ANCHOR_BEGIN_POSITION: fputs("begin position", f); break; + + case ANCHOR_WORD_BOUND: fputs("word bound", f); break; + case ANCHOR_NOT_WORD_BOUND: fputs("not word bound", f); break; +#ifdef USE_WORD_BEGIN_END + case ANCHOR_WORD_BEGIN: fputs("word begin", f); break; + case ANCHOR_WORD_END: fputs("word end", f); break; +#endif + case ANCHOR_PREC_READ: fputs("prec read", f); break; + case ANCHOR_PREC_READ_NOT: fputs("prec read not", f); break; + case ANCHOR_LOOK_BEHIND: fputs("look_behind", f); break; + case ANCHOR_LOOK_BEHIND_NOT: fputs("look_behind_not",f); break; + + default: + fprintf(f, "ERROR: undefined anchor type.\n"); + break; + } + break; + + case NT_BREF: + { + int* p; + BRefNode* br = NBREF(node); + p = BACKREFS_P(br); + fprintf(f, "", (int )node); + for (i = 0; i < br->back_num; i++) { + if (i > 0) 
fputs(", ", f); + fprintf(f, "%d", p[i]); + } + } + break; + +#ifdef USE_SUBEXP_CALL + case NT_CALL: + { + CallNode* cn = NCALL(node); + fprintf(f, "", (int )node); + p_string(f, cn->name_end - cn->name, cn->name); + } + break; +#endif + + case NT_QTFR: + fprintf(f, "{%d,%d}%s\n", (int )node, + NQTFR(node)->lower, NQTFR(node)->upper, + (NQTFR(node)->greedy ? "" : "?")); + print_indent_tree(f, NQTFR(node)->target, indent + add); + break; + + case NT_ENCLOSE: + fprintf(f, " ", (int )node); + switch (NENCLOSE(node)->type) { + case ENCLOSE_OPTION: + fprintf(f, "option:%d\n", NENCLOSE(node)->option); + print_indent_tree(f, NENCLOSE(node)->target, indent + add); + break; + case ENCLOSE_MEMORY: + fprintf(f, "memory:%d", NENCLOSE(node)->regnum); + break; + case ENCLOSE_STOP_BACKTRACK: + fprintf(f, "stop-bt"); + break; + + default: + break; + } + fprintf(f, "\n"); + print_indent_tree(f, NENCLOSE(node)->target, indent + add); + break; + + default: + fprintf(f, "print_indent_tree: undefined node type %d\n", NTYPE(node)); + break; + } + + if (type != NT_LIST && type != NT_ALT && type != NT_QTFR && + type != NT_ENCLOSE) + fprintf(f, "\n"); + fflush(f); +} +#endif /* ONIG_DEBUG */ + +#ifdef ONIG_DEBUG_PARSE_TREE +static void +print_tree(FILE* f, Node* node) +{ + print_indent_tree(f, node, 0); +} +#endif diff --git a/oniguruma/regenc.c b/oniguruma/regenc.c new file mode 100644 index 0000000..8090350 --- /dev/null +++ b/oniguruma/regenc.c @@ -0,0 +1,902 @@ +/********************************************************************** + regenc.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2007 K.Kosako + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include "regint.h" + +OnigEncoding OnigEncDefaultCharEncoding = ONIG_ENCODING_INIT_DEFAULT; + +extern int +onigenc_init(void) +{ + return 0; +} + +extern OnigEncoding +onigenc_get_default_encoding(void) +{ + return OnigEncDefaultCharEncoding; +} + +extern int +onigenc_set_default_encoding(OnigEncoding enc) +{ + OnigEncDefaultCharEncoding = enc; + return 0; +} + +extern UChar* +onigenc_get_right_adjust_char_head(OnigEncoding enc, const UChar* start, const UChar* s) +{ + UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s); + if (p < s) { + p += enclen(enc, p); + } + return p; +} + +extern UChar* +onigenc_get_right_adjust_char_head_with_prev(OnigEncoding enc, + const UChar* start, const UChar* s, const UChar** prev) +{ + UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s); + + if (p < s) { + if (prev) *prev = (const UChar* )p; + p += enclen(enc, p); + } + else { + if (prev) *prev = (const UChar* )NULL; /* Sorry */ + } + return p; +} + +extern UChar* +onigenc_get_prev_char_head(OnigEncoding enc, const UChar* start, const UChar* s) +{ + if (s <= start) + return (UChar* )NULL; + + return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1); +} + +extern UChar* +onigenc_step_back(OnigEncoding enc, const UChar* start, const UChar* s, int n) +{ + while (ONIG_IS_NOT_NULL(s) && n-- > 0) { + if (s <= start) + return (UChar* )NULL; + + s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1); + } + return (UChar* )s; +} + +extern UChar* +onigenc_step(OnigEncoding enc, const UChar* p, const UChar* end, int n) +{ + UChar* q = (UChar* )p; + while (n-- > 0) { + q += ONIGENC_MBC_ENC_LEN(enc, q); + } + return (q <= end ? 
q : NULL); +} + +extern int +onigenc_strlen(OnigEncoding enc, const UChar* p, const UChar* end) +{ + int n = 0; + UChar* q = (UChar* )p; + + while (q < end) { + q += ONIGENC_MBC_ENC_LEN(enc, q); + n++; + } + return n; +} + +extern int +onigenc_strlen_null(OnigEncoding enc, const UChar* s) +{ + int n = 0; + UChar* p = (UChar* )s; + + while (1) { + if (*p == '\0') { + UChar* q; + int len = ONIGENC_MBC_MINLEN(enc); + + if (len == 1) return n; + q = p + 1; + while (len > 1) { + if (*q != '\0') break; + q++; + len--; + } + if (len == 1) return n; + } + p += ONIGENC_MBC_ENC_LEN(enc, p); + n++; + } +} + +extern int +onigenc_str_bytelen_null(OnigEncoding enc, const UChar* s) +{ + UChar* start = (UChar* )s; + UChar* p = (UChar* )s; + + while (1) { + if (*p == '\0') { + UChar* q; + int len = ONIGENC_MBC_MINLEN(enc); + + if (len == 1) return (int )(p - start); + q = p + 1; + while (len > 1) { + if (*q != '\0') break; + q++; + len--; + } + if (len == 1) return (int )(p - start); + } + p += ONIGENC_MBC_ENC_LEN(enc, p); + } +} + +const UChar OnigEncAsciiToLowerCaseTable[] = { + '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', + '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', + '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', + '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', + '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', + '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', + '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', + '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', + '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137', + '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', 
'\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177', + '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', + '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217', + '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227', + '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', + '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247', + '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257', + '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267', + '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277', + '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307', + '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317', + '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327', + '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337', + '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', + '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', + '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', + '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377', +}; + +#ifdef USE_UPPER_CASE_TABLE +const UChar OnigEncAsciiToUpperCaseTable[256] = { + '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', + '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', + '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', + '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', + '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', + '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', + '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', + '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', + '\100', '\101', '\102', '\103', '\104', '\105', '\106', '\107', + '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117', + '\120', '\121', '\122', '\123', '\124', '\125', '\126', 
'\127', + '\130', '\131', '\132', '\133', '\134', '\135', '\136', '\137', + '\140', '\101', '\102', '\103', '\104', '\105', '\106', '\107', + '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117', + '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127', + '\130', '\131', '\132', '\173', '\174', '\175', '\176', '\177', + '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', + '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217', + '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227', + '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', + '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247', + '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257', + '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267', + '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277', + '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307', + '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317', + '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327', + '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337', + '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', + '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', + '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', + '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377', +}; +#endif + +const unsigned short OnigEncAsciiCtypeTable[256] = { + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, + 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, + 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, + 0x41a0, 
0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, + 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0, + 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, + 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 +}; + +const UChar OnigEncISO_8859_1_ToLowerCaseTable[256] = { + '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', + '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', + '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', + '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', + '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', + '\050', '\051', '\052', '\053', 
'\054', '\055', '\056', '\057', + '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', + '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', + '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137', + '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', + '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', + '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177', + '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', + '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217', + '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227', + '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', + '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247', + '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257', + '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267', + '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277', + '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', + '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', + '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\327', + '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337', + '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', + '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', + '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', + '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377' +}; + +#ifdef USE_UPPER_CASE_TABLE +const UChar OnigEncISO_8859_1_ToUpperCaseTable[256] = { + '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', + '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', + '\020', '\021', '\022', 
'\023', '\024', '\025', '\026', '\027', + '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', + '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', + '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', + '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', + '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', + '\100', '\101', '\102', '\103', '\104', '\105', '\106', '\107', + '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117', + '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127', + '\130', '\131', '\132', '\133', '\134', '\135', '\136', '\137', + '\140', '\101', '\102', '\103', '\104', '\105', '\106', '\107', + '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117', + '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127', + '\130', '\131', '\132', '\173', '\174', '\175', '\176', '\177', + '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', + '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217', + '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227', + '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', + '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247', + '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257', + '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267', + '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277', + '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307', + '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317', + '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327', + '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337', + '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307', + '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317', + '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\367', + '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\377', +}; +#endif + +extern void 
+onigenc_set_default_caseconv_table(const UChar* table ARG_UNUSED) +{ + /* nothing */ + /* obsoleted. */ +} + +extern UChar* +onigenc_get_left_adjust_char_head(OnigEncoding enc, const UChar* start, const UChar* s) +{ + return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s); +} + +const OnigPairCaseFoldCodes OnigAsciiLowerMap[] = { + { 0x41, 0x61 }, + { 0x42, 0x62 }, + { 0x43, 0x63 }, + { 0x44, 0x64 }, + { 0x45, 0x65 }, + { 0x46, 0x66 }, + { 0x47, 0x67 }, + { 0x48, 0x68 }, + { 0x49, 0x69 }, + { 0x4a, 0x6a }, + { 0x4b, 0x6b }, + { 0x4c, 0x6c }, + { 0x4d, 0x6d }, + { 0x4e, 0x6e }, + { 0x4f, 0x6f }, + { 0x50, 0x70 }, + { 0x51, 0x71 }, + { 0x52, 0x72 }, + { 0x53, 0x73 }, + { 0x54, 0x74 }, + { 0x55, 0x75 }, + { 0x56, 0x76 }, + { 0x57, 0x77 }, + { 0x58, 0x78 }, + { 0x59, 0x79 }, + { 0x5a, 0x7a } +}; + +extern int +onigenc_ascii_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED, + OnigApplyAllCaseFoldFunc f, void* arg) +{ + OnigCodePoint code; + int i, r; + + for (i = 0; + i < (int )(sizeof(OnigAsciiLowerMap)/sizeof(OnigPairCaseFoldCodes)); + i++) { + code = OnigAsciiLowerMap[i].to; + r = (*f)(OnigAsciiLowerMap[i].from, &code, 1, arg); + if (r != 0) return r; + + code = OnigAsciiLowerMap[i].from; + r = (*f)(OnigAsciiLowerMap[i].to, &code, 1, arg); + if (r != 0) return r; + } + + return 0; +} + +extern int +onigenc_ascii_get_case_fold_codes_by_str(OnigCaseFoldType flag ARG_UNUSED, + const OnigUChar* p, const OnigUChar* end ARG_UNUSED, + OnigCaseFoldCodeItem items[]) +{ + if (0x41 <= *p && *p <= 0x5a) { + items[0].byte_len = 1; + items[0].code_len = 1; + items[0].code[0] = (OnigCodePoint )(*p + 0x20); + return 1; + } + else if (0x61 <= *p && *p <= 0x7a) { + items[0].byte_len = 1; + items[0].code_len = 1; + items[0].code[0] = (OnigCodePoint )(*p - 0x20); + return 1; + } + else + return 0; +} + +static int +ss_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED, + OnigApplyAllCaseFoldFunc f, void* arg) +{ + static OnigCodePoint ss[] = { 0x73, 0x73 }; + + return 
(*f)((OnigCodePoint )0xdf, ss, 2, arg); +} + +extern int +onigenc_apply_all_case_fold_with_map(int map_size, + const OnigPairCaseFoldCodes map[], + int ess_tsett_flag, OnigCaseFoldType flag, + OnigApplyAllCaseFoldFunc f, void* arg) +{ + OnigCodePoint code; + int i, r; + + r = onigenc_ascii_apply_all_case_fold(flag, f, arg); + if (r != 0) return r; + + for (i = 0; i < map_size; i++) { + code = map[i].to; + r = (*f)(map[i].from, &code, 1, arg); + if (r != 0) return r; + + code = map[i].from; + r = (*f)(map[i].to, &code, 1, arg); + if (r != 0) return r; + } + + if (ess_tsett_flag != 0) + return ss_apply_all_case_fold(flag, f, arg); + + return 0; +} + +extern int +onigenc_get_case_fold_codes_by_str_with_map(int map_size, + const OnigPairCaseFoldCodes map[], + int ess_tsett_flag, OnigCaseFoldType flag ARG_UNUSED, + const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]) +{ + if (0x41 <= *p && *p <= 0x5a) { + items[0].byte_len = 1; + items[0].code_len = 1; + items[0].code[0] = (OnigCodePoint )(*p + 0x20); + if (*p == 0x53 && ess_tsett_flag != 0 && end > p + 1 + && (*(p+1) == 0x53 || *(p+1) == 0x73)) { + /* SS */ + items[1].byte_len = 2; + items[1].code_len = 1; + items[1].code[0] = (OnigCodePoint )0xdf; + return 2; + } + else + return 1; + } + else if (0x61 <= *p && *p <= 0x7a) { + items[0].byte_len = 1; + items[0].code_len = 1; + items[0].code[0] = (OnigCodePoint )(*p - 0x20); + if (*p == 0x73 && ess_tsett_flag != 0 && end > p + 1 + && (*(p+1) == 0x73 || *(p+1) == 0x53)) { + /* ss */ + items[1].byte_len = 2; + items[1].code_len = 1; + items[1].code[0] = (OnigCodePoint )0xdf; + return 2; + } + else + return 1; + } + else if (*p == 0xdf && ess_tsett_flag != 0) { + items[0].byte_len = 1; + items[0].code_len = 2; + items[0].code[0] = (OnigCodePoint )'s'; + items[0].code[1] = (OnigCodePoint )'s'; + + items[1].byte_len = 1; + items[1].code_len = 2; + items[1].code[0] = (OnigCodePoint )'S'; + items[1].code[1] = (OnigCodePoint )'S'; + + items[2].byte_len = 1; + 
items[2].code_len = 2; + items[2].code[0] = (OnigCodePoint )'s'; + items[2].code[1] = (OnigCodePoint )'S'; + + items[3].byte_len = 1; + items[3].code_len = 2; + items[3].code[0] = (OnigCodePoint )'S'; + items[3].code[1] = (OnigCodePoint )'s'; + + return 4; + } + else { + int i; + + for (i = 0; i < map_size; i++) { + if (*p == map[i].from) { + items[0].byte_len = 1; + items[0].code_len = 1; + items[0].code[0] = map[i].to; + return 1; + } + else if (*p == map[i].to) { + items[0].byte_len = 1; + items[0].code_len = 1; + items[0].code[0] = map[i].from; + return 1; + } + } + } + + return 0; +} + + +extern int +onigenc_not_support_get_ctype_code_range(OnigCtype ctype ARG_UNUSED, + OnigCodePoint* sb_out ARG_UNUSED, + const OnigCodePoint* ranges[] ARG_UNUSED) +{ + return ONIG_NO_SUPPORT_CONFIG; +} + +extern int +onigenc_is_mbc_newline_0x0a(const UChar* p, const UChar* end) +{ + if (p < end) { + if (*p == 0x0a) return 1; + } + return 0; +} + +/* for single byte encodings */ +extern int +onigenc_ascii_mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED, const UChar** p, + const UChar*end ARG_UNUSED, UChar* lower) +{ + *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(**p); + + (*p)++; + return 1; /* return byte length of converted char to lower */ +} + +#if 0 +extern int +onigenc_ascii_is_mbc_ambiguous(OnigCaseFoldType flag, + const UChar** pp, const UChar* end) +{ + const UChar* p = *pp; + + (*pp)++; + return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p); +} +#endif + +extern int +onigenc_single_byte_mbc_enc_len(const UChar* p ARG_UNUSED) +{ + return 1; +} + +extern OnigCodePoint +onigenc_single_byte_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED) +{ + return (OnigCodePoint )(*p); +} + +extern int +onigenc_single_byte_code_to_mbclen(OnigCodePoint code ARG_UNUSED) +{ + return (code < 0x100 ? 
1 : ONIGERR_INVALID_CODE_POINT_VALUE); +} + +extern int +onigenc_single_byte_code_to_mbc(OnigCodePoint code, UChar *buf) +{ + *buf = (UChar )(code & 0xff); + return 1; +} + +extern UChar* +onigenc_single_byte_left_adjust_char_head(const UChar* start ARG_UNUSED, + const UChar* s) +{ + return (UChar* )s; +} + +extern int +onigenc_always_true_is_allowed_reverse_match(const UChar* s ARG_UNUSED, + const UChar* end ARG_UNUSED) +{ + return TRUE; +} + +extern int +onigenc_always_false_is_allowed_reverse_match(const UChar* s ARG_UNUSED, + const UChar* end ARG_UNUSED) +{ + return FALSE; +} + +extern OnigCodePoint +onigenc_mbn_mbc_to_code(OnigEncoding enc, const UChar* p, const UChar* end) +{ + int c, i, len; + OnigCodePoint n; + + len = enclen(enc, p); + n = (OnigCodePoint )(*p++); + if (len == 1) return n; + + for (i = 1; i < len; i++) { + if (p >= end) break; + c = *p++; + n <<= 8; n += c; + } + return n; +} + +extern int +onigenc_mbn_mbc_case_fold(OnigEncoding enc, OnigCaseFoldType flag ARG_UNUSED, + const UChar** pp, const UChar* end ARG_UNUSED, + UChar* lower) +{ + int len; + const UChar *p = *pp; + + if (ONIGENC_IS_MBC_ASCII(p)) { + *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p); + (*pp)++; + return 1; + } + else { + int i; + + len = enclen(enc, p); + for (i = 0; i < len; i++) { + *lower++ = *p++; + } + (*pp) += len; + return len; /* return byte length of converted to lower char */ + } +} + +#if 0 +extern int +onigenc_mbn_is_mbc_ambiguous(OnigEncoding enc, OnigCaseFoldType flag, + const UChar** pp, const UChar* end) +{ + const UChar* p = *pp; + + if (ONIGENC_IS_MBC_ASCII(p)) { + (*pp)++; + return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p); + } + + (*pp) += enclen(enc, p); + return FALSE; +} +#endif + +extern int +onigenc_mb2_code_to_mbclen(OnigCodePoint code) +{ + if ((code & 0xff00) != 0) return 2; + else return 1; +} + +extern int +onigenc_mb4_code_to_mbclen(OnigCodePoint code) +{ + if ((code & 0xff000000) != 0) return 4; + else if ((code & 0xff0000) != 0) return 3; + else 
if ((code & 0xff00) != 0) return 2; + else return 1; +} + +extern int +onigenc_mb2_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf) +{ + UChar *p = buf; + + if ((code & 0xff00) != 0) { + *p++ = (UChar )((code >> 8) & 0xff); + } + *p++ = (UChar )(code & 0xff); + +#if 1 + if (enclen(enc, buf) != (p - buf)) + return ONIGERR_INVALID_CODE_POINT_VALUE; +#endif + return p - buf; +} + +extern int +onigenc_mb4_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf) +{ + UChar *p = buf; + + if ((code & 0xff000000) != 0) { + *p++ = (UChar )((code >> 24) & 0xff); + } + if ((code & 0xff0000) != 0 || p != buf) { + *p++ = (UChar )((code >> 16) & 0xff); + } + if ((code & 0xff00) != 0 || p != buf) { + *p++ = (UChar )((code >> 8) & 0xff); + } + *p++ = (UChar )(code & 0xff); + +#if 1 + if (enclen(enc, buf) != (p - buf)) + return ONIGERR_INVALID_CODE_POINT_VALUE; +#endif + return p - buf; +} + +extern int +onigenc_minimum_property_name_to_ctype(OnigEncoding enc, UChar* p, UChar* end) +{ + static PosixBracketEntryType PBS[] = { + { (UChar* )"Alnum", ONIGENC_CTYPE_ALNUM, 5 }, + { (UChar* )"Alpha", ONIGENC_CTYPE_ALPHA, 5 }, + { (UChar* )"Blank", ONIGENC_CTYPE_BLANK, 5 }, + { (UChar* )"Cntrl", ONIGENC_CTYPE_CNTRL, 5 }, + { (UChar* )"Digit", ONIGENC_CTYPE_DIGIT, 5 }, + { (UChar* )"Graph", ONIGENC_CTYPE_GRAPH, 5 }, + { (UChar* )"Lower", ONIGENC_CTYPE_LOWER, 5 }, + { (UChar* )"Print", ONIGENC_CTYPE_PRINT, 5 }, + { (UChar* )"Punct", ONIGENC_CTYPE_PUNCT, 5 }, + { (UChar* )"Space", ONIGENC_CTYPE_SPACE, 5 }, + { (UChar* )"Upper", ONIGENC_CTYPE_UPPER, 5 }, + { (UChar* )"XDigit", ONIGENC_CTYPE_XDIGIT, 6 }, + { (UChar* )"ASCII", ONIGENC_CTYPE_ASCII, 5 }, + { (UChar* )"Word", ONIGENC_CTYPE_WORD, 4 }, + { (UChar* )NULL, -1, 0 } + }; + + PosixBracketEntryType *pb; + int len; + + len = onigenc_strlen(enc, p, end); + for (pb = PBS; IS_NOT_NULL(pb->name); pb++) { + if (len == pb->len && + onigenc_with_ascii_strncmp(enc, p, end, pb->name, pb->len) == 0) + return pb->ctype; + } + + 
return ONIGERR_INVALID_CHAR_PROPERTY_NAME; +} + +extern int +onigenc_mb2_is_code_ctype(OnigEncoding enc, OnigCodePoint code, + unsigned int ctype) +{ + if (code < 128) + return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype); + else { + if (CTYPE_IS_WORD_GRAPH_PRINT(ctype)) { + return (ONIGENC_CODE_TO_MBCLEN(enc, code) > 1 ? TRUE : FALSE); + } + } + + return FALSE; +} + +extern int +onigenc_mb4_is_code_ctype(OnigEncoding enc, OnigCodePoint code, + unsigned int ctype) +{ + if (code < 128) + return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype); + else { + if (CTYPE_IS_WORD_GRAPH_PRINT(ctype)) { + return (ONIGENC_CODE_TO_MBCLEN(enc, code) > 1 ? TRUE : FALSE); + } + } + + return FALSE; +} + +extern int +onigenc_with_ascii_strncmp(OnigEncoding enc, const UChar* p, const UChar* end, + const UChar* sascii /* ascii */, int n) +{ + int x, c; + + while (n-- > 0) { + if (p >= end) return (int )(*sascii); + + c = (int )ONIGENC_MBC_TO_CODE(enc, p, end); + x = *sascii - c; + if (x) return x; + + sascii++; + p += enclen(enc, p); + } + return 0; +} + +/* Property management */ +static int +resize_property_list(int new_size, const OnigCodePoint*** plist, int* psize) +{ + int size; + const OnigCodePoint **list = *plist; + + size = sizeof(OnigCodePoint*) * new_size; + if (IS_NULL(list)) { + list = (const OnigCodePoint** )xmalloc(size); + } + else { + list = (const OnigCodePoint** )xrealloc((void* )list, size); + } + + if (IS_NULL(list)) return ONIGERR_MEMORY; + + *plist = list; + *psize = new_size; + + return 0; +} + +extern int +onigenc_property_list_add_property(UChar* name, const OnigCodePoint* prop, + hash_table_type **table, const OnigCodePoint*** plist, int *pnum, + int *psize) +{ +#define PROP_INIT_SIZE 16 + + int r; + + if (*psize <= *pnum) { + int new_size = (*psize == 0 ? 
PROP_INIT_SIZE : *psize * 2); + r = resize_property_list(new_size, plist, psize); + if (r != 0) return r; + } + + (*plist)[*pnum] = prop; + + if (ONIG_IS_NULL(*table)) { + *table = onig_st_init_strend_table_with_size(PROP_INIT_SIZE); + if (ONIG_IS_NULL(*table)) return ONIGERR_MEMORY; + } + + *pnum = *pnum + 1; + onig_st_insert_strend(*table, name, name + strlen((char* )name), + (hash_data_type )(*pnum + ONIGENC_MAX_STD_CTYPE)); + return 0; +} + +extern int +onigenc_property_list_init(int (*f)(void)) +{ + int r; + + THREAD_ATOMIC_START; + + r = f(); + + THREAD_ATOMIC_END; + return r; +} diff --git a/oniguruma/regenc.h b/oniguruma/regenc.h new file mode 100644 index 0000000..4096328 --- /dev/null +++ b/oniguruma/regenc.h @@ -0,0 +1,189 @@ +#ifndef REGENC_H +#define REGENC_H +/********************************************************************** + regenc.h - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2008 K.Kosako + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef PACKAGE +/* PACKAGE is defined in config.h */ +#include "config.h" +#endif + +#ifdef ONIG_ESCAPE_UCHAR_COLLISION +#undef ONIG_ESCAPE_UCHAR_COLLISION +#endif + +#include "oniguruma.h" + +typedef struct { + OnigCodePoint from; + OnigCodePoint to; +} OnigPairCaseFoldCodes; + + +#ifndef NULL +#define NULL ((void* )0) +#endif + +#ifndef TRUE +#define TRUE 1 +#endif + +#ifndef FALSE +#define FALSE 0 +#endif + +#ifndef ARG_UNUSED +#if defined(__GNUC__) +# define ARG_UNUSED __attribute__ ((unused)) +#else +# define ARG_UNUSED +#endif +#endif + +#define ONIG_IS_NULL(p) (((void*)(p)) == (void*)0) +#define ONIG_IS_NOT_NULL(p) (((void*)(p)) != (void*)0) +#define ONIG_CHECK_NULL_RETURN(p) if (ONIG_IS_NULL(p)) return NULL +#define ONIG_CHECK_NULL_RETURN_VAL(p,val) if (ONIG_IS_NULL(p)) return (val) + +#define enclen(enc,p) ONIGENC_MBC_ENC_LEN(enc,p) + +/* character types bit flag */ +#define BIT_CTYPE_NEWLINE (1<< ONIGENC_CTYPE_NEWLINE) +#define BIT_CTYPE_ALPHA (1<< ONIGENC_CTYPE_ALPHA) +#define BIT_CTYPE_BLANK (1<< ONIGENC_CTYPE_BLANK) +#define BIT_CTYPE_CNTRL (1<< ONIGENC_CTYPE_CNTRL) +#define BIT_CTYPE_DIGIT (1<< ONIGENC_CTYPE_DIGIT) +#define BIT_CTYPE_GRAPH (1<< ONIGENC_CTYPE_GRAPH) +#define BIT_CTYPE_LOWER (1<< ONIGENC_CTYPE_LOWER) +#define BIT_CTYPE_PRINT (1<< ONIGENC_CTYPE_PRINT) +#define BIT_CTYPE_PUNCT (1<< ONIGENC_CTYPE_PUNCT) +#define BIT_CTYPE_SPACE (1<< ONIGENC_CTYPE_SPACE) +#define BIT_CTYPE_UPPER (1<< ONIGENC_CTYPE_UPPER) 
+#define BIT_CTYPE_XDIGIT (1<< ONIGENC_CTYPE_XDIGIT) +#define BIT_CTYPE_WORD (1<< ONIGENC_CTYPE_WORD) +#define BIT_CTYPE_ALNUM (1<< ONIGENC_CTYPE_ALNUM) +#define BIT_CTYPE_ASCII (1<< ONIGENC_CTYPE_ASCII) + +#define CTYPE_TO_BIT(ctype) (1<<(ctype)) +#define CTYPE_IS_WORD_GRAPH_PRINT(ctype) \ + ((ctype) == ONIGENC_CTYPE_WORD || (ctype) == ONIGENC_CTYPE_GRAPH ||\ + (ctype) == ONIGENC_CTYPE_PRINT) + + +typedef struct { + UChar *name; + int ctype; + short int len; +} PosixBracketEntryType; + + +/* #define USE_CRNL_AS_LINE_TERMINATOR */ +#define USE_UNICODE_PROPERTIES +/* #define USE_UNICODE_CASE_FOLD_TURKISH_AZERI */ +/* #define USE_UNICODE_ALL_LINE_TERMINATORS */ /* see Unicode.org UTF#18 */ + + +#define ONIG_ENCODING_INIT_DEFAULT ONIG_ENCODING_ASCII + +/* for encoding system implementation (internal) */ +ONIG_EXTERN int onigenc_ascii_apply_all_case_fold P_((OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg)); +ONIG_EXTERN int onigenc_ascii_get_case_fold_codes_by_str P_((OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])); +ONIG_EXTERN int onigenc_apply_all_case_fold_with_map P_((int map_size, const OnigPairCaseFoldCodes map[], int ess_tsett_flag, OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg)); +ONIG_EXTERN int onigenc_get_case_fold_codes_by_str_with_map P_((int map_size, const OnigPairCaseFoldCodes map[], int ess_tsett_flag, OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])); +ONIG_EXTERN int onigenc_not_support_get_ctype_code_range P_((OnigCtype ctype, OnigCodePoint* sb_out, const OnigCodePoint* ranges[])); +ONIG_EXTERN int onigenc_is_mbc_newline_0x0a P_((const UChar* p, const UChar* end)); + + +/* methods for single byte encoding */ +ONIG_EXTERN int onigenc_ascii_mbc_case_fold P_((OnigCaseFoldType flag, const UChar** p, const UChar* end, UChar* lower)); +ONIG_EXTERN int onigenc_single_byte_mbc_enc_len P_((const UChar* p)); +ONIG_EXTERN OnigCodePoint 
onigenc_single_byte_mbc_to_code P_((const UChar* p, const UChar* end)); +ONIG_EXTERN int onigenc_single_byte_code_to_mbclen P_((OnigCodePoint code)); +ONIG_EXTERN int onigenc_single_byte_code_to_mbc P_((OnigCodePoint code, UChar *buf)); +ONIG_EXTERN UChar* onigenc_single_byte_left_adjust_char_head P_((const UChar* start, const UChar* s)); +ONIG_EXTERN int onigenc_always_true_is_allowed_reverse_match P_((const UChar* s, const UChar* end)); +ONIG_EXTERN int onigenc_always_false_is_allowed_reverse_match P_((const UChar* s, const UChar* end)); + +/* methods for multi byte encoding */ +ONIG_EXTERN OnigCodePoint onigenc_mbn_mbc_to_code P_((OnigEncoding enc, const UChar* p, const UChar* end)); +ONIG_EXTERN int onigenc_mbn_mbc_case_fold P_((OnigEncoding enc, OnigCaseFoldType flag, const UChar** p, const UChar* end, UChar* lower)); +ONIG_EXTERN int onigenc_mb2_code_to_mbclen P_((OnigCodePoint code)); +ONIG_EXTERN int onigenc_mb2_code_to_mbc P_((OnigEncoding enc, OnigCodePoint code, UChar *buf)); +ONIG_EXTERN int onigenc_minimum_property_name_to_ctype P_((OnigEncoding enc, UChar* p, UChar* end)); +ONIG_EXTERN int onigenc_unicode_property_name_to_ctype P_((OnigEncoding enc, UChar* p, UChar* end)); +ONIG_EXTERN int onigenc_mb2_is_code_ctype P_((OnigEncoding enc, OnigCodePoint code, unsigned int ctype)); +ONIG_EXTERN int onigenc_mb4_code_to_mbclen P_((OnigCodePoint code)); +ONIG_EXTERN int onigenc_mb4_code_to_mbc P_((OnigEncoding enc, OnigCodePoint code, UChar *buf)); +ONIG_EXTERN int onigenc_mb4_is_code_ctype P_((OnigEncoding enc, OnigCodePoint code, unsigned int ctype)); + + +/* in enc/unicode.c */ +ONIG_EXTERN int onigenc_unicode_is_code_ctype P_((OnigCodePoint code, unsigned int ctype)); +ONIG_EXTERN int onigenc_utf16_32_get_ctype_code_range P_((OnigCtype ctype, OnigCodePoint *sb_out, const OnigCodePoint* ranges[])); +ONIG_EXTERN int onigenc_unicode_ctype_code_range P_((int ctype, const OnigCodePoint* ranges[])); +ONIG_EXTERN int onigenc_unicode_get_case_fold_codes_by_str 
P_((OnigEncoding enc, OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])); +ONIG_EXTERN int onigenc_unicode_mbc_case_fold P_((OnigEncoding enc, OnigCaseFoldType flag, const UChar** pp, const UChar* end, UChar* fold)); +ONIG_EXTERN int onigenc_unicode_apply_all_case_fold P_((OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg)); + + +#define UTF16_IS_SURROGATE_FIRST(c) (((c) & 0xfc) == 0xd8) +#define UTF16_IS_SURROGATE_SECOND(c) (((c) & 0xfc) == 0xdc) + +#define ONIGENC_ISO_8859_1_TO_LOWER_CASE(c) \ + OnigEncISO_8859_1_ToLowerCaseTable[c] +#define ONIGENC_ISO_8859_1_TO_UPPER_CASE(c) \ + OnigEncISO_8859_1_ToUpperCaseTable[c] + +ONIG_EXTERN const UChar OnigEncISO_8859_1_ToLowerCaseTable[]; +ONIG_EXTERN const UChar OnigEncISO_8859_1_ToUpperCaseTable[]; + +ONIG_EXTERN int +onigenc_with_ascii_strncmp P_((OnigEncoding enc, const UChar* p, const UChar* end, const UChar* sascii /* ascii */, int n)); +ONIG_EXTERN UChar* +onigenc_step P_((OnigEncoding enc, const UChar* p, const UChar* end, int n)); + +/* defined in regexec.c, but used in enc/xxx.c */ +extern int onig_is_in_code_range P_((const UChar* p, OnigCodePoint code)); + +ONIG_EXTERN OnigEncoding OnigEncDefaultCharEncoding; +ONIG_EXTERN const UChar OnigEncAsciiToLowerCaseTable[]; +ONIG_EXTERN const UChar OnigEncAsciiToUpperCaseTable[]; +ONIG_EXTERN const unsigned short OnigEncAsciiCtypeTable[]; + +#define ONIGENC_IS_ASCII_CODE(code) ((code) < 0x80) +#define ONIGENC_ASCII_CODE_TO_LOWER_CASE(c) OnigEncAsciiToLowerCaseTable[c] +#define ONIGENC_ASCII_CODE_TO_UPPER_CASE(c) OnigEncAsciiToUpperCaseTable[c] +#define ONIGENC_IS_ASCII_CODE_CTYPE(code,ctype) \ + ((OnigEncAsciiCtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0) +#define ONIGENC_IS_ASCII_CODE_CASE_AMBIG(code) \ + (ONIGENC_IS_ASCII_CODE_CTYPE(code, ONIGENC_CTYPE_UPPER) ||\ + ONIGENC_IS_ASCII_CODE_CTYPE(code, ONIGENC_CTYPE_LOWER)) + + +#endif /* REGENC_H */ diff --git a/oniguruma/regerror.c b/oniguruma/regerror.c new file 
mode 100644 index 0000000..385e560 --- /dev/null +++ b/oniguruma/regerror.c @@ -0,0 +1,387 @@ +/********************************************************************** + regerror.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2007 K.Kosako + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include "regint.h" +#include /* for vsnprintf() */ + +#ifdef HAVE_STDARG_PROTOTYPES +#include +#define va_init_list(a,b) va_start(a,b) +#else +#include +#define va_init_list(a,b) va_start(a) +#endif + +extern UChar* +onig_error_code_to_format(int code) +{ + char *p; + + if (code >= 0) return (UChar* )0; + + switch (code) { + case ONIG_MISMATCH: + p = "mismatch"; break; + case ONIG_NO_SUPPORT_CONFIG: + p = "no support in this configuration"; break; + case ONIGERR_MEMORY: + p = "fail to memory allocation"; break; + case ONIGERR_MATCH_STACK_LIMIT_OVER: + p = "match-stack limit over"; break; + case ONIGERR_TYPE_BUG: + p = "undefined type (bug)"; break; + case ONIGERR_PARSER_BUG: + p = "internal parser error (bug)"; break; + case ONIGERR_STACK_BUG: + p = "stack error (bug)"; break; + case ONIGERR_UNDEFINED_BYTECODE: + p = "undefined bytecode (bug)"; break; + case ONIGERR_UNEXPECTED_BYTECODE: + p = "unexpected bytecode (bug)"; break; + case ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED: + p = "default multibyte-encoding is not setted"; break; + case ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR: + p = "can't convert to wide-char on specified multibyte-encoding"; break; + case ONIGERR_INVALID_ARGUMENT: + p = "invalid argument"; break; + case ONIGERR_END_PATTERN_AT_LEFT_BRACE: + p = "end pattern at left brace"; break; + case ONIGERR_END_PATTERN_AT_LEFT_BRACKET: + p = "end pattern at left bracket"; break; + case ONIGERR_EMPTY_CHAR_CLASS: + p = "empty char-class"; break; + case ONIGERR_PREMATURE_END_OF_CHAR_CLASS: + p = "premature end of char-class"; break; + case ONIGERR_END_PATTERN_AT_ESCAPE: + p = "end pattern at escape"; break; + case ONIGERR_END_PATTERN_AT_META: + p = "end pattern at meta"; break; + case ONIGERR_END_PATTERN_AT_CONTROL: + p = "end pattern at control"; break; + case ONIGERR_META_CODE_SYNTAX: + p = "invalid meta-code syntax"; break; + case ONIGERR_CONTROL_CODE_SYNTAX: + p = "invalid control-code syntax"; break; + case 
ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE: + p = "char-class value at end of range"; break; + case ONIGERR_CHAR_CLASS_VALUE_AT_START_OF_RANGE: + p = "char-class value at start of range"; break; + case ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS: + p = "unmatched range specifier in char-class"; break; + case ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED: + p = "target of repeat operator is not specified"; break; + case ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID: + p = "target of repeat operator is invalid"; break; + case ONIGERR_NESTED_REPEAT_OPERATOR: + p = "nested repeat operator"; break; + case ONIGERR_UNMATCHED_CLOSE_PARENTHESIS: + p = "unmatched close parenthesis"; break; + case ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS: + p = "end pattern with unmatched parenthesis"; break; + case ONIGERR_END_PATTERN_IN_GROUP: + p = "end pattern in group"; break; + case ONIGERR_UNDEFINED_GROUP_OPTION: + p = "undefined group option"; break; + case ONIGERR_INVALID_POSIX_BRACKET_TYPE: + p = "invalid POSIX bracket type"; break; + case ONIGERR_INVALID_LOOK_BEHIND_PATTERN: + p = "invalid pattern in look-behind"; break; + case ONIGERR_INVALID_REPEAT_RANGE_PATTERN: + p = "invalid repeat range {lower,upper}"; break; + case ONIGERR_TOO_BIG_NUMBER: + p = "too big number"; break; + case ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE: + p = "too big number for repeat range"; break; + case ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE: + p = "upper is smaller than lower in repeat range"; break; + case ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS: + p = "empty range in char class"; break; + case ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE: + p = "mismatch multibyte code length in char-class range"; break; + case ONIGERR_TOO_MANY_MULTI_BYTE_RANGES: + p = "too many multibyte code ranges are specified"; break; + case ONIGERR_TOO_SHORT_MULTI_BYTE_STRING: + p = "too short multibyte code string"; break; + case ONIGERR_TOO_BIG_BACKREF_NUMBER: + p = "too big backref number"; break; + case 
ONIGERR_INVALID_BACKREF: +#ifdef USE_NAMED_GROUP + p = "invalid backref number/name"; break; +#else + p = "invalid backref number"; break; +#endif + case ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED: + p = "numbered backref/call is not allowed. (use name)"; break; + case ONIGERR_TOO_BIG_WIDE_CHAR_VALUE: + p = "too big wide-char value"; break; + case ONIGERR_TOO_LONG_WIDE_CHAR_VALUE: + p = "too long wide-char value"; break; + case ONIGERR_INVALID_CODE_POINT_VALUE: + p = "invalid code point value"; break; + case ONIGERR_EMPTY_GROUP_NAME: + p = "group name is empty"; break; + case ONIGERR_INVALID_GROUP_NAME: + p = "invalid group name <%n>"; break; + case ONIGERR_INVALID_CHAR_IN_GROUP_NAME: +#ifdef USE_NAMED_GROUP + p = "invalid char in group name <%n>"; break; +#else + p = "invalid char in group number <%n>"; break; +#endif + case ONIGERR_UNDEFINED_NAME_REFERENCE: + p = "undefined name <%n> reference"; break; + case ONIGERR_UNDEFINED_GROUP_REFERENCE: + p = "undefined group <%n> reference"; break; + case ONIGERR_MULTIPLEX_DEFINED_NAME: + p = "multiplex defined name <%n>"; break; + case ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL: + p = "multiplex definition name <%n> call"; break; + case ONIGERR_NEVER_ENDING_RECURSION: + p = "never ending recursion"; break; + case ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY: + p = "group number is too big for capture history"; break; + case ONIGERR_INVALID_CHAR_PROPERTY_NAME: + p = "invalid character property name {%n}"; break; + case ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION: + p = "not supported encoding combination"; break; + case ONIGERR_INVALID_COMBINATION_OF_OPTIONS: + p = "invalid combination of options"; break; + case ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT: + p = "over thread pass limit count"; break; + + default: + p = "undefined error code"; break; + } + + return (UChar* )p; +} + +static void sprint_byte(char* s, unsigned int v) +{ + sprintf(s, "%02x", (v & 0377)); +} + +static void sprint_byte_with_x(char* s, unsigned int v) +{ 
+ sprintf(s, "\\x%02x", (v & 0377)); +} + +static int to_ascii(OnigEncoding enc, UChar *s, UChar *end, + UChar buf[], int buf_size, int *is_over) +{ + int len; + UChar *p; + OnigCodePoint code; + + if (ONIGENC_MBC_MINLEN(enc) > 1) { + p = s; + len = 0; + while (p < end) { + code = ONIGENC_MBC_TO_CODE(enc, p, end); + if (code >= 0x80) { + if (code > 0xffff && len + 10 <= buf_size) { + sprint_byte_with_x((char*)(&(buf[len])), (unsigned int)(code >> 24)); + sprint_byte((char*)(&(buf[len+4])), (unsigned int)(code >> 16)); + sprint_byte((char*)(&(buf[len+6])), (unsigned int)(code >> 8)); + sprint_byte((char*)(&(buf[len+8])), (unsigned int)code); + len += 10; + } + else if (len + 6 <= buf_size) { + sprint_byte_with_x((char*)(&(buf[len])), (unsigned int)(code >> 8)); + sprint_byte((char*)(&(buf[len+4])), (unsigned int)code); + len += 6; + } + else { + break; + } + } + else { + buf[len++] = (UChar )code; + } + + p += enclen(enc, p); + if (len >= buf_size) break; + } + + *is_over = ((p < end) ? 1 : 0); + } + else { + len = MIN((end - s), buf_size); + xmemcpy(buf, s, (size_t )len); + *is_over = ((buf_size < (end - s)) ? 1 : 0); + } + + return len; +} + + +/* for ONIG_MAX_ERROR_MESSAGE_LEN */ +#define MAX_ERROR_PAR_LEN 30 + +extern int +#ifdef HAVE_STDARG_PROTOTYPES +onig_error_code_to_str(UChar* s, int code, ...) 
+#else +onig_error_code_to_str(s, code, va_alist) + UChar* s; + int code; + va_dcl +#endif +{ + UChar *p, *q; + OnigErrorInfo* einfo; + int len, is_over; + UChar parbuf[MAX_ERROR_PAR_LEN]; + va_list vargs; + + va_init_list(vargs, code); + + switch (code) { + case ONIGERR_UNDEFINED_NAME_REFERENCE: + case ONIGERR_UNDEFINED_GROUP_REFERENCE: + case ONIGERR_MULTIPLEX_DEFINED_NAME: + case ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL: + case ONIGERR_INVALID_GROUP_NAME: + case ONIGERR_INVALID_CHAR_IN_GROUP_NAME: + case ONIGERR_INVALID_CHAR_PROPERTY_NAME: + einfo = va_arg(vargs, OnigErrorInfo*); + len = to_ascii(einfo->enc, einfo->par, einfo->par_end, + parbuf, MAX_ERROR_PAR_LEN - 3, &is_over); + q = onig_error_code_to_format(code); + p = s; + while (*q != '\0') { + if (*q == '%') { + q++; + if (*q == 'n') { /* '%n': name */ + xmemcpy(p, parbuf, len); + p += len; + if (is_over != 0) { + xmemcpy(p, "...", 3); + p += 3; + } + q++; + } + else + goto normal_char; + } + else { + normal_char: + *p++ = *q++; + } + } + *p = '\0'; + len = p - s; + break; + + default: + q = onig_error_code_to_format(code); + len = onigenc_str_bytelen_null(ONIG_ENCODING_ASCII, q); + xmemcpy(s, q, len); + s[len] = '\0'; + break; + } + + va_end(vargs); + return len; +} + + +void +#ifdef HAVE_STDARG_PROTOTYPES +onig_snprintf_with_pattern(UChar buf[], int bufsize, OnigEncoding enc, + UChar* pat, UChar* pat_end, const UChar *fmt, ...) 
+#else +onig_snprintf_with_pattern(buf, bufsize, enc, pat, pat_end, fmt, va_alist) + UChar buf[]; + int bufsize; + OnigEncoding enc; + UChar* pat; + UChar* pat_end; + const UChar *fmt; + va_dcl +#endif +{ + int n, need, len; + UChar *p, *s, *bp; + UChar bs[6]; + va_list args; + + va_init_list(args, fmt); + n = xvsnprintf((char* )buf, bufsize, (const char* )fmt, args); + va_end(args); + + need = (pat_end - pat) * 4 + 4; + + if (n + need < bufsize) { + strcat((char* )buf, ": /"); + s = buf + onigenc_str_bytelen_null(ONIG_ENCODING_ASCII, buf); + + p = pat; + while (p < pat_end) { + if (*p == '\\') { + *s++ = *p++; + len = enclen(enc, p); + while (len-- > 0) *s++ = *p++; + } + else if (*p == '/') { + *s++ = (unsigned char )'\\'; + *s++ = *p++; + } + else if (ONIGENC_IS_MBC_HEAD(enc, p)) { + len = enclen(enc, p); + if (ONIGENC_MBC_MINLEN(enc) == 1) { + while (len-- > 0) *s++ = *p++; + } + else { /* for UTF16 */ + int blen; + + while (len-- > 0) { + sprint_byte_with_x((char* )bs, (unsigned int )(*p++)); + blen = onigenc_str_bytelen_null(ONIG_ENCODING_ASCII, bs); + bp = bs; + while (blen-- > 0) *s++ = *bp++; + } + } + } + else if (!ONIGENC_IS_CODE_PRINT(enc, *p) && + !ONIGENC_IS_CODE_SPACE(enc, *p)) { + sprint_byte_with_x((char* )bs, (unsigned int )(*p++)); + len = onigenc_str_bytelen_null(ONIG_ENCODING_ASCII, bs); + bp = bs; + while (len-- > 0) *s++ = *bp++; + } + else { + *s++ = *p++; + } + } + + *s++ = '/'; + *s = '\0'; + } +} diff --git a/oniguruma/regexec.c b/oniguruma/regexec.c new file mode 100644 index 0000000..7430d78 --- /dev/null +++ b/oniguruma/regexec.c @@ -0,0 +1,3803 @@ +/********************************************************************** + regexec.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2008 K.Kosako + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include "regint.h" + +#define USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE + +#ifdef USE_CRNL_AS_LINE_TERMINATOR +#define ONIGENC_IS_MBC_CRNL(enc,p,end) \ + (ONIGENC_MBC_TO_CODE(enc,p,end) == 13 && \ + ONIGENC_IS_MBC_NEWLINE(enc,(p+enclen(enc,p)),end)) +#endif + +#ifdef USE_CAPTURE_HISTORY +static void history_tree_free(OnigCaptureTreeNode* node); + +static void +history_tree_clear(OnigCaptureTreeNode* node) +{ + int i; + + if (IS_NOT_NULL(node)) { + for (i = 0; i < node->num_childs; i++) { + if (IS_NOT_NULL(node->childs[i])) { + history_tree_free(node->childs[i]); + } + } + for (i = 0; i < node->allocated; i++) { + node->childs[i] = (OnigCaptureTreeNode* )0; + } + node->num_childs = 0; + node->beg = ONIG_REGION_NOTPOS; + node->end = ONIG_REGION_NOTPOS; + node->group = -1; + } +} + +static void +history_tree_free(OnigCaptureTreeNode* node) +{ + history_tree_clear(node); + xfree(node); +} + +static void +history_root_free(OnigRegion* r) +{ + if (IS_NOT_NULL(r->history_root)) { + history_tree_free(r->history_root); + r->history_root = (OnigCaptureTreeNode* )0; + } +} + +static OnigCaptureTreeNode* +history_node_new(void) +{ + OnigCaptureTreeNode* node; + + node = (OnigCaptureTreeNode* )xmalloc(sizeof(OnigCaptureTreeNode)); + CHECK_NULL_RETURN(node); + node->childs = (OnigCaptureTreeNode** )0; + node->allocated = 0; + node->num_childs = 0; + node->group = -1; + node->beg = ONIG_REGION_NOTPOS; + node->end = ONIG_REGION_NOTPOS; + + return node; +} + +static int +history_tree_add_child(OnigCaptureTreeNode* parent, OnigCaptureTreeNode* child) +{ +#define HISTORY_TREE_INIT_ALLOC_SIZE 8 + + if (parent->num_childs >= parent->allocated) { + int n, i; + + if (IS_NULL(parent->childs)) { + n = HISTORY_TREE_INIT_ALLOC_SIZE; + parent->childs = + (OnigCaptureTreeNode** )xmalloc(sizeof(OnigCaptureTreeNode*) * n); + } + else { + n = parent->allocated * 2; + parent->childs = + (OnigCaptureTreeNode** )xrealloc(parent->childs, + sizeof(OnigCaptureTreeNode*) * n); + } + 
CHECK_NULL_RETURN_MEMERR(parent->childs); + for (i = parent->allocated; i < n; i++) { + parent->childs[i] = (OnigCaptureTreeNode* )0; + } + parent->allocated = n; + } + + parent->childs[parent->num_childs] = child; + parent->num_childs++; + return 0; +} + +static OnigCaptureTreeNode* +history_tree_clone(OnigCaptureTreeNode* node) +{ + int i; + OnigCaptureTreeNode *clone, *child; + + clone = history_node_new(); + CHECK_NULL_RETURN(clone); + + clone->beg = node->beg; + clone->end = node->end; + for (i = 0; i < node->num_childs; i++) { + child = history_tree_clone(node->childs[i]); + if (IS_NULL(child)) { + history_tree_free(clone); + return (OnigCaptureTreeNode* )0; + } + history_tree_add_child(clone, child); + } + + return clone; +} + +extern OnigCaptureTreeNode* +onig_get_capture_tree(OnigRegion* region) +{ + return region->history_root; +} +#endif /* USE_CAPTURE_HISTORY */ + +extern void +onig_region_clear(OnigRegion* region) +{ + int i; + + for (i = 0; i < region->num_regs; i++) { + region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS; + } +#ifdef USE_CAPTURE_HISTORY + history_root_free(region); +#endif +} + +extern int +onig_region_resize(OnigRegion* region, int n) +{ + region->num_regs = n; + + if (n < ONIG_NREGION) + n = ONIG_NREGION; + + if (region->allocated == 0) { + region->beg = (int* )xmalloc(n * sizeof(int)); + region->end = (int* )xmalloc(n * sizeof(int)); + + if (region->beg == 0 || region->end == 0) + return ONIGERR_MEMORY; + + region->allocated = n; + } + else if (region->allocated < n) { + region->beg = (int* )xrealloc(region->beg, n * sizeof(int)); + region->end = (int* )xrealloc(region->end, n * sizeof(int)); + + if (region->beg == 0 || region->end == 0) + return ONIGERR_MEMORY; + + region->allocated = n; + } + + return 0; +} + +static int +onig_region_resize_clear(OnigRegion* region, int n) +{ + int r; + + r = onig_region_resize(region, n); + if (r != 0) return r; + onig_region_clear(region); + return 0; +} + +extern int 
+onig_region_set(OnigRegion* region, int at, int beg, int end) +{ + if (at < 0) return ONIGERR_INVALID_ARGUMENT; + + if (at >= region->allocated) { + int r = onig_region_resize(region, at + 1); + if (r < 0) return r; + } + + region->beg[at] = beg; + region->end[at] = end; + return 0; +} + +extern void +onig_region_init(OnigRegion* region) +{ + region->num_regs = 0; + region->allocated = 0; + region->beg = (int* )0; + region->end = (int* )0; + region->history_root = (OnigCaptureTreeNode* )0; +} + +extern OnigRegion* +onig_region_new(void) +{ + OnigRegion* r; + + r = (OnigRegion* )xmalloc(sizeof(OnigRegion)); + onig_region_init(r); + return r; +} + +extern void +onig_region_free(OnigRegion* r, int free_self) +{ + if (r) { + if (r->allocated > 0) { + if (r->beg) xfree(r->beg); + if (r->end) xfree(r->end); + r->allocated = 0; + } +#ifdef USE_CAPTURE_HISTORY + history_root_free(r); +#endif + if (free_self) xfree(r); + } +} + +extern void +onig_region_copy(OnigRegion* to, OnigRegion* from) +{ +#define RREGC_SIZE (sizeof(int) * from->num_regs) + int i; + + if (to == from) return; + + if (to->allocated == 0) { + if (from->num_regs > 0) { + to->beg = (int* )xmalloc(RREGC_SIZE); + to->end = (int* )xmalloc(RREGC_SIZE); + to->allocated = from->num_regs; + } + } + else if (to->allocated < from->num_regs) { + to->beg = (int* )xrealloc(to->beg, RREGC_SIZE); + to->end = (int* )xrealloc(to->end, RREGC_SIZE); + to->allocated = from->num_regs; + } + + for (i = 0; i < from->num_regs; i++) { + to->beg[i] = from->beg[i]; + to->end[i] = from->end[i]; + } + to->num_regs = from->num_regs; + +#ifdef USE_CAPTURE_HISTORY + history_root_free(to); + + if (IS_NOT_NULL(from->history_root)) { + to->history_root = history_tree_clone(from->history_root); + } +#endif +} + + +/** stack **/ +#define INVALID_STACK_INDEX -1 + +/* stack type */ +/* used by normal-POP */ +#define STK_ALT 0x0001 +#define STK_LOOK_BEHIND_NOT 0x0002 +#define STK_POS_NOT 0x0003 +/* handled by normal-POP */ +#define 
STK_MEM_START 0x0100 +#define STK_MEM_END 0x8200 +#define STK_REPEAT_INC 0x0300 +#define STK_STATE_CHECK_MARK 0x1000 +/* avoided by normal-POP */ +#define STK_NULL_CHECK_START 0x3000 +#define STK_NULL_CHECK_END 0x5000 /* for recursive call */ +#define STK_MEM_END_MARK 0x8400 +#define STK_POS 0x0500 /* used when POP-POS */ +#define STK_STOP_BT 0x0600 /* mark for "(?>...)" */ +#define STK_REPEAT 0x0700 +#define STK_CALL_FRAME 0x0800 +#define STK_RETURN 0x0900 +#define STK_VOID 0x0a00 /* for fill a blank */ + +/* stack type check mask */ +#define STK_MASK_POP_USED 0x00ff +#define STK_MASK_TO_VOID_TARGET 0x10ff +#define STK_MASK_MEM_END_OR_MARK 0x8000 /* MEM_END or MEM_END_MARK */ + +#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE +#define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start) do {\ + (msa).stack_p = (void* )0;\ + (msa).options = (arg_option);\ + (msa).region = (arg_region);\ + (msa).start = (arg_start);\ + (msa).best_len = ONIG_MISMATCH;\ +} while(0) +#else +#define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start) do {\ + (msa).stack_p = (void* )0;\ + (msa).options = (arg_option);\ + (msa).region = (arg_region);\ + (msa).start = (arg_start);\ +} while(0) +#endif + +#ifdef USE_COMBINATION_EXPLOSION_CHECK + +#define STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE 16 + +#define STATE_CHECK_BUFF_INIT(msa, str_len, offset, state_num) do { \ + if ((state_num) > 0 && str_len >= STATE_CHECK_STRING_THRESHOLD_LEN) {\ + unsigned int size = (unsigned int )(((str_len) + 1) * (state_num) + 7) >> 3;\ + offset = ((offset) * (state_num)) >> 3;\ + if (size > 0 && offset < size && size < STATE_CHECK_BUFF_MAX_SIZE) {\ + if (size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) \ + (msa).state_check_buff = (void* )xmalloc(size);\ + else \ + (msa).state_check_buff = (void* )xalloca(size);\ + xmemset(((char* )((msa).state_check_buff)+(offset)), 0, \ + (size_t )(size - (offset))); \ + (msa).state_check_buff_size = size;\ + }\ + else {\ + (msa).state_check_buff = (void* )0;\ + 
(msa).state_check_buff_size = 0;\ + }\ + }\ + else {\ + (msa).state_check_buff = (void* )0;\ + (msa).state_check_buff_size = 0;\ + }\ + } while(0) + +#define MATCH_ARG_FREE(msa) do {\ + if ((msa).stack_p) xfree((msa).stack_p);\ + if ((msa).state_check_buff_size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) { \ + if ((msa).state_check_buff) xfree((msa).state_check_buff);\ + }\ +} while(0) +#else +#define STATE_CHECK_BUFF_INIT(msa, str_len, offset, state_num) +#define MATCH_ARG_FREE(msa) if ((msa).stack_p) xfree((msa).stack_p) +#endif + + + +#define STACK_INIT(alloc_addr, ptr_num, stack_num) do {\ + if (msa->stack_p) {\ + alloc_addr = (char* )xalloca(sizeof(char*) * (ptr_num));\ + stk_alloc = (OnigStackType* )(msa->stack_p);\ + stk_base = stk_alloc;\ + stk = stk_base;\ + stk_end = stk_base + msa->stack_n;\ + }\ + else {\ + alloc_addr = (char* )xalloca(sizeof(char*) * (ptr_num)\ + + sizeof(OnigStackType) * (stack_num));\ + stk_alloc = (OnigStackType* )(alloc_addr + sizeof(char*) * (ptr_num));\ + stk_base = stk_alloc;\ + stk = stk_base;\ + stk_end = stk_base + (stack_num);\ + }\ +} while(0) + +#define STACK_SAVE do{\ + if (stk_base != stk_alloc) {\ + msa->stack_p = stk_base;\ + msa->stack_n = stk_end - stk_base;\ + };\ +} while(0) + +static unsigned int MatchStackLimitSize = DEFAULT_MATCH_STACK_LIMIT_SIZE; + +extern unsigned int +onig_get_match_stack_limit_size(void) +{ + return MatchStackLimitSize; +} + +extern int +onig_set_match_stack_limit_size(unsigned int size) +{ + MatchStackLimitSize = size; + return 0; +} + +static int +stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end, + OnigStackType** arg_stk, OnigStackType* stk_alloc, OnigMatchArg* msa) +{ + unsigned int n; + OnigStackType *x, *stk_base, *stk_end, *stk; + + stk_base = *arg_stk_base; + stk_end = *arg_stk_end; + stk = *arg_stk; + + n = stk_end - stk_base; + if (stk_base == stk_alloc && IS_NULL(msa->stack_p)) { + x = (OnigStackType* )xmalloc(sizeof(OnigStackType) * n * 2); + if (IS_NULL(x)) 
{ + STACK_SAVE; + return ONIGERR_MEMORY; + } + xmemcpy(x, stk_base, n * sizeof(OnigStackType)); + n *= 2; + } + else { + n *= 2; + if (MatchStackLimitSize != 0 && n > MatchStackLimitSize) { + if ((unsigned int )(stk_end - stk_base) == MatchStackLimitSize) + return ONIGERR_MATCH_STACK_LIMIT_OVER; + else + n = MatchStackLimitSize; + } + x = (OnigStackType* )xrealloc(stk_base, sizeof(OnigStackType) * n); + if (IS_NULL(x)) { + STACK_SAVE; + return ONIGERR_MEMORY; + } + } + *arg_stk = x + (stk - stk_base); + *arg_stk_base = x; + *arg_stk_end = x + n; + return 0; +} + +#define STACK_ENSURE(n) do {\ + if (stk_end - stk < (n)) {\ + int r = stack_double(&stk_base, &stk_end, &stk, stk_alloc, msa);\ + if (r != 0) { STACK_SAVE; return r; } \ + }\ +} while(0) + +#define STACK_AT(index) (stk_base + (index)) +#define GET_STACK_INDEX(stk) ((stk) - stk_base) + +#define STACK_PUSH_TYPE(stack_type) do {\ + STACK_ENSURE(1);\ + stk->type = (stack_type);\ + STACK_INC;\ +} while(0) + +#define IS_TO_VOID_TARGET(stk) (((stk)->type & STK_MASK_TO_VOID_TARGET) != 0) + +#ifdef USE_COMBINATION_EXPLOSION_CHECK +#define STATE_CHECK_POS(s,snum) \ + (((s) - str) * num_comb_exp_check + ((snum) - 1)) +#define STATE_CHECK_VAL(v,snum) do {\ + if (state_check_buff != NULL) {\ + int x = STATE_CHECK_POS(s,snum);\ + (v) = state_check_buff[x/8] & (1<<(x%8));\ + }\ + else (v) = 0;\ +} while(0) + + +#define ELSE_IF_STATE_CHECK_MARK(stk) \ + else if ((stk)->type == STK_STATE_CHECK_MARK) { \ + int x = STATE_CHECK_POS(stk->u.state.pstr, stk->u.state.state_check);\ + state_check_buff[x/8] |= (1<<(x%8)); \ + } + +#define STACK_PUSH(stack_type,pat,s,sprev) do {\ + STACK_ENSURE(1);\ + stk->type = (stack_type);\ + stk->u.state.pcode = (pat);\ + stk->u.state.pstr = (s);\ + stk->u.state.pstr_prev = (sprev);\ + stk->u.state.state_check = 0;\ + STACK_INC;\ +} while(0) + +#define STACK_PUSH_ENSURED(stack_type,pat) do {\ + stk->type = (stack_type);\ + stk->u.state.pcode = (pat);\ + stk->u.state.state_check = 0;\ + 
STACK_INC;\ +} while(0) + +#define STACK_PUSH_ALT_WITH_STATE_CHECK(pat,s,sprev,snum) do {\ + STACK_ENSURE(1);\ + stk->type = STK_ALT;\ + stk->u.state.pcode = (pat);\ + stk->u.state.pstr = (s);\ + stk->u.state.pstr_prev = (sprev);\ + stk->u.state.state_check = ((state_check_buff != NULL) ? (snum) : 0);\ + STACK_INC;\ +} while(0) + +#define STACK_PUSH_STATE_CHECK(s,snum) do {\ + if (state_check_buff != NULL) {\ + STACK_ENSURE(1);\ + stk->type = STK_STATE_CHECK_MARK;\ + stk->u.state.pstr = (s);\ + stk->u.state.state_check = (snum);\ + STACK_INC;\ + }\ +} while(0) + +#else /* USE_COMBINATION_EXPLOSION_CHECK */ + +#define ELSE_IF_STATE_CHECK_MARK(stk) + +#define STACK_PUSH(stack_type,pat,s,sprev) do {\ + STACK_ENSURE(1);\ + stk->type = (stack_type);\ + stk->u.state.pcode = (pat);\ + stk->u.state.pstr = (s);\ + stk->u.state.pstr_prev = (sprev);\ + STACK_INC;\ +} while(0) + +#define STACK_PUSH_ENSURED(stack_type,pat) do {\ + stk->type = (stack_type);\ + stk->u.state.pcode = (pat);\ + STACK_INC;\ +} while(0) +#endif /* USE_COMBINATION_EXPLOSION_CHECK */ + +#define STACK_PUSH_ALT(pat,s,sprev) STACK_PUSH(STK_ALT,pat,s,sprev) +#define STACK_PUSH_POS(s,sprev) STACK_PUSH(STK_POS,NULL_UCHARP,s,sprev) +#define STACK_PUSH_POS_NOT(pat,s,sprev) STACK_PUSH(STK_POS_NOT,pat,s,sprev) +#define STACK_PUSH_STOP_BT STACK_PUSH_TYPE(STK_STOP_BT) +#define STACK_PUSH_LOOK_BEHIND_NOT(pat,s,sprev) \ + STACK_PUSH(STK_LOOK_BEHIND_NOT,pat,s,sprev) + +#define STACK_PUSH_REPEAT(id, pat) do {\ + STACK_ENSURE(1);\ + stk->type = STK_REPEAT;\ + stk->u.repeat.num = (id);\ + stk->u.repeat.pcode = (pat);\ + stk->u.repeat.count = 0;\ + STACK_INC;\ +} while(0) + +#define STACK_PUSH_REPEAT_INC(sindex) do {\ + STACK_ENSURE(1);\ + stk->type = STK_REPEAT_INC;\ + stk->u.repeat_inc.si = (sindex);\ + STACK_INC;\ +} while(0) + +#define STACK_PUSH_MEM_START(mnum, s) do {\ + STACK_ENSURE(1);\ + stk->type = STK_MEM_START;\ + stk->u.mem.num = (mnum);\ + stk->u.mem.pstr = (s);\ + stk->u.mem.start = mem_start_stk[mnum];\ + 
stk->u.mem.end = mem_end_stk[mnum];\ + mem_start_stk[mnum] = GET_STACK_INDEX(stk);\ + mem_end_stk[mnum] = INVALID_STACK_INDEX;\ + STACK_INC;\ +} while(0) + +#define STACK_PUSH_MEM_END(mnum, s) do {\ + STACK_ENSURE(1);\ + stk->type = STK_MEM_END;\ + stk->u.mem.num = (mnum);\ + stk->u.mem.pstr = (s);\ + stk->u.mem.start = mem_start_stk[mnum];\ + stk->u.mem.end = mem_end_stk[mnum];\ + mem_end_stk[mnum] = GET_STACK_INDEX(stk);\ + STACK_INC;\ +} while(0) + +#define STACK_PUSH_MEM_END_MARK(mnum) do {\ + STACK_ENSURE(1);\ + stk->type = STK_MEM_END_MARK;\ + stk->u.mem.num = (mnum);\ + STACK_INC;\ +} while(0) + +#define STACK_GET_MEM_START(mnum, k) do {\ + int level = 0;\ + k = stk;\ + while (k > stk_base) {\ + k--;\ + if ((k->type & STK_MASK_MEM_END_OR_MARK) != 0 \ + && k->u.mem.num == (mnum)) {\ + level++;\ + }\ + else if (k->type == STK_MEM_START && k->u.mem.num == (mnum)) {\ + if (level == 0) break;\ + level--;\ + }\ + }\ +} while(0) + +#define STACK_GET_MEM_RANGE(k, mnum, start, end) do {\ + int level = 0;\ + while (k < stk) {\ + if (k->type == STK_MEM_START && k->u.mem.num == (mnum)) {\ + if (level == 0) (start) = k->u.mem.pstr;\ + level++;\ + }\ + else if (k->type == STK_MEM_END && k->u.mem.num == (mnum)) {\ + level--;\ + if (level == 0) {\ + (end) = k->u.mem.pstr;\ + break;\ + }\ + }\ + k++;\ + }\ +} while(0) + +#define STACK_PUSH_NULL_CHECK_START(cnum, s) do {\ + STACK_ENSURE(1);\ + stk->type = STK_NULL_CHECK_START;\ + stk->u.null_check.num = (cnum);\ + stk->u.null_check.pstr = (s);\ + STACK_INC;\ +} while(0) + +#define STACK_PUSH_NULL_CHECK_END(cnum) do {\ + STACK_ENSURE(1);\ + stk->type = STK_NULL_CHECK_END;\ + stk->u.null_check.num = (cnum);\ + STACK_INC;\ +} while(0) + +#define STACK_PUSH_CALL_FRAME(pat) do {\ + STACK_ENSURE(1);\ + stk->type = STK_CALL_FRAME;\ + stk->u.call_frame.ret_addr = (pat);\ + STACK_INC;\ +} while(0) + +#define STACK_PUSH_RETURN do {\ + STACK_ENSURE(1);\ + stk->type = STK_RETURN;\ + STACK_INC;\ +} while(0) + + +#ifdef ONIG_DEBUG +#define 
STACK_BASE_CHECK(p, at) \ + if ((p) < stk_base) {\ + fprintf(stderr, "at %s\n", at);\ + goto stack_error;\ + } +#else +#define STACK_BASE_CHECK(p, at) +#endif + +#define STACK_POP_ONE do {\ + stk--;\ + STACK_BASE_CHECK(stk, "STACK_POP_ONE"); \ +} while(0) + +#define STACK_POP do {\ + switch (pop_level) {\ + case STACK_POP_LEVEL_FREE:\ + while (1) {\ + stk--;\ + STACK_BASE_CHECK(stk, "STACK_POP"); \ + if ((stk->type & STK_MASK_POP_USED) != 0) break;\ + ELSE_IF_STATE_CHECK_MARK(stk);\ + }\ + break;\ + case STACK_POP_LEVEL_MEM_START:\ + while (1) {\ + stk--;\ + STACK_BASE_CHECK(stk, "STACK_POP 2"); \ + if ((stk->type & STK_MASK_POP_USED) != 0) break;\ + else if (stk->type == STK_MEM_START) {\ + mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ + mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ + }\ + ELSE_IF_STATE_CHECK_MARK(stk);\ + }\ + break;\ + default:\ + while (1) {\ + stk--;\ + STACK_BASE_CHECK(stk, "STACK_POP 3"); \ + if ((stk->type & STK_MASK_POP_USED) != 0) break;\ + else if (stk->type == STK_MEM_START) {\ + mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ + mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ + }\ + else if (stk->type == STK_REPEAT_INC) {\ + STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\ + }\ + else if (stk->type == STK_MEM_END) {\ + mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ + mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ + }\ + ELSE_IF_STATE_CHECK_MARK(stk);\ + }\ + break;\ + }\ +} while(0) + +#define STACK_POP_TIL_POS_NOT do {\ + while (1) {\ + stk--;\ + STACK_BASE_CHECK(stk, "STACK_POP_TIL_POS_NOT"); \ + if (stk->type == STK_POS_NOT) break;\ + else if (stk->type == STK_MEM_START) {\ + mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ + mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ + }\ + else if (stk->type == STK_REPEAT_INC) {\ + STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\ + }\ + else if (stk->type == STK_MEM_END) {\ + mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ + mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ + 
}\ + ELSE_IF_STATE_CHECK_MARK(stk);\ + }\ +} while(0) + +#define STACK_POP_TIL_LOOK_BEHIND_NOT do {\ + while (1) {\ + stk--;\ + STACK_BASE_CHECK(stk, "STACK_POP_TIL_LOOK_BEHIND_NOT"); \ + if (stk->type == STK_LOOK_BEHIND_NOT) break;\ + else if (stk->type == STK_MEM_START) {\ + mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ + mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ + }\ + else if (stk->type == STK_REPEAT_INC) {\ + STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\ + }\ + else if (stk->type == STK_MEM_END) {\ + mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ + mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ + }\ + ELSE_IF_STATE_CHECK_MARK(stk);\ + }\ +} while(0) + +#define STACK_POS_END(k) do {\ + k = stk;\ + while (1) {\ + k--;\ + STACK_BASE_CHECK(k, "STACK_POS_END"); \ + if (IS_TO_VOID_TARGET(k)) {\ + k->type = STK_VOID;\ + }\ + else if (k->type == STK_POS) {\ + k->type = STK_VOID;\ + break;\ + }\ + }\ +} while(0) + +#define STACK_STOP_BT_END do {\ + OnigStackType *k = stk;\ + while (1) {\ + k--;\ + STACK_BASE_CHECK(k, "STACK_STOP_BT_END"); \ + if (IS_TO_VOID_TARGET(k)) {\ + k->type = STK_VOID;\ + }\ + else if (k->type == STK_STOP_BT) {\ + k->type = STK_VOID;\ + break;\ + }\ + }\ +} while(0) + +#define STACK_NULL_CHECK(isnull,id,s) do {\ + OnigStackType* k = stk;\ + while (1) {\ + k--;\ + STACK_BASE_CHECK(k, "STACK_NULL_CHECK"); \ + if (k->type == STK_NULL_CHECK_START) {\ + if (k->u.null_check.num == (id)) {\ + (isnull) = (k->u.null_check.pstr == (s));\ + break;\ + }\ + }\ + }\ +} while(0) + +#define STACK_NULL_CHECK_REC(isnull,id,s) do {\ + int level = 0;\ + OnigStackType* k = stk;\ + while (1) {\ + k--;\ + STACK_BASE_CHECK(k, "STACK_NULL_CHECK_REC"); \ + if (k->type == STK_NULL_CHECK_START) {\ + if (k->u.null_check.num == (id)) {\ + if (level == 0) {\ + (isnull) = (k->u.null_check.pstr == (s));\ + break;\ + }\ + else level--;\ + }\ + }\ + else if (k->type == STK_NULL_CHECK_END) {\ + level++;\ + }\ + }\ +} while(0) + +#define 
STACK_NULL_CHECK_MEMST(isnull,id,s,reg) do {\ + OnigStackType* k = stk;\ + while (1) {\ + k--;\ + STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST"); \ + if (k->type == STK_NULL_CHECK_START) {\ + if (k->u.null_check.num == (id)) {\ + if (k->u.null_check.pstr != (s)) {\ + (isnull) = 0;\ + break;\ + }\ + else {\ + UChar* endp;\ + (isnull) = 1;\ + while (k < stk) {\ + if (k->type == STK_MEM_START) {\ + if (k->u.mem.end == INVALID_STACK_INDEX) {\ + (isnull) = 0; break;\ + }\ + if (BIT_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\ + endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\ + else\ + endp = (UChar* )k->u.mem.end;\ + if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\ + (isnull) = 0; break;\ + }\ + else if (endp != s) {\ + (isnull) = -1; /* empty, but position changed */ \ + }\ + }\ + k++;\ + }\ + break;\ + }\ + }\ + }\ + }\ +} while(0) + +#define STACK_NULL_CHECK_MEMST_REC(isnull,id,s,reg) do {\ + int level = 0;\ + OnigStackType* k = stk;\ + while (1) {\ + k--;\ + STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST_REC"); \ + if (k->type == STK_NULL_CHECK_START) {\ + if (k->u.null_check.num == (id)) {\ + if (level == 0) {\ + if (k->u.null_check.pstr != (s)) {\ + (isnull) = 0;\ + break;\ + }\ + else {\ + UChar* endp;\ + (isnull) = 1;\ + while (k < stk) {\ + if (k->type == STK_MEM_START) {\ + if (k->u.mem.end == INVALID_STACK_INDEX) {\ + (isnull) = 0; break;\ + }\ + if (BIT_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\ + endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\ + else\ + endp = (UChar* )k->u.mem.end;\ + if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\ + (isnull) = 0; break;\ + }\ + else if (endp != s) {\ + (isnull) = -1; /* empty, but position changed */ \ + }\ + }\ + k++;\ + }\ + break;\ + }\ + }\ + else {\ + level--;\ + }\ + }\ + }\ + else if (k->type == STK_NULL_CHECK_END) {\ + if (k->u.null_check.num == (id)) level++;\ + }\ + }\ +} while(0) + +#define STACK_GET_REPEAT(id, k) do {\ + int level = 0;\ + k = stk;\ + while (1) {\ + k--;\ + STACK_BASE_CHECK(k, "STACK_GET_REPEAT"); \ 
+ if (k->type == STK_REPEAT) {\ + if (level == 0) {\ + if (k->u.repeat.num == (id)) {\ + break;\ + }\ + }\ + }\ + else if (k->type == STK_CALL_FRAME) level--;\ + else if (k->type == STK_RETURN) level++;\ + }\ +} while(0) + +#define STACK_RETURN(addr) do {\ + int level = 0;\ + OnigStackType* k = stk;\ + while (1) {\ + k--;\ + STACK_BASE_CHECK(k, "STACK_RETURN"); \ + if (k->type == STK_CALL_FRAME) {\ + if (level == 0) {\ + (addr) = k->u.call_frame.ret_addr;\ + break;\ + }\ + else level--;\ + }\ + else if (k->type == STK_RETURN)\ + level++;\ + }\ +} while(0) + + +#define STRING_CMP(s1,s2,len) do {\ + while (len-- > 0) {\ + if (*s1++ != *s2++) goto fail;\ + }\ +} while(0) + +#define STRING_CMP_IC(case_fold_flag,s1,ps2,len) do {\ + if (string_cmp_ic(encode, case_fold_flag, s1, ps2, len) == 0) \ + goto fail; \ +} while(0) + +static int string_cmp_ic(OnigEncoding enc, int case_fold_flag, + UChar* s1, UChar** ps2, int mblen) +{ + UChar buf1[ONIGENC_MBC_CASE_FOLD_MAXLEN]; + UChar buf2[ONIGENC_MBC_CASE_FOLD_MAXLEN]; + UChar *p1, *p2, *end1, *s2, *end2; + int len1, len2; + + s2 = *ps2; + end1 = s1 + mblen; + end2 = s2 + mblen; + while (s1 < end1) { + len1 = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &s1, end1, buf1); + len2 = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &s2, end2, buf2); + if (len1 != len2) return 0; + p1 = buf1; + p2 = buf2; + while (len1-- > 0) { + if (*p1 != *p2) return 0; + p1++; + p2++; + } + } + + *ps2 = s2; + return 1; +} + +#define STRING_CMP_VALUE(s1,s2,len,is_fail) do {\ + is_fail = 0;\ + while (len-- > 0) {\ + if (*s1++ != *s2++) {\ + is_fail = 1; break;\ + }\ + }\ +} while(0) + +#define STRING_CMP_VALUE_IC(case_fold_flag,s1,ps2,len,is_fail) do {\ + if (string_cmp_ic(encode, case_fold_flag, s1, ps2, len) == 0) \ + is_fail = 1; \ + else \ + is_fail = 0; \ +} while(0) + + +#define IS_EMPTY_STR (str == end) +#define ON_STR_BEGIN(s) ((s) == str) +#define ON_STR_END(s) ((s) == end) +#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE +#define 
DATA_ENSURE_CHECK1 (s < right_range) +#define DATA_ENSURE_CHECK(n) (s + (n) <= right_range) +#define DATA_ENSURE(n) if (s + (n) > right_range) goto fail +#else +#define DATA_ENSURE_CHECK1 (s < end) +#define DATA_ENSURE_CHECK(n) (s + (n) <= end) +#define DATA_ENSURE(n) if (s + (n) > end) goto fail +#endif /* USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */ + + +#ifdef USE_CAPTURE_HISTORY +static int +make_capture_history_tree(OnigCaptureTreeNode* node, OnigStackType** kp, + OnigStackType* stk_top, UChar* str, regex_t* reg) +{ + int n, r; + OnigCaptureTreeNode* child; + OnigStackType* k = *kp; + + while (k < stk_top) { + if (k->type == STK_MEM_START) { + n = k->u.mem.num; + if (n <= ONIG_MAX_CAPTURE_HISTORY_GROUP && + BIT_STATUS_AT(reg->capture_history, n) != 0) { + child = history_node_new(); + CHECK_NULL_RETURN_MEMERR(child); + child->group = n; + child->beg = (int )(k->u.mem.pstr - str); + r = history_tree_add_child(node, child); + if (r != 0) return r; + *kp = (k + 1); + r = make_capture_history_tree(child, kp, stk_top, str, reg); + if (r != 0) return r; + + k = *kp; + child->end = (int )(k->u.mem.pstr - str); + } + } + else if (k->type == STK_MEM_END) { + if (k->u.mem.num == node->group) { + node->end = (int )(k->u.mem.pstr - str); + *kp = k; + return 0; + } + } + k++; + } + + return 1; /* 1: root node ending. 
*/ +} +#endif + +#ifdef USE_BACKREF_WITH_LEVEL +static int mem_is_in_memp(int mem, int num, UChar* memp) +{ + int i; + MemNumType m; + + for (i = 0; i < num; i++) { + GET_MEMNUM_INC(m, memp); + if (mem == (int )m) return 1; + } + return 0; +} + +static int backref_match_at_nested_level(regex_t* reg + , OnigStackType* top, OnigStackType* stk_base + , int ignore_case, int case_fold_flag + , int nest, int mem_num, UChar* memp, UChar** s, const UChar* send) +{ + UChar *ss, *p, *pstart, *pend = NULL_UCHARP; + int level; + OnigStackType* k; + + level = 0; + k = top; + k--; + while (k >= stk_base) { + if (k->type == STK_CALL_FRAME) { + level--; + } + else if (k->type == STK_RETURN) { + level++; + } + else if (level == nest) { + if (k->type == STK_MEM_START) { + if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) { + pstart = k->u.mem.pstr; + if (pend != NULL_UCHARP) { + if (pend - pstart > send - *s) return 0; /* or goto next_mem; */ + p = pstart; + ss = *s; + + if (ignore_case != 0) { + if (string_cmp_ic(reg->enc, case_fold_flag, + pstart, &ss, (int )(pend - pstart)) == 0) + return 0; /* or goto next_mem; */ + } + else { + while (p < pend) { + if (*p++ != *ss++) return 0; /* or goto next_mem; */ + } + } + + *s = ss; + return 1; + } + } + } + else if (k->type == STK_MEM_END) { + if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) { + pend = k->u.mem.pstr; + } + } + } + k--; + } + + return 0; +} +#endif /* USE_BACKREF_WITH_LEVEL */ + + +#ifdef ONIG_DEBUG_STATISTICS + +#define USE_TIMEOFDAY + +#ifdef USE_TIMEOFDAY +#ifdef HAVE_SYS_TIME_H +#include +#endif +#ifdef HAVE_UNISTD_H +#include +#endif +static struct timeval ts, te; +#define GETTIME(t) gettimeofday(&(t), (struct timezone* )0) +#define TIMEDIFF(te,ts) (((te).tv_usec - (ts).tv_usec) + \ + (((te).tv_sec - (ts).tv_sec)*1000000)) +#else +#ifdef HAVE_SYS_TIMES_H +#include +#endif +static struct tms ts, te; +#define GETTIME(t) times(&(t)) +#define TIMEDIFF(te,ts) ((te).tms_utime - (ts).tms_utime) +#endif + +static int 
OpCounter[256]; +static int OpPrevCounter[256]; +static unsigned long OpTime[256]; +static int OpCurr = OP_FINISH; +static int OpPrevTarget = OP_FAIL; +static int MaxStackDepth = 0; + +#define MOP_IN(opcode) do {\ + if (opcode == OpPrevTarget) OpPrevCounter[OpCurr]++;\ + OpCurr = opcode;\ + OpCounter[opcode]++;\ + GETTIME(ts);\ +} while(0) + +#define MOP_OUT do {\ + GETTIME(te);\ + OpTime[OpCurr] += TIMEDIFF(te, ts);\ +} while(0) + +extern void +onig_statistics_init(void) +{ + int i; + for (i = 0; i < 256; i++) { + OpCounter[i] = OpPrevCounter[i] = 0; OpTime[i] = 0; + } + MaxStackDepth = 0; +} + +extern void +onig_print_statistics(FILE* f) +{ + int i; + fprintf(f, " count prev time\n"); + for (i = 0; OnigOpInfo[i].opcode >= 0; i++) { + fprintf(f, "%8d: %8d: %10ld: %s\n", + OpCounter[i], OpPrevCounter[i], OpTime[i], OnigOpInfo[i].name); + } + fprintf(f, "\nmax stack depth: %d\n", MaxStackDepth); +} + +#define STACK_INC do {\ + stk++;\ + if (stk - stk_base > MaxStackDepth) \ + MaxStackDepth = stk - stk_base;\ +} while(0) + +#else +#define STACK_INC stk++ + +#define MOP_IN(opcode) +#define MOP_OUT +#endif + + +/* matching region of POSIX API */ +typedef int regoff_t; + +typedef struct { + regoff_t rm_so; + regoff_t rm_eo; +} posix_regmatch_t; + +/* match data(str - end) from position (sstart). */ +/* if sstart == str then set sprev to NULL. 
*/ +static int +match_at(regex_t* reg, const UChar* str, const UChar* end, +#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE + const UChar* right_range, +#endif + const UChar* sstart, UChar* sprev, OnigMatchArg* msa) +{ + static UChar FinishCode[] = { OP_FINISH }; + + int i, n, num_mem, best_len, pop_level; + LengthType tlen, tlen2; + MemNumType mem; + RelAddrType addr; + OnigOptionType option = reg->options; + OnigEncoding encode = reg->enc; + OnigCaseFoldType case_fold_flag = reg->case_fold_flag; + UChar *s, *q, *sbegin; + UChar *p = reg->p; + char *alloca_base; + OnigStackType *stk_alloc, *stk_base, *stk, *stk_end; + OnigStackType *stkp; /* used as any purpose. */ + OnigStackIndex si; + OnigStackIndex *repeat_stk; + OnigStackIndex *mem_start_stk, *mem_end_stk; +#ifdef USE_COMBINATION_EXPLOSION_CHECK + int scv; + unsigned char* state_check_buff = msa->state_check_buff; + int num_comb_exp_check = reg->num_comb_exp_check; +#endif + n = reg->num_repeat + reg->num_mem * 2; + + STACK_INIT(alloca_base, n, INIT_MATCH_STACK_SIZE); + pop_level = reg->stack_pop_level; + num_mem = reg->num_mem; + repeat_stk = (OnigStackIndex* )alloca_base; + + mem_start_stk = (OnigStackIndex* )(repeat_stk + reg->num_repeat); + mem_end_stk = mem_start_stk + num_mem; + mem_start_stk--; /* for index start from 1, + mem_start_stk[1]..mem_start_stk[num_mem] */ + mem_end_stk--; /* for index start from 1, + mem_end_stk[1]..mem_end_stk[num_mem] */ + for (i = 1; i <= num_mem; i++) { + mem_start_stk[i] = mem_end_stk[i] = INVALID_STACK_INDEX; + } + +#ifdef ONIG_DEBUG_MATCH + fprintf(stderr, "match_at: str: %d, end: %d, start: %d, sprev: %d\n", + (int )str, (int )end, (int )sstart, (int )sprev); + fprintf(stderr, "size: %d, start offset: %d\n", + (int )(end - str), (int )(sstart - str)); +#endif + + STACK_PUSH_ENSURED(STK_ALT, FinishCode); /* bottom stack */ + best_len = ONIG_MISMATCH; + s = (UChar* )sstart; + while (1) { +#ifdef ONIG_DEBUG_MATCH + { + UChar *q, *bp, buf[50]; + int len; + 
fprintf(stderr, "%4d> \"", (int )(s - str)); + bp = buf; + for (i = 0, q = s; i < 7 && q < end; i++) { + len = enclen(encode, q); + while (len-- > 0) *bp++ = *q++; + } + if (q < end) { xmemcpy(bp, "...\"", 4); bp += 4; } + else { xmemcpy(bp, "\"", 1); bp += 1; } + *bp = 0; + fputs((char* )buf, stderr); + for (i = 0; i < 20 - (bp - buf); i++) fputc(' ', stderr); + onig_print_compiled_byte_code(stderr, p, NULL, encode); + fprintf(stderr, "\n"); + } +#endif + + sbegin = s; + switch (*p++) { + case OP_END: MOP_IN(OP_END); + n = s - sstart; + if (n > best_len) { + OnigRegion* region; +#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE + if (IS_FIND_LONGEST(option)) { + if (n > msa->best_len) { + msa->best_len = n; + msa->best_s = (UChar* )sstart; + } + else + goto end_best_len; + } +#endif + best_len = n; + region = msa->region; + if (region) { +#ifdef USE_POSIX_API_REGION_OPTION + if (IS_POSIX_REGION(msa->options)) { + posix_regmatch_t* rmt = (posix_regmatch_t* )region; + + rmt[0].rm_so = sstart - str; + rmt[0].rm_eo = s - str; + for (i = 1; i <= num_mem; i++) { + if (mem_end_stk[i] != INVALID_STACK_INDEX) { + if (BIT_STATUS_AT(reg->bt_mem_start, i)) + rmt[i].rm_so = STACK_AT(mem_start_stk[i])->u.mem.pstr - str; + else + rmt[i].rm_so = (UChar* )((void* )(mem_start_stk[i])) - str; + + rmt[i].rm_eo = (BIT_STATUS_AT(reg->bt_mem_end, i) + ? STACK_AT(mem_end_stk[i])->u.mem.pstr + : (UChar* )((void* )mem_end_stk[i])) - str; + } + else { + rmt[i].rm_so = rmt[i].rm_eo = ONIG_REGION_NOTPOS; + } + } + } + else { +#endif /* USE_POSIX_API_REGION_OPTION */ + region->beg[0] = sstart - str; + region->end[0] = s - str; + for (i = 1; i <= num_mem; i++) { + if (mem_end_stk[i] != INVALID_STACK_INDEX) { + if (BIT_STATUS_AT(reg->bt_mem_start, i)) + region->beg[i] = STACK_AT(mem_start_stk[i])->u.mem.pstr - str; + else + region->beg[i] = (UChar* )((void* )mem_start_stk[i]) - str; + + region->end[i] = (BIT_STATUS_AT(reg->bt_mem_end, i) + ? 
STACK_AT(mem_end_stk[i])->u.mem.pstr + : (UChar* )((void* )mem_end_stk[i])) - str; + } + else { + region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS; + } + } + +#ifdef USE_CAPTURE_HISTORY + if (reg->capture_history != 0) { + int r; + OnigCaptureTreeNode* node; + + if (IS_NULL(region->history_root)) { + region->history_root = node = history_node_new(); + CHECK_NULL_RETURN_MEMERR(node); + } + else { + node = region->history_root; + history_tree_clear(node); + } + + node->group = 0; + node->beg = sstart - str; + node->end = s - str; + + stkp = stk_base; + r = make_capture_history_tree(region->history_root, &stkp, + stk, (UChar* )str, reg); + if (r < 0) { + best_len = r; /* error code */ + goto finish; + } + } +#endif /* USE_CAPTURE_HISTORY */ +#ifdef USE_POSIX_API_REGION_OPTION + } /* else IS_POSIX_REGION() */ +#endif + } /* if (region) */ + } /* n > best_len */ + +#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE + end_best_len: +#endif + MOP_OUT; + + if (IS_FIND_CONDITION(option)) { + if (IS_FIND_NOT_EMPTY(option) && s == sstart) { + best_len = ONIG_MISMATCH; + goto fail; /* for retry */ + } + if (IS_FIND_LONGEST(option) && DATA_ENSURE_CHECK1) { + goto fail; /* for retry */ + } + } + + /* default behavior: return first-matching result. 
*/ + goto finish; + break; + + case OP_EXACT1: MOP_IN(OP_EXACT1); +#if 0 + DATA_ENSURE(1); + if (*p != *s) goto fail; + p++; s++; +#endif + if (*p != *s++) goto fail; + DATA_ENSURE(0); + p++; + MOP_OUT; + break; + + case OP_EXACT1_IC: MOP_IN(OP_EXACT1_IC); + { + int len; + UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN]; + + DATA_ENSURE(1); + len = ONIGENC_MBC_CASE_FOLD(encode, + /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */ + case_fold_flag, + &s, end, lowbuf); + DATA_ENSURE(0); + q = lowbuf; + while (len-- > 0) { + if (*p != *q) { + goto fail; + } + p++; q++; + } + } + MOP_OUT; + break; + + case OP_EXACT2: MOP_IN(OP_EXACT2); + DATA_ENSURE(2); + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + sprev = s; + p++; s++; + MOP_OUT; + continue; + break; + + case OP_EXACT3: MOP_IN(OP_EXACT3); + DATA_ENSURE(3); + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + sprev = s; + p++; s++; + MOP_OUT; + continue; + break; + + case OP_EXACT4: MOP_IN(OP_EXACT4); + DATA_ENSURE(4); + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + sprev = s; + p++; s++; + MOP_OUT; + continue; + break; + + case OP_EXACT5: MOP_IN(OP_EXACT5); + DATA_ENSURE(5); + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + sprev = s; + p++; s++; + MOP_OUT; + continue; + break; + + case OP_EXACTN: MOP_IN(OP_EXACTN); + GET_LENGTH_INC(tlen, p); + DATA_ENSURE(tlen); + while (tlen-- > 0) { + if (*p++ != *s++) goto fail; + } + sprev = s - 1; + MOP_OUT; + continue; + break; + + case OP_EXACTN_IC: MOP_IN(OP_EXACTN_IC); + { + int len; + UChar *q, *endp, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN]; + + GET_LENGTH_INC(tlen, p); + endp = p + tlen; + + while (p < endp) { + sprev = s; + DATA_ENSURE(1); + len = 
ONIGENC_MBC_CASE_FOLD(encode, + /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */ + case_fold_flag, + &s, end, lowbuf); + DATA_ENSURE(0); + q = lowbuf; + while (len-- > 0) { + if (*p != *q) goto fail; + p++; q++; + } + } + } + + MOP_OUT; + continue; + break; + + case OP_EXACTMB2N1: MOP_IN(OP_EXACTMB2N1); + DATA_ENSURE(2); + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + p++; s++; + MOP_OUT; + break; + + case OP_EXACTMB2N2: MOP_IN(OP_EXACTMB2N2); + DATA_ENSURE(4); + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + p++; s++; + sprev = s; + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + p++; s++; + MOP_OUT; + continue; + break; + + case OP_EXACTMB2N3: MOP_IN(OP_EXACTMB2N3); + DATA_ENSURE(6); + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + p++; s++; + sprev = s; + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + p++; s++; + MOP_OUT; + continue; + break; + + case OP_EXACTMB2N: MOP_IN(OP_EXACTMB2N); + GET_LENGTH_INC(tlen, p); + DATA_ENSURE(tlen * 2); + while (tlen-- > 0) { + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + p++; s++; + } + sprev = s - 2; + MOP_OUT; + continue; + break; + + case OP_EXACTMB3N: MOP_IN(OP_EXACTMB3N); + GET_LENGTH_INC(tlen, p); + DATA_ENSURE(tlen * 3); + while (tlen-- > 0) { + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + p++; s++; + if (*p != *s) goto fail; + p++; s++; + } + sprev = s - 3; + MOP_OUT; + continue; + break; + + case OP_EXACTMBN: MOP_IN(OP_EXACTMBN); + GET_LENGTH_INC(tlen, p); /* mb-len */ + GET_LENGTH_INC(tlen2, p); /* string len */ + tlen2 *= tlen; + DATA_ENSURE(tlen2); + while (tlen2-- > 0) { + if (*p != *s) goto fail; + p++; s++; + } + sprev = s - tlen; + MOP_OUT; + continue; + break; + + case OP_CCLASS: MOP_IN(OP_CCLASS); + DATA_ENSURE(1); + if (BITSET_AT(((BitSetRef )p), *s) == 0) goto fail; + p += SIZE_BITSET; + s += 
enclen(encode, s); /* OP_CCLASS can match mb-code. \D, \S */ + MOP_OUT; + break; + + case OP_CCLASS_MB: MOP_IN(OP_CCLASS_MB); + if (! ONIGENC_IS_MBC_HEAD(encode, s)) goto fail; + + cclass_mb: + GET_LENGTH_INC(tlen, p); + { + OnigCodePoint code; + UChar *ss; + int mb_len; + + DATA_ENSURE(1); + mb_len = enclen(encode, s); + DATA_ENSURE(mb_len); + ss = s; + s += mb_len; + code = ONIGENC_MBC_TO_CODE(encode, ss, s); + +#ifdef PLATFORM_UNALIGNED_WORD_ACCESS + if (! onig_is_in_code_range(p, code)) goto fail; +#else + q = p; + ALIGNMENT_RIGHT(q); + if (! onig_is_in_code_range(q, code)) goto fail; +#endif + } + p += tlen; + MOP_OUT; + break; + + case OP_CCLASS_MIX: MOP_IN(OP_CCLASS_MIX); + DATA_ENSURE(1); + if (ONIGENC_IS_MBC_HEAD(encode, s)) { + p += SIZE_BITSET; + goto cclass_mb; + } + else { + if (BITSET_AT(((BitSetRef )p), *s) == 0) + goto fail; + + p += SIZE_BITSET; + GET_LENGTH_INC(tlen, p); + p += tlen; + s++; + } + MOP_OUT; + break; + + case OP_CCLASS_NOT: MOP_IN(OP_CCLASS_NOT); + DATA_ENSURE(1); + if (BITSET_AT(((BitSetRef )p), *s) != 0) goto fail; + p += SIZE_BITSET; + s += enclen(encode, s); + MOP_OUT; + break; + + case OP_CCLASS_MB_NOT: MOP_IN(OP_CCLASS_MB_NOT); + DATA_ENSURE(1); + if (! ONIGENC_IS_MBC_HEAD(encode, s)) { + s++; + GET_LENGTH_INC(tlen, p); + p += tlen; + goto cc_mb_not_success; + } + + cclass_mb_not: + GET_LENGTH_INC(tlen, p); + { + OnigCodePoint code; + UChar *ss; + int mb_len = enclen(encode, s); + + if (! 
DATA_ENSURE_CHECK(mb_len)) { + DATA_ENSURE(1); + s = (UChar* )end; + p += tlen; + goto cc_mb_not_success; + } + + ss = s; + s += mb_len; + code = ONIGENC_MBC_TO_CODE(encode, ss, s); + +#ifdef PLATFORM_UNALIGNED_WORD_ACCESS + if (onig_is_in_code_range(p, code)) goto fail; +#else + q = p; + ALIGNMENT_RIGHT(q); + if (onig_is_in_code_range(q, code)) goto fail; +#endif + } + p += tlen; + + cc_mb_not_success: + MOP_OUT; + break; + + case OP_CCLASS_MIX_NOT: MOP_IN(OP_CCLASS_MIX_NOT); + DATA_ENSURE(1); + if (ONIGENC_IS_MBC_HEAD(encode, s)) { + p += SIZE_BITSET; + goto cclass_mb_not; + } + else { + if (BITSET_AT(((BitSetRef )p), *s) != 0) + goto fail; + + p += SIZE_BITSET; + GET_LENGTH_INC(tlen, p); + p += tlen; + s++; + } + MOP_OUT; + break; + + case OP_CCLASS_NODE: MOP_IN(OP_CCLASS_NODE); + { + OnigCodePoint code; + void *node; + int mb_len; + UChar *ss; + + DATA_ENSURE(1); + GET_POINTER_INC(node, p); + mb_len = enclen(encode, s); + ss = s; + s += mb_len; + DATA_ENSURE(0); + code = ONIGENC_MBC_TO_CODE(encode, ss, s); + if (onig_is_code_in_cc_len(mb_len, code, node) == 0) goto fail; + } + MOP_OUT; + break; + + case OP_ANYCHAR: MOP_IN(OP_ANYCHAR); + DATA_ENSURE(1); + n = enclen(encode, s); + DATA_ENSURE(n); + if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail; + s += n; + MOP_OUT; + break; + + case OP_ANYCHAR_ML: MOP_IN(OP_ANYCHAR_ML); + DATA_ENSURE(1); + n = enclen(encode, s); + DATA_ENSURE(n); + s += n; + MOP_OUT; + break; + + case OP_ANYCHAR_STAR: MOP_IN(OP_ANYCHAR_STAR); + while (DATA_ENSURE_CHECK1) { + STACK_PUSH_ALT(p, s, sprev); + n = enclen(encode, s); + DATA_ENSURE(n); + if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail; + sprev = s; + s += n; + } + MOP_OUT; + break; + + case OP_ANYCHAR_ML_STAR: MOP_IN(OP_ANYCHAR_ML_STAR); + while (DATA_ENSURE_CHECK1) { + STACK_PUSH_ALT(p, s, sprev); + n = enclen(encode, s); + if (n > 1) { + DATA_ENSURE(n); + sprev = s; + s += n; + } + else { + sprev = s; + s++; + } + } + MOP_OUT; + break; + + case OP_ANYCHAR_STAR_PEEK_NEXT: 
MOP_IN(OP_ANYCHAR_STAR_PEEK_NEXT); + while (DATA_ENSURE_CHECK1) { + if (*p == *s) { + STACK_PUSH_ALT(p + 1, s, sprev); + } + n = enclen(encode, s); + DATA_ENSURE(n); + if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail; + sprev = s; + s += n; + } + p++; + MOP_OUT; + break; + + case OP_ANYCHAR_ML_STAR_PEEK_NEXT:MOP_IN(OP_ANYCHAR_ML_STAR_PEEK_NEXT); + while (DATA_ENSURE_CHECK1) { + if (*p == *s) { + STACK_PUSH_ALT(p + 1, s, sprev); + } + n = enclen(encode, s); + if (n > 1) { + DATA_ENSURE(n); + sprev = s; + s += n; + } + else { + sprev = s; + s++; + } + } + p++; + MOP_OUT; + break; + +#ifdef USE_COMBINATION_EXPLOSION_CHECK + case OP_STATE_CHECK_ANYCHAR_STAR: MOP_IN(OP_STATE_CHECK_ANYCHAR_STAR); + GET_STATE_CHECK_NUM_INC(mem, p); + while (DATA_ENSURE_CHECK1) { + STATE_CHECK_VAL(scv, mem); + if (scv) goto fail; + + STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem); + n = enclen(encode, s); + DATA_ENSURE(n); + if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail; + sprev = s; + s += n; + } + MOP_OUT; + break; + + case OP_STATE_CHECK_ANYCHAR_ML_STAR: + MOP_IN(OP_STATE_CHECK_ANYCHAR_ML_STAR); + + GET_STATE_CHECK_NUM_INC(mem, p); + while (DATA_ENSURE_CHECK1) { + STATE_CHECK_VAL(scv, mem); + if (scv) goto fail; + + STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem); + n = enclen(encode, s); + if (n > 1) { + DATA_ENSURE(n); + sprev = s; + s += n; + } + else { + sprev = s; + s++; + } + } + MOP_OUT; + break; +#endif /* USE_COMBINATION_EXPLOSION_CHECK */ + + case OP_WORD: MOP_IN(OP_WORD); + DATA_ENSURE(1); + if (! ONIGENC_IS_MBC_WORD(encode, s, end)) + goto fail; + + s += enclen(encode, s); + MOP_OUT; + break; + + case OP_NOT_WORD: MOP_IN(OP_NOT_WORD); + DATA_ENSURE(1); + if (ONIGENC_IS_MBC_WORD(encode, s, end)) + goto fail; + + s += enclen(encode, s); + MOP_OUT; + break; + + case OP_WORD_BOUND: MOP_IN(OP_WORD_BOUND); + if (ON_STR_BEGIN(s)) { + DATA_ENSURE(1); + if (! ONIGENC_IS_MBC_WORD(encode, s, end)) + goto fail; + } + else if (ON_STR_END(s)) { + if (! 
ONIGENC_IS_MBC_WORD(encode, sprev, end)) + goto fail; + } + else { + if (ONIGENC_IS_MBC_WORD(encode, s, end) + == ONIGENC_IS_MBC_WORD(encode, sprev, end)) + goto fail; + } + MOP_OUT; + continue; + break; + + case OP_NOT_WORD_BOUND: MOP_IN(OP_NOT_WORD_BOUND); + if (ON_STR_BEGIN(s)) { + if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end)) + goto fail; + } + else if (ON_STR_END(s)) { + if (ONIGENC_IS_MBC_WORD(encode, sprev, end)) + goto fail; + } + else { + if (ONIGENC_IS_MBC_WORD(encode, s, end) + != ONIGENC_IS_MBC_WORD(encode, sprev, end)) + goto fail; + } + MOP_OUT; + continue; + break; + +#ifdef USE_WORD_BEGIN_END + case OP_WORD_BEGIN: MOP_IN(OP_WORD_BEGIN); + if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end)) { + if (ON_STR_BEGIN(s) || !ONIGENC_IS_MBC_WORD(encode, sprev, end)) { + MOP_OUT; + continue; + } + } + goto fail; + break; + + case OP_WORD_END: MOP_IN(OP_WORD_END); + if (!ON_STR_BEGIN(s) && ONIGENC_IS_MBC_WORD(encode, sprev, end)) { + if (ON_STR_END(s) || !ONIGENC_IS_MBC_WORD(encode, s, end)) { + MOP_OUT; + continue; + } + } + goto fail; + break; +#endif + + case OP_BEGIN_BUF: MOP_IN(OP_BEGIN_BUF); + if (! ON_STR_BEGIN(s)) goto fail; + + MOP_OUT; + continue; + break; + + case OP_END_BUF: MOP_IN(OP_END_BUF); + if (! 
ON_STR_END(s)) goto fail; + + MOP_OUT; + continue; + break; + + case OP_BEGIN_LINE: MOP_IN(OP_BEGIN_LINE); + if (ON_STR_BEGIN(s)) { + if (IS_NOTBOL(msa->options)) goto fail; + MOP_OUT; + continue; + } + else if (ONIGENC_IS_MBC_NEWLINE(encode, sprev, end) && !ON_STR_END(s)) { + MOP_OUT; + continue; + } + goto fail; + break; + + case OP_END_LINE: MOP_IN(OP_END_LINE); + if (ON_STR_END(s)) { +#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE + if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) { +#endif + if (IS_NOTEOL(msa->options)) goto fail; + MOP_OUT; + continue; +#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE + } +#endif + } + else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) { + MOP_OUT; + continue; + } +#ifdef USE_CRNL_AS_LINE_TERMINATOR + else if (ONIGENC_IS_MBC_CRNL(encode, s, end)) { + MOP_OUT; + continue; + } +#endif + goto fail; + break; + + case OP_SEMI_END_BUF: MOP_IN(OP_SEMI_END_BUF); + if (ON_STR_END(s)) { +#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE + if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) { +#endif + if (IS_NOTEOL(msa->options)) goto fail; + MOP_OUT; + continue; +#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE + } +#endif + } + else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end) && + ON_STR_END(s + enclen(encode, s))) { + MOP_OUT; + continue; + } +#ifdef USE_CRNL_AS_LINE_TERMINATOR + else if (ONIGENC_IS_MBC_CRNL(encode, s, end)) { + UChar* ss = s + enclen(encode, s); + ss += enclen(encode, ss); + if (ON_STR_END(ss)) { + MOP_OUT; + continue; + } + } +#endif + goto fail; + break; + + case OP_BEGIN_POSITION: MOP_IN(OP_BEGIN_POSITION); + if (s != msa->start) + goto fail; + + MOP_OUT; + continue; + break; + + case OP_MEMORY_START_PUSH: MOP_IN(OP_MEMORY_START_PUSH); + GET_MEMNUM_INC(mem, p); + STACK_PUSH_MEM_START(mem, s); + MOP_OUT; + continue; + break; + + case OP_MEMORY_START: MOP_IN(OP_MEMORY_START); + GET_MEMNUM_INC(mem, p); + mem_start_stk[mem] = (OnigStackIndex )((void* )s); + MOP_OUT; + continue; 
+ break; + + case OP_MEMORY_END_PUSH: MOP_IN(OP_MEMORY_END_PUSH); + GET_MEMNUM_INC(mem, p); + STACK_PUSH_MEM_END(mem, s); + MOP_OUT; + continue; + break; + + case OP_MEMORY_END: MOP_IN(OP_MEMORY_END); + GET_MEMNUM_INC(mem, p); + mem_end_stk[mem] = (OnigStackIndex )((void* )s); + MOP_OUT; + continue; + break; + +#ifdef USE_SUBEXP_CALL + case OP_MEMORY_END_PUSH_REC: MOP_IN(OP_MEMORY_END_PUSH_REC); + GET_MEMNUM_INC(mem, p); + STACK_GET_MEM_START(mem, stkp); /* should be before push mem-end. */ + STACK_PUSH_MEM_END(mem, s); + mem_start_stk[mem] = GET_STACK_INDEX(stkp); + MOP_OUT; + continue; + break; + + case OP_MEMORY_END_REC: MOP_IN(OP_MEMORY_END_REC); + GET_MEMNUM_INC(mem, p); + mem_end_stk[mem] = (OnigStackIndex )((void* )s); + STACK_GET_MEM_START(mem, stkp); + + if (BIT_STATUS_AT(reg->bt_mem_start, mem)) + mem_start_stk[mem] = GET_STACK_INDEX(stkp); + else + mem_start_stk[mem] = (OnigStackIndex )((void* )stkp->u.mem.pstr); + + STACK_PUSH_MEM_END_MARK(mem); + MOP_OUT; + continue; + break; +#endif + + case OP_BACKREF1: MOP_IN(OP_BACKREF1); + mem = 1; + goto backref; + break; + + case OP_BACKREF2: MOP_IN(OP_BACKREF2); + mem = 2; + goto backref; + break; + + case OP_BACKREFN: MOP_IN(OP_BACKREFN); + GET_MEMNUM_INC(mem, p); + backref: + { + int len; + UChar *pstart, *pend; + + /* if you want to remove following line, + you should check in parse and compile time. */ + if (mem > num_mem) goto fail; + if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail; + if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail; + + if (BIT_STATUS_AT(reg->bt_mem_start, mem)) + pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr; + else + pstart = (UChar* )((void* )mem_start_stk[mem]); + + pend = (BIT_STATUS_AT(reg->bt_mem_end, mem) + ? 
STACK_AT(mem_end_stk[mem])->u.mem.pstr + : (UChar* )((void* )mem_end_stk[mem])); + n = pend - pstart; + DATA_ENSURE(n); + sprev = s; + STRING_CMP(pstart, s, n); + while (sprev + (len = enclen(encode, sprev)) < s) + sprev += len; + + MOP_OUT; + continue; + } + break; + + case OP_BACKREFN_IC: MOP_IN(OP_BACKREFN_IC); + GET_MEMNUM_INC(mem, p); + { + int len; + UChar *pstart, *pend; + + /* if you want to remove following line, + you should check in parse and compile time. */ + if (mem > num_mem) goto fail; + if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail; + if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail; + + if (BIT_STATUS_AT(reg->bt_mem_start, mem)) + pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr; + else + pstart = (UChar* )((void* )mem_start_stk[mem]); + + pend = (BIT_STATUS_AT(reg->bt_mem_end, mem) + ? STACK_AT(mem_end_stk[mem])->u.mem.pstr + : (UChar* )((void* )mem_end_stk[mem])); + n = pend - pstart; + DATA_ENSURE(n); + sprev = s; + STRING_CMP_IC(case_fold_flag, pstart, &s, n); + while (sprev + (len = enclen(encode, sprev)) < s) + sprev += len; + + MOP_OUT; + continue; + } + break; + + case OP_BACKREF_MULTI: MOP_IN(OP_BACKREF_MULTI); + { + int len, is_fail; + UChar *pstart, *pend, *swork; + + GET_LENGTH_INC(tlen, p); + for (i = 0; i < tlen; i++) { + GET_MEMNUM_INC(mem, p); + + if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue; + if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue; + + if (BIT_STATUS_AT(reg->bt_mem_start, mem)) + pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr; + else + pstart = (UChar* )((void* )mem_start_stk[mem]); + + pend = (BIT_STATUS_AT(reg->bt_mem_end, mem) + ? 
STACK_AT(mem_end_stk[mem])->u.mem.pstr + : (UChar* )((void* )mem_end_stk[mem])); + n = pend - pstart; + DATA_ENSURE(n); + sprev = s; + swork = s; + STRING_CMP_VALUE(pstart, swork, n, is_fail); + if (is_fail) continue; + s = swork; + while (sprev + (len = enclen(encode, sprev)) < s) + sprev += len; + + p += (SIZE_MEMNUM * (tlen - i - 1)); + break; /* success */ + } + if (i == tlen) goto fail; + MOP_OUT; + continue; + } + break; + + case OP_BACKREF_MULTI_IC: MOP_IN(OP_BACKREF_MULTI_IC); + { + int len, is_fail; + UChar *pstart, *pend, *swork; + + GET_LENGTH_INC(tlen, p); + for (i = 0; i < tlen; i++) { + GET_MEMNUM_INC(mem, p); + + if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue; + if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue; + + if (BIT_STATUS_AT(reg->bt_mem_start, mem)) + pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr; + else + pstart = (UChar* )((void* )mem_start_stk[mem]); + + pend = (BIT_STATUS_AT(reg->bt_mem_end, mem) + ? STACK_AT(mem_end_stk[mem])->u.mem.pstr + : (UChar* )((void* )mem_end_stk[mem])); + n = pend - pstart; + DATA_ENSURE(n); + sprev = s; + swork = s; + STRING_CMP_VALUE_IC(case_fold_flag, pstart, &swork, n, is_fail); + if (is_fail) continue; + s = swork; + while (sprev + (len = enclen(encode, sprev)) < s) + sprev += len; + + p += (SIZE_MEMNUM * (tlen - i - 1)); + break; /* success */ + } + if (i == tlen) goto fail; + MOP_OUT; + continue; + } + break; + +#ifdef USE_BACKREF_WITH_LEVEL + case OP_BACKREF_WITH_LEVEL: + { + int len; + OnigOptionType ic; + LengthType level; + + GET_OPTION_INC(ic, p); + GET_LENGTH_INC(level, p); + GET_LENGTH_INC(tlen, p); + + sprev = s; + if (backref_match_at_nested_level(reg, stk, stk_base, ic + , case_fold_flag, (int )level, (int )tlen, p, &s, end)) { + while (sprev + (len = enclen(encode, sprev)) < s) + sprev += len; + + p += (SIZE_MEMNUM * tlen); + } + else + goto fail; + + MOP_OUT; + continue; + } + + break; +#endif + +#if 0 /* no need: IS_DYNAMIC_OPTION() == 0 */ + case OP_SET_OPTION_PUSH: 
MOP_IN(OP_SET_OPTION_PUSH); + GET_OPTION_INC(option, p); + STACK_PUSH_ALT(p, s, sprev); + p += SIZE_OP_SET_OPTION + SIZE_OP_FAIL; + MOP_OUT; + continue; + break; + + case OP_SET_OPTION: MOP_IN(OP_SET_OPTION); + GET_OPTION_INC(option, p); + MOP_OUT; + continue; + break; +#endif + + case OP_NULL_CHECK_START: MOP_IN(OP_NULL_CHECK_START); + GET_MEMNUM_INC(mem, p); /* mem: null check id */ + STACK_PUSH_NULL_CHECK_START(mem, s); + MOP_OUT; + continue; + break; + + case OP_NULL_CHECK_END: MOP_IN(OP_NULL_CHECK_END); + { + int isnull; + + GET_MEMNUM_INC(mem, p); /* mem: null check id */ + STACK_NULL_CHECK(isnull, mem, s); + if (isnull) { +#ifdef ONIG_DEBUG_MATCH + fprintf(stderr, "NULL_CHECK_END: skip id:%d, s:%d\n", + (int )mem, (int )s); +#endif + null_check_found: + /* empty loop founded, skip next instruction */ + switch (*p++) { + case OP_JUMP: + case OP_PUSH: + p += SIZE_RELADDR; + break; + case OP_REPEAT_INC: + case OP_REPEAT_INC_NG: + case OP_REPEAT_INC_SG: + case OP_REPEAT_INC_NG_SG: + p += SIZE_MEMNUM; + break; + default: + goto unexpected_bytecode_error; + break; + } + } + } + MOP_OUT; + continue; + break; + +#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT + case OP_NULL_CHECK_END_MEMST: MOP_IN(OP_NULL_CHECK_END_MEMST); + { + int isnull; + + GET_MEMNUM_INC(mem, p); /* mem: null check id */ + STACK_NULL_CHECK_MEMST(isnull, mem, s, reg); + if (isnull) { +#ifdef ONIG_DEBUG_MATCH + fprintf(stderr, "NULL_CHECK_END_MEMST: skip id:%d, s:%d\n", + (int )mem, (int )s); +#endif + if (isnull == -1) goto fail; + goto null_check_found; + } + } + MOP_OUT; + continue; + break; +#endif + +#ifdef USE_SUBEXP_CALL + case OP_NULL_CHECK_END_MEMST_PUSH: + MOP_IN(OP_NULL_CHECK_END_MEMST_PUSH); + { + int isnull; + + GET_MEMNUM_INC(mem, p); /* mem: null check id */ +#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT + STACK_NULL_CHECK_MEMST_REC(isnull, mem, s, reg); +#else + STACK_NULL_CHECK_REC(isnull, mem, s); +#endif + if (isnull) { +#ifdef ONIG_DEBUG_MATCH + fprintf(stderr, 
"NULL_CHECK_END_MEMST_PUSH: skip id:%d, s:%d\n", + (int )mem, (int )s); +#endif + if (isnull == -1) goto fail; + goto null_check_found; + } + else { + STACK_PUSH_NULL_CHECK_END(mem); + } + } + MOP_OUT; + continue; + break; +#endif + + case OP_JUMP: MOP_IN(OP_JUMP); + GET_RELADDR_INC(addr, p); + p += addr; + MOP_OUT; + CHECK_INTERRUPT_IN_MATCH_AT; + continue; + break; + + case OP_PUSH: MOP_IN(OP_PUSH); + GET_RELADDR_INC(addr, p); + STACK_PUSH_ALT(p + addr, s, sprev); + MOP_OUT; + continue; + break; + +#ifdef USE_COMBINATION_EXPLOSION_CHECK + case OP_STATE_CHECK_PUSH: MOP_IN(OP_STATE_CHECK_PUSH); + GET_STATE_CHECK_NUM_INC(mem, p); + STATE_CHECK_VAL(scv, mem); + if (scv) goto fail; + + GET_RELADDR_INC(addr, p); + STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem); + MOP_OUT; + continue; + break; + + case OP_STATE_CHECK_PUSH_OR_JUMP: MOP_IN(OP_STATE_CHECK_PUSH_OR_JUMP); + GET_STATE_CHECK_NUM_INC(mem, p); + GET_RELADDR_INC(addr, p); + STATE_CHECK_VAL(scv, mem); + if (scv) { + p += addr; + } + else { + STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem); + } + MOP_OUT; + continue; + break; + + case OP_STATE_CHECK: MOP_IN(OP_STATE_CHECK); + GET_STATE_CHECK_NUM_INC(mem, p); + STATE_CHECK_VAL(scv, mem); + if (scv) goto fail; + + STACK_PUSH_STATE_CHECK(s, mem); + MOP_OUT; + continue; + break; +#endif /* USE_COMBINATION_EXPLOSION_CHECK */ + + case OP_POP: MOP_IN(OP_POP); + STACK_POP_ONE; + MOP_OUT; + continue; + break; + + case OP_PUSH_OR_JUMP_EXACT1: MOP_IN(OP_PUSH_OR_JUMP_EXACT1); + GET_RELADDR_INC(addr, p); + if (*p == *s && DATA_ENSURE_CHECK1) { + p++; + STACK_PUSH_ALT(p + addr, s, sprev); + MOP_OUT; + continue; + } + p += (addr + 1); + MOP_OUT; + continue; + break; + + case OP_PUSH_IF_PEEK_NEXT: MOP_IN(OP_PUSH_IF_PEEK_NEXT); + GET_RELADDR_INC(addr, p); + if (*p == *s) { + p++; + STACK_PUSH_ALT(p + addr, s, sprev); + MOP_OUT; + continue; + } + p++; + MOP_OUT; + continue; + break; + + case OP_REPEAT: MOP_IN(OP_REPEAT); + { + GET_MEMNUM_INC(mem, p); /* mem: 
OP_REPEAT ID */ + GET_RELADDR_INC(addr, p); + + STACK_ENSURE(1); + repeat_stk[mem] = GET_STACK_INDEX(stk); + STACK_PUSH_REPEAT(mem, p); + + if (reg->repeat_range[mem].lower == 0) { + STACK_PUSH_ALT(p + addr, s, sprev); + } + } + MOP_OUT; + continue; + break; + + case OP_REPEAT_NG: MOP_IN(OP_REPEAT_NG); + { + GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ + GET_RELADDR_INC(addr, p); + + STACK_ENSURE(1); + repeat_stk[mem] = GET_STACK_INDEX(stk); + STACK_PUSH_REPEAT(mem, p); + + if (reg->repeat_range[mem].lower == 0) { + STACK_PUSH_ALT(p, s, sprev); + p += addr; + } + } + MOP_OUT; + continue; + break; + + case OP_REPEAT_INC: MOP_IN(OP_REPEAT_INC); + GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ + si = repeat_stk[mem]; + stkp = STACK_AT(si); + + repeat_inc: + stkp->u.repeat.count++; + if (stkp->u.repeat.count >= reg->repeat_range[mem].upper) { + /* end of repeat. Nothing to do. */ + } + else if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) { + STACK_PUSH_ALT(p, s, sprev); + p = STACK_AT(si)->u.repeat.pcode; /* Don't use stkp after PUSH. 
*/ + } + else { + p = stkp->u.repeat.pcode; + } + STACK_PUSH_REPEAT_INC(si); + MOP_OUT; + CHECK_INTERRUPT_IN_MATCH_AT; + continue; + break; + + case OP_REPEAT_INC_SG: MOP_IN(OP_REPEAT_INC_SG); + GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ + STACK_GET_REPEAT(mem, stkp); + si = GET_STACK_INDEX(stkp); + goto repeat_inc; + break; + + case OP_REPEAT_INC_NG: MOP_IN(OP_REPEAT_INC_NG); + GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ + si = repeat_stk[mem]; + stkp = STACK_AT(si); + + repeat_inc_ng: + stkp->u.repeat.count++; + if (stkp->u.repeat.count < reg->repeat_range[mem].upper) { + if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) { + UChar* pcode = stkp->u.repeat.pcode; + + STACK_PUSH_REPEAT_INC(si); + STACK_PUSH_ALT(pcode, s, sprev); + } + else { + p = stkp->u.repeat.pcode; + STACK_PUSH_REPEAT_INC(si); + } + } + else if (stkp->u.repeat.count == reg->repeat_range[mem].upper) { + STACK_PUSH_REPEAT_INC(si); + } + MOP_OUT; + CHECK_INTERRUPT_IN_MATCH_AT; + continue; + break; + + case OP_REPEAT_INC_NG_SG: MOP_IN(OP_REPEAT_INC_NG_SG); + GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ + STACK_GET_REPEAT(mem, stkp); + si = GET_STACK_INDEX(stkp); + goto repeat_inc_ng; + break; + + case OP_PUSH_POS: MOP_IN(OP_PUSH_POS); + STACK_PUSH_POS(s, sprev); + MOP_OUT; + continue; + break; + + case OP_POP_POS: MOP_IN(OP_POP_POS); + { + STACK_POS_END(stkp); + s = stkp->u.state.pstr; + sprev = stkp->u.state.pstr_prev; + } + MOP_OUT; + continue; + break; + + case OP_PUSH_POS_NOT: MOP_IN(OP_PUSH_POS_NOT); + GET_RELADDR_INC(addr, p); + STACK_PUSH_POS_NOT(p + addr, s, sprev); + MOP_OUT; + continue; + break; + + case OP_FAIL_POS: MOP_IN(OP_FAIL_POS); + STACK_POP_TIL_POS_NOT; + goto fail; + break; + + case OP_PUSH_STOP_BT: MOP_IN(OP_PUSH_STOP_BT); + STACK_PUSH_STOP_BT; + MOP_OUT; + continue; + break; + + case OP_POP_STOP_BT: MOP_IN(OP_POP_STOP_BT); + STACK_STOP_BT_END; + MOP_OUT; + continue; + break; + + case OP_LOOK_BEHIND: MOP_IN(OP_LOOK_BEHIND); + GET_LENGTH_INC(tlen, p); + s = 
(UChar* )ONIGENC_STEP_BACK(encode, str, s, (int )tlen); + if (IS_NULL(s)) goto fail; + sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s); + MOP_OUT; + continue; + break; + + case OP_PUSH_LOOK_BEHIND_NOT: MOP_IN(OP_PUSH_LOOK_BEHIND_NOT); + GET_RELADDR_INC(addr, p); + GET_LENGTH_INC(tlen, p); + q = (UChar* )ONIGENC_STEP_BACK(encode, str, s, (int )tlen); + if (IS_NULL(q)) { + /* too short case -> success. ex. /(?p + addr; + MOP_OUT; + continue; + break; + + case OP_RETURN: MOP_IN(OP_RETURN); + STACK_RETURN(p); + STACK_PUSH_RETURN; + MOP_OUT; + continue; + break; +#endif + + case OP_FINISH: + goto finish; + break; + + fail: + MOP_OUT; + /* fall */ + case OP_FAIL: MOP_IN(OP_FAIL); + STACK_POP; + p = stk->u.state.pcode; + s = stk->u.state.pstr; + sprev = stk->u.state.pstr_prev; + +#ifdef USE_COMBINATION_EXPLOSION_CHECK + if (stk->u.state.state_check != 0) { + stk->type = STK_STATE_CHECK_MARK; + stk++; + } +#endif + + MOP_OUT; + continue; + break; + + default: + goto bytecode_error; + + } /* end of switch */ + sprev = sbegin; + } /* end of while(1) */ + + finish: + STACK_SAVE; + return best_len; + +#ifdef ONIG_DEBUG + stack_error: + STACK_SAVE; + return ONIGERR_STACK_BUG; +#endif + + bytecode_error: + STACK_SAVE; + return ONIGERR_UNDEFINED_BYTECODE; + + unexpected_bytecode_error: + STACK_SAVE; + return ONIGERR_UNEXPECTED_BYTECODE; +} + + +static UChar* +slow_search(OnigEncoding enc, UChar* target, UChar* target_end, + const UChar* text, const UChar* text_end, UChar* text_range) +{ + UChar *t, *p, *s, *end; + + end = (UChar* )text_end; + end -= target_end - target - 1; + if (end > text_range) + end = text_range; + + s = (UChar* )text; + + while (s < end) { + if (*s == *target) { + p = s + 1; + t = target + 1; + while (t < target_end) { + if (*t != *p++) + break; + t++; + } + if (t == target_end) + return s; + } + s += enclen(enc, s); + } + + return (UChar* )NULL; +} + +static int +str_lower_case_match(OnigEncoding enc, int case_fold_flag, + const UChar* t, const 
UChar* tend, + const UChar* p, const UChar* end) +{ + int lowlen; + UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN]; + + while (t < tend) { + lowlen = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &p, end, lowbuf); + q = lowbuf; + while (lowlen > 0) { + if (*t++ != *q++) return 0; + lowlen--; + } + } + + return 1; +} + +static UChar* +slow_search_ic(OnigEncoding enc, int case_fold_flag, + UChar* target, UChar* target_end, + const UChar* text, const UChar* text_end, UChar* text_range) +{ + UChar *s, *end; + + end = (UChar* )text_end; + end -= target_end - target - 1; + if (end > text_range) + end = text_range; + + s = (UChar* )text; + + while (s < end) { + if (str_lower_case_match(enc, case_fold_flag, target, target_end, + s, text_end)) + return s; + + s += enclen(enc, s); + } + + return (UChar* )NULL; +} + +static UChar* +slow_search_backward(OnigEncoding enc, UChar* target, UChar* target_end, + const UChar* text, const UChar* adjust_text, + const UChar* text_end, const UChar* text_start) +{ + UChar *t, *p, *s; + + s = (UChar* )text_end; + s -= (target_end - target); + if (s > text_start) + s = (UChar* )text_start; + else + s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s); + + while (s >= text) { + if (*s == *target) { + p = s + 1; + t = target + 1; + while (t < target_end) { + if (*t != *p++) + break; + t++; + } + if (t == target_end) + return s; + } + s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s); + } + + return (UChar* )NULL; +} + +static UChar* +slow_search_backward_ic(OnigEncoding enc, int case_fold_flag, + UChar* target, UChar* target_end, + const UChar* text, const UChar* adjust_text, + const UChar* text_end, const UChar* text_start) +{ + UChar *s; + + s = (UChar* )text_end; + s -= (target_end - target); + if (s > text_start) + s = (UChar* )text_start; + else + s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s); + + while (s >= text) { + if (str_lower_case_match(enc, case_fold_flag, + target, target_end, s, text_end)) + return s; + + s = 
(UChar* )onigenc_get_prev_char_head(enc, adjust_text, s); + } + + return (UChar* )NULL; +} + +static UChar* +bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end, + const UChar* text, const UChar* text_end, + const UChar* text_range) +{ + const UChar *s, *se, *t, *p, *end; + const UChar *tail; + int skip, tlen1; + +#ifdef ONIG_DEBUG_SEARCH + fprintf(stderr, "bm_search_notrev: text: %d, text_end: %d, text_range: %d\n", + (int )text, (int )text_end, (int )text_range); +#endif + + tail = target_end - 1; + tlen1 = tail - target; + end = text_range; + if (end + tlen1 > text_end) + end = text_end - tlen1; + + s = text; + + if (IS_NULL(reg->int_map)) { + while (s < end) { + p = se = s + tlen1; + t = tail; + while (*p == *t) { + if (t == target) return (UChar* )s; + p--; t--; + } + skip = reg->map[*se]; + t = s; + do { + s += enclen(reg->enc, s); + } while ((s - t) < skip && s < end); + } + } + else { + while (s < end) { + p = se = s + tlen1; + t = tail; + while (*p == *t) { + if (t == target) return (UChar* )s; + p--; t--; + } + skip = reg->int_map[*se]; + t = s; + do { + s += enclen(reg->enc, s); + } while ((s - t) < skip && s < end); + } + } + + return (UChar* )NULL; +} + +static UChar* +bm_search(regex_t* reg, const UChar* target, const UChar* target_end, + const UChar* text, const UChar* text_end, const UChar* text_range) +{ + const UChar *s, *t, *p, *end; + const UChar *tail; + + end = text_range + (target_end - target) - 1; + if (end > text_end) + end = text_end; + + tail = target_end - 1; + s = text + (target_end - target) - 1; + if (IS_NULL(reg->int_map)) { + while (s < end) { + p = s; + t = tail; + while (*p == *t) { + if (t == target) return (UChar* )p; + p--; t--; + } + s += reg->map[*s]; + } + } + else { /* see int_map[] */ + while (s < end) { + p = s; + t = tail; + while (*p == *t) { + if (t == target) return (UChar* )p; + p--; t--; + } + s += reg->int_map[*s]; + } + } + return (UChar* )NULL; +} + +static int 
+set_bm_backward_skip(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED, + int** skip) + +{ + int i, len; + + if (IS_NULL(*skip)) { + *skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE); + if (IS_NULL(*skip)) return ONIGERR_MEMORY; + } + + len = end - s; + for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) + (*skip)[i] = len; + + for (i = len - 1; i > 0; i--) + (*skip)[s[i]] = i; + + return 0; +} + +static UChar* +bm_search_backward(regex_t* reg, const UChar* target, const UChar* target_end, + const UChar* text, const UChar* adjust_text, + const UChar* text_end, const UChar* text_start) +{ + const UChar *s, *t, *p; + + s = text_end - (target_end - target); + if (text_start < s) + s = text_start; + else + s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s); + + while (s >= text) { + p = s; + t = target; + while (t < target_end && *p == *t) { + p++; t++; + } + if (t == target_end) + return (UChar* )s; + + s -= reg->int_map_backward[*s]; + s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s); + } + + return (UChar* )NULL; +} + +static UChar* +map_search(OnigEncoding enc, UChar map[], + const UChar* text, const UChar* text_range) +{ + const UChar *s = text; + + while (s < text_range) { + if (map[*s]) return (UChar* )s; + + s += enclen(enc, s); + } + return (UChar* )NULL; +} + +static UChar* +map_search_backward(OnigEncoding enc, UChar map[], + const UChar* text, const UChar* adjust_text, + const UChar* text_start) +{ + const UChar *s = text_start; + + while (s >= text) { + if (map[*s]) return (UChar* )s; + + s = onigenc_get_prev_char_head(enc, adjust_text, s); + } + return (UChar* )NULL; +} + +extern int +onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, OnigRegion* region, + OnigOptionType option) +{ + int r; + UChar *prev; + OnigMatchArg msa; + +#if defined(USE_RECOMPILE_API) && defined(USE_MULTI_THREAD_SYSTEM) + start: + THREAD_ATOMIC_START; + if (ONIG_STATE(reg) >= ONIG_STATE_NORMAL) { + ONIG_STATE_INC(reg); + if 
(IS_NOT_NULL(reg->chain) && ONIG_STATE(reg) == ONIG_STATE_NORMAL) { + onig_chain_reduce(reg); + ONIG_STATE_INC(reg); + } + } + else { + int n; + + THREAD_ATOMIC_END; + n = 0; + while (ONIG_STATE(reg) < ONIG_STATE_NORMAL) { + if (++n > THREAD_PASS_LIMIT_COUNT) + return ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT; + THREAD_PASS; + } + goto start; + } + THREAD_ATOMIC_END; +#endif /* USE_RECOMPILE_API && USE_MULTI_THREAD_SYSTEM */ + + MATCH_ARG_INIT(msa, option, region, at); +#ifdef USE_COMBINATION_EXPLOSION_CHECK + { + int offset = at - str; + STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check); + } +#endif + + if (region +#ifdef USE_POSIX_API_REGION_OPTION + && !IS_POSIX_REGION(option) +#endif + ) { + r = onig_region_resize_clear(region, reg->num_mem + 1); + } + else + r = 0; + + if (r == 0) { + prev = (UChar* )onigenc_get_prev_char_head(reg->enc, str, at); + r = match_at(reg, str, end, +#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE + end, +#endif + at, prev, &msa); + } + + MATCH_ARG_FREE(msa); + ONIG_STATE_DEC_THREAD(reg); + return r; +} + +static int +forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s, + UChar* range, UChar** low, UChar** high, UChar** low_prev) +{ + UChar *p, *pprev = (UChar* )NULL; + +#ifdef ONIG_DEBUG_SEARCH + fprintf(stderr, "forward_search_range: str: %d, end: %d, s: %d, range: %d\n", + (int )str, (int )end, (int )s, (int )range); +#endif + + p = s; + if (reg->dmin > 0) { + if (ONIGENC_IS_SINGLEBYTE(reg->enc)) { + p += reg->dmin; + } + else { + UChar *q = p + reg->dmin; + while (p < q) p += enclen(reg->enc, p); + } + } + + retry: + switch (reg->optimize) { + case ONIG_OPTIMIZE_EXACT: + p = slow_search(reg->enc, reg->exact, reg->exact_end, p, end, range); + break; + case ONIG_OPTIMIZE_EXACT_IC: + p = slow_search_ic(reg->enc, reg->case_fold_flag, + reg->exact, reg->exact_end, p, end, range); + break; + + case ONIG_OPTIMIZE_EXACT_BM: + p = bm_search(reg, reg->exact, reg->exact_end, p, end, 
range); + break; + + case ONIG_OPTIMIZE_EXACT_BM_NOT_REV: + p = bm_search_notrev(reg, reg->exact, reg->exact_end, p, end, range); + break; + + case ONIG_OPTIMIZE_MAP: + p = map_search(reg->enc, reg->map, p, range); + break; + } + + if (p && p < range) { + if (p - reg->dmin < s) { + retry_gate: + pprev = p; + p += enclen(reg->enc, p); + goto retry; + } + + if (reg->sub_anchor) { + UChar* prev; + + switch (reg->sub_anchor) { + case ANCHOR_BEGIN_LINE: + if (!ON_STR_BEGIN(p)) { + prev = onigenc_get_prev_char_head(reg->enc, + (pprev ? pprev : str), p); + if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) + goto retry_gate; + } + break; + + case ANCHOR_END_LINE: + if (ON_STR_END(p)) { +#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE + prev = (UChar* )onigenc_get_prev_char_head(reg->enc, + (pprev ? pprev : str), p); + if (prev && ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) + goto retry_gate; +#endif + } + else if (! ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end) +#ifdef USE_CRNL_AS_LINE_TERMINATOR + && ! ONIGENC_IS_MBC_CRNL(reg->enc, p, end) +#endif + ) + goto retry_gate; + break; + } + } + + if (reg->dmax == 0) { + *low = p; + if (low_prev) { + if (*low > s) + *low_prev = onigenc_get_prev_char_head(reg->enc, s, p); + else + *low_prev = onigenc_get_prev_char_head(reg->enc, + (pprev ? pprev : str), p); + } + } + else { + if (reg->dmax != ONIG_INFINITE_DISTANCE) { + *low = p - reg->dmax; + if (*low > s) { + *low = onigenc_get_right_adjust_char_head_with_prev(reg->enc, s, + *low, (const UChar** )low_prev); + if (low_prev && IS_NULL(*low_prev)) + *low_prev = onigenc_get_prev_char_head(reg->enc, + (pprev ? pprev : s), *low); + } + else { + if (low_prev) + *low_prev = onigenc_get_prev_char_head(reg->enc, + (pprev ? 
pprev : str), *low); + } + } + } + /* no needs to adjust *high, *high is used as range check only */ + *high = p - reg->dmin; + +#ifdef ONIG_DEBUG_SEARCH + fprintf(stderr, + "forward_search_range success: low: %d, high: %d, dmin: %d, dmax: %d\n", + (int )(*low - str), (int )(*high - str), reg->dmin, reg->dmax); +#endif + return 1; /* success */ + } + + return 0; /* fail */ +} + +static int set_bm_backward_skip P_((UChar* s, UChar* end, OnigEncoding enc, + int** skip)); + +#define BM_BACKWARD_SEARCH_LENGTH_THRESHOLD 100 + +static int +backward_search_range(regex_t* reg, const UChar* str, const UChar* end, + UChar* s, const UChar* range, UChar* adjrange, + UChar** low, UChar** high) +{ + int r; + UChar *p; + + range += reg->dmin; + p = s; + + retry: + switch (reg->optimize) { + case ONIG_OPTIMIZE_EXACT: + exact_method: + p = slow_search_backward(reg->enc, reg->exact, reg->exact_end, + range, adjrange, end, p); + break; + + case ONIG_OPTIMIZE_EXACT_IC: + p = slow_search_backward_ic(reg->enc, reg->case_fold_flag, + reg->exact, reg->exact_end, + range, adjrange, end, p); + break; + + case ONIG_OPTIMIZE_EXACT_BM: + case ONIG_OPTIMIZE_EXACT_BM_NOT_REV: + if (IS_NULL(reg->int_map_backward)) { + if (s - range < BM_BACKWARD_SEARCH_LENGTH_THRESHOLD) + goto exact_method; + + r = set_bm_backward_skip(reg->exact, reg->exact_end, reg->enc, + &(reg->int_map_backward)); + if (r) return r; + } + p = bm_search_backward(reg, reg->exact, reg->exact_end, range, adjrange, + end, p); + break; + + case ONIG_OPTIMIZE_MAP: + p = map_search_backward(reg->enc, reg->map, range, adjrange, p); + break; + } + + if (p) { + if (reg->sub_anchor) { + UChar* prev; + + switch (reg->sub_anchor) { + case ANCHOR_BEGIN_LINE: + if (!ON_STR_BEGIN(p)) { + prev = onigenc_get_prev_char_head(reg->enc, str, p); + if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) { + p = prev; + goto retry; + } + } + break; + + case ANCHOR_END_LINE: + if (ON_STR_END(p)) { +#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE + prev 
= onigenc_get_prev_char_head(reg->enc, adjrange, p); + if (IS_NULL(prev)) goto fail; + if (ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) { + p = prev; + goto retry; + } +#endif + } + else if (! ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end) +#ifdef USE_CRNL_AS_LINE_TERMINATOR + && ! ONIGENC_IS_MBC_CRNL(reg->enc, p, end) +#endif + ) { + p = onigenc_get_prev_char_head(reg->enc, adjrange, p); + if (IS_NULL(p)) goto fail; + goto retry; + } + break; + } + } + + /* no needs to adjust *high, *high is used as range check only */ + if (reg->dmax != ONIG_INFINITE_DISTANCE) { + *low = p - reg->dmax; + *high = p - reg->dmin; + *high = onigenc_get_right_adjust_char_head(reg->enc, adjrange, *high); + } + +#ifdef ONIG_DEBUG_SEARCH + fprintf(stderr, "backward_search_range: low: %d, high: %d\n", + (int )(*low - str), (int )(*high - str)); +#endif + return 1; /* success */ + } + + fail: +#ifdef ONIG_DEBUG_SEARCH + fprintf(stderr, "backward_search_range: fail.\n"); +#endif + return 0; /* fail */ +} + + +extern int +onig_search(regex_t* reg, const UChar* str, const UChar* end, + const UChar* start, const UChar* range, OnigRegion* region, OnigOptionType option) +{ + int r; + UChar *s, *prev; + OnigMatchArg msa; + const UChar *orig_start = start; +#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE + const UChar *orig_range = range; +#endif + +#if defined(USE_RECOMPILE_API) && defined(USE_MULTI_THREAD_SYSTEM) + start: + THREAD_ATOMIC_START; + if (ONIG_STATE(reg) >= ONIG_STATE_NORMAL) { + ONIG_STATE_INC(reg); + if (IS_NOT_NULL(reg->chain) && ONIG_STATE(reg) == ONIG_STATE_NORMAL) { + onig_chain_reduce(reg); + ONIG_STATE_INC(reg); + } + } + else { + int n; + + THREAD_ATOMIC_END; + n = 0; + while (ONIG_STATE(reg) < ONIG_STATE_NORMAL) { + if (++n > THREAD_PASS_LIMIT_COUNT) + return ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT; + THREAD_PASS; + } + goto start; + } + THREAD_ATOMIC_END; +#endif /* USE_RECOMPILE_API && USE_MULTI_THREAD_SYSTEM */ + +#ifdef ONIG_DEBUG_SEARCH + fprintf(stderr, + "onig_search 
(entry point): str: %d, end: %d, start: %d, range: %d\n", + (int )str, (int )(end - str), (int )(start - str), (int )(range - str)); +#endif + + if (region +#ifdef USE_POSIX_API_REGION_OPTION + && !IS_POSIX_REGION(option) +#endif + ) { + r = onig_region_resize_clear(region, reg->num_mem + 1); + if (r) goto finish_no_msa; + } + + if (start > end || start < str) goto mismatch_no_msa; + + +#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE +#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE +#define MATCH_AND_RETURN_CHECK(upper_range) \ + r = match_at(reg, str, end, (upper_range), s, prev, &msa); \ + if (r != ONIG_MISMATCH) {\ + if (r >= 0) {\ + if (! IS_FIND_LONGEST(reg->options)) {\ + goto match;\ + }\ + }\ + else goto finish; /* error */ \ + } +#else +#define MATCH_AND_RETURN_CHECK(upper_range) \ + r = match_at(reg, str, end, (upper_range), s, prev, &msa); \ + if (r != ONIG_MISMATCH) {\ + if (r >= 0) {\ + goto match;\ + }\ + else goto finish; /* error */ \ + } +#endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */ +#else +#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE +#define MATCH_AND_RETURN_CHECK(none) \ + r = match_at(reg, str, end, s, prev, &msa);\ + if (r != ONIG_MISMATCH) {\ + if (r >= 0) {\ + if (! 
IS_FIND_LONGEST(reg->options)) {\ + goto match;\ + }\ + }\ + else goto finish; /* error */ \ + } +#else +#define MATCH_AND_RETURN_CHECK(none) \ + r = match_at(reg, str, end, s, prev, &msa);\ + if (r != ONIG_MISMATCH) {\ + if (r >= 0) {\ + goto match;\ + }\ + else goto finish; /* error */ \ + } +#endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */ +#endif /* USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */ + + + /* anchor optimize: resume search range */ + if (reg->anchor != 0 && str < end) { + UChar *min_semi_end, *max_semi_end; + + if (reg->anchor & ANCHOR_BEGIN_POSITION) { + /* search start-position only */ + begin_position: + if (range > start) + range = start + 1; + else + range = start; + } + else if (reg->anchor & ANCHOR_BEGIN_BUF) { + /* search str-position only */ + if (range > start) { + if (start != str) goto mismatch_no_msa; + range = str + 1; + } + else { + if (range <= str) { + start = str; + range = str; + } + else + goto mismatch_no_msa; + } + } + else if (reg->anchor & ANCHOR_END_BUF) { + min_semi_end = max_semi_end = (UChar* )end; + + end_buf: + if ((OnigDistance )(max_semi_end - str) < reg->anchor_dmin) + goto mismatch_no_msa; + + if (range > start) { + if ((OnigDistance )(min_semi_end - start) > reg->anchor_dmax) { + start = min_semi_end - reg->anchor_dmax; + if (start < end) + start = onigenc_get_right_adjust_char_head(reg->enc, str, start); + else { /* match with empty at end */ + start = onigenc_get_prev_char_head(reg->enc, str, end); + } + } + if ((OnigDistance )(max_semi_end - (range - 1)) < reg->anchor_dmin) { + range = max_semi_end - reg->anchor_dmin + 1; + } + + if (start >= range) goto mismatch_no_msa; + } + else { + if ((OnigDistance )(min_semi_end - range) > reg->anchor_dmax) { + range = min_semi_end - reg->anchor_dmax; + } + if ((OnigDistance )(max_semi_end - start) < reg->anchor_dmin) { + start = max_semi_end - reg->anchor_dmin; + start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, start); + } + if (range > start) goto 
mismatch_no_msa; + } + } + else if (reg->anchor & ANCHOR_SEMI_END_BUF) { + UChar* pre_end = ONIGENC_STEP_BACK(reg->enc, str, end, 1); + + max_semi_end = (UChar* )end; + if (ONIGENC_IS_MBC_NEWLINE(reg->enc, pre_end, end)) { + min_semi_end = pre_end; + +#ifdef USE_CRNL_AS_LINE_TERMINATOR + pre_end = ONIGENC_STEP_BACK(reg->enc, str, pre_end, 1); + if (IS_NOT_NULL(pre_end) && + ONIGENC_IS_MBC_CRNL(reg->enc, pre_end, end)) { + min_semi_end = pre_end; + } +#endif + if (min_semi_end > str && start <= min_semi_end) { + goto end_buf; + } + } + else { + min_semi_end = (UChar* )end; + goto end_buf; + } + } + else if ((reg->anchor & ANCHOR_ANYCHAR_STAR_ML)) { + goto begin_position; + } + } + else if (str == end) { /* empty string */ + static const UChar* address_for_empty_string = (UChar* )""; + +#ifdef ONIG_DEBUG_SEARCH + fprintf(stderr, "onig_search: empty string.\n"); +#endif + + if (reg->threshold_len == 0) { + start = end = str = address_for_empty_string; + s = (UChar* )start; + prev = (UChar* )NULL; + + MATCH_ARG_INIT(msa, option, region, start); +#ifdef USE_COMBINATION_EXPLOSION_CHECK + msa.state_check_buff = (void* )0; + msa.state_check_buff_size = 0; /* NO NEED, for valgrind */ +#endif + MATCH_AND_RETURN_CHECK(end); + goto mismatch; + } + goto mismatch_no_msa; + } + +#ifdef ONIG_DEBUG_SEARCH + fprintf(stderr, "onig_search(apply anchor): end: %d, start: %d, range: %d\n", + (int )(end - str), (int )(start - str), (int )(range - str)); +#endif + + MATCH_ARG_INIT(msa, option, region, orig_start); +#ifdef USE_COMBINATION_EXPLOSION_CHECK + { + int offset = (MIN(start, range) - str); + STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check); + } +#endif + + s = (UChar* )start; + if (range > start) { /* forward search */ + if (s > str) + prev = onigenc_get_prev_char_head(reg->enc, str, s); + else + prev = (UChar* )NULL; + + if (reg->optimize != ONIG_OPTIMIZE_NONE) { + UChar *sch_range, *low, *high, *low_prev; + + sch_range = (UChar* )range; + if (reg->dmax != 
0) { + if (reg->dmax == ONIG_INFINITE_DISTANCE) + sch_range = (UChar* )end; + else { + sch_range += reg->dmax; + if (sch_range > end) sch_range = (UChar* )end; + } + } + + if ((end - start) < reg->threshold_len) + goto mismatch; + + if (reg->dmax != ONIG_INFINITE_DISTANCE) { + do { + if (! forward_search_range(reg, str, end, s, sch_range, + &low, &high, &low_prev)) goto mismatch; + if (s < low) { + s = low; + prev = low_prev; + } + while (s <= high) { + MATCH_AND_RETURN_CHECK(orig_range); + prev = s; + s += enclen(reg->enc, s); + } + } while (s < range); + goto mismatch; + } + else { /* check only. */ + if (! forward_search_range(reg, str, end, s, sch_range, + &low, &high, (UChar** )NULL)) goto mismatch; + + if ((reg->anchor & ANCHOR_ANYCHAR_STAR) != 0) { + do { + MATCH_AND_RETURN_CHECK(orig_range); + prev = s; + s += enclen(reg->enc, s); + + while (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end) && s < range) { + prev = s; + s += enclen(reg->enc, s); + } + } while (s < range); + goto mismatch; + } + } + } + + do { + MATCH_AND_RETURN_CHECK(orig_range); + prev = s; + s += enclen(reg->enc, s); + } while (s < range); + + if (s == range) { /* because empty match with /$/. 
*/ + MATCH_AND_RETURN_CHECK(orig_range); + } + } + else { /* backward search */ +#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE + if (orig_start < end) + orig_start += enclen(reg->enc, orig_start); /* is upper range */ +#endif + + if (reg->optimize != ONIG_OPTIMIZE_NONE) { + UChar *low, *high, *adjrange, *sch_start; + + if (range < end) + adjrange = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, range); + else + adjrange = (UChar* )end; + + if (reg->dmax != ONIG_INFINITE_DISTANCE && + (end - range) >= reg->threshold_len) { + do { + sch_start = s + reg->dmax; + if (sch_start > end) sch_start = (UChar* )end; + if (backward_search_range(reg, str, end, sch_start, range, adjrange, + &low, &high) <= 0) + goto mismatch; + + if (s > high) + s = high; + + while (s >= low) { + prev = onigenc_get_prev_char_head(reg->enc, str, s); + MATCH_AND_RETURN_CHECK(orig_start); + s = prev; + } + } while (s >= range); + goto mismatch; + } + else { /* check only. */ + if ((end - range) < reg->threshold_len) goto mismatch; + + sch_start = s; + if (reg->dmax != 0) { + if (reg->dmax == ONIG_INFINITE_DISTANCE) + sch_start = (UChar* )end; + else { + sch_start += reg->dmax; + if (sch_start > end) sch_start = (UChar* )end; + else + sch_start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, + start, sch_start); + } + } + if (backward_search_range(reg, str, end, sch_start, range, adjrange, + &low, &high) <= 0) goto mismatch; + } + } + + do { + prev = onigenc_get_prev_char_head(reg->enc, str, s); + MATCH_AND_RETURN_CHECK(orig_start); + s = prev; + } while (s >= range); + } + + mismatch: +#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE + if (IS_FIND_LONGEST(reg->options)) { + if (msa.best_len >= 0) { + s = msa.best_s; + goto match; + } + } +#endif + r = ONIG_MISMATCH; + + finish: + MATCH_ARG_FREE(msa); + ONIG_STATE_DEC_THREAD(reg); + + /* If result is mismatch and no FIND_NOT_EMPTY option, + then the region is not setted in match_at(). 
*/ + if (IS_FIND_NOT_EMPTY(reg->options) && region +#ifdef USE_POSIX_API_REGION_OPTION + && !IS_POSIX_REGION(option) +#endif + ) { + onig_region_clear(region); + } + +#ifdef ONIG_DEBUG + if (r != ONIG_MISMATCH) + fprintf(stderr, "onig_search: error %d\n", r); +#endif + return r; + + mismatch_no_msa: + r = ONIG_MISMATCH; + finish_no_msa: + ONIG_STATE_DEC_THREAD(reg); +#ifdef ONIG_DEBUG + if (r != ONIG_MISMATCH) + fprintf(stderr, "onig_search: error %d\n", r); +#endif + return r; + + match: + ONIG_STATE_DEC_THREAD(reg); + MATCH_ARG_FREE(msa); + return s - str; +} + +extern OnigEncoding +onig_get_encoding(regex_t* reg) +{ + return reg->enc; +} + +extern OnigOptionType +onig_get_options(regex_t* reg) +{ + return reg->options; +} + +extern OnigCaseFoldType +onig_get_case_fold_flag(regex_t* reg) +{ + return reg->case_fold_flag; +} + +extern OnigSyntaxType* +onig_get_syntax(regex_t* reg) +{ + return reg->syntax; +} + +extern int +onig_number_of_captures(regex_t* reg) +{ + return reg->num_mem; +} + +extern int +onig_number_of_capture_histories(regex_t* reg) +{ +#ifdef USE_CAPTURE_HISTORY + int i, n; + + n = 0; + for (i = 0; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) { + if (BIT_STATUS_AT(reg->capture_history, i) != 0) + n++; + } + return n; +#else + return 0; +#endif +} + +extern void +onig_copy_encoding(OnigEncoding to, OnigEncoding from) +{ + *to = *from; +} + diff --git a/oniguruma/regext.c b/oniguruma/regext.c new file mode 100644 index 0000000..b1b957b --- /dev/null +++ b/oniguruma/regext.c @@ -0,0 +1,222 @@ +/********************************************************************** + regext.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2008 K.Kosako + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include "regint.h" + +static void +conv_ext0be32(const UChar* s, const UChar* end, UChar* conv) +{ + while (s < end) { + *conv++ = '\0'; + *conv++ = '\0'; + *conv++ = '\0'; + *conv++ = *s++; + } +} + +static void +conv_ext0le32(const UChar* s, const UChar* end, UChar* conv) +{ + while (s < end) { + *conv++ = *s++; + *conv++ = '\0'; + *conv++ = '\0'; + *conv++ = '\0'; + } +} + +static void +conv_ext0be(const UChar* s, const UChar* end, UChar* conv) +{ + while (s < end) { + *conv++ = '\0'; + *conv++ = *s++; + } +} + +static void +conv_ext0le(const UChar* s, const UChar* end, UChar* conv) +{ + while (s < end) { + *conv++ = *s++; + *conv++ = '\0'; + } +} + +static void +conv_swap4bytes(const UChar* s, const UChar* end, UChar* conv) +{ + while (s < end) { + *conv++ = s[3]; + *conv++ = s[2]; + *conv++ = s[1]; + *conv++ = s[0]; + s += 4; + } +} + +static void +conv_swap2bytes(const UChar* s, const UChar* end, UChar* conv) +{ + while (s < end) { + *conv++ = s[1]; + *conv++ = s[0]; + s += 2; + } +} + +static int +conv_encoding(OnigEncoding from, OnigEncoding to, const UChar* s, const UChar* end, + UChar** conv, UChar** conv_end) +{ + int len = end - s; + + if (to == ONIG_ENCODING_UTF16_BE) { + if (from == ONIG_ENCODING_ASCII || from == ONIG_ENCODING_ISO_8859_1) { + *conv = (UChar* )xmalloc(len * 2); + CHECK_NULL_RETURN_MEMERR(*conv); + *conv_end = *conv + (len * 2); + conv_ext0be(s, end, *conv); + return 0; + } + else if (from == ONIG_ENCODING_UTF16_LE) { + swap16: + *conv = (UChar* )xmalloc(len); + CHECK_NULL_RETURN_MEMERR(*conv); + *conv_end = *conv + len; + conv_swap2bytes(s, end, *conv); + return 0; + } + } + else if (to == ONIG_ENCODING_UTF16_LE) { + if (from == ONIG_ENCODING_ASCII || from == ONIG_ENCODING_ISO_8859_1) { + *conv = (UChar* )xmalloc(len * 2); + CHECK_NULL_RETURN_MEMERR(*conv); + *conv_end = *conv + (len * 2); + conv_ext0le(s, end, *conv); + return 0; + } + else if (from == ONIG_ENCODING_UTF16_BE) { + goto swap16; + } + } + if (to == 
ONIG_ENCODING_UTF32_BE) { + if (from == ONIG_ENCODING_ASCII || from == ONIG_ENCODING_ISO_8859_1) { + *conv = (UChar* )xmalloc(len * 4); + CHECK_NULL_RETURN_MEMERR(*conv); + *conv_end = *conv + (len * 4); + conv_ext0be32(s, end, *conv); + return 0; + } + else if (from == ONIG_ENCODING_UTF32_LE) { + swap32: + *conv = (UChar* )xmalloc(len); + CHECK_NULL_RETURN_MEMERR(*conv); + *conv_end = *conv + len; + conv_swap4bytes(s, end, *conv); + return 0; + } + } + else if (to == ONIG_ENCODING_UTF32_LE) { + if (from == ONIG_ENCODING_ASCII || from == ONIG_ENCODING_ISO_8859_1) { + *conv = (UChar* )xmalloc(len * 4); + CHECK_NULL_RETURN_MEMERR(*conv); + *conv_end = *conv + (len * 4); + conv_ext0le32(s, end, *conv); + return 0; + } + else if (from == ONIG_ENCODING_UTF32_BE) { + goto swap32; + } + } + + return ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION; +} + +extern int +onig_new_deluxe(regex_t** reg, const UChar* pattern, const UChar* pattern_end, + OnigCompileInfo* ci, OnigErrorInfo* einfo) +{ + int r; + UChar *cpat, *cpat_end; + + if (IS_NOT_NULL(einfo)) einfo->par = (UChar* )NULL; + + if (ci->pattern_enc != ci->target_enc) { + r = conv_encoding(ci->pattern_enc, ci->target_enc, pattern, pattern_end, + &cpat, &cpat_end); + if (r) return r; + } + else { + cpat = (UChar* )pattern; + cpat_end = (UChar* )pattern_end; + } + + *reg = (regex_t* )xmalloc(sizeof(regex_t)); + if (IS_NULL(*reg)) { + r = ONIGERR_MEMORY; + goto err2; + } + + r = onig_reg_init(*reg, ci->option, ci->case_fold_flag, ci->target_enc, + ci->syntax); + if (r) goto err; + + r = onig_compile(*reg, cpat, cpat_end, einfo); + if (r) { + err: + onig_free(*reg); + *reg = NULL; + } + + err2: + if (cpat != pattern) xfree(cpat); + + return r; +} + +#ifdef USE_RECOMPILE_API +extern int +onig_recompile_deluxe(regex_t* reg, const UChar* pattern, const UChar* pattern_end, + OnigCompileInfo* ci, OnigErrorInfo* einfo) +{ + int r; + regex_t *new_reg; + + r = onig_new_deluxe(&new_reg, pattern, pattern_end, ci, einfo); + if (r) return 
r; + if (ONIG_STATE(reg) == ONIG_STATE_NORMAL) { + onig_transfer(reg, new_reg); + } + else { + onig_chain_link_add(reg, new_reg); + } + return 0; +} +#endif diff --git a/oniguruma/reggnu.c b/oniguruma/reggnu.c new file mode 100644 index 0000000..4bd18c4 --- /dev/null +++ b/oniguruma/reggnu.c @@ -0,0 +1,167 @@ +/********************************************************************** + reggnu.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2008 K.Kosako + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include "regint.h" + +#ifndef ONIGGNU_H +#include "oniggnu.h" +#endif + +extern void +re_free_registers(OnigRegion* r) +{ + /* 0: don't free self */ + onig_region_free(r, 0); +} + +extern int +re_adjust_startpos(regex_t* reg, const char* string, int size, + int startpos, int range) +{ + if (startpos > 0 && ONIGENC_MBC_MAXLEN(reg->enc) != 1 && startpos < size) { + UChar *p; + UChar *s = (UChar* )string + startpos; + + if (range > 0) { + p = onigenc_get_right_adjust_char_head(reg->enc, (UChar* )string, s); + } + else { + p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, (UChar* )string, s); + } + return p - (UChar* )string; + } + + return startpos; +} + +extern int +re_match(regex_t* reg, const char* str, int size, int pos, + struct re_registers* regs) +{ + return onig_match(reg, (UChar* )str, (UChar* )(str + size), + (UChar* )(str + pos), regs, ONIG_OPTION_NONE); +} + +extern int +re_search(regex_t* bufp, const char* string, int size, int startpos, int range, + struct re_registers* regs) +{ + return onig_search(bufp, (UChar* )string, (UChar* )(string + size), + (UChar* )(string + startpos), + (UChar* )(string + startpos + range), + regs, ONIG_OPTION_NONE); +} + +extern int +re_compile_pattern(const char* pattern, int size, regex_t* reg, char* ebuf) +{ + int r; + OnigErrorInfo einfo; + + r = onig_compile(reg, (UChar* )pattern, (UChar* )(pattern + size), &einfo); + if (r != ONIG_NORMAL) { + if (IS_NOT_NULL(ebuf)) + (void )onig_error_code_to_str((UChar* )ebuf, r, &einfo); + } + + return r; +} + +#ifdef USE_RECOMPILE_API +extern int +re_recompile_pattern(const char* pattern, int size, regex_t* reg, char* ebuf) +{ + int r; + OnigErrorInfo einfo; + OnigEncoding enc; + + /* I think encoding and options should be arguments of this function. + But this is adapted to present re.c. 
(2002/11/29) + */ + enc = OnigEncDefaultCharEncoding; + + r = onig_recompile(reg, (UChar* )pattern, (UChar* )(pattern + size), + reg->options, enc, OnigDefaultSyntax, &einfo); + if (r != ONIG_NORMAL) { + if (IS_NOT_NULL(ebuf)) + (void )onig_error_code_to_str((UChar* )ebuf, r, &einfo); + } + return r; +} +#endif + +extern void +re_free_pattern(regex_t* reg) +{ + onig_free(reg); +} + +extern int +re_alloc_pattern(regex_t** reg) +{ + *reg = (regex_t* )xmalloc(sizeof(regex_t)); + if (IS_NULL(*reg)) return ONIGERR_MEMORY; + + return onig_reg_init(*reg, ONIG_OPTION_DEFAULT, + ONIGENC_CASE_FOLD_DEFAULT, + OnigEncDefaultCharEncoding, + OnigDefaultSyntax); +} + +extern void +re_set_casetable(const char* table) +{ + onigenc_set_default_caseconv_table((UChar* )table); +} + +extern void +re_mbcinit(int mb_code) +{ + OnigEncoding enc; + + switch (mb_code) { + case RE_MBCTYPE_ASCII: + enc = ONIG_ENCODING_ASCII; + break; + case RE_MBCTYPE_EUC: + enc = ONIG_ENCODING_EUC_JP; + break; + case RE_MBCTYPE_SJIS: + enc = ONIG_ENCODING_SJIS; + break; + case RE_MBCTYPE_UTF8: + enc = ONIG_ENCODING_UTF8; + break; + default: + return ; + break; + } + + onigenc_set_default_encoding(enc); +} diff --git a/oniguruma/regint.h b/oniguruma/regint.h new file mode 100644 index 0000000..a0ce491 --- /dev/null +++ b/oniguruma/regint.h @@ -0,0 +1,808 @@ +#ifndef REGINT_H +#define REGINT_H +/********************************************************************** + regint.h - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2008 K.Kosako + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* for debug */ +/* #define ONIG_DEBUG_PARSE_TREE */ +/* #define ONIG_DEBUG_COMPILE */ +/* #define ONIG_DEBUG_SEARCH */ +/* #define ONIG_DEBUG_MATCH */ +/* #define ONIG_DONT_OPTIMIZE */ + +/* for byte-code statistical data. */ +/* #define ONIG_DEBUG_STATISTICS */ + +#if defined(ONIG_DEBUG_PARSE_TREE) || defined(ONIG_DEBUG_MATCH) || \ + defined(ONIG_DEBUG_SEARCH) || defined(ONIG_DEBUG_COMPILE) || \ + defined(ONIG_DEBUG_STATISTICS) +#ifndef ONIG_DEBUG +#define ONIG_DEBUG +#endif +#endif + +#if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \ + (defined(__ppc__) && defined(__APPLE__)) || \ + defined(__x86_64) || defined(__x86_64__) || \ + defined(__mc68020__) +#define PLATFORM_UNALIGNED_WORD_ACCESS +#endif + +/* config */ +/* spec. 
config */ +#define USE_NAMED_GROUP +#define USE_SUBEXP_CALL +#define USE_BACKREF_WITH_LEVEL /* \k, \k */ +#define USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT /* /(?:()|())*\2/ */ +#define USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE /* /\n$/ =~ "\n" */ +#define USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR +/* #define USE_RECOMPILE_API */ +/* !!! moved to regenc.h. */ /* #define USE_CRNL_AS_LINE_TERMINATOR */ + +/* internal config */ +#define USE_PARSE_TREE_NODE_RECYCLE +#define USE_OP_PUSH_OR_JUMP_EXACT +#define USE_QTFR_PEEK_NEXT +#define USE_ST_LIBRARY +#define USE_SHARED_CCLASS_TABLE + +#define INIT_MATCH_STACK_SIZE 160 +#define DEFAULT_MATCH_STACK_LIMIT_SIZE 0 /* unlimited */ + +#if defined(__GNUC__) +# define ARG_UNUSED __attribute__ ((unused)) +#else +# define ARG_UNUSED +#endif + +/* */ +/* escape other system UChar definition */ +#include "config.h" +#ifdef ONIG_ESCAPE_UCHAR_COLLISION +#undef ONIG_ESCAPE_UCHAR_COLLISION +#endif + +#define USE_WORD_BEGIN_END /* "\<", "\>" */ +#define USE_CAPTURE_HISTORY +#define USE_VARIABLE_META_CHARS +#define USE_POSIX_API_REGION_OPTION +#define USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE +/* #define USE_COMBINATION_EXPLOSION_CHECK */ /* (X*)* */ + +/* #define USE_MULTI_THREAD_SYSTEM */ +#define THREAD_SYSTEM_INIT /* depend on thread system */ +#define THREAD_SYSTEM_END /* depend on thread system */ +#define THREAD_ATOMIC_START /* depend on thread system */ +#define THREAD_ATOMIC_END /* depend on thread system */ +#define THREAD_PASS /* depend on thread system */ +#define xmalloc malloc +#define xrealloc realloc +#define xcalloc calloc +#define xfree free + +#define CHECK_INTERRUPT_IN_MATCH_AT + +#define st_init_table onig_st_init_table +#define st_init_table_with_size onig_st_init_table_with_size +#define st_init_numtable onig_st_init_numtable +#define st_init_numtable_with_size onig_st_init_numtable_with_size +#define st_init_strtable onig_st_init_strtable +#define st_init_strtable_with_size onig_st_init_strtable_with_size 
+#define st_delete onig_st_delete +#define st_delete_safe onig_st_delete_safe +#define st_insert onig_st_insert +#define st_lookup onig_st_lookup +#define st_foreach onig_st_foreach +#define st_add_direct onig_st_add_direct +#define st_free_table onig_st_free_table +#define st_cleanup_safe onig_st_cleanup_safe +#define st_copy onig_st_copy +#define st_nothing_key_clone onig_st_nothing_key_clone +#define st_nothing_key_free onig_st_nothing_key_free +/* */ +#define onig_st_is_member st_is_member + +#define STATE_CHECK_STRING_THRESHOLD_LEN 7 +#define STATE_CHECK_BUFF_MAX_SIZE 0x4000 + +#define THREAD_PASS_LIMIT_COUNT 8 +#define xmemset memset +#define xmemcpy memcpy +#define xmemmove memmove + +#if defined(_WIN32) && !defined(__GNUC__) +#define xalloca _alloca +#define xvsnprintf _vsnprintf +#else +#define xalloca alloca +#define xvsnprintf vsnprintf +#endif + + +#if defined(USE_RECOMPILE_API) && defined(USE_MULTI_THREAD_SYSTEM) +#define ONIG_STATE_INC(reg) (reg)->state++ +#define ONIG_STATE_DEC(reg) (reg)->state-- + +#define ONIG_STATE_INC_THREAD(reg) do {\ + THREAD_ATOMIC_START;\ + (reg)->state++;\ + THREAD_ATOMIC_END;\ +} while(0) +#define ONIG_STATE_DEC_THREAD(reg) do {\ + THREAD_ATOMIC_START;\ + (reg)->state--;\ + THREAD_ATOMIC_END;\ +} while(0) +#else +#define ONIG_STATE_INC(reg) /* Nothing */ +#define ONIG_STATE_DEC(reg) /* Nothing */ +#define ONIG_STATE_INC_THREAD(reg) /* Nothing */ +#define ONIG_STATE_DEC_THREAD(reg) /* Nothing */ +#endif /* USE_RECOMPILE_API && USE_MULTI_THREAD_SYSTEM */ + +#ifdef HAVE_STDLIB_H +#include +#endif + +#if defined(HAVE_ALLOCA_H) && !defined(__GNUC__) +#include +#endif + +#ifdef HAVE_STRING_H +# include +#else +# include +#endif + +#include +#ifdef HAVE_SYS_TYPES_H +#ifndef __BORLANDC__ +#include +#endif +#endif + +#ifdef __BORLANDC__ +#include +#endif + +#ifdef ONIG_DEBUG +# include +#endif + +#include "regenc.h" + +#ifdef MIN +#undef MIN +#endif +#ifdef MAX +#undef MAX +#endif +#define MIN(a,b) (((a)>(b))?(b):(a)) +#define 
MAX(a,b) (((a)<(b))?(b):(a)) + +#define IS_NULL(p) (((void*)(p)) == (void*)0) +#define IS_NOT_NULL(p) (((void*)(p)) != (void*)0) +#define CHECK_NULL_RETURN(p) if (IS_NULL(p)) return NULL +#define CHECK_NULL_RETURN_MEMERR(p) if (IS_NULL(p)) return ONIGERR_MEMORY +#define NULL_UCHARP ((UChar* )0) + +#ifdef PLATFORM_UNALIGNED_WORD_ACCESS + +#define PLATFORM_GET_INC(val,p,type) do{\ + val = *(type* )p;\ + (p) += sizeof(type);\ +} while(0) + +#else + +#define PLATFORM_GET_INC(val,p,type) do{\ + xmemcpy(&val, (p), sizeof(type));\ + (p) += sizeof(type);\ +} while(0) + +/* sizeof(OnigCodePoint) */ +#define WORD_ALIGNMENT_SIZE SIZEOF_LONG + +#define GET_ALIGNMENT_PAD_SIZE(addr,pad_size) do {\ + (pad_size) = WORD_ALIGNMENT_SIZE \ + - ((unsigned int )(addr) % WORD_ALIGNMENT_SIZE);\ + if ((pad_size) == WORD_ALIGNMENT_SIZE) (pad_size) = 0;\ +} while (0) + +#define ALIGNMENT_RIGHT(addr) do {\ + (addr) += (WORD_ALIGNMENT_SIZE - 1);\ + (addr) -= ((unsigned int )(addr) % WORD_ALIGNMENT_SIZE);\ +} while (0) + +#endif /* PLATFORM_UNALIGNED_WORD_ACCESS */ + +/* stack pop level */ +#define STACK_POP_LEVEL_FREE 0 +#define STACK_POP_LEVEL_MEM_START 1 +#define STACK_POP_LEVEL_ALL 2 + +/* optimize flags */ +#define ONIG_OPTIMIZE_NONE 0 +#define ONIG_OPTIMIZE_EXACT 1 /* Slow Search */ +#define ONIG_OPTIMIZE_EXACT_BM 2 /* Boyer Moore Search */ +#define ONIG_OPTIMIZE_EXACT_BM_NOT_REV 3 /* BM (but not simple match) */ +#define ONIG_OPTIMIZE_EXACT_IC 4 /* Slow Search (ignore case) */ +#define ONIG_OPTIMIZE_MAP 5 /* char map */ + +/* bit status */ +typedef unsigned int BitStatusType; + +#define BIT_STATUS_BITS_NUM (sizeof(BitStatusType) * 8) +#define BIT_STATUS_CLEAR(stats) (stats) = 0 +#define BIT_STATUS_ON_ALL(stats) (stats) = ~((BitStatusType )0) +#define BIT_STATUS_AT(stats,n) \ + ((n) < (int )BIT_STATUS_BITS_NUM ? 
((stats) & (1 << n)) : ((stats) & 1)) + +#define BIT_STATUS_ON_AT(stats,n) do {\ + if ((n) < (int )BIT_STATUS_BITS_NUM) \ + (stats) |= (1 << (n));\ + else\ + (stats) |= 1;\ +} while (0) + +#define BIT_STATUS_ON_AT_SIMPLE(stats,n) do {\ + if ((n) < (int )BIT_STATUS_BITS_NUM)\ + (stats) |= (1 << (n));\ +} while (0) + + +#define INT_MAX_LIMIT ((1UL << (SIZEOF_INT * 8 - 1)) - 1) + +#define DIGITVAL(code) ((code) - '0') +#define ODIGITVAL(code) DIGITVAL(code) +#define XDIGITVAL(enc,code) \ + (ONIGENC_IS_CODE_DIGIT(enc,code) ? DIGITVAL(code) \ + : (ONIGENC_IS_CODE_UPPER(enc,code) ? (code) - 'A' + 10 : (code) - 'a' + 10)) + +#define IS_SINGLELINE(option) ((option) & ONIG_OPTION_SINGLELINE) +#define IS_MULTILINE(option) ((option) & ONIG_OPTION_MULTILINE) +#define IS_IGNORECASE(option) ((option) & ONIG_OPTION_IGNORECASE) +#define IS_EXTEND(option) ((option) & ONIG_OPTION_EXTEND) +#define IS_FIND_LONGEST(option) ((option) & ONIG_OPTION_FIND_LONGEST) +#define IS_FIND_NOT_EMPTY(option) ((option) & ONIG_OPTION_FIND_NOT_EMPTY) +#define IS_FIND_CONDITION(option) ((option) & \ + (ONIG_OPTION_FIND_LONGEST | ONIG_OPTION_FIND_NOT_EMPTY)) +#define IS_NOTBOL(option) ((option) & ONIG_OPTION_NOTBOL) +#define IS_NOTEOL(option) ((option) & ONIG_OPTION_NOTEOL) +#define IS_POSIX_REGION(option) ((option) & ONIG_OPTION_POSIX_REGION) + +/* OP_SET_OPTION is required for these options. +#define IS_DYNAMIC_OPTION(option) \ + (((option) & (ONIG_OPTION_MULTILINE | ONIG_OPTION_IGNORECASE)) != 0) +*/ +/* ignore-case and multibyte status are included in compiled code. 
*/ +#define IS_DYNAMIC_OPTION(option) 0 + +#define DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag) \ + ((case_fold_flag) & ~INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) + +#define REPEAT_INFINITE -1 +#define IS_REPEAT_INFINITE(n) ((n) == REPEAT_INFINITE) + +/* bitset */ +#define BITS_PER_BYTE 8 +#define SINGLE_BYTE_SIZE (1 << BITS_PER_BYTE) +#define BITS_IN_ROOM (sizeof(Bits) * BITS_PER_BYTE) +#define BITSET_SIZE (SINGLE_BYTE_SIZE / BITS_IN_ROOM) + +#ifdef PLATFORM_UNALIGNED_WORD_ACCESS +typedef unsigned int Bits; +#else +typedef unsigned char Bits; +#endif +typedef Bits BitSet[BITSET_SIZE]; +typedef Bits* BitSetRef; + +#define SIZE_BITSET sizeof(BitSet) + +#define BITSET_CLEAR(bs) do {\ + int i;\ + for (i = 0; i < (int )BITSET_SIZE; i++) { (bs)[i] = 0; } \ +} while (0) + +#define BS_ROOM(bs,pos) (bs)[pos / BITS_IN_ROOM] +#define BS_BIT(pos) (1 << (pos % BITS_IN_ROOM)) + +#define BITSET_AT(bs, pos) (BS_ROOM(bs,pos) & BS_BIT(pos)) +#define BITSET_SET_BIT(bs, pos) BS_ROOM(bs,pos) |= BS_BIT(pos) +#define BITSET_CLEAR_BIT(bs, pos) BS_ROOM(bs,pos) &= ~(BS_BIT(pos)) +#define BITSET_INVERT_BIT(bs, pos) BS_ROOM(bs,pos) ^= BS_BIT(pos) + +/* bytes buffer */ +typedef struct _BBuf { + UChar* p; + unsigned int used; + unsigned int alloc; +} BBuf; + +#define BBUF_INIT(buf,size) onig_bbuf_init((BBuf* )(buf), (size)) + +#define BBUF_SIZE_INC(buf,inc) do{\ + (buf)->alloc += (inc);\ + (buf)->p = (UChar* )xrealloc((buf)->p, (buf)->alloc);\ + if (IS_NULL((buf)->p)) return(ONIGERR_MEMORY);\ +} while (0) + +#define BBUF_EXPAND(buf,low) do{\ + do { (buf)->alloc *= 2; } while ((buf)->alloc < (unsigned int )low);\ + (buf)->p = (UChar* )xrealloc((buf)->p, (buf)->alloc);\ + if (IS_NULL((buf)->p)) return(ONIGERR_MEMORY);\ +} while (0) + +#define BBUF_ENSURE_SIZE(buf,size) do{\ + unsigned int new_alloc = (buf)->alloc;\ + while (new_alloc < (unsigned int )(size)) { new_alloc *= 2; }\ + if ((buf)->alloc != new_alloc) {\ + (buf)->p = (UChar* )xrealloc((buf)->p, new_alloc);\ + if (IS_NULL((buf)->p)) 
return(ONIGERR_MEMORY);\ + (buf)->alloc = new_alloc;\ + }\ +} while (0) + +#define BBUF_WRITE(buf,pos,bytes,n) do{\ + int used = (pos) + (n);\ + if ((buf)->alloc < (unsigned int )used) BBUF_EXPAND((buf),used);\ + xmemcpy((buf)->p + (pos), (bytes), (n));\ + if ((buf)->used < (unsigned int )used) (buf)->used = used;\ +} while (0) + +#define BBUF_WRITE1(buf,pos,byte) do{\ + int used = (pos) + 1;\ + if ((buf)->alloc < (unsigned int )used) BBUF_EXPAND((buf),used);\ + (buf)->p[(pos)] = (byte);\ + if ((buf)->used < (unsigned int )used) (buf)->used = used;\ +} while (0) + +#define BBUF_ADD(buf,bytes,n) BBUF_WRITE((buf),(buf)->used,(bytes),(n)) +#define BBUF_ADD1(buf,byte) BBUF_WRITE1((buf),(buf)->used,(byte)) +#define BBUF_GET_ADD_ADDRESS(buf) ((buf)->p + (buf)->used) +#define BBUF_GET_OFFSET_POS(buf) ((buf)->used) + +/* from < to */ +#define BBUF_MOVE_RIGHT(buf,from,to,n) do {\ + if ((unsigned int )((to)+(n)) > (buf)->alloc) BBUF_EXPAND((buf),(to) + (n));\ + xmemmove((buf)->p + (to), (buf)->p + (from), (n));\ + if ((unsigned int )((to)+(n)) > (buf)->used) (buf)->used = (to) + (n);\ +} while (0) + +/* from > to */ +#define BBUF_MOVE_LEFT(buf,from,to,n) do {\ + xmemmove((buf)->p + (to), (buf)->p + (from), (n));\ +} while (0) + +/* from > to */ +#define BBUF_MOVE_LEFT_REDUCE(buf,from,to) do {\ + xmemmove((buf)->p + (to), (buf)->p + (from), (buf)->used - (from));\ + (buf)->used -= (from - to);\ +} while (0) + +#define BBUF_INSERT(buf,pos,bytes,n) do {\ + if (pos >= (buf)->used) {\ + BBUF_WRITE(buf,pos,bytes,n);\ + }\ + else {\ + BBUF_MOVE_RIGHT((buf),(pos),(pos) + (n),((buf)->used - (pos)));\ + xmemcpy((buf)->p + (pos), (bytes), (n));\ + }\ +} while (0) + +#define BBUF_GET_BYTE(buf, pos) (buf)->p[(pos)] + + +#define ANCHOR_BEGIN_BUF (1<<0) +#define ANCHOR_BEGIN_LINE (1<<1) +#define ANCHOR_BEGIN_POSITION (1<<2) +#define ANCHOR_END_BUF (1<<3) +#define ANCHOR_SEMI_END_BUF (1<<4) +#define ANCHOR_END_LINE (1<<5) + +#define ANCHOR_WORD_BOUND (1<<6) +#define ANCHOR_NOT_WORD_BOUND 
(1<<7) +#define ANCHOR_WORD_BEGIN (1<<8) +#define ANCHOR_WORD_END (1<<9) +#define ANCHOR_PREC_READ (1<<10) +#define ANCHOR_PREC_READ_NOT (1<<11) +#define ANCHOR_LOOK_BEHIND (1<<12) +#define ANCHOR_LOOK_BEHIND_NOT (1<<13) + +#define ANCHOR_ANYCHAR_STAR (1<<14) /* ".*" optimize info */ +#define ANCHOR_ANYCHAR_STAR_ML (1<<15) /* ".*" optimize info (multi-line) */ + +/* operation code */ +enum OpCode { + OP_FINISH = 0, /* matching process terminator (no more alternative) */ + OP_END = 1, /* pattern code terminator (success end) */ + + OP_EXACT1 = 2, /* single byte, N = 1 */ + OP_EXACT2, /* single byte, N = 2 */ + OP_EXACT3, /* single byte, N = 3 */ + OP_EXACT4, /* single byte, N = 4 */ + OP_EXACT5, /* single byte, N = 5 */ + OP_EXACTN, /* single byte */ + OP_EXACTMB2N1, /* mb-length = 2 N = 1 */ + OP_EXACTMB2N2, /* mb-length = 2 N = 2 */ + OP_EXACTMB2N3, /* mb-length = 2 N = 3 */ + OP_EXACTMB2N, /* mb-length = 2 */ + OP_EXACTMB3N, /* mb-length = 3 */ + OP_EXACTMBN, /* other length */ + + OP_EXACT1_IC, /* single byte, N = 1, ignore case */ + OP_EXACTN_IC, /* single byte, ignore case */ + + OP_CCLASS, + OP_CCLASS_MB, + OP_CCLASS_MIX, + OP_CCLASS_NOT, + OP_CCLASS_MB_NOT, + OP_CCLASS_MIX_NOT, + OP_CCLASS_NODE, /* pointer to CClassNode node */ + + OP_ANYCHAR, /* "." */ + OP_ANYCHAR_ML, /* "." 
multi-line */ + OP_ANYCHAR_STAR, /* ".*" */ + OP_ANYCHAR_ML_STAR, /* ".*" multi-line */ + OP_ANYCHAR_STAR_PEEK_NEXT, + OP_ANYCHAR_ML_STAR_PEEK_NEXT, + + OP_WORD, + OP_NOT_WORD, + OP_WORD_BOUND, + OP_NOT_WORD_BOUND, + OP_WORD_BEGIN, + OP_WORD_END, + + OP_BEGIN_BUF, + OP_END_BUF, + OP_BEGIN_LINE, + OP_END_LINE, + OP_SEMI_END_BUF, + OP_BEGIN_POSITION, + + OP_BACKREF1, + OP_BACKREF2, + OP_BACKREFN, + OP_BACKREFN_IC, + OP_BACKREF_MULTI, + OP_BACKREF_MULTI_IC, + OP_BACKREF_WITH_LEVEL, /* \k, \k */ + + OP_MEMORY_START, + OP_MEMORY_START_PUSH, /* push back-tracker to stack */ + OP_MEMORY_END_PUSH, /* push back-tracker to stack */ + OP_MEMORY_END_PUSH_REC, /* push back-tracker to stack */ + OP_MEMORY_END, + OP_MEMORY_END_REC, /* push marker to stack */ + + OP_FAIL, /* pop stack and move */ + OP_JUMP, + OP_PUSH, + OP_POP, + OP_PUSH_OR_JUMP_EXACT1, /* if match exact then push, else jump. */ + OP_PUSH_IF_PEEK_NEXT, /* if match exact then push, else none. */ + OP_REPEAT, /* {n,m} */ + OP_REPEAT_NG, /* {n,m}? (non greedy) */ + OP_REPEAT_INC, + OP_REPEAT_INC_NG, /* non greedy */ + OP_REPEAT_INC_SG, /* search and get in stack */ + OP_REPEAT_INC_NG_SG, /* search and get in stack (non greedy) */ + OP_NULL_CHECK_START, /* null loop checker start */ + OP_NULL_CHECK_END, /* null loop checker end */ + OP_NULL_CHECK_END_MEMST, /* null loop checker end (with capture status) */ + OP_NULL_CHECK_END_MEMST_PUSH, /* with capture status and push check-end */ + + OP_PUSH_POS, /* (?=...) start */ + OP_POP_POS, /* (?=...) end */ + OP_PUSH_POS_NOT, /* (?!...) start */ + OP_FAIL_POS, /* (?!...) end */ + OP_PUSH_STOP_BT, /* (?>...) start */ + OP_POP_STOP_BT, /* (?>...) end */ + OP_LOOK_BEHIND, /* (?<=...) start (no needs end opcode) */ + OP_PUSH_LOOK_BEHIND_NOT, /* (? 
*/ + OP_RETURN, + + OP_STATE_CHECK_PUSH, /* combination explosion check and push */ + OP_STATE_CHECK_PUSH_OR_JUMP, /* check ok -> push, else jump */ + OP_STATE_CHECK, /* check only */ + OP_STATE_CHECK_ANYCHAR_STAR, + OP_STATE_CHECK_ANYCHAR_ML_STAR, + + /* no need: IS_DYNAMIC_OPTION() == 0 */ + OP_SET_OPTION_PUSH, /* set option and push recover option */ + OP_SET_OPTION /* set option */ +}; + +typedef int RelAddrType; +typedef int AbsAddrType; +typedef int LengthType; +typedef int RepeatNumType; +typedef short int MemNumType; +typedef short int StateCheckNumType; +typedef void* PointerType; + +#define SIZE_OPCODE 1 +#define SIZE_RELADDR sizeof(RelAddrType) +#define SIZE_ABSADDR sizeof(AbsAddrType) +#define SIZE_LENGTH sizeof(LengthType) +#define SIZE_MEMNUM sizeof(MemNumType) +#define SIZE_STATE_CHECK_NUM sizeof(StateCheckNumType) +#define SIZE_REPEATNUM sizeof(RepeatNumType) +#define SIZE_OPTION sizeof(OnigOptionType) +#define SIZE_CODE_POINT sizeof(OnigCodePoint) +#define SIZE_POINTER sizeof(PointerType) + + +#define GET_RELADDR_INC(addr,p) PLATFORM_GET_INC(addr, p, RelAddrType) +#define GET_ABSADDR_INC(addr,p) PLATFORM_GET_INC(addr, p, AbsAddrType) +#define GET_LENGTH_INC(len,p) PLATFORM_GET_INC(len, p, LengthType) +#define GET_MEMNUM_INC(num,p) PLATFORM_GET_INC(num, p, MemNumType) +#define GET_REPEATNUM_INC(num,p) PLATFORM_GET_INC(num, p, RepeatNumType) +#define GET_OPTION_INC(option,p) PLATFORM_GET_INC(option, p, OnigOptionType) +#define GET_POINTER_INC(ptr,p) PLATFORM_GET_INC(ptr, p, PointerType) +#define GET_STATE_CHECK_NUM_INC(num,p) PLATFORM_GET_INC(num, p, StateCheckNumType) + +/* code point's address must be aligned address. 
*/ +#define GET_CODE_POINT(code,p) code = *((OnigCodePoint* )(p)) +#define GET_BYTE_INC(byte,p) do{\ + byte = *(p);\ + (p)++;\ +} while(0) + + +/* op-code + arg size */ +#define SIZE_OP_ANYCHAR_STAR SIZE_OPCODE +#define SIZE_OP_ANYCHAR_STAR_PEEK_NEXT (SIZE_OPCODE + 1) +#define SIZE_OP_JUMP (SIZE_OPCODE + SIZE_RELADDR) +#define SIZE_OP_PUSH (SIZE_OPCODE + SIZE_RELADDR) +#define SIZE_OP_POP SIZE_OPCODE +#define SIZE_OP_PUSH_OR_JUMP_EXACT1 (SIZE_OPCODE + SIZE_RELADDR + 1) +#define SIZE_OP_PUSH_IF_PEEK_NEXT (SIZE_OPCODE + SIZE_RELADDR + 1) +#define SIZE_OP_REPEAT_INC (SIZE_OPCODE + SIZE_MEMNUM) +#define SIZE_OP_REPEAT_INC_NG (SIZE_OPCODE + SIZE_MEMNUM) +#define SIZE_OP_PUSH_POS SIZE_OPCODE +#define SIZE_OP_PUSH_POS_NOT (SIZE_OPCODE + SIZE_RELADDR) +#define SIZE_OP_POP_POS SIZE_OPCODE +#define SIZE_OP_FAIL_POS SIZE_OPCODE +#define SIZE_OP_SET_OPTION (SIZE_OPCODE + SIZE_OPTION) +#define SIZE_OP_SET_OPTION_PUSH (SIZE_OPCODE + SIZE_OPTION) +#define SIZE_OP_FAIL SIZE_OPCODE +#define SIZE_OP_MEMORY_START (SIZE_OPCODE + SIZE_MEMNUM) +#define SIZE_OP_MEMORY_START_PUSH (SIZE_OPCODE + SIZE_MEMNUM) +#define SIZE_OP_MEMORY_END_PUSH (SIZE_OPCODE + SIZE_MEMNUM) +#define SIZE_OP_MEMORY_END_PUSH_REC (SIZE_OPCODE + SIZE_MEMNUM) +#define SIZE_OP_MEMORY_END (SIZE_OPCODE + SIZE_MEMNUM) +#define SIZE_OP_MEMORY_END_REC (SIZE_OPCODE + SIZE_MEMNUM) +#define SIZE_OP_PUSH_STOP_BT SIZE_OPCODE +#define SIZE_OP_POP_STOP_BT SIZE_OPCODE +#define SIZE_OP_NULL_CHECK_START (SIZE_OPCODE + SIZE_MEMNUM) +#define SIZE_OP_NULL_CHECK_END (SIZE_OPCODE + SIZE_MEMNUM) +#define SIZE_OP_LOOK_BEHIND (SIZE_OPCODE + SIZE_LENGTH) +#define SIZE_OP_PUSH_LOOK_BEHIND_NOT (SIZE_OPCODE + SIZE_RELADDR + SIZE_LENGTH) +#define SIZE_OP_FAIL_LOOK_BEHIND_NOT SIZE_OPCODE +#define SIZE_OP_CALL (SIZE_OPCODE + SIZE_ABSADDR) +#define SIZE_OP_RETURN SIZE_OPCODE + +#ifdef USE_COMBINATION_EXPLOSION_CHECK +#define SIZE_OP_STATE_CHECK (SIZE_OPCODE + SIZE_STATE_CHECK_NUM) +#define SIZE_OP_STATE_CHECK_PUSH (SIZE_OPCODE + 
SIZE_STATE_CHECK_NUM + SIZE_RELADDR) +#define SIZE_OP_STATE_CHECK_PUSH_OR_JUMP (SIZE_OPCODE + SIZE_STATE_CHECK_NUM + SIZE_RELADDR) +#define SIZE_OP_STATE_CHECK_ANYCHAR_STAR (SIZE_OPCODE + SIZE_STATE_CHECK_NUM) +#endif + +#define MC_ESC(syn) (syn)->meta_char_table.esc +#define MC_ANYCHAR(syn) (syn)->meta_char_table.anychar +#define MC_ANYTIME(syn) (syn)->meta_char_table.anytime +#define MC_ZERO_OR_ONE_TIME(syn) (syn)->meta_char_table.zero_or_one_time +#define MC_ONE_OR_MORE_TIME(syn) (syn)->meta_char_table.one_or_more_time +#define MC_ANYCHAR_ANYTIME(syn) (syn)->meta_char_table.anychar_anytime + +#define IS_MC_ESC_CODE(code, syn) \ + ((code) == MC_ESC(syn) && \ + !IS_SYNTAX_OP2((syn), ONIG_SYN_OP2_INEFFECTIVE_ESCAPE)) + + +#define SYN_POSIX_COMMON_OP \ + ( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_POSIX_BRACKET | \ + ONIG_SYN_OP_DECIMAL_BACKREF | \ + ONIG_SYN_OP_BRACKET_CC | ONIG_SYN_OP_ASTERISK_ZERO_INF | \ + ONIG_SYN_OP_LINE_ANCHOR | \ + ONIG_SYN_OP_ESC_CONTROL_CHARS ) + +#define SYN_GNU_REGEX_OP \ + ( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_BRACKET_CC | \ + ONIG_SYN_OP_POSIX_BRACKET | ONIG_SYN_OP_DECIMAL_BACKREF | \ + ONIG_SYN_OP_BRACE_INTERVAL | ONIG_SYN_OP_LPAREN_SUBEXP | \ + ONIG_SYN_OP_VBAR_ALT | \ + ONIG_SYN_OP_ASTERISK_ZERO_INF | ONIG_SYN_OP_PLUS_ONE_INF | \ + ONIG_SYN_OP_QMARK_ZERO_ONE | \ + ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR | ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR | \ + ONIG_SYN_OP_ESC_W_WORD | \ + ONIG_SYN_OP_ESC_B_WORD_BOUND | ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END | \ + ONIG_SYN_OP_ESC_S_WHITE_SPACE | ONIG_SYN_OP_ESC_D_DIGIT | \ + ONIG_SYN_OP_LINE_ANCHOR ) + +#define SYN_GNU_REGEX_BV \ + ( ONIG_SYN_CONTEXT_INDEP_ANCHORS | ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS | \ + ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS | ONIG_SYN_ALLOW_INVALID_INTERVAL | \ + ONIG_SYN_BACKSLASH_ESCAPE_IN_CC | ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC ) + + +#define NCCLASS_FLAGS(cc) ((cc)->flags) +#define NCCLASS_FLAG_SET(cc,flag) (NCCLASS_FLAGS(cc) |= (flag)) +#define NCCLASS_FLAG_CLEAR(cc,flag) 
(NCCLASS_FLAGS(cc) &= ~(flag)) +#define IS_NCCLASS_FLAG_ON(cc,flag) ((NCCLASS_FLAGS(cc) & (flag)) != 0) + +/* cclass node */ +#define FLAG_NCCLASS_NOT (1<<0) +#define FLAG_NCCLASS_SHARE (1<<1) + +#define NCCLASS_SET_NOT(nd) NCCLASS_FLAG_SET(nd, FLAG_NCCLASS_NOT) +#define NCCLASS_SET_SHARE(nd) NCCLASS_FLAG_SET(nd, FLAG_NCCLASS_SHARE) +#define NCCLASS_CLEAR_NOT(nd) NCCLASS_FLAG_CLEAR(nd, FLAG_NCCLASS_NOT) +#define IS_NCCLASS_NOT(nd) IS_NCCLASS_FLAG_ON(nd, FLAG_NCCLASS_NOT) +#define IS_NCCLASS_SHARE(nd) IS_NCCLASS_FLAG_ON(nd, FLAG_NCCLASS_SHARE) + +typedef struct { + int type; + /* struct _Node* next; */ + /* unsigned int flags; */ +} NodeBase; + +typedef struct { + NodeBase base; + unsigned int flags; + BitSet bs; + BBuf* mbuf; /* multi-byte info or NULL */ +} CClassNode; + +typedef long OnigStackIndex; + +typedef struct _OnigStackType { + unsigned int type; + union { + struct { + UChar *pcode; /* byte code position */ + UChar *pstr; /* string position */ + UChar *pstr_prev; /* previous char position of pstr */ +#ifdef USE_COMBINATION_EXPLOSION_CHECK + unsigned int state_check; +#endif + } state; + struct { + int count; /* for OP_REPEAT_INC, OP_REPEAT_INC_NG */ + UChar *pcode; /* byte code position (head of repeated target) */ + int num; /* repeat id */ + } repeat; + struct { + OnigStackIndex si; /* index of stack */ + } repeat_inc; + struct { + int num; /* memory num */ + UChar *pstr; /* start/end position */ + /* Following information is setted, if this stack type is MEM-START */ + OnigStackIndex start; /* prev. info (for backtrack "(...)*" ) */ + OnigStackIndex end; /* prev. 
info (for backtrack "(...)*" ) */ + } mem; + struct { + int num; /* null check id */ + UChar *pstr; /* start position */ + } null_check; +#ifdef USE_SUBEXP_CALL + struct { + UChar *ret_addr; /* byte code position */ + int num; /* null check id */ + UChar *pstr; /* string position */ + } call_frame; +#endif + } u; +} OnigStackType; + +typedef struct { + void* stack_p; + int stack_n; + OnigOptionType options; + OnigRegion* region; + const UChar* start; /* search start position (for \G: BEGIN_POSITION) */ +#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE + int best_len; /* for ONIG_OPTION_FIND_LONGEST */ + UChar* best_s; +#endif +#ifdef USE_COMBINATION_EXPLOSION_CHECK + void* state_check_buff; + int state_check_buff_size; +#endif +} OnigMatchArg; + + +#define IS_CODE_SB_WORD(enc,code) \ + (ONIGENC_IS_CODE_ASCII(code) && ONIGENC_IS_CODE_WORD(enc,code)) + +#ifdef ONIG_DEBUG + +typedef struct { + short int opcode; + char* name; + short int arg_type; +} OnigOpInfoType; + +extern OnigOpInfoType OnigOpInfo[]; + +extern void onig_print_compiled_byte_code P_((FILE* f, UChar* bp, UChar** nextp, OnigEncoding enc)); + +#ifdef ONIG_DEBUG_STATISTICS +extern void onig_statistics_init P_((void)); +extern void onig_print_statistics P_((FILE* f)); +#endif +#endif + +extern UChar* onig_error_code_to_format P_((int code)); +extern void onig_snprintf_with_pattern PV_((UChar buf[], int bufsize, OnigEncoding enc, UChar* pat, UChar* pat_end, const UChar *fmt, ...)); +extern int onig_bbuf_init P_((BBuf* buf, int size)); +extern int onig_compile P_((regex_t* reg, const UChar* pattern, const UChar* pattern_end, OnigErrorInfo* einfo)); +extern void onig_chain_reduce P_((regex_t* reg)); +extern void onig_chain_link_add P_((regex_t* to, regex_t* add)); +extern void onig_transfer P_((regex_t* to, regex_t* from)); +extern int onig_is_code_in_cc P_((OnigEncoding enc, OnigCodePoint code, CClassNode* cc)); +extern int onig_is_code_in_cc_len P_((int enclen, OnigCodePoint code, CClassNode* cc)); + +/* strend 
hash */ +typedef void hash_table_type; +typedef unsigned long hash_data_type; + +extern hash_table_type* onig_st_init_strend_table_with_size P_((int size)); +extern int onig_st_lookup_strend P_((hash_table_type* table, const UChar* str_key, const UChar* end_key, hash_data_type *value)); +extern int onig_st_insert_strend P_((hash_table_type* table, const UChar* str_key, const UChar* end_key, hash_data_type value)); + +/* encoding property management */ +#define PROPERTY_LIST_ADD_PROP(Name, CR) \ + r = onigenc_property_list_add_property((UChar* )Name, CR,\ + &PropertyNameTable, &PropertyList, &PropertyListNum,\ + &PropertyListSize);\ + if (r != 0) goto end + +#define PROPERTY_LIST_INIT_CHECK \ + if (PropertyInited == 0) {\ + int r = onigenc_property_list_init(init_property_list);\ + if (r != 0) return r;\ + } + +extern int onigenc_property_list_add_property P_((UChar* name, const OnigCodePoint* prop, hash_table_type **table, const OnigCodePoint*** plist, int *pnum, int *psize)); + +typedef int (*ONIGENC_INIT_PROPERTY_LIST_FUNC_TYPE)(void); + +extern int onigenc_property_list_init P_((ONIGENC_INIT_PROPERTY_LIST_FUNC_TYPE)); + +#endif /* REGINT_H */ diff --git a/oniguruma/regparse.c b/oniguruma/regparse.c new file mode 100644 index 0000000..0113130 --- /dev/null +++ b/oniguruma/regparse.c @@ -0,0 +1,5552 @@ +/********************************************************************** + regparse.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2008 K.Kosako + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "regparse.h" +#include "st.h" + +#define WARN_BUFSIZE 256 + +#define CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS + + +OnigSyntaxType OnigSyntaxRuby = { + (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY | + ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 | + ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_CONTROL_CHARS | + ONIG_SYN_OP_ESC_C_CONTROL ) + & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END ) + , ( ONIG_SYN_OP2_QMARK_GROUP_EFFECT | + ONIG_SYN_OP2_OPTION_RUBY | + ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP | ONIG_SYN_OP2_ESC_K_NAMED_BACKREF | + ONIG_SYN_OP2_ESC_G_SUBEXP_CALL | + ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY | + ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT | + ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT | + ONIG_SYN_OP2_CCLASS_SET_OP | ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL | + ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META | ONIG_SYN_OP2_ESC_V_VTAB | + ONIG_SYN_OP2_ESC_H_XDIGIT ) + , ( SYN_GNU_REGEX_BV | + ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV | + ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND | + 
ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP | + ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME | + ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY | + ONIG_SYN_WARN_CC_OP_NOT_ESCAPED | + ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT ) + , ONIG_OPTION_NONE + , + { + (OnigCodePoint )'\\' /* esc */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ + } +}; + +OnigSyntaxType* OnigDefaultSyntax = ONIG_SYNTAX_RUBY; + +extern void onig_null_warn(const char* s ARG_UNUSED) { } + +#ifdef DEFAULT_WARN_FUNCTION +static OnigWarnFunc onig_warn = (OnigWarnFunc )DEFAULT_WARN_FUNCTION; +#else +static OnigWarnFunc onig_warn = onig_null_warn; +#endif + +#ifdef DEFAULT_VERB_WARN_FUNCTION +static OnigWarnFunc onig_verb_warn = (OnigWarnFunc )DEFAULT_VERB_WARN_FUNCTION; +#else +static OnigWarnFunc onig_verb_warn = onig_null_warn; +#endif + +extern void onig_set_warn_func(OnigWarnFunc f) +{ + onig_warn = f; +} + +extern void onig_set_verb_warn_func(OnigWarnFunc f) +{ + onig_verb_warn = f; +} + +static void +bbuf_free(BBuf* bbuf) +{ + if (IS_NOT_NULL(bbuf)) { + if (IS_NOT_NULL(bbuf->p)) xfree(bbuf->p); + xfree(bbuf); + } +} + +static int /* clone 'from' into a newly allocated BBuf stored in *rto; returns 0 on success, otherwise an error code with *rto set to NULL */ +bbuf_clone(BBuf** rto, BBuf* from) +{ + int r; + BBuf *to; + + *rto = to = (BBuf* )xmalloc(sizeof(BBuf)); + CHECK_NULL_RETURN_MEMERR(to); + r = BBUF_INIT(to, from->alloc); + if (r != 0) { xfree(to); *rto = (BBuf* )NULL; return r; } /* release the half-built clone instead of leaking it and handing it to the caller */ + to->used = from->used; + xmemcpy(to->p, from->p, from->used); + return 0; +} + +#define BACKREF_REL_TO_ABS(rel_no, env) \ + ((env)->num_mem + 1 + (rel_no)) + +#define ONOFF(v,f,negative) (negative) ? ((v) &= ~(f)) : ((v) |= (f)) + +#define MBCODE_START_POS(enc) \ + (OnigCodePoint )(ONIGENC_MBC_MINLEN(enc) > 1 ?
0 : 0x80) + +#define SET_ALL_MULTI_BYTE_RANGE(enc, pbuf) \ + add_code_range_to_buf(pbuf, MBCODE_START_POS(enc), ~((OnigCodePoint )0)) + +#define ADD_ALL_MULTI_BYTE_RANGE(enc, mbuf) do {\ + if (! ONIGENC_IS_SINGLEBYTE(enc)) {\ + r = SET_ALL_MULTI_BYTE_RANGE(enc, &(mbuf));\ + if (r) return r;\ + }\ +} while (0) + + +#define BITSET_IS_EMPTY(bs,empty) do {\ + int i;\ + empty = 1;\ + for (i = 0; i < (int )BITSET_SIZE; i++) {\ + if ((bs)[i] != 0) {\ + empty = 0; break;\ + }\ + }\ +} while (0) + +static void +bitset_set_range(BitSetRef bs, int from, int to) +{ + int i; + for (i = from; i <= to && i < SINGLE_BYTE_SIZE; i++) { + BITSET_SET_BIT(bs, i); + } +} + +#if 0 +static void +bitset_set_all(BitSetRef bs) +{ + int i; + for (i = 0; i < BITSET_SIZE; i++) { bs[i] = ~((Bits )0); } +} +#endif + +static void +bitset_invert(BitSetRef bs) +{ + int i; + for (i = 0; i < (int )BITSET_SIZE; i++) { bs[i] = ~(bs[i]); } +} + +static void +bitset_invert_to(BitSetRef from, BitSetRef to) +{ + int i; + for (i = 0; i < (int )BITSET_SIZE; i++) { to[i] = ~(from[i]); } +} + +static void +bitset_and(BitSetRef dest, BitSetRef bs) +{ + int i; + for (i = 0; i < (int )BITSET_SIZE; i++) { dest[i] &= bs[i]; } +} + +static void +bitset_or(BitSetRef dest, BitSetRef bs) +{ + int i; + for (i = 0; i < (int )BITSET_SIZE; i++) { dest[i] |= bs[i]; } +} + +static void +bitset_copy(BitSetRef dest, BitSetRef bs) +{ + int i; + for (i = 0; i < (int )BITSET_SIZE; i++) { dest[i] = bs[i]; } +} + +extern int +onig_strncmp(const UChar* s1, const UChar* s2, int n) +{ + int x; + + while (n-- > 0) { + x = *s2++ - *s1++; + if (x) return x; + } + return 0; +} + +extern void +onig_strcpy(UChar* dest, const UChar* src, const UChar* end) +{ + int len = end - src; + if (len > 0) { + xmemcpy(dest, src, len); + dest[len] = (UChar )0; + } +} + +#ifdef USE_NAMED_GROUP +static UChar* +strdup_with_null(OnigEncoding enc, UChar* s, UChar* end) +{ + int slen, term_len, i; + UChar *r; + + slen = end - s; + term_len = 
ONIGENC_MBC_MINLEN(enc); + + r = (UChar* )xmalloc(slen + term_len); + CHECK_NULL_RETURN(r); + xmemcpy(r, s, slen); + + for (i = 0; i < term_len; i++) + r[slen + i] = (UChar )0; + + return r; +} +#endif + +/* scan pattern methods */ +#define PEND_VALUE 0 + +#define PFETCH_READY UChar* pfetch_prev +#define PEND (p < end ? 0 : 1) +#define PUNFETCH p = pfetch_prev +#define PINC do { \ + pfetch_prev = p; \ + p += ONIGENC_MBC_ENC_LEN(enc, p); \ +} while (0) +#define PFETCH(c) do { \ + c = ONIGENC_MBC_TO_CODE(enc, p, end); \ + pfetch_prev = p; \ + p += ONIGENC_MBC_ENC_LEN(enc, p); \ +} while (0) + +#define PPEEK (p < end ? ONIGENC_MBC_TO_CODE(enc, p, end) : PEND_VALUE) +#define PPEEK_IS(c) (PPEEK == (OnigCodePoint )c) + +static UChar* +strcat_capa(UChar* dest, UChar* dest_end, const UChar* src, const UChar* src_end, + int capa) +{ + UChar* r; + + if (dest) + r = (UChar* )xrealloc(dest, capa + 1); + else + r = (UChar* )xmalloc(capa + 1); + + CHECK_NULL_RETURN(r); + onig_strcpy(r + (dest_end - dest), src, src_end); + return r; +} + +/* dest on static area */ +static UChar* +strcat_capa_from_static(UChar* dest, UChar* dest_end, + const UChar* src, const UChar* src_end, int capa) +{ + UChar* r; + + r = (UChar* )xmalloc(capa + 1); + CHECK_NULL_RETURN(r); + onig_strcpy(r, dest, dest_end); + onig_strcpy(r + (dest_end - dest), src, src_end); + return r; +} + + +#ifdef USE_ST_LIBRARY + +typedef struct { + UChar* s; + UChar* end; +} st_str_end_key; + +static int +str_end_cmp(st_str_end_key* x, st_str_end_key* y) +{ + UChar *p, *q; + int c; + + if ((x->end - x->s) != (y->end - y->s)) + return 1; + + p = x->s; + q = y->s; + while (p < x->end) { + c = (int )*p - (int )*q; + if (c != 0) return c; + + p++; q++; + } + + return 0; +} + +static int +str_end_hash(st_str_end_key* x) +{ + UChar *p; + int val = 0; + + p = x->s; + while (p < x->end) { + val = val * 997 + (int )*p++; + } + + return val + (val >> 5); +} + +extern hash_table_type* +onig_st_init_strend_table_with_size(int size) +{ 
+ static struct st_hash_type hashType = { + str_end_cmp, + str_end_hash, + }; + + return (hash_table_type* ) + onig_st_init_table_with_size(&hashType, size); +} + +extern int +onig_st_lookup_strend(hash_table_type* table, const UChar* str_key, + const UChar* end_key, hash_data_type *value) +{ + st_str_end_key key; + + key.s = (UChar* )str_key; + key.end = (UChar* )end_key; + + return onig_st_lookup(table, (st_data_t )(&key), value); +} + +extern int /* insert 'value' under a heap-allocated st_str_end_key holding str_key/end_key; returns onig_st_insert()'s result, or ONIGERR_MEMORY when the key cannot be allocated */ +onig_st_insert_strend(hash_table_type* table, const UChar* str_key, + const UChar* end_key, hash_data_type value) +{ + st_str_end_key* key; + int result; + + key = (st_str_end_key* )xmalloc(sizeof(st_str_end_key)); CHECK_NULL_RETURN_MEMERR(key); /* allocation may fail: report it instead of dereferencing NULL below */ + key->s = (UChar* )str_key; + key->end = (UChar* )end_key; + result = onig_st_insert(table, (st_data_t )key, value); + if (result) { /* non-zero result: the key allocated here is released again */ + xfree(key); + } + return result; +} + +#endif /* USE_ST_LIBRARY */ + + +#ifdef USE_NAMED_GROUP + +#define INIT_NAME_BACKREFS_ALLOC_NUM 8 + +typedef struct { + UChar* name; + int name_len; /* byte length */ + int back_num; /* number of backrefs */ + int back_alloc; + int back_ref1; + int* back_refs; +} NameEntry; + +#ifdef USE_ST_LIBRARY + +typedef st_table NameTable; +typedef st_data_t HashDataType; /* 1.6 st.h doesn't define st_data_t type */ + +#define NAMEBUF_SIZE 24 +#define NAMEBUF_SIZE_1 25 + +#ifdef ONIG_DEBUG +static int +i_print_name_entry(UChar* key, NameEntry* e, void* arg) +{ + int i; + FILE* fp = (FILE* )arg; + + fprintf(fp, "%s: ", e->name); + if (e->back_num == 0) + fputs("-", fp); + else if (e->back_num == 1) + fprintf(fp, "%d", e->back_ref1); + else { + for (i = 0; i < e->back_num; i++) { + if (i > 0) fprintf(fp, ", "); + fprintf(fp, "%d", e->back_refs[i]); + } + } + fputs("\n", fp); + return ST_CONTINUE; +} + +extern int +onig_print_names(FILE* fp, regex_t* reg) +{ + NameTable* t = (NameTable* )reg->name_table; + + if (IS_NOT_NULL(t)) { + fprintf(fp, "name table\n"); + onig_st_foreach(t, i_print_name_entry, (HashDataType )fp); + fputs("\n", fp); + } +
return 0; +} +#endif /* ONIG_DEBUG */ + +static int +i_free_name_entry(UChar* key, NameEntry* e, void* arg ARG_UNUSED) +{ + xfree(e->name); + if (IS_NOT_NULL(e->back_refs)) xfree(e->back_refs); + xfree(key); + xfree(e); + return ST_DELETE; +} + +static int +names_clear(regex_t* reg) +{ + NameTable* t = (NameTable* )reg->name_table; + + if (IS_NOT_NULL(t)) { + onig_st_foreach(t, i_free_name_entry, 0); + } + return 0; +} + +extern int +onig_names_free(regex_t* reg) +{ + int r; + NameTable* t; + + r = names_clear(reg); + if (r) return r; + + t = (NameTable* )reg->name_table; + if (IS_NOT_NULL(t)) onig_st_free_table(t); + reg->name_table = (void* )NULL; + return 0; +} + +static NameEntry* +name_find(regex_t* reg, const UChar* name, const UChar* name_end) +{ + NameEntry* e; + NameTable* t = (NameTable* )reg->name_table; + + e = (NameEntry* )NULL; + if (IS_NOT_NULL(t)) { + onig_st_lookup_strend(t, name, name_end, (HashDataType* )((void* )(&e))); + } + return e; +} + +typedef struct { + int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*); + regex_t* reg; + void* arg; + int ret; + OnigEncoding enc; +} INamesArg; + +static int +i_names(UChar* key ARG_UNUSED, NameEntry* e, INamesArg* arg) +{ + int r = (*(arg->func))(e->name, + e->name + e->name_len, + e->back_num, + (e->back_num > 1 ? e->back_refs : &(e->back_ref1)), + arg->reg, arg->arg); + if (r != 0) { + arg->ret = r; + return ST_STOP; + } + return ST_CONTINUE; +} + +extern int +onig_foreach_name(regex_t* reg, + int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg) +{ + INamesArg narg; + NameTable* t = (NameTable* )reg->name_table; + + narg.ret = 0; + if (IS_NOT_NULL(t)) { + narg.func = func; + narg.reg = reg; + narg.arg = arg; + narg.enc = reg->enc; /* should be pattern encoding. 
*/ + onig_st_foreach(t, i_names, (HashDataType )&narg); + } + return narg.ret; +} + +static int +i_renumber_name(UChar* key ARG_UNUSED, NameEntry* e, GroupNumRemap* map) +{ + int i; + + if (e->back_num > 1) { + for (i = 0; i < e->back_num; i++) { + e->back_refs[i] = map[e->back_refs[i]].new_val; + } + } + else if (e->back_num == 1) { + e->back_ref1 = map[e->back_ref1].new_val; + } + + return ST_CONTINUE; +} + +extern int +onig_renumber_name_table(regex_t* reg, GroupNumRemap* map) +{ + NameTable* t = (NameTable* )reg->name_table; + + if (IS_NOT_NULL(t)) { + onig_st_foreach(t, i_renumber_name, (HashDataType )map); + } + return 0; +} + + +extern int +onig_number_of_names(regex_t* reg) +{ + NameTable* t = (NameTable* )reg->name_table; + + if (IS_NOT_NULL(t)) + return t->num_entries; + else + return 0; +} + +#else /* USE_ST_LIBRARY */ + +#define INIT_NAMES_ALLOC_NUM 8 + +typedef struct { + NameEntry* e; + int num; + int alloc; +} NameTable; + +#ifdef ONIG_DEBUG +extern int +onig_print_names(FILE* fp, regex_t* reg) +{ + int i, j; + NameEntry* e; + NameTable* t = (NameTable* )reg->name_table; + + if (IS_NOT_NULL(t) && t->num > 0) { + fprintf(fp, "name table\n"); + for (i = 0; i < t->num; i++) { + e = &(t->e[i]); + fprintf(fp, "%s: ", e->name); + if (e->back_num == 0) { + fputs("-", fp); + } + else if (e->back_num == 1) { + fprintf(fp, "%d", e->back_ref1); + } + else { + for (j = 0; j < e->back_num; j++) { + if (j > 0) fprintf(fp, ", "); + fprintf(fp, "%d", e->back_refs[j]); + } + } + fputs("\n", fp); + } + fputs("\n", fp); + } + return 0; +} +#endif + +static int +names_clear(regex_t* reg) +{ + int i; + NameEntry* e; + NameTable* t = (NameTable* )reg->name_table; + + if (IS_NOT_NULL(t)) { + for (i = 0; i < t->num; i++) { + e = &(t->e[i]); + if (IS_NOT_NULL(e->name)) { + xfree(e->name); + e->name = NULL; + e->name_len = 0; + e->back_num = 0; + e->back_alloc = 0; + if (IS_NOT_NULL(e->back_refs)) xfree(e->back_refs); + e->back_refs = (int* )NULL; + } + } + if 
(IS_NOT_NULL(t->e)) { + xfree(t->e); + t->e = NULL; + } + t->num = 0; + } + return 0; +} + +extern int +onig_names_free(regex_t* reg) +{ + int r; + NameTable* t; + + r = names_clear(reg); + if (r) return r; + + t = (NameTable* )reg->name_table; + if (IS_NOT_NULL(t)) xfree(t); + reg->name_table = NULL; + return 0; +} + +static NameEntry* +name_find(regex_t* reg, UChar* name, UChar* name_end) +{ + int i, len; + NameEntry* e; + NameTable* t = (NameTable* )reg->name_table; + + if (IS_NOT_NULL(t)) { + len = name_end - name; + for (i = 0; i < t->num; i++) { + e = &(t->e[i]); + if (len == e->name_len && onig_strncmp(name, e->name, len) == 0) + return e; + } + } + return (NameEntry* )NULL; +} + +extern int +onig_foreach_name(regex_t* reg, + int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg) +{ + int i, r; + NameEntry* e; + NameTable* t = (NameTable* )reg->name_table; + + if (IS_NOT_NULL(t)) { + for (i = 0; i < t->num; i++) { + e = &(t->e[i]); + r = (*func)(e->name, e->name + e->name_len, e->back_num, + (e->back_num > 1 ? 
e->back_refs : &(e->back_ref1)), + reg, arg); + if (r != 0) return r; + } + } + return 0; +} + +extern int +onig_number_of_names(regex_t* reg) +{ + NameTable* t = (NameTable* )reg->name_table; + + if (IS_NOT_NULL(t)) + return t->num; + else + return 0; +} + +#endif /* else USE_ST_LIBRARY */ + +static int +name_add(regex_t* reg, UChar* name, UChar* name_end, int backref, ScanEnv* env) +{ + int alloc; + NameEntry* e; + NameTable* t = (NameTable* )reg->name_table; + + if (name_end - name <= 0) + return ONIGERR_EMPTY_GROUP_NAME; + + e = name_find(reg, name, name_end); + if (IS_NULL(e)) { +#ifdef USE_ST_LIBRARY + if (IS_NULL(t)) { + t = onig_st_init_strend_table_with_size(5); + reg->name_table = (void* )t; + } + e = (NameEntry* )xmalloc(sizeof(NameEntry)); + CHECK_NULL_RETURN_MEMERR(e); + + e->name = strdup_with_null(reg->enc, name, name_end); + if (IS_NULL(e->name)) { + xfree(e); return ONIGERR_MEMORY; + } + onig_st_insert_strend(t, e->name, (e->name + (name_end - name)), + (HashDataType )e); + + e->name_len = name_end - name; + e->back_num = 0; + e->back_alloc = 0; + e->back_refs = (int* )NULL; + +#else + + if (IS_NULL(t)) { + alloc = INIT_NAMES_ALLOC_NUM; + t = (NameTable* )xmalloc(sizeof(NameTable)); + CHECK_NULL_RETURN_MEMERR(t); + t->e = NULL; + t->alloc = 0; + t->num = 0; + + t->e = (NameEntry* )xmalloc(sizeof(NameEntry) * alloc); + if (IS_NULL(t->e)) { + xfree(t); + return ONIGERR_MEMORY; + } + t->alloc = alloc; + reg->name_table = t; + goto clear; + } + else if (t->num == t->alloc) { + int i; + + alloc = t->alloc * 2; + t->e = (NameEntry* )xrealloc(t->e, sizeof(NameEntry) * alloc); + CHECK_NULL_RETURN_MEMERR(t->e); + t->alloc = alloc; + + clear: + for (i = t->num; i < t->alloc; i++) { + t->e[i].name = NULL; + t->e[i].name_len = 0; + t->e[i].back_num = 0; + t->e[i].back_alloc = 0; + t->e[i].back_refs = (int* )NULL; + } + } + e = &(t->e[t->num]); + t->num++; + e->name = strdup_with_null(reg->enc, name, name_end); + if (IS_NULL(e->name)) return ONIGERR_MEMORY; + 
e->name_len = name_end - name; +#endif + } + + if (e->back_num >= 1 && + ! IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME)) { + onig_scan_env_set_error_string(env, ONIGERR_MULTIPLEX_DEFINED_NAME, + name, name_end); + return ONIGERR_MULTIPLEX_DEFINED_NAME; + } + + e->back_num++; + if (e->back_num == 1) { + e->back_ref1 = backref; + } + else { + if (e->back_num == 2) { + alloc = INIT_NAME_BACKREFS_ALLOC_NUM; + e->back_refs = (int* )xmalloc(sizeof(int) * alloc); + CHECK_NULL_RETURN_MEMERR(e->back_refs); + e->back_alloc = alloc; + e->back_refs[0] = e->back_ref1; + e->back_refs[1] = backref; + } + else { + if (e->back_num > e->back_alloc) { + alloc = e->back_alloc * 2; + e->back_refs = (int* )xrealloc(e->back_refs, sizeof(int) * alloc); + CHECK_NULL_RETURN_MEMERR(e->back_refs); + e->back_alloc = alloc; + } + e->back_refs[e->back_num - 1] = backref; + } + } + + return 0; +} + +extern int +onig_name_to_group_numbers(regex_t* reg, const UChar* name, + const UChar* name_end, int** nums) +{ + NameEntry* e = name_find(reg, name, name_end); + + if (IS_NULL(e)) return ONIGERR_UNDEFINED_NAME_REFERENCE; + + switch (e->back_num) { + case 0: + break; + case 1: + *nums = &(e->back_ref1); + break; + default: + *nums = e->back_refs; + break; + } + return e->back_num; +} + +extern int +onig_name_to_backref_number(regex_t* reg, const UChar* name, + const UChar* name_end, OnigRegion *region) +{ + int i, n, *nums; + + n = onig_name_to_group_numbers(reg, name, name_end, &nums); + if (n < 0) + return n; + else if (n == 0) + return ONIGERR_PARSER_BUG; + else if (n == 1) + return nums[0]; + else { + if (IS_NOT_NULL(region)) { + for (i = n - 1; i >= 0; i--) { + if (region->beg[nums[i]] != ONIG_REGION_NOTPOS) + return nums[i]; + } + } + return nums[n - 1]; + } +} + +#else /* USE_NAMED_GROUP */ + +extern int +onig_name_to_group_numbers(regex_t* reg, const UChar* name, + const UChar* name_end, int** nums) +{ + return ONIG_NO_SUPPORT_CONFIG; +} + +extern int 
+onig_name_to_backref_number(regex_t* reg, const UChar* name, + const UChar* name_end, OnigRegion* region) +{ + return ONIG_NO_SUPPORT_CONFIG; +} + +extern int +onig_foreach_name(regex_t* reg, + int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg) +{ + return ONIG_NO_SUPPORT_CONFIG; +} + +extern int +onig_number_of_names(regex_t* reg) +{ + return 0; +} +#endif /* else USE_NAMED_GROUP */ + +extern int +onig_noname_group_capture_is_active(regex_t* reg) +{ + if (ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_DONT_CAPTURE_GROUP)) + return 0; + +#ifdef USE_NAMED_GROUP + if (onig_number_of_names(reg) > 0 && + IS_SYNTAX_BV(reg->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) && + !ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP)) { + return 0; + } +#endif + + return 1; +} + + +#define INIT_SCANENV_MEMNODES_ALLOC_SIZE 16 + +static void +scan_env_clear(ScanEnv* env) +{ + int i; + + BIT_STATUS_CLEAR(env->capture_history); + BIT_STATUS_CLEAR(env->bt_mem_start); + BIT_STATUS_CLEAR(env->bt_mem_end); + BIT_STATUS_CLEAR(env->backrefed_mem); + env->error = (UChar* )NULL; + env->error_end = (UChar* )NULL; + env->num_call = 0; + env->num_mem = 0; +#ifdef USE_NAMED_GROUP + env->num_named = 0; +#endif + env->mem_alloc = 0; + env->mem_nodes_dynamic = (Node** )NULL; + + for (i = 0; i < SCANENV_MEMNODES_SIZE; i++) + env->mem_nodes_static[i] = NULL_NODE; + +#ifdef USE_COMBINATION_EXPLOSION_CHECK + env->num_comb_exp_check = 0; + env->comb_exp_max_regnum = 0; + env->curr_max_regnum = 0; + env->has_recursion = 0; +#endif +} + +static int +scan_env_add_mem_entry(ScanEnv* env) +{ + int i, need, alloc; + Node** p; + + need = env->num_mem + 1; + if (need >= SCANENV_MEMNODES_SIZE) { + if (env->mem_alloc <= need) { + if (IS_NULL(env->mem_nodes_dynamic)) { + alloc = INIT_SCANENV_MEMNODES_ALLOC_SIZE; + p = (Node** )xmalloc(sizeof(Node*) * alloc); + xmemcpy(p, env->mem_nodes_static, + sizeof(Node*) * SCANENV_MEMNODES_SIZE); + } + else { + alloc = env->mem_alloc * 2; + p = 
(Node** )xrealloc(env->mem_nodes_dynamic, sizeof(Node*) * alloc); + } + CHECK_NULL_RETURN_MEMERR(p); + + for (i = env->num_mem + 1; i < alloc; i++) + p[i] = NULL_NODE; + + env->mem_nodes_dynamic = p; + env->mem_alloc = alloc; + } + } + + env->num_mem++; + return env->num_mem; +} + +static int +scan_env_set_mem_node(ScanEnv* env, int num, Node* node) +{ + if (env->num_mem >= num) + SCANENV_MEM_NODES(env)[num] = node; + else + return ONIGERR_PARSER_BUG; + return 0; +} + + +#ifdef USE_PARSE_TREE_NODE_RECYCLE +typedef struct _FreeNode { + struct _FreeNode* next; +} FreeNode; + +static FreeNode* FreeNodeList = (FreeNode* )NULL; +#endif + +extern void +onig_node_free(Node* node) +{ + start: + if (IS_NULL(node)) return ; + + switch (NTYPE(node)) { + case NT_STR: + if (NSTR(node)->capa != 0 && + IS_NOT_NULL(NSTR(node)->s) && NSTR(node)->s != NSTR(node)->buf) { + xfree(NSTR(node)->s); + } + break; + + case NT_LIST: + case NT_ALT: + onig_node_free(NCAR(node)); + { + Node* next_node = NCDR(node); + +#ifdef USE_PARSE_TREE_NODE_RECYCLE + { + FreeNode* n = (FreeNode* )node; + + THREAD_ATOMIC_START; + n->next = FreeNodeList; + FreeNodeList = n; + THREAD_ATOMIC_END; + } +#else + xfree(node); +#endif + node = next_node; + goto start; + } + break; + + case NT_CCLASS: + { + CClassNode* cc = NCCLASS(node); + + if (IS_NCCLASS_SHARE(cc)) return ; + if (cc->mbuf) + bbuf_free(cc->mbuf); + } + break; + + case NT_QTFR: + if (NQTFR(node)->target) + onig_node_free(NQTFR(node)->target); + break; + + case NT_ENCLOSE: + if (NENCLOSE(node)->target) + onig_node_free(NENCLOSE(node)->target); + break; + + case NT_BREF: + if (IS_NOT_NULL(NBREF(node)->back_dynamic)) + xfree(NBREF(node)->back_dynamic); + break; + + case NT_ANCHOR: + if (NANCHOR(node)->target) + onig_node_free(NANCHOR(node)->target); + break; + } + +#ifdef USE_PARSE_TREE_NODE_RECYCLE + { + FreeNode* n = (FreeNode* )node; + + THREAD_ATOMIC_START; + n->next = FreeNodeList; + FreeNodeList = n; + THREAD_ATOMIC_END; + } +#else + xfree(node); 
+#endif +} + +#ifdef USE_PARSE_TREE_NODE_RECYCLE +extern int +onig_free_node_list(void) +{ + FreeNode* n; + + /* THREAD_ATOMIC_START; */ + while (IS_NOT_NULL(FreeNodeList)) { + n = FreeNodeList; + FreeNodeList = FreeNodeList->next; + xfree(n); + } + /* THREAD_ATOMIC_END; */ + return 0; +} +#endif + +static Node* +node_new(void) +{ + Node* node; + +#ifdef USE_PARSE_TREE_NODE_RECYCLE + THREAD_ATOMIC_START; + if (IS_NOT_NULL(FreeNodeList)) { + node = (Node* )FreeNodeList; + FreeNodeList = FreeNodeList->next; + THREAD_ATOMIC_END; + return node; + } + THREAD_ATOMIC_END; +#endif + + node = (Node* )xmalloc(sizeof(Node)); + /* xmemset(node, 0, sizeof(Node)); */ + return node; +} + + +static void +initialize_cclass(CClassNode* cc) +{ + BITSET_CLEAR(cc->bs); + /* cc->base.flags = 0; */ + cc->flags = 0; + cc->mbuf = NULL; +} + +static Node* +node_new_cclass(void) +{ + Node* node = node_new(); + CHECK_NULL_RETURN(node); + + SET_NTYPE(node, NT_CCLASS); + initialize_cclass(NCCLASS(node)); + return node; +} + +static Node* +node_new_cclass_by_codepoint_range(int not, OnigCodePoint sb_out, + const OnigCodePoint ranges[]) +{ + int n, i; + CClassNode* cc; + OnigCodePoint j; + + Node* node = node_new_cclass(); + CHECK_NULL_RETURN(node); + + cc = NCCLASS(node); + if (not != 0) NCCLASS_SET_NOT(cc); + + BITSET_CLEAR(cc->bs); + if (sb_out > 0 && IS_NOT_NULL(ranges)) { + n = ONIGENC_CODE_RANGE_NUM(ranges); + for (i = 0; i < n; i++) { + for (j = ONIGENC_CODE_RANGE_FROM(ranges, i); + j <= (OnigCodePoint )ONIGENC_CODE_RANGE_TO(ranges, i); j++) { + if (j >= sb_out) goto sb_end; + + BITSET_SET_BIT(cc->bs, j); + } + } + } + + sb_end: + if (IS_NULL(ranges)) { + is_null: + cc->mbuf = NULL; + } + else { + BBuf* bbuf; + + n = ONIGENC_CODE_RANGE_NUM(ranges); + if (n == 0) goto is_null; + + bbuf = (BBuf* )xmalloc(sizeof(BBuf)); + CHECK_NULL_RETURN(bbuf); + bbuf->alloc = n + 1; + bbuf->used = n + 1; + bbuf->p = (UChar* )((void* )ranges); + + cc->mbuf = bbuf; + } + + return node; +} + +static Node* 
+node_new_ctype(int type, int not) +{ + Node* node = node_new(); + CHECK_NULL_RETURN(node); + + SET_NTYPE(node, NT_CTYPE); + NCTYPE(node)->ctype = type; + NCTYPE(node)->not = not; + return node; +} + +static Node* +node_new_anychar(void) +{ + Node* node = node_new(); + CHECK_NULL_RETURN(node); + + SET_NTYPE(node, NT_CANY); + return node; +} + +static Node* +node_new_list(Node* left, Node* right) +{ + Node* node = node_new(); + CHECK_NULL_RETURN(node); + + SET_NTYPE(node, NT_LIST); + NCAR(node) = left; + NCDR(node) = right; + return node; +} + +extern Node* +onig_node_new_list(Node* left, Node* right) +{ + return node_new_list(left, right); +} + +extern Node* +onig_node_list_add(Node* list, Node* x) +{ + Node *n; + + n = onig_node_new_list(x, NULL); + if (IS_NULL(n)) return NULL_NODE; + + if (IS_NOT_NULL(list)) { + while (IS_NOT_NULL(NCDR(list))) + list = NCDR(list); + + NCDR(list) = n; + } + + return n; +} + +extern Node* +onig_node_new_alt(Node* left, Node* right) +{ + Node* node = node_new(); + CHECK_NULL_RETURN(node); + + SET_NTYPE(node, NT_ALT); + NCAR(node) = left; + NCDR(node) = right; + return node; +} + +extern Node* +onig_node_new_anchor(int type) +{ + Node* node = node_new(); + CHECK_NULL_RETURN(node); + + SET_NTYPE(node, NT_ANCHOR); + NANCHOR(node)->type = type; + NANCHOR(node)->target = NULL; + NANCHOR(node)->char_len = -1; + return node; +} + +static Node* +node_new_backref(int back_num, int* backrefs, int by_name, +#ifdef USE_BACKREF_WITH_LEVEL + int exist_level, int nest_level, +#endif + ScanEnv* env) +{ + int i; + Node* node = node_new(); + + CHECK_NULL_RETURN(node); + + SET_NTYPE(node, NT_BREF); + NBREF(node)->state = 0; + NBREF(node)->back_num = back_num; + NBREF(node)->back_dynamic = (int* )NULL; + if (by_name != 0) + NBREF(node)->state |= NST_NAME_REF; + +#ifdef USE_BACKREF_WITH_LEVEL + if (exist_level != 0) { + NBREF(node)->state |= NST_NEST_LEVEL; + NBREF(node)->nest_level = nest_level; + } +#endif + + for (i = 0; i < back_num; i++) { + if 
(backrefs[i] <= env->num_mem && + IS_NULL(SCANENV_MEM_NODES(env)[backrefs[i]])) { + NBREF(node)->state |= NST_RECURSION; /* /...(\1).../ */ + break; + } + } + + if (back_num <= NODE_BACKREFS_SIZE) { + for (i = 0; i < back_num; i++) + NBREF(node)->back_static[i] = backrefs[i]; + } + else { + int* p = (int* )xmalloc(sizeof(int) * back_num); + if (IS_NULL(p)) { + onig_node_free(node); + return NULL; + } + NBREF(node)->back_dynamic = p; + for (i = 0; i < back_num; i++) + p[i] = backrefs[i]; + } + return node; +} + +#ifdef USE_SUBEXP_CALL +static Node* +node_new_call(UChar* name, UChar* name_end, int gnum) +{ + Node* node = node_new(); + CHECK_NULL_RETURN(node); + + SET_NTYPE(node, NT_CALL); + NCALL(node)->state = 0; + NCALL(node)->target = NULL_NODE; + NCALL(node)->name = name; + NCALL(node)->name_end = name_end; + NCALL(node)->group_num = gnum; /* call by number if gnum != 0 */ + return node; +} +#endif + +static Node* +node_new_quantifier(int lower, int upper, int by_number) +{ + Node* node = node_new(); + CHECK_NULL_RETURN(node); + + SET_NTYPE(node, NT_QTFR); + NQTFR(node)->state = 0; + NQTFR(node)->target = NULL; + NQTFR(node)->lower = lower; + NQTFR(node)->upper = upper; + NQTFR(node)->greedy = 1; + NQTFR(node)->target_empty_info = NQ_TARGET_ISNOT_EMPTY; + NQTFR(node)->head_exact = NULL_NODE; + NQTFR(node)->next_head_exact = NULL_NODE; + NQTFR(node)->is_refered = 0; + if (by_number != 0) + NQTFR(node)->state |= NST_BY_NUMBER; + +#ifdef USE_COMBINATION_EXPLOSION_CHECK + NQTFR(node)->comb_exp_check_num = 0; +#endif + + return node; +} + +static Node* +node_new_enclose(int type) +{ + Node* node = node_new(); + CHECK_NULL_RETURN(node); + + SET_NTYPE(node, NT_ENCLOSE); + NENCLOSE(node)->type = type; + NENCLOSE(node)->state = 0; + NENCLOSE(node)->regnum = 0; + NENCLOSE(node)->option = 0; + NENCLOSE(node)->target = NULL; + NENCLOSE(node)->call_addr = -1; + NENCLOSE(node)->opt_count = 0; + return node; +} + +extern Node* +onig_node_new_enclose(int type) +{ + return 
node_new_enclose(type); +} + +static Node* +node_new_enclose_memory(OnigOptionType option, int is_named) +{ + Node* node = node_new_enclose(ENCLOSE_MEMORY); + CHECK_NULL_RETURN(node); + if (is_named != 0) + SET_ENCLOSE_STATUS(node, NST_NAMED_GROUP); + +#ifdef USE_SUBEXP_CALL + NENCLOSE(node)->option = option; +#endif + return node; +} + +static Node* +node_new_option(OnigOptionType option) +{ + Node* node = node_new_enclose(ENCLOSE_OPTION); + CHECK_NULL_RETURN(node); + NENCLOSE(node)->option = option; + return node; +} + +extern int +onig_node_str_cat(Node* node, const UChar* s, const UChar* end) +{ + int addlen = end - s; + + if (addlen > 0) { + int len = NSTR(node)->end - NSTR(node)->s; + + if (NSTR(node)->capa > 0 || (len + addlen > NODE_STR_BUF_SIZE - 1)) { + UChar* p; + int capa = len + addlen + NODE_STR_MARGIN; + + if (capa <= NSTR(node)->capa) { + onig_strcpy(NSTR(node)->s + len, s, end); + } + else { + if (NSTR(node)->s == NSTR(node)->buf) + p = strcat_capa_from_static(NSTR(node)->s, NSTR(node)->end, + s, end, capa); + else + p = strcat_capa(NSTR(node)->s, NSTR(node)->end, s, end, capa); + + CHECK_NULL_RETURN_MEMERR(p); + NSTR(node)->s = p; + NSTR(node)->capa = capa; + } + } + else { + onig_strcpy(NSTR(node)->s + len, s, end); + } + NSTR(node)->end = NSTR(node)->s + len + addlen; + } + + return 0; +} + +extern int +onig_node_str_set(Node* node, const UChar* s, const UChar* end) +{ + onig_node_str_clear(node); + return onig_node_str_cat(node, s, end); +} + +static int +node_str_cat_char(Node* node, UChar c) +{ + UChar s[1]; + + s[0] = c; + return onig_node_str_cat(node, s, s + 1); +} + +extern void +onig_node_conv_to_str_node(Node* node, int flag) +{ + SET_NTYPE(node, NT_STR); + NSTR(node)->flag = flag; + NSTR(node)->capa = 0; + NSTR(node)->s = NSTR(node)->buf; + NSTR(node)->end = NSTR(node)->buf; +} + +extern void +onig_node_str_clear(Node* node) +{ + if (NSTR(node)->capa != 0 && + IS_NOT_NULL(NSTR(node)->s) && NSTR(node)->s != NSTR(node)->buf) { + 
xfree(NSTR(node)->s); + } + + NSTR(node)->capa = 0; + NSTR(node)->flag = 0; + NSTR(node)->s = NSTR(node)->buf; + NSTR(node)->end = NSTR(node)->buf; +} + +static Node* +node_new_str(const UChar* s, const UChar* end) +{ + Node* node = node_new(); + CHECK_NULL_RETURN(node); + + SET_NTYPE(node, NT_STR); + NSTR(node)->capa = 0; + NSTR(node)->flag = 0; + NSTR(node)->s = NSTR(node)->buf; + NSTR(node)->end = NSTR(node)->buf; + if (onig_node_str_cat(node, s, end)) { + onig_node_free(node); + return NULL; + } + return node; +} + +extern Node* +onig_node_new_str(const UChar* s, const UChar* end) +{ + return node_new_str(s, end); +} + +static Node* +node_new_str_raw(UChar* s, UChar* end) +{ + Node* node = node_new_str(s, end); + NSTRING_SET_RAW(node); + return node; +} + +static Node* +node_new_empty(void) +{ + return node_new_str(NULL, NULL); +} + +static Node* +node_new_str_raw_char(UChar c) +{ + UChar p[1]; + + p[0] = c; + return node_new_str_raw(p, p + 1); +} + +static Node* +str_node_split_last_char(StrNode* sn, OnigEncoding enc) +{ + const UChar *p; + Node* n = NULL_NODE; + + if (sn->end > sn->s) { + p = onigenc_get_prev_char_head(enc, sn->s, sn->end); + if (p && p > sn->s) { /* can be splitted. */ + n = node_new_str(p, sn->end); + if ((sn->flag & NSTR_RAW) != 0) + NSTRING_SET_RAW(n); + sn->end = (UChar* )p; + } + } + return n; +} + +static int +str_node_can_be_split(StrNode* sn, OnigEncoding enc) +{ + if (sn->end > sn->s) { + return ((enclen(enc, sn->s) < sn->end - sn->s) ? 
1 : 0); + } + return 0; +} + +#ifdef USE_PAD_TO_SHORT_BYTE_CHAR +static int +node_str_head_pad(StrNode* sn, int num, UChar val) +{ + UChar buf[NODE_STR_BUF_SIZE]; + int i, len; + + len = sn->end - sn->s; + onig_strcpy(buf, sn->s, sn->end); + onig_strcpy(&(sn->s[num]), buf, buf + len); + sn->end += num; + + for (i = 0; i < num; i++) { + sn->s[i] = val; + } +} +#endif + +extern int +onig_scan_unsigned_number(UChar** src, const UChar* end, OnigEncoding enc) +{ + unsigned int num, val; + OnigCodePoint c; + UChar* p = *src; + PFETCH_READY; + + num = 0; + while (!PEND) { + PFETCH(c); + if (ONIGENC_IS_CODE_DIGIT(enc, c)) { + val = (unsigned int )DIGITVAL(c); + if ((INT_MAX_LIMIT - val) / 10UL < num) + return -1; /* overflow */ + + num = num * 10 + val; + } + else { + PUNFETCH; + break; + } + } + *src = p; + return num; +} + +static int +scan_unsigned_hexadecimal_number(UChar** src, UChar* end, int maxlen, + OnigEncoding enc) +{ + OnigCodePoint c; + unsigned int num, val; + UChar* p = *src; + PFETCH_READY; + + num = 0; + while (!PEND && maxlen-- != 0) { + PFETCH(c); + if (ONIGENC_IS_CODE_XDIGIT(enc, c)) { + val = (unsigned int )XDIGITVAL(enc,c); + if ((INT_MAX_LIMIT - val) / 16UL < num) + return -1; /* overflow */ + + num = (num << 4) + XDIGITVAL(enc,c); + } + else { + PUNFETCH; + break; + } + } + *src = p; + return num; +} + +static int +scan_unsigned_octal_number(UChar** src, UChar* end, int maxlen, + OnigEncoding enc) +{ + OnigCodePoint c; + unsigned int num, val; + UChar* p = *src; + PFETCH_READY; + + num = 0; + while (!PEND && maxlen-- != 0) { + PFETCH(c); + if (ONIGENC_IS_CODE_DIGIT(enc, c) && c < '8') { + val = ODIGITVAL(c); + if ((INT_MAX_LIMIT - val) / 8UL < num) + return -1; /* overflow */ + + num = (num << 3) + val; + } + else { + PUNFETCH; + break; + } + } + *src = p; + return num; +} + + +#define BBUF_WRITE_CODE_POINT(bbuf,pos,code) \ + BBUF_WRITE(bbuf, pos, &(code), SIZE_CODE_POINT) + +/* data format: + [n][from-1][to-1][from-2][to-2] ... 
[from-n][to-n] + (all data size is OnigCodePoint) + */ +static int +new_code_range(BBuf** pbuf) +{ +#define INIT_MULTI_BYTE_RANGE_SIZE (SIZE_CODE_POINT * 5) + int r; + OnigCodePoint n; + BBuf* bbuf; + + bbuf = *pbuf = (BBuf* )xmalloc(sizeof(BBuf)); + CHECK_NULL_RETURN_MEMERR(*pbuf); + r = BBUF_INIT(*pbuf, INIT_MULTI_BYTE_RANGE_SIZE); + if (r) return r; + + n = 0; + BBUF_WRITE_CODE_POINT(bbuf, 0, n); + return 0; +} + +static int +add_code_range_to_buf(BBuf** pbuf, OnigCodePoint from, OnigCodePoint to) +{ + int r, inc_n, pos; + int low, high, bound, x; + OnigCodePoint n, *data; + BBuf* bbuf; + + if (from > to) { + n = from; from = to; to = n; + } + + if (IS_NULL(*pbuf)) { + r = new_code_range(pbuf); + if (r) return r; + bbuf = *pbuf; + n = 0; + } + else { + bbuf = *pbuf; + GET_CODE_POINT(n, bbuf->p); + } + data = (OnigCodePoint* )(bbuf->p); + data++; + + for (low = 0, bound = n; low < bound; ) { + x = (low + bound) >> 1; + if (from > data[x*2 + 1]) + low = x + 1; + else + bound = x; + } + + for (high = low, bound = n; high < bound; ) { + x = (high + bound) >> 1; + if (to >= data[x*2] - 1) + high = x + 1; + else + bound = x; + } + + inc_n = low + 1 - high; + if (n + inc_n > ONIG_MAX_MULTI_BYTE_RANGES_NUM) + return ONIGERR_TOO_MANY_MULTI_BYTE_RANGES; + + if (inc_n != 1) { + if (from > data[low*2]) + from = data[low*2]; + if (to < data[(high - 1)*2 + 1]) + to = data[(high - 1)*2 + 1]; + } + + if (inc_n != 0 && (OnigCodePoint )high < n) { + int from_pos = SIZE_CODE_POINT * (1 + high * 2); + int to_pos = SIZE_CODE_POINT * (1 + (low + 1) * 2); + int size = (n - high) * 2 * SIZE_CODE_POINT; + + if (inc_n > 0) { + BBUF_MOVE_RIGHT(bbuf, from_pos, to_pos, size); + } + else { + BBUF_MOVE_LEFT_REDUCE(bbuf, from_pos, to_pos); + } + } + + pos = SIZE_CODE_POINT * (1 + low * 2); + BBUF_ENSURE_SIZE(bbuf, pos + SIZE_CODE_POINT * 2); + BBUF_WRITE_CODE_POINT(bbuf, pos, from); + BBUF_WRITE_CODE_POINT(bbuf, pos + SIZE_CODE_POINT, to); + n += inc_n; + BBUF_WRITE_CODE_POINT(bbuf, 0, n); + + 
return 0; +} + +static int +add_code_range(BBuf** pbuf, ScanEnv* env, OnigCodePoint from, OnigCodePoint to) +{ + if (from > to) { + if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC)) + return 0; + else + return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS; + } + + return add_code_range_to_buf(pbuf, from, to); +} + +static int +not_code_range_buf(OnigEncoding enc, BBuf* bbuf, BBuf** pbuf) +{ + int r, i, n; + OnigCodePoint pre, from, *data, to = 0; + + *pbuf = (BBuf* )NULL; + if (IS_NULL(bbuf)) { + set_all: + return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf); + } + + data = (OnigCodePoint* )(bbuf->p); + GET_CODE_POINT(n, data); + data++; + if (n <= 0) goto set_all; + + r = 0; + pre = MBCODE_START_POS(enc); + for (i = 0; i < n; i++) { + from = data[i*2]; + to = data[i*2+1]; + if (pre <= from - 1) { + r = add_code_range_to_buf(pbuf, pre, from - 1); + if (r != 0) return r; + } + if (to == ~((OnigCodePoint )0)) break; + pre = to + 1; + } + if (to < ~((OnigCodePoint )0)) { + r = add_code_range_to_buf(pbuf, to + 1, ~((OnigCodePoint )0)); + } + return r; +} + +#define SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2) do {\ + BBuf *tbuf; \ + int tnot; \ + tnot = not1; not1 = not2; not2 = tnot; \ + tbuf = bbuf1; bbuf1 = bbuf2; bbuf2 = tbuf; \ +} while (0) + +static int +or_code_range_buf(OnigEncoding enc, BBuf* bbuf1, int not1, + BBuf* bbuf2, int not2, BBuf** pbuf) +{ + int r; + OnigCodePoint i, n1, *data1; + OnigCodePoint from, to; + + *pbuf = (BBuf* )NULL; + if (IS_NULL(bbuf1) && IS_NULL(bbuf2)) { + if (not1 != 0 || not2 != 0) + return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf); + return 0; + } + + r = 0; + if (IS_NULL(bbuf2)) + SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2); + + if (IS_NULL(bbuf1)) { + if (not1 != 0) { + return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf); + } + else { + if (not2 == 0) { + return bbuf_clone(pbuf, bbuf2); + } + else { + return not_code_range_buf(enc, bbuf2, pbuf); + } + } + } + + if (not1 != 0) + SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2); + + data1 = (OnigCodePoint* )(bbuf1->p); 
+ GET_CODE_POINT(n1, data1); + data1++; + + if (not2 == 0 && not1 == 0) { /* 1 OR 2 */ + r = bbuf_clone(pbuf, bbuf2); + } + else if (not1 == 0) { /* 1 OR (not 2) */ + r = not_code_range_buf(enc, bbuf2, pbuf); + } + if (r != 0) return r; + + for (i = 0; i < n1; i++) { + from = data1[i*2]; + to = data1[i*2+1]; + r = add_code_range_to_buf(pbuf, from, to); + if (r != 0) return r; + } + return 0; +} + +static int +and_code_range1(BBuf** pbuf, OnigCodePoint from1, OnigCodePoint to1, + OnigCodePoint* data, int n) +{ + int i, r; + OnigCodePoint from2, to2; + + for (i = 0; i < n; i++) { + from2 = data[i*2]; + to2 = data[i*2+1]; + if (from2 < from1) { + if (to2 < from1) continue; + else { + from1 = to2 + 1; + } + } + else if (from2 <= to1) { + if (to2 < to1) { + if (from1 <= from2 - 1) { + r = add_code_range_to_buf(pbuf, from1, from2-1); + if (r != 0) return r; + } + from1 = to2 + 1; + } + else { + to1 = from2 - 1; + } + } + else { + from1 = from2; + } + if (from1 > to1) break; + } + if (from1 <= to1) { + r = add_code_range_to_buf(pbuf, from1, to1); + if (r != 0) return r; + } + return 0; +} + +static int +and_code_range_buf(BBuf* bbuf1, int not1, BBuf* bbuf2, int not2, BBuf** pbuf) +{ + int r; + OnigCodePoint i, j, n1, n2, *data1, *data2; + OnigCodePoint from, to, from1, to1, from2, to2; + + *pbuf = (BBuf* )NULL; + if (IS_NULL(bbuf1)) { + if (not1 != 0 && IS_NOT_NULL(bbuf2)) /* not1 != 0 -> not2 == 0 */ + return bbuf_clone(pbuf, bbuf2); + return 0; + } + else if (IS_NULL(bbuf2)) { + if (not2 != 0) + return bbuf_clone(pbuf, bbuf1); + return 0; + } + + if (not1 != 0) + SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2); + + data1 = (OnigCodePoint* )(bbuf1->p); + data2 = (OnigCodePoint* )(bbuf2->p); + GET_CODE_POINT(n1, data1); + GET_CODE_POINT(n2, data2); + data1++; + data2++; + + if (not2 == 0 && not1 == 0) { /* 1 AND 2 */ + for (i = 0; i < n1; i++) { + from1 = data1[i*2]; + to1 = data1[i*2+1]; + for (j = 0; j < n2; j++) { + from2 = data2[j*2]; + to2 = data2[j*2+1]; + if (from2 > to1) 
break; + if (to2 < from1) continue; + from = MAX(from1, from2); + to = MIN(to1, to2); + r = add_code_range_to_buf(pbuf, from, to); + if (r != 0) return r; + } + } + } + else if (not1 == 0) { /* 1 AND (not 2) */ + for (i = 0; i < n1; i++) { + from1 = data1[i*2]; + to1 = data1[i*2+1]; + r = and_code_range1(pbuf, from1, to1, data2, n2); + if (r != 0) return r; + } + } + + return 0; +} + +static int +and_cclass(CClassNode* dest, CClassNode* cc, OnigEncoding enc) +{ + int r, not1, not2; + BBuf *buf1, *buf2, *pbuf; + BitSetRef bsr1, bsr2; + BitSet bs1, bs2; + + not1 = IS_NCCLASS_NOT(dest); + bsr1 = dest->bs; + buf1 = dest->mbuf; + not2 = IS_NCCLASS_NOT(cc); + bsr2 = cc->bs; + buf2 = cc->mbuf; + + if (not1 != 0) { + bitset_invert_to(bsr1, bs1); + bsr1 = bs1; + } + if (not2 != 0) { + bitset_invert_to(bsr2, bs2); + bsr2 = bs2; + } + bitset_and(bsr1, bsr2); + if (bsr1 != dest->bs) { + bitset_copy(dest->bs, bsr1); + bsr1 = dest->bs; + } + if (not1 != 0) { + bitset_invert(dest->bs); + } + + if (! ONIGENC_IS_SINGLEBYTE(enc)) { + if (not1 != 0 && not2 != 0) { + r = or_code_range_buf(enc, buf1, 0, buf2, 0, &pbuf); + } + else { + r = and_code_range_buf(buf1, not1, buf2, not2, &pbuf); + if (r == 0 && not1 != 0) { + BBuf *tbuf; + r = not_code_range_buf(enc, pbuf, &tbuf); + if (r != 0) { + bbuf_free(pbuf); + return r; + } + bbuf_free(pbuf); + pbuf = tbuf; + } + } + if (r != 0) return r; + + dest->mbuf = pbuf; + bbuf_free(buf1); + return r; + } + return 0; +} + +static int +or_cclass(CClassNode* dest, CClassNode* cc, OnigEncoding enc) +{ + int r, not1, not2; + BBuf *buf1, *buf2, *pbuf; + BitSetRef bsr1, bsr2; + BitSet bs1, bs2; + + not1 = IS_NCCLASS_NOT(dest); + bsr1 = dest->bs; + buf1 = dest->mbuf; + not2 = IS_NCCLASS_NOT(cc); + bsr2 = cc->bs; + buf2 = cc->mbuf; + + if (not1 != 0) { + bitset_invert_to(bsr1, bs1); + bsr1 = bs1; + } + if (not2 != 0) { + bitset_invert_to(bsr2, bs2); + bsr2 = bs2; + } + bitset_or(bsr1, bsr2); + if (bsr1 != dest->bs) { + bitset_copy(dest->bs, bsr1); + 
bsr1 = dest->bs; + } + if (not1 != 0) { + bitset_invert(dest->bs); + } + + if (! ONIGENC_IS_SINGLEBYTE(enc)) { + if (not1 != 0 && not2 != 0) { + r = and_code_range_buf(buf1, 0, buf2, 0, &pbuf); + } + else { + r = or_code_range_buf(enc, buf1, not1, buf2, not2, &pbuf); + if (r == 0 && not1 != 0) { + BBuf *tbuf; + r = not_code_range_buf(enc, pbuf, &tbuf); + if (r != 0) { + bbuf_free(pbuf); + return r; + } + bbuf_free(pbuf); + pbuf = tbuf; + } + } + if (r != 0) return r; + + dest->mbuf = pbuf; + bbuf_free(buf1); + return r; + } + else + return 0; +} + +static int +conv_backslash_value(int c, ScanEnv* env) +{ + if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_CONTROL_CHARS)) { + switch (c) { + case 'n': return '\n'; + case 't': return '\t'; + case 'r': return '\r'; + case 'f': return '\f'; + case 'a': return '\007'; + case 'b': return '\010'; + case 'e': return '\033'; + case 'v': + if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_V_VTAB)) + return '\v'; + break; + + default: + break; + } + } + return c; +} + +static int +is_invalid_quantifier_target(Node* node) +{ + switch (NTYPE(node)) { + case NT_ANCHOR: + return 1; + break; + + case NT_ENCLOSE: + /* allow enclosed elements */ + /* return is_invalid_quantifier_target(NENCLOSE(node)->target); */ + break; + + case NT_LIST: + do { + if (! 
is_invalid_quantifier_target(NCAR(node))) return 0; + } while (IS_NOT_NULL(node = NCDR(node))); + return 0; + break; + + case NT_ALT: + do { + if (is_invalid_quantifier_target(NCAR(node))) return 1; + } while (IS_NOT_NULL(node = NCDR(node))); + break; + + default: + break; + } + return 0; +} + +/* ?:0, *:1, +:2, ??:3, *?:4, +?:5 */ +static int +popular_quantifier_num(QtfrNode* q) +{ + if (q->greedy) { + if (q->lower == 0) { + if (q->upper == 1) return 0; + else if (IS_REPEAT_INFINITE(q->upper)) return 1; + } + else if (q->lower == 1) { + if (IS_REPEAT_INFINITE(q->upper)) return 2; + } + } + else { + if (q->lower == 0) { + if (q->upper == 1) return 3; + else if (IS_REPEAT_INFINITE(q->upper)) return 4; + } + else if (q->lower == 1) { + if (IS_REPEAT_INFINITE(q->upper)) return 5; + } + } + return -1; +} + + +enum ReduceType { + RQ_ASIS = 0, /* as is */ + RQ_DEL = 1, /* delete parent */ + RQ_A, /* to '*' */ + RQ_AQ, /* to '*?' */ + RQ_QQ, /* to '??' */ + RQ_P_QQ, /* to '+)??' */ + RQ_PQ_Q /* to '+?)?' */ +}; + +static enum ReduceType ReduceTypeTable[6][6] = { + {RQ_DEL, RQ_A, RQ_A, RQ_QQ, RQ_AQ, RQ_ASIS}, /* '?' */ + {RQ_DEL, RQ_DEL, RQ_DEL, RQ_P_QQ, RQ_P_QQ, RQ_DEL}, /* '*' */ + {RQ_A, RQ_A, RQ_DEL, RQ_ASIS, RQ_P_QQ, RQ_DEL}, /* '+' */ + {RQ_DEL, RQ_AQ, RQ_AQ, RQ_DEL, RQ_AQ, RQ_AQ}, /* '??' */ + {RQ_DEL, RQ_DEL, RQ_DEL, RQ_DEL, RQ_DEL, RQ_DEL}, /* '*?' */ + {RQ_ASIS, RQ_PQ_Q, RQ_DEL, RQ_AQ, RQ_AQ, RQ_DEL} /* '+?' 
*/ +}; + +extern void +onig_reduce_nested_quantifier(Node* pnode, Node* cnode) +{ + int pnum, cnum; + QtfrNode *p, *c; + + p = NQTFR(pnode); + c = NQTFR(cnode); + pnum = popular_quantifier_num(p); + cnum = popular_quantifier_num(c); + if (pnum < 0 || cnum < 0) return ; + + switch(ReduceTypeTable[cnum][pnum]) { + case RQ_DEL: + *pnode = *cnode; + break; + case RQ_A: + p->target = c->target; + p->lower = 0; p->upper = REPEAT_INFINITE; p->greedy = 1; + break; + case RQ_AQ: + p->target = c->target; + p->lower = 0; p->upper = REPEAT_INFINITE; p->greedy = 0; + break; + case RQ_QQ: + p->target = c->target; + p->lower = 0; p->upper = 1; p->greedy = 0; + break; + case RQ_P_QQ: + p->target = cnode; + p->lower = 0; p->upper = 1; p->greedy = 0; + c->lower = 1; c->upper = REPEAT_INFINITE; c->greedy = 1; + return ; + break; + case RQ_PQ_Q: + p->target = cnode; + p->lower = 0; p->upper = 1; p->greedy = 1; + c->lower = 1; c->upper = REPEAT_INFINITE; c->greedy = 0; + return ; + break; + case RQ_ASIS: + p->target = cnode; + return ; + break; + } + + c->target = NULL_NODE; + onig_node_free(cnode); +} + + +enum TokenSyms { + TK_EOT = 0, /* end of token */ + TK_RAW_BYTE = 1, + TK_CHAR, + TK_STRING, + TK_CODE_POINT, + TK_ANYCHAR, + TK_CHAR_TYPE, + TK_BACKREF, + TK_CALL, + TK_ANCHOR, + TK_OP_REPEAT, + TK_INTERVAL, + TK_ANYCHAR_ANYTIME, /* SQL '%' == .* */ + TK_ALT, + TK_SUBEXP_OPEN, + TK_SUBEXP_CLOSE, + TK_CC_OPEN, + TK_QUOTE_OPEN, + TK_CHAR_PROPERTY, /* \p{...}, \P{...} */ + /* in cc */ + TK_CC_CLOSE, + TK_CC_RANGE, + TK_POSIX_BRACKET_OPEN, + TK_CC_AND, /* && */ + TK_CC_CC_OPEN /* [ */ +}; + +typedef struct { + enum TokenSyms type; + int escaped; + int base; /* is number: 8, 16 (used in [....]) */ + UChar* backp; + union { + UChar* s; + int c; + OnigCodePoint code; + int anchor; + int subtype; + struct { + int lower; + int upper; + int greedy; + int possessive; + } repeat; + struct { + int num; + int ref1; + int* refs; + int by_name; +#ifdef USE_BACKREF_WITH_LEVEL + int exist_level; + 
int level; /* \k */ +#endif + } backref; + struct { + UChar* name; + UChar* name_end; + int gnum; + } call; + struct { + int ctype; + int not; + } prop; + } u; +} OnigToken; + + +static int +fetch_range_quantifier(UChar** src, UChar* end, OnigToken* tok, ScanEnv* env) +{ + int low, up, syn_allow, non_low = 0; + int r = 0; + OnigCodePoint c; + OnigEncoding enc = env->enc; + UChar* p = *src; + PFETCH_READY; + + syn_allow = IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_INVALID_INTERVAL); + + if (PEND) { + if (syn_allow) + return 1; /* "....{" : OK! */ + else + return ONIGERR_END_PATTERN_AT_LEFT_BRACE; /* "....{" syntax error */ + } + + if (! syn_allow) { + c = PPEEK; + if (c == ')' || c == '(' || c == '|') { + return ONIGERR_END_PATTERN_AT_LEFT_BRACE; + } + } + + low = onig_scan_unsigned_number(&p, end, env->enc); + if (low < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE; + if (low > ONIG_MAX_REPEAT_NUM) + return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE; + + if (p == *src) { /* can't read low */ + if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV)) { + /* allow {,n} as {0,n} */ + low = 0; + non_low = 1; + } + else + goto invalid; + } + + if (PEND) goto invalid; + PFETCH(c); + if (c == ',') { + UChar* prev = p; + up = onig_scan_unsigned_number(&p, end, env->enc); + if (up < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE; + if (up > ONIG_MAX_REPEAT_NUM) + return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE; + + if (p == prev) { + if (non_low != 0) + goto invalid; + up = REPEAT_INFINITE; /* {n,} : {n,infinite} */ + } + } + else { + if (non_low != 0) + goto invalid; + + PUNFETCH; + up = low; /* {n} : exact n times */ + r = 2; /* fixed */ + } + + if (PEND) goto invalid; + PFETCH(c); + if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_BRACE_INTERVAL)) { + if (c != MC_ESC(env->syntax)) goto invalid; + PFETCH(c); + } + if (c != '}') goto invalid; + + if (!IS_REPEAT_INFINITE(up) && low > up) { + return ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE; + } + + tok->type 
= TK_INTERVAL; + tok->u.repeat.lower = low; + tok->u.repeat.upper = up; + *src = p; + return r; /* 0: normal {n,m}, 2: fixed {n} */ + + invalid: + if (syn_allow) + return 1; /* OK */ + else + return ONIGERR_INVALID_REPEAT_RANGE_PATTERN; +} + +/* \M-, \C-, \c, or \... */ +static int +fetch_escaped_value(UChar** src, UChar* end, ScanEnv* env) +{ + int v; + OnigCodePoint c; + OnigEncoding enc = env->enc; + UChar* p = *src; + PFETCH_READY; + + if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE; + + PFETCH(c); + switch (c) { + case 'M': + if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META)) { + if (PEND) return ONIGERR_END_PATTERN_AT_META; + PFETCH(c); + if (c != '-') return ONIGERR_META_CODE_SYNTAX; + if (PEND) return ONIGERR_END_PATTERN_AT_META; + PFETCH(c); + if (c == MC_ESC(env->syntax)) { + v = fetch_escaped_value(&p, end, env); + if (v < 0) return v; + c = (OnigCodePoint )v; + } + c = ((c & 0xff) | 0x80); + } + else + goto backslash; + break; + + case 'C': + if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL)) { + if (PEND) return ONIGERR_END_PATTERN_AT_CONTROL; + PFETCH(c); + if (c != '-') return ONIGERR_CONTROL_CODE_SYNTAX; + goto control; + } + else + goto backslash; + + case 'c': + if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_C_CONTROL)) { + control: + if (PEND) return ONIGERR_END_PATTERN_AT_CONTROL; + PFETCH(c); + if (c == '?') { + c = 0177; + } + else { + if (c == MC_ESC(env->syntax)) { + v = fetch_escaped_value(&p, end, env); + if (v < 0) return v; + c = (OnigCodePoint )v; + } + c &= 0x9f; + } + break; + } + /* fall through */ + + default: + { + backslash: + c = conv_backslash_value(c, env); + } + break; + } + + *src = p; + return c; +} + +static int fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env); + +static OnigCodePoint +get_name_end_code_point(OnigCodePoint start) +{ + switch (start) { + case '<': return (OnigCodePoint )'>'; break; + case '\'': return (OnigCodePoint )'\''; break; + default: + break; + } 
+ + return (OnigCodePoint )0; +} + +#ifdef USE_NAMED_GROUP +#ifdef USE_BACKREF_WITH_LEVEL +/* + \k<name+n>, \k<name-n> + \k<num+n>, \k<num-n> + \k<-num+n>, \k<-num-n> + + Scan a back reference name or number with an optional nest level; + *src points just past the opening delimiter (start_code). + returns: 1 -> a level was given (stored in *rlevel), 0 -> no level, + < 0 -> ONIGERR_* code + On success *rname_end and *src are updated; *rback_num holds the signed + number for a numeric reference and stays 0 for a name. +*/ +static int +fetch_name_with_level(OnigCodePoint start_code, UChar** src, UChar* end, + UChar** rname_end, ScanEnv* env, + int* rback_num, int* rlevel) +{ + int r, sign, is_num, exist_level; + OnigCodePoint end_code; + OnigCodePoint c = 0; + OnigEncoding enc = env->enc; + UChar *name_end; + UChar *pnum_head; + UChar *p = *src; + PFETCH_READY; + + *rback_num = 0; + is_num = exist_level = 0; + sign = 1; + pnum_head = *src; + + end_code = get_name_end_code_point(start_code); + + name_end = end; + r = 0; + if (PEND) { + return ONIGERR_EMPTY_GROUP_NAME; + } + else { + PFETCH(c); + if (c == end_code) + return ONIGERR_EMPTY_GROUP_NAME; + + if (ONIGENC_IS_CODE_DIGIT(enc, c)) { + is_num = 1; + } + else if (c == '-') { + is_num = 2; + sign = -1; + pnum_head = p; + } + else if (!ONIGENC_IS_CODE_WORD(enc, c)) { + r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME; + } + } + + while (!PEND) { + name_end = p; + PFETCH(c); + if (c == end_code || c == ')' || c == '+' || c == '-') { + if (is_num == 2) r = ONIGERR_INVALID_GROUP_NAME; + break; + } + + if (is_num != 0) { + if (ONIGENC_IS_CODE_DIGIT(enc, c)) { + is_num = 1; + } + else { + r = ONIGERR_INVALID_GROUP_NAME; + is_num = 0; + } + } + else if (!ONIGENC_IS_CODE_WORD(enc, c)) { + r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME; + } + } + + if (r == 0 && c != end_code) { + if (c == '+' || c == '-') { + int level; + int flag = (c == '-' ? -1 : 1); + + /* a sign at the very end of the pattern has no level digits to fetch */ + if (PEND) goto err; + PFETCH(c); + if (! 
ONIGENC_IS_CODE_DIGIT(enc, c)) goto err; + PUNFETCH; + level = onig_scan_unsigned_number(&p, end, enc); + if (level < 0) return ONIGERR_TOO_BIG_NUMBER; + *rlevel = (level * flag); + exist_level = 1; + + /* the closing delimiter must follow the level; a pattern ending here is invalid */ + if (!PEND) { + PFETCH(c); + if (c == end_code) + goto end; + } + } + + err: + r = ONIGERR_INVALID_GROUP_NAME; + name_end = end; + } + + end: + if (r == 0) { + if (is_num != 0) { + *rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc); + if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER; + else if (*rback_num == 0) goto err; + + *rback_num *= sign; + } + + *rname_end = name_end; + *src = p; + return (exist_level ? 1 : 0); + } + else { + onig_scan_env_set_error_string(env, r, *src, name_end); + return r; + } +} +#endif /* USE_BACKREF_WITH_LEVEL */ + +/* + ref: 0 -> define name (don't allow number name) + 1 -> reference name (allow number name) +*/ +static int +fetch_name(OnigCodePoint start_code, UChar** src, UChar* end, + UChar** rname_end, ScanEnv* env, int* rback_num, int ref) +{ + int r, is_num, sign; + OnigCodePoint end_code; + OnigCodePoint c = 0; + OnigEncoding enc = env->enc; + UChar *name_end; + UChar *pnum_head; + UChar *p = *src; + PFETCH_READY; + + *rback_num = 0; + + end_code = get_name_end_code_point(start_code); + + name_end = end; + pnum_head = *src; + r = 0; + is_num = 0; + sign = 1; + if (PEND) { + return ONIGERR_EMPTY_GROUP_NAME; + } + else { + PFETCH(c); + if (c == end_code) + return ONIGERR_EMPTY_GROUP_NAME; + + if (ONIGENC_IS_CODE_DIGIT(enc, c)) { + if (ref == 1) + is_num = 1; + else { + r = ONIGERR_INVALID_GROUP_NAME; + is_num = 0; + } + } + else if (c == '-') { + if (ref == 1) { + is_num = 2; + sign = -1; + pnum_head = p; + } + else { + r = ONIGERR_INVALID_GROUP_NAME; + is_num = 0; + } + } + else if (!ONIGENC_IS_CODE_WORD(enc, c)) { + r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME; + } + } + + if (r == 0) { + while (!PEND) { + name_end = p; + PFETCH(c); + if (c == end_code || c == ')') { + if (is_num == 2) r = ONIGERR_INVALID_GROUP_NAME; + break; + } + + if 
(is_num != 0) { + if (ONIGENC_IS_CODE_DIGIT(enc, c)) { + is_num = 1; + } + else { + if (!ONIGENC_IS_CODE_WORD(enc, c)) + r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME; + else + r = ONIGERR_INVALID_GROUP_NAME; + + is_num = 0; + } + } + else { + if (!ONIGENC_IS_CODE_WORD(enc, c)) { + r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME; + } + } + } + + if (c != end_code) { + r = ONIGERR_INVALID_GROUP_NAME; + name_end = end; + } + + if (is_num != 0) { + *rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc); + if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER; + else if (*rback_num == 0) { + r = ONIGERR_INVALID_GROUP_NAME; + goto err; + } + + *rback_num *= sign; + } + + *rname_end = name_end; + *src = p; + return 0; + } + else { + while (!PEND) { + name_end = p; + PFETCH(c); + if (c == end_code || c == ')') + break; + } + if (PEND) + name_end = end; + + err: + onig_scan_env_set_error_string(env, r, *src, name_end); + return r; + } +} +#else +static int +fetch_name(OnigCodePoint start_code, UChar** src, UChar* end, + UChar** rname_end, ScanEnv* env, int* rback_num, int ref) +{ + int r, is_num, sign; + OnigCodePoint end_code; + OnigCodePoint c = 0; + UChar *name_end; + OnigEncoding enc = env->enc; + UChar *pnum_head; + UChar *p = *src; + PFETCH_READY; + + *rback_num = 0; + + end_code = get_name_end_code_point(start_code); + + *rname_end = name_end = end; + r = 0; + pnum_head = *src; + is_num = 0; + sign = 1; + + if (PEND) { + return ONIGERR_EMPTY_GROUP_NAME; + } + else { + PFETCH(c); + if (c == end_code) + return ONIGERR_EMPTY_GROUP_NAME; + + if (ONIGENC_IS_CODE_DIGIT(enc, c)) { + is_num = 1; + } + else if (c == '-') { + is_num = 2; + sign = -1; + pnum_head = p; + } + else { + r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME; + } + } + + while (!PEND) { + name_end = p; + + PFETCH(c); + if (c == end_code || c == ')') break; + if (! 
ONIGENC_IS_CODE_DIGIT(enc, c)) + r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME; + } + if (r == 0 && c != end_code) { + r = ONIGERR_INVALID_GROUP_NAME; + name_end = end; + } + + if (r == 0) { + *rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc); + if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER; + else if (*rback_num == 0) { + r = ONIGERR_INVALID_GROUP_NAME; + goto err; + } + *rback_num *= sign; + + *rname_end = name_end; + *src = p; + return 0; + } + else { + err: + onig_scan_env_set_error_string(env, r, *src, name_end); + return r; + } +} +#endif /* USE_NAMED_GROUP */ + +static void +CC_ESC_WARN(ScanEnv* env, UChar *c) +{ + if (onig_warn == onig_null_warn) return ; + + if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_CC_OP_NOT_ESCAPED) && + IS_SYNTAX_BV(env->syntax, ONIG_SYN_BACKSLASH_ESCAPE_IN_CC)) { + UChar buf[WARN_BUFSIZE]; + onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc, + env->pattern, env->pattern_end, + (UChar* )"character class has '%s' without escape", c); + (*onig_warn)((char* )buf); + } +} + +static void +CLOSE_BRACKET_WITHOUT_ESC_WARN(ScanEnv* env, UChar* c) +{ + if (onig_warn == onig_null_warn) return ; + + if (IS_SYNTAX_BV((env)->syntax, ONIG_SYN_WARN_CC_OP_NOT_ESCAPED)) { + UChar buf[WARN_BUFSIZE]; + onig_snprintf_with_pattern(buf, WARN_BUFSIZE, (env)->enc, + (env)->pattern, (env)->pattern_end, + (UChar* )"regular expression has '%s' without escape", c); + (*onig_warn)((char* )buf); + } +} + +static UChar* +find_str_position(OnigCodePoint s[], int n, UChar* from, UChar* to, + UChar **next, OnigEncoding enc) +{ + int i; + OnigCodePoint x; + UChar *q; + UChar *p = from; + + while (p < to) { + x = ONIGENC_MBC_TO_CODE(enc, p, to); + q = p + enclen(enc, p); + if (x == s[0]) { + for (i = 1; i < n && q < to; i++) { + x = ONIGENC_MBC_TO_CODE(enc, q, to); + if (x != s[i]) break; + q += enclen(enc, q); + } + if (i >= n) { + if (IS_NOT_NULL(next)) + *next = q; + return p; + } + } + p = q; + } + return NULL_UCHARP; +} + +static int 
+str_exist_check_with_esc(OnigCodePoint s[], int n, UChar* from, UChar* to, + OnigCodePoint bad, OnigEncoding enc, OnigSyntaxType* syn) +{ + int i, in_esc; + OnigCodePoint x; + UChar *q; + UChar *p = from; + + in_esc = 0; + while (p < to) { + if (in_esc) { + in_esc = 0; + p += enclen(enc, p); + } + else { + x = ONIGENC_MBC_TO_CODE(enc, p, to); + q = p + enclen(enc, p); + if (x == s[0]) { + for (i = 1; i < n && q < to; i++) { + x = ONIGENC_MBC_TO_CODE(enc, q, to); + if (x != s[i]) break; + q += enclen(enc, q); + } + if (i >= n) return 1; + p += enclen(enc, p); + } + else { + x = ONIGENC_MBC_TO_CODE(enc, p, to); + if (x == bad) return 0; + else if (x == MC_ESC(syn)) in_esc = 1; + p = q; + } + } + } + return 0; +} + +static int +fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) +{ + int num; + OnigCodePoint c, c2; + OnigSyntaxType* syn = env->syntax; + OnigEncoding enc = env->enc; + UChar* prev; + UChar* p = *src; + PFETCH_READY; + + if (PEND) { + tok->type = TK_EOT; + return tok->type; + } + + PFETCH(c); + tok->type = TK_CHAR; + tok->base = 0; + tok->u.c = c; + tok->escaped = 0; + + if (c == ']') { + tok->type = TK_CC_CLOSE; + } + else if (c == '-') { + tok->type = TK_CC_RANGE; + } + else if (c == MC_ESC(syn)) { + if (! 
IS_SYNTAX_BV(syn, ONIG_SYN_BACKSLASH_ESCAPE_IN_CC)) + goto end; + + if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE; + + PFETCH(c); + tok->escaped = 1; + tok->u.c = c; + switch (c) { + case 'w': + tok->type = TK_CHAR_TYPE; + tok->u.prop.ctype = ONIGENC_CTYPE_WORD; + tok->u.prop.not = 0; + break; + case 'W': + tok->type = TK_CHAR_TYPE; + tok->u.prop.ctype = ONIGENC_CTYPE_WORD; + tok->u.prop.not = 1; + break; + case 'd': + tok->type = TK_CHAR_TYPE; + tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT; + tok->u.prop.not = 0; + break; + case 'D': + tok->type = TK_CHAR_TYPE; + tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT; + tok->u.prop.not = 1; + break; + case 's': + tok->type = TK_CHAR_TYPE; + tok->u.prop.ctype = ONIGENC_CTYPE_SPACE; + tok->u.prop.not = 0; + break; + case 'S': + tok->type = TK_CHAR_TYPE; + tok->u.prop.ctype = ONIGENC_CTYPE_SPACE; + tok->u.prop.not = 1; + break; + case 'h': + if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break; + tok->type = TK_CHAR_TYPE; + tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT; + tok->u.prop.not = 0; + break; + case 'H': + if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break; + tok->type = TK_CHAR_TYPE; + tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT; + tok->u.prop.not = 1; + break; + + case 'p': + case 'P': + c2 = PPEEK; + if (c2 == '{' && + IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY)) { + PINC; + tok->type = TK_CHAR_PROPERTY; + tok->u.prop.not = (c == 'P' ? 1 : 0); + + if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT)) { + PFETCH(c2); + if (c2 == '^') { + tok->u.prop.not = (tok->u.prop.not == 0 ? 
1 : 0); + } + else + PUNFETCH; + } + } + break; + + case 'x': + if (PEND) break; + + prev = p; + if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_BRACE_HEX8)) { + PINC; + num = scan_unsigned_hexadecimal_number(&p, end, 8, enc); + if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE; + if (!PEND) { + c2 = PPEEK; + if (ONIGENC_IS_CODE_XDIGIT(enc, c2)) + return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE; + } + + if (p > prev + enclen(enc, prev) && !PEND && (PPEEK_IS('}'))) { + PINC; + tok->type = TK_CODE_POINT; + tok->base = 16; + tok->u.code = (OnigCodePoint )num; + } + else { + /* can't read nothing or invalid format */ + p = prev; + } + } + else if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_HEX2)) { + num = scan_unsigned_hexadecimal_number(&p, end, 2, enc); + if (num < 0) return ONIGERR_TOO_BIG_NUMBER; + if (p == prev) { /* can't read nothing. */ + num = 0; /* but, it's not error */ + } + tok->type = TK_RAW_BYTE; + tok->base = 16; + tok->u.c = num; + } + break; + + case 'u': + if (PEND) break; + + prev = p; + if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_U_HEX4)) { + num = scan_unsigned_hexadecimal_number(&p, end, 4, enc); + if (num < 0) return ONIGERR_TOO_BIG_NUMBER; + if (p == prev) { /* can't read nothing. */ + num = 0; /* but, it's not error */ + } + tok->type = TK_CODE_POINT; + tok->base = 16; + tok->u.code = (OnigCodePoint )num; + } + break; + + case '0': + case '1': case '2': case '3': case '4': case '5': case '6': case '7': + if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_OCTAL3)) { + PUNFETCH; + prev = p; + num = scan_unsigned_octal_number(&p, end, 3, enc); + if (num < 0) return ONIGERR_TOO_BIG_NUMBER; + if (p == prev) { /* can't read nothing. 
*/ + num = 0; /* but, it's not error */ + } + tok->type = TK_RAW_BYTE; + tok->base = 8; + tok->u.c = num; + } + break; + + default: + PUNFETCH; + num = fetch_escaped_value(&p, end, env); + if (num < 0) return num; + if (tok->u.c != num) { + tok->u.code = (OnigCodePoint )num; + tok->type = TK_CODE_POINT; + } + break; + } + } + else if (c == '[') { + if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_POSIX_BRACKET) && (PPEEK_IS(':'))) { + OnigCodePoint send[] = { (OnigCodePoint )':', (OnigCodePoint )']' }; + tok->backp = p; /* point at '[' is readed */ + PINC; + if (str_exist_check_with_esc(send, 2, p, end, + (OnigCodePoint )']', enc, syn)) { + tok->type = TK_POSIX_BRACKET_OPEN; + } + else { + PUNFETCH; + goto cc_in_cc; + } + } + else { + cc_in_cc: + if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_CCLASS_SET_OP)) { + tok->type = TK_CC_CC_OPEN; + } + else { + CC_ESC_WARN(env, (UChar* )"["); + } + } + } + else if (c == '&') { + if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_CCLASS_SET_OP) && + !PEND && (PPEEK_IS('&'))) { + PINC; + tok->type = TK_CC_AND; + } + } + + end: + *src = p; + return tok->type; +} + +static int +fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) +{ + int r, num; + OnigCodePoint c; + OnigEncoding enc = env->enc; + OnigSyntaxType* syn = env->syntax; + UChar* prev; + UChar* p = *src; + PFETCH_READY; + + start: + if (PEND) { + tok->type = TK_EOT; + return tok->type; + } + + tok->type = TK_STRING; + tok->base = 0; + tok->backp = p; + + PFETCH(c); + if (IS_MC_ESC_CODE(c, syn)) { + if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE; + + tok->backp = p; + PFETCH(c); + + tok->u.c = c; + tok->escaped = 1; + switch (c) { + case '*': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_ASTERISK_ZERO_INF)) break; + tok->type = TK_OP_REPEAT; + tok->u.repeat.lower = 0; + tok->u.repeat.upper = REPEAT_INFINITE; + goto greedy_check; + break; + + case '+': + if (! 
IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_PLUS_ONE_INF)) break; + tok->type = TK_OP_REPEAT; + tok->u.repeat.lower = 1; + tok->u.repeat.upper = REPEAT_INFINITE; + goto greedy_check; + break; + + case '?': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_QMARK_ZERO_ONE)) break; + tok->type = TK_OP_REPEAT; + tok->u.repeat.lower = 0; + tok->u.repeat.upper = 1; + greedy_check: + if (!PEND && PPEEK_IS('?') && + IS_SYNTAX_OP(syn, ONIG_SYN_OP_QMARK_NON_GREEDY)) { + PFETCH(c); + tok->u.repeat.greedy = 0; + tok->u.repeat.possessive = 0; + } + else { + possessive_check: + if (!PEND && PPEEK_IS('+') && + ((IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT) && + tok->type != TK_INTERVAL) || + (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL) && + tok->type == TK_INTERVAL))) { + PFETCH(c); + tok->u.repeat.greedy = 1; + tok->u.repeat.possessive = 1; + } + else { + tok->u.repeat.greedy = 1; + tok->u.repeat.possessive = 0; + } + } + break; + + case '{': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_BRACE_INTERVAL)) break; + r = fetch_range_quantifier(&p, end, tok, env); + if (r < 0) return r; /* error */ + if (r == 0) goto greedy_check; + else if (r == 2) { /* {n} */ + if (IS_SYNTAX_BV(syn, ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY)) + goto possessive_check; + + goto greedy_check; + } + /* r == 1 : normal char */ + break; + + case '|': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_VBAR_ALT)) break; + tok->type = TK_ALT; + break; + + case '(': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LPAREN_SUBEXP)) break; + tok->type = TK_SUBEXP_OPEN; + break; + + case ')': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LPAREN_SUBEXP)) break; + tok->type = TK_SUBEXP_CLOSE; + break; + + case 'w': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_W_WORD)) break; + tok->type = TK_CHAR_TYPE; + tok->u.prop.ctype = ONIGENC_CTYPE_WORD; + tok->u.prop.not = 0; + break; + + case 'W': + if (! 
IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_W_WORD)) break; + tok->type = TK_CHAR_TYPE; + tok->u.prop.ctype = ONIGENC_CTYPE_WORD; + tok->u.prop.not = 1; + break; + + case 'b': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_B_WORD_BOUND)) break; + tok->type = TK_ANCHOR; + tok->u.anchor = ANCHOR_WORD_BOUND; + break; + + case 'B': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_B_WORD_BOUND)) break; + tok->type = TK_ANCHOR; + tok->u.anchor = ANCHOR_NOT_WORD_BOUND; + break; + +#ifdef USE_WORD_BEGIN_END + case '<': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END)) break; + tok->type = TK_ANCHOR; + tok->u.anchor = ANCHOR_WORD_BEGIN; + break; + + case '>': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END)) break; + tok->type = TK_ANCHOR; + tok->u.anchor = ANCHOR_WORD_END; + break; +#endif + + case 's': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_S_WHITE_SPACE)) break; + tok->type = TK_CHAR_TYPE; + tok->u.prop.ctype = ONIGENC_CTYPE_SPACE; + tok->u.prop.not = 0; + break; + + case 'S': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_S_WHITE_SPACE)) break; + tok->type = TK_CHAR_TYPE; + tok->u.prop.ctype = ONIGENC_CTYPE_SPACE; + tok->u.prop.not = 1; + break; + + case 'd': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_D_DIGIT)) break; + tok->type = TK_CHAR_TYPE; + tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT; + tok->u.prop.not = 0; + break; + + case 'D': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_D_DIGIT)) break; + tok->type = TK_CHAR_TYPE; + tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT; + tok->u.prop.not = 1; + break; + + case 'h': + if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break; + tok->type = TK_CHAR_TYPE; + tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT; + tok->u.prop.not = 0; + break; + + case 'H': + if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break; + tok->type = TK_CHAR_TYPE; + tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT; + tok->u.prop.not = 1; + break; + + case 'A': + if (! 
IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break; + begin_buf: + tok->type = TK_ANCHOR; + tok->u.subtype = ANCHOR_BEGIN_BUF; + break; + + case 'Z': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break; + tok->type = TK_ANCHOR; + tok->u.subtype = ANCHOR_SEMI_END_BUF; + break; + + case 'z': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break; + end_buf: + tok->type = TK_ANCHOR; + tok->u.subtype = ANCHOR_END_BUF; + break; + + case 'G': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR)) break; + tok->type = TK_ANCHOR; + tok->u.subtype = ANCHOR_BEGIN_POSITION; + break; + + case '`': + if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR)) break; + goto begin_buf; + break; + + case '\'': + if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR)) break; + goto end_buf; + break; + + case 'x': + if (PEND) break; + + prev = p; + if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_BRACE_HEX8)) { + PINC; + num = scan_unsigned_hexadecimal_number(&p, end, 8, enc); + if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE; + if (!PEND) { + if (ONIGENC_IS_CODE_XDIGIT(enc, PPEEK)) + return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE; + } + + if ((p > prev + enclen(enc, prev)) && !PEND && PPEEK_IS('}')) { + PINC; + tok->type = TK_CODE_POINT; + tok->u.code = (OnigCodePoint )num; + } + else { + /* can't read nothing or invalid format */ + p = prev; + } + } + else if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_HEX2)) { + num = scan_unsigned_hexadecimal_number(&p, end, 2, enc); + if (num < 0) return ONIGERR_TOO_BIG_NUMBER; + if (p == prev) { /* can't read nothing. */ + num = 0; /* but, it's not error */ + } + tok->type = TK_RAW_BYTE; + tok->base = 16; + tok->u.c = num; + } + break; + + case 'u': + if (PEND) break; + + prev = p; + if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_U_HEX4)) { + num = scan_unsigned_hexadecimal_number(&p, end, 4, enc); + if (num < 0) return ONIGERR_TOO_BIG_NUMBER; + if (p == prev) { /* can't read nothing. 
*/ + num = 0; /* but, it's not error */ + } + tok->type = TK_CODE_POINT; + tok->base = 16; + tok->u.code = (OnigCodePoint )num; + } + break; + + case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + PUNFETCH; + prev = p; + num = onig_scan_unsigned_number(&p, end, enc); + if (num < 0 || num > ONIG_MAX_BACKREF_NUM) { + goto skip_backref; + } + + if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_DECIMAL_BACKREF) && + (num <= env->num_mem || num <= 9)) { /* This spec. from GNU regex */ + if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) { + if (num > env->num_mem || IS_NULL(SCANENV_MEM_NODES(env)[num])) + return ONIGERR_INVALID_BACKREF; + } + + tok->type = TK_BACKREF; + tok->u.backref.num = 1; + tok->u.backref.ref1 = num; + tok->u.backref.by_name = 0; +#ifdef USE_BACKREF_WITH_LEVEL + tok->u.backref.exist_level = 0; +#endif + break; + } + + skip_backref: + if (c == '8' || c == '9') { + /* normal char */ + p = prev; PINC; + break; + } + + p = prev; + /* fall through */ + case '0': + if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_OCTAL3)) { + prev = p; + num = scan_unsigned_octal_number(&p, end, (c == '0' ? 2:3), enc); + if (num < 0) return ONIGERR_TOO_BIG_NUMBER; + if (p == prev) { /* can't read nothing. */ + num = 0; /* but, it's not error */ + } + tok->type = TK_RAW_BYTE; + tok->base = 8; + tok->u.c = num; + } + else if (c != '0') { + PINC; + } + break; + +#ifdef USE_NAMED_GROUP + case 'k': + if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_K_NAMED_BACKREF)) { + PFETCH(c); + if (c == '<' || c == '\'') { + UChar* name_end; + int* backs; + int back_num; + + prev = p; + +#ifdef USE_BACKREF_WITH_LEVEL + name_end = NULL_UCHARP; /* no need. escape gcc warning. 
*/ + r = fetch_name_with_level((OnigCodePoint )c, &p, end, &name_end, + env, &back_num, &tok->u.backref.level); + if (r == 1) tok->u.backref.exist_level = 1; + else tok->u.backref.exist_level = 0; +#else + /* fetch_name() takes the opening delimiter as its first argument */ + r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env, &back_num, 1); +#endif + if (r < 0) return r; + + if (back_num != 0) { + if (back_num < 0) { + back_num = BACKREF_REL_TO_ABS(back_num, env); + if (back_num <= 0) + return ONIGERR_INVALID_BACKREF; + } + + if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) { + if (back_num > env->num_mem || + IS_NULL(SCANENV_MEM_NODES(env)[back_num])) + return ONIGERR_INVALID_BACKREF; + } + tok->type = TK_BACKREF; + tok->u.backref.by_name = 0; + tok->u.backref.num = 1; + tok->u.backref.ref1 = back_num; + } + else { + num = onig_name_to_group_numbers(env->reg, prev, name_end, &backs); + if (num <= 0) { + onig_scan_env_set_error_string(env, + ONIGERR_UNDEFINED_NAME_REFERENCE, prev, name_end); + return ONIGERR_UNDEFINED_NAME_REFERENCE; + } + if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) { + int i; + for (i = 0; i < num; i++) { + if (backs[i] > env->num_mem || + IS_NULL(SCANENV_MEM_NODES(env)[backs[i]])) + return ONIGERR_INVALID_BACKREF; + } + } + + tok->type = TK_BACKREF; + tok->u.backref.by_name = 1; + if (num == 1) { + tok->u.backref.num = 1; + tok->u.backref.ref1 = backs[0]; + } + else { + tok->u.backref.num = num; + tok->u.backref.refs = backs; + } + } + } + else + PUNFETCH; + } + break; +#endif + +#ifdef USE_SUBEXP_CALL + case 'g': + /* an escape at the very end of the pattern has no delimiter to fetch; + it is left as a plain char */ + if (!PEND && IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_G_SUBEXP_CALL)) { + PFETCH(c); + if (c == '<' || c == '\'') { + int gnum; + UChar* name_end; + + prev = p; + r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env, &gnum, 1); + if (r < 0) return r; + + tok->type = TK_CALL; + tok->u.call.name = prev; + tok->u.call.name_end = name_end; + tok->u.call.gnum = gnum; + } + else + PUNFETCH; + } + break; +#endif + + case 'Q': + if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE)) { + tok->type = TK_QUOTE_OPEN; 
+ } + break; + + case 'p': + case 'P': + if (PPEEK_IS('{') && + IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY)) { + PINC; + tok->type = TK_CHAR_PROPERTY; + tok->u.prop.not = (c == 'P' ? 1 : 0); + + if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT)) { + PFETCH(c); + if (c == '^') { + tok->u.prop.not = (tok->u.prop.not == 0 ? 1 : 0); + } + else + PUNFETCH; + } + } + break; + + default: + PUNFETCH; + num = fetch_escaped_value(&p, end, env); + if (num < 0) return num; + /* set_raw: */ + if (tok->u.c != num) { + tok->type = TK_CODE_POINT; + tok->u.code = (OnigCodePoint )num; + } + else { /* string */ + p = tok->backp + enclen(enc, tok->backp); + } + break; + } + } + else { + tok->u.c = c; + tok->escaped = 0; + +#ifdef USE_VARIABLE_META_CHARS + if ((c != ONIG_INEFFECTIVE_META_CHAR) && + IS_SYNTAX_OP(syn, ONIG_SYN_OP_VARIABLE_META_CHARACTERS)) { + if (c == MC_ANYCHAR(syn)) + goto any_char; + else if (c == MC_ANYTIME(syn)) + goto anytime; + else if (c == MC_ZERO_OR_ONE_TIME(syn)) + goto zero_or_one_time; + else if (c == MC_ONE_OR_MORE_TIME(syn)) + goto one_or_more_time; + else if (c == MC_ANYCHAR_ANYTIME(syn)) { + tok->type = TK_ANYCHAR_ANYTIME; + goto out; + } + } +#endif + + switch (c) { + case '.': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_DOT_ANYCHAR)) break; +#ifdef USE_VARIABLE_META_CHARS + any_char: +#endif + tok->type = TK_ANYCHAR; + break; + + case '*': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ASTERISK_ZERO_INF)) break; +#ifdef USE_VARIABLE_META_CHARS + anytime: +#endif + tok->type = TK_OP_REPEAT; + tok->u.repeat.lower = 0; + tok->u.repeat.upper = REPEAT_INFINITE; + goto greedy_check; + break; + + case '+': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_PLUS_ONE_INF)) break; +#ifdef USE_VARIABLE_META_CHARS + one_or_more_time: +#endif + tok->type = TK_OP_REPEAT; + tok->u.repeat.lower = 1; + tok->u.repeat.upper = REPEAT_INFINITE; + goto greedy_check; + break; + + case '?': + if (! 
IS_SYNTAX_OP(syn, ONIG_SYN_OP_QMARK_ZERO_ONE)) break; +#ifdef USE_VARIABLE_META_CHARS + zero_or_one_time: +#endif + tok->type = TK_OP_REPEAT; + tok->u.repeat.lower = 0; + tok->u.repeat.upper = 1; + goto greedy_check; + break; + + case '{': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_BRACE_INTERVAL)) break; + r = fetch_range_quantifier(&p, end, tok, env); + if (r < 0) return r; /* error */ + if (r == 0) goto greedy_check; + else if (r == 2) { /* {n} */ + if (IS_SYNTAX_BV(syn, ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY)) + goto possessive_check; + + goto greedy_check; + } + /* r == 1 : normal char */ + break; + + case '|': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_VBAR_ALT)) break; + tok->type = TK_ALT; + break; + + case '(': + if (PPEEK_IS('?') && + IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_QMARK_GROUP_EFFECT)) { + PINC; + if (PPEEK_IS('#')) { + PFETCH(c); + while (1) { + if (PEND) return ONIGERR_END_PATTERN_IN_GROUP; + PFETCH(c); + if (c == MC_ESC(syn)) { + if (!PEND) PFETCH(c); + } + else { + if (c == ')') break; + } + } + goto start; + } + PUNFETCH; + } + + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LPAREN_SUBEXP)) break; + tok->type = TK_SUBEXP_OPEN; + break; + + case ')': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LPAREN_SUBEXP)) break; + tok->type = TK_SUBEXP_CLOSE; + break; + + case '^': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LINE_ANCHOR)) break; + tok->type = TK_ANCHOR; + tok->u.subtype = (IS_SINGLELINE(env->option) + ? ANCHOR_BEGIN_BUF : ANCHOR_BEGIN_LINE); + break; + + case '$': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LINE_ANCHOR)) break; + tok->type = TK_ANCHOR; + tok->u.subtype = (IS_SINGLELINE(env->option) + ? ANCHOR_SEMI_END_BUF : ANCHOR_END_LINE); + break; + + case '[': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_BRACKET_CC)) break; + tok->type = TK_CC_OPEN; + break; + + case ']': + if (*src > env->pattern) /* /].../ is allowed. 
*/ + CLOSE_BRACKET_WITHOUT_ESC_WARN(env, (UChar* )"]"); + break; + + case '#': + if (IS_EXTEND(env->option)) { + while (!PEND) { + PFETCH(c); + if (ONIGENC_IS_CODE_NEWLINE(enc, c)) + break; + } + goto start; + break; + } + break; + + case ' ': case '\t': case '\n': case '\r': case '\f': + if (IS_EXTEND(env->option)) + goto start; + break; + + default: + /* string */ + break; + } + } + +#ifdef USE_VARIABLE_META_CHARS + out: +#endif + *src = p; + return tok->type; +} + +static int +add_ctype_to_cc_by_range(CClassNode* cc, int ctype ARG_UNUSED, int not, + OnigEncoding enc ARG_UNUSED, + OnigCodePoint sb_out, const OnigCodePoint mbr[]) +{ + int i, r; + OnigCodePoint j; + + int n = ONIGENC_CODE_RANGE_NUM(mbr); + + if (not == 0) { + for (i = 0; i < n; i++) { + for (j = ONIGENC_CODE_RANGE_FROM(mbr, i); + j <= ONIGENC_CODE_RANGE_TO(mbr, i); j++) { + if (j >= sb_out) { + if (j == ONIGENC_CODE_RANGE_TO(mbr, i)) i++; + else if (j > ONIGENC_CODE_RANGE_FROM(mbr, i)) { + r = add_code_range_to_buf(&(cc->mbuf), j, + ONIGENC_CODE_RANGE_TO(mbr, i)); + if (r != 0) return r; + i++; + } + + goto sb_end; + } + BITSET_SET_BIT(cc->bs, j); + } + } + + sb_end: + for ( ; i < n; i++) { + r = add_code_range_to_buf(&(cc->mbuf), + ONIGENC_CODE_RANGE_FROM(mbr, i), + ONIGENC_CODE_RANGE_TO(mbr, i)); + if (r != 0) return r; + } + } + else { + OnigCodePoint prev = 0; + + for (i = 0; i < n; i++) { + for (j = prev; + j < ONIGENC_CODE_RANGE_FROM(mbr, i); j++) { + if (j >= sb_out) { + goto sb_end2; + } + BITSET_SET_BIT(cc->bs, j); + } + prev = ONIGENC_CODE_RANGE_TO(mbr, i) + 1; + } + for (j = prev; j < sb_out; j++) { + BITSET_SET_BIT(cc->bs, j); + } + + sb_end2: + prev = sb_out; + + for (i = 0; i < n; i++) { + if (prev < ONIGENC_CODE_RANGE_FROM(mbr, i)) { + r = add_code_range_to_buf(&(cc->mbuf), prev, + ONIGENC_CODE_RANGE_FROM(mbr, i) - 1); + if (r != 0) return r; + } + prev = ONIGENC_CODE_RANGE_TO(mbr, i) + 1; + } + if (prev < 0x7fffffff) { + r = add_code_range_to_buf(&(cc->mbuf), prev, 0x7fffffff); + if 
(r != 0) return r; + } + } + + return 0; +} + +static int +add_ctype_to_cc(CClassNode* cc, int ctype, int not, ScanEnv* env) +{ + int c, r; + const OnigCodePoint *ranges; + OnigCodePoint sb_out; + OnigEncoding enc = env->enc; + + r = ONIGENC_GET_CTYPE_CODE_RANGE(enc, ctype, &sb_out, &ranges); + if (r == 0) { + return add_ctype_to_cc_by_range(cc, ctype, not, env->enc, sb_out, ranges); + } + else if (r != ONIG_NO_SUPPORT_CONFIG) { + return r; + } + + r = 0; + switch (ctype) { + case ONIGENC_CTYPE_ALPHA: + case ONIGENC_CTYPE_BLANK: + case ONIGENC_CTYPE_CNTRL: + case ONIGENC_CTYPE_DIGIT: + case ONIGENC_CTYPE_LOWER: + case ONIGENC_CTYPE_PUNCT: + case ONIGENC_CTYPE_SPACE: + case ONIGENC_CTYPE_UPPER: + case ONIGENC_CTYPE_XDIGIT: + case ONIGENC_CTYPE_ASCII: + case ONIGENC_CTYPE_ALNUM: + if (not != 0) { + for (c = 0; c < SINGLE_BYTE_SIZE; c++) { + if (! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype)) + BITSET_SET_BIT(cc->bs, c); + } + ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf); + } + else { + for (c = 0; c < SINGLE_BYTE_SIZE; c++) { + if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype)) + BITSET_SET_BIT(cc->bs, c); + } + } + break; + + case ONIGENC_CTYPE_GRAPH: + case ONIGENC_CTYPE_PRINT: + if (not != 0) { + for (c = 0; c < SINGLE_BYTE_SIZE; c++) { + if (! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype)) + BITSET_SET_BIT(cc->bs, c); + } + } + else { + for (c = 0; c < SINGLE_BYTE_SIZE; c++) { + if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype)) + BITSET_SET_BIT(cc->bs, c); + } + ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf); + } + break; + + case ONIGENC_CTYPE_WORD: + if (not == 0) { + for (c = 0; c < SINGLE_BYTE_SIZE; c++) { + if (IS_CODE_SB_WORD(enc, c)) BITSET_SET_BIT(cc->bs, c); + } + ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf); + } + else { + for (c = 0; c < SINGLE_BYTE_SIZE; c++) { + if ((ONIGENC_CODE_TO_MBCLEN(enc, c) > 0) /* check invalid code point */ + && ! 
ONIGENC_IS_CODE_WORD(enc, c)) + BITSET_SET_BIT(cc->bs, c); + } + } + break; + + default: + return ONIGERR_PARSER_BUG; + break; + } + + return r; +} + +static int +parse_posix_bracket(CClassNode* cc, UChar** src, UChar* end, ScanEnv* env) +{ +#define POSIX_BRACKET_CHECK_LIMIT_LENGTH 20 +#define POSIX_BRACKET_NAME_MIN_LEN 4 + + static PosixBracketEntryType PBS[] = { + { (UChar* )"alnum", ONIGENC_CTYPE_ALNUM, 5 }, + { (UChar* )"alpha", ONIGENC_CTYPE_ALPHA, 5 }, + { (UChar* )"blank", ONIGENC_CTYPE_BLANK, 5 }, + { (UChar* )"cntrl", ONIGENC_CTYPE_CNTRL, 5 }, + { (UChar* )"digit", ONIGENC_CTYPE_DIGIT, 5 }, + { (UChar* )"graph", ONIGENC_CTYPE_GRAPH, 5 }, + { (UChar* )"lower", ONIGENC_CTYPE_LOWER, 5 }, + { (UChar* )"print", ONIGENC_CTYPE_PRINT, 5 }, + { (UChar* )"punct", ONIGENC_CTYPE_PUNCT, 5 }, + { (UChar* )"space", ONIGENC_CTYPE_SPACE, 5 }, + { (UChar* )"upper", ONIGENC_CTYPE_UPPER, 5 }, + { (UChar* )"xdigit", ONIGENC_CTYPE_XDIGIT, 6 }, + { (UChar* )"ascii", ONIGENC_CTYPE_ASCII, 5 }, + { (UChar* )"word", ONIGENC_CTYPE_WORD, 4 }, + { (UChar* )NULL, -1, 0 } + }; + + PosixBracketEntryType *pb; + int not, i, r; + OnigCodePoint c; + OnigEncoding enc = env->enc; + UChar *p = *src; + PFETCH_READY; + + if (PPEEK_IS('^')) { + PINC; + not = 1; + } + else + not = 0; + + if (onigenc_strlen(enc, p, end) < POSIX_BRACKET_NAME_MIN_LEN + 3) + goto not_posix_bracket; + + for (pb = PBS; IS_NOT_NULL(pb->name); pb++) { + if (onigenc_with_ascii_strncmp(enc, p, end, pb->name, pb->len) == 0) { + p = (UChar* )onigenc_step(enc, p, end, pb->len); + if (onigenc_with_ascii_strncmp(enc, p, end, (UChar* )":]", 2) != 0) + return ONIGERR_INVALID_POSIX_BRACKET_TYPE; + + r = add_ctype_to_cc(cc, pb->ctype, not, env); + if (r != 0) return r; + + PINC; PINC; + *src = p; + return 0; + } + } + + not_posix_bracket: + c = 0; + i = 0; + while (!PEND && ((c = PPEEK) != ':') && c != ']') { + PINC; + if (++i > POSIX_BRACKET_CHECK_LIMIT_LENGTH) break; + } + if (c == ':' && ! PEND) { + PINC; + if (! 
PEND) { + PFETCH(c); + if (c == ']') + return ONIGERR_INVALID_POSIX_BRACKET_TYPE; + } + } + + return 1; /* 1: is not POSIX bracket, but no error. */ +} + +static int +fetch_char_property_to_ctype(UChar** src, UChar* end, ScanEnv* env) +{ + int r; + OnigCodePoint c; + OnigEncoding enc = env->enc; + UChar *prev, *start, *p = *src; + PFETCH_READY; + + r = 0; + start = prev = p; + + while (!PEND) { + prev = p; + PFETCH(c); + if (c == '}') { + r = ONIGENC_PROPERTY_NAME_TO_CTYPE(enc, start, prev); + if (r < 0) break; + + *src = p; + return r; + } + else if (c == '(' || c == ')' || c == '{' || c == '|') { + r = ONIGERR_INVALID_CHAR_PROPERTY_NAME; + break; + } + } + + onig_scan_env_set_error_string(env, r, *src, prev); + return r; +} + +static int +parse_char_property(Node** np, OnigToken* tok, UChar** src, UChar* end, + ScanEnv* env) +{ + int r, ctype; + CClassNode* cc; + + ctype = fetch_char_property_to_ctype(src, end, env); + if (ctype < 0) return ctype; + + *np = node_new_cclass(); + CHECK_NULL_RETURN_MEMERR(*np); + cc = NCCLASS(*np); + r = add_ctype_to_cc(cc, ctype, 0, env); + if (r != 0) return r; + if (tok->u.prop.not != 0) NCCLASS_SET_NOT(cc); + + return 0; +} + + +enum CCSTATE { + CCS_VALUE, + CCS_RANGE, + CCS_COMPLETE, + CCS_START +}; + +enum CCVALTYPE { + CCV_SB, + CCV_CODE_POINT, + CCV_CLASS +}; + +static int +next_state_class(CClassNode* cc, OnigCodePoint* vs, enum CCVALTYPE* type, + enum CCSTATE* state, ScanEnv* env) +{ + int r; + + if (*state == CCS_RANGE) + return ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE; + + if (*state == CCS_VALUE && *type != CCV_CLASS) { + if (*type == CCV_SB) + BITSET_SET_BIT(cc->bs, (int )(*vs)); + else if (*type == CCV_CODE_POINT) { + r = add_code_range(&(cc->mbuf), env, *vs, *vs); + if (r < 0) return r; + } + } + + *state = CCS_VALUE; + *type = CCV_CLASS; + return 0; +} + +static int +next_state_val(CClassNode* cc, OnigCodePoint *vs, OnigCodePoint v, + int* vs_israw, int v_israw, + enum CCVALTYPE intype, enum CCVALTYPE* type, + enum 
CCSTATE* state, ScanEnv* env) +{ + int r; + + switch (*state) { + case CCS_VALUE: + if (*type == CCV_SB) + BITSET_SET_BIT(cc->bs, (int )(*vs)); + else if (*type == CCV_CODE_POINT) { + r = add_code_range(&(cc->mbuf), env, *vs, *vs); + if (r < 0) return r; + } + break; + + case CCS_RANGE: + if (intype == *type) { + if (intype == CCV_SB) { + if (*vs > 0xff || v > 0xff) + return ONIGERR_INVALID_CODE_POINT_VALUE; + + if (*vs > v) { + if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC)) + goto ccs_range_end; + else + return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS; + } + bitset_set_range(cc->bs, (int )*vs, (int )v); + } + else { + r = add_code_range(&(cc->mbuf), env, *vs, v); + if (r < 0) return r; + } + } + else { +#if 0 + if (intype == CCV_CODE_POINT && *type == CCV_SB) { +#endif + if (*vs > v) { + if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC)) + goto ccs_range_end; + else + return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS; + } + bitset_set_range(cc->bs, (int )*vs, (int )(v < 0xff ? v : 0xff)); + r = add_code_range(&(cc->mbuf), env, (OnigCodePoint )*vs, v); + if (r < 0) return r; +#if 0 + } + else + return ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE; +#endif + } + ccs_range_end: + *state = CCS_COMPLETE; + break; + + case CCS_COMPLETE: + case CCS_START: + *state = CCS_VALUE; + break; + + default: + break; + } + + *vs_israw = v_israw; + *vs = v; + *type = intype; + return 0; +} + +static int +code_exist_check(OnigCodePoint c, UChar* from, UChar* end, int ignore_escaped, + ScanEnv* env) +{ + int in_esc; + OnigCodePoint code; + OnigEncoding enc = env->enc; + UChar* p = from; + PFETCH_READY; + + in_esc = 0; + while (! 
PEND) { + if (ignore_escaped && in_esc) { + in_esc = 0; + } + else { + PFETCH(code); + if (code == c) return 1; + if (code == MC_ESC(env->syntax)) in_esc = 1; + } + } + return 0; +} + +static int +parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, + ScanEnv* env) +{ + int r, neg, len, fetched, and_start; + OnigCodePoint v, vs; + UChar *p; + Node* node; + CClassNode *cc, *prev_cc; + CClassNode work_cc; + + enum CCSTATE state; + enum CCVALTYPE val_type, in_type; + int val_israw, in_israw; + + prev_cc = (CClassNode* )NULL; + *np = NULL_NODE; + r = fetch_token_in_cc(tok, src, end, env); + if (r == TK_CHAR && tok->u.c == '^' && tok->escaped == 0) { + neg = 1; + r = fetch_token_in_cc(tok, src, end, env); + } + else { + neg = 0; + } + + if (r < 0) return r; + if (r == TK_CC_CLOSE) { + if (! code_exist_check((OnigCodePoint )']', + *src, env->pattern_end, 1, env)) + return ONIGERR_EMPTY_CHAR_CLASS; + + CC_ESC_WARN(env, (UChar* )"]"); + r = tok->type = TK_CHAR; /* allow []...] */ + } + + *np = node = node_new_cclass(); + CHECK_NULL_RETURN_MEMERR(node); + cc = NCCLASS(node); + + and_start = 0; + state = CCS_START; + p = *src; + while (r != TK_CC_CLOSE) { + fetched = 0; + switch (r) { + case TK_CHAR: + len = ONIGENC_CODE_TO_MBCLEN(env->enc, tok->u.c); + if (len > 1) { + in_type = CCV_CODE_POINT; + } + else if (len < 0) { + r = len; + goto err; + } + else { + sb_char: + in_type = CCV_SB; + } + v = (OnigCodePoint )tok->u.c; + in_israw = 0; + goto val_entry2; + break; + + case TK_RAW_BYTE: + /* tok->base != 0 : octal or hexadec. */ + if (! 
ONIGENC_IS_SINGLEBYTE(env->enc) && tok->base != 0) { + UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN]; + UChar* bufe = buf + ONIGENC_CODE_TO_MBC_MAXLEN; + UChar* psave = p; + int i, base = tok->base; + + buf[0] = tok->u.c; + for (i = 1; i < ONIGENC_MBC_MAXLEN(env->enc); i++) { + r = fetch_token_in_cc(tok, &p, end, env); + if (r < 0) goto err; + if (r != TK_RAW_BYTE || tok->base != base) { + fetched = 1; + break; + } + buf[i] = tok->u.c; + } + + if (i < ONIGENC_MBC_MINLEN(env->enc)) { + r = ONIGERR_TOO_SHORT_MULTI_BYTE_STRING; + goto err; + } + + len = enclen(env->enc, buf); + if (i < len) { + r = ONIGERR_TOO_SHORT_MULTI_BYTE_STRING; + goto err; + } + else if (i > len) { /* fetch back */ + p = psave; + for (i = 1; i < len; i++) { + r = fetch_token_in_cc(tok, &p, end, env); + } + fetched = 0; + } + + if (i == 1) { + v = (OnigCodePoint )buf[0]; + goto raw_single; + } + else { + v = ONIGENC_MBC_TO_CODE(env->enc, buf, bufe); + in_type = CCV_CODE_POINT; + } + } + else { + v = (OnigCodePoint )tok->u.c; + raw_single: + in_type = CCV_SB; + } + in_israw = 1; + goto val_entry2; + break; + + case TK_CODE_POINT: + v = tok->u.code; + in_israw = 1; + val_entry: + len = ONIGENC_CODE_TO_MBCLEN(env->enc, v); + if (len < 0) { + r = len; + goto err; + } + in_type = (len == 1 ? 
CCV_SB : CCV_CODE_POINT); + val_entry2: + r = next_state_val(cc, &vs, v, &val_israw, in_israw, in_type, &val_type, + &state, env); + if (r != 0) goto err; + break; + + case TK_POSIX_BRACKET_OPEN: + r = parse_posix_bracket(cc, &p, end, env); + if (r < 0) goto err; + if (r == 1) { /* is not POSIX bracket */ + CC_ESC_WARN(env, (UChar* )"["); + p = tok->backp; + v = (OnigCodePoint )tok->u.c; + in_israw = 0; + goto val_entry; + } + goto next_class; + break; + + case TK_CHAR_TYPE: + r = add_ctype_to_cc(cc, tok->u.prop.ctype, tok->u.prop.not, env); + if (r != 0) return r; + + next_class: + r = next_state_class(cc, &vs, &val_type, &state, env); + if (r != 0) goto err; + break; + + case TK_CHAR_PROPERTY: + { + int ctype; + + ctype = fetch_char_property_to_ctype(&p, end, env); + if (ctype < 0) return ctype; + r = add_ctype_to_cc(cc, ctype, tok->u.prop.not, env); + if (r != 0) return r; + goto next_class; + } + break; + + case TK_CC_RANGE: + if (state == CCS_VALUE) { + r = fetch_token_in_cc(tok, &p, end, env); + if (r < 0) goto err; + fetched = 1; + if (r == TK_CC_CLOSE) { /* allow [x-] */ + range_end_val: + v = (OnigCodePoint )'-'; + in_israw = 0; + goto val_entry; + } + else if (r == TK_CC_AND) { + CC_ESC_WARN(env, (UChar* )"-"); + goto range_end_val; + } + state = CCS_RANGE; + } + else if (state == CCS_START) { + /* [-xa] is allowed */ + v = (OnigCodePoint )tok->u.c; + in_israw = 0; + + r = fetch_token_in_cc(tok, &p, end, env); + if (r < 0) goto err; + fetched = 1; + /* [--x] or [a&&-x] is warned. 
*/ + if (r == TK_CC_RANGE || and_start != 0) + CC_ESC_WARN(env, (UChar* )"-"); + + goto val_entry; + } + else if (state == CCS_RANGE) { + CC_ESC_WARN(env, (UChar* )"-"); + goto sb_char; /* [!--x] is allowed */ + } + else { /* CCS_COMPLETE */ + r = fetch_token_in_cc(tok, &p, end, env); + if (r < 0) goto err; + fetched = 1; + if (r == TK_CC_CLOSE) goto range_end_val; /* allow [a-b-] */ + else if (r == TK_CC_AND) { + CC_ESC_WARN(env, (UChar* )"-"); + goto range_end_val; + } + + if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC)) { + CC_ESC_WARN(env, (UChar* )"-"); + goto sb_char; /* [0-9-a] is allowed as [0-9\-a] */ + } + r = ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS; + goto err; + } + break; + + case TK_CC_CC_OPEN: /* [ */ + { + Node *anode; + CClassNode* acc; + + r = parse_char_class(&anode, tok, &p, end, env); + if (r != 0) goto cc_open_err; + acc = NCCLASS(anode); + r = or_cclass(cc, acc, env->enc); + + onig_node_free(anode); + cc_open_err: + if (r != 0) goto err; + } + break; + + case TK_CC_AND: /* && */ + { + if (state == CCS_VALUE) { + r = next_state_val(cc, &vs, 0, &val_israw, 0, val_type, + &val_type, &state, env); + if (r != 0) goto err; + } + /* initialize local variables */ + and_start = 1; + state = CCS_START; + + if (IS_NOT_NULL(prev_cc)) { + r = and_cclass(prev_cc, cc, env->enc); + if (r != 0) goto err; + bbuf_free(cc->mbuf); + } + else { + prev_cc = cc; + cc = &work_cc; + } + initialize_cclass(cc); + } + break; + + case TK_EOT: + r = ONIGERR_PREMATURE_END_OF_CHAR_CLASS; + goto err; + break; + default: + r = ONIGERR_PARSER_BUG; + goto err; + break; + } + + if (fetched) + r = tok->type; + else { + r = fetch_token_in_cc(tok, &p, end, env); + if (r < 0) goto err; + } + } + + if (state == CCS_VALUE) { + r = next_state_val(cc, &vs, 0, &val_israw, 0, val_type, + &val_type, &state, env); + if (r != 0) goto err; + } + + if (IS_NOT_NULL(prev_cc)) { + r = and_cclass(prev_cc, cc, env->enc); + if (r != 0) goto err; + bbuf_free(cc->mbuf); + cc 
= prev_cc; + } + + if (neg != 0) + NCCLASS_SET_NOT(cc); + else + NCCLASS_CLEAR_NOT(cc); + if (IS_NCCLASS_NOT(cc) && + IS_SYNTAX_BV(env->syntax, ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC)) { + int is_empty; + + is_empty = (IS_NULL(cc->mbuf) ? 1 : 0); + if (is_empty != 0) + BITSET_IS_EMPTY(cc->bs, is_empty); + + if (is_empty == 0) { +#define NEWLINE_CODE 0x0a + + if (ONIGENC_IS_CODE_NEWLINE(env->enc, NEWLINE_CODE)) { + if (ONIGENC_CODE_TO_MBCLEN(env->enc, NEWLINE_CODE) == 1) + BITSET_SET_BIT(cc->bs, NEWLINE_CODE); + else + add_code_range(&(cc->mbuf), env, NEWLINE_CODE, NEWLINE_CODE); + } + } + } + *src = p; + return 0; + + err: + if (cc != NCCLASS(*np)) + bbuf_free(cc->mbuf); + onig_node_free(*np); + return r; +} + +static int parse_subexp(Node** top, OnigToken* tok, int term, + UChar** src, UChar* end, ScanEnv* env); + +static int +parse_enclose(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, + ScanEnv* env) +{ + int r, num; + Node *target; + OnigOptionType option; + OnigCodePoint c; + OnigEncoding enc = env->enc; + +#ifdef USE_NAMED_GROUP + int list_capture; +#endif + + UChar* p = *src; + PFETCH_READY; + + *np = NULL; + if (PEND) return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS; + + option = env->option; + if (PPEEK_IS('?') && + IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_GROUP_EFFECT)) { + PINC; + if (PEND) return ONIGERR_END_PATTERN_IN_GROUP; + + PFETCH(c); + switch (c) { + case ':': /* (?:...) grouping only */ + group: + r = fetch_token(tok, &p, end, env); + if (r < 0) return r; + r = parse_subexp(np, tok, term, &p, end, env); + if (r < 0) return r; + *src = p; + return 1; /* group */ + break; + + case '=': + *np = onig_node_new_anchor(ANCHOR_PREC_READ); + break; + case '!': /* preceding read */ + *np = onig_node_new_anchor(ANCHOR_PREC_READ_NOT); + break; + case '>': /* (?>...) 
stop backtrack */ + *np = node_new_enclose(ENCLOSE_STOP_BACKTRACK); + break; + +#ifdef USE_NAMED_GROUP + case '\'': + if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) { + goto named_group1; + } + else + return ONIGERR_UNDEFINED_GROUP_OPTION; + break; +#endif + + case '<': /* look behind (?<=...), (?syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) { + UChar *name; + UChar *name_end; + + PUNFETCH; + c = '<'; + + named_group1: + list_capture = 0; + + named_group2: + name = p; + r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env, &num, 0); + if (r < 0) return r; + + num = scan_env_add_mem_entry(env); + if (num < 0) return num; + if (list_capture != 0 && num >= (int )BIT_STATUS_BITS_NUM) + return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY; + + r = name_add(env->reg, name, name_end, num, env); + if (r != 0) return r; + *np = node_new_enclose_memory(env->option, 1); + CHECK_NULL_RETURN_MEMERR(*np); + NENCLOSE(*np)->regnum = num; + if (list_capture != 0) + BIT_STATUS_ON_AT_SIMPLE(env->capture_history, num); + env->num_named++; + } + else { + return ONIGERR_UNDEFINED_GROUP_OPTION; + } + } +#else + else { + return ONIGERR_UNDEFINED_GROUP_OPTION; + } +#endif + break; + + case '@': + if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY)) { +#ifdef USE_NAMED_GROUP + if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) { + PFETCH(c); + if (c == '<' || c == '\'') { + list_capture = 1; + goto named_group2; /* (?@...) 
*/ + } + PUNFETCH; + } +#endif + *np = node_new_enclose_memory(env->option, 0); + CHECK_NULL_RETURN_MEMERR(*np); + num = scan_env_add_mem_entry(env); + if (num < 0) { + onig_node_free(*np); + return num; + } + else if (num >= (int )BIT_STATUS_BITS_NUM) { + onig_node_free(*np); + return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY; + } + NENCLOSE(*np)->regnum = num; + BIT_STATUS_ON_AT_SIMPLE(env->capture_history, num); + } + else { + return ONIGERR_UNDEFINED_GROUP_OPTION; + } + break; + +#ifdef USE_POSIXLINE_OPTION + case 'p': +#endif + case '-': case 'i': case 'm': case 's': case 'x': + { + int neg = 0; + + while (1) { + switch (c) { + case ':': + case ')': + break; + + case '-': neg = 1; break; + case 'x': ONOFF(option, ONIG_OPTION_EXTEND, neg); break; + case 'i': ONOFF(option, ONIG_OPTION_IGNORECASE, neg); break; + case 's': + if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL)) { + ONOFF(option, ONIG_OPTION_MULTILINE, neg); + } + else + return ONIGERR_UNDEFINED_GROUP_OPTION; + break; + + case 'm': + if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL)) { + ONOFF(option, ONIG_OPTION_SINGLELINE, (neg == 0 ? 
1 : 0)); + } + else if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_RUBY)) { + ONOFF(option, ONIG_OPTION_MULTILINE, neg); + } + else + return ONIGERR_UNDEFINED_GROUP_OPTION; + break; +#ifdef USE_POSIXLINE_OPTION + case 'p': + ONOFF(option, ONIG_OPTION_MULTILINE|ONIG_OPTION_SINGLELINE, neg); + break; +#endif + default: + return ONIGERR_UNDEFINED_GROUP_OPTION; + } + + if (c == ')') { + *np = node_new_option(option); + CHECK_NULL_RETURN_MEMERR(*np); + *src = p; + return 2; /* option only */ + } + else if (c == ':') { + OnigOptionType prev = env->option; + + env->option = option; + r = fetch_token(tok, &p, end, env); + if (r < 0) return r; + r = parse_subexp(&target, tok, term, &p, end, env); + env->option = prev; + if (r < 0) return r; + *np = node_new_option(option); + CHECK_NULL_RETURN_MEMERR(*np); + NENCLOSE(*np)->target = target; + *src = p; + return 0; + } + + if (PEND) return ONIGERR_END_PATTERN_IN_GROUP; + PFETCH(c); + } + } + break; + + default: + return ONIGERR_UNDEFINED_GROUP_OPTION; + } + } + else { + if (ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_DONT_CAPTURE_GROUP)) + goto group; + + *np = node_new_enclose_memory(env->option, 0); + CHECK_NULL_RETURN_MEMERR(*np); + num = scan_env_add_mem_entry(env); + if (num < 0) return num; + NENCLOSE(*np)->regnum = num; + } + + CHECK_NULL_RETURN_MEMERR(*np); + r = fetch_token(tok, &p, end, env); + if (r < 0) return r; + r = parse_subexp(&target, tok, term, &p, end, env); + if (r < 0) return r; + + if (NTYPE(*np) == NT_ANCHOR) + NANCHOR(*np)->target = target; + else { + NENCLOSE(*np)->target = target; + if (NENCLOSE(*np)->type == ENCLOSE_MEMORY) { + /* Don't move this to previous of parse_subexp() */ + r = scan_env_set_mem_node(env, NENCLOSE(*np)->regnum, *np); + if (r != 0) return r; + } + } + + *src = p; + return 0; +} + +static const char* PopularQStr[] = { + "?", "*", "+", "??", "*?", "+?" +}; + +static const char* ReduceQStr[] = { + "", "", "*", "*?", "??", "+ and ??", "+? and ?" 
+}; + +static int +set_quantifier(Node* qnode, Node* target, int group, ScanEnv* env) +{ + QtfrNode* qn; + + qn = NQTFR(qnode); + if (qn->lower == 1 && qn->upper == 1) { + return 1; + } + + switch (NTYPE(target)) { + case NT_STR: + if (! group) { + StrNode* sn = NSTR(target); + if (str_node_can_be_split(sn, env->enc)) { + Node* n = str_node_split_last_char(sn, env->enc); + if (IS_NOT_NULL(n)) { + qn->target = n; + return 2; + } + } + } + break; + + case NT_QTFR: + { /* check redundant double repeat. */ + /* verbose warn (?:.?)? etc... but not warn (.?)? etc... */ + QtfrNode* qnt = NQTFR(target); + int nestq_num = popular_quantifier_num(qn); + int targetq_num = popular_quantifier_num(qnt); + +#ifdef USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR + if (!IS_QUANTIFIER_BY_NUMBER(qn) && !IS_QUANTIFIER_BY_NUMBER(qnt) && + IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT)) { + UChar buf[WARN_BUFSIZE]; + + switch(ReduceTypeTable[targetq_num][nestq_num]) { + case RQ_ASIS: + break; + + case RQ_DEL: + if (onig_verb_warn != onig_null_warn) { + onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc, + env->pattern, env->pattern_end, + (UChar* )"redundant nested repeat operator"); + (*onig_verb_warn)((char* )buf); + } + goto warn_exit; + break; + + default: + if (onig_verb_warn != onig_null_warn) { + onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc, + env->pattern, env->pattern_end, + (UChar* )"nested repeat operator %s and %s was replaced with '%s'", + PopularQStr[targetq_num], PopularQStr[nestq_num], + ReduceQStr[ReduceTypeTable[targetq_num][nestq_num]]); + (*onig_verb_warn)((char* )buf); + } + goto warn_exit; + break; + } + } + + warn_exit: +#endif + if (targetq_num >= 0) { + if (nestq_num >= 0) { + onig_reduce_nested_quantifier(qnode, target); + goto q_exit; + } + else if (targetq_num == 1 || targetq_num == 2) { /* * or + */ + /* (?:a*){n,m}, (?:a+){n,m} => (?:a*){n,n}, (?:a+){n,n} */ + if (! 
IS_REPEAT_INFINITE(qn->upper) && qn->upper > 1 && qn->greedy) { + qn->upper = (qn->lower == 0 ? 1 : qn->lower); + } + } + } + } + break; + + default: + break; + } + + qn->target = target; + q_exit: + return 0; +} + + +#ifdef USE_SHARED_CCLASS_TABLE + +#define THRESHOLD_RANGE_NUM_FOR_SHARE_CCLASS 8 + +/* for ctype node hash table */ + +typedef struct { + OnigEncoding enc; + int not; + int type; +} type_cclass_key; + +static int type_cclass_cmp(type_cclass_key* x, type_cclass_key* y) +{ + if (x->type != y->type) return 1; + if (x->enc != y->enc) return 1; + if (x->not != y->not) return 1; + return 0; +} + +static int type_cclass_hash(type_cclass_key* key) +{ + int i, val; + UChar *p; + + val = 0; + + p = (UChar* )&(key->enc); + for (i = 0; i < (int )sizeof(key->enc); i++) { + val = val * 997 + (int )*p++; + } + + p = (UChar* )(&key->type); + for (i = 0; i < (int )sizeof(key->type); i++) { + val = val * 997 + (int )*p++; + } + + val += key->not; + return val + (val >> 5); +} + +static struct st_hash_type type_type_cclass_hash = { + type_cclass_cmp, + type_cclass_hash, +}; + +static st_table* OnigTypeCClassTable; + + +static int +i_free_shared_class(type_cclass_key* key, Node* node, void* arg ARG_UNUSED) +{ + if (IS_NOT_NULL(node)) { + CClassNode* cc = NCCLASS(node); + if (IS_NOT_NULL(cc->mbuf)) xfree(cc->mbuf); + xfree(node); + } + + if (IS_NOT_NULL(key)) xfree(key); + return ST_DELETE; +} + +extern int +onig_free_shared_cclass_table(void) +{ + if (IS_NOT_NULL(OnigTypeCClassTable)) { + onig_st_foreach(OnigTypeCClassTable, i_free_shared_class, 0); + onig_st_free_table(OnigTypeCClassTable); + OnigTypeCClassTable = NULL; + } + + return 0; +} + +#endif /* USE_SHARED_CCLASS_TABLE */ + + +#ifndef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS +static int +clear_not_flag_cclass(CClassNode* cc, OnigEncoding enc) +{ + BBuf *tbuf; + int r; + + if (IS_NCCLASS_NOT(cc)) { + bitset_invert(cc->bs); + + if (! 
ONIGENC_IS_SINGLEBYTE(enc)) { + r = not_code_range_buf(enc, cc->mbuf, &tbuf); + if (r != 0) return r; + + bbuf_free(cc->mbuf); + cc->mbuf = tbuf; + } + + NCCLASS_CLEAR_NOT(cc); + } + + return 0; +} +#endif /* CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS */ + +typedef struct { + ScanEnv* env; + CClassNode* cc; + Node* alt_root; + Node** ptail; +} IApplyCaseFoldArg; + +static int +i_apply_case_fold(OnigCodePoint from, OnigCodePoint to[], + int to_len, void* arg) +{ + IApplyCaseFoldArg* iarg; + ScanEnv* env; + CClassNode* cc; + BitSetRef bs; + + iarg = (IApplyCaseFoldArg* )arg; + env = iarg->env; + cc = iarg->cc; + bs = cc->bs; + + if (to_len == 1) { + int is_in = onig_is_code_in_cc(env->enc, from, cc); +#ifdef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS + if ((is_in != 0 && !IS_NCCLASS_NOT(cc)) || + (is_in == 0 && IS_NCCLASS_NOT(cc))) { + if (ONIGENC_MBC_MINLEN(env->enc) > 1 || *to >= SINGLE_BYTE_SIZE) { + add_code_range(&(cc->mbuf), env, *to, *to); + } + else { + BITSET_SET_BIT(bs, *to); + } + } +#else + if (is_in != 0) { + if (ONIGENC_MBC_MINLEN(env->enc) > 1 || *to >= SINGLE_BYTE_SIZE) { + if (IS_NCCLASS_NOT(cc)) clear_not_flag_cclass(cc, env->enc); + add_code_range(&(cc->mbuf), env, *to, *to); + } + else { + if (IS_NCCLASS_NOT(cc)) { + BITSET_CLEAR_BIT(bs, *to); + } + else + BITSET_SET_BIT(bs, *to); + } + } +#endif /* CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS */ + } + else { + int r, i, len; + UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN]; + Node *snode = NULL_NODE; + + if (onig_is_code_in_cc(env->enc, from, cc) +#ifdef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS + && !IS_NCCLASS_NOT(cc) +#endif + ) { + for (i = 0; i < to_len; i++) { + len = ONIGENC_CODE_TO_MBC(env->enc, to[i], buf); + if (i == 0) { + snode = onig_node_new_str(buf, buf + len); + CHECK_NULL_RETURN_MEMERR(snode); + + /* char-class expanded multi-char only + compare with string folded at match time. 
*/ + NSTRING_SET_AMBIG(snode); + } + else { + r = onig_node_str_cat(snode, buf, buf + len); + if (r < 0) { + onig_node_free(snode); + return r; + } + } + } + + *(iarg->ptail) = onig_node_new_alt(snode, NULL_NODE); + CHECK_NULL_RETURN_MEMERR(*(iarg->ptail)); + iarg->ptail = &(NCDR((*(iarg->ptail)))); + } + } + + return 0; +} + +static int +parse_exp(Node** np, OnigToken* tok, int term, + UChar** src, UChar* end, ScanEnv* env) +{ + int r, len, group = 0; + Node* qn; + Node** targetp; + + *np = NULL; + if (tok->type == (enum TokenSyms )term) + goto end_of_token; + + switch (tok->type) { + case TK_ALT: + case TK_EOT: + end_of_token: + *np = node_new_empty(); + return tok->type; + break; + + case TK_SUBEXP_OPEN: + r = parse_enclose(np, tok, TK_SUBEXP_CLOSE, src, end, env); + if (r < 0) return r; + if (r == 1) group = 1; + else if (r == 2) { /* option only */ + Node* target; + OnigOptionType prev = env->option; + + env->option = NENCLOSE(*np)->option; + r = fetch_token(tok, src, end, env); + if (r < 0) return r; + r = parse_subexp(&target, tok, term, src, end, env); + env->option = prev; + if (r < 0) return r; + NENCLOSE(*np)->target = target; + return tok->type; + } + break; + + case TK_SUBEXP_CLOSE: + if (! 
IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP)) + return ONIGERR_UNMATCHED_CLOSE_PARENTHESIS; + + if (tok->escaped) goto tk_raw_byte; + else goto tk_byte; + break; + + case TK_STRING: + tk_byte: + { + *np = node_new_str(tok->backp, *src); + CHECK_NULL_RETURN_MEMERR(*np); + + while (1) { + r = fetch_token(tok, src, end, env); + if (r < 0) return r; + if (r != TK_STRING) break; + + r = onig_node_str_cat(*np, tok->backp, *src); + if (r < 0) return r; + } + + string_end: + targetp = np; + goto repeat; + } + break; + + case TK_RAW_BYTE: + tk_raw_byte: + { + *np = node_new_str_raw_char((UChar )tok->u.c); + CHECK_NULL_RETURN_MEMERR(*np); + len = 1; + while (1) { + if (len >= ONIGENC_MBC_MINLEN(env->enc)) { + if (len == enclen(env->enc, NSTR(*np)->s)) { + r = fetch_token(tok, src, end, env); + NSTRING_CLEAR_RAW(*np); + goto string_end; + } + } + + r = fetch_token(tok, src, end, env); + if (r < 0) return r; + if (r != TK_RAW_BYTE) { + /* Don't use this, it is wrong for little endian encodings. 
*/ +#ifdef USE_PAD_TO_SHORT_BYTE_CHAR + int rem; + if (len < ONIGENC_MBC_MINLEN(env->enc)) { + rem = ONIGENC_MBC_MINLEN(env->enc) - len; + (void )node_str_head_pad(NSTR(*np), rem, (UChar )0); + if (len + rem == enclen(env->enc, NSTR(*np)->s)) { + NSTRING_CLEAR_RAW(*np); + goto string_end; + } + } +#endif + return ONIGERR_TOO_SHORT_MULTI_BYTE_STRING; + } + + r = node_str_cat_char(*np, (UChar )tok->u.c); + if (r < 0) return r; + + len++; + } + } + break; + + case TK_CODE_POINT: + { + UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN]; + int num = ONIGENC_CODE_TO_MBC(env->enc, tok->u.code, buf); + if (num < 0) return num; +#ifdef NUMBERED_CHAR_IS_NOT_CASE_AMBIG + *np = node_new_str_raw(buf, buf + num); +#else + *np = node_new_str(buf, buf + num); +#endif + CHECK_NULL_RETURN_MEMERR(*np); + } + break; + + case TK_QUOTE_OPEN: + { + OnigCodePoint end_op[2]; + UChar *qstart, *qend, *nextp; + + end_op[0] = (OnigCodePoint )MC_ESC(env->syntax); + end_op[1] = (OnigCodePoint )'E'; + qstart = *src; + qend = find_str_position(end_op, 2, qstart, end, &nextp, env->enc); + if (IS_NULL(qend)) { + nextp = qend = end; + } + *np = node_new_str(qstart, qend); + CHECK_NULL_RETURN_MEMERR(*np); + *src = nextp; + } + break; + + case TK_CHAR_TYPE: + { + switch (tok->u.prop.ctype) { + case ONIGENC_CTYPE_WORD: + *np = node_new_ctype(tok->u.prop.ctype, tok->u.prop.not); + CHECK_NULL_RETURN_MEMERR(*np); + break; + + case ONIGENC_CTYPE_SPACE: + case ONIGENC_CTYPE_DIGIT: + case ONIGENC_CTYPE_XDIGIT: + { + CClassNode* cc; + +#ifdef USE_SHARED_CCLASS_TABLE + const OnigCodePoint *mbr; + OnigCodePoint sb_out; + + r = ONIGENC_GET_CTYPE_CODE_RANGE(env->enc, tok->u.prop.ctype, + &sb_out, &mbr); + if (r == 0 && + ONIGENC_CODE_RANGE_NUM(mbr) + >= THRESHOLD_RANGE_NUM_FOR_SHARE_CCLASS) { + type_cclass_key key; + type_cclass_key* new_key; + + key.enc = env->enc; + key.not = tok->u.prop.not; + key.type = tok->u.prop.ctype; + + THREAD_ATOMIC_START; + + if (IS_NULL(OnigTypeCClassTable)) { + OnigTypeCClassTable + = 
onig_st_init_table_with_size(&type_type_cclass_hash, 10); + if (IS_NULL(OnigTypeCClassTable)) { + THREAD_ATOMIC_END; + return ONIGERR_MEMORY; + } + } + else { + if (onig_st_lookup(OnigTypeCClassTable, (st_data_t )&key, + (st_data_t* )np)) { + THREAD_ATOMIC_END; + break; + } + } + + *np = node_new_cclass_by_codepoint_range(tok->u.prop.not, + sb_out, mbr); + if (IS_NULL(*np)) { + THREAD_ATOMIC_END; + return ONIGERR_MEMORY; + } + + cc = NCCLASS(*np); + NCCLASS_SET_SHARE(cc); + new_key = (type_cclass_key* )xmalloc(sizeof(type_cclass_key)); + xmemcpy(new_key, &key, sizeof(type_cclass_key)); + onig_st_add_direct(OnigTypeCClassTable, (st_data_t )new_key, + (st_data_t )*np); + + THREAD_ATOMIC_END; + } + else { +#endif + *np = node_new_cclass(); + CHECK_NULL_RETURN_MEMERR(*np); + cc = NCCLASS(*np); + add_ctype_to_cc(cc, tok->u.prop.ctype, 0, env); + if (tok->u.prop.not != 0) NCCLASS_SET_NOT(cc); +#ifdef USE_SHARED_CCLASS_TABLE + } +#endif + } + break; + + default: + return ONIGERR_PARSER_BUG; + break; + } + } + break; + + case TK_CHAR_PROPERTY: + r = parse_char_property(np, tok, src, end, env); + if (r != 0) return r; + break; + + case TK_CC_OPEN: + { + CClassNode* cc; + + r = parse_char_class(np, tok, src, end, env); + if (r != 0) return r; + + cc = NCCLASS(*np); + if (IS_IGNORECASE(env->option)) { + IApplyCaseFoldArg iarg; + + iarg.env = env; + iarg.cc = cc; + iarg.alt_root = NULL_NODE; + iarg.ptail = &(iarg.alt_root); + + r = ONIGENC_APPLY_ALL_CASE_FOLD(env->enc, env->case_fold_flag, + i_apply_case_fold, &iarg); + if (r != 0) { + onig_node_free(iarg.alt_root); + return r; + } + if (IS_NOT_NULL(iarg.alt_root)) { + Node* work = onig_node_new_alt(*np, iarg.alt_root); + if (IS_NULL(work)) { + onig_node_free(iarg.alt_root); + return ONIGERR_MEMORY; + } + *np = work; + } + } + } + break; + + case TK_ANYCHAR: + *np = node_new_anychar(); + CHECK_NULL_RETURN_MEMERR(*np); + break; + + case TK_ANYCHAR_ANYTIME: + *np = node_new_anychar(); + CHECK_NULL_RETURN_MEMERR(*np); + qn = 
node_new_quantifier(0, REPEAT_INFINITE, 0); + CHECK_NULL_RETURN_MEMERR(qn); + NQTFR(qn)->target = *np; + *np = qn; + break; + + case TK_BACKREF: + len = tok->u.backref.num; + *np = node_new_backref(len, + (len > 1 ? tok->u.backref.refs : &(tok->u.backref.ref1)), + tok->u.backref.by_name, +#ifdef USE_BACKREF_WITH_LEVEL + tok->u.backref.exist_level, + tok->u.backref.level, +#endif + env); + CHECK_NULL_RETURN_MEMERR(*np); + break; + +#ifdef USE_SUBEXP_CALL + case TK_CALL: + { + int gnum = tok->u.call.gnum; + + if (gnum < 0) { + gnum = BACKREF_REL_TO_ABS(gnum, env); + if (gnum <= 0) + return ONIGERR_INVALID_BACKREF; + } + *np = node_new_call(tok->u.call.name, tok->u.call.name_end, gnum); + CHECK_NULL_RETURN_MEMERR(*np); + env->num_call++; + } + break; +#endif + + case TK_ANCHOR: + *np = onig_node_new_anchor(tok->u.anchor); + break; + + case TK_OP_REPEAT: + case TK_INTERVAL: + if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS)) { + if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS)) + return ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED; + else + *np = node_new_empty(); + } + else { + goto tk_byte; + } + break; + + default: + return ONIGERR_PARSER_BUG; + break; + } + + { + targetp = np; + + re_entry: + r = fetch_token(tok, src, end, env); + if (r < 0) return r; + + repeat: + if (r == TK_OP_REPEAT || r == TK_INTERVAL) { + if (is_invalid_quantifier_target(*targetp)) + return ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID; + + qn = node_new_quantifier(tok->u.repeat.lower, tok->u.repeat.upper, + (r == TK_INTERVAL ? 
1 : 0));
+      CHECK_NULL_RETURN_MEMERR(qn);
+      NQTFR(qn)->greedy = tok->u.repeat.greedy;
+      r = set_quantifier(qn, *targetp, group, env);
+      if (r < 0) {
+        onig_node_free(qn);
+        return r;
+      }
+
+      if (tok->u.repeat.possessive != 0) {
+        Node* en;
+        en = node_new_enclose(ENCLOSE_STOP_BACKTRACK);
+        if (IS_NULL(en)) {
+          onig_node_free(qn);
+          return ONIGERR_MEMORY;
+        }
+        NENCLOSE(en)->target = qn;
+        qn = en;
+      }
+
+      if (r == 0) {
+        *targetp = qn;
+      }
+      else if (r == 1) {
+        onig_node_free(qn);
+      }
+      else if (r == 2) { /* split case: /abc+/ */
+        Node *tmp;
+
+        *targetp = node_new_list(*targetp, NULL);
+        if (IS_NULL(*targetp)) {
+          onig_node_free(qn);
+          return ONIGERR_MEMORY;
+        }
+        tmp = NCDR(*targetp) = node_new_list(qn, NULL);
+        if (IS_NULL(tmp)) {
+          onig_node_free(qn);
+          return ONIGERR_MEMORY;
+        }
+        targetp = &(NCAR(tmp));
+      }
+      goto re_entry;
+    }
+  }
+
+  return r;
+}
+
+/*
+ * Parse one branch: call parse_exp() repeatedly until it yields TK_EOT,
+ * `term` or TK_ALT.
+ *
+ * *top is reset to NULL on entry.  When the first parse_exp() call already
+ * yields one of the stop tokens, *top is that single node; otherwise *top is
+ * an NT_LIST chain of the parsed nodes.
+ *
+ * Returns the last value produced by parse_exp() (the stop token), a
+ * negative code forwarded from parse_exp(), or ONIGERR_MEMORY when a list
+ * cell cannot be allocated.  On failure *top holds whatever was linked so
+ * far (possibly NULL); parse_subexp() releases it with onig_node_free().
+ */
+static int
+parse_branch(Node** top, OnigToken* tok, int term,
+             UChar** src, UChar* end, ScanEnv* env)
+{
+  int r;
+  Node *node, **headp;
+
+  *top = NULL;
+  /* NOTE(review): `node` is not released when parse_exp() fails; whether it
+     is initialized on that path depends on the start of parse_exp(), which
+     is outside this chunk -- confirm before adding a free here. */
+  r = parse_exp(&node, tok, term, src, end, env);
+  if (r < 0) return r;
+
+  if (r == TK_EOT || r == term || r == TK_ALT) {
+    *top = node;
+  }
+  else {
+    *top = node_new_list(node, NULL);
+    if (IS_NULL(*top)) {
+      /* node was never linked into a list, so it has no other owner. */
+      onig_node_free(node);
+      return ONIGERR_MEMORY;
+    }
+    headp = &(NCDR(*top));
+    while (r != TK_EOT && r != term && r != TK_ALT) {
+      r = parse_exp(&node, tok, term, src, end, env);
+      if (r < 0) return r;
+
+      if (NTYPE(node) == NT_LIST) {
+        /* parse_exp() produced a list itself: splice it in and continue
+           appending after its last cell. */
+        *headp = node;
+        while (IS_NOT_NULL(NCDR(node))) node = NCDR(node);
+        headp = &(NCDR(node));
+      }
+      else {
+        *headp = node_new_list(node, NULL);
+        if (IS_NULL(*headp)) {
+          /* The chain in *top stays NULL-terminated; only the unlinked
+             node needs to be released here. */
+          onig_node_free(node);
+          return ONIGERR_MEMORY;
+        }
+        headp = &(NCDR(*headp));
+      }
+    }
+  }
+
+  return r;
+}
+
+/* term_tok: TK_EOT or TK_SUBEXP_CLOSE */
+/*
+ * Parse a sequence of branches separated by TK_ALT.
+ *
+ * *top is reset to NULL on entry and receives either the single branch or a
+ * chain built with onig_node_new_alt().  Returns the token that ended the
+ * sub-expression, a negative code forwarded from fetch_token() or
+ * parse_branch(), ONIGERR_MEMORY when an alternation cell cannot be
+ * allocated, or ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS /
+ * ONIGERR_PARSER_BUG when the sub-expression does not end with `term`.
+ * On failure *top keeps the alternation chain built so far.
+ */
+static int
+parse_subexp(Node** top, OnigToken* tok, int term,
+             UChar** src, UChar* end, ScanEnv* env)
+{
+  int r;
+  Node *node, **headp;
+
+  *top = NULL;
+  r = parse_branch(&node, tok, term, src, end, env);
+  if (r < 0) {
+    onig_node_free(node);
+    return r;
+  }
+
+  if (r == term) {
+    *top = node;
+  }
+  else if (r == TK_ALT) {
+    *top = onig_node_new_alt(node, NULL);
+    if (IS_NULL(*top)) {
+      onig_node_free(node);
+      return ONIGERR_MEMORY;
+    }
+    headp = &(NCDR(*top));
+    while (r == TK_ALT) {
+      r = fetch_token(tok, src, end, env);
+      if (r < 0) return r;
+      r = parse_branch(&node, tok, term, src, end, env);
+      if (r < 0) {
+        /* parse_branch() reset `node` to NULL before parsing, so this
+           releases exactly the partial branch, same as the first call. */
+        onig_node_free(node);
+        return r;
+      }
+
+      *headp = onig_node_new_alt(node, NULL);
+      if (IS_NULL(*headp)) {
+        onig_node_free(node);
+        return ONIGERR_MEMORY;
+      }
+      headp = &(NCDR(*headp));
+    }
+
+    if (tok->type != (enum TokenSyms )term)
+      goto err;
+  }
+  else {
+  err:
+    if (term == TK_SUBEXP_CLOSE)
+      return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS;
+    else
+      return ONIGERR_PARSER_BUG;
+  }
+
+  return r;
+}
+
+/*
+ * Fetch the first token and parse the whole pattern up to TK_EOT.
+ * Returns 0 on success or the negative code from fetch_token() /
+ * parse_subexp().
+ */
+static int
+parse_regexp(Node** top, UChar** src, UChar* end, ScanEnv* env)
+{
+  int r;
+  OnigToken tok;
+
+  r = fetch_token(&tok, src, end, env);
+  if (r < 0) return r;
+  r = parse_subexp(top, &tok, TK_EOT, src, end, env);
+  if (r < 0) return r;
+  return 0;
+}
+
+/*
+ * Entry point of the parser: reset `env`, copy the options, case-fold flag,
+ * encoding and syntax from `reg`, then parse [pattern, end) into *root.
+ * reg->num_mem is updated from env->num_mem whether or not parsing
+ * succeeded.  Returns the result of parse_regexp().
+ */
+extern int
+onig_parse_make_tree(Node** root, const UChar* pattern, const UChar* end,
+                     regex_t* reg, ScanEnv* env)
+{
+  int r;
+  UChar* p;
+
+#ifdef USE_NAMED_GROUP
+  names_clear(reg);
+#endif
+
+  scan_env_clear(env);
+  env->option = reg->options;
+  env->case_fold_flag = reg->case_fold_flag;
+  env->enc = reg->enc;
+  env->syntax = reg->syntax;
+  env->pattern = (UChar* )pattern;
+  env->pattern_end = (UChar* )end;
+  env->reg = reg;
+
+  *root = NULL;
+  p = (UChar* )pattern;
+  r = parse_regexp(root, &p, (UChar* )end, env);
+  reg->num_mem = env->num_mem;
+  return r;
+}
+
+/* Record [arg, arg_end) in env->error / env->error_end; `ecode` is unused. */
+extern void
+onig_scan_env_set_error_string(ScanEnv* env, int ecode ARG_UNUSED,
+                               UChar* arg, UChar* arg_end)
+{
+  env->error = arg;
+  env->error_end = arg_end;
+}
diff --git a/oniguruma/regparse.h b/oniguruma/regparse.h
new file mode 100644
index 0000000..0c5c2c9
--- /dev/null
+++ b/oniguruma/regparse.h
@@ -0,0 +1,351 @@
+#ifndef REGPARSE_H
+#define REGPARSE_H
+/**********************************************************************
+  regparse.h - Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2002-2007 K.Kosako
+ * All rights reserved.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */
+
+#include "regint.h"
+
+/* node type */
+#define NT_STR 0
+#define NT_CCLASS 1
+#define NT_CTYPE 2
+#define NT_CANY 3
+#define NT_BREF 4
+#define NT_QTFR 5
+#define NT_ENCLOSE 6
+#define NT_ANCHOR 7
+#define NT_LIST 8
+#define NT_ALT 9
+#define NT_CALL 10
+
+/* node type bit */
+#define NTYPE2BIT(type) (1<<(type))
+
+#define BIT_NT_STR NTYPE2BIT(NT_STR)
+#define BIT_NT_CCLASS NTYPE2BIT(NT_CCLASS)
+#define BIT_NT_CTYPE NTYPE2BIT(NT_CTYPE)
+#define BIT_NT_CANY NTYPE2BIT(NT_CANY)
+#define BIT_NT_BREF NTYPE2BIT(NT_BREF)
+#define BIT_NT_QTFR NTYPE2BIT(NT_QTFR)
+#define BIT_NT_ENCLOSE NTYPE2BIT(NT_ENCLOSE)
+#define BIT_NT_ANCHOR NTYPE2BIT(NT_ANCHOR)
+#define BIT_NT_LIST NTYPE2BIT(NT_LIST)
+#define BIT_NT_ALT NTYPE2BIT(NT_ALT)
+#define BIT_NT_CALL NTYPE2BIT(NT_CALL)
+
+#define IS_NODE_TYPE_SIMPLE(type) \
+  ((NTYPE2BIT(type) & (BIT_NT_STR | BIT_NT_CCLASS | BIT_NT_CTYPE |\
+                       BIT_NT_CANY | BIT_NT_BREF)) != 0)
+
+/* Setter macros below expand to one parenthesized expression so they behave
+   like a function call wherever an expression is allowed. */
+#define NTYPE(node) ((node)->u.base.type)
+#define SET_NTYPE(node, ntype) ((node)->u.base.type = (ntype))
+
+/* typed views of the Node union */
+#define NSTR(node) (&((node)->u.str))
+#define NCCLASS(node) (&((node)->u.cclass))
+#define NCTYPE(node) (&((node)->u.ctype))
+#define NBREF(node) (&((node)->u.bref))
+#define NQTFR(node) (&((node)->u.qtfr))
+#define NENCLOSE(node) (&((node)->u.enclose))
+#define NANCHOR(node) (&((node)->u.anchor))
+#define NCONS(node) (&((node)->u.cons))
+#define NCALL(node) (&((node)->u.call))
+
+#define NCAR(node) (NCONS(node)->car)
+#define NCDR(node) (NCONS(node)->cdr)
+
+
+
+#define ANCHOR_ANYCHAR_STAR_MASK (ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML)
+#define ANCHOR_END_BUF_MASK (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)
+
+#define ENCLOSE_MEMORY (1<<0)
+#define ENCLOSE_OPTION (1<<1)
+#define ENCLOSE_STOP_BACKTRACK (1<<2)
+
+#define NODE_STR_MARGIN 16
+#define NODE_STR_BUF_SIZE 24 /* sizeof(CClassNode) - sizeof(int)*4 */
+#define NODE_BACKREFS_SIZE 6
+
+#define NSTR_RAW (1<<0) /* by backslashed number */
+#define NSTR_AMBIG (1<<1)
+#define NSTR_DONT_GET_OPT_INFO (1<<2)
+
+#define NSTRING_LEN(node) ((node)->u.str.end - (node)->u.str.s)
+#define NSTRING_SET_RAW(node) ((node)->u.str.flag |= NSTR_RAW)
+#define NSTRING_CLEAR_RAW(node) ((node)->u.str.flag &= ~NSTR_RAW)
+#define NSTRING_SET_AMBIG(node) ((node)->u.str.flag |= NSTR_AMBIG)
+#define NSTRING_SET_DONT_GET_OPT_INFO(node) \
+  ((node)->u.str.flag |= NSTR_DONT_GET_OPT_INFO)
+#define NSTRING_IS_RAW(node) (((node)->u.str.flag & NSTR_RAW) != 0)
+#define NSTRING_IS_AMBIG(node) (((node)->u.str.flag & NSTR_AMBIG) != 0)
+#define NSTRING_IS_DONT_GET_OPT_INFO(node) \
+  (((node)->u.str.flag & NSTR_DONT_GET_OPT_INFO) != 0)
+
+/* Active back-reference array of a BRefNode: back_dynamic when it is set,
+   otherwise the inline back_static[].  No trailing ';' in the expansion, so
+   the macro can be used inside a larger expression. */
+#define BACKREFS_P(br) \
+  (IS_NOT_NULL((br)->back_dynamic) ? (br)->back_dynamic : (br)->back_static)
+
+#define NQ_TARGET_ISNOT_EMPTY 0
+#define NQ_TARGET_IS_EMPTY 1
+#define NQ_TARGET_IS_EMPTY_MEM 2
+#define NQ_TARGET_IS_EMPTY_REC 3
+
+/* status bits */
+#define NST_MIN_FIXED (1<<0)
+#define NST_MAX_FIXED (1<<1)
+#define NST_CLEN_FIXED (1<<2)
+#define NST_MARK1 (1<<3)
+#define NST_MARK2 (1<<4)
+#define NST_MEM_BACKREFED (1<<5)
+#define NST_STOP_BT_SIMPLE_REPEAT (1<<6)
+#define NST_RECURSION (1<<7)
+#define NST_CALLED (1<<8)
+#define NST_ADDR_FIXED (1<<9)
+#define NST_NAMED_GROUP (1<<10)
+#define NST_NAME_REF (1<<11)
+#define NST_IN_REPEAT (1<<12) /* STK_REPEAT is nested in stack. */
+#define NST_NEST_LEVEL (1<<13)
+#define NST_BY_NUMBER (1<<14) /* {n,m} */
+
+#define SET_ENCLOSE_STATUS(node,f) ((node)->u.enclose.state |= (f))
+#define CLEAR_ENCLOSE_STATUS(node,f) ((node)->u.enclose.state &= ~(f))
+
+#define IS_ENCLOSE_CALLED(en) (((en)->state & NST_CALLED) != 0)
+#define IS_ENCLOSE_ADDR_FIXED(en) (((en)->state & NST_ADDR_FIXED) != 0)
+#define IS_ENCLOSE_RECURSION(en) (((en)->state & NST_RECURSION) != 0)
+#define IS_ENCLOSE_MARK1(en) (((en)->state & NST_MARK1) != 0)
+#define IS_ENCLOSE_MARK2(en) (((en)->state & NST_MARK2) != 0)
+#define IS_ENCLOSE_MIN_FIXED(en) (((en)->state & NST_MIN_FIXED) != 0)
+#define IS_ENCLOSE_MAX_FIXED(en) (((en)->state & NST_MAX_FIXED) != 0)
+#define IS_ENCLOSE_CLEN_FIXED(en) (((en)->state & NST_CLEN_FIXED) != 0)
+#define IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(en) \
+  (((en)->state & NST_STOP_BT_SIMPLE_REPEAT) != 0)
+#define IS_ENCLOSE_NAMED_GROUP(en) (((en)->state & NST_NAMED_GROUP) != 0)
+
+#define SET_CALL_RECURSION(node) ((node)->u.call.state |= NST_RECURSION)
+#define IS_CALL_RECURSION(cn) (((cn)->state & NST_RECURSION) != 0)
+#define IS_CALL_NAME_REF(cn) (((cn)->state & NST_NAME_REF) != 0)
+#define IS_BACKREF_NAME_REF(bn) (((bn)->state & NST_NAME_REF) != 0)
+#define IS_BACKREF_NEST_LEVEL(bn) (((bn)->state & NST_NEST_LEVEL) != 0)
+#define IS_QUANTIFIER_IN_REPEAT(qn) (((qn)->state & NST_IN_REPEAT) != 0)
+#define IS_QUANTIFIER_BY_NUMBER(qn) (((qn)->state & NST_BY_NUMBER) != 0)
+
+/* parenthesized so the minus sign cannot bind to a neighbouring operand */
+#define CALLNODE_REFNUM_UNDEF (-1)
+
+typedef struct {
+  NodeBase base;
+  UChar* s;
+  UChar* end;
+  unsigned int flag;
+  int capa; /* (allocated size - 1) or 0: use buf[] */
+  UChar buf[NODE_STR_BUF_SIZE];
+} StrNode;
+
+typedef struct {
+  NodeBase base;
+  int state;
+  struct _Node* target;
+  int lower;
+  int upper;
+  int greedy;
+  int target_empty_info;
+  struct _Node* head_exact;
+  struct _Node* next_head_exact;
+  int is_refered; /* include called node. don't eliminate even if {0} */
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
+  int comb_exp_check_num; /* 1,2,3...: check, 0: no check */
+#endif
+} QtfrNode;
+
+typedef struct {
+  NodeBase base;
+  int state;
+  int type;
+  int regnum;
+  OnigOptionType option;
+  struct _Node* target;
+  AbsAddrType call_addr;
+  /* for multiple call reference */
+  OnigDistance min_len; /* min length (byte) */
+  OnigDistance max_len; /* max length (byte) */
+  int char_len; /* character length */
+  int opt_count; /* referenced count in optimize_node_left() */
+} EncloseNode;
+
+#ifdef USE_SUBEXP_CALL
+
+typedef struct {
+  int offset;
+  struct _Node* target;
+} UnsetAddr;
+
+typedef struct {
+  int num;
+  int alloc;
+  UnsetAddr* us;
+} UnsetAddrList;
+
+typedef struct {
+  NodeBase base;
+  int state;
+  int group_num;
+  UChar* name;
+  UChar* name_end;
+  struct _Node* target; /* EncloseNode : ENCLOSE_MEMORY */
+  UnsetAddrList* unset_addr_list;
+} CallNode;
+
+#endif
+
+typedef struct {
+  NodeBase base;
+  int state;
+  int back_num;
+  int back_static[NODE_BACKREFS_SIZE];
+  int* back_dynamic;
+  int nest_level;
+} BRefNode;
+
+typedef struct {
+  NodeBase base;
+  int type;
+  struct _Node* target;
+  int char_len;
+} AnchorNode;
+
+typedef struct {
+  NodeBase base;
+  struct _Node* car;
+  struct _Node* cdr;
+} ConsAltNode;
+
+typedef struct {
+  NodeBase base;
+  int ctype;
+  int not;
+} CtypeNode;
+
+/* Parse-tree node: a union of all node kinds, each starting with NodeBase. */
+typedef struct _Node {
+  union {
+    NodeBase base;
+    StrNode str;
+    CClassNode cclass;
+    QtfrNode qtfr;
+    EncloseNode enclose;
+    BRefNode bref;
+    AnchorNode anchor;
+    ConsAltNode cons;
+    CtypeNode ctype;
+#ifdef USE_SUBEXP_CALL
+    CallNode call;
+#endif
+  } u;
+} Node;
+
+
+#define NULL_NODE ((Node* )0)
+
+#define SCANENV_MEMNODES_SIZE 8
+#define SCANENV_MEM_NODES(senv) \
+  (IS_NOT_NULL((senv)->mem_nodes_dynamic) ? \
+   (senv)->mem_nodes_dynamic : (senv)->mem_nodes_static)
+
+typedef struct {
+  OnigOptionType option;
+  OnigCaseFoldType case_fold_flag;
+  OnigEncoding enc;
+  OnigSyntaxType* syntax;
+  BitStatusType capture_history;
+  BitStatusType bt_mem_start;
+  BitStatusType bt_mem_end;
+  BitStatusType backrefed_mem;
+  UChar* pattern;
+  UChar* pattern_end;
+  UChar* error;
+  UChar* error_end;
+  regex_t* reg; /* for reg->names only */
+  int num_call;
+#ifdef USE_SUBEXP_CALL
+  UnsetAddrList* unset_addr_list;
+#endif
+  int num_mem;
+#ifdef USE_NAMED_GROUP
+  int num_named;
+#endif
+  int mem_alloc;
+  Node* mem_nodes_static[SCANENV_MEMNODES_SIZE];
+  Node** mem_nodes_dynamic;
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
+  int num_comb_exp_check;
+  int comb_exp_max_regnum;
+  int curr_max_regnum;
+  int has_recursion;
+#endif
+} ScanEnv;
+
+
+#define IS_SYNTAX_OP(syn, opm) (((syn)->op & (opm)) != 0)
+#define IS_SYNTAX_OP2(syn, opm) (((syn)->op2 & (opm)) != 0)
+#define IS_SYNTAX_BV(syn, bvm) (((syn)->behavior & (bvm)) != 0)
+
+#ifdef USE_NAMED_GROUP
+typedef struct {
+  int new_val;
+} GroupNumRemap;
+
+extern int onig_renumber_name_table P_((regex_t* reg, GroupNumRemap* map));
+#endif
+
+extern int onig_strncmp P_((const UChar* s1, const UChar* s2, int n));
+extern void onig_strcpy P_((UChar* dest, const UChar* src, const UChar* end));
+extern void onig_scan_env_set_error_string P_((ScanEnv* env, int ecode, UChar* arg, UChar* arg_end));
+extern int onig_scan_unsigned_number P_((UChar** src, const UChar* end, OnigEncoding enc));
+extern void onig_reduce_nested_quantifier P_((Node* pnode, Node* cnode));
+extern void onig_node_conv_to_str_node P_((Node* node, int raw));
+extern int onig_node_str_cat P_((Node* node, const UChar* s, const UChar* end));
+extern int onig_node_str_set P_((Node* node, const UChar* s, const UChar* end));
+extern void onig_node_free P_((Node* node));
+extern Node* onig_node_new_enclose P_((int type));
+extern Node* onig_node_new_anchor P_((int type));
+extern Node* onig_node_new_str P_((const UChar* s, const UChar* end));
+extern Node* onig_node_new_list P_((Node* left, Node* right));
+extern Node* onig_node_list_add P_((Node* list, Node* x));
+extern Node* onig_node_new_alt P_((Node* left, Node* right));
+extern void onig_node_str_clear P_((Node* node));
+extern int onig_free_node_list P_((void));
+extern int onig_names_free P_((regex_t* reg));
+extern int onig_parse_make_tree P_((Node** root, const UChar* pattern, const UChar* end, regex_t* reg, ScanEnv* env));
+extern int onig_free_shared_cclass_table P_((void));
+
+#ifdef ONIG_DEBUG
+#ifdef USE_NAMED_GROUP
+extern int onig_print_names(FILE*, regex_t*);
+#endif
+#endif
+
+#endif /* REGPARSE_H */
diff --git a/oniguruma/regposerr.c b/oniguruma/regposerr.c
new file mode 100644
index 0000000..56f75ab
--- /dev/null
+++ b/oniguruma/regposerr.c
@@ -0,0 +1,98 @@
+/**********************************************************************
+  regposerr.c - Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2002-2007 K.Kosako
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "onigposix.h"
+
+/* strlen() / strncpy() */
+#ifdef HAVE_STRING_H
+# include <string.h>
+#else
+# include <strings.h>
+#endif
+
+#if defined(__GNUC__)
+# define ARG_UNUSED __attribute__ ((unused))
+#else
+# define ARG_UNUSED
+#endif
+
+/* Message table indexed by POSIX error code; slot 0 is never read because
+   regerror() handles code 0 separately. */
+static const char* const ESTRING[] = {
+  NULL,
+  "failed to match", /* REG_NOMATCH */
+  "Invalid regular expression", /* REG_BADPAT */
+  "invalid collating element referenced", /* REG_ECOLLATE */
+  "invalid character class type referenced", /* REG_ECTYPE */
+  "bad backslash-escape sequence", /* REG_EESCAPE */
+  "invalid back reference number", /* REG_ESUBREG */
+  "imbalanced [ and ]", /* REG_EBRACK */
+  "imbalanced ( and )", /* REG_EPAREN */
+  "imbalanced { and }", /* REG_EBRACE */
+  "invalid repeat range {n,m}", /* REG_BADBR */
+  "invalid range", /* REG_ERANGE */
+  "Out of memory", /* REG_ESPACE */
+  "? * + not preceded by valid regular expression", /* REG_BADRPT */
+
+  /* Extended errors */
+  "internal error", /* REG_EONIG_INTERNAL */
+  "invalid wide char value", /* REG_EONIG_BADWC */
+  "invalid argument", /* REG_EONIG_BADARG */
+  "multi-thread error" /* REG_EONIG_THREAD */
+};
+
+#include <stdio.h> /* sprintf() */
+
+
+/*
+ * Copy the message for `posix_ecode` into buf[0..size-1], always
+ * NUL-terminated when buf is non-NULL and size > 0 (the text is truncated if
+ * it does not fit).  Code 0 yields the empty string; codes outside the table
+ * yield "undefined error code (N)".
+ *
+ * Returns the size needed for the full message including the terminating
+ * NUL, regardless of how much was copied.  `reg` is unused.
+ */
+extern size_t
+regerror(int posix_ecode, const regex_t* reg ARG_UNUSED, char* buf,
+         size_t size)
+{
+  const char* s;
+  /* format text with NUL, a sign, and at most 3 decimal digits per byte of
+     int, so the buffer fits whatever the width of int is */
+  char tbuf[sizeof("undefined error code ()") + 1 + 3 * sizeof(int)];
+  size_t len;
+
+  if (posix_ecode > 0
+      && posix_ecode < (int )(sizeof(ESTRING) / sizeof(ESTRING[0]))) {
+    s = ESTRING[posix_ecode];
+  }
+  else if (posix_ecode == 0) {
+    s = "";
+  }
+  else {
+    sprintf(tbuf, "undefined error code (%d)", posix_ecode);
+    s = tbuf;
+  }
+
+  len = strlen(s) + 1; /* use strlen() because s is ascii encoding. */
+
+  if (buf != NULL && size > 0) {
+    /* strncpy() alone does not terminate on truncation; the explicit NUL
+       below does. */
+    strncpy(buf, s, size - 1);
+    buf[size - 1] = '\0';
+  }
+  return len;
+}
diff --git a/oniguruma/regposix.c b/oniguruma/regposix.c
new file mode 100644
index 0000000..7d1857c
--- /dev/null
+++ b/oniguruma/regposix.c
@@ -0,0 +1,303 @@
+/**********************************************************************
+  regposix.c - Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2002-2008 K.Kosako
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#define regex_t onig_regex_t
+#include "regint.h"
+#undef regex_t
+#include "onigposix.h"
+
+/* Access the native regex object stored in the POSIX wrapper's `onig` slot. */
+#define ONIG_C(reg) ((onig_regex_t* )((reg)->onig))
+#define PONIG_C(reg) ((onig_regex_t** )(&(reg)->onig))
+
+/* #define ENC_STRING_LEN(enc,s,len) len = strlen(s) */
+/* Store the byte length of the NUL-terminated string `s` in `len`: a plain
+   byte scan when the encoding's minimum character length is 1, otherwise
+   onigenc_str_bytelen_null().  Every use of `s` is parenthesized so an
+   expression argument is cast as a whole. */
+#define ENC_STRING_LEN(enc,s,len) do { \
+  if (ONIGENC_MBC_MINLEN(enc) == 1) { \
+    UChar* tmps = (UChar* )(s); \
+    while (*tmps != 0) tmps++; \
+    len = tmps - (UChar* )(s); \
+  } \
+  else { \
+    len = onigenc_str_bytelen_null(enc, (UChar* )(s)); \
+  } \
+} while(0)
+
+typedef struct {
+  int onig_err;
+  int posix_err;
+} O2PERR;
+
+/*
+ * Translate a native result code into a POSIX one: non-negative codes map
+ * to 0, listed codes map to their table entry, and any other negative code
+ * maps to REG_EONIG_INTERNAL.
+ */
+static int
+onig2posix_error_code(int code)
+{
+  static const O2PERR o2p[] = {
+    { ONIG_MISMATCH, REG_NOMATCH },
+    { ONIG_NO_SUPPORT_CONFIG, REG_EONIG_INTERNAL },
+    { ONIGERR_MEMORY, REG_ESPACE },
+    { ONIGERR_MATCH_STACK_LIMIT_OVER, REG_EONIG_INTERNAL },
+    { ONIGERR_TYPE_BUG, REG_EONIG_INTERNAL },
+    { ONIGERR_PARSER_BUG, REG_EONIG_INTERNAL },
+    { ONIGERR_STACK_BUG, REG_EONIG_INTERNAL },
+    { ONIGERR_UNDEFINED_BYTECODE, REG_EONIG_INTERNAL },
+    { ONIGERR_UNEXPECTED_BYTECODE, REG_EONIG_INTERNAL },
+    { ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED, REG_EONIG_BADARG },
+    { ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR, REG_EONIG_BADARG },
+    { ONIGERR_INVALID_ARGUMENT, REG_EONIG_BADARG },
+    { ONIGERR_END_PATTERN_AT_LEFT_BRACE, REG_EBRACE },
+    { ONIGERR_END_PATTERN_AT_LEFT_BRACKET, REG_EBRACK },
+    { ONIGERR_EMPTY_CHAR_CLASS, REG_ECTYPE },
+    { ONIGERR_PREMATURE_END_OF_CHAR_CLASS, REG_ECTYPE },
+    { ONIGERR_END_PATTERN_AT_ESCAPE, REG_EESCAPE },
+    { ONIGERR_END_PATTERN_AT_META, REG_EESCAPE },
+    { ONIGERR_END_PATTERN_AT_CONTROL, REG_EESCAPE },
+    { ONIGERR_META_CODE_SYNTAX, REG_BADPAT },
+    { ONIGERR_CONTROL_CODE_SYNTAX, REG_BADPAT },
+    { ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE, REG_ECTYPE },
+    { ONIGERR_CHAR_CLASS_VALUE_AT_START_OF_RANGE, REG_ECTYPE },
+    { ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS, REG_ECTYPE },
+    { ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED, REG_BADRPT },
+    { ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID, REG_BADRPT },
+    { ONIGERR_NESTED_REPEAT_OPERATOR, REG_BADRPT },
+    { ONIGERR_UNMATCHED_CLOSE_PARENTHESIS, REG_EPAREN },
+    { ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS, REG_EPAREN },
+    { ONIGERR_END_PATTERN_IN_GROUP, REG_BADPAT },
+    { ONIGERR_UNDEFINED_GROUP_OPTION, REG_BADPAT },
+    { ONIGERR_INVALID_POSIX_BRACKET_TYPE, REG_BADPAT },
+    { ONIGERR_INVALID_LOOK_BEHIND_PATTERN, REG_BADPAT },
+    { ONIGERR_INVALID_REPEAT_RANGE_PATTERN, REG_BADPAT },
+    { ONIGERR_TOO_BIG_NUMBER, REG_BADPAT },
+    { ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE, REG_BADBR },
+    { ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE, REG_BADBR },
+    { ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS, REG_ECTYPE },
+    { ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE, REG_ECTYPE },
+    { ONIGERR_TOO_MANY_MULTI_BYTE_RANGES, REG_ECTYPE },
+    { ONIGERR_TOO_SHORT_MULTI_BYTE_STRING, REG_BADPAT },
+    { ONIGERR_TOO_BIG_BACKREF_NUMBER, REG_ESUBREG },
+    { ONIGERR_INVALID_BACKREF, REG_ESUBREG },
+    { ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED, REG_BADPAT },
+    { ONIGERR_TOO_BIG_WIDE_CHAR_VALUE, REG_EONIG_BADWC },
+    { ONIGERR_TOO_LONG_WIDE_CHAR_VALUE, REG_EONIG_BADWC },
+    { ONIGERR_INVALID_CODE_POINT_VALUE, REG_EONIG_BADWC },
+    { ONIGERR_EMPTY_GROUP_NAME, REG_BADPAT },
+    { ONIGERR_INVALID_GROUP_NAME, REG_BADPAT },
+    { ONIGERR_INVALID_CHAR_IN_GROUP_NAME, REG_BADPAT },
+    { ONIGERR_UNDEFINED_NAME_REFERENCE, REG_BADPAT },
+    { ONIGERR_UNDEFINED_GROUP_REFERENCE, REG_BADPAT },
+    { ONIGERR_MULTIPLEX_DEFINED_NAME, REG_BADPAT },
+    { ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL, REG_BADPAT },
+    { ONIGERR_NEVER_ENDING_RECURSION, REG_BADPAT },
+    { ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY, REG_BADPAT },
+    { ONIGERR_INVALID_CHAR_PROPERTY_NAME, REG_BADPAT },
+    { ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION, REG_EONIG_BADARG },
+    { ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT, REG_EONIG_THREAD }
+
+  };
+
+  int i;
+
+  if (code >= 0) return 0;
+
+  for (i = 0; i < (int )(sizeof(o2p) / sizeof(o2p[0])); i++) {
+    if (code == o2p[i].onig_err)
+      return o2p[i].posix_err;
+  }
+
+  return REG_EONIG_INTERNAL; /* but, unknown error code */
+}
+
+/*
+ * Compile `pattern` into `reg`.
+ *
+ * The syntax is OnigDefaultSyntax, or ONIG_SYNTAX_POSIX_BASIC when
+ * REG_EXTENDED is not set.  REG_ICASE turns on ONIG_OPTION_IGNORECASE;
+ * REG_NEWLINE turns on ONIG_OPTION_NEGATE_SINGLELINE and turns off
+ * ONIG_OPTION_SINGLELINE.  The pattern is read in
+ * OnigEncDefaultCharEncoding.
+ *
+ * Returns 0 on success (reg->re_nsub is then set from num_mem), otherwise
+ * the POSIX code mapped from the onig_new() result.
+ */
+extern int
+regcomp(regex_t* reg, const char* pattern, int posix_options)
+{
+  int r, len;
+  OnigSyntaxType* syntax = OnigDefaultSyntax;
+  OnigOptionType options;
+
+  if ((posix_options & REG_EXTENDED) == 0)
+    syntax = ONIG_SYNTAX_POSIX_BASIC;
+
+  options = syntax->options;
+  if ((posix_options & REG_ICASE) != 0)
+    ONIG_OPTION_ON(options, ONIG_OPTION_IGNORECASE);
+  if ((posix_options & REG_NEWLINE) != 0) {
+    ONIG_OPTION_ON( options, ONIG_OPTION_NEGATE_SINGLELINE);
+    ONIG_OPTION_OFF(options, ONIG_OPTION_SINGLELINE);
+  }
+
+  reg->comp_options = posix_options;
+
+  ENC_STRING_LEN(OnigEncDefaultCharEncoding, pattern, len);
+  r = onig_new(PONIG_C(reg), (UChar* )pattern, (UChar* )(pattern + len),
+               options, OnigEncDefaultCharEncoding, syntax,
+               (OnigErrorInfo* )NULL);
+  if (r != ONIG_NORMAL) {
+    return onig2posix_error_code(r);
+  }
+
+  reg->re_nsub = ONIG_C(reg)->num_mem;
+  return 0;
+}
+
+/*
+ * Search `str` with the compiled `reg`.
+ *
+ * Returns 0 on a match, REG_NOMATCH when nothing matches (the first nmatch
+ * entries of pmatch are then set to ONIG_REGION_NOTPOS), or another POSIX
+ * code mapped from the onig_search() result.  No match data is produced
+ * when nmatch is 0 or the pattern was compiled with REG_NOSUB.
+ *
+ * When the caller supplies fewer than num_mem + 1 slots, the search runs on
+ * a temporary array of num_mem + 1 entries and the first nmatch results are
+ * copied back.  That comparison is done in size_t: narrowing nmatch to int
+ * would make a value above INT_MAX look small and select the temporary
+ * array, from which nmatch entries would then be copied.
+ */
+extern int
+regexec(regex_t* reg, const char* str, size_t nmatch,
+        regmatch_t pmatch[], int posix_options)
+{
+  int r, len;
+  size_t i;
+  UChar* end;
+  regmatch_t* pm;
+  OnigOptionType options;
+
+  options = ONIG_OPTION_POSIX_REGION;
+  if ((posix_options & REG_NOTBOL) != 0) options |= ONIG_OPTION_NOTBOL;
+  if ((posix_options & REG_NOTEOL) != 0) options |= ONIG_OPTION_NOTEOL;
+
+  if (nmatch == 0 || (reg->comp_options & REG_NOSUB) != 0) {
+    pm = (regmatch_t* )NULL;
+    nmatch = 0;
+  }
+  else if (nmatch < (size_t )(ONIG_C(reg)->num_mem + 1)) {
+    pm = (regmatch_t* )xmalloc(sizeof(regmatch_t)
+                               * (ONIG_C(reg)->num_mem + 1));
+    if (pm == NULL)
+      return REG_ESPACE;
+  }
+  else {
+    pm = pmatch;
+  }
+
+  ENC_STRING_LEN(ONIG_C(reg)->enc, str, len);
+  end = (UChar* )(str + len);
+  r = onig_search(ONIG_C(reg), (UChar* )str, end, (UChar* )str, end,
+                  (OnigRegion* )pm, options);
+
+  if (r >= 0) {
+    r = 0; /* Match */
+    if (pm != pmatch && pm != NULL) {
+      xmemcpy(pmatch, pm, sizeof(regmatch_t) * nmatch);
+    }
+  }
+  else if (r == ONIG_MISMATCH) {
+    r = REG_NOMATCH;
+    for (i = 0; i < nmatch; i++)
+      pmatch[i].rm_so = pmatch[i].rm_eo = ONIG_REGION_NOTPOS;
+  }
+  else {
+    r = onig2posix_error_code(r);
+  }
+
+  if (pm != pmatch && pm != NULL)
+    xfree(pm);
+
+#if 0
+  if (reg->re_nsub > nmatch - 1)
+    reg->re_nsub = (nmatch <= 1 ? 0 : nmatch - 1);
+#endif
+
+  return r;
+}
+
+/* Release the native regex object held by `reg`. */
+extern void
+regfree(regex_t* reg)
+{
+  onig_free(ONIG_C(reg));
+}
+
+
+/*
+ * Set the default encoding from a REG_POSIX_ENCODING_* constant.
+ * Unrecognized values are ignored and the current default is kept.
+ */
+extern void
+reg_set_encoding(int mb_code)
+{
+  OnigEncoding enc;
+
+  switch (mb_code) {
+  case REG_POSIX_ENCODING_ASCII:
+    enc = ONIG_ENCODING_ASCII;
+    break;
+  case REG_POSIX_ENCODING_EUC_JP:
+    enc = ONIG_ENCODING_EUC_JP;
+    break;
+  case REG_POSIX_ENCODING_SJIS:
+    enc = ONIG_ENCODING_SJIS;
+    break;
+  case REG_POSIX_ENCODING_UTF8:
+    enc = ONIG_ENCODING_UTF8;
+    break;
+  case REG_POSIX_ENCODING_UTF16_BE:
+    enc = ONIG_ENCODING_UTF16_BE;
+    break;
+  case REG_POSIX_ENCODING_UTF16_LE:
+    enc = ONIG_ENCODING_UTF16_LE;
+    break;
+
+  default:
+    return ;
+    break;
+  }
+
+  onigenc_set_default_encoding(enc);
+}
+
+/* Forward to onig_name_to_group_numbers() on the wrapped regex. */
+extern int
+reg_name_to_group_numbers(regex_t* reg,
+  const unsigned char* name, const unsigned char* name_end, int** nums)
+{
+  return onig_name_to_group_numbers(ONIG_C(reg), name, name_end, nums);
+}
+
+/* Context for i_wrapper(): the caller's callback, the POSIX regex_t to hand
+   back to it, and the caller's opaque argument. */
+typedef struct {
+  int (*func)(const unsigned char*, const unsigned char*,int,int*,regex_t*,void*);
+  regex_t* reg;
+  void* arg;
+} i_wrap;
+
+/* Adapter passed to onig_foreach_name(): ignores the native regex pointer
+   and calls the user callback with the POSIX regex_t stored in `arg`. */
+static int
+i_wrapper(const UChar* name, const UChar* name_end, int ng, int* gs,
+          onig_regex_t* reg ARG_UNUSED, void* arg)
+{
+  i_wrap* warg = (i_wrap* )arg;
+
+  return (*warg->func)(name, name_end, ng, gs, warg->reg, warg->arg);
+}
+
+/* Call `func` through onig_foreach_name(), passing it `reg` and `arg`;
+   returns what onig_foreach_name() returns. */
+extern int
+reg_foreach_name(regex_t* reg,
+  int (*func)(const unsigned char*, const unsigned char*,int,int*,regex_t*,void*),
+  void* arg)
+{
+  i_wrap warg;
+
+  warg.func = func;
+  warg.reg = reg;
+  warg.arg = arg;
+
+  return onig_foreach_name(ONIG_C(reg), i_wrapper, &warg);
+}
+
+/* Forward to onig_number_of_names() on the wrapped regex. */
+extern int
+reg_number_of_names(regex_t* reg)
+{
+  return onig_number_of_names(ONIG_C(reg));
+}
diff --git a/oniguruma/regsyntax.c b/oniguruma/regsyntax.c
new file mode 100644
index 0000000..ade5b55
--- /dev/null
+++ b/oniguruma/regsyntax.c
@@ -0,0 +1,315 @@
+/**********************************************************************
+  regsyntax.c - Oniguruma (regular expression library)
+**********************************************************************/ +/*- + * Copyright (c) 2002-2006 K.Kosako + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "regint.h" +/* ASIS table: first and third initializers are 0, the second sets only ONIG_SYN_OP2_INEFFECTIVE_ESCAPE, options are ONIG_OPTION_NONE. NOTE(review): initializer order is presumably op, op2, behavior, options, meta_char_table - confirm against the OnigSyntaxType declaration. Every table in this file uses the same meta char block: backslash as esc, all other entries ONIG_INEFFECTIVE_META_CHAR. */ +OnigSyntaxType OnigSyntaxASIS = { + 0 + , ONIG_SYN_OP2_INEFFECTIVE_ESCAPE + , 0 + , ONIG_OPTION_NONE + , + { + (OnigCodePoint )'\\' /* esc */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' 
*/ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ + } +}; +/* POSIX basic table: SYN_POSIX_COMMON_OP plus the ESC_LPAREN_SUBEXP and ESC_BRACE_INTERVAL operator flags; the second and third initializers are 0; options are SINGLELINE | MULTILINE. */ +OnigSyntaxType OnigSyntaxPosixBasic = { + ( SYN_POSIX_COMMON_OP | ONIG_SYN_OP_ESC_LPAREN_SUBEXP | + ONIG_SYN_OP_ESC_BRACE_INTERVAL ) + , 0 + , 0 + , ( ONIG_OPTION_SINGLELINE | ONIG_OPTION_MULTILINE ) + , + { + (OnigCodePoint )'\\' /* esc */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ + } +}; +/* POSIX extended table: differs from the basic table by using the non-ESC LPAREN_SUBEXP and BRACE_INTERVAL flags, adding PLUS_ONE_INF, QMARK_ZERO_ONE and VBAR_ALT, and setting five ONIG_SYN_* flags in the third initializer. */ +OnigSyntaxType OnigSyntaxPosixExtended = { + ( SYN_POSIX_COMMON_OP | ONIG_SYN_OP_LPAREN_SUBEXP | + ONIG_SYN_OP_BRACE_INTERVAL | + ONIG_SYN_OP_PLUS_ONE_INF | ONIG_SYN_OP_QMARK_ZERO_ONE | ONIG_SYN_OP_VBAR_ALT ) + , 0 + , ( ONIG_SYN_CONTEXT_INDEP_ANCHORS | + ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS | ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS | + ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP | + ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC ) + , ( ONIG_OPTION_SINGLELINE | ONIG_OPTION_MULTILINE ) + , + { + (OnigCodePoint )'\\' /* esc */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' 
*/ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ + } +}; +/* Emacs table: explicit list of ONIG_SYN_OP_* flags (no shared SYN_* macro), ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR as second initializer, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC as third, options ONIG_OPTION_NONE. */ +OnigSyntaxType OnigSyntaxEmacs = { + ( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_BRACKET_CC | + ONIG_SYN_OP_ESC_BRACE_INTERVAL | + ONIG_SYN_OP_ESC_LPAREN_SUBEXP | ONIG_SYN_OP_ESC_VBAR_ALT | + ONIG_SYN_OP_ASTERISK_ZERO_INF | ONIG_SYN_OP_PLUS_ONE_INF | + ONIG_SYN_OP_QMARK_ZERO_ONE | ONIG_SYN_OP_DECIMAL_BACKREF | + ONIG_SYN_OP_LINE_ANCHOR | ONIG_SYN_OP_ESC_CONTROL_CHARS ) + , ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR + , ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC + , ONIG_OPTION_NONE + , + { + (OnigCodePoint )'\\' /* esc */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ + } +}; +/* Grep table: explicit ONIG_SYN_OP_* list using the ESC_ variants of PLUS_ONE_INF and QMARK_ZERO_ONE; second initializer 0; third is ALLOW_EMPTY_RANGE_IN_CC | NOT_NEWLINE_IN_NEGATIVE_CC; options ONIG_OPTION_NONE. */ +OnigSyntaxType OnigSyntaxGrep = { + ( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_BRACKET_CC | ONIG_SYN_OP_POSIX_BRACKET | + ONIG_SYN_OP_ESC_BRACE_INTERVAL | ONIG_SYN_OP_ESC_LPAREN_SUBEXP | + ONIG_SYN_OP_ESC_VBAR_ALT | + ONIG_SYN_OP_ASTERISK_ZERO_INF | ONIG_SYN_OP_ESC_PLUS_ONE_INF | + ONIG_SYN_OP_ESC_QMARK_ZERO_ONE | ONIG_SYN_OP_LINE_ANCHOR | + ONIG_SYN_OP_ESC_W_WORD | ONIG_SYN_OP_ESC_B_WORD_BOUND | + ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END | ONIG_SYN_OP_DECIMAL_BACKREF ) + , 0 + , ( ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC | ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC ) + , ONIG_OPTION_NONE + , + { + (OnigCodePoint )'\\' /* esc */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' 
*/ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ + } +}; +/* GNU regex table: SYN_GNU_REGEX_OP and SYN_GNU_REGEX_BV unmodified (both macros are defined outside this file), second initializer 0, options ONIG_OPTION_NONE. */ +OnigSyntaxType OnigSyntaxGnuRegex = { + SYN_GNU_REGEX_OP + , 0 + , SYN_GNU_REGEX_BV + , ONIG_OPTION_NONE + , + { + (OnigCodePoint )'\\' /* esc */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ + } +}; +/* Java table: SYN_GNU_REGEX_OP plus extra ONIG_SYN_OP_* flags with ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END masked off; nine ONIG_SYN_OP2_* flags; SYN_GNU_REGEX_BV | DIFFERENT_LEN_ALT_LOOK_BEHIND; options ONIG_OPTION_SINGLELINE. */ +OnigSyntaxType OnigSyntaxJava = { + (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY | + ONIG_SYN_OP_ESC_CONTROL_CHARS | ONIG_SYN_OP_ESC_C_CONTROL | + ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 ) + & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END ) + , ( ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE | ONIG_SYN_OP2_QMARK_GROUP_EFFECT | + ONIG_SYN_OP2_OPTION_PERL | ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT | + ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL | ONIG_SYN_OP2_CCLASS_SET_OP | + ONIG_SYN_OP2_ESC_V_VTAB | ONIG_SYN_OP2_ESC_U_HEX4 | + ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY ) + , ( SYN_GNU_REGEX_BV | ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND ) + , ONIG_OPTION_SINGLELINE + , + { + (OnigCodePoint )'\\' /* esc */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' 
*/ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ + } +}; +/* Perl table: SYN_GNU_REGEX_OP plus extra ONIG_SYN_OP_* flags (including ESC_X_BRACE_HEX8) with ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END masked off; five ONIG_SYN_OP2_* flags; SYN_GNU_REGEX_BV; options ONIG_OPTION_SINGLELINE. */ +OnigSyntaxType OnigSyntaxPerl = { + (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY | + ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 | + ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_CONTROL_CHARS | + ONIG_SYN_OP_ESC_C_CONTROL ) + & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END ) + , ( ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE | + ONIG_SYN_OP2_QMARK_GROUP_EFFECT | ONIG_SYN_OP2_OPTION_PERL | + ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY | + ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT ) + , SYN_GNU_REGEX_BV + , ONIG_OPTION_SINGLELINE + , + { + (OnigCodePoint )'\\' /* esc */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ + } +}; + +/* Perl + named group: same first initializer as OnigSyntaxPerl; the second adds QMARK_LT_NAMED_GROUP, ESC_K_NAMED_BACKREF and ESC_G_SUBEXP_CALL; the third adds CAPTURE_ONLY_NAMED_GROUP and ALLOW_MULTIPLEX_DEFINITION_NAME to SYN_GNU_REGEX_BV. */ +OnigSyntaxType OnigSyntaxPerl_NG = { + (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY | + ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 | + ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_CONTROL_CHARS | + ONIG_SYN_OP_ESC_C_CONTROL ) + & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END ) + , ( ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE | + ONIG_SYN_OP2_QMARK_GROUP_EFFECT | ONIG_SYN_OP2_OPTION_PERL | + ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY | + ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT | + ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP | + ONIG_SYN_OP2_ESC_K_NAMED_BACKREF | + ONIG_SYN_OP2_ESC_G_SUBEXP_CALL ) + , ( SYN_GNU_REGEX_BV | + ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP | + ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME ) + , ONIG_OPTION_SINGLELINE + , + { + (OnigCodePoint )'\\' /* esc */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' 
*/ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ + , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ + } +}; + + + +extern int +onig_set_default_syntax(OnigSyntaxType* syntax) +{ + if (IS_NULL(syntax)) + syntax = ONIG_SYNTAX_RUBY; + + OnigDefaultSyntax = syntax; + return 0; +} + +extern void +onig_copy_syntax(OnigSyntaxType* to, OnigSyntaxType* from) +{ + *to = *from; +} + +extern void +onig_set_syntax_op(OnigSyntaxType* syntax, unsigned int op) +{ + syntax->op = op; +} + +extern void +onig_set_syntax_op2(OnigSyntaxType* syntax, unsigned int op2) +{ + syntax->op2 = op2; +} + +extern void +onig_set_syntax_behavior(OnigSyntaxType* syntax, unsigned int behavior) +{ + syntax->behavior = behavior; +} + +extern void +onig_set_syntax_options(OnigSyntaxType* syntax, OnigOptionType options) +{ + syntax->options = options; +} + +extern unsigned int +onig_get_syntax_op(OnigSyntaxType* syntax) +{ + return syntax->op; +} + +extern unsigned int +onig_get_syntax_op2(OnigSyntaxType* syntax) +{ + return syntax->op2; +} + +extern unsigned int +onig_get_syntax_behavior(OnigSyntaxType* syntax) +{ + return syntax->behavior; +} + +extern OnigOptionType +onig_get_syntax_options(OnigSyntaxType* syntax) +{ + return syntax->options; +} + +#ifdef USE_VARIABLE_META_CHARS +extern int onig_set_meta_char(OnigSyntaxType* enc, + unsigned int what, OnigCodePoint code) +{ + switch (what) { + case ONIG_META_CHAR_ESCAPE: + enc->meta_char_table.esc = code; + break; + case ONIG_META_CHAR_ANYCHAR: + enc->meta_char_table.anychar = code; + break; + case ONIG_META_CHAR_ANYTIME: + enc->meta_char_table.anytime = code; + break; + case ONIG_META_CHAR_ZERO_OR_ONE_TIME: + enc->meta_char_table.zero_or_one_time = code; + break; + case ONIG_META_CHAR_ONE_OR_MORE_TIME: + enc->meta_char_table.one_or_more_time = code; + break; + case 
ONIG_META_CHAR_ANYCHAR_ANYTIME: + enc->meta_char_table.anychar_anytime = code; + break; + default: + return ONIGERR_INVALID_ARGUMENT; + break; + } + return 0; +} +#endif /* USE_VARIABLE_META_CHARS */ diff --git a/oniguruma/regtrav.c b/oniguruma/regtrav.c new file mode 100644 index 0000000..58a17f5 --- /dev/null +++ b/oniguruma/regtrav.c @@ -0,0 +1,76 @@ +/********************************************************************** + regtrav.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2004 K.Kosako + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include "regint.h" + +#ifdef USE_CAPTURE_HISTORY + +static int +capture_tree_traverse(OnigCaptureTreeNode* node, int at, + int(*callback_func)(int,int,int,int,int,void*), + int level, void* arg) +{ + int r, i; + + if (node == (OnigCaptureTreeNode* )0) + return 0; + + if ((at & ONIG_TRAVERSE_CALLBACK_AT_FIRST) != 0) { + r = (*callback_func)(node->group, node->beg, node->end, + level, ONIG_TRAVERSE_CALLBACK_AT_FIRST, arg); + if (r != 0) return r; + } + + for (i = 0; i < node->num_childs; i++) { + r = capture_tree_traverse(node->childs[i], at, + callback_func, level + 1, arg); + if (r != 0) return r; + } + + if ((at & ONIG_TRAVERSE_CALLBACK_AT_LAST) != 0) { + r = (*callback_func)(node->group, node->beg, node->end, + level, ONIG_TRAVERSE_CALLBACK_AT_LAST, arg); + if (r != 0) return r; + } + + return 0; +} +#endif /* USE_CAPTURE_HISTORY */ + +extern int +onig_capture_tree_traverse(OnigRegion* region, int at, + int(*callback_func)(int,int,int,int,int,void*), void* arg) +{ +#ifdef USE_CAPTURE_HISTORY + return capture_tree_traverse(region->history_root, at, + callback_func, 0, arg); +#else + return ONIG_NO_SUPPORT_CONFIG; +#endif +} diff --git a/oniguruma/regversion.c b/oniguruma/regversion.c new file mode 100644 index 0000000..113fbae --- /dev/null +++ b/oniguruma/regversion.c @@ -0,0 +1,56 @@ +/********************************************************************** + regversion.c - Oniguruma (regular expression library) +**********************************************************************/ +/*- + * Copyright (c) 2002-2008 K.Kosako + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "config.h" +#include "oniguruma.h" +#include + +extern const char* +onig_version(void) +{ + static char s[12]; + + sprintf(s, "%d.%d.%d", + ONIGURUMA_VERSION_MAJOR, + ONIGURUMA_VERSION_MINOR, + ONIGURUMA_VERSION_TEENY); + return s; +} + +extern const char* +onig_copyright(void) +{ + static char s[58]; + + sprintf(s, "Oniguruma %d.%d.%d : Copyright (C) 2002-2008 K.Kosako", + ONIGURUMA_VERSION_MAJOR, + ONIGURUMA_VERSION_MINOR, + ONIGURUMA_VERSION_TEENY); + return s; +} diff --git a/oniguruma/sample/Makefile.am b/oniguruma/sample/Makefile.am new file mode 100644 index 0000000..af33057 --- /dev/null +++ b/oniguruma/sample/Makefile.am @@ -0,0 +1,25 @@ +noinst_PROGRAMS = encode listcap names posix simple sql syntax crnl + +libname = $(top_builddir)/libonig.la +LDADD = $(libname) +INCLUDES = -I$(top_srcdir) -I$(includedir) + +encode_SOURCES = encode.c +listcap_SOURCES = listcap.c +names_SOURCES = names.c +posix_SOURCES = posix.c +simple_SOURCES = simple.c +sql_SOURCES = sql.c 
+syntax_SOURCES = syntax.c + + +sampledir = $(top_builddir)/sample + +test: encode listcap names posix simple sql syntax + @$(sampledir)/encode + @$(sampledir)/listcap + @$(sampledir)/names + @$(sampledir)/posix + @$(sampledir)/simple + @$(sampledir)/sql + @$(sampledir)/syntax diff --git a/oniguruma/sample/Makefile.in b/oniguruma/sample/Makefile.in new file mode 100644 index 0000000..ec00bfd --- /dev/null +++ b/oniguruma/sample/Makefile.in @@ -0,0 +1,518 @@ +# Makefile.in generated by automake 1.10 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, +# 2003, 2004, 2005, 2006 Free Software Foundation, Inc. +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. 
+ +@SET_MAKE@ + +VPATH = @srcdir@ +pkgdatadir = $(datadir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +noinst_PROGRAMS = encode$(EXEEXT) listcap$(EXEEXT) names$(EXEEXT) \ + posix$(EXEEXT) simple$(EXEEXT) sql$(EXEEXT) syntax$(EXEEXT) \ + crnl$(EXEEXT) +subdir = sample +DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/configure.in +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/config.h +CONFIG_CLEAN_FILES = +PROGRAMS = $(noinst_PROGRAMS) +crnl_SOURCES = crnl.c +crnl_OBJECTS = crnl.$(OBJEXT) +crnl_LDADD = $(LDADD) +crnl_DEPENDENCIES = $(libname) +am_encode_OBJECTS = encode.$(OBJEXT) +encode_OBJECTS = $(am_encode_OBJECTS) +encode_LDADD = $(LDADD) +encode_DEPENDENCIES = $(libname) +am_listcap_OBJECTS = listcap.$(OBJEXT) +listcap_OBJECTS = $(am_listcap_OBJECTS) +listcap_LDADD = $(LDADD) +listcap_DEPENDENCIES = $(libname) +am_names_OBJECTS = names.$(OBJEXT) +names_OBJECTS = $(am_names_OBJECTS) +names_LDADD = $(LDADD) +names_DEPENDENCIES = $(libname) +am_posix_OBJECTS = posix.$(OBJEXT) +posix_OBJECTS = $(am_posix_OBJECTS) +posix_LDADD = $(LDADD) +posix_DEPENDENCIES = $(libname) +am_simple_OBJECTS = simple.$(OBJEXT) +simple_OBJECTS = $(am_simple_OBJECTS) +simple_LDADD = $(LDADD) +simple_DEPENDENCIES = $(libname) +am_sql_OBJECTS = sql.$(OBJEXT) +sql_OBJECTS = $(am_sql_OBJECTS) +sql_LDADD = $(LDADD) +sql_DEPENDENCIES = $(libname) 
+am_syntax_OBJECTS = syntax.$(OBJEXT) +syntax_OBJECTS = $(am_syntax_OBJECTS) +syntax_LDADD = $(LDADD) +syntax_DEPENDENCIES = $(libname) +DEFAULT_INCLUDES = -I. -I$(top_builddir)@am__isrc@ +depcomp = $(SHELL) $(top_srcdir)/depcomp +am__depfiles_maybe = depfiles +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ + --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ + $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +CCLD = $(CC) +LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ + --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \ + $(LDFLAGS) -o $@ +SOURCES = crnl.c $(encode_SOURCES) $(listcap_SOURCES) $(names_SOURCES) \ + $(posix_SOURCES) $(simple_SOURCES) $(sql_SOURCES) \ + $(syntax_SOURCES) +DIST_SOURCES = crnl.c $(encode_SOURCES) $(listcap_SOURCES) \ + $(names_SOURCES) $(posix_SOURCES) $(simple_SOURCES) \ + $(sql_SOURCES) $(syntax_SOURCES) +ETAGS = etags +CTAGS = ctags +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +ACLOCAL = @ACLOCAL@ +ALLOCA = @ALLOCA@ +AMTAR = @AMTAR@ +AR = @AR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +FGREP = @FGREP@ +GREP = @GREP@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBTOOL = @LIBTOOL@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +LTVERSION = @LTVERSION@ +MAKEINFO = @MAKEINFO@ +MKDIR_P = @MKDIR_P@ +NM = @NM@ +NMEDIT = 
@NMEDIT@ +OBJEXT = @OBJEXT@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +RANLIB = @RANLIB@ +RUBYDIR = @RUBYDIR@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +STATISTICS = @STATISTICS@ +STRIP = @STRIP@ +VERSION = @VERSION@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +lt_ECHO = @lt_ECHO@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target_alias = @target_alias@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +libname = $(top_builddir)/libonig.la +LDADD = $(libname) +INCLUDES = -I$(top_srcdir) -I$(includedir) +encode_SOURCES = encode.c +listcap_SOURCES = listcap.c +names_SOURCES = names.c +posix_SOURCES = posix.c +simple_SOURCES 
= simple.c +sql_SOURCES = sql.c +syntax_SOURCES = syntax.c +sampledir = $(top_builddir)/sample +all: all-am + +.SUFFIXES: +.SUFFIXES: .c .lo .o .obj +$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh \ + && exit 0; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign sample/Makefile'; \ + cd $(top_srcdir) && \ + $(AUTOMAKE) --foreign sample/Makefile +.PRECIOUS: Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +clean-noinstPROGRAMS: + @list='$(noinst_PROGRAMS)'; for p in $$list; do \ + f=`echo $$p|sed 's/$(EXEEXT)$$//'`; \ + echo " rm -f $$p $$f"; \ + rm -f $$p $$f ; \ + done +crnl$(EXEEXT): $(crnl_OBJECTS) $(crnl_DEPENDENCIES) + @rm -f crnl$(EXEEXT) + $(LINK) $(crnl_OBJECTS) $(crnl_LDADD) $(LIBS) +encode$(EXEEXT): $(encode_OBJECTS) $(encode_DEPENDENCIES) + @rm -f encode$(EXEEXT) + $(LINK) $(encode_OBJECTS) $(encode_LDADD) $(LIBS) +listcap$(EXEEXT): $(listcap_OBJECTS) $(listcap_DEPENDENCIES) + @rm -f listcap$(EXEEXT) + $(LINK) $(listcap_OBJECTS) $(listcap_LDADD) $(LIBS) +names$(EXEEXT): $(names_OBJECTS) $(names_DEPENDENCIES) + @rm -f names$(EXEEXT) + $(LINK) $(names_OBJECTS) $(names_LDADD) $(LIBS) +posix$(EXEEXT): 
$(posix_OBJECTS) $(posix_DEPENDENCIES) + @rm -f posix$(EXEEXT) + $(LINK) $(posix_OBJECTS) $(posix_LDADD) $(LIBS) +simple$(EXEEXT): $(simple_OBJECTS) $(simple_DEPENDENCIES) + @rm -f simple$(EXEEXT) + $(LINK) $(simple_OBJECTS) $(simple_LDADD) $(LIBS) +sql$(EXEEXT): $(sql_OBJECTS) $(sql_DEPENDENCIES) + @rm -f sql$(EXEEXT) + $(LINK) $(sql_OBJECTS) $(sql_LDADD) $(LIBS) +syntax$(EXEEXT): $(syntax_OBJECTS) $(syntax_DEPENDENCIES) + @rm -f syntax$(EXEEXT) + $(LINK) $(syntax_OBJECTS) $(syntax_LDADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/crnl.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/encode.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/listcap.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/names.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/posix.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/simple.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sql.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/syntax.Po@am__quote@ + +.c.o: +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(COMPILE) -c $< + +.c.obj: +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'` +@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(COMPILE) -c `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF 
$(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(LTCOMPILE) -c -o $@ $< + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + +ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES) + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) ' { files[$$0] = 1; } \ + END { for (i in files) print i; }'`; \ + mkid -fID $$unique +tags: TAGS + +TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ + $(TAGS_FILES) $(LISP) + tags=; \ + here=`pwd`; \ + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) ' { files[$$0] = 1; } \ + END { for (i in files) print i; }'`; \ + if test -z "$(ETAGS_ARGS)$$tags$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$tags $$unique; \ + fi +ctags: CTAGS +CTAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ + $(TAGS_FILES) $(LISP) + tags=; \ + here=`pwd`; \ + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) ' { files[$$0] = 1; } \ + END { for (i in files) print i; }'`; \ + test -z "$(CTAGS_ARGS)$$tags$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$tags $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && cd $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) $$here + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 
's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -pR $(srcdir)/$$file $(distdir)$$dir || exit 1; \ + fi; \ + cp -pR $$d/$$file $(distdir)$$dir || exit 1; \ + else \ + test -f $(distdir)/$$file \ + || cp -p $$d/$$file $(distdir)/$$file \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-am +all-am: Makefile $(PROGRAMS) +installdirs: +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + `test -z '$(STRIP)' || \ + echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." 
+clean: clean-am + +clean-am: clean-generic clean-libtool clean-noinstPROGRAMS \ + mostlyclean-am + +distclean: distclean-am + -rm -rf ./$(DEPDIR) + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-am + +dvi-am: + +html: html-am + +info: info-am + +info-am: + +install-data-am: + +install-dvi: install-dvi-am + +install-exec-am: + +install-html: install-html-am + +install-info: install-info-am + +install-man: + +install-pdf: install-pdf-am + +install-ps: install-ps-am + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -rf ./$(DEPDIR) + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: + +.MAKE: install-am install-strip + +.PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \ + clean-libtool clean-noinstPROGRAMS ctags distclean \ + distclean-compile distclean-generic distclean-libtool \ + distclean-tags distdir dvi dvi-am html html-am info info-am \ + install install-am install-data install-data-am install-dvi \ + install-dvi-am install-exec install-exec-am install-html \ + install-html-am install-info install-info-am install-man \ + install-pdf install-pdf-am install-ps install-ps-am \ + install-strip installcheck installcheck-am installdirs \ + maintainer-clean maintainer-clean-generic mostlyclean \ + mostlyclean-compile mostlyclean-generic mostlyclean-libtool \ + pdf pdf-am ps ps-am tags uninstall uninstall-am + + +test: encode listcap names posix simple sql syntax + @$(sampledir)/encode + @$(sampledir)/listcap + @$(sampledir)/names + @$(sampledir)/posix + @$(sampledir)/simple + @$(sampledir)/sql + @$(sampledir)/syntax +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. 
+.NOEXPORT: diff --git a/oniguruma/sample/crnl.c b/oniguruma/sample/crnl.c new file mode 100644 index 0000000..3f511e9 --- /dev/null +++ b/oniguruma/sample/crnl.c @@ -0,0 +1,127 @@ +/* + * crnl.c 2007/05/30 K.Kosako + * + * !!! You should enable USE_CRNL_AS_LINE_TERMINATOR. !!! + * + * USE_CRNL_AS_LINE_TERMINATOR config test program. + */ +#include +#include +#include "oniguruma.h" + +static int nfail = 0; + +static void result(int no, int from, int to, + int expected_from, int expected_to) +{ + fprintf(stderr, "%3d: ", no); + if (from == expected_from && to == expected_to) { + fprintf(stderr, "Success\n"); + } + else { + fprintf(stderr, "Fail: expected: (%d-%d), result: (%d-%d)\n", + expected_from, expected_to, from, to); + + nfail++; + } +} + +static int +x(int no, char* pattern_arg, char* str_arg, + int expected_from, int expected_to) +{ + int r; + unsigned char *start, *range, *end; + regex_t* reg; + OnigErrorInfo einfo; + OnigRegion *region; + UChar *pattern, *str; + + pattern = (UChar* )pattern_arg; + str = (UChar* )str_arg; + + r = onig_new(®, pattern, pattern + strlen((char* )pattern), + ONIG_OPTION_DEFAULT, ONIG_ENCODING_ASCII, ONIG_SYNTAX_DEFAULT, &einfo); + if (r != ONIG_NORMAL) { + char s[ONIG_MAX_ERROR_MESSAGE_LEN]; + onig_error_code_to_str(s, r, &einfo); + fprintf(stderr, "ERROR: %s\n", s); + return -1; + } + + region = onig_region_new(); + + end = str + strlen((char* )str); + start = str; + range = end; + r = onig_search(reg, str, end, start, range, region, ONIG_OPTION_NONE); + if (r >= 0 || r == ONIG_MISMATCH) { + result(no, region->beg[0], region->end[0], expected_from, expected_to); + } + else if (r == ONIG_MISMATCH) { + result(no, r, -1, expected_from, expected_to); + } + else { /* error */ + char s[ONIG_MAX_ERROR_MESSAGE_LEN]; + onig_error_code_to_str(s, r); + fprintf(stderr, "ERROR: %s\n", s); + return -1; + } + + onig_region_free(region, 1 /* 1:free self, 0:free contents only */); + onig_free(reg); + return 0; +} + +static int +f(int no, char* 
pattern_arg, char* str_arg) +{ + return x(no, pattern_arg, str_arg, -1, -1); +} + +extern int main(int argc, char* argv[]) +{ + x( 1, "", "\r\n", 0, 0); + x( 2, ".", "\r\n", 0, 1); + f( 3, "..", "\r\n"); + x( 4, "^", "\r\n", 0, 0); + x( 5, "\\n^", "\r\nf", 1, 2); + x( 6, "\\n^a", "\r\na", 1, 3); + x( 7, "$", "\r\n", 0, 0); + x( 8, "T$", "T\r\n", 0, 1); + x( 9, "T$", "T\raT\r\n", 3, 4); + x(10, "\\z", "\r\n", 2, 2); + f(11, "a\\z", "a\r\n"); + x(12, "\\Z", "\r\n", 0, 0); + x(13, "\\Z", "\r\na", 3, 3); + x(14, "\\Z", "\r\n\r\n\n", 4, 4); + x(15, "\\Z", "\r\n\r\nX", 5, 5); + x(16, "a\\Z", "a\r\n", 0, 1); + x(17, "aaaaaaaaaaaaaaa\\Z", "aaaaaaaaaaaaaaa\r\n", 0, 15); + x(18, "a|$", "b\r\n", 1, 1); + x(19, "$|b", "\rb", 1, 2); + x(20, "a$|ab$", "\r\nab\r\n", 2, 4); + + x(21, "a|\\Z", "b\r\n", 1, 1); + x(22, "\\Z|b", "\rb", 1, 2); + x(23, "a\\Z|ab\\Z", "\r\nab\r\n", 2, 4); + x(24, "(?=a$).", "a\r\n", 0, 1); + f(25, "(?=a$).", "a\r"); + x(26, "(?!a$)..", "a\r", 0, 2); + x(27, "(?<=a$).\\n", "a\r\n", 1, 3); + f(28, "(? 0) { + fprintf(stderr, "\n"); + fprintf(stderr, "!!! You have to enable USE_CRNL_AS_LINE_TERMINATOR\n"); + fprintf(stderr, "!!! 
in regenc.h for this test program.\n"); + fprintf(stderr, "\n"); + } + + return 0; +} diff --git a/oniguruma/sample/encode.c b/oniguruma/sample/encode.c new file mode 100644 index 0000000..d3f2228 --- /dev/null +++ b/oniguruma/sample/encode.c @@ -0,0 +1,298 @@ +/* + * encode.c + */ +#include +#include "oniguruma.h" + +static int +search(regex_t* reg, unsigned char* str, unsigned char* end) +{ + int r; + unsigned char *start, *range; + OnigRegion *region; + + region = onig_region_new(); + + start = str; + range = end; + r = onig_search(reg, str, end, start, range, region, ONIG_OPTION_NONE); + if (r >= 0) { + int i; + + fprintf(stderr, "match at %d (%s)\n", r, + ONIGENC_NAME(onig_get_encoding(reg))); + for (i = 0; i < region->num_regs; i++) { + fprintf(stderr, "%d: (%d-%d)\n", i, region->beg[i], region->end[i]); + } + } + else if (r == ONIG_MISMATCH) { + fprintf(stderr, "search fail (%s)\n", + ONIGENC_NAME(onig_get_encoding(reg))); + } + else { /* error */ + char s[ONIG_MAX_ERROR_MESSAGE_LEN]; + onig_error_code_to_str(s, r); + fprintf(stderr, "ERROR: %s\n", s); + fprintf(stderr, " (%s)\n", ONIGENC_NAME(onig_get_encoding(reg))); + return -1; + } + + onig_region_free(region, 1 /* 1:free self, 0:free contents only */); + return 0; +} + +static int +exec(OnigEncoding enc, OnigOptionType options, + char* apattern, char* astr) +{ + int r; + unsigned char *end; + regex_t* reg; + OnigErrorInfo einfo; + UChar* pattern = (UChar* )apattern; + UChar* str = (UChar* )astr; + + r = onig_new(®, pattern, + pattern + onigenc_str_bytelen_null(enc, pattern), + options, enc, ONIG_SYNTAX_DEFAULT, &einfo); + if (r != ONIG_NORMAL) { + char s[ONIG_MAX_ERROR_MESSAGE_LEN]; + onig_error_code_to_str(s, r, &einfo); + fprintf(stderr, "ERROR: %s\n", s); + return -1; + } + + end = str + onigenc_str_bytelen_null(enc, str); + r = search(reg, str, end); + + onig_free(reg); + onig_end(); + return 0; +} + +static OnigCaseFoldType CF = ONIGENC_CASE_FOLD_MIN; + +#if 0 +static void 
+set_case_fold(OnigCaseFoldType cf) +{ + CF = cf; +} +#endif + +static int +exec_deluxe(OnigEncoding pattern_enc, OnigEncoding str_enc, + OnigOptionType options, char* apattern, char* astr) +{ + int r; + unsigned char *end; + regex_t* reg; + OnigCompileInfo ci; + OnigErrorInfo einfo; + UChar* pattern = (UChar* )apattern; + UChar* str = (UChar* )astr; + + ci.num_of_elements = 5; + ci.pattern_enc = pattern_enc; + ci.target_enc = str_enc; + ci.syntax = ONIG_SYNTAX_DEFAULT; + ci.option = options; + ci.case_fold_flag = CF; + + r = onig_new_deluxe(®, pattern, + pattern + onigenc_str_bytelen_null(pattern_enc, pattern), + &ci, &einfo); + if (r != ONIG_NORMAL) { + char s[ONIG_MAX_ERROR_MESSAGE_LEN]; + onig_error_code_to_str(s, r, &einfo); + fprintf(stderr, "ERROR: %s\n", s); + return -1; + } + + end = str + onigenc_str_bytelen_null(str_enc, str); + r = search(reg, str, end); + + onig_free(reg); + onig_end(); + return 0; +} + +extern int main(int argc, char* argv[]) +{ + int r; + /* ISO 8859-1 test */ + static unsigned char str[] = { 0xc7, 0xd6, 0xfe, 0xea, 0xe0, 0xe2, 0x00 }; + static unsigned char pattern[] = { 0xe7, 0xf6, 0xde, '\\', 'w', '+', 0x00 }; + + r = exec(ONIG_ENCODING_CP1251, ONIG_OPTION_IGNORECASE, + "aBc", " AbC"); + + r = exec(ONIG_ENCODING_ISO_8859_1, ONIG_OPTION_IGNORECASE, + " [a-c\337z] ", " SS "); + r = exec(ONIG_ENCODING_ISO_8859_1, ONIG_OPTION_IGNORECASE, + " [\330-\341] ", " SS "); + + r = exec(ONIG_ENCODING_ISO_8859_2, ONIG_OPTION_IGNORECASE, + "\337 ", " Ss "); + r = exec(ONIG_ENCODING_ISO_8859_2, ONIG_OPTION_IGNORECASE, + "SS ", " \337 "); + r = exec(ONIG_ENCODING_ISO_8859_2, ONIG_OPTION_IGNORECASE, + "\\A\\S\\z", "ss"); + + r = exec(ONIG_ENCODING_ISO_8859_2, ONIG_OPTION_IGNORECASE, + "[ac]+", "bbbaAaCCC"); + + r = exec(ONIG_ENCODING_ISO_8859_3, ONIG_OPTION_IGNORECASE, + "[ac]+", "bbbaAaCCC"); + r = exec(ONIG_ENCODING_ISO_8859_4, ONIG_OPTION_IGNORECASE, + "[ac]+", "bbbaAaCCC"); + r = exec(ONIG_ENCODING_ISO_8859_5, ONIG_OPTION_IGNORECASE, + "[ac]+", 
"bbbaAaCCC"); + r = exec(ONIG_ENCODING_ISO_8859_6, ONIG_OPTION_IGNORECASE, + "[ac]+", "bbbaAaCCC"); + r = exec(ONIG_ENCODING_ISO_8859_7, ONIG_OPTION_IGNORECASE, + "[ac]+", "bbbaAaCCC"); + r = exec(ONIG_ENCODING_ISO_8859_8, ONIG_OPTION_IGNORECASE, + "[ac]+", "bbbaAaCCC"); + r = exec(ONIG_ENCODING_ISO_8859_9, ONIG_OPTION_IGNORECASE, + "[ac]+", "bbbaAaCCC"); + r = exec(ONIG_ENCODING_ISO_8859_10, ONIG_OPTION_IGNORECASE, + "[ac]+", "bbbaAaCCC"); + r = exec(ONIG_ENCODING_ISO_8859_11, ONIG_OPTION_IGNORECASE, + "[ac]+", "bbbaAaCCC"); + r = exec(ONIG_ENCODING_ISO_8859_13, ONIG_OPTION_IGNORECASE, + "[ac]+", "bbbaAaCCC"); + r = exec(ONIG_ENCODING_ISO_8859_14, ONIG_OPTION_IGNORECASE, + "[ac]+", "bbbaAaCCC"); + r = exec(ONIG_ENCODING_ISO_8859_15, ONIG_OPTION_IGNORECASE, + (char* )pattern, (char* )str); + r = exec(ONIG_ENCODING_ISO_8859_16, ONIG_OPTION_IGNORECASE, + (char* )pattern, (char* )str); + + r = exec(ONIG_ENCODING_KOI8_R, ONIG_OPTION_NONE, "a+", "bbbaaaccc"); + r = exec(ONIG_ENCODING_EUC_TW, ONIG_OPTION_NONE, "b*a+?c+", "bbbaaaccc"); + r = exec(ONIG_ENCODING_EUC_KR, ONIG_OPTION_NONE, "a+", "bbbaaaccc"); + r = exec(ONIG_ENCODING_EUC_CN, ONIG_OPTION_NONE, "c+", "bbbaaaccc"); + r = exec(ONIG_ENCODING_BIG5, ONIG_OPTION_NONE, "a+", "bbbaaaccc"); + + r = exec(ONIG_ENCODING_ISO_8859_1, ONIG_OPTION_IGNORECASE, + "\337", "SS"); + r = exec(ONIG_ENCODING_ISO_8859_1, ONIG_OPTION_IGNORECASE, + "SS", "\337"); + r = exec(ONIG_ENCODING_ISO_8859_1, ONIG_OPTION_IGNORECASE, + "SSb\337ssc", "a\337bSS\337cd"); + r = exec(ONIG_ENCODING_ISO_8859_1, ONIG_OPTION_IGNORECASE, + "[a\337]{0,2}", "aSS"); + r = exec(ONIG_ENCODING_ISO_8859_1, ONIG_OPTION_IGNORECASE, + "is", "iss"); + + r = exec_deluxe(ONIG_ENCODING_ASCII, ONIG_ENCODING_UTF16_BE, + ONIG_OPTION_NONE, "a+", + "\000b\000a\000a\000a\000c\000c\000\000"); + + r = exec_deluxe(ONIG_ENCODING_ASCII, ONIG_ENCODING_UTF16_LE, + ONIG_OPTION_NONE, "a+", + "b\000a\000a\000a\000a\000c\000\000\000"); + + r = exec_deluxe(ONIG_ENCODING_UTF16_BE, 
ONIG_ENCODING_UTF16_LE, + ONIG_OPTION_NONE, + "\000b\000a\000a\000a\000c\000c\000\000", + "x\000b\000a\000a\000a\000c\000c\000\000\000"); + + r = exec_deluxe(ONIG_ENCODING_ISO_8859_1, ONIG_ENCODING_UTF16_BE, + ONIG_OPTION_IGNORECASE, + "\337", "\000S\000S\000\000"); + + r = exec_deluxe(ONIG_ENCODING_ISO_8859_1, ONIG_ENCODING_UTF16_BE, + ONIG_OPTION_IGNORECASE, + "SS", "\000\337\000\000"); + + r = exec_deluxe(ONIG_ENCODING_ISO_8859_1, ONIG_ENCODING_UTF16_LE, + ONIG_OPTION_IGNORECASE, + "\337", "S\000S\000\000\000"); + + r = exec_deluxe(ONIG_ENCODING_ISO_8859_1, ONIG_ENCODING_UTF32_BE, + ONIG_OPTION_IGNORECASE, + "SS", "\000\000\000\337\000\000\000\000"); + + r = exec_deluxe(ONIG_ENCODING_ISO_8859_1, ONIG_ENCODING_UTF32_LE, + ONIG_OPTION_IGNORECASE, + "\337", "S\000\000\000S\000\000\000\000\000\000\000"); + + r = exec(ONIG_ENCODING_UTF16_BE, ONIG_OPTION_NONE, + "\000[\000[\000:\000a\000l\000n\000u\000m\000:\000]\000]\000+\000\000", + "\000#\002\120\000a\000Z\012\077\012\076\012\075\000\000"); + /* 0x0a3d == \012\075 : is not alnum */ + /* 0x0a3e == \012\076 : is alnum */ + + r = exec(ONIG_ENCODING_UTF16_BE, ONIG_OPTION_NONE, + "\000\\\000d\000+\000\000", + "\0003\0001\377\020\377\031\377\032\000\000"); + + r = exec(ONIG_ENCODING_GB18030, ONIG_OPTION_IGNORECASE, + "(Aa\\d)+", "BaA5Aa0234"); + + r = exec_deluxe(ONIG_ENCODING_ISO_8859_1, ONIG_ENCODING_UTF16_BE, + ONIG_OPTION_NONE, + "^\\P{Hiragana}\\p{^Hiragana}(\\p{Hiragana}+)$", + "\060\100\060\240\060\101\060\102\060\226\060\237\000\000"); + + r = exec_deluxe(ONIG_ENCODING_UTF16_BE, ONIG_ENCODING_UTF16_BE, + ONIG_OPTION_IGNORECASE, + "\000[\000\337\000]\000\000", "\000S\000S\000\000"); + + r = exec_deluxe(ONIG_ENCODING_UTF16_BE, ONIG_ENCODING_UTF16_BE, + ONIG_OPTION_IGNORECASE, + "\000[\000\337\000]\000\000", "\000s\000S\000\000"); + + r = exec_deluxe(ONIG_ENCODING_UTF16_BE, ONIG_ENCODING_UTF16_BE, + ONIG_OPTION_IGNORECASE, + "\000^\000[\000\001\000-\377\375\000]\000$\000\000", + "\000s\000S\000\000"); + + r = 
exec_deluxe(ONIG_ENCODING_UTF16_BE, ONIG_ENCODING_UTF16_BE, + ONIG_OPTION_IGNORECASE, + "\000S\000S\000\000", + "\000S\000T\000\337\000\000"); + + r = exec_deluxe(ONIG_ENCODING_UTF16_BE, ONIG_ENCODING_UTF16_BE, + ONIG_OPTION_IGNORECASE, + "\000S\000T\000S\000S\000\000", + "\000S\000t\000s\000S\000\000"); + + { + UChar pat[] = { 0x1f, 0xfc, 0x00, 0x00 }; + UChar str1[] = { 0x21, 0x26, 0x1f, 0xbe, 0x00, 0x00 }; + UChar str2[] = { 0x1f, 0xf3, 0x00, 0x00 }; + + r = exec_deluxe(ONIG_ENCODING_UTF16_BE, ONIG_ENCODING_UTF16_BE, + ONIG_OPTION_IGNORECASE, + (char* )pat, (char* )str1); + + r = exec_deluxe(ONIG_ENCODING_UTF16_BE, ONIG_ENCODING_UTF16_BE, + ONIG_OPTION_IGNORECASE, + (char* )pat, (char* )str2); + } + +#if 0 + /* You should define USE_UNICODE_CASE_FOLD_TURKISH_AZERI in regenc.h. */ + + set_case_fold(ONIGENC_CASE_FOLD_TURKISH_AZERI); + + r = exec_deluxe(ONIG_ENCODING_UTF8, ONIG_ENCODING_UTF8, + ONIG_OPTION_IGNORECASE, + "Ii", "\304\261\304\260"); + + r = exec_deluxe(ONIG_ENCODING_UTF16_BE, ONIG_ENCODING_UTF16_BE, + ONIG_OPTION_IGNORECASE, + "\000I\000i\000\000", "\001\061\001\060\000\000"); + + r = exec_deluxe(ONIG_ENCODING_UTF16_BE, ONIG_ENCODING_UTF16_BE, + ONIG_OPTION_IGNORECASE, + "\001\061\001\060\000\000", "\000I\000i\000\000"); + + set_case_fold(ONIGENC_CASE_FOLD_MIN); +#endif + + return 0; +} diff --git a/oniguruma/sample/listcap.c b/oniguruma/sample/listcap.c new file mode 100644 index 0000000..6a71c17 --- /dev/null +++ b/oniguruma/sample/listcap.c @@ -0,0 +1,107 @@ +/* + * listcap.c + * + * capture history (?@...) sample. 
+ */ +#include +#include +#include "oniguruma.h" + +static int +node_callback(int group, int beg, int end, int level, int at, void* arg) +{ + int i; + + if (at != ONIG_TRAVERSE_CALLBACK_AT_FIRST) + return -1; /* error */ + + /* indent */ + for (i = 0; i < level * 2; i++) + fputc(' ', stderr); + + fprintf(stderr, "%d: (%d-%d)\n", group, beg, end); + return 0; +} + +extern int ex(unsigned char* str, unsigned char* pattern, + OnigSyntaxType* syntax) +{ + int r; + unsigned char *start, *range, *end; + regex_t* reg; + OnigErrorInfo einfo; + OnigRegion *region; + + r = onig_new(®, pattern, pattern + strlen((char* )pattern), + ONIG_OPTION_DEFAULT, ONIG_ENCODING_ASCII, syntax, &einfo); + if (r != ONIG_NORMAL) { + char s[ONIG_MAX_ERROR_MESSAGE_LEN]; + onig_error_code_to_str(s, r, &einfo); + fprintf(stderr, "ERROR: %s\n", s); + return -1; + } + + fprintf(stderr, "number of captures: %d\n", onig_number_of_captures(reg)); + fprintf(stderr, "number of capture histories: %d\n", + onig_number_of_capture_histories(reg)); + + region = onig_region_new(); + + end = str + strlen((char* )str); + start = str; + range = end; + r = onig_search(reg, str, end, start, range, region, ONIG_OPTION_NONE); + if (r >= 0) { + int i; + + fprintf(stderr, "match at %d\n", r); + for (i = 0; i < region->num_regs; i++) { + fprintf(stderr, "%d: (%d-%d)\n", i, region->beg[i], region->end[i]); + } + fprintf(stderr, "\n"); + + r = onig_capture_tree_traverse(region, ONIG_TRAVERSE_CALLBACK_AT_FIRST, + node_callback, (void* )0); + } + else if (r == ONIG_MISMATCH) { + fprintf(stderr, "search fail\n"); + } + else { /* error */ + char s[ONIG_MAX_ERROR_MESSAGE_LEN]; + onig_error_code_to_str(s, r); + return -1; + } + + onig_region_free(region, 1 /* 1:free self, 0:free contents only */); + onig_free(reg); + return 0; +} + + +extern int main(int argc, char* argv[]) +{ + int r; + OnigSyntaxType syn; + + static UChar* str1 = (UChar* )"((())())"; + static UChar* pattern1 + = (UChar* )"\\g

(?@

\\(\\g\\)){0}(?@(?:\\g

)*|){0}"; + + static UChar* str2 = (UChar* )"x00x00x00"; + static UChar* pattern2 = (UChar* )"(?@x(?@\\d+))+"; + + static UChar* str3 = (UChar* )"0123"; + static UChar* pattern3 = (UChar* )"(?@.)(?@.)(?@.)(?@.)"; + + /* enable capture hostory */ + onig_copy_syntax(&syn, ONIG_SYNTAX_DEFAULT); + onig_set_syntax_op2(&syn, + onig_get_syntax_op2(&syn) | ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY); + + r = ex(str1, pattern1, &syn); + r = ex(str2, pattern2, &syn); + r = ex(str3, pattern3, &syn); + + onig_end(); + return 0; +} diff --git a/oniguruma/sample/names.c b/oniguruma/sample/names.c new file mode 100644 index 0000000..6667eb5 --- /dev/null +++ b/oniguruma/sample/names.c @@ -0,0 +1,72 @@ +/* + * names.c -- example of group name callback. + */ +#include +#include +#include "oniguruma.h" + +static int +name_callback(const UChar* name, const UChar* name_end, + int ngroup_num, int* group_nums, + regex_t* reg, void* arg) +{ + int i, gn, ref; + char* s; + OnigRegion *region = (OnigRegion* )arg; + + for (i = 0; i < ngroup_num; i++) { + gn = group_nums[i]; + ref = onig_name_to_backref_number(reg, name, name_end, region); + s = (ref == gn ? 
"*" : ""); + fprintf(stderr, "%s (%d): ", name, gn); + fprintf(stderr, "(%d-%d) %s\n", region->beg[gn], region->end[gn], s); + } + return 0; /* 0: continue */ +} + +extern int main(int argc, char* argv[]) +{ + int r; + unsigned char *start, *range, *end; + regex_t* reg; + OnigErrorInfo einfo; + OnigRegion *region; + + static UChar* pattern = (UChar* )"(?a*)(?b*)(?c*)"; + static UChar* str = (UChar* )"aaabbbbcc"; + + r = onig_new(®, pattern, pattern + strlen((char* )pattern), + ONIG_OPTION_DEFAULT, ONIG_ENCODING_ASCII, ONIG_SYNTAX_DEFAULT, &einfo); + if (r != ONIG_NORMAL) { + char s[ONIG_MAX_ERROR_MESSAGE_LEN]; + onig_error_code_to_str(s, r, &einfo); + fprintf(stderr, "ERROR: %s\n", s); + return -1; + } + + fprintf(stderr, "number of names: %d\n", onig_number_of_names(reg)); + + region = onig_region_new(); + + end = str + strlen((char* )str); + start = str; + range = end; + r = onig_search(reg, str, end, start, range, region, ONIG_OPTION_NONE); + if (r >= 0) { + fprintf(stderr, "match at %d\n\n", r); + r = onig_foreach_name(reg, name_callback, (void* )region); + } + else if (r == ONIG_MISMATCH) { + fprintf(stderr, "search fail\n"); + } + else { /* error */ + char s[ONIG_MAX_ERROR_MESSAGE_LEN]; + onig_error_code_to_str(s, r); + return -1; + } + + onig_region_free(region, 1 /* 1:free self, 0:free contents only */); + onig_free(reg); + onig_end(); + return 0; +} diff --git a/oniguruma/sample/posix.c b/oniguruma/sample/posix.c new file mode 100644 index 0000000..d24ee35 --- /dev/null +++ b/oniguruma/sample/posix.c @@ -0,0 +1,93 @@ +/* + * posix.c + */ +#include +#include "onigposix.h" + +typedef unsigned char UChar; + +static int x(regex_t* reg, unsigned char* pattern, unsigned char* str) +{ + int r, i; + char buf[200]; + regmatch_t pmatch[20]; + + r = regexec(reg, (char* )str, reg->re_nsub + 1, pmatch, 0); + if (r != 0 && r != REG_NOMATCH) { + regerror(r, reg, buf, sizeof(buf)); + fprintf(stderr, "ERROR: %s\n", buf); + return -1; + } + + if (r == REG_NOMATCH) { + 
fprintf(stderr, "FAIL: /%s/ '%s'\n", pattern, str); + } + else { + fprintf(stderr, "OK: /%s/ '%s'\n", pattern, str); + for (i = 0; i <= (int )reg->re_nsub; i++) { + fprintf(stderr, "%d: %d-%d\n", i, pmatch[i].rm_so, pmatch[i].rm_eo); + } + } + return 0; +} + +extern int main(int argc, char* argv[]) +{ + int r; + char buf[200]; + regex_t reg; + UChar* pattern; + + /* default syntax (ONIG_SYNTAX_RUBY) */ + pattern = (UChar* )"^a+b{2,7}[c-f]?$|uuu"; + r = regcomp(®, (char* )pattern, REG_EXTENDED); + if (r) { + regerror(r, ®, buf, sizeof(buf)); + fprintf(stderr, "ERROR: %s\n", buf); + return -1; + } + x(®, pattern, (UChar* )"aaabbbbd"); + + /* POSIX Basic RE (REG_EXTENDED is not specified.) */ + pattern = (UChar* )"^a+b{2,7}[c-f]?|uuu"; + r = regcomp(®, (char* )pattern, 0); + if (r) { + regerror(r, ®, buf, sizeof(buf)); + fprintf(stderr, "ERROR: %s\n", buf); + return -1; + } + x(®, pattern, (UChar* )"a+b{2,7}d?|uuu"); + + /* POSIX Basic RE (REG_EXTENDED is not specified.) */ + pattern = (UChar* )"^a*b\\{2,7\\}\\([c-f]\\)$"; + r = regcomp(®, (char* )pattern, 0); + if (r) { + regerror(r, ®, buf, sizeof(buf)); + fprintf(stderr, "ERROR: %s\n", buf); + return -1; + } + x(®, pattern, (UChar* )"aaaabbbbbbd"); + + /* POSIX Extended RE */ + onig_set_default_syntax(ONIG_SYNTAX_POSIX_EXTENDED); + pattern = (UChar* )"^a+b{2,7}[c-f]?)$|uuu"; + r = regcomp(®, (char* )pattern, REG_EXTENDED); + if (r) { + regerror(r, ®, buf, sizeof(buf)); + fprintf(stderr, "ERROR: %s\n", buf); + return -1; + } + x(®, pattern, (UChar* )"aaabbbbd)"); + + pattern = (UChar* )"^b."; + r = regcomp(®, (char* )pattern, REG_EXTENDED | REG_NEWLINE); + if (r) { + regerror(r, ®, buf, sizeof(buf)); + fprintf(stderr, "ERROR: %s\n", buf); + return -1; + } + x(®, pattern, (UChar* )"a\nb\n"); + + regfree(®); + return 0; +} diff --git a/oniguruma/sample/simple.c b/oniguruma/sample/simple.c new file mode 100644 index 0000000..948a542 --- /dev/null +++ b/oniguruma/sample/simple.c @@ -0,0 +1,56 @@ +/* + * simple.c + */ 
+#include +#include +#include "oniguruma.h" + +extern int main(int argc, char* argv[]) +{ + int r; + unsigned char *start, *range, *end; + regex_t* reg; + OnigErrorInfo einfo; + OnigRegion *region; + + static UChar* pattern = (UChar* )"a(.*)b|[e-f]+"; + static UChar* str = (UChar* )"zzzzaffffffffb"; + + r = onig_new(®, pattern, pattern + strlen((char* )pattern), + ONIG_OPTION_DEFAULT, ONIG_ENCODING_ASCII, ONIG_SYNTAX_DEFAULT, &einfo); + if (r != ONIG_NORMAL) { + char s[ONIG_MAX_ERROR_MESSAGE_LEN]; + onig_error_code_to_str(s, r, &einfo); + fprintf(stderr, "ERROR: %s\n", s); + return -1; + } + + region = onig_region_new(); + + end = str + strlen((char* )str); + start = str; + range = end; + r = onig_search(reg, str, end, start, range, region, ONIG_OPTION_NONE); + if (r >= 0) { + int i; + + fprintf(stderr, "match at %d\n", r); + for (i = 0; i < region->num_regs; i++) { + fprintf(stderr, "%d: (%d-%d)\n", i, region->beg[i], region->end[i]); + } + } + else if (r == ONIG_MISMATCH) { + fprintf(stderr, "search fail\n"); + } + else { /* error */ + char s[ONIG_MAX_ERROR_MESSAGE_LEN]; + onig_error_code_to_str(s, r); + fprintf(stderr, "ERROR: %s\n", s); + return -1; + } + + onig_region_free(region, 1 /* 1:free self, 0:free contents only */); + onig_free(reg); + onig_end(); + return 0; +} diff --git a/oniguruma/sample/sql.c b/oniguruma/sample/sql.c new file mode 100644 index 0000000..e0ce62c --- /dev/null +++ b/oniguruma/sample/sql.c @@ -0,0 +1,73 @@ +/* + * sql.c + */ +#include +#include +#include "oniguruma.h" + +extern int main(int argc, char* argv[]) +{ + static OnigSyntaxType SQLSyntax; + + int r; + unsigned char *start, *range, *end; + regex_t* reg; + OnigErrorInfo einfo; + OnigRegion *region; + + static UChar* pattern = (UChar* )"\\_%\\\\__zz"; + static UChar* str = (UChar* )"a_abcabcabc\\ppzz"; + + onig_set_syntax_op (&SQLSyntax, ONIG_SYN_OP_VARIABLE_META_CHARACTERS); + onig_set_syntax_op2 (&SQLSyntax, 0); + onig_set_syntax_behavior(&SQLSyntax, 0); + 
onig_set_syntax_options (&SQLSyntax, ONIG_OPTION_MULTILINE); + onig_set_meta_char(&SQLSyntax, ONIG_META_CHAR_ESCAPE, (OnigCodePoint )'\\'); + onig_set_meta_char(&SQLSyntax, ONIG_META_CHAR_ANYCHAR, (OnigCodePoint )'_'); + onig_set_meta_char(&SQLSyntax, ONIG_META_CHAR_ANYTIME, + ONIG_INEFFECTIVE_META_CHAR); + onig_set_meta_char(&SQLSyntax, ONIG_META_CHAR_ZERO_OR_ONE_TIME, + ONIG_INEFFECTIVE_META_CHAR); + onig_set_meta_char(&SQLSyntax, ONIG_META_CHAR_ONE_OR_MORE_TIME, + ONIG_INEFFECTIVE_META_CHAR); + onig_set_meta_char(&SQLSyntax, ONIG_META_CHAR_ANYCHAR_ANYTIME, + (OnigCodePoint )'%'); + + r = onig_new(®, pattern, pattern + strlen((char* )pattern), + ONIG_OPTION_DEFAULT, ONIG_ENCODING_ASCII, &SQLSyntax, &einfo); + if (r != ONIG_NORMAL) { + char s[ONIG_MAX_ERROR_MESSAGE_LEN]; + onig_error_code_to_str(s, r, &einfo); + fprintf(stderr, "ERROR: %s\n", s); + return -1; + } + + region = onig_region_new(); + + end = str + strlen((char* )str); + start = str; + range = end; + r = onig_search(reg, str, end, start, range, region, ONIG_OPTION_NONE); + if (r >= 0) { + int i; + + fprintf(stderr, "match at %d\n", r); + for (i = 0; i < region->num_regs; i++) { + fprintf(stderr, "%d: (%d-%d)\n", i, region->beg[i], region->end[i]); + } + } + else if (r == ONIG_MISMATCH) { + fprintf(stderr, "search fail\n"); + } + else { /* error */ + char s[ONIG_MAX_ERROR_MESSAGE_LEN]; + onig_error_code_to_str(s, r); + fprintf(stderr, "ERROR: %s\n", s); + return -1; + } + + onig_region_free(region, 1 /* 1:free self, 0:free contents only */); + onig_free(reg); + onig_end(); + return 0; +} diff --git a/oniguruma/sample/syntax.c b/oniguruma/sample/syntax.c new file mode 100644 index 0000000..d55e4a8 --- /dev/null +++ b/oniguruma/sample/syntax.c @@ -0,0 +1,74 @@ +/* + * syntax.c + */ +#include +#include +#include "oniguruma.h" + +extern int exec(OnigSyntaxType* syntax, + char* apattern, char* astr) +{ + int r; + unsigned char *start, *range, *end; + regex_t* reg; + OnigErrorInfo einfo; + OnigRegion *region; 
+ UChar* pattern = (UChar* )apattern; + UChar* str = (UChar* )astr; + + r = onig_new(®, pattern, pattern + strlen((char* )pattern), + ONIG_OPTION_DEFAULT, ONIG_ENCODING_ASCII, syntax, &einfo); + if (r != ONIG_NORMAL) { + char s[ONIG_MAX_ERROR_MESSAGE_LEN]; + onig_error_code_to_str(s, r, &einfo); + fprintf(stderr, "ERROR: %s\n", s); + return -1; + } + + region = onig_region_new(); + + end = str + strlen((char* )str); + start = str; + range = end; + r = onig_search(reg, str, end, start, range, region, ONIG_OPTION_NONE); + if (r >= 0) { + int i; + + fprintf(stderr, "match at %d\n", r); + for (i = 0; i < region->num_regs; i++) { + fprintf(stderr, "%d: (%d-%d)\n", i, region->beg[i], region->end[i]); + } + } + else if (r == ONIG_MISMATCH) { + fprintf(stderr, "search fail\n"); + } + else { /* error */ + char s[ONIG_MAX_ERROR_MESSAGE_LEN]; + onig_error_code_to_str(s, r); + fprintf(stderr, "ERROR: %s\n", s); + return -1; + } + + onig_region_free(region, 1 /* 1:free self, 0:free contents only */); + onig_free(reg); + onig_end(); + return 0; +} + +extern int main(int argc, char* argv[]) +{ + int r; + + r = exec(ONIG_SYNTAX_PERL, + "\\p{XDigit}\\P{XDigit}\\p{^XDigit}\\P{^XDigit}\\p{XDigit}", + "bgh3a"); + + r = exec(ONIG_SYNTAX_JAVA, + "\\p{XDigit}\\P{XDigit}[a-c&&b-g]", "bgc"); + + r = exec(ONIG_SYNTAX_ASIS, + "abc def* e+ g?ddd[a-rvvv] (vv){3,7}hv\\dvv(?:aczui ss)\\W\\w$", + "abc def* e+ g?ddd[a-rvvv] (vv){3,7}hv\\dvv(?:aczui ss)\\W\\w$"); + onig_end(); + return 0; +} diff --git a/oniguruma/st.c b/oniguruma/st.c new file mode 100644 index 0000000..022880a --- /dev/null +++ b/oniguruma/st.c @@ -0,0 +1,578 @@ +/* This is a public domain general purpose hash table package written by Peter Moore @ UCB. 
*/ + +/* static char sccsid[] = "@(#) st.c 5.1 89/12/14 Crucible"; */ + +#include +#include +#include + +#ifdef _WIN32 +#include +#endif + +#include "regint.h" +#include "st.h" + +typedef struct st_table_entry st_table_entry; + +struct st_table_entry { + unsigned int hash; + st_data_t key; + st_data_t record; + st_table_entry *next; +}; + +#define ST_DEFAULT_MAX_DENSITY 5 +#define ST_DEFAULT_INIT_TABLE_SIZE 11 + + /* + * DEFAULT_MAX_DENSITY is the default for the largest we allow the + * average number of items per bin before increasing the number of + * bins + * + * DEFAULT_INIT_TABLE_SIZE is the default for the number of bins + * allocated initially + * + */ + +static int numcmp(long, long); +static int numhash(long); +static struct st_hash_type type_numhash = { + numcmp, + numhash, +}; + +/* extern int strcmp(const char *, const char *); */ +static int strhash(const char *); +static struct st_hash_type type_strhash = { + strcmp, + strhash, +}; + +static void rehash(st_table *); + +#define alloc(type) (type*)xmalloc((unsigned)sizeof(type)) +#define Calloc(n,s) (char*)xcalloc((n),(s)) + +#define EQUAL(table,x,y) ((x)==(y) || (*table->type->compare)((x),(y)) == 0) + +#define do_hash(key,table) (unsigned int)(*(table)->type->hash)((key)) +#define do_hash_bin(key,table) (do_hash(key, table)%(table)->num_bins) + +/* + * MINSIZE is the minimum size of a dictionary. + */ + +#define MINSIZE 8 + +/* +Table of prime numbers 2^n+a, 2<=n<=30. 
+*/ +static const long primes[] = { + 8 + 3, + 16 + 3, + 32 + 5, + 64 + 3, + 128 + 3, + 256 + 27, + 512 + 9, + 1024 + 9, + 2048 + 5, + 4096 + 3, + 8192 + 27, + 16384 + 43, + 32768 + 3, + 65536 + 45, + 131072 + 29, + 262144 + 3, + 524288 + 21, + 1048576 + 7, + 2097152 + 17, + 4194304 + 15, + 8388608 + 9, + 16777216 + 43, + 33554432 + 35, + 67108864 + 15, + 134217728 + 29, + 268435456 + 3, + 536870912 + 11, + 1073741824 + 85, + 0 +}; + +static int +new_size(size) + int size; +{ + int i; + +#if 0 + for (i=3; i<31; i++) { + if ((1< size) return 1< size) return primes[i]; + } + /* Ran out of polynomials */ + return -1; /* should raise exception */ +#endif +} + +#ifdef HASH_LOG +static int collision = 0; +static int init_st = 0; + +static void +stat_col() +{ + FILE *f = fopen("/tmp/col", "w"); + fprintf(f, "collision: %d\n", collision); + fclose(f); +} +#endif + +st_table* +st_init_table_with_size(type, size) + struct st_hash_type *type; + int size; +{ + st_table *tbl; + +#ifdef HASH_LOG + if (init_st == 0) { + init_st = 1; + atexit(stat_col); + } +#endif + + size = new_size(size); /* round up to prime number */ + + tbl = alloc(st_table); + tbl->type = type; + tbl->num_entries = 0; + tbl->num_bins = size; + tbl->bins = (st_table_entry **)Calloc(size, sizeof(st_table_entry*)); + + return tbl; +} + +st_table* +st_init_table(type) + struct st_hash_type *type; +{ + return st_init_table_with_size(type, 0); +} + +st_table* +st_init_numtable(void) +{ + return st_init_table(&type_numhash); +} + +st_table* +st_init_numtable_with_size(size) + int size; +{ + return st_init_table_with_size(&type_numhash, size); +} + +st_table* +st_init_strtable(void) +{ + return st_init_table(&type_strhash); +} + +st_table* +st_init_strtable_with_size(size) + int size; +{ + return st_init_table_with_size(&type_strhash, size); +} + +void +st_free_table(table) + st_table *table; +{ + register st_table_entry *ptr, *next; + int i; + + for(i = 0; i < table->num_bins; i++) { + ptr = table->bins[i]; + 
while (ptr != 0) { + next = ptr->next; + free(ptr); + ptr = next; + } + } + free(table->bins); + free(table); +} + +#define PTR_NOT_EQUAL(table, ptr, hash_val, key) \ +((ptr) != 0 && (ptr->hash != (hash_val) || !EQUAL((table), (key), (ptr)->key))) + +#ifdef HASH_LOG +#define COLLISION collision++ +#else +#define COLLISION +#endif + +#define FIND_ENTRY(table, ptr, hash_val, bin_pos) do {\ + bin_pos = hash_val%(table)->num_bins;\ + ptr = (table)->bins[bin_pos];\ + if (PTR_NOT_EQUAL(table, ptr, hash_val, key)) {\ + COLLISION;\ + while (PTR_NOT_EQUAL(table, ptr->next, hash_val, key)) {\ + ptr = ptr->next;\ + }\ + ptr = ptr->next;\ + }\ +} while (0) + +int +st_lookup(table, key, value) + st_table *table; + register st_data_t key; + st_data_t *value; +{ + unsigned int hash_val, bin_pos; + register st_table_entry *ptr; + + hash_val = do_hash(key, table); + FIND_ENTRY(table, ptr, hash_val, bin_pos); + + if (ptr == 0) { + return 0; + } + else { + if (value != 0) *value = ptr->record; + return 1; + } +} + +#define ADD_DIRECT(table, key, value, hash_val, bin_pos)\ +do {\ + st_table_entry *entry;\ + if (table->num_entries/(table->num_bins) > ST_DEFAULT_MAX_DENSITY) {\ + rehash(table);\ + bin_pos = hash_val % table->num_bins;\ + }\ + \ + entry = alloc(st_table_entry);\ + \ + entry->hash = hash_val;\ + entry->key = key;\ + entry->record = value;\ + entry->next = table->bins[bin_pos];\ + table->bins[bin_pos] = entry;\ + table->num_entries++;\ +} while (0) + +int +st_insert(table, key, value) + register st_table *table; + register st_data_t key; + st_data_t value; +{ + unsigned int hash_val, bin_pos; + register st_table_entry *ptr; + + hash_val = do_hash(key, table); + FIND_ENTRY(table, ptr, hash_val, bin_pos); + + if (ptr == 0) { + ADD_DIRECT(table, key, value, hash_val, bin_pos); + return 0; + } + else { + ptr->record = value; + return 1; + } +} + +void +st_add_direct(table, key, value) + st_table *table; + st_data_t key; + st_data_t value; +{ + unsigned int hash_val, bin_pos; + 
+ hash_val = do_hash(key, table); + bin_pos = hash_val % table->num_bins; + ADD_DIRECT(table, key, value, hash_val, bin_pos); +} + +static void +rehash(table) + register st_table *table; +{ + register st_table_entry *ptr, *next, **new_bins; + int i, old_num_bins = table->num_bins, new_num_bins; + unsigned int hash_val; + + new_num_bins = new_size(old_num_bins+1); + new_bins = (st_table_entry**)Calloc(new_num_bins, sizeof(st_table_entry*)); + + for(i = 0; i < old_num_bins; i++) { + ptr = table->bins[i]; + while (ptr != 0) { + next = ptr->next; + hash_val = ptr->hash % new_num_bins; + ptr->next = new_bins[hash_val]; + new_bins[hash_val] = ptr; + ptr = next; + } + } + free(table->bins); + table->num_bins = new_num_bins; + table->bins = new_bins; +} + +st_table* +st_copy(old_table) + st_table *old_table; +{ + st_table *new_table; + st_table_entry *ptr, *entry; + int i, num_bins = old_table->num_bins; + + new_table = alloc(st_table); + if (new_table == 0) { + return 0; + } + + *new_table = *old_table; + new_table->bins = (st_table_entry**) + Calloc((unsigned)num_bins, sizeof(st_table_entry*)); + + if (new_table->bins == 0) { + free(new_table); + return 0; + } + + for(i = 0; i < num_bins; i++) { + new_table->bins[i] = 0; + ptr = old_table->bins[i]; + while (ptr != 0) { + entry = alloc(st_table_entry); + if (entry == 0) { + free(new_table->bins); + free(new_table); + return 0; + } + *entry = *ptr; + entry->next = new_table->bins[i]; + new_table->bins[i] = entry; + ptr = ptr->next; + } + } + return new_table; +} + +int +st_delete(table, key, value) + register st_table *table; + register st_data_t *key; + st_data_t *value; +{ + unsigned int hash_val; + st_table_entry *tmp; + register st_table_entry *ptr; + + hash_val = do_hash_bin(*key, table); + ptr = table->bins[hash_val]; + + if (ptr == 0) { + if (value != 0) *value = 0; + return 0; + } + + if (EQUAL(table, *key, ptr->key)) { + table->bins[hash_val] = ptr->next; + table->num_entries--; + if (value != 0) *value = 
ptr->record; + *key = ptr->key; + free(ptr); + return 1; + } + + for(; ptr->next != 0; ptr = ptr->next) { + if (EQUAL(table, ptr->next->key, *key)) { + tmp = ptr->next; + ptr->next = ptr->next->next; + table->num_entries--; + if (value != 0) *value = tmp->record; + *key = tmp->key; + free(tmp); + return 1; + } + } + + return 0; +} + +int +st_delete_safe(table, key, value, never) + register st_table *table; + register st_data_t *key; + st_data_t *value; + st_data_t never; +{ + unsigned int hash_val; + register st_table_entry *ptr; + + hash_val = do_hash_bin(*key, table); + ptr = table->bins[hash_val]; + + if (ptr == 0) { + if (value != 0) *value = 0; + return 0; + } + + for(; ptr != 0; ptr = ptr->next) { + if ((ptr->key != never) && EQUAL(table, ptr->key, *key)) { + table->num_entries--; + *key = ptr->key; + if (value != 0) *value = ptr->record; + ptr->key = ptr->record = never; + return 1; + } + } + + return 0; +} + +static int +#if defined(__GNUC__) +delete_never(st_data_t key __attribute__ ((unused)), st_data_t value, + st_data_t never) +#else +delete_never(key, value, never) + st_data_t key, value, never; +#endif +{ + if (value == never) return ST_DELETE; + return ST_CONTINUE; +} + +void +st_cleanup_safe(table, never) + st_table *table; + st_data_t never; +{ + int num_entries = table->num_entries; + + st_foreach(table, delete_never, never); + table->num_entries = num_entries; +} + +int +st_foreach(table, func, arg) + st_table *table; + int (*func)(); + st_data_t arg; +{ + st_table_entry *ptr, *last, *tmp; + enum st_retval retval; + int i; + + for(i = 0; i < table->num_bins; i++) { + last = 0; + for(ptr = table->bins[i]; ptr != 0;) { + retval = (*func)(ptr->key, ptr->record, arg); + switch (retval) { + case ST_CHECK: /* check if hash is modified during iteration */ + tmp = 0; + if (i < table->num_bins) { + for (tmp = table->bins[i]; tmp; tmp=tmp->next) { + if (tmp == ptr) break; + } + } + if (!tmp) { + /* call func with error notice */ + return 1; + } + /* fall 
through */ + case ST_CONTINUE: + last = ptr; + ptr = ptr->next; + break; + case ST_STOP: + return 0; + case ST_DELETE: + tmp = ptr; + if (last == 0) { + table->bins[i] = ptr->next; + } + else { + last->next = ptr->next; + } + ptr = ptr->next; + free(tmp); + table->num_entries--; + } + } + } + return 0; +} + +static int +strhash(string) + register const char *string; +{ + register int c; + +#ifdef HASH_ELFHASH + register unsigned int h = 0, g; + + while ((c = *string++) != '\0') { + h = ( h << 4 ) + c; + if ( g = h & 0xF0000000 ) + h ^= g >> 24; + h &= ~g; + } + return h; +#elif HASH_PERL + register int val = 0; + + while ((c = *string++) != '\0') { + val += c; + val += (val << 10); + val ^= (val >> 6); + } + val += (val << 3); + val ^= (val >> 11); + + return val + (val << 15); +#else + register int val = 0; + + while ((c = *string++) != '\0') { + val = val*997 + c; + } + + return val + (val>>5); +#endif +} + +static int +numcmp(x, y) + long x, y; +{ + return x != y; +} + +static int +numhash(n) + long n; +{ + return n; +} diff --git a/oniguruma/st.h b/oniguruma/st.h new file mode 100644 index 0000000..da65e7f --- /dev/null +++ b/oniguruma/st.h @@ -0,0 +1,63 @@ +/* This is a public domain general purpose hash table package written by Peter Moore @ UCB. */ + +/* @(#) st.h 5.1 89/12/14 */ + +#ifndef ST_INCLUDED + +#define ST_INCLUDED + +typedef unsigned long st_data_t; +#define ST_DATA_T_DEFINED + +typedef struct st_table st_table; + +struct st_hash_type { + int (*compare)(); + int (*hash)(); +}; + +struct st_table { + struct st_hash_type *type; + int num_bins; + int num_entries; + struct st_table_entry **bins; +}; + +#define st_is_member(table,key) st_lookup(table,key,(st_data_t *)0) + +enum st_retval {ST_CONTINUE, ST_STOP, ST_DELETE, ST_CHECK}; + +#ifndef _ +# define _(args) args +#endif +#ifndef ANYARGS +# ifdef __cplusplus +# define ANYARGS ... 
+# else +# define ANYARGS +# endif +#endif + +st_table *st_init_table _((struct st_hash_type *)); +st_table *st_init_table_with_size _((struct st_hash_type *, int)); +st_table *st_init_numtable _((void)); +st_table *st_init_numtable_with_size _((int)); +st_table *st_init_strtable _((void)); +st_table *st_init_strtable_with_size _((int)); +int st_delete _((st_table *, st_data_t *, st_data_t *)); +int st_delete_safe _((st_table *, st_data_t *, st_data_t *, st_data_t)); +int st_insert _((st_table *, st_data_t, st_data_t)); +int st_lookup _((st_table *, st_data_t, st_data_t *)); +int st_foreach _((st_table *, int (*)(ANYARGS), st_data_t)); +void st_add_direct _((st_table *, st_data_t, st_data_t)); +void st_free_table _((st_table *)); +void st_cleanup_safe _((st_table *, st_data_t)); +st_table *st_copy _((st_table *)); + +#define ST_NUMCMP ((int (*)()) 0) +#define ST_NUMHASH ((int (*)()) -2) + +#define st_numcmp ST_NUMCMP +#define st_numhash ST_NUMHASH + +#endif /* ST_INCLUDED */ diff --git a/oniguruma/testc.c b/oniguruma/testc.c new file mode 100644 index 0000000..6a8c778 --- /dev/null +++ b/oniguruma/testc.c @@ -0,0 +1,863 @@ +/* + * This program was generated by testconv.rb. 
+ */ +#include "config.h" +#ifdef ONIG_ESCAPE_UCHAR_COLLISION +#undef ONIG_ESCAPE_UCHAR_COLLISION +#endif +#include + +#ifdef POSIX_TEST +#include "onigposix.h" +#else +#include "oniguruma.h" +#endif + +#ifdef HAVE_STRING_H +# include +#else +# include +#endif + +#define SLEN(s) strlen(s) + +static int nsucc = 0; +static int nfail = 0; +static int nerror = 0; + +static FILE* err_file; + +#ifndef POSIX_TEST +static OnigRegion* region; +#endif + +static void xx(char* pattern, char* str, int from, int to, int mem, int not) +{ + int r; + +#ifdef POSIX_TEST + regex_t reg; + char buf[200]; + regmatch_t pmatch[25]; + + r = regcomp(®, pattern, REG_EXTENDED | REG_NEWLINE); + if (r) { + regerror(r, ®, buf, sizeof(buf)); + fprintf(err_file, "ERROR: %s\n", buf); + nerror++; + return ; + } + + r = regexec(®, str, reg.re_nsub + 1, pmatch, 0); + if (r != 0 && r != REG_NOMATCH) { + regerror(r, ®, buf, sizeof(buf)); + fprintf(err_file, "ERROR: %s\n", buf); + nerror++; + return ; + } + + if (r == REG_NOMATCH) { + if (not) { + fprintf(stdout, "OK(N): /%s/ '%s'\n", pattern, str); + nsucc++; + } + else { + fprintf(stdout, "FAIL: /%s/ '%s'\n", pattern, str); + nfail++; + } + } + else { + if (not) { + fprintf(stdout, "FAIL(N): /%s/ '%s'\n", pattern, str); + nfail++; + } + else { + if (pmatch[mem].rm_so == from && pmatch[mem].rm_eo == to) { + fprintf(stdout, "OK: /%s/ '%s'\n", pattern, str); + nsucc++; + } + else { + fprintf(stdout, "FAIL: /%s/ '%s' %d-%d : %d-%d\n", pattern, str, + from, to, pmatch[mem].rm_so, pmatch[mem].rm_eo); + nfail++; + } + } + } + regfree(®); + +#else + regex_t* reg; + OnigErrorInfo einfo; + + r = onig_new(®, (UChar* )pattern, (UChar* )(pattern + SLEN(pattern)), + ONIG_OPTION_DEFAULT, ONIG_ENCODING_EUC_JP, ONIG_SYNTAX_DEFAULT, &einfo); + if (r) { + char s[ONIG_MAX_ERROR_MESSAGE_LEN]; + onig_error_code_to_str((UChar* )s, r, &einfo); + fprintf(err_file, "ERROR: %s\n", s); + nerror++; + return ; + } + + r = onig_search(reg, (UChar* )str, (UChar* )(str + SLEN(str)), + 
(UChar* )str, (UChar* )(str + SLEN(str)), + region, ONIG_OPTION_NONE); + if (r < ONIG_MISMATCH) { + char s[ONIG_MAX_ERROR_MESSAGE_LEN]; + onig_error_code_to_str((UChar* )s, r); + fprintf(err_file, "ERROR: %s\n", s); + nerror++; + return ; + } + + if (r == ONIG_MISMATCH) { + if (not) { + fprintf(stdout, "OK(N): /%s/ '%s'\n", pattern, str); + nsucc++; + } + else { + fprintf(stdout, "FAIL: /%s/ '%s'\n", pattern, str); + nfail++; + } + } + else { + if (not) { + fprintf(stdout, "FAIL(N): /%s/ '%s'\n", pattern, str); + nfail++; + } + else { + if (region->beg[mem] == from && region->end[mem] == to) { + fprintf(stdout, "OK: /%s/ '%s'\n", pattern, str); + nsucc++; + } + else { + fprintf(stdout, "FAIL: /%s/ '%s' %d-%d : %d-%d\n", pattern, str, + from, to, region->beg[mem], region->end[mem]); + nfail++; + } + } + } + onig_free(reg); +#endif +} + +static void x2(char* pattern, char* str, int from, int to) +{ + xx(pattern, str, from, to, 0, 0); +} + +static void x3(char* pattern, char* str, int from, int to, int mem) +{ + xx(pattern, str, from, to, mem, 0); +} + +static void n(char* pattern, char* str) +{ + xx(pattern, str, 0, 0, 0, 1); +} + +extern int main(int argc, char* argv[]) +{ + err_file = stdout; + +#ifdef POSIX_TEST + reg_set_encoding(REG_POSIX_ENCODING_EUC_JP); +#else + region = onig_region_new(); +#endif + + x2("", "", 0, 0); + x2("^", "", 0, 0); + x2("$", "", 0, 0); + x2("\\G", "", 0, 0); + x2("\\A", "", 0, 0); + x2("\\Z", "", 0, 0); + x2("\\z", "", 0, 0); + x2("^$", "", 0, 0); + x2("\\ca", "\001", 0, 1); + x2("\\C-b", "\002", 0, 1); + x2("\\c\\\\", "\034", 0, 1); + x2("q[\\c\\\\]", "q\034", 0, 2); + x2("", "a", 0, 0); + x2("a", "a", 0, 1); + x2("\\x61", "a", 0, 1); + x2("aa", "aa", 0, 2); + x2("aaa", "aaa", 0, 3); + x2("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", 0, 35); + x2("ab", "ab", 0, 2); + x2("b", "ab", 1, 2); + x2("bc", "abc", 1, 3); + x2("(?i:#RET#)", "#INS##RET#", 5, 10); + x2("\\17", "\017", 0, 1); + x2("\\x1f", "\x1f", 
0, 1); + x2("a(?#....\\\\JJJJ)b", "ab", 0, 2); + x2("(?x) G (o O(?-x)oO) g L", "GoOoOgLe", 0, 7); + x2(".", "a", 0, 1); + n(".", ""); + x2("..", "ab", 0, 2); + x2("\\w", "e", 0, 1); + n("\\W", "e"); + x2("\\s", " ", 0, 1); + x2("\\S", "b", 0, 1); + x2("\\d", "4", 0, 1); + n("\\D", "4"); + x2("\\b", "z ", 0, 0); + x2("\\b", " z", 1, 1); + x2("\\B", "zz ", 1, 1); + x2("\\B", "z ", 2, 2); + x2("\\B", " z", 0, 0); + x2("[ab]", "b", 0, 1); + n("[ab]", "c"); + x2("[a-z]", "t", 0, 1); + n("[^a]", "a"); + x2("[^a]", "\n", 0, 1); + x2("[]]", "]", 0, 1); + n("[^]]", "]"); + x2("[\\^]+", "0^^1", 1, 3); + x2("[b-]", "b", 0, 1); + x2("[b-]", "-", 0, 1); + x2("[\\w]", "z", 0, 1); + n("[\\w]", " "); + x2("[\\W]", "b$", 1, 2); + x2("[\\d]", "5", 0, 1); + n("[\\d]", "e"); + x2("[\\D]", "t", 0, 1); + n("[\\D]", "3"); + x2("[\\s]", " ", 0, 1); + n("[\\s]", "a"); + x2("[\\S]", "b", 0, 1); + n("[\\S]", " "); + x2("[\\w\\d]", "2", 0, 1); + n("[\\w\\d]", " "); + x2("[[:upper:]]", "B", 0, 1); + x2("[*[:xdigit:]+]", "+", 0, 1); + x2("[*[:xdigit:]+]", "GHIKK-9+*", 6, 7); + x2("[*[:xdigit:]+]", "-@^+", 3, 4); + n("[[:upper]]", "A"); + x2("[[:upper]]", ":", 0, 1); + x2("[\\044-\\047]", "\046", 0, 1); + x2("[\\x5a-\\x5c]", "\x5b", 0, 1); + x2("[\\x6A-\\x6D]", "\x6c", 0, 1); + n("[\\x6A-\\x6D]", "\x6E"); + n("^[0-9A-F]+ 0+ UNDEF ", "75F 00000000 SECT14A notype () External | _rb_apply"); + x2("[\\[]", "[", 0, 1); + x2("[\\]]", "]", 0, 1); + x2("[&]", "&", 0, 1); + x2("[[ab]]", "b", 0, 1); + x2("[[ab]c]", "c", 0, 1); + n("[[^a]]", "a"); + n("[^[a]]", "a"); + x2("[[ab]&&bc]", "b", 0, 1); + n("[[ab]&&bc]", "a"); + n("[[ab]&&bc]", "c"); + x2("[a-z&&b-y&&c-x]", "w", 0, 1); + n("[^a-z&&b-y&&c-x]", "w"); + x2("[[^a&&a]&&a-z]", "b", 0, 1); + n("[[^a&&a]&&a-z]", "a"); + x2("[[^a-z&&bcdef]&&[^c-g]]", "h", 0, 1); + n("[[^a-z&&bcdef]&&[^c-g]]", "c"); + x2("[^[^abc]&&[^cde]]", "c", 0, 1); + x2("[^[^abc]&&[^cde]]", "e", 0, 1); + n("[^[^abc]&&[^cde]]", "f"); + x2("[a-&&-a]", "-", 0, 1); + n("[a\\-&&\\-a]", 
"&"); + n("\\wabc", " abc"); + x2("a\\Wbc", "a bc", 0, 4); + x2("a.b.c", "aabbc", 0, 5); + x2(".\\wb\\W..c", "abb bcc", 0, 7); + x2("\\s\\wzzz", " zzzz", 0, 5); + x2("aa.b", "aabb", 0, 4); + n(".a", "ab"); + x2(".a", "aa", 0, 2); + x2("^a", "a", 0, 1); + x2("^a$", "a", 0, 1); + x2("^\\w$", "a", 0, 1); + n("^\\w$", " "); + x2("^\\wab$", "zab", 0, 3); + x2("^\\wabcdef$", "zabcdef", 0, 7); + x2("^\\w...def$", "zabcdef", 0, 7); + x2("\\w\\w\\s\\Waaa\\d", "aa aaa4", 0, 8); + x2("\\A\\Z", "", 0, 0); + x2("\\Axyz", "xyz", 0, 3); + x2("xyz\\Z", "xyz", 0, 3); + x2("xyz\\z", "xyz", 0, 3); + x2("a\\Z", "a", 0, 1); + x2("\\Gaz", "az", 0, 2); + n("\\Gz", "bza"); + n("az\\G", "az"); + n("az\\A", "az"); + n("a\\Az", "az"); + x2("\\^\\$", "^$", 0, 2); + x2("^x?y", "xy", 0, 2); + x2("^(x?y)", "xy", 0, 2); + x2("\\w", "_", 0, 1); + n("\\W", "_"); + x2("(?=z)z", "z", 0, 1); + n("(?=z).", "a"); + x2("(?!z)a", "a", 0, 1); + n("(?!z)a", "z"); + x2("(?i:a)", "a", 0, 1); + x2("(?i:a)", "A", 0, 1); + x2("(?i:A)", "a", 0, 1); + n("(?i:A)", "b"); + x2("(?i:[A-Z])", "a", 0, 1); + x2("(?i:[f-m])", "H", 0, 1); + x2("(?i:[f-m])", "h", 0, 1); + n("(?i:[f-m])", "e"); + x2("(?i:[A-c])", "D", 0, 1); + n("(?i:[^a-z])", "A"); + n("(?i:[^a-z])", "a"); + x2("(?i:[!-k])", "Z", 0, 1); + x2("(?i:[!-k])", "7", 0, 1); + x2("(?i:[T-}])", "b", 0, 1); + x2("(?i:[T-}])", "{", 0, 1); + x2("(?i:\\?a)", "?A", 0, 2); + x2("(?i:\\*A)", "*a", 0, 2); + n(".", "\n"); + x2("(?m:.)", "\n", 0, 1); + x2("(?m:a.)", "a\n", 0, 2); + x2("(?m:.b)", "a\nb", 1, 3); + x2(".*abc", "dddabdd\nddabc", 8, 13); + x2("(?m:.*abc)", "dddabddabc", 0, 10); + n("(?i)(?-i)a", "A"); + n("(?i)(?-i:a)", "A"); + x2("a?", "", 0, 0); + x2("a?", "b", 0, 0); + x2("a?", "a", 0, 1); + x2("a*", "", 0, 0); + x2("a*", "a", 0, 1); + x2("a*", "aaa", 0, 3); + x2("a*", "baaaa", 0, 0); + n("a+", ""); + x2("a+", "a", 0, 1); + x2("a+", "aaaa", 0, 4); + x2("a+", "aabbb", 0, 2); + x2("a+", "baaaa", 1, 5); + x2(".?", "", 0, 0); + x2(".?", "f", 0, 1); + x2(".?", "\n", 
0, 0); + x2(".*", "", 0, 0); + x2(".*", "abcde", 0, 5); + x2(".+", "z", 0, 1); + x2(".+", "zdswer\n", 0, 6); + x2("(.*)a\\1f", "babfbac", 0, 4); + x2("(.*)a\\1f", "bacbabf", 3, 7); + x2("((.*)a\\2f)", "bacbabf", 3, 7); + x2("(.*)a\\1f", "baczzzzzz\nbazz\nzzzzbabf", 19, 23); + x2("a|b", "a", 0, 1); + x2("a|b", "b", 0, 1); + x2("|a", "a", 0, 0); + x2("(|a)", "a", 0, 0); + x2("ab|bc", "ab", 0, 2); + x2("ab|bc", "bc", 0, 2); + x2("z(?:ab|bc)", "zbc", 0, 3); + x2("a(?:ab|bc)c", "aabc", 0, 4); + x2("ab|(?:ac|az)", "az", 0, 2); + x2("a|b|c", "dc", 1, 2); + x2("a|b|cd|efg|h|ijk|lmn|o|pq|rstuvwx|yz", "pqr", 0, 2); + n("a|b|cd|efg|h|ijk|lmn|o|pq|rstuvwx|yz", "mn"); + x2("a|^z", "ba", 1, 2); + x2("a|^z", "za", 0, 1); + x2("a|\\Gz", "bza", 2, 3); + x2("a|\\Gz", "za", 0, 1); + x2("a|\\Az", "bza", 2, 3); + x2("a|\\Az", "za", 0, 1); + x2("a|b\\Z", "ba", 1, 2); + x2("a|b\\Z", "b", 0, 1); + x2("a|b\\z", "ba", 1, 2); + x2("a|b\\z", "b", 0, 1); + x2("\\w|\\s", " ", 0, 1); + n("\\w|\\w", " "); + x2("\\w|%", "%", 0, 1); + x2("\\w|[&$]", "&", 0, 1); + x2("[b-d]|[^e-z]", "a", 0, 1); + x2("(?:a|[c-f])|bz", "dz", 0, 1); + x2("(?:a|[c-f])|bz", "bz", 0, 2); + x2("abc|(?=zz)..f", "zzf", 0, 3); + x2("abc|(?!zz)..f", "abf", 0, 3); + x2("(?=za)..a|(?=zz)..a", "zza", 0, 3); + n("(?>a|abd)c", "abdc"); + x2("(?>abd|a)c", "abdc", 0, 4); + x2("a?|b", "a", 0, 1); + x2("a?|b", "b", 0, 0); + x2("a?|b", "", 0, 0); + x2("a*|b", "aa", 0, 2); + x2("a*|b*", "ba", 0, 0); + x2("a*|b*", "ab", 0, 1); + x2("a+|b*", "", 0, 0); + x2("a+|b*", "bbb", 0, 3); + x2("a+|b*", "abbb", 0, 1); + n("a+|b+", ""); + x2("(a|b)?", "b", 0, 1); + x2("(a|b)*", "ba", 0, 2); + x2("(a|b)+", "bab", 0, 3); + x2("(ab|ca)+", "caabbc", 0, 4); + x2("(ab|ca)+", "aabca", 1, 5); + x2("(ab|ca)+", "abzca", 0, 2); + x2("(a|bab)+", "ababa", 0, 5); + x2("(a|bab)+", "ba", 1, 2); + x2("(a|bab)+", "baaaba", 1, 4); + x2("(?:a|b)(?:a|b)", "ab", 0, 2); + x2("(?:a*|b*)(?:a*|b*)", "aaabbb", 0, 3); + x2("(?:a*|b*)(?:a+|b+)", "aaabbb", 0, 6); + 
x2("(?:a+|b+){2}", "aaabbb", 0, 6); + x2("h{0,}", "hhhh", 0, 4); + x2("(?:a+|b+){1,2}", "aaabbb", 0, 6); + n("ax{2}*a", "0axxxa1"); + n("a.{0,2}a", "0aXXXa0"); + n("a.{0,2}?a", "0aXXXa0"); + n("a.{0,2}?a", "0aXXXXa0"); + x2("^a{2,}?a$", "aaa", 0, 3); + x2("^[a-z]{2,}?$", "aaa", 0, 3); + x2("(?:a+|\\Ab*)cc", "cc", 0, 2); + n("(?:a+|\\Ab*)cc", "abcc"); + x2("(?:^a+|b+)*c", "aabbbabc", 6, 8); + x2("(?:^a+|b+)*c", "aabbbbc", 0, 7); + x2("a|(?i)c", "C", 0, 1); + x2("(?i)c|a", "C", 0, 1); + x2("(?i)c|a", "A", 0, 1); + x2("(?i:c)|a", "C", 0, 1); + n("(?i:c)|a", "A"); + x2("[abc]?", "abc", 0, 1); + x2("[abc]*", "abc", 0, 3); + x2("[^abc]*", "abc", 0, 0); + n("[^abc]+", "abc"); + x2("a?\?", "aaa", 0, 0); + x2("ba?\?b", "bab", 0, 3); + x2("a*?", "aaa", 0, 0); + x2("ba*?", "baa", 0, 1); + x2("ba*?b", "baab", 0, 4); + x2("a+?", "aaa", 0, 1); + x2("ba+?", "baa", 0, 2); + x2("ba+?b", "baab", 0, 4); + x2("(?:a?)?\?", "a", 0, 0); + x2("(?:a?\?)?", "a", 0, 0); + x2("(?:a?)+?", "aaa", 0, 1); + x2("(?:a+)?\?", "aaa", 0, 0); + x2("(?:a+)?\?b", "aaab", 0, 4); + x2("(?:ab)?{2}", "", 0, 0); + x2("(?:ab)?{2}", "ababa", 0, 4); + x2("(?:ab)*{0}", "ababa", 0, 0); + x2("(?:ab){3,}", "abababab", 0, 8); + n("(?:ab){3,}", "abab"); + x2("(?:ab){2,4}", "ababab", 0, 6); + x2("(?:ab){2,4}", "ababababab", 0, 8); + x2("(?:ab){2,4}?", "ababababab", 0, 4); + x2("(?:ab){,}", "ab{,}", 0, 5); + x2("(?:abc)+?{2}", "abcabcabc", 0, 6); + x2("(?:X*)(?i:xa)", "XXXa", 0, 4); + x2("(d+)([^abc]z)", "dddz", 0, 4); + x2("([^abc]*)([^abc]z)", "dddz", 0, 4); + x2("(\\w+)(\\wz)", "dddz", 0, 4); + x3("(a)", "a", 0, 1, 1); + x3("(ab)", "ab", 0, 2, 1); + x2("((ab))", "ab", 0, 2); + x3("((ab))", "ab", 0, 2, 1); + x3("((ab))", "ab", 0, 2, 2); + x3("((((((((((((((((((((ab))))))))))))))))))))", "ab", 0, 2, 20); + x3("(ab)(cd)", "abcd", 0, 2, 1); + x3("(ab)(cd)", "abcd", 2, 4, 2); + x3("()(a)bc(def)ghijk", "abcdefghijk", 3, 6, 3); + x3("(()(a)bc(def)ghijk)", "abcdefghijk", 3, 6, 4); + x2("(^a)", "a", 0, 1); + x3("(a)|(a)", 
"ba", 1, 2, 1); + x3("(^a)|(a)", "ba", 1, 2, 2); + x3("(a?)", "aaa", 0, 1, 1); + x3("(a*)", "aaa", 0, 3, 1); + x3("(a*)", "", 0, 0, 1); + x3("(a+)", "aaaaaaa", 0, 7, 1); + x3("(a+|b*)", "bbbaa", 0, 3, 1); + x3("(a+|b?)", "bbbaa", 0, 1, 1); + x3("(abc)?", "abc", 0, 3, 1); + x3("(abc)*", "abc", 0, 3, 1); + x3("(abc)+", "abc", 0, 3, 1); + x3("(xyz|abc)+", "abc", 0, 3, 1); + x3("([xyz][abc]|abc)+", "abc", 0, 3, 1); + x3("((?i:abc))", "AbC", 0, 3, 1); + x2("(abc)(?i:\\1)", "abcABC", 0, 6); + x3("((?m:a.c))", "a\nc", 0, 3, 1); + x3("((?=az)a)", "azb", 0, 1, 1); + x3("abc|(.abd)", "zabd", 0, 4, 1); + x2("(?:abc)|(ABC)", "abc", 0, 3); + x3("(?i:(abc))|(zzz)", "ABC", 0, 3, 1); + x3("a*(.)", "aaaaz", 4, 5, 1); + x3("a*?(.)", "aaaaz", 0, 1, 1); + x3("a*?(c)", "aaaac", 4, 5, 1); + x3("[bcd]a*(.)", "caaaaz", 5, 6, 1); + x3("(\\Abb)cc", "bbcc", 0, 2, 1); + n("(\\Abb)cc", "zbbcc"); + x3("(^bb)cc", "bbcc", 0, 2, 1); + n("(^bb)cc", "zbbcc"); + x3("cc(bb$)", "ccbb", 2, 4, 1); + n("cc(bb$)", "ccbbb"); + n("(\\1)", ""); + n("\\1(a)", "aa"); + n("(a(b)\\1)\\2+", "ababb"); + n("(?:(?:\\1|z)(a))+$", "zaa"); + x2("(?:(?:\\1|z)(a))+$", "zaaa", 0, 4); + x2("(a)(?=\\1)", "aa", 0, 1); + n("(a)$|\\1", "az"); + x2("(a)\\1", "aa", 0, 2); + n("(a)\\1", "ab"); + x2("(a?)\\1", "aa", 0, 2); + x2("(a?\?)\\1", "aa", 0, 0); + x2("(a*)\\1", "aaaaa", 0, 4); + x3("(a*)\\1", "aaaaa", 0, 2, 1); + x2("a(b*)\\1", "abbbb", 0, 5); + x2("a(b*)\\1", "ab", 0, 1); + x2("(a*)(b*)\\1\\2", "aaabbaaabb", 0, 10); + x2("(a*)(b*)\\2", "aaabbbb", 0, 7); + x2("(((((((a*)b))))))c\\7", "aaabcaaa", 0, 8); + x3("(((((((a*)b))))))c\\7", "aaabcaaa", 0, 3, 7); + x2("(a)(b)(c)\\2\\1\\3", "abcbac", 0, 6); + x2("([a-d])\\1", "cc", 0, 2); + x2("(\\w\\d\\s)\\1", "f5 f5 ", 0, 6); + n("(\\w\\d\\s)\\1", "f5 f5"); + x2("(who|[a-c]{3})\\1", "whowho", 0, 6); + x2("...(who|[a-c]{3})\\1", "abcwhowho", 0, 9); + x2("(who|[a-c]{3})\\1", "cbccbc", 0, 6); + x2("(^a)\\1", "aa", 0, 2); + n("(^a)\\1", "baa"); + n("(a$)\\1", "aa"); + n("(ab\\Z)\\1", 
"ab"); + x2("(a*\\Z)\\1", "a", 1, 1); + x2(".(a*\\Z)\\1", "ba", 1, 2); + x3("(.(abc)\\2)", "zabcabc", 0, 7, 1); + x3("(.(..\\d.)\\2)", "z12341234", 0, 9, 1); + x2("((?i:az))\\1", "AzAz", 0, 4); + n("((?i:az))\\1", "Azaz"); + x2("(?<=a)b", "ab", 1, 2); + n("(?<=a)b", "bb"); + x2("(?<=a|b)b", "bb", 1, 2); + x2("(?<=a|bc)b", "bcb", 2, 3); + x2("(?<=a|bc)b", "ab", 1, 2); + x2("(?<=a|bc||defghij|klmnopq|r)z", "rz", 1, 2); + x2("(a)\\g<1>", "aa", 0, 2); + x2("(?a)", "a", 0, 1); + x2("(?ab)\\g", "abab", 0, 4); + x2("(?.zv.)\\k", "azvbazvb", 0, 8); + x2("(?<=\\g)|-\\zEND (?XyZ)", "XyZ", 3, 3); + x2("(?|a\\g)+", "", 0, 0); + x2("(?|\\(\\g\\))+$", "()(())", 0, 6); + x3("\\g(?.){0}", "X", 0, 1, 1); + x2("\\g(abc|df(?.YZ){2,8}){0}", "XYZ", 0, 3); + x2("\\A(?(a\\g)|)\\z", "aaaa", 0, 4); + x2("(?|\\g\\g)\\z|\\zEND (?a|(b)\\g)", "bbbbabba", 0, 8); + x2("(?\\w+\\sx)a+\\k", " fg xaaaaaaaafg x", 2, 18); + x3("(z)()()(?<_9>a)\\g<_9>", "zaa", 2, 3, 1); + x2("(.)(((?<_>a)))\\k<_>", "zaa", 0, 3); + x2("((?\\d)|(?\\w))(\\k|\\k)", "ff", 0, 2); + x2("(?:(?)|(?efg))\\k", "", 0, 0); + x2("(?:(?abc)|(?efg))\\k", "abcefgefg", 3, 9); + n("(?:(?abc)|(?efg))\\k", "abcefg"); + x2("(?:(?.)|(?..)|(?...)|(?....)|(?.....)|(?......)|(?.......)|(?........)|(?.........)|(?..........)|(?...........)|(?............)|(?.............)|(?..............))\\k$", "a-pyumpyum", 2, 10); + x3("(?:(?.)|(?..)|(?...)|(?....)|(?.....)|(?......)|(?.......)|(?........)|(?.........)|(?..........)|(?...........)|(?............)|(?.............)|(?..............))\\k$", "xxxxabcdefghijklmnabcdefghijklmn", 4, 18, 14); + x3("(?)(?)(?)(?)(?)(?)(?)(?)(?)(?)(?)(?)(?)(?)(?)(?aaa)(?)$", "aaa", 0, 3, 16); + x2("(?a|\\(\\g\\))", "a", 0, 1); + x2("(?a|\\(\\g\\))", "((((((a))))))", 0, 13); + x3("(?a|\\(\\g\\))", "((((((((a))))))))", 0, 17, 1); + x2("\\g|\\zEND(?.*abc$)", "abcxxxabc", 0, 9); + x2("\\g<1>|\\zEND(.a.)", "bac", 0, 3); + x3("\\g<_A>\\g<_A>|\\zEND(.a.)(?<_A>.b.)", "xbxyby", 3, 6, 1); + x2("\\A(?:\\g|\\g|\\zEND 
(?a|c\\gc)(?b|d\\gd))$", "cdcbcdc", 0, 7); + x2("\\A(?|a\\g)\\z|\\zEND (?\\g)", "aaaa", 0, 4); + x2("(?(a|b\\gc){3,5})", "baaaaca", 1, 5); + x2("(?(a|b\\gc){3,5})", "baaaacaaaaa", 0, 10); + x2("(?\\(([^\\(\\)]++|\\g)*+\\))", "((a))", 0, 5); + x2("()*\\1", "", 0, 0); + x2("(?:()|())*\\1\\2", "", 0, 0); + x3("(?:\\1a|())*", "a", 0, 0, 1); + x2("x((.)*)*x", "0x1x2x3", 1, 6); + x2("x((.)*)*x(?i:\\1)\\Z", "0x1x2x1X2", 1, 9); + x2("(?:()|()|()|()|()|())*\\2\\5", "", 0, 0); + x2("(?:()|()|()|(x)|()|())*\\2b\\5", "b", 0, 1); + x2("\\xED\\xF2", "\xed\xf2", 0, 2); + x2("", "、「", 0, 0); + x2("、「", "、「", 0, 2); + n("、、", "、「"); + x2("、ヲ、ヲ", "、ヲ、ヲ", 0, 4); + x2("、「、、、ヲ", "、「、、、ヲ", 0, 6); + x2("、ウ、ウ、ウ、ウ、ウ、ウ、ウ、ウ、ウ、ウ、ウ、ウ、ウ、ウ、ウ、ウ、ウ、ウ、ウ、ウ、ウ、ウ、ウ、ウ、ウ、ウ、ウ、ウ、ウ、ウ、ウ、ウ、ウ、ウ、ウ", "、ウ、ウ、ウ、ウ、ウ、ウ、ウ、ウ、ウ、ウ、ウ、ウ、ウ、ウ、ウ、ウ、ウ、ウ、ウ、ウ、ウ、ウ、ウ、ウ、ウ、ウ、ウ、ウ、ウ、ウ、ウ、ウ、ウ、ウ、ウ", 0, 70); + x2("、「", "、、、「", 2, 4); + x2("、、、ヲ", "、「、、、ヲ", 2, 6); + x2("\\xca\\xb8", "\xca\xb8", 0, 2); + x2(".", "、「", 0, 2); + x2("..", "、ォ、ュ", 0, 4); + x2("\\w", "、ェ", 0, 2); + n("\\W", "、「"); + x2("[\\W]", "、ヲ$", 2, 3); + x2("\\S", "、ス", 0, 2); + x2("\\S", "エチ", 0, 2); + x2("\\b", "オ、 ", 0, 0); + x2("\\b", " 、ロ", 1, 1); + x2("\\B", "、サ、ス ", 2, 2); + x2("\\B", "、ヲ ", 3, 3); + x2("\\B", " 、、", 0, 0); + x2("[、ソ、チ]", "、チ", 0, 2); + n("[、ハ、ヒ]", "、フ"); + x2("[、ヲ-、ェ]", "、ィ", 0, 2); + n("[^、ア]", "、ア"); + x2("[\\w]", "、ヘ", 0, 2); + n("[\\d]", "、ユ"); + x2("[\\D]", "、マ", 0, 2); + n("[\\s]", "、ッ"); + x2("[\\S]", "、リ", 0, 2); + x2("[\\w\\d]", "、", 0, 2); + x2("[\\w\\d]", " 、", 3, 5); + n("\\wオエシヨ", " オエシヨ"); + x2("オエ\\Wシヨ", "オエ シヨ", 0, 5); + x2("、「.、、.、ヲ", "、「、「、、、、、ヲ", 0, 10); + x2(".\\w、ヲ\\W..、セ", "、ィ、ヲ、ヲ 、ヲ、セ、セ", 0, 13); + x2("\\s\\w、ウ、ウ、ウ", " 、ウ、ウ、ウ、ウ", 0, 9); + x2("、「、「.、ア", "、「、「、ア、ア", 0, 8); + n(".、、", "、、、ィ"); + x2(".、ェ", "、ェ、ェ", 0, 4); + x2("^、「", "、「", 0, 2); + x2("^、$", "、", 0, 2); + x2("^\\w$", "、ヒ", 0, 2); + x2("^\\w、ォ、ュ、ッ、ア、ウ$", "z、ォ、ュ、ッ、ア、ウ", 0, 11); + x2("^\\w...、ヲ、ィ、ェ$", "z、「、、、ヲ、ヲ、ィ、ェ", 0, 13); + x2("\\w\\w\\s\\W、ェ、ェ、ェ\\d", "a、ェ 
、ェ、ェ、ェ4", 0, 12); + x2("\\A、ソ、チ、ト", "、ソ、チ、ト", 0, 6); + x2("、爨皃秉\Z", "、爨皃", 0, 6); + x2("、ォ、ュ、ッ\\z", "、ォ、ュ、ッ", 0, 6); + x2("、ォ、ュ、ッ\\Z", "、ォ、ュ、ッ\n", 0, 6); + x2("\\G、ン、ヤ", "、ン、ヤ", 0, 4); + n("\\G、ィ", "、ヲ、ィ、ェ"); + n("、ネ、ニ\\G", "、ネ、ニ"); + n("、゙、゚\\A", "、゙、゚"); + n("、゙\\A、゚", "、゙、゚"); + x2("(?=、サ)、サ", "、サ", 0, 2); + n("(?=、ヲ).", "、、"); + x2("(?!、ヲ)、ォ", "、ォ", 0, 2); + n("(?!、ネ)、「", "、ネ"); + x2("(?i:、「)", "、「", 0, 2); + x2("(?i:、ヨ、ル)", "、ヨ、ル", 0, 4); + n("(?i:、、)", "、ヲ"); + x2("(?m:、.)", "、鐔n", 0, 3); + x2("(?m:.、)", "、゙\n、", 2, 5); + x2("、「?", "", 0, 0); + x2("ハム?", "イス", 0, 0); + x2("ハム?", "ハム", 0, 2); + x2("ホフ*", "", 0, 0); + x2("ホフ*", "ホフ", 0, 2); + x2("サメ*", "サメサメサメ", 0, 6); + x2("ヌマ*", "シッヌマヌマヌマヌマ", 0, 0); + n("サウ+", ""); + x2("イマ+", "イマ", 0, 2); + x2("サ+", "ササササ", 0, 8); + x2("、ィ+", "、ィ、ィ、ヲ、ヲ、ヲ", 0, 4); + x2("、ヲ+", "、ェ、ヲ、ヲ、ヲ、ヲ", 2, 10); + x2(".?", "、ソ", 0, 2); + x2(".*", "、ム、ヤ、ラ、レ", 0, 8); + x2(".+", "、", 0, 2); + x2(".+", "、、、ヲ、ィ、ォ\n", 0, 8); + x2("、「|、、", "、「", 0, 2); + x2("、「|、、", "、、", 0, 2); + x2("、「、、|、、、ヲ", "、「、、", 0, 4); + x2("、「、、|、、、ヲ", "、、、ヲ", 0, 4); + x2("、(?:、ォ、ュ|、ュ、ッ)", "、、ォ、ュ", 0, 6); + x2("、(?:、ォ、ュ|、ュ、ッ)、ア", "、、ュ、ッ、ア", 0, 8); + x2("、「、、|(?:、「、ヲ|、「、)", "、「、", 0, 4); + x2("、「|、、|、ヲ", "、ィ、ヲ", 2, 4); + x2("、「|、、|、ヲ、ィ|、ェ、ォ、ュ|、ッ|、ア、ウ、オ|、キ、ケ、サ|、ス|、ソ、チ|、ト、ニ、ネ、ハ、ヒ|、フ、ヘ", "、キ、ケ、サ", 0, 6); + n("、「|、、|、ヲ、ィ|、ェ、ォ、ュ|、ッ|、ア、ウ、オ|、キ、ケ、サ|、ス|、ソ、チ|、ト、ニ、ネ、ハ、ヒ|、フ、ヘ", "、ケ、サ"); + x2("、「|^、", "、ヨ、「", 2, 4); + x2("、「|^、", "、、「", 0, 2); + x2("オエ|\\Gシヨ", "、アシヨオエ", 4, 6); + x2("オエ|\\Gシヨ", "シヨオエ", 0, 2); + x2("オエ|\\Aシヨ", "bシヨオエ", 3, 5); + x2("オエ|\\Aシヨ", "シヨ", 0, 2); + x2("オエ|シヨ\\Z", "シヨオエ", 2, 4); + x2("オエ|シヨ\\Z", "シヨ", 0, 2); + x2("オエ|シヨ\\Z", "シヨ\n", 0, 2); + x2("オエ|シヨ\\z", "シヨオエ", 2, 4); + x2("オエ|シヨ\\z", "シヨ", 0, 2); + x2("\\w|\\s", "、ェ", 0, 2); + x2("\\w|%", "%、ェ", 0, 1); + x2("\\w|[&$]", "、ヲ&", 0, 2); + x2("[、、-、ア]", "、ヲ", 0, 2); + x2("[、、-、ア]|[^、ォ-、ウ]", "、「", 0, 2); + x2("[、、-、ア]|[^、ォ-、ウ]", "、ォ", 0, 2); + x2("[^、「]", "\n", 0, 1); + x2("(?:、「|[、ヲ-、ュ])|、、、", "、ヲ、", 0, 2); + 
x2("(?:、「|[、ヲ-、ュ])|、、、", "、、、", 0, 4); + x2("、「、、、ヲ|(?=、ア、ア)..、ロ", "、ア、ア、ロ", 0, 6); + x2("、「、、、ヲ|(?!、ア、ア)..、ロ", "、「、、、ロ", 0, 6); + x2("(?=、、「)..、「|(?=、、)..、「", "、、、「", 0, 6); + x2("(?<=、「|、、、ヲ)、、", "、、、ヲ、、", 4, 6); + n("(?>、「|、「、、、ィ)、ヲ", "、「、、、ィ、ヲ"); + x2("(?>、「、、、ィ|、「)、ヲ", "、「、、、ィ、ヲ", 0, 8); + x2("、「?|、、", "、「", 0, 2); + x2("、「?|、、", "、、", 0, 0); + x2("、「?|、、", "", 0, 0); + x2("、「*|、、", "、「、「", 0, 4); + x2("、「*|、、*", "、、、「", 0, 0); + x2("、「*|、、*", "、「、、", 0, 2); + x2("[a、「]*|、、*", "a、「、、、、、、", 0, 3); + x2("、「+|、、*", "", 0, 0); + x2("、「+|、、*", "、、、、、、", 0, 6); + x2("、「+|、、*", "、「、、、、、、", 0, 2); + x2("、「+|、、*", "a、「、、、、、、", 0, 0); + n("、「+|、、+", ""); + x2("(、「|、、)?", "、、", 0, 2); + x2("(、「|、、)*", "、、、「", 0, 4); + x2("(、「|、、)+", "、、、「、、", 0, 6); + x2("(、「、、|、ヲ、「)+", "、ヲ、「、「、、、ヲ、ィ", 0, 8); + x2("(、「、、|、ヲ、ィ)+", "、ヲ、「、「、、、ヲ、ィ", 4, 12); + x2("(、「、、|、ヲ、「)+", "、「、「、、、ヲ、「", 2, 10); + x2("(、「、、|、ヲ、「)+", "、「、、、、ヲ、「", 0, 4); + x2("(、「、、|、ヲ、「)+", "$$zzzz、「、、、、ヲ、「", 6, 10); + x2("(、「|、、、「、、)+", "、「、、、「、、、「", 0, 10); + x2("(、「|、、、「、、)+", "、、、「", 2, 4); + x2("(、「|、、、「、、)+", "、、、「、「、「、、、「", 2, 8); + x2("(?:、「|、、)(?:、「|、、)", "、「、、", 0, 4); + x2("(?:、「*|、、*)(?:、「*|、、*)", "、「、「、「、、、、、、", 0, 6); + x2("(?:、「*|、、*)(?:、「+|、、+)", "、「、「、「、、、、、、", 0, 12); + x2("(?:、「+|、、+){2}", "、「、「、「、、、、、、", 0, 12); + x2("(?:、「+|、、+){1,2}", "、「、「、「、、、、、、", 0, 12); + x2("(?:、「+|\\A、、*)、ヲ、ヲ", "、ヲ、ヲ", 0, 4); + n("(?:、「+|\\A、、*)、ヲ、ヲ", "、「、、、ヲ、ヲ"); + x2("(?:^、「+|、、+)*、ヲ", "、「、「、、、、、、、「、、、ヲ", 12, 16); + x2("(?:^、「+|、、+)*、ヲ", "、「、「、、、、、、、、、ヲ", 0, 14); + x2("、ヲ{0,}", "、ヲ、ヲ、ヲ、ヲ", 0, 8); + x2("、「|(?i)c", "C", 0, 1); + x2("(?i)c|、「", "C", 0, 1); + x2("(?i:、「)|a", "a", 0, 1); + n("(?i:、「)|a", "A"); + x2("[、「、、、ヲ]?", "、「、、、ヲ", 0, 2); + x2("[、「、、、ヲ]*", "、「、、、ヲ", 0, 6); + x2("[^、「、、、ヲ]*", "、「、、、ヲ", 0, 0); + n("[^、「、、、ヲ]+", "、「、、、ヲ"); + x2("、「?\?", "、「、「、「", 0, 0); + x2("、、、「?\?、、", "、、、「、、", 0, 6); + x2("、「*?", "、「、「、「", 0, 0); + x2("、、、「*?", "、、、「、「", 0, 2); + x2("、、、「*?、、", "、、、「、「、、", 0, 8); + x2("、「+?", "、「、「、「", 0, 
2); + x2("、、、「+?", "、、、「、「", 0, 4); + x2("、、、「+?、、", "、、、「、「、、", 0, 8); + x2("(?:ナキ?)?\?", "ナキ", 0, 0); + x2("(?:ナキ?\?)?", "ナキ", 0, 0); + x2("(?:フエ?)+?", "フエフエフエ", 0, 2); + x2("(?:ノ+)?\?", "ノノノ", 0, 0); + x2("(?:タ+)?\?チ", "タ翅翅翆", 0, 8); + x2("(?:、「、、)?{2}", "", 0, 0); + x2("(?:オエシヨ)?{2}", "オエシヨオエシヨオエ", 0, 8); + x2("(?:オエシヨ)*{0}", "オエシヨオエシヨオエ", 0, 0); + x2("(?:オエシヨ){3,}", "オエシヨオエシヨオエシヨオエシヨ", 0, 16); + n("(?:オエシヨ){3,}", "オエシヨオエシヨ"); + x2("(?:オエシヨ){2,4}", "オエシヨオエシヨオエシヨ", 0, 12); + x2("(?:オエシヨ){2,4}", "オエシヨオエシヨオエシヨオエシヨオエシヨ", 0, 16); + x2("(?:オエシヨ){2,4}?", "オエシヨオエシヨオエシヨオエシヨオエシヨ", 0, 8); + x2("(?:オエシヨ){,}", "オエシヨ{,}", 0, 7); + x2("(?:、ォ、ュ、ッ)+?{2}", "、ォ、ュ、ッ、ォ、ュ、ッ、ォ、ュ、ッ", 0, 12); + x3("(イミ)", "イミ", 0, 2, 1); + x3("(イミソ)", "イミソ", 0, 4, 1); + x2("((サエヨ))", "サエヨ", 0, 4); + x3("((ノソ))", "ノソ", 0, 4, 1); + x3("((コニ))", "コニ", 0, 4, 2); + x3("((((((((((((((((((((ホフサメ))))))))))))))))))))", "ホフサメ", 0, 4, 20); + x3("(、「、、)(、ヲ、ィ)", "、「、、、ヲ、ィ", 0, 4, 1); + x3("(、「、、)(、ヲ、ィ)", "、「、、、ヲ、ィ", 4, 8, 2); + x3("()(、「)、、、ヲ(、ィ、ェ、ォ)、ュ、ッ、ア、ウ", "、「、、、ヲ、ィ、ェ、ォ、ュ、ッ、ア、ウ", 6, 12, 3); + x3("(()(、「)、、、ヲ(、ィ、ェ、ォ)、ュ、ッ、ア、ウ)", "、「、、、ヲ、ィ、ェ、ォ、ュ、ッ、ア、ウ", 6, 12, 4); + x3(".*(・ユ・ゥ)・。ヲ・゙(・()・キ・螂ソ)・、・", "・ユ・ゥ・。ヲ・゙・・キ・螂ソ・、・", 10, 18, 2); + x2("(^、「)", "、「", 0, 2); + x3("(、「)|(、「)", "、、、「", 2, 4, 1); + x3("(^、「)|(、「)", "、、、「", 2, 4, 2); + x3("(、「?)", "、「、「、「", 0, 2, 1); + x3("(、゙*)", "、゙、゙、゙", 0, 6, 1); + x3("(、ネ*)", "", 0, 0, 1); + x3("(、+)", "、、、、、、、", 0, 14, 1); + x3("(、ユ+|、リ*)", "、ユ、ユ、ユ、リ、リ", 0, 6, 1); + x3("(、「+|、、?)", "、、、、、、、「、「", 0, 2, 1); + x3("(、「、、、ヲ)?", "、「、、、ヲ", 0, 6, 1); + x3("(、「、、、ヲ)*", "、「、、、ヲ", 0, 6, 1); + x3("(、「、、、ヲ)+", "、「、、、ヲ", 0, 6, 1); + x3("(、オ、キ、ケ|、「、、、ヲ)+", "、「、、、ヲ", 0, 6, 1); + x3("([、ハ、ヒ、フ][、ォ、ュ、ッ]|、ォ、ュ、ッ)+", "、ォ、ュ、ッ", 0, 6, 1); + x3("((?i:、「、、、ヲ))", "、「、、、ヲ", 0, 6, 1); + x3("((?m:、「.、ヲ))", "、「\n、ヲ", 0, 5, 1); + x3("((?=、「、)、「)", "、「、、、", 0, 2, 1); + x3("、「、、、ヲ|(.、「、、、ィ)", "、、「、、、ィ", 0, 8, 1); + x3("、「*(.)", "、「、「、「、「、", 8, 10, 1); + x3("、「*?(.)", "、「、「、「、「、", 0, 2, 1); + x3("、「*?(、)", 
"、「、「、「、「、", 8, 10, 1); + x3("[、、、ヲ、ィ]、「*(.)", "、ィ、「、「、「、「、", 10, 12, 1); + x3("(\\A、、、、)、ヲ、ヲ", "、、、、、ヲ、ヲ", 0, 4, 1); + n("(\\A、、、、)、ヲ、ヲ", "、、、、、、ヲ、ヲ"); + x3("(^、、、、)、ヲ、ヲ", "、、、、、ヲ、ヲ", 0, 4, 1); + n("(^、、、、)、ヲ、ヲ", "、、、、、、ヲ、ヲ"); + x3("、、(、、$)", "、、、、", 4, 8, 1); + n("、、(、、$)", "、、、、、"); + x2("(フオ)\\1", "フオフオ", 0, 4); + n("(フオ)\\1", "フオノ"); + x2("(カ?)\\1", "カカ", 0, 4); + x2("(カ?\?)\\1", "カカ", 0, 0); + x2("(カ*)\\1", "カカカカカ", 0, 8); + x3("(カ*)\\1", "カカカカカ", 0, 4, 1); + x2("、「(、、*)\\1", "、「、、、、、、、、", 0, 10); + x2("、「(、、*)\\1", "、「、、", 0, 2); + x2("(、「*)(、、*)\\1\\2", "、「、「、「、、、、、「、「、「、、、、", 0, 20); + x2("(、「*)(、、*)\\2", "、「、「、「、、、、、、、、", 0, 14); + x3("(、「*)(、、*)\\2", "、「、「、「、、、、、、、、", 6, 10, 2); + x2("(((((((、ン*)、レ))))))、ヤ\\7", "、ン、ン、ン、レ、ヤ、ン、ン、ン", 0, 16); + x3("(((((((、ン*)、レ))))))、ヤ\\7", "、ン、ン、ン、レ、ヤ、ン、ン、ン", 0, 6, 7); + x2("(、マ)(、メ)(、ユ)\\2\\1\\3", "、マ、メ、ユ、メ、マ、ユ", 0, 12); + x2("([、ュ-、ア])\\1", "、ッ、ッ", 0, 4); + x2("(\\w\\d\\s)\\1", "、「5 、「5 ", 0, 8); + n("(\\w\\d\\s)\\1", "、「5 、「5"); + x2("(テッ。ゥ|[、「-、ヲ]{3})\\1", "テッ。ゥテッ。ゥ", 0, 8); + x2("...(テッ。ゥ|[、「-、ヲ]{3})\\1", "、「a、「テッ。ゥテッ。ゥ", 0, 13); + x2("(テッ。ゥ|[、「-、ヲ]{3})\\1", "、ヲ、、、ヲ、ヲ、、、ヲ", 0, 12); + x2("(^、ウ)\\1", "、ウ、ウ", 0, 4); + n("(^、)\\1", "、皃爨"); + n("(、「$)\\1", "、「、「"); + n("(、「、、\\Z)\\1", "、「、、"); + x2("(、「*\\Z)\\1", "、「", 2, 2); + x2(".(、「*\\Z)\\1", "、、、「", 2, 4); + x3("(.(、荀、、)\\2)", "z、荀、、讀荀、、", 0, 13, 1); + x3("(.(..\\d.)\\2)", "、「12341234", 0, 10, 1); + x2("((?i:、「v、コ))\\1", "、「v、コ、「v、コ", 0, 10); + x2("(?<カ、ォ>ハム|\\(\\g<カ、ォ>\\))", "((((((ハム))))))", 0, 14); + x2("\\A(?:\\g<ー、_1>|\\g<アセ_2>|\\zスェホサ (?<ー、_1>エム|シォ\\g<アセ_2>シォ)(?<アセ_2>コ゚|ハサァ\\g<ー、_1>ハサァ))$", "ハサァシォハサァシォコ゚シォハサァシォハサァ", 0, 26); + x2("[[、メ、ユ]]", "、ユ", 0, 2); + x2("[[、、、ェ、ヲ]、ォ]", "、ォ", 0, 2); + n("[[^、「]]", "、「"); + n("[^[、「]]", "、「"); + x2("[^[^、「]]", "、「", 0, 2); + x2("[[、ォ、ュ、ッ]&&、ュ、ッ]", "、ッ", 0, 2); + n("[[、ォ、ュ、ッ]&&、ュ、ッ]", "、ォ"); + n("[[、ォ、ュ、ッ]&&、ュ、ッ]", "、ア"); + x2("[、「-、&&、、-、&&、ヲ-、]", "、", 0, 2); + n("[^、「-、&&、、-、&&、ヲ-、]", "、"); + x2("[[^、「&&、「]&&、「-、]", "、、", 0, 
2); + n("[[^、「&&、「]&&、「-、]", "、「"); + x2("[[^、「-、&&、、、ヲ、ィ、ェ]&&[^、ヲ-、ォ]]", "、ュ", 0, 2); + n("[[^、「-、&&、、、ヲ、ィ、ェ]&&[^、ヲ-、ォ]]", "、、"); + x2("[^[^、「、、、ヲ]&&[^、ヲ、ィ、ェ]]", "、ヲ", 0, 2); + x2("[^[^、「、、、ヲ]&&[^、ヲ、ィ、ェ]]", "、ィ", 0, 2); + n("[^[^、「、、、ヲ]&&[^、ヲ、ィ、ェ]]", "、ォ"); + x2("[、「-&&-、「]", "-", 0, 1); + x2("[^[^a-z、「、、、ヲ]&&[^bcdefg、ヲ、ィ、ェ]q-w]", "、ィ", 0, 2); + x2("[^[^a-z、「、、、ヲ]&&[^bcdefg、ヲ、ィ、ェ]g-w]", "f", 0, 1); + x2("[^[^a-z、「、、、ヲ]&&[^bcdefg、ヲ、ィ、ェ]g-w]", "g", 0, 1); + n("[^[^a-z、「、、、ヲ]&&[^bcdefg、ヲ、ィ、ェ]g-w]", "2"); + x2("a・ミ。シ・ク・逾、ホ・タ・ヲ・・。シ・ノ<\\/b>", "a・ミ。シ・ク・逾、ホ・タ・ヲ・・。シ・ノ", 0, 32); + x2(".・ミ。シ・ク・逾、ホ・タ・ヲ・・。シ・ノ<\\/b>", "a・ミ。シ・ク・逾、ホ・タ・ヲ・・。シ・ノ", 0, 32); + fprintf(stdout, + "\nRESULT SUCC: %d, FAIL: %d, ERROR: %d (by Oniguruma %s)\n", + nsucc, nfail, nerror, onig_version()); + +#ifndef POSIX_TEST + onig_region_free(region, 1); + onig_end(); +#endif + + return ((nfail == 0 && nerror == 0) ? 0 : -1); +} diff --git a/oniguruma/testu.c b/oniguruma/testu.c new file mode 100644 index 0000000..5652988 --- /dev/null +++ b/oniguruma/testu.c @@ -0,0 +1,911 @@ +/* + * This program was generated by testconv.rb. 
+ */ +#include + +#ifdef POSIX_TEST +#include "onigposix.h" +#else +#include "oniguruma.h" +#endif + +static int nsucc = 0; +static int nfail = 0; +static int nerror = 0; + +static FILE* err_file; + +#ifndef POSIX_TEST +static OnigRegion* region; +static OnigEncoding ENC; +#endif + +#define ulen(p) onigenc_str_bytelen_null(ENC, (UChar* )p) + +static void uconv(char* from, char* to, int len) +{ + int i; + unsigned char c; + char *q; + + q = to; + + for (i = 0; i < len; i += 2) { + c = (unsigned char )from[i]; + if (c == 0) { + c = (unsigned char )from[i+1]; + if (c < 0x20 || c >= 0x7f || c == 0x5c || c == 0x22) { + sprintf(q, "\\%03o", c); + q += 4; + } + else { + sprintf(q, "%c", c); + q++; + } + } + else { + sprintf(q, "\\%03o", c); + q += 4; + c = (unsigned char )from[i+1]; + sprintf(q, "\\%03o", c); + q += 4; + } + } + + *q = 0; +} + +static void xx(char* pattern, char* str, int from, int to, int mem, int not) +{ + int r; + char cpat[4000], cstr[4000]; + +#ifdef POSIX_TEST + regex_t reg; + char buf[200]; + regmatch_t pmatch[20]; + + uconv(pattern, cpat, ulen(pattern)); + uconv(str, cstr, ulen(str)); + + r = regcomp(®, pattern, REG_EXTENDED | REG_NEWLINE); + if (r) { + regerror(r, ®, buf, sizeof(buf)); + fprintf(err_file, "ERROR: %s\n", buf); + nerror++; + return ; + } + + r = regexec(®, str, reg.re_nsub + 1, pmatch, 0); + if (r != 0 && r != REG_NOMATCH) { + regerror(r, ®, buf, sizeof(buf)); + fprintf(err_file, "ERROR: %s\n", buf); + nerror++; + return ; + } + + if (r == REG_NOMATCH) { + if (not) { + fprintf(stdout, "OK(N): /%s/ '%s'\n", cpat, cstr); + nsucc++; + } + else { + fprintf(stdout, "FAIL: /%s/ '%s'\n", cpat, cstr); + nfail++; + } + } + else { + if (not) { + fprintf(stdout, "FAIL(N): /%s/ '%s'\n", cpat, cstr); + nfail++; + } + else { + if (pmatch[mem].rm_so == from && pmatch[mem].rm_eo == to) { + fprintf(stdout, "OK: /%s/ '%s'\n", cpat, cstr); + nsucc++; + } + else { + fprintf(stdout, "FAIL: /%s/ '%s' %d-%d : %d-%d\n", cpat, cstr, + from, to, 
pmatch[mem].rm_so, pmatch[mem].rm_eo); + nfail++; + } + } + } + regfree(®); + +#else + regex_t* reg; + OnigCompileInfo ci; + OnigErrorInfo einfo; + + uconv(pattern, cpat, ulen(pattern)); + uconv(str, cstr, ulen(str)); + +#if 0 + r = onig_new(®, (UChar* )pattern, (UChar* )(pattern + ulen(pattern)), + ONIG_OPTION_DEFAULT, ENC, ONIG_SYNTAX_DEFAULT, &einfo); +#else + ci.num_of_elements = 5; + ci.pattern_enc = ENC; + ci.target_enc = ENC; + ci.syntax = ONIG_SYNTAX_DEFAULT; + ci.option = ONIG_OPTION_DEFAULT; + ci.case_fold_flag = ONIGENC_CASE_FOLD_DEFAULT; + + r = onig_new_deluxe(®, (UChar* )pattern, + (UChar* )(pattern + ulen(pattern)), + &ci, &einfo); +#endif + + if (r) { + char s[ONIG_MAX_ERROR_MESSAGE_LEN]; + onig_error_code_to_str(s, r, &einfo); + fprintf(err_file, "ERROR: %s\n", s); + nerror++; + return ; + } + + r = onig_search(reg, (UChar* )str, (UChar* )(str + ulen(str)), + (UChar* )str, (UChar* )(str + ulen(str)), + region, ONIG_OPTION_NONE); + if (r < ONIG_MISMATCH) { + char s[ONIG_MAX_ERROR_MESSAGE_LEN]; + onig_error_code_to_str(s, r); + fprintf(err_file, "ERROR: %s\n", s); + nerror++; + return ; + } + + if (r == ONIG_MISMATCH) { + if (not) { + fprintf(stdout, "OK(N): /%s/ '%s'\n", cpat, cstr); + nsucc++; + } + else { + fprintf(stdout, "FAIL: /%s/ '%s'\n", cpat, cstr); + nfail++; + } + } + else { + if (not) { + fprintf(stdout, "FAIL(N): /%s/ '%s'\n", cpat, cstr); + nfail++; + } + else { + if (region->beg[mem] == from && region->end[mem] == to) { + fprintf(stdout, "OK: /%s/ '%s'\n", cpat, cstr); + nsucc++; + } + else { + fprintf(stdout, "FAIL: /%s/ '%s' %d-%d : %d-%d\n", cpat, cstr, + from, to, region->beg[mem], region->end[mem]); + nfail++; + } + } + } + onig_free(reg); +#endif +} + +static void x2(char* pattern, char* str, int from, int to) +{ + xx(pattern, str, from, to, 0, 0); +} + +static void x3(char* pattern, char* str, int from, int to, int mem) +{ + xx(pattern, str, from, to, mem, 0); +} + +static void n(char* pattern, char* str) +{ + xx(pattern, str, 
0, 0, 0, 1); +} + +extern int main(int argc, char* argv[]) +{ + err_file = stdout; + +#ifndef POSIX_TEST + region = onig_region_new(); +#endif +#ifdef POSIX_TEST + reg_set_encoding(REG_POSIX_ENCODING_UTF16_BE); +#else + ENC = ONIG_ENCODING_UTF16_BE; +#endif + x2("\000\000", "\000\000", 0, 0); + x2("\000^\000\000", "\000\000", 0, 0); + x2("\000$\000\000", "\000\000", 0, 0); + x2("\000\134\000G\000\000", "\000\000", 0, 0); + x2("\000\134\000A\000\000", "\000\000", 0, 0); + x2("\000\134\000Z\000\000", "\000\000", 0, 0); + x2("\000\134\000z\000\000", "\000\000", 0, 0); + x2("\000^\000$\000\000", "\000\000", 0, 0); + x2("\000\134\000c\000a\000\000", "\000\001\000\000", 0, 2); + x2("\000\134\000C\000-\000b\000\000", "\000\002\000\000", 0, 2); + x2("\000\134\000c\000\134\000\134\000\000", "\000\034\000\000", 0, 2); + x2("\000q\000[\000\134\000c\000\134\000\134\000]\000\000", "\000q\000\034\000\000", 0, 4); + x2("\000\000", "\000a\000\000", 0, 0); + x2("\000a\000\000", "\000a\000\000", 0, 2); + x2("\000\134\000x\0000\0000\000\134\000x\0006\0001\000\000", "\000a\000\000", 0, 2); + x2("\000a\000a\000\000", "\000a\000a\000\000", 0, 4); + x2("\000a\000a\000a\000\000", "\000a\000a\000a\000\000", 0, 6); + x2("\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000\000", "\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000\000", 0, 70); + x2("\000a\000b\000\000", "\000a\000b\000\000", 0, 4); + x2("\000b\000\000", "\000a\000b\000\000", 2, 4); + x2("\000b\000c\000\000", "\000a\000b\000c\000\000", 2, 6); + x2("\000(\000?\000i\000:\000#\000R\000E\000T\000#\000)\000\000", "\000#\000I\000N\000S\000#\000#\000R\000E\000T\000#\000\000", 10, 20); + x2("\000\134\0000\0000\0000\000\134\0001\0007\000\000", "\000\017\000\000", 0, 2); + 
x2("\000\134\000x\0000\0000\000\134\000x\0001\000f\000\000", "\000\037\000\000", 0, 2); + x2("\000a\000(\000?\000#\000.\000.\000.\000.\000\134\000\134\000J\000J\000J\000J\000)\000b\000\000", "\000a\000b\000\000", 0, 4); + x2("\000(\000?\000x\000)\000 \000 \000G\000 \000(\000o\000 \000O\000(\000?\000-\000x\000)\000o\000O\000)\000 \000g\000 \000L\000\000", "\000G\000o\000O\000o\000O\000g\000L\000e\000\000", 0, 14); + x2("\000.\000\000", "\000a\000\000", 0, 2); + n("\000.\000\000", "\000\000"); + x2("\000.\000.\000\000", "\000a\000b\000\000", 0, 4); + x2("\000\134\000w\000\000", "\000e\000\000", 0, 2); + n("\000\134\000W\000\000", "\000e\000\000"); + x2("\000\134\000s\000\000", "\000 \000\000", 0, 2); + x2("\000\134\000S\000\000", "\000b\000\000", 0, 2); + x2("\000\134\000d\000\000", "\0004\000\000", 0, 2); + n("\000\134\000D\000\000", "\0004\000\000"); + x2("\000\134\000b\000\000", "\000z\000 \000\000", 0, 0); + x2("\000\134\000b\000\000", "\000 \000z\000\000", 2, 2); + x2("\000\134\000B\000\000", "\000z\000z\000 \000\000", 2, 2); + x2("\000\134\000B\000\000", "\000z\000 \000\000", 4, 4); + x2("\000\134\000B\000\000", "\000 \000z\000\000", 0, 0); + x2("\000[\000a\000b\000]\000\000", "\000b\000\000", 0, 2); + n("\000[\000a\000b\000]\000\000", "\000c\000\000"); + x2("\000[\000a\000-\000z\000]\000\000", "\000t\000\000", 0, 2); + n("\000[\000^\000a\000]\000\000", "\000a\000\000"); + x2("\000[\000^\000a\000]\000\000", "\000\012\000\000", 0, 2); + x2("\000[\000]\000]\000\000", "\000]\000\000", 0, 2); + n("\000[\000^\000]\000]\000\000", "\000]\000\000"); + x2("\000[\000\134\000^\000]\000+\000\000", "\0000\000^\000^\0001\000\000", 2, 6); + x2("\000[\000b\000-\000]\000\000", "\000b\000\000", 0, 2); + x2("\000[\000b\000-\000]\000\000", "\000-\000\000", 0, 2); + x2("\000[\000\134\000w\000]\000\000", "\000z\000\000", 0, 2); + n("\000[\000\134\000w\000]\000\000", "\000 \000\000"); + x2("\000[\000\134\000W\000]\000\000", "\000b\000$\000\000", 2, 4); + 
x2("\000[\000\134\000d\000]\000\000", "\0005\000\000", 0, 2); + n("\000[\000\134\000d\000]\000\000", "\000e\000\000"); + x2("\000[\000\134\000D\000]\000\000", "\000t\000\000", 0, 2); + n("\000[\000\134\000D\000]\000\000", "\0003\000\000"); + x2("\000[\000\134\000s\000]\000\000", "\000 \000\000", 0, 2); + n("\000[\000\134\000s\000]\000\000", "\000a\000\000"); + x2("\000[\000\134\000S\000]\000\000", "\000b\000\000", 0, 2); + n("\000[\000\134\000S\000]\000\000", "\000 \000\000"); + x2("\000[\000\134\000w\000\134\000d\000]\000\000", "\0002\000\000", 0, 2); + n("\000[\000\134\000w\000\134\000d\000]\000\000", "\000 \000\000"); + x2("\000[\000[\000:\000u\000p\000p\000e\000r\000:\000]\000]\000\000", "\000B\000\000", 0, 2); + x2("\000[\000*\000[\000:\000x\000d\000i\000g\000i\000t\000:\000]\000+\000]\000\000", "\000+\000\000", 0, 2); + x2("\000[\000*\000[\000:\000x\000d\000i\000g\000i\000t\000:\000]\000+\000]\000\000", "\000G\000H\000I\000K\000K\000-\0009\000+\000*\000\000", 12, 14); + x2("\000[\000*\000[\000:\000x\000d\000i\000g\000i\000t\000:\000]\000+\000]\000\000", "\000-\000@\000^\000+\000\000", 6, 8); + n("\000[\000[\000:\000u\000p\000p\000e\000r\000]\000]\000\000", "\000A\000\000"); + x2("\000[\000[\000:\000u\000p\000p\000e\000r\000]\000]\000\000", "\000:\000\000", 0, 2); + x2("\000[\000\134\0000\0000\0000\000\134\0000\0004\0004\000-\000\134\0000\0000\0000\000\134\0000\0004\0007\000]\000\000", "\000&\000\000", 0, 2); + x2("\000[\000\134\000x\0000\0000\000\134\000x\0005\000a\000-\000\134\000x\0000\0000\000\134\000x\0005\000c\000]\000\000", "\000[\000\000", 0, 2); + x2("\000[\000\134\000x\0000\0000\000\134\000x\0006\000A\000-\000\134\000x\0000\0000\000\134\000x\0006\000D\000]\000\000", "\000l\000\000", 0, 2); + n("\000[\000\134\000x\0000\0000\000\134\000x\0006\000A\000-\000\134\000x\0000\0000\000\134\000x\0006\000D\000]\000\000", "\000n\000\000"); + n("\000^\000[\0000\000-\0009\000A\000-\000F\000]\000+\000 \0000\000+\000 \000U\000N\000D\000E\000F\000 \000\000", 
"\0007\0005\000F\000 \0000\0000\0000\0000\0000\0000\0000\0000\000 \000S\000E\000C\000T\0001\0004\000A\000 \000n\000o\000t\000y\000p\000e\000 \000(\000)\000 \000 \000 \000 \000E\000x\000t\000e\000r\000n\000a\000l\000 \000 \000 \000 \000|\000 \000_\000r\000b\000_\000a\000p\000p\000l\000y\000\000"); + x2("\000[\000\134\000[\000]\000\000", "\000[\000\000", 0, 2); + x2("\000[\000\134\000]\000]\000\000", "\000]\000\000", 0, 2); + x2("\000[\000&\000]\000\000", "\000&\000\000", 0, 2); + x2("\000[\000[\000a\000b\000]\000]\000\000", "\000b\000\000", 0, 2); + x2("\000[\000[\000a\000b\000]\000c\000]\000\000", "\000c\000\000", 0, 2); + n("\000[\000[\000^\000a\000]\000]\000\000", "\000a\000\000"); + n("\000[\000^\000[\000a\000]\000]\000\000", "\000a\000\000"); + x2("\000[\000[\000a\000b\000]\000&\000&\000b\000c\000]\000\000", "\000b\000\000", 0, 2); + n("\000[\000[\000a\000b\000]\000&\000&\000b\000c\000]\000\000", "\000a\000\000"); + n("\000[\000[\000a\000b\000]\000&\000&\000b\000c\000]\000\000", "\000c\000\000"); + x2("\000[\000a\000-\000z\000&\000&\000b\000-\000y\000&\000&\000c\000-\000x\000]\000\000", "\000w\000\000", 0, 2); + n("\000[\000^\000a\000-\000z\000&\000&\000b\000-\000y\000&\000&\000c\000-\000x\000]\000\000", "\000w\000\000"); + x2("\000[\000[\000^\000a\000&\000&\000a\000]\000&\000&\000a\000-\000z\000]\000\000", "\000b\000\000", 0, 2); + n("\000[\000[\000^\000a\000&\000&\000a\000]\000&\000&\000a\000-\000z\000]\000\000", "\000a\000\000"); + x2("\000[\000[\000^\000a\000-\000z\000&\000&\000b\000c\000d\000e\000f\000]\000&\000&\000[\000^\000c\000-\000g\000]\000]\000\000", "\000h\000\000", 0, 2); + n("\000[\000[\000^\000a\000-\000z\000&\000&\000b\000c\000d\000e\000f\000]\000&\000&\000[\000^\000c\000-\000g\000]\000]\000\000", "\000c\000\000"); + x2("\000[\000^\000[\000^\000a\000b\000c\000]\000&\000&\000[\000^\000c\000d\000e\000]\000]\000\000", "\000c\000\000", 0, 2); + x2("\000[\000^\000[\000^\000a\000b\000c\000]\000&\000&\000[\000^\000c\000d\000e\000]\000]\000\000", 
"\000e\000\000", 0, 2); + n("\000[\000^\000[\000^\000a\000b\000c\000]\000&\000&\000[\000^\000c\000d\000e\000]\000]\000\000", "\000f\000\000"); + x2("\000[\000a\000-\000&\000&\000-\000a\000]\000\000", "\000-\000\000", 0, 2); + n("\000[\000a\000\134\000-\000&\000&\000\134\000-\000a\000]\000\000", "\000&\000\000"); + n("\000\134\000w\000a\000b\000c\000\000", "\000 \000a\000b\000c\000\000"); + x2("\000a\000\134\000W\000b\000c\000\000", "\000a\000 \000b\000c\000\000", 0, 8); + x2("\000a\000.\000b\000.\000c\000\000", "\000a\000a\000b\000b\000c\000\000", 0, 10); + x2("\000.\000\134\000w\000b\000\134\000W\000.\000.\000c\000\000", "\000a\000b\000b\000 \000b\000c\000c\000\000", 0, 14); + x2("\000\134\000s\000\134\000w\000z\000z\000z\000\000", "\000 \000z\000z\000z\000z\000\000", 0, 10); + x2("\000a\000a\000.\000b\000\000", "\000a\000a\000b\000b\000\000", 0, 8); + n("\000.\000a\000\000", "\000a\000b\000\000"); + x2("\000.\000a\000\000", "\000a\000a\000\000", 0, 4); + x2("\000^\000a\000\000", "\000a\000\000", 0, 2); + x2("\000^\000a\000$\000\000", "\000a\000\000", 0, 2); + x2("\000^\000\134\000w\000$\000\000", "\000a\000\000", 0, 2); + n("\000^\000\134\000w\000$\000\000", "\000 \000\000"); + x2("\000^\000\134\000w\000a\000b\000$\000\000", "\000z\000a\000b\000\000", 0, 6); + x2("\000^\000\134\000w\000a\000b\000c\000d\000e\000f\000$\000\000", "\000z\000a\000b\000c\000d\000e\000f\000\000", 0, 14); + x2("\000^\000\134\000w\000.\000.\000.\000d\000e\000f\000$\000\000", "\000z\000a\000b\000c\000d\000e\000f\000\000", 0, 14); + x2("\000\134\000w\000\134\000w\000\134\000s\000\134\000W\000a\000a\000a\000\134\000d\000\000", "\000a\000a\000 \000 \000a\000a\000a\0004\000\000", 0, 16); + x2("\000\134\000A\000\134\000Z\000\000", "\000\000", 0, 0); + x2("\000\134\000A\000x\000y\000z\000\000", "\000x\000y\000z\000\000", 0, 6); + x2("\000x\000y\000z\000\134\000Z\000\000", "\000x\000y\000z\000\000", 0, 6); + x2("\000x\000y\000z\000\134\000z\000\000", "\000x\000y\000z\000\000", 0, 6); + 
x2("\000a\000\134\000Z\000\000", "\000a\000\000", 0, 2); + x2("\000\134\000G\000a\000z\000\000", "\000a\000z\000\000", 0, 4); + n("\000\134\000G\000z\000\000", "\000b\000z\000a\000\000"); + n("\000a\000z\000\134\000G\000\000", "\000a\000z\000\000"); + n("\000a\000z\000\134\000A\000\000", "\000a\000z\000\000"); + n("\000a\000\134\000A\000z\000\000", "\000a\000z\000\000"); + x2("\000\134\000^\000\134\000$\000\000", "\000^\000$\000\000", 0, 4); + x2("\000^\000x\000?\000y\000\000", "\000x\000y\000\000", 0, 4); + x2("\000^\000(\000x\000?\000y\000)\000\000", "\000x\000y\000\000", 0, 4); + x2("\000\134\000w\000\000", "\000_\000\000", 0, 2); + n("\000\134\000W\000\000", "\000_\000\000"); + x2("\000(\000?\000=\000z\000)\000z\000\000", "\000z\000\000", 0, 2); + n("\000(\000?\000=\000z\000)\000.\000\000", "\000a\000\000"); + x2("\000(\000?\000!\000z\000)\000a\000\000", "\000a\000\000", 0, 2); + n("\000(\000?\000!\000z\000)\000a\000\000", "\000z\000\000"); + x2("\000(\000?\000i\000:\000a\000)\000\000", "\000a\000\000", 0, 2); + x2("\000(\000?\000i\000:\000a\000)\000\000", "\000A\000\000", 0, 2); + x2("\000(\000?\000i\000:\000A\000)\000\000", "\000a\000\000", 0, 2); + n("\000(\000?\000i\000:\000A\000)\000\000", "\000b\000\000"); + x2("\000(\000?\000i\000:\000[\000A\000-\000Z\000]\000)\000\000", "\000a\000\000", 0, 2); + x2("\000(\000?\000i\000:\000[\000f\000-\000m\000]\000)\000\000", "\000H\000\000", 0, 2); + x2("\000(\000?\000i\000:\000[\000f\000-\000m\000]\000)\000\000", "\000h\000\000", 0, 2); + n("\000(\000?\000i\000:\000[\000f\000-\000m\000]\000)\000\000", "\000e\000\000"); + x2("\000(\000?\000i\000:\000[\000A\000-\000c\000]\000)\000\000", "\000D\000\000", 0, 2); + n("\000(\000?\000i\000:\000[\000^\000a\000-\000z\000]\000)\000\000", "\000A\000\000"); + n("\000(\000?\000i\000:\000[\000^\000a\000-\000z\000]\000)\000\000", "\000a\000\000"); + x2("\000(\000?\000i\000:\000[\000!\000-\000k\000]\000)\000\000", "\000Z\000\000", 0, 2); + 
x2("\000(\000?\000i\000:\000[\000!\000-\000k\000]\000)\000\000", "\0007\000\000", 0, 2); + x2("\000(\000?\000i\000:\000[\000T\000-\000}\000]\000)\000\000", "\000b\000\000", 0, 2); + x2("\000(\000?\000i\000:\000[\000T\000-\000}\000]\000)\000\000", "\000{\000\000", 0, 2); + x2("\000(\000?\000i\000:\000\134\000?\000a\000)\000\000", "\000?\000A\000\000", 0, 4); + x2("\000(\000?\000i\000:\000\134\000*\000A\000)\000\000", "\000*\000a\000\000", 0, 4); + n("\000.\000\000", "\000\012\000\000"); + x2("\000(\000?\000m\000:\000.\000)\000\000", "\000\012\000\000", 0, 2); + x2("\000(\000?\000m\000:\000a\000.\000)\000\000", "\000a\000\012\000\000", 0, 4); + x2("\000(\000?\000m\000:\000.\000b\000)\000\000", "\000a\000\012\000b\000\000", 2, 6); + x2("\000.\000*\000a\000b\000c\000\000", "\000d\000d\000d\000a\000b\000d\000d\000\012\000d\000d\000a\000b\000c\000\000", 16, 26); + x2("\000(\000?\000m\000:\000.\000*\000a\000b\000c\000)\000\000", "\000d\000d\000d\000a\000b\000d\000d\000a\000b\000c\000\000", 0, 20); + n("\000(\000?\000i\000)\000(\000?\000-\000i\000)\000a\000\000", "\000A\000\000"); + n("\000(\000?\000i\000)\000(\000?\000-\000i\000:\000a\000)\000\000", "\000A\000\000"); + x2("\000a\000?\000\000", "\000\000", 0, 0); + x2("\000a\000?\000\000", "\000b\000\000", 0, 0); + x2("\000a\000?\000\000", "\000a\000\000", 0, 2); + x2("\000a\000*\000\000", "\000\000", 0, 0); + x2("\000a\000*\000\000", "\000a\000\000", 0, 2); + x2("\000a\000*\000\000", "\000a\000a\000a\000\000", 0, 6); + x2("\000a\000*\000\000", "\000b\000a\000a\000a\000a\000\000", 0, 0); + n("\000a\000+\000\000", "\000\000"); + x2("\000a\000+\000\000", "\000a\000\000", 0, 2); + x2("\000a\000+\000\000", "\000a\000a\000a\000a\000\000", 0, 8); + x2("\000a\000+\000\000", "\000a\000a\000b\000b\000b\000\000", 0, 4); + x2("\000a\000+\000\000", "\000b\000a\000a\000a\000a\000\000", 2, 10); + x2("\000.\000?\000\000", "\000\000", 0, 0); + x2("\000.\000?\000\000", "\000f\000\000", 0, 2); + x2("\000.\000?\000\000", "\000\012\000\000", 
0, 0); + x2("\000.\000*\000\000", "\000\000", 0, 0); + x2("\000.\000*\000\000", "\000a\000b\000c\000d\000e\000\000", 0, 10); + x2("\000.\000+\000\000", "\000z\000\000", 0, 2); + x2("\000.\000+\000\000", "\000z\000d\000s\000w\000e\000r\000\012\000\000", 0, 12); + x2("\000(\000.\000*\000)\000a\000\134\0001\000f\000\000", "\000b\000a\000b\000f\000b\000a\000c\000\000", 0, 8); + x2("\000(\000.\000*\000)\000a\000\134\0001\000f\000\000", "\000b\000a\000c\000b\000a\000b\000f\000\000", 6, 14); + x2("\000(\000(\000.\000*\000)\000a\000\134\0002\000f\000)\000\000", "\000b\000a\000c\000b\000a\000b\000f\000\000", 6, 14); + x2("\000(\000.\000*\000)\000a\000\134\0001\000f\000\000", "\000b\000a\000c\000z\000z\000z\000z\000z\000z\000\012\000b\000a\000z\000z\000\012\000z\000z\000z\000z\000b\000a\000b\000f\000\000", 38, 46); + x2("\000a\000|\000b\000\000", "\000a\000\000", 0, 2); + x2("\000a\000|\000b\000\000", "\000b\000\000", 0, 2); + x2("\000|\000a\000\000", "\000a\000\000", 0, 0); + x2("\000(\000|\000a\000)\000\000", "\000a\000\000", 0, 0); + x2("\000a\000b\000|\000b\000c\000\000", "\000a\000b\000\000", 0, 4); + x2("\000a\000b\000|\000b\000c\000\000", "\000b\000c\000\000", 0, 4); + x2("\000z\000(\000?\000:\000a\000b\000|\000b\000c\000)\000\000", "\000z\000b\000c\000\000", 0, 6); + x2("\000a\000(\000?\000:\000a\000b\000|\000b\000c\000)\000c\000\000", "\000a\000a\000b\000c\000\000", 0, 8); + x2("\000a\000b\000|\000(\000?\000:\000a\000c\000|\000a\000z\000)\000\000", "\000a\000z\000\000", 0, 4); + x2("\000a\000|\000b\000|\000c\000\000", "\000d\000c\000\000", 2, 4); + x2("\000a\000|\000b\000|\000c\000d\000|\000e\000f\000g\000|\000h\000|\000i\000j\000k\000|\000l\000m\000n\000|\000o\000|\000p\000q\000|\000r\000s\000t\000u\000v\000w\000x\000|\000y\000z\000\000", "\000p\000q\000r\000\000", 0, 4); + n("\000a\000|\000b\000|\000c\000d\000|\000e\000f\000g\000|\000h\000|\000i\000j\000k\000|\000l\000m\000n\000|\000o\000|\000p\000q\000|\000r\000s\000t\000u\000v\000w\000x\000|\000y\000z\000\000", 
"\000m\000n\000\000"); + x2("\000a\000|\000^\000z\000\000", "\000b\000a\000\000", 2, 4); + x2("\000a\000|\000^\000z\000\000", "\000z\000a\000\000", 0, 2); + x2("\000a\000|\000\134\000G\000z\000\000", "\000b\000z\000a\000\000", 4, 6); + x2("\000a\000|\000\134\000G\000z\000\000", "\000z\000a\000\000", 0, 2); + x2("\000a\000|\000\134\000A\000z\000\000", "\000b\000z\000a\000\000", 4, 6); + x2("\000a\000|\000\134\000A\000z\000\000", "\000z\000a\000\000", 0, 2); + x2("\000a\000|\000b\000\134\000Z\000\000", "\000b\000a\000\000", 2, 4); + x2("\000a\000|\000b\000\134\000Z\000\000", "\000b\000\000", 0, 2); + x2("\000a\000|\000b\000\134\000z\000\000", "\000b\000a\000\000", 2, 4); + x2("\000a\000|\000b\000\134\000z\000\000", "\000b\000\000", 0, 2); + x2("\000\134\000w\000|\000\134\000s\000\000", "\000 \000\000", 0, 2); + n("\000\134\000w\000|\000\134\000w\000\000", "\000 \000\000"); + x2("\000\134\000w\000|\000%\000\000", "\000%\000\000", 0, 2); + x2("\000\134\000w\000|\000[\000&\000$\000]\000\000", "\000&\000\000", 0, 2); + x2("\000[\000b\000-\000d\000]\000|\000[\000^\000e\000-\000z\000]\000\000", "\000a\000\000", 0, 2); + x2("\000(\000?\000:\000a\000|\000[\000c\000-\000f\000]\000)\000|\000b\000z\000\000", "\000d\000z\000\000", 0, 2); + x2("\000(\000?\000:\000a\000|\000[\000c\000-\000f\000]\000)\000|\000b\000z\000\000", "\000b\000z\000\000", 0, 4); + x2("\000a\000b\000c\000|\000(\000?\000=\000z\000z\000)\000.\000.\000f\000\000", "\000z\000z\000f\000\000", 0, 6); + x2("\000a\000b\000c\000|\000(\000?\000!\000z\000z\000)\000.\000.\000f\000\000", "\000a\000b\000f\000\000", 0, 6); + x2("\000(\000?\000=\000z\000a\000)\000.\000.\000a\000|\000(\000?\000=\000z\000z\000)\000.\000.\000a\000\000", "\000z\000z\000a\000\000", 0, 6); + n("\000(\000?\000>\000a\000|\000a\000b\000d\000)\000c\000\000", "\000a\000b\000d\000c\000\000"); + x2("\000(\000?\000>\000a\000b\000d\000|\000a\000)\000c\000\000", "\000a\000b\000d\000c\000\000", 0, 8); + x2("\000a\000?\000|\000b\000\000", "\000a\000\000", 0, 
2); + x2("\000a\000?\000|\000b\000\000", "\000b\000\000", 0, 0); + x2("\000a\000?\000|\000b\000\000", "\000\000", 0, 0); + x2("\000a\000*\000|\000b\000\000", "\000a\000a\000\000", 0, 4); + x2("\000a\000*\000|\000b\000*\000\000", "\000b\000a\000\000", 0, 0); + x2("\000a\000*\000|\000b\000*\000\000", "\000a\000b\000\000", 0, 2); + x2("\000a\000+\000|\000b\000*\000\000", "\000\000", 0, 0); + x2("\000a\000+\000|\000b\000*\000\000", "\000b\000b\000b\000\000", 0, 6); + x2("\000a\000+\000|\000b\000*\000\000", "\000a\000b\000b\000b\000\000", 0, 2); + n("\000a\000+\000|\000b\000+\000\000", "\000\000"); + x2("\000(\000a\000|\000b\000)\000?\000\000", "\000b\000\000", 0, 2); + x2("\000(\000a\000|\000b\000)\000*\000\000", "\000b\000a\000\000", 0, 4); + x2("\000(\000a\000|\000b\000)\000+\000\000", "\000b\000a\000b\000\000", 0, 6); + x2("\000(\000a\000b\000|\000c\000a\000)\000+\000\000", "\000c\000a\000a\000b\000b\000c\000\000", 0, 8); + x2("\000(\000a\000b\000|\000c\000a\000)\000+\000\000", "\000a\000a\000b\000c\000a\000\000", 2, 10); + x2("\000(\000a\000b\000|\000c\000a\000)\000+\000\000", "\000a\000b\000z\000c\000a\000\000", 0, 4); + x2("\000(\000a\000|\000b\000a\000b\000)\000+\000\000", "\000a\000b\000a\000b\000a\000\000", 0, 10); + x2("\000(\000a\000|\000b\000a\000b\000)\000+\000\000", "\000b\000a\000\000", 2, 4); + x2("\000(\000a\000|\000b\000a\000b\000)\000+\000\000", "\000b\000a\000a\000a\000b\000a\000\000", 2, 8); + x2("\000(\000?\000:\000a\000|\000b\000)\000(\000?\000:\000a\000|\000b\000)\000\000", "\000a\000b\000\000", 0, 4); + x2("\000(\000?\000:\000a\000*\000|\000b\000*\000)\000(\000?\000:\000a\000*\000|\000b\000*\000)\000\000", "\000a\000a\000a\000b\000b\000b\000\000", 0, 6); + x2("\000(\000?\000:\000a\000*\000|\000b\000*\000)\000(\000?\000:\000a\000+\000|\000b\000+\000)\000\000", "\000a\000a\000a\000b\000b\000b\000\000", 0, 12); + x2("\000(\000?\000:\000a\000+\000|\000b\000+\000)\000{\0002\000}\000\000", "\000a\000a\000a\000b\000b\000b\000\000", 0, 12); + 
x2("\000h\000{\0000\000,\000}\000\000", "\000h\000h\000h\000h\000\000", 0, 8); + x2("\000(\000?\000:\000a\000+\000|\000b\000+\000)\000{\0001\000,\0002\000}\000\000", "\000a\000a\000a\000b\000b\000b\000\000", 0, 12); + n("\000a\000x\000{\0002\000}\000*\000a\000\000", "\0000\000a\000x\000x\000x\000a\0001\000\000"); + n("\000a\000.\000{\0000\000,\0002\000}\000a\000\000", "\0000\000a\000X\000X\000X\000a\0000\000\000"); + n("\000a\000.\000{\0000\000,\0002\000}\000?\000a\000\000", "\0000\000a\000X\000X\000X\000a\0000\000\000"); + n("\000a\000.\000{\0000\000,\0002\000}\000?\000a\000\000", "\0000\000a\000X\000X\000X\000X\000a\0000\000\000"); + x2("\000^\000a\000{\0002\000,\000}\000?\000a\000$\000\000", "\000a\000a\000a\000\000", 0, 6); + x2("\000^\000[\000a\000-\000z\000]\000{\0002\000,\000}\000?\000$\000\000", "\000a\000a\000a\000\000", 0, 6); + x2("\000(\000?\000:\000a\000+\000|\000\134\000A\000b\000*\000)\000c\000c\000\000", "\000c\000c\000\000", 0, 4); + n("\000(\000?\000:\000a\000+\000|\000\134\000A\000b\000*\000)\000c\000c\000\000", "\000a\000b\000c\000c\000\000"); + x2("\000(\000?\000:\000^\000a\000+\000|\000b\000+\000)\000*\000c\000\000", "\000a\000a\000b\000b\000b\000a\000b\000c\000\000", 12, 16); + x2("\000(\000?\000:\000^\000a\000+\000|\000b\000+\000)\000*\000c\000\000", "\000a\000a\000b\000b\000b\000b\000c\000\000", 0, 14); + x2("\000a\000|\000(\000?\000i\000)\000c\000\000", "\000C\000\000", 0, 2); + x2("\000(\000?\000i\000)\000c\000|\000a\000\000", "\000C\000\000", 0, 2); + x2("\000(\000?\000i\000)\000c\000|\000a\000\000", "\000A\000\000", 0, 2); + x2("\000(\000?\000i\000:\000c\000)\000|\000a\000\000", "\000C\000\000", 0, 2); + n("\000(\000?\000i\000:\000c\000)\000|\000a\000\000", "\000A\000\000"); + x2("\000[\000a\000b\000c\000]\000?\000\000", "\000a\000b\000c\000\000", 0, 2); + x2("\000[\000a\000b\000c\000]\000*\000\000", "\000a\000b\000c\000\000", 0, 6); + x2("\000[\000^\000a\000b\000c\000]\000*\000\000", "\000a\000b\000c\000\000", 0, 0); + 
n("\000[\000^\000a\000b\000c\000]\000+\000\000", "\000a\000b\000c\000\000"); + x2("\000a\000?\000?\000\000", "\000a\000a\000a\000\000", 0, 0); + x2("\000b\000a\000?\000?\000b\000\000", "\000b\000a\000b\000\000", 0, 6); + x2("\000a\000*\000?\000\000", "\000a\000a\000a\000\000", 0, 0); + x2("\000b\000a\000*\000?\000\000", "\000b\000a\000a\000\000", 0, 2); + x2("\000b\000a\000*\000?\000b\000\000", "\000b\000a\000a\000b\000\000", 0, 8); + x2("\000a\000+\000?\000\000", "\000a\000a\000a\000\000", 0, 2); + x2("\000b\000a\000+\000?\000\000", "\000b\000a\000a\000\000", 0, 4); + x2("\000b\000a\000+\000?\000b\000\000", "\000b\000a\000a\000b\000\000", 0, 8); + x2("\000(\000?\000:\000a\000?\000)\000?\000?\000\000", "\000a\000\000", 0, 0); + x2("\000(\000?\000:\000a\000?\000?\000)\000?\000\000", "\000a\000\000", 0, 0); + x2("\000(\000?\000:\000a\000?\000)\000+\000?\000\000", "\000a\000a\000a\000\000", 0, 2); + x2("\000(\000?\000:\000a\000+\000)\000?\000?\000\000", "\000a\000a\000a\000\000", 0, 0); + x2("\000(\000?\000:\000a\000+\000)\000?\000?\000b\000\000", "\000a\000a\000a\000b\000\000", 0, 8); + x2("\000(\000?\000:\000a\000b\000)\000?\000{\0002\000}\000\000", "\000\000", 0, 0); + x2("\000(\000?\000:\000a\000b\000)\000?\000{\0002\000}\000\000", "\000a\000b\000a\000b\000a\000\000", 0, 8); + x2("\000(\000?\000:\000a\000b\000)\000*\000{\0000\000}\000\000", "\000a\000b\000a\000b\000a\000\000", 0, 0); + x2("\000(\000?\000:\000a\000b\000)\000{\0003\000,\000}\000\000", "\000a\000b\000a\000b\000a\000b\000a\000b\000\000", 0, 16); + n("\000(\000?\000:\000a\000b\000)\000{\0003\000,\000}\000\000", "\000a\000b\000a\000b\000\000"); + x2("\000(\000?\000:\000a\000b\000)\000{\0002\000,\0004\000}\000\000", "\000a\000b\000a\000b\000a\000b\000\000", 0, 12); + x2("\000(\000?\000:\000a\000b\000)\000{\0002\000,\0004\000}\000\000", "\000a\000b\000a\000b\000a\000b\000a\000b\000a\000b\000\000", 0, 16); + x2("\000(\000?\000:\000a\000b\000)\000{\0002\000,\0004\000}\000?\000\000", 
"\000a\000b\000a\000b\000a\000b\000a\000b\000a\000b\000\000", 0, 8); + x2("\000(\000?\000:\000a\000b\000)\000{\000,\000}\000\000", "\000a\000b\000{\000,\000}\000\000", 0, 10); + x2("\000(\000?\000:\000a\000b\000c\000)\000+\000?\000{\0002\000}\000\000", "\000a\000b\000c\000a\000b\000c\000a\000b\000c\000\000", 0, 12); + x2("\000(\000?\000:\000X\000*\000)\000(\000?\000i\000:\000x\000a\000)\000\000", "\000X\000X\000X\000a\000\000", 0, 8); + x2("\000(\000d\000+\000)\000(\000[\000^\000a\000b\000c\000]\000z\000)\000\000", "\000d\000d\000d\000z\000\000", 0, 8); + x2("\000(\000[\000^\000a\000b\000c\000]\000*\000)\000(\000[\000^\000a\000b\000c\000]\000z\000)\000\000", "\000d\000d\000d\000z\000\000", 0, 8); + x2("\000(\000\134\000w\000+\000)\000(\000\134\000w\000z\000)\000\000", "\000d\000d\000d\000z\000\000", 0, 8); + x3("\000(\000a\000)\000\000", "\000a\000\000", 0, 2, 1); + x3("\000(\000a\000b\000)\000\000", "\000a\000b\000\000", 0, 4, 1); + x2("\000(\000(\000a\000b\000)\000)\000\000", "\000a\000b\000\000", 0, 4); + x3("\000(\000(\000a\000b\000)\000)\000\000", "\000a\000b\000\000", 0, 4, 1); + x3("\000(\000(\000a\000b\000)\000)\000\000", "\000a\000b\000\000", 0, 4, 2); + x3("\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000a\000b\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000\000", "\000a\000b\000\000", 0, 4, 20); + x3("\000(\000a\000b\000)\000(\000c\000d\000)\000\000", "\000a\000b\000c\000d\000\000", 0, 4, 1); + x3("\000(\000a\000b\000)\000(\000c\000d\000)\000\000", "\000a\000b\000c\000d\000\000", 4, 8, 2); + x3("\000(\000)\000(\000a\000)\000b\000c\000(\000d\000e\000f\000)\000g\000h\000i\000j\000k\000\000", "\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000\000", 6, 12, 3); + x3("\000(\000(\000)\000(\000a\000)\000b\000c\000(\000d\000e\000f\000)\000g\000h\000i\000j\000k\000)\000\000", "\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000\000", 6, 
12, 4); + x2("\000(\000^\000a\000)\000\000", "\000a\000\000", 0, 2); + x3("\000(\000a\000)\000|\000(\000a\000)\000\000", "\000b\000a\000\000", 2, 4, 1); + x3("\000(\000^\000a\000)\000|\000(\000a\000)\000\000", "\000b\000a\000\000", 2, 4, 2); + x3("\000(\000a\000?\000)\000\000", "\000a\000a\000a\000\000", 0, 2, 1); + x3("\000(\000a\000*\000)\000\000", "\000a\000a\000a\000\000", 0, 6, 1); + x3("\000(\000a\000*\000)\000\000", "\000\000", 0, 0, 1); + x3("\000(\000a\000+\000)\000\000", "\000a\000a\000a\000a\000a\000a\000a\000\000", 0, 14, 1); + x3("\000(\000a\000+\000|\000b\000*\000)\000\000", "\000b\000b\000b\000a\000a\000\000", 0, 6, 1); + x3("\000(\000a\000+\000|\000b\000?\000)\000\000", "\000b\000b\000b\000a\000a\000\000", 0, 2, 1); + x3("\000(\000a\000b\000c\000)\000?\000\000", "\000a\000b\000c\000\000", 0, 6, 1); + x3("\000(\000a\000b\000c\000)\000*\000\000", "\000a\000b\000c\000\000", 0, 6, 1); + x3("\000(\000a\000b\000c\000)\000+\000\000", "\000a\000b\000c\000\000", 0, 6, 1); + x3("\000(\000x\000y\000z\000|\000a\000b\000c\000)\000+\000\000", "\000a\000b\000c\000\000", 0, 6, 1); + x3("\000(\000[\000x\000y\000z\000]\000[\000a\000b\000c\000]\000|\000a\000b\000c\000)\000+\000\000", "\000a\000b\000c\000\000", 0, 6, 1); + x3("\000(\000(\000?\000i\000:\000a\000b\000c\000)\000)\000\000", "\000A\000b\000C\000\000", 0, 6, 1); + x2("\000(\000a\000b\000c\000)\000(\000?\000i\000:\000\134\0001\000)\000\000", "\000a\000b\000c\000A\000B\000C\000\000", 0, 12); + x3("\000(\000(\000?\000m\000:\000a\000.\000c\000)\000)\000\000", "\000a\000\012\000c\000\000", 0, 6, 1); + x3("\000(\000(\000?\000=\000a\000z\000)\000a\000)\000\000", "\000a\000z\000b\000\000", 0, 2, 1); + x3("\000a\000b\000c\000|\000(\000.\000a\000b\000d\000)\000\000", "\000z\000a\000b\000d\000\000", 0, 8, 1); + x2("\000(\000?\000:\000a\000b\000c\000)\000|\000(\000A\000B\000C\000)\000\000", "\000a\000b\000c\000\000", 0, 6); + x3("\000(\000?\000i\000:\000(\000a\000b\000c\000)\000)\000|\000(\000z\000z\000z\000)\000\000", 
"\000A\000B\000C\000\000", 0, 6, 1); + x3("\000a\000*\000(\000.\000)\000\000", "\000a\000a\000a\000a\000z\000\000", 8, 10, 1); + x3("\000a\000*\000?\000(\000.\000)\000\000", "\000a\000a\000a\000a\000z\000\000", 0, 2, 1); + x3("\000a\000*\000?\000(\000c\000)\000\000", "\000a\000a\000a\000a\000c\000\000", 8, 10, 1); + x3("\000[\000b\000c\000d\000]\000a\000*\000(\000.\000)\000\000", "\000c\000a\000a\000a\000a\000z\000\000", 10, 12, 1); + x3("\000(\000\134\000A\000b\000b\000)\000c\000c\000\000", "\000b\000b\000c\000c\000\000", 0, 4, 1); + n("\000(\000\134\000A\000b\000b\000)\000c\000c\000\000", "\000z\000b\000b\000c\000c\000\000"); + x3("\000(\000^\000b\000b\000)\000c\000c\000\000", "\000b\000b\000c\000c\000\000", 0, 4, 1); + n("\000(\000^\000b\000b\000)\000c\000c\000\000", "\000z\000b\000b\000c\000c\000\000"); + x3("\000c\000c\000(\000b\000b\000$\000)\000\000", "\000c\000c\000b\000b\000\000", 4, 8, 1); + n("\000c\000c\000(\000b\000b\000$\000)\000\000", "\000c\000c\000b\000b\000b\000\000"); + n("\000(\000\134\0001\000)\000\000", "\000\000"); + n("\000\134\0001\000(\000a\000)\000\000", "\000a\000a\000\000"); + n("\000(\000a\000(\000b\000)\000\134\0001\000)\000\134\0002\000+\000\000", "\000a\000b\000a\000b\000b\000\000"); + n("\000(\000?\000:\000(\000?\000:\000\134\0001\000|\000z\000)\000(\000a\000)\000)\000+\000$\000\000", "\000z\000a\000a\000\000"); + x2("\000(\000?\000:\000(\000?\000:\000\134\0001\000|\000z\000)\000(\000a\000)\000)\000+\000$\000\000", "\000z\000a\000a\000a\000\000", 0, 8); + x2("\000(\000a\000)\000(\000?\000=\000\134\0001\000)\000\000", "\000a\000a\000\000", 0, 2); + n("\000(\000a\000)\000$\000|\000\134\0001\000\000", "\000a\000z\000\000"); + x2("\000(\000a\000)\000\134\0001\000\000", "\000a\000a\000\000", 0, 4); + n("\000(\000a\000)\000\134\0001\000\000", "\000a\000b\000\000"); + x2("\000(\000a\000?\000)\000\134\0001\000\000", "\000a\000a\000\000", 0, 4); + x2("\000(\000a\000?\000?\000)\000\134\0001\000\000", "\000a\000a\000\000", 0, 0); + 
x2("\000(\000a\000*\000)\000\134\0001\000\000", "\000a\000a\000a\000a\000a\000\000", 0, 8); + x3("\000(\000a\000*\000)\000\134\0001\000\000", "\000a\000a\000a\000a\000a\000\000", 0, 4, 1); + x2("\000a\000(\000b\000*\000)\000\134\0001\000\000", "\000a\000b\000b\000b\000b\000\000", 0, 10); + x2("\000a\000(\000b\000*\000)\000\134\0001\000\000", "\000a\000b\000\000", 0, 2); + x2("\000(\000a\000*\000)\000(\000b\000*\000)\000\134\0001\000\134\0002\000\000", "\000a\000a\000a\000b\000b\000a\000a\000a\000b\000b\000\000", 0, 20); + x2("\000(\000a\000*\000)\000(\000b\000*\000)\000\134\0002\000\000", "\000a\000a\000a\000b\000b\000b\000b\000\000", 0, 14); + x2("\000(\000(\000(\000(\000(\000(\000(\000a\000*\000)\000b\000)\000)\000)\000)\000)\000)\000c\000\134\0007\000\000", "\000a\000a\000a\000b\000c\000a\000a\000a\000\000", 0, 16); + x3("\000(\000(\000(\000(\000(\000(\000(\000a\000*\000)\000b\000)\000)\000)\000)\000)\000)\000c\000\134\0007\000\000", "\000a\000a\000a\000b\000c\000a\000a\000a\000\000", 0, 6, 7); + x2("\000(\000a\000)\000(\000b\000)\000(\000c\000)\000\134\0002\000\134\0001\000\134\0003\000\000", "\000a\000b\000c\000b\000a\000c\000\000", 0, 12); + x2("\000(\000[\000a\000-\000d\000]\000)\000\134\0001\000\000", "\000c\000c\000\000", 0, 4); + x2("\000(\000\134\000w\000\134\000d\000\134\000s\000)\000\134\0001\000\000", "\000f\0005\000 \000f\0005\000 \000\000", 0, 12); + n("\000(\000\134\000w\000\134\000d\000\134\000s\000)\000\134\0001\000\000", "\000f\0005\000 \000f\0005\000\000"); + x2("\000(\000w\000h\000o\000|\000[\000a\000-\000c\000]\000{\0003\000}\000)\000\134\0001\000\000", "\000w\000h\000o\000w\000h\000o\000\000", 0, 12); + x2("\000.\000.\000.\000(\000w\000h\000o\000|\000[\000a\000-\000c\000]\000{\0003\000}\000)\000\134\0001\000\000", "\000a\000b\000c\000w\000h\000o\000w\000h\000o\000\000", 0, 18); + x2("\000(\000w\000h\000o\000|\000[\000a\000-\000c\000]\000{\0003\000}\000)\000\134\0001\000\000", "\000c\000b\000c\000c\000b\000c\000\000", 0, 12); + 
x2("\000(\000^\000a\000)\000\134\0001\000\000", "\000a\000a\000\000", 0, 4); + n("\000(\000^\000a\000)\000\134\0001\000\000", "\000b\000a\000a\000\000"); + n("\000(\000a\000$\000)\000\134\0001\000\000", "\000a\000a\000\000"); + n("\000(\000a\000b\000\134\000Z\000)\000\134\0001\000\000", "\000a\000b\000\000"); + x2("\000(\000a\000*\000\134\000Z\000)\000\134\0001\000\000", "\000a\000\000", 2, 2); + x2("\000.\000(\000a\000*\000\134\000Z\000)\000\134\0001\000\000", "\000b\000a\000\000", 2, 4); + x3("\000(\000.\000(\000a\000b\000c\000)\000\134\0002\000)\000\000", "\000z\000a\000b\000c\000a\000b\000c\000\000", 0, 14, 1); + x3("\000(\000.\000(\000.\000.\000\134\000d\000.\000)\000\134\0002\000)\000\000", "\000z\0001\0002\0003\0004\0001\0002\0003\0004\000\000", 0, 18, 1); + x2("\000(\000(\000?\000i\000:\000a\000z\000)\000)\000\134\0001\000\000", "\000A\000z\000A\000z\000\000", 0, 8); + n("\000(\000(\000?\000i\000:\000a\000z\000)\000)\000\134\0001\000\000", "\000A\000z\000a\000z\000\000"); + x2("\000(\000?\000<\000=\000a\000)\000b\000\000", "\000a\000b\000\000", 2, 4); + n("\000(\000?\000<\000=\000a\000)\000b\000\000", "\000b\000b\000\000"); + x2("\000(\000?\000<\000=\000a\000|\000b\000)\000b\000\000", "\000b\000b\000\000", 2, 4); + x2("\000(\000?\000<\000=\000a\000|\000b\000c\000)\000b\000\000", "\000b\000c\000b\000\000", 4, 6); + x2("\000(\000?\000<\000=\000a\000|\000b\000c\000)\000b\000\000", "\000a\000b\000\000", 2, 4); + x2("\000(\000?\000<\000=\000a\000|\000b\000c\000|\000|\000d\000e\000f\000g\000h\000i\000j\000|\000k\000l\000m\000n\000o\000p\000q\000|\000r\000)\000z\000\000", "\000r\000z\000\000", 2, 4); + x2("\000(\000a\000)\000\134\000g\000<\0001\000>\000\000", "\000a\000a\000\000", 0, 4); + x2("\000(\000?\000<\000!\000a\000)\000b\000\000", "\000c\000b\000\000", 2, 4); + n("\000(\000?\000<\000!\000a\000)\000b\000\000", "\000a\000b\000\000"); + x2("\000(\000?\000<\000!\000a\000|\000b\000c\000)\000b\000\000", "\000b\000b\000b\000\000", 0, 2); + 
n("\000(\000?\000<\000!\000a\000|\000b\000c\000)\000z\000\000", "\000b\000c\000z\000\000"); + x2("\000(\000?\000<\000n\000a\000m\000e\0001\000>\000a\000)\000\000", "\000a\000\000", 0, 2); + x2("\000(\000?\000<\000n\000a\000m\000e\000_\0002\000>\000a\000b\000)\000\134\000g\000<\000n\000a\000m\000e\000_\0002\000>\000\000", "\000a\000b\000a\000b\000\000", 0, 8); + x2("\000(\000?\000<\000n\000a\000m\000e\000_\0003\000>\000.\000z\000v\000.\000)\000\134\000k\000<\000n\000a\000m\000e\000_\0003\000>\000\000", "\000a\000z\000v\000b\000a\000z\000v\000b\000\000", 0, 16); + x2("\000(\000?\000<\000=\000\134\000g\000<\000a\000b\000>\000)\000|\000-\000\134\000z\000E\000N\000D\000 \000(\000?\000<\000a\000b\000>\000X\000y\000Z\000)\000\000", "\000X\000y\000Z\000\000", 6, 6); + x2("\000(\000?\000<\000n\000>\000|\000a\000\134\000g\000<\000n\000>\000)\000+\000\000", "\000\000", 0, 0); + x2("\000(\000?\000<\000n\000>\000|\000\134\000(\000\134\000g\000<\000n\000>\000\134\000)\000)\000+\000$\000\000", "\000(\000)\000(\000(\000)\000)\000\000", 0, 12); + x3("\000\134\000g\000<\000n\000>\000(\000?\000<\000n\000>\000.\000)\000{\0000\000}\000\000", "\000X\000\000", 0, 2, 1); + x2("\000\134\000g\000<\000n\000>\000(\000a\000b\000c\000|\000d\000f\000(\000?\000<\000n\000>\000.\000Y\000Z\000)\000{\0002\000,\0008\000}\000)\000{\0000\000}\000\000", "\000X\000Y\000Z\000\000", 0, 6); + x2("\000\134\000A\000(\000?\000<\000n\000>\000(\000a\000\134\000g\000<\000n\000>\000)\000|\000)\000\134\000z\000\000", "\000a\000a\000a\000a\000\000", 0, 8); + x2("\000(\000?\000<\000n\000>\000|\000\134\000g\000<\000m\000>\000\134\000g\000<\000n\000>\000)\000\134\000z\000|\000\134\000z\000E\000N\000D\000 \000(\000?\000<\000m\000>\000a\000|\000(\000b\000)\000\134\000g\000<\000m\000>\000)\000\000", "\000b\000b\000b\000b\000a\000b\000b\000a\000\000", 0, 16); + 
x2("\000(\000?\000<\000n\000a\000m\000e\0001\0002\0004\0000\000>\000\134\000w\000+\000\134\000s\000x\000)\000a\000+\000\134\000k\000<\000n\000a\000m\000e\0001\0002\0004\0000\000>\000\000", "\000 \000 \000f\000g\000 \000x\000a\000a\000a\000a\000a\000a\000a\000a\000f\000g\000 \000x\000\000", 4, 36); + x3("\000(\000z\000)\000(\000)\000(\000)\000(\000?\000<\000_\0009\000>\000a\000)\000\134\000g\000<\000_\0009\000>\000\000", "\000z\000a\000a\000\000", 4, 6, 1); + x2("\000(\000.\000)\000(\000(\000(\000?\000<\000_\000>\000a\000)\000)\000)\000\134\000k\000<\000_\000>\000\000", "\000z\000a\000a\000\000", 0, 6); + x2("\000(\000(\000?\000<\000n\000a\000m\000e\0001\000>\000\134\000d\000)\000|\000(\000?\000<\000n\000a\000m\000e\0002\000>\000\134\000w\000)\000)\000(\000\134\000k\000<\000n\000a\000m\000e\0001\000>\000|\000\134\000k\000<\000n\000a\000m\000e\0002\000>\000)\000\000", "\000f\000f\000\000", 0, 4); + x2("\000(\000?\000:\000(\000?\000<\000x\000>\000)\000|\000(\000?\000<\000x\000>\000e\000f\000g\000)\000)\000\134\000k\000<\000x\000>\000\000", "\000\000", 0, 0); + x2("\000(\000?\000:\000(\000?\000<\000x\000>\000a\000b\000c\000)\000|\000(\000?\000<\000x\000>\000e\000f\000g\000)\000)\000\134\000k\000<\000x\000>\000\000", "\000a\000b\000c\000e\000f\000g\000e\000f\000g\000\000", 6, 18); + n("\000(\000?\000:\000(\000?\000<\000x\000>\000a\000b\000c\000)\000|\000(\000?\000<\000x\000>\000e\000f\000g\000)\000)\000\134\000k\000<\000x\000>\000\000", "\000a\000b\000c\000e\000f\000g\000\000"); + 
x2("\000(\000?\000:\000(\000?\000<\000n\0001\000>\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000)\000)\000\134\000k\000<\000n\0001\000>\000$\000\000", "\000a\000-\000p\000y\000u\000m\000p\000y\000u\000m\000\000", 4, 20); + 
x3("\000(\000?\000:\000(\000?\000<\000n\0001\000>\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000)\000)\000\134\000k\000<\000n\0001\000>\000$\000\000", "\000x\000x\000x\000x\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000\000", 8, 36, 14); + 
x3("\000(\000?\000<\000n\000a\000m\000e\0001\000>\000)\000(\000?\000<\000n\000a\000m\000e\0002\000>\000)\000(\000?\000<\000n\000a\000m\000e\0003\000>\000)\000(\000?\000<\000n\000a\000m\000e\0004\000>\000)\000(\000?\000<\000n\000a\000m\000e\0005\000>\000)\000(\000?\000<\000n\000a\000m\000e\0006\000>\000)\000(\000?\000<\000n\000a\000m\000e\0007\000>\000)\000(\000?\000<\000n\000a\000m\000e\0008\000>\000)\000(\000?\000<\000n\000a\000m\000e\0009\000>\000)\000(\000?\000<\000n\000a\000m\000e\0001\0000\000>\000)\000(\000?\000<\000n\000a\000m\000e\0001\0001\000>\000)\000(\000?\000<\000n\000a\000m\000e\0001\0002\000>\000)\000(\000?\000<\000n\000a\000m\000e\0001\0003\000>\000)\000(\000?\000<\000n\000a\000m\000e\0001\0004\000>\000)\000(\000?\000<\000n\000a\000m\000e\0001\0005\000>\000)\000(\000?\000<\000n\000a\000m\000e\0001\0006\000>\000a\000a\000a\000)\000(\000?\000<\000n\000a\000m\000e\0001\0007\000>\000)\000$\000\000", "\000a\000a\000a\000\000", 0, 6, 16); + x2("\000(\000?\000<\000f\000o\000o\000>\000a\000|\000\134\000(\000\134\000g\000<\000f\000o\000o\000>\000\134\000)\000)\000\000", "\000a\000\000", 0, 2); + x2("\000(\000?\000<\000f\000o\000o\000>\000a\000|\000\134\000(\000\134\000g\000<\000f\000o\000o\000>\000\134\000)\000)\000\000", "\000(\000(\000(\000(\000(\000(\000a\000)\000)\000)\000)\000)\000)\000\000", 0, 26); + x3("\000(\000?\000<\000f\000o\000o\000>\000a\000|\000\134\000(\000\134\000g\000<\000f\000o\000o\000>\000\134\000)\000)\000\000", "\000(\000(\000(\000(\000(\000(\000(\000(\000a\000)\000)\000)\000)\000)\000)\000)\000)\000\000", 0, 34, 1); + x2("\000\134\000g\000<\000b\000a\000r\000>\000|\000\134\000z\000E\000N\000D\000(\000?\000<\000b\000a\000r\000>\000.\000*\000a\000b\000c\000$\000)\000\000", "\000a\000b\000c\000x\000x\000x\000a\000b\000c\000\000", 0, 18); + x2("\000\134\000g\000<\0001\000>\000|\000\134\000z\000E\000N\000D\000(\000.\000a\000.\000)\000\000", "\000b\000a\000c\000\000", 0, 6); + 
x3("\000\134\000g\000<\000_\000A\000>\000\134\000g\000<\000_\000A\000>\000|\000\134\000z\000E\000N\000D\000(\000.\000a\000.\000)\000(\000?\000<\000_\000A\000>\000.\000b\000.\000)\000\000", "\000x\000b\000x\000y\000b\000y\000\000", 6, 12, 1); + x2("\000\134\000A\000(\000?\000:\000\134\000g\000<\000p\000o\000n\000>\000|\000\134\000g\000<\000p\000a\000n\000>\000|\000\134\000z\000E\000N\000D\000 \000 \000(\000?\000<\000p\000a\000n\000>\000a\000|\000c\000\134\000g\000<\000p\000o\000n\000>\000c\000)\000(\000?\000<\000p\000o\000n\000>\000b\000|\000d\000\134\000g\000<\000p\000a\000n\000>\000d\000)\000)\000$\000\000", "\000c\000d\000c\000b\000c\000d\000c\000\000", 0, 14); + x2("\000\134\000A\000(\000?\000<\000n\000>\000|\000a\000\134\000g\000<\000m\000>\000)\000\134\000z\000|\000\134\000z\000E\000N\000D\000 \000(\000?\000<\000m\000>\000\134\000g\000<\000n\000>\000)\000\000", "\000a\000a\000a\000a\000\000", 0, 8); + x2("\000(\000?\000<\000n\000>\000(\000a\000|\000b\000\134\000g\000<\000n\000>\000c\000)\000{\0003\000,\0005\000}\000)\000\000", "\000b\000a\000a\000a\000a\000c\000a\000\000", 2, 10); + x2("\000(\000?\000<\000n\000>\000(\000a\000|\000b\000\134\000g\000<\000n\000>\000c\000)\000{\0003\000,\0005\000}\000)\000\000", "\000b\000a\000a\000a\000a\000c\000a\000a\000a\000a\000a\000\000", 0, 20); + x2("\000(\000?\000<\000p\000a\000r\000e\000>\000\134\000(\000(\000[\000^\000\134\000(\000\134\000)\000]\000+\000+\000|\000\134\000g\000<\000p\000a\000r\000e\000>\000)\000*\000+\000\134\000)\000)\000\000", "\000(\000(\000a\000)\000)\000\000", 0, 10); + x2("\000(\000)\000*\000\134\0001\000\000", "\000\000", 0, 0); + x2("\000(\000?\000:\000(\000)\000|\000(\000)\000)\000*\000\134\0001\000\134\0002\000\000", "\000\000", 0, 0); + x3("\000(\000?\000:\000\134\0001\000a\000|\000(\000)\000)\000*\000\000", "\000a\000\000", 0, 0, 1); + x2("\000x\000(\000(\000.\000)\000*\000)\000*\000x\000\000", "\0000\000x\0001\000x\0002\000x\0003\000\000", 2, 12); + 
x2("\000x\000(\000(\000.\000)\000*\000)\000*\000x\000(\000?\000i\000:\000\134\0001\000)\000\134\000Z\000\000", "\0000\000x\0001\000x\0002\000x\0001\000X\0002\000\000", 2, 18); + x2("\000(\000?\000:\000(\000)\000|\000(\000)\000|\000(\000)\000|\000(\000)\000|\000(\000)\000|\000(\000)\000)\000*\000\134\0002\000\134\0005\000\000", "\000\000", 0, 0); + x2("\000(\000?\000:\000(\000)\000|\000(\000)\000|\000(\000)\000|\000(\000x\000)\000|\000(\000)\000|\000(\000)\000)\000*\000\134\0002\000b\000\134\0005\000\000", "\000b\000\000", 0, 2); + x2("\217\372\000\000", "\217\372\000\000", 0, 2); + x2("\000\000", "0B\000\000", 0, 0); + x2("0B\000\000", "0B\000\000", 0, 2); + n("0D\000\000", "0B\000\000"); + x2("0F0F\000\000", "0F0F\000\000", 0, 4); + x2("0B0D0F\000\000", "0B0D0F\000\000", 0, 6); + x2("0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S\000\000", "0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S\000\000", 0, 70); + x2("0B\000\000", "0D0B\000\000", 2, 4); + x2("0D0F\000\000", "0B0D0F\000\000", 2, 6); + x2("e\207\000\000", "e\207\000\000", 0, 2); + x2("\000.\000\000", "0B\000\000", 0, 2); + x2("\000.\000.\000\000", "0K0M\000\000", 0, 4); + x2("\000\134\000w\000\000", "0J\000\000", 0, 2); + n("\000\134\000W\000\000", "0B\000\000"); + x2("\000[\000\134\000W\000]\000\000", "0F\000$\000\000", 2, 4); + x2("\000\134\000S\000\000", "0]\000\000", 0, 2); + x2("\000\134\000S\000\000", "o\042\000\000", 0, 2); + x2("\000\134\000b\000\000", "l\027\000 \000\000", 0, 0); + x2("\000\134\000b\000\000", "\000 0{\000\000", 2, 2); + x2("\000\134\000B\000\000", "0[0]\000 \000\000", 2, 2); + x2("\000\134\000B\000\000", "0F\000 \000\000", 4, 4); + x2("\000\134\000B\000\000", "\000 0D\000\000", 0, 0); + x2("\000[0_0a\000]\000\000", "0a\000\000", 0, 2); + n("\000[0j0k\000]\000\000", "0l\000\000"); + x2("\000[0F\000-0J\000]\000\000", "0H\000\000", 0, 2); + n("\000[\000^0Q\000]\000\000", "0Q\000\000"); + x2("\000[\000\134\000w\000]\000\000", 
"0m\000\000", 0, 2); + n("\000[\000\134\000d\000]\000\000", "0u\000\000"); + x2("\000[\000\134\000D\000]\000\000", "0o\000\000", 0, 2); + n("\000[\000\134\000s\000]\000\000", "0O\000\000"); + x2("\000[\000\134\000S\000]\000\000", "0x\000\000", 0, 2); + x2("\000[\000\134\000w\000\134\000d\000]\000\000", "0\210\000\000", 0, 2); + x2("\000[\000\134\000w\000\134\000d\000]\000\000", "\000 \000 \000 0\210\000\000", 6, 8); + n("\000\134\000w\233<\216\312\000\000", "\000 \233<\216\312\000\000"); + x2("\233<\000\134\000W\216\312\000\000", "\233<\000 \216\312\000\000", 0, 6); + x2("0B\000.0D\000.0F\000\000", "0B0B0D0D0F\000\000", 0, 10); + x2("\000.\000\134\000w0F\000\134\000W\000.\000.0^\000\000", "0H0F0F\000 0F0^0^\000\000", 0, 14); + x2("\000\134\000s\000\134\000w0S0S0S\000\000", "\000 0S0S0S0S\000\000", 0, 10); + x2("0B0B\000.0Q\000\000", "0B0B0Q0Q\000\000", 0, 8); + n("\000.0D\000\000", "0D0H\000\000"); + x2("\000.0J\000\000", "0J0J\000\000", 0, 4); + x2("\000^0B\000\000", "0B\000\000", 0, 2); + x2("\000^0\200\000$\000\000", "0\200\000\000", 0, 2); + x2("\000^\000\134\000w\000$\000\000", "0k\000\000", 0, 2); + x2("\000^\000\134\000w0K0M0O0Q0S\000$\000\000", "\000z0K0M0O0Q0S\000\000", 0, 12); + x2("\000^\000\134\000w\000.\000.\000.0F0H0J\000$\000\000", "\000z0B0D0F0F0H0J\000\000", 0, 14); + x2("\000\134\000w\000\134\000w\000\134\000s\000\134\000W0J0J0J\000\134\000d\000\000", "\000a0J\000 \000 0J0J0J\0004\000\000", 0, 16); + x2("\000\134\000A0_0a0d\000\000", "0_0a0d\000\000", 0, 6); + x2("0\2000\2010\202\000\134\000Z\000\000", "0\2000\2010\202\000\000", 0, 6); + x2("0K0M0O\000\134\000z\000\000", "0K0M0O\000\000", 0, 6); + x2("0K0M0O\000\134\000Z\000\000", "0K0M0O\000\012\000\000", 0, 6); + x2("\000\134\000G0}0t\000\000", "0}0t\000\000", 0, 4); + n("\000\134\000G0H\000\000", "0F0H0J\000\000"); + n("0h0f\000\134\000G\000\000", "0h0f\000\000"); + n("0~0\177\000\134\000A\000\000", "0~0\177\000\000"); + n("0~\000\134\000A0\177\000\000", "0~0\177\000\000"); + 
x2("\000(\000?\000=0[\000)0[\000\000", "0[\000\000", 0, 2); + n("\000(\000?\000=0F\000)\000.\000\000", "0D\000\000"); + x2("\000(\000?\000!0F\000)0K\000\000", "0K\000\000", 0, 2); + n("\000(\000?\000!0h\000)0B\000\000", "0h\000\000"); + x2("\000(\000?\000i\000:0B\000)\000\000", "0B\000\000", 0, 2); + x2("\000(\000?\000i\000:0v0y\000)\000\000", "0v0y\000\000", 0, 4); + n("\000(\000?\000i\000:0D\000)\000\000", "0F\000\000"); + x2("\000(\000?\000m\000:0\210\000.\000)\000\000", "0\210\000\012\000\000", 0, 4); + x2("\000(\000?\000m\000:\000.0\201\000)\000\000", "0~\000\0120\201\000\000", 2, 6); + x2("0B\000?\000\000", "\000\000", 0, 0); + x2("Y\011\000?\000\000", "S\026\000\000", 0, 0); + x2("Y\011\000?\000\000", "Y\011\000\000", 0, 2); + x2("\221\317\000*\000\000", "\000\000", 0, 0); + x2("\221\317\000*\000\000", "\221\317\000\000", 0, 2); + x2("[P\000*\000\000", "[P[P[P\000\000", 0, 6); + x2("\231\254\000*\000\000", "\236\177\231\254\231\254\231\254\231\254\000\000", 0, 0); + n("\134q\000+\000\000", "\000\000"); + x2("l\263\000+\000\000", "l\263\000\000", 0, 2); + x2("fB\000+\000\000", "fBfBfBfB\000\000", 0, 8); + x2("0H\000+\000\000", "0H0H0F0F0F\000\000", 0, 4); + x2("0F\000+\000\000", "0J0F0F0F0F\000\000", 2, 10); + x2("\000.\000?\000\000", "0_\000\000", 0, 2); + x2("\000.\000*\000\000", "0q0t0w0z\000\000", 0, 8); + x2("\000.\000+\000\000", "0\215\000\000", 0, 2); + x2("\000.\000+\000\000", "0D0F0H0K\000\012\000\000", 0, 8); + x2("0B\000|0D\000\000", "0B\000\000", 0, 2); + x2("0B\000|0D\000\000", "0D\000\000", 0, 2); + x2("0B0D\000|0D0F\000\000", "0B0D\000\000", 0, 4); + x2("0B0D\000|0D0F\000\000", "0D0F\000\000", 0, 4); + x2("0\222\000(\000?\000:0K0M\000|0M0O\000)\000\000", "0\2220K0M\000\000", 0, 6); + x2("0\222\000(\000?\000:0K0M\000|0M0O\000)0Q\000\000", "0\2220M0O0Q\000\000", 0, 8); + x2("0B0D\000|\000(\000?\000:0B0F\000|0B0\222\000)\000\000", "0B0\222\000\000", 0, 4); + x2("0B\000|0D\000|0F\000\000", "0H0F\000\000", 2, 4); + 
x2("0B\000|0D\000|0F0H\000|0J0K0M\000|0O\000|0Q0S0U\000|0W0Y0[\000|0]\000|0_0a\000|0d0f0h0j0k\000|0l0m\000\000", "0W0Y0[\000\000", 0, 6); + n("0B\000|0D\000|0F0H\000|0J0K0M\000|0O\000|0Q0S0U\000|0W0Y0[\000|0]\000|0_0a\000|0d0f0h0j0k\000|0l0m\000\000", "0Y0[\000\000"); + x2("0B\000|\000^0\217\000\000", "0v0B\000\000", 2, 4); + x2("0B\000|\000^0\222\000\000", "0\2220B\000\000", 0, 2); + x2("\233<\000|\000\134\000G\216\312\000\000", "0Q\216\312\233<\000\000", 4, 6); + x2("\233<\000|\000\134\000G\216\312\000\000", "\216\312\233<\000\000", 0, 2); + x2("\233<\000|\000\134\000A\216\312\000\000", "\000b\216\312\233<\000\000", 4, 6); + x2("\233<\000|\000\134\000A\216\312\000\000", "\216\312\000\000", 0, 2); + x2("\233<\000|\216\312\000\134\000Z\000\000", "\216\312\233<\000\000", 2, 4); + x2("\233<\000|\216\312\000\134\000Z\000\000", "\216\312\000\000", 0, 2); + x2("\233<\000|\216\312\000\134\000Z\000\000", "\216\312\000\012\000\000", 0, 2); + x2("\233<\000|\216\312\000\134\000z\000\000", "\216\312\233<\000\000", 2, 4); + x2("\233<\000|\216\312\000\134\000z\000\000", "\216\312\000\000", 0, 2); + x2("\000\134\000w\000|\000\134\000s\000\000", "0J\000\000", 0, 2); + x2("\000\134\000w\000|\000%\000\000", "\000%0J\000\000", 0, 2); + x2("\000\134\000w\000|\000[\000&\000$\000]\000\000", "0F\000&\000\000", 0, 2); + x2("\000[0D\000-0Q\000]\000\000", "0F\000\000", 0, 2); + x2("\000[0D\000-0Q\000]\000|\000[\000^0K\000-0S\000]\000\000", "0B\000\000", 0, 2); + x2("\000[0D\000-0Q\000]\000|\000[\000^0K\000-0S\000]\000\000", "0K\000\000", 0, 2); + x2("\000[\000^0B\000]\000\000", "\000\012\000\000", 0, 2); + x2("\000(\000?\000:0B\000|\000[0F\000-0M\000]\000)\000|0D0\222\000\000", "0F0\222\000\000", 0, 2); + x2("\000(\000?\000:0B\000|\000[0F\000-0M\000]\000)\000|0D0\222\000\000", "0D0\222\000\000", 0, 4); + x2("0B0D0F\000|\000(\000?\000=0Q0Q\000)\000.\000.0{\000\000", "0Q0Q0{\000\000", 0, 6); + x2("0B0D0F\000|\000(\000?\000!0Q0Q\000)\000.\000.0{\000\000", "0B0D0{\000\000", 0, 6); + 
x2("\000(\000?\000=0\2220B\000)\000.\000.0B\000|\000(\000?\000=0\2220\222\000)\000.\000.0B\000\000", "0\2220\2220B\000\000", 0, 6); + x2("\000(\000?\000<\000=0B\000|0D0F\000)0D\000\000", "0D0F0D\000\000", 4, 6); + n("\000(\000?\000>0B\000|0B0D0H\000)0F\000\000", "0B0D0H0F\000\000"); + x2("\000(\000?\000>0B0D0H\000|0B\000)0F\000\000", "0B0D0H0F\000\000", 0, 8); + x2("0B\000?\000|0D\000\000", "0B\000\000", 0, 2); + x2("0B\000?\000|0D\000\000", "0D\000\000", 0, 0); + x2("0B\000?\000|0D\000\000", "\000\000", 0, 0); + x2("0B\000*\000|0D\000\000", "0B0B\000\000", 0, 4); + x2("0B\000*\000|0D\000*\000\000", "0D0B\000\000", 0, 0); + x2("0B\000*\000|0D\000*\000\000", "0B0D\000\000", 0, 2); + x2("\000[\000a0B\000]\000*\000|0D\000*\000\000", "\000a0B0D0D0D\000\000", 0, 4); + x2("0B\000+\000|0D\000*\000\000", "\000\000", 0, 0); + x2("0B\000+\000|0D\000*\000\000", "0D0D0D\000\000", 0, 6); + x2("0B\000+\000|0D\000*\000\000", "0B0D0D0D\000\000", 0, 2); + x2("0B\000+\000|0D\000*\000\000", "\000a0B0D0D0D\000\000", 0, 0); + n("0B\000+\000|0D\000+\000\000", "\000\000"); + x2("\000(0B\000|0D\000)\000?\000\000", "0D\000\000", 0, 2); + x2("\000(0B\000|0D\000)\000*\000\000", "0D0B\000\000", 0, 4); + x2("\000(0B\000|0D\000)\000+\000\000", "0D0B0D\000\000", 0, 6); + x2("\000(0B0D\000|0F0B\000)\000+\000\000", "0F0B0B0D0F0H\000\000", 0, 8); + x2("\000(0B0D\000|0F0H\000)\000+\000\000", "0F0B0B0D0F0H\000\000", 4, 12); + x2("\000(0B0D\000|0F0B\000)\000+\000\000", "0B0B0D0F0B\000\000", 2, 10); + x2("\000(0B0D\000|0F0B\000)\000+\000\000", "0B0D0\2220F0B\000\000", 0, 4); + x2("\000(0B0D\000|0F0B\000)\000+\000\000", "\000$\000$\000z\000z\000z\000z0B0D0\2220F0B\000\000", 12, 16); + x2("\000(0B\000|0D0B0D\000)\000+\000\000", "0B0D0B0D0B\000\000", 0, 10); + x2("\000(0B\000|0D0B0D\000)\000+\000\000", "0D0B\000\000", 2, 4); + x2("\000(0B\000|0D0B0D\000)\000+\000\000", "0D0B0B0B0D0B\000\000", 2, 8); + x2("\000(\000?\000:0B\000|0D\000)\000(\000?\000:0B\000|0D\000)\000\000", "0B0D\000\000", 0, 4); + 
x2("\000(\000?\000:0B\000*\000|0D\000*\000)\000(\000?\000:0B\000*\000|0D\000*\000)\000\000", "0B0B0B0D0D0D\000\000", 0, 6); + x2("\000(\000?\000:0B\000*\000|0D\000*\000)\000(\000?\000:0B\000+\000|0D\000+\000)\000\000", "0B0B0B0D0D0D\000\000", 0, 12); + x2("\000(\000?\000:0B\000+\000|0D\000+\000)\000{\0002\000}\000\000", "0B0B0B0D0D0D\000\000", 0, 12); + x2("\000(\000?\000:0B\000+\000|0D\000+\000)\000{\0001\000,\0002\000}\000\000", "0B0B0B0D0D0D\000\000", 0, 12); + x2("\000(\000?\000:0B\000+\000|\000\134\000A0D\000*\000)0F0F\000\000", "0F0F\000\000", 0, 4); + n("\000(\000?\000:0B\000+\000|\000\134\000A0D\000*\000)0F0F\000\000", "0B0D0F0F\000\000"); + x2("\000(\000?\000:\000^0B\000+\000|0D\000+\000)\000*0F\000\000", "0B0B0D0D0D0B0D0F\000\000", 12, 16); + x2("\000(\000?\000:\000^0B\000+\000|0D\000+\000)\000*0F\000\000", "0B0B0D0D0D0D0F\000\000", 0, 14); + x2("0F\000{\0000\000,\000}\000\000", "0F0F0F0F\000\000", 0, 8); + x2("0B\000|\000(\000?\000i\000)\000c\000\000", "\000C\000\000", 0, 2); + x2("\000(\000?\000i\000)\000c\000|0B\000\000", "\000C\000\000", 0, 2); + x2("\000(\000?\000i\000:0B\000)\000|\000a\000\000", "\000a\000\000", 0, 2); + n("\000(\000?\000i\000:0B\000)\000|\000a\000\000", "\000A\000\000"); + x2("\000[0B0D0F\000]\000?\000\000", "0B0D0F\000\000", 0, 2); + x2("\000[0B0D0F\000]\000*\000\000", "0B0D0F\000\000", 0, 6); + x2("\000[\000^0B0D0F\000]\000*\000\000", "0B0D0F\000\000", 0, 0); + n("\000[\000^0B0D0F\000]\000+\000\000", "0B0D0F\000\000"); + x2("0B\000?\000?\000\000", "0B0B0B\000\000", 0, 0); + x2("0D0B\000?\000?0D\000\000", "0D0B0D\000\000", 0, 6); + x2("0B\000*\000?\000\000", "0B0B0B\000\000", 0, 0); + x2("0D0B\000*\000?\000\000", "0D0B0B\000\000", 0, 2); + x2("0D0B\000*\000?0D\000\000", "0D0B0B0D\000\000", 0, 8); + x2("0B\000+\000?\000\000", "0B0B0B\000\000", 0, 2); + x2("0D0B\000+\000?\000\000", "0D0B0B\000\000", 0, 4); + x2("0D0B\000+\000?0D\000\000", "0D0B0B0D\000\000", 0, 8); + x2("\000(\000?\000:Y)\000?\000)\000?\000?\000\000", "Y)\000\000", 
0, 0); + x2("\000(\000?\000:Y)\000?\000?\000)\000?\000\000", "Y)\000\000", 0, 0); + x2("\000(\000?\000:Y\042\000?\000)\000+\000?\000\000", "Y\042Y\042Y\042\000\000", 0, 2); + x2("\000(\000?\000:\230\250\000+\000)\000?\000?\000\000", "\230\250\230\250\230\250\000\000", 0, 0); + x2("\000(\000?\000:\226\352\000+\000)\000?\000?\227\034\000\000", "\226\352\226\352\226\352\227\034\000\000", 0, 8); + x2("\000(\000?\000:0B0D\000)\000?\000{\0002\000}\000\000", "\000\000", 0, 0); + x2("\000(\000?\000:\233<\216\312\000)\000?\000{\0002\000}\000\000", "\233<\216\312\233<\216\312\233<\000\000", 0, 8); + x2("\000(\000?\000:\233<\216\312\000)\000*\000{\0000\000}\000\000", "\233<\216\312\233<\216\312\233<\000\000", 0, 0); + x2("\000(\000?\000:\233<\216\312\000)\000{\0003\000,\000}\000\000", "\233<\216\312\233<\216\312\233<\216\312\233<\216\312\000\000", 0, 16); + n("\000(\000?\000:\233<\216\312\000)\000{\0003\000,\000}\000\000", "\233<\216\312\233<\216\312\000\000"); + x2("\000(\000?\000:\233<\216\312\000)\000{\0002\000,\0004\000}\000\000", "\233<\216\312\233<\216\312\233<\216\312\000\000", 0, 12); + x2("\000(\000?\000:\233<\216\312\000)\000{\0002\000,\0004\000}\000\000", "\233<\216\312\233<\216\312\233<\216\312\233<\216\312\233<\216\312\000\000", 0, 16); + x2("\000(\000?\000:\233<\216\312\000)\000{\0002\000,\0004\000}\000?\000\000", "\233<\216\312\233<\216\312\233<\216\312\233<\216\312\233<\216\312\000\000", 0, 8); + x2("\000(\000?\000:\233<\216\312\000)\000{\000,\000}\000\000", "\233<\216\312\000{\000,\000}\000\000", 0, 10); + x2("\000(\000?\000:0K0M0O\000)\000+\000?\000{\0002\000}\000\000", "0K0M0O0K0M0O0K0M0O\000\000", 0, 12); + x3("\000(pk\000)\000\000", "pk\000\000", 0, 2, 1); + x3("\000(pkl4\000)\000\000", "pkl4\000\000", 0, 4, 1); + x2("\000(\000(fB\225\223\000)\000)\000\000", "fB\225\223\000\000", 0, 4); + x3("\000(\000(\230\250l4\000)\000)\000\000", "\230\250l4\000\000", 0, 4, 1); + x3("\000(\000(f(e\345\000)\000)\000\000", "f(e\345\000\000", 0, 4, 2); + 
x3("\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\221\317[P\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000\000", "\221\317[P\000\000", 0, 4, 20); + x3("\000(0B0D\000)\000(0F0H\000)\000\000", "0B0D0F0H\000\000", 0, 4, 1); + x3("\000(0B0D\000)\000(0F0H\000)\000\000", "0B0D0F0H\000\000", 4, 8, 2); + x3("\000(\000)\000(0B\000)0D0F\000(0H0J0K\000)0M0O0Q0S\000\000", "0B0D0F0H0J0K0M0O0Q0S\000\000", 6, 12, 3); + x3("\000(\000(\000)\000(0B\000)0D0F\000(0H0J0K\000)0M0O0Q0S\000)\000\000", "0B0D0F0H0J0K0M0O0Q0S\000\000", 6, 12, 4); + x3("\000.\000*\000(0\3250\251\000)0\3630\3730\336\000(0\363\000(\000)0\2670\3450\277\000)0\2440\363\000\000", "0\3250\2510\3630\3730\3360\3630\2670\3450\2770\2440\363\000\000", 10, 18, 2); + x2("\000(\000^0B\000)\000\000", "0B\000\000", 0, 2); + x3("\000(0B\000)\000|\000(0B\000)\000\000", "0D0B\000\000", 2, 4, 1); + x3("\000(\000^0B\000)\000|\000(0B\000)\000\000", "0D0B\000\000", 2, 4, 2); + x3("\000(0B\000?\000)\000\000", "0B0B0B\000\000", 0, 2, 1); + x3("\000(0~\000*\000)\000\000", "0~0~0~\000\000", 0, 6, 1); + x3("\000(0h\000*\000)\000\000", "\000\000", 0, 0, 1); + x3("\000(0\213\000+\000)\000\000", "0\2130\2130\2130\2130\2130\2130\213\000\000", 0, 14, 1); + x3("\000(0u\000+\000|0x\000*\000)\000\000", "0u0u0u0x0x\000\000", 0, 6, 1); + x3("\000(0B\000+\000|0D\000?\000)\000\000", "0D0D0D0B0B\000\000", 0, 2, 1); + x3("\000(0B0D0F\000)\000?\000\000", "0B0D0F\000\000", 0, 6, 1); + x3("\000(0B0D0F\000)\000*\000\000", "0B0D0F\000\000", 0, 6, 1); + x3("\000(0B0D0F\000)\000+\000\000", "0B0D0F\000\000", 0, 6, 1); + x3("\000(0U0W0Y\000|0B0D0F\000)\000+\000\000", "0B0D0F\000\000", 0, 6, 1); + x3("\000(\000[0j0k0l\000]\000[0K0M0O\000]\000|0K0M0O\000)\000+\000\000", "0K0M0O\000\000", 0, 6, 1); + x3("\000(\000(\000?\000i\000:0B0D0F\000)\000)\000\000", "0B0D0F\000\000", 0, 6, 1); + x3("\000(\000(\000?\000m\000:0B\000.0F\000)\000)\000\000", 
"0B\000\0120F\000\000", 0, 6, 1); + x3("\000(\000(\000?\000=0B0\223\000)0B\000)\000\000", "0B0\2230D\000\000", 0, 2, 1); + x3("0B0D0F\000|\000(\000.0B0D0H\000)\000\000", "0\2230B0D0H\000\000", 0, 8, 1); + x3("0B\000*\000(\000.\000)\000\000", "0B0B0B0B0\223\000\000", 8, 10, 1); + x3("0B\000*\000?\000(\000.\000)\000\000", "0B0B0B0B0\223\000\000", 0, 2, 1); + x3("0B\000*\000?\000(0\223\000)\000\000", "0B0B0B0B0\223\000\000", 8, 10, 1); + x3("\000[0D0F0H\000]0B\000*\000(\000.\000)\000\000", "0H0B0B0B0B0\223\000\000", 10, 12, 1); + x3("\000(\000\134\000A0D0D\000)0F0F\000\000", "0D0D0F0F\000\000", 0, 4, 1); + n("\000(\000\134\000A0D0D\000)0F0F\000\000", "0\2230D0D0F0F\000\000"); + x3("\000(\000^0D0D\000)0F0F\000\000", "0D0D0F0F\000\000", 0, 4, 1); + n("\000(\000^0D0D\000)0F0F\000\000", "0\2230D0D0F0F\000\000"); + x3("0\2150\215\000(0\2130\213\000$\000)\000\000", "0\2150\2150\2130\213\000\000", 4, 8, 1); + n("0\2150\215\000(0\2130\213\000$\000)\000\000", "0\2150\2150\2130\2130\213\000\000"); + x2("\000(q!\000)\000\134\0001\000\000", "q!q!\000\000", 0, 4); + n("\000(q!\000)\000\134\0001\000\000", "q!kf\000\000"); + x2("\000(zz\000?\000)\000\134\0001\000\000", "zzzz\000\000", 0, 4); + x2("\000(zz\000?\000?\000)\000\134\0001\000\000", "zzzz\000\000", 0, 0); + x2("\000(zz\000*\000)\000\134\0001\000\000", "zzzzzzzzzz\000\000", 0, 8); + x3("\000(zz\000*\000)\000\134\0001\000\000", "zzzzzzzzzz\000\000", 0, 4, 1); + x2("0B\000(0D\000*\000)\000\134\0001\000\000", "0B0D0D0D0D\000\000", 0, 10); + x2("0B\000(0D\000*\000)\000\134\0001\000\000", "0B0D\000\000", 0, 2); + x2("\000(0B\000*\000)\000(0D\000*\000)\000\134\0001\000\134\0002\000\000", "0B0B0B0D0D0B0B0B0D0D\000\000", 0, 20); + x2("\000(0B\000*\000)\000(0D\000*\000)\000\134\0002\000\000", "0B0B0B0D0D0D0D\000\000", 0, 14); + x3("\000(0B\000*\000)\000(0D\000*\000)\000\134\0002\000\000", "0B0B0B0D0D0D0D\000\000", 6, 10, 2); + 
x2("\000(\000(\000(\000(\000(\000(\000(0}\000*\000)0z\000)\000)\000)\000)\000)\000)0t\000\134\0007\000\000", "0}0}0}0z0t0}0}0}\000\000", 0, 16); + x3("\000(\000(\000(\000(\000(\000(\000(0}\000*\000)0z\000)\000)\000)\000)\000)\000)0t\000\134\0007\000\000", "0}0}0}0z0t0}0}0}\000\000", 0, 6, 7); + x2("\000(0o\000)\000(0r\000)\000(0u\000)\000\134\0002\000\134\0001\000\134\0003\000\000", "0o0r0u0r0o0u\000\000", 0, 12); + x2("\000(\000[0M\000-0Q\000]\000)\000\134\0001\000\000", "0O0O\000\000", 0, 4); + x2("\000(\000\134\000w\000\134\000d\000\134\000s\000)\000\134\0001\000\000", "0B\0005\000 0B\0005\000 \000\000", 0, 12); + n("\000(\000\134\000w\000\134\000d\000\134\000s\000)\000\134\0001\000\000", "0B\0005\000 0B\0005\000\000"); + x2("\000(\212\260\377\037\000|\000[0B\000-0F\000]\000{\0003\000}\000)\000\134\0001\000\000", "\212\260\377\037\212\260\377\037\000\000", 0, 8); + x2("\000.\000.\000.\000(\212\260\377\037\000|\000[0B\000-0F\000]\000{\0003\000}\000)\000\134\0001\000\000", "0B\000a0B\212\260\377\037\212\260\377\037\000\000", 0, 14); + x2("\000(\212\260\377\037\000|\000[0B\000-0F\000]\000{\0003\000}\000)\000\134\0001\000\000", "0F0D0F0F0D0F\000\000", 0, 12); + x2("\000(\000^0S\000)\000\134\0001\000\000", "0S0S\000\000", 0, 4); + n("\000(\000^0\200\000)\000\134\0001\000\000", "0\2010\2000\200\000\000"); + n("\000(0B\000$\000)\000\134\0001\000\000", "0B0B\000\000"); + n("\000(0B0D\000\134\000Z\000)\000\134\0001\000\000", "0B0D\000\000"); + x2("\000(0B\000*\000\134\000Z\000)\000\134\0001\000\000", "0B\000\000", 2, 2); + x2("\000.\000(0B\000*\000\134\000Z\000)\000\134\0001\000\000", "0D0B\000\000", 2, 4); + x3("\000(\000.\000(0\2040D0\206\000)\000\134\0002\000)\000\000", "\000z0\2040D0\2060\2040D0\206\000\000", 0, 14, 1); + x3("\000(\000.\000(\000.\000.\000\134\000d\000.\000)\000\134\0002\000)\000\000", "0B\0001\0002\0003\0004\0001\0002\0003\0004\000\000", 0, 18, 1); + x2("\000(\000(\000?\000i\000:0B\000v0Z\000)\000)\000\134\0001\000\000", "0B\000v0Z0B\000v0Z\000\000", 
0, 12); + x2("\000(\000?\000Y\011\000|\000\134\000(\000\134\000g\000\000\134\000)\000)\000\000", "\000(\000(\000(\000(\000(\000(Y\011\000)\000)\000)\000)\000)\000)\000\000", 0, 26); + x2("\000\134\000A\000(\000?\000:\000\134\000g\000<\226?\000_\0001\000>\000|\000\134\000g\000\000|\000\134\000z}BN\206\000 \000 \000(\000?\000<\226?\000_\0001\000>\211\263\000|\201\352\000\134\000g\000\201\352\000)\000(\000?\000W(\000|\203\351\205\251\000\134\000g\000<\226?\000_\0001\000>\203\351\205\251\000)\000)\000$\000\000", "\203\351\205\251\201\352\203\351\205\251\201\352W(\201\352\203\351\205\251\201\352\203\351\205\251\000\000", 0, 26); + x2("\000[\000[0r0u\000]\000]\000\000", "0u\000\000", 0, 2); + x2("\000[\000[0D0J0F\000]0K\000]\000\000", "0K\000\000", 0, 2); + n("\000[\000[\000^0B\000]\000]\000\000", "0B\000\000"); + n("\000[\000^\000[0B\000]\000]\000\000", "0B\000\000"); + x2("\000[\000^\000[\000^0B\000]\000]\000\000", "0B\000\000", 0, 2); + x2("\000[\000[0K0M0O\000]\000&\000&0M0O\000]\000\000", "0O\000\000", 0, 2); + n("\000[\000[0K0M0O\000]\000&\000&0M0O\000]\000\000", "0K\000\000"); + n("\000[\000[0K0M0O\000]\000&\000&0M0O\000]\000\000", "0Q\000\000"); + x2("\000[0B\000-0\223\000&\000&0D\000-0\222\000&\000&0F\000-0\221\000]\000\000", "0\221\000\000", 0, 2); + n("\000[\000^0B\000-0\223\000&\000&0D\000-0\222\000&\000&0F\000-0\221\000]\000\000", "0\221\000\000"); + x2("\000[\000[\000^0B\000&\000&0B\000]\000&\000&0B\000-0\223\000]\000\000", "0D\000\000", 0, 2); + n("\000[\000[\000^0B\000&\000&0B\000]\000&\000&0B\000-0\223\000]\000\000", "0B\000\000"); + x2("\000[\000[\000^0B\000-0\223\000&\000&0D0F0H0J\000]\000&\000&\000[\000^0F\000-0K\000]\000]\000\000", "0M\000\000", 0, 2); + n("\000[\000[\000^0B\000-0\223\000&\000&0D0F0H0J\000]\000&\000&\000[\000^0F\000-0K\000]\000]\000\000", "0D\000\000"); + x2("\000[\000^\000[\000^0B0D0F\000]\000&\000&\000[\000^0F0H0J\000]\000]\000\000", "0F\000\000", 0, 2); + 
x2("\000[\000^\000[\000^0B0D0F\000]\000&\000&\000[\000^0F0H0J\000]\000]\000\000", "0H\000\000", 0, 2); + n("\000[\000^\000[\000^0B0D0F\000]\000&\000&\000[\000^0F0H0J\000]\000]\000\000", "0K\000\000"); + x2("\000[0B\000-\000&\000&\000-0B\000]\000\000", "\000-\000\000", 0, 2); + x2("\000[\000^\000[\000^\000a\000-\000z0B0D0F\000]\000&\000&\000[\000^\000b\000c\000d\000e\000f\000g0F0H0J\000]\000q\000-\000w\000]\000\000", "0H\000\000", 0, 2); + x2("\000[\000^\000[\000^\000a\000-\000z0B0D0F\000]\000&\000&\000[\000^\000b\000c\000d\000e\000f\000g0F0H0J\000]\000g\000-\000w\000]\000\000", "\000f\000\000", 0, 2); + x2("\000[\000^\000[\000^\000a\000-\000z0B0D0F\000]\000&\000&\000[\000^\000b\000c\000d\000e\000f\000g0F0H0J\000]\000g\000-\000w\000]\000\000", "\000g\000\000", 0, 2); + n("\000[\000^\000[\000^\000a\000-\000z0B0D0F\000]\000&\000&\000[\000^\000b\000c\000d\000e\000f\000g0F0H0J\000]\000g\000-\000w\000]\000\000", "\0002\000\000"); + x2("\000a\000<\000b\000>0\3200\3740\2700\3470\3630n0\3000\2460\3630\3550\3740\311\000<\000\134\000/\000b\000>\000\000", "\000a\000<\000b\000>0\3200\3740\2700\3470\3630n0\3000\2460\3630\3550\3740\311\000<\000/\000b\000>\000\000", 0, 40); + x2("\000.\000<\000b\000>0\3200\3740\2700\3470\3630n0\3000\2460\3630\3550\3740\311\000<\000\134\000/\000b\000>\000\000", "\000a\000<\000b\000>0\3200\3740\2700\3470\3630n0\3000\2460\3630\3550\3740\311\000<\000/\000b\000>\000\000", 0, 40); + fprintf(stdout, + "\nRESULT SUCC: %d, FAIL: %d, ERROR: %d (by Oniguruma %s)\n", + nsucc, nfail, nerror, onig_version()); + +#ifndef POSIX_TEST + onig_region_free(region, 1); + onig_end(); +#endif + + return ((nfail == 0 && nerror == 0) ? 
0 : -1); +} diff --git a/oniguruma/win32/Makefile b/oniguruma/win32/Makefile new file mode 100644 index 0000000..27d8832 --- /dev/null +++ b/oniguruma/win32/Makefile @@ -0,0 +1,200 @@ +# Oniguruma Makefile for Win32 + +product_name = oniguruma + +CPPFLAGS = +CFLAGS = -O2 -nologo /W3 +LDFLAGS = +LOADLIBES = +ARLIB = lib +ARLIB_FLAGS = -nologo +ARDLL = cl +ARDLL_FLAGS = -nologo -LD $(LINKFLAGS) -dll +LINKFLAGS = -link -incremental:no -pdb:none + +INSTALL = install -c +CP = copy +CC = cl +DEFS = -DHAVE_CONFIG_H -DNOT_RUBY -DEXPORT +RUBYDIR = .. + +subdirs = + +libbase = onig +libname = $(libbase)_s.lib +dllname = $(libbase).dll +dlllib = $(libbase).lib + +onigheaders = oniguruma.h regint.h regparse.h regenc.h st.h +posixheaders = onigposix.h +headers = $(posixheaders) $(onigheaders) + +onigobjs = reggnu.obj regerror.obj regparse.obj regext.obj regcomp.obj \ + regexec.obj regenc.obj regsyntax.obj regtrav.obj \ + regversion.obj st.obj +posixobjs = regposix.obj regposerr.obj +libobjs = $(onigobjs) $(posixobjs) + +jp_objs = $(encdir)\euc_jp.obj $(encdir)\sjis.obj +iso8859_objs = $(encdir)\iso8859_1.obj $(encdir)\iso8859_2.obj \ + $(encdir)\iso8859_3.obj $(encdir)\iso8859_4.obj \ + $(encdir)\iso8859_5.obj $(encdir)\iso8859_6.obj \ + $(encdir)\iso8859_7.obj $(encdir)\iso8859_8.obj \ + $(encdir)\iso8859_9.obj $(encdir)\iso8859_10.obj \ + $(encdir)\iso8859_11.obj $(encdir)\iso8859_13.obj \ + $(encdir)\iso8859_14.obj $(encdir)\iso8859_15.obj \ + $(encdir)\iso8859_16.obj + +encobjs = $(encdir)\ascii.obj $(encdir)\utf8.obj \ + $(encdir)\unicode.obj \ + $(encdir)\utf16_be.obj $(encdir)\utf16_le.obj \ + $(encdir)\utf32_be.obj $(encdir)\utf32_le.obj \ + $(jp_objs) $(iso8859_objs) \ + $(encdir)\euc_tw.obj $(encdir)\euc_kr.obj $(encdir)\big5.obj \ + $(encdir)\gb18030.obj \ + $(encdir)\koi8_r.obj \ + $(encdir)\cp1251.obj # $(encdir)\koi8.obj + +onigsources = regerror.c regparse.c regext.c regcomp.c regexec.c regenc.c \ + regsyntax.c regtrav.c regversion.c reggnu.c st.c +posixsources = 
regposix.c regposerr.c +libsources = $(posixsources) $(onigsources) +rubysources = $(onigsources) + +encdir = enc +patchfiles = re.c.168.patch re.c.181.patch +distfiles = README COPYING HISTORY \ + Makefile.in configure.in config.h.in configure \ + $(headers) $(libsources) $(patchfiles) \ + test.rb testconv.rb +testc = testc +testp = testp + +makeargs = $(MFLAGS) CPPFLAGS='$(CPPFLAGS)' CFLAGS='$(CFLAGS)' CC='$(CC)' + +.SUFFIXES: +.SUFFIXES: .obj .c .h .ps .dvi .info .texinfo + +.c.obj: + $(CC) $(CFLAGS) $(CPPFLAGS) $(DEFS) /I. /I.. /Fo$@ /c $< + +# targets +default: all + +setup: + $(CP) win32\config.h config.h + $(CP) win32\testc.c testc.c + + +all: $(libname) $(dllname) + +$(libname): $(libobjs) $(encobjs) + $(ARLIB) $(ARLIB_FLAGS) -out:$@ $(libobjs) $(encobjs) + +$(dllname): $(libobjs) $(encobjs) + $(ARDLL) $(libobjs) $(encobjs) -Fe$@ $(ARDLL_FLAGS) + +regparse.obj: regparse.c $(onigheaders) config.h st.h +regext.obj: regext.c $(onigheaders) config.h +regtrav.obj: regtrav.c $(onigheaders) config.h +regcomp.obj: regcomp.c $(onigheaders) config.h +regexec.obj: regexec.c regint.h regenc.h oniguruma.h config.h +reggnu.obj: reggnu.c regint.h regenc.h oniguruma.h config.h oniggnu.h +regerror.obj: regerror.c regint.h regenc.h oniguruma.h config.h +regenc.obj: regenc.c regenc.h oniguruma.h config.h +regsyntax.obj: regsyntax.c regint.h regenc.h oniguruma.h config.h +regversion.obj: regversion.c oniguruma.h config.h +regposix.obj: regposix.c $(posixheaders) oniguruma.h config.h +regposerr.obj: regposerr.c $(posixheaders) config.h +st.obj: st.c regint.h oniguruma.h config.h st.h + +$(encdir)\ascii.obj: $(encdir)\ascii.c regenc.h config.h +$(encdir)\unicode.obj: $(encdir)\unicode.c regenc.h config.h +$(encdir)\utf8.obj: $(encdir)\utf8.c regenc.h config.h +$(encdir)\utf16_be.obj: $(encdir)\utf16_be.c regenc.h config.h +$(encdir)\utf16_le.obj: $(encdir)\utf16_le.c regenc.h config.h +$(encdir)\utf32_be.obj: $(encdir)\utf32_be.c regenc.h config.h +$(encdir)\utf32_le.obj: 
$(encdir)\utf32_le.c regenc.h config.h +$(encdir)\euc_jp.obj: $(encdir)\euc_jp.c regenc.h config.h +$(encdir)\euc_tw.obj: $(encdir)\euc_tw.c regenc.h config.h +$(encdir)\euc_kr.obj: $(encdir)\euc_kr.c regenc.h config.h +$(encdir)\sjis.obj: $(encdir)\sjis.c regenc.h config.h +$(encdir)\iso8859_1.obj: $(encdir)\iso8859_1.c regenc.h config.h +$(encdir)\iso8859_2.obj: $(encdir)\iso8859_2.c regenc.h config.h +$(encdir)\iso8859_3.obj: $(encdir)\iso8859_3.c regenc.h config.h +$(encdir)\iso8859_4.obj: $(encdir)\iso8859_4.c regenc.h config.h +$(encdir)\iso8859_5.obj: $(encdir)\iso8859_5.c regenc.h config.h +$(encdir)\iso8859_6.obj: $(encdir)\iso8859_6.c regenc.h config.h +$(encdir)\iso8859_7.obj: $(encdir)\iso8859_7.c regenc.h config.h +$(encdir)\iso8859_8.obj: $(encdir)\iso8859_8.c regenc.h config.h +$(encdir)\iso8859_9.obj: $(encdir)\iso8859_9.c regenc.h config.h +$(encdir)\iso8859_10.obj: $(encdir)\iso8859_10.c regenc.h config.h +$(encdir)\iso8859_11.obj: $(encdir)\iso8859_11.c regenc.h config.h +$(encdir)\iso8859_13.obj: $(encdir)\iso8859_13.c regenc.h config.h +$(encdir)\iso8859_14.obj: $(encdir)\iso8859_14.c regenc.h config.h +$(encdir)\iso8859_15.obj: $(encdir)\iso8859_15.c regenc.h config.h +$(encdir)\iso8859_16.obj: $(encdir)\iso8859_16.c regenc.h config.h +$(encdir)\koi8.obj: $(encdir)\koi8.c regenc.h config.h +$(encdir)\koi8_r.obj: $(encdir)\koi8_r.c regenc.h config.h +$(encdir)\cp1251.obj: $(encdir)\cp1251.c regenc.h config.h +$(encdir)\big5.obj: $(encdir)\big5.c regenc.h config.h +$(encdir)\gb18030.obj: $(encdir)\gb18030.c regenc.h config.h + + +# Ruby test +rtest: + $(RUBYDIR)\win32\ruby -w -Ke test.rb + +# C library test +ctest: $(testc) + .\$(testc) + +# POSIX C library test +ptest: $(testp) + .\$(testp) + +$(testc): $(testc).c $(libname) + $(CC) -nologo -o $(testc) -DONIG_EXTERN=extern $(testc).c $(libname) + +$(testp): $(testc).c $(dlllib) + $(CC) -nologo -DPOSIX_TEST -o $(testp) $(testc).c $(dlllib) + +#$(testc)u.c: test.rb testconvu.rb +# ruby -Ke 
testconvu.rb test.rb > $@ + +$(testc)u: $(testc)u.c $(libname) + $(CC) -nologo -o $(testc)u -DONIG_EXTERN=extern $(testc)u.c $(libname) + +clean: + del *.obj $(encdir)\*.obj *.lib *.exp *.dll $(testp).exe $(testc).exe $(testc).obj + + +# backup file suffix +SORIG = ruby_orig + +# ruby 1.9 source update +19: + $(CP) regerror.c $(RUBYDIR) + $(CP) regparse.c $(RUBYDIR) + $(CP) regcomp.c $(RUBYDIR) + $(CP) regexec.c $(RUBYDIR) + $(CP) regenc.c $(RUBYDIR) + $(CP) regint.h $(RUBYDIR) + $(CP) regparse.h $(RUBYDIR) + $(CP) regenc.h $(RUBYDIR) + $(CP) oniguruma.h $(RUBYDIR) + $(CP) enc\ascii.c $(RUBYDIR) + $(CP) enc\utf8.c $(RUBYDIR) + $(CP) enc\euc_jp.c $(RUBYDIR) + $(CP) enc\sjis.c $(RUBYDIR) + $(CP) enc\unicode.c $(RUBYDIR) + + +samples: all + $(CC) $(CFLAGS) -I. -o simple sample\simple.c $(dlllib) + $(CC) $(CFLAGS) -I. -o posix sample\posix.c $(dlllib) + $(CC) $(CFLAGS) -I. -o names sample\names.c $(dlllib) + $(CC) $(CFLAGS) -I. -o listcap sample\listcap.c $(dlllib) + $(CC) $(CFLAGS) -I. -o sql sample\sql.c $(dlllib) + $(CC) $(CFLAGS) -I. -o encode sample\encode.c $(dlllib) + $(CC) $(CFLAGS) -I. 
-o syntax sample\syntax.c $(dlllib) diff --git a/oniguruma/win32/config.h b/oniguruma/win32/config.h new file mode 100644 index 0000000..7ee9e25 --- /dev/null +++ b/oniguruma/win32/config.h @@ -0,0 +1,84 @@ +#define STDC_HEADERS 1 +#define HAVE_SYS_TYPES_H 1 +#define HAVE_SYS_STAT_H 1 +#define HAVE_STDLIB_H 1 +#define HAVE_STRING_H 1 +#define HAVE_MEMORY_H 1 +#define HAVE_FLOAT_H 1 +#define HAVE_OFF_T 1 +#define SIZEOF_INT 4 +#define SIZEOF_SHORT 2 +#define SIZEOF_LONG 4 +#define SIZEOF_LONG_LONG 0 +#define SIZEOF___INT64 8 +#define SIZEOF_OFF_T 4 +#define SIZEOF_VOIDP 4 +#define SIZEOF_FLOAT 4 +#define SIZEOF_DOUBLE 8 +#define HAVE_PROTOTYPES 1 +#define TOKEN_PASTE(x,y) x##y +#define HAVE_STDARG_PROTOTYPES 1 +#ifndef NORETURN +#if _MSC_VER > 1100 +#define NORETURN(x) __declspec(noreturn) x +#else +#define NORETURN(x) x +#endif +#endif +#define HAVE_DECL_SYS_NERR 1 +#define STDC_HEADERS 1 +#define HAVE_STDLIB_H 1 +#define HAVE_STRING_H 1 +#define HAVE_LIMITS_H 1 +#define HAVE_FCNTL_H 1 +#define HAVE_SYS_UTIME_H 1 +#define HAVE_MEMORY_H 1 +#define uid_t int +#define gid_t int +#define HAVE_STRUCT_STAT_ST_RDEV 1 +#define HAVE_ST_RDEV 1 +#define GETGROUPS_T int +#define RETSIGTYPE void +#define HAVE_ALLOCA 1 +#define HAVE_DUP2 1 +#define HAVE_MEMCMP 1 +#define HAVE_MEMMOVE 1 +#define HAVE_MKDIR 1 +#define HAVE_STRCASECMP 1 +#define HAVE_STRNCASECMP 1 +#define HAVE_STRERROR 1 +#define HAVE_STRFTIME 1 +#define HAVE_STRCHR 1 +#define HAVE_STRSTR 1 +#define HAVE_STRTOD 1 +#define HAVE_STRTOL 1 +#define HAVE_STRTOUL 1 +#define HAVE_FLOCK 1 +#define HAVE_VSNPRINTF 1 +#define HAVE_FINITE 1 +#define HAVE_FMOD 1 +#define HAVE_FREXP 1 +#define HAVE_HYPOT 1 +#define HAVE_MODF 1 +#define HAVE_WAITPID 1 +#define HAVE_CHSIZE 1 +#define HAVE_TIMES 1 +#define HAVE__SETJMP 1 +#define HAVE_TELLDIR 1 +#define HAVE_SEEKDIR 1 +#define HAVE_MKTIME 1 +#define HAVE_COSH 1 +#define HAVE_SINH 1 +#define HAVE_TANH 1 +#define HAVE_EXECVE 1 +#define HAVE_TZNAME 1 +#define HAVE_DAYLIGHT 1 +#define 
SETPGRP_VOID 1 +#define inline __inline +#define NEED_IO_SEEK_BETWEEN_RW 1 +#define RSHIFT(x,y) ((x)>>(int)y) +#define FILE_COUNT _cnt +#define FILE_READPTR _ptr +#define DEFAULT_KCODE KCODE_NONE +#define DLEXT ".so" +#define DLEXT2 ".dll" diff --git a/oniguruma/win32/testc.c b/oniguruma/win32/testc.c new file mode 100644 index 0000000..4b8f037 --- /dev/null +++ b/oniguruma/win32/testc.c @@ -0,0 +1,863 @@ +/* + * This program was generated by testconv.rb. + */ +#include "config.h" +#ifdef ONIG_ESCAPE_UCHAR_COLLISION +#undef ONIG_ESCAPE_UCHAR_COLLISION +#endif +#include + +#ifdef POSIX_TEST +#include "onigposix.h" +#else +#include "oniguruma.h" +#endif + +#ifdef HAVE_STRING_H +# include +#else +# include +#endif + +#define SLEN(s) strlen(s) + +static int nsucc = 0; +static int nfail = 0; +static int nerror = 0; + +static FILE* err_file; + +#ifndef POSIX_TEST +static OnigRegion* region; +#endif + +static void xx(char* pattern, char* str, int from, int to, int mem, int not) +{ + int r; + +#ifdef POSIX_TEST + regex_t reg; + char buf[200]; + regmatch_t pmatch[25]; + + r = regcomp(®, pattern, REG_EXTENDED | REG_NEWLINE); + if (r) { + regerror(r, ®, buf, sizeof(buf)); + fprintf(err_file, "ERROR: %s\n", buf); + nerror++; + return ; + } + + r = regexec(®, str, reg.re_nsub + 1, pmatch, 0); + if (r != 0 && r != REG_NOMATCH) { + regerror(r, ®, buf, sizeof(buf)); + fprintf(err_file, "ERROR: %s\n", buf); + nerror++; + return ; + } + + if (r == REG_NOMATCH) { + if (not) { + fprintf(stdout, "OK(N): /%s/ '%s'\n", pattern, str); + nsucc++; + } + else { + fprintf(stdout, "FAIL: /%s/ '%s'\n", pattern, str); + nfail++; + } + } + else { + if (not) { + fprintf(stdout, "FAIL(N): /%s/ '%s'\n", pattern, str); + nfail++; + } + else { + if (pmatch[mem].rm_so == from && pmatch[mem].rm_eo == to) { + fprintf(stdout, "OK: /%s/ '%s'\n", pattern, str); + nsucc++; + } + else { + fprintf(stdout, "FAIL: /%s/ '%s' %d-%d : %d-%d\n", pattern, str, + from, to, pmatch[mem].rm_so, pmatch[mem].rm_eo); + 
nfail++; + } + } + } + regfree(®); + +#else + regex_t* reg; + OnigErrorInfo einfo; + + r = onig_new(®, (UChar* )pattern, (UChar* )(pattern + SLEN(pattern)), + ONIG_OPTION_DEFAULT, ONIG_ENCODING_SJIS, ONIG_SYNTAX_DEFAULT, &einfo); + if (r) { + char s[ONIG_MAX_ERROR_MESSAGE_LEN]; + onig_error_code_to_str((UChar* )s, r, &einfo); + fprintf(err_file, "ERROR: %s\n", s); + nerror++; + return ; + } + + r = onig_search(reg, (UChar* )str, (UChar* )(str + SLEN(str)), + (UChar* )str, (UChar* )(str + SLEN(str)), + region, ONIG_OPTION_NONE); + if (r < ONIG_MISMATCH) { + char s[ONIG_MAX_ERROR_MESSAGE_LEN]; + onig_error_code_to_str((UChar* )s, r); + fprintf(err_file, "ERROR: %s\n", s); + nerror++; + return ; + } + + if (r == ONIG_MISMATCH) { + if (not) { + fprintf(stdout, "OK(N): /%s/ '%s'\n", pattern, str); + nsucc++; + } + else { + fprintf(stdout, "FAIL: /%s/ '%s'\n", pattern, str); + nfail++; + } + } + else { + if (not) { + fprintf(stdout, "FAIL(N): /%s/ '%s'\n", pattern, str); + nfail++; + } + else { + if (region->beg[mem] == from && region->end[mem] == to) { + fprintf(stdout, "OK: /%s/ '%s'\n", pattern, str); + nsucc++; + } + else { + fprintf(stdout, "FAIL: /%s/ '%s' %d-%d : %d-%d\n", pattern, str, + from, to, region->beg[mem], region->end[mem]); + nfail++; + } + } + } + onig_free(reg); +#endif +} + +static void x2(char* pattern, char* str, int from, int to) +{ + xx(pattern, str, from, to, 0, 0); +} + +static void x3(char* pattern, char* str, int from, int to, int mem) +{ + xx(pattern, str, from, to, mem, 0); +} + +static void n(char* pattern, char* str) +{ + xx(pattern, str, 0, 0, 0, 1); +} + +extern int main(int argc, char* argv[]) +{ + err_file = stdout; + +#ifdef POSIX_TEST + reg_set_encoding(REG_POSIX_ENCODING_SJIS); +#else + region = onig_region_new(); +#endif + + x2("", "", 0, 0); + x2("^", "", 0, 0); + x2("$", "", 0, 0); + x2("\\G", "", 0, 0); + x2("\\A", "", 0, 0); + x2("\\Z", "", 0, 0); + x2("\\z", "", 0, 0); + x2("^$", "", 0, 0); + x2("\\ca", "\001", 0, 1); + 
x2("\\C-b", "\002", 0, 1); + x2("\\c\\\\", "\034", 0, 1); + x2("q[\\c\\\\]", "q\034", 0, 2); + x2("", "a", 0, 0); + x2("a", "a", 0, 1); + x2("\\x61", "a", 0, 1); + x2("aa", "aa", 0, 2); + x2("aaa", "aaa", 0, 3); + x2("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", 0, 35); + x2("ab", "ab", 0, 2); + x2("b", "ab", 1, 2); + x2("bc", "abc", 1, 3); + x2("(?i:#RET#)", "#INS##RET#", 5, 10); + x2("\\17", "\017", 0, 1); + x2("\\x1f", "\x1f", 0, 1); + x2("a(?#....\\\\JJJJ)b", "ab", 0, 2); + x2("(?x) G (o O(?-x)oO) g L", "GoOoOgLe", 0, 7); + x2(".", "a", 0, 1); + n(".", ""); + x2("..", "ab", 0, 2); + x2("\\w", "e", 0, 1); + n("\\W", "e"); + x2("\\s", " ", 0, 1); + x2("\\S", "b", 0, 1); + x2("\\d", "4", 0, 1); + n("\\D", "4"); + x2("\\b", "z ", 0, 0); + x2("\\b", " z", 1, 1); + x2("\\B", "zz ", 1, 1); + x2("\\B", "z ", 2, 2); + x2("\\B", " z", 0, 0); + x2("[ab]", "b", 0, 1); + n("[ab]", "c"); + x2("[a-z]", "t", 0, 1); + n("[^a]", "a"); + x2("[^a]", "\n", 0, 1); + x2("[]]", "]", 0, 1); + n("[^]]", "]"); + x2("[\\^]+", "0^^1", 1, 3); + x2("[b-]", "b", 0, 1); + x2("[b-]", "-", 0, 1); + x2("[\\w]", "z", 0, 1); + n("[\\w]", " "); + x2("[\\W]", "b$", 1, 2); + x2("[\\d]", "5", 0, 1); + n("[\\d]", "e"); + x2("[\\D]", "t", 0, 1); + n("[\\D]", "3"); + x2("[\\s]", " ", 0, 1); + n("[\\s]", "a"); + x2("[\\S]", "b", 0, 1); + n("[\\S]", " "); + x2("[\\w\\d]", "2", 0, 1); + n("[\\w\\d]", " "); + x2("[[:upper:]]", "B", 0, 1); + x2("[*[:xdigit:]+]", "+", 0, 1); + x2("[*[:xdigit:]+]", "GHIKK-9+*", 6, 7); + x2("[*[:xdigit:]+]", "-@^+", 3, 4); + n("[[:upper]]", "A"); + x2("[[:upper]]", ":", 0, 1); + x2("[\\044-\\047]", "\046", 0, 1); + x2("[\\x5a-\\x5c]", "\x5b", 0, 1); + x2("[\\x6A-\\x6D]", "\x6c", 0, 1); + n("[\\x6A-\\x6D]", "\x6E"); + n("^[0-9A-F]+ 0+ UNDEF ", "75F 00000000 SECT14A notype () External | _rb_apply"); + x2("[\\[]", "[", 0, 1); + x2("[\\]]", "]", 0, 1); + x2("[&]", "&", 0, 1); + x2("[[ab]]", "b", 0, 1); + x2("[[ab]c]", "c", 0, 1); + n("[[^a]]", "a"); + 
n("[^[a]]", "a"); + x2("[[ab]&&bc]", "b", 0, 1); + n("[[ab]&&bc]", "a"); + n("[[ab]&&bc]", "c"); + x2("[a-z&&b-y&&c-x]", "w", 0, 1); + n("[^a-z&&b-y&&c-x]", "w"); + x2("[[^a&&a]&&a-z]", "b", 0, 1); + n("[[^a&&a]&&a-z]", "a"); + x2("[[^a-z&&bcdef]&&[^c-g]]", "h", 0, 1); + n("[[^a-z&&bcdef]&&[^c-g]]", "c"); + x2("[^[^abc]&&[^cde]]", "c", 0, 1); + x2("[^[^abc]&&[^cde]]", "e", 0, 1); + n("[^[^abc]&&[^cde]]", "f"); + x2("[a-&&-a]", "-", 0, 1); + n("[a\\-&&\\-a]", "&"); + n("\\wabc", " abc"); + x2("a\\Wbc", "a bc", 0, 4); + x2("a.b.c", "aabbc", 0, 5); + x2(".\\wb\\W..c", "abb bcc", 0, 7); + x2("\\s\\wzzz", " zzzz", 0, 5); + x2("aa.b", "aabb", 0, 4); + n(".a", "ab"); + x2(".a", "aa", 0, 2); + x2("^a", "a", 0, 1); + x2("^a$", "a", 0, 1); + x2("^\\w$", "a", 0, 1); + n("^\\w$", " "); + x2("^\\wab$", "zab", 0, 3); + x2("^\\wabcdef$", "zabcdef", 0, 7); + x2("^\\w...def$", "zabcdef", 0, 7); + x2("\\w\\w\\s\\Waaa\\d", "aa aaa4", 0, 8); + x2("\\A\\Z", "", 0, 0); + x2("\\Axyz", "xyz", 0, 3); + x2("xyz\\Z", "xyz", 0, 3); + x2("xyz\\z", "xyz", 0, 3); + x2("a\\Z", "a", 0, 1); + x2("\\Gaz", "az", 0, 2); + n("\\Gz", "bza"); + n("az\\G", "az"); + n("az\\A", "az"); + n("a\\Az", "az"); + x2("\\^\\$", "^$", 0, 2); + x2("^x?y", "xy", 0, 2); + x2("^(x?y)", "xy", 0, 2); + x2("\\w", "_", 0, 1); + n("\\W", "_"); + x2("(?=z)z", "z", 0, 1); + n("(?=z).", "a"); + x2("(?!z)a", "a", 0, 1); + n("(?!z)a", "z"); + x2("(?i:a)", "a", 0, 1); + x2("(?i:a)", "A", 0, 1); + x2("(?i:A)", "a", 0, 1); + n("(?i:A)", "b"); + x2("(?i:[A-Z])", "a", 0, 1); + x2("(?i:[f-m])", "H", 0, 1); + x2("(?i:[f-m])", "h", 0, 1); + n("(?i:[f-m])", "e"); + x2("(?i:[A-c])", "D", 0, 1); + n("(?i:[^a-z])", "A"); + n("(?i:[^a-z])", "a"); + x2("(?i:[!-k])", "Z", 0, 1); + x2("(?i:[!-k])", "7", 0, 1); + x2("(?i:[T-}])", "b", 0, 1); + x2("(?i:[T-}])", "{", 0, 1); + x2("(?i:\\?a)", "?A", 0, 2); + x2("(?i:\\*A)", "*a", 0, 2); + n(".", "\n"); + x2("(?m:.)", "\n", 0, 1); + x2("(?m:a.)", "a\n", 0, 2); + x2("(?m:.b)", "a\nb", 1, 3); + 
x2(".*abc", "dddabdd\nddabc", 8, 13); + x2("(?m:.*abc)", "dddabddabc", 0, 10); + n("(?i)(?-i)a", "A"); + n("(?i)(?-i:a)", "A"); + x2("a?", "", 0, 0); + x2("a?", "b", 0, 0); + x2("a?", "a", 0, 1); + x2("a*", "", 0, 0); + x2("a*", "a", 0, 1); + x2("a*", "aaa", 0, 3); + x2("a*", "baaaa", 0, 0); + n("a+", ""); + x2("a+", "a", 0, 1); + x2("a+", "aaaa", 0, 4); + x2("a+", "aabbb", 0, 2); + x2("a+", "baaaa", 1, 5); + x2(".?", "", 0, 0); + x2(".?", "f", 0, 1); + x2(".?", "\n", 0, 0); + x2(".*", "", 0, 0); + x2(".*", "abcde", 0, 5); + x2(".+", "z", 0, 1); + x2(".+", "zdswer\n", 0, 6); + x2("(.*)a\\1f", "babfbac", 0, 4); + x2("(.*)a\\1f", "bacbabf", 3, 7); + x2("((.*)a\\2f)", "bacbabf", 3, 7); + x2("(.*)a\\1f", "baczzzzzz\nbazz\nzzzzbabf", 19, 23); + x2("a|b", "a", 0, 1); + x2("a|b", "b", 0, 1); + x2("|a", "a", 0, 0); + x2("(|a)", "a", 0, 0); + x2("ab|bc", "ab", 0, 2); + x2("ab|bc", "bc", 0, 2); + x2("z(?:ab|bc)", "zbc", 0, 3); + x2("a(?:ab|bc)c", "aabc", 0, 4); + x2("ab|(?:ac|az)", "az", 0, 2); + x2("a|b|c", "dc", 1, 2); + x2("a|b|cd|efg|h|ijk|lmn|o|pq|rstuvwx|yz", "pqr", 0, 2); + n("a|b|cd|efg|h|ijk|lmn|o|pq|rstuvwx|yz", "mn"); + x2("a|^z", "ba", 1, 2); + x2("a|^z", "za", 0, 1); + x2("a|\\Gz", "bza", 2, 3); + x2("a|\\Gz", "za", 0, 1); + x2("a|\\Az", "bza", 2, 3); + x2("a|\\Az", "za", 0, 1); + x2("a|b\\Z", "ba", 1, 2); + x2("a|b\\Z", "b", 0, 1); + x2("a|b\\z", "ba", 1, 2); + x2("a|b\\z", "b", 0, 1); + x2("\\w|\\s", " ", 0, 1); + n("\\w|\\w", " "); + x2("\\w|%", "%", 0, 1); + x2("\\w|[&$]", "&", 0, 1); + x2("[b-d]|[^e-z]", "a", 0, 1); + x2("(?:a|[c-f])|bz", "dz", 0, 1); + x2("(?:a|[c-f])|bz", "bz", 0, 2); + x2("abc|(?=zz)..f", "zzf", 0, 3); + x2("abc|(?!zz)..f", "abf", 0, 3); + x2("(?=za)..a|(?=zz)..a", "zza", 0, 3); + n("(?>a|abd)c", "abdc"); + x2("(?>abd|a)c", "abdc", 0, 4); + x2("a?|b", "a", 0, 1); + x2("a?|b", "b", 0, 0); + x2("a?|b", "", 0, 0); + x2("a*|b", "aa", 0, 2); + x2("a*|b*", "ba", 0, 0); + x2("a*|b*", "ab", 0, 1); + x2("a+|b*", "", 0, 0); + x2("a+|b*", "bbb", 0, 
3); + x2("a+|b*", "abbb", 0, 1); + n("a+|b+", ""); + x2("(a|b)?", "b", 0, 1); + x2("(a|b)*", "ba", 0, 2); + x2("(a|b)+", "bab", 0, 3); + x2("(ab|ca)+", "caabbc", 0, 4); + x2("(ab|ca)+", "aabca", 1, 5); + x2("(ab|ca)+", "abzca", 0, 2); + x2("(a|bab)+", "ababa", 0, 5); + x2("(a|bab)+", "ba", 1, 2); + x2("(a|bab)+", "baaaba", 1, 4); + x2("(?:a|b)(?:a|b)", "ab", 0, 2); + x2("(?:a*|b*)(?:a*|b*)", "aaabbb", 0, 3); + x2("(?:a*|b*)(?:a+|b+)", "aaabbb", 0, 6); + x2("(?:a+|b+){2}", "aaabbb", 0, 6); + x2("h{0,}", "hhhh", 0, 4); + x2("(?:a+|b+){1,2}", "aaabbb", 0, 6); + n("ax{2}*a", "0axxxa1"); + n("a.{0,2}a", "0aXXXa0"); + n("a.{0,2}?a", "0aXXXa0"); + n("a.{0,2}?a", "0aXXXXa0"); + x2("^a{2,}?a$", "aaa", 0, 3); + x2("^[a-z]{2,}?$", "aaa", 0, 3); + x2("(?:a+|\\Ab*)cc", "cc", 0, 2); + n("(?:a+|\\Ab*)cc", "abcc"); + x2("(?:^a+|b+)*c", "aabbbabc", 6, 8); + x2("(?:^a+|b+)*c", "aabbbbc", 0, 7); + x2("a|(?i)c", "C", 0, 1); + x2("(?i)c|a", "C", 0, 1); + x2("(?i)c|a", "A", 0, 1); + x2("(?i:c)|a", "C", 0, 1); + n("(?i:c)|a", "A"); + x2("[abc]?", "abc", 0, 1); + x2("[abc]*", "abc", 0, 3); + x2("[^abc]*", "abc", 0, 0); + n("[^abc]+", "abc"); + x2("a?\?", "aaa", 0, 0); + x2("ba?\?b", "bab", 0, 3); + x2("a*?", "aaa", 0, 0); + x2("ba*?", "baa", 0, 1); + x2("ba*?b", "baab", 0, 4); + x2("a+?", "aaa", 0, 1); + x2("ba+?", "baa", 0, 2); + x2("ba+?b", "baab", 0, 4); + x2("(?:a?)?\?", "a", 0, 0); + x2("(?:a?\?)?", "a", 0, 0); + x2("(?:a?)+?", "aaa", 0, 1); + x2("(?:a+)?\?", "aaa", 0, 0); + x2("(?:a+)?\?b", "aaab", 0, 4); + x2("(?:ab)?{2}", "", 0, 0); + x2("(?:ab)?{2}", "ababa", 0, 4); + x2("(?:ab)*{0}", "ababa", 0, 0); + x2("(?:ab){3,}", "abababab", 0, 8); + n("(?:ab){3,}", "abab"); + x2("(?:ab){2,4}", "ababab", 0, 6); + x2("(?:ab){2,4}", "ababababab", 0, 8); + x2("(?:ab){2,4}?", "ababababab", 0, 4); + x2("(?:ab){,}", "ab{,}", 0, 5); + x2("(?:abc)+?{2}", "abcabcabc", 0, 6); + x2("(?:X*)(?i:xa)", "XXXa", 0, 4); + x2("(d+)([^abc]z)", "dddz", 0, 4); + x2("([^abc]*)([^abc]z)", "dddz", 0, 4); + 
x2("(\\w+)(\\wz)", "dddz", 0, 4); + x3("(a)", "a", 0, 1, 1); + x3("(ab)", "ab", 0, 2, 1); + x2("((ab))", "ab", 0, 2); + x3("((ab))", "ab", 0, 2, 1); + x3("((ab))", "ab", 0, 2, 2); + x3("((((((((((((((((((((ab))))))))))))))))))))", "ab", 0, 2, 20); + x3("(ab)(cd)", "abcd", 0, 2, 1); + x3("(ab)(cd)", "abcd", 2, 4, 2); + x3("()(a)bc(def)ghijk", "abcdefghijk", 3, 6, 3); + x3("(()(a)bc(def)ghijk)", "abcdefghijk", 3, 6, 4); + x2("(^a)", "a", 0, 1); + x3("(a)|(a)", "ba", 1, 2, 1); + x3("(^a)|(a)", "ba", 1, 2, 2); + x3("(a?)", "aaa", 0, 1, 1); + x3("(a*)", "aaa", 0, 3, 1); + x3("(a*)", "", 0, 0, 1); + x3("(a+)", "aaaaaaa", 0, 7, 1); + x3("(a+|b*)", "bbbaa", 0, 3, 1); + x3("(a+|b?)", "bbbaa", 0, 1, 1); + x3("(abc)?", "abc", 0, 3, 1); + x3("(abc)*", "abc", 0, 3, 1); + x3("(abc)+", "abc", 0, 3, 1); + x3("(xyz|abc)+", "abc", 0, 3, 1); + x3("([xyz][abc]|abc)+", "abc", 0, 3, 1); + x3("((?i:abc))", "AbC", 0, 3, 1); + x2("(abc)(?i:\\1)", "abcABC", 0, 6); + x3("((?m:a.c))", "a\nc", 0, 3, 1); + x3("((?=az)a)", "azb", 0, 1, 1); + x3("abc|(.abd)", "zabd", 0, 4, 1); + x2("(?:abc)|(ABC)", "abc", 0, 3); + x3("(?i:(abc))|(zzz)", "ABC", 0, 3, 1); + x3("a*(.)", "aaaaz", 4, 5, 1); + x3("a*?(.)", "aaaaz", 0, 1, 1); + x3("a*?(c)", "aaaac", 4, 5, 1); + x3("[bcd]a*(.)", "caaaaz", 5, 6, 1); + x3("(\\Abb)cc", "bbcc", 0, 2, 1); + n("(\\Abb)cc", "zbbcc"); + x3("(^bb)cc", "bbcc", 0, 2, 1); + n("(^bb)cc", "zbbcc"); + x3("cc(bb$)", "ccbb", 2, 4, 1); + n("cc(bb$)", "ccbbb"); + n("(\\1)", ""); + n("\\1(a)", "aa"); + n("(a(b)\\1)\\2+", "ababb"); + n("(?:(?:\\1|z)(a))+$", "zaa"); + x2("(?:(?:\\1|z)(a))+$", "zaaa", 0, 4); + x2("(a)(?=\\1)", "aa", 0, 1); + n("(a)$|\\1", "az"); + x2("(a)\\1", "aa", 0, 2); + n("(a)\\1", "ab"); + x2("(a?)\\1", "aa", 0, 2); + x2("(a?\?)\\1", "aa", 0, 0); + x2("(a*)\\1", "aaaaa", 0, 4); + x3("(a*)\\1", "aaaaa", 0, 2, 1); + x2("a(b*)\\1", "abbbb", 0, 5); + x2("a(b*)\\1", "ab", 0, 1); + x2("(a*)(b*)\\1\\2", "aaabbaaabb", 0, 10); + x2("(a*)(b*)\\2", "aaabbbb", 0, 7); + 
x2("(((((((a*)b))))))c\\7", "aaabcaaa", 0, 8); + x3("(((((((a*)b))))))c\\7", "aaabcaaa", 0, 3, 7); + x2("(a)(b)(c)\\2\\1\\3", "abcbac", 0, 6); + x2("([a-d])\\1", "cc", 0, 2); + x2("(\\w\\d\\s)\\1", "f5 f5 ", 0, 6); + n("(\\w\\d\\s)\\1", "f5 f5"); + x2("(who|[a-c]{3})\\1", "whowho", 0, 6); + x2("...(who|[a-c]{3})\\1", "abcwhowho", 0, 9); + x2("(who|[a-c]{3})\\1", "cbccbc", 0, 6); + x2("(^a)\\1", "aa", 0, 2); + n("(^a)\\1", "baa"); + n("(a$)\\1", "aa"); + n("(ab\\Z)\\1", "ab"); + x2("(a*\\Z)\\1", "a", 1, 1); + x2(".(a*\\Z)\\1", "ba", 1, 2); + x3("(.(abc)\\2)", "zabcabc", 0, 7, 1); + x3("(.(..\\d.)\\2)", "z12341234", 0, 9, 1); + x2("((?i:az))\\1", "AzAz", 0, 4); + n("((?i:az))\\1", "Azaz"); + x2("(?<=a)b", "ab", 1, 2); + n("(?<=a)b", "bb"); + x2("(?<=a|b)b", "bb", 1, 2); + x2("(?<=a|bc)b", "bcb", 2, 3); + x2("(?<=a|bc)b", "ab", 1, 2); + x2("(?<=a|bc||defghij|klmnopq|r)z", "rz", 1, 2); + x2("(a)\\g<1>", "aa", 0, 2); + x2("(?a)", "a", 0, 1); + x2("(?ab)\\g", "abab", 0, 4); + x2("(?.zv.)\\k", "azvbazvb", 0, 8); + x2("(?<=\\g)|-\\zEND (?XyZ)", "XyZ", 3, 3); + x2("(?|a\\g)+", "", 0, 0); + x2("(?|\\(\\g\\))+$", "()(())", 0, 6); + x3("\\g(?.){0}", "X", 0, 1, 1); + x2("\\g(abc|df(?.YZ){2,8}){0}", "XYZ", 0, 3); + x2("\\A(?(a\\g)|)\\z", "aaaa", 0, 4); + x2("(?|\\g\\g)\\z|\\zEND (?a|(b)\\g)", "bbbbabba", 0, 8); + x2("(?\\w+\\sx)a+\\k", " fg xaaaaaaaafg x", 2, 18); + x3("(z)()()(?<_9>a)\\g<_9>", "zaa", 2, 3, 1); + x2("(.)(((?<_>a)))\\k<_>", "zaa", 0, 3); + x2("((?\\d)|(?\\w))(\\k|\\k)", "ff", 0, 2); + x2("(?:(?)|(?efg))\\k", "", 0, 0); + x2("(?:(?abc)|(?efg))\\k", "abcefgefg", 3, 9); + n("(?:(?abc)|(?efg))\\k", "abcefg"); + x2("(?:(?.)|(?..)|(?...)|(?....)|(?.....)|(?......)|(?.......)|(?........)|(?.........)|(?..........)|(?...........)|(?............)|(?.............)|(?..............))\\k$", "a-pyumpyum", 2, 10); + 
x3("(?:(?.)|(?..)|(?...)|(?....)|(?.....)|(?......)|(?.......)|(?........)|(?.........)|(?..........)|(?...........)|(?............)|(?.............)|(?..............))\\k$", "xxxxabcdefghijklmnabcdefghijklmn", 4, 18, 14); + x3("(?)(?)(?)(?)(?)(?)(?)(?)(?)(?)(?)(?)(?)(?)(?)(?aaa)(?)$", "aaa", 0, 3, 16); + x2("(?a|\\(\\g\\))", "a", 0, 1); + x2("(?a|\\(\\g\\))", "((((((a))))))", 0, 13); + x3("(?a|\\(\\g\\))", "((((((((a))))))))", 0, 17, 1); + x2("\\g|\\zEND(?.*abc$)", "abcxxxabc", 0, 9); + x2("\\g<1>|\\zEND(.a.)", "bac", 0, 3); + x3("\\g<_A>\\g<_A>|\\zEND(.a.)(?<_A>.b.)", "xbxyby", 3, 6, 1); + x2("\\A(?:\\g|\\g|\\zEND (?a|c\\gc)(?b|d\\gd))$", "cdcbcdc", 0, 7); + x2("\\A(?|a\\g)\\z|\\zEND (?\\g)", "aaaa", 0, 4); + x2("(?(a|b\\gc){3,5})", "baaaaca", 1, 5); + x2("(?(a|b\\gc){3,5})", "baaaacaaaaa", 0, 10); + x2("(?\\(([^\\(\\)]++|\\g)*+\\))", "((a))", 0, 5); + x2("()*\\1", "", 0, 0); + x2("(?:()|())*\\1\\2", "", 0, 0); + x3("(?:\\1a|())*", "a", 0, 0, 1); + x2("x((.)*)*x", "0x1x2x3", 1, 6); + x2("x((.)*)*x(?i:\\1)\\Z", "0x1x2x1X2", 1, 9); + x2("(?:()|()|()|()|()|())*\\2\\5", "", 0, 0); + x2("(?:()|()|()|(x)|()|())*\\2b\\5", "b", 0, 1); + x2("\\xED\\xF2", "\xed\xf2", 0, 2); + x2("", "あ", 0, 0); + x2("あ", "あ", 0, 2); + n("い", "あ"); + x2("うう", "うう", 0, 4); + x2("あいう", "あいう", 0, 6); + x2("こここここここここここここここここここここここここここここここここここ", "こここここここここここここここここここここここここここここここここここ", 0, 70); + x2("あ", "いあ", 2, 4); + x2("いう", "あいう", 2, 6); + x2("\\xca\\xb8", "\xca\xb8", 0, 2); + x2(".", "あ", 0, 2); + x2("..", "かき", 0, 4); + x2("\\w", "お", 0, 2); + n("\\W", "あ"); + x2("[\\W]", "う$", 2, 3); + x2("\\S", "そ", 0, 2); + x2("\\S", "漢", 0, 2); + x2("\\b", "気 ", 0, 0); + x2("\\b", " ほ", 1, 1); + x2("\\B", "せそ ", 2, 2); + x2("\\B", "う ", 3, 3); + x2("\\B", " い", 0, 0); + x2("[たち]", "ち", 0, 2); + n("[なに]", "ぬ"); + x2("[う-お]", "え", 0, 2); + n("[^け]", "け"); + x2("[\\w]", "ね", 0, 2); + n("[\\d]", "ふ"); + x2("[\\D]", "は", 0, 2); + n("[\\s]", "く"); + x2("[\\S]", "へ", 0, 2); + x2("[\\w\\d]", "よ", 0, 2); + 
x2("[\\w\\d]", " よ", 3, 5); + n("\\w鬼車", " 鬼車"); + x2("鬼\\W車", "鬼 車", 0, 5); + x2("あ.い.う", "ああいいう", 0, 10); + x2(".\\wう\\W..ぞ", "えうう うぞぞ", 0, 13); + x2("\\s\\wこここ", " ここここ", 0, 9); + x2("ああ.け", "ああけけ", 0, 8); + n(".い", "いえ"); + x2(".お", "おお", 0, 4); + x2("^あ", "あ", 0, 2); + x2("^む$", "む", 0, 2); + x2("^\\w$", "に", 0, 2); + x2("^\\wかきくけこ$", "zかきくけこ", 0, 11); + x2("^\\w...うえお$", "zあいううえお", 0, 13); + x2("\\w\\w\\s\\Wおおお\\d", "aお おおお4", 0, 12); + x2("\\Aたちつ", "たちつ", 0, 6); + x2("むめも\\Z", "むめも", 0, 6); + x2("かきく\\z", "かきく", 0, 6); + x2("かきく\\Z", "かきく\n", 0, 6); + x2("\\Gぽぴ", "ぽぴ", 0, 4); + n("\\Gえ", "うえお"); + n("とて\\G", "とて"); + n("まみ\\A", "まみ"); + n("ま\\Aみ", "まみ"); + x2("(?=せ)せ", "せ", 0, 2); + n("(?=う).", "い"); + x2("(?!う)か", "か", 0, 2); + n("(?!と)あ", "と"); + x2("(?i:あ)", "あ", 0, 2); + x2("(?i:ぶべ)", "ぶべ", 0, 4); + n("(?i:い)", "う"); + x2("(?m:よ.)", "よ\n", 0, 3); + x2("(?m:.め)", "ま\nめ", 2, 5); + x2("あ?", "", 0, 0); + x2("変?", "化", 0, 0); + x2("変?", "変", 0, 2); + x2("量*", "", 0, 0); + x2("量*", "量", 0, 2); + x2("子*", "子子子", 0, 6); + x2("馬*", "鹿馬馬馬馬", 0, 0); + n("山+", ""); + x2("河+", "河", 0, 2); + x2("時+", "時時時時", 0, 8); + x2("え+", "ええううう", 0, 4); + x2("う+", "おうううう", 2, 10); + x2(".?", "た", 0, 2); + x2(".*", "ぱぴぷぺ", 0, 8); + x2(".+", "ろ", 0, 2); + x2(".+", "いうえか\n", 0, 8); + x2("あ|い", "あ", 0, 2); + x2("あ|い", "い", 0, 2); + x2("あい|いう", "あい", 0, 4); + x2("あい|いう", "いう", 0, 4); + x2("を(?:かき|きく)", "をかき", 0, 6); + x2("を(?:かき|きく)け", "をきくけ", 0, 8); + x2("あい|(?:あう|あを)", "あを", 0, 4); + x2("あ|い|う", "えう", 2, 4); + x2("あ|い|うえ|おかき|く|けこさ|しすせ|そ|たち|つてとなに|ぬね", "しすせ", 0, 6); + n("あ|い|うえ|おかき|く|けこさ|しすせ|そ|たち|つてとなに|ぬね", "すせ"); + x2("あ|^わ", "ぶあ", 2, 4); + x2("あ|^を", "をあ", 0, 2); + x2("鬼|\\G車", "け車鬼", 4, 6); + x2("鬼|\\G車", "車鬼", 0, 2); + x2("鬼|\\A車", "b車鬼", 3, 5); + x2("鬼|\\A車", "車", 0, 2); + x2("鬼|車\\Z", "車鬼", 2, 4); + x2("鬼|車\\Z", "車", 0, 2); + x2("鬼|車\\Z", "車\n", 0, 2); + x2("鬼|車\\z", "車鬼", 2, 4); + x2("鬼|車\\z", "車", 0, 2); + x2("\\w|\\s", "お", 0, 2); + x2("\\w|%", "%お", 0, 1); + x2("\\w|[&$]", 
"う&", 0, 2); + x2("[い-け]", "う", 0, 2); + x2("[い-け]|[^か-こ]", "あ", 0, 2); + x2("[い-け]|[^か-こ]", "か", 0, 2); + x2("[^あ]", "\n", 0, 1); + x2("(?:あ|[う-き])|いを", "うを", 0, 2); + x2("(?:あ|[う-き])|いを", "いを", 0, 4); + x2("あいう|(?=けけ)..ほ", "けけほ", 0, 6); + x2("あいう|(?!けけ)..ほ", "あいほ", 0, 6); + x2("(?=をあ)..あ|(?=をを)..あ", "ををあ", 0, 6); + x2("(?<=あ|いう)い", "いうい", 4, 6); + n("(?>あ|あいえ)う", "あいえう"); + x2("(?>あいえ|あ)う", "あいえう", 0, 8); + x2("あ?|い", "あ", 0, 2); + x2("あ?|い", "い", 0, 0); + x2("あ?|い", "", 0, 0); + x2("あ*|い", "ああ", 0, 4); + x2("あ*|い*", "いあ", 0, 0); + x2("あ*|い*", "あい", 0, 2); + x2("[aあ]*|い*", "aあいいい", 0, 3); + x2("あ+|い*", "", 0, 0); + x2("あ+|い*", "いいい", 0, 6); + x2("あ+|い*", "あいいい", 0, 2); + x2("あ+|い*", "aあいいい", 0, 0); + n("あ+|い+", ""); + x2("(あ|い)?", "い", 0, 2); + x2("(あ|い)*", "いあ", 0, 4); + x2("(あ|い)+", "いあい", 0, 6); + x2("(あい|うあ)+", "うああいうえ", 0, 8); + x2("(あい|うえ)+", "うああいうえ", 4, 12); + x2("(あい|うあ)+", "ああいうあ", 2, 10); + x2("(あい|うあ)+", "あいをうあ", 0, 4); + x2("(あい|うあ)+", "$$zzzzあいをうあ", 6, 10); + x2("(あ|いあい)+", "あいあいあ", 0, 10); + x2("(あ|いあい)+", "いあ", 2, 4); + x2("(あ|いあい)+", "いあああいあ", 2, 8); + x2("(?:あ|い)(?:あ|い)", "あい", 0, 4); + x2("(?:あ*|い*)(?:あ*|い*)", "あああいいい", 0, 6); + x2("(?:あ*|い*)(?:あ+|い+)", "あああいいい", 0, 12); + x2("(?:あ+|い+){2}", "あああいいい", 0, 12); + x2("(?:あ+|い+){1,2}", "あああいいい", 0, 12); + x2("(?:あ+|\\Aい*)うう", "うう", 0, 4); + n("(?:あ+|\\Aい*)うう", "あいうう"); + x2("(?:^あ+|い+)*う", "ああいいいあいう", 12, 16); + x2("(?:^あ+|い+)*う", "ああいいいいう", 0, 14); + x2("う{0,}", "うううう", 0, 8); + x2("あ|(?i)c", "C", 0, 1); + x2("(?i)c|あ", "C", 0, 1); + x2("(?i:あ)|a", "a", 0, 1); + n("(?i:あ)|a", "A"); + x2("[あいう]?", "あいう", 0, 2); + x2("[あいう]*", "あいう", 0, 6); + x2("[^あいう]*", "あいう", 0, 0); + n("[^あいう]+", "あいう"); + x2("あ?\?", "あああ", 0, 0); + x2("いあ?\?い", "いあい", 0, 6); + x2("あ*?", "あああ", 0, 0); + x2("いあ*?", "いああ", 0, 2); + x2("いあ*?い", "いああい", 0, 8); + x2("あ+?", "あああ", 0, 2); + x2("いあ+?", "いああ", 0, 4); + x2("いあ+?い", "いああい", 0, 8); + x2("(?:天?)?\?", "天", 0, 0); + x2("(?:天?\?)?", "天", 0, 0); + x2("(?:夢?)+?", "夢夢夢", 0, 2); + 
x2("(?:風+)?\?", "風風風", 0, 0); + x2("(?:雪+)?\?霜", "雪雪雪霜", 0, 8); + x2("(?:あい)?{2}", "", 0, 0); + x2("(?:鬼車)?{2}", "鬼車鬼車鬼", 0, 8); + x2("(?:鬼車)*{0}", "鬼車鬼車鬼", 0, 0); + x2("(?:鬼車){3,}", "鬼車鬼車鬼車鬼車", 0, 16); + n("(?:鬼車){3,}", "鬼車鬼車"); + x2("(?:鬼車){2,4}", "鬼車鬼車鬼車", 0, 12); + x2("(?:鬼車){2,4}", "鬼車鬼車鬼車鬼車鬼車", 0, 16); + x2("(?:鬼車){2,4}?", "鬼車鬼車鬼車鬼車鬼車", 0, 8); + x2("(?:鬼車){,}", "鬼車{,}", 0, 7); + x2("(?:かきく)+?{2}", "かきくかきくかきく", 0, 12); + x3("(火)", "火", 0, 2, 1); + x3("(火水)", "火水", 0, 4, 1); + x2("((時間))", "時間", 0, 4); + x3("((風水))", "風水", 0, 4, 1); + x3("((昨日))", "昨日", 0, 4, 2); + x3("((((((((((((((((((((量子))))))))))))))))))))", "量子", 0, 4, 20); + x3("(あい)(うえ)", "あいうえ", 0, 4, 1); + x3("(あい)(うえ)", "あいうえ", 4, 8, 2); + x3("()(あ)いう(えおか)きくけこ", "あいうえおかきくけこ", 6, 12, 3); + x3("(()(あ)いう(えおか)きくけこ)", "あいうえおかきくけこ", 6, 12, 4); + x3(".*(フォ)ン・マ(ン()シュタ)イン", "フォン・マンシュタイン", 10, 18, 2); + x2("(^あ)", "あ", 0, 2); + x3("(あ)|(あ)", "いあ", 2, 4, 1); + x3("(^あ)|(あ)", "いあ", 2, 4, 2); + x3("(あ?)", "あああ", 0, 2, 1); + x3("(ま*)", "ままま", 0, 6, 1); + x3("(と*)", "", 0, 0, 1); + x3("(る+)", "るるるるるるる", 0, 14, 1); + x3("(ふ+|へ*)", "ふふふへへ", 0, 6, 1); + x3("(あ+|い?)", "いいいああ", 0, 2, 1); + x3("(あいう)?", "あいう", 0, 6, 1); + x3("(あいう)*", "あいう", 0, 6, 1); + x3("(あいう)+", "あいう", 0, 6, 1); + x3("(さしす|あいう)+", "あいう", 0, 6, 1); + x3("([なにぬ][かきく]|かきく)+", "かきく", 0, 6, 1); + x3("((?i:あいう))", "あいう", 0, 6, 1); + x3("((?m:あ.う))", "あ\nう", 0, 5, 1); + x3("((?=あん)あ)", "あんい", 0, 2, 1); + x3("あいう|(.あいえ)", "んあいえ", 0, 8, 1); + x3("あ*(.)", "ああああん", 8, 10, 1); + x3("あ*?(.)", "ああああん", 0, 2, 1); + x3("あ*?(ん)", "ああああん", 8, 10, 1); + x3("[いうえ]あ*(.)", "えああああん", 10, 12, 1); + x3("(\\Aいい)うう", "いいうう", 0, 4, 1); + n("(\\Aいい)うう", "んいいうう"); + x3("(^いい)うう", "いいうう", 0, 4, 1); + n("(^いい)うう", "んいいうう"); + x3("ろろ(るる$)", "ろろるる", 4, 8, 1); + n("ろろ(るる$)", "ろろるるる"); + x2("(無)\\1", "無無", 0, 4); + n("(無)\\1", "無武"); + x2("(空?)\\1", "空空", 0, 4); + x2("(空?\?)\\1", "空空", 0, 0); + x2("(空*)\\1", "空空空空空", 0, 8); + x3("(空*)\\1", "空空空空空", 0, 4, 1); + x2("あ(い*)\\1", "あいいいい", 
0, 10); + x2("あ(い*)\\1", "あい", 0, 2); + x2("(あ*)(い*)\\1\\2", "あああいいあああいい", 0, 20); + x2("(あ*)(い*)\\2", "あああいいいい", 0, 14); + x3("(あ*)(い*)\\2", "あああいいいい", 6, 10, 2); + x2("(((((((ぽ*)ぺ))))))ぴ\\7", "ぽぽぽぺぴぽぽぽ", 0, 16); + x3("(((((((ぽ*)ぺ))))))ぴ\\7", "ぽぽぽぺぴぽぽぽ", 0, 6, 7); + x2("(は)(ひ)(ふ)\\2\\1\\3", "はひふひはふ", 0, 12); + x2("([き-け])\\1", "くく", 0, 4); + x2("(\\w\\d\\s)\\1", "あ5 あ5 ", 0, 8); + n("(\\w\\d\\s)\\1", "あ5 あ5"); + x2("(誰?|[あ-う]{3})\\1", "誰?誰?", 0, 8); + x2("...(誰?|[あ-う]{3})\\1", "あaあ誰?誰?", 0, 13); + x2("(誰?|[あ-う]{3})\\1", "ういうういう", 0, 12); + x2("(^こ)\\1", "ここ", 0, 4); + n("(^む)\\1", "めむむ"); + n("(あ$)\\1", "ああ"); + n("(あい\\Z)\\1", "あい"); + x2("(あ*\\Z)\\1", "あ", 2, 2); + x2(".(あ*\\Z)\\1", "いあ", 2, 4); + x3("(.(やいゆ)\\2)", "zやいゆやいゆ", 0, 13, 1); + x3("(.(..\\d.)\\2)", "あ12341234", 0, 10, 1); + x2("((?i:あvず))\\1", "あvずあvず", 0, 10); + x2("(?<愚か>変|\\(\\g<愚か>\\))", "((((((変))))))", 0, 14); + x2("\\A(?:\\g<阿_1>|\\g<云_2>|\\z終了 (?<阿_1>観|自\\g<云_2>自)(?<云_2>在|菩薩\\g<阿_1>菩薩))$", "菩薩自菩薩自在自菩薩自菩薩", 0, 26); + x2("[[ひふ]]", "ふ", 0, 2); + x2("[[いおう]か]", "か", 0, 2); + n("[[^あ]]", "あ"); + n("[^[あ]]", "あ"); + x2("[^[^あ]]", "あ", 0, 2); + x2("[[かきく]&&きく]", "く", 0, 2); + n("[[かきく]&&きく]", "か"); + n("[[かきく]&&きく]", "け"); + x2("[あ-ん&&い-を&&う-ゑ]", "ゑ", 0, 2); + n("[^あ-ん&&い-を&&う-ゑ]", "ゑ"); + x2("[[^あ&&あ]&&あ-ん]", "い", 0, 2); + n("[[^あ&&あ]&&あ-ん]", "あ"); + x2("[[^あ-ん&&いうえお]&&[^う-か]]", "き", 0, 2); + n("[[^あ-ん&&いうえお]&&[^う-か]]", "い"); + x2("[^[^あいう]&&[^うえお]]", "う", 0, 2); + x2("[^[^あいう]&&[^うえお]]", "え", 0, 2); + n("[^[^あいう]&&[^うえお]]", "か"); + x2("[あ-&&-あ]", "-", 0, 1); + x2("[^[^a-zあいう]&&[^bcdefgうえお]q-w]", "え", 0, 2); + x2("[^[^a-zあいう]&&[^bcdefgうえお]g-w]", "f", 0, 1); + x2("[^[^a-zあいう]&&[^bcdefgうえお]g-w]", "g", 0, 1); + n("[^[^a-zあいう]&&[^bcdefgうえお]g-w]", "2"); + x2("aバージョンのダウンロード<\\/b>", "aバージョンのダウンロード", 0, 32); + x2(".バージョンのダウンロード<\\/b>", "aバージョンのダウンロード", 0, 32); + fprintf(stdout, + "\nRESULT SUCC: %d, FAIL: %d, ERROR: %d (by Oniguruma %s)\n", + nsucc, nfail, nerror, onig_version()); + +#ifndef POSIX_TEST + 
onig_region_free(region, 1); + onig_end(); +#endif + + return ((nfail == 0 && nerror == 0) ? 0 : -1); +}