No more submodules

This commit is contained in:
Mark Ellzey 2011-07-08 13:32:08 -04:00
parent 354e71d223
commit 1e0ec98c51
91 changed files with 53450 additions and 0 deletions

24
evthr/Makefile Normal file
View file

@ -0,0 +1,24 @@
SRC = evthr.c
OUT = libevthr.a
OBJ = $(SRC:.c=.o)
INCLUDES = -I.
CFLAGS += -Wall -ggdb
LDFLAGS += -ggdb
CC = gcc
.SUFFIXES: .c
default: $(OUT)
.c.o:
$(CC) $(INCLUDES) $(CFLAGS) -c $< -o $@
$(OUT): $(OBJ)
ar rcs $(OUT) $(OBJ)
test: $(OUT) test.c
$(CC) $(INCLUDES) $(CFLAGS) test.c -o test $(OUT) -levent -levent_pthreads -lpthread
clean:
rm -f $(OBJ) $(OUT) test

53
evthr/README Normal file
View file

@ -0,0 +1,53 @@
Libevthr is an API which manages threads and thread-pools in an event based
manner. This API requires libevent with threading support.
Libevthr works a bit differently than most thread management systems. Instead of
conditional signalling and some type of pre-thread queue, Libevthr uses a
deferral type mechanism. That is, a thread is always running, abstracted to a
point where you "defer" your function *into* a thread.
For example you can start up a single thread with a backlog of 10 (a backlog
being the max number of outstanding callbacks to run within the thread), and
execute a function you would like to run inside the thread one or many times.
The act of deferrals is non-blocking.
Example Code for evthrs:
evthr_t * thr = evthr_new(10, NULL);
if (evthr_start(thr) < 0) {
exit(1);
}
evthr_defer(thr, my_cb_1, NULL);
evthr_defer(thr, my_cb_2, NULL);
evthr_defer(thr, my_cb_3, NULL);
sleep(n_seconds);
evthr_stop(thr);
Libevthr also has the ability to create pools using the same methods that a
single evthr has. For example, if you would like to create 10 threads, each
with a backlog of 5:
evthr_pool_t * thr_pool = evthr_pool_new(10, 5, NULL);
if (evthr_pool_start(thr_pool) < 0) {
exit(1);
}
evthr_pool_defer(thr_pool, my_cb_1, NULL);
evthr_pool_defer(thr_pool, my_cb_2, NULL);
evthr_pool_defer(thr_pool, my_cb_3, NULL);
Your callback functions which you defer must be of type "evthr_cb", or
"void cb_name(void * arg, void * shared)". In this case, the "arg" variable is
the data you passed as the third argument to either evthr_pool_defer, or
evthr_defer. The "shared" variable is the data that was either the second
variable in evthr_new(), or the third variable in evthr_pool_new().
The gist of this is to allow a global dataset, along with deferred specific
data.
See test.c for a quick example.

468
evthr/evthr.c Normal file
View file

@ -0,0 +1,468 @@
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <inttypes.h>
#include <limits.h>
#include <errno.h>
#include <unistd.h>
#include <fcntl.h>
#include <sched.h>
#include <sys/syscall.h>
#include <sys/ioctl.h>
#include <sys/queue.h>
#include <pthread.h>
#include <event.h>
#include <event2/thread.h>
#include "evthr.h"
#define _EVTHR_MAGIC 0x4d52
typedef struct evthr_cmd evthr_cmd_t;
typedef struct evthr_pool_slist evthr_pool_slist_t;
struct evthr_cmd {
uint16_t magic;
uint8_t stop;
void * args;
evthr_cb cb;
};
TAILQ_HEAD(evthr_pool_slist, evthr);
struct evthr_pool {
int nthreads;
int nprocs;
evthr_pool_slist_t threads;
};
struct evthr {
int cur_backlog;
int proc_to_use;
int rdr;
int wdr;
char err;
ev_t * event;
evbase_t * evbase;
pthread_mutex_t * lock;
pthread_mutex_t * stat_lock;
pthread_mutex_t * rlock;
pthread_t * thr;
void * args;
TAILQ_ENTRY(evthr) next;
};
void
evthr_inc_backlog(evthr_t * evthr) {
__sync_fetch_and_add(&evthr->cur_backlog, 1);
}
void
evthr_dec_backlog(evthr_t * evthr) {
__sync_fetch_and_sub(&evthr->cur_backlog, 1);
}
int
evthr_get_backlog(evthr_t * evthr) {
return __sync_add_and_fetch(&evthr->cur_backlog, 0);
}
static void
_evthr_read_cmd(int sock, short which, void * args) {
evthr_t * thread;
evthr_cmd_t cmd;
int avail = 0;
ssize_t recvd;
if (!(thread = (evthr_t *)args)) {
return;
}
if (pthread_mutex_trylock(thread->lock) != 0) {
return;
}
if (ioctl(sock, FIONREAD, &avail) < 0) {
goto error;
}
if (avail <= 0) {
goto end;
}
if (avail < (int)sizeof(evthr_cmd_t)) {
goto end;
}
pthread_mutex_lock(thread->rlock);
if ((recvd = recv(sock, &cmd, sizeof(evthr_cmd_t), 0)) <= 0) {
pthread_mutex_unlock(thread->rlock);
if (errno == EAGAIN) {
goto end;
} else {
goto error;
}
}
pthread_mutex_unlock(thread->rlock);
if (recvd != sizeof(evthr_cmd_t)) {
goto error;
}
if (cmd.magic != _EVTHR_MAGIC) {
goto error;
}
if (cmd.stop == 1) {
goto stop;
}
if (cmd.cb != NULL) {
cmd.cb(thread, cmd.args, thread->args);
goto done;
} else {
goto done;
}
stop:
event_base_loopbreak(thread->evbase);
done:
evthr_dec_backlog(thread);
end:
pthread_mutex_unlock(thread->lock);
return;
error:
pthread_mutex_lock(thread->stat_lock);
thread->cur_backlog = -1;
thread->err = 1;
pthread_mutex_unlock(thread->stat_lock);
pthread_mutex_unlock(thread->lock);
event_base_loopbreak(thread->evbase);
return;
} /* _evthr_read_cmd */
static int
_evthr_get_num_procs(void) {
return sysconf(_SC_NPROCESSORS_ONLN);
}
static void *
_evthr_loop(void * args) {
evthr_t * thread;
if (!(thread = (evthr_t *)args)) {
return NULL;
}
if (thread == NULL || thread->thr == NULL) {
pthread_exit(NULL);
}
thread->evbase = event_base_new();
thread->event = event_new(thread->evbase, thread->rdr,
EV_READ | EV_PERSIST, _evthr_read_cmd, args);
event_add(thread->event, NULL);
event_base_loop(thread->evbase, 0);
if (thread->err == 1) {
fprintf(stderr, "FATAL ERROR!\n");
}
evthr_free(thread);
pthread_exit(NULL);
}
evthr_res
evthr_defer(evthr_t * thread, evthr_cb cb, void * arg) {
int cur_backlog;
evthr_cmd_t cmd = { 0 };
cur_backlog = evthr_get_backlog(thread);
if (cur_backlog == -1) {
return EVTHR_RES_FATAL;
}
evthr_inc_backlog(thread);
cmd.magic = _EVTHR_MAGIC;
cmd.cb = cb;
cmd.args = arg;
cmd.stop = 0;
pthread_mutex_lock(thread->rlock);
if (send(thread->wdr, &cmd, sizeof(evthr_cmd_t), 0) <= 0) {
pthread_mutex_unlock(thread->rlock);
return EVTHR_RES_RETRY;
}
pthread_mutex_unlock(thread->rlock);
return EVTHR_RES_OK;
}
evthr_res
evthr_stop(evthr_t * thread) {
evthr_cmd_t cmd = { 0 };
cmd.magic = _EVTHR_MAGIC;
cmd.cb = NULL;
cmd.args = NULL;
cmd.stop = 1;
pthread_mutex_lock(thread->rlock);
if (write(thread->wdr, &cmd, sizeof(evthr_cmd_t)) < 0) {
pthread_mutex_unlock(thread->rlock);
return EVTHR_RES_RETRY;
}
pthread_mutex_unlock(thread->rlock);
return EVTHR_RES_OK;
}
evbase_t *
evthr_get_base(evthr_t * thr) {
return thr->evbase;
}
evthr_t *
evthr_new(void * args, int proc_to_use) {
evthr_t * thread;
int fds[2];
if (socketpair(AF_UNIX, SOCK_STREAM, 0, fds) == -1) {
return NULL;
}
if (!(thread = calloc(sizeof(evthr_t), sizeof(char)))) {
return NULL;
}
thread->stat_lock = malloc(sizeof(pthread_mutex_t));
thread->rlock = malloc(sizeof(pthread_mutex_t));
thread->lock = malloc(sizeof(pthread_mutex_t));
thread->thr = malloc(sizeof(pthread_t));
thread->args = args;
thread->rdr = fds[0];
thread->wdr = fds[1];
thread->proc_to_use = proc_to_use;
if (pthread_mutex_init(thread->lock, NULL)) {
evthr_free(thread);
return NULL;
}
if (pthread_mutex_init(thread->stat_lock, NULL)) {
evthr_free(thread);
return NULL;
}
if (pthread_mutex_init(thread->rlock, NULL)) {
evthr_free(thread);
return NULL;
}
fcntl(thread->rdr, F_SETFL, O_NONBLOCK);
fcntl(thread->wdr, F_SETFL, O_NONBLOCK);
return thread;
} /* evthr_new */
int
evthr_start(evthr_t * thread) {
if (thread == NULL || thread->thr == NULL) {
return -1;
}
if (pthread_create(thread->thr, NULL, _evthr_loop, (void *)thread)) {
return -1;
}
return pthread_detach(*thread->thr);
}
void
evthr_free(evthr_t * thread) {
if (thread == NULL) {
return;
}
if (thread->rdr > 0) {
close(thread->rdr);
}
if (thread->wdr > 0) {
close(thread->wdr);
}
if (thread->lock) {
pthread_mutex_destroy(thread->lock);
free(thread->lock);
}
if (thread->stat_lock) {
pthread_mutex_destroy(thread->stat_lock);
}
if (thread->rlock) {
pthread_mutex_destroy(thread->rlock);
}
if (thread->thr) {
free(thread->thr);
}
if (thread->event) {
event_free(thread->event);
}
if (thread->evbase) {
event_base_free(thread->evbase);
}
free(thread);
}
void
evthr_pool_free(evthr_pool_t * pool) {
evthr_t * thread;
evthr_t * save;
if (pool == NULL) {
return;
}
for (thread = TAILQ_FIRST(&pool->threads); thread != NULL; thread = save) {
save = TAILQ_NEXT(thread, next);
TAILQ_REMOVE(&pool->threads, thread, next);
evthr_free(thread);
}
free(pool);
}
evthr_res
evthr_pool_stop(evthr_pool_t * pool) {
evthr_t * thr;
if (pool == NULL) {
return EVTHR_RES_FATAL;
}
TAILQ_FOREACH(thr, &pool->threads, next) {
evthr_stop(thr);
}
memset(&pool->threads, 0, sizeof(pool->threads));
return EVTHR_RES_OK;
}
evthr_res
evthr_pool_defer(evthr_pool_t * pool, evthr_cb cb, void * arg) {
evthr_t * min_thr = NULL;
evthr_t * thr = NULL;
if (pool == NULL) {
return EVTHR_RES_FATAL;
}
if (cb == NULL) {
return EVTHR_RES_NOCB;
}
/* find the thread with the smallest backlog */
TAILQ_FOREACH(thr, &pool->threads, next) {
evthr_t * m_save;
evthr_t * t_save;
int thr_backlog = 0;
int min_backlog = 0;
thr_backlog = evthr_get_backlog(thr);
if (min_thr) {
min_backlog = evthr_get_backlog(min_thr);
}
m_save = min_thr;
t_save = thr;
if (min_thr == NULL) {
min_thr = thr;
} else if (thr_backlog == 0) {
min_thr = thr;
} else if (thr_backlog < min_backlog) {
min_thr = thr;
}
if (evthr_get_backlog(min_thr) == 0) {
break;
}
}
return evthr_defer(min_thr, cb, arg);
} /* evthr_pool_defer */
evthr_pool_t *
evthr_pool_new(int nthreads, void * shared) {
evthr_pool_t * pool;
int i;
if (nthreads == 0) {
return NULL;
}
if (!(pool = calloc(sizeof(evthr_pool_t), sizeof(char)))) {
return NULL;
}
pool->nprocs = _evthr_get_num_procs();
pool->nthreads = nthreads;
TAILQ_INIT(&pool->threads);
for (i = 0; i < nthreads; i++) {
evthr_t * thread;
int proc = i % pool->nprocs;
if (!(thread = evthr_new(shared, proc))) {
evthr_pool_free(pool);
return NULL;
}
TAILQ_INSERT_TAIL(&pool->threads, thread, next);
}
return pool;
}
int
evthr_pool_start(evthr_pool_t * pool) {
evthr_t * evthr = NULL;
if (pool == NULL) {
return -1;
}
TAILQ_FOREACH(evthr, &pool->threads, next) {
if (evthr_start(evthr) < 0) {
return -1;
}
usleep(300);
}
return 0;
}

48
evthr/evthr.h Normal file
View file

@ -0,0 +1,48 @@
#define _GNU_SOURCE
#ifndef __EVTHR_H__
#define __EVTHR_H__
#include <sched.h>
#include <pthread.h>
#include <sys/queue.h>
#include <event.h>
#include <event2/thread.h>
struct evthr_pool;
struct evthr;
typedef struct event_base evbase_t;
typedef struct event ev_t;
typedef struct evthr_pool evthr_pool_t;
typedef struct evthr evthr_t;
typedef enum evthr_res evthr_res;
typedef void (*evthr_cb)(evthr_t * thr, void * cmd_arg, void * shared);
enum evthr_res {
EVTHR_RES_OK = 0,
EVTHR_RES_BACKLOG,
EVTHR_RES_RETRY,
EVTHR_RES_NOCB,
EVTHR_RES_FATAL
};
evthr_t * evthr_new(void * arg, int proc_to_use);
evbase_t * evthr_get_base(evthr_t * thr);
int evthr_start(evthr_t * evthr);
evthr_res evthr_stop(evthr_t * evthr);
evthr_res evthr_defer(evthr_t * evthr, evthr_cb cb, void * arg);
void evthr_free(evthr_t * evthr);
void evthr_inc_backlog(evthr_t * evthr);
void evthr_dec_backlog(evthr_t * evthr);
int evthr_get_backlog(evthr_t * evthr);
evthr_pool_t * evthr_pool_new(int nthreads, void * shared);
int evthr_pool_start(evthr_pool_t * pool);
evthr_res evthr_pool_stop(evthr_pool_t * pool);
evthr_res evthr_pool_defer(evthr_pool_t * pool, evthr_cb cb, void * arg);
void evthr_pool_free(evthr_pool_t * pool);
#endif /* __EVTHR_H__ */

49
evthr/test.c Normal file
View file

@ -0,0 +1,49 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <errno.h>
#include <unistd.h>
#include <evthr.h>
static void
_test_cb_1(evthr_t * thr, void * cmdarg, void * shared) {
printf("START _test_cb_1 (%u)\n", (unsigned int)pthread_self());
sleep(1);
printf("END _test_cb_1 (%u)\n", (unsigned int)pthread_self());
}
int
main(int argc, char ** argv) {
evthr_pool_t * pool = NULL;
int i = 0;
evthread_use_pthreads();
evthread_enable_lock_debuging();
pool = evthr_pool_new(8, NULL);
evthr_pool_start(pool);
while (1) {
if (i++ >= 5) {
break;
}
printf("Iter %d\n", i);
printf("%d\n", evthr_pool_defer(pool, _test_cb_1, "derp"));
printf("%d\n", evthr_pool_defer(pool, _test_cb_1, "derp"));
printf("%d\n", evthr_pool_defer(pool, _test_cb_1, "derp"));
printf("%d\n", evthr_pool_defer(pool, _test_cb_1, "derp"));
printf("%d\n", evthr_pool_defer(pool, _test_cb_1, "derp"));
printf("%d\n", evthr_pool_defer(pool, _test_cb_1, "derp"));
sleep(2);
}
evthr_pool_stop(pool);
evthr_pool_free(pool);
return 0;
}

4
http_parser/.gitignore vendored Normal file
View file

@ -0,0 +1,4 @@
tags
*.o
test
test_g

View file

@ -0,0 +1,4 @@
Contributors must agree to the Contributor License Agreement before patches
can be accepted.
http://spreadsheets2.google.com/viewform?hl=en&formkey=dDJXOGUwbzlYaWM4cHN1MERwQS1CSnc6MQ

19
http_parser/LICENSE-MIT Normal file
View file

@ -0,0 +1,19 @@
Copyright Joyent, Inc. and other Node contributors. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
IN THE SOFTWARE.

41
http_parser/Makefile Normal file
View file

@ -0,0 +1,41 @@
OPT_DEBUG=-O0 -g -Wall -Wextra -Werror -I.
OPT_FAST=-O3 -DHTTP_PARSER_STRICT=0 -I.
CC?=gcc
test: test_g
./test_g
test_g: http_parser_g.o test_g.o
$(CC) $(OPT_DEBUG) http_parser_g.o test_g.o -o $@
test_g.o: test.c http_parser.h Makefile
$(CC) $(OPT_DEBUG) -c test.c -o $@
test.o: test.c http_parser.h Makefile
$(CC) $(OPT_FAST) -c test.c -o $@
http_parser_g.o: http_parser.c http_parser.h Makefile
$(CC) $(OPT_DEBUG) -c http_parser.c -o $@
test-valgrind: test_g
valgrind ./test_g
http_parser.o: http_parser.c http_parser.h Makefile
$(CC) $(OPT_FAST) -c http_parser.c
test_fast: http_parser.o test.c http_parser.h
$(CC) $(OPT_FAST) http_parser.o test.c -o $@
test-run-timed: test_fast
while(true) do time ./test_fast > /dev/null; done
tags: http_parser.c http_parser.h test.c
ctags $^
clean:
rm -f *.o test test_fast test_g http_parser.tar tags
.PHONY: clean package test-run test-run-timed test-valgrind

171
http_parser/README.md Normal file
View file

@ -0,0 +1,171 @@
HTTP Parser
===========
This is a parser for HTTP messages written in C. It parses both requests and
responses. The parser is designed to be used in performance HTTP
applications. It does not make any syscalls nor allocations, it does not
buffer data, it can be interrupted at anytime. Depending on your
architecture, it only requires about 40 bytes of data per message
stream (in a web server that is per connection).
Features:
* No dependencies
* Handles persistent streams (keep-alive).
* Decodes chunked encoding.
* Upgrade support
* Defends against buffer overflow attacks.
The parser extracts the following information from HTTP messages:
* Header fields and values
* Content-Length
* Request method
* Response status code
* Transfer-Encoding
* HTTP version
* Request path, query string, fragment
* Message body
Usage
-----
One `http_parser` object is used per TCP connection. Initialize the struct
using `http_parser_init()` and set the callbacks. That might look something
like this for a request parser:
http_parser_settings settings;
settings.on_path = my_path_callback;
settings.on_header_field = my_header_field_callback;
/* ... */
http_parser *parser = malloc(sizeof(http_parser));
http_parser_init(parser, HTTP_REQUEST);
parser->data = my_socket;
When data is received on the socket execute the parser and check for errors.
size_t len = 80*1024, nparsed;
char buf[len];
ssize_t recved;
recved = recv(fd, buf, len, 0);
if (recved < 0) {
/* Handle error. */
}
/* Start up / continue the parser.
* Note we pass recved==0 to signal that EOF has been recieved.
*/
nparsed = http_parser_execute(parser, &settings, buf, recved);
if (parser->upgrade) {
/* handle new protocol */
} else if (nparsed != recved) {
/* Handle error. Usually just close the connection. */
}
HTTP needs to know where the end of the stream is. For example, sometimes
servers send responses without Content-Length and expect the client to
consume input (for the body) until EOF. To tell http_parser about EOF, give
`0` as the forth parameter to `http_parser_execute()`. Callbacks and errors
can still be encountered during an EOF, so one must still be prepared
to receive them.
Scalar valued message information such as `status_code`, `method`, and the
HTTP version are stored in the parser structure. This data is only
temporally stored in `http_parser` and gets reset on each new message. If
this information is needed later, copy it out of the structure during the
`headers_complete` callback.
The parser decodes the transfer-encoding for both requests and responses
transparently. That is, a chunked encoding is decoded before being sent to
the on_body callback.
The Special Problem of Upgrade
------------------------------
HTTP supports upgrading the connection to a different protocol. An
increasingly common example of this is the Web Socket protocol which sends
a request like
GET /demo HTTP/1.1
Upgrade: WebSocket
Connection: Upgrade
Host: example.com
Origin: http://example.com
WebSocket-Protocol: sample
followed by non-HTTP data.
(See http://tools.ietf.org/html/draft-hixie-thewebsocketprotocol-75 for more
information the Web Socket protocol.)
To support this, the parser will treat this as a normal HTTP message without a
body. Issuing both on_headers_complete and on_message_complete callbacks. However
http_parser_execute() will stop parsing at the end of the headers and return.
The user is expected to check if `parser->upgrade` has been set to 1 after
`http_parser_execute()` returns. Non-HTTP data begins at the buffer supplied
offset by the return value of `http_parser_execute()`.
Callbacks
---------
During the `http_parser_execute()` call, the callbacks set in
`http_parser_settings` will be executed. The parser maintains state and
never looks behind, so buffering the data is not necessary. If you need to
save certain data for later usage, you can do that from the callbacks.
There are two types of callbacks:
* notification `typedef int (*http_cb) (http_parser*);`
Callbacks: on_message_begin, on_headers_complete, on_message_complete.
* data `typedef int (*http_data_cb) (http_parser*, const char *at, size_t length);`
Callbacks: (requests only) on_path, on_query_string, on_uri, on_fragment,
(common) on_header_field, on_header_value, on_body;
Callbacks must return 0 on success. Returning a non-zero value indicates
error to the parser, making it exit immediately.
In case you parse HTTP message in chunks (i.e. `read()` request line
from socket, parse, read half headers, parse, etc) your data callbacks
may be called more than once. Http-parser guarantees that data pointer is only
valid for the lifetime of callback. You can also `read()` into a heap allocated
buffer to avoid copying memory around if this fits your application.
Reading headers may be a tricky task if you read/parse headers partially.
Basically, you need to remember whether last header callback was field or value
and apply following logic:
(on_header_field and on_header_value shortened to on_h_*)
------------------------ ------------ --------------------------------------------
| State (prev. callback) | Callback | Description/action |
------------------------ ------------ --------------------------------------------
| nothing (first call) | on_h_field | Allocate new buffer and copy callback data |
| | | into it |
------------------------ ------------ --------------------------------------------
| value | on_h_field | New header started. |
| | | Copy current name,value buffers to headers |
| | | list and allocate new buffer for new name |
------------------------ ------------ --------------------------------------------
| field | on_h_field | Previous name continues. Reallocate name |
| | | buffer and append callback data to it |
------------------------ ------------ --------------------------------------------
| field | on_h_value | Value for current header started. Allocate |
| | | new buffer and copy callback data to it |
------------------------ ------------ --------------------------------------------
| value | on_h_value | Value continues. Reallocate value buffer |
| | | and append callback data to it |
------------------------ ------------ --------------------------------------------
See examples of reading in headers:
* [partial example](http://gist.github.com/155877) in C
* [from http-parser tests](http://github.com/ry/http-parser/blob/37a0ff8928fb0d83cec0d0d8909c5a4abcd221af/test.c#L403) in C
* [from Node library](http://github.com/ry/node/blob/842eaf446d2fdcb33b296c67c911c32a0dabc747/src/http.js#L284) in Javascript

1644
http_parser/http_parser.c Normal file

File diff suppressed because it is too large Load diff

183
http_parser/http_parser.h Normal file
View file

@ -0,0 +1,183 @@
/* Copyright Joyent, Inc. and other Node contributors. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal in the Software without restriction, including without limitation the
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
* sell copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#ifndef http_parser_h
#define http_parser_h
#ifdef __cplusplus
extern "C" {
#endif
#define HTTP_PARSER_VERSION_MAJOR 1
#define HTTP_PARSER_VERSION_MINOR 0
#include <sys/types.h>
#if defined(_WIN32) && !defined(__MINGW32__)
typedef __int8 int8_t;
typedef unsigned __int8 uint8_t;
typedef __int16 int16_t;
typedef unsigned __int16 uint16_t;
typedef __int32 int32_t;
typedef unsigned __int32 uint32_t;
typedef __int64 int64_t;
typedef unsigned __int64 uint64_t;
typedef unsigned int size_t;
typedef int ssize_t;
#else
#include <stdint.h>
#endif
/* Compile with -DHTTP_PARSER_STRICT=0 to make less checks, but run
* faster
*/
#ifndef HTTP_PARSER_STRICT
# define HTTP_PARSER_STRICT 1
#else
# define HTTP_PARSER_STRICT 0
#endif
/* Maximium header size allowed */
#define HTTP_MAX_HEADER_SIZE (80*1024)
typedef struct http_parser http_parser;
typedef struct http_parser_settings http_parser_settings;
/* Callbacks should return non-zero to indicate an error. The parser will
* then halt execution.
*
* The one exception is on_headers_complete. In a HTTP_RESPONSE parser
* returning '1' from on_headers_complete will tell the parser that it
* should not expect a body. This is used when receiving a response to a
* HEAD request which may contain 'Content-Length' or 'Transfer-Encoding:
* chunked' headers that indicate the presence of a body.
*
* http_data_cb does not return data chunks. It will be call arbitrarally
* many times for each string. E.G. you might get 10 callbacks for "on_path"
* each providing just a few characters more data.
*/
typedef int (*http_data_cb) (http_parser*, const char *at, size_t length);
typedef int (*http_cb) (http_parser*);
/* Request Methods */
enum http_method
{ HTTP_DELETE = 0
, HTTP_GET
, HTTP_HEAD
, HTTP_POST
, HTTP_PUT
/* pathological */
, HTTP_CONNECT
, HTTP_OPTIONS
, HTTP_TRACE
/* webdav */
, HTTP_COPY
, HTTP_LOCK
, HTTP_MKCOL
, HTTP_MOVE
, HTTP_PROPFIND
, HTTP_PROPPATCH
, HTTP_UNLOCK
/* subversion */
, HTTP_REPORT
, HTTP_MKACTIVITY
, HTTP_CHECKOUT
, HTTP_MERGE
/* upnp */
, HTTP_MSEARCH
, HTTP_NOTIFY
, HTTP_SUBSCRIBE
, HTTP_UNSUBSCRIBE
};
enum http_parser_type { HTTP_REQUEST, HTTP_RESPONSE, HTTP_BOTH };
struct http_parser {
/** PRIVATE **/
unsigned char type : 2;
unsigned char flags : 6;
unsigned char state;
unsigned char header_state;
unsigned char index;
uint32_t nread;
int64_t content_length;
/** READ-ONLY **/
unsigned short http_major;
unsigned short http_minor;
unsigned short status_code; /* responses only */
unsigned char method; /* requests only */
/* 1 = Upgrade header was present and the parser has exited because of that.
* 0 = No upgrade header present.
* Should be checked when http_parser_execute() returns in addition to
* error checking.
*/
char upgrade;
/** PUBLIC **/
void *data; /* A pointer to get hook to the "connection" or "socket" object */
};
struct http_parser_settings {
http_cb on_message_begin;
http_data_cb on_path;
http_data_cb on_query_string;
http_data_cb on_url;
http_data_cb on_fragment;
http_data_cb on_header_field;
http_data_cb on_header_value;
http_cb on_headers_complete;
http_data_cb on_body;
http_cb on_message_complete;
};
void http_parser_init(http_parser *parser, enum http_parser_type type);
size_t http_parser_execute(http_parser *parser,
const http_parser_settings *settings,
const char *data,
size_t len);
/* If http_should_keep_alive() in the on_headers_complete or
* on_message_complete callback returns true, then this will be should be
* the last message on the connection.
* If you are the server, respond with the "Connection: close" header.
* If you are the client, close the connection.
*/
int http_should_keep_alive(http_parser *parser);
/* Returns a string version of the HTTP method. */
const char *http_method_str(enum http_method);
#ifdef __cplusplus
}
#endif
#endif

1952
http_parser/test.c Normal file

File diff suppressed because it is too large Load diff

1
oniguruma/AUTHORS Normal file
View file

@ -0,0 +1 @@
sndgk393 AT ybb DOT ne DOT jp (K.Kosako)

49
oniguruma/CMakeLists.txt Normal file
View file

@ -0,0 +1,49 @@
include_directories(${CMAKE_CURRENT_SOURCE_DIR})
include_directories(${CMAKE_CURRENT_BINARY_DIR})
INCLUDE (CheckFunctionExists)
INCLUDE (CheckIncludeFiles)
INCLUDE (CheckTypeSize)
CHECK_FUNCTION_EXISTS(alloca C_ALLOCA)
CHECK_FUNCTION_EXISTS(memcmp HAVE_MEMCMP)
CHECK_INCLUDE_FILES(alloca.h HAVE_ALLOCA_H)
CHECK_INCLUDE_FILES(strings.h HAVE_STRINGS_H)
CHECK_INCLUDE_FILES(string.h HAVE_STRING_H)
CHECK_INCLUDE_FILES(stdlib.h HAVE_STDLIB_H)
CHECK_INCLUDE_FILES(sys/time.h HAVE_SYS_TIME_H)
CHECK_INCLUDE_FILES(sys/times.h HAVE_SYS_TIMES_H)
CHECK_INCLUDE_FILES(unistd.h HAVE_UNISTD_H)
CHECK_INCLUDE_FILES(memory.h HAVE_MEMORY_H)
CHECK_INCLUDE_FILES(stdarg.h HAVE_STDARG_PROTOTYPES)
CHECK_TYPE_SIZE("int" SIZEOF_INT)
CHECK_TYPE_SIZE("long" SIZEOF_LONG)
CHECK_TYPE_SIZE("short" SIZEOF_SHORT)
CONFIGURE_FILE(${CMAKE_CURRENT_SOURCE_DIR}/config.h.in ${CMAKE_CURRENT_BINARY_DIR}/config.h)
include_directories(${CMAKE_CURRENT_SOURCE_DIR})
set(SOURCES regint.h regparse.h regenc.h st.h
regerror.c regparse.c regext.c regcomp.c regexec.c reggnu.c
regenc.c regsyntax.c regtrav.c regversion.c st.c
regposix.c regposerr.c
enc/unicode.c enc/ascii.c enc/utf8.c
enc/utf16_be.c enc/utf16_le.c
enc/utf32_be.c enc/utf32_le.c
enc/euc_jp.c enc/sjis.c enc/iso8859_1.c
enc/iso8859_2.c enc/iso8859_3.c
enc/iso8859_4.c enc/iso8859_5.c
enc/iso8859_6.c enc/iso8859_7.c
enc/iso8859_8.c enc/iso8859_9.c
enc/iso8859_10.c enc/iso8859_11.c
enc/iso8859_13.c enc/iso8859_14.c
enc/iso8859_15.c enc/iso8859_16.c
enc/euc_tw.c enc/euc_kr.c enc/big5.c
enc/gb18030.c enc/koi8_r.c enc/cp1251.c)
add_library(libonig STATIC ${SOURCES})
set_target_properties(libonig PROPERTIES OUTPUT_NAME "libonig")

28
oniguruma/COPYING Normal file
View file

@ -0,0 +1,28 @@
Oniguruma LICENSE
-----------------
/*-
* Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/

2052
oniguruma/HISTORY Normal file

File diff suppressed because it is too large Load diff

236
oniguruma/INSTALL Normal file
View file

@ -0,0 +1,236 @@
Installation Instructions
*************************
Copyright (C) 1994, 1995, 1996, 1999, 2000, 2001, 2002, 2004, 2005 Free
Software Foundation, Inc.
This file is free documentation; the Free Software Foundation gives
unlimited permission to copy, distribute and modify it.
Basic Installation
==================
These are generic installation instructions.
The `configure' shell script attempts to guess correct values for
various system-dependent variables used during compilation. It uses
those values to create a `Makefile' in each directory of the package.
It may also create one or more `.h' files containing system-dependent
definitions. Finally, it creates a shell script `config.status' that
you can run in the future to recreate the current configuration, and a
file `config.log' containing compiler output (useful mainly for
debugging `configure').
It can also use an optional file (typically called `config.cache'
and enabled with `--cache-file=config.cache' or simply `-C') that saves
the results of its tests to speed up reconfiguring. (Caching is
disabled by default to prevent problems with accidental use of stale
cache files.)
If you need to do unusual things to compile the package, please try
to figure out how `configure' could check whether to do them, and mail
diffs or instructions to the address given in the `README' so they can
be considered for the next release. If you are using the cache, and at
some point `config.cache' contains results you don't want to keep, you
may remove or edit it.
The file `configure.ac' (or `configure.in') is used to create
`configure' by a program called `autoconf'. You only need
`configure.ac' if you want to change it or regenerate `configure' using
a newer version of `autoconf'.
The simplest way to compile this package is:
1. `cd' to the directory containing the package's source code and type
`./configure' to configure the package for your system. If you're
using `csh' on an old version of System V, you might need to type
`sh ./configure' instead to prevent `csh' from trying to execute
`configure' itself.
Running `configure' takes awhile. While running, it prints some
messages telling which features it is checking for.
2. Type `make' to compile the package.
3. Optionally, type `make check' to run any self-tests that come with
the package.
4. Type `make install' to install the programs and any data files and
documentation.
5. You can remove the program binaries and object files from the
source code directory by typing `make clean'. To also remove the
files that `configure' created (so you can compile the package for
a different kind of computer), type `make distclean'. There is
also a `make maintainer-clean' target, but that is intended mainly
for the package's developers. If you use it, you may have to get
all sorts of other programs in order to regenerate files that came
with the distribution.
Compilers and Options
=====================
Some systems require unusual options for compilation or linking that the
`configure' script does not know about. Run `./configure --help' for
details on some of the pertinent environment variables.
You can give `configure' initial values for configuration parameters
by setting variables in the command line or in the environment. Here
is an example:
./configure CC=c89 CFLAGS=-O2 LIBS=-lposix
*Note Defining Variables::, for more details.
Compiling For Multiple Architectures
====================================
You can compile the package for more than one kind of computer at the
same time, by placing the object files for each architecture in their
own directory. To do this, you must use a version of `make' that
supports the `VPATH' variable, such as GNU `make'. `cd' to the
directory where you want the object files and executables to go and run
the `configure' script. `configure' automatically checks for the
source code in the directory that `configure' is in and in `..'.
If you have to use a `make' that does not support the `VPATH'
variable, you have to compile the package for one architecture at a
time in the source code directory. After you have installed the
package for one architecture, use `make distclean' before reconfiguring
for another architecture.
Installation Names
==================
By default, `make install' will install the package's files in
`/usr/local/bin', `/usr/local/man', etc. You can specify an
installation prefix other than `/usr/local' by giving `configure' the
option `--prefix=PREFIX'.
You can specify separate installation prefixes for
architecture-specific files and architecture-independent files. If you
give `configure' the option `--exec-prefix=PREFIX', the package will
use PREFIX as the prefix for installing programs and libraries.
Documentation and other data files will still use the regular prefix.
In addition, if you use an unusual directory layout you can give
options like `--bindir=DIR' to specify different values for particular
kinds of files. Run `configure --help' for a list of the directories
you can set and what kinds of files go in them.
If the package supports it, you can cause programs to be installed
with an extra prefix or suffix on their names by giving `configure' the
option `--program-prefix=PREFIX' or `--program-suffix=SUFFIX'.
Optional Features
=================
Some packages pay attention to `--enable-FEATURE' options to
`configure', where FEATURE indicates an optional part of the package.
They may also pay attention to `--with-PACKAGE' options, where PACKAGE
is something like `gnu-as' or `x' (for the X Window System). The
`README' should mention any `--enable-' and `--with-' options that the
package recognizes.
For packages that use the X Window System, `configure' can usually
find the X include and library files automatically, but if it doesn't,
you can use the `configure' options `--x-includes=DIR' and
`--x-libraries=DIR' to specify their locations.
Specifying the System Type
==========================
There may be some features `configure' cannot figure out automatically,
but needs to determine by the type of machine the package will run on.
Usually, assuming the package is built to be run on the _same_
architectures, `configure' can figure that out, but if it prints a
message saying it cannot guess the machine type, give it the
`--build=TYPE' option. TYPE can either be a short name for the system
type, such as `sun4', or a canonical name which has the form:
CPU-COMPANY-SYSTEM
where SYSTEM can have one of these forms:
OS KERNEL-OS
See the file `config.sub' for the possible values of each field. If
`config.sub' isn't included in this package, then this package doesn't
need to know the machine type.
If you are _building_ compiler tools for cross-compiling, you should
use the `--target=TYPE' option to select the type of system they will
produce code for.
If you want to _use_ a cross compiler, that generates code for a
platform different from the build platform, you should specify the
"host" platform (i.e., that on which the generated programs will
eventually be run) with `--host=TYPE'.
Sharing Defaults
================
If you want to set default values for `configure' scripts to share, you
can create a site shell script called `config.site' that gives default
values for variables like `CC', `cache_file', and `prefix'.
`configure' looks for `PREFIX/share/config.site' if it exists, then
`PREFIX/etc/config.site' if it exists. Or, you can set the
`CONFIG_SITE' environment variable to the location of the site script.
A warning: not all `configure' scripts look for a site script.
Defining Variables
==================
Variables not defined in a site shell script can be set in the
environment passed to `configure'. However, some packages may run
configure again during the build, and the customized values of these
variables may be lost. In order to avoid this problem, you should set
them in the `configure' command line, using `VAR=value'. For example:
./configure CC=/usr/local2/bin/gcc
causes the specified `gcc' to be used as the C compiler (unless it is
overridden in the site shell script). Here is a another example:
/bin/bash ./configure CONFIG_SHELL=/bin/bash
Here the `CONFIG_SHELL=/bin/bash' operand causes subsequent
configuration-related scripts to be executed by `/bin/bash'.
`configure' Invocation
======================
`configure' recognizes the following options to control how it operates.
`--help'
`-h'
Print a summary of the options to `configure', and exit.
`--version'
`-V'
Print the version of Autoconf used to generate the `configure'
script, and exit.
`--cache-file=FILE'
Enable the cache: use and save the results of the tests in FILE,
traditionally `config.cache'. FILE defaults to `/dev/null' to
disable caching.
`--config-cache'
`-C'
Alias for `--cache-file=config.cache'.
`--quiet'
`--silent'
`-q'
Do not print messages saying which checks are being made. To
suppress all normal output, redirect it to `/dev/null' (any error
messages will still be shown).
`--srcdir=DIR'
Look for the package's source code in directory DIR. Usually
`configure' can determine that directory automatically.
`configure' also accepts some other, not widely useful, options. Run
`configure --help' for more details.

94
oniguruma/Makefile.am Normal file
View file

@ -0,0 +1,94 @@
## Makefile.am for Oniguruma
encdir = $(top_srcdir)/enc
sampledir = $(top_srcdir)/sample
libname = libonig.la
#AM_CFLAGS = -DNOT_RUBY
AM_CFLAGS =
INCLUDES = -I$(top_srcdir) -I$(includedir)
SUBDIRS = . sample
include_HEADERS = oniguruma.h oniggnu.h onigposix.h
lib_LTLIBRARIES = $(libname)
libonig_la_SOURCES = regint.h regparse.h regenc.h st.h \
regerror.c regparse.c regext.c regcomp.c regexec.c reggnu.c \
regenc.c regsyntax.c regtrav.c regversion.c st.c \
regposix.c regposerr.c \
$(encdir)/unicode.c $(encdir)/ascii.c $(encdir)/utf8.c \
$(encdir)/utf16_be.c $(encdir)/utf16_le.c \
$(encdir)/utf32_be.c $(encdir)/utf32_le.c \
$(encdir)/euc_jp.c $(encdir)/sjis.c $(encdir)/iso8859_1.c \
$(encdir)/iso8859_2.c $(encdir)/iso8859_3.c \
$(encdir)/iso8859_4.c $(encdir)/iso8859_5.c \
$(encdir)/iso8859_6.c $(encdir)/iso8859_7.c \
$(encdir)/iso8859_8.c $(encdir)/iso8859_9.c \
$(encdir)/iso8859_10.c $(encdir)/iso8859_11.c \
$(encdir)/iso8859_13.c $(encdir)/iso8859_14.c \
$(encdir)/iso8859_15.c $(encdir)/iso8859_16.c \
$(encdir)/euc_tw.c $(encdir)/euc_kr.c $(encdir)/big5.c \
$(encdir)/gb18030.c $(encdir)/koi8_r.c $(encdir)/cp1251.c
libonig_la_LDFLAGS = -version-info $(LTVERSION)
EXTRA_DIST = HISTORY README.ja index.html index_ja.html \
doc/API doc/API.ja doc/RE doc/RE.ja doc/FAQ doc/FAQ.ja \
win32/Makefile win32/config.h win32/testc.c \
$(encdir)/koi8.c $(encdir)/mktable.c \
$(sampledir)/encode.c $(sampledir)/listcap.c $(sampledir)/names.c \
$(sampledir)/posix.c $(sampledir)/simple.c $(sampledir)/sql.c \
$(sampledir)/syntax.c
bin_SCRIPTS = onig-config
onig-config: onig-config.in
dll:
$(CXX) -shared -Wl,--output-def,libonig.def -o libonig.dll *.o \
$(LIBS)
strip libonig.dll
# Ruby TEST
rtest:
$(RUBYDIR)/ruby -w -Ke $(srcdir)/test.rb
# character-types-table source generator
mktable: $(encdir)/mktable.c $(srcdir)/regenc.h
$(CC) -I$(top_srcdir) -o mktable $(encdir)/mktable.c
# TEST
TESTS = testc testp testcu
check_PROGRAMS = testc testp testcu
atest: testc testp testcu
@echo "[Oniguruma API, ASCII/EUC-JP check]"
@$(top_builddir)/testc | grep RESULT
@echo "[POSIX API, ASCII/EUC-JP check]"
@$(top_builddir)/testp | grep RESULT
@echo "[Oniguruma API, UTF-16 check]"
@$(top_builddir)/testcu | grep RESULT
testc_SOURCES = testc.c
testc_LDADD = libonig.la
testp_SOURCES = testc.c
testp_LDADD = libonig.la
testp_CFLAGS = -DPOSIX_TEST
testcu_SOURCES = testu.c
testcu_LDADD = libonig.la
#testc.c: $(srcdir)/test.rb $(srcdir)/testconv.rb
# ruby -Ke $(srcdir)/testconv.rb < $(srcdir)/test.rb > $@
#testu.c: $(srcdir)/test.rb $(srcdir)/testconvu.rb
# ruby -Ke $(srcdir)/testconvu.rb $(srcdir)/test.rb > $@
#win32/testc.c: $(srcdir)/test.rb $(srcdir)/testconv.rb
# ruby -Ke $(srcdir)/testconv.rb -win < $(srcdir)/test.rb | nkf -cs > $@
## END OF FILE

1238
oniguruma/Makefile.in Normal file

File diff suppressed because it is too large Load diff

189
oniguruma/README Normal file
View file

@ -0,0 +1,189 @@
README 2007/05/31
Oniguruma ---- (C) K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
http://www.geocities.jp/kosako3/oniguruma/
Oniguruma is a regular expressions library.
The characteristics of this library is that different character encoding
for every regular expression object can be specified.
Supported character encodings:
ASCII, UTF-8, UTF-16BE, UTF-16LE, UTF-32BE, UTF-32LE,
EUC-JP, EUC-TW, EUC-KR, EUC-CN,
Shift_JIS, Big5, GB18030, KOI8-R, CP1251,
ISO-8859-1, ISO-8859-2, ISO-8859-3, ISO-8859-4, ISO-8859-5,
ISO-8859-6, ISO-8859-7, ISO-8859-8, ISO-8859-9, ISO-8859-10,
ISO-8859-11, ISO-8859-13, ISO-8859-14, ISO-8859-15, ISO-8859-16
* GB18030: contributed by KUBO Takehiro
* CP1251: contributed by Byte
------------------------------------------------------------
License
BSD license.
Install
Case 1: Unix and Cygwin platform
1. ./configure
2. make
3. make install
* uninstall
make uninstall
* test (ASCII/EUC-JP)
make atest
* configuration check
onig-config --cflags
onig-config --libs
onig-config --prefix
onig-config --exec-prefix
Case 2: Win32 platform (VC++)
1. copy win32\Makefile Makefile
2. copy win32\config.h config.h
3. nmake
onig_s.lib: static link library
onig.dll: dynamic link library
* test (ASCII/Shift_JIS)
4. copy win32\testc.c testc.c
5. nmake ctest
Regular Expressions
See doc/RE (or doc/RE.ja for Japanese).
Usage
Include oniguruma.h in your program. (Oniguruma API)
See doc/API for Oniguruma API.
If you want to disable UChar type (== unsigned char) definition
in oniguruma.h, define ONIG_ESCAPE_UCHAR_COLLISION and then
include oniguruma.h.
If you want to disable regex_t type definition in oniguruma.h,
define ONIG_ESCAPE_REGEX_T_COLLISION and then include oniguruma.h.
Example of the compiling/linking command line in Unix or Cygwin,
(prefix == /usr/local case)
cc sample.c -L/usr/local/lib -lonig
If you want to use static link library(onig_s.lib) in Win32,
add option -DONIG_EXTERN=extern to C compiler.
Sample Programs
sample/simple.c example of the minimum (Oniguruma API)
sample/names.c example of the named group callback.
sample/encode.c example of some encodings.
sample/listcap.c example of the capture history.
sample/posix.c POSIX API sample.
sample/sql.c example of the variable meta characters.
(SQL-like pattern matching)
Test Programs
sample/syntax.c Perl, Java and ASIS syntax test.
sample/crnl.c --enable-crnl-as-line-terminator test
Source Files
oniguruma.h Oniguruma API header file. (public)
onig-config.in configuration check program template.
regenc.h character encodings framework header file.
regint.h internal definitions
regparse.h internal definitions for regparse.c and regcomp.c
regcomp.c compiling and optimization functions
regenc.c character encodings framework.
regerror.c error message function
regext.c extended API functions. (deluxe version API)
regexec.c search and match functions
regparse.c parsing functions.
regsyntax.c pattern syntax functions and built-in syntax definitions.
regtrav.c capture history tree data traverse functions.
regversion.c version info function.
st.h hash table functions header file
st.c hash table functions
oniggnu.h GNU regex API header file. (public)
reggnu.c GNU regex API functions
onigposix.h POSIX API header file. (public)
regposerr.c POSIX error message function.
regposix.c POSIX API functions.
enc/mktable.c character type table generator.
enc/ascii.c ASCII encoding.
enc/euc_jp.c EUC-JP encoding.
enc/euc_tw.c EUC-TW encoding.
enc/euc_kr.c EUC-KR, EUC-CN encoding.
enc/sjis.c Shift_JIS encoding.
enc/big5.c Big5 encoding.
enc/gb18030.c GB18030 encoding.
enc/koi8.c KOI8 encoding.
enc/koi8_r.c KOI8-R encoding.
enc/cp1251.c CP1251 encoding.
enc/iso8859_1.c ISO-8859-1 encoding. (Latin-1)
enc/iso8859_2.c ISO-8859-2 encoding. (Latin-2)
enc/iso8859_3.c ISO-8859-3 encoding. (Latin-3)
enc/iso8859_4.c ISO-8859-4 encoding. (Latin-4)
enc/iso8859_5.c ISO-8859-5 encoding. (Cyrillic)
enc/iso8859_6.c ISO-8859-6 encoding. (Arabic)
enc/iso8859_7.c ISO-8859-7 encoding. (Greek)
enc/iso8859_8.c ISO-8859-8 encoding. (Hebrew)
enc/iso8859_9.c ISO-8859-9 encoding. (Latin-5 or Turkish)
enc/iso8859_10.c ISO-8859-10 encoding. (Latin-6 or Nordic)
enc/iso8859_11.c ISO-8859-11 encoding. (Thai)
enc/iso8859_13.c ISO-8859-13 encoding. (Latin-7 or Baltic Rim)
enc/iso8859_14.c ISO-8859-14 encoding. (Latin-8 or Celtic)
enc/iso8859_15.c ISO-8859-15 encoding. (Latin-9 or West European with Euro)
enc/iso8859_16.c ISO-8859-16 encoding.
(Latin-10 or South-Eastern European with Euro)
enc/utf8.c UTF-8 encoding.
enc/utf16_be.c UTF-16BE encoding.
enc/utf16_le.c UTF-16LE encoding.
enc/utf32_be.c UTF-32BE encoding.
enc/utf32_le.c UTF-32LE encoding.
enc/unicode.c Unicode information data.
win32/Makefile Makefile for Win32 (VC++)
win32/config.h config.h for Win32
ToDo
? case fold flag: Katakana <-> Hiragana.
? add ONIG_OPTION_NOTBOS/NOTEOS. (\A, \z, \Z)
?? \X (== \PM\pM*)
?? implement syntax behavior ONIG_SYN_CONTEXT_INDEP_ANCHORS.
?? transmission stopper. (return ONIG_STOP from match_at())
and I'm thankful to Akinori MUSHA.
Mail Address: K.Kosako <sndgk393 AT ybb DOT ne DOT jp>

195
oniguruma/README.ja Normal file
View file

@ -0,0 +1,195 @@
README.ja 2007/05/31
鬼車 ---- (C) K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
http://www.geocities.jp/kosako3/oniguruma/
鬼車は正規表現ライブラリである。
このライブラリの特長は、それぞれの正規表現オブジェクトごとに
文字エンコーディングを指定できることである。
サポートしている文字エンコーディング:
ASCII, UTF-8, UTF-16BE, UTF-16LE, UTF-32BE, UTF-32LE,
EUC-JP, EUC-TW, EUC-KR, EUC-CN,
Shift_JIS, Big5, GB18030, KOI8-R, CP1251,
ISO-8859-1, ISO-8859-2, ISO-8859-3, ISO-8859-4, ISO-8859-5,
ISO-8859-6, ISO-8859-7, ISO-8859-8, ISO-8859-9, ISO-8859-10,
ISO-8859-11, ISO-8859-13, ISO-8859-14, ISO-8859-15, ISO-8859-16
* GB18030: 久保健洋氏提供
* CP1251: Byte氏提供
------------------------------------------------------------
ライセンス
BSDライセンスに従う。
インストール
ケース1: UnixとCygwin環境
1. ./configure
2. make
3. make install
アンインストール
make uninstall
動作テスト (ASCII/EUC-JP)
make atest
構成確認
onig-config --cflags
onig-config --libs
onig-config --prefix
onig-config --exec-prefix
ケース2: Win32(VC++)環境
1. copy win32\Makefile Makefile
2. copy win32\config.h config.h
3. nmake
onig_s.lib: static link library
onig.dll: dynamic link library
* 動作テスト (ASCII/Shift_JIS)
4. copy win32\testc.c testc.c
5. nmake ctest
正規表現
doc/RE.jaを参照
使用方法
使用するプログラムで、oniguruma.hをインクルードする(Oniguruma APIの場合)。
Oniguruma APIについては、doc/API.jaを参照。
oniguruma.hで定義されている型名UChar(== unsigned char)を無効にしたい場合
には、ONIG_ESCAPE_UCHAR_COLLISIONをdefineしてからoniguruma.hをインクルード
すること。このときにはUCharは定義されず、OnigUCharという名前の定義のみが
有効になる。
oniguruma.hで定義されている型名regex_tを無効にしたい場合には、
ONIG_ESCAPE_REGEX_T_COLLISIONをdefineしてからoniguruma.hをインクルード
すること。このときにはregex_tは定義されず、OnigRegexType, OnigRegexという
名前の定義のみが有効になる。
Unix/Cygwin上でコンパイル、リンクする場合の例
(prefixが/usr/localのとき)
cc sample.c -L/usr/local/lib -lonig
GNU libtoolを使用しているので、プラットフォームが共有ライブラリをサポートして
いれば、使用できるようになっている。
静的ライブラリと共有ライブラリのどちらを使用するかを指定する方法、実行時点での
環境設定方法については、自分で調べて下さい。
Win32でスタティックリンクライブラリ(onig_s.lib)をリンクする場合には、
コンパイルするときに -DONIG_EXTERN=extern をコンパイル引数に追加すること。
使用例プログラム
sample/simple.c 最小例 (Oniguruma API)
sample/names.c 名前付きグループコールバック使用例
sample/encode.c 幾つかの文字エンコーディング使用例
sample/listcap.c 捕獲履歴機能の使用例
sample/posix.c POSIX API使用例
sample/sql.c 可変メタ文字機能使用例 (SQL-like パターン)
テストプログラム
sample/syntax.c Perl、Java、ASIS文法のテスト
sample/crnl.c --enable-crnl-as-line-terminator テスト
ソースファイル
oniguruma.h 鬼車APIヘッダ (公開)
onig-config.in onig-configプログラム テンプレート
regenc.h 文字エンコーディング枠組みヘッダ
regint.h 内部宣言
regparse.h regparse.cとregcomp.cのための内部宣言
regcomp.c コンパイル、最適化関数
regenc.c 文字エンコーディング枠組み
regerror.c エラーメッセージ関数
regext.c 拡張API関数
regexec.c 検索、照合関数
regparse.c 正規表現パターン解析関数
regsyntax.c 正規表現パターン文法関数、組込み文法定義
regtrav.c 捕獲履歴木巡回関数
regversion.c 版情報関数
st.h ハッシュテーブル関数宣言
st.c ハッシュテーブル関数
oniggnu.h GNU regex APIヘッダ (公開)
reggnu.c GNU regex API関数
onigposix.h POSIX APIヘッダ (公開)
regposerr.c POSIX APIエラーメッセージ関数
regposix.c POSIX API関数
enc/mktable.c 文字タイプテーブル生成プログラム
enc/ascii.c ASCII エンコーディング
enc/euc_jp.c EUC-JP エンコーディング
enc/euc_tw.c EUC-TW エンコーディング
enc/euc_kr.c EUC-KR, EUC-CN エンコーディング
enc/sjis.c Shift_JIS エンコーディング
enc/big5.c Big5 エンコーディング
enc/gb18030.c GB18030 エンコーディング
enc/koi8.c KOI8 エンコーディング
enc/koi8_r.c KOI8-R エンコーディング
enc/cp1251.c CP1251 エンコーディング
enc/iso8859_1.c ISO-8859-1 (Latin-1)
enc/iso8859_2.c ISO-8859-2 (Latin-2)
enc/iso8859_3.c ISO-8859-3 (Latin-3)
enc/iso8859_4.c ISO-8859-4 (Latin-4)
enc/iso8859_5.c ISO-8859-5 (Cyrillic)
enc/iso8859_6.c ISO-8859-6 (Arabic)
enc/iso8859_7.c ISO-8859-7 (Greek)
enc/iso8859_8.c ISO-8859-8 (Hebrew)
enc/iso8859_9.c ISO-8859-9 (Latin-5 または Turkish)
enc/iso8859_10.c ISO-8859-10 (Latin-6 または Nordic)
enc/iso8859_11.c ISO-8859-11 (Thai)
enc/iso8859_13.c ISO-8859-13 (Latin-7 または Baltic Rim)
enc/iso8859_14.c ISO-8859-14 (Latin-8 または Celtic)
enc/iso8859_15.c ISO-8859-15 (Latin-9 または West European with Euro)
enc/iso8859_16.c ISO-8859-16
(Latin-10 または South-Eastern European with Euro)
enc/utf8.c UTF-8 エンコーディング
enc/utf16_be.c UTF-16BE エンコーディング
enc/utf16_le.c UTF-16LE エンコーディング
enc/utf32_be.c UTF-32BE エンコーディング
enc/utf32_le.c UTF-32LE エンコーディング
enc/unicode.c Unicode情報
win32/Makefile Win32用 Makefile (for VC++)
win32/config.h Win32用 config.h
残件
? case fold flag: Katakana <-> Hiragana
? ONIG_OPTION_NOTBOS/NOTEOS追加 (\A, \z, \Z)
?? \X (== \PM\pM*)
?? 文法要素 ONIG_SYN_CONTEXT_INDEP_ANCHORSの実装
?? 検索位置移動停止演算子 (match_at()からONIG_STOPを返す)
and I'm thankful to Akinori MUSHA.
アドレス: K.Kosako <sndgk393 AT ybb DOT ne DOT jp>

34
oniguruma/config.h.in Normal file
View file

@ -0,0 +1,34 @@
#cmakedefine CRAY_STACKSEG_END 1
#cmakedefine C_ALLOCA 1
#cmakedefine HAVE_ALLOCA 1
#cmakedefine HAVE_ALLOCA_H 1
#cmakedefine HAVE_DLFCN_H 1
#cmakedefine HAVE_INTTYPES_H 1
#cmakedefine HAVE_MEMORY_H 1
#cmakedefine HAVE_PROTOTYPES 1
#cmakedefine HAVE_STDARG_PROTOTYPES 1
#cmakedefine HAVE_STDINT_H 1
#cmakedefine HAVE_STDLIB_H 1
#cmakedefine HAVE_STRINGS_H 1
#cmakedefine HAVE_STRING_H 1
#cmakedefine HAVE_SYS_STAT_H 1
#cmakedefine HAVE_SYS_TIMES_H 1
#cmakedefine HAVE_SYS_TIME_H 1
#cmakedefine HAVE_SYS_TYPES_H 1
#cmakedefine HAVE_UNISTD_H 1
#cmakedefine LT_OBJDIR 1
#cmakedefine PACKAGE 1
#cmakedefine PACKAGE_BUGREPORT 1
#cmakedefine PACKAGE_NAME 1
#cmakedefine PACKAGE_STRING 1
#cmakedefine PACKAGE_TARNAME 1
#cmakedefine PACKAGE_VERSION 1
#cmakedefine SIZEOF_INT 1
#cmakedefine SIZEOF_LONG 1
#cmakedefine SIZEOF_SHORT 1
#cmakedefine STACK_DIRECTION 1
#cmakedefine STDC_HEADERS 1
#cmakedefine TIME_WITH_SYS_TIME 1
#cmakedefine USE_COMBINATION_EXPLOSION_CHECK 1
#cmakedefine USE_CRNL_AS_LINE_TERMINATOR 1
#cmakedefine VERSION 1

58
oniguruma/enc/ascii.c Normal file
View file

@ -0,0 +1,58 @@
/**********************************************************************
ascii.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "regenc.h"
static int
ascii_is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
if (code < 128)
return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
else
return FALSE;
}
OnigEncodingType OnigEncodingASCII = {
onigenc_single_byte_mbc_enc_len,
"US-ASCII", /* name */
1, /* max byte length */
1, /* min byte length */
onigenc_is_mbc_newline_0x0a,
onigenc_single_byte_mbc_to_code,
onigenc_single_byte_code_to_mbclen,
onigenc_single_byte_code_to_mbc,
onigenc_ascii_mbc_case_fold,
onigenc_ascii_apply_all_case_fold,
onigenc_ascii_get_case_fold_codes_by_str,
onigenc_minimum_property_name_to_ctype,
ascii_is_code_ctype,
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_always_true_is_allowed_reverse_match
};

162
oniguruma/enc/big5.c Normal file
View file

@ -0,0 +1,162 @@
/**********************************************************************
big5.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "regenc.h"
static const int EncLen_BIG5[] = {
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1
};
static int
big5_mbc_enc_len(const UChar* p)
{
return EncLen_BIG5[*p];
}
static OnigCodePoint
big5_mbc_to_code(const UChar* p, const UChar* end)
{
return onigenc_mbn_mbc_to_code(ONIG_ENCODING_BIG5, p, end);
}
static int
big5_code_to_mbc(OnigCodePoint code, UChar *buf)
{
return onigenc_mb2_code_to_mbc(ONIG_ENCODING_BIG5, code, buf);
}
static int
big5_mbc_case_fold(OnigCaseFoldType flag, const UChar** pp, const UChar* end,
UChar* lower)
{
return onigenc_mbn_mbc_case_fold(ONIG_ENCODING_BIG5, flag,
pp, end, lower);
}
#if 0
static int
big5_is_mbc_ambiguous(OnigCaseFoldType flag,
const UChar** pp, const UChar* end)
{
return onigenc_mbn_is_mbc_ambiguous(ONIG_ENCODING_BIG5, flag, pp, end);
}
#endif
static int
big5_is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
return onigenc_mb2_is_code_ctype(ONIG_ENCODING_BIG5, code, ctype);
}
static const char BIG5_CAN_BE_TRAIL_TABLE[256] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0
};
#define BIG5_ISMB_FIRST(byte) (EncLen_BIG5[byte] > 1)
#define BIG5_ISMB_TRAIL(byte) BIG5_CAN_BE_TRAIL_TABLE[(byte)]
static UChar*
big5_left_adjust_char_head(const UChar* start, const UChar* s)
{
const UChar *p;
int len;
if (s <= start) return (UChar* )s;
p = s;
if (BIG5_ISMB_TRAIL(*p)) {
while (p > start) {
if (! BIG5_ISMB_FIRST(*--p)) {
p++;
break;
}
}
}
len = enclen(ONIG_ENCODING_BIG5, p);
if (p + len > s) return (UChar* )p;
p += len;
return (UChar* )(p + ((s - p) & ~1));
}
static int
big5_is_allowed_reverse_match(const UChar* s, const UChar* end ARG_UNUSED)
{
const UChar c = *s;
return (BIG5_ISMB_TRAIL(c) ? FALSE : TRUE);
}
OnigEncodingType OnigEncodingBIG5 = {
big5_mbc_enc_len,
"Big5", /* name */
2, /* max enc length */
1, /* min enc length */
onigenc_is_mbc_newline_0x0a,
big5_mbc_to_code,
onigenc_mb2_code_to_mbclen,
big5_code_to_mbc,
big5_mbc_case_fold,
onigenc_ascii_apply_all_case_fold,
onigenc_ascii_get_case_fold_codes_by_str,
onigenc_minimum_property_name_to_ctype,
big5_is_code_ctype,
onigenc_not_support_get_ctype_code_range,
big5_left_adjust_char_head,
big5_is_allowed_reverse_match
};

200
oniguruma/enc/cp1251.c Normal file
View file

@ -0,0 +1,200 @@
/**********************************************************************
cp1251.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2006-2007 Byte <byte AT mail DOT kna DOT ru>
* K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "regenc.h"
#define ENC_CP1251_TO_LOWER_CASE(c) EncCP1251_ToLowerCaseTable[c]
#define ENC_IS_CP1251_CTYPE(code,ctype) \
((EncCP1251_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0)
static const UChar EncCP1251_ToLowerCaseTable[256] = {
'\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
'\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
'\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
'\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
'\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
'\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
'\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
'\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
'\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
'\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
'\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
'\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
'\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
'\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
'\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
'\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
'\220', '\203', '\202', '\203', '\204', '\205', '\206', '\207',
'\210', '\211', '\232', '\213', '\234', '\235', '\236', '\237',
'\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
'\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
'\240', '\242', '\242', '\274', '\244', '\264', '\246', '\247',
'\270', '\251', '\272', '\253', '\254', '\255', '\256', '\277',
'\260', '\261', '\263', '\263', '\264', '\265', '\266', '\267',
'\270', '\271', '\272', '\273', '\274', '\276', '\276', '\277',
'\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
'\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
'\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
'\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377',
'\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
'\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
'\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
'\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
};
static const unsigned short EncCP1251_CtypeTable[256] = {
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4008, 0x428c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
0x34a2, 0x34a2, 0x01a0, 0x30e2, 0x01a0, 0x01a0, 0x01a0, 0x01a0,
0x0000, 0x01a0, 0x34a2, 0x01a0, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x30e2, 0x01a0, 0x01a0, 0x01a0, 0x01a0, 0x01a0, 0x01a0, 0x01a0,
0x0008, 0x0000, 0x30e2, 0x01a0, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x0280, 0x34a2, 0x30e2, 0x34a2, 0x01a0, 0x34a2, 0x01a0, 0x01a0,
0x34a2, 0x01a0, 0x34a2, 0x01a0, 0x01a0, 0x01a0, 0x01a0, 0x34a2,
0x01a0, 0x01a0, 0x34a2, 0x30e2, 0x30e2, 0x31e2, 0x01a0, 0x01a0,
0x30e2, 0x0000, 0x30e2, 0x01a0, 0x30e2, 0x34a2, 0x30e2, 0x30e2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2
};
static int
cp1251_mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED,
const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower)
{
const UChar* p = *pp;
*lower = ENC_CP1251_TO_LOWER_CASE(*p);
(*pp)++;
return 1;
}
static int
cp1251_is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
if (code < 256)
return ENC_IS_CP1251_CTYPE(code, ctype);
else
return FALSE;
}
static const OnigPairCaseFoldCodes CaseFoldMap[] = {
{ 0xb8, 0xa8 },
{ 0xe0, 0xc0 },
{ 0xe1, 0xc1 },
{ 0xe2, 0xc2 },
{ 0xe3, 0xc3 },
{ 0xe4, 0xc4 },
{ 0xe5, 0xc5 },
{ 0xe6, 0xc6 },
{ 0xe7, 0xc7 },
{ 0xe8, 0xc8 },
{ 0xe9, 0xc9 },
{ 0xea, 0xca },
{ 0xeb, 0xcb },
{ 0xec, 0xcc },
{ 0xed, 0xcd },
{ 0xee, 0xce },
{ 0xef, 0xcf },
{ 0xf0, 0xd0 },
{ 0xf1, 0xd1 },
{ 0xf2, 0xd2 },
{ 0xf3, 0xd3 },
{ 0xf4, 0xd4 },
{ 0xf5, 0xd5 },
{ 0xf6, 0xd6 },
{ 0xf7, 0xd7 },
{ 0xf8, 0xd8 },
{ 0xf9, 0xd9 },
{ 0xfa, 0xda },
{ 0xfb, 0xdb },
{ 0xfc, 0xdc },
{ 0xfd, 0xdd },
{ 0xfe, 0xde },
{ 0xff, 0xdf }
};
static int
cp1251_apply_all_case_fold(OnigCaseFoldType flag,
OnigApplyAllCaseFoldFunc f, void* arg)
{
return onigenc_apply_all_case_fold_with_map(
sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0,
flag, f, arg);
}
static int
cp1251_get_case_fold_codes_by_str(OnigCaseFoldType flag,
const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])
{
return onigenc_get_case_fold_codes_by_str_with_map(
sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0,
flag, p, end, items);
}
OnigEncodingType OnigEncodingCP1251 = {
onigenc_single_byte_mbc_enc_len,
"CP1251", /* name */
1, /* max enc length */
1, /* min enc length */
onigenc_is_mbc_newline_0x0a,
onigenc_single_byte_mbc_to_code,
onigenc_single_byte_code_to_mbclen,
onigenc_single_byte_code_to_mbc,
cp1251_mbc_case_fold,
cp1251_apply_all_case_fold,
cp1251_get_case_fold_codes_by_str,
onigenc_minimum_property_name_to_ctype,
cp1251_is_code_ctype,
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_always_true_is_allowed_reverse_match
};

285
oniguruma/enc/euc_jp.c Normal file
View file

@ -0,0 +1,285 @@
/**********************************************************************
euc_jp.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "regint.h"
#define eucjp_islead(c) ((UChar )((c) - 0xa1) > 0xfe - 0xa1)
static const int EncLen_EUCJP[] = {
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 3,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1
};
static int
mbc_enc_len(const UChar* p)
{
return EncLen_EUCJP[*p];
}
static OnigCodePoint
mbc_to_code(const UChar* p, const UChar* end)
{
int c, i, len;
OnigCodePoint n;
len = enclen(ONIG_ENCODING_EUC_JP, p);
n = (OnigCodePoint )*p++;
if (len == 1) return n;
for (i = 1; i < len; i++) {
if (p >= end) break;
c = *p++;
n <<= 8; n += c;
}
return n;
}
static int
code_to_mbclen(OnigCodePoint code)
{
if (ONIGENC_IS_CODE_ASCII(code)) return 1;
else if ((code & 0xff0000) != 0) return 3;
else if ((code & 0xff00) != 0) return 2;
else
return ONIGERR_INVALID_CODE_POINT_VALUE;
}
#if 0
static int
code_to_mbc_first(OnigCodePoint code)
{
int first;
if ((code & 0xff0000) != 0) {
first = (code >> 16) & 0xff;
}
else if ((code & 0xff00) != 0) {
first = (code >> 8) & 0xff;
}
else {
return (int )code;
}
return first;
}
#endif
static int
code_to_mbc(OnigCodePoint code, UChar *buf)
{
UChar *p = buf;
if ((code & 0xff0000) != 0) *p++ = (UChar )(((code >> 16) & 0xff));
if ((code & 0xff00) != 0) *p++ = (UChar )(((code >> 8) & 0xff));
*p++ = (UChar )(code & 0xff);
#if 1
if (enclen(ONIG_ENCODING_EUC_JP, buf) != (p - buf))
return ONIGERR_INVALID_CODE_POINT_VALUE;
#endif
return p - buf;
}
static int
mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED,
const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower)
{
int len;
const UChar* p = *pp;
if (ONIGENC_IS_MBC_ASCII(p)) {
*lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
(*pp)++;
return 1;
}
else {
int i;
len = enclen(ONIG_ENCODING_EUC_JP, p);
for (i = 0; i < len; i++) {
*lower++ = *p++;
}
(*pp) += len;
return len; /* return byte length of converted char to lower */
}
}
static UChar*
left_adjust_char_head(const UChar* start, const UChar* s)
{
/* In this encoding
mb-trail bytes doesn't mix with single bytes.
*/
const UChar *p;
int len;
if (s <= start) return (UChar* )s;
p = s;
while (!eucjp_islead(*p) && p > start) p--;
len = enclen(ONIG_ENCODING_EUC_JP, p);
if (p + len > s) return (UChar* )p;
p += len;
return (UChar* )(p + ((s - p) & ~1));
}
static int
is_allowed_reverse_match(const UChar* s, const UChar* end ARG_UNUSED)
{
const UChar c = *s;
if (c <= 0x7e || c == 0x8e || c == 0x8f)
return TRUE;
else
return FALSE;
}
static int PropertyInited = 0;
static const OnigCodePoint** PropertyList;
static int PropertyListNum;
static int PropertyListSize;
static hash_table_type* PropertyNameTable;
static const OnigCodePoint CR_Hiragana[] = {
1,
0xa4a1, 0xa4f3
}; /* CR_Hiragana */
static const OnigCodePoint CR_Katakana[] = {
3,
0xa5a1, 0xa5f6,
0xaaa6, 0xaaaf,
0xaab1, 0xaadd
}; /* CR_Katakana */
static int
init_property_list(void)
{
int r;
PROPERTY_LIST_ADD_PROP("Hiragana", CR_Hiragana);
PROPERTY_LIST_ADD_PROP("Katakana", CR_Katakana);
PropertyInited = 1;
end:
return r;
}
static int
property_name_to_ctype(OnigEncoding enc, UChar* p, UChar* end)
{
hash_data_type ctype;
PROPERTY_LIST_INIT_CHECK;
if (onig_st_lookup_strend(PropertyNameTable, p, end, &ctype) == 0) {
return onigenc_minimum_property_name_to_ctype(enc, p, end);
}
return (int )ctype;
}
static int
is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
if (ctype <= ONIGENC_MAX_STD_CTYPE) {
if (code < 128)
return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
else {
if (CTYPE_IS_WORD_GRAPH_PRINT(ctype)) {
return (code_to_mbclen(code) > 1 ? TRUE : FALSE);
}
}
}
else {
PROPERTY_LIST_INIT_CHECK;
ctype -= (ONIGENC_MAX_STD_CTYPE + 1);
if (ctype >= (unsigned int )PropertyListNum)
return ONIGERR_TYPE_BUG;
return onig_is_in_code_range((UChar* )PropertyList[ctype], code);
}
return FALSE;
}
static int
get_ctype_code_range(OnigCtype ctype, OnigCodePoint* sb_out,
const OnigCodePoint* ranges[])
{
if (ctype <= ONIGENC_MAX_STD_CTYPE) {
return ONIG_NO_SUPPORT_CONFIG;
}
else {
*sb_out = 0x80;
PROPERTY_LIST_INIT_CHECK;
ctype -= (ONIGENC_MAX_STD_CTYPE + 1);
if (ctype >= (OnigCtype )PropertyListNum)
return ONIGERR_TYPE_BUG;
*ranges = PropertyList[ctype];
return 0;
}
}
OnigEncodingType OnigEncodingEUC_JP = {
mbc_enc_len,
"EUC-JP", /* name */
3, /* max enc length */
1, /* min enc length */
onigenc_is_mbc_newline_0x0a,
mbc_to_code,
code_to_mbclen,
code_to_mbc,
mbc_case_fold,
onigenc_ascii_apply_all_case_fold,
onigenc_ascii_get_case_fold_codes_by_str,
property_name_to_ctype,
is_code_ctype,
get_ctype_code_range,
left_adjust_char_head,
is_allowed_reverse_match
};

158
oniguruma/enc/euc_kr.c Normal file
View file

@ -0,0 +1,158 @@
/**********************************************************************
euc_kr.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "regenc.h"
static const int EncLen_EUCKR[] = {
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1
};
static int
euckr_mbc_enc_len(const UChar* p)
{
return EncLen_EUCKR[*p];
}
static OnigCodePoint
euckr_mbc_to_code(const UChar* p, const UChar* end)
{
return onigenc_mbn_mbc_to_code(ONIG_ENCODING_EUC_KR, p, end);
}
static int
euckr_code_to_mbc(OnigCodePoint code, UChar *buf)
{
return onigenc_mb2_code_to_mbc(ONIG_ENCODING_EUC_KR, code, buf);
}
static int
euckr_mbc_case_fold(OnigCaseFoldType flag, const UChar** pp, const UChar* end,
UChar* lower)
{
return onigenc_mbn_mbc_case_fold(ONIG_ENCODING_EUC_KR, flag,
pp, end, lower);
}
#if 0
static int
euckr_is_mbc_ambiguous(OnigCaseFoldType flag,
const UChar** pp, const UChar* end)
{
return onigenc_mbn_is_mbc_ambiguous(ONIG_ENCODING_EUC_KR, flag, pp, end);
}
#endif
static int
euckr_is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
return onigenc_mb2_is_code_ctype(ONIG_ENCODING_EUC_KR, code, ctype);
}
#define euckr_islead(c) ((c) < 0xa1 || (c) == 0xff)
static UChar*
euckr_left_adjust_char_head(const UChar* start, const UChar* s)
{
/* Assumed in this encoding,
mb-trail bytes don't mix with single bytes.
*/
const UChar *p;
int len;
if (s <= start) return (UChar* )s;
p = s;
while (!euckr_islead(*p) && p > start) p--;
len = enclen(ONIG_ENCODING_EUC_KR, p);
if (p + len > s) return (UChar* )p;
p += len;
return (UChar* )(p + ((s - p) & ~1));
}
static int
euckr_is_allowed_reverse_match(const UChar* s, const UChar* end ARG_UNUSED)
{
const UChar c = *s;
if (c <= 0x7e) return TRUE;
else return FALSE;
}
OnigEncodingType OnigEncodingEUC_KR = {
euckr_mbc_enc_len,
"EUC-KR", /* name */
2, /* max enc length */
1, /* min enc length */
onigenc_is_mbc_newline_0x0a,
euckr_mbc_to_code,
onigenc_mb2_code_to_mbclen,
euckr_code_to_mbc,
euckr_mbc_case_fold,
onigenc_ascii_apply_all_case_fold,
onigenc_ascii_get_case_fold_codes_by_str,
onigenc_minimum_property_name_to_ctype,
euckr_is_code_ctype,
onigenc_not_support_get_ctype_code_range,
euckr_left_adjust_char_head,
euckr_is_allowed_reverse_match
};
/* Same with OnigEncodingEUC_KR except the name */
OnigEncodingType OnigEncodingEUC_CN = {
euckr_mbc_enc_len,
"EUC-CN", /* name */
2, /* max enc length */
1, /* min enc length */
onigenc_is_mbc_newline_0x0a,
euckr_mbc_to_code,
onigenc_mb2_code_to_mbclen,
euckr_code_to_mbc,
euckr_mbc_case_fold,
onigenc_ascii_apply_all_case_fold,
onigenc_ascii_get_case_fold_codes_by_str,
onigenc_minimum_property_name_to_ctype,
euckr_is_code_ctype,
onigenc_not_support_get_ctype_code_range,
euckr_left_adjust_char_head,
euckr_is_allowed_reverse_match
};

129
oniguruma/enc/euc_tw.c Normal file
View file

@ -0,0 +1,129 @@
/**********************************************************************
euc_tw.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "regenc.h"
static const int EncLen_EUCTW[] = {
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1
};
static int
euctw_mbc_enc_len(const UChar* p)
{
return EncLen_EUCTW[*p];
}
static OnigCodePoint
euctw_mbc_to_code(const UChar* p, const UChar* end)
{
return onigenc_mbn_mbc_to_code(ONIG_ENCODING_EUC_TW, p, end);
}
static int
euctw_code_to_mbc(OnigCodePoint code, UChar *buf)
{
return onigenc_mb4_code_to_mbc(ONIG_ENCODING_EUC_TW, code, buf);
}
static int
euctw_mbc_case_fold(OnigCaseFoldType flag, const UChar** pp, const UChar* end,
UChar* lower)
{
return onigenc_mbn_mbc_case_fold(ONIG_ENCODING_EUC_TW, flag,
pp, end, lower);
}
static int
euctw_is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
return onigenc_mb4_is_code_ctype(ONIG_ENCODING_EUC_TW, code, ctype);
}
#define euctw_islead(c) ((UChar )((c) - 0xa1) > 0xfe - 0xa1)
static UChar*
euctw_left_adjust_char_head(const UChar* start, const UChar* s)
{
/* Assumed in this encoding,
mb-trail bytes don't mix with single bytes.
*/
const UChar *p;
int len;
if (s <= start) return (UChar* )s;
p = s;
while (!euctw_islead(*p) && p > start) p--;
len = enclen(ONIG_ENCODING_EUC_TW, p);
if (p + len > s) return (UChar* )p;
p += len;
return (UChar* )(p + ((s - p) & ~1));
}
static int
euctw_is_allowed_reverse_match(const UChar* s, const UChar* end ARG_UNUSED)
{
const UChar c = *s;
if (c <= 0x7e) return TRUE;
else return FALSE;
}
OnigEncodingType OnigEncodingEUC_TW = {
euctw_mbc_enc_len,
"EUC-TW", /* name */
4, /* max enc length */
1, /* min enc length */
onigenc_is_mbc_newline_0x0a,
euctw_mbc_to_code,
onigenc_mb4_code_to_mbclen,
euctw_code_to_mbc,
euctw_mbc_case_fold,
onigenc_ascii_apply_all_case_fold,
onigenc_ascii_get_case_fold_codes_by_str,
onigenc_minimum_property_name_to_ctype,
euctw_is_code_ctype,
onigenc_not_support_get_ctype_code_range,
euctw_left_adjust_char_head,
euctw_is_allowed_reverse_match
};

495
oniguruma/enc/gb18030.c Normal file
View file

@ -0,0 +1,495 @@
/**********************************************************************
gb18030.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2005-2007 KUBO Takehiro <kubo AT jiubao DOT org>
* K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "regenc.h"
#if 1
#define DEBUG_GB18030(arg)
#else
#define DEBUG_GB18030(arg) printf arg
#endif
enum {
C1, /* one-byte char */
C2, /* one-byte or second of two-byte char */
C4, /* one-byte or second or fourth of four-byte char */
CM /* first of two- or four-byte char or second of two-byte char */
};
static const char GB18030_MAP[] = {
C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1,
C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1,
C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1,
C4, C4, C4, C4, C4, C4, C4, C4, C4, C4, C1, C1, C1, C1, C1, C1,
C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2,
C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2,
C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2,
C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C1,
C2, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM,
CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM,
CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM,
CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM,
CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM,
CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM,
CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM,
CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, C1
};
static int
gb18030_mbc_enc_len(const UChar* p)
{
if (GB18030_MAP[*p] != CM)
return 1;
p++;
if (GB18030_MAP[*p] == C4)
return 4;
if (GB18030_MAP[*p] == C1)
return 1; /* illegal sequence */
return 2;
}
static OnigCodePoint
gb18030_mbc_to_code(const UChar* p, const UChar* end)
{
return onigenc_mbn_mbc_to_code(ONIG_ENCODING_GB18030, p, end);
}
static int
gb18030_code_to_mbc(OnigCodePoint code, UChar *buf)
{
return onigenc_mb4_code_to_mbc(ONIG_ENCODING_GB18030, code, buf);
}
static int
gb18030_mbc_case_fold(OnigCaseFoldType flag, const UChar** pp, const UChar* end,
UChar* lower)
{
return onigenc_mbn_mbc_case_fold(ONIG_ENCODING_GB18030, flag,
pp, end, lower);
}
#if 0
static int
gb18030_is_mbc_ambiguous(OnigCaseFoldType flag,
const UChar** pp, const UChar* end)
{
return onigenc_mbn_is_mbc_ambiguous(ONIG_ENCODING_GB18030, flag, pp, end);
}
#endif
static int
gb18030_is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
return onigenc_mb4_is_code_ctype(ONIG_ENCODING_GB18030, code, ctype);
}
enum state {
S_START,
S_one_C2,
S_one_C4,
S_one_CM,
S_odd_CM_one_CX,
S_even_CM_one_CX,
/* CMC4 : pair of "CM C4" */
S_one_CMC4,
S_odd_CMC4,
S_one_C4_odd_CMC4,
S_even_CMC4,
S_one_C4_even_CMC4,
S_odd_CM_odd_CMC4,
S_even_CM_odd_CMC4,
S_odd_CM_even_CMC4,
S_even_CM_even_CMC4,
/* C4CM : pair of "C4 CM" */
S_odd_C4CM,
S_one_CM_odd_C4CM,
S_even_C4CM,
S_one_CM_even_C4CM,
S_even_CM_odd_C4CM,
S_odd_CM_odd_C4CM,
S_even_CM_even_C4CM,
S_odd_CM_even_C4CM,
};
static UChar*
gb18030_left_adjust_char_head(const UChar* start, const UChar* s)
{
const UChar *p;
enum state state = S_START;
DEBUG_GB18030(("----------------\n"));
for (p = s; p >= start; p--) {
DEBUG_GB18030(("state %d --(%02x)-->\n", state, *p));
switch (state) {
case S_START:
switch (GB18030_MAP[*p]) {
case C1:
return (UChar *)s;
case C2:
state = S_one_C2; /* C2 */
break;
case C4:
state = S_one_C4; /* C4 */
break;
case CM:
state = S_one_CM; /* CM */
break;
}
break;
case S_one_C2: /* C2 */
switch (GB18030_MAP[*p]) {
case C1:
case C2:
case C4:
return (UChar *)s;
case CM:
state = S_odd_CM_one_CX; /* CM C2 */
break;
}
break;
case S_one_C4: /* C4 */
switch (GB18030_MAP[*p]) {
case C1:
case C2:
case C4:
return (UChar *)s;
case CM:
state = S_one_CMC4;
break;
}
break;
case S_one_CM: /* CM */
switch (GB18030_MAP[*p]) {
case C1:
case C2:
return (UChar *)s;
case C4:
state = S_odd_C4CM;
break;
case CM:
state = S_odd_CM_one_CX; /* CM CM */
break;
}
break;
case S_odd_CM_one_CX: /* CM C2 */ /* CM CM */ /* CM CM CM C4 */
switch (GB18030_MAP[*p]) {
case C1:
case C2:
case C4:
return (UChar *)(s - 1);
case CM:
state = S_even_CM_one_CX;
break;
}
break;
case S_even_CM_one_CX: /* CM CM C2 */ /* CM CM CM */ /* CM CM C4 */
switch (GB18030_MAP[*p]) {
case C1:
case C2:
case C4:
return (UChar *)s;
case CM:
state = S_odd_CM_one_CX;
break;
}
break;
case S_one_CMC4: /* CM C4 */
switch (GB18030_MAP[*p]) {
case C1:
case C2:
return (UChar *)(s - 1);
case C4:
state = S_one_C4_odd_CMC4; /* C4 CM C4 */
break;
case CM:
state = S_even_CM_one_CX; /* CM CM C4 */
break;
}
break;
case S_odd_CMC4: /* CM C4 CM C4 CM C4 */
switch (GB18030_MAP[*p]) {
case C1:
case C2:
return (UChar *)(s - 1);
case C4:
state = S_one_C4_odd_CMC4;
break;
case CM:
state = S_odd_CM_odd_CMC4;
break;
}
break;
case S_one_C4_odd_CMC4: /* C4 CM C4 */
switch (GB18030_MAP[*p]) {
case C1:
case C2:
case C4:
return (UChar *)(s - 1);
case CM:
state = S_even_CMC4; /* CM C4 CM C4 */
break;
}
break;
case S_even_CMC4: /* CM C4 CM C4 */
switch (GB18030_MAP[*p]) {
case C1:
case C2:
return (UChar *)(s - 3);
case C4:
state = S_one_C4_even_CMC4;
break;
case CM:
state = S_odd_CM_even_CMC4;
break;
}
break;
case S_one_C4_even_CMC4: /* C4 CM C4 CM C4 */
switch (GB18030_MAP[*p]) {
case C1:
case C2:
case C4:
return (UChar *)(s - 3);
case CM:
state = S_odd_CMC4;
break;
}
break;
case S_odd_CM_odd_CMC4: /* CM CM C4 CM C4 CM C4 */
switch (GB18030_MAP[*p]) {
case C1:
case C2:
case C4:
return (UChar *)(s - 3);
case CM:
state = S_even_CM_odd_CMC4;
break;
}
break;
case S_even_CM_odd_CMC4: /* CM CM CM C4 CM C4 CM C4 */
switch (GB18030_MAP[*p]) {
case C1:
case C2:
case C4:
return (UChar *)(s - 1);
case CM:
state = S_odd_CM_odd_CMC4;
break;
}
break;
case S_odd_CM_even_CMC4: /* CM CM C4 CM C4 */
switch (GB18030_MAP[*p]) {
case C1:
case C2:
case C4:
return (UChar *)(s - 1);
case CM:
state = S_even_CM_even_CMC4;
break;
}
break;
case S_even_CM_even_CMC4: /* CM CM CM C4 CM C4 */
switch (GB18030_MAP[*p]) {
case C1:
case C2:
case C4:
return (UChar *)(s - 3);
case CM:
state = S_odd_CM_even_CMC4;
break;
}
break;
case S_odd_C4CM: /* C4 CM */ /* C4 CM C4 CM C4 CM*/
switch (GB18030_MAP[*p]) {
case C1:
case C2:
case C4:
return (UChar *)s;
case CM:
state = S_one_CM_odd_C4CM; /* CM C4 CM */
break;
}
break;
case S_one_CM_odd_C4CM: /* CM C4 CM */ /* CM C4 CM C4 CM C4 CM */
switch (GB18030_MAP[*p]) {
case C1:
case C2:
return (UChar *)(s - 2); /* |CM C4 CM */
case C4:
state = S_even_C4CM;
break;
case CM:
state = S_even_CM_odd_C4CM;
break;
}
break;
case S_even_C4CM: /* C4 CM C4 CM */
switch (GB18030_MAP[*p]) {
case C1:
case C2:
case C4:
return (UChar *)(s - 2); /* C4|CM C4 CM */
case CM:
state = S_one_CM_even_C4CM;
break;
}
break;
case S_one_CM_even_C4CM: /* CM C4 CM C4 CM */
switch (GB18030_MAP[*p]) {
case C1:
case C2:
return (UChar *)(s - 0); /*|CM C4 CM C4|CM */
case C4:
state = S_odd_C4CM;
break;
case CM:
state = S_even_CM_even_C4CM;
break;
}
break;
case S_even_CM_odd_C4CM: /* CM CM C4 CM */
switch (GB18030_MAP[*p]) {
case C1:
case C2:
case C4:
return (UChar *)(s - 0); /* |CM CM|C4|CM */
case CM:
state = S_odd_CM_odd_C4CM;
break;
}
break;
case S_odd_CM_odd_C4CM: /* CM CM CM C4 CM */
switch (GB18030_MAP[*p]) {
case C1:
case C2:
case C4:
return (UChar *)(s - 2); /* |CM CM|CM C4 CM */
case CM:
state = S_even_CM_odd_C4CM;
break;
}
break;
case S_even_CM_even_C4CM: /* CM CM C4 CM C4 CM */
switch (GB18030_MAP[*p]) {
case C1:
case C2:
case C4:
return (UChar *)(s - 2); /* |CM CM|C4|CM C4 CM */
case CM:
state = S_odd_CM_even_C4CM;
break;
}
break;
case S_odd_CM_even_C4CM: /* CM CM CM C4 CM C4 CM */
switch (GB18030_MAP[*p]) {
case C1:
case C2:
case C4:
return (UChar *)(s - 0); /* |CM CM|CM C4 CM C4|CM */
case CM:
state = S_even_CM_even_C4CM;
break;
}
break;
}
}
DEBUG_GB18030(("state %d\n", state));
switch (state) {
case S_START: return (UChar *)(s - 0);
case S_one_C2: return (UChar *)(s - 0);
case S_one_C4: return (UChar *)(s - 0);
case S_one_CM: return (UChar *)(s - 0);
case S_odd_CM_one_CX: return (UChar *)(s - 1);
case S_even_CM_one_CX: return (UChar *)(s - 0);
case S_one_CMC4: return (UChar *)(s - 1);
case S_odd_CMC4: return (UChar *)(s - 1);
case S_one_C4_odd_CMC4: return (UChar *)(s - 1);
case S_even_CMC4: return (UChar *)(s - 3);
case S_one_C4_even_CMC4: return (UChar *)(s - 3);
case S_odd_CM_odd_CMC4: return (UChar *)(s - 3);
case S_even_CM_odd_CMC4: return (UChar *)(s - 1);
case S_odd_CM_even_CMC4: return (UChar *)(s - 1);
case S_even_CM_even_CMC4: return (UChar *)(s - 3);
case S_odd_C4CM: return (UChar *)(s - 0);
case S_one_CM_odd_C4CM: return (UChar *)(s - 2);
case S_even_C4CM: return (UChar *)(s - 2);
case S_one_CM_even_C4CM: return (UChar *)(s - 0);
case S_even_CM_odd_C4CM: return (UChar *)(s - 0);
case S_odd_CM_odd_C4CM: return (UChar *)(s - 2);
case S_even_CM_even_C4CM: return (UChar *)(s - 2);
case S_odd_CM_even_C4CM: return (UChar *)(s - 0);
}
return (UChar* )s; /* never come here. (escape warning) */
}
static int
gb18030_is_allowed_reverse_match(const UChar* s, const UChar* end ARG_UNUSED)
{
return GB18030_MAP[*s] == C1 ? TRUE : FALSE;
}
OnigEncodingType OnigEncodingGB18030 = {
gb18030_mbc_enc_len,
"GB18030", /* name */
4, /* max enc length */
1, /* min enc length */
onigenc_is_mbc_newline_0x0a,
gb18030_mbc_to_code,
onigenc_mb4_code_to_mbclen,
gb18030_code_to_mbc,
gb18030_mbc_case_fold,
onigenc_ascii_apply_all_case_fold,
onigenc_ascii_get_case_fold_codes_by_str,
onigenc_minimum_property_name_to_ctype,
gb18030_is_code_ctype,
onigenc_not_support_get_ctype_code_range,
gb18030_left_adjust_char_head,
gb18030_is_allowed_reverse_match
};

272
oniguruma/enc/iso8859_1.c Normal file
View file

@ -0,0 +1,272 @@
/**********************************************************************
iso8859_1.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "regenc.h"
#define ENC_IS_ISO_8859_1_CTYPE(code,ctype) \
((EncISO_8859_1_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0)
static const unsigned short EncISO_8859_1_CtypeTable[256] = {
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0284, 0x01a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
0x00a0, 0x00a0, 0x30e2, 0x01a0, 0x00a0, 0x01a0, 0x00a0, 0x00a0,
0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x00a0, 0x30e2, 0x00a0, 0x01a0,
0x00a0, 0x10a0, 0x30e2, 0x01a0, 0x10a0, 0x10a0, 0x10a0, 0x01a0,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x00a0,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2
};
static const OnigPairCaseFoldCodes CaseFoldMap[] = {
{ 0xc0, 0xe0 },
{ 0xc1, 0xe1 },
{ 0xc2, 0xe2 },
{ 0xc3, 0xe3 },
{ 0xc4, 0xe4 },
{ 0xc5, 0xe5 },
{ 0xc6, 0xe6 },
{ 0xc7, 0xe7 },
{ 0xc8, 0xe8 },
{ 0xc9, 0xe9 },
{ 0xca, 0xea },
{ 0xcb, 0xeb },
{ 0xcc, 0xec },
{ 0xcd, 0xed },
{ 0xce, 0xee },
{ 0xcf, 0xef },
{ 0xd0, 0xf0 },
{ 0xd1, 0xf1 },
{ 0xd2, 0xf2 },
{ 0xd3, 0xf3 },
{ 0xd4, 0xf4 },
{ 0xd5, 0xf5 },
{ 0xd6, 0xf6 },
{ 0xd8, 0xf8 },
{ 0xd9, 0xf9 },
{ 0xda, 0xfa },
{ 0xdb, 0xfb },
{ 0xdc, 0xfc },
{ 0xdd, 0xfd },
{ 0xde, 0xfe }
};
static int
apply_all_case_fold(OnigCaseFoldType flag,
OnigApplyAllCaseFoldFunc f, void* arg)
{
return onigenc_apply_all_case_fold_with_map(
sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
flag, f, arg);
}
static int
get_case_fold_codes_by_str(OnigCaseFoldType flag ARG_UNUSED,
const OnigUChar* p, const OnigUChar* end,
OnigCaseFoldCodeItem items[])
{
if (0x41 <= *p && *p <= 0x5a) {
items[0].byte_len = 1;
items[0].code_len = 1;
items[0].code[0] = (OnigCodePoint )(*p + 0x20);
if (*p == 0x53 && end > p + 1
&& (*(p+1) == 0x53 || *(p+1) == 0x73)) { /* SS */
items[1].byte_len = 2;
items[1].code_len = 1;
items[1].code[0] = (OnigCodePoint )0xdf;
return 2;
}
else
return 1;
}
else if (0x61 <= *p && *p <= 0x7a) {
items[0].byte_len = 1;
items[0].code_len = 1;
items[0].code[0] = (OnigCodePoint )(*p - 0x20);
if (*p == 0x73 && end > p + 1
&& (*(p+1) == 0x73 || *(p+1) == 0x53)) { /* ss */
items[1].byte_len = 2;
items[1].code_len = 1;
items[1].code[0] = (OnigCodePoint )0xdf;
return 2;
}
else
return 1;
}
else if (0xc0 <= *p && *p <= 0xcf) {
items[0].byte_len = 1;
items[0].code_len = 1;
items[0].code[0] = (OnigCodePoint )(*p + 0x20);
return 1;
}
else if (0xd0 <= *p && *p <= 0xdf) {
if (*p == 0xdf) {
items[0].byte_len = 1;
items[0].code_len = 2;
items[0].code[0] = (OnigCodePoint )'s';
items[0].code[1] = (OnigCodePoint )'s';
items[1].byte_len = 1;
items[1].code_len = 2;
items[1].code[0] = (OnigCodePoint )'S';
items[1].code[1] = (OnigCodePoint )'S';
items[2].byte_len = 1;
items[2].code_len = 2;
items[2].code[0] = (OnigCodePoint )'s';
items[2].code[1] = (OnigCodePoint )'S';
items[3].byte_len = 1;
items[3].code_len = 2;
items[3].code[0] = (OnigCodePoint )'S';
items[3].code[1] = (OnigCodePoint )'s';
return 4;
}
else if (*p != 0xd7) {
items[0].byte_len = 1;
items[0].code_len = 1;
items[0].code[0] = (OnigCodePoint )(*p + 0x20);
return 1;
}
}
else if (0xe0 <= *p && *p <= 0xef) {
items[0].byte_len = 1;
items[0].code_len = 1;
items[0].code[0] = (OnigCodePoint )(*p - 0x20);
return 1;
}
else if (0xf0 <= *p && *p <= 0xfe) {
if (*p != 0xf7) {
items[0].byte_len = 1;
items[0].code_len = 1;
items[0].code[0] = (OnigCodePoint )(*p - 0x20);
return 1;
}
}
return 0;
}
static int
mbc_case_fold(OnigCaseFoldType flag, const UChar** pp,
const UChar* end ARG_UNUSED, UChar* lower)
{
const UChar* p = *pp;
if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
*lower++ = 's';
*lower = 's';
(*pp)++;
return 2;
}
*lower = ONIGENC_ISO_8859_1_TO_LOWER_CASE(*p);
(*pp)++;
return 1;
}
#if 0
static int
is_mbc_ambiguous(OnigCaseFoldType flag,
const UChar** pp, const UChar* end)
{
int v;
const UChar* p = *pp;
if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
(*pp)++;
return TRUE;
}
(*pp)++;
v = (EncISO_8859_1_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER));
if ((v | BIT_CTYPE_LOWER) != 0) {
/* 0xdf, 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */
if (*p >= 0xaa && *p <= 0xba)
return FALSE;
else
return TRUE;
}
return (v != 0 ? TRUE : FALSE);
}
#endif
static int
is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
if (code < 256)
return ENC_IS_ISO_8859_1_CTYPE(code, ctype);
else
return FALSE;
}
OnigEncodingType OnigEncodingISO_8859_1 = {
onigenc_single_byte_mbc_enc_len,
"ISO-8859-1", /* name */
1, /* max enc length */
1, /* min enc length */
onigenc_is_mbc_newline_0x0a,
onigenc_single_byte_mbc_to_code,
onigenc_single_byte_code_to_mbclen,
onigenc_single_byte_code_to_mbc,
mbc_case_fold,
apply_all_case_fold,
get_case_fold_codes_by_str,
onigenc_minimum_property_name_to_ctype,
is_code_ctype,
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_always_true_is_allowed_reverse_match
};

239
oniguruma/enc/iso8859_10.c Normal file
View file

@ -0,0 +1,239 @@
/**********************************************************************
iso8859_10.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "regenc.h"
#define ENC_ISO_8859_10_TO_LOWER_CASE(c) EncISO_8859_10_ToLowerCaseTable[c]
#define ENC_IS_ISO_8859_10_CTYPE(code,ctype) \
((EncISO_8859_10_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0)
static const UChar EncISO_8859_10_ToLowerCaseTable[256] = {
'\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
'\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
'\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
'\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
'\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
'\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
'\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
'\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
'\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
'\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
'\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
'\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
'\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
'\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
'\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
'\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
'\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
'\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
'\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
'\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
'\240', '\261', '\262', '\263', '\264', '\265', '\266', '\247',
'\270', '\271', '\272', '\273', '\274', '\255', '\276', '\277',
'\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
'\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
'\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
'\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
'\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
'\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337',
'\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
'\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
'\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
'\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
};
static const unsigned short EncISO_8859_10_CtypeTable[256] = {
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0284, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x00a0,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x01a0, 0x34a2, 0x34a2,
0x00a0, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x01a0,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x01a0, 0x30e2, 0x30e2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2
};
static int
mbc_case_fold(OnigCaseFoldType flag,
const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower)
{
const UChar* p = *pp;
if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
*lower++ = 's';
*lower = 's';
(*pp)++;
return 2;
}
*lower = ENC_ISO_8859_10_TO_LOWER_CASE(*p);
(*pp)++;
return 1;
}
#if 0
static int
is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end)
{
int v;
const UChar* p = *pp;
if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
(*pp)++;
return TRUE;
}
(*pp)++;
v = (EncISO_8859_10_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER));
if ((v | BIT_CTYPE_LOWER) != 0) {
return TRUE;
}
return (v != 0 ? TRUE : FALSE);
}
#endif
static int
is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
if (code < 256)
return ENC_IS_ISO_8859_10_CTYPE(code, ctype);
else
return FALSE;
}
static const OnigPairCaseFoldCodes CaseFoldMap[] = {
{ 0xa1, 0xb1 },
{ 0xa2, 0xb2 },
{ 0xa3, 0xb3 },
{ 0xa4, 0xb4 },
{ 0xa5, 0xb5 },
{ 0xa6, 0xb6 },
{ 0xa8, 0xb8 },
{ 0xa9, 0xb9 },
{ 0xaa, 0xba },
{ 0xab, 0xbb },
{ 0xac, 0xbc },
{ 0xae, 0xbe },
{ 0xaf, 0xbf },
{ 0xc0, 0xe0 },
{ 0xc1, 0xe1 },
{ 0xc2, 0xe2 },
{ 0xc3, 0xe3 },
{ 0xc4, 0xe4 },
{ 0xc5, 0xe5 },
{ 0xc6, 0xe6 },
{ 0xc7, 0xe7 },
{ 0xc8, 0xe8 },
{ 0xc9, 0xe9 },
{ 0xca, 0xea },
{ 0xcb, 0xeb },
{ 0xcc, 0xec },
{ 0xcd, 0xed },
{ 0xce, 0xee },
{ 0xcf, 0xef },
{ 0xd0, 0xf0 },
{ 0xd1, 0xf1 },
{ 0xd2, 0xf2 },
{ 0xd3, 0xf3 },
{ 0xd4, 0xf4 },
{ 0xd5, 0xf5 },
{ 0xd6, 0xf6 },
{ 0xd7, 0xf7 },
{ 0xd8, 0xf8 },
{ 0xd9, 0xf9 },
{ 0xda, 0xfa },
{ 0xdb, 0xfb },
{ 0xdc, 0xfc },
{ 0xdd, 0xfd },
{ 0xde, 0xfe }
};
static int
apply_all_case_fold(OnigCaseFoldType flag,
OnigApplyAllCaseFoldFunc f, void* arg)
{
return onigenc_apply_all_case_fold_with_map(
sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
flag, f, arg);
}
static int
get_case_fold_codes_by_str(OnigCaseFoldType flag,
const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])
{
return onigenc_get_case_fold_codes_by_str_with_map(
sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
flag, p, end, items);
}
OnigEncodingType OnigEncodingISO_8859_10 = {
onigenc_single_byte_mbc_enc_len,
"ISO-8859-10", /* name */
1, /* max enc length */
1, /* min enc length */
onigenc_is_mbc_newline_0x0a,
onigenc_single_byte_mbc_to_code,
onigenc_single_byte_code_to_mbclen,
onigenc_single_byte_code_to_mbc,
mbc_case_fold,
apply_all_case_fold,
get_case_fold_codes_by_str,
onigenc_minimum_property_name_to_ctype,
is_code_ctype,
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_always_true_is_allowed_reverse_match
};

View file

@ -0,0 +1,96 @@
/**********************************************************************
iso8859_11.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "regenc.h"
#define ENC_IS_ISO_8859_11_CTYPE(code,ctype) \
((EncISO_8859_11_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0)
static const unsigned short EncISO_8859_11_CtypeTable[256] = {
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0284, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2,
0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2,
0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2,
0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2,
0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2,
0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2,
0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2,
0x30a2, 0x30a2, 0x30a2, 0x0000, 0x0000, 0x0000, 0x0000, 0x30a2,
0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2,
0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2,
0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2,
0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x0000, 0x0000, 0x0000, 0x0000
};
static int
is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
if (code < 256)
return ENC_IS_ISO_8859_11_CTYPE(code, ctype);
else
return FALSE;
}
OnigEncodingType OnigEncodingISO_8859_11 = {
onigenc_single_byte_mbc_enc_len,
"ISO-8859-11", /* name */
1, /* max enc length */
1, /* min enc length */
onigenc_is_mbc_newline_0x0a,
onigenc_single_byte_mbc_to_code,
onigenc_single_byte_code_to_mbclen,
onigenc_single_byte_code_to_mbc,
onigenc_ascii_mbc_case_fold,
onigenc_ascii_apply_all_case_fold,
onigenc_ascii_get_case_fold_codes_by_str,
onigenc_minimum_property_name_to_ctype,
is_code_ctype,
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_always_true_is_allowed_reverse_match
};

228
oniguruma/enc/iso8859_13.c Normal file
View file

@ -0,0 +1,228 @@
/**********************************************************************
iso8859_13.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "regenc.h"
#define ENC_ISO_8859_13_TO_LOWER_CASE(c) EncISO_8859_13_ToLowerCaseTable[c]
#define ENC_IS_ISO_8859_13_CTYPE(code,ctype) \
((EncISO_8859_13_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0)
static const UChar EncISO_8859_13_ToLowerCaseTable[256] = {
'\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
'\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
'\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
'\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
'\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
'\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
'\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
'\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
'\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
'\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
'\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
'\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
'\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
'\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
'\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
'\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
'\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
'\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
'\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
'\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
'\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
'\270', '\251', '\272', '\253', '\254', '\255', '\256', '\277',
'\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
'\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
'\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
'\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
'\360', '\361', '\362', '\363', '\364', '\365', '\366', '\327',
'\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337',
'\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
'\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
'\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
'\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
};
static const unsigned short EncISO_8859_13_CtypeTable[256] = {
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0284, 0x01a0, 0x00a0, 0x00a0, 0x00a0, 0x01a0, 0x00a0, 0x00a0,
0x34a2, 0x00a0, 0x34a2, 0x01a0, 0x00a0, 0x01a0, 0x00a0, 0x34a2,
0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x01a0, 0x30e2, 0x00a0, 0x01a0,
0x30e2, 0x10a0, 0x30e2, 0x01a0, 0x10a0, 0x10a0, 0x10a0, 0x30e2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x00a0,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x01a0
};
static int
mbc_case_fold(OnigCaseFoldType flag,
const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower)
{
const UChar* p = *pp;
if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
*lower++ = 's';
*lower = 's';
(*pp)++;
return 2;
}
*lower = ENC_ISO_8859_13_TO_LOWER_CASE(*p);
(*pp)++;
return 1;
}
#if 0
static int
is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end)
{
int v;
const UChar* p = *pp;
if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
(*pp)++;
return TRUE;
}
(*pp)++;
v = (EncISO_8859_13_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER));
if ((v | BIT_CTYPE_LOWER) != 0) {
/* 0xdf, 0xb5 are lower case letter, but can't convert. */
if (*p == 0xb5)
return FALSE;
else
return TRUE;
}
return (v != 0 ? TRUE : FALSE);
}
#endif
static int
is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
if (code < 256)
return ENC_IS_ISO_8859_13_CTYPE(code, ctype);
else
return FALSE;
}
static const OnigPairCaseFoldCodes CaseFoldMap[] = {
{ 0xc0, 0xe0 },
{ 0xc1, 0xe1 },
{ 0xc2, 0xe2 },
{ 0xc3, 0xe3 },
{ 0xc4, 0xe4 },
{ 0xc5, 0xe5 },
{ 0xc6, 0xe6 },
{ 0xc7, 0xe7 },
{ 0xc8, 0xe8 },
{ 0xc9, 0xe9 },
{ 0xca, 0xea },
{ 0xcb, 0xeb },
{ 0xcc, 0xec },
{ 0xcd, 0xed },
{ 0xce, 0xee },
{ 0xcf, 0xef },
{ 0xd0, 0xf0 },
{ 0xd1, 0xf1 },
{ 0xd2, 0xf2 },
{ 0xd3, 0xf3 },
{ 0xd4, 0xf4 },
{ 0xd5, 0xf5 },
{ 0xd6, 0xf6 },
{ 0xd8, 0xf8 },
{ 0xd9, 0xf9 },
{ 0xda, 0xfa },
{ 0xdb, 0xfb },
{ 0xdc, 0xfc },
{ 0xdd, 0xfd },
{ 0xde, 0xfe }
};
static int
apply_all_case_fold(OnigCaseFoldType flag,
OnigApplyAllCaseFoldFunc f, void* arg)
{
return onigenc_apply_all_case_fold_with_map(
sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
flag, f, arg);
}
static int
get_case_fold_codes_by_str(OnigCaseFoldType flag,
const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])
{
return onigenc_get_case_fold_codes_by_str_with_map(
sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
flag, p, end, items);
}
OnigEncodingType OnigEncodingISO_8859_13 = {
onigenc_single_byte_mbc_enc_len,
"ISO-8859-13", /* name */
1, /* max enc length */
1, /* min enc length */
onigenc_is_mbc_newline_0x0a,
onigenc_single_byte_mbc_to_code,
onigenc_single_byte_code_to_mbclen,
onigenc_single_byte_code_to_mbc,
mbc_case_fold,
apply_all_case_fold,
get_case_fold_codes_by_str,
onigenc_minimum_property_name_to_ctype,
is_code_ctype,
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_always_true_is_allowed_reverse_match
};

241
oniguruma/enc/iso8859_14.c Normal file
View file

@ -0,0 +1,241 @@
/**********************************************************************
iso8859_14.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "regenc.h"
#define ENC_ISO_8859_14_TO_LOWER_CASE(c) EncISO_8859_14_ToLowerCaseTable[c]
#define ENC_IS_ISO_8859_14_CTYPE(code,ctype) \
((EncISO_8859_14_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0)
static const UChar EncISO_8859_14_ToLowerCaseTable[256] = {
'\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
'\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
'\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
'\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
'\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
'\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
'\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
'\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
'\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
'\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
'\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
'\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
'\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
'\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
'\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
'\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
'\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
'\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
'\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
'\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
'\240', '\242', '\242', '\243', '\245', '\245', '\253', '\247',
'\270', '\251', '\272', '\253', '\274', '\255', '\256', '\377',
'\261', '\261', '\263', '\263', '\265', '\265', '\266', '\271',
'\270', '\271', '\272', '\277', '\274', '\276', '\276', '\277',
'\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
'\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
'\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
'\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337',
'\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
'\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
'\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
'\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
};
static const unsigned short EncISO_8859_14_CtypeTable[256] = {
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0284, 0x34a2, 0x30e2, 0x00a0, 0x34a2, 0x30e2, 0x34a2, 0x00a0,
0x34a2, 0x00a0, 0x34a2, 0x30e2, 0x34a2, 0x01a0, 0x00a0, 0x34a2,
0x34a2, 0x30e2, 0x34a2, 0x30e2, 0x34a2, 0x30e2, 0x00a0, 0x34a2,
0x30e2, 0x30e2, 0x30e2, 0x34a2, 0x30e2, 0x34a2, 0x30e2, 0x30e2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2
};
static int
mbc_case_fold(OnigCaseFoldType flag,
const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower)
{
const UChar* p = *pp;
if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
*lower++ = 's';
*lower = 's';
(*pp)++;
return 2;
}
*lower = ENC_ISO_8859_14_TO_LOWER_CASE(*p);
(*pp)++;
return 1; /* return byte length of converted char to lower */
}
#if 0
static int
is_mbc_ambiguous(OnigCaseFoldType flag,
const UChar** pp, const UChar* end)
{
int v;
const UChar* p = *pp;
if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
(*pp)++;
return TRUE;
}
(*pp)++;
v = (EncISO_8859_14_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER));
if ((v | BIT_CTYPE_LOWER) != 0) {
return TRUE;
}
return (v != 0 ? TRUE : FALSE);
}
#endif
static int
is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
if (code < 256)
return ENC_IS_ISO_8859_14_CTYPE(code, ctype);
else
return FALSE;
}
static const OnigPairCaseFoldCodes CaseFoldMap[] = {
{ 0xa1, 0xa2 },
{ 0xa4, 0xa5 },
{ 0xa6, 0xab },
{ 0xa8, 0xb8 },
{ 0xaa, 0xba },
{ 0xac, 0xbc },
{ 0xaf, 0xff },
{ 0xb0, 0xb1 },
{ 0xb2, 0xb3 },
{ 0xb4, 0xb5 },
{ 0xb7, 0xb9 },
{ 0xbb, 0xbf },
{ 0xbd, 0xbe },
{ 0xc0, 0xe0 },
{ 0xc1, 0xe1 },
{ 0xc2, 0xe2 },
{ 0xc3, 0xe3 },
{ 0xc4, 0xe4 },
{ 0xc5, 0xe5 },
{ 0xc6, 0xe6 },
{ 0xc7, 0xe7 },
{ 0xc8, 0xe8 },
{ 0xc9, 0xe9 },
{ 0xca, 0xea },
{ 0xcb, 0xeb },
{ 0xcc, 0xec },
{ 0xcd, 0xed },
{ 0xce, 0xee },
{ 0xcf, 0xef },
{ 0xd0, 0xf0 },
{ 0xd1, 0xf1 },
{ 0xd2, 0xf2 },
{ 0xd3, 0xf3 },
{ 0xd4, 0xf4 },
{ 0xd5, 0xf5 },
{ 0xd6, 0xf6 },
{ 0xd7, 0xf7 },
{ 0xd8, 0xf8 },
{ 0xd9, 0xf9 },
{ 0xda, 0xfa },
{ 0xdb, 0xfb },
{ 0xdc, 0xfc },
{ 0xdd, 0xfd },
{ 0xde, 0xfe }
};
static int
apply_all_case_fold(OnigCaseFoldType flag,
OnigApplyAllCaseFoldFunc f, void* arg)
{
return onigenc_apply_all_case_fold_with_map(
sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
flag, f, arg);
}
static int
get_case_fold_codes_by_str(OnigCaseFoldType flag,
const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])
{
return onigenc_get_case_fold_codes_by_str_with_map(
sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
flag, p, end, items);
}
OnigEncodingType OnigEncodingISO_8859_14 = {
onigenc_single_byte_mbc_enc_len,
"ISO-8859-14", /* name */
1, /* max enc length */
1, /* min enc length */
onigenc_is_mbc_newline_0x0a,
onigenc_single_byte_mbc_to_code,
onigenc_single_byte_code_to_mbclen,
onigenc_single_byte_code_to_mbc,
mbc_case_fold,
apply_all_case_fold,
get_case_fold_codes_by_str,
onigenc_minimum_property_name_to_ctype,
is_code_ctype,
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_always_true_is_allowed_reverse_match
};

235
oniguruma/enc/iso8859_15.c Normal file
View file

@ -0,0 +1,235 @@
/**********************************************************************
iso8859_15.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "regenc.h"
#define ENC_ISO_8859_15_TO_LOWER_CASE(c) EncISO_8859_15_ToLowerCaseTable[c]
#define ENC_IS_ISO_8859_15_CTYPE(code,ctype) \
((EncISO_8859_15_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0)
static const UChar EncISO_8859_15_ToLowerCaseTable[256] = {
'\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
'\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
'\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
'\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
'\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
'\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
'\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
'\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
'\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
'\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
'\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
'\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
'\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
'\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
'\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
'\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
'\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
'\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
'\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
'\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
'\240', '\241', '\242', '\243', '\244', '\245', '\250', '\247',
'\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
'\260', '\261', '\262', '\263', '\270', '\265', '\266', '\267',
'\270', '\271', '\272', '\273', '\275', '\275', '\377', '\277',
'\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
'\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
'\360', '\361', '\362', '\363', '\364', '\365', '\366', '\327',
'\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337',
'\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
'\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
'\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
'\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
};
static const unsigned short EncISO_8859_15_CtypeTable[256] = {
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0284, 0x01a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x34a2, 0x00a0,
0x30e2, 0x00a0, 0x30e2, 0x01a0, 0x00a0, 0x01a0, 0x00a0, 0x00a0,
0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x34a2, 0x30e2, 0x00a0, 0x01a0,
0x30e2, 0x10a0, 0x30e2, 0x01a0, 0x34a2, 0x30e2, 0x34a2, 0x01a0,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x00a0,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2
};
static int
mbc_case_fold(OnigCaseFoldType flag,
const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower)
{
const UChar* p = *pp;
if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
*lower++ = 's';
*lower = 's';
(*pp)++;
return 2;
}
*lower = ENC_ISO_8859_15_TO_LOWER_CASE(*p);
(*pp)++;
return 1; /* return byte length of converted char to lower */
}
#if 0
static int
is_mbc_ambiguous(OnigCaseFoldType flag,
const UChar** pp, const UChar* end)
{
int v;
const UChar* p = *pp;
if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
(*pp)++;
return TRUE;
}
(*pp)++;
v = (EncISO_8859_15_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER));
if ((v | BIT_CTYPE_LOWER) != 0) {
/* 0xdf etc.. are lower case letter, but can't convert. */
if (*p == 0xaa || *p == 0xb5 || *p == 0xba)
return FALSE;
else
return TRUE;
}
return (v != 0 ? TRUE : FALSE);
}
#endif
static int
is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
if (code < 256)
return ENC_IS_ISO_8859_15_CTYPE(code, ctype);
else
return FALSE;
}
static const OnigPairCaseFoldCodes CaseFoldMap[] = {
{ 0xa6, 0xa8 },
{ 0xb4, 0xb8 },
{ 0xbc, 0xbd },
{ 0xbe, 0xff },
{ 0xc0, 0xe0 },
{ 0xc1, 0xe1 },
{ 0xc2, 0xe2 },
{ 0xc3, 0xe3 },
{ 0xc4, 0xe4 },
{ 0xc5, 0xe5 },
{ 0xc6, 0xe6 },
{ 0xc7, 0xe7 },
{ 0xc8, 0xe8 },
{ 0xc9, 0xe9 },
{ 0xca, 0xea },
{ 0xcb, 0xeb },
{ 0xcc, 0xec },
{ 0xcd, 0xed },
{ 0xce, 0xee },
{ 0xcf, 0xef },
{ 0xd0, 0xf0 },
{ 0xd1, 0xf1 },
{ 0xd2, 0xf2 },
{ 0xd3, 0xf3 },
{ 0xd4, 0xf4 },
{ 0xd5, 0xf5 },
{ 0xd6, 0xf6 },
{ 0xd8, 0xf8 },
{ 0xd9, 0xf9 },
{ 0xda, 0xfa },
{ 0xdb, 0xfb },
{ 0xdc, 0xfc },
{ 0xdd, 0xfd },
{ 0xde, 0xfe }
};
static int
apply_all_case_fold(OnigCaseFoldType flag,
OnigApplyAllCaseFoldFunc f, void* arg)
{
return onigenc_apply_all_case_fold_with_map(
sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
flag, f, arg);
}
static int
get_case_fold_codes_by_str(OnigCaseFoldType flag,
const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])
{
return onigenc_get_case_fold_codes_by_str_with_map(
sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
flag, p, end, items);
}
OnigEncodingType OnigEncodingISO_8859_15 = {
onigenc_single_byte_mbc_enc_len,
"ISO-8859-15", /* name */
1, /* max enc length */
1, /* min enc length */
onigenc_is_mbc_newline_0x0a,
onigenc_single_byte_mbc_to_code,
onigenc_single_byte_code_to_mbclen,
onigenc_single_byte_code_to_mbc,
mbc_case_fold,
apply_all_case_fold,
get_case_fold_codes_by_str,
onigenc_minimum_property_name_to_ctype,
is_code_ctype,
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_always_true_is_allowed_reverse_match
};

237
oniguruma/enc/iso8859_16.c Normal file
View file

@ -0,0 +1,237 @@
/**********************************************************************
iso8859_16.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "regenc.h"
#define ENC_ISO_8859_16_TO_LOWER_CASE(c) EncISO_8859_16_ToLowerCaseTable[c]
#define ENC_IS_ISO_8859_16_CTYPE(code,ctype) \
((EncISO_8859_16_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0)
static const UChar EncISO_8859_16_ToLowerCaseTable[256] = {
'\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
'\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
'\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
'\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
'\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
'\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
'\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
'\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
'\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
'\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
'\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
'\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
'\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
'\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
'\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
'\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
'\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
'\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
'\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
'\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
'\240', '\242', '\242', '\263', '\245', '\245', '\250', '\247',
'\250', '\251', '\272', '\253', '\256', '\255', '\256', '\277',
'\260', '\261', '\271', '\263', '\270', '\265', '\266', '\267',
'\270', '\271', '\272', '\273', '\275', '\275', '\377', '\277',
'\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
'\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
'\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
'\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337',
'\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
'\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
'\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
'\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
};
static const unsigned short EncISO_8859_16_CtypeTable[256] = {
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0284, 0x34a2, 0x30e2, 0x34a2, 0x00a0, 0x01a0, 0x34a2, 0x00a0,
0x30e2, 0x00a0, 0x34a2, 0x01a0, 0x34a2, 0x01a0, 0x30e2, 0x34a2,
0x00a0, 0x00a0, 0x34a2, 0x30e2, 0x34a2, 0x01a0, 0x00a0, 0x01a0,
0x30e2, 0x30e2, 0x30e2, 0x01a0, 0x34a2, 0x30e2, 0x34a2, 0x30e2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2
};
static int
mbc_case_fold(OnigCaseFoldType flag,
const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower)
{
const UChar* p = *pp;
if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
*lower++ = 's';
*lower = 's';
(*pp)++;
return 2;
}
*lower = ENC_ISO_8859_16_TO_LOWER_CASE(*p);
(*pp)++;
return 1; /* return byte length of converted char to lower */
}
#if 0
static int
is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end)
{
int v;
const UChar* p = *pp;
if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
(*pp)++;
return TRUE;
}
(*pp)++;
v = (EncISO_8859_16_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER));
if ((v | BIT_CTYPE_LOWER) != 0) {
return TRUE;
}
return (v != 0 ? TRUE : FALSE);
}
#endif
static int
is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
if (code < 256)
return ENC_IS_ISO_8859_16_CTYPE(code, ctype);
else
return FALSE;
}
static const OnigPairCaseFoldCodes CaseFoldMap[] = {
{ 0xa1, 0xa2 },
{ 0xa3, 0xb3 },
{ 0xa6, 0xa8 },
{ 0xaa, 0xba },
{ 0xac, 0xae },
{ 0xaf, 0xbf },
{ 0xb2, 0xb9 },
{ 0xb4, 0xb8 },
{ 0xbc, 0xbd },
{ 0xbe, 0xff },
{ 0xc0, 0xe0 },
{ 0xc1, 0xe1 },
{ 0xc2, 0xe2 },
{ 0xc3, 0xe3 },
{ 0xc4, 0xe4 },
{ 0xc5, 0xe5 },
{ 0xc6, 0xe6 },
{ 0xc7, 0xe7 },
{ 0xc8, 0xe8 },
{ 0xc9, 0xe9 },
{ 0xca, 0xea },
{ 0xcb, 0xeb },
{ 0xcc, 0xec },
{ 0xcd, 0xed },
{ 0xce, 0xee },
{ 0xcf, 0xef },
{ 0xd0, 0xf0 },
{ 0xd1, 0xf1 },
{ 0xd2, 0xf2 },
{ 0xd3, 0xf3 },
{ 0xd4, 0xf4 },
{ 0xd5, 0xf5 },
{ 0xd6, 0xf6 },
{ 0xd7, 0xf7 },
{ 0xd8, 0xf8 },
{ 0xd9, 0xf9 },
{ 0xda, 0xfa },
{ 0xdb, 0xfb },
{ 0xdc, 0xfc },
{ 0xdd, 0xfd },
{ 0xde, 0xfe }
};
static int
apply_all_case_fold(OnigCaseFoldType flag,
OnigApplyAllCaseFoldFunc f, void* arg)
{
return onigenc_apply_all_case_fold_with_map(
sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
flag, f, arg);
}
static int
get_case_fold_codes_by_str(OnigCaseFoldType flag,
const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])
{
return onigenc_get_case_fold_codes_by_str_with_map(
sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
flag, p, end, items);
}
OnigEncodingType OnigEncodingISO_8859_16 = {
onigenc_single_byte_mbc_enc_len,
"ISO-8859-16", /* name */
1, /* max enc length */
1, /* min enc length */
onigenc_is_mbc_newline_0x0a,
onigenc_single_byte_mbc_to_code,
onigenc_single_byte_code_to_mbclen,
onigenc_single_byte_code_to_mbc,
mbc_case_fold,
apply_all_case_fold,
get_case_fold_codes_by_str,
onigenc_minimum_property_name_to_ctype,
is_code_ctype,
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_always_true_is_allowed_reverse_match
};

235
oniguruma/enc/iso8859_2.c Normal file
View file

@ -0,0 +1,235 @@
/**********************************************************************
iso8859_2.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "regenc.h"
#define ENC_ISO_8859_2_TO_LOWER_CASE(c) EncISO_8859_2_ToLowerCaseTable[c]
#define ENC_IS_ISO_8859_2_CTYPE(code,ctype) \
((EncISO_8859_2_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0)
static const UChar EncISO_8859_2_ToLowerCaseTable[256] = {
'\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
'\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
'\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
'\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
'\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
'\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
'\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
'\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
'\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
'\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
'\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
'\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
'\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
'\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
'\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
'\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
'\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
'\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
'\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
'\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
'\240', '\261', '\242', '\263', '\244', '\265', '\266', '\247',
'\250', '\271', '\272', '\273', '\274', '\255', '\276', '\277',
'\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
'\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
'\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
'\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
'\360', '\361', '\362', '\363', '\364', '\365', '\366', '\327',
'\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337',
'\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
'\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
'\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
'\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
};
static const unsigned short EncISO_8859_2_CtypeTable[256] = {
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0284, 0x34a2, 0x00a0, 0x34a2, 0x00a0, 0x34a2, 0x34a2, 0x00a0,
0x00a0, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x01a0, 0x34a2, 0x34a2,
0x00a0, 0x30e2, 0x00a0, 0x30e2, 0x00a0, 0x30e2, 0x30e2, 0x00a0,
0x00a0, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0, 0x30e2, 0x30e2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x00a0,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0
};
static int
mbc_case_fold(OnigCaseFoldType flag,
const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower)
{
const UChar* p = *pp;
if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
*lower++ = 's';
*lower = 's';
(*pp)++;
return 2;
}
*lower = ENC_ISO_8859_2_TO_LOWER_CASE(*p);
(*pp)++;
return 1; /* return byte length of converted char to lower */
}
#if 0
static int
is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end)
{
int v;
const UChar* p = *pp;
if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
(*pp)++;
return TRUE;
}
(*pp)++;
v = (EncISO_8859_2_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER));
if ((v | BIT_CTYPE_LOWER) != 0) {
return TRUE;
}
return (v != 0 ? TRUE : FALSE);
}
#endif
static const OnigPairCaseFoldCodes CaseFoldMap[] = {
{ 0xa1, 0xb1 },
{ 0xa3, 0xb3 },
{ 0xa5, 0xb5 },
{ 0xa6, 0xb6 },
{ 0xa9, 0xb9 },
{ 0xaa, 0xba },
{ 0xab, 0xbb },
{ 0xac, 0xbc },
{ 0xae, 0xbe },
{ 0xaf, 0xbf },
{ 0xc0, 0xe0 },
{ 0xc1, 0xe1 },
{ 0xc2, 0xe2 },
{ 0xc3, 0xe3 },
{ 0xc4, 0xe4 },
{ 0xc5, 0xe5 },
{ 0xc6, 0xe6 },
{ 0xc7, 0xe7 },
{ 0xc8, 0xe8 },
{ 0xc9, 0xe9 },
{ 0xca, 0xea },
{ 0xcb, 0xeb },
{ 0xcc, 0xec },
{ 0xcd, 0xed },
{ 0xce, 0xee },
{ 0xcf, 0xef },
{ 0xd0, 0xf0 },
{ 0xd1, 0xf1 },
{ 0xd2, 0xf2 },
{ 0xd3, 0xf3 },
{ 0xd4, 0xf4 },
{ 0xd5, 0xf5 },
{ 0xd6, 0xf6 },
{ 0xd8, 0xf8 },
{ 0xd9, 0xf9 },
{ 0xda, 0xfa },
{ 0xdb, 0xfb },
{ 0xdc, 0xfc },
{ 0xdd, 0xfd },
{ 0xde, 0xfe }
};
static int
apply_all_case_fold(OnigCaseFoldType flag,
OnigApplyAllCaseFoldFunc f, void* arg)
{
return onigenc_apply_all_case_fold_with_map(
sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
flag, f, arg);
}
static int
get_case_fold_codes_by_str(OnigCaseFoldType flag,
const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])
{
return onigenc_get_case_fold_codes_by_str_with_map(
sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
flag, p, end, items);
}
static int
is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
if (code < 256)
return ENC_IS_ISO_8859_2_CTYPE(code, ctype);
else
return FALSE;
}
OnigEncodingType OnigEncodingISO_8859_2 = {
onigenc_single_byte_mbc_enc_len,
"ISO-8859-2", /* name */
1, /* max enc length */
1, /* min enc length */
onigenc_is_mbc_newline_0x0a,
onigenc_single_byte_mbc_to_code,
onigenc_single_byte_code_to_mbclen,
onigenc_single_byte_code_to_mbc,
mbc_case_fold,
apply_all_case_fold,
get_case_fold_codes_by_str,
onigenc_minimum_property_name_to_ctype,
is_code_ctype,
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_always_true_is_allowed_reverse_match
};

235
oniguruma/enc/iso8859_3.c Normal file
View file

@ -0,0 +1,235 @@
/**********************************************************************
iso8859_3.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "regenc.h"
#define ENC_ISO_8859_3_TO_LOWER_CASE(c) EncISO_8859_3_ToLowerCaseTable[c]
#define ENC_IS_ISO_8859_3_CTYPE(code,ctype) \
((EncISO_8859_3_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0)
static const UChar EncISO_8859_3_ToLowerCaseTable[256] = {
'\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
'\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
'\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
'\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
'\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
'\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
'\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
'\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
'\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
'\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
'\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
'\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
'\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
'\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
'\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
'\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
'\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
'\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
'\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
'\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
'\240', '\261', '\242', '\243', '\244', '\245', '\266', '\247',
'\250', '\271', '\272', '\273', '\274', '\255', '\256', '\277',
'\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
'\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
'\340', '\341', '\342', '\303', '\344', '\345', '\346', '\347',
'\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
'\320', '\361', '\362', '\363', '\364', '\365', '\366', '\327',
'\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337',
'\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
'\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
'\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
'\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
};
static const unsigned short EncISO_8859_3_CtypeTable[256] = {
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0284, 0x34a2, 0x00a0, 0x00a0, 0x00a0, 0x0000, 0x34a2, 0x00a0,
0x00a0, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x01a0, 0x0000, 0x34a2,
0x00a0, 0x30e2, 0x10a0, 0x10a0, 0x00a0, 0x30e2, 0x30e2, 0x01a0,
0x00a0, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x11a0, 0x0000, 0x30e2,
0x34a2, 0x34a2, 0x34a2, 0x0000, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x0000, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x00a0,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x0000, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x0000, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0
};
static int
mbc_case_fold(OnigCaseFoldType flag, const UChar** pp,
const UChar* end ARG_UNUSED, UChar* lower)
{
const UChar* p = *pp;
if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
*lower++ = 's';
*lower = 's';
(*pp)++;
return 2;
}
*lower = ENC_ISO_8859_3_TO_LOWER_CASE(*p);
(*pp)++;
return 1;
}
#if 0
static int
is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end)
{
int v;
const UChar* p = *pp;
if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
(*pp)++;
return TRUE;
}
(*pp)++;
v = (EncISO_8859_3_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER));
if ((v | BIT_CTYPE_LOWER) != 0) {
/* 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */
if (*p == 0xb5)
return FALSE;
else
return TRUE;
}
return (v != 0 ? TRUE : FALSE);
}
#endif
static int
is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
if (code < 256)
return ENC_IS_ISO_8859_3_CTYPE(code, ctype);
else
return FALSE;
}
static const OnigPairCaseFoldCodes CaseFoldMap[] = {
{ 0xa1, 0xb1 },
{ 0xa6, 0xb6 },
{ 0xa9, 0xb9 },
{ 0xaa, 0xba },
{ 0xab, 0xbb },
{ 0xac, 0xbc },
{ 0xaf, 0xbf },
{ 0xc0, 0xe0 },
{ 0xc1, 0xe1 },
{ 0xc2, 0xe2 },
{ 0xc4, 0xe4 },
{ 0xc5, 0xe5 },
{ 0xc6, 0xe6 },
{ 0xc7, 0xe7 },
{ 0xc8, 0xe8 },
{ 0xc9, 0xe9 },
{ 0xca, 0xea },
{ 0xcb, 0xeb },
{ 0xcc, 0xec },
{ 0xcd, 0xed },
{ 0xce, 0xee },
{ 0xcf, 0xef },
{ 0xd1, 0xf1 },
{ 0xd2, 0xf2 },
{ 0xd3, 0xf3 },
{ 0xd4, 0xf4 },
{ 0xd5, 0xf5 },
{ 0xd6, 0xf6 },
{ 0xd8, 0xf8 },
{ 0xd9, 0xf9 },
{ 0xda, 0xfa },
{ 0xdb, 0xfb },
{ 0xdc, 0xfc },
{ 0xdd, 0xfd },
{ 0xde, 0xfe }
};
static int
apply_all_case_fold(OnigCaseFoldType flag,
OnigApplyAllCaseFoldFunc f, void* arg)
{
return onigenc_apply_all_case_fold_with_map(
sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
flag, f, arg);
}
static int
get_case_fold_codes_by_str(OnigCaseFoldType flag,
const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])
{
return onigenc_get_case_fold_codes_by_str_with_map(
sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
flag, p, end, items);
}
OnigEncodingType OnigEncodingISO_8859_3 = {
onigenc_single_byte_mbc_enc_len,
"ISO-8859-3", /* name */
1, /* max enc length */
1, /* min enc length */
onigenc_is_mbc_newline_0x0a,
onigenc_single_byte_mbc_to_code,
onigenc_single_byte_code_to_mbclen,
onigenc_single_byte_code_to_mbc,
mbc_case_fold,
apply_all_case_fold,
get_case_fold_codes_by_str,
onigenc_minimum_property_name_to_ctype,
is_code_ctype,
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_always_true_is_allowed_reverse_match
};

237
oniguruma/enc/iso8859_4.c Normal file
View file

@ -0,0 +1,237 @@
/**********************************************************************
iso8859_4.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "regenc.h"
#define ENC_ISO_8859_4_TO_LOWER_CASE(c) EncISO_8859_4_ToLowerCaseTable[c]
#define ENC_IS_ISO_8859_4_CTYPE(code,ctype) \
((EncISO_8859_4_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0)
static const UChar EncISO_8859_4_ToLowerCaseTable[256] = {
'\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
'\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
'\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
'\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
'\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
'\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
'\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
'\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
'\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
'\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
'\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
'\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
'\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
'\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
'\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
'\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
'\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
'\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
'\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
'\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
'\240', '\261', '\242', '\263', '\244', '\265', '\266', '\247',
'\250', '\271', '\272', '\273', '\274', '\255', '\276', '\257',
'\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
'\270', '\271', '\272', '\273', '\274', '\277', '\276', '\277',
'\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
'\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
'\360', '\361', '\362', '\363', '\364', '\365', '\366', '\327',
'\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337',
'\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
'\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
'\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
'\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
};
static const unsigned short EncISO_8859_4_CtypeTable[256] = {
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0284, 0x34a2, 0x30e2, 0x34a2, 0x00a0, 0x34a2, 0x34a2, 0x00a0,
0x00a0, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x01a0, 0x34a2, 0x00a0,
0x00a0, 0x30e2, 0x00a0, 0x30e2, 0x00a0, 0x30e2, 0x30e2, 0x00a0,
0x00a0, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x34a2, 0x30e2, 0x30e2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x00a0,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0
};
static int
mbc_case_fold(OnigCaseFoldType flag,
const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower)
{
const UChar* p = *pp;
if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
*lower++ = 's';
*lower = 's';
(*pp)++;
return 2;
}
*lower = ENC_ISO_8859_4_TO_LOWER_CASE(*p);
(*pp)++;
return 1; /* return byte length of converted char to lower */
}
#if 0
static int
is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end)
{
int v;
const UChar* p = *pp;
if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
(*pp)++;
return TRUE;
}
(*pp)++;
v = (EncISO_8859_4_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER));
if ((v | BIT_CTYPE_LOWER) != 0) {
if (*p == 0xa2)
return FALSE;
else
return TRUE;
}
return (v != 0 ? TRUE : FALSE);
}
#endif
static int
is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
if (code < 256)
return ENC_IS_ISO_8859_4_CTYPE(code, ctype);
else
return FALSE;
}
static const OnigPairCaseFoldCodes CaseFoldMap[] = {
{ 0xa1, 0xb1 },
{ 0xa3, 0xb3 },
{ 0xa5, 0xb5 },
{ 0xa6, 0xb6 },
{ 0xa9, 0xb9 },
{ 0xaa, 0xba },
{ 0xab, 0xbb },
{ 0xac, 0xbc },
{ 0xae, 0xbe },
{ 0xc0, 0xe0 },
{ 0xc1, 0xe1 },
{ 0xc2, 0xe2 },
{ 0xc3, 0xe3 },
{ 0xc4, 0xe4 },
{ 0xc5, 0xe5 },
{ 0xc6, 0xe6 },
{ 0xc7, 0xe7 },
{ 0xc8, 0xe8 },
{ 0xc9, 0xe9 },
{ 0xca, 0xea },
{ 0xcb, 0xeb },
{ 0xcc, 0xec },
{ 0xcd, 0xed },
{ 0xce, 0xee },
{ 0xcf, 0xef },
{ 0xd0, 0xf0 },
{ 0xd1, 0xf1 },
{ 0xd2, 0xf2 },
{ 0xd3, 0xf3 },
{ 0xd4, 0xf4 },
{ 0xd5, 0xf5 },
{ 0xd6, 0xf6 },
{ 0xd8, 0xf8 },
{ 0xd9, 0xf9 },
{ 0xda, 0xfa },
{ 0xdb, 0xfb },
{ 0xdc, 0xfc },
{ 0xdd, 0xfd },
{ 0xde, 0xfe }
};
static int
apply_all_case_fold(OnigCaseFoldType flag,
OnigApplyAllCaseFoldFunc f, void* arg)
{
return onigenc_apply_all_case_fold_with_map(
sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
flag, f, arg);
}
static int
get_case_fold_codes_by_str(OnigCaseFoldType flag,
const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])
{
return onigenc_get_case_fold_codes_by_str_with_map(
sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
flag, p, end, items);
}
OnigEncodingType OnigEncodingISO_8859_4 = {
onigenc_single_byte_mbc_enc_len,
"ISO-8859-4", /* name */
1, /* max enc length */
1, /* min enc length */
onigenc_is_mbc_newline_0x0a,
onigenc_single_byte_mbc_to_code,
onigenc_single_byte_code_to_mbclen,
onigenc_single_byte_code_to_mbc,
mbc_case_fold,
apply_all_case_fold,
get_case_fold_codes_by_str,
onigenc_minimum_property_name_to_ctype,
is_code_ctype,
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_always_true_is_allowed_reverse_match
};

226
oniguruma/enc/iso8859_5.c Normal file
View file

@ -0,0 +1,226 @@
/**********************************************************************
iso8859_5.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "regenc.h"
#define ENC_ISO_8859_5_TO_LOWER_CASE(c) EncISO_8859_5_ToLowerCaseTable[c]
#define ENC_IS_ISO_8859_5_CTYPE(code,ctype) \
((EncISO_8859_5_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0)
static const UChar EncISO_8859_5_ToLowerCaseTable[256] = {
'\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
'\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
'\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
'\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
'\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
'\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
'\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
'\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
'\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
'\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
'\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
'\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
'\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
'\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
'\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
'\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
'\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
'\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
'\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
'\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
'\240', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
'\370', '\371', '\372', '\373', '\374', '\255', '\376', '\377',
'\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
'\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
'\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
'\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
'\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
'\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
'\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
'\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
'\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
'\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
};
static const unsigned short EncISO_8859_5_CtypeTable[256] = {
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0284, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x01a0, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x00a0, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0, 0x30e2, 0x30e2
};
static int
mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED,
const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower)
{
const UChar* p = *pp;
*lower = ENC_ISO_8859_5_TO_LOWER_CASE(*p);
(*pp)++;
return 1;
}
#if 0
static int
is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end)
{
int v;
const UChar* p = *pp;
(*pp)++;
v = (EncISO_8859_5_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER));
return (v != 0 ? TRUE : FALSE);
}
#endif
static int
is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
if (code < 256)
return ENC_IS_ISO_8859_5_CTYPE(code, ctype);
else
return FALSE;
}
static const OnigPairCaseFoldCodes CaseFoldMap[] = {
{ 0xa1, 0xf1 },
{ 0xa2, 0xf2 },
{ 0xa3, 0xf3 },
{ 0xa4, 0xf4 },
{ 0xa5, 0xf5 },
{ 0xa6, 0xf6 },
{ 0xa7, 0xf7 },
{ 0xa8, 0xf8 },
{ 0xa9, 0xf9 },
{ 0xaa, 0xfa },
{ 0xab, 0xfb },
{ 0xac, 0xfc },
{ 0xae, 0xfe },
{ 0xaf, 0xff },
{ 0xb0, 0xd0 },
{ 0xb1, 0xd1 },
{ 0xb2, 0xd2 },
{ 0xb3, 0xd3 },
{ 0xb4, 0xd4 },
{ 0xb5, 0xd5 },
{ 0xb6, 0xd6 },
{ 0xb7, 0xd7 },
{ 0xb8, 0xd8 },
{ 0xb9, 0xd9 },
{ 0xba, 0xda },
{ 0xbb, 0xdb },
{ 0xbc, 0xdc },
{ 0xbd, 0xdd },
{ 0xbe, 0xde },
{ 0xbf, 0xdf },
{ 0xc0, 0xe0 },
{ 0xc1, 0xe1 },
{ 0xc2, 0xe2 },
{ 0xc3, 0xe3 },
{ 0xc4, 0xe4 },
{ 0xc5, 0xe5 },
{ 0xc6, 0xe6 },
{ 0xc7, 0xe7 },
{ 0xc8, 0xe8 },
{ 0xc9, 0xe9 },
{ 0xca, 0xea },
{ 0xcb, 0xeb },
{ 0xcc, 0xec },
{ 0xcd, 0xed },
{ 0xce, 0xee },
{ 0xcf, 0xef }
};
static int
apply_all_case_fold(OnigCaseFoldType flag,
OnigApplyAllCaseFoldFunc f, void* arg)
{
return onigenc_apply_all_case_fold_with_map(
sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0,
flag, f, arg);
}
static int
get_case_fold_codes_by_str(OnigCaseFoldType flag,
const OnigUChar* p, const OnigUChar* end,
OnigCaseFoldCodeItem items[])
{
return onigenc_get_case_fold_codes_by_str_with_map(
sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0,
flag, p, end, items);
}
OnigEncodingType OnigEncodingISO_8859_5 = {
onigenc_single_byte_mbc_enc_len,
"ISO-8859-5", /* name */
1, /* max enc length */
1, /* min enc length */
onigenc_is_mbc_newline_0x0a,
onigenc_single_byte_mbc_to_code,
onigenc_single_byte_code_to_mbclen,
onigenc_single_byte_code_to_mbc,
mbc_case_fold,
apply_all_case_fold,
get_case_fold_codes_by_str,
onigenc_minimum_property_name_to_ctype,
is_code_ctype,
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_always_true_is_allowed_reverse_match
};

96
oniguruma/enc/iso8859_6.c Normal file
View file

@ -0,0 +1,96 @@
/**********************************************************************
iso8859_6.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "regenc.h"
#define ENC_IS_ISO_8859_6_CTYPE(code,ctype) \
((EncISO_8859_6_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0)
static const unsigned short EncISO_8859_6_CtypeTable[256] = {
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0284, 0x0000, 0x0000, 0x0000, 0x00a0, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x01a0, 0x01a0, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x01a0, 0x0000, 0x0000, 0x0000, 0x01a0,
0x0000, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2,
0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2,
0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2,
0x30a2, 0x30a2, 0x30a2, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2,
0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2,
0x30a2, 0x30a2, 0x30a2, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000
};
static int
is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
if (code < 256)
return ENC_IS_ISO_8859_6_CTYPE(code, ctype);
else
return FALSE;
}
OnigEncodingType OnigEncodingISO_8859_6 = {
onigenc_single_byte_mbc_enc_len,
"ISO-8859-6", /* name */
1, /* max enc length */
1, /* min enc length */
onigenc_is_mbc_newline_0x0a,
onigenc_single_byte_mbc_to_code,
onigenc_single_byte_code_to_mbclen,
onigenc_single_byte_code_to_mbc,
onigenc_ascii_mbc_case_fold,
onigenc_ascii_apply_all_case_fold,
onigenc_ascii_get_case_fold_codes_by_str,
onigenc_minimum_property_name_to_ctype,
is_code_ctype,
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_always_true_is_allowed_reverse_match
};

222
oniguruma/enc/iso8859_7.c Normal file
View file

@ -0,0 +1,222 @@
/**********************************************************************
iso8859_7.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "regenc.h"
#define ENC_ISO_8859_7_TO_LOWER_CASE(c) EncISO_8859_7_ToLowerCaseTable[c]
#define ENC_IS_ISO_8859_7_CTYPE(code,ctype) \
((EncISO_8859_7_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0)
static const UChar EncISO_8859_7_ToLowerCaseTable[256] = {
'\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
'\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
'\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
'\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
'\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
'\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
'\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
'\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
'\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
'\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
'\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
'\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
'\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
'\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
'\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
'\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
'\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
'\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
'\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
'\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
'\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
'\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
'\260', '\261', '\262', '\263', '\264', '\265', '\334', '\267',
'\335', '\336', '\337', '\273', '\374', '\275', '\375', '\376',
'\300', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
'\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
'\360', '\361', '\322', '\363', '\364', '\365', '\366', '\367',
'\370', '\371', '\372', '\373', '\334', '\335', '\336', '\337',
'\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
'\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
'\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
'\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
};
static const unsigned short EncISO_8859_7_CtypeTable[256] = {
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0284, 0x01a0, 0x01a0, 0x00a0, 0x0000, 0x0000, 0x00a0, 0x00a0,
0x00a0, 0x00a0, 0x0000, 0x01a0, 0x00a0, 0x01a0, 0x0000, 0x01a0,
0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x00a0, 0x00a0, 0x34a2, 0x01a0,
0x34a2, 0x34a2, 0x34a2, 0x01a0, 0x34a2, 0x10a0, 0x34a2, 0x34a2,
0x30e2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x0000, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x0000
};
static int
mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED,
const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower)
{
const UChar* p = *pp;
*lower = ENC_ISO_8859_7_TO_LOWER_CASE(*p);
(*pp)++;
return 1;
}
#if 0
static int
is_mbc_ambiguous(OnigCaseFoldType flag,
const UChar** pp, const UChar* end)
{
int v;
const UChar* p = *pp;
(*pp)++;
v = (EncISO_8859_7_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER));
if ((v | BIT_CTYPE_LOWER) != 0) {
if (*p == 0xc0 || *p == 0xe0)
return FALSE;
else
return TRUE;
}
return (v != 0 ? TRUE : FALSE);
}
#endif
static int
is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
if (code < 256)
return ENC_IS_ISO_8859_7_CTYPE(code, ctype);
else
return FALSE;
}
static const OnigPairCaseFoldCodes CaseFoldMap[] = {
{ 0xb6, 0xdc },
{ 0xb8, 0xdd },
{ 0xb9, 0xde },
{ 0xba, 0xdf },
{ 0xbc, 0xfc },
{ 0xbe, 0xfd },
{ 0xbf, 0xfe },
{ 0xc1, 0xe1 },
{ 0xc2, 0xe2 },
{ 0xc3, 0xe3 },
{ 0xc4, 0xe4 },
{ 0xc5, 0xe5 },
{ 0xc6, 0xe6 },
{ 0xc7, 0xe7 },
{ 0xc8, 0xe8 },
{ 0xc9, 0xe9 },
{ 0xca, 0xea },
{ 0xcb, 0xeb },
{ 0xcc, 0xec },
{ 0xcd, 0xed },
{ 0xce, 0xee },
{ 0xcf, 0xef },
{ 0xd0, 0xf0 },
{ 0xd1, 0xf1 },
{ 0xd2, 0xf2 },
{ 0xd3, 0xf3 },
{ 0xd4, 0xf4 },
{ 0xd5, 0xf5 },
{ 0xd6, 0xf6 },
{ 0xd7, 0xf7 },
{ 0xd8, 0xf8 },
{ 0xd9, 0xf9 },
{ 0xda, 0xfa },
{ 0xdb, 0xfb }
};
static int
apply_all_case_fold(OnigCaseFoldType flag,
OnigApplyAllCaseFoldFunc f, void* arg)
{
return onigenc_apply_all_case_fold_with_map(
sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0,
flag, f, arg);
}
static int
get_case_fold_codes_by_str(OnigCaseFoldType flag,
const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])
{
return onigenc_get_case_fold_codes_by_str_with_map(
sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0,
flag, p, end, items);
}
OnigEncodingType OnigEncodingISO_8859_7 = {
onigenc_single_byte_mbc_enc_len,
"ISO-8859-7", /* name */
1, /* max enc length */
1, /* min enc length */
onigenc_is_mbc_newline_0x0a,
onigenc_single_byte_mbc_to_code,
onigenc_single_byte_code_to_mbclen,
onigenc_single_byte_code_to_mbc,
mbc_case_fold,
apply_all_case_fold,
get_case_fold_codes_by_str,
onigenc_minimum_property_name_to_ctype,
is_code_ctype,
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_always_true_is_allowed_reverse_match
};

96
oniguruma/enc/iso8859_8.c Normal file
View file

@ -0,0 +1,96 @@
/**********************************************************************
iso8859_8.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "regenc.h"
#define ENC_IS_ISO_8859_8_CTYPE(code,ctype) \
((EncISO_8859_8_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0)
static const unsigned short EncISO_8859_8_CtypeTable[256] = {
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0284, 0x0000, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
0x00a0, 0x00a0, 0x00a0, 0x01a0, 0x00a0, 0x01a0, 0x00a0, 0x00a0,
0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x00a0, 0x30e2, 0x00a0, 0x01a0,
0x00a0, 0x10a0, 0x00a0, 0x01a0, 0x10a0, 0x10a0, 0x10a0, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x01a0,
0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2,
0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2,
0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2,
0x30a2, 0x30a2, 0x30a2, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000
};
static int
is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
if (code < 256)
return ENC_IS_ISO_8859_8_CTYPE(code, ctype);
else
return FALSE;
}
OnigEncodingType OnigEncodingISO_8859_8 = {
onigenc_single_byte_mbc_enc_len,
"ISO-8859-8", /* name */
1, /* max enc length */
1, /* min enc length */
onigenc_is_mbc_newline_0x0a,
onigenc_single_byte_mbc_to_code,
onigenc_single_byte_code_to_mbclen,
onigenc_single_byte_code_to_mbc,
onigenc_ascii_mbc_case_fold,
onigenc_ascii_apply_all_case_fold,
onigenc_ascii_get_case_fold_codes_by_str,
onigenc_minimum_property_name_to_ctype,
is_code_ctype,
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_always_true_is_allowed_reverse_match
};

228
oniguruma/enc/iso8859_9.c Normal file
View file

@ -0,0 +1,228 @@
/**********************************************************************
iso8859_9.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "regenc.h"
#define ENC_ISO_8859_9_TO_LOWER_CASE(c) EncISO_8859_9_ToLowerCaseTable[c]
#define ENC_IS_ISO_8859_9_CTYPE(code,ctype) \
((EncISO_8859_9_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0)
static const UChar EncISO_8859_9_ToLowerCaseTable[256] = {
'\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
'\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
'\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
'\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
'\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
'\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
'\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
'\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
'\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
'\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
'\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
'\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
'\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
'\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
'\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
'\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
'\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
'\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
'\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
'\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
'\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
'\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
'\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
'\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
'\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
'\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
'\360', '\361', '\362', '\363', '\364', '\365', '\366', '\327',
'\370', '\371', '\372', '\373', '\374', '\335', '\376', '\337',
'\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
'\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
'\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
'\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
};
static const unsigned short EncISO_8859_9_CtypeTable[256] = {
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0284, 0x01a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
0x00a0, 0x00a0, 0x30e2, 0x01a0, 0x00a0, 0x01a0, 0x00a0, 0x00a0,
0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x00a0, 0x30e2, 0x00a0, 0x01a0,
0x00a0, 0x10a0, 0x30e2, 0x01a0, 0x10a0, 0x10a0, 0x10a0, 0x01a0,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x00a0,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2
};
static int
mbc_case_fold(OnigCaseFoldType flag,
const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower)
{
const UChar* p = *pp;
if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
*lower++ = 's';
*lower = 's';
(*pp)++;
return 2;
}
*lower = ENC_ISO_8859_9_TO_LOWER_CASE(*p);
(*pp)++;
return 1;
}
#if 0
static int
is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end)
{
int v;
const UChar* p = *pp;
if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
(*pp)++;
return TRUE;
}
(*pp)++;
v = (EncISO_8859_9_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER));
if ((v | BIT_CTYPE_LOWER) != 0) {
/* 0xdf etc.. are lower case letter, but can't convert. */
if (*p >= 0xaa && *p <= 0xba)
return FALSE;
else
return TRUE;
}
return (v != 0 ? TRUE : FALSE);
}
#endif
static int
is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
if (code < 256)
return ENC_IS_ISO_8859_9_CTYPE(code, ctype);
else
return FALSE;
}
static const OnigPairCaseFoldCodes CaseFoldMap[] = {
{ 0xc0, 0xe0 },
{ 0xc1, 0xe1 },
{ 0xc2, 0xe2 },
{ 0xc3, 0xe3 },
{ 0xc4, 0xe4 },
{ 0xc5, 0xe5 },
{ 0xc6, 0xe6 },
{ 0xc7, 0xe7 },
{ 0xc8, 0xe8 },
{ 0xc9, 0xe9 },
{ 0xca, 0xea },
{ 0xcb, 0xeb },
{ 0xcc, 0xec },
{ 0xcd, 0xed },
{ 0xce, 0xee },
{ 0xcf, 0xef },
{ 0xd0, 0xf0 },
{ 0xd1, 0xf1 },
{ 0xd2, 0xf2 },
{ 0xd3, 0xf3 },
{ 0xd4, 0xf4 },
{ 0xd5, 0xf5 },
{ 0xd6, 0xf6 },
{ 0xd8, 0xf8 },
{ 0xd9, 0xf9 },
{ 0xda, 0xfa },
{ 0xdb, 0xfb },
{ 0xdc, 0xfc },
{ 0xdd, 0xfd },
{ 0xde, 0xfe }
};
static int
apply_all_case_fold(OnigCaseFoldType flag,
OnigApplyAllCaseFoldFunc f, void* arg)
{
return onigenc_apply_all_case_fold_with_map(
sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
flag, f, arg);
}
static int
get_case_fold_codes_by_str(OnigCaseFoldType flag,
const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])
{
return onigenc_get_case_fold_codes_by_str_with_map(
sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
flag, p, end, items);
}
OnigEncodingType OnigEncodingISO_8859_9 = {
onigenc_single_byte_mbc_enc_len,
"ISO-8859-9", /* name */
1, /* max enc length */
1, /* min enc length */
onigenc_is_mbc_newline_0x0a,
onigenc_single_byte_mbc_to_code,
onigenc_single_byte_code_to_mbclen,
onigenc_single_byte_code_to_mbc,
mbc_case_fold,
apply_all_case_fold,
get_case_fold_codes_by_str,
onigenc_minimum_property_name_to_ctype,
is_code_ctype,
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_always_true_is_allowed_reverse_match
};

250
oniguruma/enc/koi8.c Normal file
View file

@ -0,0 +1,250 @@
/**********************************************************************
koi8.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "regenc.h"
#define ENC_KOI8_TO_LOWER_CASE(c) EncKOI8_ToLowerCaseTable[c]
#define ENC_IS_KOI8_CTYPE(code,ctype) \
((EncKOI8_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0)
static const UChar EncKOI8_ToLowerCaseTable[256] = {
'\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
'\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
'\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
'\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
'\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
'\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
'\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
'\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
'\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
'\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
'\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
'\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
'\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
'\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
'\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
'\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
'\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
'\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
'\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
'\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
'\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
'\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
'\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
'\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
'\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
'\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
'\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
'\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
'\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
'\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
'\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
'\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337'
};
static const unsigned short EncKOI8_CtypeTable[256] = {
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0284, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2
};
static int
koi8_mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED,
const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower)
{
const UChar* p = *pp;
*lower = ENC_KOI8_TO_LOWER_CASE(*p);
(*pp)++;
return 1;
}
#if 0
static int
koi8_is_mbc_ambiguous(OnigAmbigType flag, const OnigUChar** pp, const OnigUChar* end)
{
const OnigUChar* p = *pp;
(*pp)++;
if (((flag & ONIGENC_CASE_FOLD_ASCII_CASE) != 0 &&
ONIGENC_IS_MBC_ASCII(p)) ||
((flag & ONIGENC_CASE_FOLD_NONASCII_CASE) != 0 &&
!ONIGENC_IS_MBC_ASCII(p))) {
int v = (EncKOI8_CtypeTable[*p] &
(BIT_CTYPE_UPPER | BIT_CTYPE_LOWER));
return (v != 0 ? TRUE : FALSE);
}
return FALSE;
}
#endif
static int
koi8_is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
if (code < 256)
return ENC_IS_KOI8_CTYPE(code, ctype);
else
return FALSE;
}
static const OnigPairCaseFoldCodes CaseFoldMap[] = {
{ 0xc0, 0xe0 },
{ 0xc1, 0xe1 },
{ 0xc2, 0xe2 },
{ 0xc3, 0xe3 },
{ 0xc4, 0xe4 },
{ 0xc5, 0xe5 },
{ 0xc6, 0xe6 },
{ 0xc7, 0xe7 },
{ 0xc8, 0xe8 },
{ 0xc9, 0xe9 },
{ 0xca, 0xea },
{ 0xcb, 0xeb },
{ 0xcc, 0xec },
{ 0xcd, 0xed },
{ 0xce, 0xee },
{ 0xcf, 0xef },
{ 0xd0, 0xf0 },
{ 0xd1, 0xf1 },
{ 0xd2, 0xf2 },
{ 0xd3, 0xf3 },
{ 0xd4, 0xf4 },
{ 0xd5, 0xf5 },
{ 0xd6, 0xf6 },
{ 0xd7, 0xf7 },
{ 0xd8, 0xf8 },
{ 0xd9, 0xf9 },
{ 0xda, 0xfa },
{ 0xdb, 0xfb },
{ 0xdc, 0xfc },
{ 0xdd, 0xfd },
{ 0xde, 0xfe },
{ 0xdf, 0xff },
{ 0xe0, 0xc0 },
{ 0xe1, 0xc1 },
{ 0xe2, 0xc2 },
{ 0xe3, 0xc3 },
{ 0xe4, 0xc4 },
{ 0xe5, 0xc5 },
{ 0xe6, 0xc6 },
{ 0xe7, 0xc7 },
{ 0xe8, 0xc8 },
{ 0xe9, 0xc9 },
{ 0xea, 0xca },
{ 0xeb, 0xcb },
{ 0xec, 0xcc },
{ 0xed, 0xcd },
{ 0xee, 0xce },
{ 0xef, 0xcf },
{ 0xf0, 0xd0 },
{ 0xf1, 0xd1 },
{ 0xf2, 0xd2 },
{ 0xf3, 0xd3 },
{ 0xf4, 0xd4 },
{ 0xf5, 0xd5 },
{ 0xf6, 0xd6 },
{ 0xf7, 0xd7 },
{ 0xf8, 0xd8 },
{ 0xf9, 0xd9 },
{ 0xfa, 0xda },
{ 0xfb, 0xdb },
{ 0xfc, 0xdc },
{ 0xfe, 0xde },
{ 0xff, 0xdf }
};
static int
koi8_apply_all_case_fold(OnigCaseFoldType flag,
OnigApplyAllCaseFoldFunc f, void* arg)
{
return onigenc_apply_all_case_fold_with_map(
sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0,
flag, f, arg);
}
static int
koi8_get_case_fold_codes_by_str(OnigCaseFoldType flag,
const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])
{
return onigenc_get_case_fold_codes_by_str_with_map(
sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0,
flag, p, end, items);
}
OnigEncodingType OnigEncodingKOI8 = {
onigenc_single_byte_mbc_enc_len,
"KOI8", /* name */
1, /* max enc length */
1, /* min enc length */
onigenc_is_mbc_newline_0x0a,
onigenc_single_byte_mbc_to_code,
onigenc_single_byte_code_to_mbclen,
onigenc_single_byte_code_to_mbc,
koi8_mbc_case_fold,
koi8_apply_all_case_fold,
koi8_get_case_fold_codes_by_str,
onigenc_minimum_property_name_to_ctype,
koi8_is_code_ctype,
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_always_true_is_allowed_reverse_match
};

212
oniguruma/enc/koi8_r.c Normal file
View file

@ -0,0 +1,212 @@
/**********************************************************************
koi8_r.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "regenc.h"
#define ENC_KOI8_R_TO_LOWER_CASE(c) EncKOI8_R_ToLowerCaseTable[c]
#define ENC_IS_KOI8_R_CTYPE(code,ctype) \
((EncKOI8_R_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0)
static const UChar EncKOI8_R_ToLowerCaseTable[256] = {
'\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
'\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
'\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
'\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
'\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
'\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
'\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
'\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
'\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
'\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
'\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
'\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
'\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
'\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
'\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
'\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
'\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
'\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
'\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
'\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
'\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
'\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
'\260', '\261', '\262', '\243', '\264', '\265', '\266', '\267',
'\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
'\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
'\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
'\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
'\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
'\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
'\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
'\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
'\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337'
};
static const unsigned short EncKOI8_R_CtypeTable[256] = {
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
0x00a0, 0x00a0, 0x0284, 0x00a0, 0x00a0, 0x10a0, 0x01a0, 0x00a0,
0x00a0, 0x00a0, 0x00a0, 0x30e2, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
0x00a0, 0x00a0, 0x00a0, 0x34a2, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2
};
static int
koi8_r_mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED,
const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower)
{
const UChar* p = *pp;
*lower = ENC_KOI8_R_TO_LOWER_CASE(*p);
(*pp)++;
return 1;
}
#if 0
static int
koi8_r_is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end)
{
int v;
const UChar* p = *pp;
(*pp)++;
v = (EncKOI8_R_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER));
return (v != 0 ? TRUE : FALSE);
}
#endif
static int
koi8_r_is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
if (code < 256)
return ENC_IS_KOI8_R_CTYPE(code, ctype);
else
return FALSE;
}
static const OnigPairCaseFoldCodes CaseFoldMap[] = {
{ 0xa3, 0xb3 },
{ 0xc0, 0xe0 },
{ 0xc1, 0xe1 },
{ 0xc2, 0xe2 },
{ 0xc3, 0xe3 },
{ 0xc4, 0xe4 },
{ 0xc5, 0xe5 },
{ 0xc6, 0xe6 },
{ 0xc7, 0xe7 },
{ 0xc8, 0xe8 },
{ 0xc9, 0xe9 },
{ 0xca, 0xea },
{ 0xcb, 0xeb },
{ 0xcc, 0xec },
{ 0xcd, 0xed },
{ 0xce, 0xee },
{ 0xcf, 0xef },
{ 0xd0, 0xf0 },
{ 0xd1, 0xf1 },
{ 0xd2, 0xf2 },
{ 0xd3, 0xf3 },
{ 0xd4, 0xf4 },
{ 0xd5, 0xf5 },
{ 0xd6, 0xf6 },
{ 0xd7, 0xf7 },
{ 0xd8, 0xf8 },
{ 0xd9, 0xf9 },
{ 0xda, 0xfa },
{ 0xdb, 0xfb },
{ 0xdc, 0xfc },
{ 0xdd, 0xfd },
{ 0xde, 0xfe },
{ 0xdf, 0xff }
};
static int
koi8_r_apply_all_case_fold(OnigCaseFoldType flag,
OnigApplyAllCaseFoldFunc f, void* arg)
{
return onigenc_apply_all_case_fold_with_map(
sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0,
flag, f, arg);
}
static int
koi8_r_get_case_fold_codes_by_str(OnigCaseFoldType flag,
const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])
{
return onigenc_get_case_fold_codes_by_str_with_map(
sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0,
flag, p, end, items);
}
OnigEncodingType OnigEncodingKOI8_R = {
onigenc_single_byte_mbc_enc_len,
"KOI8-R", /* name */
1, /* max enc length */
1, /* min enc length */
onigenc_is_mbc_newline_0x0a,
onigenc_single_byte_mbc_to_code,
onigenc_single_byte_code_to_mbclen,
onigenc_single_byte_code_to_mbc,
koi8_r_mbc_case_fold,
koi8_r_apply_all_case_fold,
koi8_r_get_case_fold_codes_by_str,
onigenc_minimum_property_name_to_ctype,
koi8_r_is_code_ctype,
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_always_true_is_allowed_reverse_match
};

1162
oniguruma/enc/mktable.c Normal file

File diff suppressed because it is too large Load diff

318
oniguruma/enc/sjis.c Normal file
View file

@ -0,0 +1,318 @@
/**********************************************************************
sjis.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "regint.h"
static const int EncLen_SJIS[] = {
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1
};
static const char SJIS_CAN_BE_TRAIL_TABLE[256] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0
};
#define SJIS_ISMB_FIRST(byte) (EncLen_SJIS[byte] > 1)
#define SJIS_ISMB_TRAIL(byte) SJIS_CAN_BE_TRAIL_TABLE[(byte)]
static int
mbc_enc_len(const UChar* p)
{
return EncLen_SJIS[*p];
}
static int
code_to_mbclen(OnigCodePoint code)
{
if (code < 256) {
if (EncLen_SJIS[(int )code] == 1)
return 1;
else
return 0;
}
else if (code <= 0xffff) {
return 2;
}
else
return ONIGERR_INVALID_CODE_POINT_VALUE;
}
static OnigCodePoint
mbc_to_code(const UChar* p, const UChar* end)
{
int c, i, len;
OnigCodePoint n;
len = enclen(ONIG_ENCODING_SJIS, p);
c = *p++;
n = c;
if (len == 1) return n;
for (i = 1; i < len; i++) {
if (p >= end) break;
c = *p++;
n <<= 8; n += c;
}
return n;
}
static int
code_to_mbc(OnigCodePoint code, UChar *buf)
{
UChar *p = buf;
if ((code & 0xff00) != 0) *p++ = (UChar )(((code >> 8) & 0xff));
*p++ = (UChar )(code & 0xff);
#if 0
if (enclen(ONIG_ENCODING_SJIS, buf) != (p - buf))
return REGERR_INVALID_CODE_POINT_VALUE;
#endif
return p - buf;
}
static int
mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED,
const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower)
{
const UChar* p = *pp;
if (ONIGENC_IS_MBC_ASCII(p)) {
*lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
(*pp)++;
return 1;
}
else {
int i;
int len = enclen(ONIG_ENCODING_SJIS, p);
for (i = 0; i < len; i++) {
*lower++ = *p++;
}
(*pp) += len;
return len; /* return byte length of converted char to lower */
}
}
#if 0
static int
is_mbc_ambiguous(OnigCaseFoldType flag,
const UChar** pp, const UChar* end)
{
return onigenc_mbn_is_mbc_ambiguous(ONIG_ENCODING_SJIS, flag, pp, end);
}
#endif
#if 0
static int
is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
if (code < 128)
return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
else {
if (CTYPE_IS_WORD_GRAPH_PRINT(ctype)) {
return (code_to_mbclen(code) > 1 ? TRUE : FALSE);
}
}
return FALSE;
}
#endif
static UChar*
left_adjust_char_head(const UChar* start, const UChar* s)
{
const UChar *p;
int len;
if (s <= start) return (UChar* )s;
p = s;
if (SJIS_ISMB_TRAIL(*p)) {
while (p > start) {
if (! SJIS_ISMB_FIRST(*--p)) {
p++;
break;
}
}
}
len = enclen(ONIG_ENCODING_SJIS, p);
if (p + len > s) return (UChar* )p;
p += len;
return (UChar* )(p + ((s - p) & ~1));
}
static int
is_allowed_reverse_match(const UChar* s, const UChar* end ARG_UNUSED)
{
const UChar c = *s;
return (SJIS_ISMB_TRAIL(c) ? FALSE : TRUE);
}
static int PropertyInited = 0;
static const OnigCodePoint** PropertyList;
static int PropertyListNum;
static int PropertyListSize;
static hash_table_type* PropertyNameTable;
static const OnigCodePoint CR_Hiragana[] = {
1,
0x829f, 0x82f1
}; /* CR_Hiragana */
static const OnigCodePoint CR_Katakana[] = {
4,
0x00a6, 0x00af,
0x00b1, 0x00dd,
0x8340, 0x837e,
0x8380, 0x8396,
}; /* CR_Katakana */
static int
init_property_list(void)
{
int r;
PROPERTY_LIST_ADD_PROP("Hiragana", CR_Hiragana);
PROPERTY_LIST_ADD_PROP("Katakana", CR_Katakana);
PropertyInited = 1;
end:
return r;
}
static int
property_name_to_ctype(OnigEncoding enc, UChar* p, UChar* end)
{
hash_data_type ctype;
PROPERTY_LIST_INIT_CHECK;
if (onig_st_lookup_strend(PropertyNameTable, p, end, &ctype) == 0) {
return onigenc_minimum_property_name_to_ctype(enc, p, end);
}
return (int )ctype;
}
static int
is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
if (ctype <= ONIGENC_MAX_STD_CTYPE) {
if (code < 128)
return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
else {
if (CTYPE_IS_WORD_GRAPH_PRINT(ctype)) {
return (code_to_mbclen(code) > 1 ? TRUE : FALSE);
}
}
}
else {
PROPERTY_LIST_INIT_CHECK;
ctype -= (ONIGENC_MAX_STD_CTYPE + 1);
if (ctype >= (unsigned int )PropertyListNum)
return ONIGERR_TYPE_BUG;
return onig_is_in_code_range((UChar* )PropertyList[ctype], code);
}
return FALSE;
}
static int
get_ctype_code_range(OnigCtype ctype, OnigCodePoint* sb_out,
const OnigCodePoint* ranges[])
{
if (ctype <= ONIGENC_MAX_STD_CTYPE) {
return ONIG_NO_SUPPORT_CONFIG;
}
else {
*sb_out = 0x80;
PROPERTY_LIST_INIT_CHECK;
ctype -= (ONIGENC_MAX_STD_CTYPE + 1);
if (ctype >= (OnigCtype )PropertyListNum)
return ONIGERR_TYPE_BUG;
*ranges = PropertyList[ctype];
return 0;
}
}
OnigEncodingType OnigEncodingSJIS = {
mbc_enc_len,
"Shift_JIS", /* name */
2, /* max byte length */
1, /* min byte length */
onigenc_is_mbc_newline_0x0a,
mbc_to_code,
code_to_mbclen,
code_to_mbc,
mbc_case_fold,
onigenc_ascii_apply_all_case_fold,
onigenc_ascii_get_case_fold_codes_by_str,
property_name_to_ctype,
is_code_ctype,
get_ctype_code_range,
left_adjust_char_head,
is_allowed_reverse_match
};

11356
oniguruma/enc/unicode.c Normal file

File diff suppressed because it is too large Load diff

225
oniguruma/enc/utf16_be.c Normal file
View file

@ -0,0 +1,225 @@
/**********************************************************************
utf16_be.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "regenc.h"
static const int EncLen_UTF16[] = {
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
};
static int
utf16be_mbc_enc_len(const UChar* p)
{
return EncLen_UTF16[*p];
}
static int
utf16be_is_mbc_newline(const UChar* p, const UChar* end)
{
if (p + 1 < end) {
if (*(p+1) == 0x0a && *p == 0x00)
return 1;
#ifdef USE_UNICODE_ALL_LINE_TERMINATORS
if ((
#ifndef USE_CRNL_AS_LINE_TERMINATOR
*(p+1) == 0x0d ||
#endif
*(p+1) == 0x85) && *p == 0x00)
return 1;
if (*p == 0x20 && (*(p+1) == 0x29 || *(p+1) == 0x28))
return 1;
#endif
}
return 0;
}
static OnigCodePoint
utf16be_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED)
{
OnigCodePoint code;
if (UTF16_IS_SURROGATE_FIRST(*p)) {
code = ((((p[0] - 0xd8) << 2) + ((p[1] & 0xc0) >> 6) + 1) << 16)
+ ((((p[1] & 0x3f) << 2) + (p[2] - 0xdc)) << 8)
+ p[3];
}
else {
code = p[0] * 256 + p[1];
}
return code;
}
static int
utf16be_code_to_mbclen(OnigCodePoint code)
{
return (code > 0xffff ? 4 : 2);
}
static int
utf16be_code_to_mbc(OnigCodePoint code, UChar *buf)
{
UChar* p = buf;
if (code > 0xffff) {
unsigned int plane, high;
plane = (code >> 16) - 1;
*p++ = (plane >> 2) + 0xd8;
high = (code & 0xff00) >> 8;
*p++ = ((plane & 0x03) << 6) + (high >> 2);
*p++ = (high & 0x03) + 0xdc;
*p = (UChar )(code & 0xff);
return 4;
}
else {
*p++ = (UChar )((code & 0xff00) >> 8);
*p++ = (UChar )(code & 0xff);
return 2;
}
}
static int
utf16be_mbc_case_fold(OnigCaseFoldType flag,
const UChar** pp, const UChar* end, UChar* fold)
{
const UChar* p = *pp;
if (ONIGENC_IS_ASCII_CODE(*(p+1)) && *p == 0) {
p++;
#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) {
if (*p == 0x49) {
*fold++ = 0x01;
*fold = 0x31;
(*pp) += 2;
return 2;
}
}
#endif
*fold++ = 0;
*fold = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
*pp += 2;
return 2;
}
else
return onigenc_unicode_mbc_case_fold(ONIG_ENCODING_UTF16_BE, flag,
pp, end, fold);
}
#if 0
static int
utf16be_is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end)
{
const UChar* p = *pp;
(*pp) += EncLen_UTF16[*p];
if (*p == 0) {
int c, v;
p++;
if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
return TRUE;
}
c = *p;
v = ONIGENC_IS_UNICODE_ISO_8859_1_BIT_CTYPE(c,
(BIT_CTYPE_UPPER | BIT_CTYPE_LOWER));
if ((v | BIT_CTYPE_LOWER) != 0) {
/* 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */
if (c >= 0xaa && c <= 0xba)
return FALSE;
else
return TRUE;
}
return (v != 0 ? TRUE : FALSE);
}
return FALSE;
}
#endif
static UChar*
utf16be_left_adjust_char_head(const UChar* start, const UChar* s)
{
if (s <= start) return (UChar* )s;
if ((s - start) % 2 == 1) {
s--;
}
if (UTF16_IS_SURROGATE_SECOND(*s) && s > start + 1)
s -= 2;
return (UChar* )s;
}
static int
utf16be_get_case_fold_codes_by_str(OnigCaseFoldType flag,
const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])
{
return onigenc_unicode_get_case_fold_codes_by_str(ONIG_ENCODING_UTF16_BE,
flag, p, end, items);
}
OnigEncodingType OnigEncodingUTF16_BE = {
utf16be_mbc_enc_len,
"UTF-16BE", /* name */
4, /* max byte length */
2, /* min byte length */
utf16be_is_mbc_newline,
utf16be_mbc_to_code,
utf16be_code_to_mbclen,
utf16be_code_to_mbc,
utf16be_mbc_case_fold,
onigenc_unicode_apply_all_case_fold,
utf16be_get_case_fold_codes_by_str,
onigenc_unicode_property_name_to_ctype,
onigenc_unicode_is_code_ctype,
onigenc_utf16_32_get_ctype_code_range,
utf16be_left_adjust_char_head,
onigenc_always_false_is_allowed_reverse_match
};

226
oniguruma/enc/utf16_le.c Normal file
View file

@ -0,0 +1,226 @@
/**********************************************************************
utf16_le.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "regenc.h"
static const int EncLen_UTF16[] = {
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
};
static int
utf16le_code_to_mbclen(OnigCodePoint code)
{
return (code > 0xffff ? 4 : 2);
}
static int
utf16le_mbc_enc_len(const UChar* p)
{
return EncLen_UTF16[*(p+1)];
}
static int
utf16le_is_mbc_newline(const UChar* p, const UChar* end)
{
if (p + 1 < end) {
if (*p == 0x0a && *(p+1) == 0x00)
return 1;
#ifdef USE_UNICODE_ALL_LINE_TERMINATORS
if ((
#ifndef USE_CRNL_AS_LINE_TERMINATOR
*p == 0x0d ||
#endif
*p == 0x85) && *(p+1) == 0x00)
return 1;
if (*(p+1) == 0x20 && (*p == 0x29 || *p == 0x28))
return 1;
#endif
}
return 0;
}
static OnigCodePoint
utf16le_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED)
{
OnigCodePoint code;
UChar c0 = *p;
UChar c1 = *(p+1);
if (UTF16_IS_SURROGATE_FIRST(c1)) {
code = ((((c1 - 0xd8) << 2) + ((c0 & 0xc0) >> 6) + 1) << 16)
+ ((((c0 & 0x3f) << 2) + (p[3] - 0xdc)) << 8)
+ p[2];
}
else {
code = c1 * 256 + p[0];
}
return code;
}
static int
utf16le_code_to_mbc(OnigCodePoint code, UChar *buf)
{
UChar* p = buf;
if (code > 0xffff) {
unsigned int plane, high;
plane = (code >> 16) - 1;
high = (code & 0xff00) >> 8;
*p++ = ((plane & 0x03) << 6) + (high >> 2);
*p++ = (plane >> 2) + 0xd8;
*p++ = (UChar )(code & 0xff);
*p = (high & 0x03) + 0xdc;
return 4;
}
else {
*p++ = (UChar )(code & 0xff);
*p++ = (UChar )((code & 0xff00) >> 8);
return 2;
}
}
static int
utf16le_mbc_case_fold(OnigCaseFoldType flag,
const UChar** pp, const UChar* end, UChar* fold)
{
const UChar* p = *pp;
if (ONIGENC_IS_ASCII_CODE(*p) && *(p+1) == 0) {
#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) {
if (*p == 0x49) {
*fold++ = 0x31;
*fold = 0x01;
(*pp) += 2;
return 2;
}
}
#endif
*fold++ = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
*fold = 0;
*pp += 2;
return 2;
}
else
return onigenc_unicode_mbc_case_fold(ONIG_ENCODING_UTF16_LE, flag, pp, end,
fold);
}
#if 0
static int
utf16le_is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp,
const UChar* end)
{
const UChar* p = *pp;
(*pp) += EncLen_UTF16[*(p+1)];
if (*(p+1) == 0) {
int c, v;
if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
return TRUE;
}
c = *p;
v = ONIGENC_IS_UNICODE_ISO_8859_1_BIT_CTYPE(c,
(BIT_CTYPE_UPPER | BIT_CTYPE_LOWER));
if ((v | BIT_CTYPE_LOWER) != 0) {
/* 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */
if (c >= 0xaa && c <= 0xba)
return FALSE;
else
return TRUE;
}
return (v != 0 ? TRUE : FALSE);
}
return FALSE;
}
#endif
static UChar*
utf16le_left_adjust_char_head(const UChar* start, const UChar* s)
{
if (s <= start) return (UChar* )s;
if ((s - start) % 2 == 1) {
s--;
}
if (UTF16_IS_SURROGATE_SECOND(*(s+1)) && s > start + 1)
s -= 2;
return (UChar* )s;
}
static int
utf16le_get_case_fold_codes_by_str(OnigCaseFoldType flag,
const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])
{
return onigenc_unicode_get_case_fold_codes_by_str(ONIG_ENCODING_UTF16_LE,
flag, p, end, items);
}
OnigEncodingType OnigEncodingUTF16_LE = {
utf16le_mbc_enc_len,
"UTF-16LE", /* name */
4, /* max byte length */
2, /* min byte length */
utf16le_is_mbc_newline,
utf16le_mbc_to_code,
utf16le_code_to_mbclen,
utf16le_code_to_mbc,
utf16le_mbc_case_fold,
onigenc_unicode_apply_all_case_fold,
utf16le_get_case_fold_codes_by_str,
onigenc_unicode_property_name_to_ctype,
onigenc_unicode_is_code_ctype,
onigenc_utf16_32_get_ctype_code_range,
utf16le_left_adjust_char_head,
onigenc_always_false_is_allowed_reverse_match
};

184
oniguruma/enc/utf32_be.c Normal file
View file

@ -0,0 +1,184 @@
/**********************************************************************
utf32_be.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "regenc.h"
static int
utf32be_mbc_enc_len(const UChar* p ARG_UNUSED)
{
return 4;
}
static int
utf32be_is_mbc_newline(const UChar* p, const UChar* end)
{
if (p + 3 < end) {
if (*(p+3) == 0x0a && *(p+2) == 0 && *(p+1) == 0 && *p == 0)
return 1;
#ifdef USE_UNICODE_ALL_LINE_TERMINATORS
if ((
#ifndef USE_CRNL_AS_LINE_TERMINATOR
*(p+3) == 0x0d ||
#endif
*(p+3) == 0x85)
&& *(p+2) == 0 && *(p+1) == 0 && *p == 0x00)
return 1;
if (*(p+2) == 0x20 && (*(p+3) == 0x29 || *(p+3) == 0x28)
&& *(p+1) == 0 && *p == 0)
return 1;
#endif
}
return 0;
}
static OnigCodePoint
utf32be_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED)
{
return (OnigCodePoint )(((p[0] * 256 + p[1]) * 256 + p[2]) * 256 + p[3]);
}
static int
utf32be_code_to_mbclen(OnigCodePoint code ARG_UNUSED)
{
return 4;
}
static int
utf32be_code_to_mbc(OnigCodePoint code, UChar *buf)
{
UChar* p = buf;
*p++ = (UChar )((code & 0xff000000) >>24);
*p++ = (UChar )((code & 0xff0000) >>16);
*p++ = (UChar )((code & 0xff00) >> 8);
*p++ = (UChar ) (code & 0xff);
return 4;
}
static int
utf32be_mbc_case_fold(OnigCaseFoldType flag,
const UChar** pp, const UChar* end, UChar* fold)
{
const UChar* p = *pp;
if (ONIGENC_IS_ASCII_CODE(*(p+3)) && *(p+2) == 0 && *(p+1) == 0 && *p == 0) {
*fold++ = 0;
*fold++ = 0;
#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) {
if (*(p+3) == 0x49) {
*fold++ = 0x01;
*fold = 0x31;
(*pp) += 4;
return 4;
}
}
#endif
*fold++ = 0;
*fold = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*(p+3));
*pp += 4;
return 4;
}
else
return onigenc_unicode_mbc_case_fold(ONIG_ENCODING_UTF32_BE, flag, pp, end,
fold);
}
#if 0
static int
utf32be_is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end)
{
const UChar* p = *pp;
(*pp) += 4;
if (*(p+2) == 0 && *(p+1) == 0 && *p == 0) {
int c, v;
p += 3;
if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
return TRUE;
}
c = *p;
v = ONIGENC_IS_UNICODE_ISO_8859_1_BIT_CTYPE(c,
(BIT_CTYPE_UPPER | BIT_CTYPE_LOWER));
if ((v | BIT_CTYPE_LOWER) != 0) {
/* 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */
if (c >= 0xaa && c <= 0xba)
return FALSE;
else
return TRUE;
}
return (v != 0 ? TRUE : FALSE);
}
return FALSE;
}
#endif
static UChar*
utf32be_left_adjust_char_head(const UChar* start, const UChar* s)
{
int rem;
if (s <= start) return (UChar* )s;
rem = (s - start) % 4;
return (UChar* )(s - rem);
}
static int
utf32be_get_case_fold_codes_by_str(OnigCaseFoldType flag,
const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])
{
return onigenc_unicode_get_case_fold_codes_by_str(ONIG_ENCODING_UTF32_BE,
flag, p, end, items);
}
OnigEncodingType OnigEncodingUTF32_BE = {
utf32be_mbc_enc_len,
"UTF-32BE", /* name */
4, /* max byte length */
4, /* min byte length */
utf32be_is_mbc_newline,
utf32be_mbc_to_code,
utf32be_code_to_mbclen,
utf32be_code_to_mbc,
utf32be_mbc_case_fold,
onigenc_unicode_apply_all_case_fold,
utf32be_get_case_fold_codes_by_str,
onigenc_unicode_property_name_to_ctype,
onigenc_unicode_is_code_ctype,
onigenc_utf16_32_get_ctype_code_range,
utf32be_left_adjust_char_head,
onigenc_always_false_is_allowed_reverse_match
};

184
oniguruma/enc/utf32_le.c Normal file
View file

@ -0,0 +1,184 @@
/**********************************************************************
utf32_le.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "regenc.h"
static int
utf32le_mbc_enc_len(const UChar* p ARG_UNUSED)
{
return 4;
}
static int
utf32le_is_mbc_newline(const UChar* p, const UChar* end)
{
if (p + 3 < end) {
if (*p == 0x0a && *(p+1) == 0 && *(p+2) == 0 && *(p+3) == 0)
return 1;
#ifdef USE_UNICODE_ALL_LINE_TERMINATORS
if ((
#ifndef USE_CRNL_AS_LINE_TERMINATOR
*p == 0x0d ||
#endif
*p == 0x85)
&& *(p+1) == 0x00 && (p+2) == 0x00 && *(p+3) == 0x00)
return 1;
if (*(p+1) == 0x20 && (*p == 0x29 || *p == 0x28)
&& *(p+2) == 0x00 && *(p+3) == 0x00)
return 1;
#endif
}
return 0;
}
static OnigCodePoint
utf32le_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED)
{
return (OnigCodePoint )(((p[3] * 256 + p[2]) * 256 + p[1]) * 256 + p[0]);
}
static int
utf32le_code_to_mbclen(OnigCodePoint code ARG_UNUSED)
{
return 4;
}
static int
utf32le_code_to_mbc(OnigCodePoint code, UChar *buf)
{
UChar* p = buf;
*p++ = (UChar ) (code & 0xff);
*p++ = (UChar )((code & 0xff00) >> 8);
*p++ = (UChar )((code & 0xff0000) >>16);
*p++ = (UChar )((code & 0xff000000) >>24);
return 4;
}
static int
utf32le_mbc_case_fold(OnigCaseFoldType flag,
const UChar** pp, const UChar* end, UChar* fold)
{
const UChar* p = *pp;
if (ONIGENC_IS_ASCII_CODE(*p) && *(p+1) == 0 && *(p+2) == 0 && *(p+3) == 0) {
#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) {
if (*p == 0x49) {
*fold++ = 0x31;
*fold++ = 0x01;
}
}
else {
#endif
*fold++ = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
*fold++ = 0;
#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
}
#endif
*fold++ = 0;
*fold = 0;
*pp += 4;
return 4;
}
else
return onigenc_unicode_mbc_case_fold(ONIG_ENCODING_UTF32_LE, flag, pp, end,
fold);
}
#if 0
static int
utf32le_is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end)
{
const UChar* p = *pp;
(*pp) += 4;
if (*(p+1) == 0 && *(p+2) == 0 && *(p+3) == 0) {
int c, v;
if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
return TRUE;
}
c = *p;
v = ONIGENC_IS_UNICODE_ISO_8859_1_BIT_CTYPE(c,
(BIT_CTYPE_UPPER | BIT_CTYPE_LOWER));
if ((v | BIT_CTYPE_LOWER) != 0) {
/* 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */
if (c >= 0xaa && c <= 0xba)
return FALSE;
else
return TRUE;
}
return (v != 0 ? TRUE : FALSE);
}
return FALSE;
}
#endif
static UChar*
utf32le_left_adjust_char_head(const UChar* start, const UChar* s)
{
int rem;
if (s <= start) return (UChar* )s;
rem = (s - start) % 4;
return (UChar* )(s - rem);
}
static int
utf32le_get_case_fold_codes_by_str(OnigCaseFoldType flag,
const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])
{
return onigenc_unicode_get_case_fold_codes_by_str(ONIG_ENCODING_UTF32_LE,
flag, p, end, items);
}
OnigEncodingType OnigEncodingUTF32_LE = {
utf32le_mbc_enc_len,
"UTF-32LE", /* name */
4, /* max byte length */
4, /* min byte length */
utf32le_is_mbc_newline,
utf32le_mbc_to_code,
utf32le_code_to_mbclen,
utf32le_code_to_mbc,
utf32le_mbc_case_fold,
onigenc_unicode_apply_all_case_fold,
utf32le_get_case_fold_codes_by_str,
onigenc_unicode_property_name_to_ctype,
onigenc_unicode_is_code_ctype,
onigenc_utf16_32_get_ctype_code_range,
utf32le_left_adjust_char_head,
onigenc_always_false_is_allowed_reverse_match
};

305
oniguruma/enc/utf8.c Normal file
View file

@ -0,0 +1,305 @@
/**********************************************************************
utf8.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "regenc.h"
#define USE_INVALID_CODE_SCHEME
#ifdef USE_INVALID_CODE_SCHEME
/* virtual codepoint values for invalid encoding byte 0xfe and 0xff */
#define INVALID_CODE_FE 0xfffffffe
#define INVALID_CODE_FF 0xffffffff
#define VALID_CODE_LIMIT 0x7fffffff
#endif
#define utf8_islead(c) ((UChar )((c) & 0xc0) != 0x80)
static const int EncLen_UTF8[] = {
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 1, 1
};
static int
mbc_enc_len(const UChar* p)
{
return EncLen_UTF8[*p];
}
static int
is_mbc_newline(const UChar* p, const UChar* end)
{
if (p < end) {
if (*p == 0x0a) return 1;
#ifdef USE_UNICODE_ALL_LINE_TERMINATORS
#ifndef USE_CRNL_AS_LINE_TERMINATOR
if (*p == 0x0d) return 1;
#endif
if (p + 1 < end) {
if (*(p+1) == 0x85 && *p == 0xc2) /* U+0085 */
return 1;
if (p + 2 < end) {
if ((*(p+2) == 0xa8 || *(p+2) == 0xa9)
&& *(p+1) == 0x80 && *p == 0xe2) /* U+2028, U+2029 */
return 1;
}
}
#endif
}
return 0;
}
static OnigCodePoint
mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED)
{
int c, len;
OnigCodePoint n;
len = enclen(ONIG_ENCODING_UTF8, p);
c = *p++;
if (len > 1) {
len--;
n = c & ((1 << (6 - len)) - 1);
while (len--) {
c = *p++;
n = (n << 6) | (c & ((1 << 6) - 1));
}
return n;
}
else {
#ifdef USE_INVALID_CODE_SCHEME
if (c > 0xfd) {
return ((c == 0xfe) ? INVALID_CODE_FE : INVALID_CODE_FF);
}
#endif
return (OnigCodePoint )c;
}
}
static int
code_to_mbclen(OnigCodePoint code)
{
if ((code & 0xffffff80) == 0) return 1;
else if ((code & 0xfffff800) == 0) return 2;
else if ((code & 0xffff0000) == 0) return 3;
else if ((code & 0xffe00000) == 0) return 4;
else if ((code & 0xfc000000) == 0) return 5;
else if ((code & 0x80000000) == 0) return 6;
#ifdef USE_INVALID_CODE_SCHEME
else if (code == INVALID_CODE_FE) return 1;
else if (code == INVALID_CODE_FF) return 1;
#endif
else
return ONIGERR_INVALID_CODE_POINT_VALUE;
}
static int
code_to_mbc(OnigCodePoint code, UChar *buf)
{
#define UTF8_TRAILS(code, shift) (UChar )((((code) >> (shift)) & 0x3f) | 0x80)
#define UTF8_TRAIL0(code) (UChar )(((code) & 0x3f) | 0x80)
if ((code & 0xffffff80) == 0) {
*buf = (UChar )code;
return 1;
}
else {
UChar *p = buf;
if ((code & 0xfffff800) == 0) {
*p++ = (UChar )(((code>>6)& 0x1f) | 0xc0);
}
else if ((code & 0xffff0000) == 0) {
*p++ = (UChar )(((code>>12) & 0x0f) | 0xe0);
*p++ = UTF8_TRAILS(code, 6);
}
else if ((code & 0xffe00000) == 0) {
*p++ = (UChar )(((code>>18) & 0x07) | 0xf0);
*p++ = UTF8_TRAILS(code, 12);
*p++ = UTF8_TRAILS(code, 6);
}
else if ((code & 0xfc000000) == 0) {
*p++ = (UChar )(((code>>24) & 0x03) | 0xf8);
*p++ = UTF8_TRAILS(code, 18);
*p++ = UTF8_TRAILS(code, 12);
*p++ = UTF8_TRAILS(code, 6);
}
else if ((code & 0x80000000) == 0) {
*p++ = (UChar )(((code>>30) & 0x01) | 0xfc);
*p++ = UTF8_TRAILS(code, 24);
*p++ = UTF8_TRAILS(code, 18);
*p++ = UTF8_TRAILS(code, 12);
*p++ = UTF8_TRAILS(code, 6);
}
#ifdef USE_INVALID_CODE_SCHEME
else if (code == INVALID_CODE_FE) {
*p = 0xfe;
return 1;
}
else if (code == INVALID_CODE_FF) {
*p = 0xff;
return 1;
}
#endif
else {
return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;
}
*p++ = UTF8_TRAIL0(code);
return p - buf;
}
}
static int
mbc_case_fold(OnigCaseFoldType flag, const UChar** pp,
const UChar* end, UChar* fold)
{
const UChar* p = *pp;
if (ONIGENC_IS_MBC_ASCII(p)) {
#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) {
if (*p == 0x49) {
*fold++ = 0xc4;
*fold = 0xb1;
(*pp)++;
return 2;
}
}
#endif
*fold = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
(*pp)++;
return 1; /* return byte length of converted char to lower */
}
else {
return onigenc_unicode_mbc_case_fold(ONIG_ENCODING_UTF8, flag,
pp, end, fold);
}
}
#if 0
static int
is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end)
{
const UChar* p = *pp;
if (ONIGENC_IS_MBC_ASCII(p)) {
(*pp)++;
return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p);
}
else {
(*pp) += enclen(ONIG_ENCODING_UTF8, p);
if (*p == 0xc3) {
int c = *(p + 1);
if (c >= 0x80) {
if (c <= (UChar )0x9e) { /* upper */
if (c == (UChar )0x97) return FALSE;
return TRUE;
}
else if (c >= (UChar )0xa0 && c <= (UChar )0xbe) { /* lower */
if (c == (UChar )'\267') return FALSE;
return TRUE;
}
else if (c == (UChar )0x9f &&
(flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
return TRUE;
}
}
}
}
return FALSE;
}
#endif
static int
get_ctype_code_range(OnigCtype ctype, OnigCodePoint *sb_out,
const OnigCodePoint* ranges[])
{
*sb_out = 0x80;
return onigenc_unicode_ctype_code_range(ctype, ranges);
}
static UChar*
left_adjust_char_head(const UChar* start, const UChar* s)
{
const UChar *p;
if (s <= start) return (UChar* )s;
p = s;
while (!utf8_islead(*p) && p > start) p--;
return (UChar* )p;
}
static int
get_case_fold_codes_by_str(OnigCaseFoldType flag,
const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])
{
return onigenc_unicode_get_case_fold_codes_by_str(ONIG_ENCODING_UTF8,
flag, p, end, items);
}
OnigEncodingType OnigEncodingUTF8 = {
mbc_enc_len,
"UTF-8", /* name */
6, /* max byte length */
1, /* min byte length */
is_mbc_newline,
mbc_to_code,
code_to_mbclen,
code_to_mbc,
mbc_case_fold,
onigenc_unicode_apply_all_case_fold,
get_case_fold_codes_by_str,
onigenc_unicode_property_name_to_ctype,
onigenc_unicode_is_code_ctype,
get_ctype_code_range,
left_adjust_char_head,
onigenc_always_true_is_allowed_reverse_match
};

78
oniguruma/onig-config.in Normal file
View file

@ -0,0 +1,78 @@
#!/bin/sh
# Copyright (C) 2006 K.Kosako (sndgk393 AT ybb DOT ne DOT jp)
ONIG_VERSION=@PACKAGE_VERSION@
show_usage()
{
cat <<EOF
Usage: onig-config [OPTION]
Values for OPTION are:
--prefix[=DIR] change prefix to DIR
--prefix print prefix
--exec-prefix[=DIR] change exec_prefix to DIR
--exec-prefix print exec_prefix
--cflags print C compiler flags
--libs print library information
--version print oniguruma version
--help print this help
EOF
exit 1
}
if test $# -eq 0; then
show_usage
fi
prefix=@prefix@
exec_prefix=@exec_prefix@
is_set_exec_prefix=no
while test $# -gt 0; do
case "$1" in
-*=*) val=`echo "$1" | sed 's/[-_a-zA-Z0-9]*=//'`
;;
*) val=
;;
esac
case $1 in
--prefix=*)
prefix=$val
if test $is_set_exec_prefix = no ; then
exec_prefix=$val
fi
;;
--prefix)
echo $prefix
;;
--exec-prefix=*)
exec_prefix=$val
is_set_exec_prefix=yes
;;
--exec-prefix)
echo $exec_prefix
;;
--cflags)
if test @includedir@ != /usr/include ; then
show_includedir=-I@includedir@
fi
echo $show_includedir
;;
--libs)
echo -L@libdir@ -lonig
;;
--version)
echo $ONIG_VERSION
;;
*)
show_usage
;;
esac
shift
done
# END

85
oniguruma/oniggnu.h Normal file
View file

@ -0,0 +1,85 @@
#ifndef ONIGGNU_H
#define ONIGGNU_H
/**********************************************************************
oniggnu.h - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "oniguruma.h"
#ifdef __cplusplus
extern "C" {
#endif
#define RE_MBCTYPE_ASCII 0
#define RE_MBCTYPE_EUC 1
#define RE_MBCTYPE_SJIS 2
#define RE_MBCTYPE_UTF8 3
/* GNU regex options */
#ifndef RE_NREGS
#define RE_NREGS ONIG_NREGION
#endif
#define RE_OPTION_IGNORECASE ONIG_OPTION_IGNORECASE
#define RE_OPTION_EXTENDED ONIG_OPTION_EXTEND
#define RE_OPTION_MULTILINE ONIG_OPTION_MULTILINE
#define RE_OPTION_SINGLELINE ONIG_OPTION_SINGLELINE
#define RE_OPTION_LONGEST ONIG_OPTION_FIND_LONGEST
#define RE_OPTION_POSIXLINE (RE_OPTION_MULTILINE|RE_OPTION_SINGLELINE)
#define RE_OPTION_FIND_NOT_EMPTY ONIG_OPTION_FIND_NOT_EMPTY
#define RE_OPTION_NEGATE_SINGLELINE ONIG_OPTION_NEGATE_SINGLELINE
#define RE_OPTION_DONT_CAPTURE_GROUP ONIG_OPTION_DONT_CAPTURE_GROUP
#define RE_OPTION_CAPTURE_GROUP ONIG_OPTION_CAPTURE_GROUP
ONIG_EXTERN
void re_mbcinit P_((int));
ONIG_EXTERN
int re_compile_pattern P_((const char*, int, struct re_pattern_buffer*, char* err_buf));
ONIG_EXTERN
int re_recompile_pattern P_((const char*, int, struct re_pattern_buffer*, char* err_buf));
ONIG_EXTERN
void re_free_pattern P_((struct re_pattern_buffer*));
ONIG_EXTERN
int re_adjust_startpos P_((struct re_pattern_buffer*, const char*, int, int, int));
ONIG_EXTERN
int re_search P_((struct re_pattern_buffer*, const char*, int, int, int, struct re_registers*));
ONIG_EXTERN
int re_match P_((struct re_pattern_buffer*, const char *, int, int, struct re_registers*));
ONIG_EXTERN
void re_set_casetable P_((const char*));
ONIG_EXTERN
void re_free_registers P_((struct re_registers*));
ONIG_EXTERN
int re_alloc_pattern P_((struct re_pattern_buffer**)); /* added */
#ifdef __cplusplus
}
#endif
#endif /* ONIGGNU_H */

169
oniguruma/onigposix.h Normal file
View file

@ -0,0 +1,169 @@
#ifndef ONIGPOSIX_H
#define ONIGPOSIX_H
/**********************************************************************
onigposix.h - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <stdlib.h>
#ifdef __cplusplus
extern "C" {
#endif
/* options */
#define REG_ICASE (1<<0)
#define REG_NEWLINE (1<<1)
#define REG_NOTBOL (1<<2)
#define REG_NOTEOL (1<<3)
#define REG_EXTENDED (1<<4) /* if not setted, Basic Onigular Expression */
#define REG_NOSUB (1<<5)
/* POSIX error codes */
#define REG_NOMATCH 1
#define REG_BADPAT 2
#define REG_ECOLLATE 3
#define REG_ECTYPE 4
#define REG_EESCAPE 5
#define REG_ESUBREG 6
#define REG_EBRACK 7
#define REG_EPAREN 8
#define REG_EBRACE 9
#define REG_BADBR 10
#define REG_ERANGE 11
#define REG_ESPACE 12
#define REG_BADRPT 13
/* extended error codes */
#define REG_EONIG_INTERNAL 14
#define REG_EONIG_BADWC 15
#define REG_EONIG_BADARG 16
#define REG_EONIG_THREAD 17
/* character encodings (for reg_set_encoding()) */
#define REG_POSIX_ENCODING_ASCII 0
#define REG_POSIX_ENCODING_EUC_JP 1
#define REG_POSIX_ENCODING_SJIS 2
#define REG_POSIX_ENCODING_UTF8 3
#define REG_POSIX_ENCODING_UTF16_BE 4
#define REG_POSIX_ENCODING_UTF16_LE 5
typedef int regoff_t;
typedef struct {
regoff_t rm_so;
regoff_t rm_eo;
} regmatch_t;
/* POSIX regex_t */
typedef struct {
void* onig; /* Oniguruma regex_t* */
size_t re_nsub;
int comp_options;
} regex_t;
#ifndef P_
#if defined(__STDC__) || defined(_WIN32)
# define P_(args) args
#else
# define P_(args) ()
#endif
#endif
#ifndef ONIG_EXTERN
#if defined(_WIN32) && !defined(__GNUC__)
#if defined(EXPORT)
#define ONIG_EXTERN extern __declspec(dllexport)
#else
#define ONIG_EXTERN extern __declspec(dllimport)
#endif
#endif
#endif
#ifndef ONIG_EXTERN
#define ONIG_EXTERN extern
#endif
#ifndef ONIGURUMA_H
typedef unsigned int OnigOptionType;
/* syntax */
typedef struct {
unsigned int op;
unsigned int op2;
unsigned int behavior;
OnigOptionType options; /* default option */
} OnigSyntaxType;
ONIG_EXTERN OnigSyntaxType OnigSyntaxPosixBasic;
ONIG_EXTERN OnigSyntaxType OnigSyntaxPosixExtended;
ONIG_EXTERN OnigSyntaxType OnigSyntaxEmacs;
ONIG_EXTERN OnigSyntaxType OnigSyntaxGrep;
ONIG_EXTERN OnigSyntaxType OnigSyntaxGnuRegex;
ONIG_EXTERN OnigSyntaxType OnigSyntaxJava;
ONIG_EXTERN OnigSyntaxType OnigSyntaxPerl;
ONIG_EXTERN OnigSyntaxType OnigSyntaxRuby;
/* predefined syntaxes (see regsyntax.c) */
#define ONIG_SYNTAX_POSIX_BASIC (&OnigSyntaxPosixBasic)
#define ONIG_SYNTAX_POSIX_EXTENDED (&OnigSyntaxPosixExtended)
#define ONIG_SYNTAX_EMACS (&OnigSyntaxEmacs)
#define ONIG_SYNTAX_GREP (&OnigSyntaxGrep)
#define ONIG_SYNTAX_GNU_REGEX (&OnigSyntaxGnuRegex)
#define ONIG_SYNTAX_JAVA (&OnigSyntaxJava)
#define ONIG_SYNTAX_PERL (&OnigSyntaxPerl)
#define ONIG_SYNTAX_RUBY (&OnigSyntaxRuby)
/* default syntax */
#define ONIG_SYNTAX_DEFAULT OnigDefaultSyntax
ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax;
ONIG_EXTERN int onig_set_default_syntax P_((OnigSyntaxType* syntax));
ONIG_EXTERN void onig_copy_syntax P_((OnigSyntaxType* to, OnigSyntaxType* from));
ONIG_EXTERN const char* onig_version P_((void));
ONIG_EXTERN const char* onig_copyright P_((void));
#endif /* ONIGURUMA_H */
ONIG_EXTERN int regcomp P_((regex_t* reg, const char* pat, int options));
ONIG_EXTERN int regexec P_((regex_t* reg, const char* str, size_t nmatch, regmatch_t* matches, int options));
ONIG_EXTERN void regfree P_((regex_t* reg));
ONIG_EXTERN size_t regerror P_((int code, const regex_t* reg, char* buf, size_t size));
/* extended API */
ONIG_EXTERN void reg_set_encoding P_((int enc));
ONIG_EXTERN int reg_name_to_group_numbers P_((regex_t* reg, const unsigned char* name, const unsigned char* name_end, int** nums));
ONIG_EXTERN int reg_foreach_name P_((regex_t* reg, int (*func)(const unsigned char*, const unsigned char*,int,int*,regex_t*,void*), void* arg));
ONIG_EXTERN int reg_number_of_names P_((regex_t* reg));
#ifdef __cplusplus
}
#endif
#endif /* ONIGPOSIX_H */

822
oniguruma/oniguruma.h Normal file
View file

@ -0,0 +1,822 @@
#ifndef ONIGURUMA_H
#define ONIGURUMA_H
/**********************************************************************
oniguruma.h - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2009 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#ifdef __cplusplus
extern "C" {
#endif
#define ONIGURUMA
#define ONIGURUMA_VERSION_MAJOR 5
#define ONIGURUMA_VERSION_MINOR 9
#define ONIGURUMA_VERSION_TEENY 2
#ifdef __cplusplus
# ifndef HAVE_PROTOTYPES
# define HAVE_PROTOTYPES 1
# endif
# ifndef HAVE_STDARG_PROTOTYPES
# define HAVE_STDARG_PROTOTYPES 1
# endif
#endif
/* escape Mac OS X/Xcode 2.4/gcc 4.0.1 problem */
#if defined(__APPLE__) && defined(__GNUC__) && __GNUC__ >= 4
# ifndef HAVE_STDARG_PROTOTYPES
# define HAVE_STDARG_PROTOTYPES 1
# endif
#endif
#ifdef HAVE_STDARG_H
# ifndef HAVE_STDARG_PROTOTYPES
# define HAVE_STDARG_PROTOTYPES 1
# endif
#endif
#ifndef P_
#if defined(__STDC__) || defined(_WIN32)
# define P_(args) args
#else
# define P_(args) ()
#endif
#endif
#ifndef PV_
#ifdef HAVE_STDARG_PROTOTYPES
# define PV_(args) args
#else
# define PV_(args) ()
#endif
#endif
#ifndef ONIG_EXTERN
#if defined(_WIN32) && !defined(__GNUC__)
#if defined(EXPORT) || defined(RUBY_EXPORT)
#define ONIG_EXTERN extern __declspec(dllexport)
#else
#define ONIG_EXTERN extern __declspec(dllimport)
#endif
#endif
#endif
#ifndef ONIG_EXTERN
#define ONIG_EXTERN extern
#endif
/* PART: character encoding */
#ifndef ONIG_ESCAPE_UCHAR_COLLISION
#define UChar OnigUChar
#endif
typedef unsigned char OnigUChar;
typedef unsigned long OnigCodePoint;
typedef unsigned int OnigCtype;
typedef unsigned int OnigDistance;
#define ONIG_INFINITE_DISTANCE ~((OnigDistance )0)
typedef unsigned int OnigCaseFoldType; /* case fold flag */
ONIG_EXTERN OnigCaseFoldType OnigDefaultCaseFoldFlag;
/* #define ONIGENC_CASE_FOLD_HIRAGANA_KATAKANA (1<<1) */
/* #define ONIGENC_CASE_FOLD_KATAKANA_WIDTH (1<<2) */
#define ONIGENC_CASE_FOLD_TURKISH_AZERI (1<<20)
#define INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR (1<<30)
#define ONIGENC_CASE_FOLD_MIN INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR
#define ONIGENC_CASE_FOLD_DEFAULT OnigDefaultCaseFoldFlag
#define ONIGENC_MAX_COMP_CASE_FOLD_CODE_LEN 3
#define ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM 13
/* 13 => Unicode:0x1ffc */
/* code range */
#define ONIGENC_CODE_RANGE_NUM(range) ((int )range[0])
#define ONIGENC_CODE_RANGE_FROM(range,i) range[((i)*2) + 1]
#define ONIGENC_CODE_RANGE_TO(range,i) range[((i)*2) + 2]
typedef struct {
int byte_len; /* argument(original) character(s) byte length */
int code_len; /* number of code */
OnigCodePoint code[ONIGENC_MAX_COMP_CASE_FOLD_CODE_LEN];
} OnigCaseFoldCodeItem;
typedef struct {
OnigCodePoint esc;
OnigCodePoint anychar;
OnigCodePoint anytime;
OnigCodePoint zero_or_one_time;
OnigCodePoint one_or_more_time;
OnigCodePoint anychar_anytime;
} OnigMetaCharTableType;
typedef int (*OnigApplyAllCaseFoldFunc)(OnigCodePoint from, OnigCodePoint* to, int to_len, void* arg);
typedef struct OnigEncodingTypeST {
int (*mbc_enc_len)(const OnigUChar* p);
const char* name;
int max_enc_len;
int min_enc_len;
int (*is_mbc_newline)(const OnigUChar* p, const OnigUChar* end);
OnigCodePoint (*mbc_to_code)(const OnigUChar* p, const OnigUChar* end);
int (*code_to_mbclen)(OnigCodePoint code);
int (*code_to_mbc)(OnigCodePoint code, OnigUChar *buf);
int (*mbc_case_fold)(OnigCaseFoldType flag, const OnigUChar** pp, const OnigUChar* end, OnigUChar* to);
int (*apply_all_case_fold)(OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg);
int (*get_case_fold_codes_by_str)(OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem acs[]);
int (*property_name_to_ctype)(struct OnigEncodingTypeST* enc, OnigUChar* p, OnigUChar* end);
int (*is_code_ctype)(OnigCodePoint code, OnigCtype ctype);
int (*get_ctype_code_range)(OnigCtype ctype, OnigCodePoint* sb_out, const OnigCodePoint* ranges[]);
OnigUChar* (*left_adjust_char_head)(const OnigUChar* start, const OnigUChar* p);
int (*is_allowed_reverse_match)(const OnigUChar* p, const OnigUChar* end);
} OnigEncodingType;
typedef OnigEncodingType* OnigEncoding;
ONIG_EXTERN OnigEncodingType OnigEncodingASCII;
ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_1;
ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_2;
ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_3;
ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_4;
ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_5;
ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_6;
ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_7;
ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_8;
ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_9;
ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_10;
ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_11;
ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_13;
ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_14;
ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_15;
ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_16;
ONIG_EXTERN OnigEncodingType OnigEncodingUTF8;
ONIG_EXTERN OnigEncodingType OnigEncodingUTF16_BE;
ONIG_EXTERN OnigEncodingType OnigEncodingUTF16_LE;
ONIG_EXTERN OnigEncodingType OnigEncodingUTF32_BE;
ONIG_EXTERN OnigEncodingType OnigEncodingUTF32_LE;
ONIG_EXTERN OnigEncodingType OnigEncodingEUC_JP;
ONIG_EXTERN OnigEncodingType OnigEncodingEUC_TW;
ONIG_EXTERN OnigEncodingType OnigEncodingEUC_KR;
ONIG_EXTERN OnigEncodingType OnigEncodingEUC_CN;
ONIG_EXTERN OnigEncodingType OnigEncodingSJIS;
ONIG_EXTERN OnigEncodingType OnigEncodingKOI8;
ONIG_EXTERN OnigEncodingType OnigEncodingKOI8_R;
ONIG_EXTERN OnigEncodingType OnigEncodingCP1251;
ONIG_EXTERN OnigEncodingType OnigEncodingBIG5;
ONIG_EXTERN OnigEncodingType OnigEncodingGB18030;
#define ONIG_ENCODING_ASCII (&OnigEncodingASCII)
#define ONIG_ENCODING_ISO_8859_1 (&OnigEncodingISO_8859_1)
#define ONIG_ENCODING_ISO_8859_2 (&OnigEncodingISO_8859_2)
#define ONIG_ENCODING_ISO_8859_3 (&OnigEncodingISO_8859_3)
#define ONIG_ENCODING_ISO_8859_4 (&OnigEncodingISO_8859_4)
#define ONIG_ENCODING_ISO_8859_5 (&OnigEncodingISO_8859_5)
#define ONIG_ENCODING_ISO_8859_6 (&OnigEncodingISO_8859_6)
#define ONIG_ENCODING_ISO_8859_7 (&OnigEncodingISO_8859_7)
#define ONIG_ENCODING_ISO_8859_8 (&OnigEncodingISO_8859_8)
#define ONIG_ENCODING_ISO_8859_9 (&OnigEncodingISO_8859_9)
#define ONIG_ENCODING_ISO_8859_10 (&OnigEncodingISO_8859_10)
#define ONIG_ENCODING_ISO_8859_11 (&OnigEncodingISO_8859_11)
#define ONIG_ENCODING_ISO_8859_13 (&OnigEncodingISO_8859_13)
#define ONIG_ENCODING_ISO_8859_14 (&OnigEncodingISO_8859_14)
#define ONIG_ENCODING_ISO_8859_15 (&OnigEncodingISO_8859_15)
#define ONIG_ENCODING_ISO_8859_16 (&OnigEncodingISO_8859_16)
#define ONIG_ENCODING_UTF8 (&OnigEncodingUTF8)
#define ONIG_ENCODING_UTF16_BE (&OnigEncodingUTF16_BE)
#define ONIG_ENCODING_UTF16_LE (&OnigEncodingUTF16_LE)
#define ONIG_ENCODING_UTF32_BE (&OnigEncodingUTF32_BE)
#define ONIG_ENCODING_UTF32_LE (&OnigEncodingUTF32_LE)
#define ONIG_ENCODING_EUC_JP (&OnigEncodingEUC_JP)
#define ONIG_ENCODING_EUC_TW (&OnigEncodingEUC_TW)
#define ONIG_ENCODING_EUC_KR (&OnigEncodingEUC_KR)
#define ONIG_ENCODING_EUC_CN (&OnigEncodingEUC_CN)
#define ONIG_ENCODING_SJIS (&OnigEncodingSJIS)
#define ONIG_ENCODING_KOI8 (&OnigEncodingKOI8)
#define ONIG_ENCODING_KOI8_R (&OnigEncodingKOI8_R)
#define ONIG_ENCODING_CP1251 (&OnigEncodingCP1251)
#define ONIG_ENCODING_BIG5 (&OnigEncodingBIG5)
#define ONIG_ENCODING_GB18030 (&OnigEncodingGB18030)
#define ONIG_ENCODING_UNDEF ((OnigEncoding )0)
/* work size */
#define ONIGENC_CODE_TO_MBC_MAXLEN 7
#define ONIGENC_MBC_CASE_FOLD_MAXLEN 18
/* 18: 6(max-byte) * 3(case-fold chars) */
/* character types */
#define ONIGENC_CTYPE_NEWLINE 0
#define ONIGENC_CTYPE_ALPHA 1
#define ONIGENC_CTYPE_BLANK 2
#define ONIGENC_CTYPE_CNTRL 3
#define ONIGENC_CTYPE_DIGIT 4
#define ONIGENC_CTYPE_GRAPH 5
#define ONIGENC_CTYPE_LOWER 6
#define ONIGENC_CTYPE_PRINT 7
#define ONIGENC_CTYPE_PUNCT 8
#define ONIGENC_CTYPE_SPACE 9
#define ONIGENC_CTYPE_UPPER 10
#define ONIGENC_CTYPE_XDIGIT 11
#define ONIGENC_CTYPE_WORD 12
#define ONIGENC_CTYPE_ALNUM 13 /* alpha || digit */
#define ONIGENC_CTYPE_ASCII 14
#define ONIGENC_MAX_STD_CTYPE ONIGENC_CTYPE_ASCII
#define onig_enc_len(enc,p,end) ONIGENC_MBC_ENC_LEN(enc,p)
#define ONIGENC_IS_UNDEF(enc) ((enc) == ONIG_ENCODING_UNDEF)
#define ONIGENC_IS_SINGLEBYTE(enc) (ONIGENC_MBC_MAXLEN(enc) == 1)
#define ONIGENC_IS_MBC_HEAD(enc,p) (ONIGENC_MBC_ENC_LEN(enc,p) != 1)
#define ONIGENC_IS_MBC_ASCII(p) (*(p) < 128)
#define ONIGENC_IS_CODE_ASCII(code) ((code) < 128)
#define ONIGENC_IS_MBC_WORD(enc,s,end) \
ONIGENC_IS_CODE_WORD(enc,ONIGENC_MBC_TO_CODE(enc,s,end))
#define ONIGENC_NAME(enc) ((enc)->name)
#define ONIGENC_MBC_CASE_FOLD(enc,flag,pp,end,buf) \
(enc)->mbc_case_fold(flag,(const OnigUChar** )pp,end,buf)
#define ONIGENC_IS_ALLOWED_REVERSE_MATCH(enc,s,end) \
(enc)->is_allowed_reverse_match(s,end)
#define ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc,start,s) \
(enc)->left_adjust_char_head(start, s)
#define ONIGENC_APPLY_ALL_CASE_FOLD(enc,case_fold_flag,f,arg) \
(enc)->apply_all_case_fold(case_fold_flag,f,arg)
#define ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc,case_fold_flag,p,end,acs) \
(enc)->get_case_fold_codes_by_str(case_fold_flag,p,end,acs)
#define ONIGENC_STEP_BACK(enc,start,s,n) \
onigenc_step_back((enc),(start),(s),(n))
#define ONIGENC_MBC_ENC_LEN(enc,p) (enc)->mbc_enc_len(p)
#define ONIGENC_MBC_MAXLEN(enc) ((enc)->max_enc_len)
#define ONIGENC_MBC_MAXLEN_DIST(enc) ONIGENC_MBC_MAXLEN(enc)
#define ONIGENC_MBC_MINLEN(enc) ((enc)->min_enc_len)
#define ONIGENC_IS_MBC_NEWLINE(enc,p,end) (enc)->is_mbc_newline((p),(end))
#define ONIGENC_MBC_TO_CODE(enc,p,end) (enc)->mbc_to_code((p),(end))
#define ONIGENC_CODE_TO_MBCLEN(enc,code) (enc)->code_to_mbclen(code)
#define ONIGENC_CODE_TO_MBC(enc,code,buf) (enc)->code_to_mbc(code,buf)
#define ONIGENC_PROPERTY_NAME_TO_CTYPE(enc,p,end) \
(enc)->property_name_to_ctype(enc,p,end)
#define ONIGENC_IS_CODE_CTYPE(enc,code,ctype) (enc)->is_code_ctype(code,ctype)
#define ONIGENC_IS_CODE_NEWLINE(enc,code) \
ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_NEWLINE)
#define ONIGENC_IS_CODE_GRAPH(enc,code) \
ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_GRAPH)
#define ONIGENC_IS_CODE_PRINT(enc,code) \
ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_PRINT)
#define ONIGENC_IS_CODE_ALNUM(enc,code) \
ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_ALNUM)
#define ONIGENC_IS_CODE_ALPHA(enc,code) \
ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_ALPHA)
#define ONIGENC_IS_CODE_LOWER(enc,code) \
ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_LOWER)
#define ONIGENC_IS_CODE_UPPER(enc,code) \
ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_UPPER)
#define ONIGENC_IS_CODE_CNTRL(enc,code) \
ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_CNTRL)
#define ONIGENC_IS_CODE_PUNCT(enc,code) \
ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_PUNCT)
#define ONIGENC_IS_CODE_SPACE(enc,code) \
ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_SPACE)
#define ONIGENC_IS_CODE_BLANK(enc,code) \
ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_BLANK)
#define ONIGENC_IS_CODE_DIGIT(enc,code) \
ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_DIGIT)
#define ONIGENC_IS_CODE_XDIGIT(enc,code) \
ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_XDIGIT)
#define ONIGENC_IS_CODE_WORD(enc,code) \
ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_WORD)
#define ONIGENC_GET_CTYPE_CODE_RANGE(enc,ctype,sbout,ranges) \
(enc)->get_ctype_code_range(ctype,sbout,ranges)
ONIG_EXTERN
OnigUChar* onigenc_step_back P_((OnigEncoding enc, const OnigUChar* start, const OnigUChar* s, int n));
/* encoding API */
ONIG_EXTERN
int onigenc_init P_((void));
ONIG_EXTERN
int onigenc_set_default_encoding P_((OnigEncoding enc));
ONIG_EXTERN
OnigEncoding onigenc_get_default_encoding P_((void));
ONIG_EXTERN
void onigenc_set_default_caseconv_table P_((const OnigUChar* table));
ONIG_EXTERN
OnigUChar* onigenc_get_right_adjust_char_head_with_prev P_((OnigEncoding enc, const OnigUChar* start, const OnigUChar* s, const OnigUChar** prev));
ONIG_EXTERN
OnigUChar* onigenc_get_prev_char_head P_((OnigEncoding enc, const OnigUChar* start, const OnigUChar* s));
ONIG_EXTERN
OnigUChar* onigenc_get_left_adjust_char_head P_((OnigEncoding enc, const OnigUChar* start, const OnigUChar* s));
ONIG_EXTERN
OnigUChar* onigenc_get_right_adjust_char_head P_((OnigEncoding enc, const OnigUChar* start, const OnigUChar* s));
ONIG_EXTERN
int onigenc_strlen P_((OnigEncoding enc, const OnigUChar* p, const OnigUChar* end));
ONIG_EXTERN
int onigenc_strlen_null P_((OnigEncoding enc, const OnigUChar* p));
ONIG_EXTERN
int onigenc_str_bytelen_null P_((OnigEncoding enc, const OnigUChar* p));
/* PART: regular expression */
/* config parameters */
#define ONIG_NREGION 10
#define ONIG_MAX_BACKREF_NUM 1000
#define ONIG_MAX_REPEAT_NUM 100000
#define ONIG_MAX_MULTI_BYTE_RANGES_NUM 10000
/* constants */
#define ONIG_MAX_ERROR_MESSAGE_LEN 90
typedef unsigned int OnigOptionType;
#define ONIG_OPTION_DEFAULT ONIG_OPTION_NONE
/* options */
#define ONIG_OPTION_NONE 0U
#define ONIG_OPTION_IGNORECASE 1U
#define ONIG_OPTION_EXTEND (ONIG_OPTION_IGNORECASE << 1)
#define ONIG_OPTION_MULTILINE (ONIG_OPTION_EXTEND << 1)
#define ONIG_OPTION_SINGLELINE (ONIG_OPTION_MULTILINE << 1)
#define ONIG_OPTION_FIND_LONGEST (ONIG_OPTION_SINGLELINE << 1)
#define ONIG_OPTION_FIND_NOT_EMPTY (ONIG_OPTION_FIND_LONGEST << 1)
#define ONIG_OPTION_NEGATE_SINGLELINE (ONIG_OPTION_FIND_NOT_EMPTY << 1)
#define ONIG_OPTION_DONT_CAPTURE_GROUP (ONIG_OPTION_NEGATE_SINGLELINE << 1)
#define ONIG_OPTION_CAPTURE_GROUP (ONIG_OPTION_DONT_CAPTURE_GROUP << 1)
/* options (search time) */
#define ONIG_OPTION_NOTBOL (ONIG_OPTION_CAPTURE_GROUP << 1)
#define ONIG_OPTION_NOTEOL (ONIG_OPTION_NOTBOL << 1)
#define ONIG_OPTION_POSIX_REGION (ONIG_OPTION_NOTEOL << 1)
#define ONIG_OPTION_MAXBIT ONIG_OPTION_POSIX_REGION /* limit */
#define ONIG_OPTION_ON(options,regopt) ((options) |= (regopt))
#define ONIG_OPTION_OFF(options,regopt) ((options) &= ~(regopt))
#define ONIG_IS_OPTION_ON(options,option) ((options) & (option))
/* syntax */
typedef struct {
unsigned int op;
unsigned int op2;
unsigned int behavior;
OnigOptionType options; /* default option */
OnigMetaCharTableType meta_char_table;
} OnigSyntaxType;
ONIG_EXTERN OnigSyntaxType OnigSyntaxASIS;
ONIG_EXTERN OnigSyntaxType OnigSyntaxPosixBasic;
ONIG_EXTERN OnigSyntaxType OnigSyntaxPosixExtended;
ONIG_EXTERN OnigSyntaxType OnigSyntaxEmacs;
ONIG_EXTERN OnigSyntaxType OnigSyntaxGrep;
ONIG_EXTERN OnigSyntaxType OnigSyntaxGnuRegex;
ONIG_EXTERN OnigSyntaxType OnigSyntaxJava;
ONIG_EXTERN OnigSyntaxType OnigSyntaxPerl;
ONIG_EXTERN OnigSyntaxType OnigSyntaxPerl_NG;
ONIG_EXTERN OnigSyntaxType OnigSyntaxRuby;
/* predefined syntaxes (see regsyntax.c) */
#define ONIG_SYNTAX_ASIS (&OnigSyntaxASIS)
#define ONIG_SYNTAX_POSIX_BASIC (&OnigSyntaxPosixBasic)
#define ONIG_SYNTAX_POSIX_EXTENDED (&OnigSyntaxPosixExtended)
#define ONIG_SYNTAX_EMACS (&OnigSyntaxEmacs)
#define ONIG_SYNTAX_GREP (&OnigSyntaxGrep)
#define ONIG_SYNTAX_GNU_REGEX (&OnigSyntaxGnuRegex)
#define ONIG_SYNTAX_JAVA (&OnigSyntaxJava)
#define ONIG_SYNTAX_PERL (&OnigSyntaxPerl)
#define ONIG_SYNTAX_PERL_NG (&OnigSyntaxPerl_NG)
#define ONIG_SYNTAX_RUBY (&OnigSyntaxRuby)
/* default syntax */
ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax;
#define ONIG_SYNTAX_DEFAULT OnigDefaultSyntax
/* syntax (operators) */
#define ONIG_SYN_OP_VARIABLE_META_CHARACTERS (1U<<0)
#define ONIG_SYN_OP_DOT_ANYCHAR (1U<<1) /* . */
#define ONIG_SYN_OP_ASTERISK_ZERO_INF (1U<<2) /* * */
#define ONIG_SYN_OP_ESC_ASTERISK_ZERO_INF (1U<<3)
#define ONIG_SYN_OP_PLUS_ONE_INF (1U<<4) /* + */
#define ONIG_SYN_OP_ESC_PLUS_ONE_INF (1U<<5)
#define ONIG_SYN_OP_QMARK_ZERO_ONE (1U<<6) /* ? */
#define ONIG_SYN_OP_ESC_QMARK_ZERO_ONE (1U<<7)
#define ONIG_SYN_OP_BRACE_INTERVAL (1U<<8) /* {lower,upper} */
#define ONIG_SYN_OP_ESC_BRACE_INTERVAL (1U<<9) /* \{lower,upper\} */
#define ONIG_SYN_OP_VBAR_ALT (1U<<10) /* | */
#define ONIG_SYN_OP_ESC_VBAR_ALT (1U<<11) /* \| */
#define ONIG_SYN_OP_LPAREN_SUBEXP (1U<<12) /* (...) */
#define ONIG_SYN_OP_ESC_LPAREN_SUBEXP (1U<<13) /* \(...\) */
#define ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR (1U<<14) /* \A, \Z, \z */
#define ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR (1U<<15) /* \G */
#define ONIG_SYN_OP_DECIMAL_BACKREF (1U<<16) /* \num */
#define ONIG_SYN_OP_BRACKET_CC (1U<<17) /* [...] */
#define ONIG_SYN_OP_ESC_W_WORD (1U<<18) /* \w, \W */
#define ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END (1U<<19) /* \<. \> */
#define ONIG_SYN_OP_ESC_B_WORD_BOUND (1U<<20) /* \b, \B */
#define ONIG_SYN_OP_ESC_S_WHITE_SPACE (1U<<21) /* \s, \S */
#define ONIG_SYN_OP_ESC_D_DIGIT (1U<<22) /* \d, \D */
#define ONIG_SYN_OP_LINE_ANCHOR (1U<<23) /* ^, $ */
#define ONIG_SYN_OP_POSIX_BRACKET (1U<<24) /* [:xxxx:] */
#define ONIG_SYN_OP_QMARK_NON_GREEDY (1U<<25) /* ??,*?,+?,{n,m}? */
#define ONIG_SYN_OP_ESC_CONTROL_CHARS (1U<<26) /* \n,\r,\t,\a ... */
#define ONIG_SYN_OP_ESC_C_CONTROL (1U<<27) /* \cx */
#define ONIG_SYN_OP_ESC_OCTAL3 (1U<<28) /* \OOO */
#define ONIG_SYN_OP_ESC_X_HEX2 (1U<<29) /* \xHH */
#define ONIG_SYN_OP_ESC_X_BRACE_HEX8 (1U<<30) /* \x{7HHHHHHH} */
#define ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE (1U<<0) /* \Q...\E */
#define ONIG_SYN_OP2_QMARK_GROUP_EFFECT (1U<<1) /* (?...) */
#define ONIG_SYN_OP2_OPTION_PERL (1U<<2) /* (?imsx),(?-imsx) */
#define ONIG_SYN_OP2_OPTION_RUBY (1U<<3) /* (?imx), (?-imx) */
#define ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT (1U<<4) /* ?+,*+,++ */
#define ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL (1U<<5) /* {n,m}+ */
#define ONIG_SYN_OP2_CCLASS_SET_OP (1U<<6) /* [...&&..[..]..] */
#define ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP (1U<<7) /* (?<name>...) */
#define ONIG_SYN_OP2_ESC_K_NAMED_BACKREF (1U<<8) /* \k<name> */
#define ONIG_SYN_OP2_ESC_G_SUBEXP_CALL (1U<<9) /* \g<name>, \g<n> */
#define ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY (1U<<10) /* (?@..),(?@<x>..) */
#define ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL (1U<<11) /* \C-x */
#define ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META (1U<<12) /* \M-x */
#define ONIG_SYN_OP2_ESC_V_VTAB (1U<<13) /* \v as VTAB */
#define ONIG_SYN_OP2_ESC_U_HEX4 (1U<<14) /* \uHHHH */
#define ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR (1U<<15) /* \`, \' */
#define ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY (1U<<16) /* \p{...}, \P{...} */
#define ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT (1U<<17) /* \p{^..}, \P{^..} */
/* #define ONIG_SYN_OP2_CHAR_PROPERTY_PREFIX_IS (1U<<18) */
#define ONIG_SYN_OP2_ESC_H_XDIGIT (1U<<19) /* \h, \H */
#define ONIG_SYN_OP2_INEFFECTIVE_ESCAPE (1U<<20) /* \ */
/* syntax (behavior) */
#define ONIG_SYN_CONTEXT_INDEP_ANCHORS (1U<<31) /* not implemented */
#define ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS (1U<<0) /* ?, *, +, {n,m} */
#define ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS (1U<<1) /* error or ignore */
#define ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP (1U<<2) /* ...)... */
#define ONIG_SYN_ALLOW_INVALID_INTERVAL (1U<<3) /* {??? */
#define ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV (1U<<4) /* {,n} => {0,n} */
#define ONIG_SYN_STRICT_CHECK_BACKREF (1U<<5) /* /(\1)/,/\1()/ ..*/
#define ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND (1U<<6) /* (?<=a|bc) */
#define ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP (1U<<7) /* see doc/RE */
#define ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME (1U<<8) /* (?<x>)(?<x>) */
#define ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY (1U<<9) /* a{n}?=(?:a{n})? */
/* syntax (behavior) in char class [...] */
#define ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC (1U<<20) /* [^...] */
#define ONIG_SYN_BACKSLASH_ESCAPE_IN_CC (1U<<21) /* [..\w..] etc.. */
#define ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC (1U<<22)
#define ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC (1U<<23) /* [0-9-a]=[0-9\-a] */
/* syntax (behavior) warning */
#define ONIG_SYN_WARN_CC_OP_NOT_ESCAPED (1U<<24) /* [,-,] */
#define ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT (1U<<25) /* (?:a*)+ */
/* meta character specifiers (onig_set_meta_char()) */
#define ONIG_META_CHAR_ESCAPE 0
#define ONIG_META_CHAR_ANYCHAR 1
#define ONIG_META_CHAR_ANYTIME 2
#define ONIG_META_CHAR_ZERO_OR_ONE_TIME 3
#define ONIG_META_CHAR_ONE_OR_MORE_TIME 4
#define ONIG_META_CHAR_ANYCHAR_ANYTIME 5
#define ONIG_INEFFECTIVE_META_CHAR 0
/* error codes */
#define ONIG_IS_PATTERN_ERROR(ecode) ((ecode) <= -100 && (ecode) > -1000)
/* normal return */
#define ONIG_NORMAL 0
#define ONIG_MISMATCH -1
#define ONIG_NO_SUPPORT_CONFIG -2
/* internal error */
#define ONIGERR_MEMORY -5
#define ONIGERR_TYPE_BUG -6
#define ONIGERR_PARSER_BUG -11
#define ONIGERR_STACK_BUG -12
#define ONIGERR_UNDEFINED_BYTECODE -13
#define ONIGERR_UNEXPECTED_BYTECODE -14
#define ONIGERR_MATCH_STACK_LIMIT_OVER -15
#define ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED -21
#define ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR -22
/* general error */
#define ONIGERR_INVALID_ARGUMENT -30
/* syntax error */
#define ONIGERR_END_PATTERN_AT_LEFT_BRACE -100
#define ONIGERR_END_PATTERN_AT_LEFT_BRACKET -101
#define ONIGERR_EMPTY_CHAR_CLASS -102
#define ONIGERR_PREMATURE_END_OF_CHAR_CLASS -103
#define ONIGERR_END_PATTERN_AT_ESCAPE -104
#define ONIGERR_END_PATTERN_AT_META -105
#define ONIGERR_END_PATTERN_AT_CONTROL -106
#define ONIGERR_META_CODE_SYNTAX -108
#define ONIGERR_CONTROL_CODE_SYNTAX -109
#define ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE -110
#define ONIGERR_CHAR_CLASS_VALUE_AT_START_OF_RANGE -111
#define ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS -112
#define ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED -113
#define ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID -114
#define ONIGERR_NESTED_REPEAT_OPERATOR -115
#define ONIGERR_UNMATCHED_CLOSE_PARENTHESIS -116
#define ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS -117
#define ONIGERR_END_PATTERN_IN_GROUP -118
#define ONIGERR_UNDEFINED_GROUP_OPTION -119
#define ONIGERR_INVALID_POSIX_BRACKET_TYPE -121
#define ONIGERR_INVALID_LOOK_BEHIND_PATTERN -122
#define ONIGERR_INVALID_REPEAT_RANGE_PATTERN -123
/* values error (syntax error) */
#define ONIGERR_TOO_BIG_NUMBER -200
#define ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE -201
#define ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE -202
#define ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS -203
#define ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE -204
#define ONIGERR_TOO_MANY_MULTI_BYTE_RANGES -205
#define ONIGERR_TOO_SHORT_MULTI_BYTE_STRING -206
#define ONIGERR_TOO_BIG_BACKREF_NUMBER -207
#define ONIGERR_INVALID_BACKREF -208
#define ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED -209
#define ONIGERR_TOO_LONG_WIDE_CHAR_VALUE -212
#define ONIGERR_EMPTY_GROUP_NAME -214
#define ONIGERR_INVALID_GROUP_NAME -215
#define ONIGERR_INVALID_CHAR_IN_GROUP_NAME -216
#define ONIGERR_UNDEFINED_NAME_REFERENCE -217
#define ONIGERR_UNDEFINED_GROUP_REFERENCE -218
#define ONIGERR_MULTIPLEX_DEFINED_NAME -219
#define ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL -220
#define ONIGERR_NEVER_ENDING_RECURSION -221
#define ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY -222
#define ONIGERR_INVALID_CHAR_PROPERTY_NAME -223
#define ONIGERR_INVALID_CODE_POINT_VALUE -400
#define ONIGERR_INVALID_WIDE_CHAR_VALUE -400
#define ONIGERR_TOO_BIG_WIDE_CHAR_VALUE -401
#define ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION -402
#define ONIGERR_INVALID_COMBINATION_OF_OPTIONS -403
/* errors related to thread */
#define ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT -1001
/* must be smaller than BIT_STATUS_BITS_NUM (unsigned int * 8) */
#define ONIG_MAX_CAPTURE_HISTORY_GROUP 31
#define ONIG_IS_CAPTURE_HISTORY_GROUP(r, i) \
((i) <= ONIG_MAX_CAPTURE_HISTORY_GROUP && (r)->list && (r)->list[i])
typedef struct OnigCaptureTreeNodeStruct {
int group; /* group number */
int beg;
int end;
int allocated;
int num_childs;
struct OnigCaptureTreeNodeStruct** childs;
} OnigCaptureTreeNode;
/* match result region type */
struct re_registers {
int allocated;
int num_regs;
int* beg;
int* end;
/* extended */
OnigCaptureTreeNode* history_root; /* capture history tree root */
};
/* capture tree traverse */
#define ONIG_TRAVERSE_CALLBACK_AT_FIRST 1
#define ONIG_TRAVERSE_CALLBACK_AT_LAST 2
#define ONIG_TRAVERSE_CALLBACK_AT_BOTH \
( ONIG_TRAVERSE_CALLBACK_AT_FIRST | ONIG_TRAVERSE_CALLBACK_AT_LAST )
#define ONIG_REGION_NOTPOS -1
typedef struct re_registers OnigRegion;
typedef struct {
OnigEncoding enc;
OnigUChar* par;
OnigUChar* par_end;
} OnigErrorInfo;
typedef struct {
int lower;
int upper;
} OnigRepeatRange;
typedef void (*OnigWarnFunc) P_((const char* s));
extern void onig_null_warn P_((const char* s));
#define ONIG_NULL_WARN onig_null_warn
#define ONIG_CHAR_TABLE_SIZE 256
/* regex_t state */
#define ONIG_STATE_NORMAL 0
#define ONIG_STATE_SEARCHING 1
#define ONIG_STATE_COMPILING -1
#define ONIG_STATE_MODIFY -2
#define ONIG_STATE(reg) \
((reg)->state > 0 ? ONIG_STATE_SEARCHING : (reg)->state)
typedef struct re_pattern_buffer {
/* common members of BBuf(bytes-buffer) */
unsigned char* p; /* compiled pattern */
unsigned int used; /* used space for p */
unsigned int alloc; /* allocated space for p */
int state; /* normal, searching, compiling */
int num_mem; /* used memory(...) num counted from 1 */
int num_repeat; /* OP_REPEAT/OP_REPEAT_NG id-counter */
int num_null_check; /* OP_NULL_CHECK_START/END id counter */
int num_comb_exp_check; /* combination explosion check */
int num_call; /* number of subexp call */
unsigned int capture_history; /* (?@...) flag (1-31) */
unsigned int bt_mem_start; /* need backtrack flag */
unsigned int bt_mem_end; /* need backtrack flag */
int stack_pop_level;
int repeat_range_alloc;
OnigRepeatRange* repeat_range;
OnigEncoding enc;
OnigOptionType options;
OnigSyntaxType* syntax;
OnigCaseFoldType case_fold_flag;
void* name_table;
/* optimization info (string search, char-map and anchors) */
int optimize; /* optimize flag */
int threshold_len; /* search str-length for apply optimize */
int anchor; /* BEGIN_BUF, BEGIN_POS, (SEMI_)END_BUF */
OnigDistance anchor_dmin; /* (SEMI_)END_BUF anchor distance */
OnigDistance anchor_dmax; /* (SEMI_)END_BUF anchor distance */
int sub_anchor; /* start-anchor for exact or map */
unsigned char *exact;
unsigned char *exact_end;
unsigned char map[ONIG_CHAR_TABLE_SIZE]; /* used as BM skip or char-map */
int *int_map; /* BM skip for exact_len > 255 */
int *int_map_backward; /* BM skip for backward search */
OnigDistance dmin; /* min-distance of exact or map */
OnigDistance dmax; /* max-distance of exact or map */
/* regex_t link chain */
struct re_pattern_buffer* chain; /* escape compile-conflict */
} OnigRegexType;
typedef OnigRegexType* OnigRegex;
#ifndef ONIG_ESCAPE_REGEX_T_COLLISION
typedef OnigRegexType regex_t;
#endif
typedef struct {
int num_of_elements;
OnigEncoding pattern_enc;
OnigEncoding target_enc;
OnigSyntaxType* syntax;
OnigOptionType option;
OnigCaseFoldType case_fold_flag;
} OnigCompileInfo;
/* Oniguruma Native API */
ONIG_EXTERN
int onig_init P_((void));
ONIG_EXTERN
int onig_error_code_to_str PV_((OnigUChar* s, int err_code, ...));
ONIG_EXTERN
void onig_set_warn_func P_((OnigWarnFunc f));
ONIG_EXTERN
void onig_set_verb_warn_func P_((OnigWarnFunc f));
ONIG_EXTERN
int onig_new P_((OnigRegex*, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* einfo));
ONIG_EXTERN
int onig_reg_init P_((regex_t* reg, OnigOptionType option, OnigCaseFoldType case_fold_flag, OnigEncoding enc, OnigSyntaxType* syntax));
int onig_new_without_alloc P_((OnigRegex, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* einfo));
ONIG_EXTERN
int onig_new_deluxe P_((OnigRegex* reg, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigCompileInfo* ci, OnigErrorInfo* einfo));
ONIG_EXTERN
void onig_free P_((OnigRegex));
ONIG_EXTERN
void onig_free_body P_((OnigRegex));
ONIG_EXTERN
int onig_recompile P_((OnigRegex, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* einfo));
ONIG_EXTERN
int onig_recompile_deluxe P_((OnigRegex reg, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigCompileInfo* ci, OnigErrorInfo* einfo));
ONIG_EXTERN
int onig_search P_((OnigRegex, const OnigUChar* str, const OnigUChar* end, const OnigUChar* start, const OnigUChar* range, OnigRegion* region, OnigOptionType option));
ONIG_EXTERN
int onig_match P_((OnigRegex, const OnigUChar* str, const OnigUChar* end, const OnigUChar* at, OnigRegion* region, OnigOptionType option));
ONIG_EXTERN
OnigRegion* onig_region_new P_((void));
ONIG_EXTERN
void onig_region_init P_((OnigRegion* region));
ONIG_EXTERN
void onig_region_free P_((OnigRegion* region, int free_self));
ONIG_EXTERN
void onig_region_copy P_((OnigRegion* to, OnigRegion* from));
ONIG_EXTERN
void onig_region_clear P_((OnigRegion* region));
ONIG_EXTERN
int onig_region_resize P_((OnigRegion* region, int n));
ONIG_EXTERN
int onig_region_set P_((OnigRegion* region, int at, int beg, int end));
ONIG_EXTERN
int onig_name_to_group_numbers P_((OnigRegex reg, const OnigUChar* name, const OnigUChar* name_end, int** nums));
ONIG_EXTERN
int onig_name_to_backref_number P_((OnigRegex reg, const OnigUChar* name, const OnigUChar* name_end, OnigRegion *region));
ONIG_EXTERN
int onig_foreach_name P_((OnigRegex reg, int (*func)(const OnigUChar*, const OnigUChar*,int,int*,OnigRegex,void*), void* arg));
ONIG_EXTERN
int onig_number_of_names P_((OnigRegex reg));
ONIG_EXTERN
int onig_number_of_captures P_((OnigRegex reg));
ONIG_EXTERN
int onig_number_of_capture_histories P_((OnigRegex reg));
ONIG_EXTERN
OnigCaptureTreeNode* onig_get_capture_tree P_((OnigRegion* region));
ONIG_EXTERN
int onig_capture_tree_traverse P_((OnigRegion* region, int at, int(*callback_func)(int,int,int,int,int,void*), void* arg));
ONIG_EXTERN
int onig_noname_group_capture_is_active P_((OnigRegex reg));
ONIG_EXTERN
OnigEncoding onig_get_encoding P_((OnigRegex reg));
ONIG_EXTERN
OnigOptionType onig_get_options P_((OnigRegex reg));
ONIG_EXTERN
OnigCaseFoldType onig_get_case_fold_flag P_((OnigRegex reg));
ONIG_EXTERN
OnigSyntaxType* onig_get_syntax P_((OnigRegex reg));
ONIG_EXTERN
int onig_set_default_syntax P_((OnigSyntaxType* syntax));
ONIG_EXTERN
void onig_copy_syntax P_((OnigSyntaxType* to, OnigSyntaxType* from));
ONIG_EXTERN
unsigned int onig_get_syntax_op P_((OnigSyntaxType* syntax));
ONIG_EXTERN
unsigned int onig_get_syntax_op2 P_((OnigSyntaxType* syntax));
ONIG_EXTERN
unsigned int onig_get_syntax_behavior P_((OnigSyntaxType* syntax));
ONIG_EXTERN
OnigOptionType onig_get_syntax_options P_((OnigSyntaxType* syntax));
ONIG_EXTERN
void onig_set_syntax_op P_((OnigSyntaxType* syntax, unsigned int op));
ONIG_EXTERN
void onig_set_syntax_op2 P_((OnigSyntaxType* syntax, unsigned int op2));
ONIG_EXTERN
void onig_set_syntax_behavior P_((OnigSyntaxType* syntax, unsigned int behavior));
ONIG_EXTERN
void onig_set_syntax_options P_((OnigSyntaxType* syntax, OnigOptionType options));
ONIG_EXTERN
int onig_set_meta_char P_((OnigSyntaxType* syntax, unsigned int what, OnigCodePoint code));
ONIG_EXTERN
void onig_copy_encoding P_((OnigEncoding to, OnigEncoding from));
ONIG_EXTERN
OnigCaseFoldType onig_get_default_case_fold_flag P_((void));
ONIG_EXTERN
int onig_set_default_case_fold_flag P_((OnigCaseFoldType case_fold_flag));
ONIG_EXTERN
unsigned int onig_get_match_stack_limit_size P_((void));
ONIG_EXTERN
int onig_set_match_stack_limit_size P_((unsigned int size));
ONIG_EXTERN
int onig_end P_((void));
ONIG_EXTERN
const char* onig_version P_((void));
ONIG_EXTERN
const char* onig_copyright P_((void));
#ifdef __cplusplus
}
#endif
#endif /* ONIGURUMA_H */

6254
oniguruma/regcomp.c Normal file

File diff suppressed because it is too large Load diff

902
oniguruma/regenc.c Normal file
View file

@ -0,0 +1,902 @@
/**********************************************************************
regenc.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "regint.h"
OnigEncoding OnigEncDefaultCharEncoding = ONIG_ENCODING_INIT_DEFAULT;
extern int
onigenc_init(void)
{
return 0;
}
extern OnigEncoding
onigenc_get_default_encoding(void)
{
return OnigEncDefaultCharEncoding;
}
extern int
onigenc_set_default_encoding(OnigEncoding enc)
{
OnigEncDefaultCharEncoding = enc;
return 0;
}
extern UChar*
onigenc_get_right_adjust_char_head(OnigEncoding enc, const UChar* start, const UChar* s)
{
UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s);
if (p < s) {
p += enclen(enc, p);
}
return p;
}
extern UChar*
onigenc_get_right_adjust_char_head_with_prev(OnigEncoding enc,
const UChar* start, const UChar* s, const UChar** prev)
{
UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s);
if (p < s) {
if (prev) *prev = (const UChar* )p;
p += enclen(enc, p);
}
else {
if (prev) *prev = (const UChar* )NULL; /* Sorry */
}
return p;
}
extern UChar*
onigenc_get_prev_char_head(OnigEncoding enc, const UChar* start, const UChar* s)
{
if (s <= start)
return (UChar* )NULL;
return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1);
}
extern UChar*
onigenc_step_back(OnigEncoding enc, const UChar* start, const UChar* s, int n)
{
while (ONIG_IS_NOT_NULL(s) && n-- > 0) {
if (s <= start)
return (UChar* )NULL;
s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1);
}
return (UChar* )s;
}
extern UChar*
onigenc_step(OnigEncoding enc, const UChar* p, const UChar* end, int n)
{
UChar* q = (UChar* )p;
while (n-- > 0) {
q += ONIGENC_MBC_ENC_LEN(enc, q);
}
return (q <= end ? q : NULL);
}
extern int
onigenc_strlen(OnigEncoding enc, const UChar* p, const UChar* end)
{
int n = 0;
UChar* q = (UChar* )p;
while (q < end) {
q += ONIGENC_MBC_ENC_LEN(enc, q);
n++;
}
return n;
}
extern int
onigenc_strlen_null(OnigEncoding enc, const UChar* s)
{
int n = 0;
UChar* p = (UChar* )s;
while (1) {
if (*p == '\0') {
UChar* q;
int len = ONIGENC_MBC_MINLEN(enc);
if (len == 1) return n;
q = p + 1;
while (len > 1) {
if (*q != '\0') break;
q++;
len--;
}
if (len == 1) return n;
}
p += ONIGENC_MBC_ENC_LEN(enc, p);
n++;
}
}
extern int
onigenc_str_bytelen_null(OnigEncoding enc, const UChar* s)
{
UChar* start = (UChar* )s;
UChar* p = (UChar* )s;
while (1) {
if (*p == '\0') {
UChar* q;
int len = ONIGENC_MBC_MINLEN(enc);
if (len == 1) return (int )(p - start);
q = p + 1;
while (len > 1) {
if (*q != '\0') break;
q++;
len--;
}
if (len == 1) return (int )(p - start);
}
p += ONIGENC_MBC_ENC_LEN(enc, p);
}
}
const UChar OnigEncAsciiToLowerCaseTable[] = {
'\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
'\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
'\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
'\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
'\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
'\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
'\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
'\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
'\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
'\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
'\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
'\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
'\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
'\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
'\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
'\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
'\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
'\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
'\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
'\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
'\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
'\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
'\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
'\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
'\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
'\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
'\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
'\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
'\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
'\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
'\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
'\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377',
};
#ifdef USE_UPPER_CASE_TABLE
const UChar OnigEncAsciiToUpperCaseTable[256] = {
'\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
'\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
'\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
'\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
'\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
'\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
'\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
'\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
'\100', '\101', '\102', '\103', '\104', '\105', '\106', '\107',
'\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',
'\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
'\130', '\131', '\132', '\133', '\134', '\135', '\136', '\137',
'\140', '\101', '\102', '\103', '\104', '\105', '\106', '\107',
'\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',
'\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
'\130', '\131', '\132', '\173', '\174', '\175', '\176', '\177',
'\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
'\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
'\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
'\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
'\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
'\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
'\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
'\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
'\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
'\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
'\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
'\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
'\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
'\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
'\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
'\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377',
};
#endif
const unsigned short OnigEncAsciiCtypeTable[256] = {
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000
};
const UChar OnigEncISO_8859_1_ToLowerCaseTable[256] = {
'\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
'\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
'\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
'\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
'\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
'\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
'\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
'\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
'\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
'\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
'\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
'\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
'\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
'\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
'\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
'\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
'\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
'\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
'\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
'\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
'\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
'\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
'\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
'\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
'\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
'\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
'\360', '\361', '\362', '\363', '\364', '\365', '\366', '\327',
'\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337',
'\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
'\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
'\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
'\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
};
#ifdef USE_UPPER_CASE_TABLE
const UChar OnigEncISO_8859_1_ToUpperCaseTable[256] = {
'\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
'\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
'\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
'\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
'\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
'\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
'\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
'\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
'\100', '\101', '\102', '\103', '\104', '\105', '\106', '\107',
'\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',
'\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
'\130', '\131', '\132', '\133', '\134', '\135', '\136', '\137',
'\140', '\101', '\102', '\103', '\104', '\105', '\106', '\107',
'\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',
'\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
'\130', '\131', '\132', '\173', '\174', '\175', '\176', '\177',
'\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
'\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
'\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
'\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
'\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
'\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
'\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
'\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
'\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
'\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
'\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
'\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
'\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
'\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
'\320', '\321', '\322', '\323', '\324', '\325', '\326', '\367',
'\330', '\331', '\332', '\333', '\334', '\335', '\336', '\377',
};
#endif
extern void
onigenc_set_default_caseconv_table(const UChar* table ARG_UNUSED)
{
/* nothing */
/* obsoleted. */
}
extern UChar*
onigenc_get_left_adjust_char_head(OnigEncoding enc, const UChar* start, const UChar* s)
{
return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s);
}
const OnigPairCaseFoldCodes OnigAsciiLowerMap[] = {
{ 0x41, 0x61 },
{ 0x42, 0x62 },
{ 0x43, 0x63 },
{ 0x44, 0x64 },
{ 0x45, 0x65 },
{ 0x46, 0x66 },
{ 0x47, 0x67 },
{ 0x48, 0x68 },
{ 0x49, 0x69 },
{ 0x4a, 0x6a },
{ 0x4b, 0x6b },
{ 0x4c, 0x6c },
{ 0x4d, 0x6d },
{ 0x4e, 0x6e },
{ 0x4f, 0x6f },
{ 0x50, 0x70 },
{ 0x51, 0x71 },
{ 0x52, 0x72 },
{ 0x53, 0x73 },
{ 0x54, 0x74 },
{ 0x55, 0x75 },
{ 0x56, 0x76 },
{ 0x57, 0x77 },
{ 0x58, 0x78 },
{ 0x59, 0x79 },
{ 0x5a, 0x7a }
};
extern int
onigenc_ascii_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED,
OnigApplyAllCaseFoldFunc f, void* arg)
{
OnigCodePoint code;
int i, r;
for (i = 0;
i < (int )(sizeof(OnigAsciiLowerMap)/sizeof(OnigPairCaseFoldCodes));
i++) {
code = OnigAsciiLowerMap[i].to;
r = (*f)(OnigAsciiLowerMap[i].from, &code, 1, arg);
if (r != 0) return r;
code = OnigAsciiLowerMap[i].from;
r = (*f)(OnigAsciiLowerMap[i].to, &code, 1, arg);
if (r != 0) return r;
}
return 0;
}
extern int
onigenc_ascii_get_case_fold_codes_by_str(OnigCaseFoldType flag ARG_UNUSED,
const OnigUChar* p, const OnigUChar* end ARG_UNUSED,
OnigCaseFoldCodeItem items[])
{
if (0x41 <= *p && *p <= 0x5a) {
items[0].byte_len = 1;
items[0].code_len = 1;
items[0].code[0] = (OnigCodePoint )(*p + 0x20);
return 1;
}
else if (0x61 <= *p && *p <= 0x7a) {
items[0].byte_len = 1;
items[0].code_len = 1;
items[0].code[0] = (OnigCodePoint )(*p - 0x20);
return 1;
}
else
return 0;
}
static int
ss_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED,
OnigApplyAllCaseFoldFunc f, void* arg)
{
static OnigCodePoint ss[] = { 0x73, 0x73 };
return (*f)((OnigCodePoint )0xdf, ss, 2, arg);
}
extern int
onigenc_apply_all_case_fold_with_map(int map_size,
const OnigPairCaseFoldCodes map[],
int ess_tsett_flag, OnigCaseFoldType flag,
OnigApplyAllCaseFoldFunc f, void* arg)
{
OnigCodePoint code;
int i, r;
r = onigenc_ascii_apply_all_case_fold(flag, f, arg);
if (r != 0) return r;
for (i = 0; i < map_size; i++) {
code = map[i].to;
r = (*f)(map[i].from, &code, 1, arg);
if (r != 0) return r;
code = map[i].from;
r = (*f)(map[i].to, &code, 1, arg);
if (r != 0) return r;
}
if (ess_tsett_flag != 0)
return ss_apply_all_case_fold(flag, f, arg);
return 0;
}
extern int
onigenc_get_case_fold_codes_by_str_with_map(int map_size,
const OnigPairCaseFoldCodes map[],
int ess_tsett_flag, OnigCaseFoldType flag ARG_UNUSED,
const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])
{
if (0x41 <= *p && *p <= 0x5a) {
items[0].byte_len = 1;
items[0].code_len = 1;
items[0].code[0] = (OnigCodePoint )(*p + 0x20);
if (*p == 0x53 && ess_tsett_flag != 0 && end > p + 1
&& (*(p+1) == 0x53 || *(p+1) == 0x73)) {
/* SS */
items[1].byte_len = 2;
items[1].code_len = 1;
items[1].code[0] = (OnigCodePoint )0xdf;
return 2;
}
else
return 1;
}
else if (0x61 <= *p && *p <= 0x7a) {
items[0].byte_len = 1;
items[0].code_len = 1;
items[0].code[0] = (OnigCodePoint )(*p - 0x20);
if (*p == 0x73 && ess_tsett_flag != 0 && end > p + 1
&& (*(p+1) == 0x73 || *(p+1) == 0x53)) {
/* ss */
items[1].byte_len = 2;
items[1].code_len = 1;
items[1].code[0] = (OnigCodePoint )0xdf;
return 2;
}
else
return 1;
}
else if (*p == 0xdf && ess_tsett_flag != 0) {
items[0].byte_len = 1;
items[0].code_len = 2;
items[0].code[0] = (OnigCodePoint )'s';
items[0].code[1] = (OnigCodePoint )'s';
items[1].byte_len = 1;
items[1].code_len = 2;
items[1].code[0] = (OnigCodePoint )'S';
items[1].code[1] = (OnigCodePoint )'S';
items[2].byte_len = 1;
items[2].code_len = 2;
items[2].code[0] = (OnigCodePoint )'s';
items[2].code[1] = (OnigCodePoint )'S';
items[3].byte_len = 1;
items[3].code_len = 2;
items[3].code[0] = (OnigCodePoint )'S';
items[3].code[1] = (OnigCodePoint )'s';
return 4;
}
else {
int i;
for (i = 0; i < map_size; i++) {
if (*p == map[i].from) {
items[0].byte_len = 1;
items[0].code_len = 1;
items[0].code[0] = map[i].to;
return 1;
}
else if (*p == map[i].to) {
items[0].byte_len = 1;
items[0].code_len = 1;
items[0].code[0] = map[i].from;
return 1;
}
}
}
return 0;
}
extern int
onigenc_not_support_get_ctype_code_range(OnigCtype ctype ARG_UNUSED,
OnigCodePoint* sb_out ARG_UNUSED,
const OnigCodePoint* ranges[] ARG_UNUSED)
{
return ONIG_NO_SUPPORT_CONFIG;
}
extern int
onigenc_is_mbc_newline_0x0a(const UChar* p, const UChar* end)
{
if (p < end) {
if (*p == 0x0a) return 1;
}
return 0;
}
/* for single byte encodings */
extern int
onigenc_ascii_mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED, const UChar** p,
const UChar*end ARG_UNUSED, UChar* lower)
{
*lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(**p);
(*p)++;
return 1; /* return byte length of converted char to lower */
}
#if 0
extern int
onigenc_ascii_is_mbc_ambiguous(OnigCaseFoldType flag,
const UChar** pp, const UChar* end)
{
const UChar* p = *pp;
(*pp)++;
return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p);
}
#endif
extern int
onigenc_single_byte_mbc_enc_len(const UChar* p ARG_UNUSED)
{
return 1;
}
extern OnigCodePoint
onigenc_single_byte_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED)
{
return (OnigCodePoint )(*p);
}
extern int
onigenc_single_byte_code_to_mbclen(OnigCodePoint code ARG_UNUSED)
{
return (code < 0x100 ? 1 : ONIGERR_INVALID_CODE_POINT_VALUE);
}
extern int
onigenc_single_byte_code_to_mbc(OnigCodePoint code, UChar *buf)
{
*buf = (UChar )(code & 0xff);
return 1;
}
extern UChar*
onigenc_single_byte_left_adjust_char_head(const UChar* start ARG_UNUSED,
const UChar* s)
{
return (UChar* )s;
}
extern int
onigenc_always_true_is_allowed_reverse_match(const UChar* s ARG_UNUSED,
const UChar* end ARG_UNUSED)
{
return TRUE;
}
extern int
onigenc_always_false_is_allowed_reverse_match(const UChar* s ARG_UNUSED,
const UChar* end ARG_UNUSED)
{
return FALSE;
}
extern OnigCodePoint
onigenc_mbn_mbc_to_code(OnigEncoding enc, const UChar* p, const UChar* end)
{
int c, i, len;
OnigCodePoint n;
len = enclen(enc, p);
n = (OnigCodePoint )(*p++);
if (len == 1) return n;
for (i = 1; i < len; i++) {
if (p >= end) break;
c = *p++;
n <<= 8; n += c;
}
return n;
}
extern int
onigenc_mbn_mbc_case_fold(OnigEncoding enc, OnigCaseFoldType flag ARG_UNUSED,
const UChar** pp, const UChar* end ARG_UNUSED,
UChar* lower)
{
int len;
const UChar *p = *pp;
if (ONIGENC_IS_MBC_ASCII(p)) {
*lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
(*pp)++;
return 1;
}
else {
int i;
len = enclen(enc, p);
for (i = 0; i < len; i++) {
*lower++ = *p++;
}
(*pp) += len;
return len; /* return byte length of converted to lower char */
}
}
#if 0
extern int
onigenc_mbn_is_mbc_ambiguous(OnigEncoding enc, OnigCaseFoldType flag,
const UChar** pp, const UChar* end)
{
const UChar* p = *pp;
if (ONIGENC_IS_MBC_ASCII(p)) {
(*pp)++;
return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p);
}
(*pp) += enclen(enc, p);
return FALSE;
}
#endif
extern int
onigenc_mb2_code_to_mbclen(OnigCodePoint code)
{
if ((code & 0xff00) != 0) return 2;
else return 1;
}
extern int
onigenc_mb4_code_to_mbclen(OnigCodePoint code)
{
if ((code & 0xff000000) != 0) return 4;
else if ((code & 0xff0000) != 0) return 3;
else if ((code & 0xff00) != 0) return 2;
else return 1;
}
extern int
onigenc_mb2_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf)
{
UChar *p = buf;
if ((code & 0xff00) != 0) {
*p++ = (UChar )((code >> 8) & 0xff);
}
*p++ = (UChar )(code & 0xff);
#if 1
if (enclen(enc, buf) != (p - buf))
return ONIGERR_INVALID_CODE_POINT_VALUE;
#endif
return p - buf;
}
extern int
onigenc_mb4_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf)
{
UChar *p = buf;
if ((code & 0xff000000) != 0) {
*p++ = (UChar )((code >> 24) & 0xff);
}
if ((code & 0xff0000) != 0 || p != buf) {
*p++ = (UChar )((code >> 16) & 0xff);
}
if ((code & 0xff00) != 0 || p != buf) {
*p++ = (UChar )((code >> 8) & 0xff);
}
*p++ = (UChar )(code & 0xff);
#if 1
if (enclen(enc, buf) != (p - buf))
return ONIGERR_INVALID_CODE_POINT_VALUE;
#endif
return p - buf;
}
extern int
onigenc_minimum_property_name_to_ctype(OnigEncoding enc, UChar* p, UChar* end)
{
static PosixBracketEntryType PBS[] = {
{ (UChar* )"Alnum", ONIGENC_CTYPE_ALNUM, 5 },
{ (UChar* )"Alpha", ONIGENC_CTYPE_ALPHA, 5 },
{ (UChar* )"Blank", ONIGENC_CTYPE_BLANK, 5 },
{ (UChar* )"Cntrl", ONIGENC_CTYPE_CNTRL, 5 },
{ (UChar* )"Digit", ONIGENC_CTYPE_DIGIT, 5 },
{ (UChar* )"Graph", ONIGENC_CTYPE_GRAPH, 5 },
{ (UChar* )"Lower", ONIGENC_CTYPE_LOWER, 5 },
{ (UChar* )"Print", ONIGENC_CTYPE_PRINT, 5 },
{ (UChar* )"Punct", ONIGENC_CTYPE_PUNCT, 5 },
{ (UChar* )"Space", ONIGENC_CTYPE_SPACE, 5 },
{ (UChar* )"Upper", ONIGENC_CTYPE_UPPER, 5 },
{ (UChar* )"XDigit", ONIGENC_CTYPE_XDIGIT, 6 },
{ (UChar* )"ASCII", ONIGENC_CTYPE_ASCII, 5 },
{ (UChar* )"Word", ONIGENC_CTYPE_WORD, 4 },
{ (UChar* )NULL, -1, 0 }
};
PosixBracketEntryType *pb;
int len;
len = onigenc_strlen(enc, p, end);
for (pb = PBS; IS_NOT_NULL(pb->name); pb++) {
if (len == pb->len &&
onigenc_with_ascii_strncmp(enc, p, end, pb->name, pb->len) == 0)
return pb->ctype;
}
return ONIGERR_INVALID_CHAR_PROPERTY_NAME;
}
extern int
onigenc_mb2_is_code_ctype(OnigEncoding enc, OnigCodePoint code,
unsigned int ctype)
{
if (code < 128)
return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
else {
if (CTYPE_IS_WORD_GRAPH_PRINT(ctype)) {
return (ONIGENC_CODE_TO_MBCLEN(enc, code) > 1 ? TRUE : FALSE);
}
}
return FALSE;
}
extern int
onigenc_mb4_is_code_ctype(OnigEncoding enc, OnigCodePoint code,
unsigned int ctype)
{
if (code < 128)
return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
else {
if (CTYPE_IS_WORD_GRAPH_PRINT(ctype)) {
return (ONIGENC_CODE_TO_MBCLEN(enc, code) > 1 ? TRUE : FALSE);
}
}
return FALSE;
}
extern int
onigenc_with_ascii_strncmp(OnigEncoding enc, const UChar* p, const UChar* end,
const UChar* sascii /* ascii */, int n)
{
int x, c;
while (n-- > 0) {
if (p >= end) return (int )(*sascii);
c = (int )ONIGENC_MBC_TO_CODE(enc, p, end);
x = *sascii - c;
if (x) return x;
sascii++;
p += enclen(enc, p);
}
return 0;
}
/* Property management */
static int
resize_property_list(int new_size, const OnigCodePoint*** plist, int* psize)
{
int size;
const OnigCodePoint **list = *plist;
size = sizeof(OnigCodePoint*) * new_size;
if (IS_NULL(list)) {
list = (const OnigCodePoint** )xmalloc(size);
}
else {
list = (const OnigCodePoint** )xrealloc((void* )list, size);
}
if (IS_NULL(list)) return ONIGERR_MEMORY;
*plist = list;
*psize = new_size;
return 0;
}
extern int
onigenc_property_list_add_property(UChar* name, const OnigCodePoint* prop,
hash_table_type **table, const OnigCodePoint*** plist, int *pnum,
int *psize)
{
#define PROP_INIT_SIZE 16
int r;
if (*psize <= *pnum) {
int new_size = (*psize == 0 ? PROP_INIT_SIZE : *psize * 2);
r = resize_property_list(new_size, plist, psize);
if (r != 0) return r;
}
(*plist)[*pnum] = prop;
if (ONIG_IS_NULL(*table)) {
*table = onig_st_init_strend_table_with_size(PROP_INIT_SIZE);
if (ONIG_IS_NULL(*table)) return ONIGERR_MEMORY;
}
*pnum = *pnum + 1;
onig_st_insert_strend(*table, name, name + strlen((char* )name),
(hash_data_type )(*pnum + ONIGENC_MAX_STD_CTYPE));
return 0;
}
extern int
onigenc_property_list_init(int (*f)(void))
{
int r;
THREAD_ATOMIC_START;
r = f();
THREAD_ATOMIC_END;
return r;
}

189
oniguruma/regenc.h Normal file
View file

@ -0,0 +1,189 @@
#ifndef REGENC_H
#define REGENC_H
/**********************************************************************
regenc.h - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#ifndef PACKAGE
/* PACKAGE is defined in config.h */
#include "config.h"
#endif
#ifdef ONIG_ESCAPE_UCHAR_COLLISION
#undef ONIG_ESCAPE_UCHAR_COLLISION
#endif
#include "oniguruma.h"
typedef struct {
OnigCodePoint from;
OnigCodePoint to;
} OnigPairCaseFoldCodes;
#ifndef NULL
#define NULL ((void* )0)
#endif
#ifndef TRUE
#define TRUE 1
#endif
#ifndef FALSE
#define FALSE 0
#endif
#ifndef ARG_UNUSED
#if defined(__GNUC__)
# define ARG_UNUSED __attribute__ ((unused))
#else
# define ARG_UNUSED
#endif
#endif
#define ONIG_IS_NULL(p) (((void*)(p)) == (void*)0)
#define ONIG_IS_NOT_NULL(p) (((void*)(p)) != (void*)0)
#define ONIG_CHECK_NULL_RETURN(p) if (ONIG_IS_NULL(p)) return NULL
#define ONIG_CHECK_NULL_RETURN_VAL(p,val) if (ONIG_IS_NULL(p)) return (val)
#define enclen(enc,p) ONIGENC_MBC_ENC_LEN(enc,p)
/* character types bit flag */
#define BIT_CTYPE_NEWLINE (1<< ONIGENC_CTYPE_NEWLINE)
#define BIT_CTYPE_ALPHA (1<< ONIGENC_CTYPE_ALPHA)
#define BIT_CTYPE_BLANK (1<< ONIGENC_CTYPE_BLANK)
#define BIT_CTYPE_CNTRL (1<< ONIGENC_CTYPE_CNTRL)
#define BIT_CTYPE_DIGIT (1<< ONIGENC_CTYPE_DIGIT)
#define BIT_CTYPE_GRAPH (1<< ONIGENC_CTYPE_GRAPH)
#define BIT_CTYPE_LOWER (1<< ONIGENC_CTYPE_LOWER)
#define BIT_CTYPE_PRINT (1<< ONIGENC_CTYPE_PRINT)
#define BIT_CTYPE_PUNCT (1<< ONIGENC_CTYPE_PUNCT)
#define BIT_CTYPE_SPACE (1<< ONIGENC_CTYPE_SPACE)
#define BIT_CTYPE_UPPER (1<< ONIGENC_CTYPE_UPPER)
#define BIT_CTYPE_XDIGIT (1<< ONIGENC_CTYPE_XDIGIT)
#define BIT_CTYPE_WORD (1<< ONIGENC_CTYPE_WORD)
#define BIT_CTYPE_ALNUM (1<< ONIGENC_CTYPE_ALNUM)
#define BIT_CTYPE_ASCII (1<< ONIGENC_CTYPE_ASCII)
#define CTYPE_TO_BIT(ctype) (1<<(ctype))
#define CTYPE_IS_WORD_GRAPH_PRINT(ctype) \
((ctype) == ONIGENC_CTYPE_WORD || (ctype) == ONIGENC_CTYPE_GRAPH ||\
(ctype) == ONIGENC_CTYPE_PRINT)
typedef struct {
UChar *name;
int ctype;
short int len;
} PosixBracketEntryType;
/* #define USE_CRNL_AS_LINE_TERMINATOR */
#define USE_UNICODE_PROPERTIES
/* #define USE_UNICODE_CASE_FOLD_TURKISH_AZERI */
/* #define USE_UNICODE_ALL_LINE_TERMINATORS */ /* see Unicode.org UTF#18 */
#define ONIG_ENCODING_INIT_DEFAULT ONIG_ENCODING_ASCII
/* for encoding system implementation (internal) */
ONIG_EXTERN int onigenc_ascii_apply_all_case_fold P_((OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg));
ONIG_EXTERN int onigenc_ascii_get_case_fold_codes_by_str P_((OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]));
ONIG_EXTERN int onigenc_apply_all_case_fold_with_map P_((int map_size, const OnigPairCaseFoldCodes map[], int ess_tsett_flag, OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg));
ONIG_EXTERN int onigenc_get_case_fold_codes_by_str_with_map P_((int map_size, const OnigPairCaseFoldCodes map[], int ess_tsett_flag, OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]));
ONIG_EXTERN int onigenc_not_support_get_ctype_code_range P_((OnigCtype ctype, OnigCodePoint* sb_out, const OnigCodePoint* ranges[]));
ONIG_EXTERN int onigenc_is_mbc_newline_0x0a P_((const UChar* p, const UChar* end));
/* methods for single byte encoding */
ONIG_EXTERN int onigenc_ascii_mbc_case_fold P_((OnigCaseFoldType flag, const UChar** p, const UChar* end, UChar* lower));
ONIG_EXTERN int onigenc_single_byte_mbc_enc_len P_((const UChar* p));
ONIG_EXTERN OnigCodePoint onigenc_single_byte_mbc_to_code P_((const UChar* p, const UChar* end));
ONIG_EXTERN int onigenc_single_byte_code_to_mbclen P_((OnigCodePoint code));
ONIG_EXTERN int onigenc_single_byte_code_to_mbc P_((OnigCodePoint code, UChar *buf));
ONIG_EXTERN UChar* onigenc_single_byte_left_adjust_char_head P_((const UChar* start, const UChar* s));
ONIG_EXTERN int onigenc_always_true_is_allowed_reverse_match P_((const UChar* s, const UChar* end));
ONIG_EXTERN int onigenc_always_false_is_allowed_reverse_match P_((const UChar* s, const UChar* end));
/* methods for multi byte encoding */
ONIG_EXTERN OnigCodePoint onigenc_mbn_mbc_to_code P_((OnigEncoding enc, const UChar* p, const UChar* end));
ONIG_EXTERN int onigenc_mbn_mbc_case_fold P_((OnigEncoding enc, OnigCaseFoldType flag, const UChar** p, const UChar* end, UChar* lower));
ONIG_EXTERN int onigenc_mb2_code_to_mbclen P_((OnigCodePoint code));
ONIG_EXTERN int onigenc_mb2_code_to_mbc P_((OnigEncoding enc, OnigCodePoint code, UChar *buf));
ONIG_EXTERN int onigenc_minimum_property_name_to_ctype P_((OnigEncoding enc, UChar* p, UChar* end));
ONIG_EXTERN int onigenc_unicode_property_name_to_ctype P_((OnigEncoding enc, UChar* p, UChar* end));
ONIG_EXTERN int onigenc_mb2_is_code_ctype P_((OnigEncoding enc, OnigCodePoint code, unsigned int ctype));
ONIG_EXTERN int onigenc_mb4_code_to_mbclen P_((OnigCodePoint code));
ONIG_EXTERN int onigenc_mb4_code_to_mbc P_((OnigEncoding enc, OnigCodePoint code, UChar *buf));
ONIG_EXTERN int onigenc_mb4_is_code_ctype P_((OnigEncoding enc, OnigCodePoint code, unsigned int ctype));
/* in enc/unicode.c */
ONIG_EXTERN int onigenc_unicode_is_code_ctype P_((OnigCodePoint code, unsigned int ctype));
ONIG_EXTERN int onigenc_utf16_32_get_ctype_code_range P_((OnigCtype ctype, OnigCodePoint *sb_out, const OnigCodePoint* ranges[]));
ONIG_EXTERN int onigenc_unicode_ctype_code_range P_((int ctype, const OnigCodePoint* ranges[]));
ONIG_EXTERN int onigenc_unicode_get_case_fold_codes_by_str P_((OnigEncoding enc, OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]));
ONIG_EXTERN int onigenc_unicode_mbc_case_fold P_((OnigEncoding enc, OnigCaseFoldType flag, const UChar** pp, const UChar* end, UChar* fold));
ONIG_EXTERN int onigenc_unicode_apply_all_case_fold P_((OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg));
#define UTF16_IS_SURROGATE_FIRST(c) (((c) & 0xfc) == 0xd8)
#define UTF16_IS_SURROGATE_SECOND(c) (((c) & 0xfc) == 0xdc)
#define ONIGENC_ISO_8859_1_TO_LOWER_CASE(c) \
OnigEncISO_8859_1_ToLowerCaseTable[c]
#define ONIGENC_ISO_8859_1_TO_UPPER_CASE(c) \
OnigEncISO_8859_1_ToUpperCaseTable[c]
ONIG_EXTERN const UChar OnigEncISO_8859_1_ToLowerCaseTable[];
ONIG_EXTERN const UChar OnigEncISO_8859_1_ToUpperCaseTable[];
ONIG_EXTERN int
onigenc_with_ascii_strncmp P_((OnigEncoding enc, const UChar* p, const UChar* end, const UChar* sascii /* ascii */, int n));
ONIG_EXTERN UChar*
onigenc_step P_((OnigEncoding enc, const UChar* p, const UChar* end, int n));
/* defined in regexec.c, but used in enc/xxx.c */
extern int onig_is_in_code_range P_((const UChar* p, OnigCodePoint code));
ONIG_EXTERN OnigEncoding OnigEncDefaultCharEncoding;
ONIG_EXTERN const UChar OnigEncAsciiToLowerCaseTable[];
ONIG_EXTERN const UChar OnigEncAsciiToUpperCaseTable[];
ONIG_EXTERN const unsigned short OnigEncAsciiCtypeTable[];
#define ONIGENC_IS_ASCII_CODE(code) ((code) < 0x80)
#define ONIGENC_ASCII_CODE_TO_LOWER_CASE(c) OnigEncAsciiToLowerCaseTable[c]
#define ONIGENC_ASCII_CODE_TO_UPPER_CASE(c) OnigEncAsciiToUpperCaseTable[c]
#define ONIGENC_IS_ASCII_CODE_CTYPE(code,ctype) \
((OnigEncAsciiCtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0)
#define ONIGENC_IS_ASCII_CODE_CASE_AMBIG(code) \
(ONIGENC_IS_ASCII_CODE_CTYPE(code, ONIGENC_CTYPE_UPPER) ||\
ONIGENC_IS_ASCII_CODE_CTYPE(code, ONIGENC_CTYPE_LOWER))
#endif /* REGENC_H */

387
oniguruma/regerror.c Normal file
View file

@ -0,0 +1,387 @@
/**********************************************************************
regerror.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "regint.h"
#include <stdio.h> /* for vsnprintf() */
#ifdef HAVE_STDARG_PROTOTYPES
#include <stdarg.h>
#define va_init_list(a,b) va_start(a,b)
#else
#include <varargs.h>
#define va_init_list(a,b) va_start(a)
#endif
extern UChar*
onig_error_code_to_format(int code)
{
char *p;
if (code >= 0) return (UChar* )0;
switch (code) {
case ONIG_MISMATCH:
p = "mismatch"; break;
case ONIG_NO_SUPPORT_CONFIG:
p = "no support in this configuration"; break;
case ONIGERR_MEMORY:
p = "fail to memory allocation"; break;
case ONIGERR_MATCH_STACK_LIMIT_OVER:
p = "match-stack limit over"; break;
case ONIGERR_TYPE_BUG:
p = "undefined type (bug)"; break;
case ONIGERR_PARSER_BUG:
p = "internal parser error (bug)"; break;
case ONIGERR_STACK_BUG:
p = "stack error (bug)"; break;
case ONIGERR_UNDEFINED_BYTECODE:
p = "undefined bytecode (bug)"; break;
case ONIGERR_UNEXPECTED_BYTECODE:
p = "unexpected bytecode (bug)"; break;
case ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED:
p = "default multibyte-encoding is not setted"; break;
case ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR:
p = "can't convert to wide-char on specified multibyte-encoding"; break;
case ONIGERR_INVALID_ARGUMENT:
p = "invalid argument"; break;
case ONIGERR_END_PATTERN_AT_LEFT_BRACE:
p = "end pattern at left brace"; break;
case ONIGERR_END_PATTERN_AT_LEFT_BRACKET:
p = "end pattern at left bracket"; break;
case ONIGERR_EMPTY_CHAR_CLASS:
p = "empty char-class"; break;
case ONIGERR_PREMATURE_END_OF_CHAR_CLASS:
p = "premature end of char-class"; break;
case ONIGERR_END_PATTERN_AT_ESCAPE:
p = "end pattern at escape"; break;
case ONIGERR_END_PATTERN_AT_META:
p = "end pattern at meta"; break;
case ONIGERR_END_PATTERN_AT_CONTROL:
p = "end pattern at control"; break;
case ONIGERR_META_CODE_SYNTAX:
p = "invalid meta-code syntax"; break;
case ONIGERR_CONTROL_CODE_SYNTAX:
p = "invalid control-code syntax"; break;
case ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE:
p = "char-class value at end of range"; break;
case ONIGERR_CHAR_CLASS_VALUE_AT_START_OF_RANGE:
p = "char-class value at start of range"; break;
case ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS:
p = "unmatched range specifier in char-class"; break;
case ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED:
p = "target of repeat operator is not specified"; break;
case ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID:
p = "target of repeat operator is invalid"; break;
case ONIGERR_NESTED_REPEAT_OPERATOR:
p = "nested repeat operator"; break;
case ONIGERR_UNMATCHED_CLOSE_PARENTHESIS:
p = "unmatched close parenthesis"; break;
case ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS:
p = "end pattern with unmatched parenthesis"; break;
case ONIGERR_END_PATTERN_IN_GROUP:
p = "end pattern in group"; break;
case ONIGERR_UNDEFINED_GROUP_OPTION:
p = "undefined group option"; break;
case ONIGERR_INVALID_POSIX_BRACKET_TYPE:
p = "invalid POSIX bracket type"; break;
case ONIGERR_INVALID_LOOK_BEHIND_PATTERN:
p = "invalid pattern in look-behind"; break;
case ONIGERR_INVALID_REPEAT_RANGE_PATTERN:
p = "invalid repeat range {lower,upper}"; break;
case ONIGERR_TOO_BIG_NUMBER:
p = "too big number"; break;
case ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE:
p = "too big number for repeat range"; break;
case ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE:
p = "upper is smaller than lower in repeat range"; break;
case ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS:
p = "empty range in char class"; break;
case ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE:
p = "mismatch multibyte code length in char-class range"; break;
case ONIGERR_TOO_MANY_MULTI_BYTE_RANGES:
p = "too many multibyte code ranges are specified"; break;
case ONIGERR_TOO_SHORT_MULTI_BYTE_STRING:
p = "too short multibyte code string"; break;
case ONIGERR_TOO_BIG_BACKREF_NUMBER:
p = "too big backref number"; break;
case ONIGERR_INVALID_BACKREF:
#ifdef USE_NAMED_GROUP
p = "invalid backref number/name"; break;
#else
p = "invalid backref number"; break;
#endif
case ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED:
p = "numbered backref/call is not allowed. (use name)"; break;
case ONIGERR_TOO_BIG_WIDE_CHAR_VALUE:
p = "too big wide-char value"; break;
case ONIGERR_TOO_LONG_WIDE_CHAR_VALUE:
p = "too long wide-char value"; break;
case ONIGERR_INVALID_CODE_POINT_VALUE:
p = "invalid code point value"; break;
case ONIGERR_EMPTY_GROUP_NAME:
p = "group name is empty"; break;
case ONIGERR_INVALID_GROUP_NAME:
p = "invalid group name <%n>"; break;
case ONIGERR_INVALID_CHAR_IN_GROUP_NAME:
#ifdef USE_NAMED_GROUP
p = "invalid char in group name <%n>"; break;
#else
p = "invalid char in group number <%n>"; break;
#endif
case ONIGERR_UNDEFINED_NAME_REFERENCE:
p = "undefined name <%n> reference"; break;
case ONIGERR_UNDEFINED_GROUP_REFERENCE:
p = "undefined group <%n> reference"; break;
case ONIGERR_MULTIPLEX_DEFINED_NAME:
p = "multiplex defined name <%n>"; break;
case ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL:
p = "multiplex definition name <%n> call"; break;
case ONIGERR_NEVER_ENDING_RECURSION:
p = "never ending recursion"; break;
case ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY:
p = "group number is too big for capture history"; break;
case ONIGERR_INVALID_CHAR_PROPERTY_NAME:
p = "invalid character property name {%n}"; break;
case ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION:
p = "not supported encoding combination"; break;
case ONIGERR_INVALID_COMBINATION_OF_OPTIONS:
p = "invalid combination of options"; break;
case ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT:
p = "over thread pass limit count"; break;
default:
p = "undefined error code"; break;
}
return (UChar* )p;
}
static void sprint_byte(char* s, unsigned int v)
{
sprintf(s, "%02x", (v & 0377));
}
static void sprint_byte_with_x(char* s, unsigned int v)
{
sprintf(s, "\\x%02x", (v & 0377));
}
static int to_ascii(OnigEncoding enc, UChar *s, UChar *end,
UChar buf[], int buf_size, int *is_over)
{
int len;
UChar *p;
OnigCodePoint code;
if (ONIGENC_MBC_MINLEN(enc) > 1) {
p = s;
len = 0;
while (p < end) {
code = ONIGENC_MBC_TO_CODE(enc, p, end);
if (code >= 0x80) {
if (code > 0xffff && len + 10 <= buf_size) {
sprint_byte_with_x((char*)(&(buf[len])), (unsigned int)(code >> 24));
sprint_byte((char*)(&(buf[len+4])), (unsigned int)(code >> 16));
sprint_byte((char*)(&(buf[len+6])), (unsigned int)(code >> 8));
sprint_byte((char*)(&(buf[len+8])), (unsigned int)code);
len += 10;
}
else if (len + 6 <= buf_size) {
sprint_byte_with_x((char*)(&(buf[len])), (unsigned int)(code >> 8));
sprint_byte((char*)(&(buf[len+4])), (unsigned int)code);
len += 6;
}
else {
break;
}
}
else {
buf[len++] = (UChar )code;
}
p += enclen(enc, p);
if (len >= buf_size) break;
}
*is_over = ((p < end) ? 1 : 0);
}
else {
len = MIN((end - s), buf_size);
xmemcpy(buf, s, (size_t )len);
*is_over = ((buf_size < (end - s)) ? 1 : 0);
}
return len;
}
/* for ONIG_MAX_ERROR_MESSAGE_LEN */
#define MAX_ERROR_PAR_LEN 30
extern int
#ifdef HAVE_STDARG_PROTOTYPES
onig_error_code_to_str(UChar* s, int code, ...)
#else
onig_error_code_to_str(s, code, va_alist)
UChar* s;
int code;
va_dcl
#endif
{
UChar *p, *q;
OnigErrorInfo* einfo;
int len, is_over;
UChar parbuf[MAX_ERROR_PAR_LEN];
va_list vargs;
va_init_list(vargs, code);
switch (code) {
case ONIGERR_UNDEFINED_NAME_REFERENCE:
case ONIGERR_UNDEFINED_GROUP_REFERENCE:
case ONIGERR_MULTIPLEX_DEFINED_NAME:
case ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL:
case ONIGERR_INVALID_GROUP_NAME:
case ONIGERR_INVALID_CHAR_IN_GROUP_NAME:
case ONIGERR_INVALID_CHAR_PROPERTY_NAME:
einfo = va_arg(vargs, OnigErrorInfo*);
len = to_ascii(einfo->enc, einfo->par, einfo->par_end,
parbuf, MAX_ERROR_PAR_LEN - 3, &is_over);
q = onig_error_code_to_format(code);
p = s;
while (*q != '\0') {
if (*q == '%') {
q++;
if (*q == 'n') { /* '%n': name */
xmemcpy(p, parbuf, len);
p += len;
if (is_over != 0) {
xmemcpy(p, "...", 3);
p += 3;
}
q++;
}
else
goto normal_char;
}
else {
normal_char:
*p++ = *q++;
}
}
*p = '\0';
len = p - s;
break;
default:
q = onig_error_code_to_format(code);
len = onigenc_str_bytelen_null(ONIG_ENCODING_ASCII, q);
xmemcpy(s, q, len);
s[len] = '\0';
break;
}
va_end(vargs);
return len;
}
void
#ifdef HAVE_STDARG_PROTOTYPES
onig_snprintf_with_pattern(UChar buf[], int bufsize, OnigEncoding enc,
UChar* pat, UChar* pat_end, const UChar *fmt, ...)
#else
onig_snprintf_with_pattern(buf, bufsize, enc, pat, pat_end, fmt, va_alist)
UChar buf[];
int bufsize;
OnigEncoding enc;
UChar* pat;
UChar* pat_end;
const UChar *fmt;
va_dcl
#endif
{
int n, need, len;
UChar *p, *s, *bp;
UChar bs[6];
va_list args;
va_init_list(args, fmt);
n = xvsnprintf((char* )buf, bufsize, (const char* )fmt, args);
va_end(args);
need = (pat_end - pat) * 4 + 4;
if (n + need < bufsize) {
strcat((char* )buf, ": /");
s = buf + onigenc_str_bytelen_null(ONIG_ENCODING_ASCII, buf);
p = pat;
while (p < pat_end) {
if (*p == '\\') {
*s++ = *p++;
len = enclen(enc, p);
while (len-- > 0) *s++ = *p++;
}
else if (*p == '/') {
*s++ = (unsigned char )'\\';
*s++ = *p++;
}
else if (ONIGENC_IS_MBC_HEAD(enc, p)) {
len = enclen(enc, p);
if (ONIGENC_MBC_MINLEN(enc) == 1) {
while (len-- > 0) *s++ = *p++;
}
else { /* for UTF16 */
int blen;
while (len-- > 0) {
sprint_byte_with_x((char* )bs, (unsigned int )(*p++));
blen = onigenc_str_bytelen_null(ONIG_ENCODING_ASCII, bs);
bp = bs;
while (blen-- > 0) *s++ = *bp++;
}
}
}
else if (!ONIGENC_IS_CODE_PRINT(enc, *p) &&
!ONIGENC_IS_CODE_SPACE(enc, *p)) {
sprint_byte_with_x((char* )bs, (unsigned int )(*p++));
len = onigenc_str_bytelen_null(ONIG_ENCODING_ASCII, bs);
bp = bs;
while (len-- > 0) *s++ = *bp++;
}
else {
*s++ = *p++;
}
}
*s++ = '/';
*s = '\0';
}
}

3803
oniguruma/regexec.c Normal file

File diff suppressed because it is too large Load diff

222
oniguruma/regext.c Normal file
View file

@ -0,0 +1,222 @@
/**********************************************************************
regext.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "regint.h"
static void
conv_ext0be32(const UChar* s, const UChar* end, UChar* conv)
{
while (s < end) {
*conv++ = '\0';
*conv++ = '\0';
*conv++ = '\0';
*conv++ = *s++;
}
}
static void
conv_ext0le32(const UChar* s, const UChar* end, UChar* conv)
{
while (s < end) {
*conv++ = *s++;
*conv++ = '\0';
*conv++ = '\0';
*conv++ = '\0';
}
}
static void
conv_ext0be(const UChar* s, const UChar* end, UChar* conv)
{
while (s < end) {
*conv++ = '\0';
*conv++ = *s++;
}
}
static void
conv_ext0le(const UChar* s, const UChar* end, UChar* conv)
{
while (s < end) {
*conv++ = *s++;
*conv++ = '\0';
}
}
static void
conv_swap4bytes(const UChar* s, const UChar* end, UChar* conv)
{
while (s < end) {
*conv++ = s[3];
*conv++ = s[2];
*conv++ = s[1];
*conv++ = s[0];
s += 4;
}
}
static void
conv_swap2bytes(const UChar* s, const UChar* end, UChar* conv)
{
while (s < end) {
*conv++ = s[1];
*conv++ = s[0];
s += 2;
}
}
static int
conv_encoding(OnigEncoding from, OnigEncoding to, const UChar* s, const UChar* end,
UChar** conv, UChar** conv_end)
{
int len = end - s;
if (to == ONIG_ENCODING_UTF16_BE) {
if (from == ONIG_ENCODING_ASCII || from == ONIG_ENCODING_ISO_8859_1) {
*conv = (UChar* )xmalloc(len * 2);
CHECK_NULL_RETURN_MEMERR(*conv);
*conv_end = *conv + (len * 2);
conv_ext0be(s, end, *conv);
return 0;
}
else if (from == ONIG_ENCODING_UTF16_LE) {
swap16:
*conv = (UChar* )xmalloc(len);
CHECK_NULL_RETURN_MEMERR(*conv);
*conv_end = *conv + len;
conv_swap2bytes(s, end, *conv);
return 0;
}
}
else if (to == ONIG_ENCODING_UTF16_LE) {
if (from == ONIG_ENCODING_ASCII || from == ONIG_ENCODING_ISO_8859_1) {
*conv = (UChar* )xmalloc(len * 2);
CHECK_NULL_RETURN_MEMERR(*conv);
*conv_end = *conv + (len * 2);
conv_ext0le(s, end, *conv);
return 0;
}
else if (from == ONIG_ENCODING_UTF16_BE) {
goto swap16;
}
}
if (to == ONIG_ENCODING_UTF32_BE) {
if (from == ONIG_ENCODING_ASCII || from == ONIG_ENCODING_ISO_8859_1) {
*conv = (UChar* )xmalloc(len * 4);
CHECK_NULL_RETURN_MEMERR(*conv);
*conv_end = *conv + (len * 4);
conv_ext0be32(s, end, *conv);
return 0;
}
else if (from == ONIG_ENCODING_UTF32_LE) {
swap32:
*conv = (UChar* )xmalloc(len);
CHECK_NULL_RETURN_MEMERR(*conv);
*conv_end = *conv + len;
conv_swap4bytes(s, end, *conv);
return 0;
}
}
else if (to == ONIG_ENCODING_UTF32_LE) {
if (from == ONIG_ENCODING_ASCII || from == ONIG_ENCODING_ISO_8859_1) {
*conv = (UChar* )xmalloc(len * 4);
CHECK_NULL_RETURN_MEMERR(*conv);
*conv_end = *conv + (len * 4);
conv_ext0le32(s, end, *conv);
return 0;
}
else if (from == ONIG_ENCODING_UTF32_BE) {
goto swap32;
}
}
return ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION;
}
extern int
onig_new_deluxe(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
OnigCompileInfo* ci, OnigErrorInfo* einfo)
{
int r;
UChar *cpat, *cpat_end;
if (IS_NOT_NULL(einfo)) einfo->par = (UChar* )NULL;
if (ci->pattern_enc != ci->target_enc) {
r = conv_encoding(ci->pattern_enc, ci->target_enc, pattern, pattern_end,
&cpat, &cpat_end);
if (r) return r;
}
else {
cpat = (UChar* )pattern;
cpat_end = (UChar* )pattern_end;
}
*reg = (regex_t* )xmalloc(sizeof(regex_t));
if (IS_NULL(*reg)) {
r = ONIGERR_MEMORY;
goto err2;
}
r = onig_reg_init(*reg, ci->option, ci->case_fold_flag, ci->target_enc,
ci->syntax);
if (r) goto err;
r = onig_compile(*reg, cpat, cpat_end, einfo);
if (r) {
err:
onig_free(*reg);
*reg = NULL;
}
err2:
if (cpat != pattern) xfree(cpat);
return r;
}
#ifdef USE_RECOMPILE_API
extern int
onig_recompile_deluxe(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
OnigCompileInfo* ci, OnigErrorInfo* einfo)
{
int r;
regex_t *new_reg;
r = onig_new_deluxe(&new_reg, pattern, pattern_end, ci, einfo);
if (r) return r;
if (ONIG_STATE(reg) == ONIG_STATE_NORMAL) {
onig_transfer(reg, new_reg);
}
else {
onig_chain_link_add(reg, new_reg);
}
return 0;
}
#endif

167
oniguruma/reggnu.c Normal file
View file

@ -0,0 +1,167 @@
/**********************************************************************
reggnu.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "regint.h"
#ifndef ONIGGNU_H
#include "oniggnu.h"
#endif
extern void
re_free_registers(OnigRegion* r)
{
/* 0: don't free self */
onig_region_free(r, 0);
}
extern int
re_adjust_startpos(regex_t* reg, const char* string, int size,
int startpos, int range)
{
if (startpos > 0 && ONIGENC_MBC_MAXLEN(reg->enc) != 1 && startpos < size) {
UChar *p;
UChar *s = (UChar* )string + startpos;
if (range > 0) {
p = onigenc_get_right_adjust_char_head(reg->enc, (UChar* )string, s);
}
else {
p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, (UChar* )string, s);
}
return p - (UChar* )string;
}
return startpos;
}
extern int
re_match(regex_t* reg, const char* str, int size, int pos,
struct re_registers* regs)
{
return onig_match(reg, (UChar* )str, (UChar* )(str + size),
(UChar* )(str + pos), regs, ONIG_OPTION_NONE);
}
extern int
re_search(regex_t* bufp, const char* string, int size, int startpos, int range,
struct re_registers* regs)
{
return onig_search(bufp, (UChar* )string, (UChar* )(string + size),
(UChar* )(string + startpos),
(UChar* )(string + startpos + range),
regs, ONIG_OPTION_NONE);
}
extern int
re_compile_pattern(const char* pattern, int size, regex_t* reg, char* ebuf)
{
int r;
OnigErrorInfo einfo;
r = onig_compile(reg, (UChar* )pattern, (UChar* )(pattern + size), &einfo);
if (r != ONIG_NORMAL) {
if (IS_NOT_NULL(ebuf))
(void )onig_error_code_to_str((UChar* )ebuf, r, &einfo);
}
return r;
}
#ifdef USE_RECOMPILE_API
extern int
re_recompile_pattern(const char* pattern, int size, regex_t* reg, char* ebuf)
{
int r;
OnigErrorInfo einfo;
OnigEncoding enc;
/* I think encoding and options should be arguments of this function.
But this is adapted to present re.c. (2002/11/29)
*/
enc = OnigEncDefaultCharEncoding;
r = onig_recompile(reg, (UChar* )pattern, (UChar* )(pattern + size),
reg->options, enc, OnigDefaultSyntax, &einfo);
if (r != ONIG_NORMAL) {
if (IS_NOT_NULL(ebuf))
(void )onig_error_code_to_str((UChar* )ebuf, r, &einfo);
}
return r;
}
#endif
extern void
re_free_pattern(regex_t* reg)
{
onig_free(reg);
}
extern int
re_alloc_pattern(regex_t** reg)
{
*reg = (regex_t* )xmalloc(sizeof(regex_t));
if (IS_NULL(*reg)) return ONIGERR_MEMORY;
return onig_reg_init(*reg, ONIG_OPTION_DEFAULT,
ONIGENC_CASE_FOLD_DEFAULT,
OnigEncDefaultCharEncoding,
OnigDefaultSyntax);
}
extern void
re_set_casetable(const char* table)
{
onigenc_set_default_caseconv_table((UChar* )table);
}
extern void
re_mbcinit(int mb_code)
{
OnigEncoding enc;
switch (mb_code) {
case RE_MBCTYPE_ASCII:
enc = ONIG_ENCODING_ASCII;
break;
case RE_MBCTYPE_EUC:
enc = ONIG_ENCODING_EUC_JP;
break;
case RE_MBCTYPE_SJIS:
enc = ONIG_ENCODING_SJIS;
break;
case RE_MBCTYPE_UTF8:
enc = ONIG_ENCODING_UTF8;
break;
default:
return ;
break;
}
onigenc_set_default_encoding(enc);
}

808
oniguruma/regint.h Normal file
View file

@ -0,0 +1,808 @@
#ifndef REGINT_H
#define REGINT_H
/**********************************************************************
regint.h - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/* for debug */
/* #define ONIG_DEBUG_PARSE_TREE */
/* #define ONIG_DEBUG_COMPILE */
/* #define ONIG_DEBUG_SEARCH */
/* #define ONIG_DEBUG_MATCH */
/* #define ONIG_DONT_OPTIMIZE */
/* for byte-code statistical data. */
/* #define ONIG_DEBUG_STATISTICS */
#if defined(ONIG_DEBUG_PARSE_TREE) || defined(ONIG_DEBUG_MATCH) || \
defined(ONIG_DEBUG_SEARCH) || defined(ONIG_DEBUG_COMPILE) || \
defined(ONIG_DEBUG_STATISTICS)
#ifndef ONIG_DEBUG
#define ONIG_DEBUG
#endif
#endif
#if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \
(defined(__ppc__) && defined(__APPLE__)) || \
defined(__x86_64) || defined(__x86_64__) || \
defined(__mc68020__)
#define PLATFORM_UNALIGNED_WORD_ACCESS
#endif
/* config */
/* spec. config */
#define USE_NAMED_GROUP
#define USE_SUBEXP_CALL
#define USE_BACKREF_WITH_LEVEL /* \k<name+n>, \k<name-n> */
#define USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT /* /(?:()|())*\2/ */
#define USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE /* /\n$/ =~ "\n" */
#define USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR
/* #define USE_RECOMPILE_API */
/* !!! moved to regenc.h. */ /* #define USE_CRNL_AS_LINE_TERMINATOR */
/* internal config */
#define USE_PARSE_TREE_NODE_RECYCLE
#define USE_OP_PUSH_OR_JUMP_EXACT
#define USE_QTFR_PEEK_NEXT
#define USE_ST_LIBRARY
#define USE_SHARED_CCLASS_TABLE
#define INIT_MATCH_STACK_SIZE 160
#define DEFAULT_MATCH_STACK_LIMIT_SIZE 0 /* unlimited */
#if defined(__GNUC__)
# define ARG_UNUSED __attribute__ ((unused))
#else
# define ARG_UNUSED
#endif
/* */
/* escape other system UChar definition */
#include "config.h"
#ifdef ONIG_ESCAPE_UCHAR_COLLISION
#undef ONIG_ESCAPE_UCHAR_COLLISION
#endif
#define USE_WORD_BEGIN_END /* "\<", "\>" */
#define USE_CAPTURE_HISTORY
#define USE_VARIABLE_META_CHARS
#define USE_POSIX_API_REGION_OPTION
#define USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
/* #define USE_COMBINATION_EXPLOSION_CHECK */ /* (X*)* */
/* #define USE_MULTI_THREAD_SYSTEM */
#define THREAD_SYSTEM_INIT /* depend on thread system */
#define THREAD_SYSTEM_END /* depend on thread system */
#define THREAD_ATOMIC_START /* depend on thread system */
#define THREAD_ATOMIC_END /* depend on thread system */
#define THREAD_PASS /* depend on thread system */
#define xmalloc malloc
#define xrealloc realloc
#define xcalloc calloc
#define xfree free
#define CHECK_INTERRUPT_IN_MATCH_AT
#define st_init_table onig_st_init_table
#define st_init_table_with_size onig_st_init_table_with_size
#define st_init_numtable onig_st_init_numtable
#define st_init_numtable_with_size onig_st_init_numtable_with_size
#define st_init_strtable onig_st_init_strtable
#define st_init_strtable_with_size onig_st_init_strtable_with_size
#define st_delete onig_st_delete
#define st_delete_safe onig_st_delete_safe
#define st_insert onig_st_insert
#define st_lookup onig_st_lookup
#define st_foreach onig_st_foreach
#define st_add_direct onig_st_add_direct
#define st_free_table onig_st_free_table
#define st_cleanup_safe onig_st_cleanup_safe
#define st_copy onig_st_copy
#define st_nothing_key_clone onig_st_nothing_key_clone
#define st_nothing_key_free onig_st_nothing_key_free
/* */
#define onig_st_is_member st_is_member
#define STATE_CHECK_STRING_THRESHOLD_LEN 7
#define STATE_CHECK_BUFF_MAX_SIZE 0x4000
#define THREAD_PASS_LIMIT_COUNT 8
#define xmemset memset
#define xmemcpy memcpy
#define xmemmove memmove
#if defined(_WIN32) && !defined(__GNUC__)
#define xalloca _alloca
#define xvsnprintf _vsnprintf
#else
#define xalloca alloca
#define xvsnprintf vsnprintf
#endif
#if defined(USE_RECOMPILE_API) && defined(USE_MULTI_THREAD_SYSTEM)
#define ONIG_STATE_INC(reg) (reg)->state++
#define ONIG_STATE_DEC(reg) (reg)->state--
#define ONIG_STATE_INC_THREAD(reg) do {\
THREAD_ATOMIC_START;\
(reg)->state++;\
THREAD_ATOMIC_END;\
} while(0)
#define ONIG_STATE_DEC_THREAD(reg) do {\
THREAD_ATOMIC_START;\
(reg)->state--;\
THREAD_ATOMIC_END;\
} while(0)
#else
#define ONIG_STATE_INC(reg) /* Nothing */
#define ONIG_STATE_DEC(reg) /* Nothing */
#define ONIG_STATE_INC_THREAD(reg) /* Nothing */
#define ONIG_STATE_DEC_THREAD(reg) /* Nothing */
#endif /* USE_RECOMPILE_API && USE_MULTI_THREAD_SYSTEM */
#ifdef HAVE_STDLIB_H
#include <stdlib.h>
#endif
#if defined(HAVE_ALLOCA_H) && !defined(__GNUC__)
#include <alloca.h>
#endif
#ifdef HAVE_STRING_H
# include <string.h>
#else
# include <strings.h>
#endif
#include <ctype.h>
#ifdef HAVE_SYS_TYPES_H
#ifndef __BORLANDC__
#include <sys/types.h>
#endif
#endif
#ifdef __BORLANDC__
#include <malloc.h>
#endif
#ifdef ONIG_DEBUG
# include <stdio.h>
#endif
#include "regenc.h"
#ifdef MIN
#undef MIN
#endif
#ifdef MAX
#undef MAX
#endif
#define MIN(a,b) (((a)>(b))?(b):(a))
#define MAX(a,b) (((a)<(b))?(b):(a))
#define IS_NULL(p) (((void*)(p)) == (void*)0)
#define IS_NOT_NULL(p) (((void*)(p)) != (void*)0)
#define CHECK_NULL_RETURN(p) if (IS_NULL(p)) return NULL
#define CHECK_NULL_RETURN_MEMERR(p) if (IS_NULL(p)) return ONIGERR_MEMORY
#define NULL_UCHARP ((UChar* )0)
#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
#define PLATFORM_GET_INC(val,p,type) do{\
val = *(type* )p;\
(p) += sizeof(type);\
} while(0)
#else
#define PLATFORM_GET_INC(val,p,type) do{\
xmemcpy(&val, (p), sizeof(type));\
(p) += sizeof(type);\
} while(0)
/* sizeof(OnigCodePoint) */
#define WORD_ALIGNMENT_SIZE SIZEOF_LONG
#define GET_ALIGNMENT_PAD_SIZE(addr,pad_size) do {\
(pad_size) = WORD_ALIGNMENT_SIZE \
- ((unsigned int )(addr) % WORD_ALIGNMENT_SIZE);\
if ((pad_size) == WORD_ALIGNMENT_SIZE) (pad_size) = 0;\
} while (0)
#define ALIGNMENT_RIGHT(addr) do {\
(addr) += (WORD_ALIGNMENT_SIZE - 1);\
(addr) -= ((unsigned int )(addr) % WORD_ALIGNMENT_SIZE);\
} while (0)
#endif /* PLATFORM_UNALIGNED_WORD_ACCESS */
/* stack pop level */
#define STACK_POP_LEVEL_FREE 0
#define STACK_POP_LEVEL_MEM_START 1
#define STACK_POP_LEVEL_ALL 2
/* optimize flags */
#define ONIG_OPTIMIZE_NONE 0
#define ONIG_OPTIMIZE_EXACT 1 /* Slow Search */
#define ONIG_OPTIMIZE_EXACT_BM 2 /* Boyer Moore Search */
#define ONIG_OPTIMIZE_EXACT_BM_NOT_REV 3 /* BM (but not simple match) */
#define ONIG_OPTIMIZE_EXACT_IC 4 /* Slow Search (ignore case) */
#define ONIG_OPTIMIZE_MAP 5 /* char map */
/* bit status */
typedef unsigned int BitStatusType;
#define BIT_STATUS_BITS_NUM (sizeof(BitStatusType) * 8)
#define BIT_STATUS_CLEAR(stats) (stats) = 0
#define BIT_STATUS_ON_ALL(stats) (stats) = ~((BitStatusType )0)
#define BIT_STATUS_AT(stats,n) \
((n) < (int )BIT_STATUS_BITS_NUM ? ((stats) & (1 << n)) : ((stats) & 1))
#define BIT_STATUS_ON_AT(stats,n) do {\
if ((n) < (int )BIT_STATUS_BITS_NUM) \
(stats) |= (1 << (n));\
else\
(stats) |= 1;\
} while (0)
#define BIT_STATUS_ON_AT_SIMPLE(stats,n) do {\
if ((n) < (int )BIT_STATUS_BITS_NUM)\
(stats) |= (1 << (n));\
} while (0)
#define INT_MAX_LIMIT ((1UL << (SIZEOF_INT * 8 - 1)) - 1)
#define DIGITVAL(code) ((code) - '0')
#define ODIGITVAL(code) DIGITVAL(code)
#define XDIGITVAL(enc,code) \
(ONIGENC_IS_CODE_DIGIT(enc,code) ? DIGITVAL(code) \
: (ONIGENC_IS_CODE_UPPER(enc,code) ? (code) - 'A' + 10 : (code) - 'a' + 10))
#define IS_SINGLELINE(option) ((option) & ONIG_OPTION_SINGLELINE)
#define IS_MULTILINE(option) ((option) & ONIG_OPTION_MULTILINE)
#define IS_IGNORECASE(option) ((option) & ONIG_OPTION_IGNORECASE)
#define IS_EXTEND(option) ((option) & ONIG_OPTION_EXTEND)
#define IS_FIND_LONGEST(option) ((option) & ONIG_OPTION_FIND_LONGEST)
#define IS_FIND_NOT_EMPTY(option) ((option) & ONIG_OPTION_FIND_NOT_EMPTY)
#define IS_FIND_CONDITION(option) ((option) & \
(ONIG_OPTION_FIND_LONGEST | ONIG_OPTION_FIND_NOT_EMPTY))
#define IS_NOTBOL(option) ((option) & ONIG_OPTION_NOTBOL)
#define IS_NOTEOL(option) ((option) & ONIG_OPTION_NOTEOL)
#define IS_POSIX_REGION(option) ((option) & ONIG_OPTION_POSIX_REGION)
/* OP_SET_OPTION is required for these options.
#define IS_DYNAMIC_OPTION(option) \
(((option) & (ONIG_OPTION_MULTILINE | ONIG_OPTION_IGNORECASE)) != 0)
*/
/* ignore-case and multibyte status are included in compiled code. */
#define IS_DYNAMIC_OPTION(option) 0
#define DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag) \
((case_fold_flag) & ~INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR)
#define REPEAT_INFINITE -1
#define IS_REPEAT_INFINITE(n) ((n) == REPEAT_INFINITE)
/* bitset */
#define BITS_PER_BYTE 8
#define SINGLE_BYTE_SIZE (1 << BITS_PER_BYTE)
#define BITS_IN_ROOM (sizeof(Bits) * BITS_PER_BYTE)
#define BITSET_SIZE (SINGLE_BYTE_SIZE / BITS_IN_ROOM)
#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
typedef unsigned int Bits;
#else
typedef unsigned char Bits;
#endif
typedef Bits BitSet[BITSET_SIZE];
typedef Bits* BitSetRef;
#define SIZE_BITSET sizeof(BitSet)
#define BITSET_CLEAR(bs) do {\
int i;\
for (i = 0; i < (int )BITSET_SIZE; i++) { (bs)[i] = 0; } \
} while (0)
#define BS_ROOM(bs,pos) (bs)[pos / BITS_IN_ROOM]
#define BS_BIT(pos) (1 << (pos % BITS_IN_ROOM))
#define BITSET_AT(bs, pos) (BS_ROOM(bs,pos) & BS_BIT(pos))
#define BITSET_SET_BIT(bs, pos) BS_ROOM(bs,pos) |= BS_BIT(pos)
#define BITSET_CLEAR_BIT(bs, pos) BS_ROOM(bs,pos) &= ~(BS_BIT(pos))
#define BITSET_INVERT_BIT(bs, pos) BS_ROOM(bs,pos) ^= BS_BIT(pos)
/* bytes buffer */
typedef struct _BBuf {
UChar* p;
unsigned int used;
unsigned int alloc;
} BBuf;
#define BBUF_INIT(buf,size) onig_bbuf_init((BBuf* )(buf), (size))
#define BBUF_SIZE_INC(buf,inc) do{\
(buf)->alloc += (inc);\
(buf)->p = (UChar* )xrealloc((buf)->p, (buf)->alloc);\
if (IS_NULL((buf)->p)) return(ONIGERR_MEMORY);\
} while (0)
#define BBUF_EXPAND(buf,low) do{\
do { (buf)->alloc *= 2; } while ((buf)->alloc < (unsigned int )low);\
(buf)->p = (UChar* )xrealloc((buf)->p, (buf)->alloc);\
if (IS_NULL((buf)->p)) return(ONIGERR_MEMORY);\
} while (0)
#define BBUF_ENSURE_SIZE(buf,size) do{\
unsigned int new_alloc = (buf)->alloc;\
while (new_alloc < (unsigned int )(size)) { new_alloc *= 2; }\
if ((buf)->alloc != new_alloc) {\
(buf)->p = (UChar* )xrealloc((buf)->p, new_alloc);\
if (IS_NULL((buf)->p)) return(ONIGERR_MEMORY);\
(buf)->alloc = new_alloc;\
}\
} while (0)
#define BBUF_WRITE(buf,pos,bytes,n) do{\
int used = (pos) + (n);\
if ((buf)->alloc < (unsigned int )used) BBUF_EXPAND((buf),used);\
xmemcpy((buf)->p + (pos), (bytes), (n));\
if ((buf)->used < (unsigned int )used) (buf)->used = used;\
} while (0)
#define BBUF_WRITE1(buf,pos,byte) do{\
int used = (pos) + 1;\
if ((buf)->alloc < (unsigned int )used) BBUF_EXPAND((buf),used);\
(buf)->p[(pos)] = (byte);\
if ((buf)->used < (unsigned int )used) (buf)->used = used;\
} while (0)
#define BBUF_ADD(buf,bytes,n) BBUF_WRITE((buf),(buf)->used,(bytes),(n))
#define BBUF_ADD1(buf,byte) BBUF_WRITE1((buf),(buf)->used,(byte))
#define BBUF_GET_ADD_ADDRESS(buf) ((buf)->p + (buf)->used)
#define BBUF_GET_OFFSET_POS(buf) ((buf)->used)
/* from < to */
#define BBUF_MOVE_RIGHT(buf,from,to,n) do {\
if ((unsigned int )((to)+(n)) > (buf)->alloc) BBUF_EXPAND((buf),(to) + (n));\
xmemmove((buf)->p + (to), (buf)->p + (from), (n));\
if ((unsigned int )((to)+(n)) > (buf)->used) (buf)->used = (to) + (n);\
} while (0)
/* from > to */
#define BBUF_MOVE_LEFT(buf,from,to,n) do {\
xmemmove((buf)->p + (to), (buf)->p + (from), (n));\
} while (0)
/* from > to */
#define BBUF_MOVE_LEFT_REDUCE(buf,from,to) do {\
xmemmove((buf)->p + (to), (buf)->p + (from), (buf)->used - (from));\
(buf)->used -= (from - to);\
} while (0)
#define BBUF_INSERT(buf,pos,bytes,n) do {\
if (pos >= (buf)->used) {\
BBUF_WRITE(buf,pos,bytes,n);\
}\
else {\
BBUF_MOVE_RIGHT((buf),(pos),(pos) + (n),((buf)->used - (pos)));\
xmemcpy((buf)->p + (pos), (bytes), (n));\
}\
} while (0)
#define BBUF_GET_BYTE(buf, pos) (buf)->p[(pos)]
#define ANCHOR_BEGIN_BUF (1<<0)
#define ANCHOR_BEGIN_LINE (1<<1)
#define ANCHOR_BEGIN_POSITION (1<<2)
#define ANCHOR_END_BUF (1<<3)
#define ANCHOR_SEMI_END_BUF (1<<4)
#define ANCHOR_END_LINE (1<<5)
#define ANCHOR_WORD_BOUND (1<<6)
#define ANCHOR_NOT_WORD_BOUND (1<<7)
#define ANCHOR_WORD_BEGIN (1<<8)
#define ANCHOR_WORD_END (1<<9)
#define ANCHOR_PREC_READ (1<<10)
#define ANCHOR_PREC_READ_NOT (1<<11)
#define ANCHOR_LOOK_BEHIND (1<<12)
#define ANCHOR_LOOK_BEHIND_NOT (1<<13)
#define ANCHOR_ANYCHAR_STAR (1<<14) /* ".*" optimize info */
#define ANCHOR_ANYCHAR_STAR_ML (1<<15) /* ".*" optimize info (multi-line) */
/* operation code */
enum OpCode {
OP_FINISH = 0, /* matching process terminator (no more alternative) */
OP_END = 1, /* pattern code terminator (success end) */
OP_EXACT1 = 2, /* single byte, N = 1 */
OP_EXACT2, /* single byte, N = 2 */
OP_EXACT3, /* single byte, N = 3 */
OP_EXACT4, /* single byte, N = 4 */
OP_EXACT5, /* single byte, N = 5 */
OP_EXACTN, /* single byte */
OP_EXACTMB2N1, /* mb-length = 2 N = 1 */
OP_EXACTMB2N2, /* mb-length = 2 N = 2 */
OP_EXACTMB2N3, /* mb-length = 2 N = 3 */
OP_EXACTMB2N, /* mb-length = 2 */
OP_EXACTMB3N, /* mb-length = 3 */
OP_EXACTMBN, /* other length */
OP_EXACT1_IC, /* single byte, N = 1, ignore case */
OP_EXACTN_IC, /* single byte, ignore case */
OP_CCLASS,
OP_CCLASS_MB,
OP_CCLASS_MIX,
OP_CCLASS_NOT,
OP_CCLASS_MB_NOT,
OP_CCLASS_MIX_NOT,
OP_CCLASS_NODE, /* pointer to CClassNode node */
OP_ANYCHAR, /* "." */
OP_ANYCHAR_ML, /* "." multi-line */
OP_ANYCHAR_STAR, /* ".*" */
OP_ANYCHAR_ML_STAR, /* ".*" multi-line */
OP_ANYCHAR_STAR_PEEK_NEXT,
OP_ANYCHAR_ML_STAR_PEEK_NEXT,
OP_WORD,
OP_NOT_WORD,
OP_WORD_BOUND,
OP_NOT_WORD_BOUND,
OP_WORD_BEGIN,
OP_WORD_END,
OP_BEGIN_BUF,
OP_END_BUF,
OP_BEGIN_LINE,
OP_END_LINE,
OP_SEMI_END_BUF,
OP_BEGIN_POSITION,
OP_BACKREF1,
OP_BACKREF2,
OP_BACKREFN,
OP_BACKREFN_IC,
OP_BACKREF_MULTI,
OP_BACKREF_MULTI_IC,
OP_BACKREF_WITH_LEVEL, /* \k<xxx+n>, \k<xxx-n> */
OP_MEMORY_START,
OP_MEMORY_START_PUSH, /* push back-tracker to stack */
OP_MEMORY_END_PUSH, /* push back-tracker to stack */
OP_MEMORY_END_PUSH_REC, /* push back-tracker to stack */
OP_MEMORY_END,
OP_MEMORY_END_REC, /* push marker to stack */
OP_FAIL, /* pop stack and move */
OP_JUMP,
OP_PUSH,
OP_POP,
OP_PUSH_OR_JUMP_EXACT1, /* if match exact then push, else jump. */
OP_PUSH_IF_PEEK_NEXT, /* if match exact then push, else none. */
OP_REPEAT, /* {n,m} */
OP_REPEAT_NG, /* {n,m}? (non greedy) */
OP_REPEAT_INC,
OP_REPEAT_INC_NG, /* non greedy */
OP_REPEAT_INC_SG, /* search and get in stack */
OP_REPEAT_INC_NG_SG, /* search and get in stack (non greedy) */
OP_NULL_CHECK_START, /* null loop checker start */
OP_NULL_CHECK_END, /* null loop checker end */
OP_NULL_CHECK_END_MEMST, /* null loop checker end (with capture status) */
OP_NULL_CHECK_END_MEMST_PUSH, /* with capture status and push check-end */
OP_PUSH_POS, /* (?=...) start */
OP_POP_POS, /* (?=...) end */
OP_PUSH_POS_NOT, /* (?!...) start */
OP_FAIL_POS, /* (?!...) end */
OP_PUSH_STOP_BT, /* (?>...) start */
OP_POP_STOP_BT, /* (?>...) end */
OP_LOOK_BEHIND, /* (?<=...) start (no needs end opcode) */
OP_PUSH_LOOK_BEHIND_NOT, /* (?<!...) start */
OP_FAIL_LOOK_BEHIND_NOT, /* (?<!...) end */
OP_CALL, /* \g<name> */
OP_RETURN,
OP_STATE_CHECK_PUSH, /* combination explosion check and push */
OP_STATE_CHECK_PUSH_OR_JUMP, /* check ok -> push, else jump */
OP_STATE_CHECK, /* check only */
OP_STATE_CHECK_ANYCHAR_STAR,
OP_STATE_CHECK_ANYCHAR_ML_STAR,
/* no need: IS_DYNAMIC_OPTION() == 0 */
OP_SET_OPTION_PUSH, /* set option and push recover option */
OP_SET_OPTION /* set option */
};
typedef int RelAddrType;
typedef int AbsAddrType;
typedef int LengthType;
typedef int RepeatNumType;
typedef short int MemNumType;
typedef short int StateCheckNumType;
typedef void* PointerType;
#define SIZE_OPCODE 1
#define SIZE_RELADDR sizeof(RelAddrType)
#define SIZE_ABSADDR sizeof(AbsAddrType)
#define SIZE_LENGTH sizeof(LengthType)
#define SIZE_MEMNUM sizeof(MemNumType)
#define SIZE_STATE_CHECK_NUM sizeof(StateCheckNumType)
#define SIZE_REPEATNUM sizeof(RepeatNumType)
#define SIZE_OPTION sizeof(OnigOptionType)
#define SIZE_CODE_POINT sizeof(OnigCodePoint)
#define SIZE_POINTER sizeof(PointerType)
#define GET_RELADDR_INC(addr,p) PLATFORM_GET_INC(addr, p, RelAddrType)
#define GET_ABSADDR_INC(addr,p) PLATFORM_GET_INC(addr, p, AbsAddrType)
#define GET_LENGTH_INC(len,p) PLATFORM_GET_INC(len, p, LengthType)
#define GET_MEMNUM_INC(num,p) PLATFORM_GET_INC(num, p, MemNumType)
#define GET_REPEATNUM_INC(num,p) PLATFORM_GET_INC(num, p, RepeatNumType)
#define GET_OPTION_INC(option,p) PLATFORM_GET_INC(option, p, OnigOptionType)
#define GET_POINTER_INC(ptr,p) PLATFORM_GET_INC(ptr, p, PointerType)
#define GET_STATE_CHECK_NUM_INC(num,p) PLATFORM_GET_INC(num, p, StateCheckNumType)
/* code point's address must be aligned address. */
#define GET_CODE_POINT(code,p) code = *((OnigCodePoint* )(p))
#define GET_BYTE_INC(byte,p) do{\
byte = *(p);\
(p)++;\
} while(0)
/* op-code + arg size */
#define SIZE_OP_ANYCHAR_STAR SIZE_OPCODE
#define SIZE_OP_ANYCHAR_STAR_PEEK_NEXT (SIZE_OPCODE + 1)
#define SIZE_OP_JUMP (SIZE_OPCODE + SIZE_RELADDR)
#define SIZE_OP_PUSH (SIZE_OPCODE + SIZE_RELADDR)
#define SIZE_OP_POP SIZE_OPCODE
#define SIZE_OP_PUSH_OR_JUMP_EXACT1 (SIZE_OPCODE + SIZE_RELADDR + 1)
#define SIZE_OP_PUSH_IF_PEEK_NEXT (SIZE_OPCODE + SIZE_RELADDR + 1)
#define SIZE_OP_REPEAT_INC (SIZE_OPCODE + SIZE_MEMNUM)
#define SIZE_OP_REPEAT_INC_NG (SIZE_OPCODE + SIZE_MEMNUM)
#define SIZE_OP_PUSH_POS SIZE_OPCODE
#define SIZE_OP_PUSH_POS_NOT (SIZE_OPCODE + SIZE_RELADDR)
#define SIZE_OP_POP_POS SIZE_OPCODE
#define SIZE_OP_FAIL_POS SIZE_OPCODE
#define SIZE_OP_SET_OPTION (SIZE_OPCODE + SIZE_OPTION)
#define SIZE_OP_SET_OPTION_PUSH (SIZE_OPCODE + SIZE_OPTION)
#define SIZE_OP_FAIL SIZE_OPCODE
#define SIZE_OP_MEMORY_START (SIZE_OPCODE + SIZE_MEMNUM)
#define SIZE_OP_MEMORY_START_PUSH (SIZE_OPCODE + SIZE_MEMNUM)
#define SIZE_OP_MEMORY_END_PUSH (SIZE_OPCODE + SIZE_MEMNUM)
#define SIZE_OP_MEMORY_END_PUSH_REC (SIZE_OPCODE + SIZE_MEMNUM)
#define SIZE_OP_MEMORY_END (SIZE_OPCODE + SIZE_MEMNUM)
#define SIZE_OP_MEMORY_END_REC (SIZE_OPCODE + SIZE_MEMNUM)
#define SIZE_OP_PUSH_STOP_BT SIZE_OPCODE
#define SIZE_OP_POP_STOP_BT SIZE_OPCODE
#define SIZE_OP_NULL_CHECK_START (SIZE_OPCODE + SIZE_MEMNUM)
#define SIZE_OP_NULL_CHECK_END (SIZE_OPCODE + SIZE_MEMNUM)
#define SIZE_OP_LOOK_BEHIND (SIZE_OPCODE + SIZE_LENGTH)
#define SIZE_OP_PUSH_LOOK_BEHIND_NOT (SIZE_OPCODE + SIZE_RELADDR + SIZE_LENGTH)
#define SIZE_OP_FAIL_LOOK_BEHIND_NOT SIZE_OPCODE
#define SIZE_OP_CALL (SIZE_OPCODE + SIZE_ABSADDR)
#define SIZE_OP_RETURN SIZE_OPCODE
#ifdef USE_COMBINATION_EXPLOSION_CHECK
#define SIZE_OP_STATE_CHECK (SIZE_OPCODE + SIZE_STATE_CHECK_NUM)
#define SIZE_OP_STATE_CHECK_PUSH (SIZE_OPCODE + SIZE_STATE_CHECK_NUM + SIZE_RELADDR)
#define SIZE_OP_STATE_CHECK_PUSH_OR_JUMP (SIZE_OPCODE + SIZE_STATE_CHECK_NUM + SIZE_RELADDR)
#define SIZE_OP_STATE_CHECK_ANYCHAR_STAR (SIZE_OPCODE + SIZE_STATE_CHECK_NUM)
#endif
#define MC_ESC(syn) (syn)->meta_char_table.esc
#define MC_ANYCHAR(syn) (syn)->meta_char_table.anychar
#define MC_ANYTIME(syn) (syn)->meta_char_table.anytime
#define MC_ZERO_OR_ONE_TIME(syn) (syn)->meta_char_table.zero_or_one_time
#define MC_ONE_OR_MORE_TIME(syn) (syn)->meta_char_table.one_or_more_time
#define MC_ANYCHAR_ANYTIME(syn) (syn)->meta_char_table.anychar_anytime
#define IS_MC_ESC_CODE(code, syn) \
((code) == MC_ESC(syn) && \
!IS_SYNTAX_OP2((syn), ONIG_SYN_OP2_INEFFECTIVE_ESCAPE))
#define SYN_POSIX_COMMON_OP \
( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_POSIX_BRACKET | \
ONIG_SYN_OP_DECIMAL_BACKREF | \
ONIG_SYN_OP_BRACKET_CC | ONIG_SYN_OP_ASTERISK_ZERO_INF | \
ONIG_SYN_OP_LINE_ANCHOR | \
ONIG_SYN_OP_ESC_CONTROL_CHARS )
#define SYN_GNU_REGEX_OP \
( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_BRACKET_CC | \
ONIG_SYN_OP_POSIX_BRACKET | ONIG_SYN_OP_DECIMAL_BACKREF | \
ONIG_SYN_OP_BRACE_INTERVAL | ONIG_SYN_OP_LPAREN_SUBEXP | \
ONIG_SYN_OP_VBAR_ALT | \
ONIG_SYN_OP_ASTERISK_ZERO_INF | ONIG_SYN_OP_PLUS_ONE_INF | \
ONIG_SYN_OP_QMARK_ZERO_ONE | \
ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR | ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR | \
ONIG_SYN_OP_ESC_W_WORD | \
ONIG_SYN_OP_ESC_B_WORD_BOUND | ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END | \
ONIG_SYN_OP_ESC_S_WHITE_SPACE | ONIG_SYN_OP_ESC_D_DIGIT | \
ONIG_SYN_OP_LINE_ANCHOR )
#define SYN_GNU_REGEX_BV \
( ONIG_SYN_CONTEXT_INDEP_ANCHORS | ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS | \
ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS | ONIG_SYN_ALLOW_INVALID_INTERVAL | \
ONIG_SYN_BACKSLASH_ESCAPE_IN_CC | ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC )
#define NCCLASS_FLAGS(cc) ((cc)->flags)
#define NCCLASS_FLAG_SET(cc,flag) (NCCLASS_FLAGS(cc) |= (flag))
#define NCCLASS_FLAG_CLEAR(cc,flag) (NCCLASS_FLAGS(cc) &= ~(flag))
#define IS_NCCLASS_FLAG_ON(cc,flag) ((NCCLASS_FLAGS(cc) & (flag)) != 0)
/* cclass node */
#define FLAG_NCCLASS_NOT (1<<0)
#define FLAG_NCCLASS_SHARE (1<<1)
#define NCCLASS_SET_NOT(nd) NCCLASS_FLAG_SET(nd, FLAG_NCCLASS_NOT)
#define NCCLASS_SET_SHARE(nd) NCCLASS_FLAG_SET(nd, FLAG_NCCLASS_SHARE)
#define NCCLASS_CLEAR_NOT(nd) NCCLASS_FLAG_CLEAR(nd, FLAG_NCCLASS_NOT)
#define IS_NCCLASS_NOT(nd) IS_NCCLASS_FLAG_ON(nd, FLAG_NCCLASS_NOT)
#define IS_NCCLASS_SHARE(nd) IS_NCCLASS_FLAG_ON(nd, FLAG_NCCLASS_SHARE)
typedef struct {
int type;
/* struct _Node* next; */
/* unsigned int flags; */
} NodeBase;
typedef struct {
NodeBase base;
unsigned int flags;
BitSet bs;
BBuf* mbuf; /* multi-byte info or NULL */
} CClassNode;
typedef long OnigStackIndex;
typedef struct _OnigStackType {
unsigned int type;
union {
struct {
UChar *pcode; /* byte code position */
UChar *pstr; /* string position */
UChar *pstr_prev; /* previous char position of pstr */
#ifdef USE_COMBINATION_EXPLOSION_CHECK
unsigned int state_check;
#endif
} state;
struct {
int count; /* for OP_REPEAT_INC, OP_REPEAT_INC_NG */
UChar *pcode; /* byte code position (head of repeated target) */
int num; /* repeat id */
} repeat;
struct {
OnigStackIndex si; /* index of stack */
} repeat_inc;
struct {
int num; /* memory num */
UChar *pstr; /* start/end position */
/* Following information is setted, if this stack type is MEM-START */
OnigStackIndex start; /* prev. info (for backtrack "(...)*" ) */
OnigStackIndex end; /* prev. info (for backtrack "(...)*" ) */
} mem;
struct {
int num; /* null check id */
UChar *pstr; /* start position */
} null_check;
#ifdef USE_SUBEXP_CALL
struct {
UChar *ret_addr; /* byte code position */
int num; /* null check id */
UChar *pstr; /* string position */
} call_frame;
#endif
} u;
} OnigStackType;
typedef struct {
void* stack_p;
int stack_n;
OnigOptionType options;
OnigRegion* region;
const UChar* start; /* search start position (for \G: BEGIN_POSITION) */
#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
int best_len; /* for ONIG_OPTION_FIND_LONGEST */
UChar* best_s;
#endif
#ifdef USE_COMBINATION_EXPLOSION_CHECK
void* state_check_buff;
int state_check_buff_size;
#endif
} OnigMatchArg;
#define IS_CODE_SB_WORD(enc,code) \
(ONIGENC_IS_CODE_ASCII(code) && ONIGENC_IS_CODE_WORD(enc,code))
#ifdef ONIG_DEBUG
typedef struct {
short int opcode;
char* name;
short int arg_type;
} OnigOpInfoType;
extern OnigOpInfoType OnigOpInfo[];
extern void onig_print_compiled_byte_code P_((FILE* f, UChar* bp, UChar** nextp, OnigEncoding enc));
#ifdef ONIG_DEBUG_STATISTICS
extern void onig_statistics_init P_((void));
extern void onig_print_statistics P_((FILE* f));
#endif
#endif
extern UChar* onig_error_code_to_format P_((int code));
extern void onig_snprintf_with_pattern PV_((UChar buf[], int bufsize, OnigEncoding enc, UChar* pat, UChar* pat_end, const UChar *fmt, ...));
extern int onig_bbuf_init P_((BBuf* buf, int size));
extern int onig_compile P_((regex_t* reg, const UChar* pattern, const UChar* pattern_end, OnigErrorInfo* einfo));
extern void onig_chain_reduce P_((regex_t* reg));
extern void onig_chain_link_add P_((regex_t* to, regex_t* add));
extern void onig_transfer P_((regex_t* to, regex_t* from));
extern int onig_is_code_in_cc P_((OnigEncoding enc, OnigCodePoint code, CClassNode* cc));
extern int onig_is_code_in_cc_len P_((int enclen, OnigCodePoint code, CClassNode* cc));
/* strend hash */
typedef void hash_table_type;
typedef unsigned long hash_data_type;
extern hash_table_type* onig_st_init_strend_table_with_size P_((int size));
extern int onig_st_lookup_strend P_((hash_table_type* table, const UChar* str_key, const UChar* end_key, hash_data_type *value));
extern int onig_st_insert_strend P_((hash_table_type* table, const UChar* str_key, const UChar* end_key, hash_data_type value));
/* encoding property management */
#define PROPERTY_LIST_ADD_PROP(Name, CR) \
r = onigenc_property_list_add_property((UChar* )Name, CR,\
&PropertyNameTable, &PropertyList, &PropertyListNum,\
&PropertyListSize);\
if (r != 0) goto end
#define PROPERTY_LIST_INIT_CHECK \
if (PropertyInited == 0) {\
int r = onigenc_property_list_init(init_property_list);\
if (r != 0) return r;\
}
extern int onigenc_property_list_add_property P_((UChar* name, const OnigCodePoint* prop, hash_table_type **table, const OnigCodePoint*** plist, int *pnum, int *psize));
typedef int (*ONIGENC_INIT_PROPERTY_LIST_FUNC_TYPE)(void);
extern int onigenc_property_list_init P_((ONIGENC_INIT_PROPERTY_LIST_FUNC_TYPE));
#endif /* REGINT_H */

5552
oniguruma/regparse.c Normal file

File diff suppressed because it is too large Load diff

351
oniguruma/regparse.h Normal file
View file

@ -0,0 +1,351 @@
#ifndef REGPARSE_H
#define REGPARSE_H
/**********************************************************************
regparse.h - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "regint.h"
/* node type */
#define NT_STR 0
#define NT_CCLASS 1
#define NT_CTYPE 2
#define NT_CANY 3
#define NT_BREF 4
#define NT_QTFR 5
#define NT_ENCLOSE 6
#define NT_ANCHOR 7
#define NT_LIST 8
#define NT_ALT 9
#define NT_CALL 10
/* node type bit */
#define NTYPE2BIT(type) (1<<(type))
#define BIT_NT_STR NTYPE2BIT(NT_STR)
#define BIT_NT_CCLASS NTYPE2BIT(NT_CCLASS)
#define BIT_NT_CTYPE NTYPE2BIT(NT_CTYPE)
#define BIT_NT_CANY NTYPE2BIT(NT_CANY)
#define BIT_NT_BREF NTYPE2BIT(NT_BREF)
#define BIT_NT_QTFR NTYPE2BIT(NT_QTFR)
#define BIT_NT_ENCLOSE NTYPE2BIT(NT_ENCLOSE)
#define BIT_NT_ANCHOR NTYPE2BIT(NT_ANCHOR)
#define BIT_NT_LIST NTYPE2BIT(NT_LIST)
#define BIT_NT_ALT NTYPE2BIT(NT_ALT)
#define BIT_NT_CALL NTYPE2BIT(NT_CALL)
#define IS_NODE_TYPE_SIMPLE(type) \
((NTYPE2BIT(type) & (BIT_NT_STR | BIT_NT_CCLASS | BIT_NT_CTYPE |\
BIT_NT_CANY | BIT_NT_BREF)) != 0)
#define NTYPE(node) ((node)->u.base.type)
#define SET_NTYPE(node, ntype) (node)->u.base.type = (ntype)
#define NSTR(node) (&((node)->u.str))
#define NCCLASS(node) (&((node)->u.cclass))
#define NCTYPE(node) (&((node)->u.ctype))
#define NBREF(node) (&((node)->u.bref))
#define NQTFR(node) (&((node)->u.qtfr))
#define NENCLOSE(node) (&((node)->u.enclose))
#define NANCHOR(node) (&((node)->u.anchor))
#define NCONS(node) (&((node)->u.cons))
#define NCALL(node) (&((node)->u.call))
#define NCAR(node) (NCONS(node)->car)
#define NCDR(node) (NCONS(node)->cdr)
#define ANCHOR_ANYCHAR_STAR_MASK (ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML)
#define ANCHOR_END_BUF_MASK (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)
#define ENCLOSE_MEMORY (1<<0)
#define ENCLOSE_OPTION (1<<1)
#define ENCLOSE_STOP_BACKTRACK (1<<2)
#define NODE_STR_MARGIN 16
#define NODE_STR_BUF_SIZE 24 /* sizeof(CClassNode) - sizeof(int)*4 */
#define NODE_BACKREFS_SIZE 6
#define NSTR_RAW (1<<0) /* by backslashed number */
#define NSTR_AMBIG (1<<1)
#define NSTR_DONT_GET_OPT_INFO (1<<2)
#define NSTRING_LEN(node) ((node)->u.str.end - (node)->u.str.s)
#define NSTRING_SET_RAW(node) (node)->u.str.flag |= NSTR_RAW
#define NSTRING_CLEAR_RAW(node) (node)->u.str.flag &= ~NSTR_RAW
#define NSTRING_SET_AMBIG(node) (node)->u.str.flag |= NSTR_AMBIG
#define NSTRING_SET_DONT_GET_OPT_INFO(node) \
(node)->u.str.flag |= NSTR_DONT_GET_OPT_INFO
#define NSTRING_IS_RAW(node) (((node)->u.str.flag & NSTR_RAW) != 0)
#define NSTRING_IS_AMBIG(node) (((node)->u.str.flag & NSTR_AMBIG) != 0)
#define NSTRING_IS_DONT_GET_OPT_INFO(node) \
(((node)->u.str.flag & NSTR_DONT_GET_OPT_INFO) != 0)
#define BACKREFS_P(br) \
(IS_NOT_NULL((br)->back_dynamic) ? (br)->back_dynamic : (br)->back_static);
#define NQ_TARGET_ISNOT_EMPTY 0
#define NQ_TARGET_IS_EMPTY 1
#define NQ_TARGET_IS_EMPTY_MEM 2
#define NQ_TARGET_IS_EMPTY_REC 3
/* status bits */
#define NST_MIN_FIXED (1<<0)
#define NST_MAX_FIXED (1<<1)
#define NST_CLEN_FIXED (1<<2)
#define NST_MARK1 (1<<3)
#define NST_MARK2 (1<<4)
#define NST_MEM_BACKREFED (1<<5)
#define NST_STOP_BT_SIMPLE_REPEAT (1<<6)
#define NST_RECURSION (1<<7)
#define NST_CALLED (1<<8)
#define NST_ADDR_FIXED (1<<9)
#define NST_NAMED_GROUP (1<<10)
#define NST_NAME_REF (1<<11)
#define NST_IN_REPEAT (1<<12) /* STK_REPEAT is nested in stack. */
#define NST_NEST_LEVEL (1<<13)
#define NST_BY_NUMBER (1<<14) /* {n,m} */
#define SET_ENCLOSE_STATUS(node,f) (node)->u.enclose.state |= (f)
#define CLEAR_ENCLOSE_STATUS(node,f) (node)->u.enclose.state &= ~(f)
#define IS_ENCLOSE_CALLED(en) (((en)->state & NST_CALLED) != 0)
#define IS_ENCLOSE_ADDR_FIXED(en) (((en)->state & NST_ADDR_FIXED) != 0)
#define IS_ENCLOSE_RECURSION(en) (((en)->state & NST_RECURSION) != 0)
#define IS_ENCLOSE_MARK1(en) (((en)->state & NST_MARK1) != 0)
#define IS_ENCLOSE_MARK2(en) (((en)->state & NST_MARK2) != 0)
#define IS_ENCLOSE_MIN_FIXED(en) (((en)->state & NST_MIN_FIXED) != 0)
#define IS_ENCLOSE_MAX_FIXED(en) (((en)->state & NST_MAX_FIXED) != 0)
#define IS_ENCLOSE_CLEN_FIXED(en) (((en)->state & NST_CLEN_FIXED) != 0)
#define IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(en) \
(((en)->state & NST_STOP_BT_SIMPLE_REPEAT) != 0)
#define IS_ENCLOSE_NAMED_GROUP(en) (((en)->state & NST_NAMED_GROUP) != 0)
#define SET_CALL_RECURSION(node) (node)->u.call.state |= NST_RECURSION
#define IS_CALL_RECURSION(cn) (((cn)->state & NST_RECURSION) != 0)
#define IS_CALL_NAME_REF(cn) (((cn)->state & NST_NAME_REF) != 0)
#define IS_BACKREF_NAME_REF(bn) (((bn)->state & NST_NAME_REF) != 0)
#define IS_BACKREF_NEST_LEVEL(bn) (((bn)->state & NST_NEST_LEVEL) != 0)
#define IS_QUANTIFIER_IN_REPEAT(qn) (((qn)->state & NST_IN_REPEAT) != 0)
#define IS_QUANTIFIER_BY_NUMBER(qn) (((qn)->state & NST_BY_NUMBER) != 0)
#define CALLNODE_REFNUM_UNDEF -1
typedef struct {
NodeBase base;
UChar* s;
UChar* end;
unsigned int flag;
int capa; /* (allocated size - 1) or 0: use buf[] */
UChar buf[NODE_STR_BUF_SIZE];
} StrNode;
typedef struct {
NodeBase base;
int state;
struct _Node* target;
int lower;
int upper;
int greedy;
int target_empty_info;
struct _Node* head_exact;
struct _Node* next_head_exact;
int is_refered; /* include called node. don't eliminate even if {0} */
#ifdef USE_COMBINATION_EXPLOSION_CHECK
int comb_exp_check_num; /* 1,2,3...: check, 0: no check */
#endif
} QtfrNode;
typedef struct {
NodeBase base;
int state;
int type;
int regnum;
OnigOptionType option;
struct _Node* target;
AbsAddrType call_addr;
/* for multiple call reference */
OnigDistance min_len; /* min length (byte) */
OnigDistance max_len; /* max length (byte) */
int char_len; /* character length */
int opt_count; /* referenced count in optimize_node_left() */
} EncloseNode;
#ifdef USE_SUBEXP_CALL
typedef struct {
int offset;
struct _Node* target;
} UnsetAddr;
typedef struct {
int num;
int alloc;
UnsetAddr* us;
} UnsetAddrList;
typedef struct {
NodeBase base;
int state;
int group_num;
UChar* name;
UChar* name_end;
struct _Node* target; /* EncloseNode : ENCLOSE_MEMORY */
UnsetAddrList* unset_addr_list;
} CallNode;
#endif
typedef struct {
NodeBase base;
int state;
int back_num;
int back_static[NODE_BACKREFS_SIZE];
int* back_dynamic;
int nest_level;
} BRefNode;
typedef struct {
NodeBase base;
int type;
struct _Node* target;
int char_len;
} AnchorNode;
typedef struct {
NodeBase base;
struct _Node* car;
struct _Node* cdr;
} ConsAltNode;
typedef struct {
NodeBase base;
int ctype;
int not;
} CtypeNode;
typedef struct _Node {
union {
NodeBase base;
StrNode str;
CClassNode cclass;
QtfrNode qtfr;
EncloseNode enclose;
BRefNode bref;
AnchorNode anchor;
ConsAltNode cons;
CtypeNode ctype;
#ifdef USE_SUBEXP_CALL
CallNode call;
#endif
} u;
} Node;
#define NULL_NODE ((Node* )0)
#define SCANENV_MEMNODES_SIZE 8
#define SCANENV_MEM_NODES(senv) \
(IS_NOT_NULL((senv)->mem_nodes_dynamic) ? \
(senv)->mem_nodes_dynamic : (senv)->mem_nodes_static)
typedef struct {
OnigOptionType option;
OnigCaseFoldType case_fold_flag;
OnigEncoding enc;
OnigSyntaxType* syntax;
BitStatusType capture_history;
BitStatusType bt_mem_start;
BitStatusType bt_mem_end;
BitStatusType backrefed_mem;
UChar* pattern;
UChar* pattern_end;
UChar* error;
UChar* error_end;
regex_t* reg; /* for reg->names only */
int num_call;
#ifdef USE_SUBEXP_CALL
UnsetAddrList* unset_addr_list;
#endif
int num_mem;
#ifdef USE_NAMED_GROUP
int num_named;
#endif
int mem_alloc;
Node* mem_nodes_static[SCANENV_MEMNODES_SIZE];
Node** mem_nodes_dynamic;
#ifdef USE_COMBINATION_EXPLOSION_CHECK
int num_comb_exp_check;
int comb_exp_max_regnum;
int curr_max_regnum;
int has_recursion;
#endif
} ScanEnv;
#define IS_SYNTAX_OP(syn, opm) (((syn)->op & (opm)) != 0)
#define IS_SYNTAX_OP2(syn, opm) (((syn)->op2 & (opm)) != 0)
#define IS_SYNTAX_BV(syn, bvm) (((syn)->behavior & (bvm)) != 0)
#ifdef USE_NAMED_GROUP
typedef struct {
int new_val;
} GroupNumRemap;
extern int onig_renumber_name_table P_((regex_t* reg, GroupNumRemap* map));
#endif
extern int onig_strncmp P_((const UChar* s1, const UChar* s2, int n));
extern void onig_strcpy P_((UChar* dest, const UChar* src, const UChar* end));
extern void onig_scan_env_set_error_string P_((ScanEnv* env, int ecode, UChar* arg, UChar* arg_end));
extern int onig_scan_unsigned_number P_((UChar** src, const UChar* end, OnigEncoding enc));
extern void onig_reduce_nested_quantifier P_((Node* pnode, Node* cnode));
extern void onig_node_conv_to_str_node P_((Node* node, int raw));
extern int onig_node_str_cat P_((Node* node, const UChar* s, const UChar* end));
extern int onig_node_str_set P_((Node* node, const UChar* s, const UChar* end));
extern void onig_node_free P_((Node* node));
extern Node* onig_node_new_enclose P_((int type));
extern Node* onig_node_new_anchor P_((int type));
extern Node* onig_node_new_str P_((const UChar* s, const UChar* end));
extern Node* onig_node_new_list P_((Node* left, Node* right));
extern Node* onig_node_list_add P_((Node* list, Node* x));
extern Node* onig_node_new_alt P_((Node* left, Node* right));
extern void onig_node_str_clear P_((Node* node));
extern int onig_free_node_list P_((void));
extern int onig_names_free P_((regex_t* reg));
extern int onig_parse_make_tree P_((Node** root, const UChar* pattern, const UChar* end, regex_t* reg, ScanEnv* env));
extern int onig_free_shared_cclass_table P_((void));
#ifdef ONIG_DEBUG
#ifdef USE_NAMED_GROUP
extern int onig_print_names(FILE*, regex_t*);
#endif
#endif
#endif /* REGPARSE_H */

98
oniguruma/regposerr.c Normal file
View file

@ -0,0 +1,98 @@
/**********************************************************************
regposerr.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "config.h"
#include "onigposix.h"
#ifdef HAVE_STRING_H
# include <string.h>
#else
# include <strings.h>
#endif
#if defined(__GNUC__)
# define ARG_UNUSED __attribute__ ((unused))
#else
# define ARG_UNUSED
#endif
static char* ESTRING[] = {
NULL,
"failed to match", /* REG_NOMATCH */
"Invalid regular expression", /* REG_BADPAT */
"invalid collating element referenced", /* REG_ECOLLATE */
"invalid character class type referenced", /* REG_ECTYPE */
"bad backslash-escape sequence", /* REG_EESCAPE */
"invalid back reference number", /* REG_ESUBREG */
"imbalanced [ and ]", /* REG_EBRACK */
"imbalanced ( and )", /* REG_EPAREN */
"imbalanced { and }", /* REG_EBRACE */
"invalid repeat range {n,m}", /* REG_BADBR */
"invalid range", /* REG_ERANGE */
"Out of memory", /* REG_ESPACE */
"? * + not preceded by valid regular expression", /* REG_BADRPT */
/* Extended errors */
"internal error", /* REG_EONIG_INTERNAL */
"invalid wide char value", /* REG_EONIG_BADWC */
"invalid argument", /* REG_EONIG_BADARG */
"multi-thread error" /* REG_EONIG_THREAD */
};
#include <stdio.h>
extern size_t
regerror(int posix_ecode, const regex_t* reg ARG_UNUSED, char* buf,
size_t size)
{
char* s;
char tbuf[35];
size_t len;
if (posix_ecode > 0
&& posix_ecode < (int )(sizeof(ESTRING) / sizeof(ESTRING[0]))) {
s = ESTRING[posix_ecode];
}
else if (posix_ecode == 0) {
s = "";
}
else {
sprintf(tbuf, "undefined error code (%d)", posix_ecode);
s = tbuf;
}
len = strlen(s) + 1; /* use strlen() because s is ascii encoding. */
if (buf != NULL && size > 0) {
strncpy(buf, s, size - 1);
buf[size - 1] = '\0';
}
return len;
}

303
oniguruma/regposix.c Normal file
View file

@ -0,0 +1,303 @@
/**********************************************************************
regposix.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#define regex_t onig_regex_t
#include "regint.h"
#undef regex_t
#include "onigposix.h"
#define ONIG_C(reg) ((onig_regex_t* )((reg)->onig))
#define PONIG_C(reg) ((onig_regex_t** )(&(reg)->onig))
/* #define ENC_STRING_LEN(enc,s,len) len = strlen(s) */
#define ENC_STRING_LEN(enc,s,len) do { \
if (ONIGENC_MBC_MINLEN(enc) == 1) { \
UChar* tmps = (UChar* )(s); \
while (*tmps != 0) tmps++; \
len = tmps - (UChar* )(s); \
} \
else { \
len = onigenc_str_bytelen_null(enc, (UChar* )s); \
} \
} while(0)
typedef struct {
int onig_err;
int posix_err;
} O2PERR;
static int
onig2posix_error_code(int code)
{
static const O2PERR o2p[] = {
{ ONIG_MISMATCH, REG_NOMATCH },
{ ONIG_NO_SUPPORT_CONFIG, REG_EONIG_INTERNAL },
{ ONIGERR_MEMORY, REG_ESPACE },
{ ONIGERR_MATCH_STACK_LIMIT_OVER, REG_EONIG_INTERNAL },
{ ONIGERR_TYPE_BUG, REG_EONIG_INTERNAL },
{ ONIGERR_PARSER_BUG, REG_EONIG_INTERNAL },
{ ONIGERR_STACK_BUG, REG_EONIG_INTERNAL },
{ ONIGERR_UNDEFINED_BYTECODE, REG_EONIG_INTERNAL },
{ ONIGERR_UNEXPECTED_BYTECODE, REG_EONIG_INTERNAL },
{ ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED, REG_EONIG_BADARG },
{ ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR, REG_EONIG_BADARG },
{ ONIGERR_INVALID_ARGUMENT, REG_EONIG_BADARG },
{ ONIGERR_END_PATTERN_AT_LEFT_BRACE, REG_EBRACE },
{ ONIGERR_END_PATTERN_AT_LEFT_BRACKET, REG_EBRACK },
{ ONIGERR_EMPTY_CHAR_CLASS, REG_ECTYPE },
{ ONIGERR_PREMATURE_END_OF_CHAR_CLASS, REG_ECTYPE },
{ ONIGERR_END_PATTERN_AT_ESCAPE, REG_EESCAPE },
{ ONIGERR_END_PATTERN_AT_META, REG_EESCAPE },
{ ONIGERR_END_PATTERN_AT_CONTROL, REG_EESCAPE },
{ ONIGERR_META_CODE_SYNTAX, REG_BADPAT },
{ ONIGERR_CONTROL_CODE_SYNTAX, REG_BADPAT },
{ ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE, REG_ECTYPE },
{ ONIGERR_CHAR_CLASS_VALUE_AT_START_OF_RANGE, REG_ECTYPE },
{ ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS, REG_ECTYPE },
{ ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED, REG_BADRPT },
{ ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID, REG_BADRPT },
{ ONIGERR_NESTED_REPEAT_OPERATOR, REG_BADRPT },
{ ONIGERR_UNMATCHED_CLOSE_PARENTHESIS, REG_EPAREN },
{ ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS, REG_EPAREN },
{ ONIGERR_END_PATTERN_IN_GROUP, REG_BADPAT },
{ ONIGERR_UNDEFINED_GROUP_OPTION, REG_BADPAT },
{ ONIGERR_INVALID_POSIX_BRACKET_TYPE, REG_BADPAT },
{ ONIGERR_INVALID_LOOK_BEHIND_PATTERN, REG_BADPAT },
{ ONIGERR_INVALID_REPEAT_RANGE_PATTERN, REG_BADPAT },
{ ONIGERR_TOO_BIG_NUMBER, REG_BADPAT },
{ ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE, REG_BADBR },
{ ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE, REG_BADBR },
{ ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS, REG_ECTYPE },
{ ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE, REG_ECTYPE },
{ ONIGERR_TOO_MANY_MULTI_BYTE_RANGES, REG_ECTYPE },
{ ONIGERR_TOO_SHORT_MULTI_BYTE_STRING, REG_BADPAT },
{ ONIGERR_TOO_BIG_BACKREF_NUMBER, REG_ESUBREG },
{ ONIGERR_INVALID_BACKREF, REG_ESUBREG },
{ ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED, REG_BADPAT },
{ ONIGERR_TOO_BIG_WIDE_CHAR_VALUE, REG_EONIG_BADWC },
{ ONIGERR_TOO_LONG_WIDE_CHAR_VALUE, REG_EONIG_BADWC },
{ ONIGERR_INVALID_CODE_POINT_VALUE, REG_EONIG_BADWC },
{ ONIGERR_EMPTY_GROUP_NAME, REG_BADPAT },
{ ONIGERR_INVALID_GROUP_NAME, REG_BADPAT },
{ ONIGERR_INVALID_CHAR_IN_GROUP_NAME, REG_BADPAT },
{ ONIGERR_UNDEFINED_NAME_REFERENCE, REG_BADPAT },
{ ONIGERR_UNDEFINED_GROUP_REFERENCE, REG_BADPAT },
{ ONIGERR_MULTIPLEX_DEFINED_NAME, REG_BADPAT },
{ ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL, REG_BADPAT },
{ ONIGERR_NEVER_ENDING_RECURSION, REG_BADPAT },
{ ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY, REG_BADPAT },
{ ONIGERR_INVALID_CHAR_PROPERTY_NAME, REG_BADPAT },
{ ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION, REG_EONIG_BADARG },
{ ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT, REG_EONIG_THREAD }
};
int i;
if (code >= 0) return 0;
for (i = 0; i < (int )(sizeof(o2p) / sizeof(o2p[0])); i++) {
if (code == o2p[i].onig_err)
return o2p[i].posix_err;
}
return REG_EONIG_INTERNAL; /* but, unknown error code */
}
extern int
regcomp(regex_t* reg, const char* pattern, int posix_options)
{
int r, len;
OnigSyntaxType* syntax = OnigDefaultSyntax;
OnigOptionType options;
if ((posix_options & REG_EXTENDED) == 0)
syntax = ONIG_SYNTAX_POSIX_BASIC;
options = syntax->options;
if ((posix_options & REG_ICASE) != 0)
ONIG_OPTION_ON(options, ONIG_OPTION_IGNORECASE);
if ((posix_options & REG_NEWLINE) != 0) {
ONIG_OPTION_ON( options, ONIG_OPTION_NEGATE_SINGLELINE);
ONIG_OPTION_OFF(options, ONIG_OPTION_SINGLELINE);
}
reg->comp_options = posix_options;
ENC_STRING_LEN(OnigEncDefaultCharEncoding, pattern, len);
r = onig_new(PONIG_C(reg), (UChar* )pattern, (UChar* )(pattern + len),
options, OnigEncDefaultCharEncoding, syntax,
(OnigErrorInfo* )NULL);
if (r != ONIG_NORMAL) {
return onig2posix_error_code(r);
}
reg->re_nsub = ONIG_C(reg)->num_mem;
return 0;
}
extern int
regexec(regex_t* reg, const char* str, size_t nmatch,
regmatch_t pmatch[], int posix_options)
{
int r, i, len;
UChar* end;
regmatch_t* pm;
OnigOptionType options;
options = ONIG_OPTION_POSIX_REGION;
if ((posix_options & REG_NOTBOL) != 0) options |= ONIG_OPTION_NOTBOL;
if ((posix_options & REG_NOTEOL) != 0) options |= ONIG_OPTION_NOTEOL;
if (nmatch == 0 || (reg->comp_options & REG_NOSUB) != 0) {
pm = (regmatch_t* )NULL;
nmatch = 0;
}
else if ((int )nmatch < ONIG_C(reg)->num_mem + 1) {
pm = (regmatch_t* )xmalloc(sizeof(regmatch_t)
* (ONIG_C(reg)->num_mem + 1));
if (pm == NULL)
return REG_ESPACE;
}
else {
pm = pmatch;
}
ENC_STRING_LEN(ONIG_C(reg)->enc, str, len);
end = (UChar* )(str + len);
r = onig_search(ONIG_C(reg), (UChar* )str, end, (UChar* )str, end,
(OnigRegion* )pm, options);
if (r >= 0) {
r = 0; /* Match */
if (pm != pmatch && pm != NULL) {
xmemcpy(pmatch, pm, sizeof(regmatch_t) * nmatch);
}
}
else if (r == ONIG_MISMATCH) {
r = REG_NOMATCH;
for (i = 0; i < (int )nmatch; i++)
pmatch[i].rm_so = pmatch[i].rm_eo = ONIG_REGION_NOTPOS;
}
else {
r = onig2posix_error_code(r);
}
if (pm != pmatch && pm != NULL)
xfree(pm);
#if 0
if (reg->re_nsub > nmatch - 1)
reg->re_nsub = (nmatch <= 1 ? 0 : nmatch - 1);
#endif
return r;
}
extern void
regfree(regex_t* reg)
{
onig_free(ONIG_C(reg));
}
extern void
reg_set_encoding(int mb_code)
{
OnigEncoding enc;
switch (mb_code) {
case REG_POSIX_ENCODING_ASCII:
enc = ONIG_ENCODING_ASCII;
break;
case REG_POSIX_ENCODING_EUC_JP:
enc = ONIG_ENCODING_EUC_JP;
break;
case REG_POSIX_ENCODING_SJIS:
enc = ONIG_ENCODING_SJIS;
break;
case REG_POSIX_ENCODING_UTF8:
enc = ONIG_ENCODING_UTF8;
break;
case REG_POSIX_ENCODING_UTF16_BE:
enc = ONIG_ENCODING_UTF16_BE;
break;
case REG_POSIX_ENCODING_UTF16_LE:
enc = ONIG_ENCODING_UTF16_LE;
break;
default:
return ;
break;
}
onigenc_set_default_encoding(enc);
}
extern int
reg_name_to_group_numbers(regex_t* reg,
const unsigned char* name, const unsigned char* name_end, int** nums)
{
return onig_name_to_group_numbers(ONIG_C(reg), name, name_end, nums);
}
typedef struct {
int (*func)(const unsigned char*, const unsigned char*,int,int*,regex_t*,void*);
regex_t* reg;
void* arg;
} i_wrap;
static int
i_wrapper(const UChar* name, const UChar* name_end, int ng, int* gs,
onig_regex_t* reg ARG_UNUSED, void* arg)
{
i_wrap* warg = (i_wrap* )arg;
return (*warg->func)(name, name_end, ng, gs, warg->reg, warg->arg);
}
extern int
reg_foreach_name(regex_t* reg,
int (*func)(const unsigned char*, const unsigned char*,int,int*,regex_t*,void*),
void* arg)
{
i_wrap warg;
warg.func = func;
warg.reg = reg;
warg.arg = arg;
return onig_foreach_name(ONIG_C(reg), i_wrapper, &warg);
}
extern int
reg_number_of_names(regex_t* reg)
{
return onig_number_of_names(ONIG_C(reg));
}

315
oniguruma/regsyntax.c Normal file
View file

@ -0,0 +1,315 @@
/**********************************************************************
regsyntax.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "regint.h"
OnigSyntaxType OnigSyntaxASIS = {
0
, ONIG_SYN_OP2_INEFFECTIVE_ESCAPE
, 0
, ONIG_OPTION_NONE
,
{
(OnigCodePoint )'\\' /* esc */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
}
};
OnigSyntaxType OnigSyntaxPosixBasic = {
( SYN_POSIX_COMMON_OP | ONIG_SYN_OP_ESC_LPAREN_SUBEXP |
ONIG_SYN_OP_ESC_BRACE_INTERVAL )
, 0
, 0
, ( ONIG_OPTION_SINGLELINE | ONIG_OPTION_MULTILINE )
,
{
(OnigCodePoint )'\\' /* esc */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
}
};
OnigSyntaxType OnigSyntaxPosixExtended = {
( SYN_POSIX_COMMON_OP | ONIG_SYN_OP_LPAREN_SUBEXP |
ONIG_SYN_OP_BRACE_INTERVAL |
ONIG_SYN_OP_PLUS_ONE_INF | ONIG_SYN_OP_QMARK_ZERO_ONE | ONIG_SYN_OP_VBAR_ALT )
, 0
, ( ONIG_SYN_CONTEXT_INDEP_ANCHORS |
ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS | ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS |
ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP |
ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC )
, ( ONIG_OPTION_SINGLELINE | ONIG_OPTION_MULTILINE )
,
{
(OnigCodePoint )'\\' /* esc */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
}
};
OnigSyntaxType OnigSyntaxEmacs = {
( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_BRACKET_CC |
ONIG_SYN_OP_ESC_BRACE_INTERVAL |
ONIG_SYN_OP_ESC_LPAREN_SUBEXP | ONIG_SYN_OP_ESC_VBAR_ALT |
ONIG_SYN_OP_ASTERISK_ZERO_INF | ONIG_SYN_OP_PLUS_ONE_INF |
ONIG_SYN_OP_QMARK_ZERO_ONE | ONIG_SYN_OP_DECIMAL_BACKREF |
ONIG_SYN_OP_LINE_ANCHOR | ONIG_SYN_OP_ESC_CONTROL_CHARS )
, ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR
, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC
, ONIG_OPTION_NONE
,
{
(OnigCodePoint )'\\' /* esc */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
}
};
OnigSyntaxType OnigSyntaxGrep = {
( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_BRACKET_CC | ONIG_SYN_OP_POSIX_BRACKET |
ONIG_SYN_OP_ESC_BRACE_INTERVAL | ONIG_SYN_OP_ESC_LPAREN_SUBEXP |
ONIG_SYN_OP_ESC_VBAR_ALT |
ONIG_SYN_OP_ASTERISK_ZERO_INF | ONIG_SYN_OP_ESC_PLUS_ONE_INF |
ONIG_SYN_OP_ESC_QMARK_ZERO_ONE | ONIG_SYN_OP_LINE_ANCHOR |
ONIG_SYN_OP_ESC_W_WORD | ONIG_SYN_OP_ESC_B_WORD_BOUND |
ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END | ONIG_SYN_OP_DECIMAL_BACKREF )
, 0
, ( ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC | ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC )
, ONIG_OPTION_NONE
,
{
(OnigCodePoint )'\\' /* esc */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
}
};
OnigSyntaxType OnigSyntaxGnuRegex = {
SYN_GNU_REGEX_OP
, 0
, SYN_GNU_REGEX_BV
, ONIG_OPTION_NONE
,
{
(OnigCodePoint )'\\' /* esc */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
}
};
OnigSyntaxType OnigSyntaxJava = {
(( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |
ONIG_SYN_OP_ESC_CONTROL_CHARS | ONIG_SYN_OP_ESC_C_CONTROL |
ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 )
& ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )
, ( ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE | ONIG_SYN_OP2_QMARK_GROUP_EFFECT |
ONIG_SYN_OP2_OPTION_PERL | ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT |
ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL | ONIG_SYN_OP2_CCLASS_SET_OP |
ONIG_SYN_OP2_ESC_V_VTAB | ONIG_SYN_OP2_ESC_U_HEX4 |
ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY )
, ( SYN_GNU_REGEX_BV | ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND )
, ONIG_OPTION_SINGLELINE
,
{
(OnigCodePoint )'\\' /* esc */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
}
};
OnigSyntaxType OnigSyntaxPerl = {
(( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |
ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 |
ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_CONTROL_CHARS |
ONIG_SYN_OP_ESC_C_CONTROL )
& ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )
, ( ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE |
ONIG_SYN_OP2_QMARK_GROUP_EFFECT | ONIG_SYN_OP2_OPTION_PERL |
ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY |
ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT )
, SYN_GNU_REGEX_BV
, ONIG_OPTION_SINGLELINE
,
{
(OnigCodePoint )'\\' /* esc */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
}
};
/* Perl + named group */
OnigSyntaxType OnigSyntaxPerl_NG = {
(( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |
ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 |
ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_CONTROL_CHARS |
ONIG_SYN_OP_ESC_C_CONTROL )
& ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )
, ( ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE |
ONIG_SYN_OP2_QMARK_GROUP_EFFECT | ONIG_SYN_OP2_OPTION_PERL |
ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY |
ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT |
ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP |
ONIG_SYN_OP2_ESC_K_NAMED_BACKREF |
ONIG_SYN_OP2_ESC_G_SUBEXP_CALL )
, ( SYN_GNU_REGEX_BV |
ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP |
ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME )
, ONIG_OPTION_SINGLELINE
,
{
(OnigCodePoint )'\\' /* esc */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
}
};
extern int
onig_set_default_syntax(OnigSyntaxType* syntax)
{
if (IS_NULL(syntax))
syntax = ONIG_SYNTAX_RUBY;
OnigDefaultSyntax = syntax;
return 0;
}
extern void
onig_copy_syntax(OnigSyntaxType* to, OnigSyntaxType* from)
{
*to = *from;
}
extern void
onig_set_syntax_op(OnigSyntaxType* syntax, unsigned int op)
{
syntax->op = op;
}
extern void
onig_set_syntax_op2(OnigSyntaxType* syntax, unsigned int op2)
{
syntax->op2 = op2;
}
extern void
onig_set_syntax_behavior(OnigSyntaxType* syntax, unsigned int behavior)
{
syntax->behavior = behavior;
}
extern void
onig_set_syntax_options(OnigSyntaxType* syntax, OnigOptionType options)
{
syntax->options = options;
}
extern unsigned int
onig_get_syntax_op(OnigSyntaxType* syntax)
{
return syntax->op;
}
extern unsigned int
onig_get_syntax_op2(OnigSyntaxType* syntax)
{
return syntax->op2;
}
extern unsigned int
onig_get_syntax_behavior(OnigSyntaxType* syntax)
{
return syntax->behavior;
}
extern OnigOptionType
onig_get_syntax_options(OnigSyntaxType* syntax)
{
return syntax->options;
}
#ifdef USE_VARIABLE_META_CHARS
extern int onig_set_meta_char(OnigSyntaxType* enc,
unsigned int what, OnigCodePoint code)
{
switch (what) {
case ONIG_META_CHAR_ESCAPE:
enc->meta_char_table.esc = code;
break;
case ONIG_META_CHAR_ANYCHAR:
enc->meta_char_table.anychar = code;
break;
case ONIG_META_CHAR_ANYTIME:
enc->meta_char_table.anytime = code;
break;
case ONIG_META_CHAR_ZERO_OR_ONE_TIME:
enc->meta_char_table.zero_or_one_time = code;
break;
case ONIG_META_CHAR_ONE_OR_MORE_TIME:
enc->meta_char_table.one_or_more_time = code;
break;
case ONIG_META_CHAR_ANYCHAR_ANYTIME:
enc->meta_char_table.anychar_anytime = code;
break;
default:
return ONIGERR_INVALID_ARGUMENT;
break;
}
return 0;
}
#endif /* USE_VARIABLE_META_CHARS */

76
oniguruma/regtrav.c Normal file
View file

@ -0,0 +1,76 @@
/**********************************************************************
regtrav.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2004 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "regint.h"
#ifdef USE_CAPTURE_HISTORY
static int
capture_tree_traverse(OnigCaptureTreeNode* node, int at,
int(*callback_func)(int,int,int,int,int,void*),
int level, void* arg)
{
int r, i;
if (node == (OnigCaptureTreeNode* )0)
return 0;
if ((at & ONIG_TRAVERSE_CALLBACK_AT_FIRST) != 0) {
r = (*callback_func)(node->group, node->beg, node->end,
level, ONIG_TRAVERSE_CALLBACK_AT_FIRST, arg);
if (r != 0) return r;
}
for (i = 0; i < node->num_childs; i++) {
r = capture_tree_traverse(node->childs[i], at,
callback_func, level + 1, arg);
if (r != 0) return r;
}
if ((at & ONIG_TRAVERSE_CALLBACK_AT_LAST) != 0) {
r = (*callback_func)(node->group, node->beg, node->end,
level, ONIG_TRAVERSE_CALLBACK_AT_LAST, arg);
if (r != 0) return r;
}
return 0;
}
#endif /* USE_CAPTURE_HISTORY */
extern int
onig_capture_tree_traverse(OnigRegion* region, int at,
int(*callback_func)(int,int,int,int,int,void*), void* arg)
{
#ifdef USE_CAPTURE_HISTORY
return capture_tree_traverse(region->history_root, at,
callback_func, 0, arg);
#else
return ONIG_NO_SUPPORT_CONFIG;
#endif
}

56
oniguruma/regversion.c Normal file
View file

@ -0,0 +1,56 @@
/**********************************************************************
regversion.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "config.h"
#include "oniguruma.h"
#include <stdio.h>
extern const char*
onig_version(void)
{
static char s[12];
sprintf(s, "%d.%d.%d",
ONIGURUMA_VERSION_MAJOR,
ONIGURUMA_VERSION_MINOR,
ONIGURUMA_VERSION_TEENY);
return s;
}
extern const char*
onig_copyright(void)
{
static char s[58];
sprintf(s, "Oniguruma %d.%d.%d : Copyright (C) 2002-2008 K.Kosako",
ONIGURUMA_VERSION_MAJOR,
ONIGURUMA_VERSION_MINOR,
ONIGURUMA_VERSION_TEENY);
return s;
}

View file

@ -0,0 +1,25 @@
noinst_PROGRAMS = encode listcap names posix simple sql syntax crnl
libname = $(top_builddir)/libonig.la
LDADD = $(libname)
INCLUDES = -I$(top_srcdir) -I$(includedir)
encode_SOURCES = encode.c
listcap_SOURCES = listcap.c
names_SOURCES = names.c
posix_SOURCES = posix.c
simple_SOURCES = simple.c
sql_SOURCES = sql.c
syntax_SOURCES = syntax.c
sampledir = $(top_builddir)/sample
test: encode listcap names posix simple sql syntax
@$(sampledir)/encode
@$(sampledir)/listcap
@$(sampledir)/names
@$(sampledir)/posix
@$(sampledir)/simple
@$(sampledir)/sql
@$(sampledir)/syntax

View file

@ -0,0 +1,518 @@
# Makefile.in generated by automake 1.10 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
# 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
# PARTICULAR PURPOSE.
@SET_MAKE@
VPATH = @srcdir@
pkgdatadir = $(datadir)/@PACKAGE@
pkglibdir = $(libdir)/@PACKAGE@
pkgincludedir = $(includedir)/@PACKAGE@
am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
install_sh_DATA = $(install_sh) -c -m 644
install_sh_PROGRAM = $(install_sh) -c
install_sh_SCRIPT = $(install_sh) -c
INSTALL_HEADER = $(INSTALL_DATA)
transform = $(program_transform_name)
NORMAL_INSTALL = :
PRE_INSTALL = :
POST_INSTALL = :
NORMAL_UNINSTALL = :
PRE_UNINSTALL = :
POST_UNINSTALL = :
build_triplet = @build@
host_triplet = @host@
noinst_PROGRAMS = encode$(EXEEXT) listcap$(EXEEXT) names$(EXEEXT) \
posix$(EXEEXT) simple$(EXEEXT) sql$(EXEEXT) syntax$(EXEEXT) \
crnl$(EXEEXT)
subdir = sample
DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
am__aclocal_m4_deps = $(top_srcdir)/configure.in
am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
$(ACLOCAL_M4)
mkinstalldirs = $(install_sh) -d
CONFIG_HEADER = $(top_builddir)/config.h
CONFIG_CLEAN_FILES =
PROGRAMS = $(noinst_PROGRAMS)
crnl_SOURCES = crnl.c
crnl_OBJECTS = crnl.$(OBJEXT)
crnl_LDADD = $(LDADD)
crnl_DEPENDENCIES = $(libname)
am_encode_OBJECTS = encode.$(OBJEXT)
encode_OBJECTS = $(am_encode_OBJECTS)
encode_LDADD = $(LDADD)
encode_DEPENDENCIES = $(libname)
am_listcap_OBJECTS = listcap.$(OBJEXT)
listcap_OBJECTS = $(am_listcap_OBJECTS)
listcap_LDADD = $(LDADD)
listcap_DEPENDENCIES = $(libname)
am_names_OBJECTS = names.$(OBJEXT)
names_OBJECTS = $(am_names_OBJECTS)
names_LDADD = $(LDADD)
names_DEPENDENCIES = $(libname)
am_posix_OBJECTS = posix.$(OBJEXT)
posix_OBJECTS = $(am_posix_OBJECTS)
posix_LDADD = $(LDADD)
posix_DEPENDENCIES = $(libname)
am_simple_OBJECTS = simple.$(OBJEXT)
simple_OBJECTS = $(am_simple_OBJECTS)
simple_LDADD = $(LDADD)
simple_DEPENDENCIES = $(libname)
am_sql_OBJECTS = sql.$(OBJEXT)
sql_OBJECTS = $(am_sql_OBJECTS)
sql_LDADD = $(LDADD)
sql_DEPENDENCIES = $(libname)
am_syntax_OBJECTS = syntax.$(OBJEXT)
syntax_OBJECTS = $(am_syntax_OBJECTS)
syntax_LDADD = $(LDADD)
syntax_DEPENDENCIES = $(libname)
DEFAULT_INCLUDES = -I. -I$(top_builddir)@am__isrc@
depcomp = $(SHELL) $(top_srcdir)/depcomp
am__depfiles_maybe = depfiles
COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
$(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
LTCOMPILE = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
--mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
$(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
CCLD = $(CC)
LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
--mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \
$(LDFLAGS) -o $@
SOURCES = crnl.c $(encode_SOURCES) $(listcap_SOURCES) $(names_SOURCES) \
$(posix_SOURCES) $(simple_SOURCES) $(sql_SOURCES) \
$(syntax_SOURCES)
DIST_SOURCES = crnl.c $(encode_SOURCES) $(listcap_SOURCES) \
$(names_SOURCES) $(posix_SOURCES) $(simple_SOURCES) \
$(sql_SOURCES) $(syntax_SOURCES)
ETAGS = etags
CTAGS = ctags
DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
ACLOCAL = @ACLOCAL@
ALLOCA = @ALLOCA@
AMTAR = @AMTAR@
AR = @AR@
AUTOCONF = @AUTOCONF@
AUTOHEADER = @AUTOHEADER@
AUTOMAKE = @AUTOMAKE@
AWK = @AWK@
CC = @CC@
CCDEPMODE = @CCDEPMODE@
CFLAGS = @CFLAGS@
CPP = @CPP@
CPPFLAGS = @CPPFLAGS@
CYGPATH_W = @CYGPATH_W@
DEFS = @DEFS@
DEPDIR = @DEPDIR@
DSYMUTIL = @DSYMUTIL@
DUMPBIN = @DUMPBIN@
ECHO_C = @ECHO_C@
ECHO_N = @ECHO_N@
ECHO_T = @ECHO_T@
EGREP = @EGREP@
EXEEXT = @EXEEXT@
FGREP = @FGREP@
GREP = @GREP@
INSTALL = @INSTALL@
INSTALL_DATA = @INSTALL_DATA@
INSTALL_PROGRAM = @INSTALL_PROGRAM@
INSTALL_SCRIPT = @INSTALL_SCRIPT@
INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
LD = @LD@
LDFLAGS = @LDFLAGS@
LIBOBJS = @LIBOBJS@
LIBS = @LIBS@
LIBTOOL = @LIBTOOL@
LIPO = @LIPO@
LN_S = @LN_S@
LTLIBOBJS = @LTLIBOBJS@
LTVERSION = @LTVERSION@
MAKEINFO = @MAKEINFO@
MKDIR_P = @MKDIR_P@
NM = @NM@
NMEDIT = @NMEDIT@
OBJEXT = @OBJEXT@
OTOOL = @OTOOL@
OTOOL64 = @OTOOL64@
PACKAGE = @PACKAGE@
PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
PACKAGE_NAME = @PACKAGE_NAME@
PACKAGE_STRING = @PACKAGE_STRING@
PACKAGE_TARNAME = @PACKAGE_TARNAME@
PACKAGE_VERSION = @PACKAGE_VERSION@
PATH_SEPARATOR = @PATH_SEPARATOR@
RANLIB = @RANLIB@
RUBYDIR = @RUBYDIR@
SED = @SED@
SET_MAKE = @SET_MAKE@
SHELL = @SHELL@
STATISTICS = @STATISTICS@
STRIP = @STRIP@
VERSION = @VERSION@
abs_builddir = @abs_builddir@
abs_srcdir = @abs_srcdir@
abs_top_builddir = @abs_top_builddir@
abs_top_srcdir = @abs_top_srcdir@
ac_ct_CC = @ac_ct_CC@
ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
am__include = @am__include@
am__leading_dot = @am__leading_dot@
am__quote = @am__quote@
am__tar = @am__tar@
am__untar = @am__untar@
bindir = @bindir@
build = @build@
build_alias = @build_alias@
build_cpu = @build_cpu@
build_os = @build_os@
build_vendor = @build_vendor@
builddir = @builddir@
datadir = @datadir@
datarootdir = @datarootdir@
docdir = @docdir@
dvidir = @dvidir@
exec_prefix = @exec_prefix@
host = @host@
host_alias = @host_alias@
host_cpu = @host_cpu@
host_os = @host_os@
host_vendor = @host_vendor@
htmldir = @htmldir@
includedir = @includedir@
infodir = @infodir@
install_sh = @install_sh@
libdir = @libdir@
libexecdir = @libexecdir@
localedir = @localedir@
localstatedir = @localstatedir@
lt_ECHO = @lt_ECHO@
mandir = @mandir@
mkdir_p = @mkdir_p@
oldincludedir = @oldincludedir@
pdfdir = @pdfdir@
prefix = @prefix@
program_transform_name = @program_transform_name@
psdir = @psdir@
sbindir = @sbindir@
sharedstatedir = @sharedstatedir@
srcdir = @srcdir@
sysconfdir = @sysconfdir@
target_alias = @target_alias@
top_builddir = @top_builddir@
top_srcdir = @top_srcdir@
libname = $(top_builddir)/libonig.la
LDADD = $(libname)
INCLUDES = -I$(top_srcdir) -I$(includedir)
encode_SOURCES = encode.c
listcap_SOURCES = listcap.c
names_SOURCES = names.c
posix_SOURCES = posix.c
simple_SOURCES = simple.c
sql_SOURCES = sql.c
syntax_SOURCES = syntax.c
sampledir = $(top_builddir)/sample
all: all-am
.SUFFIXES:
.SUFFIXES: .c .lo .o .obj
$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps)
@for dep in $?; do \
case '$(am__configure_deps)' in \
*$$dep*) \
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh \
&& exit 0; \
exit 1;; \
esac; \
done; \
echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign sample/Makefile'; \
cd $(top_srcdir) && \
$(AUTOMAKE) --foreign sample/Makefile
.PRECIOUS: Makefile
Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
@case '$?' in \
*config.status*) \
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
*) \
echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
esac;
$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
$(top_srcdir)/configure: $(am__configure_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
$(ACLOCAL_M4): $(am__aclocal_m4_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
clean-noinstPROGRAMS:
@list='$(noinst_PROGRAMS)'; for p in $$list; do \
f=`echo $$p|sed 's/$(EXEEXT)$$//'`; \
echo " rm -f $$p $$f"; \
rm -f $$p $$f ; \
done
crnl$(EXEEXT): $(crnl_OBJECTS) $(crnl_DEPENDENCIES)
@rm -f crnl$(EXEEXT)
$(LINK) $(crnl_OBJECTS) $(crnl_LDADD) $(LIBS)
encode$(EXEEXT): $(encode_OBJECTS) $(encode_DEPENDENCIES)
@rm -f encode$(EXEEXT)
$(LINK) $(encode_OBJECTS) $(encode_LDADD) $(LIBS)
listcap$(EXEEXT): $(listcap_OBJECTS) $(listcap_DEPENDENCIES)
@rm -f listcap$(EXEEXT)
$(LINK) $(listcap_OBJECTS) $(listcap_LDADD) $(LIBS)
names$(EXEEXT): $(names_OBJECTS) $(names_DEPENDENCIES)
@rm -f names$(EXEEXT)
$(LINK) $(names_OBJECTS) $(names_LDADD) $(LIBS)
posix$(EXEEXT): $(posix_OBJECTS) $(posix_DEPENDENCIES)
@rm -f posix$(EXEEXT)
$(LINK) $(posix_OBJECTS) $(posix_LDADD) $(LIBS)
simple$(EXEEXT): $(simple_OBJECTS) $(simple_DEPENDENCIES)
@rm -f simple$(EXEEXT)
$(LINK) $(simple_OBJECTS) $(simple_LDADD) $(LIBS)
sql$(EXEEXT): $(sql_OBJECTS) $(sql_DEPENDENCIES)
@rm -f sql$(EXEEXT)
$(LINK) $(sql_OBJECTS) $(sql_LDADD) $(LIBS)
syntax$(EXEEXT): $(syntax_OBJECTS) $(syntax_DEPENDENCIES)
@rm -f syntax$(EXEEXT)
$(LINK) $(syntax_OBJECTS) $(syntax_LDADD) $(LIBS)
mostlyclean-compile:
-rm -f *.$(OBJEXT)
distclean-compile:
-rm -f *.tab.c
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/crnl.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/encode.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/listcap.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/names.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/posix.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/simple.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sql.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/syntax.Po@am__quote@
.c.o:
@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(COMPILE) -c $<
.c.obj:
@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'`
@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(COMPILE) -c `$(CYGPATH_W) '$<'`
.c.lo:
@am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo
@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(LTCOMPILE) -c -o $@ $<
mostlyclean-libtool:
-rm -f *.lo
clean-libtool:
-rm -rf .libs _libs
ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
unique=`for i in $$list; do \
if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
done | \
$(AWK) ' { files[$$0] = 1; } \
END { for (i in files) print i; }'`; \
mkid -fID $$unique
tags: TAGS
TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
$(TAGS_FILES) $(LISP)
tags=; \
here=`pwd`; \
list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
unique=`for i in $$list; do \
if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
done | \
$(AWK) ' { files[$$0] = 1; } \
END { for (i in files) print i; }'`; \
if test -z "$(ETAGS_ARGS)$$tags$$unique"; then :; else \
test -n "$$unique" || unique=$$empty_fix; \
$(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
$$tags $$unique; \
fi
ctags: CTAGS
CTAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
$(TAGS_FILES) $(LISP)
tags=; \
here=`pwd`; \
list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
unique=`for i in $$list; do \
if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
done | \
$(AWK) ' { files[$$0] = 1; } \
END { for (i in files) print i; }'`; \
test -z "$(CTAGS_ARGS)$$tags$$unique" \
|| $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
$$tags $$unique
GTAGS:
here=`$(am__cd) $(top_builddir) && pwd` \
&& cd $(top_srcdir) \
&& gtags -i $(GTAGS_ARGS) $$here
distclean-tags:
-rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
distdir: $(DISTFILES)
@srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
list='$(DISTFILES)'; \
dist_files=`for file in $$list; do echo $$file; done | \
sed -e "s|^$$srcdirstrip/||;t" \
-e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
case $$dist_files in \
*/*) $(MKDIR_P) `echo "$$dist_files" | \
sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
sort -u` ;; \
esac; \
for file in $$dist_files; do \
if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
if test -d $$d/$$file; then \
dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
cp -pR $(srcdir)/$$file $(distdir)$$dir || exit 1; \
fi; \
cp -pR $$d/$$file $(distdir)$$dir || exit 1; \
else \
test -f $(distdir)/$$file \
|| cp -p $$d/$$file $(distdir)/$$file \
|| exit 1; \
fi; \
done
check-am: all-am
check: check-am
all-am: Makefile $(PROGRAMS)
installdirs:
install: install-am
install-exec: install-exec-am
install-data: install-data-am
uninstall: uninstall-am
install-am: all-am
@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
installcheck: installcheck-am
install-strip:
$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
`test -z '$(STRIP)' || \
echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
mostlyclean-generic:
clean-generic:
distclean-generic:
-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
maintainer-clean-generic:
@echo "This command is intended for maintainers to use"
@echo "it deletes files that may require special tools to rebuild."
clean: clean-am
clean-am: clean-generic clean-libtool clean-noinstPROGRAMS \
mostlyclean-am
distclean: distclean-am
-rm -rf ./$(DEPDIR)
-rm -f Makefile
distclean-am: clean-am distclean-compile distclean-generic \
distclean-tags
dvi: dvi-am
dvi-am:
html: html-am
info: info-am
info-am:
install-data-am:
install-dvi: install-dvi-am
install-exec-am:
install-html: install-html-am
install-info: install-info-am
install-man:
install-pdf: install-pdf-am
install-ps: install-ps-am
installcheck-am:
maintainer-clean: maintainer-clean-am
-rm -rf ./$(DEPDIR)
-rm -f Makefile
maintainer-clean-am: distclean-am maintainer-clean-generic
mostlyclean: mostlyclean-am
mostlyclean-am: mostlyclean-compile mostlyclean-generic \
mostlyclean-libtool
pdf: pdf-am
pdf-am:
ps: ps-am
ps-am:
uninstall-am:
.MAKE: install-am install-strip
.PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \
clean-libtool clean-noinstPROGRAMS ctags distclean \
distclean-compile distclean-generic distclean-libtool \
distclean-tags distdir dvi dvi-am html html-am info info-am \
install install-am install-data install-data-am install-dvi \
install-dvi-am install-exec install-exec-am install-html \
install-html-am install-info install-info-am install-man \
install-pdf install-pdf-am install-ps install-ps-am \
install-strip installcheck installcheck-am installdirs \
maintainer-clean maintainer-clean-generic mostlyclean \
mostlyclean-compile mostlyclean-generic mostlyclean-libtool \
pdf pdf-am ps ps-am tags uninstall uninstall-am
test: encode listcap names posix simple sql syntax
@$(sampledir)/encode
@$(sampledir)/listcap
@$(sampledir)/names
@$(sampledir)/posix
@$(sampledir)/simple
@$(sampledir)/sql
@$(sampledir)/syntax
# Tell versions [3.59,3.63) of GNU make to not export all variables.
# Otherwise a system limit (for SysV at least) may be exceeded.
.NOEXPORT:

127
oniguruma/sample/crnl.c Normal file
View file

@ -0,0 +1,127 @@
/*
* crnl.c 2007/05/30 K.Kosako
*
* !!! You should enable USE_CRNL_AS_LINE_TERMINATOR. !!!
*
* USE_CRNL_AS_LINE_TERMINATOR config test program.
*/
#include <stdio.h>
#include <string.h>
#include "oniguruma.h"
static int nfail = 0;
static void result(int no, int from, int to,
int expected_from, int expected_to)
{
fprintf(stderr, "%3d: ", no);
if (from == expected_from && to == expected_to) {
fprintf(stderr, "Success\n");
}
else {
fprintf(stderr, "Fail: expected: (%d-%d), result: (%d-%d)\n",
expected_from, expected_to, from, to);
nfail++;
}
}
static int
x(int no, char* pattern_arg, char* str_arg,
int expected_from, int expected_to)
{
int r;
unsigned char *start, *range, *end;
regex_t* reg;
OnigErrorInfo einfo;
OnigRegion *region;
UChar *pattern, *str;
pattern = (UChar* )pattern_arg;
str = (UChar* )str_arg;
r = onig_new(&reg, pattern, pattern + strlen((char* )pattern),
ONIG_OPTION_DEFAULT, ONIG_ENCODING_ASCII, ONIG_SYNTAX_DEFAULT, &einfo);
if (r != ONIG_NORMAL) {
char s[ONIG_MAX_ERROR_MESSAGE_LEN];
onig_error_code_to_str(s, r, &einfo);
fprintf(stderr, "ERROR: %s\n", s);
return -1;
}
region = onig_region_new();
end = str + strlen((char* )str);
start = str;
range = end;
r = onig_search(reg, str, end, start, range, region, ONIG_OPTION_NONE);
if (r >= 0 || r == ONIG_MISMATCH) {
result(no, region->beg[0], region->end[0], expected_from, expected_to);
}
else if (r == ONIG_MISMATCH) {
result(no, r, -1, expected_from, expected_to);
}
else { /* error */
char s[ONIG_MAX_ERROR_MESSAGE_LEN];
onig_error_code_to_str(s, r);
fprintf(stderr, "ERROR: %s\n", s);
return -1;
}
onig_region_free(region, 1 /* 1:free self, 0:free contents only */);
onig_free(reg);
return 0;
}
static int
f(int no, char* pattern_arg, char* str_arg)
{
return x(no, pattern_arg, str_arg, -1, -1);
}
extern int main(int argc, char* argv[])
{
x( 1, "", "\r\n", 0, 0);
x( 2, ".", "\r\n", 0, 1);
f( 3, "..", "\r\n");
x( 4, "^", "\r\n", 0, 0);
x( 5, "\\n^", "\r\nf", 1, 2);
x( 6, "\\n^a", "\r\na", 1, 3);
x( 7, "$", "\r\n", 0, 0);
x( 8, "T$", "T\r\n", 0, 1);
x( 9, "T$", "T\raT\r\n", 3, 4);
x(10, "\\z", "\r\n", 2, 2);
f(11, "a\\z", "a\r\n");
x(12, "\\Z", "\r\n", 0, 0);
x(13, "\\Z", "\r\na", 3, 3);
x(14, "\\Z", "\r\n\r\n\n", 4, 4);
x(15, "\\Z", "\r\n\r\nX", 5, 5);
x(16, "a\\Z", "a\r\n", 0, 1);
x(17, "aaaaaaaaaaaaaaa\\Z", "aaaaaaaaaaaaaaa\r\n", 0, 15);
x(18, "a|$", "b\r\n", 1, 1);
x(19, "$|b", "\rb", 1, 2);
x(20, "a$|ab$", "\r\nab\r\n", 2, 4);
x(21, "a|\\Z", "b\r\n", 1, 1);
x(22, "\\Z|b", "\rb", 1, 2);
x(23, "a\\Z|ab\\Z", "\r\nab\r\n", 2, 4);
x(24, "(?=a$).", "a\r\n", 0, 1);
f(25, "(?=a$).", "a\r");
x(26, "(?!a$)..", "a\r", 0, 2);
x(27, "(?<=a$).\\n", "a\r\n", 1, 3);
f(28, "(?<!a$).\\n", "a\r\n");
x(29, "(?=a\\Z).", "a\r\n", 0, 1);
f(30, "(?=a\\Z).", "a\r");
x(31, "(?!a\\Z)..", "a\r", 0, 2);
onig_end();
if (nfail > 0) {
fprintf(stderr, "\n");
fprintf(stderr, "!!! You have to enable USE_CRNL_AS_LINE_TERMINATOR\n");
fprintf(stderr, "!!! in regenc.h for this test program.\n");
fprintf(stderr, "\n");
}
return 0;
}

298
oniguruma/sample/encode.c Normal file
View file

@ -0,0 +1,298 @@
/*
* encode.c
*/
#include <stdio.h>
#include "oniguruma.h"
static int
search(regex_t* reg, unsigned char* str, unsigned char* end)
{
int r;
unsigned char *start, *range;
OnigRegion *region;
region = onig_region_new();
start = str;
range = end;
r = onig_search(reg, str, end, start, range, region, ONIG_OPTION_NONE);
if (r >= 0) {
int i;
fprintf(stderr, "match at %d (%s)\n", r,
ONIGENC_NAME(onig_get_encoding(reg)));
for (i = 0; i < region->num_regs; i++) {
fprintf(stderr, "%d: (%d-%d)\n", i, region->beg[i], region->end[i]);
}
}
else if (r == ONIG_MISMATCH) {
fprintf(stderr, "search fail (%s)\n",
ONIGENC_NAME(onig_get_encoding(reg)));
}
else { /* error */
char s[ONIG_MAX_ERROR_MESSAGE_LEN];
onig_error_code_to_str(s, r);
fprintf(stderr, "ERROR: %s\n", s);
fprintf(stderr, " (%s)\n", ONIGENC_NAME(onig_get_encoding(reg)));
return -1;
}
onig_region_free(region, 1 /* 1:free self, 0:free contents only */);
return 0;
}
static int
exec(OnigEncoding enc, OnigOptionType options,
char* apattern, char* astr)
{
int r;
unsigned char *end;
regex_t* reg;
OnigErrorInfo einfo;
UChar* pattern = (UChar* )apattern;
UChar* str = (UChar* )astr;
r = onig_new(&reg, pattern,
pattern + onigenc_str_bytelen_null(enc, pattern),
options, enc, ONIG_SYNTAX_DEFAULT, &einfo);
if (r != ONIG_NORMAL) {
char s[ONIG_MAX_ERROR_MESSAGE_LEN];
onig_error_code_to_str(s, r, &einfo);
fprintf(stderr, "ERROR: %s\n", s);
return -1;
}
end = str + onigenc_str_bytelen_null(enc, str);
r = search(reg, str, end);
onig_free(reg);
onig_end();
return 0;
}
static OnigCaseFoldType CF = ONIGENC_CASE_FOLD_MIN;
#if 0
static void
set_case_fold(OnigCaseFoldType cf)
{
CF = cf;
}
#endif
static int
exec_deluxe(OnigEncoding pattern_enc, OnigEncoding str_enc,
OnigOptionType options, char* apattern, char* astr)
{
int r;
unsigned char *end;
regex_t* reg;
OnigCompileInfo ci;
OnigErrorInfo einfo;
UChar* pattern = (UChar* )apattern;
UChar* str = (UChar* )astr;
ci.num_of_elements = 5;
ci.pattern_enc = pattern_enc;
ci.target_enc = str_enc;
ci.syntax = ONIG_SYNTAX_DEFAULT;
ci.option = options;
ci.case_fold_flag = CF;
r = onig_new_deluxe(&reg, pattern,
pattern + onigenc_str_bytelen_null(pattern_enc, pattern),
&ci, &einfo);
if (r != ONIG_NORMAL) {
char s[ONIG_MAX_ERROR_MESSAGE_LEN];
onig_error_code_to_str(s, r, &einfo);
fprintf(stderr, "ERROR: %s\n", s);
return -1;
}
end = str + onigenc_str_bytelen_null(str_enc, str);
r = search(reg, str, end);
onig_free(reg);
onig_end();
return 0;
}
extern int main(int argc, char* argv[])
{
int r;
/* ISO 8859-1 test */
static unsigned char str[] = { 0xc7, 0xd6, 0xfe, 0xea, 0xe0, 0xe2, 0x00 };
static unsigned char pattern[] = { 0xe7, 0xf6, 0xde, '\\', 'w', '+', 0x00 };
r = exec(ONIG_ENCODING_CP1251, ONIG_OPTION_IGNORECASE,
"aBc", " AbC");
r = exec(ONIG_ENCODING_ISO_8859_1, ONIG_OPTION_IGNORECASE,
" [a-c\337z] ", " SS ");
r = exec(ONIG_ENCODING_ISO_8859_1, ONIG_OPTION_IGNORECASE,
" [\330-\341] ", " SS ");
r = exec(ONIG_ENCODING_ISO_8859_2, ONIG_OPTION_IGNORECASE,
"\337 ", " Ss ");
r = exec(ONIG_ENCODING_ISO_8859_2, ONIG_OPTION_IGNORECASE,
"SS ", " \337 ");
r = exec(ONIG_ENCODING_ISO_8859_2, ONIG_OPTION_IGNORECASE,
"\\A\\S\\z", "ss");
r = exec(ONIG_ENCODING_ISO_8859_2, ONIG_OPTION_IGNORECASE,
"[ac]+", "bbbaAaCCC");
r = exec(ONIG_ENCODING_ISO_8859_3, ONIG_OPTION_IGNORECASE,
"[ac]+", "bbbaAaCCC");
r = exec(ONIG_ENCODING_ISO_8859_4, ONIG_OPTION_IGNORECASE,
"[ac]+", "bbbaAaCCC");
r = exec(ONIG_ENCODING_ISO_8859_5, ONIG_OPTION_IGNORECASE,
"[ac]+", "bbbaAaCCC");
r = exec(ONIG_ENCODING_ISO_8859_6, ONIG_OPTION_IGNORECASE,
"[ac]+", "bbbaAaCCC");
r = exec(ONIG_ENCODING_ISO_8859_7, ONIG_OPTION_IGNORECASE,
"[ac]+", "bbbaAaCCC");
r = exec(ONIG_ENCODING_ISO_8859_8, ONIG_OPTION_IGNORECASE,
"[ac]+", "bbbaAaCCC");
r = exec(ONIG_ENCODING_ISO_8859_9, ONIG_OPTION_IGNORECASE,
"[ac]+", "bbbaAaCCC");
r = exec(ONIG_ENCODING_ISO_8859_10, ONIG_OPTION_IGNORECASE,
"[ac]+", "bbbaAaCCC");
r = exec(ONIG_ENCODING_ISO_8859_11, ONIG_OPTION_IGNORECASE,
"[ac]+", "bbbaAaCCC");
r = exec(ONIG_ENCODING_ISO_8859_13, ONIG_OPTION_IGNORECASE,
"[ac]+", "bbbaAaCCC");
r = exec(ONIG_ENCODING_ISO_8859_14, ONIG_OPTION_IGNORECASE,
"[ac]+", "bbbaAaCCC");
r = exec(ONIG_ENCODING_ISO_8859_15, ONIG_OPTION_IGNORECASE,
(char* )pattern, (char* )str);
r = exec(ONIG_ENCODING_ISO_8859_16, ONIG_OPTION_IGNORECASE,
(char* )pattern, (char* )str);
r = exec(ONIG_ENCODING_KOI8_R, ONIG_OPTION_NONE, "a+", "bbbaaaccc");
r = exec(ONIG_ENCODING_EUC_TW, ONIG_OPTION_NONE, "b*a+?c+", "bbbaaaccc");
r = exec(ONIG_ENCODING_EUC_KR, ONIG_OPTION_NONE, "a+", "bbbaaaccc");
r = exec(ONIG_ENCODING_EUC_CN, ONIG_OPTION_NONE, "c+", "bbbaaaccc");
r = exec(ONIG_ENCODING_BIG5, ONIG_OPTION_NONE, "a+", "bbbaaaccc");
r = exec(ONIG_ENCODING_ISO_8859_1, ONIG_OPTION_IGNORECASE,
"\337", "SS");
r = exec(ONIG_ENCODING_ISO_8859_1, ONIG_OPTION_IGNORECASE,
"SS", "\337");
r = exec(ONIG_ENCODING_ISO_8859_1, ONIG_OPTION_IGNORECASE,
"SSb\337ssc", "a\337bSS\337cd");
r = exec(ONIG_ENCODING_ISO_8859_1, ONIG_OPTION_IGNORECASE,
"[a\337]{0,2}", "aSS");
r = exec(ONIG_ENCODING_ISO_8859_1, ONIG_OPTION_IGNORECASE,
"is", "iss");
r = exec_deluxe(ONIG_ENCODING_ASCII, ONIG_ENCODING_UTF16_BE,
ONIG_OPTION_NONE, "a+",
"\000b\000a\000a\000a\000c\000c\000\000");
r = exec_deluxe(ONIG_ENCODING_ASCII, ONIG_ENCODING_UTF16_LE,
ONIG_OPTION_NONE, "a+",
"b\000a\000a\000a\000a\000c\000\000\000");
r = exec_deluxe(ONIG_ENCODING_UTF16_BE, ONIG_ENCODING_UTF16_LE,
ONIG_OPTION_NONE,
"\000b\000a\000a\000a\000c\000c\000\000",
"x\000b\000a\000a\000a\000c\000c\000\000\000");
r = exec_deluxe(ONIG_ENCODING_ISO_8859_1, ONIG_ENCODING_UTF16_BE,
ONIG_OPTION_IGNORECASE,
"\337", "\000S\000S\000\000");
r = exec_deluxe(ONIG_ENCODING_ISO_8859_1, ONIG_ENCODING_UTF16_BE,
ONIG_OPTION_IGNORECASE,
"SS", "\000\337\000\000");
r = exec_deluxe(ONIG_ENCODING_ISO_8859_1, ONIG_ENCODING_UTF16_LE,
ONIG_OPTION_IGNORECASE,
"\337", "S\000S\000\000\000");
r = exec_deluxe(ONIG_ENCODING_ISO_8859_1, ONIG_ENCODING_UTF32_BE,
ONIG_OPTION_IGNORECASE,
"SS", "\000\000\000\337\000\000\000\000");
r = exec_deluxe(ONIG_ENCODING_ISO_8859_1, ONIG_ENCODING_UTF32_LE,
ONIG_OPTION_IGNORECASE,
"\337", "S\000\000\000S\000\000\000\000\000\000\000");
r = exec(ONIG_ENCODING_UTF16_BE, ONIG_OPTION_NONE,
"\000[\000[\000:\000a\000l\000n\000u\000m\000:\000]\000]\000+\000\000",
"\000#\002\120\000a\000Z\012\077\012\076\012\075\000\000");
/* 0x0a3d == \012\075 : is not alnum */
/* 0x0a3e == \012\076 : is alnum */
r = exec(ONIG_ENCODING_UTF16_BE, ONIG_OPTION_NONE,
"\000\\\000d\000+\000\000",
"\0003\0001\377\020\377\031\377\032\000\000");
r = exec(ONIG_ENCODING_GB18030, ONIG_OPTION_IGNORECASE,
"(Aa\\d)+", "BaA5Aa0234");
r = exec_deluxe(ONIG_ENCODING_ISO_8859_1, ONIG_ENCODING_UTF16_BE,
ONIG_OPTION_NONE,
"^\\P{Hiragana}\\p{^Hiragana}(\\p{Hiragana}+)$",
"\060\100\060\240\060\101\060\102\060\226\060\237\000\000");
r = exec_deluxe(ONIG_ENCODING_UTF16_BE, ONIG_ENCODING_UTF16_BE,
ONIG_OPTION_IGNORECASE,
"\000[\000\337\000]\000\000", "\000S\000S\000\000");
r = exec_deluxe(ONIG_ENCODING_UTF16_BE, ONIG_ENCODING_UTF16_BE,
ONIG_OPTION_IGNORECASE,
"\000[\000\337\000]\000\000", "\000s\000S\000\000");
r = exec_deluxe(ONIG_ENCODING_UTF16_BE, ONIG_ENCODING_UTF16_BE,
ONIG_OPTION_IGNORECASE,
"\000^\000[\000\001\000-\377\375\000]\000$\000\000",
"\000s\000S\000\000");
r = exec_deluxe(ONIG_ENCODING_UTF16_BE, ONIG_ENCODING_UTF16_BE,
ONIG_OPTION_IGNORECASE,
"\000S\000S\000\000",
"\000S\000T\000\337\000\000");
r = exec_deluxe(ONIG_ENCODING_UTF16_BE, ONIG_ENCODING_UTF16_BE,
ONIG_OPTION_IGNORECASE,
"\000S\000T\000S\000S\000\000",
"\000S\000t\000s\000S\000\000");
{
UChar pat[] = { 0x1f, 0xfc, 0x00, 0x00 };
UChar str1[] = { 0x21, 0x26, 0x1f, 0xbe, 0x00, 0x00 };
UChar str2[] = { 0x1f, 0xf3, 0x00, 0x00 };
r = exec_deluxe(ONIG_ENCODING_UTF16_BE, ONIG_ENCODING_UTF16_BE,
ONIG_OPTION_IGNORECASE,
(char* )pat, (char* )str1);
r = exec_deluxe(ONIG_ENCODING_UTF16_BE, ONIG_ENCODING_UTF16_BE,
ONIG_OPTION_IGNORECASE,
(char* )pat, (char* )str2);
}
#if 0
/* You should define USE_UNICODE_CASE_FOLD_TURKISH_AZERI in regenc.h. */
set_case_fold(ONIGENC_CASE_FOLD_TURKISH_AZERI);
r = exec_deluxe(ONIG_ENCODING_UTF8, ONIG_ENCODING_UTF8,
ONIG_OPTION_IGNORECASE,
"Ii", "\304\261\304\260");
r = exec_deluxe(ONIG_ENCODING_UTF16_BE, ONIG_ENCODING_UTF16_BE,
ONIG_OPTION_IGNORECASE,
"\000I\000i\000\000", "\001\061\001\060\000\000");
r = exec_deluxe(ONIG_ENCODING_UTF16_BE, ONIG_ENCODING_UTF16_BE,
ONIG_OPTION_IGNORECASE,
"\001\061\001\060\000\000", "\000I\000i\000\000");
set_case_fold(ONIGENC_CASE_FOLD_MIN);
#endif
return 0;
}

107
oniguruma/sample/listcap.c Normal file
View file

@ -0,0 +1,107 @@
/*
* listcap.c
*
* capture history (?@...) sample.
*/
#include <stdio.h>
#include <string.h>
#include "oniguruma.h"
static int
node_callback(int group, int beg, int end, int level, int at, void* arg)
{
int i;
if (at != ONIG_TRAVERSE_CALLBACK_AT_FIRST)
return -1; /* error */
/* indent */
for (i = 0; i < level * 2; i++)
fputc(' ', stderr);
fprintf(stderr, "%d: (%d-%d)\n", group, beg, end);
return 0;
}
extern int ex(unsigned char* str, unsigned char* pattern,
OnigSyntaxType* syntax)
{
int r;
unsigned char *start, *range, *end;
regex_t* reg;
OnigErrorInfo einfo;
OnigRegion *region;
r = onig_new(&reg, pattern, pattern + strlen((char* )pattern),
ONIG_OPTION_DEFAULT, ONIG_ENCODING_ASCII, syntax, &einfo);
if (r != ONIG_NORMAL) {
char s[ONIG_MAX_ERROR_MESSAGE_LEN];
onig_error_code_to_str(s, r, &einfo);
fprintf(stderr, "ERROR: %s\n", s);
return -1;
}
fprintf(stderr, "number of captures: %d\n", onig_number_of_captures(reg));
fprintf(stderr, "number of capture histories: %d\n",
onig_number_of_capture_histories(reg));
region = onig_region_new();
end = str + strlen((char* )str);
start = str;
range = end;
r = onig_search(reg, str, end, start, range, region, ONIG_OPTION_NONE);
if (r >= 0) {
int i;
fprintf(stderr, "match at %d\n", r);
for (i = 0; i < region->num_regs; i++) {
fprintf(stderr, "%d: (%d-%d)\n", i, region->beg[i], region->end[i]);
}
fprintf(stderr, "\n");
r = onig_capture_tree_traverse(region, ONIG_TRAVERSE_CALLBACK_AT_FIRST,
node_callback, (void* )0);
}
else if (r == ONIG_MISMATCH) {
fprintf(stderr, "search fail\n");
}
else { /* error */
char s[ONIG_MAX_ERROR_MESSAGE_LEN];
onig_error_code_to_str(s, r);
return -1;
}
onig_region_free(region, 1 /* 1:free self, 0:free contents only */);
onig_free(reg);
return 0;
}
extern int main(int argc, char* argv[])
{
int r;
OnigSyntaxType syn;
static UChar* str1 = (UChar* )"((())())";
static UChar* pattern1
= (UChar* )"\\g<p>(?@<p>\\(\\g<s>\\)){0}(?@<s>(?:\\g<p>)*|){0}";
static UChar* str2 = (UChar* )"x00x00x00";
static UChar* pattern2 = (UChar* )"(?@x(?@\\d+))+";
static UChar* str3 = (UChar* )"0123";
static UChar* pattern3 = (UChar* )"(?@.)(?@.)(?@.)(?@.)";
/* enable capture hostory */
onig_copy_syntax(&syn, ONIG_SYNTAX_DEFAULT);
onig_set_syntax_op2(&syn,
onig_get_syntax_op2(&syn) | ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY);
r = ex(str1, pattern1, &syn);
r = ex(str2, pattern2, &syn);
r = ex(str3, pattern3, &syn);
onig_end();
return 0;
}

72
oniguruma/sample/names.c Normal file
View file

@ -0,0 +1,72 @@
/*
* names.c -- example of group name callback.
*/
#include <stdio.h>
#include <string.h>
#include "oniguruma.h"
static int
name_callback(const UChar* name, const UChar* name_end,
int ngroup_num, int* group_nums,
regex_t* reg, void* arg)
{
int i, gn, ref;
char* s;
OnigRegion *region = (OnigRegion* )arg;
for (i = 0; i < ngroup_num; i++) {
gn = group_nums[i];
ref = onig_name_to_backref_number(reg, name, name_end, region);
s = (ref == gn ? "*" : "");
fprintf(stderr, "%s (%d): ", name, gn);
fprintf(stderr, "(%d-%d) %s\n", region->beg[gn], region->end[gn], s);
}
return 0; /* 0: continue */
}
extern int main(int argc, char* argv[])
{
int r;
unsigned char *start, *range, *end;
regex_t* reg;
OnigErrorInfo einfo;
OnigRegion *region;
static UChar* pattern = (UChar* )"(?<foo>a*)(?<bar>b*)(?<foo>c*)";
static UChar* str = (UChar* )"aaabbbbcc";
r = onig_new(&reg, pattern, pattern + strlen((char* )pattern),
ONIG_OPTION_DEFAULT, ONIG_ENCODING_ASCII, ONIG_SYNTAX_DEFAULT, &einfo);
if (r != ONIG_NORMAL) {
char s[ONIG_MAX_ERROR_MESSAGE_LEN];
onig_error_code_to_str(s, r, &einfo);
fprintf(stderr, "ERROR: %s\n", s);
return -1;
}
fprintf(stderr, "number of names: %d\n", onig_number_of_names(reg));
region = onig_region_new();
end = str + strlen((char* )str);
start = str;
range = end;
r = onig_search(reg, str, end, start, range, region, ONIG_OPTION_NONE);
if (r >= 0) {
fprintf(stderr, "match at %d\n\n", r);
r = onig_foreach_name(reg, name_callback, (void* )region);
}
else if (r == ONIG_MISMATCH) {
fprintf(stderr, "search fail\n");
}
else { /* error */
char s[ONIG_MAX_ERROR_MESSAGE_LEN];
onig_error_code_to_str(s, r);
return -1;
}
onig_region_free(region, 1 /* 1:free self, 0:free contents only */);
onig_free(reg);
onig_end();
return 0;
}

93
oniguruma/sample/posix.c Normal file
View file

@ -0,0 +1,93 @@
/*
* posix.c
*/
#include <stdio.h>
#include "onigposix.h"
typedef unsigned char UChar;
static int x(regex_t* reg, unsigned char* pattern, unsigned char* str)
{
int r, i;
char buf[200];
regmatch_t pmatch[20];
r = regexec(reg, (char* )str, reg->re_nsub + 1, pmatch, 0);
if (r != 0 && r != REG_NOMATCH) {
regerror(r, reg, buf, sizeof(buf));
fprintf(stderr, "ERROR: %s\n", buf);
return -1;
}
if (r == REG_NOMATCH) {
fprintf(stderr, "FAIL: /%s/ '%s'\n", pattern, str);
}
else {
fprintf(stderr, "OK: /%s/ '%s'\n", pattern, str);
for (i = 0; i <= (int )reg->re_nsub; i++) {
fprintf(stderr, "%d: %d-%d\n", i, pmatch[i].rm_so, pmatch[i].rm_eo);
}
}
return 0;
}
extern int main(int argc, char* argv[])
{
int r;
char buf[200];
regex_t reg;
UChar* pattern;
/* default syntax (ONIG_SYNTAX_RUBY) */
pattern = (UChar* )"^a+b{2,7}[c-f]?$|uuu";
r = regcomp(&reg, (char* )pattern, REG_EXTENDED);
if (r) {
regerror(r, &reg, buf, sizeof(buf));
fprintf(stderr, "ERROR: %s\n", buf);
return -1;
}
x(&reg, pattern, (UChar* )"aaabbbbd");
/* POSIX Basic RE (REG_EXTENDED is not specified.) */
pattern = (UChar* )"^a+b{2,7}[c-f]?|uuu";
r = regcomp(&reg, (char* )pattern, 0);
if (r) {
regerror(r, &reg, buf, sizeof(buf));
fprintf(stderr, "ERROR: %s\n", buf);
return -1;
}
x(&reg, pattern, (UChar* )"a+b{2,7}d?|uuu");
/* POSIX Basic RE (REG_EXTENDED is not specified.) */
pattern = (UChar* )"^a*b\\{2,7\\}\\([c-f]\\)$";
r = regcomp(&reg, (char* )pattern, 0);
if (r) {
regerror(r, &reg, buf, sizeof(buf));
fprintf(stderr, "ERROR: %s\n", buf);
return -1;
}
x(&reg, pattern, (UChar* )"aaaabbbbbbd");
/* POSIX Extended RE */
onig_set_default_syntax(ONIG_SYNTAX_POSIX_EXTENDED);
pattern = (UChar* )"^a+b{2,7}[c-f]?)$|uuu";
r = regcomp(&reg, (char* )pattern, REG_EXTENDED);
if (r) {
regerror(r, &reg, buf, sizeof(buf));
fprintf(stderr, "ERROR: %s\n", buf);
return -1;
}
x(&reg, pattern, (UChar* )"aaabbbbd)");
pattern = (UChar* )"^b.";
r = regcomp(&reg, (char* )pattern, REG_EXTENDED | REG_NEWLINE);
if (r) {
regerror(r, &reg, buf, sizeof(buf));
fprintf(stderr, "ERROR: %s\n", buf);
return -1;
}
x(&reg, pattern, (UChar* )"a\nb\n");
regfree(&reg);
return 0;
}

56
oniguruma/sample/simple.c Normal file
View file

@ -0,0 +1,56 @@
/*
* simple.c
*/
#include <stdio.h>
#include <string.h>
#include "oniguruma.h"
extern int main(int argc, char* argv[])
{
int r;
unsigned char *start, *range, *end;
regex_t* reg;
OnigErrorInfo einfo;
OnigRegion *region;
static UChar* pattern = (UChar* )"a(.*)b|[e-f]+";
static UChar* str = (UChar* )"zzzzaffffffffb";
r = onig_new(&reg, pattern, pattern + strlen((char* )pattern),
ONIG_OPTION_DEFAULT, ONIG_ENCODING_ASCII, ONIG_SYNTAX_DEFAULT, &einfo);
if (r != ONIG_NORMAL) {
char s[ONIG_MAX_ERROR_MESSAGE_LEN];
onig_error_code_to_str(s, r, &einfo);
fprintf(stderr, "ERROR: %s\n", s);
return -1;
}
region = onig_region_new();
end = str + strlen((char* )str);
start = str;
range = end;
r = onig_search(reg, str, end, start, range, region, ONIG_OPTION_NONE);
if (r >= 0) {
int i;
fprintf(stderr, "match at %d\n", r);
for (i = 0; i < region->num_regs; i++) {
fprintf(stderr, "%d: (%d-%d)\n", i, region->beg[i], region->end[i]);
}
}
else if (r == ONIG_MISMATCH) {
fprintf(stderr, "search fail\n");
}
else { /* error */
char s[ONIG_MAX_ERROR_MESSAGE_LEN];
onig_error_code_to_str(s, r);
fprintf(stderr, "ERROR: %s\n", s);
return -1;
}
onig_region_free(region, 1 /* 1:free self, 0:free contents only */);
onig_free(reg);
onig_end();
return 0;
}

73
oniguruma/sample/sql.c Normal file
View file

@ -0,0 +1,73 @@
/*
* sql.c
*/
#include <stdio.h>
#include <string.h>
#include "oniguruma.h"
extern int main(int argc, char* argv[])
{
static OnigSyntaxType SQLSyntax;
int r;
unsigned char *start, *range, *end;
regex_t* reg;
OnigErrorInfo einfo;
OnigRegion *region;
static UChar* pattern = (UChar* )"\\_%\\\\__zz";
static UChar* str = (UChar* )"a_abcabcabc\\ppzz";
onig_set_syntax_op (&SQLSyntax, ONIG_SYN_OP_VARIABLE_META_CHARACTERS);
onig_set_syntax_op2 (&SQLSyntax, 0);
onig_set_syntax_behavior(&SQLSyntax, 0);
onig_set_syntax_options (&SQLSyntax, ONIG_OPTION_MULTILINE);
onig_set_meta_char(&SQLSyntax, ONIG_META_CHAR_ESCAPE, (OnigCodePoint )'\\');
onig_set_meta_char(&SQLSyntax, ONIG_META_CHAR_ANYCHAR, (OnigCodePoint )'_');
onig_set_meta_char(&SQLSyntax, ONIG_META_CHAR_ANYTIME,
ONIG_INEFFECTIVE_META_CHAR);
onig_set_meta_char(&SQLSyntax, ONIG_META_CHAR_ZERO_OR_ONE_TIME,
ONIG_INEFFECTIVE_META_CHAR);
onig_set_meta_char(&SQLSyntax, ONIG_META_CHAR_ONE_OR_MORE_TIME,
ONIG_INEFFECTIVE_META_CHAR);
onig_set_meta_char(&SQLSyntax, ONIG_META_CHAR_ANYCHAR_ANYTIME,
(OnigCodePoint )'%');
r = onig_new(&reg, pattern, pattern + strlen((char* )pattern),
ONIG_OPTION_DEFAULT, ONIG_ENCODING_ASCII, &SQLSyntax, &einfo);
if (r != ONIG_NORMAL) {
char s[ONIG_MAX_ERROR_MESSAGE_LEN];
onig_error_code_to_str(s, r, &einfo);
fprintf(stderr, "ERROR: %s\n", s);
return -1;
}
region = onig_region_new();
end = str + strlen((char* )str);
start = str;
range = end;
r = onig_search(reg, str, end, start, range, region, ONIG_OPTION_NONE);
if (r >= 0) {
int i;
fprintf(stderr, "match at %d\n", r);
for (i = 0; i < region->num_regs; i++) {
fprintf(stderr, "%d: (%d-%d)\n", i, region->beg[i], region->end[i]);
}
}
else if (r == ONIG_MISMATCH) {
fprintf(stderr, "search fail\n");
}
else { /* error */
char s[ONIG_MAX_ERROR_MESSAGE_LEN];
onig_error_code_to_str(s, r);
fprintf(stderr, "ERROR: %s\n", s);
return -1;
}
onig_region_free(region, 1 /* 1:free self, 0:free contents only */);
onig_free(reg);
onig_end();
return 0;
}

74
oniguruma/sample/syntax.c Normal file
View file

@ -0,0 +1,74 @@
/*
* syntax.c
*/
#include <stdio.h>
#include <string.h>
#include "oniguruma.h"
extern int exec(OnigSyntaxType* syntax,
char* apattern, char* astr)
{
int r;
unsigned char *start, *range, *end;
regex_t* reg;
OnigErrorInfo einfo;
OnigRegion *region;
UChar* pattern = (UChar* )apattern;
UChar* str = (UChar* )astr;
r = onig_new(&reg, pattern, pattern + strlen((char* )pattern),
ONIG_OPTION_DEFAULT, ONIG_ENCODING_ASCII, syntax, &einfo);
if (r != ONIG_NORMAL) {
char s[ONIG_MAX_ERROR_MESSAGE_LEN];
onig_error_code_to_str(s, r, &einfo);
fprintf(stderr, "ERROR: %s\n", s);
return -1;
}
region = onig_region_new();
end = str + strlen((char* )str);
start = str;
range = end;
r = onig_search(reg, str, end, start, range, region, ONIG_OPTION_NONE);
if (r >= 0) {
int i;
fprintf(stderr, "match at %d\n", r);
for (i = 0; i < region->num_regs; i++) {
fprintf(stderr, "%d: (%d-%d)\n", i, region->beg[i], region->end[i]);
}
}
else if (r == ONIG_MISMATCH) {
fprintf(stderr, "search fail\n");
}
else { /* error */
char s[ONIG_MAX_ERROR_MESSAGE_LEN];
onig_error_code_to_str(s, r);
fprintf(stderr, "ERROR: %s\n", s);
return -1;
}
onig_region_free(region, 1 /* 1:free self, 0:free contents only */);
onig_free(reg);
onig_end();
return 0;
}
extern int main(int argc, char* argv[])
{
int r;
r = exec(ONIG_SYNTAX_PERL,
"\\p{XDigit}\\P{XDigit}\\p{^XDigit}\\P{^XDigit}\\p{XDigit}",
"bgh3a");
r = exec(ONIG_SYNTAX_JAVA,
"\\p{XDigit}\\P{XDigit}[a-c&&b-g]", "bgc");
r = exec(ONIG_SYNTAX_ASIS,
"abc def* e+ g?ddd[a-rvvv] (vv){3,7}hv\\dvv(?:aczui ss)\\W\\w$",
"abc def* e+ g?ddd[a-rvvv] (vv){3,7}hv\\dvv(?:aczui ss)\\W\\w$");
onig_end();
return 0;
}

578
oniguruma/st.c Normal file
View file

@ -0,0 +1,578 @@
/* This is a public domain general purpose hash table package written by Peter Moore @ UCB. */
/* static char sccsid[] = "@(#) st.c 5.1 89/12/14 Crucible"; */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#ifdef _WIN32
#include <malloc.h>
#endif
#include "regint.h"
#include "st.h"
typedef struct st_table_entry st_table_entry;
struct st_table_entry {
unsigned int hash;
st_data_t key;
st_data_t record;
st_table_entry *next;
};
#define ST_DEFAULT_MAX_DENSITY 5
#define ST_DEFAULT_INIT_TABLE_SIZE 11
/*
* DEFAULT_MAX_DENSITY is the default for the largest we allow the
* average number of items per bin before increasing the number of
* bins
*
* DEFAULT_INIT_TABLE_SIZE is the default for the number of bins
* allocated initially
*
*/
static int numcmp(long, long);
static int numhash(long);
static struct st_hash_type type_numhash = {
numcmp,
numhash,
};
/* extern int strcmp(const char *, const char *); */
static int strhash(const char *);
static struct st_hash_type type_strhash = {
strcmp,
strhash,
};
static void rehash(st_table *);
#define alloc(type) (type*)xmalloc((unsigned)sizeof(type))
#define Calloc(n,s) (char*)xcalloc((n),(s))
#define EQUAL(table,x,y) ((x)==(y) || (*table->type->compare)((x),(y)) == 0)
#define do_hash(key,table) (unsigned int)(*(table)->type->hash)((key))
#define do_hash_bin(key,table) (do_hash(key, table)%(table)->num_bins)
/*
* MINSIZE is the minimum size of a dictionary.
*/
#define MINSIZE 8
/*
Table of prime numbers 2^n+a, 2<=n<=30.
*/
static const long primes[] = {
8 + 3,
16 + 3,
32 + 5,
64 + 3,
128 + 3,
256 + 27,
512 + 9,
1024 + 9,
2048 + 5,
4096 + 3,
8192 + 27,
16384 + 43,
32768 + 3,
65536 + 45,
131072 + 29,
262144 + 3,
524288 + 21,
1048576 + 7,
2097152 + 17,
4194304 + 15,
8388608 + 9,
16777216 + 43,
33554432 + 35,
67108864 + 15,
134217728 + 29,
268435456 + 3,
536870912 + 11,
1073741824 + 85,
0
};
static int
new_size(size)
int size;
{
int i;
#if 0
for (i=3; i<31; i++) {
if ((1<<i) > size) return 1<<i;
}
return -1;
#else
int newsize;
for (i = 0, newsize = MINSIZE;
i < (int )(sizeof(primes)/sizeof(primes[0]));
i++, newsize <<= 1)
{
if (newsize > size) return primes[i];
}
/* Ran out of polynomials */
return -1; /* should raise exception */
#endif
}
#ifdef HASH_LOG
static int collision = 0;
static int init_st = 0;
static void
stat_col()
{
FILE *f = fopen("/tmp/col", "w");
fprintf(f, "collision: %d\n", collision);
fclose(f);
}
#endif
st_table*
st_init_table_with_size(type, size)
struct st_hash_type *type;
int size;
{
st_table *tbl;
#ifdef HASH_LOG
if (init_st == 0) {
init_st = 1;
atexit(stat_col);
}
#endif
size = new_size(size); /* round up to prime number */
tbl = alloc(st_table);
tbl->type = type;
tbl->num_entries = 0;
tbl->num_bins = size;
tbl->bins = (st_table_entry **)Calloc(size, sizeof(st_table_entry*));
return tbl;
}
st_table*
st_init_table(type)
struct st_hash_type *type;
{
return st_init_table_with_size(type, 0);
}
st_table*
st_init_numtable(void)
{
return st_init_table(&type_numhash);
}
st_table*
st_init_numtable_with_size(size)
int size;
{
return st_init_table_with_size(&type_numhash, size);
}
st_table*
st_init_strtable(void)
{
return st_init_table(&type_strhash);
}
st_table*
st_init_strtable_with_size(size)
int size;
{
return st_init_table_with_size(&type_strhash, size);
}
void
st_free_table(table)
st_table *table;
{
register st_table_entry *ptr, *next;
int i;
for(i = 0; i < table->num_bins; i++) {
ptr = table->bins[i];
while (ptr != 0) {
next = ptr->next;
free(ptr);
ptr = next;
}
}
free(table->bins);
free(table);
}
#define PTR_NOT_EQUAL(table, ptr, hash_val, key) \
((ptr) != 0 && (ptr->hash != (hash_val) || !EQUAL((table), (key), (ptr)->key)))
#ifdef HASH_LOG
#define COLLISION collision++
#else
#define COLLISION
#endif
#define FIND_ENTRY(table, ptr, hash_val, bin_pos) do {\
bin_pos = hash_val%(table)->num_bins;\
ptr = (table)->bins[bin_pos];\
if (PTR_NOT_EQUAL(table, ptr, hash_val, key)) {\
COLLISION;\
while (PTR_NOT_EQUAL(table, ptr->next, hash_val, key)) {\
ptr = ptr->next;\
}\
ptr = ptr->next;\
}\
} while (0)
int
st_lookup(table, key, value)
st_table *table;
register st_data_t key;
st_data_t *value;
{
unsigned int hash_val, bin_pos;
register st_table_entry *ptr;
hash_val = do_hash(key, table);
FIND_ENTRY(table, ptr, hash_val, bin_pos);
if (ptr == 0) {
return 0;
}
else {
if (value != 0) *value = ptr->record;
return 1;
}
}
#define ADD_DIRECT(table, key, value, hash_val, bin_pos)\
do {\
st_table_entry *entry;\
if (table->num_entries/(table->num_bins) > ST_DEFAULT_MAX_DENSITY) {\
rehash(table);\
bin_pos = hash_val % table->num_bins;\
}\
\
entry = alloc(st_table_entry);\
\
entry->hash = hash_val;\
entry->key = key;\
entry->record = value;\
entry->next = table->bins[bin_pos];\
table->bins[bin_pos] = entry;\
table->num_entries++;\
} while (0)
int
st_insert(table, key, value)
register st_table *table;
register st_data_t key;
st_data_t value;
{
unsigned int hash_val, bin_pos;
register st_table_entry *ptr;
hash_val = do_hash(key, table);
FIND_ENTRY(table, ptr, hash_val, bin_pos);
if (ptr == 0) {
ADD_DIRECT(table, key, value, hash_val, bin_pos);
return 0;
}
else {
ptr->record = value;
return 1;
}
}
void
st_add_direct(table, key, value)
st_table *table;
st_data_t key;
st_data_t value;
{
unsigned int hash_val, bin_pos;
hash_val = do_hash(key, table);
bin_pos = hash_val % table->num_bins;
ADD_DIRECT(table, key, value, hash_val, bin_pos);
}
static void
rehash(table)
register st_table *table;
{
register st_table_entry *ptr, *next, **new_bins;
int i, old_num_bins = table->num_bins, new_num_bins;
unsigned int hash_val;
new_num_bins = new_size(old_num_bins+1);
new_bins = (st_table_entry**)Calloc(new_num_bins, sizeof(st_table_entry*));
for(i = 0; i < old_num_bins; i++) {
ptr = table->bins[i];
while (ptr != 0) {
next = ptr->next;
hash_val = ptr->hash % new_num_bins;
ptr->next = new_bins[hash_val];
new_bins[hash_val] = ptr;
ptr = next;
}
}
free(table->bins);
table->num_bins = new_num_bins;
table->bins = new_bins;
}
st_table*
st_copy(old_table)
st_table *old_table;
{
st_table *new_table;
st_table_entry *ptr, *entry;
int i, num_bins = old_table->num_bins;
new_table = alloc(st_table);
if (new_table == 0) {
return 0;
}
*new_table = *old_table;
new_table->bins = (st_table_entry**)
Calloc((unsigned)num_bins, sizeof(st_table_entry*));
if (new_table->bins == 0) {
free(new_table);
return 0;
}
for(i = 0; i < num_bins; i++) {
new_table->bins[i] = 0;
ptr = old_table->bins[i];
while (ptr != 0) {
entry = alloc(st_table_entry);
if (entry == 0) {
free(new_table->bins);
free(new_table);
return 0;
}
*entry = *ptr;
entry->next = new_table->bins[i];
new_table->bins[i] = entry;
ptr = ptr->next;
}
}
return new_table;
}
int
st_delete(table, key, value)
register st_table *table;
register st_data_t *key;
st_data_t *value;
{
unsigned int hash_val;
st_table_entry *tmp;
register st_table_entry *ptr;
hash_val = do_hash_bin(*key, table);
ptr = table->bins[hash_val];
if (ptr == 0) {
if (value != 0) *value = 0;
return 0;
}
if (EQUAL(table, *key, ptr->key)) {
table->bins[hash_val] = ptr->next;
table->num_entries--;
if (value != 0) *value = ptr->record;
*key = ptr->key;
free(ptr);
return 1;
}
for(; ptr->next != 0; ptr = ptr->next) {
if (EQUAL(table, ptr->next->key, *key)) {
tmp = ptr->next;
ptr->next = ptr->next->next;
table->num_entries--;
if (value != 0) *value = tmp->record;
*key = tmp->key;
free(tmp);
return 1;
}
}
return 0;
}
int
st_delete_safe(table, key, value, never)
register st_table *table;
register st_data_t *key;
st_data_t *value;
st_data_t never;
{
unsigned int hash_val;
register st_table_entry *ptr;
hash_val = do_hash_bin(*key, table);
ptr = table->bins[hash_val];
if (ptr == 0) {
if (value != 0) *value = 0;
return 0;
}
for(; ptr != 0; ptr = ptr->next) {
if ((ptr->key != never) && EQUAL(table, ptr->key, *key)) {
table->num_entries--;
*key = ptr->key;
if (value != 0) *value = ptr->record;
ptr->key = ptr->record = never;
return 1;
}
}
return 0;
}
static int
#if defined(__GNUC__)
delete_never(st_data_t key __attribute__ ((unused)), st_data_t value,
st_data_t never)
#else
delete_never(key, value, never)
st_data_t key, value, never;
#endif
{
if (value == never) return ST_DELETE;
return ST_CONTINUE;
}
void
st_cleanup_safe(table, never)
st_table *table;
st_data_t never;
{
int num_entries = table->num_entries;
st_foreach(table, delete_never, never);
table->num_entries = num_entries;
}
int
st_foreach(table, func, arg)
st_table *table;
int (*func)();
st_data_t arg;
{
st_table_entry *ptr, *last, *tmp;
enum st_retval retval;
int i;
for(i = 0; i < table->num_bins; i++) {
last = 0;
for(ptr = table->bins[i]; ptr != 0;) {
retval = (*func)(ptr->key, ptr->record, arg);
switch (retval) {
case ST_CHECK: /* check if hash is modified during iteration */
tmp = 0;
if (i < table->num_bins) {
for (tmp = table->bins[i]; tmp; tmp=tmp->next) {
if (tmp == ptr) break;
}
}
if (!tmp) {
/* call func with error notice */
return 1;
}
/* fall through */
case ST_CONTINUE:
last = ptr;
ptr = ptr->next;
break;
case ST_STOP:
return 0;
case ST_DELETE:
tmp = ptr;
if (last == 0) {
table->bins[i] = ptr->next;
}
else {
last->next = ptr->next;
}
ptr = ptr->next;
free(tmp);
table->num_entries--;
}
}
}
return 0;
}
static int
strhash(string)
register const char *string;
{
register int c;
#ifdef HASH_ELFHASH
register unsigned int h = 0, g;
while ((c = *string++) != '\0') {
h = ( h << 4 ) + c;
if ( g = h & 0xF0000000 )
h ^= g >> 24;
h &= ~g;
}
return h;
#elif HASH_PERL
register int val = 0;
while ((c = *string++) != '\0') {
val += c;
val += (val << 10);
val ^= (val >> 6);
}
val += (val << 3);
val ^= (val >> 11);
return val + (val << 15);
#else
register int val = 0;
while ((c = *string++) != '\0') {
val = val*997 + c;
}
return val + (val>>5);
#endif
}
static int
numcmp(x, y)
long x, y;
{
return x != y;
}
static int
numhash(n)
long n;
{
return n;
}

63
oniguruma/st.h Normal file
View file

@ -0,0 +1,63 @@
/* This is a public domain general purpose hash table package written by Peter Moore @ UCB. */
/* @(#) st.h 5.1 89/12/14 */
#ifndef ST_INCLUDED
#define ST_INCLUDED
typedef unsigned long st_data_t;
#define ST_DATA_T_DEFINED
typedef struct st_table st_table;
struct st_hash_type {
int (*compare)();
int (*hash)();
};
struct st_table {
struct st_hash_type *type;
int num_bins;
int num_entries;
struct st_table_entry **bins;
};
#define st_is_member(table,key) st_lookup(table,key,(st_data_t *)0)
enum st_retval {ST_CONTINUE, ST_STOP, ST_DELETE, ST_CHECK};
#ifndef _
# define _(args) args
#endif
#ifndef ANYARGS
# ifdef __cplusplus
# define ANYARGS ...
# else
# define ANYARGS
# endif
#endif
st_table *st_init_table _((struct st_hash_type *));
st_table *st_init_table_with_size _((struct st_hash_type *, int));
st_table *st_init_numtable _((void));
st_table *st_init_numtable_with_size _((int));
st_table *st_init_strtable _((void));
st_table *st_init_strtable_with_size _((int));
int st_delete _((st_table *, st_data_t *, st_data_t *));
int st_delete_safe _((st_table *, st_data_t *, st_data_t *, st_data_t));
int st_insert _((st_table *, st_data_t, st_data_t));
int st_lookup _((st_table *, st_data_t, st_data_t *));
int st_foreach _((st_table *, int (*)(ANYARGS), st_data_t));
void st_add_direct _((st_table *, st_data_t, st_data_t));
void st_free_table _((st_table *));
void st_cleanup_safe _((st_table *, st_data_t));
st_table *st_copy _((st_table *));
#define ST_NUMCMP ((int (*)()) 0)
#define ST_NUMHASH ((int (*)()) -2)
#define st_numcmp ST_NUMCMP
#define st_numhash ST_NUMHASH
#endif /* ST_INCLUDED */

863
oniguruma/testc.c Normal file
View file

@ -0,0 +1,863 @@
/*
* This program was generated by testconv.rb.
*/
#include "config.h"
#ifdef ONIG_ESCAPE_UCHAR_COLLISION
#undef ONIG_ESCAPE_UCHAR_COLLISION
#endif
#include <stdio.h>
#ifdef POSIX_TEST
#include "onigposix.h"
#else
#include "oniguruma.h"
#endif
#ifdef HAVE_STRING_H
# include <string.h>
#else
# include <strings.h>
#endif
#define SLEN(s) strlen(s)
static int nsucc = 0;
static int nfail = 0;
static int nerror = 0;
static FILE* err_file;
#ifndef POSIX_TEST
static OnigRegion* region;
#endif
static void xx(char* pattern, char* str, int from, int to, int mem, int not)
{
int r;
#ifdef POSIX_TEST
regex_t reg;
char buf[200];
regmatch_t pmatch[25];
r = regcomp(&reg, pattern, REG_EXTENDED | REG_NEWLINE);
if (r) {
regerror(r, &reg, buf, sizeof(buf));
fprintf(err_file, "ERROR: %s\n", buf);
nerror++;
return ;
}
r = regexec(&reg, str, reg.re_nsub + 1, pmatch, 0);
if (r != 0 && r != REG_NOMATCH) {
regerror(r, &reg, buf, sizeof(buf));
fprintf(err_file, "ERROR: %s\n", buf);
nerror++;
return ;
}
if (r == REG_NOMATCH) {
if (not) {
fprintf(stdout, "OK(N): /%s/ '%s'\n", pattern, str);
nsucc++;
}
else {
fprintf(stdout, "FAIL: /%s/ '%s'\n", pattern, str);
nfail++;
}
}
else {
if (not) {
fprintf(stdout, "FAIL(N): /%s/ '%s'\n", pattern, str);
nfail++;
}
else {
if (pmatch[mem].rm_so == from && pmatch[mem].rm_eo == to) {
fprintf(stdout, "OK: /%s/ '%s'\n", pattern, str);
nsucc++;
}
else {
fprintf(stdout, "FAIL: /%s/ '%s' %d-%d : %d-%d\n", pattern, str,
from, to, pmatch[mem].rm_so, pmatch[mem].rm_eo);
nfail++;
}
}
}
regfree(&reg);
#else
regex_t* reg;
OnigErrorInfo einfo;
r = onig_new(&reg, (UChar* )pattern, (UChar* )(pattern + SLEN(pattern)),
ONIG_OPTION_DEFAULT, ONIG_ENCODING_EUC_JP, ONIG_SYNTAX_DEFAULT, &einfo);
if (r) {
char s[ONIG_MAX_ERROR_MESSAGE_LEN];
onig_error_code_to_str((UChar* )s, r, &einfo);
fprintf(err_file, "ERROR: %s\n", s);
nerror++;
return ;
}
r = onig_search(reg, (UChar* )str, (UChar* )(str + SLEN(str)),
(UChar* )str, (UChar* )(str + SLEN(str)),
region, ONIG_OPTION_NONE);
if (r < ONIG_MISMATCH) {
char s[ONIG_MAX_ERROR_MESSAGE_LEN];
onig_error_code_to_str((UChar* )s, r);
fprintf(err_file, "ERROR: %s\n", s);
nerror++;
return ;
}
if (r == ONIG_MISMATCH) {
if (not) {
fprintf(stdout, "OK(N): /%s/ '%s'\n", pattern, str);
nsucc++;
}
else {
fprintf(stdout, "FAIL: /%s/ '%s'\n", pattern, str);
nfail++;
}
}
else {
if (not) {
fprintf(stdout, "FAIL(N): /%s/ '%s'\n", pattern, str);
nfail++;
}
else {
if (region->beg[mem] == from && region->end[mem] == to) {
fprintf(stdout, "OK: /%s/ '%s'\n", pattern, str);
nsucc++;
}
else {
fprintf(stdout, "FAIL: /%s/ '%s' %d-%d : %d-%d\n", pattern, str,
from, to, region->beg[mem], region->end[mem]);
nfail++;
}
}
}
onig_free(reg);
#endif
}
static void x2(char* pattern, char* str, int from, int to)
{
xx(pattern, str, from, to, 0, 0);
}
static void x3(char* pattern, char* str, int from, int to, int mem)
{
xx(pattern, str, from, to, mem, 0);
}
static void n(char* pattern, char* str)
{
xx(pattern, str, 0, 0, 0, 1);
}
extern int main(int argc, char* argv[])
{
err_file = stdout;
#ifdef POSIX_TEST
reg_set_encoding(REG_POSIX_ENCODING_EUC_JP);
#else
region = onig_region_new();
#endif
x2("", "", 0, 0);
x2("^", "", 0, 0);
x2("$", "", 0, 0);
x2("\\G", "", 0, 0);
x2("\\A", "", 0, 0);
x2("\\Z", "", 0, 0);
x2("\\z", "", 0, 0);
x2("^$", "", 0, 0);
x2("\\ca", "\001", 0, 1);
x2("\\C-b", "\002", 0, 1);
x2("\\c\\\\", "\034", 0, 1);
x2("q[\\c\\\\]", "q\034", 0, 2);
x2("", "a", 0, 0);
x2("a", "a", 0, 1);
x2("\\x61", "a", 0, 1);
x2("aa", "aa", 0, 2);
x2("aaa", "aaa", 0, 3);
x2("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", 0, 35);
x2("ab", "ab", 0, 2);
x2("b", "ab", 1, 2);
x2("bc", "abc", 1, 3);
x2("(?i:#RET#)", "#INS##RET#", 5, 10);
x2("\\17", "\017", 0, 1);
x2("\\x1f", "\x1f", 0, 1);
x2("a(?#....\\\\JJJJ)b", "ab", 0, 2);
x2("(?x) G (o O(?-x)oO) g L", "GoOoOgLe", 0, 7);
x2(".", "a", 0, 1);
n(".", "");
x2("..", "ab", 0, 2);
x2("\\w", "e", 0, 1);
n("\\W", "e");
x2("\\s", " ", 0, 1);
x2("\\S", "b", 0, 1);
x2("\\d", "4", 0, 1);
n("\\D", "4");
x2("\\b", "z ", 0, 0);
x2("\\b", " z", 1, 1);
x2("\\B", "zz ", 1, 1);
x2("\\B", "z ", 2, 2);
x2("\\B", " z", 0, 0);
x2("[ab]", "b", 0, 1);
n("[ab]", "c");
x2("[a-z]", "t", 0, 1);
n("[^a]", "a");
x2("[^a]", "\n", 0, 1);
x2("[]]", "]", 0, 1);
n("[^]]", "]");
x2("[\\^]+", "0^^1", 1, 3);
x2("[b-]", "b", 0, 1);
x2("[b-]", "-", 0, 1);
x2("[\\w]", "z", 0, 1);
n("[\\w]", " ");
x2("[\\W]", "b$", 1, 2);
x2("[\\d]", "5", 0, 1);
n("[\\d]", "e");
x2("[\\D]", "t", 0, 1);
n("[\\D]", "3");
x2("[\\s]", " ", 0, 1);
n("[\\s]", "a");
x2("[\\S]", "b", 0, 1);
n("[\\S]", " ");
x2("[\\w\\d]", "2", 0, 1);
n("[\\w\\d]", " ");
x2("[[:upper:]]", "B", 0, 1);
x2("[*[:xdigit:]+]", "+", 0, 1);
x2("[*[:xdigit:]+]", "GHIKK-9+*", 6, 7);
x2("[*[:xdigit:]+]", "-@^+", 3, 4);
n("[[:upper]]", "A");
x2("[[:upper]]", ":", 0, 1);
x2("[\\044-\\047]", "\046", 0, 1);
x2("[\\x5a-\\x5c]", "\x5b", 0, 1);
x2("[\\x6A-\\x6D]", "\x6c", 0, 1);
n("[\\x6A-\\x6D]", "\x6E");
n("^[0-9A-F]+ 0+ UNDEF ", "75F 00000000 SECT14A notype () External | _rb_apply");
x2("[\\[]", "[", 0, 1);
x2("[\\]]", "]", 0, 1);
x2("[&]", "&", 0, 1);
x2("[[ab]]", "b", 0, 1);
x2("[[ab]c]", "c", 0, 1);
n("[[^a]]", "a");
n("[^[a]]", "a");
x2("[[ab]&&bc]", "b", 0, 1);
n("[[ab]&&bc]", "a");
n("[[ab]&&bc]", "c");
x2("[a-z&&b-y&&c-x]", "w", 0, 1);
n("[^a-z&&b-y&&c-x]", "w");
x2("[[^a&&a]&&a-z]", "b", 0, 1);
n("[[^a&&a]&&a-z]", "a");
x2("[[^a-z&&bcdef]&&[^c-g]]", "h", 0, 1);
n("[[^a-z&&bcdef]&&[^c-g]]", "c");
x2("[^[^abc]&&[^cde]]", "c", 0, 1);
x2("[^[^abc]&&[^cde]]", "e", 0, 1);
n("[^[^abc]&&[^cde]]", "f");
x2("[a-&&-a]", "-", 0, 1);
n("[a\\-&&\\-a]", "&");
n("\\wabc", " abc");
x2("a\\Wbc", "a bc", 0, 4);
x2("a.b.c", "aabbc", 0, 5);
x2(".\\wb\\W..c", "abb bcc", 0, 7);
x2("\\s\\wzzz", " zzzz", 0, 5);
x2("aa.b", "aabb", 0, 4);
n(".a", "ab");
x2(".a", "aa", 0, 2);
x2("^a", "a", 0, 1);
x2("^a$", "a", 0, 1);
x2("^\\w$", "a", 0, 1);
n("^\\w$", " ");
x2("^\\wab$", "zab", 0, 3);
x2("^\\wabcdef$", "zabcdef", 0, 7);
x2("^\\w...def$", "zabcdef", 0, 7);
x2("\\w\\w\\s\\Waaa\\d", "aa aaa4", 0, 8);
x2("\\A\\Z", "", 0, 0);
x2("\\Axyz", "xyz", 0, 3);
x2("xyz\\Z", "xyz", 0, 3);
x2("xyz\\z", "xyz", 0, 3);
x2("a\\Z", "a", 0, 1);
x2("\\Gaz", "az", 0, 2);
n("\\Gz", "bza");
n("az\\G", "az");
n("az\\A", "az");
n("a\\Az", "az");
x2("\\^\\$", "^$", 0, 2);
x2("^x?y", "xy", 0, 2);
x2("^(x?y)", "xy", 0, 2);
x2("\\w", "_", 0, 1);
n("\\W", "_");
x2("(?=z)z", "z", 0, 1);
n("(?=z).", "a");
x2("(?!z)a", "a", 0, 1);
n("(?!z)a", "z");
x2("(?i:a)", "a", 0, 1);
x2("(?i:a)", "A", 0, 1);
x2("(?i:A)", "a", 0, 1);
n("(?i:A)", "b");
x2("(?i:[A-Z])", "a", 0, 1);
x2("(?i:[f-m])", "H", 0, 1);
x2("(?i:[f-m])", "h", 0, 1);
n("(?i:[f-m])", "e");
x2("(?i:[A-c])", "D", 0, 1);
n("(?i:[^a-z])", "A");
n("(?i:[^a-z])", "a");
x2("(?i:[!-k])", "Z", 0, 1);
x2("(?i:[!-k])", "7", 0, 1);
x2("(?i:[T-}])", "b", 0, 1);
x2("(?i:[T-}])", "{", 0, 1);
x2("(?i:\\?a)", "?A", 0, 2);
x2("(?i:\\*A)", "*a", 0, 2);
n(".", "\n");
x2("(?m:.)", "\n", 0, 1);
x2("(?m:a.)", "a\n", 0, 2);
x2("(?m:.b)", "a\nb", 1, 3);
x2(".*abc", "dddabdd\nddabc", 8, 13);
x2("(?m:.*abc)", "dddabddabc", 0, 10);
n("(?i)(?-i)a", "A");
n("(?i)(?-i:a)", "A");
x2("a?", "", 0, 0);
x2("a?", "b", 0, 0);
x2("a?", "a", 0, 1);
x2("a*", "", 0, 0);
x2("a*", "a", 0, 1);
x2("a*", "aaa", 0, 3);
x2("a*", "baaaa", 0, 0);
n("a+", "");
x2("a+", "a", 0, 1);
x2("a+", "aaaa", 0, 4);
x2("a+", "aabbb", 0, 2);
x2("a+", "baaaa", 1, 5);
x2(".?", "", 0, 0);
x2(".?", "f", 0, 1);
x2(".?", "\n", 0, 0);
x2(".*", "", 0, 0);
x2(".*", "abcde", 0, 5);
x2(".+", "z", 0, 1);
x2(".+", "zdswer\n", 0, 6);
x2("(.*)a\\1f", "babfbac", 0, 4);
x2("(.*)a\\1f", "bacbabf", 3, 7);
x2("((.*)a\\2f)", "bacbabf", 3, 7);
x2("(.*)a\\1f", "baczzzzzz\nbazz\nzzzzbabf", 19, 23);
x2("a|b", "a", 0, 1);
x2("a|b", "b", 0, 1);
x2("|a", "a", 0, 0);
x2("(|a)", "a", 0, 0);
x2("ab|bc", "ab", 0, 2);
x2("ab|bc", "bc", 0, 2);
x2("z(?:ab|bc)", "zbc", 0, 3);
x2("a(?:ab|bc)c", "aabc", 0, 4);
x2("ab|(?:ac|az)", "az", 0, 2);
x2("a|b|c", "dc", 1, 2);
x2("a|b|cd|efg|h|ijk|lmn|o|pq|rstuvwx|yz", "pqr", 0, 2);
n("a|b|cd|efg|h|ijk|lmn|o|pq|rstuvwx|yz", "mn");
x2("a|^z", "ba", 1, 2);
x2("a|^z", "za", 0, 1);
x2("a|\\Gz", "bza", 2, 3);
x2("a|\\Gz", "za", 0, 1);
x2("a|\\Az", "bza", 2, 3);
x2("a|\\Az", "za", 0, 1);
x2("a|b\\Z", "ba", 1, 2);
x2("a|b\\Z", "b", 0, 1);
x2("a|b\\z", "ba", 1, 2);
x2("a|b\\z", "b", 0, 1);
x2("\\w|\\s", " ", 0, 1);
n("\\w|\\w", " ");
x2("\\w|%", "%", 0, 1);
x2("\\w|[&$]", "&", 0, 1);
x2("[b-d]|[^e-z]", "a", 0, 1);
x2("(?:a|[c-f])|bz", "dz", 0, 1);
x2("(?:a|[c-f])|bz", "bz", 0, 2);
x2("abc|(?=zz)..f", "zzf", 0, 3);
x2("abc|(?!zz)..f", "abf", 0, 3);
x2("(?=za)..a|(?=zz)..a", "zza", 0, 3);
n("(?>a|abd)c", "abdc");
x2("(?>abd|a)c", "abdc", 0, 4);
x2("a?|b", "a", 0, 1);
x2("a?|b", "b", 0, 0);
x2("a?|b", "", 0, 0);
x2("a*|b", "aa", 0, 2);
x2("a*|b*", "ba", 0, 0);
x2("a*|b*", "ab", 0, 1);
x2("a+|b*", "", 0, 0);
x2("a+|b*", "bbb", 0, 3);
x2("a+|b*", "abbb", 0, 1);
n("a+|b+", "");
x2("(a|b)?", "b", 0, 1);
x2("(a|b)*", "ba", 0, 2);
x2("(a|b)+", "bab", 0, 3);
x2("(ab|ca)+", "caabbc", 0, 4);
x2("(ab|ca)+", "aabca", 1, 5);
x2("(ab|ca)+", "abzca", 0, 2);
x2("(a|bab)+", "ababa", 0, 5);
x2("(a|bab)+", "ba", 1, 2);
x2("(a|bab)+", "baaaba", 1, 4);
x2("(?:a|b)(?:a|b)", "ab", 0, 2);
x2("(?:a*|b*)(?:a*|b*)", "aaabbb", 0, 3);
x2("(?:a*|b*)(?:a+|b+)", "aaabbb", 0, 6);
x2("(?:a+|b+){2}", "aaabbb", 0, 6);
x2("h{0,}", "hhhh", 0, 4);
x2("(?:a+|b+){1,2}", "aaabbb", 0, 6);
n("ax{2}*a", "0axxxa1");
n("a.{0,2}a", "0aXXXa0");
n("a.{0,2}?a", "0aXXXa0");
n("a.{0,2}?a", "0aXXXXa0");
x2("^a{2,}?a$", "aaa", 0, 3);
x2("^[a-z]{2,}?$", "aaa", 0, 3);
x2("(?:a+|\\Ab*)cc", "cc", 0, 2);
n("(?:a+|\\Ab*)cc", "abcc");
x2("(?:^a+|b+)*c", "aabbbabc", 6, 8);
x2("(?:^a+|b+)*c", "aabbbbc", 0, 7);
x2("a|(?i)c", "C", 0, 1);
x2("(?i)c|a", "C", 0, 1);
x2("(?i)c|a", "A", 0, 1);
x2("(?i:c)|a", "C", 0, 1);
n("(?i:c)|a", "A");
x2("[abc]?", "abc", 0, 1);
x2("[abc]*", "abc", 0, 3);
x2("[^abc]*", "abc", 0, 0);
n("[^abc]+", "abc");
x2("a?\?", "aaa", 0, 0);
x2("ba?\?b", "bab", 0, 3);
x2("a*?", "aaa", 0, 0);
x2("ba*?", "baa", 0, 1);
x2("ba*?b", "baab", 0, 4);
x2("a+?", "aaa", 0, 1);
x2("ba+?", "baa", 0, 2);
x2("ba+?b", "baab", 0, 4);
x2("(?:a?)?\?", "a", 0, 0);
x2("(?:a?\?)?", "a", 0, 0);
x2("(?:a?)+?", "aaa", 0, 1);
x2("(?:a+)?\?", "aaa", 0, 0);
x2("(?:a+)?\?b", "aaab", 0, 4);
x2("(?:ab)?{2}", "", 0, 0);
x2("(?:ab)?{2}", "ababa", 0, 4);
x2("(?:ab)*{0}", "ababa", 0, 0);
x2("(?:ab){3,}", "abababab", 0, 8);
n("(?:ab){3,}", "abab");
x2("(?:ab){2,4}", "ababab", 0, 6);
x2("(?:ab){2,4}", "ababababab", 0, 8);
x2("(?:ab){2,4}?", "ababababab", 0, 4);
x2("(?:ab){,}", "ab{,}", 0, 5);
x2("(?:abc)+?{2}", "abcabcabc", 0, 6);
x2("(?:X*)(?i:xa)", "XXXa", 0, 4);
x2("(d+)([^abc]z)", "dddz", 0, 4);
x2("([^abc]*)([^abc]z)", "dddz", 0, 4);
x2("(\\w+)(\\wz)", "dddz", 0, 4);
x3("(a)", "a", 0, 1, 1);
x3("(ab)", "ab", 0, 2, 1);
x2("((ab))", "ab", 0, 2);
x3("((ab))", "ab", 0, 2, 1);
x3("((ab))", "ab", 0, 2, 2);
x3("((((((((((((((((((((ab))))))))))))))))))))", "ab", 0, 2, 20);
x3("(ab)(cd)", "abcd", 0, 2, 1);
x3("(ab)(cd)", "abcd", 2, 4, 2);
x3("()(a)bc(def)ghijk", "abcdefghijk", 3, 6, 3);
x3("(()(a)bc(def)ghijk)", "abcdefghijk", 3, 6, 4);
x2("(^a)", "a", 0, 1);
x3("(a)|(a)", "ba", 1, 2, 1);
x3("(^a)|(a)", "ba", 1, 2, 2);
x3("(a?)", "aaa", 0, 1, 1);
x3("(a*)", "aaa", 0, 3, 1);
x3("(a*)", "", 0, 0, 1);
x3("(a+)", "aaaaaaa", 0, 7, 1);
x3("(a+|b*)", "bbbaa", 0, 3, 1);
x3("(a+|b?)", "bbbaa", 0, 1, 1);
x3("(abc)?", "abc", 0, 3, 1);
x3("(abc)*", "abc", 0, 3, 1);
x3("(abc)+", "abc", 0, 3, 1);
x3("(xyz|abc)+", "abc", 0, 3, 1);
x3("([xyz][abc]|abc)+", "abc", 0, 3, 1);
x3("((?i:abc))", "AbC", 0, 3, 1);
x2("(abc)(?i:\\1)", "abcABC", 0, 6);
x3("((?m:a.c))", "a\nc", 0, 3, 1);
x3("((?=az)a)", "azb", 0, 1, 1);
x3("abc|(.abd)", "zabd", 0, 4, 1);
x2("(?:abc)|(ABC)", "abc", 0, 3);
x3("(?i:(abc))|(zzz)", "ABC", 0, 3, 1);
x3("a*(.)", "aaaaz", 4, 5, 1);
x3("a*?(.)", "aaaaz", 0, 1, 1);
x3("a*?(c)", "aaaac", 4, 5, 1);
x3("[bcd]a*(.)", "caaaaz", 5, 6, 1);
x3("(\\Abb)cc", "bbcc", 0, 2, 1);
n("(\\Abb)cc", "zbbcc");
x3("(^bb)cc", "bbcc", 0, 2, 1);
n("(^bb)cc", "zbbcc");
x3("cc(bb$)", "ccbb", 2, 4, 1);
n("cc(bb$)", "ccbbb");
n("(\\1)", "");
n("\\1(a)", "aa");
n("(a(b)\\1)\\2+", "ababb");
n("(?:(?:\\1|z)(a))+$", "zaa");
x2("(?:(?:\\1|z)(a))+$", "zaaa", 0, 4);
x2("(a)(?=\\1)", "aa", 0, 1);
n("(a)$|\\1", "az");
x2("(a)\\1", "aa", 0, 2);
n("(a)\\1", "ab");
x2("(a?)\\1", "aa", 0, 2);
x2("(a?\?)\\1", "aa", 0, 0);
x2("(a*)\\1", "aaaaa", 0, 4);
x3("(a*)\\1", "aaaaa", 0, 2, 1);
x2("a(b*)\\1", "abbbb", 0, 5);
x2("a(b*)\\1", "ab", 0, 1);
x2("(a*)(b*)\\1\\2", "aaabbaaabb", 0, 10);
x2("(a*)(b*)\\2", "aaabbbb", 0, 7);
x2("(((((((a*)b))))))c\\7", "aaabcaaa", 0, 8);
x3("(((((((a*)b))))))c\\7", "aaabcaaa", 0, 3, 7);
x2("(a)(b)(c)\\2\\1\\3", "abcbac", 0, 6);
x2("([a-d])\\1", "cc", 0, 2);
x2("(\\w\\d\\s)\\1", "f5 f5 ", 0, 6);
n("(\\w\\d\\s)\\1", "f5 f5");
x2("(who|[a-c]{3})\\1", "whowho", 0, 6);
x2("...(who|[a-c]{3})\\1", "abcwhowho", 0, 9);
x2("(who|[a-c]{3})\\1", "cbccbc", 0, 6);
x2("(^a)\\1", "aa", 0, 2);
n("(^a)\\1", "baa");
n("(a$)\\1", "aa");
n("(ab\\Z)\\1", "ab");
x2("(a*\\Z)\\1", "a", 1, 1);
x2(".(a*\\Z)\\1", "ba", 1, 2);
x3("(.(abc)\\2)", "zabcabc", 0, 7, 1);
x3("(.(..\\d.)\\2)", "z12341234", 0, 9, 1);
x2("((?i:az))\\1", "AzAz", 0, 4);
n("((?i:az))\\1", "Azaz");
x2("(?<=a)b", "ab", 1, 2);
n("(?<=a)b", "bb");
x2("(?<=a|b)b", "bb", 1, 2);
x2("(?<=a|bc)b", "bcb", 2, 3);
x2("(?<=a|bc)b", "ab", 1, 2);
x2("(?<=a|bc||defghij|klmnopq|r)z", "rz", 1, 2);
x2("(a)\\g<1>", "aa", 0, 2);
x2("(?<!a)b", "cb", 1, 2);
n("(?<!a)b", "ab");
x2("(?<!a|bc)b", "bbb", 0, 1);
n("(?<!a|bc)z", "bcz");
x2("(?<name1>a)", "a", 0, 1);
x2("(?<name_2>ab)\\g<name_2>", "abab", 0, 4);
x2("(?<name_3>.zv.)\\k<name_3>", "azvbazvb", 0, 8);
x2("(?<=\\g<ab>)|-\\zEND (?<ab>XyZ)", "XyZ", 3, 3);
x2("(?<n>|a\\g<n>)+", "", 0, 0);
x2("(?<n>|\\(\\g<n>\\))+$", "()(())", 0, 6);
x3("\\g<n>(?<n>.){0}", "X", 0, 1, 1);
x2("\\g<n>(abc|df(?<n>.YZ){2,8}){0}", "XYZ", 0, 3);
x2("\\A(?<n>(a\\g<n>)|)\\z", "aaaa", 0, 4);
x2("(?<n>|\\g<m>\\g<n>)\\z|\\zEND (?<m>a|(b)\\g<m>)", "bbbbabba", 0, 8);
x2("(?<name1240>\\w+\\sx)a+\\k<name1240>", " fg xaaaaaaaafg x", 2, 18);
x3("(z)()()(?<_9>a)\\g<_9>", "zaa", 2, 3, 1);
x2("(.)(((?<_>a)))\\k<_>", "zaa", 0, 3);
x2("((?<name1>\\d)|(?<name2>\\w))(\\k<name1>|\\k<name2>)", "ff", 0, 2);
x2("(?:(?<x>)|(?<x>efg))\\k<x>", "", 0, 0);
x2("(?:(?<x>abc)|(?<x>efg))\\k<x>", "abcefgefg", 3, 9);
n("(?:(?<x>abc)|(?<x>efg))\\k<x>", "abcefg");
x2("(?:(?<n1>.)|(?<n1>..)|(?<n1>...)|(?<n1>....)|(?<n1>.....)|(?<n1>......)|(?<n1>.......)|(?<n1>........)|(?<n1>.........)|(?<n1>..........)|(?<n1>...........)|(?<n1>............)|(?<n1>.............)|(?<n1>..............))\\k<n1>$", "a-pyumpyum", 2, 10);
x3("(?:(?<n1>.)|(?<n1>..)|(?<n1>...)|(?<n1>....)|(?<n1>.....)|(?<n1>......)|(?<n1>.......)|(?<n1>........)|(?<n1>.........)|(?<n1>..........)|(?<n1>...........)|(?<n1>............)|(?<n1>.............)|(?<n1>..............))\\k<n1>$", "xxxxabcdefghijklmnabcdefghijklmn", 4, 18, 14);
x3("(?<name1>)(?<name2>)(?<name3>)(?<name4>)(?<name5>)(?<name6>)(?<name7>)(?<name8>)(?<name9>)(?<name10>)(?<name11>)(?<name12>)(?<name13>)(?<name14>)(?<name15>)(?<name16>aaa)(?<name17>)$", "aaa", 0, 3, 16);
x2("(?<foo>a|\\(\\g<foo>\\))", "a", 0, 1);
x2("(?<foo>a|\\(\\g<foo>\\))", "((((((a))))))", 0, 13);
x3("(?<foo>a|\\(\\g<foo>\\))", "((((((((a))))))))", 0, 17, 1);
x2("\\g<bar>|\\zEND(?<bar>.*abc$)", "abcxxxabc", 0, 9);
x2("\\g<1>|\\zEND(.a.)", "bac", 0, 3);
x3("\\g<_A>\\g<_A>|\\zEND(.a.)(?<_A>.b.)", "xbxyby", 3, 6, 1);
x2("\\A(?:\\g<pon>|\\g<pan>|\\zEND (?<pan>a|c\\g<pon>c)(?<pon>b|d\\g<pan>d))$", "cdcbcdc", 0, 7);
x2("\\A(?<n>|a\\g<m>)\\z|\\zEND (?<m>\\g<n>)", "aaaa", 0, 4);
x2("(?<n>(a|b\\g<n>c){3,5})", "baaaaca", 1, 5);
x2("(?<n>(a|b\\g<n>c){3,5})", "baaaacaaaaa", 0, 10);
x2("(?<pare>\\(([^\\(\\)]++|\\g<pare>)*+\\))", "((a))", 0, 5);
x2("()*\\1", "", 0, 0);
x2("(?:()|())*\\1\\2", "", 0, 0);
x3("(?:\\1a|())*", "a", 0, 0, 1);
x2("x((.)*)*x", "0x1x2x3", 1, 6);
x2("x((.)*)*x(?i:\\1)\\Z", "0x1x2x1X2", 1, 9);
x2("(?:()|()|()|()|()|())*\\2\\5", "", 0, 0);
x2("(?:()|()|()|(x)|()|())*\\2b\\5", "b", 0, 1);
x2("\\xED\\xF2", "\xed\xf2", 0, 2);
x2("", "", 0, 0);
x2("", "", 0, 2);
n("", "");
x2("うう", "うう", 0, 4);
x2("あいう", "あいう", 0, 6);
x2("こここここここここここここここここここここここここここここここここここ", "こここここここここここここここここここここここここここここここここここ", 0, 70);
x2("", "いあ", 2, 4);
x2("いう", "あいう", 2, 6);
x2("\\xca\\xb8", "\xca\xb8", 0, 2);
x2(".", "", 0, 2);
x2("..", "かき", 0, 4);
x2("\\w", "", 0, 2);
n("\\W", "");
x2("[\\W]", "う$", 2, 3);
x2("\\S", "", 0, 2);
x2("\\S", "", 0, 2);
x2("\\b", "", 0, 0);
x2("\\b", "", 1, 1);
x2("\\B", "せそ ", 2, 2);
x2("\\B", "", 3, 3);
x2("\\B", "", 0, 0);
x2("[たち]", "", 0, 2);
n("[なに]", "");
x2("[う-お]", "", 0, 2);
n("[^け]", "");
x2("[\\w]", "", 0, 2);
n("[\\d]", "");
x2("[\\D]", "", 0, 2);
n("[\\s]", "");
x2("[\\S]", "", 0, 2);
x2("[\\w\\d]", "", 0, 2);
x2("[\\w\\d]", "", 3, 5);
n("\\w鬼車", " 鬼車");
x2("\\W車", "鬼 車", 0, 5);
x2("あ.い.う", "ああいいう", 0, 10);
x2(".\\wう\\W..ぞ", "えうう うぞぞ", 0, 13);
x2("\\s\\wこここ", " ここここ", 0, 9);
x2("ああ.け", "ああけけ", 0, 8);
n(".い", "いえ");
x2(".お", "おお", 0, 4);
x2("^あ", "", 0, 2);
x2("^む$", "", 0, 2);
x2("^\\w$", "", 0, 2);
x2("^\\wかきくけこ$", "zかきくけこ", 0, 11);
x2("^\\w...うえお$", "zあいううえお", 0, 13);
x2("\\w\\w\\s\\Wおおお\\d", "aお おおお4", 0, 12);
x2("\\Aたちつ", "たちつ", 0, 6);
x2("むめも\\Z", "むめも", 0, 6);
x2("かきく\\z", "かきく", 0, 6);
x2("かきく\\Z", "かきく\n", 0, 6);
x2("\\Gぽぴ", "ぽぴ", 0, 4);
n("\\Gえ", "うえお");
n("とて\\G", "とて");
n("まみ\\A", "まみ");
n("\\Aみ", "まみ");
x2("(?=せ)せ", "", 0, 2);
n("(?=う).", "");
x2("(?!う)か", "", 0, 2);
n("(?!と)あ", "");
x2("(?i:あ)", "", 0, 2);
x2("(?i:ぶべ)", "ぶべ", 0, 4);
n("(?i:い)", "");
x2("(?m:よ.)", "\n", 0, 3);
x2("(?m:.め)", "\n", 2, 5);
x2("あ?", "", 0, 0);
x2("変?", "", 0, 0);
x2("変?", "", 0, 2);
x2("量*", "", 0, 0);
x2("量*", "", 0, 2);
x2("子*", "子子子", 0, 6);
x2("馬*", "鹿馬馬馬馬", 0, 0);
n("山+", "");
x2("河+", "", 0, 2);
x2("時+", "時時時時", 0, 8);
x2("え+", "ええううう", 0, 4);
x2("う+", "おうううう", 2, 10);
x2(".?", "", 0, 2);
x2(".*", "ぱぴぷぺ", 0, 8);
x2(".+", "", 0, 2);
x2(".+", "いうえか\n", 0, 8);
x2("あ|い", "", 0, 2);
x2("あ|い", "", 0, 2);
x2("あい|いう", "あい", 0, 4);
x2("あい|いう", "いう", 0, 4);
x2("を(?:かき|きく)", "をかき", 0, 6);
x2("を(?:かき|きく)け", "をきくけ", 0, 8);
x2("あい|(?:あう|あを)", "あを", 0, 4);
x2("あ|い|う", "えう", 2, 4);
x2("あ|い|うえ|おかき|く|けこさ|しすせ|そ|たち|つてとなに|ぬね", "しすせ", 0, 6);
n("あ|い|うえ|おかき|く|けこさ|しすせ|そ|たち|つてとなに|ぬね", "すせ");
x2("あ|^わ", "ぶあ", 2, 4);
x2("あ|^を", "をあ", 0, 2);
x2("鬼|\\G車", "け車鬼", 4, 6);
x2("鬼|\\G車", "車鬼", 0, 2);
x2("鬼|\\A車", "b車鬼", 3, 5);
x2("鬼|\\A車", "", 0, 2);
x2("鬼|車\\Z", "車鬼", 2, 4);
x2("鬼|車\\Z", "", 0, 2);
x2("鬼|車\\Z", "\n", 0, 2);
x2("鬼|車\\z", "車鬼", 2, 4);
x2("鬼|車\\z", "", 0, 2);
x2("\\w|\\s", "", 0, 2);
x2("\\w|%", "%お", 0, 1);
x2("\\w|[&$]", "う&", 0, 2);
x2("[い-け]", "", 0, 2);
x2("[い-け]|[^か-こ]", "", 0, 2);
x2("[い-け]|[^か-こ]", "", 0, 2);
x2("[^あ]", "\n", 0, 1);
x2("(?:あ|[う-き])|いを", "うを", 0, 2);
x2("(?:あ|[う-き])|いを", "いを", 0, 4);
x2("あいう|(?=けけ)..ほ", "けけほ", 0, 6);
x2("あいう|(?!けけ)..ほ", "あいほ", 0, 6);
x2("(?=をあ)..あ|(?=をを)..あ", "ををあ", 0, 6);
x2("(?<=あ|いう)い", "いうい", 4, 6);
n("(?>あ|あいえ)う", "あいえう");
x2("(?>あいえ|あ)う", "あいえう", 0, 8);
x2("あ?|い", "", 0, 2);
x2("あ?|い", "", 0, 0);
x2("あ?|い", "", 0, 0);
x2("あ*|い", "ああ", 0, 4);
x2("あ*|い*", "いあ", 0, 0);
x2("あ*|い*", "あい", 0, 2);
x2("[aあ]*|い*", "aあいいい", 0, 3);
x2("あ+|い*", "", 0, 0);
x2("あ+|い*", "いいい", 0, 6);
x2("あ+|い*", "あいいい", 0, 2);
x2("あ+|い*", "aあいいい", 0, 0);
n("あ+|い+", "");
x2("(あ|い)?", "", 0, 2);
x2("(あ|い)*", "いあ", 0, 4);
x2("(あ|い)+", "いあい", 0, 6);
x2("(あい|うあ)+", "うああいうえ", 0, 8);
x2("(あい|うえ)+", "うああいうえ", 4, 12);
x2("(あい|うあ)+", "ああいうあ", 2, 10);
x2("(あい|うあ)+", "あいをうあ", 0, 4);
x2("(あい|うあ)+", "$$zzzzあいをうあ", 6, 10);
x2("(あ|いあい)+", "あいあいあ", 0, 10);
x2("(あ|いあい)+", "いあ", 2, 4);
x2("(あ|いあい)+", "いあああいあ", 2, 8);
x2("(?:あ|い)(?:あ|い)", "あい", 0, 4);
x2("(?:あ*|い*)(?:あ*|い*)", "あああいいい", 0, 6);
x2("(?:あ*|い*)(?:あ+|い+)", "あああいいい", 0, 12);
x2("(?:あ+|い+){2}", "あああいいい", 0, 12);
x2("(?:あ+|い+){1,2}", "あああいいい", 0, 12);
x2("(?:あ+|\\Aい*)うう", "うう", 0, 4);
n("(?:あ+|\\Aい*)うう", "あいうう");
x2("(?:^あ+|い+)*う", "ああいいいあいう", 12, 16);
x2("(?:^あ+|い+)*う", "ああいいいいう", 0, 14);
x2("う{0,}", "うううう", 0, 8);
x2("あ|(?i)c", "C", 0, 1);
x2("(?i)c|あ", "C", 0, 1);
x2("(?i:あ)|a", "a", 0, 1);
n("(?i:あ)|a", "A");
x2("[あいう]?", "あいう", 0, 2);
x2("[あいう]*", "あいう", 0, 6);
x2("[^あいう]*", "あいう", 0, 0);
n("[^あいう]+", "あいう");
x2("あ?\?", "あああ", 0, 0);
x2("いあ?\?い", "いあい", 0, 6);
x2("あ*?", "あああ", 0, 0);
x2("いあ*?", "いああ", 0, 2);
x2("いあ*?い", "いああい", 0, 8);
x2("あ+?", "あああ", 0, 2);
x2("いあ+?", "いああ", 0, 4);
x2("いあ+?い", "いああい", 0, 8);
x2("(?:天?)?\?", "", 0, 0);
x2("(?:天?\?)?", "", 0, 0);
x2("(?:夢?)+?", "夢夢夢", 0, 2);
x2("(?:風+)?\?", "風風風", 0, 0);
x2("(?:雪+)?\?霜", "雪雪雪霜", 0, 8);
x2("(?:あい)?{2}", "", 0, 0);
x2("(?:鬼車)?{2}", "鬼車鬼車鬼", 0, 8);
x2("(?:鬼車)*{0}", "鬼車鬼車鬼", 0, 0);
x2("(?:鬼車){3,}", "鬼車鬼車鬼車鬼車", 0, 16);
n("(?:鬼車){3,}", "鬼車鬼車");
x2("(?:鬼車){2,4}", "鬼車鬼車鬼車", 0, 12);
x2("(?:鬼車){2,4}", "鬼車鬼車鬼車鬼車鬼車", 0, 16);
x2("(?:鬼車){2,4}?", "鬼車鬼車鬼車鬼車鬼車", 0, 8);
x2("(?:鬼車){,}", "鬼車{,}", 0, 7);
x2("(?:かきく)+?{2}", "かきくかきくかきく", 0, 12);
x3("(火)", "", 0, 2, 1);
x3("(火水)", "火水", 0, 4, 1);
x2("((時間))", "時間", 0, 4);
x3("((風水))", "風水", 0, 4, 1);
x3("((昨日))", "昨日", 0, 4, 2);
x3("((((((((((((((((((((量子))))))))))))))))))))", "量子", 0, 4, 20);
x3("(あい)(うえ)", "あいうえ", 0, 4, 1);
x3("(あい)(うえ)", "あいうえ", 4, 8, 2);
x3("()(あ)いう(えおか)きくけこ", "あいうえおかきくけこ", 6, 12, 3);
x3("(()(あ)いう(えおか)きくけこ)", "あいうえおかきくけこ", 6, 12, 4);
x3(".*(フォ)ン・マ(ン()シュタ)イン", "フォン・マンシュタイン", 10, 18, 2);
x2("(^あ)", "", 0, 2);
x3("(あ)|(あ)", "いあ", 2, 4, 1);
x3("(^あ)|(あ)", "いあ", 2, 4, 2);
x3("(あ?)", "あああ", 0, 2, 1);
x3("(ま*)", "ままま", 0, 6, 1);
x3("(と*)", "", 0, 0, 1);
x3("(る+)", "るるるるるるる", 0, 14, 1);
x3("(ふ+|へ*)", "ふふふへへ", 0, 6, 1);
x3("(あ+|い?)", "いいいああ", 0, 2, 1);
x3("(あいう)?", "あいう", 0, 6, 1);
x3("(あいう)*", "あいう", 0, 6, 1);
x3("(あいう)+", "あいう", 0, 6, 1);
x3("(さしす|あいう)+", "あいう", 0, 6, 1);
x3("([なにぬ][かきく]|かきく)+", "かきく", 0, 6, 1);
x3("((?i:あいう))", "あいう", 0, 6, 1);
x3("((?m:あ.う))", "\n", 0, 5, 1);
x3("((?=あん)あ)", "あんい", 0, 2, 1);
x3("あいう|(.あいえ)", "んあいえ", 0, 8, 1);
x3("あ*(.)", "ああああん", 8, 10, 1);
x3("あ*?(.)", "ああああん", 0, 2, 1);
x3("あ*?(ん)", "ああああん", 8, 10, 1);
x3("[いうえ]あ*(.)", "えああああん", 10, 12, 1);
x3("(\\Aいい)うう", "いいうう", 0, 4, 1);
n("(\\Aいい)うう", "んいいうう");
x3("(^いい)うう", "いいうう", 0, 4, 1);
n("(^いい)うう", "んいいうう");
x3("ろろ(るる$)", "ろろるる", 4, 8, 1);
n("ろろ(るる$)", "ろろるるる");
x2("(無)\\1", "無無", 0, 4);
n("(無)\\1", "無武");
x2("(空?)\\1", "空空", 0, 4);
x2("(空?\?)\\1", "空空", 0, 0);
x2("(空*)\\1", "空空空空空", 0, 8);
x3("(空*)\\1", "空空空空空", 0, 4, 1);
x2("あ(い*)\\1", "あいいいい", 0, 10);
x2("あ(い*)\\1", "あい", 0, 2);
x2("(あ*)(い*)\\1\\2", "あああいいあああいい", 0, 20);
x2("(あ*)(い*)\\2", "あああいいいい", 0, 14);
x3("(あ*)(い*)\\2", "あああいいいい", 6, 10, 2);
x2("(((((((ぽ*)ぺ))))))ぴ\\7", "ぽぽぽぺぴぽぽぽ", 0, 16);
x3("(((((((ぽ*)ぺ))))))ぴ\\7", "ぽぽぽぺぴぽぽぽ", 0, 6, 7);
x2("(は)(ひ)(ふ)\\2\\1\\3", "はひふひはふ", 0, 12);
x2("([き-け])\\1", "くく", 0, 4);
x2("(\\w\\d\\s)\\1", "あ5 あ5 ", 0, 8);
n("(\\w\\d\\s)\\1", "あ5 あ5");
x2("(誰?|[あ-う]{3})\\1", "誰?誰?", 0, 8);
x2("...(誰?|[あ-う]{3})\\1", "あaあ誰", 0, 13);
x2("(誰?|[あ-う]{3})\\1", "ういうういう", 0, 12);
x2("(^こ)\\1", "ここ", 0, 4);
n("(^む)\\1", "めむむ");
n("(あ$)\\1", "ああ");
n("(あい\\Z)\\1", "あい");
x2("(あ*\\Z)\\1", "", 2, 2);
x2(".(あ*\\Z)\\1", "いあ", 2, 4);
x3("(.(やいゆ)\\2)", "zやいゆやいゆ", 0, 13, 1);
x3("(.(..\\d.)\\2)", "あ12341234", 0, 10, 1);
x2("((?i:あvず))\\1", "あvずあvず", 0, 10);
x2("(?<愚か>変|\\(\\g<愚か>\\))", "((((((変))))))", 0, 14);
x2("\\A(?:\\g<阿_1>|\\g<云_2>|\\z終了 (?<阿_1>観|自\\g<云_2>自)(?<云_2>在|菩薩\\g<阿_1>菩薩))$", "菩薩自菩薩自在自菩薩自菩薩", 0, 26);
x2("[[ひふ]]", "", 0, 2);
x2("[[いおう]か]", "", 0, 2);
n("[[^あ]]", "");
n("[^[あ]]", "");
x2("[^[^あ]]", "", 0, 2);
x2("[[かきく]&&きく]", "", 0, 2);
n("[[かきく]&&きく]", "");
n("[[かきく]&&きく]", "");
x2("[あ-ん&&い-を&&う-ゑ]", "", 0, 2);
n("[^あ-ん&&い-を&&う-ゑ]", "");
x2("[[^あ&&あ]&&あ-ん]", "", 0, 2);
n("[[^あ&&あ]&&あ-ん]", "");
x2("[[^あ-ん&&いうえお]&&[^う-か]]", "", 0, 2);
n("[[^あ-ん&&いうえお]&&[^う-か]]", "");
x2("[^[^あいう]&&[^うえお]]", "", 0, 2);
x2("[^[^あいう]&&[^うえお]]", "", 0, 2);
n("[^[^あいう]&&[^うえお]]", "");
x2("[あ-&&-あ]", "-", 0, 1);
x2("[^[^a-zあいう]&&[^bcdefgうえお]q-w]", "", 0, 2);
x2("[^[^a-zあいう]&&[^bcdefgうえお]g-w]", "f", 0, 1);
x2("[^[^a-zあいう]&&[^bcdefgうえお]g-w]", "g", 0, 1);
n("[^[^a-zあいう]&&[^bcdefgうえお]g-w]", "2");
x2("a<b>バージョンのダウンロード<\\/b>", "a<b>バージョンのダウンロード</b>", 0, 32);
x2(".<b>バージョンのダウンロード<\\/b>", "a<b>バージョンのダウンロード</b>", 0, 32);
fprintf(stdout,
"\nRESULT SUCC: %d, FAIL: %d, ERROR: %d (by Oniguruma %s)\n",
nsucc, nfail, nerror, onig_version());
#ifndef POSIX_TEST
onig_region_free(region, 1);
onig_end();
#endif
return ((nfail == 0 && nerror == 0) ? 0 : -1);
}

911
oniguruma/testu.c Normal file
View file

@ -0,0 +1,911 @@
/*
* This program was generated by testconv.rb.
*/
#include<stdio.h>
#ifdef POSIX_TEST
#include "onigposix.h"
#else
#include "oniguruma.h"
#endif
static int nsucc = 0;
static int nfail = 0;
static int nerror = 0;
static FILE* err_file;
#ifndef POSIX_TEST
static OnigRegion* region;
static OnigEncoding ENC;
#endif
#define ulen(p) onigenc_str_bytelen_null(ENC, (UChar* )p)
static void uconv(char* from, char* to, int len)
{
int i;
unsigned char c;
char *q;
q = to;
for (i = 0; i < len; i += 2) {
c = (unsigned char )from[i];
if (c == 0) {
c = (unsigned char )from[i+1];
if (c < 0x20 || c >= 0x7f || c == 0x5c || c == 0x22) {
sprintf(q, "\\%03o", c);
q += 4;
}
else {
sprintf(q, "%c", c);
q++;
}
}
else {
sprintf(q, "\\%03o", c);
q += 4;
c = (unsigned char )from[i+1];
sprintf(q, "\\%03o", c);
q += 4;
}
}
*q = 0;
}
static void xx(char* pattern, char* str, int from, int to, int mem, int not)
{
int r;
char cpat[4000], cstr[4000];
#ifdef POSIX_TEST
regex_t reg;
char buf[200];
regmatch_t pmatch[20];
uconv(pattern, cpat, ulen(pattern));
uconv(str, cstr, ulen(str));
r = regcomp(&reg, pattern, REG_EXTENDED | REG_NEWLINE);
if (r) {
regerror(r, &reg, buf, sizeof(buf));
fprintf(err_file, "ERROR: %s\n", buf);
nerror++;
return ;
}
r = regexec(&reg, str, reg.re_nsub + 1, pmatch, 0);
if (r != 0 && r != REG_NOMATCH) {
regerror(r, &reg, buf, sizeof(buf));
fprintf(err_file, "ERROR: %s\n", buf);
nerror++;
return ;
}
if (r == REG_NOMATCH) {
if (not) {
fprintf(stdout, "OK(N): /%s/ '%s'\n", cpat, cstr);
nsucc++;
}
else {
fprintf(stdout, "FAIL: /%s/ '%s'\n", cpat, cstr);
nfail++;
}
}
else {
if (not) {
fprintf(stdout, "FAIL(N): /%s/ '%s'\n", cpat, cstr);
nfail++;
}
else {
if (pmatch[mem].rm_so == from && pmatch[mem].rm_eo == to) {
fprintf(stdout, "OK: /%s/ '%s'\n", cpat, cstr);
nsucc++;
}
else {
fprintf(stdout, "FAIL: /%s/ '%s' %d-%d : %d-%d\n", cpat, cstr,
from, to, pmatch[mem].rm_so, pmatch[mem].rm_eo);
nfail++;
}
}
}
regfree(&reg);
#else
regex_t* reg;
OnigCompileInfo ci;
OnigErrorInfo einfo;
uconv(pattern, cpat, ulen(pattern));
uconv(str, cstr, ulen(str));
#if 0
r = onig_new(&reg, (UChar* )pattern, (UChar* )(pattern + ulen(pattern)),
ONIG_OPTION_DEFAULT, ENC, ONIG_SYNTAX_DEFAULT, &einfo);
#else
ci.num_of_elements = 5;
ci.pattern_enc = ENC;
ci.target_enc = ENC;
ci.syntax = ONIG_SYNTAX_DEFAULT;
ci.option = ONIG_OPTION_DEFAULT;
ci.case_fold_flag = ONIGENC_CASE_FOLD_DEFAULT;
r = onig_new_deluxe(&reg, (UChar* )pattern,
(UChar* )(pattern + ulen(pattern)),
&ci, &einfo);
#endif
if (r) {
char s[ONIG_MAX_ERROR_MESSAGE_LEN];
onig_error_code_to_str(s, r, &einfo);
fprintf(err_file, "ERROR: %s\n", s);
nerror++;
return ;
}
r = onig_search(reg, (UChar* )str, (UChar* )(str + ulen(str)),
(UChar* )str, (UChar* )(str + ulen(str)),
region, ONIG_OPTION_NONE);
if (r < ONIG_MISMATCH) {
char s[ONIG_MAX_ERROR_MESSAGE_LEN];
onig_error_code_to_str(s, r);
fprintf(err_file, "ERROR: %s\n", s);
nerror++;
return ;
}
if (r == ONIG_MISMATCH) {
if (not) {
fprintf(stdout, "OK(N): /%s/ '%s'\n", cpat, cstr);
nsucc++;
}
else {
fprintf(stdout, "FAIL: /%s/ '%s'\n", cpat, cstr);
nfail++;
}
}
else {
if (not) {
fprintf(stdout, "FAIL(N): /%s/ '%s'\n", cpat, cstr);
nfail++;
}
else {
if (region->beg[mem] == from && region->end[mem] == to) {
fprintf(stdout, "OK: /%s/ '%s'\n", cpat, cstr);
nsucc++;
}
else {
fprintf(stdout, "FAIL: /%s/ '%s' %d-%d : %d-%d\n", cpat, cstr,
from, to, region->beg[mem], region->end[mem]);
nfail++;
}
}
}
onig_free(reg);
#endif
}
static void x2(char* pattern, char* str, int from, int to)
{
xx(pattern, str, from, to, 0, 0);
}
static void x3(char* pattern, char* str, int from, int to, int mem)
{
xx(pattern, str, from, to, mem, 0);
}
static void n(char* pattern, char* str)
{
xx(pattern, str, 0, 0, 0, 1);
}
extern int main(int argc, char* argv[])
{
err_file = stdout;
#ifndef POSIX_TEST
region = onig_region_new();
#endif
#ifdef POSIX_TEST
reg_set_encoding(REG_POSIX_ENCODING_UTF16_BE);
#else
ENC = ONIG_ENCODING_UTF16_BE;
#endif
x2("\000\000", "\000\000", 0, 0);
x2("\000^\000\000", "\000\000", 0, 0);
x2("\000$\000\000", "\000\000", 0, 0);
x2("\000\134\000G\000\000", "\000\000", 0, 0);
x2("\000\134\000A\000\000", "\000\000", 0, 0);
x2("\000\134\000Z\000\000", "\000\000", 0, 0);
x2("\000\134\000z\000\000", "\000\000", 0, 0);
x2("\000^\000$\000\000", "\000\000", 0, 0);
x2("\000\134\000c\000a\000\000", "\000\001\000\000", 0, 2);
x2("\000\134\000C\000-\000b\000\000", "\000\002\000\000", 0, 2);
x2("\000\134\000c\000\134\000\134\000\000", "\000\034\000\000", 0, 2);
x2("\000q\000[\000\134\000c\000\134\000\134\000]\000\000", "\000q\000\034\000\000", 0, 4);
x2("\000\000", "\000a\000\000", 0, 0);
x2("\000a\000\000", "\000a\000\000", 0, 2);
x2("\000\134\000x\0000\0000\000\134\000x\0006\0001\000\000", "\000a\000\000", 0, 2);
x2("\000a\000a\000\000", "\000a\000a\000\000", 0, 4);
x2("\000a\000a\000a\000\000", "\000a\000a\000a\000\000", 0, 6);
x2("\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000\000", "\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000\000", 0, 70);
x2("\000a\000b\000\000", "\000a\000b\000\000", 0, 4);
x2("\000b\000\000", "\000a\000b\000\000", 2, 4);
x2("\000b\000c\000\000", "\000a\000b\000c\000\000", 2, 6);
x2("\000(\000?\000i\000:\000#\000R\000E\000T\000#\000)\000\000", "\000#\000I\000N\000S\000#\000#\000R\000E\000T\000#\000\000", 10, 20);
x2("\000\134\0000\0000\0000\000\134\0001\0007\000\000", "\000\017\000\000", 0, 2);
x2("\000\134\000x\0000\0000\000\134\000x\0001\000f\000\000", "\000\037\000\000", 0, 2);
x2("\000a\000(\000?\000#\000.\000.\000.\000.\000\134\000\134\000J\000J\000J\000J\000)\000b\000\000", "\000a\000b\000\000", 0, 4);
x2("\000(\000?\000x\000)\000 \000 \000G\000 \000(\000o\000 \000O\000(\000?\000-\000x\000)\000o\000O\000)\000 \000g\000 \000L\000\000", "\000G\000o\000O\000o\000O\000g\000L\000e\000\000", 0, 14);
x2("\000.\000\000", "\000a\000\000", 0, 2);
n("\000.\000\000", "\000\000");
x2("\000.\000.\000\000", "\000a\000b\000\000", 0, 4);
x2("\000\134\000w\000\000", "\000e\000\000", 0, 2);
n("\000\134\000W\000\000", "\000e\000\000");
x2("\000\134\000s\000\000", "\000 \000\000", 0, 2);
x2("\000\134\000S\000\000", "\000b\000\000", 0, 2);
x2("\000\134\000d\000\000", "\0004\000\000", 0, 2);
n("\000\134\000D\000\000", "\0004\000\000");
x2("\000\134\000b\000\000", "\000z\000 \000\000", 0, 0);
x2("\000\134\000b\000\000", "\000 \000z\000\000", 2, 2);
x2("\000\134\000B\000\000", "\000z\000z\000 \000\000", 2, 2);
x2("\000\134\000B\000\000", "\000z\000 \000\000", 4, 4);
x2("\000\134\000B\000\000", "\000 \000z\000\000", 0, 0);
x2("\000[\000a\000b\000]\000\000", "\000b\000\000", 0, 2);
n("\000[\000a\000b\000]\000\000", "\000c\000\000");
x2("\000[\000a\000-\000z\000]\000\000", "\000t\000\000", 0, 2);
n("\000[\000^\000a\000]\000\000", "\000a\000\000");
x2("\000[\000^\000a\000]\000\000", "\000\012\000\000", 0, 2);
x2("\000[\000]\000]\000\000", "\000]\000\000", 0, 2);
n("\000[\000^\000]\000]\000\000", "\000]\000\000");
x2("\000[\000\134\000^\000]\000+\000\000", "\0000\000^\000^\0001\000\000", 2, 6);
x2("\000[\000b\000-\000]\000\000", "\000b\000\000", 0, 2);
x2("\000[\000b\000-\000]\000\000", "\000-\000\000", 0, 2);
x2("\000[\000\134\000w\000]\000\000", "\000z\000\000", 0, 2);
n("\000[\000\134\000w\000]\000\000", "\000 \000\000");
x2("\000[\000\134\000W\000]\000\000", "\000b\000$\000\000", 2, 4);
x2("\000[\000\134\000d\000]\000\000", "\0005\000\000", 0, 2);
n("\000[\000\134\000d\000]\000\000", "\000e\000\000");
x2("\000[\000\134\000D\000]\000\000", "\000t\000\000", 0, 2);
n("\000[\000\134\000D\000]\000\000", "\0003\000\000");
x2("\000[\000\134\000s\000]\000\000", "\000 \000\000", 0, 2);
n("\000[\000\134\000s\000]\000\000", "\000a\000\000");
x2("\000[\000\134\000S\000]\000\000", "\000b\000\000", 0, 2);
n("\000[\000\134\000S\000]\000\000", "\000 \000\000");
x2("\000[\000\134\000w\000\134\000d\000]\000\000", "\0002\000\000", 0, 2);
n("\000[\000\134\000w\000\134\000d\000]\000\000", "\000 \000\000");
x2("\000[\000[\000:\000u\000p\000p\000e\000r\000:\000]\000]\000\000", "\000B\000\000", 0, 2);
x2("\000[\000*\000[\000:\000x\000d\000i\000g\000i\000t\000:\000]\000+\000]\000\000", "\000+\000\000", 0, 2);
x2("\000[\000*\000[\000:\000x\000d\000i\000g\000i\000t\000:\000]\000+\000]\000\000", "\000G\000H\000I\000K\000K\000-\0009\000+\000*\000\000", 12, 14);
x2("\000[\000*\000[\000:\000x\000d\000i\000g\000i\000t\000:\000]\000+\000]\000\000", "\000-\000@\000^\000+\000\000", 6, 8);
n("\000[\000[\000:\000u\000p\000p\000e\000r\000]\000]\000\000", "\000A\000\000");
x2("\000[\000[\000:\000u\000p\000p\000e\000r\000]\000]\000\000", "\000:\000\000", 0, 2);
x2("\000[\000\134\0000\0000\0000\000\134\0000\0004\0004\000-\000\134\0000\0000\0000\000\134\0000\0004\0007\000]\000\000", "\000&\000\000", 0, 2);
x2("\000[\000\134\000x\0000\0000\000\134\000x\0005\000a\000-\000\134\000x\0000\0000\000\134\000x\0005\000c\000]\000\000", "\000[\000\000", 0, 2);
x2("\000[\000\134\000x\0000\0000\000\134\000x\0006\000A\000-\000\134\000x\0000\0000\000\134\000x\0006\000D\000]\000\000", "\000l\000\000", 0, 2);
n("\000[\000\134\000x\0000\0000\000\134\000x\0006\000A\000-\000\134\000x\0000\0000\000\134\000x\0006\000D\000]\000\000", "\000n\000\000");
n("\000^\000[\0000\000-\0009\000A\000-\000F\000]\000+\000 \0000\000+\000 \000U\000N\000D\000E\000F\000 \000\000", "\0007\0005\000F\000 \0000\0000\0000\0000\0000\0000\0000\0000\000 \000S\000E\000C\000T\0001\0004\000A\000 \000n\000o\000t\000y\000p\000e\000 \000(\000)\000 \000 \000 \000 \000E\000x\000t\000e\000r\000n\000a\000l\000 \000 \000 \000 \000|\000 \000_\000r\000b\000_\000a\000p\000p\000l\000y\000\000");
x2("\000[\000\134\000[\000]\000\000", "\000[\000\000", 0, 2);
x2("\000[\000\134\000]\000]\000\000", "\000]\000\000", 0, 2);
x2("\000[\000&\000]\000\000", "\000&\000\000", 0, 2);
x2("\000[\000[\000a\000b\000]\000]\000\000", "\000b\000\000", 0, 2);
x2("\000[\000[\000a\000b\000]\000c\000]\000\000", "\000c\000\000", 0, 2);
n("\000[\000[\000^\000a\000]\000]\000\000", "\000a\000\000");
n("\000[\000^\000[\000a\000]\000]\000\000", "\000a\000\000");
x2("\000[\000[\000a\000b\000]\000&\000&\000b\000c\000]\000\000", "\000b\000\000", 0, 2);
n("\000[\000[\000a\000b\000]\000&\000&\000b\000c\000]\000\000", "\000a\000\000");
n("\000[\000[\000a\000b\000]\000&\000&\000b\000c\000]\000\000", "\000c\000\000");
x2("\000[\000a\000-\000z\000&\000&\000b\000-\000y\000&\000&\000c\000-\000x\000]\000\000", "\000w\000\000", 0, 2);
n("\000[\000^\000a\000-\000z\000&\000&\000b\000-\000y\000&\000&\000c\000-\000x\000]\000\000", "\000w\000\000");
x2("\000[\000[\000^\000a\000&\000&\000a\000]\000&\000&\000a\000-\000z\000]\000\000", "\000b\000\000", 0, 2);
n("\000[\000[\000^\000a\000&\000&\000a\000]\000&\000&\000a\000-\000z\000]\000\000", "\000a\000\000");
x2("\000[\000[\000^\000a\000-\000z\000&\000&\000b\000c\000d\000e\000f\000]\000&\000&\000[\000^\000c\000-\000g\000]\000]\000\000", "\000h\000\000", 0, 2);
n("\000[\000[\000^\000a\000-\000z\000&\000&\000b\000c\000d\000e\000f\000]\000&\000&\000[\000^\000c\000-\000g\000]\000]\000\000", "\000c\000\000");
x2("\000[\000^\000[\000^\000a\000b\000c\000]\000&\000&\000[\000^\000c\000d\000e\000]\000]\000\000", "\000c\000\000", 0, 2);
x2("\000[\000^\000[\000^\000a\000b\000c\000]\000&\000&\000[\000^\000c\000d\000e\000]\000]\000\000", "\000e\000\000", 0, 2);
n("\000[\000^\000[\000^\000a\000b\000c\000]\000&\000&\000[\000^\000c\000d\000e\000]\000]\000\000", "\000f\000\000");
x2("\000[\000a\000-\000&\000&\000-\000a\000]\000\000", "\000-\000\000", 0, 2);
n("\000[\000a\000\134\000-\000&\000&\000\134\000-\000a\000]\000\000", "\000&\000\000");
n("\000\134\000w\000a\000b\000c\000\000", "\000 \000a\000b\000c\000\000");
x2("\000a\000\134\000W\000b\000c\000\000", "\000a\000 \000b\000c\000\000", 0, 8);
x2("\000a\000.\000b\000.\000c\000\000", "\000a\000a\000b\000b\000c\000\000", 0, 10);
x2("\000.\000\134\000w\000b\000\134\000W\000.\000.\000c\000\000", "\000a\000b\000b\000 \000b\000c\000c\000\000", 0, 14);
x2("\000\134\000s\000\134\000w\000z\000z\000z\000\000", "\000 \000z\000z\000z\000z\000\000", 0, 10);
x2("\000a\000a\000.\000b\000\000", "\000a\000a\000b\000b\000\000", 0, 8);
n("\000.\000a\000\000", "\000a\000b\000\000");
x2("\000.\000a\000\000", "\000a\000a\000\000", 0, 4);
x2("\000^\000a\000\000", "\000a\000\000", 0, 2);
x2("\000^\000a\000$\000\000", "\000a\000\000", 0, 2);
x2("\000^\000\134\000w\000$\000\000", "\000a\000\000", 0, 2);
n("\000^\000\134\000w\000$\000\000", "\000 \000\000");
x2("\000^\000\134\000w\000a\000b\000$\000\000", "\000z\000a\000b\000\000", 0, 6);
x2("\000^\000\134\000w\000a\000b\000c\000d\000e\000f\000$\000\000", "\000z\000a\000b\000c\000d\000e\000f\000\000", 0, 14);
x2("\000^\000\134\000w\000.\000.\000.\000d\000e\000f\000$\000\000", "\000z\000a\000b\000c\000d\000e\000f\000\000", 0, 14);
x2("\000\134\000w\000\134\000w\000\134\000s\000\134\000W\000a\000a\000a\000\134\000d\000\000", "\000a\000a\000 \000 \000a\000a\000a\0004\000\000", 0, 16);
x2("\000\134\000A\000\134\000Z\000\000", "\000\000", 0, 0);
x2("\000\134\000A\000x\000y\000z\000\000", "\000x\000y\000z\000\000", 0, 6);
x2("\000x\000y\000z\000\134\000Z\000\000", "\000x\000y\000z\000\000", 0, 6);
x2("\000x\000y\000z\000\134\000z\000\000", "\000x\000y\000z\000\000", 0, 6);
x2("\000a\000\134\000Z\000\000", "\000a\000\000", 0, 2);
x2("\000\134\000G\000a\000z\000\000", "\000a\000z\000\000", 0, 4);
n("\000\134\000G\000z\000\000", "\000b\000z\000a\000\000");
n("\000a\000z\000\134\000G\000\000", "\000a\000z\000\000");
n("\000a\000z\000\134\000A\000\000", "\000a\000z\000\000");
n("\000a\000\134\000A\000z\000\000", "\000a\000z\000\000");
x2("\000\134\000^\000\134\000$\000\000", "\000^\000$\000\000", 0, 4);
x2("\000^\000x\000?\000y\000\000", "\000x\000y\000\000", 0, 4);
x2("\000^\000(\000x\000?\000y\000)\000\000", "\000x\000y\000\000", 0, 4);
x2("\000\134\000w\000\000", "\000_\000\000", 0, 2);
n("\000\134\000W\000\000", "\000_\000\000");
x2("\000(\000?\000=\000z\000)\000z\000\000", "\000z\000\000", 0, 2);
n("\000(\000?\000=\000z\000)\000.\000\000", "\000a\000\000");
x2("\000(\000?\000!\000z\000)\000a\000\000", "\000a\000\000", 0, 2);
n("\000(\000?\000!\000z\000)\000a\000\000", "\000z\000\000");
x2("\000(\000?\000i\000:\000a\000)\000\000", "\000a\000\000", 0, 2);
x2("\000(\000?\000i\000:\000a\000)\000\000", "\000A\000\000", 0, 2);
x2("\000(\000?\000i\000:\000A\000)\000\000", "\000a\000\000", 0, 2);
n("\000(\000?\000i\000:\000A\000)\000\000", "\000b\000\000");
x2("\000(\000?\000i\000:\000[\000A\000-\000Z\000]\000)\000\000", "\000a\000\000", 0, 2);
x2("\000(\000?\000i\000:\000[\000f\000-\000m\000]\000)\000\000", "\000H\000\000", 0, 2);
x2("\000(\000?\000i\000:\000[\000f\000-\000m\000]\000)\000\000", "\000h\000\000", 0, 2);
n("\000(\000?\000i\000:\000[\000f\000-\000m\000]\000)\000\000", "\000e\000\000");
x2("\000(\000?\000i\000:\000[\000A\000-\000c\000]\000)\000\000", "\000D\000\000", 0, 2);
n("\000(\000?\000i\000:\000[\000^\000a\000-\000z\000]\000)\000\000", "\000A\000\000");
n("\000(\000?\000i\000:\000[\000^\000a\000-\000z\000]\000)\000\000", "\000a\000\000");
x2("\000(\000?\000i\000:\000[\000!\000-\000k\000]\000)\000\000", "\000Z\000\000", 0, 2);
x2("\000(\000?\000i\000:\000[\000!\000-\000k\000]\000)\000\000", "\0007\000\000", 0, 2);
x2("\000(\000?\000i\000:\000[\000T\000-\000}\000]\000)\000\000", "\000b\000\000", 0, 2);
x2("\000(\000?\000i\000:\000[\000T\000-\000}\000]\000)\000\000", "\000{\000\000", 0, 2);
x2("\000(\000?\000i\000:\000\134\000?\000a\000)\000\000", "\000?\000A\000\000", 0, 4);
x2("\000(\000?\000i\000:\000\134\000*\000A\000)\000\000", "\000*\000a\000\000", 0, 4);
n("\000.\000\000", "\000\012\000\000");
x2("\000(\000?\000m\000:\000.\000)\000\000", "\000\012\000\000", 0, 2);
x2("\000(\000?\000m\000:\000a\000.\000)\000\000", "\000a\000\012\000\000", 0, 4);
x2("\000(\000?\000m\000:\000.\000b\000)\000\000", "\000a\000\012\000b\000\000", 2, 6);
x2("\000.\000*\000a\000b\000c\000\000", "\000d\000d\000d\000a\000b\000d\000d\000\012\000d\000d\000a\000b\000c\000\000", 16, 26);
x2("\000(\000?\000m\000:\000.\000*\000a\000b\000c\000)\000\000", "\000d\000d\000d\000a\000b\000d\000d\000a\000b\000c\000\000", 0, 20);
n("\000(\000?\000i\000)\000(\000?\000-\000i\000)\000a\000\000", "\000A\000\000");
n("\000(\000?\000i\000)\000(\000?\000-\000i\000:\000a\000)\000\000", "\000A\000\000");
x2("\000a\000?\000\000", "\000\000", 0, 0);
x2("\000a\000?\000\000", "\000b\000\000", 0, 0);
x2("\000a\000?\000\000", "\000a\000\000", 0, 2);
x2("\000a\000*\000\000", "\000\000", 0, 0);
x2("\000a\000*\000\000", "\000a\000\000", 0, 2);
x2("\000a\000*\000\000", "\000a\000a\000a\000\000", 0, 6);
x2("\000a\000*\000\000", "\000b\000a\000a\000a\000a\000\000", 0, 0);
n("\000a\000+\000\000", "\000\000");
x2("\000a\000+\000\000", "\000a\000\000", 0, 2);
x2("\000a\000+\000\000", "\000a\000a\000a\000a\000\000", 0, 8);
x2("\000a\000+\000\000", "\000a\000a\000b\000b\000b\000\000", 0, 4);
x2("\000a\000+\000\000", "\000b\000a\000a\000a\000a\000\000", 2, 10);
x2("\000.\000?\000\000", "\000\000", 0, 0);
x2("\000.\000?\000\000", "\000f\000\000", 0, 2);
x2("\000.\000?\000\000", "\000\012\000\000", 0, 0);
x2("\000.\000*\000\000", "\000\000", 0, 0);
x2("\000.\000*\000\000", "\000a\000b\000c\000d\000e\000\000", 0, 10);
x2("\000.\000+\000\000", "\000z\000\000", 0, 2);
x2("\000.\000+\000\000", "\000z\000d\000s\000w\000e\000r\000\012\000\000", 0, 12);
x2("\000(\000.\000*\000)\000a\000\134\0001\000f\000\000", "\000b\000a\000b\000f\000b\000a\000c\000\000", 0, 8);
x2("\000(\000.\000*\000)\000a\000\134\0001\000f\000\000", "\000b\000a\000c\000b\000a\000b\000f\000\000", 6, 14);
x2("\000(\000(\000.\000*\000)\000a\000\134\0002\000f\000)\000\000", "\000b\000a\000c\000b\000a\000b\000f\000\000", 6, 14);
x2("\000(\000.\000*\000)\000a\000\134\0001\000f\000\000", "\000b\000a\000c\000z\000z\000z\000z\000z\000z\000\012\000b\000a\000z\000z\000\012\000z\000z\000z\000z\000b\000a\000b\000f\000\000", 38, 46);
x2("\000a\000|\000b\000\000", "\000a\000\000", 0, 2);
x2("\000a\000|\000b\000\000", "\000b\000\000", 0, 2);
x2("\000|\000a\000\000", "\000a\000\000", 0, 0);
x2("\000(\000|\000a\000)\000\000", "\000a\000\000", 0, 0);
x2("\000a\000b\000|\000b\000c\000\000", "\000a\000b\000\000", 0, 4);
x2("\000a\000b\000|\000b\000c\000\000", "\000b\000c\000\000", 0, 4);
x2("\000z\000(\000?\000:\000a\000b\000|\000b\000c\000)\000\000", "\000z\000b\000c\000\000", 0, 6);
x2("\000a\000(\000?\000:\000a\000b\000|\000b\000c\000)\000c\000\000", "\000a\000a\000b\000c\000\000", 0, 8);
x2("\000a\000b\000|\000(\000?\000:\000a\000c\000|\000a\000z\000)\000\000", "\000a\000z\000\000", 0, 4);
x2("\000a\000|\000b\000|\000c\000\000", "\000d\000c\000\000", 2, 4);
x2("\000a\000|\000b\000|\000c\000d\000|\000e\000f\000g\000|\000h\000|\000i\000j\000k\000|\000l\000m\000n\000|\000o\000|\000p\000q\000|\000r\000s\000t\000u\000v\000w\000x\000|\000y\000z\000\000", "\000p\000q\000r\000\000", 0, 4);
n("\000a\000|\000b\000|\000c\000d\000|\000e\000f\000g\000|\000h\000|\000i\000j\000k\000|\000l\000m\000n\000|\000o\000|\000p\000q\000|\000r\000s\000t\000u\000v\000w\000x\000|\000y\000z\000\000", "\000m\000n\000\000");
x2("\000a\000|\000^\000z\000\000", "\000b\000a\000\000", 2, 4);
x2("\000a\000|\000^\000z\000\000", "\000z\000a\000\000", 0, 2);
x2("\000a\000|\000\134\000G\000z\000\000", "\000b\000z\000a\000\000", 4, 6);
x2("\000a\000|\000\134\000G\000z\000\000", "\000z\000a\000\000", 0, 2);
x2("\000a\000|\000\134\000A\000z\000\000", "\000b\000z\000a\000\000", 4, 6);
x2("\000a\000|\000\134\000A\000z\000\000", "\000z\000a\000\000", 0, 2);
x2("\000a\000|\000b\000\134\000Z\000\000", "\000b\000a\000\000", 2, 4);
x2("\000a\000|\000b\000\134\000Z\000\000", "\000b\000\000", 0, 2);
x2("\000a\000|\000b\000\134\000z\000\000", "\000b\000a\000\000", 2, 4);
x2("\000a\000|\000b\000\134\000z\000\000", "\000b\000\000", 0, 2);
x2("\000\134\000w\000|\000\134\000s\000\000", "\000 \000\000", 0, 2);
n("\000\134\000w\000|\000\134\000w\000\000", "\000 \000\000");
x2("\000\134\000w\000|\000%\000\000", "\000%\000\000", 0, 2);
x2("\000\134\000w\000|\000[\000&\000$\000]\000\000", "\000&\000\000", 0, 2);
x2("\000[\000b\000-\000d\000]\000|\000[\000^\000e\000-\000z\000]\000\000", "\000a\000\000", 0, 2);
x2("\000(\000?\000:\000a\000|\000[\000c\000-\000f\000]\000)\000|\000b\000z\000\000", "\000d\000z\000\000", 0, 2);
x2("\000(\000?\000:\000a\000|\000[\000c\000-\000f\000]\000)\000|\000b\000z\000\000", "\000b\000z\000\000", 0, 4);
x2("\000a\000b\000c\000|\000(\000?\000=\000z\000z\000)\000.\000.\000f\000\000", "\000z\000z\000f\000\000", 0, 6);
x2("\000a\000b\000c\000|\000(\000?\000!\000z\000z\000)\000.\000.\000f\000\000", "\000a\000b\000f\000\000", 0, 6);
x2("\000(\000?\000=\000z\000a\000)\000.\000.\000a\000|\000(\000?\000=\000z\000z\000)\000.\000.\000a\000\000", "\000z\000z\000a\000\000", 0, 6);
n("\000(\000?\000>\000a\000|\000a\000b\000d\000)\000c\000\000", "\000a\000b\000d\000c\000\000");
x2("\000(\000?\000>\000a\000b\000d\000|\000a\000)\000c\000\000", "\000a\000b\000d\000c\000\000", 0, 8);
x2("\000a\000?\000|\000b\000\000", "\000a\000\000", 0, 2);
x2("\000a\000?\000|\000b\000\000", "\000b\000\000", 0, 0);
x2("\000a\000?\000|\000b\000\000", "\000\000", 0, 0);
x2("\000a\000*\000|\000b\000\000", "\000a\000a\000\000", 0, 4);
x2("\000a\000*\000|\000b\000*\000\000", "\000b\000a\000\000", 0, 0);
x2("\000a\000*\000|\000b\000*\000\000", "\000a\000b\000\000", 0, 2);
x2("\000a\000+\000|\000b\000*\000\000", "\000\000", 0, 0);
x2("\000a\000+\000|\000b\000*\000\000", "\000b\000b\000b\000\000", 0, 6);
x2("\000a\000+\000|\000b\000*\000\000", "\000a\000b\000b\000b\000\000", 0, 2);
n("\000a\000+\000|\000b\000+\000\000", "\000\000");
x2("\000(\000a\000|\000b\000)\000?\000\000", "\000b\000\000", 0, 2);
x2("\000(\000a\000|\000b\000)\000*\000\000", "\000b\000a\000\000", 0, 4);
x2("\000(\000a\000|\000b\000)\000+\000\000", "\000b\000a\000b\000\000", 0, 6);
x2("\000(\000a\000b\000|\000c\000a\000)\000+\000\000", "\000c\000a\000a\000b\000b\000c\000\000", 0, 8);
x2("\000(\000a\000b\000|\000c\000a\000)\000+\000\000", "\000a\000a\000b\000c\000a\000\000", 2, 10);
x2("\000(\000a\000b\000|\000c\000a\000)\000+\000\000", "\000a\000b\000z\000c\000a\000\000", 0, 4);
x2("\000(\000a\000|\000b\000a\000b\000)\000+\000\000", "\000a\000b\000a\000b\000a\000\000", 0, 10);
x2("\000(\000a\000|\000b\000a\000b\000)\000+\000\000", "\000b\000a\000\000", 2, 4);
x2("\000(\000a\000|\000b\000a\000b\000)\000+\000\000", "\000b\000a\000a\000a\000b\000a\000\000", 2, 8);
x2("\000(\000?\000:\000a\000|\000b\000)\000(\000?\000:\000a\000|\000b\000)\000\000", "\000a\000b\000\000", 0, 4);
x2("\000(\000?\000:\000a\000*\000|\000b\000*\000)\000(\000?\000:\000a\000*\000|\000b\000*\000)\000\000", "\000a\000a\000a\000b\000b\000b\000\000", 0, 6);
x2("\000(\000?\000:\000a\000*\000|\000b\000*\000)\000(\000?\000:\000a\000+\000|\000b\000+\000)\000\000", "\000a\000a\000a\000b\000b\000b\000\000", 0, 12);
x2("\000(\000?\000:\000a\000+\000|\000b\000+\000)\000{\0002\000}\000\000", "\000a\000a\000a\000b\000b\000b\000\000", 0, 12);
x2("\000h\000{\0000\000,\000}\000\000", "\000h\000h\000h\000h\000\000", 0, 8);
x2("\000(\000?\000:\000a\000+\000|\000b\000+\000)\000{\0001\000,\0002\000}\000\000", "\000a\000a\000a\000b\000b\000b\000\000", 0, 12);
n("\000a\000x\000{\0002\000}\000*\000a\000\000", "\0000\000a\000x\000x\000x\000a\0001\000\000");
n("\000a\000.\000{\0000\000,\0002\000}\000a\000\000", "\0000\000a\000X\000X\000X\000a\0000\000\000");
n("\000a\000.\000{\0000\000,\0002\000}\000?\000a\000\000", "\0000\000a\000X\000X\000X\000a\0000\000\000");
n("\000a\000.\000{\0000\000,\0002\000}\000?\000a\000\000", "\0000\000a\000X\000X\000X\000X\000a\0000\000\000");
x2("\000^\000a\000{\0002\000,\000}\000?\000a\000$\000\000", "\000a\000a\000a\000\000", 0, 6);
x2("\000^\000[\000a\000-\000z\000]\000{\0002\000,\000}\000?\000$\000\000", "\000a\000a\000a\000\000", 0, 6);
x2("\000(\000?\000:\000a\000+\000|\000\134\000A\000b\000*\000)\000c\000c\000\000", "\000c\000c\000\000", 0, 4);
n("\000(\000?\000:\000a\000+\000|\000\134\000A\000b\000*\000)\000c\000c\000\000", "\000a\000b\000c\000c\000\000");
x2("\000(\000?\000:\000^\000a\000+\000|\000b\000+\000)\000*\000c\000\000", "\000a\000a\000b\000b\000b\000a\000b\000c\000\000", 12, 16);
x2("\000(\000?\000:\000^\000a\000+\000|\000b\000+\000)\000*\000c\000\000", "\000a\000a\000b\000b\000b\000b\000c\000\000", 0, 14);
x2("\000a\000|\000(\000?\000i\000)\000c\000\000", "\000C\000\000", 0, 2);
x2("\000(\000?\000i\000)\000c\000|\000a\000\000", "\000C\000\000", 0, 2);
x2("\000(\000?\000i\000)\000c\000|\000a\000\000", "\000A\000\000", 0, 2);
x2("\000(\000?\000i\000:\000c\000)\000|\000a\000\000", "\000C\000\000", 0, 2);
n("\000(\000?\000i\000:\000c\000)\000|\000a\000\000", "\000A\000\000");
x2("\000[\000a\000b\000c\000]\000?\000\000", "\000a\000b\000c\000\000", 0, 2);
x2("\000[\000a\000b\000c\000]\000*\000\000", "\000a\000b\000c\000\000", 0, 6);
x2("\000[\000^\000a\000b\000c\000]\000*\000\000", "\000a\000b\000c\000\000", 0, 0);
n("\000[\000^\000a\000b\000c\000]\000+\000\000", "\000a\000b\000c\000\000");
x2("\000a\000?\000?\000\000", "\000a\000a\000a\000\000", 0, 0);
x2("\000b\000a\000?\000?\000b\000\000", "\000b\000a\000b\000\000", 0, 6);
x2("\000a\000*\000?\000\000", "\000a\000a\000a\000\000", 0, 0);
x2("\000b\000a\000*\000?\000\000", "\000b\000a\000a\000\000", 0, 2);
x2("\000b\000a\000*\000?\000b\000\000", "\000b\000a\000a\000b\000\000", 0, 8);
x2("\000a\000+\000?\000\000", "\000a\000a\000a\000\000", 0, 2);
x2("\000b\000a\000+\000?\000\000", "\000b\000a\000a\000\000", 0, 4);
x2("\000b\000a\000+\000?\000b\000\000", "\000b\000a\000a\000b\000\000", 0, 8);
x2("\000(\000?\000:\000a\000?\000)\000?\000?\000\000", "\000a\000\000", 0, 0);
x2("\000(\000?\000:\000a\000?\000?\000)\000?\000\000", "\000a\000\000", 0, 0);
x2("\000(\000?\000:\000a\000?\000)\000+\000?\000\000", "\000a\000a\000a\000\000", 0, 2);
x2("\000(\000?\000:\000a\000+\000)\000?\000?\000\000", "\000a\000a\000a\000\000", 0, 0);
x2("\000(\000?\000:\000a\000+\000)\000?\000?\000b\000\000", "\000a\000a\000a\000b\000\000", 0, 8);
x2("\000(\000?\000:\000a\000b\000)\000?\000{\0002\000}\000\000", "\000\000", 0, 0);
x2("\000(\000?\000:\000a\000b\000)\000?\000{\0002\000}\000\000", "\000a\000b\000a\000b\000a\000\000", 0, 8);
x2("\000(\000?\000:\000a\000b\000)\000*\000{\0000\000}\000\000", "\000a\000b\000a\000b\000a\000\000", 0, 0);
x2("\000(\000?\000:\000a\000b\000)\000{\0003\000,\000}\000\000", "\000a\000b\000a\000b\000a\000b\000a\000b\000\000", 0, 16);
n("\000(\000?\000:\000a\000b\000)\000{\0003\000,\000}\000\000", "\000a\000b\000a\000b\000\000");
x2("\000(\000?\000:\000a\000b\000)\000{\0002\000,\0004\000}\000\000", "\000a\000b\000a\000b\000a\000b\000\000", 0, 12);
x2("\000(\000?\000:\000a\000b\000)\000{\0002\000,\0004\000}\000\000", "\000a\000b\000a\000b\000a\000b\000a\000b\000a\000b\000\000", 0, 16);
x2("\000(\000?\000:\000a\000b\000)\000{\0002\000,\0004\000}\000?\000\000", "\000a\000b\000a\000b\000a\000b\000a\000b\000a\000b\000\000", 0, 8);
x2("\000(\000?\000:\000a\000b\000)\000{\000,\000}\000\000", "\000a\000b\000{\000,\000}\000\000", 0, 10);
x2("\000(\000?\000:\000a\000b\000c\000)\000+\000?\000{\0002\000}\000\000", "\000a\000b\000c\000a\000b\000c\000a\000b\000c\000\000", 0, 12);
x2("\000(\000?\000:\000X\000*\000)\000(\000?\000i\000:\000x\000a\000)\000\000", "\000X\000X\000X\000a\000\000", 0, 8);
x2("\000(\000d\000+\000)\000(\000[\000^\000a\000b\000c\000]\000z\000)\000\000", "\000d\000d\000d\000z\000\000", 0, 8);
x2("\000(\000[\000^\000a\000b\000c\000]\000*\000)\000(\000[\000^\000a\000b\000c\000]\000z\000)\000\000", "\000d\000d\000d\000z\000\000", 0, 8);
x2("\000(\000\134\000w\000+\000)\000(\000\134\000w\000z\000)\000\000", "\000d\000d\000d\000z\000\000", 0, 8);
x3("\000(\000a\000)\000\000", "\000a\000\000", 0, 2, 1);
x3("\000(\000a\000b\000)\000\000", "\000a\000b\000\000", 0, 4, 1);
x2("\000(\000(\000a\000b\000)\000)\000\000", "\000a\000b\000\000", 0, 4);
x3("\000(\000(\000a\000b\000)\000)\000\000", "\000a\000b\000\000", 0, 4, 1);
x3("\000(\000(\000a\000b\000)\000)\000\000", "\000a\000b\000\000", 0, 4, 2);
x3("\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000a\000b\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000\000", "\000a\000b\000\000", 0, 4, 20);
x3("\000(\000a\000b\000)\000(\000c\000d\000)\000\000", "\000a\000b\000c\000d\000\000", 0, 4, 1);
x3("\000(\000a\000b\000)\000(\000c\000d\000)\000\000", "\000a\000b\000c\000d\000\000", 4, 8, 2);
x3("\000(\000)\000(\000a\000)\000b\000c\000(\000d\000e\000f\000)\000g\000h\000i\000j\000k\000\000", "\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000\000", 6, 12, 3);
x3("\000(\000(\000)\000(\000a\000)\000b\000c\000(\000d\000e\000f\000)\000g\000h\000i\000j\000k\000)\000\000", "\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000\000", 6, 12, 4);
x2("\000(\000^\000a\000)\000\000", "\000a\000\000", 0, 2);
x3("\000(\000a\000)\000|\000(\000a\000)\000\000", "\000b\000a\000\000", 2, 4, 1);
x3("\000(\000^\000a\000)\000|\000(\000a\000)\000\000", "\000b\000a\000\000", 2, 4, 2);
x3("\000(\000a\000?\000)\000\000", "\000a\000a\000a\000\000", 0, 2, 1);
x3("\000(\000a\000*\000)\000\000", "\000a\000a\000a\000\000", 0, 6, 1);
x3("\000(\000a\000*\000)\000\000", "\000\000", 0, 0, 1);
x3("\000(\000a\000+\000)\000\000", "\000a\000a\000a\000a\000a\000a\000a\000\000", 0, 14, 1);
x3("\000(\000a\000+\000|\000b\000*\000)\000\000", "\000b\000b\000b\000a\000a\000\000", 0, 6, 1);
x3("\000(\000a\000+\000|\000b\000?\000)\000\000", "\000b\000b\000b\000a\000a\000\000", 0, 2, 1);
x3("\000(\000a\000b\000c\000)\000?\000\000", "\000a\000b\000c\000\000", 0, 6, 1);
x3("\000(\000a\000b\000c\000)\000*\000\000", "\000a\000b\000c\000\000", 0, 6, 1);
x3("\000(\000a\000b\000c\000)\000+\000\000", "\000a\000b\000c\000\000", 0, 6, 1);
x3("\000(\000x\000y\000z\000|\000a\000b\000c\000)\000+\000\000", "\000a\000b\000c\000\000", 0, 6, 1);
x3("\000(\000[\000x\000y\000z\000]\000[\000a\000b\000c\000]\000|\000a\000b\000c\000)\000+\000\000", "\000a\000b\000c\000\000", 0, 6, 1);
x3("\000(\000(\000?\000i\000:\000a\000b\000c\000)\000)\000\000", "\000A\000b\000C\000\000", 0, 6, 1);
x2("\000(\000a\000b\000c\000)\000(\000?\000i\000:\000\134\0001\000)\000\000", "\000a\000b\000c\000A\000B\000C\000\000", 0, 12);
x3("\000(\000(\000?\000m\000:\000a\000.\000c\000)\000)\000\000", "\000a\000\012\000c\000\000", 0, 6, 1);
x3("\000(\000(\000?\000=\000a\000z\000)\000a\000)\000\000", "\000a\000z\000b\000\000", 0, 2, 1);
x3("\000a\000b\000c\000|\000(\000.\000a\000b\000d\000)\000\000", "\000z\000a\000b\000d\000\000", 0, 8, 1);
x2("\000(\000?\000:\000a\000b\000c\000)\000|\000(\000A\000B\000C\000)\000\000", "\000a\000b\000c\000\000", 0, 6);
x3("\000(\000?\000i\000:\000(\000a\000b\000c\000)\000)\000|\000(\000z\000z\000z\000)\000\000", "\000A\000B\000C\000\000", 0, 6, 1);
x3("\000a\000*\000(\000.\000)\000\000", "\000a\000a\000a\000a\000z\000\000", 8, 10, 1);
x3("\000a\000*\000?\000(\000.\000)\000\000", "\000a\000a\000a\000a\000z\000\000", 0, 2, 1);
x3("\000a\000*\000?\000(\000c\000)\000\000", "\000a\000a\000a\000a\000c\000\000", 8, 10, 1);
x3("\000[\000b\000c\000d\000]\000a\000*\000(\000.\000)\000\000", "\000c\000a\000a\000a\000a\000z\000\000", 10, 12, 1);
x3("\000(\000\134\000A\000b\000b\000)\000c\000c\000\000", "\000b\000b\000c\000c\000\000", 0, 4, 1);
n("\000(\000\134\000A\000b\000b\000)\000c\000c\000\000", "\000z\000b\000b\000c\000c\000\000");
x3("\000(\000^\000b\000b\000)\000c\000c\000\000", "\000b\000b\000c\000c\000\000", 0, 4, 1);
n("\000(\000^\000b\000b\000)\000c\000c\000\000", "\000z\000b\000b\000c\000c\000\000");
x3("\000c\000c\000(\000b\000b\000$\000)\000\000", "\000c\000c\000b\000b\000\000", 4, 8, 1);
n("\000c\000c\000(\000b\000b\000$\000)\000\000", "\000c\000c\000b\000b\000b\000\000");
n("\000(\000\134\0001\000)\000\000", "\000\000");
n("\000\134\0001\000(\000a\000)\000\000", "\000a\000a\000\000");
n("\000(\000a\000(\000b\000)\000\134\0001\000)\000\134\0002\000+\000\000", "\000a\000b\000a\000b\000b\000\000");
n("\000(\000?\000:\000(\000?\000:\000\134\0001\000|\000z\000)\000(\000a\000)\000)\000+\000$\000\000", "\000z\000a\000a\000\000");
x2("\000(\000?\000:\000(\000?\000:\000\134\0001\000|\000z\000)\000(\000a\000)\000)\000+\000$\000\000", "\000z\000a\000a\000a\000\000", 0, 8);
x2("\000(\000a\000)\000(\000?\000=\000\134\0001\000)\000\000", "\000a\000a\000\000", 0, 2);
n("\000(\000a\000)\000$\000|\000\134\0001\000\000", "\000a\000z\000\000");
x2("\000(\000a\000)\000\134\0001\000\000", "\000a\000a\000\000", 0, 4);
n("\000(\000a\000)\000\134\0001\000\000", "\000a\000b\000\000");
x2("\000(\000a\000?\000)\000\134\0001\000\000", "\000a\000a\000\000", 0, 4);
x2("\000(\000a\000?\000?\000)\000\134\0001\000\000", "\000a\000a\000\000", 0, 0);
x2("\000(\000a\000*\000)\000\134\0001\000\000", "\000a\000a\000a\000a\000a\000\000", 0, 8);
x3("\000(\000a\000*\000)\000\134\0001\000\000", "\000a\000a\000a\000a\000a\000\000", 0, 4, 1);
x2("\000a\000(\000b\000*\000)\000\134\0001\000\000", "\000a\000b\000b\000b\000b\000\000", 0, 10);
x2("\000a\000(\000b\000*\000)\000\134\0001\000\000", "\000a\000b\000\000", 0, 2);
x2("\000(\000a\000*\000)\000(\000b\000*\000)\000\134\0001\000\134\0002\000\000", "\000a\000a\000a\000b\000b\000a\000a\000a\000b\000b\000\000", 0, 20);
x2("\000(\000a\000*\000)\000(\000b\000*\000)\000\134\0002\000\000", "\000a\000a\000a\000b\000b\000b\000b\000\000", 0, 14);
x2("\000(\000(\000(\000(\000(\000(\000(\000a\000*\000)\000b\000)\000)\000)\000)\000)\000)\000c\000\134\0007\000\000", "\000a\000a\000a\000b\000c\000a\000a\000a\000\000", 0, 16);
x3("\000(\000(\000(\000(\000(\000(\000(\000a\000*\000)\000b\000)\000)\000)\000)\000)\000)\000c\000\134\0007\000\000", "\000a\000a\000a\000b\000c\000a\000a\000a\000\000", 0, 6, 7);
x2("\000(\000a\000)\000(\000b\000)\000(\000c\000)\000\134\0002\000\134\0001\000\134\0003\000\000", "\000a\000b\000c\000b\000a\000c\000\000", 0, 12);
x2("\000(\000[\000a\000-\000d\000]\000)\000\134\0001\000\000", "\000c\000c\000\000", 0, 4);
x2("\000(\000\134\000w\000\134\000d\000\134\000s\000)\000\134\0001\000\000", "\000f\0005\000 \000f\0005\000 \000\000", 0, 12);
n("\000(\000\134\000w\000\134\000d\000\134\000s\000)\000\134\0001\000\000", "\000f\0005\000 \000f\0005\000\000");
x2("\000(\000w\000h\000o\000|\000[\000a\000-\000c\000]\000{\0003\000}\000)\000\134\0001\000\000", "\000w\000h\000o\000w\000h\000o\000\000", 0, 12);
x2("\000.\000.\000.\000(\000w\000h\000o\000|\000[\000a\000-\000c\000]\000{\0003\000}\000)\000\134\0001\000\000", "\000a\000b\000c\000w\000h\000o\000w\000h\000o\000\000", 0, 18);
x2("\000(\000w\000h\000o\000|\000[\000a\000-\000c\000]\000{\0003\000}\000)\000\134\0001\000\000", "\000c\000b\000c\000c\000b\000c\000\000", 0, 12);
x2("\000(\000^\000a\000)\000\134\0001\000\000", "\000a\000a\000\000", 0, 4);
n("\000(\000^\000a\000)\000\134\0001\000\000", "\000b\000a\000a\000\000");
n("\000(\000a\000$\000)\000\134\0001\000\000", "\000a\000a\000\000");
n("\000(\000a\000b\000\134\000Z\000)\000\134\0001\000\000", "\000a\000b\000\000");
x2("\000(\000a\000*\000\134\000Z\000)\000\134\0001\000\000", "\000a\000\000", 2, 2);
x2("\000.\000(\000a\000*\000\134\000Z\000)\000\134\0001\000\000", "\000b\000a\000\000", 2, 4);
x3("\000(\000.\000(\000a\000b\000c\000)\000\134\0002\000)\000\000", "\000z\000a\000b\000c\000a\000b\000c\000\000", 0, 14, 1);
x3("\000(\000.\000(\000.\000.\000\134\000d\000.\000)\000\134\0002\000)\000\000", "\000z\0001\0002\0003\0004\0001\0002\0003\0004\000\000", 0, 18, 1);
x2("\000(\000(\000?\000i\000:\000a\000z\000)\000)\000\134\0001\000\000", "\000A\000z\000A\000z\000\000", 0, 8);
n("\000(\000(\000?\000i\000:\000a\000z\000)\000)\000\134\0001\000\000", "\000A\000z\000a\000z\000\000");
x2("\000(\000?\000<\000=\000a\000)\000b\000\000", "\000a\000b\000\000", 2, 4);
n("\000(\000?\000<\000=\000a\000)\000b\000\000", "\000b\000b\000\000");
x2("\000(\000?\000<\000=\000a\000|\000b\000)\000b\000\000", "\000b\000b\000\000", 2, 4);
x2("\000(\000?\000<\000=\000a\000|\000b\000c\000)\000b\000\000", "\000b\000c\000b\000\000", 4, 6);
x2("\000(\000?\000<\000=\000a\000|\000b\000c\000)\000b\000\000", "\000a\000b\000\000", 2, 4);
x2("\000(\000?\000<\000=\000a\000|\000b\000c\000|\000|\000d\000e\000f\000g\000h\000i\000j\000|\000k\000l\000m\000n\000o\000p\000q\000|\000r\000)\000z\000\000", "\000r\000z\000\000", 2, 4);
x2("\000(\000a\000)\000\134\000g\000<\0001\000>\000\000", "\000a\000a\000\000", 0, 4);
x2("\000(\000?\000<\000!\000a\000)\000b\000\000", "\000c\000b\000\000", 2, 4);
n("\000(\000?\000<\000!\000a\000)\000b\000\000", "\000a\000b\000\000");
x2("\000(\000?\000<\000!\000a\000|\000b\000c\000)\000b\000\000", "\000b\000b\000b\000\000", 0, 2);
n("\000(\000?\000<\000!\000a\000|\000b\000c\000)\000z\000\000", "\000b\000c\000z\000\000");
x2("\000(\000?\000<\000n\000a\000m\000e\0001\000>\000a\000)\000\000", "\000a\000\000", 0, 2);
x2("\000(\000?\000<\000n\000a\000m\000e\000_\0002\000>\000a\000b\000)\000\134\000g\000<\000n\000a\000m\000e\000_\0002\000>\000\000", "\000a\000b\000a\000b\000\000", 0, 8);
x2("\000(\000?\000<\000n\000a\000m\000e\000_\0003\000>\000.\000z\000v\000.\000)\000\134\000k\000<\000n\000a\000m\000e\000_\0003\000>\000\000", "\000a\000z\000v\000b\000a\000z\000v\000b\000\000", 0, 16);
x2("\000(\000?\000<\000=\000\134\000g\000<\000a\000b\000>\000)\000|\000-\000\134\000z\000E\000N\000D\000 \000(\000?\000<\000a\000b\000>\000X\000y\000Z\000)\000\000", "\000X\000y\000Z\000\000", 6, 6);
x2("\000(\000?\000<\000n\000>\000|\000a\000\134\000g\000<\000n\000>\000)\000+\000\000", "\000\000", 0, 0);
x2("\000(\000?\000<\000n\000>\000|\000\134\000(\000\134\000g\000<\000n\000>\000\134\000)\000)\000+\000$\000\000", "\000(\000)\000(\000(\000)\000)\000\000", 0, 12);
x3("\000\134\000g\000<\000n\000>\000(\000?\000<\000n\000>\000.\000)\000{\0000\000}\000\000", "\000X\000\000", 0, 2, 1);
x2("\000\134\000g\000<\000n\000>\000(\000a\000b\000c\000|\000d\000f\000(\000?\000<\000n\000>\000.\000Y\000Z\000)\000{\0002\000,\0008\000}\000)\000{\0000\000}\000\000", "\000X\000Y\000Z\000\000", 0, 6);
x2("\000\134\000A\000(\000?\000<\000n\000>\000(\000a\000\134\000g\000<\000n\000>\000)\000|\000)\000\134\000z\000\000", "\000a\000a\000a\000a\000\000", 0, 8);
x2("\000(\000?\000<\000n\000>\000|\000\134\000g\000<\000m\000>\000\134\000g\000<\000n\000>\000)\000\134\000z\000|\000\134\000z\000E\000N\000D\000 \000(\000?\000<\000m\000>\000a\000|\000(\000b\000)\000\134\000g\000<\000m\000>\000)\000\000", "\000b\000b\000b\000b\000a\000b\000b\000a\000\000", 0, 16);
x2("\000(\000?\000<\000n\000a\000m\000e\0001\0002\0004\0000\000>\000\134\000w\000+\000\134\000s\000x\000)\000a\000+\000\134\000k\000<\000n\000a\000m\000e\0001\0002\0004\0000\000>\000\000", "\000 \000 \000f\000g\000 \000x\000a\000a\000a\000a\000a\000a\000a\000a\000f\000g\000 \000x\000\000", 4, 36);
x3("\000(\000z\000)\000(\000)\000(\000)\000(\000?\000<\000_\0009\000>\000a\000)\000\134\000g\000<\000_\0009\000>\000\000", "\000z\000a\000a\000\000", 4, 6, 1);
x2("\000(\000.\000)\000(\000(\000(\000?\000<\000_\000>\000a\000)\000)\000)\000\134\000k\000<\000_\000>\000\000", "\000z\000a\000a\000\000", 0, 6);
x2("\000(\000(\000?\000<\000n\000a\000m\000e\0001\000>\000\134\000d\000)\000|\000(\000?\000<\000n\000a\000m\000e\0002\000>\000\134\000w\000)\000)\000(\000\134\000k\000<\000n\000a\000m\000e\0001\000>\000|\000\134\000k\000<\000n\000a\000m\000e\0002\000>\000)\000\000", "\000f\000f\000\000", 0, 4);
x2("\000(\000?\000:\000(\000?\000<\000x\000>\000)\000|\000(\000?\000<\000x\000>\000e\000f\000g\000)\000)\000\134\000k\000<\000x\000>\000\000", "\000\000", 0, 0);
x2("\000(\000?\000:\000(\000?\000<\000x\000>\000a\000b\000c\000)\000|\000(\000?\000<\000x\000>\000e\000f\000g\000)\000)\000\134\000k\000<\000x\000>\000\000", "\000a\000b\000c\000e\000f\000g\000e\000f\000g\000\000", 6, 18);
n("\000(\000?\000:\000(\000?\000<\000x\000>\000a\000b\000c\000)\000|\000(\000?\000<\000x\000>\000e\000f\000g\000)\000)\000\134\000k\000<\000x\000>\000\000", "\000a\000b\000c\000e\000f\000g\000\000");
x2("\000(\000?\000:\000(\000?\000<\000n\0001\000>\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000)\000)\000\134\000k\000<\000n\0001\000>\000$\000\000", "\000a\000-\000p\000y\000u\000m\000p\000y\000u\000m\000\000", 4, 20);
x3("\000(\000?\000:\000(\000?\000<\000n\0001\000>\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000)\000)\000\134\000k\000<\000n\0001\000>\000$\000\000", "\000x\000x\000x\000x\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000\000", 8, 36, 14);
x3("\000(\000?\000<\000n\000a\000m\000e\0001\000>\000)\000(\000?\000<\000n\000a\000m\000e\0002\000>\000)\000(\000?\000<\000n\000a\000m\000e\0003\000>\000)\000(\000?\000<\000n\000a\000m\000e\0004\000>\000)\000(\000?\000<\000n\000a\000m\000e\0005\000>\000)\000(\000?\000<\000n\000a\000m\000e\0006\000>\000)\000(\000?\000<\000n\000a\000m\000e\0007\000>\000)\000(\000?\000<\000n\000a\000m\000e\0008\000>\000)\000(\000?\000<\000n\000a\000m\000e\0009\000>\000)\000(\000?\000<\000n\000a\000m\000e\0001\0000\000>\000)\000(\000?\000<\000n\000a\000m\000e\0001\0001\000>\000)\000(\000?\000<\000n\000a\000m\000e\0001\0002\000>\000)\000(\000?\000<\000n\000a\000m\000e\0001\0003\000>\000)\000(\000?\000<\000n\000a\000m\000e\0001\0004\000>\000)\000(\000?\000<\000n\000a\000m\000e\0001\0005\000>\000)\000(\000?\000<\000n\000a\000m\000e\0001\0006\000>\000a\000a\000a\000)\000(\000?\000<\000n\000a\000m\000e\0001\0007\000>\000)\000$\000\000", "\000a\000a\000a\000\000", 0, 6, 16);
x2("\000(\000?\000<\000f\000o\000o\000>\000a\000|\000\134\000(\000\134\000g\000<\000f\000o\000o\000>\000\134\000)\000)\000\000", "\000a\000\000", 0, 2);
x2("\000(\000?\000<\000f\000o\000o\000>\000a\000|\000\134\000(\000\134\000g\000<\000f\000o\000o\000>\000\134\000)\000)\000\000", "\000(\000(\000(\000(\000(\000(\000a\000)\000)\000)\000)\000)\000)\000\000", 0, 26);
x3("\000(\000?\000<\000f\000o\000o\000>\000a\000|\000\134\000(\000\134\000g\000<\000f\000o\000o\000>\000\134\000)\000)\000\000", "\000(\000(\000(\000(\000(\000(\000(\000(\000a\000)\000)\000)\000)\000)\000)\000)\000)\000\000", 0, 34, 1);
x2("\000\134\000g\000<\000b\000a\000r\000>\000|\000\134\000z\000E\000N\000D\000(\000?\000<\000b\000a\000r\000>\000.\000*\000a\000b\000c\000$\000)\000\000", "\000a\000b\000c\000x\000x\000x\000a\000b\000c\000\000", 0, 18);
x2("\000\134\000g\000<\0001\000>\000|\000\134\000z\000E\000N\000D\000(\000.\000a\000.\000)\000\000", "\000b\000a\000c\000\000", 0, 6);
x3("\000\134\000g\000<\000_\000A\000>\000\134\000g\000<\000_\000A\000>\000|\000\134\000z\000E\000N\000D\000(\000.\000a\000.\000)\000(\000?\000<\000_\000A\000>\000.\000b\000.\000)\000\000", "\000x\000b\000x\000y\000b\000y\000\000", 6, 12, 1);
x2("\000\134\000A\000(\000?\000:\000\134\000g\000<\000p\000o\000n\000>\000|\000\134\000g\000<\000p\000a\000n\000>\000|\000\134\000z\000E\000N\000D\000 \000 \000(\000?\000<\000p\000a\000n\000>\000a\000|\000c\000\134\000g\000<\000p\000o\000n\000>\000c\000)\000(\000?\000<\000p\000o\000n\000>\000b\000|\000d\000\134\000g\000<\000p\000a\000n\000>\000d\000)\000)\000$\000\000", "\000c\000d\000c\000b\000c\000d\000c\000\000", 0, 14);
x2("\000\134\000A\000(\000?\000<\000n\000>\000|\000a\000\134\000g\000<\000m\000>\000)\000\134\000z\000|\000\134\000z\000E\000N\000D\000 \000(\000?\000<\000m\000>\000\134\000g\000<\000n\000>\000)\000\000", "\000a\000a\000a\000a\000\000", 0, 8);
x2("\000(\000?\000<\000n\000>\000(\000a\000|\000b\000\134\000g\000<\000n\000>\000c\000)\000{\0003\000,\0005\000}\000)\000\000", "\000b\000a\000a\000a\000a\000c\000a\000\000", 2, 10);
x2("\000(\000?\000<\000n\000>\000(\000a\000|\000b\000\134\000g\000<\000n\000>\000c\000)\000{\0003\000,\0005\000}\000)\000\000", "\000b\000a\000a\000a\000a\000c\000a\000a\000a\000a\000a\000\000", 0, 20);
x2("\000(\000?\000<\000p\000a\000r\000e\000>\000\134\000(\000(\000[\000^\000\134\000(\000\134\000)\000]\000+\000+\000|\000\134\000g\000<\000p\000a\000r\000e\000>\000)\000*\000+\000\134\000)\000)\000\000", "\000(\000(\000a\000)\000)\000\000", 0, 10);
x2("\000(\000)\000*\000\134\0001\000\000", "\000\000", 0, 0);
x2("\000(\000?\000:\000(\000)\000|\000(\000)\000)\000*\000\134\0001\000\134\0002\000\000", "\000\000", 0, 0);
x3("\000(\000?\000:\000\134\0001\000a\000|\000(\000)\000)\000*\000\000", "\000a\000\000", 0, 0, 1);
x2("\000x\000(\000(\000.\000)\000*\000)\000*\000x\000\000", "\0000\000x\0001\000x\0002\000x\0003\000\000", 2, 12);
x2("\000x\000(\000(\000.\000)\000*\000)\000*\000x\000(\000?\000i\000:\000\134\0001\000)\000\134\000Z\000\000", "\0000\000x\0001\000x\0002\000x\0001\000X\0002\000\000", 2, 18);
x2("\000(\000?\000:\000(\000)\000|\000(\000)\000|\000(\000)\000|\000(\000)\000|\000(\000)\000|\000(\000)\000)\000*\000\134\0002\000\134\0005\000\000", "\000\000", 0, 0);
x2("\000(\000?\000:\000(\000)\000|\000(\000)\000|\000(\000)\000|\000(\000x\000)\000|\000(\000)\000|\000(\000)\000)\000*\000\134\0002\000b\000\134\0005\000\000", "\000b\000\000", 0, 2);
x2("\217\372\000\000", "\217\372\000\000", 0, 2);
x2("\000\000", "0B\000\000", 0, 0);
x2("0B\000\000", "0B\000\000", 0, 2);
n("0D\000\000", "0B\000\000");
x2("0F0F\000\000", "0F0F\000\000", 0, 4);
x2("0B0D0F\000\000", "0B0D0F\000\000", 0, 6);
x2("0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S\000\000", "0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S\000\000", 0, 70);
x2("0B\000\000", "0D0B\000\000", 2, 4);
x2("0D0F\000\000", "0B0D0F\000\000", 2, 6);
x2("e\207\000\000", "e\207\000\000", 0, 2);
x2("\000.\000\000", "0B\000\000", 0, 2);
x2("\000.\000.\000\000", "0K0M\000\000", 0, 4);
x2("\000\134\000w\000\000", "0J\000\000", 0, 2);
n("\000\134\000W\000\000", "0B\000\000");
x2("\000[\000\134\000W\000]\000\000", "0F\000$\000\000", 2, 4);
x2("\000\134\000S\000\000", "0]\000\000", 0, 2);
x2("\000\134\000S\000\000", "o\042\000\000", 0, 2);
x2("\000\134\000b\000\000", "l\027\000 \000\000", 0, 0);
x2("\000\134\000b\000\000", "\000 0{\000\000", 2, 2);
x2("\000\134\000B\000\000", "0[0]\000 \000\000", 2, 2);
x2("\000\134\000B\000\000", "0F\000 \000\000", 4, 4);
x2("\000\134\000B\000\000", "\000 0D\000\000", 0, 0);
x2("\000[0_0a\000]\000\000", "0a\000\000", 0, 2);
n("\000[0j0k\000]\000\000", "0l\000\000");
x2("\000[0F\000-0J\000]\000\000", "0H\000\000", 0, 2);
n("\000[\000^0Q\000]\000\000", "0Q\000\000");
x2("\000[\000\134\000w\000]\000\000", "0m\000\000", 0, 2);
n("\000[\000\134\000d\000]\000\000", "0u\000\000");
x2("\000[\000\134\000D\000]\000\000", "0o\000\000", 0, 2);
n("\000[\000\134\000s\000]\000\000", "0O\000\000");
x2("\000[\000\134\000S\000]\000\000", "0x\000\000", 0, 2);
x2("\000[\000\134\000w\000\134\000d\000]\000\000", "0\210\000\000", 0, 2);
x2("\000[\000\134\000w\000\134\000d\000]\000\000", "\000 \000 \000 0\210\000\000", 6, 8);
n("\000\134\000w\233<\216\312\000\000", "\000 \233<\216\312\000\000");
x2("\233<\000\134\000W\216\312\000\000", "\233<\000 \216\312\000\000", 0, 6);
x2("0B\000.0D\000.0F\000\000", "0B0B0D0D0F\000\000", 0, 10);
x2("\000.\000\134\000w0F\000\134\000W\000.\000.0^\000\000", "0H0F0F\000 0F0^0^\000\000", 0, 14);
x2("\000\134\000s\000\134\000w0S0S0S\000\000", "\000 0S0S0S0S\000\000", 0, 10);
x2("0B0B\000.0Q\000\000", "0B0B0Q0Q\000\000", 0, 8);
n("\000.0D\000\000", "0D0H\000\000");
x2("\000.0J\000\000", "0J0J\000\000", 0, 4);
x2("\000^0B\000\000", "0B\000\000", 0, 2);
x2("\000^0\200\000$\000\000", "0\200\000\000", 0, 2);
x2("\000^\000\134\000w\000$\000\000", "0k\000\000", 0, 2);
x2("\000^\000\134\000w0K0M0O0Q0S\000$\000\000", "\000z0K0M0O0Q0S\000\000", 0, 12);
x2("\000^\000\134\000w\000.\000.\000.0F0H0J\000$\000\000", "\000z0B0D0F0F0H0J\000\000", 0, 14);
x2("\000\134\000w\000\134\000w\000\134\000s\000\134\000W0J0J0J\000\134\000d\000\000", "\000a0J\000 \000 0J0J0J\0004\000\000", 0, 16);
x2("\000\134\000A0_0a0d\000\000", "0_0a0d\000\000", 0, 6);
x2("0\2000\2010\202\000\134\000Z\000\000", "0\2000\2010\202\000\000", 0, 6);
x2("0K0M0O\000\134\000z\000\000", "0K0M0O\000\000", 0, 6);
x2("0K0M0O\000\134\000Z\000\000", "0K0M0O\000\012\000\000", 0, 6);
x2("\000\134\000G0}0t\000\000", "0}0t\000\000", 0, 4);
n("\000\134\000G0H\000\000", "0F0H0J\000\000");
n("0h0f\000\134\000G\000\000", "0h0f\000\000");
n("0~0\177\000\134\000A\000\000", "0~0\177\000\000");
n("0~\000\134\000A0\177\000\000", "0~0\177\000\000");
x2("\000(\000?\000=0[\000)0[\000\000", "0[\000\000", 0, 2);
n("\000(\000?\000=0F\000)\000.\000\000", "0D\000\000");
x2("\000(\000?\000!0F\000)0K\000\000", "0K\000\000", 0, 2);
n("\000(\000?\000!0h\000)0B\000\000", "0h\000\000");
x2("\000(\000?\000i\000:0B\000)\000\000", "0B\000\000", 0, 2);
x2("\000(\000?\000i\000:0v0y\000)\000\000", "0v0y\000\000", 0, 4);
n("\000(\000?\000i\000:0D\000)\000\000", "0F\000\000");
x2("\000(\000?\000m\000:0\210\000.\000)\000\000", "0\210\000\012\000\000", 0, 4);
x2("\000(\000?\000m\000:\000.0\201\000)\000\000", "0~\000\0120\201\000\000", 2, 6);
x2("0B\000?\000\000", "\000\000", 0, 0);
x2("Y\011\000?\000\000", "S\026\000\000", 0, 0);
x2("Y\011\000?\000\000", "Y\011\000\000", 0, 2);
x2("\221\317\000*\000\000", "\000\000", 0, 0);
x2("\221\317\000*\000\000", "\221\317\000\000", 0, 2);
x2("[P\000*\000\000", "[P[P[P\000\000", 0, 6);
x2("\231\254\000*\000\000", "\236\177\231\254\231\254\231\254\231\254\000\000", 0, 0);
n("\134q\000+\000\000", "\000\000");
x2("l\263\000+\000\000", "l\263\000\000", 0, 2);
x2("fB\000+\000\000", "fBfBfBfB\000\000", 0, 8);
x2("0H\000+\000\000", "0H0H0F0F0F\000\000", 0, 4);
x2("0F\000+\000\000", "0J0F0F0F0F\000\000", 2, 10);
x2("\000.\000?\000\000", "0_\000\000", 0, 2);
x2("\000.\000*\000\000", "0q0t0w0z\000\000", 0, 8);
x2("\000.\000+\000\000", "0\215\000\000", 0, 2);
x2("\000.\000+\000\000", "0D0F0H0K\000\012\000\000", 0, 8);
x2("0B\000|0D\000\000", "0B\000\000", 0, 2);
x2("0B\000|0D\000\000", "0D\000\000", 0, 2);
x2("0B0D\000|0D0F\000\000", "0B0D\000\000", 0, 4);
x2("0B0D\000|0D0F\000\000", "0D0F\000\000", 0, 4);
x2("0\222\000(\000?\000:0K0M\000|0M0O\000)\000\000", "0\2220K0M\000\000", 0, 6);
x2("0\222\000(\000?\000:0K0M\000|0M0O\000)0Q\000\000", "0\2220M0O0Q\000\000", 0, 8);
x2("0B0D\000|\000(\000?\000:0B0F\000|0B0\222\000)\000\000", "0B0\222\000\000", 0, 4);
x2("0B\000|0D\000|0F\000\000", "0H0F\000\000", 2, 4);
x2("0B\000|0D\000|0F0H\000|0J0K0M\000|0O\000|0Q0S0U\000|0W0Y0[\000|0]\000|0_0a\000|0d0f0h0j0k\000|0l0m\000\000", "0W0Y0[\000\000", 0, 6);
n("0B\000|0D\000|0F0H\000|0J0K0M\000|0O\000|0Q0S0U\000|0W0Y0[\000|0]\000|0_0a\000|0d0f0h0j0k\000|0l0m\000\000", "0Y0[\000\000");
x2("0B\000|\000^0\217\000\000", "0v0B\000\000", 2, 4);
x2("0B\000|\000^0\222\000\000", "0\2220B\000\000", 0, 2);
x2("\233<\000|\000\134\000G\216\312\000\000", "0Q\216\312\233<\000\000", 4, 6);
x2("\233<\000|\000\134\000G\216\312\000\000", "\216\312\233<\000\000", 0, 2);
x2("\233<\000|\000\134\000A\216\312\000\000", "\000b\216\312\233<\000\000", 4, 6);
x2("\233<\000|\000\134\000A\216\312\000\000", "\216\312\000\000", 0, 2);
x2("\233<\000|\216\312\000\134\000Z\000\000", "\216\312\233<\000\000", 2, 4);
x2("\233<\000|\216\312\000\134\000Z\000\000", "\216\312\000\000", 0, 2);
x2("\233<\000|\216\312\000\134\000Z\000\000", "\216\312\000\012\000\000", 0, 2);
x2("\233<\000|\216\312\000\134\000z\000\000", "\216\312\233<\000\000", 2, 4);
x2("\233<\000|\216\312\000\134\000z\000\000", "\216\312\000\000", 0, 2);
x2("\000\134\000w\000|\000\134\000s\000\000", "0J\000\000", 0, 2);
x2("\000\134\000w\000|\000%\000\000", "\000%0J\000\000", 0, 2);
x2("\000\134\000w\000|\000[\000&\000$\000]\000\000", "0F\000&\000\000", 0, 2);
x2("\000[0D\000-0Q\000]\000\000", "0F\000\000", 0, 2);
x2("\000[0D\000-0Q\000]\000|\000[\000^0K\000-0S\000]\000\000", "0B\000\000", 0, 2);
x2("\000[0D\000-0Q\000]\000|\000[\000^0K\000-0S\000]\000\000", "0K\000\000", 0, 2);
x2("\000[\000^0B\000]\000\000", "\000\012\000\000", 0, 2);
x2("\000(\000?\000:0B\000|\000[0F\000-0M\000]\000)\000|0D0\222\000\000", "0F0\222\000\000", 0, 2);
x2("\000(\000?\000:0B\000|\000[0F\000-0M\000]\000)\000|0D0\222\000\000", "0D0\222\000\000", 0, 4);
x2("0B0D0F\000|\000(\000?\000=0Q0Q\000)\000.\000.0{\000\000", "0Q0Q0{\000\000", 0, 6);
x2("0B0D0F\000|\000(\000?\000!0Q0Q\000)\000.\000.0{\000\000", "0B0D0{\000\000", 0, 6);
x2("\000(\000?\000=0\2220B\000)\000.\000.0B\000|\000(\000?\000=0\2220\222\000)\000.\000.0B\000\000", "0\2220\2220B\000\000", 0, 6);
x2("\000(\000?\000<\000=0B\000|0D0F\000)0D\000\000", "0D0F0D\000\000", 4, 6);
n("\000(\000?\000>0B\000|0B0D0H\000)0F\000\000", "0B0D0H0F\000\000");
x2("\000(\000?\000>0B0D0H\000|0B\000)0F\000\000", "0B0D0H0F\000\000", 0, 8);
x2("0B\000?\000|0D\000\000", "0B\000\000", 0, 2);
x2("0B\000?\000|0D\000\000", "0D\000\000", 0, 0);
x2("0B\000?\000|0D\000\000", "\000\000", 0, 0);
x2("0B\000*\000|0D\000\000", "0B0B\000\000", 0, 4);
x2("0B\000*\000|0D\000*\000\000", "0D0B\000\000", 0, 0);
x2("0B\000*\000|0D\000*\000\000", "0B0D\000\000", 0, 2);
x2("\000[\000a0B\000]\000*\000|0D\000*\000\000", "\000a0B0D0D0D\000\000", 0, 4);
x2("0B\000+\000|0D\000*\000\000", "\000\000", 0, 0);
x2("0B\000+\000|0D\000*\000\000", "0D0D0D\000\000", 0, 6);
x2("0B\000+\000|0D\000*\000\000", "0B0D0D0D\000\000", 0, 2);
x2("0B\000+\000|0D\000*\000\000", "\000a0B0D0D0D\000\000", 0, 0);
n("0B\000+\000|0D\000+\000\000", "\000\000");
x2("\000(0B\000|0D\000)\000?\000\000", "0D\000\000", 0, 2);
x2("\000(0B\000|0D\000)\000*\000\000", "0D0B\000\000", 0, 4);
x2("\000(0B\000|0D\000)\000+\000\000", "0D0B0D\000\000", 0, 6);
x2("\000(0B0D\000|0F0B\000)\000+\000\000", "0F0B0B0D0F0H\000\000", 0, 8);
x2("\000(0B0D\000|0F0H\000)\000+\000\000", "0F0B0B0D0F0H\000\000", 4, 12);
x2("\000(0B0D\000|0F0B\000)\000+\000\000", "0B0B0D0F0B\000\000", 2, 10);
x2("\000(0B0D\000|0F0B\000)\000+\000\000", "0B0D0\2220F0B\000\000", 0, 4);
x2("\000(0B0D\000|0F0B\000)\000+\000\000", "\000$\000$\000z\000z\000z\000z0B0D0\2220F0B\000\000", 12, 16);
x2("\000(0B\000|0D0B0D\000)\000+\000\000", "0B0D0B0D0B\000\000", 0, 10);
x2("\000(0B\000|0D0B0D\000)\000+\000\000", "0D0B\000\000", 2, 4);
x2("\000(0B\000|0D0B0D\000)\000+\000\000", "0D0B0B0B0D0B\000\000", 2, 8);
x2("\000(\000?\000:0B\000|0D\000)\000(\000?\000:0B\000|0D\000)\000\000", "0B0D\000\000", 0, 4);
x2("\000(\000?\000:0B\000*\000|0D\000*\000)\000(\000?\000:0B\000*\000|0D\000*\000)\000\000", "0B0B0B0D0D0D\000\000", 0, 6);
x2("\000(\000?\000:0B\000*\000|0D\000*\000)\000(\000?\000:0B\000+\000|0D\000+\000)\000\000", "0B0B0B0D0D0D\000\000", 0, 12);
x2("\000(\000?\000:0B\000+\000|0D\000+\000)\000{\0002\000}\000\000", "0B0B0B0D0D0D\000\000", 0, 12);
x2("\000(\000?\000:0B\000+\000|0D\000+\000)\000{\0001\000,\0002\000}\000\000", "0B0B0B0D0D0D\000\000", 0, 12);
x2("\000(\000?\000:0B\000+\000|\000\134\000A0D\000*\000)0F0F\000\000", "0F0F\000\000", 0, 4);
n("\000(\000?\000:0B\000+\000|\000\134\000A0D\000*\000)0F0F\000\000", "0B0D0F0F\000\000");
x2("\000(\000?\000:\000^0B\000+\000|0D\000+\000)\000*0F\000\000", "0B0B0D0D0D0B0D0F\000\000", 12, 16);
x2("\000(\000?\000:\000^0B\000+\000|0D\000+\000)\000*0F\000\000", "0B0B0D0D0D0D0F\000\000", 0, 14);
x2("0F\000{\0000\000,\000}\000\000", "0F0F0F0F\000\000", 0, 8);
x2("0B\000|\000(\000?\000i\000)\000c\000\000", "\000C\000\000", 0, 2);
x2("\000(\000?\000i\000)\000c\000|0B\000\000", "\000C\000\000", 0, 2);
x2("\000(\000?\000i\000:0B\000)\000|\000a\000\000", "\000a\000\000", 0, 2);
n("\000(\000?\000i\000:0B\000)\000|\000a\000\000", "\000A\000\000");
x2("\000[0B0D0F\000]\000?\000\000", "0B0D0F\000\000", 0, 2);
x2("\000[0B0D0F\000]\000*\000\000", "0B0D0F\000\000", 0, 6);
x2("\000[\000^0B0D0F\000]\000*\000\000", "0B0D0F\000\000", 0, 0);
n("\000[\000^0B0D0F\000]\000+\000\000", "0B0D0F\000\000");
x2("0B\000?\000?\000\000", "0B0B0B\000\000", 0, 0);
x2("0D0B\000?\000?0D\000\000", "0D0B0D\000\000", 0, 6);
x2("0B\000*\000?\000\000", "0B0B0B\000\000", 0, 0);
x2("0D0B\000*\000?\000\000", "0D0B0B\000\000", 0, 2);
x2("0D0B\000*\000?0D\000\000", "0D0B0B0D\000\000", 0, 8);
x2("0B\000+\000?\000\000", "0B0B0B\000\000", 0, 2);
x2("0D0B\000+\000?\000\000", "0D0B0B\000\000", 0, 4);
x2("0D0B\000+\000?0D\000\000", "0D0B0B0D\000\000", 0, 8);
x2("\000(\000?\000:Y)\000?\000)\000?\000?\000\000", "Y)\000\000", 0, 0);
x2("\000(\000?\000:Y)\000?\000?\000)\000?\000\000", "Y)\000\000", 0, 0);
x2("\000(\000?\000:Y\042\000?\000)\000+\000?\000\000", "Y\042Y\042Y\042\000\000", 0, 2);
x2("\000(\000?\000:\230\250\000+\000)\000?\000?\000\000", "\230\250\230\250\230\250\000\000", 0, 0);
x2("\000(\000?\000:\226\352\000+\000)\000?\000?\227\034\000\000", "\226\352\226\352\226\352\227\034\000\000", 0, 8);
x2("\000(\000?\000:0B0D\000)\000?\000{\0002\000}\000\000", "\000\000", 0, 0);
x2("\000(\000?\000:\233<\216\312\000)\000?\000{\0002\000}\000\000", "\233<\216\312\233<\216\312\233<\000\000", 0, 8);
x2("\000(\000?\000:\233<\216\312\000)\000*\000{\0000\000}\000\000", "\233<\216\312\233<\216\312\233<\000\000", 0, 0);
x2("\000(\000?\000:\233<\216\312\000)\000{\0003\000,\000}\000\000", "\233<\216\312\233<\216\312\233<\216\312\233<\216\312\000\000", 0, 16);
n("\000(\000?\000:\233<\216\312\000)\000{\0003\000,\000}\000\000", "\233<\216\312\233<\216\312\000\000");
x2("\000(\000?\000:\233<\216\312\000)\000{\0002\000,\0004\000}\000\000", "\233<\216\312\233<\216\312\233<\216\312\000\000", 0, 12);
x2("\000(\000?\000:\233<\216\312\000)\000{\0002\000,\0004\000}\000\000", "\233<\216\312\233<\216\312\233<\216\312\233<\216\312\233<\216\312\000\000", 0, 16);
x2("\000(\000?\000:\233<\216\312\000)\000{\0002\000,\0004\000}\000?\000\000", "\233<\216\312\233<\216\312\233<\216\312\233<\216\312\233<\216\312\000\000", 0, 8);
x2("\000(\000?\000:\233<\216\312\000)\000{\000,\000}\000\000", "\233<\216\312\000{\000,\000}\000\000", 0, 10);
x2("\000(\000?\000:0K0M0O\000)\000+\000?\000{\0002\000}\000\000", "0K0M0O0K0M0O0K0M0O\000\000", 0, 12);
x3("\000(pk\000)\000\000", "pk\000\000", 0, 2, 1);
x3("\000(pkl4\000)\000\000", "pkl4\000\000", 0, 4, 1);
x2("\000(\000(fB\225\223\000)\000)\000\000", "fB\225\223\000\000", 0, 4);
x3("\000(\000(\230\250l4\000)\000)\000\000", "\230\250l4\000\000", 0, 4, 1);
x3("\000(\000(f(e\345\000)\000)\000\000", "f(e\345\000\000", 0, 4, 2);
x3("\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\221\317[P\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000\000", "\221\317[P\000\000", 0, 4, 20);
x3("\000(0B0D\000)\000(0F0H\000)\000\000", "0B0D0F0H\000\000", 0, 4, 1);
x3("\000(0B0D\000)\000(0F0H\000)\000\000", "0B0D0F0H\000\000", 4, 8, 2);
x3("\000(\000)\000(0B\000)0D0F\000(0H0J0K\000)0M0O0Q0S\000\000", "0B0D0F0H0J0K0M0O0Q0S\000\000", 6, 12, 3);
x3("\000(\000(\000)\000(0B\000)0D0F\000(0H0J0K\000)0M0O0Q0S\000)\000\000", "0B0D0F0H0J0K0M0O0Q0S\000\000", 6, 12, 4);
x3("\000.\000*\000(0\3250\251\000)0\3630\3730\336\000(0\363\000(\000)0\2670\3450\277\000)0\2440\363\000\000", "0\3250\2510\3630\3730\3360\3630\2670\3450\2770\2440\363\000\000", 10, 18, 2);
x2("\000(\000^0B\000)\000\000", "0B\000\000", 0, 2);
x3("\000(0B\000)\000|\000(0B\000)\000\000", "0D0B\000\000", 2, 4, 1);
x3("\000(\000^0B\000)\000|\000(0B\000)\000\000", "0D0B\000\000", 2, 4, 2);
x3("\000(0B\000?\000)\000\000", "0B0B0B\000\000", 0, 2, 1);
x3("\000(0~\000*\000)\000\000", "0~0~0~\000\000", 0, 6, 1);
x3("\000(0h\000*\000)\000\000", "\000\000", 0, 0, 1);
x3("\000(0\213\000+\000)\000\000", "0\2130\2130\2130\2130\2130\2130\213\000\000", 0, 14, 1);
x3("\000(0u\000+\000|0x\000*\000)\000\000", "0u0u0u0x0x\000\000", 0, 6, 1);
x3("\000(0B\000+\000|0D\000?\000)\000\000", "0D0D0D0B0B\000\000", 0, 2, 1);
x3("\000(0B0D0F\000)\000?\000\000", "0B0D0F\000\000", 0, 6, 1);
x3("\000(0B0D0F\000)\000*\000\000", "0B0D0F\000\000", 0, 6, 1);
x3("\000(0B0D0F\000)\000+\000\000", "0B0D0F\000\000", 0, 6, 1);
x3("\000(0U0W0Y\000|0B0D0F\000)\000+\000\000", "0B0D0F\000\000", 0, 6, 1);
x3("\000(\000[0j0k0l\000]\000[0K0M0O\000]\000|0K0M0O\000)\000+\000\000", "0K0M0O\000\000", 0, 6, 1);
x3("\000(\000(\000?\000i\000:0B0D0F\000)\000)\000\000", "0B0D0F\000\000", 0, 6, 1);
x3("\000(\000(\000?\000m\000:0B\000.0F\000)\000)\000\000", "0B\000\0120F\000\000", 0, 6, 1);
x3("\000(\000(\000?\000=0B0\223\000)0B\000)\000\000", "0B0\2230D\000\000", 0, 2, 1);
x3("0B0D0F\000|\000(\000.0B0D0H\000)\000\000", "0\2230B0D0H\000\000", 0, 8, 1);
x3("0B\000*\000(\000.\000)\000\000", "0B0B0B0B0\223\000\000", 8, 10, 1);
x3("0B\000*\000?\000(\000.\000)\000\000", "0B0B0B0B0\223\000\000", 0, 2, 1);
x3("0B\000*\000?\000(0\223\000)\000\000", "0B0B0B0B0\223\000\000", 8, 10, 1);
x3("\000[0D0F0H\000]0B\000*\000(\000.\000)\000\000", "0H0B0B0B0B0\223\000\000", 10, 12, 1);
x3("\000(\000\134\000A0D0D\000)0F0F\000\000", "0D0D0F0F\000\000", 0, 4, 1);
n("\000(\000\134\000A0D0D\000)0F0F\000\000", "0\2230D0D0F0F\000\000");
x3("\000(\000^0D0D\000)0F0F\000\000", "0D0D0F0F\000\000", 0, 4, 1);
n("\000(\000^0D0D\000)0F0F\000\000", "0\2230D0D0F0F\000\000");
x3("0\2150\215\000(0\2130\213\000$\000)\000\000", "0\2150\2150\2130\213\000\000", 4, 8, 1);
n("0\2150\215\000(0\2130\213\000$\000)\000\000", "0\2150\2150\2130\2130\213\000\000");
x2("\000(q!\000)\000\134\0001\000\000", "q!q!\000\000", 0, 4);
n("\000(q!\000)\000\134\0001\000\000", "q!kf\000\000");
x2("\000(zz\000?\000)\000\134\0001\000\000", "zzzz\000\000", 0, 4);
x2("\000(zz\000?\000?\000)\000\134\0001\000\000", "zzzz\000\000", 0, 0);
x2("\000(zz\000*\000)\000\134\0001\000\000", "zzzzzzzzzz\000\000", 0, 8);
x3("\000(zz\000*\000)\000\134\0001\000\000", "zzzzzzzzzz\000\000", 0, 4, 1);
x2("0B\000(0D\000*\000)\000\134\0001\000\000", "0B0D0D0D0D\000\000", 0, 10);
x2("0B\000(0D\000*\000)\000\134\0001\000\000", "0B0D\000\000", 0, 2);
x2("\000(0B\000*\000)\000(0D\000*\000)\000\134\0001\000\134\0002\000\000", "0B0B0B0D0D0B0B0B0D0D\000\000", 0, 20);
x2("\000(0B\000*\000)\000(0D\000*\000)\000\134\0002\000\000", "0B0B0B0D0D0D0D\000\000", 0, 14);
x3("\000(0B\000*\000)\000(0D\000*\000)\000\134\0002\000\000", "0B0B0B0D0D0D0D\000\000", 6, 10, 2);
x2("\000(\000(\000(\000(\000(\000(\000(0}\000*\000)0z\000)\000)\000)\000)\000)\000)0t\000\134\0007\000\000", "0}0}0}0z0t0}0}0}\000\000", 0, 16);
x3("\000(\000(\000(\000(\000(\000(\000(0}\000*\000)0z\000)\000)\000)\000)\000)\000)0t\000\134\0007\000\000", "0}0}0}0z0t0}0}0}\000\000", 0, 6, 7);
x2("\000(0o\000)\000(0r\000)\000(0u\000)\000\134\0002\000\134\0001\000\134\0003\000\000", "0o0r0u0r0o0u\000\000", 0, 12);
x2("\000(\000[0M\000-0Q\000]\000)\000\134\0001\000\000", "0O0O\000\000", 0, 4);
x2("\000(\000\134\000w\000\134\000d\000\134\000s\000)\000\134\0001\000\000", "0B\0005\000 0B\0005\000 \000\000", 0, 12);
n("\000(\000\134\000w\000\134\000d\000\134\000s\000)\000\134\0001\000\000", "0B\0005\000 0B\0005\000\000");
x2("\000(\212\260\377\037\000|\000[0B\000-0F\000]\000{\0003\000}\000)\000\134\0001\000\000", "\212\260\377\037\212\260\377\037\000\000", 0, 8);
x2("\000.\000.\000.\000(\212\260\377\037\000|\000[0B\000-0F\000]\000{\0003\000}\000)\000\134\0001\000\000", "0B\000a0B\212\260\377\037\212\260\377\037\000\000", 0, 14);
x2("\000(\212\260\377\037\000|\000[0B\000-0F\000]\000{\0003\000}\000)\000\134\0001\000\000", "0F0D0F0F0D0F\000\000", 0, 12);
x2("\000(\000^0S\000)\000\134\0001\000\000", "0S0S\000\000", 0, 4);
n("\000(\000^0\200\000)\000\134\0001\000\000", "0\2010\2000\200\000\000");
n("\000(0B\000$\000)\000\134\0001\000\000", "0B0B\000\000");
n("\000(0B0D\000\134\000Z\000)\000\134\0001\000\000", "0B0D\000\000");
x2("\000(0B\000*\000\134\000Z\000)\000\134\0001\000\000", "0B\000\000", 2, 2);
x2("\000.\000(0B\000*\000\134\000Z\000)\000\134\0001\000\000", "0D0B\000\000", 2, 4);
x3("\000(\000.\000(0\2040D0\206\000)\000\134\0002\000)\000\000", "\000z0\2040D0\2060\2040D0\206\000\000", 0, 14, 1);
x3("\000(\000.\000(\000.\000.\000\134\000d\000.\000)\000\134\0002\000)\000\000", "0B\0001\0002\0003\0004\0001\0002\0003\0004\000\000", 0, 18, 1);
x2("\000(\000(\000?\000i\000:0B\000v0Z\000)\000)\000\134\0001\000\000", "0B\000v0Z0B\000v0Z\000\000", 0, 12);
x2("\000(\000?\000<a\0320K\000>Y\011\000|\000\134\000(\000\134\000g\000<a\0320K\000>\000\134\000)\000)\000\000", "\000(\000(\000(\000(\000(\000(Y\011\000)\000)\000)\000)\000)\000)\000\000", 0, 26);
x2("\000\134\000A\000(\000?\000:\000\134\000g\000<\226?\000_\0001\000>\000|\000\134\000g\000<N\221\000_\0002\000>\000|\000\134\000z}BN\206\000 \000 \000(\000?\000<\226?\000_\0001\000>\211\263\000|\201\352\000\134\000g\000<N\221\000_\0002\000>\201\352\000)\000(\000?\000<N\221\000_\0002\000>W(\000|\203\351\205\251\000\134\000g\000<\226?\000_\0001\000>\203\351\205\251\000)\000)\000$\000\000", "\203\351\205\251\201\352\203\351\205\251\201\352W(\201\352\203\351\205\251\201\352\203\351\205\251\000\000", 0, 26);
x2("\000[\000[0r0u\000]\000]\000\000", "0u\000\000", 0, 2);
x2("\000[\000[0D0J0F\000]0K\000]\000\000", "0K\000\000", 0, 2);
n("\000[\000[\000^0B\000]\000]\000\000", "0B\000\000");
n("\000[\000^\000[0B\000]\000]\000\000", "0B\000\000");
x2("\000[\000^\000[\000^0B\000]\000]\000\000", "0B\000\000", 0, 2);
x2("\000[\000[0K0M0O\000]\000&\000&0M0O\000]\000\000", "0O\000\000", 0, 2);
n("\000[\000[0K0M0O\000]\000&\000&0M0O\000]\000\000", "0K\000\000");
n("\000[\000[0K0M0O\000]\000&\000&0M0O\000]\000\000", "0Q\000\000");
x2("\000[0B\000-0\223\000&\000&0D\000-0\222\000&\000&0F\000-0\221\000]\000\000", "0\221\000\000", 0, 2);
n("\000[\000^0B\000-0\223\000&\000&0D\000-0\222\000&\000&0F\000-0\221\000]\000\000", "0\221\000\000");
x2("\000[\000[\000^0B\000&\000&0B\000]\000&\000&0B\000-0\223\000]\000\000", "0D\000\000", 0, 2);
n("\000[\000[\000^0B\000&\000&0B\000]\000&\000&0B\000-0\223\000]\000\000", "0B\000\000");
x2("\000[\000[\000^0B\000-0\223\000&\000&0D0F0H0J\000]\000&\000&\000[\000^0F\000-0K\000]\000]\000\000", "0M\000\000", 0, 2);
n("\000[\000[\000^0B\000-0\223\000&\000&0D0F0H0J\000]\000&\000&\000[\000^0F\000-0K\000]\000]\000\000", "0D\000\000");
x2("\000[\000^\000[\000^0B0D0F\000]\000&\000&\000[\000^0F0H0J\000]\000]\000\000", "0F\000\000", 0, 2);
x2("\000[\000^\000[\000^0B0D0F\000]\000&\000&\000[\000^0F0H0J\000]\000]\000\000", "0H\000\000", 0, 2);
n("\000[\000^\000[\000^0B0D0F\000]\000&\000&\000[\000^0F0H0J\000]\000]\000\000", "0K\000\000");
x2("\000[0B\000-\000&\000&\000-0B\000]\000\000", "\000-\000\000", 0, 2);
x2("\000[\000^\000[\000^\000a\000-\000z0B0D0F\000]\000&\000&\000[\000^\000b\000c\000d\000e\000f\000g0F0H0J\000]\000q\000-\000w\000]\000\000", "0H\000\000", 0, 2);
x2("\000[\000^\000[\000^\000a\000-\000z0B0D0F\000]\000&\000&\000[\000^\000b\000c\000d\000e\000f\000g0F0H0J\000]\000g\000-\000w\000]\000\000", "\000f\000\000", 0, 2);
x2("\000[\000^\000[\000^\000a\000-\000z0B0D0F\000]\000&\000&\000[\000^\000b\000c\000d\000e\000f\000g0F0H0J\000]\000g\000-\000w\000]\000\000", "\000g\000\000", 0, 2);
n("\000[\000^\000[\000^\000a\000-\000z0B0D0F\000]\000&\000&\000[\000^\000b\000c\000d\000e\000f\000g0F0H0J\000]\000g\000-\000w\000]\000\000", "\0002\000\000");
x2("\000a\000<\000b\000>0\3200\3740\2700\3470\3630n0\3000\2460\3630\3550\3740\311\000<\000\134\000/\000b\000>\000\000", "\000a\000<\000b\000>0\3200\3740\2700\3470\3630n0\3000\2460\3630\3550\3740\311\000<\000/\000b\000>\000\000", 0, 40);
x2("\000.\000<\000b\000>0\3200\3740\2700\3470\3630n0\3000\2460\3630\3550\3740\311\000<\000\134\000/\000b\000>\000\000", "\000a\000<\000b\000>0\3200\3740\2700\3470\3630n0\3000\2460\3630\3550\3740\311\000<\000/\000b\000>\000\000", 0, 40);
fprintf(stdout,
"\nRESULT SUCC: %d, FAIL: %d, ERROR: %d (by Oniguruma %s)\n",
nsucc, nfail, nerror, onig_version());
#ifndef POSIX_TEST
onig_region_free(region, 1);
onig_end();
#endif
return ((nfail == 0 && nerror == 0) ? 0 : -1);
}

200
oniguruma/win32/Makefile Normal file
View file

@ -0,0 +1,200 @@
# Oniguruma Makefile for Win32
product_name = oniguruma
CPPFLAGS =
CFLAGS = -O2 -nologo /W3
LDFLAGS =
LOADLIBES =
ARLIB = lib
ARLIB_FLAGS = -nologo
ARDLL = cl
ARDLL_FLAGS = -nologo -LD $(LINKFLAGS) -dll
LINKFLAGS = -link -incremental:no -pdb:none
INSTALL = install -c
CP = copy
CC = cl
DEFS = -DHAVE_CONFIG_H -DNOT_RUBY -DEXPORT
RUBYDIR = ..
subdirs =
libbase = onig
libname = $(libbase)_s.lib
dllname = $(libbase).dll
dlllib = $(libbase).lib
onigheaders = oniguruma.h regint.h regparse.h regenc.h st.h
posixheaders = onigposix.h
headers = $(posixheaders) $(onigheaders)
onigobjs = reggnu.obj regerror.obj regparse.obj regext.obj regcomp.obj \
regexec.obj regenc.obj regsyntax.obj regtrav.obj \
regversion.obj st.obj
posixobjs = regposix.obj regposerr.obj
libobjs = $(onigobjs) $(posixobjs)
jp_objs = $(encdir)\euc_jp.obj $(encdir)\sjis.obj
iso8859_objs = $(encdir)\iso8859_1.obj $(encdir)\iso8859_2.obj \
$(encdir)\iso8859_3.obj $(encdir)\iso8859_4.obj \
$(encdir)\iso8859_5.obj $(encdir)\iso8859_6.obj \
$(encdir)\iso8859_7.obj $(encdir)\iso8859_8.obj \
$(encdir)\iso8859_9.obj $(encdir)\iso8859_10.obj \
$(encdir)\iso8859_11.obj $(encdir)\iso8859_13.obj \
$(encdir)\iso8859_14.obj $(encdir)\iso8859_15.obj \
$(encdir)\iso8859_16.obj
encobjs = $(encdir)\ascii.obj $(encdir)\utf8.obj \
$(encdir)\unicode.obj \
$(encdir)\utf16_be.obj $(encdir)\utf16_le.obj \
$(encdir)\utf32_be.obj $(encdir)\utf32_le.obj \
$(jp_objs) $(iso8859_objs) \
$(encdir)\euc_tw.obj $(encdir)\euc_kr.obj $(encdir)\big5.obj \
$(encdir)\gb18030.obj \
$(encdir)\koi8_r.obj \
$(encdir)\cp1251.obj # $(encdir)\koi8.obj
onigsources = regerror.c regparse.c regext.c regcomp.c regexec.c regenc.c \
regsyntax.c regtrav.c regversion.c reggnu.c st.c
posixsources = regposix.c regposerr.c
libsources = $(posixsources) $(onigsources)
rubysources = $(onigsources)
encdir = enc
patchfiles = re.c.168.patch re.c.181.patch
distfiles = README COPYING HISTORY \
Makefile.in configure.in config.h.in configure \
$(headers) $(libsources) $(patchfiles) \
test.rb testconv.rb
testc = testc
testp = testp
makeargs = $(MFLAGS) CPPFLAGS='$(CPPFLAGS)' CFLAGS='$(CFLAGS)' CC='$(CC)'
.SUFFIXES:
.SUFFIXES: .obj .c .h .ps .dvi .info .texinfo
.c.obj:
$(CC) $(CFLAGS) $(CPPFLAGS) $(DEFS) /I. /I.. /Fo$@ /c $<
# targets
default: all
setup:
$(CP) win32\config.h config.h
$(CP) win32\testc.c testc.c
all: $(libname) $(dllname)
$(libname): $(libobjs) $(encobjs)
$(ARLIB) $(ARLIB_FLAGS) -out:$@ $(libobjs) $(encobjs)
$(dllname): $(libobjs) $(encobjs)
$(ARDLL) $(libobjs) $(encobjs) -Fe$@ $(ARDLL_FLAGS)
regparse.obj: regparse.c $(onigheaders) config.h st.h
regext.obj: regext.c $(onigheaders) config.h
regtrav.obj: regtrav.c $(onigheaders) config.h
regcomp.obj: regcomp.c $(onigheaders) config.h
regexec.obj: regexec.c regint.h regenc.h oniguruma.h config.h
reggnu.obj: reggnu.c regint.h regenc.h oniguruma.h config.h oniggnu.h
regerror.obj: regerror.c regint.h regenc.h oniguruma.h config.h
regenc.obj: regenc.c regenc.h oniguruma.h config.h
regsyntax.obj: regsyntax.c regint.h regenc.h oniguruma.h config.h
regversion.obj: regversion.c oniguruma.h config.h
regposix.obj: regposix.c $(posixheaders) oniguruma.h config.h
regposerr.obj: regposerr.c $(posixheaders) config.h
st.obj: st.c regint.h oniguruma.h config.h st.h
$(encdir)\ascii.obj: $(encdir)\ascii.c regenc.h config.h
$(encdir)\unicode.obj: $(encdir)\unicode.c regenc.h config.h
$(encdir)\utf8.obj: $(encdir)\utf8.c regenc.h config.h
$(encdir)\utf16_be.obj: $(encdir)\utf16_be.c regenc.h config.h
$(encdir)\utf16_le.obj: $(encdir)\utf16_le.c regenc.h config.h
$(encdir)\utf32_be.obj: $(encdir)\utf32_be.c regenc.h config.h
$(encdir)\utf32_le.obj: $(encdir)\utf32_le.c regenc.h config.h
$(encdir)\euc_jp.obj: $(encdir)\euc_jp.c regenc.h config.h
$(encdir)\euc_tw.obj: $(encdir)\euc_tw.c regenc.h config.h
$(encdir)\euc_kr.obj: $(encdir)\euc_kr.c regenc.h config.h
$(encdir)\sjis.obj: $(encdir)\sjis.c regenc.h config.h
$(encdir)\iso8859_1.obj: $(encdir)\iso8859_1.c regenc.h config.h
$(encdir)\iso8859_2.obj: $(encdir)\iso8859_2.c regenc.h config.h
$(encdir)\iso8859_3.obj: $(encdir)\iso8859_3.c regenc.h config.h
$(encdir)\iso8859_4.obj: $(encdir)\iso8859_4.c regenc.h config.h
$(encdir)\iso8859_5.obj: $(encdir)\iso8859_5.c regenc.h config.h
$(encdir)\iso8859_6.obj: $(encdir)\iso8859_6.c regenc.h config.h
$(encdir)\iso8859_7.obj: $(encdir)\iso8859_7.c regenc.h config.h
$(encdir)\iso8859_8.obj: $(encdir)\iso8859_8.c regenc.h config.h
$(encdir)\iso8859_9.obj: $(encdir)\iso8859_9.c regenc.h config.h
$(encdir)\iso8859_10.obj: $(encdir)\iso8859_10.c regenc.h config.h
$(encdir)\iso8859_11.obj: $(encdir)\iso8859_11.c regenc.h config.h
$(encdir)\iso8859_13.obj: $(encdir)\iso8859_13.c regenc.h config.h
$(encdir)\iso8859_14.obj: $(encdir)\iso8859_14.c regenc.h config.h
$(encdir)\iso8859_15.obj: $(encdir)\iso8859_15.c regenc.h config.h
$(encdir)\iso8859_16.obj: $(encdir)\iso8859_16.c regenc.h config.h
$(encdir)\koi8.obj: $(encdir)\koi8.c regenc.h config.h
$(encdir)\koi8_r.obj: $(encdir)\koi8_r.c regenc.h config.h
$(encdir)\cp1251.obj: $(encdir)\cp1251.c regenc.h config.h
$(encdir)\big5.obj: $(encdir)\big5.c regenc.h config.h
$(encdir)\gb18030.obj: $(encdir)\gb18030.c regenc.h config.h
# Ruby test
rtest:
$(RUBYDIR)\win32\ruby -w -Ke test.rb
# C library test
ctest: $(testc)
.\$(testc)
# POSIX C library test
ptest: $(testp)
.\$(testp)
$(testc): $(testc).c $(libname)
$(CC) -nologo -o $(testc) -DONIG_EXTERN=extern $(testc).c $(libname)
$(testp): $(testc).c $(dlllib)
$(CC) -nologo -DPOSIX_TEST -o $(testp) $(testc).c $(dlllib)
#$(testc)u.c: test.rb testconvu.rb
# ruby -Ke testconvu.rb test.rb > $@
$(testc)u: $(testc)u.c $(libname)
$(CC) -nologo -o $(testc)u -DONIG_EXTERN=extern $(testc)u.c $(libname)
clean:
del *.obj $(encdir)\*.obj *.lib *.exp *.dll $(testp).exe $(testc).exe $(testc).obj
# backup file suffix
SORIG = ruby_orig
# ruby 1.9 source update
19:
$(CP) regerror.c $(RUBYDIR)
$(CP) regparse.c $(RUBYDIR)
$(CP) regcomp.c $(RUBYDIR)
$(CP) regexec.c $(RUBYDIR)
$(CP) regenc.c $(RUBYDIR)
$(CP) regint.h $(RUBYDIR)
$(CP) regparse.h $(RUBYDIR)
$(CP) regenc.h $(RUBYDIR)
$(CP) oniguruma.h $(RUBYDIR)
$(CP) enc\ascii.c $(RUBYDIR)
$(CP) enc\utf8.c $(RUBYDIR)
$(CP) enc\euc_jp.c $(RUBYDIR)
$(CP) enc\sjis.c $(RUBYDIR)
$(CP) enc\unicode.c $(RUBYDIR)
samples: all
$(CC) $(CFLAGS) -I. -o simple sample\simple.c $(dlllib)
$(CC) $(CFLAGS) -I. -o posix sample\posix.c $(dlllib)
$(CC) $(CFLAGS) -I. -o names sample\names.c $(dlllib)
$(CC) $(CFLAGS) -I. -o listcap sample\listcap.c $(dlllib)
$(CC) $(CFLAGS) -I. -o sql sample\sql.c $(dlllib)
$(CC) $(CFLAGS) -I. -o encode sample\encode.c $(dlllib)
$(CC) $(CFLAGS) -I. -o syntax sample\syntax.c $(dlllib)

84
oniguruma/win32/config.h Normal file
View file

@ -0,0 +1,84 @@
#define STDC_HEADERS 1
#define HAVE_SYS_TYPES_H 1
#define HAVE_SYS_STAT_H 1
#define HAVE_STDLIB_H 1
#define HAVE_STRING_H 1
#define HAVE_MEMORY_H 1
#define HAVE_FLOAT_H 1
#define HAVE_OFF_T 1
#define SIZEOF_INT 4
#define SIZEOF_SHORT 2
#define SIZEOF_LONG 4
#define SIZEOF_LONG_LONG 0
#define SIZEOF___INT64 8
#define SIZEOF_OFF_T 4
#define SIZEOF_VOIDP 4
#define SIZEOF_FLOAT 4
#define SIZEOF_DOUBLE 8
#define HAVE_PROTOTYPES 1
#define TOKEN_PASTE(x,y) x##y
#define HAVE_STDARG_PROTOTYPES 1
#ifndef NORETURN
#if _MSC_VER > 1100
#define NORETURN(x) __declspec(noreturn) x
#else
#define NORETURN(x) x
#endif
#endif
#define HAVE_DECL_SYS_NERR 1
#define STDC_HEADERS 1
#define HAVE_STDLIB_H 1
#define HAVE_STRING_H 1
#define HAVE_LIMITS_H 1
#define HAVE_FCNTL_H 1
#define HAVE_SYS_UTIME_H 1
#define HAVE_MEMORY_H 1
#define uid_t int
#define gid_t int
#define HAVE_STRUCT_STAT_ST_RDEV 1
#define HAVE_ST_RDEV 1
#define GETGROUPS_T int
#define RETSIGTYPE void
#define HAVE_ALLOCA 1
#define HAVE_DUP2 1
#define HAVE_MEMCMP 1
#define HAVE_MEMMOVE 1
#define HAVE_MKDIR 1
#define HAVE_STRCASECMP 1
#define HAVE_STRNCASECMP 1
#define HAVE_STRERROR 1
#define HAVE_STRFTIME 1
#define HAVE_STRCHR 1
#define HAVE_STRSTR 1
#define HAVE_STRTOD 1
#define HAVE_STRTOL 1
#define HAVE_STRTOUL 1
#define HAVE_FLOCK 1
#define HAVE_VSNPRINTF 1
#define HAVE_FINITE 1
#define HAVE_FMOD 1
#define HAVE_FREXP 1
#define HAVE_HYPOT 1
#define HAVE_MODF 1
#define HAVE_WAITPID 1
#define HAVE_CHSIZE 1
#define HAVE_TIMES 1
#define HAVE__SETJMP 1
#define HAVE_TELLDIR 1
#define HAVE_SEEKDIR 1
#define HAVE_MKTIME 1
#define HAVE_COSH 1
#define HAVE_SINH 1
#define HAVE_TANH 1
#define HAVE_EXECVE 1
#define HAVE_TZNAME 1
#define HAVE_DAYLIGHT 1
#define SETPGRP_VOID 1
#define inline __inline
#define NEED_IO_SEEK_BETWEEN_RW 1
#define RSHIFT(x,y) ((x)>>(int)y)
#define FILE_COUNT _cnt
#define FILE_READPTR _ptr
#define DEFAULT_KCODE KCODE_NONE
#define DLEXT ".so"
#define DLEXT2 ".dll"

863
oniguruma/win32/testc.c Normal file
View file

@ -0,0 +1,863 @@
/*
* This program was generated by testconv.rb.
*/
#include "config.h"
#ifdef ONIG_ESCAPE_UCHAR_COLLISION
#undef ONIG_ESCAPE_UCHAR_COLLISION
#endif
#include <stdio.h>
#ifdef POSIX_TEST
#include "onigposix.h"
#else
#include "oniguruma.h"
#endif
#ifdef HAVE_STRING_H
# include <string.h>
#else
# include <strings.h>
#endif
#define SLEN(s) strlen(s)
static int nsucc = 0;
static int nfail = 0;
static int nerror = 0;
static FILE* err_file;
#ifndef POSIX_TEST
static OnigRegion* region;
#endif
static void xx(char* pattern, char* str, int from, int to, int mem, int not)
{
int r;
#ifdef POSIX_TEST
regex_t reg;
char buf[200];
regmatch_t pmatch[25];
r = regcomp(&reg, pattern, REG_EXTENDED | REG_NEWLINE);
if (r) {
regerror(r, &reg, buf, sizeof(buf));
fprintf(err_file, "ERROR: %s\n", buf);
nerror++;
return ;
}
r = regexec(&reg, str, reg.re_nsub + 1, pmatch, 0);
if (r != 0 && r != REG_NOMATCH) {
regerror(r, &reg, buf, sizeof(buf));
fprintf(err_file, "ERROR: %s\n", buf);
nerror++;
return ;
}
if (r == REG_NOMATCH) {
if (not) {
fprintf(stdout, "OK(N): /%s/ '%s'\n", pattern, str);
nsucc++;
}
else {
fprintf(stdout, "FAIL: /%s/ '%s'\n", pattern, str);
nfail++;
}
}
else {
if (not) {
fprintf(stdout, "FAIL(N): /%s/ '%s'\n", pattern, str);
nfail++;
}
else {
if (pmatch[mem].rm_so == from && pmatch[mem].rm_eo == to) {
fprintf(stdout, "OK: /%s/ '%s'\n", pattern, str);
nsucc++;
}
else {
fprintf(stdout, "FAIL: /%s/ '%s' %d-%d : %d-%d\n", pattern, str,
from, to, pmatch[mem].rm_so, pmatch[mem].rm_eo);
nfail++;
}
}
}
regfree(&reg);
#else
regex_t* reg;
OnigErrorInfo einfo;
r = onig_new(&reg, (UChar* )pattern, (UChar* )(pattern + SLEN(pattern)),
ONIG_OPTION_DEFAULT, ONIG_ENCODING_SJIS, ONIG_SYNTAX_DEFAULT, &einfo);
if (r) {
char s[ONIG_MAX_ERROR_MESSAGE_LEN];
onig_error_code_to_str((UChar* )s, r, &einfo);
fprintf(err_file, "ERROR: %s\n", s);
nerror++;
return ;
}
r = onig_search(reg, (UChar* )str, (UChar* )(str + SLEN(str)),
(UChar* )str, (UChar* )(str + SLEN(str)),
region, ONIG_OPTION_NONE);
if (r < ONIG_MISMATCH) {
char s[ONIG_MAX_ERROR_MESSAGE_LEN];
onig_error_code_to_str((UChar* )s, r);
fprintf(err_file, "ERROR: %s\n", s);
nerror++;
return ;
}
if (r == ONIG_MISMATCH) {
if (not) {
fprintf(stdout, "OK(N): /%s/ '%s'\n", pattern, str);
nsucc++;
}
else {
fprintf(stdout, "FAIL: /%s/ '%s'\n", pattern, str);
nfail++;
}
}
else {
if (not) {
fprintf(stdout, "FAIL(N): /%s/ '%s'\n", pattern, str);
nfail++;
}
else {
if (region->beg[mem] == from && region->end[mem] == to) {
fprintf(stdout, "OK: /%s/ '%s'\n", pattern, str);
nsucc++;
}
else {
fprintf(stdout, "FAIL: /%s/ '%s' %d-%d : %d-%d\n", pattern, str,
from, to, region->beg[mem], region->end[mem]);
nfail++;
}
}
}
onig_free(reg);
#endif
}
static void x2(char* pattern, char* str, int from, int to)
{
xx(pattern, str, from, to, 0, 0);
}
static void x3(char* pattern, char* str, int from, int to, int mem)
{
xx(pattern, str, from, to, mem, 0);
}
static void n(char* pattern, char* str)
{
xx(pattern, str, 0, 0, 0, 1);
}
extern int main(int argc, char* argv[])
{
err_file = stdout;
#ifdef POSIX_TEST
reg_set_encoding(REG_POSIX_ENCODING_SJIS);
#else
region = onig_region_new();
#endif
x2("", "", 0, 0);
x2("^", "", 0, 0);
x2("$", "", 0, 0);
x2("\\G", "", 0, 0);
x2("\\A", "", 0, 0);
x2("\\Z", "", 0, 0);
x2("\\z", "", 0, 0);
x2("^$", "", 0, 0);
x2("\\ca", "\001", 0, 1);
x2("\\C-b", "\002", 0, 1);
x2("\\c\\\\", "\034", 0, 1);
x2("q[\\c\\\\]", "q\034", 0, 2);
x2("", "a", 0, 0);
x2("a", "a", 0, 1);
x2("\\x61", "a", 0, 1);
x2("aa", "aa", 0, 2);
x2("aaa", "aaa", 0, 3);
x2("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", 0, 35);
x2("ab", "ab", 0, 2);
x2("b", "ab", 1, 2);
x2("bc", "abc", 1, 3);
x2("(?i:#RET#)", "#INS##RET#", 5, 10);
x2("\\17", "\017", 0, 1);
x2("\\x1f", "\x1f", 0, 1);
x2("a(?#....\\\\JJJJ)b", "ab", 0, 2);
x2("(?x) G (o O(?-x)oO) g L", "GoOoOgLe", 0, 7);
x2(".", "a", 0, 1);
n(".", "");
x2("..", "ab", 0, 2);
x2("\\w", "e", 0, 1);
n("\\W", "e");
x2("\\s", " ", 0, 1);
x2("\\S", "b", 0, 1);
x2("\\d", "4", 0, 1);
n("\\D", "4");
x2("\\b", "z ", 0, 0);
x2("\\b", " z", 1, 1);
x2("\\B", "zz ", 1, 1);
x2("\\B", "z ", 2, 2);
x2("\\B", " z", 0, 0);
x2("[ab]", "b", 0, 1);
n("[ab]", "c");
x2("[a-z]", "t", 0, 1);
n("[^a]", "a");
x2("[^a]", "\n", 0, 1);
x2("[]]", "]", 0, 1);
n("[^]]", "]");
x2("[\\^]+", "0^^1", 1, 3);
x2("[b-]", "b", 0, 1);
x2("[b-]", "-", 0, 1);
x2("[\\w]", "z", 0, 1);
n("[\\w]", " ");
x2("[\\W]", "b$", 1, 2);
x2("[\\d]", "5", 0, 1);
n("[\\d]", "e");
x2("[\\D]", "t", 0, 1);
n("[\\D]", "3");
x2("[\\s]", " ", 0, 1);
n("[\\s]", "a");
x2("[\\S]", "b", 0, 1);
n("[\\S]", " ");
x2("[\\w\\d]", "2", 0, 1);
n("[\\w\\d]", " ");
x2("[[:upper:]]", "B", 0, 1);
x2("[*[:xdigit:]+]", "+", 0, 1);
x2("[*[:xdigit:]+]", "GHIKK-9+*", 6, 7);
x2("[*[:xdigit:]+]", "-@^+", 3, 4);
n("[[:upper]]", "A");
x2("[[:upper]]", ":", 0, 1);
x2("[\\044-\\047]", "\046", 0, 1);
x2("[\\x5a-\\x5c]", "\x5b", 0, 1);
x2("[\\x6A-\\x6D]", "\x6c", 0, 1);
n("[\\x6A-\\x6D]", "\x6E");
n("^[0-9A-F]+ 0+ UNDEF ", "75F 00000000 SECT14A notype () External | _rb_apply");
x2("[\\[]", "[", 0, 1);
x2("[\\]]", "]", 0, 1);
x2("[&]", "&", 0, 1);
x2("[[ab]]", "b", 0, 1);
x2("[[ab]c]", "c", 0, 1);
n("[[^a]]", "a");
n("[^[a]]", "a");
x2("[[ab]&&bc]", "b", 0, 1);
n("[[ab]&&bc]", "a");
n("[[ab]&&bc]", "c");
x2("[a-z&&b-y&&c-x]", "w", 0, 1);
n("[^a-z&&b-y&&c-x]", "w");
x2("[[^a&&a]&&a-z]", "b", 0, 1);
n("[[^a&&a]&&a-z]", "a");
x2("[[^a-z&&bcdef]&&[^c-g]]", "h", 0, 1);
n("[[^a-z&&bcdef]&&[^c-g]]", "c");
x2("[^[^abc]&&[^cde]]", "c", 0, 1);
x2("[^[^abc]&&[^cde]]", "e", 0, 1);
n("[^[^abc]&&[^cde]]", "f");
x2("[a-&&-a]", "-", 0, 1);
n("[a\\-&&\\-a]", "&");
n("\\wabc", " abc");
x2("a\\Wbc", "a bc", 0, 4);
x2("a.b.c", "aabbc", 0, 5);
x2(".\\wb\\W..c", "abb bcc", 0, 7);
x2("\\s\\wzzz", " zzzz", 0, 5);
x2("aa.b", "aabb", 0, 4);
n(".a", "ab");
x2(".a", "aa", 0, 2);
x2("^a", "a", 0, 1);
x2("^a$", "a", 0, 1);
x2("^\\w$", "a", 0, 1);
n("^\\w$", " ");
x2("^\\wab$", "zab", 0, 3);
x2("^\\wabcdef$", "zabcdef", 0, 7);
x2("^\\w...def$", "zabcdef", 0, 7);
x2("\\w\\w\\s\\Waaa\\d", "aa aaa4", 0, 8);
x2("\\A\\Z", "", 0, 0);
x2("\\Axyz", "xyz", 0, 3);
x2("xyz\\Z", "xyz", 0, 3);
x2("xyz\\z", "xyz", 0, 3);
x2("a\\Z", "a", 0, 1);
x2("\\Gaz", "az", 0, 2);
n("\\Gz", "bza");
n("az\\G", "az");
n("az\\A", "az");
n("a\\Az", "az");
x2("\\^\\$", "^$", 0, 2);
x2("^x?y", "xy", 0, 2);
x2("^(x?y)", "xy", 0, 2);
x2("\\w", "_", 0, 1);
n("\\W", "_");
x2("(?=z)z", "z", 0, 1);
n("(?=z).", "a");
x2("(?!z)a", "a", 0, 1);
n("(?!z)a", "z");
x2("(?i:a)", "a", 0, 1);
x2("(?i:a)", "A", 0, 1);
x2("(?i:A)", "a", 0, 1);
n("(?i:A)", "b");
x2("(?i:[A-Z])", "a", 0, 1);
x2("(?i:[f-m])", "H", 0, 1);
x2("(?i:[f-m])", "h", 0, 1);
n("(?i:[f-m])", "e");
x2("(?i:[A-c])", "D", 0, 1);
n("(?i:[^a-z])", "A");
n("(?i:[^a-z])", "a");
x2("(?i:[!-k])", "Z", 0, 1);
x2("(?i:[!-k])", "7", 0, 1);
x2("(?i:[T-}])", "b", 0, 1);
x2("(?i:[T-}])", "{", 0, 1);
x2("(?i:\\?a)", "?A", 0, 2);
x2("(?i:\\*A)", "*a", 0, 2);
n(".", "\n");
x2("(?m:.)", "\n", 0, 1);
x2("(?m:a.)", "a\n", 0, 2);
x2("(?m:.b)", "a\nb", 1, 3);
x2(".*abc", "dddabdd\nddabc", 8, 13);
x2("(?m:.*abc)", "dddabddabc", 0, 10);
n("(?i)(?-i)a", "A");
n("(?i)(?-i:a)", "A");
x2("a?", "", 0, 0);
x2("a?", "b", 0, 0);
x2("a?", "a", 0, 1);
x2("a*", "", 0, 0);
x2("a*", "a", 0, 1);
x2("a*", "aaa", 0, 3);
x2("a*", "baaaa", 0, 0);
n("a+", "");
x2("a+", "a", 0, 1);
x2("a+", "aaaa", 0, 4);
x2("a+", "aabbb", 0, 2);
x2("a+", "baaaa", 1, 5);
x2(".?", "", 0, 0);
x2(".?", "f", 0, 1);
x2(".?", "\n", 0, 0);
x2(".*", "", 0, 0);
x2(".*", "abcde", 0, 5);
x2(".+", "z", 0, 1);
x2(".+", "zdswer\n", 0, 6);
x2("(.*)a\\1f", "babfbac", 0, 4);
x2("(.*)a\\1f", "bacbabf", 3, 7);
x2("((.*)a\\2f)", "bacbabf", 3, 7);
x2("(.*)a\\1f", "baczzzzzz\nbazz\nzzzzbabf", 19, 23);
x2("a|b", "a", 0, 1);
x2("a|b", "b", 0, 1);
x2("|a", "a", 0, 0);
x2("(|a)", "a", 0, 0);
x2("ab|bc", "ab", 0, 2);
x2("ab|bc", "bc", 0, 2);
x2("z(?:ab|bc)", "zbc", 0, 3);
x2("a(?:ab|bc)c", "aabc", 0, 4);
x2("ab|(?:ac|az)", "az", 0, 2);
x2("a|b|c", "dc", 1, 2);
x2("a|b|cd|efg|h|ijk|lmn|o|pq|rstuvwx|yz", "pqr", 0, 2);
n("a|b|cd|efg|h|ijk|lmn|o|pq|rstuvwx|yz", "mn");
x2("a|^z", "ba", 1, 2);
x2("a|^z", "za", 0, 1);
x2("a|\\Gz", "bza", 2, 3);
x2("a|\\Gz", "za", 0, 1);
x2("a|\\Az", "bza", 2, 3);
x2("a|\\Az", "za", 0, 1);
x2("a|b\\Z", "ba", 1, 2);
x2("a|b\\Z", "b", 0, 1);
x2("a|b\\z", "ba", 1, 2);
x2("a|b\\z", "b", 0, 1);
x2("\\w|\\s", " ", 0, 1);
n("\\w|\\w", " ");
x2("\\w|%", "%", 0, 1);
x2("\\w|[&$]", "&", 0, 1);
x2("[b-d]|[^e-z]", "a", 0, 1);
x2("(?:a|[c-f])|bz", "dz", 0, 1);
x2("(?:a|[c-f])|bz", "bz", 0, 2);
x2("abc|(?=zz)..f", "zzf", 0, 3);
x2("abc|(?!zz)..f", "abf", 0, 3);
x2("(?=za)..a|(?=zz)..a", "zza", 0, 3);
n("(?>a|abd)c", "abdc");
x2("(?>abd|a)c", "abdc", 0, 4);
x2("a?|b", "a", 0, 1);
x2("a?|b", "b", 0, 0);
x2("a?|b", "", 0, 0);
x2("a*|b", "aa", 0, 2);
x2("a*|b*", "ba", 0, 0);
x2("a*|b*", "ab", 0, 1);
x2("a+|b*", "", 0, 0);
x2("a+|b*", "bbb", 0, 3);
x2("a+|b*", "abbb", 0, 1);
n("a+|b+", "");
x2("(a|b)?", "b", 0, 1);
x2("(a|b)*", "ba", 0, 2);
x2("(a|b)+", "bab", 0, 3);
x2("(ab|ca)+", "caabbc", 0, 4);
x2("(ab|ca)+", "aabca", 1, 5);
x2("(ab|ca)+", "abzca", 0, 2);
x2("(a|bab)+", "ababa", 0, 5);
x2("(a|bab)+", "ba", 1, 2);
x2("(a|bab)+", "baaaba", 1, 4);
x2("(?:a|b)(?:a|b)", "ab", 0, 2);
x2("(?:a*|b*)(?:a*|b*)", "aaabbb", 0, 3);
x2("(?:a*|b*)(?:a+|b+)", "aaabbb", 0, 6);
x2("(?:a+|b+){2}", "aaabbb", 0, 6);
x2("h{0,}", "hhhh", 0, 4);
x2("(?:a+|b+){1,2}", "aaabbb", 0, 6);
n("ax{2}*a", "0axxxa1");
n("a.{0,2}a", "0aXXXa0");
n("a.{0,2}?a", "0aXXXa0");
n("a.{0,2}?a", "0aXXXXa0");
x2("^a{2,}?a$", "aaa", 0, 3);
x2("^[a-z]{2,}?$", "aaa", 0, 3);
x2("(?:a+|\\Ab*)cc", "cc", 0, 2);
n("(?:a+|\\Ab*)cc", "abcc");
x2("(?:^a+|b+)*c", "aabbbabc", 6, 8);
x2("(?:^a+|b+)*c", "aabbbbc", 0, 7);
x2("a|(?i)c", "C", 0, 1);
x2("(?i)c|a", "C", 0, 1);
x2("(?i)c|a", "A", 0, 1);
x2("(?i:c)|a", "C", 0, 1);
n("(?i:c)|a", "A");
x2("[abc]?", "abc", 0, 1);
x2("[abc]*", "abc", 0, 3);
x2("[^abc]*", "abc", 0, 0);
n("[^abc]+", "abc");
x2("a?\?", "aaa", 0, 0);
x2("ba?\?b", "bab", 0, 3);
x2("a*?", "aaa", 0, 0);
x2("ba*?", "baa", 0, 1);
x2("ba*?b", "baab", 0, 4);
x2("a+?", "aaa", 0, 1);
x2("ba+?", "baa", 0, 2);
x2("ba+?b", "baab", 0, 4);
x2("(?:a?)?\?", "a", 0, 0);
x2("(?:a?\?)?", "a", 0, 0);
x2("(?:a?)+?", "aaa", 0, 1);
x2("(?:a+)?\?", "aaa", 0, 0);
x2("(?:a+)?\?b", "aaab", 0, 4);
x2("(?:ab)?{2}", "", 0, 0);
x2("(?:ab)?{2}", "ababa", 0, 4);
x2("(?:ab)*{0}", "ababa", 0, 0);
x2("(?:ab){3,}", "abababab", 0, 8);
n("(?:ab){3,}", "abab");
x2("(?:ab){2,4}", "ababab", 0, 6);
x2("(?:ab){2,4}", "ababababab", 0, 8);
x2("(?:ab){2,4}?", "ababababab", 0, 4);
x2("(?:ab){,}", "ab{,}", 0, 5);
x2("(?:abc)+?{2}", "abcabcabc", 0, 6);
x2("(?:X*)(?i:xa)", "XXXa", 0, 4);
x2("(d+)([^abc]z)", "dddz", 0, 4);
x2("([^abc]*)([^abc]z)", "dddz", 0, 4);
x2("(\\w+)(\\wz)", "dddz", 0, 4);
x3("(a)", "a", 0, 1, 1);
x3("(ab)", "ab", 0, 2, 1);
x2("((ab))", "ab", 0, 2);
x3("((ab))", "ab", 0, 2, 1);
x3("((ab))", "ab", 0, 2, 2);
x3("((((((((((((((((((((ab))))))))))))))))))))", "ab", 0, 2, 20);
x3("(ab)(cd)", "abcd", 0, 2, 1);
x3("(ab)(cd)", "abcd", 2, 4, 2);
x3("()(a)bc(def)ghijk", "abcdefghijk", 3, 6, 3);
x3("(()(a)bc(def)ghijk)", "abcdefghijk", 3, 6, 4);
x2("(^a)", "a", 0, 1);
x3("(a)|(a)", "ba", 1, 2, 1);
x3("(^a)|(a)", "ba", 1, 2, 2);
x3("(a?)", "aaa", 0, 1, 1);
x3("(a*)", "aaa", 0, 3, 1);
x3("(a*)", "", 0, 0, 1);
x3("(a+)", "aaaaaaa", 0, 7, 1);
x3("(a+|b*)", "bbbaa", 0, 3, 1);
x3("(a+|b?)", "bbbaa", 0, 1, 1);
x3("(abc)?", "abc", 0, 3, 1);
x3("(abc)*", "abc", 0, 3, 1);
x3("(abc)+", "abc", 0, 3, 1);
x3("(xyz|abc)+", "abc", 0, 3, 1);
x3("([xyz][abc]|abc)+", "abc", 0, 3, 1);
x3("((?i:abc))", "AbC", 0, 3, 1);
x2("(abc)(?i:\\1)", "abcABC", 0, 6);
x3("((?m:a.c))", "a\nc", 0, 3, 1);
x3("((?=az)a)", "azb", 0, 1, 1);
x3("abc|(.abd)", "zabd", 0, 4, 1);
x2("(?:abc)|(ABC)", "abc", 0, 3);
x3("(?i:(abc))|(zzz)", "ABC", 0, 3, 1);
x3("a*(.)", "aaaaz", 4, 5, 1);
x3("a*?(.)", "aaaaz", 0, 1, 1);
x3("a*?(c)", "aaaac", 4, 5, 1);
x3("[bcd]a*(.)", "caaaaz", 5, 6, 1);
x3("(\\Abb)cc", "bbcc", 0, 2, 1);
n("(\\Abb)cc", "zbbcc");
x3("(^bb)cc", "bbcc", 0, 2, 1);
n("(^bb)cc", "zbbcc");
x3("cc(bb$)", "ccbb", 2, 4, 1);
n("cc(bb$)", "ccbbb");
n("(\\1)", "");
n("\\1(a)", "aa");
n("(a(b)\\1)\\2+", "ababb");
n("(?:(?:\\1|z)(a))+$", "zaa");
x2("(?:(?:\\1|z)(a))+$", "zaaa", 0, 4);
x2("(a)(?=\\1)", "aa", 0, 1);
n("(a)$|\\1", "az");
x2("(a)\\1", "aa", 0, 2);
n("(a)\\1", "ab");
x2("(a?)\\1", "aa", 0, 2);
x2("(a?\?)\\1", "aa", 0, 0);
x2("(a*)\\1", "aaaaa", 0, 4);
x3("(a*)\\1", "aaaaa", 0, 2, 1);
x2("a(b*)\\1", "abbbb", 0, 5);
x2("a(b*)\\1", "ab", 0, 1);
x2("(a*)(b*)\\1\\2", "aaabbaaabb", 0, 10);
x2("(a*)(b*)\\2", "aaabbbb", 0, 7);
x2("(((((((a*)b))))))c\\7", "aaabcaaa", 0, 8);
x3("(((((((a*)b))))))c\\7", "aaabcaaa", 0, 3, 7);
x2("(a)(b)(c)\\2\\1\\3", "abcbac", 0, 6);
x2("([a-d])\\1", "cc", 0, 2);
x2("(\\w\\d\\s)\\1", "f5 f5 ", 0, 6);
n("(\\w\\d\\s)\\1", "f5 f5");
x2("(who|[a-c]{3})\\1", "whowho", 0, 6);
x2("...(who|[a-c]{3})\\1", "abcwhowho", 0, 9);
x2("(who|[a-c]{3})\\1", "cbccbc", 0, 6);
x2("(^a)\\1", "aa", 0, 2);
n("(^a)\\1", "baa");
n("(a$)\\1", "aa");
n("(ab\\Z)\\1", "ab");
x2("(a*\\Z)\\1", "a", 1, 1);
x2(".(a*\\Z)\\1", "ba", 1, 2);
x3("(.(abc)\\2)", "zabcabc", 0, 7, 1);
x3("(.(..\\d.)\\2)", "z12341234", 0, 9, 1);
x2("((?i:az))\\1", "AzAz", 0, 4);
n("((?i:az))\\1", "Azaz");
x2("(?<=a)b", "ab", 1, 2);
n("(?<=a)b", "bb");
x2("(?<=a|b)b", "bb", 1, 2);
x2("(?<=a|bc)b", "bcb", 2, 3);
x2("(?<=a|bc)b", "ab", 1, 2);
x2("(?<=a|bc||defghij|klmnopq|r)z", "rz", 1, 2);
x2("(a)\\g<1>", "aa", 0, 2);
x2("(?<!a)b", "cb", 1, 2);
n("(?<!a)b", "ab");
x2("(?<!a|bc)b", "bbb", 0, 1);
n("(?<!a|bc)z", "bcz");
x2("(?<name1>a)", "a", 0, 1);
x2("(?<name_2>ab)\\g<name_2>", "abab", 0, 4);
x2("(?<name_3>.zv.)\\k<name_3>", "azvbazvb", 0, 8);
x2("(?<=\\g<ab>)|-\\zEND (?<ab>XyZ)", "XyZ", 3, 3);
x2("(?<n>|a\\g<n>)+", "", 0, 0);
x2("(?<n>|\\(\\g<n>\\))+$", "()(())", 0, 6);
x3("\\g<n>(?<n>.){0}", "X", 0, 1, 1);
x2("\\g<n>(abc|df(?<n>.YZ){2,8}){0}", "XYZ", 0, 3);
x2("\\A(?<n>(a\\g<n>)|)\\z", "aaaa", 0, 4);
x2("(?<n>|\\g<m>\\g<n>)\\z|\\zEND (?<m>a|(b)\\g<m>)", "bbbbabba", 0, 8);
x2("(?<name1240>\\w+\\sx)a+\\k<name1240>", " fg xaaaaaaaafg x", 2, 18);
x3("(z)()()(?<_9>a)\\g<_9>", "zaa", 2, 3, 1);
x2("(.)(((?<_>a)))\\k<_>", "zaa", 0, 3);
x2("((?<name1>\\d)|(?<name2>\\w))(\\k<name1>|\\k<name2>)", "ff", 0, 2);
x2("(?:(?<x>)|(?<x>efg))\\k<x>", "", 0, 0);
x2("(?:(?<x>abc)|(?<x>efg))\\k<x>", "abcefgefg", 3, 9);
n("(?:(?<x>abc)|(?<x>efg))\\k<x>", "abcefg");
x2("(?:(?<n1>.)|(?<n1>..)|(?<n1>...)|(?<n1>....)|(?<n1>.....)|(?<n1>......)|(?<n1>.......)|(?<n1>........)|(?<n1>.........)|(?<n1>..........)|(?<n1>...........)|(?<n1>............)|(?<n1>.............)|(?<n1>..............))\\k<n1>$", "a-pyumpyum", 2, 10);
x3("(?:(?<n1>.)|(?<n1>..)|(?<n1>...)|(?<n1>....)|(?<n1>.....)|(?<n1>......)|(?<n1>.......)|(?<n1>........)|(?<n1>.........)|(?<n1>..........)|(?<n1>...........)|(?<n1>............)|(?<n1>.............)|(?<n1>..............))\\k<n1>$", "xxxxabcdefghijklmnabcdefghijklmn", 4, 18, 14);
x3("(?<name1>)(?<name2>)(?<name3>)(?<name4>)(?<name5>)(?<name6>)(?<name7>)(?<name8>)(?<name9>)(?<name10>)(?<name11>)(?<name12>)(?<name13>)(?<name14>)(?<name15>)(?<name16>aaa)(?<name17>)$", "aaa", 0, 3, 16);
x2("(?<foo>a|\\(\\g<foo>\\))", "a", 0, 1);
x2("(?<foo>a|\\(\\g<foo>\\))", "((((((a))))))", 0, 13);
x3("(?<foo>a|\\(\\g<foo>\\))", "((((((((a))))))))", 0, 17, 1);
x2("\\g<bar>|\\zEND(?<bar>.*abc$)", "abcxxxabc", 0, 9);
x2("\\g<1>|\\zEND(.a.)", "bac", 0, 3);
x3("\\g<_A>\\g<_A>|\\zEND(.a.)(?<_A>.b.)", "xbxyby", 3, 6, 1);
x2("\\A(?:\\g<pon>|\\g<pan>|\\zEND (?<pan>a|c\\g<pon>c)(?<pon>b|d\\g<pan>d))$", "cdcbcdc", 0, 7);
x2("\\A(?<n>|a\\g<m>)\\z|\\zEND (?<m>\\g<n>)", "aaaa", 0, 4);
x2("(?<n>(a|b\\g<n>c){3,5})", "baaaaca", 1, 5);
x2("(?<n>(a|b\\g<n>c){3,5})", "baaaacaaaaa", 0, 10);
x2("(?<pare>\\(([^\\(\\)]++|\\g<pare>)*+\\))", "((a))", 0, 5);
x2("()*\\1", "", 0, 0);
x2("(?:()|())*\\1\\2", "", 0, 0);
x3("(?:\\1a|())*", "a", 0, 0, 1);
x2("x((.)*)*x", "0x1x2x3", 1, 6);
x2("x((.)*)*x(?i:\\1)\\Z", "0x1x2x1X2", 1, 9);
x2("(?:()|()|()|()|()|())*\\2\\5", "", 0, 0);
x2("(?:()|()|()|(x)|()|())*\\2b\\5", "b", 0, 1);
x2("\\xED\\xF2", "\xed\xf2", 0, 2);
x2("", "", 0, 0);
x2("", "", 0, 2);
n("", "");
x2("うう", "うう", 0, 4);
x2("あいう", "あいう", 0, 6);
x2("こここここここここここここここここここここここここここここここここここ", "こここここここここここここここここここここここここここここここここここ", 0, 70);
x2("", "いあ", 2, 4);
x2("いう", "あいう", 2, 6);
x2("\\xca\\xb8", "\xca\xb8", 0, 2);
x2(".", "", 0, 2);
x2("..", "かき", 0, 4);
x2("\\w", "", 0, 2);
n("\\W", "");
x2("[\\W]", "う$", 2, 3);
x2("\\S", "", 0, 2);
x2("\\S", "", 0, 2);
x2("\\b", "", 0, 0);
x2("\\b", "", 1, 1);
x2("\\B", "せそ ", 2, 2);
x2("\\B", "", 3, 3);
x2("\\B", "", 0, 0);
x2("[たち]", "", 0, 2);
n("[なに]", "");
x2("[う-お]", "", 0, 2);
n("[^け]", "");
x2("[\\w]", "", 0, 2);
n("[\\d]", "");
x2("[\\D]", "", 0, 2);
n("[\\s]", "");
x2("[\\S]", "", 0, 2);
x2("[\\w\\d]", "", 0, 2);
x2("[\\w\\d]", "", 3, 5);
n("\\w鬼車", " 鬼車");
x2("\\W車", "鬼 車", 0, 5);
x2("あ.い.う", "ああいいう", 0, 10);
x2(".\\wう\\W..ぞ", "えうう うぞぞ", 0, 13);
x2("\\s\\wこここ", " ここここ", 0, 9);
x2("ああ.け", "ああけけ", 0, 8);
n(".い", "いえ");
x2(".お", "おお", 0, 4);
x2("^あ", "", 0, 2);
x2("^む$", "", 0, 2);
x2("^\\w$", "", 0, 2);
x2("^\\wかきくけこ$", "zかきくけこ", 0, 11);
x2("^\\w...うえお$", "zあいううえお", 0, 13);
x2("\\w\\w\\s\\Wおおお\\d", "aお おおお4", 0, 12);
x2("\\Aたちつ", "たちつ", 0, 6);
x2("むめも\\Z", "むめも", 0, 6);
x2("かきく\\z", "かきく", 0, 6);
x2("かきく\\Z", "かきく\n", 0, 6);
x2("\\Gぽぴ", "ぽぴ", 0, 4);
n("\\Gえ", "うえお");
n("とて\\G", "とて");
n("まみ\\A", "まみ");
n("\\Aみ", "まみ");
x2("(?=せ)せ", "", 0, 2);
n("(?=う).", "");
x2("(?!う)か", "", 0, 2);
n("(?!と)あ", "");
x2("(?i:あ)", "", 0, 2);
x2("(?i:ぶべ)", "ぶべ", 0, 4);
n("(?i:い)", "");
x2("(?m:よ.)", "\n", 0, 3);
x2("(?m:.め)", "\n", 2, 5);
x2("あ?", "", 0, 0);
x2("変?", "", 0, 0);
x2("変?", "", 0, 2);
x2("量*", "", 0, 0);
x2("量*", "", 0, 2);
x2("子*", "子子子", 0, 6);
x2("馬*", "鹿馬馬馬馬", 0, 0);
n("山+", "");
x2("河+", "", 0, 2);
x2("時+", "時時時時", 0, 8);
x2("え+", "ええううう", 0, 4);
x2("う+", "おうううう", 2, 10);
x2(".?", "", 0, 2);
x2(".*", "ぱぴぷぺ", 0, 8);
x2(".+", "", 0, 2);
x2(".+", "いうえか\n", 0, 8);
x2("あ|い", "", 0, 2);
x2("あ|い", "", 0, 2);
x2("あい|いう", "あい", 0, 4);
x2("あい|いう", "いう", 0, 4);
x2("を(?:かき|きく)", "をかき", 0, 6);
x2("を(?:かき|きく)け", "をきくけ", 0, 8);
x2("あい|(?:あう|あを)", "あを", 0, 4);
x2("あ|い|う", "えう", 2, 4);
x2("あ|い|うえ|おかき|く|けこさ|しすせ|そ|たち|つてとなに|ぬね", "しすせ", 0, 6);
n("あ|い|うえ|おかき|く|けこさ|しすせ|そ|たち|つてとなに|ぬね", "すせ");
x2("あ|^わ", "ぶあ", 2, 4);
x2("あ|^を", "をあ", 0, 2);
x2("鬼|\\G車", "け車鬼", 4, 6);
x2("鬼|\\G車", "車鬼", 0, 2);
x2("鬼|\\A車", "b車鬼", 3, 5);
x2("鬼|\\A車", "", 0, 2);
x2("鬼|車\\Z", "車鬼", 2, 4);
x2("鬼|車\\Z", "", 0, 2);
x2("鬼|車\\Z", "\n", 0, 2);
x2("鬼|車\\z", "車鬼", 2, 4);
x2("鬼|車\\z", "", 0, 2);
x2("\\w|\\s", "", 0, 2);
x2("\\w|%", "%お", 0, 1);
x2("\\w|[&$]", "う&", 0, 2);
x2("[い-け]", "", 0, 2);
x2("[い-け]|[^か-こ]", "", 0, 2);
x2("[い-け]|[^か-こ]", "", 0, 2);
x2("[^あ]", "\n", 0, 1);
x2("(?:あ|[う-き])|いを", "うを", 0, 2);
x2("(?:あ|[う-き])|いを", "いを", 0, 4);
x2("あいう|(?=けけ)..ほ", "けけほ", 0, 6);
x2("あいう|(?!けけ)..ほ", "あいほ", 0, 6);
x2("(?=をあ)..あ|(?=をを)..あ", "ををあ", 0, 6);
x2("(?<=あ|いう)い", "いうい", 4, 6);
n("(?>あ|あいえ)う", "あいえう");
x2("(?>あいえ|あ)う", "あいえう", 0, 8);
x2("あ?|い", "", 0, 2);
x2("あ?|い", "", 0, 0);
x2("あ?|い", "", 0, 0);
x2("あ*|い", "ああ", 0, 4);
x2("あ*|い*", "いあ", 0, 0);
x2("あ*|い*", "あい", 0, 2);
x2("[aあ]*|い*", "aあいいい", 0, 3);
x2("あ+|い*", "", 0, 0);
x2("あ+|い*", "いいい", 0, 6);
x2("あ+|い*", "あいいい", 0, 2);
x2("あ+|い*", "aあいいい", 0, 0);
n("あ+|い+", "");
x2("(あ|い)?", "", 0, 2);
x2("(あ|い)*", "いあ", 0, 4);
x2("(あ|い)+", "いあい", 0, 6);
x2("(あい|うあ)+", "うああいうえ", 0, 8);
x2("(あい|うえ)+", "うああいうえ", 4, 12);
x2("(あい|うあ)+", "ああいうあ", 2, 10);
x2("(あい|うあ)+", "あいをうあ", 0, 4);
x2("(あい|うあ)+", "$$zzzzあいをうあ", 6, 10);
x2("(あ|いあい)+", "あいあいあ", 0, 10);
x2("(あ|いあい)+", "いあ", 2, 4);
x2("(あ|いあい)+", "いあああいあ", 2, 8);
x2("(?:あ|い)(?:あ|い)", "あい", 0, 4);
x2("(?:あ*|い*)(?:あ*|い*)", "あああいいい", 0, 6);
x2("(?:あ*|い*)(?:あ+|い+)", "あああいいい", 0, 12);
x2("(?:あ+|い+){2}", "あああいいい", 0, 12);
x2("(?:あ+|い+){1,2}", "あああいいい", 0, 12);
x2("(?:あ+|\\Aい*)うう", "うう", 0, 4);
n("(?:あ+|\\Aい*)うう", "あいうう");
x2("(?:^あ+|い+)*う", "ああいいいあいう", 12, 16);
x2("(?:^あ+|い+)*う", "ああいいいいう", 0, 14);
x2("う{0,}", "うううう", 0, 8);
x2("あ|(?i)c", "C", 0, 1);
x2("(?i)c|あ", "C", 0, 1);
x2("(?i:あ)|a", "a", 0, 1);
n("(?i:あ)|a", "A");
x2("[あいう]?", "あいう", 0, 2);
x2("[あいう]*", "あいう", 0, 6);
x2("[^あいう]*", "あいう", 0, 0);
n("[^あいう]+", "あいう");
x2("あ?\?", "あああ", 0, 0);
x2("いあ?\?い", "いあい", 0, 6);
x2("あ*?", "あああ", 0, 0);
x2("いあ*?", "いああ", 0, 2);
x2("いあ*?い", "いああい", 0, 8);
x2("あ+?", "あああ", 0, 2);
x2("いあ+?", "いああ", 0, 4);
x2("いあ+?い", "いああい", 0, 8);
x2("(?:天?)?\?", "", 0, 0);
x2("(?:天?\?)?", "", 0, 0);
x2("(?:夢?)+?", "夢夢夢", 0, 2);
x2("(?:風+)?\?", "風風風", 0, 0);
x2("(?:雪+)?\?霜", "雪雪雪霜", 0, 8);
x2("(?:あい)?{2}", "", 0, 0);
x2("(?:鬼車)?{2}", "鬼車鬼車鬼", 0, 8);
x2("(?:鬼車)*{0}", "鬼車鬼車鬼", 0, 0);
x2("(?:鬼車){3,}", "鬼車鬼車鬼車鬼車", 0, 16);
n("(?:鬼車){3,}", "鬼車鬼車");
x2("(?:鬼車){2,4}", "鬼車鬼車鬼車", 0, 12);
x2("(?:鬼車){2,4}", "鬼車鬼車鬼車鬼車鬼車", 0, 16);
x2("(?:鬼車){2,4}?", "鬼車鬼車鬼車鬼車鬼車", 0, 8);
x2("(?:鬼車){,}", "鬼車{,}", 0, 7);
x2("(?:かきく)+?{2}", "かきくかきくかきく", 0, 12);
x3("(火)", "", 0, 2, 1);
x3("(火水)", "火水", 0, 4, 1);
x2("((時間))", "時間", 0, 4);
x3("((風水))", "風水", 0, 4, 1);
x3("((昨日))", "昨日", 0, 4, 2);
x3("((((((((((((((((((((量子))))))))))))))))))))", "量子", 0, 4, 20);
x3("(あい)(うえ)", "あいうえ", 0, 4, 1);
x3("(あい)(うえ)", "あいうえ", 4, 8, 2);
x3("()(あ)いう(えおか)きくけこ", "あいうえおかきくけこ", 6, 12, 3);
x3("(()(あ)いう(えおか)きくけこ)", "あいうえおかきくけこ", 6, 12, 4);
x3(".*(フォ)ン・マ(ン()シュタ)イン", "フォン・マンシュタイン", 10, 18, 2);
x2("(^あ)", "", 0, 2);
x3("(あ)|(あ)", "いあ", 2, 4, 1);
x3("(^あ)|(あ)", "いあ", 2, 4, 2);
x3("(あ?)", "あああ", 0, 2, 1);
x3("(ま*)", "ままま", 0, 6, 1);
x3("(と*)", "", 0, 0, 1);
x3("(る+)", "るるるるるるる", 0, 14, 1);
x3("(ふ+|へ*)", "ふふふへへ", 0, 6, 1);
x3("(あ+|い?)", "いいいああ", 0, 2, 1);
x3("(あいう)?", "あいう", 0, 6, 1);
x3("(あいう)*", "あいう", 0, 6, 1);
x3("(あいう)+", "あいう", 0, 6, 1);
x3("(さしす|あいう)+", "あいう", 0, 6, 1);
x3("([なにぬ][かきく]|かきく)+", "かきく", 0, 6, 1);
x3("((?i:あいう))", "あいう", 0, 6, 1);
x3("((?m:あ.う))", "\n", 0, 5, 1);
x3("((?=あん)あ)", "あんい", 0, 2, 1);
x3("あいう|(.あいえ)", "んあいえ", 0, 8, 1);
x3("あ*(.)", "ああああん", 8, 10, 1);
x3("あ*?(.)", "ああああん", 0, 2, 1);
x3("あ*?(ん)", "ああああん", 8, 10, 1);
x3("[いうえ]あ*(.)", "えああああん", 10, 12, 1);
x3("(\\Aいい)うう", "いいうう", 0, 4, 1);
n("(\\Aいい)うう", "んいいうう");
x3("(^いい)うう", "いいうう", 0, 4, 1);
n("(^いい)うう", "んいいうう");
x3("ろろ(るる$)", "ろろるる", 4, 8, 1);
n("ろろ(るる$)", "ろろるるる");
x2("(無)\\1", "無無", 0, 4);
n("(無)\\1", "無武");
x2("(空?)\\1", "空空", 0, 4);
x2("(空?\?)\\1", "空空", 0, 0);
x2("(空*)\\1", "空空空空空", 0, 8);
x3("(空*)\\1", "空空空空空", 0, 4, 1);
x2("あ(い*)\\1", "あいいいい", 0, 10);
x2("あ(い*)\\1", "あい", 0, 2);
x2("(あ*)(い*)\\1\\2", "あああいいあああいい", 0, 20);
x2("(あ*)(い*)\\2", "あああいいいい", 0, 14);
x3("(あ*)(い*)\\2", "あああいいいい", 6, 10, 2);
x2("(((((((ぽ*)ぺ))))))ぴ\\7", "ぽぽぽぺぴぽぽぽ", 0, 16);
x3("(((((((ぽ*)ぺ))))))ぴ\\7", "ぽぽぽぺぴぽぽぽ", 0, 6, 7);
x2("(は)(ひ)(ふ)\\2\\1\\3", "はひふひはふ", 0, 12);
x2("([き-け])\\1", "くく", 0, 4);
x2("(\\w\\d\\s)\\1", "あ5 あ5 ", 0, 8);
n("(\\w\\d\\s)\\1", "あ5 あ5");
x2("(誰?|[あ-う]{3})\\1", "誰?誰?", 0, 8);
x2("...(誰?|[あ-う]{3})\\1", "あaあ誰", 0, 13);
x2("(誰?|[あ-う]{3})\\1", "ういうういう", 0, 12);
x2("(^こ)\\1", "ここ", 0, 4);
n("(^む)\\1", "めむむ");
n("(あ$)\\1", "ああ");
n("(あい\\Z)\\1", "あい");
x2("(あ*\\Z)\\1", "", 2, 2);
x2(".(あ*\\Z)\\1", "いあ", 2, 4);
x3("(.(やいゆ)\\2)", "zやいゆやいゆ", 0, 13, 1);
x3("(.(..\\d.)\\2)", "あ12341234", 0, 10, 1);
x2("((?i:あvず))\\1", "あvずあvず", 0, 10);
x2("(?<愚か>変|\\(\\g<愚か>\\))", "((((((変))))))", 0, 14);
x2("\\A(?:\\g<阿_1>|\\g<云_2>|\\z終了 (?<阿_1>観|自\\g<云_2>自)(?<云_2>在|菩薩\\g<阿_1>菩薩))$", "菩薩自菩薩自在自菩薩自菩薩", 0, 26);
x2("[[ひふ]]", "", 0, 2);
x2("[[いおう]か]", "", 0, 2);
n("[[^あ]]", "");
n("[^[あ]]", "");
x2("[^[^あ]]", "", 0, 2);
x2("[[かきく]&&きく]", "", 0, 2);
n("[[かきく]&&きく]", "");
n("[[かきく]&&きく]", "");
x2("[あ-ん&&い-を&&う-ゑ]", "", 0, 2);
n("[^あ-ん&&い-を&&う-ゑ]", "");
x2("[[^あ&&あ]&&あ-ん]", "", 0, 2);
n("[[^あ&&あ]&&あ-ん]", "");
x2("[[^あ-ん&&いうえお]&&[^う-か]]", "", 0, 2);
n("[[^あ-ん&&いうえお]&&[^う-か]]", "");
x2("[^[^あいう]&&[^うえお]]", "", 0, 2);
x2("[^[^あいう]&&[^うえお]]", "", 0, 2);
n("[^[^あいう]&&[^うえお]]", "");
x2("[あ-&&-あ]", "-", 0, 1);
x2("[^[^a-zあいう]&&[^bcdefgうえお]q-w]", "", 0, 2);
x2("[^[^a-zあいう]&&[^bcdefgうえお]g-w]", "f", 0, 1);
x2("[^[^a-zあいう]&&[^bcdefgうえお]g-w]", "g", 0, 1);
n("[^[^a-zあいう]&&[^bcdefgうえお]g-w]", "2");
x2("a<b>バージョンのダウンロード<\\/b>", "a<b>バージョンのダウンロード</b>", 0, 32);
x2(".<b>バージョンのダウンロード<\\/b>", "a<b>バージョンのダウンロード</b>", 0, 32);
fprintf(stdout,
"\nRESULT SUCC: %d, FAIL: %d, ERROR: %d (by Oniguruma %s)\n",
nsucc, nfail, nerror, onig_version());
#ifndef POSIX_TEST
onig_region_free(region, 1);
onig_end();
#endif
return ((nfail == 0 && nerror == 0) ? 0 : -1);
}