diff --git a/.gitea/workflows/release.yaml b/.gitea/workflows/release.yaml index e6a6740..e4f051f 100644 --- a/.gitea/workflows/release.yaml +++ b/.gitea/workflows/release.yaml @@ -22,6 +22,11 @@ jobs: with: fetch-depth: 0 + - name: Install system dependencies + run: | + sudo apt-get update + sudo apt-get install -y libpcre2-dev + - name: Setup Python uses: actions/setup-python@v4 with: @@ -45,6 +50,9 @@ jobs: if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true' run: poetry install --with dev + - name: Build Cython extensions + run: poetry run python scripts/build_cython.py build_ext --inplace + - name: Build package run: | poetry build diff --git a/.gitea/workflows/test.yaml b/.gitea/workflows/test.yaml index 43a175e..106c6d0 100644 --- a/.gitea/workflows/test.yaml +++ b/.gitea/workflows/test.yaml @@ -17,6 +17,11 @@ jobs: - name: Checkout repository uses: actions/checkout@v4 + - name: Install system dependencies + run: | + sudo apt-get update + sudo apt-get install -y libpcre2-dev + - name: Setup Python ${{ matrix.python-version }} uses: actions/setup-python@v4 with: @@ -40,6 +45,9 @@ jobs: if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true' run: poetry install --with dev + - name: Build Cython extensions + run: poetry run python scripts/build_cython.py build_ext --inplace + - name: Run tests run: poetry run pytest tests/ -v diff --git a/benchmarks/bench_routing.py b/benchmarks/bench_routing.py new file mode 100644 index 0000000..8bcacfe --- /dev/null +++ b/benchmarks/bench_routing.py @@ -0,0 +1,153 @@ +#!/usr/bin/env python3 +""" +Benchmark script for routing performance comparison. + +Compares: +- Pure Python implementation with standard re (_routing_py) +- Cython implementation with PCRE2 JIT (_routing) + +Usage: + python benchmarks/bench_routing.py +""" + +import re +import time +import statistics +from typing import Callable, Tuple + +from pyserve._routing_py import ( + FastRouter as PyFastRouter, + FastRouteMatch as PyFastRouteMatch, +) + +try: + from pyserve._routing import ( + FastRouter as CyFastRouter, + FastRouteMatch as CyFastRouteMatch, + ) + CYTHON_AVAILABLE = True +except ImportError: + CYTHON_AVAILABLE = False + print("Cython module not compiled. Run: poetry run python scripts/build_cython.py\n") + + +def benchmark(func: Callable, iterations: int = 100000) -> Tuple[float, float]: + """Benchmark a function and return mean/stdev in nanoseconds.""" + times = [] + + # Warmup + for _ in range(1000): + func() + + # Actual benchmark + for _ in range(iterations): + start = time.perf_counter_ns() + func() + end = time.perf_counter_ns() + times.append(end - start) + + return statistics.mean(times), statistics.stdev(times) + + +def format_time(ns: float) -> str: + """Format time in nanoseconds to human readable format.""" + if ns < 1000: + return f"{ns:.1f} ns" + elif ns < 1_000_000: + return f"{ns/1000:.2f} µs" + else: + return f"{ns/1_000_000:.2f} ms" + + +def setup_router(router_class): + """Setup a router with typical routes.""" + router = router_class() + + # Exact routes + router.add_route("=/health", {"return": "200 OK"}) + router.add_route("=/api/status", {"return": "200 OK"}) + router.add_route("=/favicon.ico", {"return": "204"}) + + # Regex routes + router.add_route("~^/api/v1/users/(?P\\d+)$", {"proxy_pass": "http://users-service"}) + router.add_route("~^/api/v1/posts/(?P\\d+)$", {"proxy_pass": "http://posts-service"}) + router.add_route("~\\.(css|js|png|jpg|gif|svg|woff2?)$", {"root": "./static"}) + router.add_route("~^/api/", {"proxy_pass": "http://api-gateway"}) + + # Default route + router.add_route("__default__", {"spa_fallback": True, "root": "./dist"}) + + return router + + +def run_benchmarks(): + print("=" * 70) + print("ROUTING BENCHMARK") + print("=" * 70) + print() + + # Test paths with different matching scenarios + test_cases = [ + ("/health", "Exact match (first)"), + ("/api/status", "Exact match (middle)"), + ("/api/v1/users/12345", "Regex match with groups"), + ("/static/app.js", "Regex match (file extension)"), + ("/api/v2/other", "Regex match (simple prefix)"), + ("/some/random/path", "Default route (fallback)"), + ("/nonexistent", "Default route (fallback)"), + ] + + iterations = 100000 + + print(f"Iterations: {iterations:,}") + print() + + # Setup routers + py_router = setup_router(PyFastRouter) + cy_router = setup_router(CyFastRouter) if CYTHON_AVAILABLE else None + + results = {} + + for path, description in test_cases: + print(f"Path: {path}") + print(f" {description}") + + # Python implementation (standard re) + py_mean, py_std = benchmark(lambda p=path: py_router.match(p), iterations) + results[(path, "Python (re)")] = py_mean + print(f" Python (re): {format_time(py_mean):>12} ± {format_time(py_std)}") + + # Cython implementation (PCRE2 JIT) + if CYTHON_AVAILABLE and cy_router: + cy_mean, cy_std = benchmark(lambda p=path: cy_router.match(p), iterations) + results[(path, "Cython (PCRE2)")] = cy_mean + speedup = py_mean / cy_mean if cy_mean > 0 else 0 + print(f" Cython (PCRE2): {format_time(cy_mean):>12} ± {format_time(cy_std)} ({speedup:.2f}x faster)") + + print() + + # Summary + if CYTHON_AVAILABLE: + print("=" * 70) + print("SUMMARY") + print("=" * 70) + + py_total = sum(v for k, v in results.items() if k[1] == "Python (re)") + cy_total = sum(v for k, v in results.items() if k[1] == "Cython (PCRE2)") + + print(f" Python (re) total: {format_time(py_total)}") + print(f" Cython (PCRE2) total: {format_time(cy_total)}") + print(f" Overall speedup: {py_total / cy_total:.2f}x") + + # Show JIT compilation status + print() + print("PCRE2 JIT Status:") + for route in cy_router.list_routes(): # type: ignore False linter error + if route["type"] == "regex": + jit = route.get("jit_compiled", False) + status = "✓ JIT" if jit else "✗ No JIT" + print(f" {status}: {route['pattern']}") + + +if __name__ == "__main__": + run_benchmarks() diff --git a/poetry.lock b/poetry.lock index e3e4c8a..fe54b7f 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 2.1.2 and should not be changed by hand. +# This file is automatically @generated by Poetry 2.1.3 and should not be changed by hand. [[package]] name = "a2wsgi" @@ -1720,5 +1720,5 @@ wsgi = ["a2wsgi"] [metadata] lock-version = "2.1" -python-versions = ">=3.12" -content-hash = "653d7b992e2bb133abde2e8b1c44265e948ed90487ab3f2670429510a8aa0683" +python-versions = ">=3.12, <=3.13.7" +content-hash = "411b746f1a577ed635af9fd3e01daf1fa03950d27ef23888fc7cdd0b99762404" diff --git a/pyserve/_routing.pyx b/pyserve/_routing.pyx new file mode 100644 index 0000000..3d519f1 --- /dev/null +++ b/pyserve/_routing.pyx @@ -0,0 +1,486 @@ +# cython: language_level=3 +# cython: boundscheck=False +# cython: wraparound=False +# cython: cdivision=True +from libc.stdint cimport uint32_t +from libc.stddef cimport size_t +from cpython.bytes cimport PyBytes_AsString, PyBytes_GET_SIZE + +cimport cython +from ._routing_pcre2 cimport * + +from typing import Optional + +# Type aliases for cleaner NULL casts +ctypedef pcre2_compile_context* compile_ctx_ptr +ctypedef pcre2_match_context* match_ctx_ptr +ctypedef pcre2_general_context* general_ctx_ptr + + +# Buffer size for error messages +DEF ERROR_BUFFER_SIZE = 256 + +# Maximum capture groups we support +DEF MAX_CAPTURE_GROUPS = 32 + + +cdef class PCRE2Pattern: + cdef: + pcre2_code* _code + pcre2_match_data* _match_data + bint _jit_available + str _pattern_str + uint32_t _capture_count + dict _name_to_index # Named capture groups + list _index_to_name # Index to name mapping + + def __cinit__(self): + self._code = NULL + self._match_data = NULL + self._jit_available = False + self._capture_count = 0 + self._name_to_index = {} + self._index_to_name = [] + + def __dealloc__(self): + if self._match_data is not NULL: + pcre2_match_data_free(self._match_data) + self._match_data = NULL + if self._code is not NULL: + pcre2_code_free(self._code) + self._code = NULL + + @staticmethod + cdef PCRE2Pattern _create(str pattern, bint case_insensitive=False, bint use_jit=True): + cdef: + PCRE2Pattern self = PCRE2Pattern.__new__(PCRE2Pattern) + bytes pattern_bytes + const char* pattern_ptr + Py_ssize_t pattern_len + uint32_t options = 0 + int errorcode = 0 + PCRE2_SIZE erroroffset = 0 + int jit_result + uint32_t capture_count = 0 + + self._pattern_str = pattern + self._name_to_index = {} + self._index_to_name = [] + + pattern_bytes = pattern.encode('utf-8') + pattern_ptr = PyBytes_AsString(pattern_bytes) + pattern_len = PyBytes_GET_SIZE(pattern_bytes) + + options = PCRE2_UTF | PCRE2_UCP + if case_insensitive: + options |= PCRE2_CASELESS + + self._code = pcre2_compile( + pattern_ptr, + pattern_len, + options, + &errorcode, + &erroroffset, + NULL + ) + + if self._code is NULL: + error_msg = PCRE2Pattern._get_error_message(errorcode) + raise ValueError(f"PCRE2 compile error at offset {erroroffset}: {error_msg}") + + if use_jit: + jit_result = pcre2_jit_compile(self._code, PCRE2_JIT_COMPLETE) + self._jit_available = (jit_result == 0) + + pcre2_pattern_info(self._code, PCRE2_INFO_CAPTURECOUNT, &capture_count) + self._capture_count = capture_count + + self._match_data = pcre2_match_data_create_from_pattern(self._code, NULL) + if self._match_data is NULL: + pcre2_code_free(self._code) + self._code = NULL + raise MemoryError("Failed to create match data") + + self._extract_named_groups() + + return self + + cdef void _extract_named_groups(self): + cdef: + uint32_t namecount = 0 + uint32_t nameentrysize = 0 + PCRE2_SPTR nametable + uint32_t i + int group_num + bytes name_bytes + str name + + pcre2_pattern_info(self._code, PCRE2_INFO_NAMECOUNT, &namecount) + + if namecount == 0: + return # void return + + pcre2_pattern_info(self._code, PCRE2_INFO_NAMEENTRYSIZE, &nameentrysize) + pcre2_pattern_info(self._code, PCRE2_INFO_NAMETABLE, &nametable) + + self._index_to_name = [None] * (self._capture_count + 1) + + for i in range(namecount): + group_num = (nametable[0] << 8) | nametable[1] + name_bytes = (nametable + 2) + name = name_bytes.decode('utf-8') + + self._name_to_index[name] = group_num + if group_num <= self._capture_count: + self._index_to_name[group_num] = name + + nametable += nameentrysize + + @staticmethod + cdef str _get_error_message(int errorcode): + cdef: + PCRE2_UCHAR buffer[ERROR_BUFFER_SIZE] + int result + + result = pcre2_get_error_message(errorcode, buffer, ERROR_BUFFER_SIZE) + if result < 0: + return f"Unknown error {errorcode}" + return (buffer).decode('utf-8') + + cpdef bint search(self, str subject): + """ + Search for pattern anywhere in subject. + Returns True if found, False otherwise. + """ + cdef: + bytes subject_bytes + const char* subject_ptr + Py_ssize_t subject_len + int result + + if self._code is NULL: + return False + + subject_bytes = subject.encode('utf-8') + subject_ptr = PyBytes_AsString(subject_bytes) + subject_len = PyBytes_GET_SIZE(subject_bytes) + + if self._jit_available: + result = pcre2_jit_match( + self._code, + subject_ptr, + subject_len, + 0, # start offset + 0, # options + self._match_data, + NULL + ) + else: + result = pcre2_match( + self._code, + subject_ptr, + subject_len, + 0, + 0, + self._match_data, + NULL + ) + + return (result >= 0) + + cpdef dict groupdict(self, str subject): + """ + Match pattern and return dict of named groups. + Returns empty dict if no match or no named groups. + """ + cdef: + bytes subject_bytes + const char* subject_ptr + Py_ssize_t subject_len + int result + PCRE2_SIZE* ovector + dict groups = {} + str name + int index + PCRE2_SIZE start, end + + if self._code is NULL or not self._name_to_index: + return groups + + subject_bytes = subject.encode('utf-8') + subject_ptr = PyBytes_AsString(subject_bytes) + subject_len = PyBytes_GET_SIZE(subject_bytes) + + if self._jit_available: + result = pcre2_jit_match( + self._code, + subject_ptr, + subject_len, + 0, 0, + self._match_data, + NULL + ) + else: + result = pcre2_match( + self._code, + subject_ptr, + subject_len, + 0, 0, + self._match_data, + NULL + ) + + if result < 0: + return groups + + ovector = pcre2_get_ovector_pointer(self._match_data) + + for name, index in self._name_to_index.items(): + start = ovector[(2 * index)] + end = ovector[(2 * index + 1)] + if start != PCRE2_UNSET and end != PCRE2_UNSET: + groups[name] = subject_bytes[start:end].decode('utf-8') + else: + groups[name] = None + + return groups + + cpdef tuple search_with_groups(self, str subject): + cdef: + bytes subject_bytes + const char* subject_ptr + Py_ssize_t subject_len + int result + PCRE2_SIZE* ovector + dict groups = {} + str name + int index + PCRE2_SIZE start, end + + if self._code is NULL: + return (False, {}) + + subject_bytes = subject.encode('utf-8') + subject_ptr = PyBytes_AsString(subject_bytes) + subject_len = PyBytes_GET_SIZE(subject_bytes) + + if self._jit_available: + result = pcre2_jit_match( + self._code, + subject_ptr, + subject_len, + 0, 0, + self._match_data, + NULL + ) + else: + result = pcre2_match( + self._code, + subject_ptr, + subject_len, + 0, 0, + self._match_data, + NULL + ) + + if result < 0: + return (False, {}) + + if self._name_to_index: + ovector = pcre2_get_ovector_pointer(self._match_data) + for name, index in self._name_to_index.items(): + start = ovector[(2 * index)] + end = ovector[(2 * index + 1)] + if start != PCRE2_UNSET and end != PCRE2_UNSET: + groups[name] = subject_bytes[start:end].decode('utf-8') + else: + groups[name] = None + + return (True, groups) + + @property + def pattern(self) -> str: + return self._pattern_str + + @property + def jit_compiled(self) -> bool: + return self._jit_available + + @property + def capture_count(self) -> int: + return self._capture_count + + +cdef class FastRouteMatch: + cdef: + public dict config + public dict params + + def __cinit__(self): + self.config = {} + self.params = {} + + def __init__(self, dict config, params=None): + self.config = config + self.params = params if params is not None else {} + + +cdef class FastRouter: + """ + High-performance router with PCRE2 JIT-compiled patterns. + + Matching order (nginx-like): + 1. Exact routes (prefix "=") - O(1) dict lookup + 2. Regex routes (prefix "~" or "~*") - PCRE2 JIT matching + 3. Default route (fallback) + """ + cdef: + dict _exact_routes + list _regex_routes + dict _default_route + bint _has_default + int _regex_count + + def __cinit__(self): + self._exact_routes = {} + self._regex_routes = [] + self._default_route = {} + self._has_default = False + self._regex_count = 0 + + def __init__(self): + self._exact_routes = {} + self._regex_routes = [] + self._default_route = {} + self._has_default = False + self._regex_count = 0 + + def add_route(self, str pattern, dict config): + cdef: + str exact_path + str regex_pattern + bint case_insensitive + PCRE2Pattern compiled_pattern + + if pattern.startswith("="): + exact_path = pattern[1:] + self._exact_routes[exact_path] = config + + elif pattern == "__default__": + self._default_route = config + self._has_default = True + + elif pattern.startswith("~"): + case_insensitive = pattern.startswith("~*") + regex_pattern = pattern[2:] if case_insensitive else pattern[1:] + + try: + compiled_pattern = PCRE2Pattern._create(regex_pattern, case_insensitive) + self._regex_routes.append((compiled_pattern, config)) + self._regex_count = len(self._regex_routes) + except (ValueError, MemoryError): + pass # Skip invalid patterns + + cpdef object match(self, str path): + cdef: + dict config + dict params + int i + PCRE2Pattern pattern + tuple route_entry + bint matched + + if path in self._exact_routes: + config = self._exact_routes[path] + return FastRouteMatch(config, {}) + + for i in range(self._regex_count): + route_entry = self._regex_routes[i] + pattern = route_entry[0] + config = route_entry[1] + + matched, params = pattern.search_with_groups(path) + if matched: + return FastRouteMatch(config, params) + + if self._has_default: + return FastRouteMatch(self._default_route, {}) + + return None + + @property + def exact_routes(self) -> dict: + return self._exact_routes + + @property + def routes(self) -> dict: + """Return regex routes as dict (pattern_str -> config).""" + cdef: + dict result = {} + PCRE2Pattern pattern + for pattern, config in self._regex_routes: + result[pattern.pattern] = config + return result + + @property + def default_route(self) -> Optional[dict]: + return self._default_route if self._has_default else None + + cpdef list list_routes(self): + cdef: + list result = [] + str path_str + dict config + PCRE2Pattern pattern + + for path_str, config in self._exact_routes.items(): + result.append({ + "type": "exact", + "pattern": f"={path_str}", + "config": config, + }) + + for pattern, config in self._regex_routes: + result.append({ + "type": "regex", + "pattern": pattern.pattern, + "jit_compiled": pattern.jit_compiled, + "config": config, + }) + + if self._has_default: + result.append({ + "type": "default", + "pattern": "__default__", + "config": self._default_route, + }) + + return result + + +def compile_pattern(str pattern, bint case_insensitive=False) -> PCRE2Pattern: + """ + Compile a PCRE2 pattern with JIT support. + + Args: + pattern: Regular expression pattern + case_insensitive: Whether to match case-insensitively + + Returns: + Compiled PCRE2Pattern object + """ + return PCRE2Pattern._create(pattern, case_insensitive) + + +def fast_match(router: FastRouter, str path): + """ + Convenience function for matching a path. + + Args: + router: FastRouter instance + path: URL path to match + + Returns: + FastRouteMatch or None + """ + return router.match(path) diff --git a/pyserve/_routing_pcre2.pxd b/pyserve/_routing_pcre2.pxd new file mode 100644 index 0000000..7f91fee --- /dev/null +++ b/pyserve/_routing_pcre2.pxd @@ -0,0 +1,208 @@ +# cython: language_level=3 + +from libc.stdint cimport uint8_t, uint32_t, int32_t +from libc.stddef cimport size_t + +cdef extern from "pcre2.h": + pass + +cdef extern from *: + ctypedef struct pcre2_code_8: + pass + ctypedef pcre2_code_8 pcre2_code + + ctypedef struct pcre2_match_data_8: + pass + ctypedef pcre2_match_data_8 pcre2_match_data + + ctypedef struct pcre2_compile_context_8: + pass + ctypedef pcre2_compile_context_8 pcre2_compile_context + + ctypedef struct pcre2_match_context_8: + pass + ctypedef pcre2_match_context_8 pcre2_match_context + + ctypedef struct pcre2_general_context_8: + pass + ctypedef pcre2_general_context_8 pcre2_general_context + + ctypedef uint8_t PCRE2_UCHAR + ctypedef const uint8_t* PCRE2_SPTR + ctypedef size_t PCRE2_SIZE + + uint32_t PCRE2_CASELESS + uint32_t PCRE2_MULTILINE + uint32_t PCRE2_DOTALL + uint32_t PCRE2_UTF + uint32_t PCRE2_UCP + uint32_t PCRE2_NO_UTF_CHECK + uint32_t PCRE2_ANCHORED + uint32_t PCRE2_ENDANCHORED + + uint32_t PCRE2_JIT_COMPLETE + uint32_t PCRE2_JIT_PARTIAL_SOFT + uint32_t PCRE2_JIT_PARTIAL_HARD + + int PCRE2_ERROR_NOMATCH + int PCRE2_ERROR_PARTIAL + int PCRE2_ERROR_JIT_STACKLIMIT + + PCRE2_SIZE PCRE2_UNSET + PCRE2_SIZE PCRE2_ZERO_TERMINATED + + pcre2_code* pcre2_compile_8( + PCRE2_SPTR pattern, + PCRE2_SIZE length, + uint32_t options, + int* errorcode, + PCRE2_SIZE* erroroffset, + pcre2_compile_context* ccontext + ) + + void pcre2_code_free_8(pcre2_code* code) + + int pcre2_jit_compile_8(pcre2_code* code, uint32_t options) + + pcre2_match_data* pcre2_match_data_create_from_pattern_8( + const pcre2_code* code, + pcre2_general_context* gcontext + ) + + pcre2_match_data* pcre2_match_data_create_8( + uint32_t ovecsize, + pcre2_general_context* gcontext + ) + + void pcre2_match_data_free_8(pcre2_match_data* match_data) + + int pcre2_match_8( + const pcre2_code* code, + PCRE2_SPTR subject, + PCRE2_SIZE length, + PCRE2_SIZE startoffset, + uint32_t options, + pcre2_match_data* match_data, + pcre2_match_context* mcontext + ) + + int pcre2_jit_match_8( + const pcre2_code* code, + PCRE2_SPTR subject, + PCRE2_SIZE length, + PCRE2_SIZE startoffset, + uint32_t options, + pcre2_match_data* match_data, + pcre2_match_context* mcontext + ) + + PCRE2_SIZE* pcre2_get_ovector_pointer_8(pcre2_match_data* match_data) + uint32_t pcre2_get_ovector_count_8(pcre2_match_data* match_data) + + int pcre2_pattern_info_8( + const pcre2_code* code, + uint32_t what, + void* where + ) + + uint32_t PCRE2_INFO_CAPTURECOUNT + uint32_t PCRE2_INFO_NAMECOUNT + uint32_t PCRE2_INFO_NAMETABLE + uint32_t PCRE2_INFO_NAMEENTRYSIZE + uint32_t PCRE2_INFO_JITSIZE + + int pcre2_get_error_message_8( + int errorcode, + PCRE2_UCHAR* buffer, + PCRE2_SIZE bufflen + ) + + int pcre2_substring_copy_byname_8( + pcre2_match_data* match_data, + PCRE2_SPTR name, + PCRE2_UCHAR* buffer, + PCRE2_SIZE* bufflen + ) + + int pcre2_substring_copy_bynumber_8( + pcre2_match_data* match_data, + uint32_t number, + PCRE2_UCHAR* buffer, + PCRE2_SIZE* bufflen + ) + + int pcre2_substring_get_byname_8( + pcre2_match_data* match_data, + PCRE2_SPTR name, + PCRE2_UCHAR** bufferptr, + PCRE2_SIZE* bufflen + ) + + int pcre2_substring_get_bynumber_8( + pcre2_match_data* match_data, + uint32_t number, + PCRE2_UCHAR** bufferptr, + PCRE2_SIZE* bufflen + ) + + void pcre2_substring_free_8(PCRE2_UCHAR* buffer) + + +cdef inline pcre2_code* pcre2_compile( + PCRE2_SPTR pattern, + PCRE2_SIZE length, + uint32_t options, + int* errorcode, + PCRE2_SIZE* erroroffset, + pcre2_compile_context* ccontext +) noexcept: + return pcre2_compile_8(pattern, length, options, errorcode, erroroffset, ccontext) + +cdef inline void pcre2_code_free(pcre2_code* code) noexcept: + pcre2_code_free_8(code) + +cdef inline int pcre2_jit_compile(pcre2_code* code, uint32_t options) noexcept: + return pcre2_jit_compile_8(code, options) + +cdef inline pcre2_match_data* pcre2_match_data_create_from_pattern( + const pcre2_code* code, + pcre2_general_context* gcontext +) noexcept: + return pcre2_match_data_create_from_pattern_8(code, gcontext) + +cdef inline void pcre2_match_data_free(pcre2_match_data* match_data) noexcept: + pcre2_match_data_free_8(match_data) + +cdef inline int pcre2_match( + const pcre2_code* code, + PCRE2_SPTR subject, + PCRE2_SIZE length, + PCRE2_SIZE startoffset, + uint32_t options, + pcre2_match_data* match_data, + pcre2_match_context* mcontext +) noexcept: + return pcre2_match_8(code, subject, length, startoffset, options, match_data, mcontext) + +cdef inline int pcre2_jit_match( + const pcre2_code* code, + PCRE2_SPTR subject, + PCRE2_SIZE length, + PCRE2_SIZE startoffset, + uint32_t options, + pcre2_match_data* match_data, + pcre2_match_context* mcontext +) noexcept: + return pcre2_jit_match_8(code, subject, length, startoffset, options, match_data, mcontext) + +cdef inline PCRE2_SIZE* pcre2_get_ovector_pointer(pcre2_match_data* match_data) noexcept: + return pcre2_get_ovector_pointer_8(match_data) + +cdef inline uint32_t pcre2_get_ovector_count(pcre2_match_data* match_data) noexcept: + return pcre2_get_ovector_count_8(match_data) + +cdef inline int pcre2_pattern_info(const pcre2_code* code, uint32_t what, void* where) noexcept: + return pcre2_pattern_info_8(code, what, where) + +cdef inline int pcre2_get_error_message(int errorcode, PCRE2_UCHAR* buffer, PCRE2_SIZE bufflen) noexcept: + return pcre2_get_error_message_8(errorcode, buffer, bufflen) diff --git a/pyserve/_routing_py.py b/pyserve/_routing_py.py new file mode 100644 index 0000000..6d7a7b6 --- /dev/null +++ b/pyserve/_routing_py.py @@ -0,0 +1,129 @@ +""" +Pure Python fallback for _routing when PCRE2/Cython is not available. + +This module provides the same interface using the standard library `re` module. +It's slower than the Cython+PCRE2 implementation but works everywhere. +In future we may add pcre2.py library support for better performance in this module. +""" + +import re +from typing import Any, Dict, List, Optional, Pattern, Tuple + + +class FastRouteMatch: + __slots__ = ("config", "params") + + def __init__(self, config: Dict[str, Any], params: Optional[Dict[str, str]] = None): + self.config = config + self.params = params if params is not None else {} + + +class FastRouter: + """ + Router with regex pattern matching. + + Matching order (nginx-like): + 1. Exact routes (prefix "=") - O(1) dict lookup + 2. Regex routes (prefix "~" or "~*") - linear scan + 3. Default route (fallback) + """ + + __slots__ = ("_exact_routes", "_regex_routes", "_default_route", "_has_default", "_regex_count") + + def __init__(self) -> None: + self._exact_routes: Dict[str, Dict[str, Any]] = {} + self._regex_routes: List[Tuple[Pattern[str], Dict[str, Any]]] = [] + self._default_route: Dict[str, Any] = {} + self._has_default: bool = False + self._regex_count: int = 0 + + def add_route(self, pattern: str, config: Dict[str, Any]) -> None: + if pattern.startswith("="): + exact_path = pattern[1:] + self._exact_routes[exact_path] = config + return + + if pattern == "__default__": + self._default_route = config + self._has_default = True + return + + if pattern.startswith("~"): + case_insensitive = pattern.startswith("~*") + regex_pattern = pattern[2:] if case_insensitive else pattern[1:] + + flags = re.IGNORECASE if case_insensitive else 0 + try: + compiled_pattern = re.compile(regex_pattern, flags) + self._regex_routes.append((compiled_pattern, config)) + self._regex_count = len(self._regex_routes) + except re.error: + pass # Ignore invalid patterns + + def match(self, path: str) -> Optional[FastRouteMatch]: + if path in self._exact_routes: + config = self._exact_routes[path] + return FastRouteMatch(config, {}) + + for pattern, config in self._regex_routes: + match_obj = pattern.search(path) + if match_obj is not None: + params = match_obj.groupdict() + return FastRouteMatch(config, params) + + if self._has_default: + return FastRouteMatch(self._default_route, {}) + + return None + + @property + def exact_routes(self) -> Dict[str, Dict[str, Any]]: + return self._exact_routes + + @property + def routes(self) -> Dict[Pattern[str], Dict[str, Any]]: + return {p: c for p, c in self._regex_routes} + + @property + def default_route(self) -> Optional[Dict[str, Any]]: + return self._default_route if self._has_default else None + + def list_routes(self) -> List[Dict[str, Any]]: + result: List[Dict[str, Any]] = [] + + for path, config in self._exact_routes.items(): + result.append({ + "type": "exact", + "pattern": f"={path}", + "config": config, + }) + + for pattern, config in self._regex_routes: + result.append({ + "type": "regex", + "pattern": pattern.pattern, + "config": config, + }) + + if self._has_default: + result.append({ + "type": "default", + "pattern": "__default__", + "config": self._default_route, + }) + + return result + + +def fast_match(router: FastRouter, path: str) -> Optional[FastRouteMatch]: + """ + Convenience function for matching a path. + + Args: + router: FastRouter instance + path: URL path to match + + Returns: + FastRouteMatch or None + """ + return router.match(path) diff --git a/pyserve/routing.py b/pyserve/routing.py index 2e7c5a5..c2e3b49 100644 --- a/pyserve/routing.py +++ b/pyserve/routing.py @@ -1,7 +1,6 @@ import mimetypes -import re from pathlib import Path -from typing import Any, Dict, Optional, Pattern +from typing import Any, Dict from urllib.parse import urlparse import httpx @@ -10,60 +9,19 @@ from starlette.responses import FileResponse, PlainTextResponse, Response from .logging_utils import get_logger +try: + from pyserve._routing import FastRouteMatch, FastRouter, fast_match # type: ignore + CYTHON_ROUTING_AVAILABLE = True +except ImportError: + from pyserve._routing_py import FastRouteMatch, FastRouter, fast_match + CYTHON_ROUTING_AVAILABLE = False + logger = get_logger(__name__) -class RouteMatch: - def __init__(self, config: Dict[str, Any], params: Optional[Dict[str, str]] = None): - self.config = config - self.params = params or {} - - -class Router: - def __init__(self, static_dir: str = "./static"): - self.static_dir = Path(static_dir) - self.routes: Dict[Pattern, Dict[str, Any]] = {} - self.exact_routes: Dict[str, Dict[str, Any]] = {} - self.default_route: Optional[Dict[str, Any]] = None - - def add_route(self, pattern: str, config: Dict[str, Any]) -> None: - if pattern.startswith("="): - exact_path = pattern[1:] - self.exact_routes[exact_path] = config - logger.debug(f"Added exact route: {exact_path}") - return - - if pattern == "__default__": - self.default_route = config - logger.debug("Added default route") - return - - if pattern.startswith("~"): - case_insensitive = pattern.startswith("~*") - regex_pattern = pattern[2:] if case_insensitive else pattern[1:] - - flags = re.IGNORECASE if case_insensitive else 0 - try: - compiled_pattern = re.compile(regex_pattern, flags) - self.routes[compiled_pattern] = config - logger.debug(f"Added regex route: {pattern}") - except re.error as e: - logger.error(f"Regex compilation error {pattern}: {e}") - - def match(self, path: str) -> Optional[RouteMatch]: - if path in self.exact_routes: - return RouteMatch(self.exact_routes[path]) - - for pattern, config in self.routes.items(): - match = pattern.search(path) - if match: - params = match.groupdict() - return RouteMatch(config, params) - - if self.default_route: - return RouteMatch(self.default_route) - - return None +# Aliases for backward compatibility +RouteMatch = FastRouteMatch +Router = FastRouter class RequestHandler: diff --git a/scripts/build_cython.py b/scripts/build_cython.py index f4eac55..ef2ca70 100644 --- a/scripts/build_cython.py +++ b/scripts/build_cython.py @@ -9,9 +9,86 @@ Or via make: """ import os +import subprocess import sys from pathlib import Path + +def get_pcre2_config(): + include_dirs = [] + library_dirs = [] + libraries = ["pcre2-8"] + + try: + cflags = subprocess.check_output( + ["pkg-config", "--cflags", "libpcre2-8"], + stderr=subprocess.DEVNULL + ).decode().strip() + libs = subprocess.check_output( + ["pkg-config", "--libs", "libpcre2-8"], + stderr=subprocess.DEVNULL + ).decode().strip() + + for flag in cflags.split(): + if flag.startswith("-I"): + include_dirs.append(flag[2:]) + + for flag in libs.split(): + if flag.startswith("-L"): + library_dirs.append(flag[2:]) + elif flag.startswith("-l"): + lib = flag[2:] + if lib not in libraries: + libraries.append(lib) + + return include_dirs, library_dirs, libraries + except (subprocess.CalledProcessError, FileNotFoundError): + pass + + try: + cflags = subprocess.check_output( + ["pcre2-config", "--cflags"], + stderr=subprocess.DEVNULL + ).decode().strip() + libs = subprocess.check_output( + ["pcre2-config", "--libs8"], + stderr=subprocess.DEVNULL + ).decode().strip() + + for flag in cflags.split(): + if flag.startswith("-I"): + include_dirs.append(flag[2:]) + + for flag in libs.split(): + if flag.startswith("-L"): + library_dirs.append(flag[2:]) + elif flag.startswith("-l"): + lib = flag[2:] + if lib not in libraries: + libraries.append(lib) + + return include_dirs, library_dirs, libraries + except (subprocess.CalledProcessError, FileNotFoundError): + pass + + # Fallback: try common paths + common_paths = [ + "/opt/homebrew", # macOS ARM + "/usr/local", # macOS Intel / Linux + "/usr", # Linux + ] + + for base in common_paths: + include_path = Path(base) / "include" + lib_path = Path(base) / "lib" + if (include_path / "pcre2.h").exists(): + include_dirs.append(str(include_path)) + library_dirs.append(str(lib_path)) + break + + return include_dirs, library_dirs, libraries + + def build_extensions(): try: from Cython.Build import cythonize @@ -29,6 +106,14 @@ def build_extensions(): print("Install with: pip install setuptools") return False + pcre2_include, pcre2_libdir, pcre2_libs = get_pcre2_config() + + if not pcre2_include: + print("WARNING: PCRE2 not found. Routing module may not compile.") + print("Install PCRE2: brew install pcre2 (macOS) or apt install libpcre2-dev (Linux)") + else: + print(f"Found PCRE2: includes={pcre2_include}, libs={pcre2_libdir}") + extensions = [ Extension( "pyserve._path_matcher", @@ -36,6 +121,18 @@ def build_extensions(): extra_compile_args=["-O3", "-ffast-math"], define_macros=[("NPY_NO_DEPRECATED_API", "NPY_1_7_API_VERSION")], ), + Extension( + "pyserve._routing", + sources=["pyserve/_routing.pyx"], + include_dirs=pcre2_include, + library_dirs=pcre2_libdir, + libraries=pcre2_libs, + extra_compile_args=["-O3", "-ffast-math"], + define_macros=[ + ("NPY_NO_DEPRECATED_API", "NPY_1_7_API_VERSION"), + ("PCRE2_CODE_UNIT_WIDTH", "8"), + ], + ), ] ext_modules = cythonize( @@ -59,7 +156,9 @@ def build_extensions(): cmd.run() print("\nCython extensions built successfully!") - print(" - pyserve/_path_matcher" + (".pyd" if sys.platform == "win32" else ".so")) + ext_suffix = ".pyd" if sys.platform == "win32" else ".so" + print(f" - pyserve/_path_matcher{ext_suffix}") + print(f" - pyserve/_routing{ext_suffix}") return True diff --git a/tests/test_routing.py b/tests/test_routing.py index 02f225e..3e5a77d 100644 --- a/tests/test_routing.py +++ b/tests/test_routing.py @@ -50,16 +50,10 @@ class TestRouter: def test_router_initialization(self): """Test router initializes with correct defaults.""" router = Router() - assert router.static_dir == Path("./static") assert router.routes == {} assert router.exact_routes == {} assert router.default_route is None - def test_router_custom_static_dir(self): - """Test router with custom static directory.""" - router = Router(static_dir="/custom/path") - assert router.static_dir == Path("/custom/path") - def test_add_exact_route(self): """Test adding exact match route.""" router = Router()