<!DOCTYPE html>
<html lang="en">

<head>
    <meta charset="UTF-8">
    <meta http-equiv="X-UA-Compatible" content="IE=edge">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0-alpha1/dist/css/bootstrap.min.css" rel="stylesheet"
        integrity="sha384-GLhlTQ8iRABdZLl6O3oVMWSktQOp6b7In1Zl3/Jr59b6EGGoI1aFkw7cmDA6j6gD" crossorigin="anonymous">
    <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.3.0/css/all.min.css"
        integrity="sha512-SzlrxWUlpfuzQ+pcUCosxcglQRNAq/DZjVsC0lE40xsADsfeQoEypE+enwcOiGjk/bSuGGKHEyjSoQ1zVisanQ=="
        crossorigin="anonymous" referrerpolicy="no-referrer" />
</head>
</html>
from cpython cimport PyBytes_AsString


#from cpython cimport PyByteArray_AsString # cython still not exports that
cdef extern from "Python.h":
    char* PyByteArray_AsString(bytearray ba) except NULL

from libc.stdint cimport uint32_t, uint64_t, uintmax_t


def _websocket_mask_cython(object mask, object data):
    """Note, this function mutates its `data` argument
    """
    cdef:
        Py_ssize_t data_len, i
        # bit operations on signed integers are implementation-specific
        unsigned char * in_buf
        const unsigned char * mask_buf
        uint32_t uint32_msk
        uint64_t uint64_msk

    assert len(mask) == 4

    if not isinstance(mask, bytes):
        mask = bytes(mask)

    if isinstance(data, bytearray):
        data = <bytearray>data
    else:
        data = bytearray(data)

    data_len = len(data)
    in_buf = <unsigned char*>PyByteArray_AsString(data)
    mask_buf = <const unsigned char*>PyBytes_AsString(mask)
    uint32_msk = (<uint32_t*>mask_buf)[0]

    # TODO: align in_data ptr to achieve even faster speeds
    # does it need in python ?! malloc() always aligns to sizeof(long) bytes

    if sizeof(size_t) >= 8:
        uint64_msk = uint32_msk
        uint64_msk = (uint64_msk << 32) | uint32_msk

        while data_len >= 8:
            (<uint64_t*>in_buf)[0] ^= uint64_msk
            in_buf += 8
            data_len -= 8


    while data_len >= 4:
        (<uint32_t*>in_buf)[0] ^= uint32_msk
        in_buf += 4
        data_len -= 4

    for i in range(0, data_len):
        in_buf[i] ^= mask_buf[i]
