Skip to content

Use FAST_IO_HAS_BUILTIN, fix win9x mutex, fix use of SSE2 instructions under SSE-only guards #1149

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: next
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 8 additions & 36 deletions include/fast_io_core_impl/allocation/c_malloc.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,12 +39,8 @@ class c_malloc_allocator
n = 1;
}
void *p =
#if defined(__has_builtin)
#if __has_builtin(__builtin_malloc)
#if FAST_IO_HAS_BUILTIN(__builtin_malloc)
__builtin_malloc(n)
#else
::std::malloc(n)
#endif
#else
::std::malloc(n)
#endif
Expand All @@ -66,12 +62,8 @@ class c_malloc_allocator
}
::std::size_t const to_allocate{n};
p =
#if defined(__has_builtin)
#if __has_builtin(__builtin_realloc)
#if FAST_IO_HAS_BUILTIN(__builtin_realloc)
__builtin_realloc
#else
::std::realloc
#endif
#else
::std::realloc
#endif
Expand All @@ -93,12 +85,8 @@ class c_malloc_allocator
n = 1;
}
void *p =
#if defined(__has_builtin)
#if __has_builtin(__builtin_calloc)
#if FAST_IO_HAS_BUILTIN(__builtin_calloc)
__builtin_calloc
#else
::std::calloc
#endif
#else
::std::calloc
#endif
Expand Down Expand Up @@ -142,12 +130,8 @@ class c_malloc_allocator
if (alignment <= __STDCPP_DEFAULT_NEW_ALIGNMENT__)
{
p =
#if defined(__has_builtin)
#if __has_builtin(__builtin_malloc)
#if FAST_IO_HAS_BUILTIN(__builtin_malloc)
__builtin_malloc
#else
::std::malloc
#endif
#else
::std::malloc
#endif
Expand Down Expand Up @@ -175,12 +159,8 @@ class c_malloc_allocator
if (alignment <= __STDCPP_DEFAULT_NEW_ALIGNMENT__)
{
p =
#if defined(__has_builtin)
#if __has_builtin(__builtin_realloc)
#if FAST_IO_HAS_BUILTIN(__builtin_realloc)
__builtin_realloc
#else
::std::realloc
#endif
#else
::std::realloc
#endif
Expand All @@ -205,12 +185,8 @@ class c_malloc_allocator
}
if (alignment <= __STDCPP_DEFAULT_NEW_ALIGNMENT__)
{
#if defined(__has_builtin)
#if __has_builtin(__builtin_free)
#if FAST_IO_HAS_BUILTIN(__builtin_free)
__builtin_free
#else
::std::free
#endif
#else
::std::free
#endif
Expand All @@ -228,12 +204,8 @@ class c_malloc_allocator
{
return;
}
#if defined(__has_builtin)
#if __has_builtin(__builtin_free)
__builtin_free
#else
::std::free
#endif
#if FAST_IO_HAS_BUILTIN(__builtin_free)
__builtin_free
#else
::std::free
#endif
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ struct gdi_teb_batch
::std::uint_least32_t Buffer[310];
};

// NOLINTBEGIN(*-optin.performance.Padding)
struct teb
{
nt_tib NtTib;
Expand Down Expand Up @@ -265,6 +266,7 @@ struct teb
::std::uint_least32_t SpinCallCount;
::std::uint_least64_t ExtendedFeatureDisableMask;
};
// NOLINTEND(*-optin.performance.Padding)

FAST_IO_DLLIMPORT FAST_IO_GNU_MALLOC void *FAST_IO_WINSTDCALL RtlAllocateHeap(void *, ::std::uint_least32_t, ::std::size_t) noexcept FAST_IO_WINSTDCALL_RENAME(RtlAllocateHeap, 12);
FAST_IO_DLLIMPORT char unsigned FAST_IO_WINSTDCALL RtlFreeHeap(void *, ::std::uint_least32_t, void *) noexcept FAST_IO_WINSTDCALL_RENAME(RtlFreeHeap, 12);
Expand Down
2 changes: 1 addition & 1 deletion include/fast_io_core_impl/codecvt/general.h
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,7 @@ general_code_cvt(src_char_type const *src_first, src_char_type const *src_last,
else
{
#if (defined(_MSC_VER) && defined(_M_AMD64) && !defined(__clang__)) || \
(defined(__SSE__) && defined(__x86_64__) && __cpp_lib_is_constant_evaluated >= 201811L)
(defined(__SSE__) && defined(__SSE2__) && defined(__x86_64__) && __cpp_lib_is_constant_evaluated >= 201811L)
if constexpr (src_encoding != encoding_scheme::utf_ebcdic && encoding != encoding_scheme::utf_ebcdic &&
1 == sizeof(src_char_type) && (1 == sizeof(dest_char_type) || encoding_is_utf(encoding)))
{
Expand Down
6 changes: 3 additions & 3 deletions include/fast_io_core_impl/codecvt/utf.h
Original file line number Diff line number Diff line change
Expand Up @@ -498,7 +498,7 @@ inline constexpr char32_t utf16_surrogate_to_utf32(char16_t high, char16_t low)
return static_cast<char32_t>((static_cast<::std::uint_least32_t>(high) << 10u) + low - 0x35fdc00u);
}

#if (defined(_MSC_VER) && defined(_M_AMD64) && !defined(__clang__)) || (defined(__SSE__) && defined(__x86_64__))
#if (defined(_MSC_VER) && defined(_M_AMD64) && !defined(__clang__)) || (defined(__SSE__) && defined(__SSE2__) && defined(__x86_64__))
template <::std::integral T, ::std::integral U>
requires((sizeof(T) == 1) && (sizeof(U) == 1 || sizeof(U) == 2 || sizeof(U) == 4))
inline code_cvt_result<T, U> convert_ascii_with_sse(T const *__restrict pSrc, U *__restrict pDst) noexcept
Expand All @@ -520,7 +520,7 @@ inline code_cvt_result<T, U> convert_ascii_with_sse(T const *__restrict pSrc, U
x86_64_v16qi chunk;
__builtin_memcpy(__builtin_addressof(chunk), pSrc, m128i_size);
mask = static_cast<::std::uint_least32_t>(__builtin_ia32_pmovmskb128(chunk));
#if __has_builtin(__builtin_shufflevector)
#if FAST_IO_HAS_BUILTIN(__builtin_shufflevector)
x86_64_v16qi half{__builtin_shufflevector(chunk, zero, 0, 16 + 0, 1, 16 + 1, 2, 16 + 2, 3, 16 + 3, 4, 16 + 4, 5,
16 + 5, 6, 16 + 6, 7, 16 + 7)};
__builtin_memcpy(pDst, __builtin_addressof(half), m128i_size);
Expand All @@ -539,7 +539,7 @@ inline code_cvt_result<T, U> convert_ascii_with_sse(T const *__restrict pSrc, U
x86_64_v16qi chunk;
__builtin_memcpy(__builtin_addressof(chunk), pSrc, m128i_size);
mask = static_cast<::std::uint_least32_t>(__builtin_ia32_pmovmskb128(chunk));
#if __has_builtin(__builtin_shufflevector)
#if FAST_IO_HAS_BUILTIN(__builtin_shufflevector)
x86_64_v16qi half_result{__builtin_shufflevector(chunk, zero, 0, 16 + 0, 1, 16 + 1, 2, 16 + 2, 3, 16 + 3, 4,
16 + 4, 5, 16 + 5, 6, 16 + 6, 7, 16 + 7)};
x86_64_v8hi half;
Expand Down
36 changes: 7 additions & 29 deletions include/fast_io_core_impl/freestanding/algorithm.h
Original file line number Diff line number Diff line change
Expand Up @@ -243,43 +243,31 @@ namespace fast_io::freestanding
{

inline
#if defined(__has_builtin)
#if __has_builtin(__builtin_memcpy)
#if FAST_IO_HAS_BUILTIN(__builtin_memcpy)
constexpr
#endif
#endif
void *
my_memcpy(void *dest, void const *src, ::std::size_t count) noexcept
{
return
#if defined(__has_builtin)
#if __has_builtin(__builtin_memcpy)
#if FAST_IO_HAS_BUILTIN(__builtin_memcpy)
__builtin_memcpy
#else
::std::memcpy
#endif
#else
::std::memcpy
#endif
(dest, src, count);
}

inline
#if defined(__has_builtin)
#if __has_builtin(__builtin_memmove)
#if FAST_IO_HAS_BUILTIN(__builtin_memmove)
constexpr
#endif
#endif
void *
my_memmove(void *dest, void const *src, ::std::size_t count) noexcept
{
return
#if defined(__has_builtin)
#if __has_builtin(__builtin_memmove)
#if FAST_IO_HAS_BUILTIN(__builtin_memmove)
__builtin_memmove
#else
::std::memmove
#endif
#else
::std::memmove
#endif
Expand All @@ -289,34 +277,24 @@ inline
inline void *my_memset(void *dest, int ch, ::std::size_t count) noexcept
{
return
#if defined(__has_builtin)
#if __has_builtin(__builtin_memset)
#if FAST_IO_HAS_BUILTIN(__builtin_memset)
__builtin_memset
#else
::std::memset
#endif
#else
::std::memset
#endif
(dest, ch, count);
}

inline
#if defined(__has_builtin)
#if __has_builtin(__builtin_memcmp)
#if FAST_IO_HAS_BUILTIN(__builtin_memcmp)
constexpr
#endif
#endif
int
my_memcmp(void const *dest, void const *src, ::std::size_t count) noexcept
{
return
#if defined(__has_builtin)
#if __has_builtin(__builtin_memcmp)
#if FAST_IO_HAS_BUILTIN(__builtin_memcmp)
__builtin_memcmp
#else
::std::memcmp
#endif
#else
::std::memcmp
#endif
Expand Down
8 changes: 4 additions & 4 deletions include/fast_io_core_impl/freestanding/allocator.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,17 +13,17 @@ struct allocator
{
__builtin_trap();
}
#if __has_builtin(__builtin_operator_new)
return static_cast<T *>(__builtin_operator_new(n * sizeof(T)));
#if FAST_IO_HAS_BUILTIN(__builtin_operator_new)
return static_cast<T *>(__builtin_operator_new(n * sizeof(T)));
#else
__builtin_trap();
return nullptr;
#endif
}
inline constexpr void deallocate([[maybe_unused]] T *ptr, [[maybe_unused]] ::std::size_t n) noexcept
{
#if __has_builtin(__builtin_operator_delete)
__builtin_operator_delete(ptr, sizeof(T) * n);
#if FAST_IO_HAS_BUILTIN(__builtin_operator_delete)
__builtin_operator_delete(ptr, sizeof(T) * n);
#endif
}
};
Expand Down
44 changes: 14 additions & 30 deletions include/fast_io_core_impl/freestanding/bytes.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,13 @@ struct overlapped_copy_buffer_ptr
{
T *ptr;
inline explicit constexpr overlapped_copy_buffer_ptr(::std::size_t n) noexcept
: ptr(new T[n])
: ptr(::new T[n])
{}
inline overlapped_copy_buffer_ptr(overlapped_copy_buffer_ptr const &) = delete;
inline overlapped_copy_buffer_ptr &operator=(overlapped_copy_buffer_ptr const &) = delete;
inline constexpr ~overlapped_copy_buffer_ptr()
{
delete[] ptr;
::delete[] ptr;
}
};

Expand Down Expand Up @@ -88,10 +88,8 @@ inline constexpr ::std::byte *bytes_copy_n(::std::byte const *first, ::std::size
[[likely]]
#endif
{
#if !defined(__has_builtin)
::std::memmove(dest, first, n);
#elif __has_builtin(__builtin_memmove)
__builtin_memmove(dest, first, n);
#if FAST_IO_HAS_BUILTIN(__builtin_memmove)
__builtin_memmove(dest, first, n);
#else
::std::memmove(dest, first, n);
#endif
Expand Down Expand Up @@ -122,10 +120,8 @@ inline constexpr ::std::byte *nonoverlapped_bytes_copy_n(::std::byte const *firs
[[likely]]
#endif
{
#if !defined(__has_builtin)
::std::memcpy(dest, first, n);
#elif __has_builtin(__builtin_memcpy)
__builtin_memcpy(dest, first, n);
#if FAST_IO_HAS_BUILTIN(__builtin_memcpy)
__builtin_memcpy(dest, first, n);
#else
::std::memcpy(dest, first, n);
#endif
Expand Down Expand Up @@ -157,10 +153,8 @@ inline constexpr ::std::byte const *type_punning_from_bytes(::std::byte const *_
else
#endif
{
#if !defined(__has_builtin)
::std::memcpy(__builtin_addressof(t), first, n);
#elif __has_builtin(__builtin_memcpy)
__builtin_memcpy(__builtin_addressof(t), first, n);
#if FAST_IO_HAS_BUILTIN(__builtin_memcpy)
__builtin_memcpy(__builtin_addressof(t), first, n);
#else
::std::memcpy(__builtin_addressof(t), first, n);
#endif
Expand All @@ -184,10 +178,8 @@ inline constexpr ::std::byte *type_punning_to_bytes_n(T const &__restrict first,
else
#endif
{
#if !defined(__has_builtin)
::std::memcpy(dest, __builtin_addressof(first), n);
#elif __has_builtin(__builtin_memcpy)
__builtin_memcpy(dest, __builtin_addressof(first), n);
#if FAST_IO_HAS_BUILTIN(__builtin_memcpy)
__builtin_memcpy(dest, __builtin_addressof(first), n);
#else
::std::memcpy(dest, __builtin_addressof(first), n);
#endif
Expand All @@ -214,12 +206,8 @@ inline constexpr ::std::byte *bytes_clear_n(::std::byte *data, ::std::size_t siz
}
else
{
#if defined(__has_builtin)
#if __has_builtin(__builtin_memset)
__builtin_memset(data, 0, size);
#else
::std::memset(data, 0, size);
#endif
#if FAST_IO_HAS_BUILTIN(__builtin_memset)
__builtin_memset(data, 0, size);
#else
::std::memset(data, 0, size);
#endif
Expand All @@ -243,12 +231,8 @@ inline constexpr ::std::byte *bytes_fill_n(::std::byte *data, ::std::size_t size
}
else
{
#if defined(__has_builtin)
#if __has_builtin(__builtin_memset)
__builtin_memset(data, static_cast<char unsigned>(val), size);
#else
::std::memset(data, static_cast<char unsigned>(val), size);
#endif
#if FAST_IO_HAS_BUILTIN(__builtin_memset)
__builtin_memset(data, static_cast<char unsigned>(val), size);
#else
::std::memset(data, static_cast<char unsigned>(val), size);
#endif
Expand Down
12 changes: 2 additions & 10 deletions include/fast_io_core_impl/freestanding/cstr_len.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,12 +39,8 @@ inline constexpr ::std::size_t cstr_len(char_type const *cstr) noexcept
{
if constexpr (::std::same_as<char_type, char>)
{
#if defined(__has_builtin)
#if __has_builtin(__builtin_strlen)
#if FAST_IO_HAS_BUILTIN(__builtin_strlen)
return __builtin_strlen(cstr);
#else
return ::std::strlen(cstr);
#endif
#else
return ::std::strlen(cstr);
#endif
Expand All @@ -67,12 +63,8 @@ inline constexpr ::std::size_t cstr_nlen(char_type const *cstr, ::std::size_t n)
{
if constexpr (::std::same_as<char_type, char>)
{
#if defined(__has_builtin)
#if __has_builtin(__builtin_strnlen)
#if FAST_IO_HAS_BUILTIN(__builtin_strnlen)
return __builtin_strnlen(cstr, n);
#else
return details::dummy_cstr_nlen(cstr, n);
#endif
#else
return details::dummy_cstr_nlen(cstr, n);
#endif
Expand Down
Loading
Loading