diff --git a/dist/Makefile.in b/dist/Makefile.in index 00648a182..5c0a242b0 100644 --- a/dist/Makefile.in +++ b/dist/Makefile.in @@ -281,7 +281,7 @@ DTRACE_OBJS= @ADDITIONAL_OBJS@ @REPLACEMENT_OBJS@ @CRYPTO_OBJS@ \ log_stat@o@ mkpath@o@ mp_alloc@o@ mp_backup@o@ mp_bh@o@ mp_fget@o@ \ mp_fmethod@o@ mp_fopen@o@ mp_fput@o@ mp_fset@o@ mp_method@o@ \ mp_mvcc@o@ mp_region@o@ mp_register@o@ mp_resize@o@ mp_stat@o@ \ - mp_sync@o@ mp_trickle@o@ openflags@o@ os_abort@o@ os_abs@o@ \ + mp_sync@o@ mp_trickle@o@ openflags@o@ os_abort@o@ os_abs@o@ os_aio@o@ os_aio_iocp@o@ os_aio_kqueue@o@ os_aio_pool@o@ os_aio_posix@o@ os_aio_uring@o@ \ os_alloc@o@ os_atomic@o@ os_clock@o@ os_cpu@o@ os_ctime@o@ os_config@o@ \ os_dir@o@ os_errno@o@ os_fid@o@ os_flock@o@ os_fsync@o@ \ os_getenv@o@ os_handle@o@ os_map@o@ os_method@o@ os_mkdir@o@ \ @@ -2239,6 +2239,18 @@ os_abort@o@: $(srcdir)/os/os_abort.c $(CC) $(CFLAGS) $? os_addrinfo@o@: $(srcdir)/os/os_addrinfo.c $(CC) $(CFLAGS) $? +os_aio@o@: $(srcdir)/os/os_aio.c + $(CC) $(CFLAGS) $? +os_aio_uring@o@: $(srcdir)/os/os_aio_uring.c + $(CC) $(CFLAGS) $? +os_aio_posix@o@: $(srcdir)/os/os_aio_posix.c + $(CC) $(CFLAGS) $? +os_aio_kqueue@o@: $(srcdir)/os/os_aio_kqueue.c + $(CC) $(CFLAGS) $? +os_aio_pool@o@: $(srcdir)/os/os_aio_pool.c + $(CC) $(CFLAGS) $? +os_aio_iocp@o@: $(srcdir)/os/os_aio_iocp.c + $(CC) $(CFLAGS) $? os_alloc@o@: $(srcdir)/os/os_alloc.c $(CC) $(CFLAGS) $? 
os_atomic@o@: $(srcdir)/os/os_atomic.c diff --git a/dist/api_flags b/dist/api_flags index 77a02e018..43bc20b45 100644 --- a/dist/api_flags +++ b/dist/api_flags @@ -213,6 +213,7 @@ DbEnv.set_flags DB_DIRECT_DB # Don't buffer databases in the OS DB_DSYNC_DB # Set O_DSYNC on the databases DB_HOTBACKUP_IN_PROGRESS # Inhibit bulk loading optimization + DB_MPOOL_AIO # Use asynchronous buffer-pool writeback DB_MULTIVERSION # Multiversion concurrency control DB_NOLOCKING # Set locking/mutex behavior DB_NOMMAP # Don't mmap the underlying file diff --git a/dist/config.hin b/dist/config.hin index 0dc55d7ff..4a2b1a758 100644 --- a/dist/config.hin +++ b/dist/config.hin @@ -30,6 +30,15 @@ /* Define to 1 if you have the 'abort' function. */ #undef HAVE_ABORT +/* Define to 1 to use the BSD kqueue+aio backend. */ +#undef HAVE_AIO_KQUEUE + +/* Define to 1 to use the POSIX aio backend. */ +#undef HAVE_AIO_POSIX + +/* Define to 1 to use the thread-pool AIO offload backend. */ +#undef HAVE_AIO_THREADPOOL + /* Define to 1 if you have the 'atoi' function. */ #undef HAVE_ATOI @@ -189,6 +198,9 @@ /* Define to 1 if you have the header file. */ #undef HAVE_INTTYPES_H +/* Define to 1 to use the Linux io_uring AIO backend. */ +#undef HAVE_IO_URING + /* Define to 1 if you have the 'isalpha' function. */ #undef HAVE_ISALPHA diff --git a/dist/configure b/dist/configure index 97d4b14bd..1af81ff8f 100755 --- a/dist/configure +++ b/dist/configure @@ -1762,21 +1762,56 @@ fi } # ac_fn_c_try_compile -# ac_fn_cxx_try_compile LINENO -# ---------------------------- -# Try to compile conftest.$ac_ext, and return whether this succeeded. -ac_fn_cxx_try_compile () +# ac_fn_c_check_header_compile LINENO HEADER VAR INCLUDES +# ------------------------------------------------------- +# Tests whether HEADER exists and can be compiled using the include files in +# INCLUDES, setting the cache variable VAR accordingly. 
+ac_fn_c_check_header_compile () { as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack - rm -f conftest.$ac_objext conftest.beam - if { { ac_try="$ac_compile" + { printf '%s\n' "$as_me:${as_lineno-$LINENO}: checking for $2" >&5 +printf %s "checking for $2... " >&6; } +if eval test \${$3+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +$4 +#include <$2> +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + eval "$3=yes" +else case e in #( + e) eval "$3=no" ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext ;; +esac +fi +eval ac_res=\$$3 + { printf '%s\n' "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +printf '%s\n' "$ac_res" >&6; } + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + +} # ac_fn_c_check_header_compile + +# ac_fn_c_try_link LINENO +# ----------------------- +# Try to link conftest.$ac_ext, and return whether this succeeded. +ac_fn_c_try_link () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + rm -f conftest.$ac_objext conftest.beam conftest$ac_exeext + if { { ac_try="$ac_link" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" printf '%s\n' "$ac_try_echo"; } >&5 - (eval "$ac_compile") 2>conftest.err + (eval "$ac_link") 2>conftest.err ac_status=$? if test -s conftest.err; then grep -v '^ *+' conftest.err >conftest.er1 @@ -1785,9 +1820,12 @@ printf '%s\n' "$ac_try_echo"; } >&5 fi printf '%s\n' "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } && { - test -z "$ac_cxx_werror_flag" || + test -z "$ac_c_werror_flag" || test ! 
-s conftest.err - } && test -s conftest.$ac_objext + } && test -s conftest$ac_exeext && { + test "$cross_compiling" = yes || + test -x conftest$ac_exeext + } then : ac_retval=0 else case e in #( @@ -1797,25 +1835,148 @@ sed 's/^/| /' conftest.$ac_ext >&5 ac_retval=1 ;; esac fi + # Delete the IPA/IPO (Inter Procedural Analysis/Optimization) information + # created by the PGI compiler (conftest_ipa8_conftest.oo), as it would + # interfere with the next link command; also delete a directory that is + # left behind by Apple's compiler. We do this before executing the actions. + rm -rf conftest.dSYM conftest_ipa8_conftest.oo eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno as_fn_set_status $ac_retval -} # ac_fn_cxx_try_compile +} # ac_fn_c_try_link -# ac_fn_cxx_try_cpp LINENO -# ------------------------ -# Try to preprocess conftest.$ac_ext, and return whether this succeeded. -ac_fn_cxx_try_cpp () +# ac_fn_c_check_member LINENO AGGR MEMBER VAR INCLUDES +# ---------------------------------------------------- +# Tries to find if the field MEMBER exists in type AGGR, after including +# INCLUDES, setting cache variable VAR accordingly. +ac_fn_c_check_member () { as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack - if { { ac_try="$ac_cpp conftest.$ac_ext" + { printf '%s\n' "$as_me:${as_lineno-$LINENO}: checking for $2.$3" >&5 +printf %s "checking for $2.$3... " >&6; } +if eval test \${$4+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +$5 +int +main (void) +{ +static $2 ac_aggr; +if (ac_aggr.$3) +return 0; + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + eval "$4=yes" +else case e in #( + e) cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +$5 +int +main (void) +{ +static $2 ac_aggr; +if (sizeof ac_aggr.$3) +return 0; + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + eval "$4=yes" +else case e in #( + e) eval "$4=no" ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext ;; +esac +fi +eval ac_res=\$$4 + { printf '%s\n' "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +printf '%s\n' "$ac_res" >&6; } + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + +} # ac_fn_c_check_member + +# ac_fn_check_decl LINENO SYMBOL VAR INCLUDES EXTRA-OPTIONS FLAG-VAR +# ------------------------------------------------------------------ +# Tests whether SYMBOL is declared in INCLUDES, setting cache variable VAR +# accordingly. Pass EXTRA-OPTIONS to the compiler, using FLAG-VAR. +ac_fn_check_decl () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + as_decl_name=`echo $2|sed 's/ *(.*//'` + { printf '%s\n' "$as_me:${as_lineno-$LINENO}: checking whether $as_decl_name is declared" >&5 +printf %s "checking whether $as_decl_name is declared... " >&6; } +if eval test \${$3+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) as_decl_use=`echo $2|sed -e 's/(/((/' -e 's/)/) 0&/' -e 's/,/) 0& (/g'` + eval ac_save_FLAGS=\$$6 + as_fn_append $6 " $5" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +$4 +int +main (void) +{ +#ifndef $as_decl_name +#ifdef __cplusplus + (void) $as_decl_use; +#else + (void) $as_decl_name; +#endif +#endif + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + eval "$3=yes" +else case e in #( + e) eval "$3=no" ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + eval $6=\$ac_save_FLAGS + ;; +esac +fi +eval ac_res=\$$3 + { printf '%s\n' "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +printf '%s\n' "$ac_res" >&6; } + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + +} # ac_fn_check_decl + +# ac_fn_cxx_try_compile LINENO +# ---------------------------- +# Try to compile conftest.$ac_ext, and return whether this succeeded. +ac_fn_cxx_try_compile () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + rm -f conftest.$ac_objext conftest.beam + if { { ac_try="$ac_compile" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" printf '%s\n' "$ac_try_echo"; } >&5 - (eval "$ac_cpp conftest.$ac_ext") 2>conftest.err + (eval "$ac_compile") 2>conftest.err ac_status=$? if test -s conftest.err; then grep -v '^ *+' conftest.err >conftest.er1 @@ -1823,39 +1984,38 @@ printf '%s\n' "$ac_try_echo"; } >&5 mv -f conftest.er1 conftest.err fi printf '%s\n' "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 - test $ac_status = 0; } > conftest.i && { - test -z "$ac_cxx_preproc_warn_flag$ac_cxx_werror_flag" || + test $ac_status = 0; } && { + test -z "$ac_cxx_werror_flag" || test ! 
-s conftest.err - } + } && test -s conftest.$ac_objext then : ac_retval=0 else case e in #( e) printf '%s\n' "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - ac_retval=1 ;; + ac_retval=1 ;; esac fi eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno as_fn_set_status $ac_retval -} # ac_fn_cxx_try_cpp +} # ac_fn_cxx_try_compile -# ac_fn_c_try_link LINENO -# ----------------------- -# Try to link conftest.$ac_ext, and return whether this succeeded. -ac_fn_c_try_link () +# ac_fn_cxx_try_cpp LINENO +# ------------------------ +# Try to preprocess conftest.$ac_ext, and return whether this succeeded. +ac_fn_cxx_try_cpp () { as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack - rm -f conftest.$ac_objext conftest.beam conftest$ac_exeext - if { { ac_try="$ac_link" + if { { ac_try="$ac_cpp conftest.$ac_ext" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" printf '%s\n' "$ac_try_echo"; } >&5 - (eval "$ac_link") 2>conftest.err + (eval "$ac_cpp conftest.$ac_ext") 2>conftest.err ac_status=$? if test -s conftest.err; then grep -v '^ *+' conftest.err >conftest.er1 @@ -1863,12 +2023,9 @@ printf '%s\n' "$ac_try_echo"; } >&5 mv -f conftest.er1 conftest.err fi printf '%s\n' "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 - test $ac_status = 0; } && { - test -z "$ac_c_werror_flag" || + test $ac_status = 0; } > conftest.i && { + test -z "$ac_cxx_preproc_warn_flag$ac_cxx_werror_flag" || test ! 
-s conftest.err - } && test -s conftest$ac_exeext && { - test "$cross_compiling" = yes || - test -x conftest$ac_exeext } then : ac_retval=0 @@ -1876,53 +2033,13 @@ else case e in #( e) printf '%s\n' "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - ac_retval=1 ;; + ac_retval=1 ;; esac fi - # Delete the IPA/IPO (Inter Procedural Analysis/Optimization) information - # created by the PGI compiler (conftest_ipa8_conftest.oo), as it would - # interfere with the next link command; also delete a directory that is - # left behind by Apple's compiler. We do this before executing the actions. - rm -rf conftest.dSYM conftest_ipa8_conftest.oo eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno as_fn_set_status $ac_retval -} # ac_fn_c_try_link - -# ac_fn_c_check_header_compile LINENO HEADER VAR INCLUDES -# ------------------------------------------------------- -# Tests whether HEADER exists and can be compiled using the include files in -# INCLUDES, setting the cache variable VAR accordingly. -ac_fn_c_check_header_compile () -{ - as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack - { printf '%s\n' "$as_me:${as_lineno-$LINENO}: checking for $2" >&5 -printf %s "checking for $2... " >&6; } -if eval test \${$3+y} -then : - printf %s "(cached) " >&6 -else case e in #( - e) cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. 
*/ -$4 -#include <$2> -_ACEOF -if ac_fn_c_try_compile "$LINENO" -then : - eval "$3=yes" -else case e in #( - e) eval "$3=no" ;; -esac -fi -rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext ;; -esac -fi -eval ac_res=\$$3 - { printf '%s\n' "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 -printf '%s\n' "$ac_res" >&6; } - eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno - -} # ac_fn_c_check_header_compile +} # ac_fn_cxx_try_cpp # ac_fn_c_check_func LINENO FUNC VAR # ---------------------------------- @@ -2036,69 +2153,6 @@ fi } # ac_fn_cxx_try_link -# ac_fn_c_check_member LINENO AGGR MEMBER VAR INCLUDES -# ---------------------------------------------------- -# Tries to find if the field MEMBER exists in type AGGR, after including -# INCLUDES, setting cache variable VAR accordingly. -ac_fn_c_check_member () -{ - as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack - { printf '%s\n' "$as_me:${as_lineno-$LINENO}: checking for $2.$3" >&5 -printf %s "checking for $2.$3... " >&6; } -if eval test \${$4+y} -then : - printf %s "(cached) " >&6 -else case e in #( - e) cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ -$5 -int -main (void) -{ -static $2 ac_aggr; -if (ac_aggr.$3) -return 0; - ; - return 0; -} -_ACEOF -if ac_fn_c_try_compile "$LINENO" -then : - eval "$4=yes" -else case e in #( - e) cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. 
*/ -$5 -int -main (void) -{ -static $2 ac_aggr; -if (sizeof ac_aggr.$3) -return 0; - ; - return 0; -} -_ACEOF -if ac_fn_c_try_compile "$LINENO" -then : - eval "$4=yes" -else case e in #( - e) eval "$4=no" ;; -esac -fi -rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext ;; -esac -fi -rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext ;; -esac -fi -eval ac_res=\$$4 - { printf '%s\n' "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 -printf '%s\n' "$ac_res" >&6; } - eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno - -} # ac_fn_c_check_member - # ac_fn_c_try_run LINENO # ---------------------- # Try to run conftest.$ac_ext, and return whether this succeeded. Assumes that @@ -5907,95 +5961,11 @@ fi # Respect the environment LIBS settings LIBSO_LIBS="$LIBS" -# This is where we handle stuff that autoconf can't handle: compiler, -# preprocessor and load flags, libraries that the standard tests don't -# look for. -# -# There are additional libraries we need for some compiler/architecture -# combinations. -# -# Some architectures require DB to be compiled with special flags and/or -# libraries for threaded applications -# -# The makefile CC may be different than the CC used in config testing, -# because the makefile CC may be set to use $(LIBTOOL). -# -# Don't override anything if it's already set from the environment. -optimize_flag="-O" -extra_cflags="" - -case "$host_os" in -aix4.3.*|aix[5-9]*) - case "$host_os" in - aix4.3.*) - CPPFLAGS="$CPPFLAGS -D_LINUX_SOURCE_COMPAT";; - esac - # IBM's XLC compilers (at least versions 7/8/9) generate incorrect code - # when ordinary optimization is enabled because they make strong - # assumptions about the types held at each memory location, and some - # Berkeley DB code violates those assumptions. 
[#16141] - extra_cflags=" -qalias=noansi" - optimize_flag="-O2" - CC=${CC-"xlc_r"} - CPPFLAGS="$CPPFLAGS -D_THREAD_SAFE" - LDFLAGS="$LDFLAGS -Wl,-brtl";; -bsdi3*) CC=${CC-"shlicc2"} - LIBSO_LIBS="$LIBSO_LIBS -lipc";; -cygwin*) - CPPFLAGS="$CPPFLAGS -D_GNU_SOURCE -D_REENTRANT";; -freebsd*) - CPPFLAGS="$CPPFLAGS -D_THREAD_SAFE" - LDFLAGS="$LDFLAGS -pthread";; -gnu*|k*bsd*-gnu|linux*) - CPPFLAGS="$CPPFLAGS -D_GNU_SOURCE -D_REENTRANT";; -hpux*) CPPFLAGS="$CPPFLAGS -D_REENTRANT";; -irix*) optimize_flag="-O2" - CPPFLAGS="$CPPFLAGS -D_SGI_MP_SOURCE";; -mpeix*) CPPFLAGS="$CPPFLAGS -D_POSIX_SOURCE -D_SOCKET_SOURCE" - LIBSO_LIBS="$LIBSO_LIBS -lsocket -lsvipc";; -osf*) CPPFLAGS="$CPPFLAGS -pthread";; -*qnx*) qnx_build="yes" - printf '%s\n' "#define HAVE_QNX 1" >>confdefs.h - - ;; -solaris*) - CPPFLAGS="$CPPFLAGS -D_REENTRANT -D_POSIX_PTHREAD_SEMANTICS ";; -esac - -# Set CFLAGS/CXXFLAGS. We MUST set the flags before we call autoconf -# compiler configuration macros, because if we don't, they set CFLAGS -# to no optimization and -g, which isn't what we want. -# -# If the user wants a debugging environment, add -g the their compiler flags -# and don't automatically optimize. If you want to compile with a different -# set of flags, specify CFLAGS in the environment before configuring. -if test "$db_cv_debug" = "yes"; then - printf '%s\n' "#define DEBUG 1" >>confdefs.h - - - - CFLAGS="-g $CFLAGS" -else - CFLAGS=${CFLAGS-$optimize_flag} -fi - -CFLAGS="$CFLAGS$extra_cflags" -CXXFLAGS=${CXXFLAGS-"$CFLAGS"} - -# The default compiler is cc (NOT gcc), the default CFLAGS is as specified -# above, NOT what is set by AC_PROG_CC, as it won't set optimization flags -# for any compiler other than gcc. -# -# Berkeley DB is written in K&R (old-style) C: function definitions name -# their parameters in a list following the parenthesis, with the prototype -# supplied separately via the __P() macro. 
C23 (gnu23) removed old-style -# definitions from the language, so a modern Autoconf (>= 2.72) AC_PROG_CC, -# which probes -std=gnu23 first and adopts it, turns every definition in the -# tree into a hard error. Suppress that probe by pre-seeding the C11/C23 -# feature-test cache variables to "no" so AC_PROG_CC adds no -std flag, then -# pin a K&R-compatible standard (gnu99) ourselves below. -ac_cv_prog_cc_c23=no -ac_cv_prog_cc_c11=no +# Asynchronous I/O backends for the buffer pool (os_aio), selected at +# runtime in preference order: io_uring (Linux) > IOCP (Windows) > +# kqueue+aio (BSD) > POSIX aio (Solaris/illumos, macOS) > thread-pool +# offload (last-resort fallback). All are optional -- without any, os_aio +# falls back to synchronous I/O. @@ -6014,10 +5984,8 @@ ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu if test -n "$ac_tool_prefix"; then - for ac_prog in cc gcc - do - # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. -set dummy $ac_tool_prefix$ac_prog; ac_word=$2 + # Extract the first word of "${ac_tool_prefix}gcc", so it can be a program name with args. +set dummy ${ac_tool_prefix}gcc; ac_word=$2 { printf '%s\n' "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 printf %s "checking for $ac_word... 
" >&6; } if test ${ac_cv_prog_CC+y} @@ -6038,7 +6006,7 @@ do esac for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then - ac_cv_prog_CC="$ac_tool_prefix$ac_prog" + ac_cv_prog_CC="${ac_tool_prefix}gcc" printf '%s\n' "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi @@ -6059,15 +6027,11 @@ printf '%s\n' "no" >&6; } fi - test -n "$CC" && break - done fi -if test -z "$CC"; then +if test -z "$ac_cv_prog_CC"; then ac_ct_CC=$CC - for ac_prog in cc gcc -do - # Extract the first word of "$ac_prog", so it can be a program name with args. -set dummy $ac_prog; ac_word=$2 + # Extract the first word of "gcc", so it can be a program name with args. +set dummy gcc; ac_word=$2 { printf '%s\n' "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 printf %s "checking for $ac_word... " >&6; } if test ${ac_cv_prog_ac_ct_CC+y} @@ -6088,7 +6052,7 @@ do esac for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then - ac_cv_prog_ac_ct_CC="$ac_prog" + ac_cv_prog_ac_ct_CC="gcc" printf '%s\n' "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi @@ -6108,10 +6072,6 @@ else printf '%s\n' "no" >&6; } fi - - test -n "$ac_ct_CC" && break -done - if test "x$ac_ct_CC" = x; then CC="" else @@ -6123,302 +6083,1529 @@ ac_tool_warned=yes ;; esac CC=$ac_ct_CC fi +else + CC="$ac_cv_prog_CC" fi - -test -z "$CC" && { { printf '%s\n' "$as_me:${as_lineno-$LINENO}: error: in '$ac_pwd':" >&5 -printf '%s\n' "$as_me: error: in '$ac_pwd':" >&2;} -as_fn_error $? "no acceptable C compiler found in \$PATH -See 'config.log' for more details" "$LINENO" 5; } - -# Provide some information about the compiler. 
-printf '%s\n' "$as_me:${as_lineno-$LINENO}: checking for C compiler version" >&5 -set X $ac_compile -ac_compiler=$2 -for ac_option in --version -v -V -qversion -version; do - { { ac_try="$ac_compiler $ac_option >&5" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" -printf '%s\n' "$ac_try_echo"; } >&5 - (eval "$ac_compiler $ac_option >&5") 2>conftest.err - ac_status=$? - if test -s conftest.err; then - sed '10a\ -... rest of stderr output deleted ... - 10q' conftest.err >conftest.er1 - cat conftest.er1 >&5 - fi - rm -f conftest.er1 conftest.err - printf '%s\n' "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 - test $ac_status = 0; } -done - -cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - -int -main (void) -{ - - ; - return 0; -} -_ACEOF -ac_clean_files_save=$ac_clean_files -ac_clean_files="$ac_clean_files a.out a.out.dSYM a.exe b.out" -# Try to create an executable without -o first, disregard a.out. -# It will help us diagnose broken compilers, and finding out an intuition -# of exeext. -{ printf '%s\n' "$as_me:${as_lineno-$LINENO}: checking whether the C compiler works" >&5 -printf %s "checking whether the C compiler works... " >&6; } -ac_link_default=`printf '%s\n' "$ac_link" | sed 's/ -o *conftest[^ ]*//'` - -# The possible output files: -ac_files="a.out conftest.exe conftest a.exe a_out.exe b.out conftest.*" - -ac_rmfiles= -for ac_file in $ac_files +if test -z "$CC"; then + if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}cc", so it can be a program name with args. +set dummy ${ac_tool_prefix}cc; ac_word=$2 +{ printf '%s\n' "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_CC+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH do - case $ac_file in - *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) ;; - * ) ac_rmfiles="$ac_rmfiles $ac_file";; + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_CC="${ac_tool_prefix}cc" + printf '%s\n' "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi done -rm -f $ac_rmfiles + done +IFS=$as_save_IFS -if { { ac_try="$ac_link_default" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; +fi ;; esac -eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" -printf '%s\n' "$ac_try_echo"; } >&5 - (eval "$ac_link_default") 2>&5 - ac_status=$? - printf '%s\n' "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 - test $ac_status = 0; } +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + { printf '%s\n' "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 +printf '%s\n' "$CC" >&6; } +else + { printf '%s\n' "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf '%s\n' "no" >&6; } +fi + + + fi +fi +if test -z "$CC"; then + # Extract the first word of "cc", so it can be a program name with args. +set dummy cc; ac_word=$2 +{ printf '%s\n' "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_CC+y} then : - # Autoconf-2.13 could set the ac_cv_exeext variable to 'no'. -# So ignore a value of 'no', otherwise this would lead to 'EXEEXT = no' -# in a Makefile. We should not override ac_cv_exeext if it was cached, -# so that the user can short-circuit this test for compilers unknown to -# Autoconf. 
-for ac_file in $ac_files '' + printf %s "(cached) " >&6 +else case e in #( + e) if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else + ac_prog_rejected=no +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH do - test -f "$ac_file" || continue - case $ac_file in - *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) - ;; - [ab].out ) - # We found the default executable, but exeext='' is most - # certainly right. - break;; - *.* ) - if test ${ac_cv_exeext+y} && test "$ac_cv_exeext" != no; - then :; else - ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'` - fi - # We set ac_cv_exeext here because the later test for it is not - # safe: cross compilers may not add the suffix if given an '-o' - # argument, so we may need to know it at that point already. - # Even if this section looks crufty: it has the advantage of - # actually working. - break;; - * ) - break;; + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + if test "$as_dir$ac_word$ac_exec_ext" = "/usr/ucb/cc"; then + ac_prog_rejected=yes + continue + fi + ac_cv_prog_CC="cc" + printf '%s\n' "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi done -test "$ac_cv_exeext" = no && ac_cv_exeext= + done +IFS=$as_save_IFS + +if test $ac_prog_rejected = yes; then + # We found a bogon in the path, so make sure we never use it. + set dummy $ac_cv_prog_CC + shift + if test $# != 0; then + # We chose a different compiler from the bogus one. + # However, it has the same basename, so the bogon will be chosen + # first if we set CC to just the basename; use the full file name. 
+ shift + ac_cv_prog_CC="$as_dir$ac_word${1+' '}$@" + fi +fi +fi ;; +esac +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + { printf '%s\n' "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 +printf '%s\n' "$CC" >&6; } +else + { printf '%s\n' "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf '%s\n' "no" >&6; } +fi + +fi +if test -z "$CC"; then + if test -n "$ac_tool_prefix"; then + for ac_prog in cl.exe + do + # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. +set dummy $ac_tool_prefix$ac_prog; ac_word=$2 +{ printf '%s\n' "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_CC+y} +then : + printf %s "(cached) " >&6 else case e in #( - e) ac_file='' ;; + e) if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_CC="$ac_tool_prefix$ac_prog" + printf '%s\n' "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi ;; esac fi -if test -z "$ac_file" +CC=$ac_cv_prog_CC +if test -n "$CC"; then + { printf '%s\n' "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 +printf '%s\n' "$CC" >&6; } +else + { printf '%s\n' "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf '%s\n' "no" >&6; } +fi + + + test -n "$CC" && break + done +fi +if test -z "$CC"; then + ac_ct_CC=$CC + for ac_prog in cl.exe +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +{ printf '%s\n' "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... 
" >&6; } +if test ${ac_cv_prog_ac_ct_CC+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) if test -n "$ac_ct_CC"; then + ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_CC="$ac_prog" + printf '%s\n' "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi ;; +esac +fi +ac_ct_CC=$ac_cv_prog_ac_ct_CC +if test -n "$ac_ct_CC"; then + { printf '%s\n' "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5 +printf '%s\n' "$ac_ct_CC" >&6; } +else + { printf '%s\n' "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf '%s\n' "no" >&6; } +fi + + + test -n "$ac_ct_CC" && break +done + + if test "x$ac_ct_CC" = x; then + CC="" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ printf '%s\n' "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +printf '%s\n' "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + CC=$ac_ct_CC + fi +fi + +fi +if test -z "$CC"; then + if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}clang", so it can be a program name with args. +set dummy ${ac_tool_prefix}clang; ac_word=$2 +{ printf '%s\n' "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_CC+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_CC="${ac_tool_prefix}clang" + printf '%s\n' "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi ;; +esac +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + { printf '%s\n' "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 +printf '%s\n' "$CC" >&6; } +else + { printf '%s\n' "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf '%s\n' "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_CC"; then + ac_ct_CC=$CC + # Extract the first word of "clang", so it can be a program name with args. +set dummy clang; ac_word=$2 +{ printf '%s\n' "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_ac_ct_CC+y} then : + printf %s "(cached) " >&6 +else case e in #( + e) if test -n "$ac_ct_CC"; then + ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_CC="clang" + printf '%s\n' "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi ;; +esac +fi +ac_ct_CC=$ac_cv_prog_ac_ct_CC +if test -n "$ac_ct_CC"; then + { printf '%s\n' "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5 +printf '%s\n' "$ac_ct_CC" >&6; } +else + { printf '%s\n' "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf '%s\n' "no" >&6; } +fi + + if test "x$ac_ct_CC" = x; then + CC="" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ printf '%s\n' "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +printf '%s\n' "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + CC=$ac_ct_CC + fi +else + CC="$ac_cv_prog_CC" +fi + +fi + + +test -z "$CC" && { { printf '%s\n' "$as_me:${as_lineno-$LINENO}: error: in '$ac_pwd':" >&5 +printf '%s\n' "$as_me: error: in '$ac_pwd':" >&2;} +as_fn_error $? "no acceptable C compiler found in \$PATH +See 'config.log' for more details" "$LINENO" 5; } + +# Provide some information about the compiler. +printf '%s\n' "$as_me:${as_lineno-$LINENO}: checking for C compiler version" >&5 +set X $ac_compile +ac_compiler=$2 +for ac_option in --version -v -V -qversion -version; do + { { ac_try="$ac_compiler $ac_option >&5" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +printf '%s\n' "$ac_try_echo"; } >&5 + (eval "$ac_compiler $ac_option >&5") 2>conftest.err + ac_status=$? + if test -s conftest.err; then + sed '10a\ +... rest of stderr output deleted ... 
+ 10q' conftest.err >conftest.er1 + cat conftest.er1 >&5 + fi + rm -f conftest.er1 conftest.err + printf '%s\n' "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } +done + +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +ac_clean_files_save=$ac_clean_files +ac_clean_files="$ac_clean_files a.out a.out.dSYM a.exe b.out" +# Try to create an executable without -o first, disregard a.out. +# It will help us diagnose broken compilers, and finding out an intuition +# of exeext. +{ printf '%s\n' "$as_me:${as_lineno-$LINENO}: checking whether the C compiler works" >&5 +printf %s "checking whether the C compiler works... " >&6; } +ac_link_default=`printf '%s\n' "$ac_link" | sed 's/ -o *conftest[^ ]*//'` + +# The possible output files: +ac_files="a.out conftest.exe conftest a.exe a_out.exe b.out conftest.*" + +ac_rmfiles= +for ac_file in $ac_files +do + case $ac_file in + *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) ;; + * ) ac_rmfiles="$ac_rmfiles $ac_file";; + esac +done +rm -f $ac_rmfiles + +if { { ac_try="$ac_link_default" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +printf '%s\n' "$ac_try_echo"; } >&5 + (eval "$ac_link_default") 2>&5 + ac_status=$? + printf '%s\n' "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } +then : + # Autoconf-2.13 could set the ac_cv_exeext variable to 'no'. +# So ignore a value of 'no', otherwise this would lead to 'EXEEXT = no' +# in a Makefile. We should not override ac_cv_exeext if it was cached, +# so that the user can short-circuit this test for compilers unknown to +# Autoconf. 
+for ac_file in $ac_files '' +do + test -f "$ac_file" || continue + case $ac_file in + *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) + ;; + [ab].out ) + # We found the default executable, but exeext='' is most + # certainly right. + break;; + *.* ) + if test ${ac_cv_exeext+y} && test "$ac_cv_exeext" != no; + then :; else + ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'` + fi + # We set ac_cv_exeext here because the later test for it is not + # safe: cross compilers may not add the suffix if given an '-o' + # argument, so we may need to know it at that point already. + # Even if this section looks crufty: it has the advantage of + # actually working. + break;; + * ) + break;; + esac +done +test "$ac_cv_exeext" = no && ac_cv_exeext= + +else case e in #( + e) ac_file='' ;; +esac +fi +if test -z "$ac_file" +then : + { printf '%s\n' "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf '%s\n' "no" >&6; } +printf '%s\n' "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +{ { printf '%s\n' "$as_me:${as_lineno-$LINENO}: error: in '$ac_pwd':" >&5 +printf '%s\n' "$as_me: error: in '$ac_pwd':" >&2;} +as_fn_error 77 "C compiler cannot create executables +See 'config.log' for more details" "$LINENO" 5; } +else case e in #( + e) { printf '%s\n' "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf '%s\n' "yes" >&6; } ;; +esac +fi +{ printf '%s\n' "$as_me:${as_lineno-$LINENO}: checking for C compiler default output file name" >&5 +printf %s "checking for C compiler default output file name... " >&6; } +{ printf '%s\n' "$as_me:${as_lineno-$LINENO}: result: $ac_file" >&5 +printf '%s\n' "$ac_file" >&6; } +ac_exeext=$ac_cv_exeext + +rm -f -r a.out a.out.dSYM a.exe conftest$ac_cv_exeext b.out +ac_clean_files=$ac_clean_files_save +{ printf '%s\n' "$as_me:${as_lineno-$LINENO}: checking for suffix of executables" >&5 +printf %s "checking for suffix of executables... 
" >&6; } +if { { ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +printf '%s\n' "$ac_try_echo"; } >&5 + (eval "$ac_link") 2>&5 + ac_status=$? + printf '%s\n' "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } +then : + # If both 'conftest.exe' and 'conftest' are 'present' (well, observable) +# catch 'conftest.exe'. For instance with Cygwin, 'ls conftest' will +# work properly (i.e., refer to 'conftest.exe'), while it won't with +# 'rm'. +for ac_file in conftest.exe conftest conftest.*; do + test -f "$ac_file" || continue + case $ac_file in + *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) ;; + *.* ) ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'` + break;; + * ) break;; + esac +done +else case e in #( + e) { { printf '%s\n' "$as_me:${as_lineno-$LINENO}: error: in '$ac_pwd':" >&5 +printf '%s\n' "$as_me: error: in '$ac_pwd':" >&2;} +as_fn_error $? "cannot compute suffix of executables: cannot compile and link +See 'config.log' for more details" "$LINENO" 5; } ;; +esac +fi +rm -f conftest conftest$ac_cv_exeext +{ printf '%s\n' "$as_me:${as_lineno-$LINENO}: result: $ac_cv_exeext" >&5 +printf '%s\n' "$ac_cv_exeext" >&6; } + +rm -f conftest.$ac_ext +EXEEXT=$ac_cv_exeext +ac_exeext=$EXEEXT +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +int +main (void) +{ +FILE *f = fopen ("conftest.out", "w"); + if (!f) + return 1; + return ferror (f) || fclose (f) != 0; + + ; + return 0; +} +_ACEOF +ac_clean_files="$ac_clean_files conftest.out" +# Check that the compiler produces executables we can run. If not, either +# the compiler is broken, or we cross compile. +{ printf '%s\n' "$as_me:${as_lineno-$LINENO}: checking whether we are cross compiling" >&5 +printf %s "checking whether we are cross compiling... 
" >&6; } +if test "$cross_compiling" != yes; then + { { ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +printf '%s\n' "$ac_try_echo"; } >&5 + (eval "$ac_link") 2>&5 + ac_status=$? + printf '%s\n' "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } + if { ac_try='./conftest$ac_cv_exeext' + { { case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +printf '%s\n' "$ac_try_echo"; } >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + printf '%s\n' "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; }; then + cross_compiling=no + else + if test "$cross_compiling" = maybe; then + cross_compiling=yes + else + { { printf '%s\n' "$as_me:${as_lineno-$LINENO}: error: in '$ac_pwd':" >&5 +printf '%s\n' "$as_me: error: in '$ac_pwd':" >&2;} +as_fn_error 77 "cannot run C compiled programs. +If you meant to cross compile, use '--host'. +See 'config.log' for more details" "$LINENO" 5; } + fi + fi +fi +{ printf '%s\n' "$as_me:${as_lineno-$LINENO}: result: $cross_compiling" >&5 +printf '%s\n' "$cross_compiling" >&6; } + +rm -f conftest.$ac_ext conftest$ac_cv_exeext \ + conftest.o conftest.obj conftest.out +ac_clean_files=$ac_clean_files_save +{ printf '%s\n' "$as_me:${as_lineno-$LINENO}: checking for suffix of object files" >&5 +printf %s "checking for suffix of object files... " >&6; } +if test ${ac_cv_objext+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +rm -f conftest.o conftest.obj +if { { ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +printf '%s\n' "$ac_try_echo"; } >&5 + (eval "$ac_compile") 2>&5 + ac_status=$? + printf '%s\n' "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } +then : + for ac_file in conftest.o conftest.obj conftest.*; do + test -f "$ac_file" || continue; + case $ac_file in + *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM ) ;; + *) ac_cv_objext=`expr "$ac_file" : '.*\.\(.*\)'` + break;; + esac +done +else case e in #( + e) printf '%s\n' "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +{ { printf '%s\n' "$as_me:${as_lineno-$LINENO}: error: in '$ac_pwd':" >&5 +printf '%s\n' "$as_me: error: in '$ac_pwd':" >&2;} +as_fn_error $? "cannot compute suffix of object files: cannot compile +See 'config.log' for more details" "$LINENO" 5; } ;; +esac +fi +rm -f conftest.$ac_cv_objext conftest.$ac_ext ;; +esac +fi +{ printf '%s\n' "$as_me:${as_lineno-$LINENO}: result: $ac_cv_objext" >&5 +printf '%s\n' "$ac_cv_objext" >&6; } +OBJEXT=$ac_cv_objext +ac_objext=$OBJEXT +{ printf '%s\n' "$as_me:${as_lineno-$LINENO}: checking whether the compiler supports GNU C" >&5 +printf %s "checking whether the compiler supports GNU C... " >&6; } +if test ${ac_cv_c_compiler_gnu+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ +#ifndef __GNUC__ + choke me +#endif + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ac_compiler_gnu=yes +else case e in #( + e) ac_compiler_gnu=no ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext +ac_cv_c_compiler_gnu=$ac_compiler_gnu + ;; +esac +fi +{ printf '%s\n' "$as_me:${as_lineno-$LINENO}: result: $ac_cv_c_compiler_gnu" >&5 +printf '%s\n' "$ac_cv_c_compiler_gnu" >&6; } +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +if test $ac_compiler_gnu = yes; then + GCC=yes +else + GCC= +fi +ac_test_CFLAGS=${CFLAGS+y} +ac_save_CFLAGS=$CFLAGS +{ printf '%s\n' "$as_me:${as_lineno-$LINENO}: checking whether $CC accepts -g" >&5 +printf %s "checking whether $CC accepts -g... " >&6; } +if test ${ac_cv_prog_cc_g+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) ac_save_c_werror_flag=$ac_c_werror_flag + ac_c_werror_flag=yes + ac_cv_prog_cc_g=no + CFLAGS="-g" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ac_cv_prog_cc_g=yes +else case e in #( + e) CFLAGS="" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + +else case e in #( + e) ac_c_werror_flag=$ac_save_c_werror_flag + CFLAGS="-g" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ac_cv_prog_cc_g=yes +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + ac_c_werror_flag=$ac_save_c_werror_flag ;; +esac +fi +{ printf '%s\n' "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_g" >&5 +printf '%s\n' "$ac_cv_prog_cc_g" >&6; } +if test $ac_test_CFLAGS; then + CFLAGS=$ac_save_CFLAGS +elif test $ac_cv_prog_cc_g = yes; then + if test "$GCC" = yes; then + CFLAGS="-g -O2" + else + CFLAGS="-g" + fi +else + if test "$GCC" = yes; then + CFLAGS="-O2" + else + CFLAGS= + fi +fi +ac_prog_cc_stdc=no +if test x$ac_prog_cc_stdc = xno +then : + { printf '%s\n' "$as_me:${as_lineno-$LINENO}: checking for $CC option to enable C23 features" >&5 +printf %s "checking for $CC option to enable C23 features... " >&6; } +if test ${ac_cv_prog_cc_c23+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) ac_cv_prog_cc_c23=no +ac_save_CC=$CC +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +$ac_c_conftest_c23_program +_ACEOF +for ac_arg in '' -std=gnu23 +do + CC="$ac_save_CC $ac_arg" + if ac_fn_c_try_compile "$LINENO" +then : + ac_cv_prog_cc_c23=$ac_arg +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam + test "x$ac_cv_prog_cc_c23" != "xno" && break +done +rm -f conftest.$ac_ext +CC=$ac_save_CC ;; +esac +fi + +if test "x$ac_cv_prog_cc_c23" = xno +then : + { printf '%s\n' "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5 +printf '%s\n' "unsupported" >&6; } +else case e in #( + e) if test "x$ac_cv_prog_cc_c23" = x +then : + { printf '%s\n' "$as_me:${as_lineno-$LINENO}: result: none needed" >&5 +printf '%s\n' "none needed" >&6; } +else case e in #( + e) { printf '%s\n' "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_c23" >&5 +printf '%s\n' "$ac_cv_prog_cc_c23" >&6; } + CC="$CC $ac_cv_prog_cc_c23" ;; +esac +fi + ac_cv_prog_cc_stdc=$ac_cv_prog_cc_c23 + ac_prog_cc_stdc=c23 ;; +esac +fi +fi +if test x$ac_prog_cc_stdc = xno +then : + { printf '%s\n' "$as_me:${as_lineno-$LINENO}: checking for $CC option to enable C11 features" >&5 +printf %s "checking for $CC option to enable C11 features... " >&6; } +if test ${ac_cv_prog_cc_c11+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) ac_cv_prog_cc_c11=no +ac_save_CC=$CC +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +$ac_c_conftest_c11_program +_ACEOF +for ac_arg in '' -std=gnu11 -std:c11 +do + CC="$ac_save_CC $ac_arg" + if ac_fn_c_try_compile "$LINENO" +then : + ac_cv_prog_cc_c11=$ac_arg +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam + test "x$ac_cv_prog_cc_c11" != "xno" && break +done +rm -f conftest.$ac_ext +CC=$ac_save_CC ;; +esac +fi + +if test "x$ac_cv_prog_cc_c11" = xno +then : + { printf '%s\n' "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5 +printf '%s\n' "unsupported" >&6; } +else case e in #( + e) if test "x$ac_cv_prog_cc_c11" = x +then : + { printf '%s\n' "$as_me:${as_lineno-$LINENO}: result: none needed" >&5 +printf '%s\n' "none needed" >&6; } +else case e in #( + e) { printf '%s\n' "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_c11" >&5 +printf '%s\n' "$ac_cv_prog_cc_c11" >&6; } + CC="$CC $ac_cv_prog_cc_c11" ;; +esac +fi + ac_cv_prog_cc_stdc=$ac_cv_prog_cc_c11 + ac_prog_cc_stdc=c11 ;; +esac +fi +fi +if test x$ac_prog_cc_stdc = xno +then : + { printf '%s\n' "$as_me:${as_lineno-$LINENO}: checking for $CC option to enable C99 features" >&5 +printf %s "checking for $CC option to enable C99 features... " >&6; } +if test ${ac_cv_prog_cc_c99+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) ac_cv_prog_cc_c99=no +ac_save_CC=$CC +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +$ac_c_conftest_c99_program +_ACEOF +for ac_arg in '' -std=gnu99 -std=c99 -c99 -qlanglvl=extc1x -qlanglvl=extc99 -AC99 -D_STDC_C99= +do + CC="$ac_save_CC $ac_arg" + if ac_fn_c_try_compile "$LINENO" +then : + ac_cv_prog_cc_c99=$ac_arg +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam + test "x$ac_cv_prog_cc_c99" != "xno" && break +done +rm -f conftest.$ac_ext +CC=$ac_save_CC ;; +esac +fi + +if test "x$ac_cv_prog_cc_c99" = xno +then : + { printf '%s\n' "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5 +printf '%s\n' "unsupported" >&6; } +else case e in #( + e) if test "x$ac_cv_prog_cc_c99" = x +then : + { printf '%s\n' "$as_me:${as_lineno-$LINENO}: result: none needed" >&5 +printf '%s\n' "none needed" >&6; } +else case e in #( + e) { printf '%s\n' "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_c99" >&5 +printf '%s\n' "$ac_cv_prog_cc_c99" >&6; } + CC="$CC $ac_cv_prog_cc_c99" ;; +esac +fi + ac_cv_prog_cc_stdc=$ac_cv_prog_cc_c99 + ac_prog_cc_stdc=c99 ;; +esac +fi +fi +if test x$ac_prog_cc_stdc = xno +then : + { printf '%s\n' "$as_me:${as_lineno-$LINENO}: checking for $CC option to enable C89 features" >&5 +printf %s "checking for $CC option to enable C89 features... " >&6; } +if test ${ac_cv_prog_cc_c89+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) ac_cv_prog_cc_c89=no +ac_save_CC=$CC +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +$ac_c_conftest_c89_program +_ACEOF +for ac_arg in '' -qlanglvl=extc89 -qlanglvl=ansi -std -Ae "-Aa -D_HPUX_SOURCE" "-Xc -D__EXTENSIONS__" +do + CC="$ac_save_CC $ac_arg" + if ac_fn_c_try_compile "$LINENO" +then : + ac_cv_prog_cc_c89=$ac_arg +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam + test "x$ac_cv_prog_cc_c89" != "xno" && break +done +rm -f conftest.$ac_ext +CC=$ac_save_CC ;; +esac +fi + +if test "x$ac_cv_prog_cc_c89" = xno +then : + { printf '%s\n' "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5 +printf '%s\n' "unsupported" >&6; } +else case e in #( + e) if test "x$ac_cv_prog_cc_c89" = x +then : + { printf '%s\n' "$as_me:${as_lineno-$LINENO}: result: none needed" >&5 +printf '%s\n' "none needed" >&6; } +else case e in #( + e) { printf '%s\n' "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_c89" >&5 +printf '%s\n' "$ac_cv_prog_cc_c89" >&6; } + CC="$CC $ac_cv_prog_cc_c89" ;; +esac +fi + ac_cv_prog_cc_stdc=$ac_cv_prog_cc_c89 + ac_prog_cc_stdc=c89 ;; +esac +fi +fi + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + +ac_header= ac_cache= +for ac_item in $ac_header_c_list +do + if test $ac_cache; then + ac_fn_c_check_header_compile "$LINENO" $ac_header ac_cv_header_$ac_cache "$ac_includes_default" + if eval test \"x\$ac_cv_header_$ac_cache\" = xyes; then + printf '%s\n' "#define $ac_item 1" >> confdefs.h + fi + ac_header= ac_cache= + elif test $ac_header; then + ac_cache=$ac_item + else + ac_header=$ac_item + fi +done + + + + + + + + +if test $ac_cv_header_stdlib_h = yes && test $ac_cv_header_string_h = yes +then : + +printf '%s\n' "#define STDC_HEADERS 1" >>confdefs.h + +fi +ac_fn_c_check_header_compile "$LINENO" "liburing.h" "ac_cv_header_liburing_h" "$ac_includes_default" +if test "x$ac_cv_header_liburing_h" = xyes +then : + + { printf '%s\n' 
"$as_me:${as_lineno-$LINENO}: checking for io_uring_queue_init in -luring" >&5 +printf %s "checking for io_uring_queue_init in -luring... " >&6; } +if test ${ac_cv_lib_uring_io_uring_queue_init+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) ac_check_lib_save_LIBS=$LIBS +LIBS="-luring $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. + The 'extern "C"' is for builds by C++ compilers; + although this is not generally supported in C code supporting it here + has little cost and some practical benefit (sr 110532). */ +#ifdef __cplusplus +extern "C" +#endif +char io_uring_queue_init (void); +int +main (void) +{ +return io_uring_queue_init (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_lib_uring_io_uring_queue_init=yes +else case e in #( + e) ac_cv_lib_uring_io_uring_queue_init=no ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS ;; +esac +fi +{ printf '%s\n' "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_uring_io_uring_queue_init" >&5 +printf '%s\n' "$ac_cv_lib_uring_io_uring_queue_init" >&6; } +if test "x$ac_cv_lib_uring_io_uring_queue_init" = xyes +then : + + +printf '%s\n' "#define HAVE_IO_URING 1" >>confdefs.h + + LIBSO_LIBS="$LIBSO_LIBS -luring" + LIBS="$LIBS -luring" +fi + +fi + + +# POSIX aio: aio_read/aio_write may live in libc (BSD/macOS) or librt +# (Solaris/illumos, glibc). Record the library so os_aio_posix links. +ac_fn_c_check_header_compile "$LINENO" "aio.h" "ac_cv_header_aio_h" "$ac_includes_default" +if test "x$ac_cv_header_aio_h" = xyes +then : + + { printf '%s\n' "$as_me:${as_lineno-$LINENO}: checking for library containing aio_read" >&5 +printf %s "checking for library containing aio_read... 
" >&6; } +if test ${ac_cv_search_aio_read+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) ac_func_search_save_LIBS=$LIBS +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. + The 'extern "C"' is for builds by C++ compilers; + although this is not generally supported in C code supporting it here + has little cost and some practical benefit (sr 110532). */ +#ifdef __cplusplus +extern "C" +#endif +char aio_read (void); +int +main (void) +{ +return aio_read (); + ; + return 0; +} +_ACEOF +for ac_lib in '' rt aio +do + if test -z "$ac_lib"; then + ac_res="none required" + else + ac_res=-l$ac_lib + LIBS="-l$ac_lib $ac_func_search_save_LIBS" + fi + if ac_fn_c_try_link "$LINENO" +then : + ac_cv_search_aio_read=$ac_res +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext + if test ${ac_cv_search_aio_read+y} +then : + break +fi +done +if test ${ac_cv_search_aio_read+y} +then : + +else case e in #( + e) ac_cv_search_aio_read=no ;; +esac +fi +rm conftest.$ac_ext +LIBS=$ac_func_search_save_LIBS ;; +esac +fi +{ printf '%s\n' "$as_me:${as_lineno-$LINENO}: result: $ac_cv_search_aio_read" >&5 +printf '%s\n' "$ac_cv_search_aio_read" >&6; } +ac_res=$ac_cv_search_aio_read +if test "$ac_res" != no +then : + test "$ac_res" = "none required" || LIBS="$ac_res $LIBS" + + +printf '%s\n' "#define HAVE_AIO_POSIX 1" >>confdefs.h + +fi + +fi + + +# kqueue+aio (BSD, notably FreeBSD): needs EVFILT_AIO and, crucially, the +# sigev_notify_kqueue member of struct sigevent so a completed aiocb posts a +# kevent. macOS has EVFILT_AIO but lacks sigev_notify_kqueue, so it is +# excluded here and uses the POSIX aio backend instead. 
+{ printf '%s\n' "$as_me:${as_lineno-$LINENO}: checking for $CC options to detect undeclared functions" >&5 +printf %s "checking for $CC options to detect undeclared functions... " >&6; } +if test ${ac_cv_c_undeclared_builtin_options+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) ac_save_CFLAGS=$CFLAGS + ac_cv_c_undeclared_builtin_options='cannot detect' + for ac_arg in '' -fno-builtin; do + CFLAGS="$ac_save_CFLAGS $ac_arg" + # This test program should *not* compile successfully. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ +(void) strchr; + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + +else case e in #( + e) # This test program should compile successfully. + # No library function is consistently available on + # freestanding implementations, so test against a dummy + # declaration. Include always-available headers on the + # off chance that they somehow elicit warnings. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +#include +#include +#include +#include +extern void ac_decl (int, char *); + +int +main (void) +{ +(void) ac_decl (0, (char *) 0); + (void) ac_decl; + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + if test x"$ac_arg" = x +then : + ac_cv_c_undeclared_builtin_options='none needed' +else case e in #( + e) ac_cv_c_undeclared_builtin_options=$ac_arg ;; +esac +fi + break +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + done + CFLAGS=$ac_save_CFLAGS + ;; +esac +fi +{ printf '%s\n' "$as_me:${as_lineno-$LINENO}: result: $ac_cv_c_undeclared_builtin_options" >&5 +printf '%s\n' "$ac_cv_c_undeclared_builtin_options" >&6; } + case $ac_cv_c_undeclared_builtin_options in #( + 'cannot detect') : + { { printf '%s\n' "$as_me:${as_lineno-$LINENO}: error: in '$ac_pwd':" >&5 +printf '%s\n' "$as_me: error: in '$ac_pwd':" >&2;} +as_fn_error $? "cannot make $CC report undeclared builtins +See 'config.log' for more details" "$LINENO" 5; } ;; #( + 'none needed') : + ac_c_undeclared_builtin_options='' ;; #( + *) : + ac_c_undeclared_builtin_options=$ac_cv_c_undeclared_builtin_options ;; +esac + +{ printf '%s\n' "$as_me:${as_lineno-$LINENO}: checking for $CC options to ignore future-version functions" >&5 +printf %s "checking for $CC options to ignore future-version functions... " >&6; } +if test ${ac_cv_c_future_darwin_options+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) ac_compile_saved="$ac_compile" + ac_compile="$ac_compile -Werror=unguarded-availability-new" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#if ! 
(defined __APPLE__ && defined __MACH__) + #error "-Werror=unguarded-availability-new not needed here" + #endif + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ac_cv_c_future_darwin_options='-Werror=unguarded-availability-new' +else case e in #( + e) ac_cv_c_future_darwin_options='none needed' ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + ac_compile="$ac_compile_saved" + ;; +esac +fi +{ printf '%s\n' "$as_me:${as_lineno-$LINENO}: result: $ac_cv_c_future_darwin_options" >&5 +printf '%s\n' "$ac_cv_c_future_darwin_options" >&6; } + case $ac_cv_c_future_darwin_options in #( + 'none needed') : + ac_c_future_darwin_options='' ;; #( + *) : + ac_c_future_darwin_options=$ac_cv_c_future_darwin_options ;; +esac + +ac_fn_c_check_member "$LINENO" "struct sigevent" "sigev_notify_kqueue" "ac_cv_member_struct_sigevent_sigev_notify_kqueue" "#include +#include +" +if test "x$ac_cv_member_struct_sigevent_sigev_notify_kqueue" = xyes +then : + ac_fn_check_decl "$LINENO" "EVFILT_AIO" "ac_cv_have_decl_EVFILT_AIO" "#include +" "$ac_c_undeclared_builtin_options$ac_c_future_darwin_options" "CFLAGS" +if test "x$ac_cv_have_decl_EVFILT_AIO" = xyes +then : + + +printf '%s\n' "#define HAVE_AIO_KQUEUE 1" >>confdefs.h + +fi +fi + + +ac_fn_c_check_header_compile "$LINENO" "pthread.h" "ac_cv_header_pthread_h" "$ac_includes_default" +if test "x$ac_cv_header_pthread_h" = xyes +then : + + +printf '%s\n' "#define HAVE_AIO_THREADPOOL 1" >>confdefs.h + +fi + + +# This is where we handle stuff that autoconf can't handle: compiler, +# preprocessor and load flags, libraries that the standard tests don't +# look for. +# +# There are additional libraries we need for some compiler/architecture +# combinations. 
+# +# Some architectures require DB to be compiled with special flags and/or +# libraries for threaded applications +# +# The makefile CC may be different than the CC used in config testing, +# because the makefile CC may be set to use $(LIBTOOL). +# +# Don't override anything if it's already set from the environment. +optimize_flag="-O" +extra_cflags="" + +case "$host_os" in +aix4.3.*|aix[5-9]*) + case "$host_os" in + aix4.3.*) + CPPFLAGS="$CPPFLAGS -D_LINUX_SOURCE_COMPAT";; + esac + # IBM's XLC compilers (at least versions 7/8/9) generate incorrect code + # when ordinary optimization is enabled because they make strong + # assumptions about the types held at each memory location, and some + # Berkeley DB code violates those assumptions. [#16141] + extra_cflags=" -qalias=noansi" + optimize_flag="-O2" + CC=${CC-"xlc_r"} + CPPFLAGS="$CPPFLAGS -D_THREAD_SAFE" + LDFLAGS="$LDFLAGS -Wl,-brtl";; +bsdi3*) CC=${CC-"shlicc2"} + LIBSO_LIBS="$LIBSO_LIBS -lipc";; +cygwin*) + CPPFLAGS="$CPPFLAGS -D_GNU_SOURCE -D_REENTRANT";; +freebsd*) + CPPFLAGS="$CPPFLAGS -D_THREAD_SAFE" + LDFLAGS="$LDFLAGS -pthread";; +gnu*|k*bsd*-gnu|linux*) + CPPFLAGS="$CPPFLAGS -D_GNU_SOURCE -D_REENTRANT";; +hpux*) CPPFLAGS="$CPPFLAGS -D_REENTRANT";; +irix*) optimize_flag="-O2" + CPPFLAGS="$CPPFLAGS -D_SGI_MP_SOURCE";; +mpeix*) CPPFLAGS="$CPPFLAGS -D_POSIX_SOURCE -D_SOCKET_SOURCE" + LIBSO_LIBS="$LIBSO_LIBS -lsocket -lsvipc";; +osf*) CPPFLAGS="$CPPFLAGS -pthread";; +*qnx*) qnx_build="yes" + printf '%s\n' "#define HAVE_QNX 1" >>confdefs.h + + ;; +solaris*) + CPPFLAGS="$CPPFLAGS -D_REENTRANT -D_POSIX_PTHREAD_SEMANTICS ";; +esac + +# Set CFLAGS/CXXFLAGS. We MUST set the flags before we call autoconf +# compiler configuration macros, because if we don't, they set CFLAGS +# to no optimization and -g, which isn't what we want. +# +# If the user wants a debugging environment, add -g the their compiler flags +# and don't automatically optimize. 
If you want to compile with a different +# set of flags, specify CFLAGS in the environment before configuring. +if test "$db_cv_debug" = "yes"; then + printf '%s\n' "#define DEBUG 1" >>confdefs.h + + + + CFLAGS="-g $CFLAGS" +else + CFLAGS=${CFLAGS-$optimize_flag} +fi + +CFLAGS="$CFLAGS$extra_cflags" +CXXFLAGS=${CXXFLAGS-"$CFLAGS"} + +# The default compiler is cc (NOT gcc), the default CFLAGS is as specified +# above, NOT what is set by AC_PROG_CC, as it won't set optimization flags +# for any compiler other than gcc. +# +# Berkeley DB is written in K&R (old-style) C: function definitions name +# their parameters in a list following the parenthesis, with the prototype +# supplied separately via the __P() macro. C23 (gnu23) removed old-style +# definitions from the language, so a modern Autoconf (>= 2.72) AC_PROG_CC, +# which probes -std=gnu23 first and adopts it, turns every definition in the +# tree into a hard error. Suppress that probe by pre-seeding the C11/C23 +# feature-test cache variables to "no" so AC_PROG_CC adds no -std flag, then +# pin a K&R-compatible standard (gnu99) ourselves below. +ac_cv_prog_cc_c23=no +ac_cv_prog_cc_c11=no +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu +if test -n "$ac_tool_prefix"; then + for ac_prog in cc gcc + do + # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. +set dummy $ac_tool_prefix$ac_prog; ac_word=$2 +{ printf '%s\n' "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_CC+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_CC="$ac_tool_prefix$ac_prog" + printf '%s\n' "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi ;; +esac +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + { printf '%s\n' "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 +printf '%s\n' "$CC" >&6; } +else { printf '%s\n' "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf '%s\n' "no" >&6; } -printf '%s\n' "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 - -{ { printf '%s\n' "$as_me:${as_lineno-$LINENO}: error: in '$ac_pwd':" >&5 -printf '%s\n' "$as_me: error: in '$ac_pwd':" >&2;} -as_fn_error 77 "C compiler cannot create executables -See 'config.log' for more details" "$LINENO" 5; } -else case e in #( - e) { printf '%s\n' "$as_me:${as_lineno-$LINENO}: result: yes" >&5 -printf '%s\n' "yes" >&6; } ;; -esac fi -{ printf '%s\n' "$as_me:${as_lineno-$LINENO}: checking for C compiler default output file name" >&5 -printf %s "checking for C compiler default output file name... " >&6; } -{ printf '%s\n' "$as_me:${as_lineno-$LINENO}: result: $ac_file" >&5 -printf '%s\n' "$ac_file" >&6; } -ac_exeext=$ac_cv_exeext -rm -f -r a.out a.out.dSYM a.exe conftest$ac_cv_exeext b.out -ac_clean_files=$ac_clean_files_save -{ printf '%s\n' "$as_me:${as_lineno-$LINENO}: checking for suffix of executables" >&5 -printf %s "checking for suffix of executables... " >&6; } -if { { ac_try="$ac_link" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" -printf '%s\n' "$ac_try_echo"; } >&5 - (eval "$ac_link") 2>&5 - ac_status=$? 
- printf '%s\n' "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 - test $ac_status = 0; } + + test -n "$CC" && break + done +fi +if test -z "$CC"; then + ac_ct_CC=$CC + for ac_prog in cc gcc +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +{ printf '%s\n' "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_ac_ct_CC+y} then : - # If both 'conftest.exe' and 'conftest' are 'present' (well, observable) -# catch 'conftest.exe'. For instance with Cygwin, 'ls conftest' will -# work properly (i.e., refer to 'conftest.exe'), while it won't with -# 'rm'. -for ac_file in conftest.exe conftest conftest.*; do - test -f "$ac_file" || continue - case $ac_file in - *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) ;; - *.* ) ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'` - break;; - * ) break;; + printf %s "(cached) " >&6 +else case e in #( + e) if test -n "$ac_ct_CC"; then + ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_CC="$ac_prog" + printf '%s\n' "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi done -else case e in #( - e) { { printf '%s\n' "$as_me:${as_lineno-$LINENO}: error: in '$ac_pwd':" >&5 -printf '%s\n' "$as_me: error: in '$ac_pwd':" >&2;} -as_fn_error $? 
"cannot compute suffix of executables: cannot compile and link -See 'config.log' for more details" "$LINENO" 5; } ;; + done +IFS=$as_save_IFS + +fi ;; esac fi -rm -f conftest conftest$ac_cv_exeext -{ printf '%s\n' "$as_me:${as_lineno-$LINENO}: result: $ac_cv_exeext" >&5 -printf '%s\n' "$ac_cv_exeext" >&6; } +ac_ct_CC=$ac_cv_prog_ac_ct_CC +if test -n "$ac_ct_CC"; then + { printf '%s\n' "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5 +printf '%s\n' "$ac_ct_CC" >&6; } +else + { printf '%s\n' "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf '%s\n' "no" >&6; } +fi -rm -f conftest.$ac_ext -EXEEXT=$ac_cv_exeext -ac_exeext=$EXEEXT -cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ -#include -int -main (void) -{ -FILE *f = fopen ("conftest.out", "w"); - if (!f) - return 1; - return ferror (f) || fclose (f) != 0; - ; - return 0; -} -_ACEOF -ac_clean_files="$ac_clean_files conftest.out" -# Check that the compiler produces executables we can run. If not, either -# the compiler is broken, or we cross compile. -{ printf '%s\n' "$as_me:${as_lineno-$LINENO}: checking whether we are cross compiling" >&5 -printf %s "checking whether we are cross compiling... " >&6; } -if test "$cross_compiling" != yes; then - { { ac_try="$ac_link" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" -printf '%s\n' "$ac_try_echo"; } >&5 - (eval "$ac_link") 2>&5 - ac_status=$? - printf '%s\n' "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 - test $ac_status = 0; } - if { ac_try='./conftest$ac_cv_exeext' - { { case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" -printf '%s\n' "$ac_try_echo"; } >&5 - (eval "$ac_try") 2>&5 - ac_status=$? - printf '%s\n' "$as_me:${as_lineno-$LINENO}: \$? 
= $ac_status" >&5 - test $ac_status = 0; }; }; then - cross_compiling=no + test -n "$ac_ct_CC" && break +done + + if test "x$ac_ct_CC" = x; then + CC="" else - if test "$cross_compiling" = maybe; then - cross_compiling=yes - else - { { printf '%s\n' "$as_me:${as_lineno-$LINENO}: error: in '$ac_pwd':" >&5 -printf '%s\n' "$as_me: error: in '$ac_pwd':" >&2;} -as_fn_error 77 "cannot run C compiled programs. -If you meant to cross compile, use '--host'. -See 'config.log' for more details" "$LINENO" 5; } - fi + case $cross_compiling:$ac_tool_warned in +yes:) +{ printf '%s\n' "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +printf '%s\n' "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + CC=$ac_ct_CC fi fi -{ printf '%s\n' "$as_me:${as_lineno-$LINENO}: result: $cross_compiling" >&5 -printf '%s\n' "$cross_compiling" >&6; } -rm -f conftest.$ac_ext conftest$ac_cv_exeext \ - conftest.o conftest.obj conftest.out -ac_clean_files=$ac_clean_files_save -{ printf '%s\n' "$as_me:${as_lineno-$LINENO}: checking for suffix of object files" >&5 -printf %s "checking for suffix of object files... " >&6; } -if test ${ac_cv_objext+y} -then : - printf %s "(cached) " >&6 -else case e in #( - e) cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ -int -main (void) -{ +test -z "$CC" && { { printf '%s\n' "$as_me:${as_lineno-$LINENO}: error: in '$ac_pwd':" >&5 +printf '%s\n' "$as_me: error: in '$ac_pwd':" >&2;} +as_fn_error $? "no acceptable C compiler found in \$PATH +See 'config.log' for more details" "$LINENO" 5; } - ; - return 0; -} -_ACEOF -rm -f conftest.o conftest.obj -if { { ac_try="$ac_compile" +# Provide some information about the compiler. 
+printf '%s\n' "$as_me:${as_lineno-$LINENO}: checking for C compiler version" >&5 +set X $ac_compile +ac_compiler=$2 +for ac_option in --version -v -V -qversion -version; do + { { ac_try="$ac_compiler $ac_option >&5" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" printf '%s\n' "$ac_try_echo"; } >&5 - (eval "$ac_compile") 2>&5 + (eval "$ac_compiler $ac_option >&5") 2>conftest.err ac_status=$? + if test -s conftest.err; then + sed '10a\ +... rest of stderr output deleted ... + 10q' conftest.err >conftest.er1 + cat conftest.er1 >&5 + fi + rm -f conftest.er1 conftest.err printf '%s\n' "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } -then : - for ac_file in conftest.o conftest.obj conftest.*; do - test -f "$ac_file" || continue; - case $ac_file in - *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM ) ;; - *) ac_cv_objext=`expr "$ac_file" : '.*\.\(.*\)'` - break;; - esac done -else case e in #( - e) printf '%s\n' "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 -{ { printf '%s\n' "$as_me:${as_lineno-$LINENO}: error: in '$ac_pwd':" >&5 -printf '%s\n' "$as_me: error: in '$ac_pwd':" >&2;} -as_fn_error $? "cannot compute suffix of object files: cannot compile -See 'config.log' for more details" "$LINENO" 5; } ;; -esac -fi -rm -f conftest.$ac_cv_objext conftest.$ac_ext ;; -esac -fi -{ printf '%s\n' "$as_me:${as_lineno-$LINENO}: result: $ac_cv_objext" >&5 -printf '%s\n' "$ac_cv_objext" >&6; } -OBJEXT=$ac_cv_objext -ac_objext=$OBJEXT { printf '%s\n' "$as_me:${as_lineno-$LINENO}: checking whether the compiler supports GNU C" >&5 printf %s "checking whether the compiler supports GNU C... 
" >&6; } if test ${ac_cv_c_compiler_gnu+y} @@ -6774,8 +7961,7 @@ esac for db_knr_flag in -Wno-deprecated-non-prototype -Wno-knr-promoted-parameter; do as_db_knr_cv=`printf '%s\n' "db_cv_cflag$db_knr_flag" | sed "$as_sed_sh"` - -{ printf '%s\n' "$as_me:${as_lineno-$LINENO}: checking whether $CC accepts $db_knr_flag" >&5 + { printf '%s\n' "$as_me:${as_lineno-$LINENO}: checking whether $CC accepts $db_knr_flag" >&5 printf %s "checking whether $CC accepts $db_knr_flag... " >&6; } if eval test \${$as_db_knr_cv+y} then : @@ -11549,35 +12735,6 @@ printf '%s\n' "$lt_cv_ld_force_load" >&6; } ;; esac -ac_header= ac_cache= -for ac_item in $ac_header_c_list -do - if test $ac_cache; then - ac_fn_c_check_header_compile "$LINENO" $ac_header ac_cv_header_$ac_cache "$ac_includes_default" - if eval test \"x\$ac_cv_header_$ac_cache\" = xyes; then - printf '%s\n' "#define $ac_item 1" >> confdefs.h - fi - ac_header= ac_cache= - elif test $ac_header; then - ac_cache=$ac_item - else - ac_header=$ac_item - fi -done - - - - - - - - -if test $ac_cv_header_stdlib_h = yes && test $ac_cv_header_string_h = yes -then : - -printf '%s\n' "#define STDC_HEADERS 1" >>confdefs.h - -fi ac_fn_c_check_header_compile "$LINENO" "dlfcn.h" "ac_cv_header_dlfcn_h" "$ac_includes_default " if test "x$ac_cv_header_dlfcn_h" = xyes @@ -19173,7 +20330,7 @@ else case e in #( JAVA_TEST=Test.java CLASS_TEST=Test.class cat << \EOF > $JAVA_TEST -/* #line 19176 "configure" */ +/* #line 20333 "configure" */ public class Test { } EOF @@ -19468,7 +20625,7 @@ EOF if uudecode$EXEEXT Test.uue; then ac_cv_prog_uudecode_base64=yes else - echo "configure: 19471: uudecode had trouble decoding base 64 file 'Test.uue'" >&5 + echo "configure: 20628: uudecode had trouble decoding base 64 file 'Test.uue'" >&5 echo "configure: failed file was:" >&5 cat Test.uue >&5 ac_cv_prog_uudecode_base64=no @@ -19600,7 +20757,7 @@ else case e in #( JAVA_TEST=Test.java CLASS_TEST=Test.class cat << \EOF > $JAVA_TEST -/* #line 19603 "configure" */ +/* 
#line 20760 "configure" */ public class Test { } EOF @@ -19637,7 +20794,7 @@ JAVA_TEST=Test.java CLASS_TEST=Test.class TEST=Test cat << \EOF > $JAVA_TEST -/* [#]line 19640 "configure" */ +/* [#]line 20797 "configure" */ public class Test { public static void main (String args[]) { System.exit (0); diff --git a/dist/configure.ac b/dist/configure.ac index 2612abdd3..68f7ad48d 100644 --- a/dist/configure.ac +++ b/dist/configure.ac @@ -175,6 +175,41 @@ fi # Respect the environment LIBS settings LIBSO_LIBS="$LIBS" +# Asynchronous I/O backends for the buffer pool (os_aio), selected at +# runtime in preference order: io_uring (Linux) > IOCP (Windows) > +# kqueue+aio (BSD) > POSIX aio (Solaris/illumos, macOS) > thread-pool +# offload (last-resort fallback). All are optional -- without any, os_aio +# falls back to synchronous I/O. +AC_CHECK_HEADER(liburing.h, [ + AC_CHECK_LIB(uring, io_uring_queue_init, [ + AC_DEFINE(HAVE_IO_URING, + 1, [Define to 1 to use the Linux io_uring AIO backend.]) + LIBSO_LIBS="$LIBSO_LIBS -luring" + LIBS="$LIBS -luring"])]) + +# POSIX aio: aio_read/aio_write may live in libc (BSD/macOS) or librt +# (Solaris/illumos, glibc). Record the library so os_aio_posix links. +AC_CHECK_HEADER(aio.h, [ + AC_SEARCH_LIBS(aio_read, [rt aio], [ + AC_DEFINE(HAVE_AIO_POSIX, + 1, [Define to 1 to use the POSIX aio backend.])])]) + +# kqueue+aio (BSD, notably FreeBSD): needs EVFILT_AIO and, crucially, the +# sigev_notify_kqueue member of struct sigevent so a completed aiocb posts a +# kevent. macOS has EVFILT_AIO but lacks sigev_notify_kqueue, so it is +# excluded here and uses the POSIX aio backend instead. 
+AC_CHECK_MEMBER([struct sigevent.sigev_notify_kqueue], + [AC_CHECK_DECL([EVFILT_AIO], [ + AC_DEFINE(HAVE_AIO_KQUEUE, + 1, [Define to 1 to use the BSD kqueue+aio backend.])], + [], [[#include ]])], + [], [[#include +#include ]]) + +AC_CHECK_HEADER(pthread.h, [ + AC_DEFINE(HAVE_AIO_THREADPOOL, + 1, [Define to 1 to use the thread-pool AIO offload backend.])]) + # This is where we handle stuff that autoconf can't handle: compiler, # preprocessor and load flags, libraries that the standard tests don't # look for. diff --git a/dist/pubdef.in b/dist/pubdef.in index 266f3aec0..fd25c6570 100644 --- a/dist/pubdef.in +++ b/dist/pubdef.in @@ -293,6 +293,7 @@ DB_MEM_LOCKER D I J C DB_MEM_LOGID D I J C DB_MEM_TRANSACTION D I J C DB_MEM_THREAD D I J C +DB_MPOOL_AIO D I * * DB_MPOOL_CREATE D I * * DB_MPOOL_DIRTY D I * * DB_MPOOL_TRY D I * * diff --git a/dist/srcfiles.in b/dist/srcfiles.in index 1e4d2c184..48dbe770a 100644 --- a/dist/srcfiles.in +++ b/dist/srcfiles.in @@ -253,6 +253,12 @@ src/mutex/mut_win32.c ce_small src/os/os_abort.c android vx vxsmall src/os/os_abs.c android src/os/os_addrinfo.c vx +src/os/os_aio.c android vx vxsmall +src/os/os_aio_uring.c android vx vxsmall +src/os/os_aio_posix.c android vx vxsmall +src/os/os_aio_kqueue.c android vx vxsmall +src/os/os_aio_pool.c android vx vxsmall +src/os/os_aio_iocp.c android vx vxsmall src/os/os_alloc.c android vx vxsmall src/os/os_clock.c android vx vxsmall src/os/os_config.c android diff --git a/docs/api_reference/C/envset_flags.html b/docs/api_reference/C/envset_flags.html index 06cbf0e4c..17f41c204 100644 --- a/docs/api_reference/C/envset_flags.html +++ b/docs/api_reference/C/envset_flags.html @@ -227,6 +227,38 @@

flags

The DB_DSYNC_DB flag may be used to configure Berkeley DB at any time during the life of the application. +

+ +
  • +

    + DB_MPOOL_AIO +

    +

    + If set, the Berkeley DB memory pool performs buffer-pool + writeback (checkpoint, sync, and trickle) asynchronously, + using a per-process native asynchronous I/O engine + (io_uring, IOCP, kqueue+aio, POSIX aio, or a portable thread-pool + fallback) when one is available. When this flag is not set + — the default — or when no asynchronous I/O backend + is configured for the platform, writeback is performed + synchronously exactly as in prior releases. Foreground + buffer eviction is always synchronous. +

    +

    + Asynchronous writeback can reduce stalls under write-heavy or + cache-pressure workloads on storage that benefits from + multiple outstanding I/Os; on a single slow device it may not + help, so it is opt-in. +

    +

    + Calling DB_ENV->set_flags() with the DB_MPOOL_AIO flag only + affects the specified DB_ENV handle (and any other + Berkeley DB handles opened within the scope of that handle). + The asynchronous I/O context is private to each process. +

    +

    + The DB_MPOOL_AIO flag may be used to configure Berkeley DB at any + time during the life of the application.

  • diff --git a/docs/api_reference/CXX/envset_flags.html b/docs/api_reference/CXX/envset_flags.html index dc7a65829..f868a26f3 100644 --- a/docs/api_reference/CXX/envset_flags.html +++ b/docs/api_reference/CXX/envset_flags.html @@ -229,6 +229,33 @@

    flags

    The DB_DSYNC_DB flag may be used to configure Berkeley DB at any time during the life of the application. +

    +
  • +
  • +

    + DB_MPOOL_AIO +

    +

    + If set, the Berkeley DB memory pool performs buffer-pool + writeback (checkpoint, sync, and trickle) asynchronously, + using a per-process native asynchronous I/O engine + (io_uring, IOCP, kqueue+aio, POSIX aio, or a portable thread-pool + fallback) when one is available. When this flag is not set + — the default — or when no asynchronous I/O backend + is configured for the platform, writeback is performed + synchronously exactly as in prior releases. Foreground + buffer eviction is always synchronous. +

    +

    + Asynchronous writeback can reduce stalls under write-heavy or + cache-pressure workloads on storage that benefits from + multiple outstanding I/Os; on a single slow device it may not + help, so it is opt-in. The asynchronous I/O context is + private to each process. +

    +

    + The DB_MPOOL_AIO flag may be used to configure Berkeley DB at any + time during the life of the application.

  • diff --git a/flake.nix b/flake.nix index f6db501e2..53009cd2e 100644 --- a/flake.nix +++ b/flake.nix @@ -28,6 +28,9 @@ inherit version; src = ./.; enableParallelBuilding = true; + buildInputs = pkgs.lib.optionals pkgs.stdenv.isLinux [ + pkgs.liburing # io_uring AIO backend + ]; configurePhase = '' runHook preConfigure cd build_unix @@ -45,6 +48,8 @@ pkgs.meson pkgs.ninja pkgs.python3 pkgs.pkg-config pkgs.gcc pkgs.clang pkgs.autoconf pkgs.gnumake pkgs.tcl # for the TCL test harness (--enable-test) + ] ++ pkgs.lib.optionals pkgs.stdenv.isLinux [ + pkgs.liburing # Linux io_uring AIO backend (HAVE_IO_URING) ]; shellHook = '' echo "libdb dev shell — Meson: 'meson setup build && ninja -C build'" diff --git a/lang/csharp/src/Internal/DbConstants.cs b/lang/csharp/src/Internal/DbConstants.cs index aeeafcb6e..f333a8970 100644 --- a/lang/csharp/src/Internal/DbConstants.cs +++ b/lang/csharp/src/Internal/DbConstants.cs @@ -158,19 +158,19 @@ internal class DbConstants { internal const uint DB_NEXT_DUP = 17; internal const uint DB_NEXT_NODUP = 18; internal const uint DB_NODUPDATA = 19; - internal const uint DB_NOLOCKING = 0x00002000; + internal const uint DB_NOLOCKING = 0x00004000; internal const uint DB_NOMMAP = 0x00000010; internal const uint DB_NOORDERCHK = 0x00000002; internal const uint DB_NOOVERWRITE = 20; - internal const uint DB_NOPANIC = 0x00004000; + internal const uint DB_NOPANIC = 0x00008000; internal const int DB_NOSERVER = -30989; internal const uint DB_NOSYNC = 0x00000001; internal const int DB_NOTFOUND = -30988; internal const int DB_OLD_VERSION = -30987; internal const uint DB_ORDERCHKONLY = 0x00000004; - internal const uint DB_OVERWRITE = 0x00008000; + internal const uint DB_OVERWRITE = 0x00010000; internal const int DB_PAGE_NOTFOUND = -30986; - internal const uint DB_PANIC_ENVIRONMENT = 0x00010000; + internal const uint DB_PANIC_ENVIRONMENT = 0x00020000; internal const uint DB_POSITION = 22; internal const uint DB_PREV = 23; internal const uint DB_PREV_DUP 
= 24; @@ -190,7 +190,7 @@ internal class DbConstants { internal const uint DB_RECNUM = 0x00000040; internal const uint DB_RECOVER = 0x00000002; internal const uint DB_RECOVER_FATAL = 0x00020000; - internal const uint DB_REGION_INIT = 0x00020000; + internal const uint DB_REGION_INIT = 0x00040000; internal const uint DB_REGISTER = 0x00040000; internal const uint DB_RENUMBER = 0x00000080; internal const int DB_REPMGR_ACKS_ALL = 1; @@ -265,7 +265,7 @@ internal class DbConstants { internal const uint DB_SYSTEM_MEM = 0x00080000; internal const uint DB_THREAD = 0x00000020; internal const int DB_TIMEOUT = -30971; - internal const uint DB_TIME_NOTGRANTED = 0x00040000; + internal const uint DB_TIME_NOTGRANTED = 0x00080000; internal const uint DB_TRUNCATE = 0x00020000; internal const uint DB_TXN_ABORT = 0; internal const uint DB_TXN_APPLY = 1; @@ -279,7 +279,7 @@ internal class DbConstants { internal const uint DB_TXN_SNAPSHOT = 0x00000004; internal const uint DB_TXN_SYNC = 0x00000008; internal const uint DB_TXN_TOKEN_SIZE = 20; - internal const uint DB_TXN_WAIT = 0x00000080; + internal const uint DB_TXN_WAIT = 0x00000100; internal const uint DB_TXN_WRITE_NOSYNC = 0x00000020; internal const uint DB_UNKNOWN = 5; internal const uint DB_UPGRADE = 0x00000001; @@ -313,12 +313,12 @@ internal class DbConstants { internal const uint DB_VERSION_MINOR = 3; internal const string DB_VERSION_MINOR_STR = "3"; internal const int DB_VERSION_MISMATCH = -30969; - internal const uint DB_VERSION_PATCH = 28; - internal const string DB_VERSION_PATCH_STR = "28"; - internal const string DB_VERSION_STRING = "Berkeley DB 5.3.28: September 9 2013 "; - internal const string DB_VERSION_FULL_STRING = "Berkeley DB 11g Release 2 library version 11.2.5.3.28: September 9 2013 "; + internal const uint DB_VERSION_PATCH = 29; + internal const string DB_VERSION_PATCH_STR = "29"; + internal const string DB_VERSION_STRING = "Berkeley DB 5.3.29: September 9 2013 "; + internal const string DB_VERSION_FULL_STRING = 
"Berkeley DB 11g Release 2 library version 11.2.5.3.29: September 9 2013 "; internal const uint DB_WRITECURSOR = 0x00000010; - internal const uint DB_YIELDCPU = 0x00080000; + internal const uint DB_YIELDCPU = 0x00100000; internal const uint DB_USERCOPY_GETDATA = 0x00000001; internal const uint DB_USERCOPY_SETDATA = 0x00000002; } diff --git a/lang/java/src/com/sleepycat/db/internal/DbConstants.java b/lang/java/src/com/sleepycat/db/internal/DbConstants.java index b5f88a953..e89608ddd 100644 --- a/lang/java/src/com/sleepycat/db/internal/DbConstants.java +++ b/lang/java/src/com/sleepycat/db/internal/DbConstants.java @@ -129,16 +129,16 @@ public interface DbConstants int DB_NEXT_DUP = 17; int DB_NEXT_NODUP = 18; int DB_NODUPDATA = 19; - int DB_NOLOCKING = 0x00002000; + int DB_NOLOCKING = 0x00004000; int DB_NOMMAP = 0x00000010; int DB_NOORDERCHK = 0x00000002; int DB_NOOVERWRITE = 20; - int DB_NOPANIC = 0x00004000; + int DB_NOPANIC = 0x00008000; int DB_NOSYNC = 0x00000001; int DB_NOTFOUND = -30988; int DB_ORDERCHKONLY = 0x00000004; - int DB_OVERWRITE = 0x00008000; - int DB_PANIC_ENVIRONMENT = 0x00010000; + int DB_OVERWRITE = 0x00010000; + int DB_PANIC_ENVIRONMENT = 0x00020000; int DB_POSITION = 22; int DB_PREV = 23; int DB_PREV_DUP = 24; @@ -158,7 +158,7 @@ public interface DbConstants int DB_RECNUM = 0x00000040; int DB_RECOVER = 0x00000002; int DB_RECOVER_FATAL = 0x00020000; - int DB_REGION_INIT = 0x00020000; + int DB_REGION_INIT = 0x00040000; int DB_REGISTER = 0x00040000; int DB_RENUMBER = 0x00000080; int DB_REPMGR_ACKS_ALL = 1; @@ -218,7 +218,7 @@ public interface DbConstants int DB_SYSTEM_MEM = 0x00080000; int DB_THREAD = 0x00000020; int DB_TIMEOUT = -30971; - int DB_TIME_NOTGRANTED = 0x00040000; + int DB_TIME_NOTGRANTED = 0x00080000; int DB_TRUNCATE = 0x00020000; int DB_TXN_ABORT = 0; int DB_TXN_APPLY = 1; @@ -232,7 +232,7 @@ public interface DbConstants int DB_TXN_SNAPSHOT = 0x00000004; int DB_TXN_SYNC = 0x00000008; int DB_TXN_TOKEN_SIZE = 20; - int DB_TXN_WAIT = 
0x00000080; + int DB_TXN_WAIT = 0x00000100; int DB_TXN_WRITE_NOSYNC = 0x00000020; int DB_UNKNOWN = 5; int DB_UPGRADE = 0x00000001; @@ -258,9 +258,9 @@ public interface DbConstants int DB_VERIFY = 0x00000002; int DB_VERSION_MAJOR = 5; int DB_VERSION_MINOR = 3; - int DB_VERSION_PATCH = 28; + int DB_VERSION_PATCH = 29; int DB_WRITECURSOR = 0x00000010; - int DB_YIELDCPU = 0x00080000; + int DB_YIELDCPU = 0x00100000; } // end of DbConstants.java diff --git a/meson.build b/meson.build index 9e1839260..d483867ce 100644 --- a/meson.build +++ b/meson.build @@ -322,6 +322,12 @@ libdb_sources = files( 'src/os/os_abort.c', 'src/os/os_abs.c', 'src/os/os_addrinfo.c', + 'src/os/os_aio.c', + 'src/os/os_aio_iocp.c', + 'src/os/os_aio_kqueue.c', + 'src/os/os_aio_pool.c', + 'src/os/os_aio_posix.c', + 'src/os/os_aio_uring.c', 'src/os/os_alloc.c', 'src/os/os_atomic.c', 'src/os/os_clock.c', diff --git a/src/dbinc/db.in b/src/dbinc/db.in index 6ec48dbe7..b8a24e623 100644 --- a/src/dbinc/db.in +++ b/src/dbinc/db.in @@ -2420,6 +2420,7 @@ struct __db_env { #define DB_ENV_YIELDCPU 0x00020000 /* DB_YIELDCPU set */ #define DB_ENV_HOTBACKUP 0x00040000 /* DB_HOTBACKUP_IN_PROGRESS set */ #define DB_ENV_NOFLUSH 0x00080000 /* DB_NOFLUSH set */ +#define DB_ENV_MPOOL_AIO 0x00100000 /* DB_MPOOL_AIO set */ u_int32_t flags; /* DB_ENV PUBLIC HANDLE LIST BEGIN */ diff --git a/src/dbinc/mp.h b/src/dbinc/mp.h index aaa08f903..d4df2a9c8 100644 --- a/src/dbinc/mp.h +++ b/src/dbinc/mp.h @@ -75,6 +75,13 @@ struct __db_mpool { */ ENV *env; /* Enclosing environment. */ REGINFO *reginfo; /* Underlying cache regions. */ + + /* + * Per-process asynchronous-I/O context for buffer-pool writeback + * (checkpoint/sync). NULL if no async backend is available, in which + * case writeback is synchronous. Owned by this process. 
+ */ + struct __db_aio_context *aio_ctx; }; /* @@ -751,5 +758,35 @@ struct __bh_frozen_a { } #endif +/* + * MEMP_PGW -- + * Page-write state shared across the prep / I-O / finish phases so a + * write can be issued synchronously or asynchronously from one copy of + * the WAL-ordering, pgout, and bookkeeping logic. + * MEMP_AIO_W -- + * One asynchronous checkpoint write in flight: the prep state plus the + * pinned buffer and held file-handle reference to release on completion. + */ +typedef struct __memp_pgw { + ENV *env; + DB_MPOOLFILE *dbmfp; + DB_MPOOL_HASH *hp; + BH *bhp; + MPOOLFILE *mfp; + void *buf; /* page image to write (bhp->buf or copy) */ + int writers_inced; /* mfp->writers was incremented */ +} MEMP_PGW; + +typedef struct __memp_aio_w { + MEMP_PGW ctx; /* prep state */ + BH *bhp; /* pinned buffer (ref + shared mtx_buf) */ + DB_MPOOLFILE *dbmfp; /* held file-handle reference */ + int opened; /* dbmfp opened here (close on finish) */ + volatile int io_ret; /* write result (set by completion) */ + volatile int done; /* completion observed */ +} MEMP_AIO_W; + +#define MEMP_AIO_WINDOW 16 /* max checkpoint writes in flight */ + #include "dbinc_auto/mp_ext.h" #endif /* !_DB_MP_H_ */ diff --git a/src/dbinc/os_aio.h b/src/dbinc/os_aio.h new file mode 100644 index 000000000..8abe017dc --- /dev/null +++ b/src/dbinc/os_aio.h @@ -0,0 +1,128 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Asynchronous I/O abstraction (Stage 2). + * + * A thin, pluggable async-I/O layer used by the buffer pool to (a) prefetch + * pages and (b) trickle dirty pages to disk in the background without blocking + * a foreground thread on a device write. The default backend is synchronous + * (submit performs the I/O inline and the completion runs immediately), so the + * abstraction is behaviour-preserving everywhere; platform backends + * (Linux io_uring, BSD/macOS kqueue+aio, Windows IOCP, POSIX aio) override it. 
+ * + * Per-process: an AIO context is owned by the process that created it; in a + * multi-process environment a page marked in-transit by one process is reaped + * only by that process, and other processes fall back to the existing + * in-transit buffer wait (mtx_buf). + */ +#ifndef _DB_OS_AIO_H_ +#define _DB_OS_AIO_H_ + +#if defined(__cplusplus) +extern "C" { +#endif + +struct __db_aio_context; typedef struct __db_aio_context DB_AIO_CONTEXT; +struct __db_aio_op; typedef struct __db_aio_op DB_AIO_OP; + +/* + * Completion callback: invoked (in the reaping thread, or inline for the + * synchronous backend) when an op finishes. "ret" is 0 on success or an + * errno. "cookie" is the caller's opaque pointer (the buffer header). + */ +typedef void (*db_aio_done_fn) __P((ENV *, void *cookie, int ret)); + +/* One outstanding async I/O. */ +struct __db_aio_op { + int op; /* DB_IO_READ / DB_IO_WRITE. */ + DB_FH *fhp; /* Target file. */ + db_pgno_t pgno; /* Page number. */ + u_int32_t pagesize; /* Bytes. */ + void *buf; /* Data buffer (page-aligned). */ + void *cookie; /* Caller context (BH *). */ + db_aio_done_fn done; /* Completion callback. */ +}; + +/* + * Backend vtable. A backend implements submit/reap/cancel; the generic layer + * owns the context lifecycle and the synchronous fallback. + */ +typedef struct __db_aio_backend { + const char *name; + int (*submit) __P((ENV *, DB_AIO_CONTEXT *, DB_AIO_OP *)); + /* Reap up to max completions; -1 max means "all ready". */ + int (*reap) __P((ENV *, DB_AIO_CONTEXT *, int max, int wait)); + int (*cancel) __P((ENV *, DB_AIO_CONTEXT *)); + int (*destroy) __P((ENV *, DB_AIO_CONTEXT *)); +} DB_AIO_BACKEND; + +/* + * AIO context. Owned by the process that created it. A NULL backend + * means the synchronous fallback (see os_aio.c); a platform backend + * installs its vtable and per-context state via priv. + */ +struct __db_aio_context { + const DB_AIO_BACKEND *backend; /* NULL = synchronous fallback. 
*/ + void *priv; /* Backend-private state. */ + u_int32_t depth; /* Requested queue depth. */ + u_int32_t inflight; /* Ops submitted, not yet reaped. */ +}; + +/* + * PUBLIC: int __os_aio_create __P((ENV *, u_int32_t, DB_AIO_CONTEXT **)); + * PUBLIC: int __os_aio_submit __P((ENV *, DB_AIO_CONTEXT *, DB_AIO_OP *)); + * PUBLIC: int __os_aio_reap __P((ENV *, DB_AIO_CONTEXT *, int, int)); + * PUBLIC: int __os_aio_destroy __P((ENV *, DB_AIO_CONTEXT *)); + * PUBLIC: int __os_aio_available __P((ENV *)); + */ +int __os_aio_create __P((ENV *, u_int32_t, DB_AIO_CONTEXT **)); +int __os_aio_submit __P((ENV *, DB_AIO_CONTEXT *, DB_AIO_OP *)); +int __os_aio_reap __P((ENV *, DB_AIO_CONTEXT *, int /*max*/, int /*wait*/)); +int __os_aio_destroy __P((ENV *, DB_AIO_CONTEXT *)); +int __os_aio_available __P((ENV *)); /* 1 if a real async backend is active */ +int __os_aio_ctx_available __P((DB_AIO_CONTEXT *)); /* per-context async? */ + +/* + * PUBLIC: int __os_aio_uring_init __P((ENV *, DB_AIO_CONTEXT *)); + * Install the Linux io_uring backend on a context (HAVE_IO_URING + * builds only). Returns 0 and sets ctx->backend on success, or a + * non-zero error leaving the context on the synchronous fallback. + */ +int __os_aio_uring_init __P((ENV *, DB_AIO_CONTEXT *)); + +/* + * PUBLIC: int __os_aio_posix_init __P((ENV *, DB_AIO_CONTEXT *)); + * Install the POSIX.1b aio backend (aio_read/aio_write + aio_suspend) on + * a context (HAVE_AIO_POSIX builds only). Native async path on + * Solaris/illumos and macOS; portable fallback ahead of the thread pool. + */ +int __os_aio_posix_init __P((ENV *, DB_AIO_CONTEXT *)); + +/* + * PUBLIC: int __os_aio_kqueue_init __P((ENV *, DB_AIO_CONTEXT *)); + * Install the BSD kqueue + aio backend (EVFILT_AIO completions) on a + * context (HAVE_AIO_KQUEUE builds only; FreeBSD/BSD, not macOS). 
+ */ +int __os_aio_kqueue_init __P((ENV *, DB_AIO_CONTEXT *)); + +/* + * PUBLIC: int __os_aio_pool_init __P((ENV *, DB_AIO_CONTEXT *)); + * Install the portable thread-pool offload backend (HAVE_AIO_THREADPOOL + * builds only). Returns 0 and sets ctx->backend on success. + */ +int __os_aio_pool_init __P((ENV *, DB_AIO_CONTEXT *)); + +/* + * PUBLIC: int __os_aio_iocp_init __P((ENV *, DB_AIO_CONTEXT *)); + * Install the Windows IOCP native file-AIO backend (HAVE_IOCP builds + * only). Returns 0 and sets ctx->backend on success. + */ +int __os_aio_iocp_init __P((ENV *, DB_AIO_CONTEXT *)); + +/* Queue depth requested at create time; backends may clamp. */ +#define DB_AIO_DEFAULT_DEPTH 64 + +#if defined(__cplusplus) +} +#endif +#endif /* !_DB_OS_AIO_H_ */ diff --git a/src/dbinc_auto/api_flags.in b/src/dbinc_auto/api_flags.in index 43b9185c6..9c0e6c626 100644 --- a/src/dbinc_auto/api_flags.in +++ b/src/dbinc_auto/api_flags.in @@ -85,6 +85,7 @@ #define DB_LOG_VERIFY_WARNING 0x00000080 #define DB_LOG_WRNOSYNC 0x00000020 #define DB_LOG_ZERO 0x00000010 +#define DB_MPOOL_AIO 0x00001000 #define DB_MPOOL_CREATE 0x00000001 #define DB_MPOOL_DIRTY 0x00000002 #define DB_MPOOL_DISCARD 0x00000001 @@ -106,18 +107,18 @@ #define DB_MUTEX_SELF_BLOCK 0x00000010 #define DB_MUTEX_SHARED 0x00000020 #define DB_NOERROR 0x00004000 -#define DB_NOFLUSH 0x00001000 -#define DB_NOLOCKING 0x00002000 +#define DB_NOFLUSH 0x00002000 +#define DB_NOLOCKING 0x00004000 #define DB_NOMMAP 0x00000010 #define DB_NOORDERCHK 0x00000002 -#define DB_NOPANIC 0x00004000 +#define DB_NOPANIC 0x00008000 #define DB_NOSYNC 0x00000001 #define DB_NO_AUTO_COMMIT 0x00008000 #define DB_NO_CHECKPOINT 0x00008000 #define DB_ODDFILESIZE 0x00000080 #define DB_ORDERCHKONLY 0x00000004 -#define DB_OVERWRITE 0x00008000 -#define DB_PANIC_ENVIRONMENT 0x00010000 +#define DB_OVERWRITE 0x00010000 +#define DB_PANIC_ENVIRONMENT 0x00020000 #define DB_PRINTABLE 0x00000008 #define DB_PRIVATE 0x00010000 #define DB_PR_PAGE 0x00000010 @@ -129,7 
+130,7 @@ #define DB_RECNUM 0x00000040 #define DB_RECOVER 0x00000002 #define DB_RECOVER_FATAL 0x00020000 -#define DB_REGION_INIT 0x00020000 +#define DB_REGION_INIT 0x00040000 #define DB_REGISTER 0x00040000 #define DB_RENUMBER 0x00000080 #define DB_REPMGR_CONF_2SITE_STRICT 0x00000001 @@ -187,7 +188,7 @@ #define DB_ST_TOPLEVEL 0x00010000 #define DB_SYSTEM_MEM 0x00080000 #define DB_THREAD 0x00000020 -#define DB_TIME_NOTGRANTED 0x00040000 +#define DB_TIME_NOTGRANTED 0x00080000 #define DB_TRUNCATE 0x00020000 #define DB_TXN_BULK 0x00000010 #define DB_TXN_FAMILY 0x00000040 @@ -226,4 +227,4 @@ #define DB_WRITELOCK 0x00000020 #define DB_WRITEOPEN 0x00040000 #define DB_XA_CREATE 0x00000001 -#define DB_YIELDCPU 0x00080000 +#define DB_YIELDCPU 0x00100000 diff --git a/src/dbinc_auto/mp_ext.h b/src/dbinc_auto/mp_ext.h index 8df869c35..699978d21 100644 --- a/src/dbinc_auto/mp_ext.h +++ b/src/dbinc_auto/mp_ext.h @@ -13,6 +13,8 @@ int __memp_backup_mpf __P((ENV *, DB_MPOOLFILE *, DB_THREAD_INFO *, db_pgno_t, d int __memp_backup_close __P((ENV *, DB_MPOOLFILE *, const char *, DB_FH *, void *HANDLE)); int __memp_failchk __P((ENV *)); int __memp_bhwrite __P((DB_MPOOL *, DB_MPOOL_HASH *, MPOOLFILE *, BH *, int)); +int __memp_bhwrite_async __P((DB_MPOOL *, DB_MPOOL_HASH *, MPOOLFILE *, BH *, struct __db_aio_context *, MEMP_AIO_W *, int *)); +int __memp_aio_drain __P((ENV *, DB_MPOOL *, struct __db_aio_context *, MEMP_AIO_W *, int)); int __memp_pgread __P((DB_MPOOLFILE *, BH *, int)); int __memp_pg __P((DB_MPOOLFILE *, db_pgno_t, void *, int)); int __memp_bhfree __P((DB_MPOOL *, REGINFO *, MPOOLFILE *, DB_MPOOL_HASH *, BH *, u_int32_t)); diff --git a/src/env/env_config.c b/src/env/env_config.c index 1b1509d9b..bb736e607 100644 --- a/src/env/env_config.c +++ b/src/env/env_config.c @@ -179,6 +179,7 @@ static const FN config_set_flags[] = { { DB_CDB_ALLDB, "db_cdb_alldb" }, { DB_DIRECT_DB, "db_direct_db" }, { DB_DSYNC_DB, "db_dsync_db" }, + { DB_MPOOL_AIO, "db_mpool_aio" }, { 
DB_MULTIVERSION, "db_multiversion" }, { DB_NOLOCKING, "db_nolocking" }, { DB_NOMMAP, "db_nommap" }, diff --git a/src/env/env_method.c b/src/env/env_method.c index d79bcf7d0..6e0b152e4 100644 --- a/src/env/env_method.c +++ b/src/env/env_method.c @@ -827,6 +827,7 @@ const FLAG_MAP EnvMap[] = { { DB_DIRECT_DB, DB_ENV_DIRECT_DB }, { DB_DSYNC_DB, DB_ENV_DSYNC_DB }, { DB_HOTBACKUP_IN_PROGRESS, DB_ENV_HOTBACKUP }, + { DB_MPOOL_AIO, DB_ENV_MPOOL_AIO }, { DB_MULTIVERSION, DB_ENV_MULTIVERSION }, { DB_NOFLUSH, DB_ENV_NOFLUSH }, { DB_NOLOCKING, DB_ENV_NOLOCKING }, @@ -939,7 +940,8 @@ __env_set_flags(dbenv, flags, on) #define OK_FLAGS \ (DB_AUTO_COMMIT | DB_CDB_ALLDB | DB_DATABASE_LOCKING | \ - DB_DIRECT_DB | DB_DSYNC_DB | DB_MULTIVERSION | \ + DB_DIRECT_DB | DB_DSYNC_DB | DB_MPOOL_AIO | \ + DB_MULTIVERSION | \ DB_NOLOCKING | DB_NOMMAP | DB_NOPANIC | DB_OVERWRITE | \ DB_PANIC_ENVIRONMENT | DB_REGION_INIT | \ DB_TIME_NOTGRANTED | DB_TXN_NOSYNC | DB_TXN_NOWAIT | \ diff --git a/src/mp/mp_bh.c b/src/mp/mp_bh.c index de057742a..6753dfc90 100644 --- a/src/mp/mp_bh.c +++ b/src/mp/mp_bh.c @@ -11,6 +11,7 @@ #include "db_int.h" #include "dbinc/db_page.h" /* Required for diagnostic code. */ #include "dbinc/mp.h" +#include "dbinc/os_aio.h" #include "dbinc/log.h" #include "dbinc/txn.h" @@ -300,87 +301,57 @@ err: return (ret); } /* - * __memp_pgwrite -- - * Write a page to a file. + * __memp_pgwrite_prep -- + * Everything before the page write: WAL flush, write-ahead verification, + * backup coordination, and pgout. On return *do_iop is set if there is a + * page image (c->buf) to write; otherwise the caller skips the I/O and + * calls __memp_pgwrite_finish directly. Returns 0 or an errno. 
*/ static int -__memp_pgwrite(env, dbmfp, hp, bhp) +__memp_pgwrite_prep(env, dbmfp, hp, bhp, c, do_iop) ENV *env; DB_MPOOLFILE *dbmfp; DB_MPOOL_HASH *hp; BH *bhp; + MEMP_PGW *c; + int *do_iop; { DB_LSN lsn; - MPOOLFILE *mfp; - size_t nw; int ret; - void * buf; - /* - * Since writing does not require exclusive access, another thread - * could have already written this buffer. - */ + memset(c, 0, sizeof(*c)); + c->env = env; + c->dbmfp = dbmfp; + c->hp = hp; + c->bhp = bhp; + *do_iop = 0; + + /* Another thread could have already written this buffer. */ if (!F_ISSET(bhp, BH_DIRTY)) return (0); - mfp = dbmfp == NULL ? NULL : dbmfp->mfp; + c->mfp = dbmfp == NULL ? NULL : dbmfp->mfp; ret = 0; - buf = NULL; /* We should never be called with a frozen or trashed buffer. */ DB_ASSERT(env, !F_ISSET(bhp, BH_FROZEN | BH_TRASH)); - /* - * It's possible that the underlying file doesn't exist, either - * because of an outright removal or because it was a temporary - * file that's been closed. - * - * !!! - * Once we pass this point, we know that dbmfp and mfp aren't NULL, - * and that we have a valid file reference. - */ - if (mfp == NULL || mfp->deadfile) - goto file_dead; + /* The underlying file may not exist; finish discards the dirty page. */ + if (c->mfp == NULL || c->mfp->deadfile) + return (0); - /* - * If the page is in a file for which we have LSN information, we have - * to ensure the appropriate log records are on disk. - */ - if (LOGGING_ON(env) && mfp->lsn_off != DB_LSN_OFF_NOTSET && + /* Ensure the page's log records are on disk (WAL). */ + if (LOGGING_ON(env) && c->mfp->lsn_off != DB_LSN_OFF_NOTSET && !IS_CLIENT_PGRECOVER(env)) { - memcpy(&lsn, bhp->buf + mfp->lsn_off, sizeof(DB_LSN)); + memcpy(&lsn, bhp->buf + c->mfp->lsn_off, sizeof(DB_LSN)); if (!IS_NOT_LOGGED_LSN(lsn) && (ret = __log_flush(env, &lsn)) != 0) - goto err; + return (ret); } #ifdef DIAGNOSTIC - /* - * Verify write-ahead logging semantics. - * - * !!! - * Two special cases. 
There is a single field on the meta-data page, - * the last-page-number-in-the-file field, for which we do not log - * changes. If the page was originally created in a database that - * didn't have logging turned on, we can see a page marked dirty but - * for which no corresponding log record has been written. However, - * the only way that a page can be created for which there isn't a - * previous log record and valid LSN is when the page was created - * without logging turned on, and so we check for that special-case - * LSN value. - * - * Second, when a client is reading database pages from a master - * during an internal backup, we may get pages modified after - * the current end-of-log. - */ if (LOGGING_ON(env) && !IS_NOT_LOGGED_LSN(LSN(bhp->buf)) && !IS_CLIENT_PGRECOVER(env)) { - /* - * There is a potential race here. If we are in the midst of - * switching log files, it's possible we could test against the - * old file and the new offset in the log region's LSN. If we - * fail the first test, acquire the log mutex and check again. - */ DB_LOG *dblp; LOG *lp; @@ -397,79 +368,92 @@ __memp_pgwrite(env, dbmfp, hp, bhp) #endif #ifndef HAVE_ATOMICFILEREAD - if (mfp->backup_in_progress != 0) { - MUTEX_READLOCK(env, mfp->mtx_write); - if (bhp->pgno >= mfp->low_pgno && bhp->pgno <= mfp->high_pgno) { - MUTEX_UNLOCK(env, mfp->mtx_write); - ret = EAGAIN; - goto err; + if (c->mfp->backup_in_progress != 0) { + MUTEX_READLOCK(env, c->mfp->mtx_write); + if (bhp->pgno >= c->mfp->low_pgno && + bhp->pgno <= c->mfp->high_pgno) { + MUTEX_UNLOCK(env, c->mfp->mtx_write); + return (EAGAIN); } - atomic_inc(env, &mfp->writers); - MUTEX_UNLOCK(env, mfp->mtx_write); - } else - atomic_inc(env, &mfp->writers); + atomic_inc(env, &c->mfp->writers); + c->writers_inced = 1; + MUTEX_UNLOCK(env, c->mfp->mtx_write); + } else { + atomic_inc(env, &c->mfp->writers); + c->writers_inced = 1; + } #endif /* - * Call any pgout function. 
If we have the page exclusive then - * we are going to reuse it otherwise make a copy of the page so - * that others can continue looking at the page while we write it. + * Call any pgout function. With the page exclusive we reuse it; + * otherwise copy it so others can read it while we write. */ - buf = bhp->buf; - if (mfp->ftype != 0) { + c->buf = bhp->buf; + if (c->mfp->ftype != 0) { if (F_ISSET(bhp, BH_EXCLUSIVE)) F_SET(bhp, BH_TRASH); else { - if ((ret = __os_malloc(env, mfp->pagesize, &buf)) != 0) - goto err; - memcpy(buf, bhp->buf, mfp->pagesize); + if ((ret = + __os_malloc(env, c->mfp->pagesize, &c->buf)) != 0) { + c->buf = NULL; + return (ret); + } + memcpy(c->buf, bhp->buf, c->mfp->pagesize); } - if ((ret = __memp_pg(dbmfp, bhp->pgno, buf, 0)) != 0) - goto err; + if ((ret = __memp_pg(dbmfp, bhp->pgno, c->buf, 0)) != 0) + return (ret); } - PERFMON3(env, mpool, write, __memp_fn(dbmfp), bhp->pgno, bhp); - /* Write the page. */ - if ((ret = __os_io(env, DB_IO_WRITE, dbmfp->fhp, bhp->pgno, - mfp->pagesize, 0, mfp->pagesize, buf, &nw)) != 0) { -#ifndef HAVE_ATOMICFILEREAD - atomic_dec(env, &mfp->writers); -#endif - __db_errx(env, DB_STR_A("3015", - "%s: write failed for page %lu", "%s %lu"), - __memp_fn(dbmfp), (u_long)bhp->pgno); - goto err; - } + *do_iop = 1; + return (0); +} + +/* + * __memp_pgwrite_finish -- + * Everything after the page write: release the backup-writer count and + * page-image copy, update statistics on a successful write, and clear + * BH_DIRTY/BH_TRASH under the hash-bucket latch. "did_io" is set if a + * write was actually issued; "io_ret" is its result (0 on success). 
+ */ +static int +__memp_pgwrite_finish(c, did_io, io_ret) + MEMP_PGW *c; + int did_io; + int io_ret; +{ + ENV *env; + BH *bhp; + DB_MPOOL_HASH *hp; + int ret; + + env = c->env; + bhp = c->bhp; + hp = c->hp; + ret = io_ret; + #ifndef HAVE_ATOMICFILEREAD - atomic_dec(env, &mfp->writers); + if (c->writers_inced) + atomic_dec(env, &c->mfp->writers); #endif - STAT_INC_VERB(env, mpool, page_out, - mfp->stat.st_page_out, __memp_fn(dbmfp), bhp->pgno); - if (bhp->pgno > mfp->last_flushed_pgno) { - MUTEX_LOCK(env, mfp->mutex); - if (bhp->pgno > mfp->last_flushed_pgno) - mfp->last_flushed_pgno = bhp->pgno; - MUTEX_UNLOCK(env, mfp->mutex); + + if (did_io && ret == 0 && c->mfp != NULL) { + STAT_INC_VERB(env, mpool, page_out, + c->mfp->stat.st_page_out, __memp_fn(c->dbmfp), bhp->pgno); + if (bhp->pgno > c->mfp->last_flushed_pgno) { + MUTEX_LOCK(env, c->mfp->mutex); + if (bhp->pgno > c->mfp->last_flushed_pgno) + c->mfp->last_flushed_pgno = bhp->pgno; + MUTEX_UNLOCK(env, c->mfp->mutex); + } } -err: -file_dead: - if (buf != NULL && buf != bhp->buf) - __os_free(env, buf); - /* - * !!! - * Once we pass this point, dbmfp and mfp may be NULL, we may not have - * a valid file reference. - */ + if (c->buf != NULL && c->buf != bhp->buf) + __os_free(env, c->buf); /* - * Update the hash bucket statistics, reset the flags. If we were - * successful, the page is no longer dirty. Someone else may have - * also written the page so we need to latch the hash bucket here - * to get the accounting correct. Since we have the buffer - * shared it cannot be marked dirty again till we release it. - * This is the only place we update the flags field only holding - * a shared latch. + * Update the hash bucket statistics, reset the flags. On success the + * page is no longer dirty. We latch the hash bucket because this is + * the only place the flags are updated holding only a shared latch. 
*/ if (F_ISSET(bhp, BH_DIRTY | BH_TRASH)) { MUTEX_LOCK(env, hp->mtx_hash); @@ -483,7 +467,7 @@ __memp_pgwrite(env, dbmfp, hp, bhp) /* put the page back if necessary. */ if ((ret != 0 || BH_REFCOUNT(bhp) > 1) && F_ISSET(bhp, BH_TRASH)) { - ret = __memp_pg(dbmfp, bhp->pgno, bhp->buf, 1); + ret = __memp_pg(c->dbmfp, bhp->pgno, bhp->buf, 1); F_CLR(bhp, BH_TRASH); } MUTEX_UNLOCK(env, hp->mtx_hash); @@ -492,6 +476,212 @@ __memp_pgwrite(env, dbmfp, hp, bhp) return (ret); } +/* + * __memp_pgwrite -- + * Write a page to a file. + */ +static int +__memp_pgwrite(env, dbmfp, hp, bhp) + ENV *env; + DB_MPOOLFILE *dbmfp; + DB_MPOOL_HASH *hp; + BH *bhp; +{ + MEMP_PGW c; + size_t nw; + int did_io, do_io, ret; + + ret = __memp_pgwrite_prep(env, dbmfp, hp, bhp, &c, &do_io); + did_io = 0; + if (ret == 0 && do_io) { + PERFMON3(env, mpool, write, __memp_fn(dbmfp), bhp->pgno, bhp); + did_io = 1; + if ((ret = __os_io(env, DB_IO_WRITE, dbmfp->fhp, bhp->pgno, + c.mfp->pagesize, 0, c.mfp->pagesize, c.buf, &nw)) != 0) + __db_errx(env, DB_STR_A("3015", + "%s: write failed for page %lu", "%s %lu"), + __memp_fn(dbmfp), (u_long)bhp->pgno); + } + return (__memp_pgwrite_finish(&c, did_io, ret)); +} + +/* + * __memp_aio_writeback_done -- + * os_aio completion callback for an async checkpoint write. Records the + * result only; the finish + reference release run in the reaping thread + * (via __memp_aio_drain), so no latches are taken in this callback. + */ +static void +__memp_aio_writeback_done(env, cookie, io_ret) + ENV *env; + void *cookie; + int io_ret; +{ + MEMP_AIO_W *w; + + COMPQUIET(env, NULL); + w = cookie; + w->io_ret = io_ret; + w->done = 1; +} + +/* + * __memp_aio_writeback_finish -- + * Complete one async checkpoint write: run the shared page-write finish, + * then release the held file-handle reference (mirrors __memp_bhwrite's + * tail; opened is always 0 on the async fast path). 
+ */ +static int +__memp_aio_writeback_finish(dbmp, w) + DB_MPOOL *dbmp; + MEMP_AIO_W *w; +{ + DB_MPOOLFILE *dbmfp; + ENV *env; + MPOOLFILE *mfp; + int ret; + + env = dbmp->env; + ret = __memp_pgwrite_finish(&w->ctx, 1, w->io_ret); + + dbmfp = w->dbmfp; + mfp = dbmfp->mfp; + MUTEX_LOCK(env, dbmp->mutex); + if (!w->opened && dbmfp->ref == 1) { + if (!F_ISSET(dbmfp, MP_FLUSH)) { + F_SET(dbmfp, MP_FLUSH); + MUTEX_LOCK(env, mfp->mutex); + if (!F_ISSET(dbmfp, MP_FOR_FLUSH)) { + mfp->neutral_cnt++; + F_SET(dbmfp, MP_FOR_FLUSH); + } + MUTEX_UNLOCK(env, mfp->mutex); + } + } else + --dbmfp->ref; + MUTEX_UNLOCK(env, dbmp->mutex); + return (ret); +} + +/* + * __memp_bhwrite_async -- + * Asynchronous variant of __memp_bhwrite for the checkpoint/sync path. + * For the common case -- a durable, already-open file handle -- it + * prepares the write and submits it via os_aio, holding the buffer pin + * (the caller's ref + shared mtx_buf) and a file-handle reference until + * completion; *deferredp is set and the caller must later reap via + * __memp_aio_drain. Dead/temporary/extent/unopened/read-only files and + * the skip/error cases are handled synchronously here (*deferredp == 0). + * + * PUBLIC: int __memp_bhwrite_async __P((DB_MPOOL *, DB_MPOOL_HASH *, + * PUBLIC: MPOOLFILE *, BH *, struct __db_aio_context *, MEMP_AIO_W *, + * PUBLIC: int *)); + */ +int +__memp_bhwrite_async(dbmp, hp, mfp, bhp, aioc, w, deferredp) + DB_MPOOL *dbmp; + DB_MPOOL_HASH *hp; + MPOOLFILE *mfp; + BH *bhp; + struct __db_aio_context *aioc; + MEMP_AIO_W *w; + int *deferredp; +{ + DB_AIO_OP op; + DB_MPOOLFILE *dbmfp; + ENV *env; + size_t nw; + int do_io, ret; + + env = dbmp->env; + *deferredp = 0; + + /* Only an already-open durable handle takes the async fast path. 
*/ + if (mfp->deadfile) + return (__memp_bhwrite(dbmp, hp, mfp, bhp, 1)); + MUTEX_LOCK(env, dbmp->mutex); + TAILQ_FOREACH(dbmfp, &dbmp->dbmfq, q) + if (dbmfp->mfp == mfp && !F_ISSET(dbmfp, MP_READONLY)) { + ++dbmfp->ref; + break; + } + MUTEX_UNLOCK(env, dbmp->mutex); + if (dbmfp == NULL || dbmfp->fhp == NULL) { + if (dbmfp != NULL) { + MUTEX_LOCK(env, dbmp->mutex); + --dbmfp->ref; + MUTEX_UNLOCK(env, dbmp->mutex); + } + return (__memp_bhwrite(dbmp, hp, mfp, bhp, 1)); + } + + /* Prepare the write (WAL flush + pgout) into the slot's context. */ + ret = __memp_pgwrite_prep(env, dbmfp, hp, bhp, &w->ctx, &do_io); + if (ret != 0 || !do_io) { + ret = __memp_pgwrite_finish(&w->ctx, 0, ret); + MUTEX_LOCK(env, dbmp->mutex); + --dbmfp->ref; + MUTEX_UNLOCK(env, dbmp->mutex); + return (ret); + } + + w->bhp = bhp; + w->dbmfp = dbmfp; + w->opened = 0; + w->io_ret = 0; + w->done = 0; + + op.op = DB_IO_WRITE; + op.fhp = dbmfp->fhp; + op.pgno = bhp->pgno; + op.pagesize = w->ctx.mfp->pagesize; + op.buf = w->ctx.buf; + op.cookie = w; + op.done = __memp_aio_writeback_done; + if ((ret = __os_aio_submit(env, aioc, &op)) != 0) { + /* Submit failed: complete the write synchronously. */ + ret = __os_io(env, DB_IO_WRITE, dbmfp->fhp, bhp->pgno, + op.pagesize, 0, op.pagesize, w->ctx.buf, &nw); + ret = __memp_pgwrite_finish(&w->ctx, 1, ret); + MUTEX_LOCK(env, dbmp->mutex); + --dbmfp->ref; + MUTEX_UNLOCK(env, dbmp->mutex); + return (ret); + } + *deferredp = 1; + return (0); +} + +/* + * __memp_aio_drain -- + * Reap all "n" outstanding async checkpoint writes, run each completion + * (BH_DIRTY clear + file-handle release), and release each buffer pin. + * Returns the number of writes completed (n). 
+ * + * PUBLIC: int __memp_aio_drain __P((ENV *, DB_MPOOL *, + * PUBLIC: struct __db_aio_context *, MEMP_AIO_W *, int)); + */ +int +__memp_aio_drain(env, dbmp, aioc, w, n) + ENV *env; + DB_MPOOL *dbmp; + struct __db_aio_context *aioc; + MEMP_AIO_W *w; + int n; +{ + int j; + + for (j = 0; j < n; j++) { + /* + * Wait on this slot's own completion flag, not on a running + * count of reaped events: the reap count says nothing about + * which writes finished, and finishing a slot whose callback + * has not run would unpin a buffer that is still being + * written. + */ + while (!w[j].done) + (void)__os_aio_reap(env, aioc, -1, 1); + + /* + * Report a failed write the same way __memp_pgwrite does. This + * must precede the finish call, which releases the file-handle + * reference. + */ + if (w[j].io_ret != 0) + __db_errx(env, DB_STR_A("3015", + "%s: write failed for page %lu", "%s %lu"), + __memp_fn(w[j].dbmfp), (u_long)w[j].bhp->pgno); + + (void)__memp_aio_writeback_finish(dbmp, &w[j]); + + /* Release the buffer pin held since submission. */ + DB_ASSERT(env, atomic_read(&w[j].bhp->ref) > 0); + atomic_dec(env, &w[j].bhp->ref); + MUTEX_UNLOCK(env, w[j].bhp->mtx_buf); + } + return (n); +} + /* * __memp_pg -- * Call the pgin/pgout routine. diff --git a/src/mp/mp_region.c b/src/mp/mp_region.c index d5d6efce8..91f9d3eba 100644 --- a/src/mp/mp_region.c +++ b/src/mp/mp_region.c @@ -10,6 +10,7 @@ #include "db_int.h" #include "dbinc/mp.h" +#include "dbinc/os_aio.h" static int __memp_init_config __P((ENV *, MPOOL *)); static void __memp_region_size __P((ENV *, roff_t *, u_int32_t *)); @@ -170,6 +171,17 @@ __memp_open(env, create_ok) if ((ret = __memp_init_config(env, mp)) != 0) return (ret); + /* + * Best-effort per-process AIO context for asynchronous buffer-pool + * writeback, enabled only when the application requested it via + * DB_ENV->set_flags(DB_MPOOL_AIO). Default is synchronous writeback, + * so there is no behavior change unless AIO is explicitly turned on + * (on platforms with no AIO backend the context has no backend and + * writeback stays synchronous regardless). Failure is non-fatal. + */ + if (F_ISSET(env->dbenv, DB_ENV_MPOOL_AIO)) + (void)__os_aio_create(env, 0, &dbmp->aio_ctx); + return (0); err: env->mp_handle = NULL; @@ -516,6 +528,12 @@ __memp_env_refresh(env) ret = 0; dbmp = env->mp_handle; mp = dbmp->reginfo[0].primary; + + /* Tear down the per-process async-I/O writeback context, if any.
*/ + if (dbmp->aio_ctx != NULL) { + (void)__os_aio_destroy(env, dbmp->aio_ctx); + dbmp->aio_ctx = NULL; + } nreg = mp->nreg; hp = R_ADDR(&dbmp->reginfo[0], mp->htab); diff --git a/src/mp/mp_sync.c b/src/mp/mp_sync.c index 87497a023..d26a495e1 100644 --- a/src/mp/mp_sync.c +++ b/src/mp/mp_sync.c @@ -11,6 +11,7 @@ #include "db_int.h" #include "dbinc/log.h" #include "dbinc/mp.h" +#include "dbinc/os_aio.h" #include "dbinc/db_page.h" #include "dbinc/hash.h" @@ -301,11 +302,15 @@ __memp_sync_int(env, dbmfp, trickle_max, flags, wrote_totalp, interruptedp) u_int32_t ar_cnt, ar_max, i, n_cache, remaining, wrote_total; int32_t wrote_cnt; int dirty, filecnt, maxopenfd, required_write, ret, t_ret; + MEMP_AIO_W aiow[MEMP_AIO_WINDOW]; /* async writeback window */ + int deferred, nflight, use_aio; dbmp = env->mp_handle; mp = dbmp->reginfo[0].primary; last_mf_offset = INVALID_ROFF; filecnt = wrote_total = 0; + nflight = 0; + use_aio = dbmp->aio_ctx != NULL && __os_aio_ctx_available(dbmp->aio_ctx); if (wrote_totalp != NULL) *wrote_totalp = 0; @@ -557,10 +562,18 @@ __memp_sync_int(env, dbmfp, trickle_max, flags, wrote_totalp, interruptedp) * If the buffer is dirty, we write it. We only try to * write the buffer once. */ + deferred = 0; if (F_ISSET(bhp, BH_DIRTY)) { mfp = R_ADDR(dbmp->reginfo, bhp->mf_offset); - if ((t_ret = - __memp_bhwrite(dbmp, hp, mfp, bhp, 1)) == 0) { + if (use_aio) + t_ret = __memp_bhwrite_async(dbmp, hp, mfp, + bhp, dbmp->aio_ctx, &aiow[nflight], + &deferred); + else + t_ret = __memp_bhwrite(dbmp, hp, mfp, bhp, 1); + if (deferred) + ++nflight; + else if (t_ret == 0) { ++wrote_cnt; ++wrote_total; } else { @@ -583,10 +596,22 @@ __memp_sync_int(env, dbmfp, trickle_max, flags, wrote_totalp, interruptedp) --remaining; bharray[i].track_hp = NULL; - /* Discard our buffer reference. */ - DB_ASSERT(env, atomic_read(&bhp->ref) > 0); - atomic_dec(env, &bhp->ref); - MUTEX_UNLOCK(env, bhp->mtx_buf); + /* + * Discard our buffer reference. 
For a deferred async write + * the pin (ref + shared mtx_buf) is held until the write + * completes; drain the window when it is full. + */ + if (!deferred) { + DB_ASSERT(env, atomic_read(&bhp->ref) > 0); + atomic_dec(env, &bhp->ref); + MUTEX_UNLOCK(env, bhp->mtx_buf); + } else if (nflight >= MEMP_AIO_WINDOW) { + t_ret = __memp_aio_drain(env, dbmp, + dbmp->aio_ctx, aiow, nflight); + wrote_cnt += t_ret; + wrote_total += t_ret; + nflight = 0; + } /* Check if the call has been interrupted. */ if (LF_ISSET(DB_SYNC_INTERRUPT_OK) && @@ -594,6 +619,11 @@ __memp_sync_int(env, dbmfp, trickle_max, flags, wrote_totalp, interruptedp) STAT(++mp->stat.st_sync_interrupted); if (interruptedp != NULL) *interruptedp = 1; + if (nflight > 0) { + wrote_total += __memp_aio_drain(env, + dbmp, dbmp->aio_ctx, aiow, nflight); + nflight = 0; + } goto err; } @@ -612,6 +642,16 @@ __memp_sync_int(env, dbmfp, trickle_max, flags, wrote_totalp, interruptedp) } done: /* + * Drain any async writes still in flight before forcing pages to + * disk: the fsync below must follow all completed writes. + */ + if (nflight > 0) { + wrote_total += __memp_aio_drain(env, + dbmp, dbmp->aio_ctx, aiow, nflight); + nflight = 0; + } + + /* * If a write is required, we have to force the pages to disk. We * don't do this as we go along because we want to give the OS as * much time as possible to lazily flush, and because we have to flush diff --git a/src/os/os_aio.c b/src/os/os_aio.c new file mode 100644 index 000000000..452746bcc --- /dev/null +++ b/src/os/os_aio.c @@ -0,0 +1,160 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Asynchronous I/O abstraction -- generic layer + synchronous backend. + * + * The synchronous backend performs each submitted op inline and invokes its + * completion immediately; it is the behaviour-preserving default and the + * fallback when no platform async backend (io_uring/kqueue/IOCP/POSIX aio) is + * available or enabled. 
Platform backends plug in via DB_AIO_BACKEND. + */ +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/os_aio.h" + +/* + * __os_aio_create -- + * Create an AIO context. Selects a platform backend if one is available + * and enabled, else uses the synchronous fallback. + */ +int +__os_aio_create(env, depth, ctxp) + ENV *env; + u_int32_t depth; + DB_AIO_CONTEXT **ctxp; +{ + DB_AIO_CONTEXT *ctx; + int ret; + + *ctxp = NULL; + if ((ret = __os_calloc(env, 1, sizeof(DB_AIO_CONTEXT), &ctx)) != 0) + return (ret); + ctx->depth = depth == 0 ? DB_AIO_DEFAULT_DEPTH : depth; + ctx->backend = NULL; /* synchronous fallback */ + ctx->priv = NULL; + ctx->inflight = 0; + + /* + * Probe and install a platform backend in preference order; on + * failure the context stays on the synchronous fallback + * (backend == NULL). Native file-completion engines first + * (io_uring on Linux, IOCP on Windows, kqueue+aio on BSD), then + * POSIX aio (Solaris/illumos, macOS), then the portable thread-pool + * offload as the last-resort async path. + */ +#ifdef HAVE_IO_URING + if (ctx->backend == NULL) + (void)__os_aio_uring_init(env, ctx); +#endif +#ifdef HAVE_IOCP + if (ctx->backend == NULL) + (void)__os_aio_iocp_init(env, ctx); +#endif +#ifdef HAVE_AIO_KQUEUE + if (ctx->backend == NULL) + (void)__os_aio_kqueue_init(env, ctx); +#endif +#ifdef HAVE_AIO_POSIX + if (ctx->backend == NULL) + (void)__os_aio_posix_init(env, ctx); +#endif +#ifdef HAVE_AIO_THREADPOOL + if (ctx->backend == NULL) + (void)__os_aio_pool_init(env, ctx); +#endif + *ctxp = ctx; + return (0); +} + +/* + * __os_aio_submit -- + * Submit one op. The synchronous backend performs it now and runs the + * completion inline; a real backend queues it for later reaping. + */ +int +__os_aio_submit(env, ctx, aio) + ENV *env; + DB_AIO_CONTEXT *ctx; + DB_AIO_OP *aio; +{ + int ret; + size_t nio; + + if (ctx->backend != NULL) + return (ctx->backend->submit(env, ctx, aio)); + + /* Synchronous fallback: do the I/O now, complete inline. 
*/ + nio = 0; + ret = __os_io(env, aio->op, aio->fhp, aio->pgno, + aio->pagesize, 0, aio->pagesize, (u_int8_t *)aio->buf, &nio); + if (aio->done != NULL) + aio->done(env, aio->cookie, ret); + return (ret); +} + +/* + * __os_aio_reap -- + * Reap up to "max" completions (max < 0 means all ready). For the + * synchronous backend there is never anything outstanding. + */ +int +__os_aio_reap(env, ctx, max, wait) + ENV *env; + DB_AIO_CONTEXT *ctx; + int max, wait; +{ + if (ctx->backend != NULL) + return (ctx->backend->reap(env, ctx, max, wait)); + + /* Synchronous fallback: nothing is ever outstanding. */ + COMPQUIET(env, NULL); + COMPQUIET(max, 0); + COMPQUIET(wait, 0); + return (0); +} + +/* + * __os_aio_destroy -- + * Tear down an AIO context. + */ +int +__os_aio_destroy(env, ctx) + ENV *env; + DB_AIO_CONTEXT *ctx; +{ + int ret; + + ret = 0; + if (ctx == NULL) + return (0); + if (ctx->backend != NULL && ctx->backend->destroy != NULL) + ret = ctx->backend->destroy(env, ctx); + __os_free(env, ctx); + return (ret); +} + +/* + * __os_aio_available -- + * Return 1 if a real (non-synchronous) async backend is active. + */ +int +__os_aio_available(env) + ENV *env; +{ + COMPQUIET(env, NULL); + return (0); /* overridden per-context; see __os_aio_ctx_available */ +} + +/* + * __os_aio_ctx_available -- + * Return 1 if the given context has a real (async) backend. + * + * PUBLIC: int __os_aio_ctx_available __P((DB_AIO_CONTEXT *)); + */ +int +__os_aio_ctx_available(ctx) + DB_AIO_CONTEXT *ctx; +{ + return (ctx != NULL && ctx->backend != NULL); +} diff --git a/src/os/os_aio_iocp.c b/src/os/os_aio_iocp.c new file mode 100644 index 000000000..e59f17a51 --- /dev/null +++ b/src/os/os_aio_iocp.c @@ -0,0 +1,238 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Windows IOCP native file-AIO backend for the os_aio abstraction. 
+ * + * Adapted from the XTC Project's libxtc (src/io/io_iocp.c, ISC, with + * the author's permission): a pread/pwrite is an overlapped + * ReadFile/WriteFile on a file HANDLE associated with a completion + * port, and completions are dequeued in batch by + * GetQueuedCompletionStatusEx -- no worker thread, no event handle. + * The socket AFD-poll machinery in libxtc is dropped; only file I/O + * is needed. + * + * Built only when configured with HAVE_IOCP (a Windows build). NOTE: + * this backend is a faithful adaptation but has NOT yet been compiled + * or validated on a Windows host; the portable thread-pool backend + * (os_aio_pool.c) is the validated Windows async path until then. + * Otherwise this is an empty translation unit. + */ +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/os_aio.h" + +#ifdef HAVE_IOCP + +#include +#include + +typedef struct __aio_iocp_pend { + OVERLAPPED *ov; /* kernel-owned while pending. */ + HANDLE fh; /* file handle (for GetOverlappedResult). */ + void *cookie; + db_aio_done_fn done; + u_int32_t len; /* expected transfer. */ +} AIO_IOCP_PEND; + +typedef struct __aio_iocp_state { + HANDLE iocp; /* the completion port. */ + AIO_IOCP_PEND *pend; /* in-flight ops. */ + int n, cap; +} AIO_IOCP_STATE; + +static int __aio_iocp_submit __P((ENV *, DB_AIO_CONTEXT *, DB_AIO_OP *)); +static int __aio_iocp_reap __P((ENV *, DB_AIO_CONTEXT *, int, int)); +static int __aio_iocp_destroy __P((ENV *, DB_AIO_CONTEXT *)); + +static const DB_AIO_BACKEND __aio_iocp_backend = { + "iocp", + __aio_iocp_submit, + __aio_iocp_reap, + NULL, + __aio_iocp_destroy +}; + +/* + * __os_aio_iocp_init -- + * Create a completion port and attach the backend. 
+ * + * PUBLIC: int __os_aio_iocp_init __P((ENV *, DB_AIO_CONTEXT *)); + */ +int +__os_aio_iocp_init(env, ctx) + ENV *env; + DB_AIO_CONTEXT *ctx; +{ + AIO_IOCP_STATE *st; + int ret; + + if ((ret = __os_calloc(env, 1, sizeof(*st), &st)) != 0) + return (ret); + st->iocp = CreateIoCompletionPort(INVALID_HANDLE_VALUE, NULL, 0, 0); + if (st->iocp == NULL) { + __os_free(env, st); + return (__os_get_errno()); + } + ctx->priv = st; + ctx->backend = &__aio_iocp_backend; + return (0); +} + +static int +__aio_iocp_submit(env, ctx, aio) + ENV *env; + DB_AIO_CONTEXT *ctx; + DB_AIO_OP *aio; +{ + AIO_IOCP_STATE *st; + OVERLAPPED *ov; + HANDLE fh; + BOOL ok; + DWORD err; + __uint64_t off; + int ret; + + st = ctx->priv; + if (aio->op != DB_IO_READ && aio->op != DB_IO_WRITE) + return (EINVAL); + fh = aio->fhp->handle; + if (fh == INVALID_HANDLE_VALUE) + return (EINVAL); + + /* Associate the file with the port once; ignore "already done". */ + if (CreateIoCompletionPort(fh, st->iocp, 0, 0) == NULL) { + err = GetLastError(); + if (err != ERROR_INVALID_PARAMETER) + return (EAGAIN); + } + + if ((ret = __os_calloc(env, 1, sizeof(*ov), &ov)) != 0) + return (ret); + off = (__uint64_t)aio->pgno * aio->pagesize; + ov->Offset = (DWORD)(off & 0xFFFFFFFFu); + ov->OffsetHigh = (DWORD)(off >> 32); + + if (aio->op == DB_IO_READ) + ok = ReadFile(fh, aio->buf, (DWORD)aio->pagesize, NULL, ov); + else + ok = WriteFile(fh, aio->buf, (DWORD)aio->pagesize, NULL, ov); + if (!ok && (err = GetLastError()) != ERROR_IO_PENDING) { + __os_free(env, ov); + return (EAGAIN); + } + + if (st->n >= st->cap) { + int nc = st->cap == 0 ? 
16 : st->cap * 2; + void *p = NULL; + if ((ret = __os_realloc(env, + sizeof(*st->pend) * (size_t)nc, &p)) != 0) { + (void)CancelIoEx(fh, ov); + __os_free(env, ov); + return (ret); + } + st->pend = p; + st->cap = nc; + } + st->pend[st->n].ov = ov; + st->pend[st->n].fh = fh; + st->pend[st->n].cookie = aio->cookie; + st->pend[st->n].done = aio->done; + st->pend[st->n].len = aio->pagesize; + st->n++; + ctx->inflight++; + return (0); +} + +static int +__aio_iocp_reap(env, ctx, max, wait) + ENV *env; + DB_AIO_CONTEXT *ctx; + int max, wait; +{ + AIO_IOCP_STATE *st; + OVERLAPPED_ENTRY batch[64]; + ULONG n_done, j; + DWORD tmo, nbytes; + int got, i, bmax, ret; + + st = ctx->priv; + got = 0; + tmo = wait ? INFINITE : 0; + bmax = (int)(sizeof(batch) / sizeof(batch[0])); + if (max >= 0 && max < bmax) + bmax = max; + + if (!GetQueuedCompletionStatusEx(st->iocp, batch, + (ULONG)bmax, &n_done, tmo, FALSE)) + return (got); /* WAIT_TIMEOUT or error. */ + + for (j = 0; j < n_done; j++) { + OVERLAPPED *ov = batch[j].lpOverlapped; + for (i = 0; i < st->n; i++) + if (st->pend[i].ov == ov) + break; + if (i == st->n) + continue; /* unknown completion. */ + + nbytes = 0; + if (GetOverlappedResult(st->pend[i].fh, ov, &nbytes, FALSE)) + ret = (nbytes == st->pend[i].len) ? 
0 : EIO; + else + ret = EIO; + if (st->pend[i].done != NULL) + st->pend[i].done(env, st->pend[i].cookie, ret); + __os_free(env, ov); + + st->n--; + if (i != st->n) + st->pend[i] = st->pend[st->n]; + if (ctx->inflight != 0) + ctx->inflight--; + got++; + } + return (got); +} + +static int +__aio_iocp_destroy(env, ctx) + ENV *env; + DB_AIO_CONTEXT *ctx; +{ + AIO_IOCP_STATE *st; + int i; + + if ((st = ctx->priv) == NULL) + return (0); + for (i = 0; i < st->n; i++) { + (void)CancelIoEx(st->pend[i].fh, st->pend[i].ov); + __os_free(env, st->pend[i].ov); + } + if (st->pend != NULL) + __os_free(env, st->pend); + if (st->iocp != NULL) + (void)CloseHandle(st->iocp); + __os_free(env, st); + ctx->priv = NULL; + return (0); +} + +#else /* !HAVE_IOCP */ + +/* + * __os_aio_iocp_init -- + * IOCP not configured (non-Windows or thread-pool preferred). + * + * PUBLIC: int __os_aio_iocp_init __P((ENV *, DB_AIO_CONTEXT *)); + */ +int +__os_aio_iocp_init(env, ctx) + ENV *env; + DB_AIO_CONTEXT *ctx; +{ + COMPQUIET(env, NULL); + COMPQUIET(ctx, NULL); + return (DB_OPNOTSUP); +} + +#endif /* HAVE_IOCP */ diff --git a/src/os/os_aio_kqueue.c b/src/os/os_aio_kqueue.c new file mode 100644 index 000000000..45dffd57a --- /dev/null +++ b/src/os/os_aio_kqueue.c @@ -0,0 +1,268 @@ +/*- + * See the file LICENSE for redistribution information. + * + * BSD kqueue + POSIX aio backend for the os_aio abstraction. + * + * Submits page I/O with aio_read/aio_write whose aiocb requests a kqueue + * completion (sigev_notify == SIGEV_KEVENT, sigev_notify_kqueue == our kq); + * each finished op posts an EVFILT_AIO kevent whose udata is the op record, + * so reaping is a single kevent() call rather than polling every aiocb with + * aio_error. This is the native event-driven async path on FreeBSD (and + * other BSDs that provide sigev_notify_kqueue). 
+ * + * macOS is intentionally NOT this backend: its lacks + * sigev_notify_kqueue and its historical SIGEV_KEVENT aio support is + * unreliable, so macOS uses the POSIX-aio backend (aio_suspend) instead. + * Selection is decided at configure time via HAVE_AIO_KQUEUE. + * + * Built only when configured with kqueue-aio support (HAVE_AIO_KQUEUE); + * otherwise this file is an empty translation unit and contexts use the + * synchronous fallback in os_aio.c. + */ +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/os_aio.h" + +#ifdef HAVE_AIO_KQUEUE + +#include +#include +#include +#include +#include + +/* One in-flight op: control block plus the caller's completion info. */ +typedef struct __aio_kq_op { + struct aiocb cb; /* control block (kevent udata = this). */ + void *cookie; /* caller context (BH *). */ + db_aio_done_fn done; /* completion callback. */ + u_int32_t len; /* expected transfer length. */ + int op; /* DB_IO_READ / DB_IO_WRITE. */ + int active; /* slot in use. */ +} AIO_KQ_OP; + +typedef struct __aio_kq_state { + int kq; /* kqueue descriptor. */ + AIO_KQ_OP *ops; /* in-flight table, depth entries. */ + u_int32_t depth; /* table capacity. */ +} AIO_KQ_STATE; + +static int __aio_kq_submit __P((ENV *, DB_AIO_CONTEXT *, DB_AIO_OP *)); +static int __aio_kq_reap __P((ENV *, DB_AIO_CONTEXT *, int, int)); +static int __aio_kq_destroy __P((ENV *, DB_AIO_CONTEXT *)); + +static const DB_AIO_BACKEND __aio_kq_backend = { + "kqueue-aio", + __aio_kq_submit, + __aio_kq_reap, + NULL, /* cancel: unused */ + __aio_kq_destroy +}; + +/* + * __os_aio_kqueue_init -- + * Create a kqueue and attach the backend to the context. + * + * PUBLIC: int __os_aio_kqueue_init __P((ENV *, DB_AIO_CONTEXT *)); + */ +int +__os_aio_kqueue_init(env, ctx) + ENV *env; + DB_AIO_CONTEXT *ctx; +{ + AIO_KQ_STATE *st; + int ret; + + if ((ret = __os_calloc(env, 1, sizeof(*st), &st)) != 0) + return (ret); + st->depth = ctx->depth == 0 ? 
DB_AIO_DEFAULT_DEPTH : ctx->depth; + if ((st->kq = kqueue()) < 0) { + ret = __os_get_errno(); + __os_free(env, st); + return (ret); + } + if ((ret = __os_calloc(env, + st->depth, sizeof(AIO_KQ_OP), &st->ops)) != 0) { + (void)close(st->kq); + __os_free(env, st); + return (ret); + } + ctx->priv = st; + ctx->backend = &__aio_kq_backend; + return (0); +} + +/* + * __aio_kq_submit -- + * Fill a free slot's aiocb to post an EVFILT_AIO kevent on completion, + * then submit via aio_read/aio_write. DB_OPNOTSUP if the table is full + * or the kernel refuses the op (caller writes the page synchronously). + */ +static int +__aio_kq_submit(env, ctx, aio) + ENV *env; + DB_AIO_CONTEXT *ctx; + DB_AIO_OP *aio; +{ + AIO_KQ_STATE *st; + AIO_KQ_OP *slot; + u_int32_t i; + int r; + + st = ctx->priv; + for (slot = NULL, i = 0; i < st->depth; i++) + if (!st->ops[i].active) { + slot = &st->ops[i]; + break; + } + if (slot == NULL) + return (DB_OPNOTSUP); + + memset(&slot->cb, 0, sizeof(slot->cb)); + slot->cb.aio_fildes = aio->fhp->fd; + slot->cb.aio_buf = aio->buf; + slot->cb.aio_nbytes = aio->pagesize; + slot->cb.aio_offset = (off_t)aio->pgno * aio->pagesize; + slot->cb.aio_sigevent.sigev_notify = SIGEV_KEVENT; + slot->cb.aio_sigevent.sigev_notify_kqueue = st->kq; + slot->cb.aio_sigevent.sigev_value.sival_ptr = slot; + slot->cookie = aio->cookie; + slot->done = aio->done; + slot->len = aio->pagesize; + slot->op = aio->op; + + r = aio->op == DB_IO_WRITE ? + aio_write(&slot->cb) : aio_read(&slot->cb); + if (r != 0) + return (DB_OPNOTSUP); + + slot->active = 1; + ctx->inflight++; + return (0); +} + +/* + * __aio_kq_finish_slot -- + * Collect a completed slot's result, run its completion, free the slot. + */ +static void +__aio_kq_finish_slot(env, ctx, slot) + ENV *env; + DB_AIO_CONTEXT *ctx; + AIO_KQ_OP *slot; +{ + ssize_t n; + int err, io_ret; + + err = aio_error(&slot->cb); + if (err == 0) { + n = aio_return(&slot->cb); + io_ret = (n == (ssize_t)slot->len) ? 
0 : EIO; + } else { + (void)aio_return(&slot->cb); + io_ret = err; + } + if (slot->done != NULL) + slot->done(env, slot->cookie, io_ret); + slot->active = 0; + if (ctx->inflight != 0) + ctx->inflight--; +} + +/* + * __aio_kq_reap -- + * Reap up to "max" completions (max < 0 means all ready). Drains + * EVFILT_AIO kevents; with "wait" set and ops outstanding, blocks in + * kevent until at least one completes. + */ +static int +__aio_kq_reap(env, ctx, max, wait) + ENV *env; + DB_AIO_CONTEXT *ctx; + int max, wait; +{ + AIO_KQ_STATE *st; + struct kevent evs[64]; + struct timespec zero; + AIO_KQ_OP *slot; + int batch, got, i, n; + + st = ctx->priv; + got = 0; + zero.tv_sec = 0; + zero.tv_nsec = 0; + + for (;;) { + if (max >= 0 && got >= max) + break; + batch = (int)(sizeof(evs) / sizeof(evs[0])); + if (max >= 0 && (max - got) < batch) + batch = max - got; + + /* + * Block only when asked to wait, nothing has been reaped yet, + * and ops are still outstanding; otherwise poll (zero timeout). + */ + n = kevent(st->kq, NULL, 0, evs, batch, + (wait && got == 0 && ctx->inflight != 0) ? NULL : &zero); + if (n < 0) { + if (errno == EINTR) + continue; + break; + } + if (n == 0) + break; + for (i = 0; i < n; i++) { + slot = evs[i].udata; + if (slot == NULL || !slot->active) + continue; + __aio_kq_finish_slot(env, ctx, slot); + got++; + } + } + return (got); +} + +/* + * __aio_kq_destroy -- + * Drain all in-flight ops, close the kqueue, free state. + */ +static int +__aio_kq_destroy(env, ctx) + ENV *env; + DB_AIO_CONTEXT *ctx; +{ + AIO_KQ_STATE *st; + + if ((st = ctx->priv) == NULL) + return (0); + while (ctx->inflight != 0) + (void)__aio_kq_reap(env, ctx, -1, 1); + if (st->kq >= 0) + (void)close(st->kq); + __os_free(env, st->ops); + __os_free(env, st); + ctx->priv = NULL; + return (0); +} + +#else /* !HAVE_AIO_KQUEUE */ + +/* + * __os_aio_kqueue_init -- + * kqueue aio not configured. 
+ * + * PUBLIC: int __os_aio_kqueue_init __P((ENV *, DB_AIO_CONTEXT *)); + */ +int +__os_aio_kqueue_init(env, ctx) + ENV *env; + DB_AIO_CONTEXT *ctx; +{ + COMPQUIET(env, NULL); + COMPQUIET(ctx, NULL); + return (DB_OPNOTSUP); +} + +#endif /* HAVE_AIO_KQUEUE */ diff --git a/src/os/os_aio_pool.c b/src/os/os_aio_pool.c new file mode 100644 index 000000000..ec20ca880 --- /dev/null +++ b/src/os/os_aio_pool.c @@ -0,0 +1,368 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Thread-pool offload backend for the os_aio abstraction. + * + * The portable async path for platforms without a native file completion + * engine (everything but Linux io_uring and Windows IOCP). A single, + * process-wide pool of worker threads -- created lazily on the first + * submitted op, not at context creation -- drains a global submission FIFO, + * performs each op with the normal synchronous __os_io, and routes the result + * to the submitting context's completion FIFO, which __os_aio_reap drains. + * + * Design notes: + * - Lazy + global: a context (one per environment) costs nothing until it + * actually issues an async write, and many environments share one pool + * rather than each spawning its own threads. + * - Fork-safe: a pthread_atfork child handler resets the pool to "unstarted" + * (the worker threads do not survive fork) so a child re-spawns lazily on + * its next submit and never deadlocks on an inherited-locked pool mutex. + * - The pool structure is adapted from libxtc (src/ptc/blocking.c, ISC). + * + * Built when configured with HAVE_AIO_THREADPOOL; otherwise an empty TU. + */ +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/os_aio.h" + +#ifdef HAVE_AIO_THREADPOOL + +#include + +#define AIO_POOL_THREADS 4 + +/* One unit of work on the global pool. 
*/ +typedef struct __aio_work { + ENV *env; + DB_AIO_OP op; /* copied from the caller */ + int result; /* 0 or errno */ + struct __aio_pool_state *owner; /* completion routed here */ + struct __aio_work *next; +} AIO_WORK; + +/* Per-context (per-environment) completion state. */ +typedef struct __aio_pool_state { + pthread_cond_t done_cv; /* reap waits here */ + AIO_WORK *cmp_head, *cmp_tail; /* this context's completions */ + DB_AIO_CONTEXT *ctx; /* owning context (inflight) */ + struct __aio_pool_state *reg_next; /* live-context registry link */ +} AIO_POOL_STATE; + +/* + * Process-wide lazy worker pool. Shared by every context; reset by the + * pthread_atfork child handler. g_lock protects the submission FIFO and + * every context's completion FIFO; each context has its own done_cv. + */ +static pthread_mutex_t g_lock = PTHREAD_MUTEX_INITIALIZER; +static pthread_cond_t g_work_cv = PTHREAD_COND_INITIALIZER; +static AIO_WORK *g_sub_head, *g_sub_tail; +static pthread_t g_threads[AIO_POOL_THREADS]; +static int g_nthreads, g_started, g_stopping; +static pthread_once_t g_atfork_once = PTHREAD_ONCE_INIT; +static AIO_POOL_STATE *g_ctx_registry; /* all live contexts (g_lock). 
*/ + +static AIO_WORK * +__aio_q_pop(head, tail) + AIO_WORK **head, **tail; +{ + AIO_WORK *w; + + if ((w = *head) == NULL) + return (NULL); + if ((*head = w->next) == NULL) + *tail = NULL; + w->next = NULL; + return (w); +} + +static void +__aio_q_push(head, tail, w) + AIO_WORK **head, **tail, *w; +{ + w->next = NULL; + if (*tail != NULL) + (*tail)->next = w; + else + *head = w; + *tail = w; +} + +static void * +__aio_pool_worker(arg) + void *arg; +{ + AIO_WORK *w; + size_t nio; + + COMPQUIET(arg, NULL); + for (;;) { + (void)pthread_mutex_lock(&g_lock); + while (g_sub_head == NULL && !g_stopping) + (void)pthread_cond_wait(&g_work_cv, &g_lock); + if (g_stopping && g_sub_head == NULL) { + (void)pthread_mutex_unlock(&g_lock); + return (NULL); + } + w = __aio_q_pop(&g_sub_head, &g_sub_tail); + (void)pthread_mutex_unlock(&g_lock); + + nio = 0; + w->result = __os_io(w->env, w->op.op, w->op.fhp, w->op.pgno, + w->op.pagesize, 0, w->op.pagesize, + (u_int8_t *)w->op.buf, &nio); + + (void)pthread_mutex_lock(&g_lock); + __aio_q_push(&w->owner->cmp_head, &w->owner->cmp_tail, w); + (void)pthread_cond_signal(&w->owner->done_cv); + (void)pthread_mutex_unlock(&g_lock); + } +} + +static void +__aio_atfork_prepare() +{ + (void)pthread_mutex_lock(&g_lock); +} + +static void +__aio_atfork_parent() +{ + (void)pthread_mutex_unlock(&g_lock); +} + +static void +__aio_atfork_child() +{ + AIO_POOL_STATE *st; + + /* + * The worker threads did not survive the fork. Reset the pool to + * unstarted (re-init the lock we held across the fork, drop inherited + * submissions) so the child re-spawns lazily on its next submit and + * never blocks on a pool mutex no live thread will release. 
+ * + * The inherited submission/completion AIO_WORK nodes belonged to the + * parent's in-flight writes; in the child those I/Os will never run + * (no workers) and must NOT be completed here -- running a write + * completion would clear BH_DIRTY and drop a buffer pin for I/O the + * child never performed, corrupting the inherited (copy-on-write) + * buffer state. We therefore discard them and zero every live + * context's in-flight accounting so a subsequent reap does not block + * forever waiting on completions that can never arrive. The pinned + * buffers remain pinned in the child's address space, which is the + * conservative, safe outcome (they are simply never written by the + * child); the parent completes its own writes normally. + */ + (void)pthread_mutex_init(&g_lock, NULL); + (void)pthread_cond_init(&g_work_cv, NULL); + g_sub_head = g_sub_tail = NULL; + g_nthreads = 0; + g_started = 0; + g_stopping = 0; + + for (st = g_ctx_registry; st != NULL; st = st->reg_next) { + (void)pthread_cond_init(&st->done_cv, NULL); + st->cmp_head = st->cmp_tail = NULL; + if (st->ctx != NULL) + st->ctx->inflight = 0; + } +} + +static void +__aio_install_atfork() +{ + (void)pthread_atfork(__aio_atfork_prepare, + __aio_atfork_parent, __aio_atfork_child); +} + +/* Start the pool on first use. Caller holds g_lock. Returns 0 on success. 
*/ +static int +__aio_pool_start_locked() +{ + int i; + + if (g_started) + return (0); + g_stopping = 0; + g_nthreads = 0; + for (i = 0; i < AIO_POOL_THREADS; i++) { + if (pthread_create(&g_threads[i], NULL, + __aio_pool_worker, NULL) != 0) + break; + g_nthreads++; + } + if (g_nthreads == 0) + return (-1); + g_started = 1; + return (0); +} + +static int +__aio_pool_submit(env, ctx, aio) + ENV *env; + DB_AIO_CONTEXT *ctx; + DB_AIO_OP *aio; +{ + AIO_POOL_STATE *st; + AIO_WORK *w; + int ret; + + (void)pthread_once(&g_atfork_once, __aio_install_atfork); + st = ctx->priv; + if ((ret = __os_calloc(env, 1, sizeof(*w), &w)) != 0) + return (ret); + w->env = env; + w->op = *aio; /* caller's op may be transient. */ + w->result = 0; + w->owner = st; + + (void)pthread_mutex_lock(&g_lock); + st->ctx = ctx; /* for the atfork-child inflight reset */ + if (__aio_pool_start_locked() != 0) { + (void)pthread_mutex_unlock(&g_lock); + __os_free(env, w); + return (DB_OPNOTSUP); /* caller writes synchronously */ + } + __aio_q_push(&g_sub_head, &g_sub_tail, w); + ctx->inflight++; + (void)pthread_cond_signal(&g_work_cv); + (void)pthread_mutex_unlock(&g_lock); + return (0); +} + +static int +__aio_pool_reap(env, ctx, max, wait) + ENV *env; + DB_AIO_CONTEXT *ctx; + int max, wait; +{ + AIO_POOL_STATE *st; + AIO_WORK *w; + int got; + + st = ctx->priv; + got = 0; + + (void)pthread_mutex_lock(&g_lock); + if (wait) + while (st->cmp_head == NULL && ctx->inflight != 0) + (void)pthread_cond_wait(&st->done_cv, &g_lock); + + for (;;) { + if (max >= 0 && got >= max) + break; + if ((w = __aio_q_pop(&st->cmp_head, &st->cmp_tail)) == NULL) + break; + if (ctx->inflight != 0) + ctx->inflight--; + (void)pthread_mutex_unlock(&g_lock); + + if (w->op.done != NULL) + w->op.done(env, w->op.cookie, w->result); + __os_free(env, w); + got++; + + (void)pthread_mutex_lock(&g_lock); + } + (void)pthread_mutex_unlock(&g_lock); + return (got); +} + +static int +__aio_pool_destroy(env, ctx) + ENV *env; + DB_AIO_CONTEXT 
*ctx; +{ + AIO_POOL_STATE *st; + AIO_WORK *w; + + if ((st = ctx->priv) == NULL) + return (0); + + /* Drain any writes still in flight so no worker references st. */ + while (ctx->inflight != 0) + (void)__aio_pool_reap(env, ctx, -1, 1); + + (void)pthread_mutex_lock(&g_lock); + while ((w = __aio_q_pop(&st->cmp_head, &st->cmp_tail)) != NULL) + __os_free(env, w); + /* Unlink from the live-context registry. */ + if (g_ctx_registry == st) + g_ctx_registry = st->reg_next; + else { + AIO_POOL_STATE *p; + for (p = g_ctx_registry; p != NULL; p = p->reg_next) + if (p->reg_next == st) { + p->reg_next = st->reg_next; + break; + } + } + (void)pthread_mutex_unlock(&g_lock); + + (void)pthread_cond_destroy(&st->done_cv); + __os_free(env, st); + ctx->priv = NULL; + return (0); +} + +static const DB_AIO_BACKEND __aio_pool_backend = { + "threadpool", + __aio_pool_submit, + __aio_pool_reap, + NULL, + __aio_pool_destroy +}; + +/* + * __os_aio_pool_init -- + * Attach the thread-pool backend to a context. No worker threads are + * created here; the shared pool starts lazily on the first submit. + * + * PUBLIC: int __os_aio_pool_init __P((ENV *, DB_AIO_CONTEXT *)); + */ +int +__os_aio_pool_init(env, ctx) + ENV *env; + DB_AIO_CONTEXT *ctx; +{ + AIO_POOL_STATE *st; + int ret; + + if ((ret = __os_calloc(env, 1, sizeof(*st), &st)) != 0) + return (ret); + if (pthread_cond_init(&st->done_cv, NULL) != 0) { + __os_free(env, st); + return (DB_OPNOTSUP); + } + st->ctx = ctx; + /* Register so the atfork-child handler can reset our in-flight state. */ + (void)pthread_once(&g_atfork_once, __aio_install_atfork); + (void)pthread_mutex_lock(&g_lock); + st->reg_next = g_ctx_registry; + g_ctx_registry = st; + (void)pthread_mutex_unlock(&g_lock); + ctx->priv = st; + ctx->backend = &__aio_pool_backend; + return (0); +} + +#else /* !HAVE_AIO_THREADPOOL */ + +/* + * __os_aio_pool_init -- + * Thread-pool offload not configured. 
+ * + * PUBLIC: int __os_aio_pool_init __P((ENV *, DB_AIO_CONTEXT *)); + */ +int +__os_aio_pool_init(env, ctx) + ENV *env; + DB_AIO_CONTEXT *ctx; +{ + COMPQUIET(env, NULL); + COMPQUIET(ctx, NULL); + return (DB_OPNOTSUP); +} + +#endif /* HAVE_AIO_THREADPOOL */ diff --git a/src/os/os_aio_posix.c b/src/os/os_aio_posix.c new file mode 100644 index 000000000..55e9b679b --- /dev/null +++ b/src/os/os_aio_posix.c @@ -0,0 +1,266 @@ +/*- + * See the file LICENSE for redistribution information. + * + * POSIX.1b asynchronous I/O backend for the os_aio abstraction. + * + * Uses the standard facility: aio_read/aio_write to submit and + * aio_suspend + aio_error/aio_return to reap. This is the native async + * path on systems that provide a working POSIX aio implementation but no + * other completion engine -- notably Solaris/illumos (libaio) -- and a + * portable fallback ahead of the thread pool elsewhere. + * + * Per-context state holds a fixed table of in-flight slots (ctx->depth + * entries); a submit that finds the table full reports DB_OPNOTSUP so the + * caller writes that one page synchronously, exactly as it would when no + * async backend is configured. The buffer-pool writeback path never has + * more than MEMP_AIO_WINDOW writes outstanding, so a reasonable depth keeps + * the table from filling in practice. + * + * Built only when configured with POSIX aio support (HAVE_AIO_POSIX); + * otherwise this file is an empty translation unit and contexts use the + * synchronous fallback in os_aio.c. + */ +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/os_aio.h" + +#ifdef HAVE_AIO_POSIX + +#include +#include + +/* One in-flight op: its control block plus the caller's completion info. */ +typedef struct __aio_posix_op { + struct aiocb cb; /* kernel/library control block. */ + void *cookie; /* caller context (BH *). */ + db_aio_done_fn done; /* completion callback. */ + u_int32_t len; /* expected transfer length. */ + int op; /* DB_IO_READ / DB_IO_WRITE. 
*/ + int active; /* slot in use. */ +} AIO_POSIX_OP; + +typedef struct __aio_posix_state { + AIO_POSIX_OP *ops; /* in-flight table, depth entries. */ + u_int32_t depth; /* table capacity. */ +} AIO_POSIX_STATE; + +static int __aio_posix_submit __P((ENV *, DB_AIO_CONTEXT *, DB_AIO_OP *)); +static int __aio_posix_reap __P((ENV *, DB_AIO_CONTEXT *, int, int)); +static int __aio_posix_destroy __P((ENV *, DB_AIO_CONTEXT *)); + +static const DB_AIO_BACKEND __aio_posix_backend = { + "posixaio", + __aio_posix_submit, + __aio_posix_reap, + NULL, /* cancel: unused */ + __aio_posix_destroy +}; + +/* + * __os_aio_posix_init -- + * Attach the POSIX aio backend to a context. + * + * PUBLIC: int __os_aio_posix_init __P((ENV *, DB_AIO_CONTEXT *)); + */ +int +__os_aio_posix_init(env, ctx) + ENV *env; + DB_AIO_CONTEXT *ctx; +{ + AIO_POSIX_STATE *st; + int ret; + + if ((ret = __os_calloc(env, 1, sizeof(*st), &st)) != 0) + return (ret); + st->depth = ctx->depth == 0 ? DB_AIO_DEFAULT_DEPTH : ctx->depth; + if ((ret = __os_calloc(env, + st->depth, sizeof(AIO_POSIX_OP), &st->ops)) != 0) { + __os_free(env, st); + return (ret); + } + ctx->priv = st; + ctx->backend = &__aio_posix_backend; + return (0); +} + +/* + * __aio_posix_submit -- + * Find a free slot, fill its aiocb, and hand it to aio_read/aio_write. + * Returns DB_OPNOTSUP if the table is full (caller writes synchronously). + */ +static int +__aio_posix_submit(env, ctx, aio) + ENV *env; + DB_AIO_CONTEXT *ctx; + DB_AIO_OP *aio; +{ + AIO_POSIX_STATE *st; + AIO_POSIX_OP *slot; + u_int32_t i; + int r; + + st = ctx->priv; + for (slot = NULL, i = 0; i < st->depth; i++) + if (!st->ops[i].active) { + slot = &st->ops[i]; + break; + } + if (slot == NULL) + return (DB_OPNOTSUP); /* table full: caller does it inline. 
*/ + + memset(&slot->cb, 0, sizeof(slot->cb)); + slot->cb.aio_fildes = aio->fhp->fd; + slot->cb.aio_buf = aio->buf; + slot->cb.aio_nbytes = aio->pagesize; + slot->cb.aio_offset = (off_t)aio->pgno * aio->pagesize; + slot->cb.aio_sigevent.sigev_notify = SIGEV_NONE; + slot->cookie = aio->cookie; + slot->done = aio->done; + slot->len = aio->pagesize; + slot->op = aio->op; + + r = aio->op == DB_IO_WRITE ? + aio_write(&slot->cb) : aio_read(&slot->cb); + if (r != 0) + return (DB_OPNOTSUP); /* submit refused: caller does it. */ + + slot->active = 1; + ctx->inflight++; + return (0); +} + +/* + * __aio_posix_finish_slot -- + * Collect one completed slot's result, run its completion, free the slot. + */ +static void +__aio_posix_finish_slot(env, ctx, slot, aio_err) + ENV *env; + DB_AIO_CONTEXT *ctx; + AIO_POSIX_OP *slot; + int aio_err; +{ + ssize_t n; + int io_ret; + + /* + * aio_error returns 0 on success, an errno on failure. On success + * aio_return yields the byte count and must be called exactly once to + * release the kernel/library resources; treat a short transfer as EIO. + */ + if (aio_err == 0) { + n = aio_return(&slot->cb); + io_ret = (n == (ssize_t)slot->len) ? 0 : EIO; + } else { + (void)aio_return(&slot->cb); + io_ret = aio_err; + } + if (slot->done != NULL) + slot->done(env, slot->cookie, io_ret); + slot->active = 0; + if (ctx->inflight != 0) + ctx->inflight--; +} + +/* + * __aio_posix_reap -- + * Reap up to "max" completions (max < 0 means all ready). When "wait" + * is set and nothing is ready, block in aio_suspend on the in-flight set. + */ +static int +__aio_posix_reap(env, ctx, max, wait) + ENV *env; + DB_AIO_CONTEXT *ctx; + int max, wait; +{ + AIO_POSIX_STATE *st; + const struct aiocb **list; + u_int32_t i; + int err, got, nlist; + + st = ctx->priv; + got = 0; + + for (;;) { + if (max >= 0 && got >= max) + break; + + /* Collect every still-active slot that has finished. 
*/ + nlist = 0; + for (i = 0; i < st->depth; i++) { + if (!st->ops[i].active) + continue; + err = aio_error(&st->ops[i].cb); + if (err == EINPROGRESS) + continue; + __aio_posix_finish_slot(env, ctx, &st->ops[i], err); + if (++got >= max && max >= 0) + return (got); + } + + if (got > 0 || !wait || ctx->inflight == 0) + break; + + /* + * Nothing was ready but the caller asked to wait and ops are + * outstanding: build the in-flight aiocb list and block until + * at least one finishes, then loop to harvest it. + */ + if ((__os_malloc(env, + st->depth * sizeof(*list), &list)) != 0) + break; + for (nlist = 0, i = 0; i < st->depth; i++) + if (st->ops[i].active) + list[nlist++] = &st->ops[i].cb; + if (nlist == 0) { + __os_free(env, list); + break; + } + while (aio_suspend(list, nlist, NULL) != 0 && errno == EINTR) + ; + __os_free(env, list); + } + return (got); +} + +/* + * __aio_posix_destroy -- + * Drain all in-flight ops, then free the context state. + */ +static int +__aio_posix_destroy(env, ctx) + ENV *env; + DB_AIO_CONTEXT *ctx; +{ + AIO_POSIX_STATE *st; + + if ((st = ctx->priv) == NULL) + return (0); + while (ctx->inflight != 0) + (void)__aio_posix_reap(env, ctx, -1, 1); + __os_free(env, st->ops); + __os_free(env, st); + ctx->priv = NULL; + return (0); +} + +#else /* !HAVE_AIO_POSIX */ + +/* + * __os_aio_posix_init -- + * POSIX aio not configured. + * + * PUBLIC: int __os_aio_posix_init __P((ENV *, DB_AIO_CONTEXT *)); + */ +int +__os_aio_posix_init(env, ctx) + ENV *env; + DB_AIO_CONTEXT *ctx; +{ + COMPQUIET(env, NULL); + COMPQUIET(ctx, NULL); + return (DB_OPNOTSUP); +} + +#endif /* HAVE_AIO_POSIX */ diff --git a/src/os/os_aio_uring.c b/src/os/os_aio_uring.c new file mode 100644 index 000000000..2152bc2a2 --- /dev/null +++ b/src/os/os_aio_uring.c @@ -0,0 +1,223 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Linux io_uring backend for the os_aio abstraction. 
+ * + * The submit/reap mechanics (io_uring_prep_read/write, SQE acquisition + * with a submit-and-retry on a full ring, and the CQE drain that maps + * each completion back to its op) are adapted from the XTC Project's + * libxtc (src/io/io_uring.c, ISC License) with the author's permission; + * the readiness/poll machinery there is dropped since the buffer pool + * only needs file reads/writes. + * + * Built only when configured with io_uring support (HAVE_IO_URING); + * otherwise this file is an empty translation unit and contexts use + * the synchronous fallback in os_aio.c. + */ +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/os_aio.h" + +#ifdef HAVE_IO_URING + +#include + +typedef struct __aio_uring_state { + struct io_uring ring; +} AIO_URING_STATE; + +/* Per-op completion record; user_data points here until reaped. */ +typedef struct __aio_uring_op { + void *cookie; + db_aio_done_fn done; + u_int32_t len; /* expected transfer (read/write). */ + int op; /* DB_IO_READ / DB_IO_WRITE. */ +} AIO_URING_OP; + +static int __aio_uring_submit __P((ENV *, DB_AIO_CONTEXT *, DB_AIO_OP *)); +static int __aio_uring_reap __P((ENV *, DB_AIO_CONTEXT *, int, int)); +static int __aio_uring_destroy __P((ENV *, DB_AIO_CONTEXT *)); + +static const DB_AIO_BACKEND __aio_uring_backend = { + "io_uring", + __aio_uring_submit, + __aio_uring_reap, + NULL, /* cancel: unused */ + __aio_uring_destroy +}; + +/* + * __os_aio_uring_init -- + * Bring up an io_uring and attach the backend to the context. 
+ */ +int +__os_aio_uring_init(env, ctx) + ENV *env; + DB_AIO_CONTEXT *ctx; +{ + AIO_URING_STATE *st; + int ret; + + if ((ret = __os_calloc(env, 1, sizeof(*st), &st)) != 0) + return (ret); + if (io_uring_queue_init((unsigned)ctx->depth, &st->ring, 0) < 0) { + ret = __os_get_errno(); + __os_free(env, st); + return (ret); + } + ctx->priv = st; + ctx->backend = &__aio_uring_backend; + return (0); +} + +/* + * __aio_uring_get_sqe -- + * Get an SQE, flushing the ring once if it is momentarily full. + */ +static struct io_uring_sqe * +__aio_uring_get_sqe(st) + AIO_URING_STATE *st; +{ + struct io_uring_sqe *sqe; + + if ((sqe = io_uring_get_sqe(&st->ring)) == NULL) { + (void)io_uring_submit(&st->ring); + sqe = io_uring_get_sqe(&st->ring); + } + return (sqe); +} + +static int +__aio_uring_submit(env, ctx, aio) + ENV *env; + DB_AIO_CONTEXT *ctx; + DB_AIO_OP *aio; +{ + AIO_URING_STATE *st; + AIO_URING_OP *rec; + struct io_uring_sqe *sqe; + off_t off; + int ret; + + st = ctx->priv; + if ((sqe = __aio_uring_get_sqe(st)) == NULL) + return (EAGAIN); + + off = (off_t)aio->pgno * aio->pagesize; + switch (aio->op) { + case DB_IO_READ: + io_uring_prep_read(sqe, aio->fhp->fd, + aio->buf, aio->pagesize, (unsigned long long)off); + break; + case DB_IO_WRITE: + io_uring_prep_write(sqe, aio->fhp->fd, + aio->buf, aio->pagesize, (unsigned long long)off); + break; + default: + return (EINVAL); + } + + if ((ret = __os_calloc(env, 1, sizeof(*rec), &rec)) != 0) + return (ret); + rec->cookie = aio->cookie; + rec->done = aio->done; + rec->len = aio->pagesize; + rec->op = aio->op; + io_uring_sqe_set_data(sqe, rec); + + (void)io_uring_submit(&st->ring); + ctx->inflight++; + return (0); +} + +/* + * __aio_uring_reap -- + * Drain up to "max" completions (max < 0 means all currently ready), + * invoking each op's completion callback. If "wait" is set and ops + * are outstanding, block for at least one completion first. 
+ */ +static int +__aio_uring_reap(env, ctx, max, wait) + ENV *env; + DB_AIO_CONTEXT *ctx; + int max, wait; +{ + AIO_URING_STATE *st; + AIO_URING_OP *rec; + struct io_uring_cqe *cqe; + int got, ret; + + st = ctx->priv; + got = 0; + + if (wait && ctx->inflight != 0) { + if (io_uring_wait_cqe(&st->ring, &cqe) < 0) + return (got); + } else if (io_uring_peek_cqe(&st->ring, &cqe) != 0) + return (got); + + for (;;) { + rec = io_uring_cqe_get_data(cqe); + if (rec != NULL) { + /* + * cqe->res is the byte count (>= 0) or a negative + * errno; a short transfer is treated as an I/O error. + */ + if (cqe->res < 0) + ret = -cqe->res; + else + ret = ((u_int32_t)cqe->res == rec->len) ? + 0 : EIO; + if (rec->done != NULL) + rec->done(env, rec->cookie, ret); + __os_free(env, rec); + if (ctx->inflight != 0) + ctx->inflight--; + got++; + } + io_uring_cqe_seen(&st->ring, cqe); + + if (max >= 0 && got >= max) + break; + if (io_uring_peek_cqe(&st->ring, &cqe) != 0) + break; + } + return (got); +} + +static int +__aio_uring_destroy(env, ctx) + ENV *env; + DB_AIO_CONTEXT *ctx; +{ + AIO_URING_STATE *st; + + if ((st = ctx->priv) != NULL) { + /* Drain any stragglers so their records are freed. */ + (void)__aio_uring_reap(env, ctx, -1, 0); + io_uring_queue_exit(&st->ring); + __os_free(env, st); + ctx->priv = NULL; + } + return (0); +} + +#else /* !HAVE_IO_URING */ + +/* + * __os_aio_uring_init -- + * io_uring not configured; leave the context on the synchronous + * fallback. + */ +int +__os_aio_uring_init(env, ctx) + ENV *env; + DB_AIO_CONTEXT *ctx; +{ + COMPQUIET(env, NULL); + COMPQUIET(ctx, NULL); + return (DB_OPNOTSUP); +} + +#endif /* HAVE_IO_URING */