Use #if !EV_MULTIPLICITY instead of #ifndef because it should be defined always.

[software/libev.git] / ev.pod
diff --git a/ev.pod b/ev.pod

index b98a930aa621be1dc30f2e707b662bdfa264e4c7..9ef6da23f19aa8e8a1b5d474d3a199a24fc7aa7a 100644 (file)
--- a/ev.pod
+++ b/ev.pod
@@ -6,7 +6,7 @@ libev - a high performance full-featured event loop written in C
  
    #include <ev.h>
  
-=head1 EXAMPLE PROGRAM
+=head2 EXAMPLE PROGRAM
  
    #include <ev.h>
  
@@ -67,7 +67,7 @@ watchers>, which are relatively small C structures you initialise with the
  details of the event, and then hand it over to libev by I<starting> the
  watcher.
  
-=head1 FEATURES
+=head2 FEATURES
  
  Libev supports C<select>, C<poll>, the Linux-specific C<epoll>, the
  BSD-specific C<kqueue> and the Solaris-specific event port mechanisms
@@ -84,7 +84,7 @@ It also is quite fast (see this
  L<benchmark|http://libev.schmorp.de/bench.html> comparing it to libevent
  for example).
  
-=head1 CONVENTIONS
+=head2 CONVENTIONS
  
  Libev is very configurable. In this manual the default configuration will
  be described, which supports multiple event loops. For more info about
@@ -93,7 +93,7 @@ this manual. If libev was configured without support for multiple event
  loops, then all functions taking an initial argument of name C<loop>
  (which is always of type C<struct ev_loop *>) will not have this argument.
  
-=head1 TIME REPRESENTATION
+=head2 TIME REPRESENTATION
  
  Libev represents time as a single floating point number, representing the
  (fractional) number of seconds since the (POSIX) epoch (somewhere near
@@ -117,6 +117,12 @@ Returns the current time as libev would use it. Please note that the
  C<ev_now> function is usually faster and also often returns the timestamp
  you actually want to know.
  
+=item ev_sleep (ev_tstamp interval)
+
+Sleep for the given interval: The current thread will be blocked until
+either it is interrupted or the given time interval has passed. Basically
+this is a subsecond-resolution C<sleep ()>.
+
  =item int ev_version_major ()
  
  =item int ev_version_minor ()
@@ -256,6 +262,13 @@ flags. If that is troubling you, check C<ev_backend ()> afterwards).
  If you don't know what event loop to use, use the one returned from this
  function.
  
+The default loop is the only loop that can handle C<ev_signal> and
+C<ev_child> watchers, and to do this, it always registers a handler
+for C<SIGCHLD>. If this is a problem for your app you can either
+create a dynamic loop with C<ev_loop_new> that doesn't do that, or you
+can simply overwrite the C<SIGCHLD> signal handler I<after> calling
+C<ev_default_init>.
+
  The flags argument can be used to specify special behaviour or specific
  backends to use, and is usually specified as C<0> (or C<EVFLAG_AUTO>).
  
@@ -302,15 +315,24 @@ environment variable.
  This is your standard select(2) backend. Not I<completely> standard, as
  libev tries to roll its own fd_set with no limits on the number of fds,
  but if that fails, expect a fairly low limit on the number of fds when
-using this backend. It doesn't scale too well (O(highest_fd)), but its usually
-the fastest backend for a low number of fds.
+using this backend. It doesn't scale too well (O(highest_fd)), but it's
+usually the fastest backend for a low number of (low-numbered :) fds.
+
+To get good performance out of this backend you need a high amount of
+parallelism (most of the file descriptors should be busy). If you are
+writing a server, you should C<accept ()> in a loop to accept as many
+connections as possible during one iteration. You might also want to have
+a look at C<ev_set_io_collect_interval ()> to increase the number of
+readiness notifications you get per iteration.
  
  =item C<EVBACKEND_POLL>    (value 2, poll backend, available everywhere except on windows)
  
-And this is your standard poll(2) backend. It's more complicated than
-select, but handles sparse fds better and has no artificial limit on the
-number of fds you can use (except it will slow down considerably with a
-lot of inactive fds). It scales similarly to select, i.e. O(total_fds).
+And this is your standard poll(2) backend. It's more complicated
+than select, but handles sparse fds better and has no artificial
+limit on the number of fds you can use (except it will slow down
+considerably with a lot of inactive fds). It scales similarly to select,
+i.e. O(total_fds). See the entry for C<EVBACKEND_SELECT>, above, for
+performance tips.
  
  =item C<EVBACKEND_EPOLL>   (value 4, Linux)
  
@@ -319,8 +341,8 @@ but it scales phenomenally better. While poll and select usually scale
  like O(total_fds) where n is the total number of fds (or the highest fd),
  epoll scales either O(1) or O(active_fds). The epoll design has a number
  of shortcomings, such as silently dropping events in some hard-to-detect
-cases and rewuiring a syscall per fd change, no fork support and bad
-support for dup:
+cases and requiring a syscall per fd change, no fork support and bad
+support for dup.
  
  While stopping, setting and starting an I/O watcher in the same iteration
  will result in some caching, there is still a syscall per such incident
@@ -332,27 +354,49 @@ Please note that epoll sometimes generates spurious notifications, so you
  need to use non-blocking I/O or other means to avoid blocking when no data
  (or space) is available.
  
+Best performance from this backend is achieved by not unregistering all
+watchers for a file descriptor until it has been closed, if possible, i.e.
+keep at least one watcher active per fd at all times.
+
+While nominally embeddable in other event loops, this feature is broken in
+all kernel versions tested so far.
+
  =item C<EVBACKEND_KQUEUE>  (value 8, most BSD clones)
  
  Kqueue deserves special mention, as at the time of this writing, it
-was broken on I<all> BSDs (usually it doesn't work with anything but
-sockets and pipes, except on Darwin, where of course it's completely
-useless. On NetBSD, it seems to work for all the FD types I tested, so it
-is used by default there). For this reason it's not being "autodetected"
+was broken on all BSDs except NetBSD (usually it doesn't work reliably
+with anything but sockets and pipes, except on Darwin, where of course
+it's completely useless). For this reason it's not being "autodetected"
  unless you explicitly specify it explicitly in the flags (i.e. using
  C<EVBACKEND_KQUEUE>) or libev was compiled on a known-to-be-good (-enough)
  system like NetBSD.
  
+You still can embed kqueue into a normal poll or select backend and use it
+only for sockets (after having made sure that sockets work with kqueue on
+the target platform). See C<ev_embed> watchers for more info.
+
  It scales in the same way as the epoll backend, but the interface to the
-kernel is more efficient (which says nothing about its actual speed,
-of course). While stopping, setting and starting an I/O watcher does
-never cause an extra syscall as with epoll, it still adds up to two event
-changes per incident, support for C<fork ()> is very bad and it drops fds
-silently in similarly hard-to-detetc cases.
+kernel is more efficient (which says nothing about its actual speed, of
+course). While stopping, setting and starting an I/O watcher does never
+cause an extra syscall as with C<EVBACKEND_EPOLL>, it still adds up to
+two event changes per incident, support for C<fork ()> is very bad and it
+drops fds silently in similarly hard-to-detect cases.
+
+This backend usually performs well under most conditions.
+
+While nominally embeddable in other event loops, this doesn't work
+everywhere, so you might need to test for this. And since it is broken
+almost everywhere, you should only use it when you have a lot of sockets
+(for which it usually works), by embedding it into another event loop
+(e.g. C<EVBACKEND_SELECT> or C<EVBACKEND_POLL>) and using it only for
+sockets.
  
  =item C<EVBACKEND_DEVPOLL> (value 16, Solaris 8)
  
-This is not implemented yet (and might never be).
+This is not implemented yet (and might never be, unless you send me an
+implementation). According to reports, C</dev/poll> only supports sockets
+and is not embeddable, which would limit the usefulness of this backend
+immensely.
  
  =item C<EVBACKEND_PORT>    (value 32, Solaris 10)
  
@@ -363,18 +407,28 @@ Please note that solaris event ports can deliver a lot of spurious
  notifications, so you need to use non-blocking I/O or other means to avoid
  blocking when no data (or space) is available.
  
+While this backend scales well, it requires one system call per active
+file descriptor per loop iteration. For small and medium numbers of file
+descriptors a "slow" C<EVBACKEND_SELECT> or C<EVBACKEND_POLL> backend
+might perform better.
+
+On the positive side, ignoring the spurious readiness notifications, this
+backend actually performed to specification in all tests and is fully
+embeddable, which is a rare feat among the OS-specific backends.
+
  =item C<EVBACKEND_ALL>
  
  Try all backends (even potentially broken ones that wouldn't be tried
  with C<EVFLAG_AUTO>). Since this is a mask, you can do stuff such as
  C<EVBACKEND_ALL & ~EVBACKEND_KQUEUE>.
  
+It is definitely not recommended to use this flag.
+
  =back
  
  If one or more of these are ored into the flags value, then only these
-backends will be tried (in the reverse order as given here). If none are
-specified, most compiled-in backend will be tried, usually in reverse
-order of their flag values :)
+backends will be tried (in the reverse order as listed here). If none are
+specified, all backends in C<ev_recommended_backends ()> will be tried.
  
  The most typical usage is like this:
  
@@ -431,14 +485,16 @@ earlier call to C<ev_loop_new>.
  
  =item ev_default_fork ()
  
-This function reinitialises the kernel state for backends that have
-one. Despite the name, you can call it anytime, but it makes most sense
-after forking, in either the parent or child process (or both, but that
-again makes little sense).
+This function sets a flag that causes subsequent C<ev_loop> iterations
+to reinitialise the kernel state for backends that have one. Despite the
+name, you can call it anytime, but it makes most sense after forking, in
+the child process (or both child and parent, but that again makes little
+sense). You I<must> call it in the child before using any of the libev
+functions, and it will only take effect at the next C<ev_loop> iteration.
  
-You I<must> call this function in the child process after forking if and
-only if you want to use the event library in both processes. If you just
-fork+exec, you don't have to call it.
+On the other hand, you need to call this function in the child process
+if and only if you want to use the event library in the child. If you
+just fork+exec, you don't have to call it at all.
  
  The function itself is quite fast and it's usually not a problem to call
  it just in case after a fork. To make this easy, the function will fit in
@@ -446,10 +502,6 @@ quite nicely into a call to C<pthread_atfork>:
  
      pthread_atfork (0, 0, ev_default_fork);
  
-At the moment, C<EVBACKEND_SELECT> and C<EVBACKEND_POLL> are safe to use
-without calling this function, so if you force one of those backends you
-do not need to care.
-
  =item ev_loop_fork (loop)
  
  Like C<ev_default_fork>, but acts on an event loop created by
@@ -509,12 +561,16 @@ usually a better approach for this kind of thing.
  Here are the gory details of what C<ev_loop> does:
  
     - Before the first iteration, call any pending watchers.
-   * If there are no active watchers (reference count is zero), return.
-   - Queue all prepare watchers and then call all outstanding watchers.
+   * If EVFLAG_FORKCHECK was used, check for a fork.
+   - If a fork was detected, queue and call all fork watchers.
+   - Queue and call all prepare watchers.
     - If we have been forked, recreate the kernel state.
     - Update the kernel state with all outstanding changes.
     - Update the "event loop time".
-   - Calculate for how long to block.
+   - Calculate for how long to sleep or block, if at all
+     (active idle watchers, EVLOOP_NONBLOCK or not having
+     any active watchers at all will result in not sleeping).
+   - Sleep if the I/O and timer collect interval say so.
     - Block the process, waiting for any events.
     - Queue all outstanding I/O (fd) events.
     - Update the "event loop time" and do time jump handling.
@@ -525,10 +581,11 @@ Here are the gory details of what C<ev_loop> does:
     - Call all queued watchers in reverse order (i.e. check watchers first).
       Signals and child watchers are implemented as I/O watchers, and will
       be handled here by queueing them when their watcher gets executed.
-   - If ev_unloop has been called or EVLOOP_ONESHOT or EVLOOP_NONBLOCK
-     were used, return, otherwise continue with step *.
+   - If ev_unloop has been called, or EVLOOP_ONESHOT or EVLOOP_NONBLOCK
+     were used, or there are no active watchers, return, otherwise
+     continue with step *.
  
-Example: Queue some jobs and then loop until no events are outsanding
+Example: Queue some jobs and then loop until no events are outstanding
  anymore.
  
     ... queue jobs here, make sure they register event watchers as long
@@ -543,6 +600,8 @@ has processed all outstanding events). The C<how> argument must be either
  C<EVUNLOOP_ONE>, which will make the innermost C<ev_loop> call return, or
  C<EVUNLOOP_ALL>, which will make all nested C<ev_loop> calls return.
  
+This "unloop state" will be cleared when entering C<ev_loop> again.
+
  =item ev_ref (loop)
  
  =item ev_unref (loop)
@@ -556,7 +615,9 @@ example, libev itself uses this for its internal signal pipe: It is not
  visible to the libev user and should not keep C<ev_loop> from exiting if
  no event watchers registered by it are active. It is also an excellent
  way to do this for generic recurring timers or from within third-party
-libraries. Just remember to I<unref after start> and I<ref before stop>.
+libraries. Just remember to I<unref after start> and I<ref before stop>
+(but only if the watcher wasn't active before, or was active before,
+respectively).
  
  Example: Create a signal watcher, but keep it from keeping C<ev_loop>
  running when nothing else is active.
@@ -571,6 +632,42 @@ Example: For some weird reason, unregister the above signal handler again.
    ev_ref (loop);
    ev_signal_stop (loop, &exitsig);
  
+=item ev_set_io_collect_interval (loop, ev_tstamp interval)
+
+=item ev_set_timeout_collect_interval (loop, ev_tstamp interval)
+
+These advanced functions influence the time that libev will spend waiting
+for events. Both are by default C<0>, meaning that libev will try to
+invoke timer/periodic callbacks and I/O callbacks with minimum latency.
+
+Setting these to a higher value (the C<interval> I<must> be >= C<0>)
+allows libev to delay invocation of I/O and timer/periodic callbacks to
+increase efficiency of loop iterations.
+
+The background is that sometimes your program runs just fast enough to
+handle one (or very few) event(s) per loop iteration. While this makes
+the program responsive, it also wastes a lot of CPU time to poll for new
+events, especially with backends like C<select ()> which have a high
+overhead for the actual polling but can deliver many events at once.
+
+By setting a higher I<io collect interval> you allow libev to spend more
+time collecting I/O events, so you can handle more events per iteration,
+at the cost of increasing latency. Timeouts (both C<ev_periodic> and
+C<ev_timer>) will not be affected. Setting this to a non-null value will
+introduce an additional C<ev_sleep ()> call into most loop iterations.
+
+Likewise, by setting a higher I<timeout collect interval> you allow libev
+to spend more time collecting timeouts, at the expense of increased
+latency (the watcher callback will be called later). C<ev_io> watchers
+will not be affected. Setting this to a non-null value will not introduce
+any overhead in libev.
+
+Many (busy) programs can usually benefit by setting the io collect
+interval to a value near C<0.1> or so, which is often enough for
+interactive servers (of course not for games), likewise for timeouts. It
+usually doesn't make much sense to set it to a lower value than C<0.01>,
+as this approaches the timing granularity of most systems.
+
  =back
  
  
@@ -905,12 +1002,6 @@ fd as you want (as long as you don't confuse yourself). Setting all file
  descriptors to non-blocking mode is also usually a good idea (but not
  required if you know what you are doing).
  
-You have to be careful with dup'ed file descriptors, though. Some backends
-(the linux epoll backend is a notable example) cannot handle dup'ed file
-descriptors correctly if you register interest in two or more fds pointing
-to the same underlying file/socket/etc. description (that is, they share
-the same underlying "file open").
-
  If you must do this, then force the use of a known-to-be-good backend
  (at the time of this writing, this includes only C<EVBACKEND_SELECT> and
  C<EVBACKEND_POLL>).
@@ -951,15 +1042,15 @@ This is how one would do it normally anyway, the important point is that
  the libev application should not optimise around libev but should leave
  optimisations to libev.
  
-=head3 Ths special problem of dup'ed file descriptors
+=head3 The special problem of dup'ed file descriptors
  
  Some backends (e.g. epoll), cannot register events for file descriptors,
-but only events for the underlying file descriptions. That menas when you
-have C<dup ()>'ed file descriptors and register events for them, only one
-file descriptor might actually receive events.
+but only events for the underlying file descriptions. That means when you
+have C<dup ()>'ed file descriptors or weirder constellations, and register
+events for them, only one file descriptor might actually receive events.
  
-There is no workaorund possible except not registering events
-for potentially C<dup ()>'ed file descriptors or to resort to
+There is no workaround possible except not registering events
+for potentially C<dup ()>'ed file descriptors, or to resort to
  C<EVBACKEND_SELECT> or C<EVBACKEND_POLL>.
  
  =head3 The special problem of fork
@@ -996,6 +1087,8 @@ The events being watched.
  
  =back
  
+=head3 Examples
+
  Example: Call C<stdin_readable_cb> when STDIN_FILENO has become, well
  readable, but only once. Since it is likely line-buffered, you could
  attempt to read a whole line in the callback.
@@ -1102,6 +1195,8 @@ which is also when any modifications are taken into account.
  
  =back
  
+=head3 Examples
+
  Example: Create a timer that fires after 60 seconds.
  
    static void
@@ -1268,6 +1363,8 @@ trigger next.
  
  =back
  
+=head3 Examples
+
  Example: Call a callback every hour, or, more precisely, whenever the
  system clock is divisible by 3600. The callback invocation times have
  potentially a lot of jittering, but good long-term stability.
@@ -1369,6 +1466,8 @@ C<waitpid> and C<sys/wait.h> documentation for details).
  
  =back
  
+=head3 Examples
+
  Example: Try to exit cleanly on SIGINT and SIGTERM.
  
    static void
@@ -1418,6 +1517,39 @@ to fall back to regular polling again even with inotify, but changes are
  usually detected immediately, and if the file exists there will be no
  polling.
  
+=head3 Inotify
+
+When C<inotify (7)> support has been compiled into libev (generally only
+available on Linux) and present at runtime, it will be used to speed up
+change detection where possible. The inotify descriptor will be created lazily
+when the first C<ev_stat> watcher is being started.
+
+Inotify presence does not change the semantics of C<ev_stat> watchers
+except that changes might be detected earlier, and that in some cases
+regular C<stat> calls can be avoided. Even in the presence of inotify
+support there are many cases where libev has to resort to regular C<stat>
+polling.
+
+(There is no support for kqueue, as apparently it cannot be used to
+implement this functionality, due to the requirement of having a file
+descriptor open on the object at all times).
+
+=head3 The special problem of stat time resolution
+
+The C<stat ()> syscall only supports full-second resolution portably, and
+even on systems where the resolution is higher, many filesystems still
+only support whole seconds.
+
+That means that, if the time is the only thing that changes, you might
+miss updates: on the first update, C<ev_stat> detects a change and calls
+your callback, which does something. When there is another update within
+the same second, C<ev_stat> will be unable to detect it.
+
+The solution to this is to delay acting on a change for a second (or till
+the next second boundary), using a roughly one-second delay C<ev_timer>
+(C<ev_timer_set (w, 0., 1.01); ev_timer_again (loop, w)>). The C<.01>
+is added to work around small timing inconsistencies of some operating
+systems.
+
  =head3 Watcher-Specific Functions and Data Members
  
  =over 4
@@ -1465,6 +1597,8 @@ The filesystem path that is being watched.
  
  =back
  
+=head3 Examples
+
  Example: Watch C</etc/passwd> for attribute changes.
  
    static void
@@ -1486,9 +1620,37 @@ Example: Watch C</etc/passwd> for attribute changes.
    ...
    ev_stat passwd;
  
-  ev_stat_init (&passwd, passwd_cb, "/etc/passwd");
+  ev_stat_init (&passwd, passwd_cb, "/etc/passwd", 0.);
    ev_stat_start (loop, &passwd);
  
+Example: Like above, but additionally use a one-second delay so we do not
+miss updates (however, frequent updates will delay processing, too, so
+one might do the work both on C<ev_stat> callback invocation I<and> on
+C<ev_timer> callback invocation).
+
+  static ev_stat passwd;
+  static ev_timer timer;
+
+  static void
+  timer_cb (EV_P_ ev_timer *w, int revents)
+  {
+    ev_timer_stop (EV_A_ w);
+
+    /* now it's one second after the most recent passwd change */
+  }
+
+  static void
+  stat_cb (EV_P_ ev_stat *w, int revents)
+  {
+    /* reset the one-second timer */
+    ev_timer_again (EV_A_ &timer);
+  }
+
+  ...
+  ev_stat_init (&passwd, stat_cb, "/etc/passwd", 0.);
+  ev_stat_start (loop, &passwd);
+  ev_timer_init (&timer, timer_cb, 0., 1.01);
+
  
  =head2 C<ev_idle> - when you've got nothing better to do...
  
@@ -1523,6 +1685,8 @@ believe me.
  
  =back
  
+=head3 Examples
+
  Example: Dynamically allocate an C<ev_idle> watcher, start it, and in the
  callback, free it. Also, use no error checking, as usual.
  
@@ -1583,11 +1747,11 @@ It is recommended to give C<ev_check> watchers highest (C<EV_MAXPRI>)
  priority, to ensure that they are being run before any other watchers
  after the poll. Also, C<ev_check> watchers (and C<ev_prepare> watchers,
  too) should not activate ("feed") events into libev. While libev fully
-supports this, they will be called before other C<ev_check> watchers did
-their job. As C<ev_check> watchers are often used to embed other event
-loops those other event loops might be in an unusable state until their
-C<ev_check> watcher ran (always remind yourself to coexist peacefully with
-others).
+supports this, they will be called before other C<ev_check> watchers
+did their job. As C<ev_check> watchers are often used to embed other
+(non-libev) event loops those other event loops might be in an unusable
+state until their C<ev_check> watcher ran (always remind yourself to
+coexist peacefully with others).
  
  =head3 Watcher-Specific Functions and Data Members
  
@@ -1603,6 +1767,8 @@ macros, but using them is utterly, utterly and completely pointless.
  
  =back
  
+=head3 Examples
+
  There are a number of principal ways to embed other event loops or modules
  into libev. Here are some ideas on how to include libadns into libev
  (there is a Perl module named C<EV::ADNS> that does this, which you could
@@ -1736,7 +1902,7 @@ this.
  This is a rather advanced watcher type that lets you embed one event loop
  into another (currently only C<ev_io> events are supported in the embedded
  loop, other types of watchers might be handled in a delayed or incorrect
-fashion and must not be used). (See portability notes, below).
+fashion and must not be used).
  
  There are primarily two reasons you would want that: work around bugs and
  prioritise I/O.
@@ -1780,42 +1946,7 @@ portable one.
  So when you want to use this feature you will always have to be prepared
  that you cannot get an embeddable loop. The recommended way to get around
  this is to have a separate variables for your embeddable loop, try to
-create it, and if that fails, use the normal loop for everything:
-
-  struct ev_loop *loop_hi = ev_default_init (0);
-  struct ev_loop *loop_lo = 0;
-  struct ev_embed embed;
-  
-  // see if there is a chance of getting one that works
-  // (remember that a flags value of 0 means autodetection)
-  loop_lo = ev_embeddable_backends () & ev_recommended_backends ()
-    ? ev_loop_new (ev_embeddable_backends () & ev_recommended_backends ())
-    : 0;
-
-  // if we got one, then embed it, otherwise default to loop_hi
-  if (loop_lo)
-    {
-      ev_embed_init (&embed, 0, loop_lo);
-      ev_embed_start (loop_hi, &embed);
-    }
-  else
-    loop_lo = loop_hi;
-
-=head2 Portability notes
-
-Kqueue is nominally embeddable, but this is broken on all BSDs that I
-tried, in various ways. Usually the embedded event loop will simply never
-receive events, sometimes it will only trigger a few times, sometimes in a
-loop. Epoll is also nominally embeddable, but many Linux kernel versions
-will always eport the epoll fd as ready, even when no events are pending.
-
-While libev allows embedding these backends (they are contained in
-C<ev_embeddable_backends ()>), take extreme care that it will actually
-work.
-
-When in doubt, create a dynamic event loop forced to use sockets (this
-usually works) and possibly another thread and a pipe or so to report to
-your main event loop.
+create it, and if that fails, use the normal loop for everything.
  
  =head3 Watcher-Specific Functions and Data Members
  
@@ -1843,6 +1974,54 @@ The embedded event loop.
  
  =back
  
+=head3 Examples
+
+Example: Try to get an embeddable event loop and embed it into the default
+event loop. If that is not possible, use the default loop. The default
+loop is stored in C<loop_hi>, while the embeddable loop is stored in
+C<loop_lo> (which is C<loop_hi> in the case no embeddable loop can be
+used).
+
+  struct ev_loop *loop_hi = ev_default_init (0);
+  struct ev_loop *loop_lo = 0;
+  struct ev_embed embed;
+  
+  // see if there is a chance of getting one that works
+  // (remember that a flags value of 0 means autodetection)
+  loop_lo = ev_embeddable_backends () & ev_recommended_backends ()
+    ? ev_loop_new (ev_embeddable_backends () & ev_recommended_backends ())
+    : 0;
+
+  // if we got one, then embed it, otherwise default to loop_hi
+  if (loop_lo)
+    {
+      ev_embed_init (&embed, 0, loop_lo);
+      ev_embed_start (loop_hi, &embed);
+    }
+  else
+    loop_lo = loop_hi;
+
+Example: Check if kqueue is available but not recommended and create
+a kqueue backend for use with sockets (which usually work with any
+kqueue implementation). Store the kqueue/socket-only event loop in
+C<loop_socket>. (One might optionally use C<EVFLAG_NOENV>, too).
+
+  struct ev_loop *loop = ev_default_init (0);
+  struct ev_loop *loop_socket = 0;
+  struct ev_embed embed;
+  
+  if (ev_supported_backends () & ~ev_recommended_backends () & EVBACKEND_KQUEUE)
+    if ((loop_socket = ev_loop_new (EVBACKEND_KQUEUE)))
+      {
+        ev_embed_init (&embed, 0, loop_socket);
+        ev_embed_start (loop, &embed);
+      }
+
+  if (!loop_socket)
+    loop_socket = loop;
+
+  // now use loop_socket for all sockets, and loop for everything else
+
  
  =head2 C<ev_fork> - the audacity to resume the event loop after a fork
  
@@ -2299,6 +2478,11 @@ be attempted. This effectively replaces C<gettimeofday> by C<clock_get
  (CLOCK_REALTIME, ...)> and will not normally affect correctness. See the
  note about libraries in the description of C<EV_USE_MONOTONIC>, though.
  
+=item EV_USE_NANOSLEEP
+
+If defined to be C<1>, libev will assume that C<nanosleep ()> is available
+and will use it for delays. Otherwise it will use C<select ()>.
+
  =item EV_USE_SELECT
  
  If undefined or defined to be C<1>, libev will compile in support for the
@@ -2326,6 +2510,14 @@ C<_get_osfhandle> on the fd to convert it to an OS handle. Otherwise,
  it is assumed that all these functions actually work on fds, even
  on win32. Should not be defined on non-win32 platforms.
  
+=item EV_FD_TO_WIN32_HANDLE
+
+If C<EV_SELECT_IS_WINSOCKET> is enabled, then libev needs a way to map
+file descriptors to socket handles. When not defining this symbol (the
+default), then libev will call C<_get_osfhandle>, which is usually
+correct. In some cases, programs use their own file descriptor management,
+in which case they can provide this function to map fds to socket handles.
+
  =item EV_USE_POLL
  
  If defined to be C<1>, libev will compile in support for the C<poll>(2)
@@ -2371,8 +2563,8 @@ be detected at runtime.
  =item EV_H
  
  The name of the F<ev.h> header file used to include it. The default if
-undefined is C<< <ev.h> >> in F<event.h> and C<"ev.h"> in F<ev.c>. This
-can be used to virtually rename the F<ev.h> header file in case of conflicts.
+undefined is C<"ev.h"> in F<event.h>, F<ev.c> and F<ev++.h>. This can be
+used to virtually rename the F<ev.h> header file in case of conflicts.
  
  =item EV_CONFIG_H
  
@@ -2383,7 +2575,7 @@ C<EV_H>, above.
  =item EV_EVENT_H
  
  Similarly to C<EV_H>, this macro can be used to override F<event.c>'s idea
-of how the F<event.h> header can be found.
+of how the F<event.h> header can be found, the default is C<"event.h">.
  
  =item EV_PROTOTYPES
  
@@ -2459,7 +2651,7 @@ increase this value (I<must> be a power of two).
  
  =item EV_INOTIFY_HASHSIZE
  
-C<ev_staz> watchers use a small hash table to distribute workload by
+C<ev_stat> watchers use a small hash table to distribute workload by
  inotify watch id. The default size is C<16> (or C<1> with C<EV_MINIMAL>),
  usually more than enough. If you need to manage thousands of C<ev_stat>
  watchers you might want to increase this value (I<must> be a power of
@@ -2565,16 +2757,17 @@ it is much faster and asymptotically approaches constant time.
  
  This means that, when you have a watcher that triggers in one hour and
  there are 100 watchers that would trigger before that then inserting will
-have to skip those 100 watchers.
+have to skip roughly seven (C<ld 100>) of these watchers.
  
-=item Changing timer/periodic watchers (by autorepeat, again): O(log skipped_other_timers)
+=item Changing timer/periodic watchers (by autorepeat or calling again): O(log skipped_other_timers)
  
-That means that for changing a timer costs less than removing/adding them
+That means that changing a timer costs less than removing/adding them
  as only the relative motion in the event queue has to be paid for.
  
  =item Starting io/check/prepare/idle/signal/child watchers: O(1)
  
  These just add the watcher into an array or at the head of a list.
+
  =item Stopping check/prepare/idle watchers: O(1)
  
  =item Stopping an io/signal/child watcher: O(number_of_watchers_for_this_(fd/signal/pid % EV_PID_HASHSIZE))
@@ -2583,20 +2776,92 @@ These watchers are stored in lists then need to be walked to find the
  correct watcher to remove. The lists are usually short (you don't usually
  have many watchers waiting for the same fd or signal).
  
-=item Finding the next timer per loop iteration: O(1)
+=item Finding the next timer in each loop iteration: O(1)
+
+By virtue of using a binary heap, the next timer is always found at the
+beginning of the storage array.
  
  =item Each change on a file descriptor per loop iteration: O(number_of_watchers_for_this_fd)
  
  A change means an I/O watcher gets started or stopped, which requires
-libev to recalculate its status (and possibly tell the kernel).
+libev to recalculate its status (and possibly tell the kernel, depending
+on backend and whether C<ev_io_set> was used).
  
-=item Activating one watcher: O(1)
+=item Activating one watcher (putting it into the pending state): O(1)
  
  =item Priority handling: O(number_of_priorities)
  
  Priorities are implemented by allocating some space for each
  priority. When doing priority-based operations, libev usually has to
-linearly search all the priorities.
+linearly search all the priorities, but starting/stopping and activating
+watchers becomes O(1) w.r.t. priority handling.
+
+=back
+
+
+=head1 Win32 platform limitations and workarounds
+
+Win32 doesn't support any of the standards (e.g. POSIX) that libev
+requires, and its I/O model is fundamentally incompatible with the POSIX
+model. Libev still offers limited functionality on this platform in
+the form of the C<EVBACKEND_SELECT> backend, and only supports socket
+descriptors. This only applies when using Win32 natively, not when using
+e.g. cygwin.
+
+There is no supported compilation method available on windows except
+embedding it into other applications.
+
+Due to the many, low, and arbitrary limits on the win32 platform and the
+abysmal performance of winsockets, using a large number of sockets is not
+recommended (and not reasonable). If your program needs to use more than
+a hundred or so sockets, then likely it needs to use a totally different
+implementation for windows, as libev offers the POSIX model, which cannot
+be implemented efficiently on windows (microsoft monopoly games).
+
+=over 4
+
+=item The winsocket select function
+
+The winsocket C<select> function doesn't follow POSIX in that it requires
+socket I<handles> and not socket I<file descriptors>. This makes select
+very inefficient, and also requires a mapping from file descriptors
+to socket handles. See the discussion of the C<EV_SELECT_USE_FD_SET>,
+C<EV_SELECT_IS_WINSOCKET> and C<EV_FD_TO_WIN32_HANDLE> preprocessor
+symbols for more info.
+
+The configuration for a "naked" win32 using the microsoft runtime
+libraries and raw winsocket select is:
+
+  #define EV_USE_SELECT 1
+  #define EV_SELECT_IS_WINSOCKET 1   /* forces EV_SELECT_USE_FD_SET, too */
+
+Note that winsockets handling of fd sets is O(n), so you can easily get a
+complexity in the O(n²) range when using win32.
+
+=item Limited number of file descriptors
+
+Windows has numerous arbitrary (and low) limits on things. Early versions
+of winsocket's select only supported waiting for a max. of C<64> handles
+(probably owing to the fact that all windows kernels can only wait for
+C<64> things at the same time internally; microsoft recommends spawning a
+chain of threads and wait for 63 handles and the previous thread in each).
+
+Newer versions support more handles, but you need to define C<FD_SETSIZE>
+to some high number (e.g. C<2048>) before compiling the winsocket select
+call (which might be in libev or elsewhere, for example, perl does its own
+select emulation on windows).
+
+Another limit is the number of file descriptors in the microsoft runtime
+libraries, which by default is C<64> (there must be a hidden I<64> fetish
+or something like this inside microsoft). You can increase this by calling
+C<_setmaxstdio>, which can increase this limit to C<2048> (another
+arbitrary limit), but is broken in many versions of the microsoft runtime
+libraries.
+
+This might get you to about C<512> or C<2048> sockets (depending on
+windows version and/or the phase of the moon). To get more, you need to
+wrap all I/O functions and provide your own fd management, but the cost of
+calling select (O(n²)) will likely make this unworkable.
  
  =back