screenshooter: Grab pixel data directly before buffer swap
diff --git a/src/compositor-drm.c b/src/compositor-drm.c
index 837da8c..dcd5e99 100644
--- a/src/compositor-drm.c
+++ b/src/compositor-drm.c
@@ -215,6 +215,8 @@
 	wl_list_for_each_reverse(surface, &compositor->base.surface_list, link)
 		weston_surface_draw(surface, &output->base, damage);
 
+	weston_output_do_read_pixels(&output->base);
+
 	eglSwapBuffers(compositor->base.display, output->egl_surface);
 	output->next_bo = gbm_surface_lock_front_buffer(output->surface);
 	if (!output->next_bo) {
@@ -1190,21 +1192,6 @@
 	drmModeFreeConnector(connector);
 }
 
-static void
-drm_output_read_pixels(struct weston_output *output_base, void *data)
-{
-	struct drm_output *output = (struct drm_output *) output_base;
-	struct drm_compositor *compositor =
-		(struct drm_compositor *) output->base.compositor;
-
-	eglMakeCurrent(compositor->base.display, output->egl_surface,
-			output->egl_surface, compositor->base.context);
-
-	glReadPixels(0, 0, output_base->current->width,
-		     output_base->current->height,
-		     compositor->base.read_format, GL_UNSIGNED_BYTE, data);
-}
-
 static int
 create_output_for_connector(struct drm_compositor *ec,
 			    drmModeRes *resources,
@@ -1313,7 +1300,6 @@
 	output->base.repaint = drm_output_repaint;
 	output->base.destroy = drm_output_destroy;
 	output->base.assign_planes = drm_assign_planes;
-	output->base.read_pixels = drm_output_read_pixels;
 	output->base.set_dpms = drm_set_dpms;
 	output->base.switch_mode = drm_output_switch_mode;
 
diff --git a/src/compositor-wayland.c b/src/compositor-wayland.c
index afd9121..3260c8e 100644
--- a/src/compositor-wayland.c
+++ b/src/compositor-wayland.c
@@ -357,6 +357,8 @@
 
 	draw_border(output);
 
+	weston_output_do_read_pixels(&output->base);
+
 	eglSwapBuffers(compositor->base.display, output->egl_surface);
 	callback = wl_surface_frame(output->parent.surface);
 	wl_callback_add_listener(callback, &frame_listener, output);
@@ -377,21 +379,6 @@
 	return;
 }
 
-static void
-wayland_output_read_pixels(struct weston_output *output_base, void *data)
-{
-	struct wayland_output *output = (struct wayland_output *) output_base;
-	struct wayland_compositor *compositor =
-		(struct wayland_compositor *) output->base.compositor;
-
-	eglMakeCurrent(compositor->base.display, output->egl_surface,
-			output->egl_surface, compositor->base.context);
-
-	glReadPixels(0, 0, output_base->current->width,
-		     output_base->current->height,
-		     compositor->base.read_format, GL_UNSIGNED_BYTE, data);
-}
-
 static int
 wayland_compositor_create_output(struct wayland_compositor *c,
 				 int width, int height)
@@ -460,7 +447,6 @@
 	output->base.repaint = wayland_output_repaint;
 	output->base.destroy = wayland_output_destroy;
 	output->base.assign_planes = NULL;
-	output->base.read_pixels = wayland_output_read_pixels;
 	output->base.set_backlight = NULL;
 	output->base.set_dpms = NULL;
 	output->base.switch_mode = NULL;
diff --git a/src/compositor-x11.c b/src/compositor-x11.c
index b910831..6d9bb0e 100644
--- a/src/compositor-x11.c
+++ b/src/compositor-x11.c
@@ -212,6 +212,8 @@
 	wl_list_for_each_reverse(surface, &compositor->base.surface_list, link)
 		weston_surface_draw(surface, &output->base, damage);
 
+	weston_output_do_read_pixels(&output->base);
+
 	eglSwapBuffers(compositor->base.display, output->egl_surface);
 
 	wl_event_source_timer_update(output->finish_frame_timer, 10);
@@ -344,21 +346,6 @@
 	pixman_image_unref(image);
 }
 
-static void
-x11_output_read_pixels(struct weston_output *output_base, void *data)
-{
-	struct x11_output *output = (struct x11_output *) output_base;
-	struct x11_compositor *compositor =
-		(struct x11_compositor *) output->base.compositor;
-
-	eglMakeCurrent(compositor->base.display, output->egl_surface,
-			output->egl_surface, compositor->base.context);
-
-	glReadPixels(0, 0, output_base->current->width,
-		     output_base->current->height,
-		     compositor->base.read_format, GL_UNSIGNED_BYTE, data);
-}
-
 static int
 x11_compositor_create_output(struct x11_compositor *c, int x, int y,
 			     int width, int height, int fullscreen)
@@ -470,7 +457,6 @@
 	output->base.repaint = x11_output_repaint;
 	output->base.destroy = x11_output_destroy;
 	output->base.assign_planes = NULL;
-	output->base.read_pixels = x11_output_read_pixels;
 	output->base.set_backlight = NULL;
 	output->base.set_dpms = NULL;
 	output->base.switch_mode = NULL;
diff --git a/src/compositor.c b/src/compositor.c
index ddbc077..2b46fbc 100644
--- a/src/compositor.c
+++ b/src/compositor.c
@@ -2389,6 +2389,7 @@
 	output->mm_width = width;
 	output->mm_height = height;
 	output->dirty = 1;
+	wl_list_init(&output->read_pixels_list);
 
 	output->zoom.active = 0;
 	output->zoom.increment = 0.05;
@@ -2412,6 +2413,20 @@
 				      output, bind_output);
 }
 
+WL_EXPORT void
+weston_output_do_read_pixels(struct weston_output *output)
+{	/* Serve every request queued on output->read_pixels_list. */
+	struct weston_read_pixels *r, *next;
+
+	glPixelStorei(GL_PACK_ALIGNMENT, 1);	/* rows are packed without padding */
+	wl_list_for_each_safe(r, next, &output->read_pixels_list, link) {	/* _safe: done() may unlink r */
+		glReadPixels(r->x, r->y, r->width, r->height,
+			     output->compositor->read_format,
+			     GL_UNSIGNED_BYTE, r->data);
+		r->done(r, output);	/* e.g. the screenshooter callback removes and frees r */
+	}
+}
+
 static void
 compositor_bind(struct wl_client *client,
 		void *data, uint32_t version, uint32_t id)
diff --git a/src/compositor.h b/src/compositor.h
index 648b045..59494ad 100644
--- a/src/compositor.h
+++ b/src/compositor.h
@@ -44,6 +44,7 @@
 
 struct weston_surface;
 struct weston_input_device;
+struct weston_output;
 
 struct weston_mode {
 	uint32_t flags;
@@ -72,6 +73,14 @@
 	WESTON_DPMS_OFF
 };
 
+struct weston_read_pixels {
+	void *data;	/* destination buffer handed to glReadPixels() */
+	int x, y, width, height;	/* rectangle handed to glReadPixels() */
+	void (*done)(struct weston_read_pixels *read_pixels,
+		     struct weston_output *output);	/* called right after the read */
+	struct wl_list link;	/* entry in weston_output.read_pixels_list */
+};
+
 struct weston_output {
 	uint32_t id;
 
@@ -90,6 +99,7 @@
 	int repaint_scheduled;
 	struct weston_output_zoom zoom;
 	int dirty;
+	struct wl_list read_pixels_list;
 
 	char *make, *model;
 	uint32_t subpixel;
@@ -102,7 +112,6 @@
 			pixman_region32_t *damage);
 	void (*destroy)(struct weston_output *output);
 	void (*assign_planes)(struct weston_output *output);
-	void (*read_pixels)(struct weston_output *output, void *data);
 	int (*switch_mode)(struct weston_output *output, struct weston_mode *mode);
 
 	/* backlight values are on 0-255 range, where higher is brighter */
@@ -424,6 +433,8 @@
 void
 weston_output_damage(struct weston_output *output);
 void
+weston_output_do_read_pixels(struct weston_output *output);
+void
 weston_compositor_repick(struct weston_compositor *compositor);
 void
 weston_compositor_schedule_repaint(struct weston_compositor *compositor);
diff --git a/src/screenshooter.c b/src/screenshooter.c
index b4b341b..7ba475d 100644
--- a/src/screenshooter.c
+++ b/src/screenshooter.c
@@ -36,6 +36,12 @@
 	struct wl_listener destroy_listener;
 };
 
+struct screenshooter_read_pixels {
+	struct weston_read_pixels base;	/* must stay first: the done callback casts base back to this type */
+	struct wl_buffer *buffer;	/* shm buffer whose data the done callback writes to */
+	struct wl_resource *resource;	/* passed to screenshooter_send_done() */
+};
+
 static void
 copy_bgra_yflip(uint8_t *dst, uint8_t *src, int height,
 		int dst_stride, int src_stride)
@@ -82,37 +88,20 @@
 }
 
 static void
-screenshooter_shoot(struct wl_client *client,
-		    struct wl_resource *resource,
-		    struct wl_resource *output_resource,
-		    struct wl_resource *buffer_resource)
+screenshooter_read_pixels_done(struct weston_read_pixels *base,
+			       struct weston_output *output)
 {
-	struct weston_output *output = output_resource->data;
-	struct wl_buffer *buffer = buffer_resource->data;
-	uint8_t *tmp, *d, *s;
+	struct screenshooter_read_pixels *r =
+		(struct screenshooter_read_pixels *) base;
 	int32_t buffer_stride, output_stride;
+	uint8_t *d, *s;
 
-	if (!wl_buffer_is_shm(buffer))
-		return;
-
-	if (buffer->width < output->current->width ||
-	    buffer->height < output->current->height)
-		return;
-
-	buffer_stride = wl_shm_buffer_get_stride(buffer);
+	buffer_stride = wl_shm_buffer_get_stride(r->buffer);
 	output_stride = output->current->width * 4;
-	tmp = malloc(output_stride * output->current->height);
-	if (tmp == NULL) {
-		wl_resource_post_no_memory(resource);
-		return;
-	}
 
-	glPixelStorei(GL_PACK_ALIGNMENT, 1);
-	output->read_pixels(output, tmp);
-
-	d = wl_shm_buffer_get_data(buffer) + output->y * buffer_stride +
-							output->x * 4;
-	s = tmp + output_stride * (output->current->height - 1);
+	d = wl_shm_buffer_get_data(r->buffer) +
+		output->y * buffer_stride + output->x * 4;
+	s = r->base.data + output_stride * (output->current->height - 1);
 
 	switch (output->compositor->read_format) {
 	case GL_BGRA_EXT:
@@ -127,7 +116,56 @@
 		break;
 	}
 
-	free(tmp);
+	wl_list_remove(&r->base.link);
+
+	screenshooter_send_done(r->resource);
+	free(r->base.data);
+	free(r);
+
+}
+
+static void
+screenshooter_shoot(struct wl_client *client,
+		    struct wl_resource *resource,
+		    struct wl_resource *output_resource,
+		    struct wl_resource *buffer_resource)
+{	/* Queue a full-output read request and schedule a repaint. */
+	struct weston_output *output = output_resource->data;
+	struct screenshooter_read_pixels *r;
+	struct wl_buffer *buffer = buffer_resource->data;
+	int32_t stride;
+
+	if (!wl_buffer_is_shm(buffer))
+		return;	/* non-shm buffers are ignored */
+
+	if (buffer->width < output->current->width ||
+	    buffer->height < output->current->height)
+		return;	/* buffer is smaller than the current output mode */
+
+	r = malloc(sizeof *r);
+	if (r == NULL) {
+		wl_resource_post_no_memory(resource);
+		return;
+	}
+
+	r->base.x = 0;
+	r->base.y = 0;
+	r->base.width = output->current->width;
+	r->base.height = output->current->height;	/* request covers the whole current mode */
+	r->base.done = screenshooter_read_pixels_done;
+	r->buffer = buffer;	/* NOTE(review): raw pointer kept until done() runs; confirm buffer lifetime */
+	r->resource = resource;	/* NOTE(review): same lifetime question for the resource */
+	stride = buffer->width * 4;
+	r->base.data = malloc(stride * buffer->height);	/* sized from the buffer, which is >= the output (checked above) */
+
+	if (r->base.data == NULL) {
+		free(r);
+		wl_resource_post_no_memory(resource);
+		return;
+	}
+
+	wl_list_insert(output->read_pixels_list.prev, &r->base.link);	/* append at the tail */
+	weston_compositor_schedule_repaint(output->compositor);	/* backend repaint calls weston_output_do_read_pixels() */
 }
 
 struct screenshooter_interface screenshooter_implementation = {