There are many ways to scale an image. One can use existing libraries, or implement scaling via the OpenGL pipeline. The latter choice looks preferable, as it uses hardware that is designed to do such tasks efficiently. But how fast is OpenGL? I got the following results on a Celeron P4500 (it has an integrated Ironlake video chip):
Here, in the gl1 case I create and destroy textures on every scale, while in the gl2 case I create two large textures once and then use them as upload targets. The last two columns represent the case where no data is copied back. As OpenGL is tailored for output, retrieving data back can be slow. And you can see here how slow it can be: around 10 times slower than doing the scale on the CPU! But when you don't need the data back (i.e. you are going to display the scaled image on screen, or perform any additional processing), it can be dramatically faster.
Source image was 600x372. w and h columns represent width and height of destination. Hardware: Celeron P4500 with integrated video chip.
Here is the source of scaling.c:
CMakeLists.txt:
w h soft gl1 gl2 gl1nocopy gl2nocopy 256; 256;0.002582;0.005684;0.007081;0.000657;0.002122 256; 512;0.002958;0.009531;0.011303;0.000887;0.002118 256; 768;0.003201;0.013818;0.015991;0.000859;0.002552 256;1024;0.003485;0.018340;0.019503;0.001634;0.002126 512; 256;0.003370;0.009567;0.011041;0.001214;0.002147 512; 512;0.003848;0.019100;0.019351;0.001630;0.002103 512; 768;0.004408;0.027939;0.028123;0.001745;0.002439 512;1024;0.004896;0.036727;0.035284;0.002099;0.002110 768; 256;0.004105;0.014059;0.015718;0.000943;0.002477 768; 512;0.004929;0.027467;0.027748;0.001842;0.002419 768; 768;0.005623;0.041481;0.040696;0.002376;0.002828 768;1024;0.006465;0.056259;0.051331;0.002579;0.002218 1024; 256;0.004955;0.019852;0.019108;0.001627;0.002216 1024; 512;0.006256;0.037294;0.035924;0.002014;0.002284 1024; 768;0.006949;0.055599;0.052430;0.002492;0.002436 1024;1024;0.007896;0.073875;0.067410;0.002786;0.002413 1280; 256;0.005839;0.023752;0.024084;0.001527;0.002734 1280; 512;0.006979;0.045716;0.043938;0.002335;0.002552 1280; 768;0.008185;0.069451;0.063058;0.002651;0.002615 1280;1024;0.009488;0.093946;0.084176;0.003533;0.002774 1536; 256;0.006622;0.029007;0.028052;0.001837;0.002686 1536; 512;0.008031;0.055607;0.051383;0.002668;0.002287 1536; 768;0.009423;0.084270;0.076563;0.003294;0.002679 1536;1024;0.010719;0.114334;0.100322;0.003849;0.003139 1792; 256;0.007528;0.033786;0.032293;0.001866;0.002958 1792; 512;0.009060;0.064492;0.059214;0.002730;0.002416 1792; 768;0.010708;0.097901;0.087288;0.003630;0.002867 1792;1024;0.012345;0.131178;0.117498;0.003919;0.004337 2048; 256;0.008322;0.038345;0.035222;0.001984;0.002280 2048; 512;0.010135;0.074094;0.067603;0.002736;0.002468 2048; 768;0.011943;0.111330;0.101121;0.003874;0.003175 2048;1024;0.013746;0.149898;0.134717;0.004516;0.004226
Here, in the gl1 case I create and destroy textures on every scale, while in the gl2 case I create two large textures once and then use them as upload targets. The last two columns represent the case where no data is copied back. As OpenGL is tailored for output, retrieving data back can be slow. And you can see here how slow it can be: around 10 times slower than doing the scale on the CPU! But when you don't need the data back (i.e. you are going to display the scaled image on screen, or perform any additional processing), it can be dramatically faster.
Source image was 600x372. w and h columns represent width and height of destination. Hardware: Celeron P4500 with integrated video chip.
Here is the source of scaling.c:
#define _POSIX_C_SOURCE 199309L #define GL_GLEXT_PROTOTYPES #include <libswscale/swscale.h> #include <cairo/cairo.h> #include <time.h> #include <GL/gl.h> #include <GL/glext.h> #include <GL/glx.h> #include <X11/Xlib.h> int max_w = 2048; int max_h = 1024; int reps_count = 100; int step = 256; void scale_image_libswscale(void *src_buf, int src_w, int src_h, int src_pitch, void *dst_buf, int dst_w, int dst_h, int dst_pitch) { struct SwsContext *sws_ctx = sws_getContext(src_w, src_h, PIX_FMT_BGRA, dst_w, dst_h, PIX_FMT_BGRA, SWS_FAST_BILINEAR, NULL, NULL, NULL); uint8_t const * const src[] = {src_buf, 0, 0, 0}; int src_stride[] = {src_pitch, 0, 0, 0}; uint8_t * const dst[] = {dst_buf, 0, 0, 0}; int dst_stride[] = {dst_pitch, 0, 0, 0}; sws_scale(sws_ctx, src, src_stride, 0, src_h, dst, dst_stride); sws_freeContext(sws_ctx); } void scale_image_gl(void *src_buf, int src_w, int src_h, int src_pitch, void *dst_buf, int dst_w, int dst_h, int dst_pitch, int nocopy) { GLuint tex[2]; glGenTextures(2, tex); glBindTexture(GL_TEXTURE_2D, tex[0]); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, dst_w, dst_h, 0, GL_BGRA, GL_UNSIGNED_BYTE, NULL); glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, tex[0], 0); glMatrixMode(GL_PROJECTION); glLoadIdentity(); glOrtho(0, dst_w-1, 0, dst_h-1, -1.0, 1.0); glViewport(0, 0, dst_w, dst_h); glEnable(GL_TEXTURE_2D); glPixelStorei(GL_UNPACK_ROW_LENGTH, src_pitch/4); glBindTexture(GL_TEXTURE_2D, tex[1]); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); 
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, src_w, src_h, 0, GL_BGRA, GL_UNSIGNED_BYTE, src_buf); glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); glBegin(GL_QUADS); glTexCoord2f(0.0f, 0.0f); glVertex2i(0, 0); glTexCoord2f(1.0f, 0.0f); glVertex2i(dst_w - 1, 0); glTexCoord2f(1.0f, 1.0f); glVertex2i(dst_w - 1, dst_h - 1); glTexCoord2f(0.0f, 1.0f); glVertex2i(0, dst_h - 1); glEnd(); glBindTexture(GL_TEXTURE_2D, tex[0]); if (!nocopy) glGetTexImage(GL_TEXTURE_2D, 0, GL_BGRA, GL_UNSIGNED_BYTE, dst_buf); glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); glDeleteTextures(2, tex); } void scale_image_gl2(void *src_buf, int src_w, int src_h, int src_pitch, void *dst_buf, int dst_w, int dst_h, int dst_pitch, int nocopy, GLuint tex_src) { glMatrixMode(GL_PROJECTION); glLoadIdentity(); glOrtho(0, dst_w-1, 0, dst_h-1, -1.0, 1.0); glViewport(0, 0, dst_w, dst_h); glEnable(GL_TEXTURE_2D); glBindTexture(GL_TEXTURE_2D, tex_src); glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, src_w, src_h, GL_BGRA, GL_UNSIGNED_BYTE, src_buf); glBegin(GL_QUADS); glTexCoord2f(0.0f, 0.0f); glVertex2i(0, 0); glTexCoord2f(1.0f, 0.0f); glVertex2i(dst_w - 1, 0); glTexCoord2f(1.0f, 1.0f); glVertex2i(dst_w - 1, dst_h - 1); glTexCoord2f(0.0f, 1.0f); glVertex2i(0, dst_h - 1); glEnd(); if (!nocopy) { glPixelStorei(GL_UNPACK_ROW_LENGTH, dst_pitch/4); glReadPixels(0, 0, dst_w, dst_h, GL_BGRA, GL_UNSIGNED_BYTE, dst_buf); glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); } } int main(void) { struct timespec t_begin, t_end; double interval_sws, interval_gl1[2], interval_gl2[2]; cairo_surface_t *img_surf = cairo_image_surface_create_from_png("kuzina.png"); if (CAIRO_STATUS_SUCCESS != cairo_surface_status(img_surf)) { printf("image load failuren"); exit(1); } void *src_buf = cairo_image_surface_get_data(img_surf); int src_w = cairo_image_surface_get_width(img_surf); int src_h = cairo_image_surface_get_height(img_surf); int src_stride = cairo_image_surface_get_stride(img_surf); void *dst_buf = malloc(max_w*max_h*4); if 
(NULL == dst_buf) { printf("malloc failedn"); exit(1); } Display *display = XOpenDisplay(NULL); GLint att[] = {GLX_RGBA, GLX_DEPTH_SIZE, 24, GLX_DOUBLEBUFFER, None}; XVisualInfo *vi = glXChooseVisual(display, 0, att); GLXContext glc = glXCreateContext(display, vi, NULL, GL_TRUE); glXMakeCurrent(display, DefaultRootWindow(display), glc); GLuint fbo_id[2]; glGenFramebuffers(2, fbo_id); glBindFramebuffer(GL_FRAMEBUFFER, fbo_id[0]); glMatrixMode(GL_PROJECTION); glLoadIdentity(); glMatrixMode(GL_MODELVIEW); glLoadIdentity(); glBindFramebuffer(GL_FRAMEBUFFER, fbo_id[1]); glMatrixMode(GL_PROJECTION); glLoadIdentity(); glMatrixMode(GL_MODELVIEW); glLoadIdentity(); GLuint tex[2]; glGenTextures(2, tex); glBindTexture(GL_TEXTURE_2D, tex[0]); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, max_w, max_h, 0, GL_BGRA, GL_UNSIGNED_BYTE, NULL); glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, tex[0], 0); glBindTexture(GL_TEXTURE_2D, tex[1]); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, max_w, max_h, 0, GL_BGRA, GL_UNSIGNED_BYTE, NULL); for (int dst_w = step; dst_w <= max_w; dst_w += step) { for (int dst_h = step; dst_h <= max_h; dst_h += step) { // swscale clock_gettime(CLOCK_MONOTONIC_RAW, &t_begin); for (int k = 0; k < reps_count; k ++) { scale_image_libswscale(src_buf, src_w, src_h, src_stride, dst_buf, dst_w, dst_h, 4*dst_w); } clock_gettime(CLOCK_MONOTONIC_RAW, &t_end); interval_sws = (t_end.tv_nsec - t_begin.tv_nsec)/1e9 + (t_end.tv_sec - t_begin.tv_sec); for 
(int nocopy = 0; nocopy < 2; nocopy ++) { // gl scale clock_gettime(CLOCK_MONOTONIC_RAW, &t_begin); glBindFramebuffer(GL_FRAMEBUFFER, fbo_id[0]); for (int k = 0; k < reps_count; k ++) { scale_image_gl(src_buf, src_w, src_h, src_stride, dst_buf, dst_w, dst_h, 4*dst_w, nocopy); } clock_gettime(CLOCK_MONOTONIC_RAW, &t_end); interval_gl1[nocopy] = (t_end.tv_nsec - t_begin.tv_nsec)/1e9 + (t_end.tv_sec - t_begin.tv_sec); // gl scale 2 clock_gettime(CLOCK_MONOTONIC_RAW, &t_begin); glBindFramebuffer(GL_FRAMEBUFFER, fbo_id[1]); for (int k = 0; k < reps_count; k ++) { scale_image_gl2(src_buf, src_w, src_h, src_stride, dst_buf, dst_w, dst_h, 4*dst_w, nocopy, tex[1]); } clock_gettime(CLOCK_MONOTONIC_RAW, &t_end); interval_gl2[nocopy] = (t_end.tv_nsec - t_begin.tv_nsec)/1e9 + (t_end.tv_sec - t_begin.tv_sec); } printf("%4d;%4d;%8f;%8f;%8f;%8f;%8fn", dst_w, dst_h, interval_sws/reps_count, interval_gl1[0]/reps_count, interval_gl2[0]/reps_count, interval_gl1[1]/reps_count, interval_gl2[1]/reps_count); } } free(dst_buf); cairo_surface_destroy(img_surf); }
CMakeLists.txt:
# Build script for the scaling benchmark (scaling.c -> executable "sc").
cmake_minimum_required(VERSION 2.8)

add_definitions(-std=gnu99 -Wall -Wextra)

find_package(X11)
find_package(PkgConfig REQUIRED)
pkg_check_modules(QW REQUIRED cairo gl glu libswscale)

link_directories(${QW_LIBRARY_DIRS} ${X11_LIBRARY_DIRS})
# Header search paths come from QW_INCLUDE_DIRS, not QW_LIBRARY_DIRS.
include_directories(${QW_INCLUDE_DIRS} ${X11_INCLUDE_DIRS})

add_executable(sc scaling.c)
# "rt" provides clock_gettime on older glibc versions.
target_link_libraries(sc ${QW_LIBRARIES} rt ${X11_LIBRARIES})
Комментариев нет:
Отправить комментарий