libjpeg update

git-svn-id: http://svn.miranda-ng.org/main/trunk@5843 1316c22d-e87f-b044-9b9b-93d7a3e3ba9c
author: Kirill Volinsky <mataes2007@gmail.com> 2013-08-26 14:14:54 +0000
committer: Kirill Volinsky <mataes2007@gmail.com> 2013-08-26 14:14:54 +0000
commit: 4906fb47f340bea7d2ba551364d1a5d8d0473861 (patch)
tree: 1ca2cbed515675150be45f06f0cbb779683ca626
parent: af85ed40e26a345cc15ba7f310c92a62116865d1 (diff)
73 files changed, 37901 insertions, 37477 deletions
diff --git a/plugins/AdvaImg/AdvaImg_10.vcxproj b/plugins/AdvaImg/AdvaImg_10.vcxproj
index 175902f338..510a11755d 100644
--- a/plugins/AdvaImg/AdvaImg_10.vcxproj
+++ b/plugins/AdvaImg/AdvaImg_10.vcxproj
@@ -335,7 +335,7 @@
     <ClInclude Include="src\LibJPEG\transupp.h" />
     <ClInclude Include="src\LibPNG\png.h" />
     <ClInclude Include="src\LibPNG\pngconf.h" />
-    <ClInclude Include="src\CacheFile.h" />
+    <ClInclude Include="src\FreeImage\CacheFile.h" />
     <ClInclude Include="src\LibPNG\pngdebug.h" />
     <ClInclude Include="src\LibPNG\pnginfo.h" />
     <ClInclude Include="src\LibPNG\pnglibconf.h" />
diff --git a/plugins/AdvaImg/AdvaImg_10.vcxproj.filters b/plugins/AdvaImg/AdvaImg_10.vcxproj.filters
index a8736aaefd..535b33b4bc 100644
--- a/plugins/AdvaImg/AdvaImg_10.vcxproj.filters
+++ b/plugins/AdvaImg/AdvaImg_10.vcxproj.filters
@@ -495,7 +495,7 @@
     <ClInclude Include="src\LibPNG\pngconf.h">
       <Filter>Source Files\LibPNG\Header Files</Filter>
     </ClInclude>
-    <ClInclude Include="src\CacheFile.h">
+    <ClInclude Include="src\FreeImage\CacheFile.h">
       <Filter>Source Files\LibJPEG\Header Files</Filter>
     </ClInclude>
     <ClInclude Include="src\Metadata\FIRational.h">
diff --git a/plugins/AdvaImg/AdvaImg_11.vcxproj b/plugins/AdvaImg/AdvaImg_11.vcxproj
index 70bb9f44db..ce005b5160 100644
--- a/plugins/AdvaImg/AdvaImg_11.vcxproj
+++ b/plugins/AdvaImg/AdvaImg_11.vcxproj
@@ -338,7 +338,7 @@
     <ClInclude Include="src\LibJPEG\transupp.h" />
     <ClInclude Include="src\LibPNG\png.h" />
     <ClInclude Include="src\LibPNG\pngconf.h" />
-    <ClInclude Include="src\CacheFile.h" />
+    <ClInclude Include="src\FreeImage\CacheFile.h" />
     <ClInclude Include="src\LibPNG\pngdebug.h" />
     <ClInclude Include="src\LibPNG\pnginfo.h" />
     <ClInclude Include="src\LibPNG\pnglibconf.h" />
diff --git a/plugins/AdvaImg/AdvaImg_11.vcxproj.filters b/plugins/AdvaImg/AdvaImg_11.vcxproj.filters
index a8736aaefd..535b33b4bc 100644
--- a/plugins/AdvaImg/AdvaImg_11.vcxproj.filters
+++ b/plugins/AdvaImg/AdvaImg_11.vcxproj.filters
@@ -495,7 +495,7 @@
     <ClInclude Include="src\LibPNG\pngconf.h">
       <Filter>Source Files\LibPNG\Header Files</Filter>
     </ClInclude>
-    <ClInclude Include="src\CacheFile.h">
+    <ClInclude Include="src\FreeImage\CacheFile.h">
       <Filter>Source Files\LibJPEG\Header Files</Filter>
     </ClInclude>
     <ClInclude Include="src\Metadata\FIRational.h">
diff --git a/plugins/AdvaImg/src/CacheFile.h b/plugins/AdvaImg/src/FreeImage/CacheFile.h
index a1e5e782c9..a1e5e782c9 100644
--- a/plugins/AdvaImg/src/CacheFile.h
+++ b/plugins/AdvaImg/src/FreeImage/CacheFile.h
diff --git a/plugins/AdvaImg/src/LibJPEG/README b/plugins/AdvaImg/src/LibJPEG/README
index 07bb50b20d..4f2645397a 100644
--- a/plugins/AdvaImg/src/LibJPEG/README
+++ b/plugins/AdvaImg/src/LibJPEG/README
@@ -1,10 +1,10 @@
 The Independent JPEG Group's JPEG software
 ==========================================
 
-README for release 8d of 15-Jan-2012
-====================================
+README for release 9 of 13-Jan-2013
+===================================
 
-This distribution contains the eighth public release of the Independent JPEG
+This distribution contains the ninth public release of the Independent JPEG
 Group's free JPEG software.  You are welcome to redistribute this software and
 to use it for any purpose, subject to the conditions under LEGAL ISSUES, below.
 
@@ -115,7 +115,7 @@ with respect to this software, its quality, accuracy, merchantability, or
 fitness for a particular purpose.  This software is provided "AS IS", and you,
 its user, assume the entire risk as to its quality and accuracy.
 
-This software is copyright (C) 1991-2012, Thomas G. Lane, Guido Vollbeding.
+This software is copyright (C) 1991-2013, Thomas G. Lane, Guido Vollbeding.
 All Rights Reserved except as specified below.
 
 Permission is hereby granted to use, copy, modify, and distribute this
@@ -146,15 +146,6 @@ commercial products, provided that all warranty or liability claims are
 assumed by the product vendor.
 
 
-ansi2knr.c is included in this distribution by permission of L. Peter Deutsch,
-sole proprietor of its copyright holder, Aladdin Enterprises of Menlo Park, CA.
-ansi2knr.c is NOT covered by the above copyright and conditions, but instead
-by the usual distribution terms of the Free Software Foundation; principally,
-that you must include source code if you redistribute it.  (See the file
-ansi2knr.c for full details.)  However, since ansi2knr.c is not needed as part
-of any program generated from the IJG code, this does not limit you more than
-the foregoing paragraphs do.
-
 The Unix configuration script "configure" was produced with GNU Autoconf.
 It is copyright by the Free Software Foundation but is freely distributable.
 The same holds for its supporting scripts (config.guess, config.sub,
@@ -222,12 +213,16 @@ Part 1: Requirements and guidelines" and has document numbers ISO/IEC IS
 10918-1, ITU-T T.81.  Part 2 is titled "Digital Compression and Coding of
 Continuous-tone Still Images, Part 2: Compliance testing" and has document
 numbers ISO/IEC IS 10918-2, ITU-T T.83.
-IJG JPEG 8 introduces an implementation of the JPEG SmartScale extension
+IJG JPEG 8 introduced an implementation of the JPEG SmartScale extension
 which is specified in two documents:  A contributed document at ITU and ISO
 with title "ITU-T JPEG-Plus Proposal for Extending ITU-T T.81 for Advanced
 Image Coding", April 2006, Geneva, Switzerland.  The latest version of this
 document is Revision 3.  And a contributed document ISO/IEC JTC1/SC29/WG1 N
 5799 with title "Evolution of JPEG", June/July 2011, Berlin, Germany.
+IJG JPEG 9 introduces a reversible color transform for improved lossless
+compression which is described in a contributed document ISO/IEC JTC1/SC29/
+WG1 N 6080 with title "JPEG 9 Lossless Coding", June/July 2012, Paris,
+France.
 
 The JPEG standard does not specify all details of an interchangeable file
 format.  For the omitted details we follow the "JFIF" conventions, revision
@@ -257,8 +252,8 @@ ARCHIVE LOCATIONS
 The "official" archive site for this software is www.ijg.org.
 The most recent released version can always be found there in
 directory "files".  This particular version will be archived as
-http://www.ijg.org/files/jpegsrc.v8d.tar.gz, and in Windows-compatible
-"zip" archive format as http://www.ijg.org/files/jpegsr8d.zip.
+http://www.ijg.org/files/jpegsrc.v9.tar.gz, and in Windows-compatible
+"zip" archive format as http://www.ijg.org/files/jpegsr9.zip.
 
 The JPEG FAQ (Frequently Asked Questions) article is a source of some
 general information about JPEG.
@@ -331,20 +326,24 @@ Furthermore, the ISO committee pretends to be "responsible for the popular
 JPEG" in their public reports which is not true because they don't respond to
 actual requirements for the maintenance of the original JPEG specification.
 
-There are currently distributions in circulation containing the name
-"libjpeg" which claim to be a "derivative" or "fork" of the original
-libjpeg, but don't have the features and are incompatible with formats
-supported by actual IJG libjpeg distributions.  Furthermore, they
-violate the license conditions as described under LEGAL ISSUES above.
-We have no sympathy for the release of misleading and illegal
-distributions derived from obsolete code bases.
+There are currently different distributions in circulation containing the
+name "libjpeg" which is misleading because they don't have the features and
+are incompatible with formats supported by actual IJG libjpeg distributions.
+One of those fakes is released by members of the ISO committee and just uses
+the name of libjpeg for misdirection of people, similar to the abuse of the
+name JPEG as described above, while having nothing in common with actual IJG
+libjpeg distributions.
+The other one claims to be a "derivative" or "fork" of the original libjpeg
+and violates the license conditions as described under LEGAL ISSUES above.
+We have no sympathy for the release of misleading and illegal distributions
+derived from obsolete code bases.
 Don't use an obsolete code base!
 
 
 TO DO
 =====
 
-Version 8 is the first release of a new generation JPEG standard
+Version 9 is the second release of a new generation JPEG standard
 to overcome the limitations of the original JPEG specification.
 More features are being prepared for coming releases...
 
diff --git a/plugins/AdvaImg/src/LibJPEG/cderror.h b/plugins/AdvaImg/src/LibJPEG/cderror.h
index e19c475c5c..fb72a51f6b 100644
--- a/plugins/AdvaImg/src/LibJPEG/cderror.h
+++ b/plugins/AdvaImg/src/LibJPEG/cderror.h
@@ -1,134 +1,134 @@
-/*
- * cderror.h
- *
- * Copyright (C) 1994-1997, Thomas G. Lane.
- * Modified 2009 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file defines the error and message codes for the cjpeg/djpeg
- * applications.  These strings are not needed as part of the JPEG library
- * proper.
- * Edit this file to add new codes, or to translate the message strings to
- * some other language.
- */
-
-/*
- * To define the enum list of message codes, include this file without
- * defining macro JMESSAGE.  To create a message string table, include it
- * again with a suitable JMESSAGE definition (see jerror.c for an example).
- */
-#ifndef JMESSAGE
-#ifndef CDERROR_H
-#define CDERROR_H
-/* First time through, define the enum list */
-#define JMAKE_ENUM_LIST
-#else
-/* Repeated inclusions of this file are no-ops unless JMESSAGE is defined */
-#define JMESSAGE(code,string)
-#endif /* CDERROR_H */
-#endif /* JMESSAGE */
-
-#ifdef JMAKE_ENUM_LIST
-
-typedef enum {
-
-#define JMESSAGE(code,string)	code ,
-
-#endif /* JMAKE_ENUM_LIST */
-
-JMESSAGE(JMSG_FIRSTADDONCODE=1000, NULL) /* Must be first entry! */
-
-#ifdef BMP_SUPPORTED
-JMESSAGE(JERR_BMP_BADCMAP, "Unsupported BMP colormap format")
-JMESSAGE(JERR_BMP_BADDEPTH, "Only 8- and 24-bit BMP files are supported")
-JMESSAGE(JERR_BMP_BADHEADER, "Invalid BMP file: bad header length")
-JMESSAGE(JERR_BMP_BADPLANES, "Invalid BMP file: biPlanes not equal to 1")
-JMESSAGE(JERR_BMP_COLORSPACE, "BMP output must be grayscale or RGB")
-JMESSAGE(JERR_BMP_COMPRESSED, "Sorry, compressed BMPs not yet supported")
-JMESSAGE(JERR_BMP_EMPTY, "Empty BMP image")
-JMESSAGE(JERR_BMP_NOT, "Not a BMP file - does not start with BM")
-JMESSAGE(JTRC_BMP, "%ux%u 24-bit BMP image")
-JMESSAGE(JTRC_BMP_MAPPED, "%ux%u 8-bit colormapped BMP image")
-JMESSAGE(JTRC_BMP_OS2, "%ux%u 24-bit OS2 BMP image")
-JMESSAGE(JTRC_BMP_OS2_MAPPED, "%ux%u 8-bit colormapped OS2 BMP image")
-#endif /* BMP_SUPPORTED */
-
-#ifdef GIF_SUPPORTED
-JMESSAGE(JERR_GIF_BUG, "GIF output got confused")
-JMESSAGE(JERR_GIF_CODESIZE, "Bogus GIF codesize %d")
-JMESSAGE(JERR_GIF_COLORSPACE, "GIF output must be grayscale or RGB")
-JMESSAGE(JERR_GIF_IMAGENOTFOUND, "Too few images in GIF file")
-JMESSAGE(JERR_GIF_NOT, "Not a GIF file")
-JMESSAGE(JTRC_GIF, "%ux%ux%d GIF image")
-JMESSAGE(JTRC_GIF_BADVERSION,
-	 "Warning: unexpected GIF version number '%c%c%c'")
-JMESSAGE(JTRC_GIF_EXTENSION, "Ignoring GIF extension block of type 0x%02x")
-JMESSAGE(JTRC_GIF_NONSQUARE, "Caution: nonsquare pixels in input")
-JMESSAGE(JWRN_GIF_BADDATA, "Corrupt data in GIF file")
-JMESSAGE(JWRN_GIF_CHAR, "Bogus char 0x%02x in GIF file, ignoring")
-JMESSAGE(JWRN_GIF_ENDCODE, "Premature end of GIF image")
-JMESSAGE(JWRN_GIF_NOMOREDATA, "Ran out of GIF bits")
-#endif /* GIF_SUPPORTED */
-
-#ifdef PPM_SUPPORTED
-JMESSAGE(JERR_PPM_COLORSPACE, "PPM output must be grayscale or RGB")
-JMESSAGE(JERR_PPM_NONNUMERIC, "Nonnumeric data in PPM file")
-JMESSAGE(JERR_PPM_NOT, "Not a PPM/PGM file")
-JMESSAGE(JTRC_PGM, "%ux%u PGM image")
-JMESSAGE(JTRC_PGM_TEXT, "%ux%u text PGM image")
-JMESSAGE(JTRC_PPM, "%ux%u PPM image")
-JMESSAGE(JTRC_PPM_TEXT, "%ux%u text PPM image")
-#endif /* PPM_SUPPORTED */
-
-#ifdef RLE_SUPPORTED
-JMESSAGE(JERR_RLE_BADERROR, "Bogus error code from RLE library")
-JMESSAGE(JERR_RLE_COLORSPACE, "RLE output must be grayscale or RGB")
-JMESSAGE(JERR_RLE_DIMENSIONS, "Image dimensions (%ux%u) too large for RLE")
-JMESSAGE(JERR_RLE_EMPTY, "Empty RLE file")
-JMESSAGE(JERR_RLE_EOF, "Premature EOF in RLE header")
-JMESSAGE(JERR_RLE_MEM, "Insufficient memory for RLE header")
-JMESSAGE(JERR_RLE_NOT, "Not an RLE file")
-JMESSAGE(JERR_RLE_TOOMANYCHANNELS, "Cannot handle %d output channels for RLE")
-JMESSAGE(JERR_RLE_UNSUPPORTED, "Cannot handle this RLE setup")
-JMESSAGE(JTRC_RLE, "%ux%u full-color RLE file")
-JMESSAGE(JTRC_RLE_FULLMAP, "%ux%u full-color RLE file with map of length %d")
-JMESSAGE(JTRC_RLE_GRAY, "%ux%u grayscale RLE file")
-JMESSAGE(JTRC_RLE_MAPGRAY, "%ux%u grayscale RLE file with map of length %d")
-JMESSAGE(JTRC_RLE_MAPPED, "%ux%u colormapped RLE file with map of length %d")
-#endif /* RLE_SUPPORTED */
-
-#ifdef TARGA_SUPPORTED
-JMESSAGE(JERR_TGA_BADCMAP, "Unsupported Targa colormap format")
-JMESSAGE(JERR_TGA_BADPARMS, "Invalid or unsupported Targa file")
-JMESSAGE(JERR_TGA_COLORSPACE, "Targa output must be grayscale or RGB")
-JMESSAGE(JTRC_TGA, "%ux%u RGB Targa image")
-JMESSAGE(JTRC_TGA_GRAY, "%ux%u grayscale Targa image")
-JMESSAGE(JTRC_TGA_MAPPED, "%ux%u colormapped Targa image")
-#else
-JMESSAGE(JERR_TGA_NOTCOMP, "Targa support was not compiled")
-#endif /* TARGA_SUPPORTED */
-
-JMESSAGE(JERR_BAD_CMAP_FILE,
-	 "Color map file is invalid or of unsupported format")
-JMESSAGE(JERR_TOO_MANY_COLORS,
-	 "Output file format cannot handle %d colormap entries")
-JMESSAGE(JERR_UNGETC_FAILED, "ungetc failed")
-#ifdef TARGA_SUPPORTED
-JMESSAGE(JERR_UNKNOWN_FORMAT,
-	 "Unrecognized input file format --- perhaps you need -targa")
-#else
-JMESSAGE(JERR_UNKNOWN_FORMAT, "Unrecognized input file format")
-#endif
-JMESSAGE(JERR_UNSUPPORTED_FORMAT, "Unsupported output file format")
-
-#ifdef JMAKE_ENUM_LIST
-
-  JMSG_LASTADDONCODE
-} ADDON_MESSAGE_CODE;
-
-#undef JMAKE_ENUM_LIST
-#endif /* JMAKE_ENUM_LIST */
-
-/* Zap JMESSAGE macro so that future re-inclusions do nothing by default */
-#undef JMESSAGE
+/*
+ * cderror.h
+ *
+ * Copyright (C) 1994-1997, Thomas G. Lane.
+ * Modified 2009 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file defines the error and message codes for the cjpeg/djpeg
+ * applications.  These strings are not needed as part of the JPEG library
+ * proper.
+ * Edit this file to add new codes, or to translate the message strings to
+ * some other language.
+ */
+
+/*
+ * To define the enum list of message codes, include this file without
+ * defining macro JMESSAGE.  To create a message string table, include it
+ * again with a suitable JMESSAGE definition (see jerror.c for an example).
+ */
+#ifndef JMESSAGE
+#ifndef CDERROR_H
+#define CDERROR_H
+/* First time through, define the enum list */
+#define JMAKE_ENUM_LIST
+#else
+/* Repeated inclusions of this file are no-ops unless JMESSAGE is defined */
+#define JMESSAGE(code,string)
+#endif /* CDERROR_H */
+#endif /* JMESSAGE */
+
+#ifdef JMAKE_ENUM_LIST
+
+typedef enum {
+
+#define JMESSAGE(code,string)	code ,
+
+#endif /* JMAKE_ENUM_LIST */
+
+JMESSAGE(JMSG_FIRSTADDONCODE=1000, NULL) /* Must be first entry! */
+
+#ifdef BMP_SUPPORTED
+JMESSAGE(JERR_BMP_BADCMAP, "Unsupported BMP colormap format")
+JMESSAGE(JERR_BMP_BADDEPTH, "Only 8- and 24-bit BMP files are supported")
+JMESSAGE(JERR_BMP_BADHEADER, "Invalid BMP file: bad header length")
+JMESSAGE(JERR_BMP_BADPLANES, "Invalid BMP file: biPlanes not equal to 1")
+JMESSAGE(JERR_BMP_COLORSPACE, "BMP output must be grayscale or RGB")
+JMESSAGE(JERR_BMP_COMPRESSED, "Sorry, compressed BMPs not yet supported")
+JMESSAGE(JERR_BMP_EMPTY, "Empty BMP image")
+JMESSAGE(JERR_BMP_NOT, "Not a BMP file - does not start with BM")
+JMESSAGE(JTRC_BMP, "%ux%u 24-bit BMP image")
+JMESSAGE(JTRC_BMP_MAPPED, "%ux%u 8-bit colormapped BMP image")
+JMESSAGE(JTRC_BMP_OS2, "%ux%u 24-bit OS2 BMP image")
+JMESSAGE(JTRC_BMP_OS2_MAPPED, "%ux%u 8-bit colormapped OS2 BMP image")
+#endif /* BMP_SUPPORTED */
+
+#ifdef GIF_SUPPORTED
+JMESSAGE(JERR_GIF_BUG, "GIF output got confused")
+JMESSAGE(JERR_GIF_CODESIZE, "Bogus GIF codesize %d")
+JMESSAGE(JERR_GIF_COLORSPACE, "GIF output must be grayscale or RGB")
+JMESSAGE(JERR_GIF_IMAGENOTFOUND, "Too few images in GIF file")
+JMESSAGE(JERR_GIF_NOT, "Not a GIF file")
+JMESSAGE(JTRC_GIF, "%ux%ux%d GIF image")
+JMESSAGE(JTRC_GIF_BADVERSION,
+	 "Warning: unexpected GIF version number '%c%c%c'")
+JMESSAGE(JTRC_GIF_EXTENSION, "Ignoring GIF extension block of type 0x%02x")
+JMESSAGE(JTRC_GIF_NONSQUARE, "Caution: nonsquare pixels in input")
+JMESSAGE(JWRN_GIF_BADDATA, "Corrupt data in GIF file")
+JMESSAGE(JWRN_GIF_CHAR, "Bogus char 0x%02x in GIF file, ignoring")
+JMESSAGE(JWRN_GIF_ENDCODE, "Premature end of GIF image")
+JMESSAGE(JWRN_GIF_NOMOREDATA, "Ran out of GIF bits")
+#endif /* GIF_SUPPORTED */
+
+#ifdef PPM_SUPPORTED
+JMESSAGE(JERR_PPM_COLORSPACE, "PPM output must be grayscale or RGB")
+JMESSAGE(JERR_PPM_NONNUMERIC, "Nonnumeric data in PPM file")
+JMESSAGE(JERR_PPM_NOT, "Not a PPM/PGM file")
+JMESSAGE(JTRC_PGM, "%ux%u PGM image")
+JMESSAGE(JTRC_PGM_TEXT, "%ux%u text PGM image")
+JMESSAGE(JTRC_PPM, "%ux%u PPM image")
+JMESSAGE(JTRC_PPM_TEXT, "%ux%u text PPM image")
+#endif /* PPM_SUPPORTED */
+
+#ifdef RLE_SUPPORTED
+JMESSAGE(JERR_RLE_BADERROR, "Bogus error code from RLE library")
+JMESSAGE(JERR_RLE_COLORSPACE, "RLE output must be grayscale or RGB")
+JMESSAGE(JERR_RLE_DIMENSIONS, "Image dimensions (%ux%u) too large for RLE")
+JMESSAGE(JERR_RLE_EMPTY, "Empty RLE file")
+JMESSAGE(JERR_RLE_EOF, "Premature EOF in RLE header")
+JMESSAGE(JERR_RLE_MEM, "Insufficient memory for RLE header")
+JMESSAGE(JERR_RLE_NOT, "Not an RLE file")
+JMESSAGE(JERR_RLE_TOOMANYCHANNELS, "Cannot handle %d output channels for RLE")
+JMESSAGE(JERR_RLE_UNSUPPORTED, "Cannot handle this RLE setup")
+JMESSAGE(JTRC_RLE, "%ux%u full-color RLE file")
+JMESSAGE(JTRC_RLE_FULLMAP, "%ux%u full-color RLE file with map of length %d")
+JMESSAGE(JTRC_RLE_GRAY, "%ux%u grayscale RLE file")
+JMESSAGE(JTRC_RLE_MAPGRAY, "%ux%u grayscale RLE file with map of length %d")
+JMESSAGE(JTRC_RLE_MAPPED, "%ux%u colormapped RLE file with map of length %d")
+#endif /* RLE_SUPPORTED */
+
+#ifdef TARGA_SUPPORTED
+JMESSAGE(JERR_TGA_BADCMAP, "Unsupported Targa colormap format")
+JMESSAGE(JERR_TGA_BADPARMS, "Invalid or unsupported Targa file")
+JMESSAGE(JERR_TGA_COLORSPACE, "Targa output must be grayscale or RGB")
+JMESSAGE(JTRC_TGA, "%ux%u RGB Targa image")
+JMESSAGE(JTRC_TGA_GRAY, "%ux%u grayscale Targa image")
+JMESSAGE(JTRC_TGA_MAPPED, "%ux%u colormapped Targa image")
+#else
+JMESSAGE(JERR_TGA_NOTCOMP, "Targa support was not compiled")
+#endif /* TARGA_SUPPORTED */
+
+JMESSAGE(JERR_BAD_CMAP_FILE,
+	 "Color map file is invalid or of unsupported format")
+JMESSAGE(JERR_TOO_MANY_COLORS,
+	 "Output file format cannot handle %d colormap entries")
+JMESSAGE(JERR_UNGETC_FAILED, "ungetc failed")
+#ifdef TARGA_SUPPORTED
+JMESSAGE(JERR_UNKNOWN_FORMAT,
+	 "Unrecognized input file format --- perhaps you need -targa")
+#else
+JMESSAGE(JERR_UNKNOWN_FORMAT, "Unrecognized input file format")
+#endif
+JMESSAGE(JERR_UNSUPPORTED_FORMAT, "Unsupported output file format")
+
+#ifdef JMAKE_ENUM_LIST
+
+  JMSG_LASTADDONCODE
+} ADDON_MESSAGE_CODE;
+
+#undef JMAKE_ENUM_LIST
+#endif /* JMAKE_ENUM_LIST */
+
+/* Zap JMESSAGE macro so that future re-inclusions do nothing by default */
+#undef JMESSAGE
diff --git a/plugins/AdvaImg/src/LibJPEG/cdjpeg.h b/plugins/AdvaImg/src/LibJPEG/cdjpeg.h
index ed024ac3ae..c0d064ccab 100644
--- a/plugins/AdvaImg/src/LibJPEG/cdjpeg.h
+++ b/plugins/AdvaImg/src/LibJPEG/cdjpeg.h
@@ -1,187 +1,187 @@
-/*
- * cdjpeg.h
- *
- * Copyright (C) 1994-1997, Thomas G. Lane.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains common declarations for the sample applications
- * cjpeg and djpeg.  It is NOT used by the core JPEG library.
- */
-
-#define JPEG_CJPEG_DJPEG	/* define proper options in jconfig.h */
-#define JPEG_INTERNAL_OPTIONS	/* cjpeg.c,djpeg.c need to see xxx_SUPPORTED */
-#include "jinclude.h"
-#include "jpeglib.h"
-#include "jerror.h"		/* get library error codes too */
-#include "cderror.h"		/* get application-specific error codes */
-
-
-/*
- * Object interface for cjpeg's source file decoding modules
- */
-
-typedef struct cjpeg_source_struct * cjpeg_source_ptr;
-
-struct cjpeg_source_struct {
-  JMETHOD(void, start_input, (j_compress_ptr cinfo,
-			      cjpeg_source_ptr sinfo));
-  JMETHOD(JDIMENSION, get_pixel_rows, (j_compress_ptr cinfo,
-				       cjpeg_source_ptr sinfo));
-  JMETHOD(void, finish_input, (j_compress_ptr cinfo,
-			       cjpeg_source_ptr sinfo));
-
-  FILE *input_file;
-
-  JSAMPARRAY buffer;
-  JDIMENSION buffer_height;
-};
-
-
-/*
- * Object interface for djpeg's output file encoding modules
- */
-
-typedef struct djpeg_dest_struct * djpeg_dest_ptr;
-
-struct djpeg_dest_struct {
-  /* start_output is called after jpeg_start_decompress finishes.
-   * The color map will be ready at this time, if one is needed.
-   */
-  JMETHOD(void, start_output, (j_decompress_ptr cinfo,
-			       djpeg_dest_ptr dinfo));
-  /* Emit the specified number of pixel rows from the buffer. */
-  JMETHOD(void, put_pixel_rows, (j_decompress_ptr cinfo,
-				 djpeg_dest_ptr dinfo,
-				 JDIMENSION rows_supplied));
-  /* Finish up at the end of the image. */
-  JMETHOD(void, finish_output, (j_decompress_ptr cinfo,
-				djpeg_dest_ptr dinfo));
-
-  /* Target file spec; filled in by djpeg.c after object is created. */
-  FILE * output_file;
-
-  /* Output pixel-row buffer.  Created by module init or start_output.
-   * Width is cinfo->output_width * cinfo->output_components;
-   * height is buffer_height.
-   */
-  JSAMPARRAY buffer;
-  JDIMENSION buffer_height;
-};
-
-
-/*
- * cjpeg/djpeg may need to perform extra passes to convert to or from
- * the source/destination file format.  The JPEG library does not know
- * about these passes, but we'd like them to be counted by the progress
- * monitor.  We use an expanded progress monitor object to hold the
- * additional pass count.
- */
-
-struct cdjpeg_progress_mgr {
-  struct jpeg_progress_mgr pub;	/* fields known to JPEG library */
-  int completed_extra_passes;	/* extra passes completed */
-  int total_extra_passes;	/* total extra */
-  /* last printed percentage stored here to avoid multiple printouts */
-  int percent_done;
-};
-
-typedef struct cdjpeg_progress_mgr * cd_progress_ptr;
-
-
-/* Short forms of external names for systems with brain-damaged linkers. */
-
-#ifdef NEED_SHORT_EXTERNAL_NAMES
-#define jinit_read_bmp		jIRdBMP
-#define jinit_write_bmp		jIWrBMP
-#define jinit_read_gif		jIRdGIF
-#define jinit_write_gif		jIWrGIF
-#define jinit_read_ppm		jIRdPPM
-#define jinit_write_ppm		jIWrPPM
-#define jinit_read_rle		jIRdRLE
-#define jinit_write_rle		jIWrRLE
-#define jinit_read_targa	jIRdTarga
-#define jinit_write_targa	jIWrTarga
-#define read_quant_tables	RdQTables
-#define read_scan_script	RdScnScript
-#define set_quality_ratings     SetQRates
-#define set_quant_slots		SetQSlots
-#define set_sample_factors	SetSFacts
-#define read_color_map		RdCMap
-#define enable_signal_catcher	EnSigCatcher
-#define start_progress_monitor	StProgMon
-#define end_progress_monitor	EnProgMon
-#define read_stdin		RdStdin
-#define write_stdout		WrStdout
-#endif /* NEED_SHORT_EXTERNAL_NAMES */
-
-/* Module selection routines for I/O modules. */
-
-EXTERN(cjpeg_source_ptr) jinit_read_bmp JPP((j_compress_ptr cinfo));
-EXTERN(djpeg_dest_ptr) jinit_write_bmp JPP((j_decompress_ptr cinfo,
-					    boolean is_os2));
-EXTERN(cjpeg_source_ptr) jinit_read_gif JPP((j_compress_ptr cinfo));
-EXTERN(djpeg_dest_ptr) jinit_write_gif JPP((j_decompress_ptr cinfo));
-EXTERN(cjpeg_source_ptr) jinit_read_ppm JPP((j_compress_ptr cinfo));
-EXTERN(djpeg_dest_ptr) jinit_write_ppm JPP((j_decompress_ptr cinfo));
-EXTERN(cjpeg_source_ptr) jinit_read_rle JPP((j_compress_ptr cinfo));
-EXTERN(djpeg_dest_ptr) jinit_write_rle JPP((j_decompress_ptr cinfo));
-EXTERN(cjpeg_source_ptr) jinit_read_targa JPP((j_compress_ptr cinfo));
-EXTERN(djpeg_dest_ptr) jinit_write_targa JPP((j_decompress_ptr cinfo));
-
-/* cjpeg support routines (in rdswitch.c) */
-
-EXTERN(boolean) read_quant_tables JPP((j_compress_ptr cinfo, char * filename,
-				       boolean force_baseline));
-EXTERN(boolean) read_scan_script JPP((j_compress_ptr cinfo, char * filename));
-EXTERN(boolean) set_quality_ratings JPP((j_compress_ptr cinfo, char *arg,
-					 boolean force_baseline));
-EXTERN(boolean) set_quant_slots JPP((j_compress_ptr cinfo, char *arg));
-EXTERN(boolean) set_sample_factors JPP((j_compress_ptr cinfo, char *arg));
-
-/* djpeg support routines (in rdcolmap.c) */
-
-EXTERN(void) read_color_map JPP((j_decompress_ptr cinfo, FILE * infile));
-
-/* common support routines (in cdjpeg.c) */
-
-EXTERN(void) enable_signal_catcher JPP((j_common_ptr cinfo));
-EXTERN(void) start_progress_monitor JPP((j_common_ptr cinfo,
-					 cd_progress_ptr progress));
-EXTERN(void) end_progress_monitor JPP((j_common_ptr cinfo));
-EXTERN(boolean) keymatch JPP((char * arg, const char * keyword, int minchars));
-EXTERN(FILE *) read_stdin JPP((void));
-EXTERN(FILE *) write_stdout JPP((void));
-
-/* miscellaneous useful macros */
-
-#ifdef DONT_USE_B_MODE		/* define mode parameters for fopen() */
-#define READ_BINARY	"r"
-#define WRITE_BINARY	"w"
-#else
-#ifdef VMS			/* VMS is very nonstandard */
-#define READ_BINARY	"rb", "ctx=stm"
-#define WRITE_BINARY	"wb", "ctx=stm"
-#else				/* standard ANSI-compliant case */
-#define READ_BINARY	"rb"
-#define WRITE_BINARY	"wb"
-#endif
-#endif
-
-#ifndef EXIT_FAILURE		/* define exit() codes if not provided */
-#define EXIT_FAILURE  1
-#endif
-#ifndef EXIT_SUCCESS
-#ifdef VMS
-#define EXIT_SUCCESS  1		/* VMS is very nonstandard */
-#else
-#define EXIT_SUCCESS  0
-#endif
-#endif
-#ifndef EXIT_WARNING
-#ifdef VMS
-#define EXIT_WARNING  1		/* VMS is very nonstandard */
-#else
-#define EXIT_WARNING  2
-#endif
-#endif
+/*
+ * cdjpeg.h
+ *
+ * Copyright (C) 1994-1997, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains common declarations for the sample applications
+ * cjpeg and djpeg.  It is NOT used by the core JPEG library.
+ */
+
+#define JPEG_CJPEG_DJPEG	/* define proper options in jconfig.h */
+#define JPEG_INTERNAL_OPTIONS	/* cjpeg.c,djpeg.c need to see xxx_SUPPORTED */
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jerror.h"		/* get library error codes too */
+#include "cderror.h"		/* get application-specific error codes */
+
+
+/*
+ * Object interface for cjpeg's source file decoding modules
+ */
+
+typedef struct cjpeg_source_struct * cjpeg_source_ptr;
+
+struct cjpeg_source_struct {
+  JMETHOD(void, start_input, (j_compress_ptr cinfo,
+			      cjpeg_source_ptr sinfo));
+  JMETHOD(JDIMENSION, get_pixel_rows, (j_compress_ptr cinfo,
+				       cjpeg_source_ptr sinfo));
+  JMETHOD(void, finish_input, (j_compress_ptr cinfo,
+			       cjpeg_source_ptr sinfo));
+
+  FILE *input_file;
+
+  JSAMPARRAY buffer;
+  JDIMENSION buffer_height;
+};
+
+
+/*
+ * Object interface for djpeg's output file encoding modules
+ */
+
+typedef struct djpeg_dest_struct * djpeg_dest_ptr;
+
+struct djpeg_dest_struct {
+  /* start_output is called after jpeg_start_decompress finishes.
+   * The color map will be ready at this time, if one is needed.
+   */
+  JMETHOD(void, start_output, (j_decompress_ptr cinfo,
+			       djpeg_dest_ptr dinfo));
+  /* Emit the specified number of pixel rows from the buffer. */
+  JMETHOD(void, put_pixel_rows, (j_decompress_ptr cinfo,
+				 djpeg_dest_ptr dinfo,
+				 JDIMENSION rows_supplied));
+  /* Finish up at the end of the image. */
+  JMETHOD(void, finish_output, (j_decompress_ptr cinfo,
+				djpeg_dest_ptr dinfo));
+
+  /* Target file spec; filled in by djpeg.c after object is created. */
+  FILE * output_file;
+
+  /* Output pixel-row buffer.  Created by module init or start_output.
+   * Width is cinfo->output_width * cinfo->output_components;
+   * height is buffer_height.
+   */
+  JSAMPARRAY buffer;
+  JDIMENSION buffer_height;
+};
+
+
+/*
+ * cjpeg/djpeg may need to perform extra passes to convert to or from
+ * the source/destination file format.  The JPEG library does not know
+ * about these passes, but we'd like them to be counted by the progress
+ * monitor.  We use an expanded progress monitor object to hold the
+ * additional pass count.
+ */
+
+struct cdjpeg_progress_mgr {
+  struct jpeg_progress_mgr pub;	/* fields known to JPEG library */
+  int completed_extra_passes;	/* extra passes completed */
+  int total_extra_passes;	/* total extra */
+  /* last printed percentage stored here to avoid multiple printouts */
+  int percent_done;
+};
+
+typedef struct cdjpeg_progress_mgr * cd_progress_ptr;
+
+
+/* Short forms of external names for systems with brain-damaged linkers. */
+
+#ifdef NEED_SHORT_EXTERNAL_NAMES
+#define jinit_read_bmp		jIRdBMP
+#define jinit_write_bmp		jIWrBMP
+#define jinit_read_gif		jIRdGIF
+#define jinit_write_gif		jIWrGIF
+#define jinit_read_ppm		jIRdPPM
+#define jinit_write_ppm		jIWrPPM
+#define jinit_read_rle		jIRdRLE
+#define jinit_write_rle		jIWrRLE
+#define jinit_read_targa	jIRdTarga
+#define jinit_write_targa	jIWrTarga
+#define read_quant_tables	RdQTables
+#define read_scan_script	RdScnScript
+#define set_quality_ratings     SetQRates
+#define set_quant_slots		SetQSlots
+#define set_sample_factors	SetSFacts
+#define read_color_map		RdCMap
+#define enable_signal_catcher	EnSigCatcher
+#define start_progress_monitor	StProgMon
+#define end_progress_monitor	EnProgMon
+#define read_stdin		RdStdin
+#define write_stdout		WrStdout
+#endif /* NEED_SHORT_EXTERNAL_NAMES */
+
+/* Module selection routines for I/O modules. */
+
+EXTERN(cjpeg_source_ptr) jinit_read_bmp JPP((j_compress_ptr cinfo));
+EXTERN(djpeg_dest_ptr) jinit_write_bmp JPP((j_decompress_ptr cinfo,
+					    boolean is_os2));
+EXTERN(cjpeg_source_ptr) jinit_read_gif JPP((j_compress_ptr cinfo));
+EXTERN(djpeg_dest_ptr) jinit_write_gif JPP((j_decompress_ptr cinfo));
+EXTERN(cjpeg_source_ptr) jinit_read_ppm JPP((j_compress_ptr cinfo));
+EXTERN(djpeg_dest_ptr) jinit_write_ppm JPP((j_decompress_ptr cinfo));
+EXTERN(cjpeg_source_ptr) jinit_read_rle JPP((j_compress_ptr cinfo));
+EXTERN(djpeg_dest_ptr) jinit_write_rle JPP((j_decompress_ptr cinfo));
+EXTERN(cjpeg_source_ptr) jinit_read_targa JPP((j_compress_ptr cinfo));
+EXTERN(djpeg_dest_ptr) jinit_write_targa JPP((j_decompress_ptr cinfo));
+
+/* cjpeg support routines (in rdswitch.c) */
+
+EXTERN(boolean) read_quant_tables JPP((j_compress_ptr cinfo, char * filename,
+				       boolean force_baseline));
+EXTERN(boolean) read_scan_script JPP((j_compress_ptr cinfo, char * filename));
+EXTERN(boolean) set_quality_ratings JPP((j_compress_ptr cinfo, char *arg,
+					 boolean force_baseline));
+EXTERN(boolean) set_quant_slots JPP((j_compress_ptr cinfo, char *arg));
+EXTERN(boolean) set_sample_factors JPP((j_compress_ptr cinfo, char *arg));
+
+/* djpeg support routines (in rdcolmap.c) */
+
+EXTERN(void) read_color_map JPP((j_decompress_ptr cinfo, FILE * infile));
+
+/* common support routines (in cdjpeg.c) */
+
+EXTERN(void) enable_signal_catcher JPP((j_common_ptr cinfo));
+EXTERN(void) start_progress_monitor JPP((j_common_ptr cinfo,
+					 cd_progress_ptr progress));
+EXTERN(void) end_progress_monitor JPP((j_common_ptr cinfo));
+EXTERN(boolean) keymatch JPP((char * arg, const char * keyword, int minchars));
+EXTERN(FILE *) read_stdin JPP((void));
+EXTERN(FILE *) write_stdout JPP((void));
+
+/* miscellaneous useful macros */
+
+#ifdef DONT_USE_B_MODE		/* define mode parameters for fopen() */
+#define READ_BINARY	"r"
+#define WRITE_BINARY	"w"
+#else
+#ifdef VMS			/* VMS is very nonstandard */
+#define READ_BINARY	"rb", "ctx=stm"
+#define WRITE_BINARY	"wb", "ctx=stm"
+#else				/* standard ANSI-compliant case */
+#define READ_BINARY	"rb"
+#define WRITE_BINARY	"wb"
+#endif
+#endif
+
+#ifndef EXIT_FAILURE		/* define exit() codes if not provided */
+#define EXIT_FAILURE  1
+#endif
+#ifndef EXIT_SUCCESS
+#ifdef VMS
+#define EXIT_SUCCESS  1		/* VMS is very nonstandard */
+#else
+#define EXIT_SUCCESS  0
+#endif
+#endif
+#ifndef EXIT_WARNING
+#ifdef VMS
+#define EXIT_WARNING  1		/* VMS is very nonstandard */
+#else
+#define EXIT_WARNING  2
+#endif
+#endif
diff --git a/plugins/AdvaImg/src/LibJPEG/change.log b/plugins/AdvaImg/src/LibJPEG/change.log
index ce71abd2eb..be1b8870e7 100644
--- a/plugins/AdvaImg/src/LibJPEG/change.log
+++ b/plugins/AdvaImg/src/LibJPEG/change.log
@@ -1,6 +1,42 @@
 CHANGE LOG for Independent JPEG Group's JPEG software
 
 
+Version 9  13-Jan-2013
+----------------------
+
+Add cjpeg -rgb1 option to create an RGB JPEG file, and insert
+a simple reversible color transform into the processing which
+significantly improves the compression.
+The recommended command for lossless coding of RGB images is now
+cjpeg -rgb1 -block 1 -arithmetic.
+As said, this option improves the compression significantly, but
+the files are not compatible with JPEG decoders prior to IJG v9
+due to the included color transform.
+The used color transform and marker signaling is compatible with
+other JPEG standards (e.g., JPEG-LS part 2).
+
+Remove the automatic de-ANSI-fication support (Automake 1.12).
+Thank also to Nitin A Kamble for suggestion.
+
+Add remark for jpeg_mem_dest() in jdatadst.c.
+Thank to Elie-Gregoire Khoury for the hint.
+
+Support files with invalid component identifiers (created
+by Adobe PDF).  Thank to Robin Watts for the suggestion.
+
+Adapt full buffer case in jcmainct.c for use with scaled DCT.
+Thank to Sergii Biloshytskyi for the suggestion.
+
+Add type identifier for declaration of noreturn functions.
+Thank to Brett L. Moore for the suggestion.
+
+Correct argument type in format string, avoid compiler warnings.
+Thank to Vincent Torri for hint.
+
+Add missing #include directives in configuration checks, avoid
+configuration errors.  Thank to John Spencer for the hint.
+
+
 Version 8d  15-Jan-2012
 -----------------------
 
diff --git a/plugins/AdvaImg/src/LibJPEG/cjpeg.c b/plugins/AdvaImg/src/LibJPEG/cjpeg.c
index a999eeef1c..6d6b772b7a 100644
--- a/plugins/AdvaImg/src/LibJPEG/cjpeg.c
+++ b/plugins/AdvaImg/src/LibJPEG/cjpeg.c
@@ -2,7 +2,7 @@
  * cjpeg.c
  *
  * Copyright (C) 1991-1998, Thomas G. Lane.
- * Modified 2003-2011 by Guido Vollbeding.
+ * Modified 2003-2013 by Guido Vollbeding.
  * This file is part of the Independent JPEG Group's software.
  * For conditions of distribution and use, see the accompanying README file.
  *
@@ -172,6 +172,9 @@ usage (void)
 #ifdef DCT_SCALING_SUPPORTED
   fprintf(stderr, "  -block N       DCT block size (1..16; default is 8)\n");
 #endif
+#if JPEG_LIB_VERSION_MAJOR >= 9
+  fprintf(stderr, "  -rgb1          Create RGB JPEG file with reversible color transform\n");
+#endif
 #ifdef DCT_ISLOW_SUPPORTED
   fprintf(stderr, "  -dct int       Use integer DCT method%s\n",
 	  (JDCT_DEFAULT == JDCT_ISLOW ? " (default)" : ""));
@@ -310,8 +313,14 @@ parse_switches (j_compress_ptr cinfo, int argc, char **argv,
       /* Force a monochrome JPEG file to be generated. */
       jpeg_set_colorspace(cinfo, JCS_GRAYSCALE);
 
-    } else if (keymatch(arg, "rgb", 3)) {
+    } else if (keymatch(arg, "rgb", 3) || keymatch(arg, "rgb1", 4)) {
       /* Force an RGB JPEG file to be generated. */
+#if JPEG_LIB_VERSION_MAJOR >= 9
+      /* Note: Entropy table assignment in jpeg_set_colorspace depends
+       * on color_transform.
+       */
+      cinfo->color_transform = arg[3] ? JCT_SUBTRACT_GREEN : JCT_NONE;
+#endif
       jpeg_set_colorspace(cinfo, JCS_RGB);
 
     } else if (keymatch(arg, "maxmemory", 3)) {
@@ -328,7 +337,7 @@ parse_switches (j_compress_ptr cinfo, int argc, char **argv,
       cinfo->mem->max_memory_to_use = lval * 1000L;
 
     } else if (keymatch(arg, "nosmooth", 3)) {
-      /* Suppress fancy downsampling */
+      /* Suppress fancy downsampling. */
       cinfo->do_fancy_downsampling = FALSE;
 
     } else if (keymatch(arg, "optimize", 1) || keymatch(arg, "optimise", 1)) {
@@ -414,7 +423,7 @@ parse_switches (j_compress_ptr cinfo, int argc, char **argv,
       /* Scale the image by a fraction M/N. */
       if (++argn >= argc)	/* advance to next argument */
 	usage();
-      if (sscanf(argv[argn], "%d/%d",
+      if (sscanf(argv[argn], "%u/%u",
 		 &cinfo->scale_num, &cinfo->scale_denom) != 2)
 	usage();
 
diff --git a/plugins/AdvaImg/src/LibJPEG/djpeg.c b/plugins/AdvaImg/src/LibJPEG/djpeg.c
index 363c628cbe..77718cefbe 100644
--- a/plugins/AdvaImg/src/LibJPEG/djpeg.c
+++ b/plugins/AdvaImg/src/LibJPEG/djpeg.c
@@ -2,7 +2,7 @@
  * djpeg.c
  *
  * Copyright (C) 1991-1997, Thomas G. Lane.
- * Modified 2009 by Guido Vollbeding.
+ * Modified 2009-2013 by Guido Vollbeding.
  * This file is part of the Independent JPEG Group's software.
  * For conditions of distribution and use, see the accompanying README file.
  *
@@ -298,7 +298,7 @@ parse_switches (j_decompress_ptr cinfo, int argc, char **argv,
       cinfo->mem->max_memory_to_use = lval * 1000L;
 
     } else if (keymatch(arg, "nosmooth", 3)) {
-      /* Suppress fancy upsampling */
+      /* Suppress fancy upsampling. */
       cinfo->do_fancy_upsampling = FALSE;
 
     } else if (keymatch(arg, "onepass", 3)) {
@@ -327,7 +327,7 @@ parse_switches (j_decompress_ptr cinfo, int argc, char **argv,
       /* Scale the output image by a fraction M/N. */
       if (++argn >= argc)	/* advance to next argument */
 	usage();
-      if (sscanf(argv[argn], "%d/%d",
+      if (sscanf(argv[argn], "%u/%u",
 		 &cinfo->scale_num, &cinfo->scale_denom) < 1)
 	usage();
 
diff --git a/plugins/AdvaImg/src/LibJPEG/filelist.txt b/plugins/AdvaImg/src/LibJPEG/filelist.txt
index 040abffc8d..a0761fae7d 100644
--- a/plugins/AdvaImg/src/LibJPEG/filelist.txt
+++ b/plugins/AdvaImg/src/LibJPEG/filelist.txt
@@ -1,6 +1,6 @@
 IJG JPEG LIBRARY:  FILE LIST
 
-Copyright (C) 1994-2009, Thomas G. Lane, Guido Vollbeding.
+Copyright (C) 1994-2012, Thomas G. Lane, Guido Vollbeding.
 This file is part of the Independent JPEG Group's software.
 For conditions of distribution and use, see the accompanying README file.
 
@@ -197,6 +197,7 @@ config.guess
 config.sub
 depcomp
 missing
+ar-lib
 install-sh	Install shell script for those Unix systems lacking one.
 Makefile.in	Makefile input for configure.
 Makefile.am	Source file for use with Automake to generate Makefile.in.
@@ -206,8 +207,6 @@ mak*.*		Sample makefiles for particular systems.
 jconfig.*	Sample jconfig.h for particular systems.
 libjpeg.map	Script to generate shared library with versioned symbols.
 aclocal.m4	M4 macro definitions for use with Autoconf.
-ansi2knr.c	De-ANSIfier for pre-ANSI C compilers (courtesy of
-		L. Peter Deutsch and Aladdin Enterprises).
 
 Test files (see install.txt for test procedure):
 
diff --git a/plugins/AdvaImg/src/LibJPEG/install.txt b/plugins/AdvaImg/src/LibJPEG/install.txt
index d8f24952fb..d73a05761f 100644
--- a/plugins/AdvaImg/src/LibJPEG/install.txt
+++ b/plugins/AdvaImg/src/LibJPEG/install.txt
@@ -1,6 +1,6 @@
 INSTALLATION INSTRUCTIONS for the Independent JPEG Group's JPEG software
 
-Copyright (C) 1991-2011, Thomas G. Lane, Guido Vollbeding.
+Copyright (C) 1991-2012, Thomas G. Lane, Guido Vollbeding.
 This file is part of the Independent JPEG Group's software.
 For conditions of distribution and use, see the accompanying README file.
 
@@ -848,14 +848,14 @@ with /Oo-.
 Microsoft Windows (all versions), generic comments:
 
 Some Windows system include files define typedef boolean as "unsigned char".
-The IJG code also defines typedef boolean, but we make it "int" by default.
+The IJG code also defines typedef boolean, but we make it an "enum" by default.
 This doesn't affect the IJG programs because we don't import those Windows
 include files.  But if you use the JPEG library in your own program, and some
 of your program's files import one definition of boolean while some import the
 other, you can get all sorts of mysterious problems.  A good preventive step
 is to make the IJG library use "unsigned char" for boolean.  To do that,
 add something like this to your jconfig.h file:
-	/* Define "boolean" as unsigned char, not int, per Windows custom */
+	/* Define "boolean" as unsigned char, not enum, per Windows custom */
 	#ifndef __RPCNDR_H__	/* don't conflict if rpcndr.h already read */
 	typedef unsigned char boolean;
 	#endif
diff --git a/plugins/AdvaImg/src/LibJPEG/jaricom.c b/plugins/AdvaImg/src/LibJPEG/jaricom.c
index 690068861f..50ad879f53 100644
--- a/plugins/AdvaImg/src/LibJPEG/jaricom.c
+++ b/plugins/AdvaImg/src/LibJPEG/jaricom.c
@@ -1,153 +1,153 @@
-/*
- * jaricom.c
- *
- * Developed 1997-2011 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains probability estimation tables for common use in
- * arithmetic entropy encoding and decoding routines.
- *
- * This data represents Table D.3 in the JPEG spec (D.2 in the draft),
- * ISO/IEC IS 10918-1 and CCITT Recommendation ITU-T T.81, and Table 24
- * in the JBIG spec, ISO/IEC IS 11544 and CCITT Recommendation ITU-T T.82.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-
-/* The following #define specifies the packing of the four components
- * into the compact INT32 representation.
- * Note that this formula must match the actual arithmetic encoder
- * and decoder implementation.  The implementation has to be changed
- * if this formula is changed.
- * The current organization is leaned on Markus Kuhn's JBIG
- * implementation (jbig_tab.c).
- */
-
-#define V(i,a,b,c,d) (((INT32)a << 16) | ((INT32)c << 8) | ((INT32)d << 7) | b)
-
-const INT32 jpeg_aritab[113+1] = {
-/*
- * Index, Qe_Value, Next_Index_LPS, Next_Index_MPS, Switch_MPS
- */
-  V(   0, 0x5a1d,   1,   1, 1 ),
-  V(   1, 0x2586,  14,   2, 0 ),
-  V(   2, 0x1114,  16,   3, 0 ),
-  V(   3, 0x080b,  18,   4, 0 ),
-  V(   4, 0x03d8,  20,   5, 0 ),
-  V(   5, 0x01da,  23,   6, 0 ),
-  V(   6, 0x00e5,  25,   7, 0 ),
-  V(   7, 0x006f,  28,   8, 0 ),
-  V(   8, 0x0036,  30,   9, 0 ),
-  V(   9, 0x001a,  33,  10, 0 ),
-  V(  10, 0x000d,  35,  11, 0 ),
-  V(  11, 0x0006,   9,  12, 0 ),
-  V(  12, 0x0003,  10,  13, 0 ),
-  V(  13, 0x0001,  12,  13, 0 ),
-  V(  14, 0x5a7f,  15,  15, 1 ),
-  V(  15, 0x3f25,  36,  16, 0 ),
-  V(  16, 0x2cf2,  38,  17, 0 ),
-  V(  17, 0x207c,  39,  18, 0 ),
-  V(  18, 0x17b9,  40,  19, 0 ),
-  V(  19, 0x1182,  42,  20, 0 ),
-  V(  20, 0x0cef,  43,  21, 0 ),
-  V(  21, 0x09a1,  45,  22, 0 ),
-  V(  22, 0x072f,  46,  23, 0 ),
-  V(  23, 0x055c,  48,  24, 0 ),
-  V(  24, 0x0406,  49,  25, 0 ),
-  V(  25, 0x0303,  51,  26, 0 ),
-  V(  26, 0x0240,  52,  27, 0 ),
-  V(  27, 0x01b1,  54,  28, 0 ),
-  V(  28, 0x0144,  56,  29, 0 ),
-  V(  29, 0x00f5,  57,  30, 0 ),
-  V(  30, 0x00b7,  59,  31, 0 ),
-  V(  31, 0x008a,  60,  32, 0 ),
-  V(  32, 0x0068,  62,  33, 0 ),
-  V(  33, 0x004e,  63,  34, 0 ),
-  V(  34, 0x003b,  32,  35, 0 ),
-  V(  35, 0x002c,  33,   9, 0 ),
-  V(  36, 0x5ae1,  37,  37, 1 ),
-  V(  37, 0x484c,  64,  38, 0 ),
-  V(  38, 0x3a0d,  65,  39, 0 ),
-  V(  39, 0x2ef1,  67,  40, 0 ),
-  V(  40, 0x261f,  68,  41, 0 ),
-  V(  41, 0x1f33,  69,  42, 0 ),
-  V(  42, 0x19a8,  70,  43, 0 ),
-  V(  43, 0x1518,  72,  44, 0 ),
-  V(  44, 0x1177,  73,  45, 0 ),
-  V(  45, 0x0e74,  74,  46, 0 ),
-  V(  46, 0x0bfb,  75,  47, 0 ),
-  V(  47, 0x09f8,  77,  48, 0 ),
-  V(  48, 0x0861,  78,  49, 0 ),
-  V(  49, 0x0706,  79,  50, 0 ),
-  V(  50, 0x05cd,  48,  51, 0 ),
-  V(  51, 0x04de,  50,  52, 0 ),
-  V(  52, 0x040f,  50,  53, 0 ),
-  V(  53, 0x0363,  51,  54, 0 ),
-  V(  54, 0x02d4,  52,  55, 0 ),
-  V(  55, 0x025c,  53,  56, 0 ),
-  V(  56, 0x01f8,  54,  57, 0 ),
-  V(  57, 0x01a4,  55,  58, 0 ),
-  V(  58, 0x0160,  56,  59, 0 ),
-  V(  59, 0x0125,  57,  60, 0 ),
-  V(  60, 0x00f6,  58,  61, 0 ),
-  V(  61, 0x00cb,  59,  62, 0 ),
-  V(  62, 0x00ab,  61,  63, 0 ),
-  V(  63, 0x008f,  61,  32, 0 ),
-  V(  64, 0x5b12,  65,  65, 1 ),
-  V(  65, 0x4d04,  80,  66, 0 ),
-  V(  66, 0x412c,  81,  67, 0 ),
-  V(  67, 0x37d8,  82,  68, 0 ),
-  V(  68, 0x2fe8,  83,  69, 0 ),
-  V(  69, 0x293c,  84,  70, 0 ),
-  V(  70, 0x2379,  86,  71, 0 ),
-  V(  71, 0x1edf,  87,  72, 0 ),
-  V(  72, 0x1aa9,  87,  73, 0 ),
-  V(  73, 0x174e,  72,  74, 0 ),
-  V(  74, 0x1424,  72,  75, 0 ),
-  V(  75, 0x119c,  74,  76, 0 ),
-  V(  76, 0x0f6b,  74,  77, 0 ),
-  V(  77, 0x0d51,  75,  78, 0 ),
-  V(  78, 0x0bb6,  77,  79, 0 ),
-  V(  79, 0x0a40,  77,  48, 0 ),
-  V(  80, 0x5832,  80,  81, 1 ),
-  V(  81, 0x4d1c,  88,  82, 0 ),
-  V(  82, 0x438e,  89,  83, 0 ),
-  V(  83, 0x3bdd,  90,  84, 0 ),
-  V(  84, 0x34ee,  91,  85, 0 ),
-  V(  85, 0x2eae,  92,  86, 0 ),
-  V(  86, 0x299a,  93,  87, 0 ),
-  V(  87, 0x2516,  86,  71, 0 ),
-  V(  88, 0x5570,  88,  89, 1 ),
-  V(  89, 0x4ca9,  95,  90, 0 ),
-  V(  90, 0x44d9,  96,  91, 0 ),
-  V(  91, 0x3e22,  97,  92, 0 ),
-  V(  92, 0x3824,  99,  93, 0 ),
-  V(  93, 0x32b4,  99,  94, 0 ),
-  V(  94, 0x2e17,  93,  86, 0 ),
-  V(  95, 0x56a8,  95,  96, 1 ),
-  V(  96, 0x4f46, 101,  97, 0 ),
-  V(  97, 0x47e5, 102,  98, 0 ),
-  V(  98, 0x41cf, 103,  99, 0 ),
-  V(  99, 0x3c3d, 104, 100, 0 ),
-  V( 100, 0x375e,  99,  93, 0 ),
-  V( 101, 0x5231, 105, 102, 0 ),
-  V( 102, 0x4c0f, 106, 103, 0 ),
-  V( 103, 0x4639, 107, 104, 0 ),
-  V( 104, 0x415e, 103,  99, 0 ),
-  V( 105, 0x5627, 105, 106, 1 ),
-  V( 106, 0x50e7, 108, 107, 0 ),
-  V( 107, 0x4b85, 109, 103, 0 ),
-  V( 108, 0x5597, 110, 109, 0 ),
-  V( 109, 0x504f, 111, 107, 0 ),
-  V( 110, 0x5a10, 110, 111, 1 ),
-  V( 111, 0x5522, 112, 109, 0 ),
-  V( 112, 0x59eb, 112, 111, 1 ),
-/*
- * This last entry is used for fixed probability estimate of 0.5
- * as suggested in Section 10.3 Table 5 of ITU-T Rec. T.851.
- */
-  V( 113, 0x5a1d, 113, 113, 0 )
-};
+/*
+ * jaricom.c
+ *
+ * Developed 1997-2011 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains probability estimation tables for common use in
+ * arithmetic entropy encoding and decoding routines.
+ *
+ * This data represents Table D.3 in the JPEG spec (D.2 in the draft),
+ * ISO/IEC IS 10918-1 and CCITT Recommendation ITU-T T.81, and Table 24
+ * in the JBIG spec, ISO/IEC IS 11544 and CCITT Recommendation ITU-T T.82.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+/* The following #define specifies the packing of the four components
+ * into the compact INT32 representation.
+ * Note that this formula must match the actual arithmetic encoder
+ * and decoder implementation.  The implementation has to be changed
+ * if this formula is changed.
+ * The current organization is leaned on Markus Kuhn's JBIG
+ * implementation (jbig_tab.c).
+ */
+
+#define V(i,a,b,c,d) (((INT32)a << 16) | ((INT32)c << 8) | ((INT32)d << 7) | b)
+
+const INT32 jpeg_aritab[113+1] = {
+/*
+ * Index, Qe_Value, Next_Index_LPS, Next_Index_MPS, Switch_MPS
+ */
+  V(   0, 0x5a1d,   1,   1, 1 ),
+  V(   1, 0x2586,  14,   2, 0 ),
+  V(   2, 0x1114,  16,   3, 0 ),
+  V(   3, 0x080b,  18,   4, 0 ),
+  V(   4, 0x03d8,  20,   5, 0 ),
+  V(   5, 0x01da,  23,   6, 0 ),
+  V(   6, 0x00e5,  25,   7, 0 ),
+  V(   7, 0x006f,  28,   8, 0 ),
+  V(   8, 0x0036,  30,   9, 0 ),
+  V(   9, 0x001a,  33,  10, 0 ),
+  V(  10, 0x000d,  35,  11, 0 ),
+  V(  11, 0x0006,   9,  12, 0 ),
+  V(  12, 0x0003,  10,  13, 0 ),
+  V(  13, 0x0001,  12,  13, 0 ),
+  V(  14, 0x5a7f,  15,  15, 1 ),
+  V(  15, 0x3f25,  36,  16, 0 ),
+  V(  16, 0x2cf2,  38,  17, 0 ),
+  V(  17, 0x207c,  39,  18, 0 ),
+  V(  18, 0x17b9,  40,  19, 0 ),
+  V(  19, 0x1182,  42,  20, 0 ),
+  V(  20, 0x0cef,  43,  21, 0 ),
+  V(  21, 0x09a1,  45,  22, 0 ),
+  V(  22, 0x072f,  46,  23, 0 ),
+  V(  23, 0x055c,  48,  24, 0 ),
+  V(  24, 0x0406,  49,  25, 0 ),
+  V(  25, 0x0303,  51,  26, 0 ),
+  V(  26, 0x0240,  52,  27, 0 ),
+  V(  27, 0x01b1,  54,  28, 0 ),
+  V(  28, 0x0144,  56,  29, 0 ),
+  V(  29, 0x00f5,  57,  30, 0 ),
+  V(  30, 0x00b7,  59,  31, 0 ),
+  V(  31, 0x008a,  60,  32, 0 ),
+  V(  32, 0x0068,  62,  33, 0 ),
+  V(  33, 0x004e,  63,  34, 0 ),
+  V(  34, 0x003b,  32,  35, 0 ),
+  V(  35, 0x002c,  33,   9, 0 ),
+  V(  36, 0x5ae1,  37,  37, 1 ),
+  V(  37, 0x484c,  64,  38, 0 ),
+  V(  38, 0x3a0d,  65,  39, 0 ),
+  V(  39, 0x2ef1,  67,  40, 0 ),
+  V(  40, 0x261f,  68,  41, 0 ),
+  V(  41, 0x1f33,  69,  42, 0 ),
+  V(  42, 0x19a8,  70,  43, 0 ),
+  V(  43, 0x1518,  72,  44, 0 ),
+  V(  44, 0x1177,  73,  45, 0 ),
+  V(  45, 0x0e74,  74,  46, 0 ),
+  V(  46, 0x0bfb,  75,  47, 0 ),
+  V(  47, 0x09f8,  77,  48, 0 ),
+  V(  48, 0x0861,  78,  49, 0 ),
+  V(  49, 0x0706,  79,  50, 0 ),
+  V(  50, 0x05cd,  48,  51, 0 ),
+  V(  51, 0x04de,  50,  52, 0 ),
+  V(  52, 0x040f,  50,  53, 0 ),
+  V(  53, 0x0363,  51,  54, 0 ),
+  V(  54, 0x02d4,  52,  55, 0 ),
+  V(  55, 0x025c,  53,  56, 0 ),
+  V(  56, 0x01f8,  54,  57, 0 ),
+  V(  57, 0x01a4,  55,  58, 0 ),
+  V(  58, 0x0160,  56,  59, 0 ),
+  V(  59, 0x0125,  57,  60, 0 ),
+  V(  60, 0x00f6,  58,  61, 0 ),
+  V(  61, 0x00cb,  59,  62, 0 ),
+  V(  62, 0x00ab,  61,  63, 0 ),
+  V(  63, 0x008f,  61,  32, 0 ),
+  V(  64, 0x5b12,  65,  65, 1 ),
+  V(  65, 0x4d04,  80,  66, 0 ),
+  V(  66, 0x412c,  81,  67, 0 ),
+  V(  67, 0x37d8,  82,  68, 0 ),
+  V(  68, 0x2fe8,  83,  69, 0 ),
+  V(  69, 0x293c,  84,  70, 0 ),
+  V(  70, 0x2379,  86,  71, 0 ),
+  V(  71, 0x1edf,  87,  72, 0 ),
+  V(  72, 0x1aa9,  87,  73, 0 ),
+  V(  73, 0x174e,  72,  74, 0 ),
+  V(  74, 0x1424,  72,  75, 0 ),
+  V(  75, 0x119c,  74,  76, 0 ),
+  V(  76, 0x0f6b,  74,  77, 0 ),
+  V(  77, 0x0d51,  75,  78, 0 ),
+  V(  78, 0x0bb6,  77,  79, 0 ),
+  V(  79, 0x0a40,  77,  48, 0 ),
+  V(  80, 0x5832,  80,  81, 1 ),
+  V(  81, 0x4d1c,  88,  82, 0 ),
+  V(  82, 0x438e,  89,  83, 0 ),
+  V(  83, 0x3bdd,  90,  84, 0 ),
+  V(  84, 0x34ee,  91,  85, 0 ),
+  V(  85, 0x2eae,  92,  86, 0 ),
+  V(  86, 0x299a,  93,  87, 0 ),
+  V(  87, 0x2516,  86,  71, 0 ),
+  V(  88, 0x5570,  88,  89, 1 ),
+  V(  89, 0x4ca9,  95,  90, 0 ),
+  V(  90, 0x44d9,  96,  91, 0 ),
+  V(  91, 0x3e22,  97,  92, 0 ),
+  V(  92, 0x3824,  99,  93, 0 ),
+  V(  93, 0x32b4,  99,  94, 0 ),
+  V(  94, 0x2e17,  93,  86, 0 ),
+  V(  95, 0x56a8,  95,  96, 1 ),
+  V(  96, 0x4f46, 101,  97, 0 ),
+  V(  97, 0x47e5, 102,  98, 0 ),
+  V(  98, 0x41cf, 103,  99, 0 ),
+  V(  99, 0x3c3d, 104, 100, 0 ),
+  V( 100, 0x375e,  99,  93, 0 ),
+  V( 101, 0x5231, 105, 102, 0 ),
+  V( 102, 0x4c0f, 106, 103, 0 ),
+  V( 103, 0x4639, 107, 104, 0 ),
+  V( 104, 0x415e, 103,  99, 0 ),
+  V( 105, 0x5627, 105, 106, 1 ),
+  V( 106, 0x50e7, 108, 107, 0 ),
+  V( 107, 0x4b85, 109, 103, 0 ),
+  V( 108, 0x5597, 110, 109, 0 ),
+  V( 109, 0x504f, 111, 107, 0 ),
+  V( 110, 0x5a10, 110, 111, 1 ),
+  V( 111, 0x5522, 112, 109, 0 ),
+  V( 112, 0x59eb, 112, 111, 1 ),
+/*
+ * This last entry is used for fixed probability estimate of 0.5
+ * as suggested in Section 10.3 Table 5 of ITU-T Rec. T.851.
+ */
+  V( 113, 0x5a1d, 113, 113, 0 )
+};
diff --git a/plugins/AdvaImg/src/LibJPEG/jcapimin.c b/plugins/AdvaImg/src/LibJPEG/jcapimin.c
index 639ce86f44..3382d91557 100644
--- a/plugins/AdvaImg/src/LibJPEG/jcapimin.c
+++ b/plugins/AdvaImg/src/LibJPEG/jcapimin.c
@@ -1,288 +1,288 @@
-/*
- * jcapimin.c
- *
- * Copyright (C) 1994-1998, Thomas G. Lane.
- * Modified 2003-2010 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains application interface code for the compression half
- * of the JPEG library.  These are the "minimum" API routines that may be
- * needed in either the normal full-compression case or the transcoding-only
- * case.
- *
- * Most of the routines intended to be called directly by an application
- * are in this file or in jcapistd.c.  But also see jcparam.c for
- * parameter-setup helper routines, jcomapi.c for routines shared by
- * compression and decompression, and jctrans.c for the transcoding case.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-
-
-/*
- * Initialization of a JPEG compression object.
- * The error manager must already be set up (in case memory manager fails).
- */
-
-GLOBAL(void)
-jpeg_CreateCompress (j_compress_ptr cinfo, int version, size_t structsize)
-{
-  int i;
-
-  /* Guard against version mismatches between library and caller. */
-  cinfo->mem = NULL;		/* so jpeg_destroy knows mem mgr not called */
-  if (version != JPEG_LIB_VERSION)
-    ERREXIT2(cinfo, JERR_BAD_LIB_VERSION, JPEG_LIB_VERSION, version);
-  if (structsize != SIZEOF(struct jpeg_compress_struct))
-    ERREXIT2(cinfo, JERR_BAD_STRUCT_SIZE, 
-	     (int) SIZEOF(struct jpeg_compress_struct), (int) structsize);
-
-  /* For debugging purposes, we zero the whole master structure.
-   * But the application has already set the err pointer, and may have set
-   * client_data, so we have to save and restore those fields.
-   * Note: if application hasn't set client_data, tools like Purify may
-   * complain here.
-   */
-  {
-    struct jpeg_error_mgr * err = cinfo->err;
-    void * client_data = cinfo->client_data; /* ignore Purify complaint here */
-    MEMZERO(cinfo, SIZEOF(struct jpeg_compress_struct));
-    cinfo->err = err;
-    cinfo->client_data = client_data;
-  }
-  cinfo->is_decompressor = FALSE;
-
-  /* Initialize a memory manager instance for this object */
-  jinit_memory_mgr((j_common_ptr) cinfo);
-
-  /* Zero out pointers to permanent structures. */
-  cinfo->progress = NULL;
-  cinfo->dest = NULL;
-
-  cinfo->comp_info = NULL;
-
-  for (i = 0; i < NUM_QUANT_TBLS; i++) {
-    cinfo->quant_tbl_ptrs[i] = NULL;
-    cinfo->q_scale_factor[i] = 100;
-  }
-
-  for (i = 0; i < NUM_HUFF_TBLS; i++) {
-    cinfo->dc_huff_tbl_ptrs[i] = NULL;
-    cinfo->ac_huff_tbl_ptrs[i] = NULL;
-  }
-
-  /* Must do it here for emit_dqt in case jpeg_write_tables is used */
-  cinfo->block_size = DCTSIZE;
-  cinfo->natural_order = jpeg_natural_order;
-  cinfo->lim_Se = DCTSIZE2-1;
-
-  cinfo->script_space = NULL;
-
-  cinfo->input_gamma = 1.0;	/* in case application forgets */
-
-  /* OK, I'm ready */
-  cinfo->global_state = CSTATE_START;
-}
-
-
-/*
- * Destruction of a JPEG compression object
- */
-
-GLOBAL(void)
-jpeg_destroy_compress (j_compress_ptr cinfo)
-{
-  jpeg_destroy((j_common_ptr) cinfo); /* use common routine */
-}
-
-
-/*
- * Abort processing of a JPEG compression operation,
- * but don't destroy the object itself.
- */
-
-GLOBAL(void)
-jpeg_abort_compress (j_compress_ptr cinfo)
-{
-  jpeg_abort((j_common_ptr) cinfo); /* use common routine */
-}
-
-
-/*
- * Forcibly suppress or un-suppress all quantization and Huffman tables.
- * Marks all currently defined tables as already written (if suppress)
- * or not written (if !suppress).  This will control whether they get emitted
- * by a subsequent jpeg_start_compress call.
- *
- * This routine is exported for use by applications that want to produce
- * abbreviated JPEG datastreams.  It logically belongs in jcparam.c, but
- * since it is called by jpeg_start_compress, we put it here --- otherwise
- * jcparam.o would be linked whether the application used it or not.
- */
-
-GLOBAL(void)
-jpeg_suppress_tables (j_compress_ptr cinfo, boolean suppress)
-{
-  int i;
-  JQUANT_TBL * qtbl;
-  JHUFF_TBL * htbl;
-
-  for (i = 0; i < NUM_QUANT_TBLS; i++) {
-    if ((qtbl = cinfo->quant_tbl_ptrs[i]) != NULL)
-      qtbl->sent_table = suppress;
-  }
-
-  for (i = 0; i < NUM_HUFF_TBLS; i++) {
-    if ((htbl = cinfo->dc_huff_tbl_ptrs[i]) != NULL)
-      htbl->sent_table = suppress;
-    if ((htbl = cinfo->ac_huff_tbl_ptrs[i]) != NULL)
-      htbl->sent_table = suppress;
-  }
-}
-
-
-/*
- * Finish JPEG compression.
- *
- * If a multipass operating mode was selected, this may do a great deal of
- * work including most of the actual output.
- */
-
-GLOBAL(void)
-jpeg_finish_compress (j_compress_ptr cinfo)
-{
-  JDIMENSION iMCU_row;
-
-  if (cinfo->global_state == CSTATE_SCANNING ||
-      cinfo->global_state == CSTATE_RAW_OK) {
-    /* Terminate first pass */
-    if (cinfo->next_scanline < cinfo->image_height)
-      ERREXIT(cinfo, JERR_TOO_LITTLE_DATA);
-    (*cinfo->master->finish_pass) (cinfo);
-  } else if (cinfo->global_state != CSTATE_WRCOEFS)
-    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
-  /* Perform any remaining passes */
-  while (! cinfo->master->is_last_pass) {
-    (*cinfo->master->prepare_for_pass) (cinfo);
-    for (iMCU_row = 0; iMCU_row < cinfo->total_iMCU_rows; iMCU_row++) {
-      if (cinfo->progress != NULL) {
-	cinfo->progress->pass_counter = (long) iMCU_row;
-	cinfo->progress->pass_limit = (long) cinfo->total_iMCU_rows;
-	(*cinfo->progress->progress_monitor) ((j_common_ptr) cinfo);
-      }
-      /* We bypass the main controller and invoke coef controller directly;
-       * all work is being done from the coefficient buffer.
-       */
-      if (! (*cinfo->coef->compress_data) (cinfo, (JSAMPIMAGE) NULL))
-	ERREXIT(cinfo, JERR_CANT_SUSPEND);
-    }
-    (*cinfo->master->finish_pass) (cinfo);
-  }
-  /* Write EOI, do final cleanup */
-  (*cinfo->marker->write_file_trailer) (cinfo);
-  (*cinfo->dest->term_destination) (cinfo);
-  /* We can use jpeg_abort to release memory and reset global_state */
-  jpeg_abort((j_common_ptr) cinfo);
-}
-
-
-/*
- * Write a special marker.
- * This is only recommended for writing COM or APPn markers.
- * Must be called after jpeg_start_compress() and before
- * first call to jpeg_write_scanlines() or jpeg_write_raw_data().
- */
-
-GLOBAL(void)
-jpeg_write_marker (j_compress_ptr cinfo, int marker,
-		   const JOCTET *dataptr, unsigned int datalen)
-{
-  JMETHOD(void, write_marker_byte, (j_compress_ptr info, int val));
-
-  if (cinfo->next_scanline != 0 ||
-      (cinfo->global_state != CSTATE_SCANNING &&
-       cinfo->global_state != CSTATE_RAW_OK &&
-       cinfo->global_state != CSTATE_WRCOEFS))
-    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
-
-  (*cinfo->marker->write_marker_header) (cinfo, marker, datalen);
-  write_marker_byte = cinfo->marker->write_marker_byte;	/* copy for speed */
-  while (datalen--) {
-    (*write_marker_byte) (cinfo, *dataptr);
-    dataptr++;
-  }
-}
-
-/* Same, but piecemeal. */
-
-GLOBAL(void)
-jpeg_write_m_header (j_compress_ptr cinfo, int marker, unsigned int datalen)
-{
-  if (cinfo->next_scanline != 0 ||
-      (cinfo->global_state != CSTATE_SCANNING &&
-       cinfo->global_state != CSTATE_RAW_OK &&
-       cinfo->global_state != CSTATE_WRCOEFS))
-    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
-
-  (*cinfo->marker->write_marker_header) (cinfo, marker, datalen);
-}
-
-GLOBAL(void)
-jpeg_write_m_byte (j_compress_ptr cinfo, int val)
-{
-  (*cinfo->marker->write_marker_byte) (cinfo, val);
-}
-
-
-/*
- * Alternate compression function: just write an abbreviated table file.
- * Before calling this, all parameters and a data destination must be set up.
- *
- * To produce a pair of files containing abbreviated tables and abbreviated
- * image data, one would proceed as follows:
- *
- *		initialize JPEG object
- *		set JPEG parameters
- *		set destination to table file
- *		jpeg_write_tables(cinfo);
- *		set destination to image file
- *		jpeg_start_compress(cinfo, FALSE);
- *		write data...
- *		jpeg_finish_compress(cinfo);
- *
- * jpeg_write_tables has the side effect of marking all tables written
- * (same as jpeg_suppress_tables(..., TRUE)).  Thus a subsequent start_compress
- * will not re-emit the tables unless it is passed write_all_tables=TRUE.
- */
-
-GLOBAL(void)
-jpeg_write_tables (j_compress_ptr cinfo)
-{
-  if (cinfo->global_state != CSTATE_START)
-    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
-
-  /* (Re)initialize error mgr and destination modules */
-  (*cinfo->err->reset_error_mgr) ((j_common_ptr) cinfo);
-  (*cinfo->dest->init_destination) (cinfo);
-  /* Initialize the marker writer ... bit of a crock to do it here. */
-  jinit_marker_writer(cinfo);
-  /* Write them tables! */
-  (*cinfo->marker->write_tables_only) (cinfo);
-  /* And clean up. */
-  (*cinfo->dest->term_destination) (cinfo);
-  /*
-   * In library releases up through v6a, we called jpeg_abort() here to free
-   * any working memory allocated by the destination manager and marker
-   * writer.  Some applications had a problem with that: they allocated space
-   * of their own from the library memory manager, and didn't want it to go
-   * away during write_tables.  So now we do nothing.  This will cause a
-   * memory leak if an app calls write_tables repeatedly without doing a full
-   * compression cycle or otherwise resetting the JPEG object.  However, that
-   * seems less bad than unexpectedly freeing memory in the normal case.
-   * An app that prefers the old behavior can call jpeg_abort for itself after
-   * each call to jpeg_write_tables().
-   */
-}
+/*
+ * jcapimin.c
+ *
+ * Copyright (C) 1994-1998, Thomas G. Lane.
+ * Modified 2003-2010 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains application interface code for the compression half
+ * of the JPEG library.  These are the "minimum" API routines that may be
+ * needed in either the normal full-compression case or the transcoding-only
+ * case.
+ *
+ * Most of the routines intended to be called directly by an application
+ * are in this file or in jcapistd.c.  But also see jcparam.c for
+ * parameter-setup helper routines, jcomapi.c for routines shared by
+ * compression and decompression, and jctrans.c for the transcoding case.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/*
+ * Initialization of a JPEG compression object.
+ * The error manager must already be set up (in case memory manager fails).
+ */
+
+GLOBAL(void)
+jpeg_CreateCompress (j_compress_ptr cinfo, int version, size_t structsize)
+{
+  int i;
+
+  /* Guard against version mismatches between library and caller. */
+  cinfo->mem = NULL;		/* so jpeg_destroy knows mem mgr not called */
+  if (version != JPEG_LIB_VERSION)
+    ERREXIT2(cinfo, JERR_BAD_LIB_VERSION, JPEG_LIB_VERSION, version);
+  if (structsize != SIZEOF(struct jpeg_compress_struct))
+    ERREXIT2(cinfo, JERR_BAD_STRUCT_SIZE, 
+	     (int) SIZEOF(struct jpeg_compress_struct), (int) structsize);
+
+  /* For debugging purposes, we zero the whole master structure.
+   * But the application has already set the err pointer, and may have set
+   * client_data, so we have to save and restore those fields.
+   * Note: if application hasn't set client_data, tools like Purify may
+   * complain here.
+   */
+  {
+    struct jpeg_error_mgr * err = cinfo->err;
+    void * client_data = cinfo->client_data; /* ignore Purify complaint here */
+    MEMZERO(cinfo, SIZEOF(struct jpeg_compress_struct));
+    cinfo->err = err;
+    cinfo->client_data = client_data;
+  }
+  cinfo->is_decompressor = FALSE;
+
+  /* Initialize a memory manager instance for this object */
+  jinit_memory_mgr((j_common_ptr) cinfo);
+
+  /* Zero out pointers to permanent structures. */
+  cinfo->progress = NULL;
+  cinfo->dest = NULL;
+
+  cinfo->comp_info = NULL;
+
+  for (i = 0; i < NUM_QUANT_TBLS; i++) {
+    cinfo->quant_tbl_ptrs[i] = NULL;
+    cinfo->q_scale_factor[i] = 100;
+  }
+
+  for (i = 0; i < NUM_HUFF_TBLS; i++) {
+    cinfo->dc_huff_tbl_ptrs[i] = NULL;
+    cinfo->ac_huff_tbl_ptrs[i] = NULL;
+  }
+
+  /* Must do it here for emit_dqt in case jpeg_write_tables is used */
+  cinfo->block_size = DCTSIZE;
+  cinfo->natural_order = jpeg_natural_order;
+  cinfo->lim_Se = DCTSIZE2-1;
+
+  cinfo->script_space = NULL;
+
+  cinfo->input_gamma = 1.0;	/* in case application forgets */
+
+  /* OK, I'm ready */
+  cinfo->global_state = CSTATE_START;
+}
+
+
+/*
+ * Destruction of a JPEG compression object
+ */
+
+GLOBAL(void)
+jpeg_destroy_compress (j_compress_ptr cinfo)
+{
+  jpeg_destroy((j_common_ptr) cinfo); /* use common routine */
+}
+
+
+/*
+ * Abort processing of a JPEG compression operation,
+ * but don't destroy the object itself.
+ */
+
+GLOBAL(void)
+jpeg_abort_compress (j_compress_ptr cinfo)
+{
+  jpeg_abort((j_common_ptr) cinfo); /* use common routine */
+}
+
+
+/*
+ * Forcibly suppress or un-suppress all quantization and Huffman tables.
+ * Marks all currently defined tables as already written (if suppress)
+ * or not written (if !suppress).  This will control whether they get emitted
+ * by a subsequent jpeg_start_compress call.
+ *
+ * This routine is exported for use by applications that want to produce
+ * abbreviated JPEG datastreams.  It logically belongs in jcparam.c, but
+ * since it is called by jpeg_start_compress, we put it here --- otherwise
+ * jcparam.o would be linked whether the application used it or not.
+ */
+
+GLOBAL(void)
+jpeg_suppress_tables (j_compress_ptr cinfo, boolean suppress)
+{
+  int i;
+  JQUANT_TBL * qtbl;
+  JHUFF_TBL * htbl;
+
+  for (i = 0; i < NUM_QUANT_TBLS; i++) {
+    if ((qtbl = cinfo->quant_tbl_ptrs[i]) != NULL)
+      qtbl->sent_table = suppress;
+  }
+
+  for (i = 0; i < NUM_HUFF_TBLS; i++) {
+    if ((htbl = cinfo->dc_huff_tbl_ptrs[i]) != NULL)
+      htbl->sent_table = suppress;
+    if ((htbl = cinfo->ac_huff_tbl_ptrs[i]) != NULL)
+      htbl->sent_table = suppress;
+  }
+}
+
+
+/*
+ * Finish JPEG compression.
+ *
+ * If a multipass operating mode was selected, this may do a great deal of
+ * work including most of the actual output.
+ */
+
+GLOBAL(void)
+jpeg_finish_compress (j_compress_ptr cinfo)
+{
+  JDIMENSION iMCU_row;
+
+  if (cinfo->global_state == CSTATE_SCANNING ||
+      cinfo->global_state == CSTATE_RAW_OK) {
+    /* Terminate first pass */
+    if (cinfo->next_scanline < cinfo->image_height)
+      ERREXIT(cinfo, JERR_TOO_LITTLE_DATA);
+    (*cinfo->master->finish_pass) (cinfo);
+  } else if (cinfo->global_state != CSTATE_WRCOEFS)
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+  /* Perform any remaining passes */
+  while (! cinfo->master->is_last_pass) {
+    (*cinfo->master->prepare_for_pass) (cinfo);
+    for (iMCU_row = 0; iMCU_row < cinfo->total_iMCU_rows; iMCU_row++) {
+      if (cinfo->progress != NULL) {
+	cinfo->progress->pass_counter = (long) iMCU_row;
+	cinfo->progress->pass_limit = (long) cinfo->total_iMCU_rows;
+	(*cinfo->progress->progress_monitor) ((j_common_ptr) cinfo);
+      }
+      /* We bypass the main controller and invoke coef controller directly;
+       * all work is being done from the coefficient buffer.
+       */
+      if (! (*cinfo->coef->compress_data) (cinfo, (JSAMPIMAGE) NULL))
+	ERREXIT(cinfo, JERR_CANT_SUSPEND);
+    }
+    (*cinfo->master->finish_pass) (cinfo);
+  }
+  /* Write EOI, do final cleanup */
+  (*cinfo->marker->write_file_trailer) (cinfo);
+  (*cinfo->dest->term_destination) (cinfo);
+  /* We can use jpeg_abort to release memory and reset global_state */
+  jpeg_abort((j_common_ptr) cinfo);
+}
+
+
+/*
+ * Write a special marker.
+ * This is only recommended for writing COM or APPn markers.
+ * Must be called after jpeg_start_compress() and before
+ * first call to jpeg_write_scanlines() or jpeg_write_raw_data().
+ */
+
+GLOBAL(void)
+jpeg_write_marker (j_compress_ptr cinfo, int marker,
+		   const JOCTET *dataptr, unsigned int datalen)
+{
+  JMETHOD(void, write_marker_byte, (j_compress_ptr info, int val));
+
+  if (cinfo->next_scanline != 0 ||
+      (cinfo->global_state != CSTATE_SCANNING &&
+       cinfo->global_state != CSTATE_RAW_OK &&
+       cinfo->global_state != CSTATE_WRCOEFS))
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+
+  (*cinfo->marker->write_marker_header) (cinfo, marker, datalen);
+  write_marker_byte = cinfo->marker->write_marker_byte;	/* copy for speed */
+  while (datalen--) {
+    (*write_marker_byte) (cinfo, *dataptr);
+    dataptr++;
+  }
+}
+
+/* Same, but piecemeal. */
+
+GLOBAL(void)
+jpeg_write_m_header (j_compress_ptr cinfo, int marker, unsigned int datalen)
+{
+  if (cinfo->next_scanline != 0 ||
+      (cinfo->global_state != CSTATE_SCANNING &&
+       cinfo->global_state != CSTATE_RAW_OK &&
+       cinfo->global_state != CSTATE_WRCOEFS))
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+
+  (*cinfo->marker->write_marker_header) (cinfo, marker, datalen);
+}
+
+GLOBAL(void)
+jpeg_write_m_byte (j_compress_ptr cinfo, int val)
+{
+  (*cinfo->marker->write_marker_byte) (cinfo, val);
+}
+
+
+/*
+ * Alternate compression function: just write an abbreviated table file.
+ * Before calling this, all parameters and a data destination must be set up.
+ *
+ * To produce a pair of files containing abbreviated tables and abbreviated
+ * image data, one would proceed as follows:
+ *
+ *		initialize JPEG object
+ *		set JPEG parameters
+ *		set destination to table file
+ *		jpeg_write_tables(cinfo);
+ *		set destination to image file
+ *		jpeg_start_compress(cinfo, FALSE);
+ *		write data...
+ *		jpeg_finish_compress(cinfo);
+ *
+ * jpeg_write_tables has the side effect of marking all tables written
+ * (same as jpeg_suppress_tables(..., TRUE)).  Thus a subsequent start_compress
+ * will not re-emit the tables unless it is passed write_all_tables=TRUE.
+ */
+
+GLOBAL(void)
+jpeg_write_tables (j_compress_ptr cinfo)
+{
+  if (cinfo->global_state != CSTATE_START)
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+
+  /* (Re)initialize error mgr and destination modules */
+  (*cinfo->err->reset_error_mgr) ((j_common_ptr) cinfo);
+  (*cinfo->dest->init_destination) (cinfo);
+  /* Initialize the marker writer ... bit of a crock to do it here. */
+  jinit_marker_writer(cinfo);
+  /* Write them tables! */
+  (*cinfo->marker->write_tables_only) (cinfo);
+  /* And clean up. */
+  (*cinfo->dest->term_destination) (cinfo);
+  /*
+   * In library releases up through v6a, we called jpeg_abort() here to free
+   * any working memory allocated by the destination manager and marker
+   * writer.  Some applications had a problem with that: they allocated space
+   * of their own from the library memory manager, and didn't want it to go
+   * away during write_tables.  So now we do nothing.  This will cause a
+   * memory leak if an app calls write_tables repeatedly without doing a full
+   * compression cycle or otherwise resetting the JPEG object.  However, that
+   * seems less bad than unexpectedly freeing memory in the normal case.
+   * An app that prefers the old behavior can call jpeg_abort for itself after
+   * each call to jpeg_write_tables().
+   */
+}
diff --git a/plugins/AdvaImg/src/LibJPEG/jcapistd.c b/plugins/AdvaImg/src/LibJPEG/jcapistd.c
index c0320b1b19..fed66caf17 100644
--- a/plugins/AdvaImg/src/LibJPEG/jcapistd.c
+++ b/plugins/AdvaImg/src/LibJPEG/jcapistd.c
@@ -1,161 +1,161 @@
-/*
- * jcapistd.c
- *
- * Copyright (C) 1994-1996, Thomas G. Lane.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains application interface code for the compression half
- * of the JPEG library.  These are the "standard" API routines that are
- * used in the normal full-compression case.  They are not used by a
- * transcoding-only application.  Note that if an application links in
- * jpeg_start_compress, it will end up linking in the entire compressor.
- * We thus must separate this file from jcapimin.c to avoid linking the
- * whole compression library into a transcoder.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-
-
-/*
- * Compression initialization.
- * Before calling this, all parameters and a data destination must be set up.
- *
- * We require a write_all_tables parameter as a failsafe check when writing
- * multiple datastreams from the same compression object.  Since prior runs
- * will have left all the tables marked sent_table=TRUE, a subsequent run
- * would emit an abbreviated stream (no tables) by default.  This may be what
- * is wanted, but for safety's sake it should not be the default behavior:
- * programmers should have to make a deliberate choice to emit abbreviated
- * images.  Therefore the documentation and examples should encourage people
- * to pass write_all_tables=TRUE; then it will take active thought to do the
- * wrong thing.
- */
-
-GLOBAL(void)
-jpeg_start_compress (j_compress_ptr cinfo, boolean write_all_tables)
-{
-  if (cinfo->global_state != CSTATE_START)
-    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
-
-  if (write_all_tables)
-    jpeg_suppress_tables(cinfo, FALSE);	/* mark all tables to be written */
-
-  /* (Re)initialize error mgr and destination modules */
-  (*cinfo->err->reset_error_mgr) ((j_common_ptr) cinfo);
-  (*cinfo->dest->init_destination) (cinfo);
-  /* Perform master selection of active modules */
-  jinit_compress_master(cinfo);
-  /* Set up for the first pass */
-  (*cinfo->master->prepare_for_pass) (cinfo);
-  /* Ready for application to drive first pass through jpeg_write_scanlines
-   * or jpeg_write_raw_data.
-   */
-  cinfo->next_scanline = 0;
-  cinfo->global_state = (cinfo->raw_data_in ? CSTATE_RAW_OK : CSTATE_SCANNING);
-}
-
-
-/*
- * Write some scanlines of data to the JPEG compressor.
- *
- * The return value will be the number of lines actually written.
- * This should be less than the supplied num_lines only in case that
- * the data destination module has requested suspension of the compressor,
- * or if more than image_height scanlines are passed in.
- *
- * Note: we warn about excess calls to jpeg_write_scanlines() since
- * this likely signals an application programmer error.  However,
- * excess scanlines passed in the last valid call are *silently* ignored,
- * so that the application need not adjust num_lines for end-of-image
- * when using a multiple-scanline buffer.
- */
-
-GLOBAL(JDIMENSION)
-jpeg_write_scanlines (j_compress_ptr cinfo, JSAMPARRAY scanlines,
-		      JDIMENSION num_lines)
-{
-  JDIMENSION row_ctr, rows_left;
-
-  if (cinfo->global_state != CSTATE_SCANNING)
-    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
-  if (cinfo->next_scanline >= cinfo->image_height)
-    WARNMS(cinfo, JWRN_TOO_MUCH_DATA);
-
-  /* Call progress monitor hook if present */
-  if (cinfo->progress != NULL) {
-    cinfo->progress->pass_counter = (long) cinfo->next_scanline;
-    cinfo->progress->pass_limit = (long) cinfo->image_height;
-    (*cinfo->progress->progress_monitor) ((j_common_ptr) cinfo);
-  }
-
-  /* Give master control module another chance if this is first call to
-   * jpeg_write_scanlines.  This lets output of the frame/scan headers be
-   * delayed so that application can write COM, etc, markers between
-   * jpeg_start_compress and jpeg_write_scanlines.
-   */
-  if (cinfo->master->call_pass_startup)
-    (*cinfo->master->pass_startup) (cinfo);
-
-  /* Ignore any extra scanlines at bottom of image. */
-  rows_left = cinfo->image_height - cinfo->next_scanline;
-  if (num_lines > rows_left)
-    num_lines = rows_left;
-
-  row_ctr = 0;
-  (*cinfo->main->process_data) (cinfo, scanlines, &row_ctr, num_lines);
-  cinfo->next_scanline += row_ctr;
-  return row_ctr;
-}
-
-
-/*
- * Alternate entry point to write raw data.
- * Processes exactly one iMCU row per call, unless suspended.
- */
-
-GLOBAL(JDIMENSION)
-jpeg_write_raw_data (j_compress_ptr cinfo, JSAMPIMAGE data,
-		     JDIMENSION num_lines)
-{
-  JDIMENSION lines_per_iMCU_row;
-
-  if (cinfo->global_state != CSTATE_RAW_OK)
-    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
-  if (cinfo->next_scanline >= cinfo->image_height) {
-    WARNMS(cinfo, JWRN_TOO_MUCH_DATA);
-    return 0;
-  }
-
-  /* Call progress monitor hook if present */
-  if (cinfo->progress != NULL) {
-    cinfo->progress->pass_counter = (long) cinfo->next_scanline;
-    cinfo->progress->pass_limit = (long) cinfo->image_height;
-    (*cinfo->progress->progress_monitor) ((j_common_ptr) cinfo);
-  }
-
-  /* Give master control module another chance if this is first call to
-   * jpeg_write_raw_data.  This lets output of the frame/scan headers be
-   * delayed so that application can write COM, etc, markers between
-   * jpeg_start_compress and jpeg_write_raw_data.
-   */
-  if (cinfo->master->call_pass_startup)
-    (*cinfo->master->pass_startup) (cinfo);
-
-  /* Verify that at least one iMCU row has been passed. */
-  lines_per_iMCU_row = cinfo->max_v_samp_factor * DCTSIZE;
-  if (num_lines < lines_per_iMCU_row)
-    ERREXIT(cinfo, JERR_BUFFER_SIZE);
-
-  /* Directly compress the row. */
-  if (! (*cinfo->coef->compress_data) (cinfo, data)) {
-    /* If compressor did not consume the whole row, suspend processing. */
-    return 0;
-  }
-
-  /* OK, we processed one iMCU row. */
-  cinfo->next_scanline += lines_per_iMCU_row;
-  return lines_per_iMCU_row;
-}
+/*
+ * jcapistd.c
+ *
+ * Copyright (C) 1994-1996, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains application interface code for the compression half
+ * of the JPEG library.  These are the "standard" API routines that are
+ * used in the normal full-compression case.  They are not used by a
+ * transcoding-only application.  Note that if an application links in
+ * jpeg_start_compress, it will end up linking in the entire compressor.
+ * We thus must separate this file from jcapimin.c to avoid linking the
+ * whole compression library into a transcoder.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/*
+ * Compression initialization.
+ * Before calling this, all parameters and a data destination must be set up.
+ *
+ * We require a write_all_tables parameter as a failsafe check when writing
+ * multiple datastreams from the same compression object.  Since prior runs
+ * will have left all the tables marked sent_table=TRUE, a subsequent run
+ * would emit an abbreviated stream (no tables) by default.  This may be what
+ * is wanted, but for safety's sake it should not be the default behavior:
+ * programmers should have to make a deliberate choice to emit abbreviated
+ * images.  Therefore the documentation and examples should encourage people
+ * to pass write_all_tables=TRUE; then it will take active thought to do the
+ * wrong thing.
+ */
+
+GLOBAL(void)
+jpeg_start_compress (j_compress_ptr cinfo, boolean write_all_tables)
+{
+  if (cinfo->global_state != CSTATE_START)
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+
+  if (write_all_tables)
+    jpeg_suppress_tables(cinfo, FALSE);	/* mark all tables to be written */
+
+  /* (Re)initialize error mgr and destination modules */
+  (*cinfo->err->reset_error_mgr) ((j_common_ptr) cinfo);
+  (*cinfo->dest->init_destination) (cinfo);
+  /* Perform master selection of active modules */
+  jinit_compress_master(cinfo);
+  /* Set up for the first pass */
+  (*cinfo->master->prepare_for_pass) (cinfo);
+  /* Ready for application to drive first pass through jpeg_write_scanlines
+   * or jpeg_write_raw_data.
+   */
+  cinfo->next_scanline = 0;
+  cinfo->global_state = (cinfo->raw_data_in ? CSTATE_RAW_OK : CSTATE_SCANNING);
+}
+
+
+/*
+ * Write some scanlines of data to the JPEG compressor.
+ *
+ * The return value will be the number of lines actually written.
+ * This should be less than the supplied num_lines only in case that
+ * the data destination module has requested suspension of the compressor,
+ * or if more than image_height scanlines are passed in.
+ *
+ * Note: we warn about excess calls to jpeg_write_scanlines() since
+ * this likely signals an application programmer error.  However,
+ * excess scanlines passed in the last valid call are *silently* ignored,
+ * so that the application need not adjust num_lines for end-of-image
+ * when using a multiple-scanline buffer.
+ */
+
+GLOBAL(JDIMENSION)
+jpeg_write_scanlines (j_compress_ptr cinfo, JSAMPARRAY scanlines,
+		      JDIMENSION num_lines)
+{
+  JDIMENSION row_ctr, rows_left;
+
+  if (cinfo->global_state != CSTATE_SCANNING)
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+  if (cinfo->next_scanline >= cinfo->image_height)
+    WARNMS(cinfo, JWRN_TOO_MUCH_DATA);
+
+  /* Call progress monitor hook if present */
+  if (cinfo->progress != NULL) {
+    cinfo->progress->pass_counter = (long) cinfo->next_scanline;
+    cinfo->progress->pass_limit = (long) cinfo->image_height;
+    (*cinfo->progress->progress_monitor) ((j_common_ptr) cinfo);
+  }
+
+  /* Give master control module another chance if this is first call to
+   * jpeg_write_scanlines.  This lets output of the frame/scan headers be
+   * delayed so that application can write COM, etc, markers between
+   * jpeg_start_compress and jpeg_write_scanlines.
+   */
+  if (cinfo->master->call_pass_startup)
+    (*cinfo->master->pass_startup) (cinfo);
+
+  /* Ignore any extra scanlines at bottom of image. */
+  rows_left = cinfo->image_height - cinfo->next_scanline;
+  if (num_lines > rows_left)
+    num_lines = rows_left;
+
+  row_ctr = 0;
+  (*cinfo->main->process_data) (cinfo, scanlines, &row_ctr, num_lines);
+  cinfo->next_scanline += row_ctr;
+  return row_ctr;
+}
+
+
+/*
+ * Alternate entry point to write raw data.
+ * Processes exactly one iMCU row per call, unless suspended.
+ */
+
+GLOBAL(JDIMENSION)
+jpeg_write_raw_data (j_compress_ptr cinfo, JSAMPIMAGE data,
+		     JDIMENSION num_lines)
+{
+  JDIMENSION lines_per_iMCU_row;
+
+  if (cinfo->global_state != CSTATE_RAW_OK)
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+  if (cinfo->next_scanline >= cinfo->image_height) {
+    WARNMS(cinfo, JWRN_TOO_MUCH_DATA);
+    return 0;
+  }
+
+  /* Call progress monitor hook if present */
+  if (cinfo->progress != NULL) {
+    cinfo->progress->pass_counter = (long) cinfo->next_scanline;
+    cinfo->progress->pass_limit = (long) cinfo->image_height;
+    (*cinfo->progress->progress_monitor) ((j_common_ptr) cinfo);
+  }
+
+  /* Give master control module another chance if this is first call to
+   * jpeg_write_raw_data.  This lets output of the frame/scan headers be
+   * delayed so that application can write COM, etc, markers between
+   * jpeg_start_compress and jpeg_write_raw_data.
+   */
+  if (cinfo->master->call_pass_startup)
+    (*cinfo->master->pass_startup) (cinfo);
+
+  /* Verify that at least one iMCU row has been passed. */
+  lines_per_iMCU_row = cinfo->max_v_samp_factor * DCTSIZE;
+  if (num_lines < lines_per_iMCU_row)
+    ERREXIT(cinfo, JERR_BUFFER_SIZE);
+
+  /* Directly compress the row. */
+  if (! (*cinfo->coef->compress_data) (cinfo, data)) {
+    /* If compressor did not consume the whole row, suspend processing. */
+    return 0;
+  }
+
+  /* OK, we processed one iMCU row. */
+  cinfo->next_scanline += lines_per_iMCU_row;
+  return lines_per_iMCU_row;
+}
diff --git a/plugins/AdvaImg/src/LibJPEG/jcarith.c b/plugins/AdvaImg/src/LibJPEG/jcarith.c
index 033f67069e..78edcd353c 100644
--- a/plugins/AdvaImg/src/LibJPEG/jcarith.c
+++ b/plugins/AdvaImg/src/LibJPEG/jcarith.c
@@ -1,937 +1,943 @@
-/*
- * jcarith.c
- *
- * Developed 1997-2011 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains portable arithmetic entropy encoding routines for JPEG
- * (implementing the ISO/IEC IS 10918-1 and CCITT Recommendation ITU-T T.81).
- *
- * Both sequential and progressive modes are supported in this single module.
- *
- * Suspension is not currently supported in this module.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-
-
-/* Expanded entropy encoder object for arithmetic encoding. */
-
-typedef struct {
-  struct jpeg_entropy_encoder pub; /* public fields */
-
-  INT32 c; /* C register, base of coding interval, layout as in sec. D.1.3 */
-  INT32 a;               /* A register, normalized size of coding interval */
-  INT32 sc;        /* counter for stacked 0xFF values which might overflow */
-  INT32 zc;          /* counter for pending 0x00 output values which might *
-                          * be discarded at the end ("Pacman" termination) */
-  int ct;  /* bit shift counter, determines when next byte will be written */
-  int buffer;                /* buffer for most recent output byte != 0xFF */
-
-  int last_dc_val[MAX_COMPS_IN_SCAN]; /* last DC coef for each component */
-  int dc_context[MAX_COMPS_IN_SCAN]; /* context index for DC conditioning */
-
-  unsigned int restarts_to_go;	/* MCUs left in this restart interval */
-  int next_restart_num;		/* next restart number to write (0-7) */
-
-  /* Pointers to statistics areas (these workspaces have image lifespan) */
-  unsigned char * dc_stats[NUM_ARITH_TBLS];
-  unsigned char * ac_stats[NUM_ARITH_TBLS];
-
-  /* Statistics bin for coding with fixed probability 0.5 */
-  unsigned char fixed_bin[4];
-} arith_entropy_encoder;
-
-typedef arith_entropy_encoder * arith_entropy_ptr;
-
-/* The following two definitions specify the allocation chunk size
- * for the statistics area.
- * According to sections F.1.4.4.1.3 and F.1.4.4.2, we need at least
- * 49 statistics bins for DC, and 245 statistics bins for AC coding.
- *
- * We use a compact representation with 1 byte per statistics bin,
- * thus the numbers directly represent byte sizes.
- * This 1 byte per statistics bin contains the meaning of the MPS
- * (more probable symbol) in the highest bit (mask 0x80), and the
- * index into the probability estimation state machine table
- * in the lower bits (mask 0x7F).
- */
-
-#define DC_STAT_BINS 64
-#define AC_STAT_BINS 256
-
-/* NOTE: Uncomment the following #define if you want to use the
- * given formula for calculating the AC conditioning parameter Kx
- * for spectral selection progressive coding in section G.1.3.2
- * of the spec (Kx = Kmin + SRL (8 + Se - Kmin) 4).
- * Although the spec and P&M authors claim that this "has proven
- * to give good results for 8 bit precision samples", I'm not
- * convinced yet that this is really beneficial.
- * Early tests gave only very marginal compression enhancements
- * (a few - around 5 or so - bytes even for very large files),
- * which would turn out rather negative if we'd suppress the
- * DAC (Define Arithmetic Conditioning) marker segments for
- * the default parameters in the future.
- * Note that currently the marker writing module emits 12-byte
- * DAC segments for a full-component scan in a color image.
- * This is not worth worrying about IMHO. However, since the
- * spec defines the default values to be used if the tables
- * are omitted (unlike Huffman tables, which are required
- * anyway), one might optimize this behaviour in the future,
- * and then it would be disadvantageous to use custom tables if
- * they don't provide sufficient gain to exceed the DAC size.
- *
- * On the other hand, I'd consider it as a reasonable result
- * that the conditioning has no significant influence on the
- * compression performance. This means that the basic
- * statistical model is already rather stable.
- *
- * Thus, at the moment, we use the default conditioning values
- * anyway, and do not use the custom formula.
- *
-#define CALCULATE_SPECTRAL_CONDITIONING
- */
-
-/* IRIGHT_SHIFT is like RIGHT_SHIFT, but works on int rather than INT32.
- * We assume that int right shift is unsigned if INT32 right shift is,
- * which should be safe.
- */
-
-#ifdef RIGHT_SHIFT_IS_UNSIGNED
-#define ISHIFT_TEMPS	int ishift_temp;
-#define IRIGHT_SHIFT(x,shft)  \
-	((ishift_temp = (x)) < 0 ? \
-	 (ishift_temp >> (shft)) | ((~0) << (16-(shft))) : \
-	 (ishift_temp >> (shft)))
-#else
-#define ISHIFT_TEMPS
-#define IRIGHT_SHIFT(x,shft)	((x) >> (shft))
-#endif
-
-
-LOCAL(void)
-emit_byte (int val, j_compress_ptr cinfo)
-/* Write next output byte; we do not support suspension in this module. */
-{
-  struct jpeg_destination_mgr * dest = cinfo->dest;
-
-  *dest->next_output_byte++ = (JOCTET) val;
-  if (--dest->free_in_buffer == 0)
-    if (! (*dest->empty_output_buffer) (cinfo))
-      ERREXIT(cinfo, JERR_CANT_SUSPEND);
-}
-
-
-/*
- * Finish up at the end of an arithmetic-compressed scan.
- */
-
-METHODDEF(void)
-finish_pass (j_compress_ptr cinfo)
-{
-  arith_entropy_ptr e = (arith_entropy_ptr) cinfo->entropy;
-  INT32 temp;
-
-  /* Section D.1.8: Termination of encoding */
-
-  /* Find the e->c in the coding interval with the largest
-   * number of trailing zero bits */
-  if ((temp = (e->a - 1 + e->c) & 0xFFFF0000L) < e->c)
-    e->c = temp + 0x8000L;
-  else
-    e->c = temp;
-  /* Send remaining bytes to output */
-  e->c <<= e->ct;
-  if (e->c & 0xF8000000L) {
-    /* One final overflow has to be handled */
-    if (e->buffer >= 0) {
-      if (e->zc)
-	do emit_byte(0x00, cinfo);
-	while (--e->zc);
-      emit_byte(e->buffer + 1, cinfo);
-      if (e->buffer + 1 == 0xFF)
-	emit_byte(0x00, cinfo);
-    }
-    e->zc += e->sc;  /* carry-over converts stacked 0xFF bytes to 0x00 */
-    e->sc = 0;
-  } else {
-    if (e->buffer == 0)
-      ++e->zc;
-    else if (e->buffer >= 0) {
-      if (e->zc)
-	do emit_byte(0x00, cinfo);
-	while (--e->zc);
-      emit_byte(e->buffer, cinfo);
-    }
-    if (e->sc) {
-      if (e->zc)
-	do emit_byte(0x00, cinfo);
-	while (--e->zc);
-      do {
-	emit_byte(0xFF, cinfo);
-	emit_byte(0x00, cinfo);
-      } while (--e->sc);
-    }
-  }
-  /* Output final bytes only if they are not 0x00 */
-  if (e->c & 0x7FFF800L) {
-    if (e->zc)  /* output final pending zero bytes */
-      do emit_byte(0x00, cinfo);
-      while (--e->zc);
-    emit_byte((e->c >> 19) & 0xFF, cinfo);
-    if (((e->c >> 19) & 0xFF) == 0xFF)
-      emit_byte(0x00, cinfo);
-    if (e->c & 0x7F800L) {
-      emit_byte((e->c >> 11) & 0xFF, cinfo);
-      if (((e->c >> 11) & 0xFF) == 0xFF)
-	emit_byte(0x00, cinfo);
-    }
-  }
-}
-
-
-/*
- * The core arithmetic encoding routine (common in JPEG and JBIG).
- * This needs to go as fast as possible.
- * Machine-dependent optimization facilities
- * are not utilized in this portable implementation.
- * However, this code should be fairly efficient and
- * may be a good base for further optimizations anyway.
- *
- * Parameter 'val' to be encoded may be 0 or 1 (binary decision).
- *
- * Note: I've added full "Pacman" termination support to the
- * byte output routines, which is equivalent to the optional
- * Discard_final_zeros procedure (Figure D.15) in the spec.
- * Thus, we always produce the shortest possible output
- * stream compliant to the spec (no trailing zero bytes,
- * except for FF stuffing).
- *
- * I've also introduced a new scheme for accessing
- * the probability estimation state machine table,
- * derived from Markus Kuhn's JBIG implementation.
- */
-
-LOCAL(void)
-arith_encode (j_compress_ptr cinfo, unsigned char *st, int val) 
-{
-  register arith_entropy_ptr e = (arith_entropy_ptr) cinfo->entropy;
-  register unsigned char nl, nm;
-  register INT32 qe, temp;
-  register int sv;
-
-  /* Fetch values from our compact representation of Table D.3(D.2):
-   * Qe values and probability estimation state machine
-   */
-  sv = *st;
-  qe = jpeg_aritab[sv & 0x7F];	/* => Qe_Value */
-  nl = qe & 0xFF; qe >>= 8;	/* Next_Index_LPS + Switch_MPS */
-  nm = qe & 0xFF; qe >>= 8;	/* Next_Index_MPS */
-
-  /* Encode & estimation procedures per sections D.1.4 & D.1.5 */
-  e->a -= qe;
-  if (val != (sv >> 7)) {
-    /* Encode the less probable symbol */
-    if (e->a >= qe) {
-      /* If the interval size (qe) for the less probable symbol (LPS)
-       * is larger than the interval size for the MPS, then exchange
-       * the two symbols for coding efficiency, otherwise code the LPS
-       * as usual: */
-      e->c += e->a;
-      e->a = qe;
-    }
-    *st = (sv & 0x80) ^ nl;	/* Estimate_after_LPS */
-  } else {
-    /* Encode the more probable symbol */
-    if (e->a >= 0x8000L)
-      return;  /* A >= 0x8000 -> ready, no renormalization required */
-    if (e->a < qe) {
-      /* If the interval size (qe) for the less probable symbol (LPS)
-       * is larger than the interval size for the MPS, then exchange
-       * the two symbols for coding efficiency: */
-      e->c += e->a;
-      e->a = qe;
-    }
-    *st = (sv & 0x80) ^ nm;	/* Estimate_after_MPS */
-  }
-
-  /* Renormalization & data output per section D.1.6 */
-  do {
-    e->a <<= 1;
-    e->c <<= 1;
-    if (--e->ct == 0) {
-      /* Another byte is ready for output */
-      temp = e->c >> 19;
-      if (temp > 0xFF) {
-	/* Handle overflow over all stacked 0xFF bytes */
-	if (e->buffer >= 0) {
-	  if (e->zc)
-	    do emit_byte(0x00, cinfo);
-	    while (--e->zc);
-	  emit_byte(e->buffer + 1, cinfo);
-	  if (e->buffer + 1 == 0xFF)
-	    emit_byte(0x00, cinfo);
-	}
-	e->zc += e->sc;  /* carry-over converts stacked 0xFF bytes to 0x00 */
-	e->sc = 0;
-	/* Note: The 3 spacer bits in the C register guarantee
-	 * that the new buffer byte can't be 0xFF here
-	 * (see page 160 in the P&M JPEG book). */
-	e->buffer = temp & 0xFF;  /* new output byte, might overflow later */
-      } else if (temp == 0xFF) {
-	++e->sc;  /* stack 0xFF byte (which might overflow later) */
-      } else {
-	/* Output all stacked 0xFF bytes, they will not overflow any more */
-	if (e->buffer == 0)
-	  ++e->zc;
-	else if (e->buffer >= 0) {
-	  if (e->zc)
-	    do emit_byte(0x00, cinfo);
-	    while (--e->zc);
-	  emit_byte(e->buffer, cinfo);
-	}
-	if (e->sc) {
-	  if (e->zc)
-	    do emit_byte(0x00, cinfo);
-	    while (--e->zc);
-	  do {
-	    emit_byte(0xFF, cinfo);
-	    emit_byte(0x00, cinfo);
-	  } while (--e->sc);
-	}
-	e->buffer = temp & 0xFF;  /* new output byte (can still overflow) */
-      }
-      e->c &= 0x7FFFFL;
-      e->ct += 8;
-    }
-  } while (e->a < 0x8000L);
-}
-
-
-/*
- * Emit a restart marker & resynchronize predictions.
- */
-
-LOCAL(void)
-emit_restart (j_compress_ptr cinfo, int restart_num)
-{
-  arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy;
-  int ci;
-  jpeg_component_info * compptr;
-
-  finish_pass(cinfo);
-
-  emit_byte(0xFF, cinfo);
-  emit_byte(JPEG_RST0 + restart_num, cinfo);
-
-  /* Re-initialize statistics areas */
-  for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
-    compptr = cinfo->cur_comp_info[ci];
-    /* DC needs no table for refinement scan */
-    if (cinfo->Ss == 0 && cinfo->Ah == 0) {
-      MEMZERO(entropy->dc_stats[compptr->dc_tbl_no], DC_STAT_BINS);
-      /* Reset DC predictions to 0 */
-      entropy->last_dc_val[ci] = 0;
-      entropy->dc_context[ci] = 0;
-    }
-    /* AC needs no table when not present */
-    if (cinfo->Se) {
-      MEMZERO(entropy->ac_stats[compptr->ac_tbl_no], AC_STAT_BINS);
-    }
-  }
-
-  /* Reset arithmetic encoding variables */
-  entropy->c = 0;
-  entropy->a = 0x10000L;
-  entropy->sc = 0;
-  entropy->zc = 0;
-  entropy->ct = 11;
-  entropy->buffer = -1;  /* empty */
-}
-
-
-/*
- * MCU encoding for DC initial scan (either spectral selection,
- * or first pass of successive approximation).
- */
-
-METHODDEF(boolean)
-encode_mcu_DC_first (j_compress_ptr cinfo, JBLOCKROW *MCU_data)
-{
-  arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy;
-  JBLOCKROW block;
-  unsigned char *st;
-  int blkn, ci, tbl;
-  int v, v2, m;
-  ISHIFT_TEMPS
-
-  /* Emit restart marker if needed */
-  if (cinfo->restart_interval) {
-    if (entropy->restarts_to_go == 0) {
-      emit_restart(cinfo, entropy->next_restart_num);
-      entropy->restarts_to_go = cinfo->restart_interval;
-      entropy->next_restart_num++;
-      entropy->next_restart_num &= 7;
-    }
-    entropy->restarts_to_go--;
-  }
-
-  /* Encode the MCU data blocks */
-  for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
-    block = MCU_data[blkn];
-    ci = cinfo->MCU_membership[blkn];
-    tbl = cinfo->cur_comp_info[ci]->dc_tbl_no;
-
-    /* Compute the DC value after the required point transform by Al.
-     * This is simply an arithmetic right shift.
-     */
-    m = IRIGHT_SHIFT((int) ((*block)[0]), cinfo->Al);
-
-    /* Sections F.1.4.1 & F.1.4.4.1: Encoding of DC coefficients */
-
-    /* Table F.4: Point to statistics bin S0 for DC coefficient coding */
-    st = entropy->dc_stats[tbl] + entropy->dc_context[ci];
-
-    /* Figure F.4: Encode_DC_DIFF */
-    if ((v = m - entropy->last_dc_val[ci]) == 0) {
-      arith_encode(cinfo, st, 0);
-      entropy->dc_context[ci] = 0;	/* zero diff category */
-    } else {
-      entropy->last_dc_val[ci] = m;
-      arith_encode(cinfo, st, 1);
-      /* Figure F.6: Encoding nonzero value v */
-      /* Figure F.7: Encoding the sign of v */
-      if (v > 0) {
-	arith_encode(cinfo, st + 1, 0);	/* Table F.4: SS = S0 + 1 */
-	st += 2;			/* Table F.4: SP = S0 + 2 */
-	entropy->dc_context[ci] = 4;	/* small positive diff category */
-      } else {
-	v = -v;
-	arith_encode(cinfo, st + 1, 1);	/* Table F.4: SS = S0 + 1 */
-	st += 3;			/* Table F.4: SN = S0 + 3 */
-	entropy->dc_context[ci] = 8;	/* small negative diff category */
-      }
-      /* Figure F.8: Encoding the magnitude category of v */
-      m = 0;
-      if (v -= 1) {
-	arith_encode(cinfo, st, 1);
-	m = 1;
-	v2 = v;
-	st = entropy->dc_stats[tbl] + 20; /* Table F.4: X1 = 20 */
-	while (v2 >>= 1) {
-	  arith_encode(cinfo, st, 1);
-	  m <<= 1;
-	  st += 1;
-	}
-      }
-      arith_encode(cinfo, st, 0);
-      /* Section F.1.4.4.1.2: Establish dc_context conditioning category */
-      if (m < (int) ((1L << cinfo->arith_dc_L[tbl]) >> 1))
-	entropy->dc_context[ci] = 0;	/* zero diff category */
-      else if (m > (int) ((1L << cinfo->arith_dc_U[tbl]) >> 1))
-	entropy->dc_context[ci] += 8;	/* large diff category */
-      /* Figure F.9: Encoding the magnitude bit pattern of v */
-      st += 14;
-      while (m >>= 1)
-	arith_encode(cinfo, st, (m & v) ? 1 : 0);
-    }
-  }
-
-  return TRUE;
-}
-
-
-/*
- * MCU encoding for AC initial scan (either spectral selection,
- * or first pass of successive approximation).
- */
-
-METHODDEF(boolean)
-encode_mcu_AC_first (j_compress_ptr cinfo, JBLOCKROW *MCU_data)
-{
-  arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy;
-  JBLOCKROW block;
-  unsigned char *st;
-  int tbl, k, ke;
-  int v, v2, m;
-  const int * natural_order;
-
-  /* Emit restart marker if needed */
-  if (cinfo->restart_interval) {
-    if (entropy->restarts_to_go == 0) {
-      emit_restart(cinfo, entropy->next_restart_num);
-      entropy->restarts_to_go = cinfo->restart_interval;
-      entropy->next_restart_num++;
-      entropy->next_restart_num &= 7;
-    }
-    entropy->restarts_to_go--;
-  }
-
-  natural_order = cinfo->natural_order;
-
-  /* Encode the MCU data block */
-  block = MCU_data[0];
-  tbl = cinfo->cur_comp_info[0]->ac_tbl_no;
-
-  /* Sections F.1.4.2 & F.1.4.4.2: Encoding of AC coefficients */
-
-  /* Establish EOB (end-of-block) index */
-  for (ke = cinfo->Se; ke > 0; ke--)
-    /* We must apply the point transform by Al.  For AC coefficients this
-     * is an integer division with rounding towards 0.  To do this portably
-     * in C, we shift after obtaining the absolute value.
-     */
-    if ((v = (*block)[natural_order[ke]]) >= 0) {
-      if (v >>= cinfo->Al) break;
-    } else {
-      v = -v;
-      if (v >>= cinfo->Al) break;
-    }
-
-  /* Figure F.5: Encode_AC_Coefficients */
-  for (k = cinfo->Ss; k <= ke; k++) {
-    st = entropy->ac_stats[tbl] + 3 * (k - 1);
-    arith_encode(cinfo, st, 0);		/* EOB decision */
-    for (;;) {
-      if ((v = (*block)[natural_order[k]]) >= 0) {
-	if (v >>= cinfo->Al) {
-	  arith_encode(cinfo, st + 1, 1);
-	  arith_encode(cinfo, entropy->fixed_bin, 0);
-	  break;
-	}
-      } else {
-	v = -v;
-	if (v >>= cinfo->Al) {
-	  arith_encode(cinfo, st + 1, 1);
-	  arith_encode(cinfo, entropy->fixed_bin, 1);
-	  break;
-	}
-      }
-      arith_encode(cinfo, st + 1, 0); st += 3; k++;
-    }
-    st += 2;
-    /* Figure F.8: Encoding the magnitude category of v */
-    m = 0;
-    if (v -= 1) {
-      arith_encode(cinfo, st, 1);
-      m = 1;
-      v2 = v;
-      if (v2 >>= 1) {
-	arith_encode(cinfo, st, 1);
-	m <<= 1;
-	st = entropy->ac_stats[tbl] +
-	     (k <= cinfo->arith_ac_K[tbl] ? 189 : 217);
-	while (v2 >>= 1) {
-	  arith_encode(cinfo, st, 1);
-	  m <<= 1;
-	  st += 1;
-	}
-      }
-    }
-    arith_encode(cinfo, st, 0);
-    /* Figure F.9: Encoding the magnitude bit pattern of v */
-    st += 14;
-    while (m >>= 1)
-      arith_encode(cinfo, st, (m & v) ? 1 : 0);
-  }
-  /* Encode EOB decision only if k <= cinfo->Se */
-  if (k <= cinfo->Se) {
-    st = entropy->ac_stats[tbl] + 3 * (k - 1);
-    arith_encode(cinfo, st, 1);
-  }
-
-  return TRUE;
-}
-
-
-/*
- * MCU encoding for DC successive approximation refinement scan.
- */
-
-METHODDEF(boolean)
-encode_mcu_DC_refine (j_compress_ptr cinfo, JBLOCKROW *MCU_data)
-{
-  arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy;
-  unsigned char *st;
-  int Al, blkn;
-
-  /* Emit restart marker if needed */
-  if (cinfo->restart_interval) {
-    if (entropy->restarts_to_go == 0) {
-      emit_restart(cinfo, entropy->next_restart_num);
-      entropy->restarts_to_go = cinfo->restart_interval;
-      entropy->next_restart_num++;
-      entropy->next_restart_num &= 7;
-    }
-    entropy->restarts_to_go--;
-  }
-
-  st = entropy->fixed_bin;	/* use fixed probability estimation */
-  Al = cinfo->Al;
-
-  /* Encode the MCU data blocks */
-  for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
-    /* We simply emit the Al'th bit of the DC coefficient value. */
-    arith_encode(cinfo, st, (MCU_data[blkn][0][0] >> Al) & 1);
-  }
-
-  return TRUE;
-}
-
-
-/*
- * MCU encoding for AC successive approximation refinement scan.
- */
-
-METHODDEF(boolean)
-encode_mcu_AC_refine (j_compress_ptr cinfo, JBLOCKROW *MCU_data)
-{
-  arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy;
-  JBLOCKROW block;
-  unsigned char *st;
-  int tbl, k, ke, kex;
-  int v;
-  const int * natural_order;
-
-  /* Emit restart marker if needed */
-  if (cinfo->restart_interval) {
-    if (entropy->restarts_to_go == 0) {
-      emit_restart(cinfo, entropy->next_restart_num);
-      entropy->restarts_to_go = cinfo->restart_interval;
-      entropy->next_restart_num++;
-      entropy->next_restart_num &= 7;
-    }
-    entropy->restarts_to_go--;
-  }
-
-  natural_order = cinfo->natural_order;
-
-  /* Encode the MCU data block */
-  block = MCU_data[0];
-  tbl = cinfo->cur_comp_info[0]->ac_tbl_no;
-
-  /* Section G.1.3.3: Encoding of AC coefficients */
-
-  /* Establish EOB (end-of-block) index */
-  for (ke = cinfo->Se; ke > 0; ke--)
-    /* We must apply the point transform by Al.  For AC coefficients this
-     * is an integer division with rounding towards 0.  To do this portably
-     * in C, we shift after obtaining the absolute value.
-     */
-    if ((v = (*block)[natural_order[ke]]) >= 0) {
-      if (v >>= cinfo->Al) break;
-    } else {
-      v = -v;
-      if (v >>= cinfo->Al) break;
-    }
-
-  /* Establish EOBx (previous stage end-of-block) index */
-  for (kex = ke; kex > 0; kex--)
-    if ((v = (*block)[natural_order[kex]]) >= 0) {
-      if (v >>= cinfo->Ah) break;
-    } else {
-      v = -v;
-      if (v >>= cinfo->Ah) break;
-    }
-
-  /* Figure G.10: Encode_AC_Coefficients_SA */
-  for (k = cinfo->Ss; k <= ke; k++) {
-    st = entropy->ac_stats[tbl] + 3 * (k - 1);
-    if (k > kex)
-      arith_encode(cinfo, st, 0);	/* EOB decision */
-    for (;;) {
-      if ((v = (*block)[natural_order[k]]) >= 0) {
-	if (v >>= cinfo->Al) {
-	  if (v >> 1)			/* previously nonzero coef */
-	    arith_encode(cinfo, st + 2, (v & 1));
-	  else {			/* newly nonzero coef */
-	    arith_encode(cinfo, st + 1, 1);
-	    arith_encode(cinfo, entropy->fixed_bin, 0);
-	  }
-	  break;
-	}
-      } else {
-	v = -v;
-	if (v >>= cinfo->Al) {
-	  if (v >> 1)			/* previously nonzero coef */
-	    arith_encode(cinfo, st + 2, (v & 1));
-	  else {			/* newly nonzero coef */
-	    arith_encode(cinfo, st + 1, 1);
-	    arith_encode(cinfo, entropy->fixed_bin, 1);
-	  }
-	  break;
-	}
-      }
-      arith_encode(cinfo, st + 1, 0); st += 3; k++;
-    }
-  }
-  /* Encode EOB decision only if k <= cinfo->Se */
-  if (k <= cinfo->Se) {
-    st = entropy->ac_stats[tbl] + 3 * (k - 1);
-    arith_encode(cinfo, st, 1);
-  }
-
-  return TRUE;
-}
-
-
-/*
- * Encode and output one MCU's worth of arithmetic-compressed coefficients.
- */
-
-METHODDEF(boolean)
-encode_mcu (j_compress_ptr cinfo, JBLOCKROW *MCU_data)
-{
-  arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy;
-  jpeg_component_info * compptr;
-  JBLOCKROW block;
-  unsigned char *st;
-  int blkn, ci, tbl, k, ke;
-  int v, v2, m;
-  const int * natural_order;
-
-  /* Emit restart marker if needed */
-  if (cinfo->restart_interval) {
-    if (entropy->restarts_to_go == 0) {
-      emit_restart(cinfo, entropy->next_restart_num);
-      entropy->restarts_to_go = cinfo->restart_interval;
-      entropy->next_restart_num++;
-      entropy->next_restart_num &= 7;
-    }
-    entropy->restarts_to_go--;
-  }
-
-  natural_order = cinfo->natural_order;
-
-  /* Encode the MCU data blocks */
-  for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
-    block = MCU_data[blkn];
-    ci = cinfo->MCU_membership[blkn];
-    compptr = cinfo->cur_comp_info[ci];
-
-    /* Sections F.1.4.1 & F.1.4.4.1: Encoding of DC coefficients */
-
-    tbl = compptr->dc_tbl_no;
-
-    /* Table F.4: Point to statistics bin S0 for DC coefficient coding */
-    st = entropy->dc_stats[tbl] + entropy->dc_context[ci];
-
-    /* Figure F.4: Encode_DC_DIFF */
-    if ((v = (*block)[0] - entropy->last_dc_val[ci]) == 0) {
-      arith_encode(cinfo, st, 0);
-      entropy->dc_context[ci] = 0;	/* zero diff category */
-    } else {
-      entropy->last_dc_val[ci] = (*block)[0];
-      arith_encode(cinfo, st, 1);
-      /* Figure F.6: Encoding nonzero value v */
-      /* Figure F.7: Encoding the sign of v */
-      if (v > 0) {
-	arith_encode(cinfo, st + 1, 0);	/* Table F.4: SS = S0 + 1 */
-	st += 2;			/* Table F.4: SP = S0 + 2 */
-	entropy->dc_context[ci] = 4;	/* small positive diff category */
-      } else {
-	v = -v;
-	arith_encode(cinfo, st + 1, 1);	/* Table F.4: SS = S0 + 1 */
-	st += 3;			/* Table F.4: SN = S0 + 3 */
-	entropy->dc_context[ci] = 8;	/* small negative diff category */
-      }
-      /* Figure F.8: Encoding the magnitude category of v */
-      m = 0;
-      if (v -= 1) {
-	arith_encode(cinfo, st, 1);
-	m = 1;
-	v2 = v;
-	st = entropy->dc_stats[tbl] + 20; /* Table F.4: X1 = 20 */
-	while (v2 >>= 1) {
-	  arith_encode(cinfo, st, 1);
-	  m <<= 1;
-	  st += 1;
-	}
-      }
-      arith_encode(cinfo, st, 0);
-      /* Section F.1.4.4.1.2: Establish dc_context conditioning category */
-      if (m < (int) ((1L << cinfo->arith_dc_L[tbl]) >> 1))
-	entropy->dc_context[ci] = 0;	/* zero diff category */
-      else if (m > (int) ((1L << cinfo->arith_dc_U[tbl]) >> 1))
-	entropy->dc_context[ci] += 8;	/* large diff category */
-      /* Figure F.9: Encoding the magnitude bit pattern of v */
-      st += 14;
-      while (m >>= 1)
-	arith_encode(cinfo, st, (m & v) ? 1 : 0);
-    }
-
-    /* Sections F.1.4.2 & F.1.4.4.2: Encoding of AC coefficients */
-
-    if ((ke = cinfo->lim_Se) == 0) continue;
-    tbl = compptr->ac_tbl_no;
-
-    /* Establish EOB (end-of-block) index */
-    do {
-      if ((*block)[natural_order[ke]]) break;
-    } while (--ke);
-
-    /* Figure F.5: Encode_AC_Coefficients */
-    for (k = 0; k < ke;) {
-      st = entropy->ac_stats[tbl] + 3 * k;
-      arith_encode(cinfo, st, 0);	/* EOB decision */
-      while ((v = (*block)[natural_order[++k]]) == 0) {
-	arith_encode(cinfo, st + 1, 0);
-	st += 3;
-      }
-      arith_encode(cinfo, st + 1, 1);
-      /* Figure F.6: Encoding nonzero value v */
-      /* Figure F.7: Encoding the sign of v */
-      if (v > 0) {
-	arith_encode(cinfo, entropy->fixed_bin, 0);
-      } else {
-	v = -v;
-	arith_encode(cinfo, entropy->fixed_bin, 1);
-      }
-      st += 2;
-      /* Figure F.8: Encoding the magnitude category of v */
-      m = 0;
-      if (v -= 1) {
-	arith_encode(cinfo, st, 1);
-	m = 1;
-	v2 = v;
-	if (v2 >>= 1) {
-	  arith_encode(cinfo, st, 1);
-	  m <<= 1;
-	  st = entropy->ac_stats[tbl] +
-	       (k <= cinfo->arith_ac_K[tbl] ? 189 : 217);
-	  while (v2 >>= 1) {
-	    arith_encode(cinfo, st, 1);
-	    m <<= 1;
-	    st += 1;
-	  }
-	}
-      }
-      arith_encode(cinfo, st, 0);
-      /* Figure F.9: Encoding the magnitude bit pattern of v */
-      st += 14;
-      while (m >>= 1)
-	arith_encode(cinfo, st, (m & v) ? 1 : 0);
-    }
-    /* Encode EOB decision only if k < cinfo->lim_Se */
-    if (k < cinfo->lim_Se) {
-      st = entropy->ac_stats[tbl] + 3 * k;
-      arith_encode(cinfo, st, 1);
-    }
-  }
-
-  return TRUE;
-}
-
-
-/*
- * Initialize for an arithmetic-compressed scan.
- */
-
-METHODDEF(void)
-start_pass (j_compress_ptr cinfo, boolean gather_statistics)
-{
-  arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy;
-  int ci, tbl;
-  jpeg_component_info * compptr;
-
-  if (gather_statistics)
-    /* Make sure to avoid that in the master control logic!
-     * We are fully adaptive here and need no extra
-     * statistics gathering pass!
-     */
-    ERREXIT(cinfo, JERR_NOT_COMPILED);
-
-  /* We assume jcmaster.c already validated the progressive scan parameters. */
-
-  /* Select execution routines */
-  if (cinfo->progressive_mode) {
-    if (cinfo->Ah == 0) {
-      if (cinfo->Ss == 0)
-	entropy->pub.encode_mcu = encode_mcu_DC_first;
-      else
-	entropy->pub.encode_mcu = encode_mcu_AC_first;
-    } else {
-      if (cinfo->Ss == 0)
-	entropy->pub.encode_mcu = encode_mcu_DC_refine;
-      else
-	entropy->pub.encode_mcu = encode_mcu_AC_refine;
-    }
-  } else
-    entropy->pub.encode_mcu = encode_mcu;
-
-  /* Allocate & initialize requested statistics areas */
-  for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
-    compptr = cinfo->cur_comp_info[ci];
-    /* DC needs no table for refinement scan */
-    if (cinfo->Ss == 0 && cinfo->Ah == 0) {
-      tbl = compptr->dc_tbl_no;
-      if (tbl < 0 || tbl >= NUM_ARITH_TBLS)
-	ERREXIT1(cinfo, JERR_NO_ARITH_TABLE, tbl);
-      if (entropy->dc_stats[tbl] == NULL)
-	entropy->dc_stats[tbl] = (unsigned char *) (*cinfo->mem->alloc_small)
-	  ((j_common_ptr) cinfo, JPOOL_IMAGE, DC_STAT_BINS);
-      MEMZERO(entropy->dc_stats[tbl], DC_STAT_BINS);
-      /* Initialize DC predictions to 0 */
-      entropy->last_dc_val[ci] = 0;
-      entropy->dc_context[ci] = 0;
-    }
-    /* AC needs no table when not present */
-    if (cinfo->Se) {
-      tbl = compptr->ac_tbl_no;
-      if (tbl < 0 || tbl >= NUM_ARITH_TBLS)
-	ERREXIT1(cinfo, JERR_NO_ARITH_TABLE, tbl);
-      if (entropy->ac_stats[tbl] == NULL)
-	entropy->ac_stats[tbl] = (unsigned char *) (*cinfo->mem->alloc_small)
-	  ((j_common_ptr) cinfo, JPOOL_IMAGE, AC_STAT_BINS);
-      MEMZERO(entropy->ac_stats[tbl], AC_STAT_BINS);
-#ifdef CALCULATE_SPECTRAL_CONDITIONING
-      if (cinfo->progressive_mode)
-	/* Section G.1.3.2: Set appropriate arithmetic conditioning value Kx */
-	cinfo->arith_ac_K[tbl] = cinfo->Ss + ((8 + cinfo->Se - cinfo->Ss) >> 4);
-#endif
-    }
-  }
-
-  /* Initialize arithmetic encoding variables */
-  entropy->c = 0;
-  entropy->a = 0x10000L;
-  entropy->sc = 0;
-  entropy->zc = 0;
-  entropy->ct = 11;
-  entropy->buffer = -1;  /* empty */
-
-  /* Initialize restart stuff */
-  entropy->restarts_to_go = cinfo->restart_interval;
-  entropy->next_restart_num = 0;
-}
-
-
-/*
- * Module initialization routine for arithmetic entropy encoding.
- */
-
-GLOBAL(void)
-jinit_arith_encoder (j_compress_ptr cinfo)
-{
-  arith_entropy_ptr entropy;
-  int i;
-
-  entropy = (arith_entropy_ptr)
-    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				SIZEOF(arith_entropy_encoder));
-  cinfo->entropy = (struct jpeg_entropy_encoder *) entropy;
-  entropy->pub.start_pass = start_pass;
-  entropy->pub.finish_pass = finish_pass;
-
-  /* Mark tables unallocated */
-  for (i = 0; i < NUM_ARITH_TBLS; i++) {
-    entropy->dc_stats[i] = NULL;
-    entropy->ac_stats[i] = NULL;
-  }
-
-  /* Initialize index for fixed probability estimation */
-  entropy->fixed_bin[0] = 113;
-}
+/*
+ * jcarith.c
+ *
+ * Developed 1997-2012 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains portable arithmetic entropy encoding routines for JPEG
+ * (implementing the ISO/IEC IS 10918-1 and CCITT Recommendation ITU-T T.81).
+ *
+ * Both sequential and progressive modes are supported in this single module.
+ *
+ * Suspension is not currently supported in this module.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/* Expanded entropy encoder object for arithmetic encoding. */
+
+typedef struct {
+  struct jpeg_entropy_encoder pub; /* public fields */
+
+  INT32 c; /* C register, base of coding interval, layout as in sec. D.1.3 */
+  INT32 a;               /* A register, normalized size of coding interval */
+  INT32 sc;        /* counter for stacked 0xFF values which might overflow */
+  INT32 zc;          /* counter for pending 0x00 output values which might *
+                          * be discarded at the end ("Pacman" termination) */
+  int ct;  /* bit shift counter, determines when next byte will be written */
+  int buffer;                /* buffer for most recent output byte != 0xFF */
+
+  int last_dc_val[MAX_COMPS_IN_SCAN]; /* last DC coef for each component */
+  int dc_context[MAX_COMPS_IN_SCAN]; /* context index for DC conditioning */
+
+  unsigned int restarts_to_go;	/* MCUs left in this restart interval */
+  int next_restart_num;		/* next restart number to write (0-7) */
+
+  /* Pointers to statistics areas (these workspaces have image lifespan) */
+  unsigned char * dc_stats[NUM_ARITH_TBLS];
+  unsigned char * ac_stats[NUM_ARITH_TBLS];
+
+  /* Statistics bin for coding with fixed probability 0.5 */
+  unsigned char fixed_bin[4];
+} arith_entropy_encoder;
+
+typedef arith_entropy_encoder * arith_entropy_ptr;
+
+/* The following two definitions specify the allocation chunk size
+ * for the statistics area.
+ * According to sections F.1.4.4.1.3 and F.1.4.4.2, we need at least
+ * 49 statistics bins for DC, and 245 statistics bins for AC coding.
+ *
+ * We use a compact representation with 1 byte per statistics bin,
+ * thus the numbers directly represent byte sizes.
+ * This 1 byte per statistics bin contains the meaning of the MPS
+ * (more probable symbol) in the highest bit (mask 0x80), and the
+ * index into the probability estimation state machine table
+ * in the lower bits (mask 0x7F).
+ */
+
+#define DC_STAT_BINS 64
+#define AC_STAT_BINS 256
+
+/* NOTE: Uncomment the following #define if you want to use the
+ * given formula for calculating the AC conditioning parameter Kx
+ * for spectral selection progressive coding in section G.1.3.2
+ * of the spec (Kx = Kmin + SRL (8 + Se - Kmin) 4).
+ * Although the spec and P&M authors claim that this "has proven
+ * to give good results for 8 bit precision samples", I'm not
+ * convinced yet that this is really beneficial.
+ * Early tests gave only very marginal compression enhancements
+ * (a few - around 5 or so - bytes even for very large files),
+ * which would turn out rather negative if we'd suppress the
+ * DAC (Define Arithmetic Conditioning) marker segments for
+ * the default parameters in the future.
+ * Note that currently the marker writing module emits 12-byte
+ * DAC segments for a full-component scan in a color image.
+ * This is not worth worrying about IMHO. However, since the
+ * spec defines the default values to be used if the tables
+ * are omitted (unlike Huffman tables, which are required
+ * anyway), one might optimize this behaviour in the future,
+ * and then it would be disadvantageous to use custom tables if
+ * they don't provide sufficient gain to exceed the DAC size.
+ *
+ * On the other hand, I'd consider it as a reasonable result
+ * that the conditioning has no significant influence on the
+ * compression performance. This means that the basic
+ * statistical model is already rather stable.
+ *
+ * Thus, at the moment, we use the default conditioning values
+ * anyway, and do not use the custom formula.
+ *
+#define CALCULATE_SPECTRAL_CONDITIONING
+ */
+
+/* IRIGHT_SHIFT is like RIGHT_SHIFT, but works on int rather than INT32.
+ * We assume that int right shift is unsigned if INT32 right shift is,
+ * which should be safe.
+ */
+
+#ifdef RIGHT_SHIFT_IS_UNSIGNED
+#define ISHIFT_TEMPS	int ishift_temp;
+#define IRIGHT_SHIFT(x,shft)  \
+	((ishift_temp = (x)) < 0 ? \
+	 (ishift_temp >> (shft)) | ((~0) << (16-(shft))) : \
+	 (ishift_temp >> (shft)))
+#else
+#define ISHIFT_TEMPS
+#define IRIGHT_SHIFT(x,shft)	((x) >> (shft))
+#endif
+
+
+LOCAL(void)
+emit_byte (int val, j_compress_ptr cinfo)
+/* Write next output byte; we do not support suspension in this module. */
+{
+  struct jpeg_destination_mgr * dest = cinfo->dest;
+
+  *dest->next_output_byte++ = (JOCTET) val;
+  if (--dest->free_in_buffer == 0)
+    if (! (*dest->empty_output_buffer) (cinfo))
+      ERREXIT(cinfo, JERR_CANT_SUSPEND);
+}
+
+
+/*
+ * Finish up at the end of an arithmetic-compressed scan.
+ */
+
+METHODDEF(void)
+finish_pass (j_compress_ptr cinfo)
+{
+  arith_entropy_ptr e = (arith_entropy_ptr) cinfo->entropy;
+  INT32 temp;
+
+  /* Section D.1.8: Termination of encoding */
+
+  /* Find the e->c in the coding interval with the largest
+   * number of trailing zero bits */
+  if ((temp = (e->a - 1 + e->c) & 0xFFFF0000L) < e->c)
+    e->c = temp + 0x8000L;
+  else
+    e->c = temp;
+  /* Send remaining bytes to output */
+  e->c <<= e->ct;
+  if (e->c & 0xF8000000L) {
+    /* One final overflow has to be handled */
+    if (e->buffer >= 0) {
+      if (e->zc)
+	do emit_byte(0x00, cinfo);
+	while (--e->zc);
+      emit_byte(e->buffer + 1, cinfo);
+      if (e->buffer + 1 == 0xFF)
+	emit_byte(0x00, cinfo);
+    }
+    e->zc += e->sc;  /* carry-over converts stacked 0xFF bytes to 0x00 */
+    e->sc = 0;
+  } else {
+    if (e->buffer == 0)
+      ++e->zc;
+    else if (e->buffer >= 0) {
+      if (e->zc)
+	do emit_byte(0x00, cinfo);
+	while (--e->zc);
+      emit_byte(e->buffer, cinfo);
+    }
+    if (e->sc) {
+      if (e->zc)
+	do emit_byte(0x00, cinfo);
+	while (--e->zc);
+      do {
+	emit_byte(0xFF, cinfo);
+	emit_byte(0x00, cinfo);
+      } while (--e->sc);
+    }
+  }
+  /* Output final bytes only if they are not 0x00 */
+  if (e->c & 0x7FFF800L) {
+    if (e->zc)  /* output final pending zero bytes */
+      do emit_byte(0x00, cinfo);
+      while (--e->zc);
+    emit_byte((e->c >> 19) & 0xFF, cinfo);
+    if (((e->c >> 19) & 0xFF) == 0xFF)
+      emit_byte(0x00, cinfo);
+    if (e->c & 0x7F800L) {
+      emit_byte((e->c >> 11) & 0xFF, cinfo);
+      if (((e->c >> 11) & 0xFF) == 0xFF)
+	emit_byte(0x00, cinfo);
+    }
+  }
+}
+
+
+/*
+ * The core arithmetic encoding routine (common in JPEG and JBIG).
+ * This needs to go as fast as possible.
+ * Machine-dependent optimization facilities
+ * are not utilized in this portable implementation.
+ * However, this code should be fairly efficient and
+ * may be a good base for further optimizations anyway.
+ *
+ * Parameter 'val' to be encoded may be 0 or 1 (binary decision).
+ *
+ * Note: I've added full "Pacman" termination support to the
+ * byte output routines, which is equivalent to the optional
+ * Discard_final_zeros procedure (Figure D.15) in the spec.
+ * Thus, we always produce the shortest possible output
+ * stream compliant to the spec (no trailing zero bytes,
+ * except for FF stuffing).
+ *
+ * I've also introduced a new scheme for accessing
+ * the probability estimation state machine table,
+ * derived from Markus Kuhn's JBIG implementation.
+ */
+
+LOCAL(void)
+arith_encode (j_compress_ptr cinfo, unsigned char *st, int val) 
+{
+  register arith_entropy_ptr e = (arith_entropy_ptr) cinfo->entropy;
+  register unsigned char nl, nm;
+  register INT32 qe, temp;
+  register int sv;
+
+  /* Fetch values from our compact representation of Table D.3(D.2):
+   * Qe values and probability estimation state machine
+   */
+  sv = *st;
+  qe = jpeg_aritab[sv & 0x7F];	/* => Qe_Value */
+  nl = qe & 0xFF; qe >>= 8;	/* Next_Index_LPS + Switch_MPS */
+  nm = qe & 0xFF; qe >>= 8;	/* Next_Index_MPS */
+
+  /* Encode & estimation procedures per sections D.1.4 & D.1.5 */
+  e->a -= qe;
+  if (val != (sv >> 7)) {
+    /* Encode the less probable symbol */
+    if (e->a >= qe) {
+      /* If the interval size (qe) for the less probable symbol (LPS)
+       * is larger than the interval size for the MPS, then exchange
+       * the two symbols for coding efficiency, otherwise code the LPS
+       * as usual: */
+      e->c += e->a;
+      e->a = qe;
+    }
+    *st = (sv & 0x80) ^ nl;	/* Estimate_after_LPS */
+  } else {
+    /* Encode the more probable symbol */
+    if (e->a >= 0x8000L)
+      return;  /* A >= 0x8000 -> ready, no renormalization required */
+    if (e->a < qe) {
+      /* If the interval size (qe) for the less probable symbol (LPS)
+       * is larger than the interval size for the MPS, then exchange
+       * the two symbols for coding efficiency: */
+      e->c += e->a;
+      e->a = qe;
+    }
+    *st = (sv & 0x80) ^ nm;	/* Estimate_after_MPS */
+  }
+
+  /* Renormalization & data output per section D.1.6 */
+  do {
+    e->a <<= 1;
+    e->c <<= 1;
+    if (--e->ct == 0) {
+      /* Another byte is ready for output */
+      temp = e->c >> 19;
+      if (temp > 0xFF) {
+	/* Handle overflow over all stacked 0xFF bytes */
+	if (e->buffer >= 0) {
+	  if (e->zc)
+	    do emit_byte(0x00, cinfo);
+	    while (--e->zc);
+	  emit_byte(e->buffer + 1, cinfo);
+	  if (e->buffer + 1 == 0xFF)
+	    emit_byte(0x00, cinfo);
+	}
+	e->zc += e->sc;  /* carry-over converts stacked 0xFF bytes to 0x00 */
+	e->sc = 0;
+	/* Note: The 3 spacer bits in the C register guarantee
+	 * that the new buffer byte can't be 0xFF here
+	 * (see page 160 in the P&M JPEG book). */
+	e->buffer = temp & 0xFF;  /* new output byte, might overflow later */
+      } else if (temp == 0xFF) {
+	++e->sc;  /* stack 0xFF byte (which might overflow later) */
+      } else {
+	/* Output all stacked 0xFF bytes, they will not overflow any more */
+	if (e->buffer == 0)
+	  ++e->zc;
+	else if (e->buffer >= 0) {
+	  if (e->zc)
+	    do emit_byte(0x00, cinfo);
+	    while (--e->zc);
+	  emit_byte(e->buffer, cinfo);
+	}
+	if (e->sc) {
+	  if (e->zc)
+	    do emit_byte(0x00, cinfo);
+	    while (--e->zc);
+	  do {
+	    emit_byte(0xFF, cinfo);
+	    emit_byte(0x00, cinfo);
+	  } while (--e->sc);
+	}
+	e->buffer = temp & 0xFF;  /* new output byte (can still overflow) */
+      }
+      e->c &= 0x7FFFFL;
+      e->ct += 8;
+    }
+  } while (e->a < 0x8000L);
+}
+
+
+/*
+ * Emit a restart marker & resynchronize predictions.
+ */
+
+LOCAL(void)
+emit_restart (j_compress_ptr cinfo, int restart_num)
+{
+  arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy;
+  int ci;
+  jpeg_component_info * compptr;
+
+  finish_pass(cinfo);
+
+  emit_byte(0xFF, cinfo);
+  emit_byte(JPEG_RST0 + restart_num, cinfo);
+
+  /* Re-initialize statistics areas */
+  for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+    compptr = cinfo->cur_comp_info[ci];
+    /* DC needs no table for refinement scan */
+    if (cinfo->Ss == 0 && cinfo->Ah == 0) {
+      MEMZERO(entropy->dc_stats[compptr->dc_tbl_no], DC_STAT_BINS);
+      /* Reset DC predictions to 0 */
+      entropy->last_dc_val[ci] = 0;
+      entropy->dc_context[ci] = 0;
+    }
+    /* AC needs no table when not present */
+    if (cinfo->Se) {
+      MEMZERO(entropy->ac_stats[compptr->ac_tbl_no], AC_STAT_BINS);
+    }
+  }
+
+  /* Reset arithmetic encoding variables */
+  entropy->c = 0;
+  entropy->a = 0x10000L;
+  entropy->sc = 0;
+  entropy->zc = 0;
+  entropy->ct = 11;
+  entropy->buffer = -1;  /* empty */
+}
+
+
+/*
+ * MCU encoding for DC initial scan (either spectral selection,
+ * or first pass of successive approximation).
+ */
+
+METHODDEF(boolean)
+encode_mcu_DC_first (j_compress_ptr cinfo, JBLOCKROW *MCU_data)
+{
+  arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy;
+  JBLOCKROW block;
+  unsigned char *st;
+  int blkn, ci, tbl;
+  int v, v2, m;
+  ISHIFT_TEMPS
+
+  /* Emit restart marker if needed */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0) {
+      emit_restart(cinfo, entropy->next_restart_num);
+      entropy->restarts_to_go = cinfo->restart_interval;
+      entropy->next_restart_num++;
+      entropy->next_restart_num &= 7;
+    }
+    entropy->restarts_to_go--;
+  }
+
+  /* Encode the MCU data blocks */
+  for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
+    block = MCU_data[blkn];
+    ci = cinfo->MCU_membership[blkn];
+    tbl = cinfo->cur_comp_info[ci]->dc_tbl_no;
+
+    /* Compute the DC value after the required point transform by Al.
+     * This is simply an arithmetic right shift.
+     */
+    m = IRIGHT_SHIFT((int) ((*block)[0]), cinfo->Al);
+
+    /* Sections F.1.4.1 & F.1.4.4.1: Encoding of DC coefficients */
+
+    /* Table F.4: Point to statistics bin S0 for DC coefficient coding */
+    st = entropy->dc_stats[tbl] + entropy->dc_context[ci];
+
+    /* Figure F.4: Encode_DC_DIFF */
+    if ((v = m - entropy->last_dc_val[ci]) == 0) {
+      arith_encode(cinfo, st, 0);
+      entropy->dc_context[ci] = 0;	/* zero diff category */
+    } else {
+      entropy->last_dc_val[ci] = m;
+      arith_encode(cinfo, st, 1);
+      /* Figure F.6: Encoding nonzero value v */
+      /* Figure F.7: Encoding the sign of v */
+      if (v > 0) {
+	arith_encode(cinfo, st + 1, 0);	/* Table F.4: SS = S0 + 1 */
+	st += 2;			/* Table F.4: SP = S0 + 2 */
+	entropy->dc_context[ci] = 4;	/* small positive diff category */
+      } else {
+	v = -v;
+	arith_encode(cinfo, st + 1, 1);	/* Table F.4: SS = S0 + 1 */
+	st += 3;			/* Table F.4: SN = S0 + 3 */
+	entropy->dc_context[ci] = 8;	/* small negative diff category */
+      }
+      /* Figure F.8: Encoding the magnitude category of v */
+      m = 0;
+      if (v -= 1) {
+	arith_encode(cinfo, st, 1);
+	m = 1;
+	v2 = v;
+	st = entropy->dc_stats[tbl] + 20; /* Table F.4: X1 = 20 */
+	while (v2 >>= 1) {
+	  arith_encode(cinfo, st, 1);
+	  m <<= 1;
+	  st += 1;
+	}
+      }
+      arith_encode(cinfo, st, 0);
+      /* Section F.1.4.4.1.2: Establish dc_context conditioning category */
+      if (m < (int) ((1L << cinfo->arith_dc_L[tbl]) >> 1))
+	entropy->dc_context[ci] = 0;	/* zero diff category */
+      else if (m > (int) ((1L << cinfo->arith_dc_U[tbl]) >> 1))
+	entropy->dc_context[ci] += 8;	/* large diff category */
+      /* Figure F.9: Encoding the magnitude bit pattern of v */
+      st += 14;
+      while (m >>= 1)
+	arith_encode(cinfo, st, (m & v) ? 1 : 0);
+    }
+  }
+
+  return TRUE;
+}
+
+
+/*
+ * MCU encoding for AC initial scan (either spectral selection,
+ * or first pass of successive approximation).
+ */
+
+METHODDEF(boolean)
+encode_mcu_AC_first (j_compress_ptr cinfo, JBLOCKROW *MCU_data)
+{
+  arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy;
+  JBLOCKROW block;
+  unsigned char *st;
+  int tbl, k, ke;
+  int v, v2, m;
+  const int * natural_order;
+
+  /* Emit restart marker if needed */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0) {
+      emit_restart(cinfo, entropy->next_restart_num);
+      entropy->restarts_to_go = cinfo->restart_interval;
+      entropy->next_restart_num++;
+      entropy->next_restart_num &= 7;
+    }
+    entropy->restarts_to_go--;
+  }
+
+  natural_order = cinfo->natural_order;
+
+  /* Encode the MCU data block */
+  block = MCU_data[0];
+  tbl = cinfo->cur_comp_info[0]->ac_tbl_no;
+
+  /* Sections F.1.4.2 & F.1.4.4.2: Encoding of AC coefficients */
+
+  /* Establish EOB (end-of-block) index */
+  ke = cinfo->Se;
+  do {
+    /* We must apply the point transform by Al.  For AC coefficients this
+     * is an integer division with rounding towards 0.  To do this portably
+     * in C, we shift after obtaining the absolute value.
+     */
+    if ((v = (*block)[natural_order[ke]]) >= 0) {
+      if (v >>= cinfo->Al) break;
+    } else {
+      v = -v;
+      if (v >>= cinfo->Al) break;
+    }
+  } while (--ke);
+
+  /* Figure F.5: Encode_AC_Coefficients */
+  for (k = cinfo->Ss - 1; k < ke;) {
+    st = entropy->ac_stats[tbl] + 3 * k;
+    arith_encode(cinfo, st, 0);		/* EOB decision */
+    for (;;) {
+      if ((v = (*block)[natural_order[++k]]) >= 0) {
+	if (v >>= cinfo->Al) {
+	  arith_encode(cinfo, st + 1, 1);
+	  arith_encode(cinfo, entropy->fixed_bin, 0);
+	  break;
+	}
+      } else {
+	v = -v;
+	if (v >>= cinfo->Al) {
+	  arith_encode(cinfo, st + 1, 1);
+	  arith_encode(cinfo, entropy->fixed_bin, 1);
+	  break;
+	}
+      }
+      arith_encode(cinfo, st + 1, 0);
+      st += 3;
+    }
+    st += 2;
+    /* Figure F.8: Encoding the magnitude category of v */
+    m = 0;
+    if (v -= 1) {
+      arith_encode(cinfo, st, 1);
+      m = 1;
+      v2 = v;
+      if (v2 >>= 1) {
+	arith_encode(cinfo, st, 1);
+	m <<= 1;
+	st = entropy->ac_stats[tbl] +
+	     (k <= cinfo->arith_ac_K[tbl] ? 189 : 217);
+	while (v2 >>= 1) {
+	  arith_encode(cinfo, st, 1);
+	  m <<= 1;
+	  st += 1;
+	}
+      }
+    }
+    arith_encode(cinfo, st, 0);
+    /* Figure F.9: Encoding the magnitude bit pattern of v */
+    st += 14;
+    while (m >>= 1)
+      arith_encode(cinfo, st, (m & v) ? 1 : 0);
+  }
+  /* Encode EOB decision only if k < cinfo->Se */
+  if (k < cinfo->Se) {
+    st = entropy->ac_stats[tbl] + 3 * k;
+    arith_encode(cinfo, st, 1);
+  }
+
+  return TRUE;
+}
+
+
+/*
+ * MCU encoding for DC successive approximation refinement scan.
+ */
+
+METHODDEF(boolean)
+encode_mcu_DC_refine (j_compress_ptr cinfo, JBLOCKROW *MCU_data)
+{
+  arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy;
+  unsigned char *st;
+  int Al, blkn;
+
+  /* Emit restart marker if needed */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0) {
+      emit_restart(cinfo, entropy->next_restart_num);
+      entropy->restarts_to_go = cinfo->restart_interval;
+      entropy->next_restart_num++;
+      entropy->next_restart_num &= 7;
+    }
+    entropy->restarts_to_go--;
+  }
+
+  st = entropy->fixed_bin;	/* use fixed probability estimation */
+  Al = cinfo->Al;
+
+  /* Encode the MCU data blocks */
+  for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
+    /* We simply emit the Al'th bit of the DC coefficient value. */
+    arith_encode(cinfo, st, (MCU_data[blkn][0][0] >> Al) & 1);
+  }
+
+  return TRUE;
+}
+
+
+/*
+ * MCU encoding for AC successive approximation refinement scan.
+ */
+
+METHODDEF(boolean)
+encode_mcu_AC_refine (j_compress_ptr cinfo, JBLOCKROW *MCU_data)
+{
+  arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy;
+  JBLOCKROW block;
+  unsigned char *st;
+  int tbl, k, ke, kex;
+  int v;
+  const int * natural_order;
+
+  /* Emit restart marker if needed */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0) {
+      emit_restart(cinfo, entropy->next_restart_num);
+      entropy->restarts_to_go = cinfo->restart_interval;
+      entropy->next_restart_num++;
+      entropy->next_restart_num &= 7;
+    }
+    entropy->restarts_to_go--;
+  }
+
+  natural_order = cinfo->natural_order;
+
+  /* Encode the MCU data block */
+  block = MCU_data[0];
+  tbl = cinfo->cur_comp_info[0]->ac_tbl_no;
+
+  /* Section G.1.3.3: Encoding of AC coefficients */
+
+  /* Establish EOB (end-of-block) index */
+  ke = cinfo->Se;
+  do {
+    /* We must apply the point transform by Al.  For AC coefficients this
+     * is an integer division with rounding towards 0.  To do this portably
+     * in C, we shift after obtaining the absolute value.
+     */
+    if ((v = (*block)[natural_order[ke]]) >= 0) {
+      if (v >>= cinfo->Al) break;
+    } else {
+      v = -v;
+      if (v >>= cinfo->Al) break;
+    }
+  } while (--ke);
+
+  /* Establish EOBx (previous stage end-of-block) index */
+  for (kex = ke; kex > 0; kex--)
+    if ((v = (*block)[natural_order[kex]]) >= 0) {
+      if (v >>= cinfo->Ah) break;
+    } else {
+      v = -v;
+      if (v >>= cinfo->Ah) break;
+    }
+
+  /* Figure G.10: Encode_AC_Coefficients_SA */
+  for (k = cinfo->Ss - 1; k < ke;) {
+    st = entropy->ac_stats[tbl] + 3 * k;
+    if (k >= kex)
+      arith_encode(cinfo, st, 0);	/* EOB decision */
+    for (;;) {
+      if ((v = (*block)[natural_order[++k]]) >= 0) {
+	if (v >>= cinfo->Al) {
+	  if (v >> 1)			/* previously nonzero coef */
+	    arith_encode(cinfo, st + 2, (v & 1));
+	  else {			/* newly nonzero coef */
+	    arith_encode(cinfo, st + 1, 1);
+	    arith_encode(cinfo, entropy->fixed_bin, 0);
+	  }
+	  break;
+	}
+      } else {
+	v = -v;
+	if (v >>= cinfo->Al) {
+	  if (v >> 1)			/* previously nonzero coef */
+	    arith_encode(cinfo, st + 2, (v & 1));
+	  else {			/* newly nonzero coef */
+	    arith_encode(cinfo, st + 1, 1);
+	    arith_encode(cinfo, entropy->fixed_bin, 1);
+	  }
+	  break;
+	}
+      }
+      arith_encode(cinfo, st + 1, 0);
+      st += 3;
+    }
+  }
+  /* Encode EOB decision only if k < cinfo->Se */
+  if (k < cinfo->Se) {
+    st = entropy->ac_stats[tbl] + 3 * k;
+    arith_encode(cinfo, st, 1);
+  }
+
+  return TRUE;
+}
+
+
+/*
+ * Encode and output one MCU's worth of arithmetic-compressed coefficients.
+ */
+
+METHODDEF(boolean)
+encode_mcu (j_compress_ptr cinfo, JBLOCKROW *MCU_data)
+{
+  arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy;
+  jpeg_component_info * compptr;
+  JBLOCKROW block;
+  unsigned char *st;
+  int blkn, ci, tbl, k, ke;
+  int v, v2, m;
+  const int * natural_order;
+
+  /* Emit restart marker if needed */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0) {
+      emit_restart(cinfo, entropy->next_restart_num);
+      entropy->restarts_to_go = cinfo->restart_interval;
+      entropy->next_restart_num++;
+      entropy->next_restart_num &= 7;
+    }
+    entropy->restarts_to_go--;
+  }
+
+  natural_order = cinfo->natural_order;
+
+  /* Encode the MCU data blocks */
+  for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
+    block = MCU_data[blkn];
+    ci = cinfo->MCU_membership[blkn];
+    compptr = cinfo->cur_comp_info[ci];
+
+    /* Sections F.1.4.1 & F.1.4.4.1: Encoding of DC coefficients */
+
+    tbl = compptr->dc_tbl_no;
+
+    /* Table F.4: Point to statistics bin S0 for DC coefficient coding */
+    st = entropy->dc_stats[tbl] + entropy->dc_context[ci];
+
+    /* Figure F.4: Encode_DC_DIFF */
+    if ((v = (*block)[0] - entropy->last_dc_val[ci]) == 0) {
+      arith_encode(cinfo, st, 0);
+      entropy->dc_context[ci] = 0;	/* zero diff category */
+    } else {
+      entropy->last_dc_val[ci] = (*block)[0];
+      arith_encode(cinfo, st, 1);
+      /* Figure F.6: Encoding nonzero value v */
+      /* Figure F.7: Encoding the sign of v */
+      if (v > 0) {
+	arith_encode(cinfo, st + 1, 0);	/* Table F.4: SS = S0 + 1 */
+	st += 2;			/* Table F.4: SP = S0 + 2 */
+	entropy->dc_context[ci] = 4;	/* small positive diff category */
+      } else {
+	v = -v;
+	arith_encode(cinfo, st + 1, 1);	/* Table F.4: SS = S0 + 1 */
+	st += 3;			/* Table F.4: SN = S0 + 3 */
+	entropy->dc_context[ci] = 8;	/* small negative diff category */
+      }
+      /* Figure F.8: Encoding the magnitude category of v */
+      m = 0;
+      if (v -= 1) {
+	arith_encode(cinfo, st, 1);
+	m = 1;
+	v2 = v;
+	st = entropy->dc_stats[tbl] + 20; /* Table F.4: X1 = 20 */
+	while (v2 >>= 1) {
+	  arith_encode(cinfo, st, 1);
+	  m <<= 1;
+	  st += 1;
+	}
+      }
+      arith_encode(cinfo, st, 0);
+      /* Section F.1.4.4.1.2: Establish dc_context conditioning category */
+      if (m < (int) ((1L << cinfo->arith_dc_L[tbl]) >> 1))
+	entropy->dc_context[ci] = 0;	/* zero diff category */
+      else if (m > (int) ((1L << cinfo->arith_dc_U[tbl]) >> 1))
+	entropy->dc_context[ci] += 8;	/* large diff category */
+      /* Figure F.9: Encoding the magnitude bit pattern of v */
+      st += 14;
+      while (m >>= 1)
+	arith_encode(cinfo, st, (m & v) ? 1 : 0);
+    }
+
+    /* Sections F.1.4.2 & F.1.4.4.2: Encoding of AC coefficients */
+
+    if ((ke = cinfo->lim_Se) == 0) continue;
+    tbl = compptr->ac_tbl_no;
+
+    /* Establish EOB (end-of-block) index */
+    do {
+      if ((*block)[natural_order[ke]]) break;
+    } while (--ke);
+
+    /* Figure F.5: Encode_AC_Coefficients */
+    for (k = 0; k < ke;) {
+      st = entropy->ac_stats[tbl] + 3 * k;
+      arith_encode(cinfo, st, 0);	/* EOB decision */
+      while ((v = (*block)[natural_order[++k]]) == 0) {
+	arith_encode(cinfo, st + 1, 0);
+	st += 3;
+      }
+      arith_encode(cinfo, st + 1, 1);
+      /* Figure F.6: Encoding nonzero value v */
+      /* Figure F.7: Encoding the sign of v */
+      if (v > 0) {
+	arith_encode(cinfo, entropy->fixed_bin, 0);
+      } else {
+	v = -v;
+	arith_encode(cinfo, entropy->fixed_bin, 1);
+      }
+      st += 2;
+      /* Figure F.8: Encoding the magnitude category of v */
+      m = 0;
+      if (v -= 1) {
+	arith_encode(cinfo, st, 1);
+	m = 1;
+	v2 = v;
+	if (v2 >>= 1) {
+	  arith_encode(cinfo, st, 1);
+	  m <<= 1;
+	  st = entropy->ac_stats[tbl] +
+	       (k <= cinfo->arith_ac_K[tbl] ? 189 : 217);
+	  while (v2 >>= 1) {
+	    arith_encode(cinfo, st, 1);
+	    m <<= 1;
+	    st += 1;
+	  }
+	}
+      }
+      arith_encode(cinfo, st, 0);
+      /* Figure F.9: Encoding the magnitude bit pattern of v */
+      st += 14;
+      while (m >>= 1)
+	arith_encode(cinfo, st, (m & v) ? 1 : 0);
+    }
+    /* Encode EOB decision only if k < cinfo->lim_Se */
+    if (k < cinfo->lim_Se) {
+      st = entropy->ac_stats[tbl] + 3 * k;
+      arith_encode(cinfo, st, 1);
+    }
+  }
+
+  return TRUE;
+}
+
+
+/*
+ * Initialize for an arithmetic-compressed scan.
+ */
+
+METHODDEF(void)
+start_pass (j_compress_ptr cinfo, boolean gather_statistics)
+{
+  arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy;
+  int ci, tbl;
+  jpeg_component_info * compptr;
+
+  if (gather_statistics)
+    /* Make sure to avoid that in the master control logic!
+     * We are fully adaptive here and need no extra
+     * statistics gathering pass!
+     */
+    ERREXIT(cinfo, JERR_NOT_COMPILED);
+
+  /* We assume jcmaster.c already validated the progressive scan parameters. */
+
+  /* Select execution routines */
+  if (cinfo->progressive_mode) {
+    if (cinfo->Ah == 0) {
+      if (cinfo->Ss == 0)
+	entropy->pub.encode_mcu = encode_mcu_DC_first;
+      else
+	entropy->pub.encode_mcu = encode_mcu_AC_first;
+    } else {
+      if (cinfo->Ss == 0)
+	entropy->pub.encode_mcu = encode_mcu_DC_refine;
+      else
+	entropy->pub.encode_mcu = encode_mcu_AC_refine;
+    }
+  } else
+    entropy->pub.encode_mcu = encode_mcu;
+
+  /* Allocate & initialize requested statistics areas */
+  for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+    compptr = cinfo->cur_comp_info[ci];
+    /* DC needs no table for refinement scan */
+    if (cinfo->Ss == 0 && cinfo->Ah == 0) {
+      tbl = compptr->dc_tbl_no;
+      if (tbl < 0 || tbl >= NUM_ARITH_TBLS)
+	ERREXIT1(cinfo, JERR_NO_ARITH_TABLE, tbl);
+      if (entropy->dc_stats[tbl] == NULL)
+	entropy->dc_stats[tbl] = (unsigned char *) (*cinfo->mem->alloc_small)
+	  ((j_common_ptr) cinfo, JPOOL_IMAGE, DC_STAT_BINS);
+      MEMZERO(entropy->dc_stats[tbl], DC_STAT_BINS);
+      /* Initialize DC predictions to 0 */
+      entropy->last_dc_val[ci] = 0;
+      entropy->dc_context[ci] = 0;
+    }
+    /* AC needs no table when not present */
+    if (cinfo->Se) {
+      tbl = compptr->ac_tbl_no;
+      if (tbl < 0 || tbl >= NUM_ARITH_TBLS)
+	ERREXIT1(cinfo, JERR_NO_ARITH_TABLE, tbl);
+      if (entropy->ac_stats[tbl] == NULL)
+	entropy->ac_stats[tbl] = (unsigned char *) (*cinfo->mem->alloc_small)
+	  ((j_common_ptr) cinfo, JPOOL_IMAGE, AC_STAT_BINS);
+      MEMZERO(entropy->ac_stats[tbl], AC_STAT_BINS);
+#ifdef CALCULATE_SPECTRAL_CONDITIONING
+      if (cinfo->progressive_mode)
+	/* Section G.1.3.2: Set appropriate arithmetic conditioning value Kx */
+	cinfo->arith_ac_K[tbl] = cinfo->Ss + ((8 + cinfo->Se - cinfo->Ss) >> 4);
+#endif
+    }
+  }
+
+  /* Initialize arithmetic encoding variables */
+  entropy->c = 0;
+  entropy->a = 0x10000L;
+  entropy->sc = 0;
+  entropy->zc = 0;
+  entropy->ct = 11;
+  entropy->buffer = -1;  /* empty */
+
+  /* Initialize restart stuff */
+  entropy->restarts_to_go = cinfo->restart_interval;
+  entropy->next_restart_num = 0;
+}
+
+
+/*
+ * Module initialization routine for arithmetic entropy encoding.
+ */
+
+GLOBAL(void)
+jinit_arith_encoder (j_compress_ptr cinfo)
+{
+  arith_entropy_ptr entropy;
+  int i;
+
+  entropy = (arith_entropy_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				SIZEOF(arith_entropy_encoder));
+  cinfo->entropy = &entropy->pub;
+  entropy->pub.start_pass = start_pass;
+  entropy->pub.finish_pass = finish_pass;
+
+  /* Mark tables unallocated */
+  for (i = 0; i < NUM_ARITH_TBLS; i++) {
+    entropy->dc_stats[i] = NULL;
+    entropy->ac_stats[i] = NULL;
+  }
+
+  /* Initialize index for fixed probability estimation */
+  entropy->fixed_bin[0] = 113;
+}
diff --git a/plugins/AdvaImg/src/LibJPEG/jccoefct.c b/plugins/AdvaImg/src/LibJPEG/jccoefct.c
index 924a703dda..b64b46e75a 100644
--- a/plugins/AdvaImg/src/LibJPEG/jccoefct.c
+++ b/plugins/AdvaImg/src/LibJPEG/jccoefct.c
@@ -1,454 +1,454 @@
-/*
- * jccoefct.c
- *
- * Copyright (C) 1994-1997, Thomas G. Lane.
- * Modified 2003-2011 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains the coefficient buffer controller for compression.
- * This controller is the top level of the JPEG compressor proper.
- * The coefficient buffer lies between forward-DCT and entropy encoding steps.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-
-
-/* We use a full-image coefficient buffer when doing Huffman optimization,
- * and also for writing multiple-scan JPEG files.  In all cases, the DCT
- * step is run during the first pass, and subsequent passes need only read
- * the buffered coefficients.
- */
-#ifdef ENTROPY_OPT_SUPPORTED
-#define FULL_COEF_BUFFER_SUPPORTED
-#else
-#ifdef C_MULTISCAN_FILES_SUPPORTED
-#define FULL_COEF_BUFFER_SUPPORTED
-#endif
-#endif
-
-
-/* Private buffer controller object */
-
-typedef struct {
-  struct jpeg_c_coef_controller pub; /* public fields */
-
-  JDIMENSION iMCU_row_num;	/* iMCU row # within image */
-  JDIMENSION mcu_ctr;		/* counts MCUs processed in current row */
-  int MCU_vert_offset;		/* counts MCU rows within iMCU row */
-  int MCU_rows_per_iMCU_row;	/* number of such rows needed */
-
-  /* For single-pass compression, it's sufficient to buffer just one MCU
-   * (although this may prove a bit slow in practice).  We allocate a
-   * workspace of C_MAX_BLOCKS_IN_MCU coefficient blocks, and reuse it for each
-   * MCU constructed and sent.  (On 80x86, the workspace is FAR even though
-   * it's not really very big; this is to keep the module interfaces unchanged
-   * when a large coefficient buffer is necessary.)
-   * In multi-pass modes, this array points to the current MCU's blocks
-   * within the virtual arrays.
-   */
-  JBLOCKROW MCU_buffer[C_MAX_BLOCKS_IN_MCU];
-
-  /* In multi-pass modes, we need a virtual block array for each component. */
-  jvirt_barray_ptr whole_image[MAX_COMPONENTS];
-} my_coef_controller;
-
-typedef my_coef_controller * my_coef_ptr;
-
-
-/* Forward declarations */
-METHODDEF(boolean) compress_data
-    JPP((j_compress_ptr cinfo, JSAMPIMAGE input_buf));
-#ifdef FULL_COEF_BUFFER_SUPPORTED
-METHODDEF(boolean) compress_first_pass
-    JPP((j_compress_ptr cinfo, JSAMPIMAGE input_buf));
-METHODDEF(boolean) compress_output
-    JPP((j_compress_ptr cinfo, JSAMPIMAGE input_buf));
-#endif
-
-
-LOCAL(void)
-start_iMCU_row (j_compress_ptr cinfo)
-/* Reset within-iMCU-row counters for a new row */
-{
-  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
-
-  /* In an interleaved scan, an MCU row is the same as an iMCU row.
-   * In a noninterleaved scan, an iMCU row has v_samp_factor MCU rows.
-   * But at the bottom of the image, process only what's left.
-   */
-  if (cinfo->comps_in_scan > 1) {
-    coef->MCU_rows_per_iMCU_row = 1;
-  } else {
-    if (coef->iMCU_row_num < (cinfo->total_iMCU_rows-1))
-      coef->MCU_rows_per_iMCU_row = cinfo->cur_comp_info[0]->v_samp_factor;
-    else
-      coef->MCU_rows_per_iMCU_row = cinfo->cur_comp_info[0]->last_row_height;
-  }
-
-  coef->mcu_ctr = 0;
-  coef->MCU_vert_offset = 0;
-}
-
-
-/*
- * Initialize for a processing pass.
- */
-
-METHODDEF(void)
-start_pass_coef (j_compress_ptr cinfo, J_BUF_MODE pass_mode)
-{
-  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
-
-  coef->iMCU_row_num = 0;
-  start_iMCU_row(cinfo);
-
-  switch (pass_mode) {
-  case JBUF_PASS_THRU:
-    if (coef->whole_image[0] != NULL)
-      ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
-    coef->pub.compress_data = compress_data;
-    break;
-#ifdef FULL_COEF_BUFFER_SUPPORTED
-  case JBUF_SAVE_AND_PASS:
-    if (coef->whole_image[0] == NULL)
-      ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
-    coef->pub.compress_data = compress_first_pass;
-    break;
-  case JBUF_CRANK_DEST:
-    if (coef->whole_image[0] == NULL)
-      ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
-    coef->pub.compress_data = compress_output;
-    break;
-#endif
-  default:
-    ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
-    break;
-  }
-}
-
-
-/*
- * Process some data in the single-pass case.
- * We process the equivalent of one fully interleaved MCU row ("iMCU" row)
- * per call, ie, v_samp_factor block rows for each component in the image.
- * Returns TRUE if the iMCU row is completed, FALSE if suspended.
- *
- * NB: input_buf contains a plane for each component in image,
- * which we index according to the component's SOF position.
- */
-
-METHODDEF(boolean)
-compress_data (j_compress_ptr cinfo, JSAMPIMAGE input_buf)
-{
-  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
-  JDIMENSION MCU_col_num;	/* index of current MCU within row */
-  JDIMENSION last_MCU_col = cinfo->MCUs_per_row - 1;
-  JDIMENSION last_iMCU_row = cinfo->total_iMCU_rows - 1;
-  int blkn, bi, ci, yindex, yoffset, blockcnt;
-  JDIMENSION ypos, xpos;
-  jpeg_component_info *compptr;
-  forward_DCT_ptr forward_DCT;
-
-  /* Loop to write as much as one whole iMCU row */
-  for (yoffset = coef->MCU_vert_offset; yoffset < coef->MCU_rows_per_iMCU_row;
-       yoffset++) {
-    for (MCU_col_num = coef->mcu_ctr; MCU_col_num <= last_MCU_col;
-	 MCU_col_num++) {
-      /* Determine where data comes from in input_buf and do the DCT thing.
-       * Each call on forward_DCT processes a horizontal row of DCT blocks
-       * as wide as an MCU; we rely on having allocated the MCU_buffer[] blocks
-       * sequentially.  Dummy blocks at the right or bottom edge are filled in
-       * specially.  The data in them does not matter for image reconstruction,
-       * so we fill them with values that will encode to the smallest amount of
-       * data, viz: all zeroes in the AC entries, DC entries equal to previous
-       * block's DC value.  (Thanks to Thomas Kinsman for this idea.)
-       */
-      blkn = 0;
-      for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
-	compptr = cinfo->cur_comp_info[ci];
-	forward_DCT = cinfo->fdct->forward_DCT[compptr->component_index];
-	blockcnt = (MCU_col_num < last_MCU_col) ? compptr->MCU_width
-						: compptr->last_col_width;
-	xpos = MCU_col_num * compptr->MCU_sample_width;
-	ypos = yoffset * compptr->DCT_v_scaled_size;
-	/* ypos == (yoffset+yindex) * DCTSIZE */
-	for (yindex = 0; yindex < compptr->MCU_height; yindex++) {
-	  if (coef->iMCU_row_num < last_iMCU_row ||
-	      yoffset+yindex < compptr->last_row_height) {
-	    (*forward_DCT) (cinfo, compptr,
-			    input_buf[compptr->component_index],
-			    coef->MCU_buffer[blkn],
-			    ypos, xpos, (JDIMENSION) blockcnt);
-	    if (blockcnt < compptr->MCU_width) {
-	      /* Create some dummy blocks at the right edge of the image. */
-	      FMEMZERO((void FAR *) coef->MCU_buffer[blkn + blockcnt],
-		       (compptr->MCU_width - blockcnt) * SIZEOF(JBLOCK));
-	      for (bi = blockcnt; bi < compptr->MCU_width; bi++) {
-		coef->MCU_buffer[blkn+bi][0][0] = coef->MCU_buffer[blkn+bi-1][0][0];
-	      }
-	    }
-	  } else {
-	    /* Create a row of dummy blocks at the bottom of the image. */
-	    FMEMZERO((void FAR *) coef->MCU_buffer[blkn],
-		     compptr->MCU_width * SIZEOF(JBLOCK));
-	    for (bi = 0; bi < compptr->MCU_width; bi++) {
-	      coef->MCU_buffer[blkn+bi][0][0] = coef->MCU_buffer[blkn-1][0][0];
-	    }
-	  }
-	  blkn += compptr->MCU_width;
-	  ypos += compptr->DCT_v_scaled_size;
-	}
-      }
-      /* Try to write the MCU.  In event of a suspension failure, we will
-       * re-DCT the MCU on restart (a bit inefficient, could be fixed...)
-       */
-      if (! (*cinfo->entropy->encode_mcu) (cinfo, coef->MCU_buffer)) {
-	/* Suspension forced; update state counters and exit */
-	coef->MCU_vert_offset = yoffset;
-	coef->mcu_ctr = MCU_col_num;
-	return FALSE;
-      }
-    }
-    /* Completed an MCU row, but perhaps not an iMCU row */
-    coef->mcu_ctr = 0;
-  }
-  /* Completed the iMCU row, advance counters for next one */
-  coef->iMCU_row_num++;
-  start_iMCU_row(cinfo);
-  return TRUE;
-}
-
-
-#ifdef FULL_COEF_BUFFER_SUPPORTED
-
-/*
- * Process some data in the first pass of a multi-pass case.
- * We process the equivalent of one fully interleaved MCU row ("iMCU" row)
- * per call, ie, v_samp_factor block rows for each component in the image.
- * This amount of data is read from the source buffer, DCT'd and quantized,
- * and saved into the virtual arrays.  We also generate suitable dummy blocks
- * as needed at the right and lower edges.  (The dummy blocks are constructed
- * in the virtual arrays, which have been padded appropriately.)  This makes
- * it possible for subsequent passes not to worry about real vs. dummy blocks.
- *
- * We must also emit the data to the entropy encoder.  This is conveniently
- * done by calling compress_output() after we've loaded the current strip
- * of the virtual arrays.
- *
- * NB: input_buf contains a plane for each component in image.  All
- * components are DCT'd and loaded into the virtual arrays in this pass.
- * However, it may be that only a subset of the components are emitted to
- * the entropy encoder during this first pass; be careful about looking
- * at the scan-dependent variables (MCU dimensions, etc).
- */
-
-METHODDEF(boolean)
-compress_first_pass (j_compress_ptr cinfo, JSAMPIMAGE input_buf)
-{
-  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
-  JDIMENSION last_iMCU_row = cinfo->total_iMCU_rows - 1;
-  JDIMENSION blocks_across, MCUs_across, MCUindex;
-  int bi, ci, h_samp_factor, block_row, block_rows, ndummy;
-  JCOEF lastDC;
-  jpeg_component_info *compptr;
-  JBLOCKARRAY buffer;
-  JBLOCKROW thisblockrow, lastblockrow;
-  forward_DCT_ptr forward_DCT;
-
-  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-       ci++, compptr++) {
-    /* Align the virtual buffer for this component. */
-    buffer = (*cinfo->mem->access_virt_barray)
-      ((j_common_ptr) cinfo, coef->whole_image[ci],
-       coef->iMCU_row_num * compptr->v_samp_factor,
-       (JDIMENSION) compptr->v_samp_factor, TRUE);
-    /* Count non-dummy DCT block rows in this iMCU row. */
-    if (coef->iMCU_row_num < last_iMCU_row)
-      block_rows = compptr->v_samp_factor;
-    else {
-      /* NB: can't use last_row_height here, since may not be set! */
-      block_rows = (int) (compptr->height_in_blocks % compptr->v_samp_factor);
-      if (block_rows == 0) block_rows = compptr->v_samp_factor;
-    }
-    blocks_across = compptr->width_in_blocks;
-    h_samp_factor = compptr->h_samp_factor;
-    /* Count number of dummy blocks to be added at the right margin. */
-    ndummy = (int) (blocks_across % h_samp_factor);
-    if (ndummy > 0)
-      ndummy = h_samp_factor - ndummy;
-    forward_DCT = cinfo->fdct->forward_DCT[ci];
-    /* Perform DCT for all non-dummy blocks in this iMCU row.  Each call
-     * on forward_DCT processes a complete horizontal row of DCT blocks.
-     */
-    for (block_row = 0; block_row < block_rows; block_row++) {
-      thisblockrow = buffer[block_row];
-      (*forward_DCT) (cinfo, compptr, input_buf[ci], thisblockrow,
-		      (JDIMENSION) (block_row * compptr->DCT_v_scaled_size),
-		      (JDIMENSION) 0, blocks_across);
-      if (ndummy > 0) {
-	/* Create dummy blocks at the right edge of the image. */
-	thisblockrow += blocks_across; /* => first dummy block */
-	FMEMZERO((void FAR *) thisblockrow, ndummy * SIZEOF(JBLOCK));
-	lastDC = thisblockrow[-1][0];
-	for (bi = 0; bi < ndummy; bi++) {
-	  thisblockrow[bi][0] = lastDC;
-	}
-      }
-    }
-    /* If at end of image, create dummy block rows as needed.
-     * The tricky part here is that within each MCU, we want the DC values
-     * of the dummy blocks to match the last real block's DC value.
-     * This squeezes a few more bytes out of the resulting file...
-     */
-    if (coef->iMCU_row_num == last_iMCU_row) {
-      blocks_across += ndummy;	/* include lower right corner */
-      MCUs_across = blocks_across / h_samp_factor;
-      for (block_row = block_rows; block_row < compptr->v_samp_factor;
-	   block_row++) {
-	thisblockrow = buffer[block_row];
-	lastblockrow = buffer[block_row-1];
-	FMEMZERO((void FAR *) thisblockrow,
-		 (size_t) (blocks_across * SIZEOF(JBLOCK)));
-	for (MCUindex = 0; MCUindex < MCUs_across; MCUindex++) {
-	  lastDC = lastblockrow[h_samp_factor-1][0];
-	  for (bi = 0; bi < h_samp_factor; bi++) {
-	    thisblockrow[bi][0] = lastDC;
-	  }
-	  thisblockrow += h_samp_factor; /* advance to next MCU in row */
-	  lastblockrow += h_samp_factor;
-	}
-      }
-    }
-  }
-  /* NB: compress_output will increment iMCU_row_num if successful.
-   * A suspension return will result in redoing all the work above next time.
-   */
-
-  /* Emit data to the entropy encoder, sharing code with subsequent passes */
-  return compress_output(cinfo, input_buf);
-}
-
-
-/*
- * Process some data in subsequent passes of a multi-pass case.
- * We process the equivalent of one fully interleaved MCU row ("iMCU" row)
- * per call, ie, v_samp_factor block rows for each component in the scan.
- * The data is obtained from the virtual arrays and fed to the entropy coder.
- * Returns TRUE if the iMCU row is completed, FALSE if suspended.
- *
- * NB: input_buf is ignored; it is likely to be a NULL pointer.
- */
-
-METHODDEF(boolean)
-compress_output (j_compress_ptr cinfo, JSAMPIMAGE input_buf)
-{
-  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
-  JDIMENSION MCU_col_num;	/* index of current MCU within row */
-  int blkn, ci, xindex, yindex, yoffset;
-  JDIMENSION start_col;
-  JBLOCKARRAY buffer[MAX_COMPS_IN_SCAN];
-  JBLOCKROW buffer_ptr;
-  jpeg_component_info *compptr;
-
-  /* Align the virtual buffers for the components used in this scan.
-   * NB: during first pass, this is safe only because the buffers will
-   * already be aligned properly, so jmemmgr.c won't need to do any I/O.
-   */
-  for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
-    compptr = cinfo->cur_comp_info[ci];
-    buffer[ci] = (*cinfo->mem->access_virt_barray)
-      ((j_common_ptr) cinfo, coef->whole_image[compptr->component_index],
-       coef->iMCU_row_num * compptr->v_samp_factor,
-       (JDIMENSION) compptr->v_samp_factor, FALSE);
-  }
-
-  /* Loop to process one whole iMCU row */
-  for (yoffset = coef->MCU_vert_offset; yoffset < coef->MCU_rows_per_iMCU_row;
-       yoffset++) {
-    for (MCU_col_num = coef->mcu_ctr; MCU_col_num < cinfo->MCUs_per_row;
-	 MCU_col_num++) {
-      /* Construct list of pointers to DCT blocks belonging to this MCU */
-      blkn = 0;			/* index of current DCT block within MCU */
-      for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
-	compptr = cinfo->cur_comp_info[ci];
-	start_col = MCU_col_num * compptr->MCU_width;
-	for (yindex = 0; yindex < compptr->MCU_height; yindex++) {
-	  buffer_ptr = buffer[ci][yindex+yoffset] + start_col;
-	  for (xindex = 0; xindex < compptr->MCU_width; xindex++) {
-	    coef->MCU_buffer[blkn++] = buffer_ptr++;
-	  }
-	}
-      }
-      /* Try to write the MCU. */
-      if (! (*cinfo->entropy->encode_mcu) (cinfo, coef->MCU_buffer)) {
-	/* Suspension forced; update state counters and exit */
-	coef->MCU_vert_offset = yoffset;
-	coef->mcu_ctr = MCU_col_num;
-	return FALSE;
-      }
-    }
-    /* Completed an MCU row, but perhaps not an iMCU row */
-    coef->mcu_ctr = 0;
-  }
-  /* Completed the iMCU row, advance counters for next one */
-  coef->iMCU_row_num++;
-  start_iMCU_row(cinfo);
-  return TRUE;
-}
-
-#endif /* FULL_COEF_BUFFER_SUPPORTED */
-
-
-/*
- * Initialize coefficient buffer controller.
- */
-
-GLOBAL(void)
-jinit_c_coef_controller (j_compress_ptr cinfo, boolean need_full_buffer)
-{
-  my_coef_ptr coef;
-
-  coef = (my_coef_ptr)
-    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				SIZEOF(my_coef_controller));
-  cinfo->coef = (struct jpeg_c_coef_controller *) coef;
-  coef->pub.start_pass = start_pass_coef;
-
-  /* Create the coefficient buffer. */
-  if (need_full_buffer) {
-#ifdef FULL_COEF_BUFFER_SUPPORTED
-    /* Allocate a full-image virtual array for each component, */
-    /* padded to a multiple of samp_factor DCT blocks in each direction. */
-    int ci;
-    jpeg_component_info *compptr;
-
-    for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-	 ci++, compptr++) {
-      coef->whole_image[ci] = (*cinfo->mem->request_virt_barray)
-	((j_common_ptr) cinfo, JPOOL_IMAGE, FALSE,
-	 (JDIMENSION) jround_up((long) compptr->width_in_blocks,
-				(long) compptr->h_samp_factor),
-	 (JDIMENSION) jround_up((long) compptr->height_in_blocks,
-				(long) compptr->v_samp_factor),
-	 (JDIMENSION) compptr->v_samp_factor);
-    }
-#else
-    ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
-#endif
-  } else {
-    /* We only need a single-MCU buffer. */
-    JBLOCKROW buffer;
-    int i;
-
-    buffer = (JBLOCKROW)
-      (*cinfo->mem->alloc_large) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				  C_MAX_BLOCKS_IN_MCU * SIZEOF(JBLOCK));
-    for (i = 0; i < C_MAX_BLOCKS_IN_MCU; i++) {
-      coef->MCU_buffer[i] = buffer + i;
-    }
-    coef->whole_image[0] = NULL; /* flag for no virtual arrays */
-  }
-}
+/*
+ * jccoefct.c
+ *
+ * Copyright (C) 1994-1997, Thomas G. Lane.
+ * Modified 2003-2011 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains the coefficient buffer controller for compression.
+ * This controller is the top level of the JPEG compressor proper.
+ * The coefficient buffer lies between forward-DCT and entropy encoding steps.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/* We use a full-image coefficient buffer when doing Huffman optimization,
+ * and also for writing multiple-scan JPEG files.  In all cases, the DCT
+ * step is run during the first pass, and subsequent passes need only read
+ * the buffered coefficients.
+ */
+#ifdef ENTROPY_OPT_SUPPORTED
+#define FULL_COEF_BUFFER_SUPPORTED
+#else
+#ifdef C_MULTISCAN_FILES_SUPPORTED
+#define FULL_COEF_BUFFER_SUPPORTED
+#endif
+#endif
+
+
+/* Private buffer controller object */
+
+typedef struct {
+  struct jpeg_c_coef_controller pub; /* public fields */
+
+  JDIMENSION iMCU_row_num;	/* iMCU row # within image */
+  JDIMENSION mcu_ctr;		/* counts MCUs processed in current row */
+  int MCU_vert_offset;		/* counts MCU rows within iMCU row */
+  int MCU_rows_per_iMCU_row;	/* number of such rows needed */
+
+  /* For single-pass compression, it's sufficient to buffer just one MCU
+   * (although this may prove a bit slow in practice).  We allocate a
+   * workspace of C_MAX_BLOCKS_IN_MCU coefficient blocks, and reuse it for each
+   * MCU constructed and sent.  (On 80x86, the workspace is FAR even though
+   * it's not really very big; this is to keep the module interfaces unchanged
+   * when a large coefficient buffer is necessary.)
+   * In multi-pass modes, this array points to the current MCU's blocks
+   * within the virtual arrays.
+   */
+  JBLOCKROW MCU_buffer[C_MAX_BLOCKS_IN_MCU];
+
+  /* In multi-pass modes, we need a virtual block array for each component. */
+  jvirt_barray_ptr whole_image[MAX_COMPONENTS];
+} my_coef_controller;
+
+typedef my_coef_controller * my_coef_ptr;
+
+
+/* Forward declarations */
+METHODDEF(boolean) compress_data
+    JPP((j_compress_ptr cinfo, JSAMPIMAGE input_buf));
+#ifdef FULL_COEF_BUFFER_SUPPORTED
+METHODDEF(boolean) compress_first_pass
+    JPP((j_compress_ptr cinfo, JSAMPIMAGE input_buf));
+METHODDEF(boolean) compress_output
+    JPP((j_compress_ptr cinfo, JSAMPIMAGE input_buf));
+#endif
+
+
+LOCAL(void)
+start_iMCU_row (j_compress_ptr cinfo)
+/* Reset within-iMCU-row counters for a new row */
+{
+  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
+
+  /* In an interleaved scan, an MCU row is the same as an iMCU row.
+   * In a noninterleaved scan, an iMCU row has v_samp_factor MCU rows.
+   * But at the bottom of the image, process only what's left.
+   */
+  if (cinfo->comps_in_scan > 1) {
+    coef->MCU_rows_per_iMCU_row = 1;
+  } else {
+    if (coef->iMCU_row_num < (cinfo->total_iMCU_rows-1))
+      coef->MCU_rows_per_iMCU_row = cinfo->cur_comp_info[0]->v_samp_factor;
+    else
+      coef->MCU_rows_per_iMCU_row = cinfo->cur_comp_info[0]->last_row_height;
+  }
+
+  coef->mcu_ctr = 0;
+  coef->MCU_vert_offset = 0;
+}
+
+
+/*
+ * Initialize for a processing pass.
+ */
+
+METHODDEF(void)
+start_pass_coef (j_compress_ptr cinfo, J_BUF_MODE pass_mode)
+{
+  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
+
+  coef->iMCU_row_num = 0;
+  start_iMCU_row(cinfo);
+
+  switch (pass_mode) {
+  case JBUF_PASS_THRU:
+    if (coef->whole_image[0] != NULL)
+      ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
+    coef->pub.compress_data = compress_data;
+    break;
+#ifdef FULL_COEF_BUFFER_SUPPORTED
+  case JBUF_SAVE_AND_PASS:
+    if (coef->whole_image[0] == NULL)
+      ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
+    coef->pub.compress_data = compress_first_pass;
+    break;
+  case JBUF_CRANK_DEST:
+    if (coef->whole_image[0] == NULL)
+      ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
+    coef->pub.compress_data = compress_output;
+    break;
+#endif
+  default:
+    ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
+    break;
+  }
+}
+
+
+/*
+ * Process some data in the single-pass case.
+ * We process the equivalent of one fully interleaved MCU row ("iMCU" row)
+ * per call, ie, v_samp_factor block rows for each component in the image.
+ * Returns TRUE if the iMCU row is completed, FALSE if suspended.
+ *
+ * NB: input_buf contains a plane for each component in image,
+ * which we index according to the component's SOF position.
+ */
+
+METHODDEF(boolean)
+compress_data (j_compress_ptr cinfo, JSAMPIMAGE input_buf)
+{
+  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
+  JDIMENSION MCU_col_num;	/* index of current MCU within row */
+  JDIMENSION last_MCU_col = cinfo->MCUs_per_row - 1;
+  JDIMENSION last_iMCU_row = cinfo->total_iMCU_rows - 1;
+  int blkn, bi, ci, yindex, yoffset, blockcnt;
+  JDIMENSION ypos, xpos;
+  jpeg_component_info *compptr;
+  forward_DCT_ptr forward_DCT;
+
+  /* Loop to write as much as one whole iMCU row */
+  for (yoffset = coef->MCU_vert_offset; yoffset < coef->MCU_rows_per_iMCU_row;
+       yoffset++) {
+    for (MCU_col_num = coef->mcu_ctr; MCU_col_num <= last_MCU_col;
+	 MCU_col_num++) {
+      /* Determine where data comes from in input_buf and do the DCT thing.
+       * Each call on forward_DCT processes a horizontal row of DCT blocks
+       * as wide as an MCU; we rely on having allocated the MCU_buffer[] blocks
+       * sequentially.  Dummy blocks at the right or bottom edge are filled in
+       * specially.  The data in them does not matter for image reconstruction,
+       * so we fill them with values that will encode to the smallest amount of
+       * data, viz: all zeroes in the AC entries, DC entries equal to previous
+       * block's DC value.  (Thanks to Thomas Kinsman for this idea.)
+       */
+      blkn = 0;
+      for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+	compptr = cinfo->cur_comp_info[ci];
+	forward_DCT = cinfo->fdct->forward_DCT[compptr->component_index];
+	blockcnt = (MCU_col_num < last_MCU_col) ? compptr->MCU_width
+						: compptr->last_col_width;
+	xpos = MCU_col_num * compptr->MCU_sample_width;
+	ypos = yoffset * compptr->DCT_v_scaled_size;
+	/* ypos == (yoffset+yindex) * DCTSIZE */
+	for (yindex = 0; yindex < compptr->MCU_height; yindex++) {
+	  if (coef->iMCU_row_num < last_iMCU_row ||
+	      yoffset+yindex < compptr->last_row_height) {
+	    (*forward_DCT) (cinfo, compptr,
+			    input_buf[compptr->component_index],
+			    coef->MCU_buffer[blkn],
+			    ypos, xpos, (JDIMENSION) blockcnt);
+	    if (blockcnt < compptr->MCU_width) {
+	      /* Create some dummy blocks at the right edge of the image. */
+	      FMEMZERO((void FAR *) coef->MCU_buffer[blkn + blockcnt],
+		       (compptr->MCU_width - blockcnt) * SIZEOF(JBLOCK));
+	      for (bi = blockcnt; bi < compptr->MCU_width; bi++) {
+		coef->MCU_buffer[blkn+bi][0][0] = coef->MCU_buffer[blkn+bi-1][0][0];
+	      }
+	    }
+	  } else {
+	    /* Create a row of dummy blocks at the bottom of the image. */
+	    FMEMZERO((void FAR *) coef->MCU_buffer[blkn],
+		     compptr->MCU_width * SIZEOF(JBLOCK));
+	    for (bi = 0; bi < compptr->MCU_width; bi++) {
+	      coef->MCU_buffer[blkn+bi][0][0] = coef->MCU_buffer[blkn-1][0][0];
+	    }
+	  }
+	  blkn += compptr->MCU_width;
+	  ypos += compptr->DCT_v_scaled_size;
+	}
+      }
+      /* Try to write the MCU.  In event of a suspension failure, we will
+       * re-DCT the MCU on restart (a bit inefficient, could be fixed...)
+       */
+      if (! (*cinfo->entropy->encode_mcu) (cinfo, coef->MCU_buffer)) {
+	/* Suspension forced; update state counters and exit */
+	coef->MCU_vert_offset = yoffset;
+	coef->mcu_ctr = MCU_col_num;
+	return FALSE;
+      }
+    }
+    /* Completed an MCU row, but perhaps not an iMCU row */
+    coef->mcu_ctr = 0;
+  }
+  /* Completed the iMCU row, advance counters for next one */
+  coef->iMCU_row_num++;
+  start_iMCU_row(cinfo);
+  return TRUE;
+}
+
+
+#ifdef FULL_COEF_BUFFER_SUPPORTED
+
+/*
+ * Process some data in the first pass of a multi-pass case.
+ * We process the equivalent of one fully interleaved MCU row ("iMCU" row)
+ * per call, ie, v_samp_factor block rows for each component in the image.
+ * This amount of data is read from the source buffer, DCT'd and quantized,
+ * and saved into the virtual arrays.  We also generate suitable dummy blocks
+ * as needed at the right and lower edges.  (The dummy blocks are constructed
+ * in the virtual arrays, which have been padded appropriately.)  This makes
+ * it possible for subsequent passes not to worry about real vs. dummy blocks.
+ *
+ * We must also emit the data to the entropy encoder.  This is conveniently
+ * done by calling compress_output() after we've loaded the current strip
+ * of the virtual arrays.
+ *
+ * NB: input_buf contains a plane for each component in image.  All
+ * components are DCT'd and loaded into the virtual arrays in this pass.
+ * However, it may be that only a subset of the components are emitted to
+ * the entropy encoder during this first pass; be careful about looking
+ * at the scan-dependent variables (MCU dimensions, etc).
+ */
+
+METHODDEF(boolean)
+compress_first_pass (j_compress_ptr cinfo, JSAMPIMAGE input_buf)
+{
+  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
+  JDIMENSION last_iMCU_row = cinfo->total_iMCU_rows - 1;
+  JDIMENSION blocks_across, MCUs_across, MCUindex;
+  int bi, ci, h_samp_factor, block_row, block_rows, ndummy;
+  JCOEF lastDC;
+  jpeg_component_info *compptr;
+  JBLOCKARRAY buffer;
+  JBLOCKROW thisblockrow, lastblockrow;
+  forward_DCT_ptr forward_DCT;
+
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    /* Align the virtual buffer for this component. */
+    buffer = (*cinfo->mem->access_virt_barray)
+      ((j_common_ptr) cinfo, coef->whole_image[ci],
+       coef->iMCU_row_num * compptr->v_samp_factor,
+       (JDIMENSION) compptr->v_samp_factor, TRUE);
+    /* Count non-dummy DCT block rows in this iMCU row. */
+    if (coef->iMCU_row_num < last_iMCU_row)
+      block_rows = compptr->v_samp_factor;
+    else {
+      /* NB: can't use last_row_height here, since may not be set! */
+      block_rows = (int) (compptr->height_in_blocks % compptr->v_samp_factor);
+      if (block_rows == 0) block_rows = compptr->v_samp_factor;
+    }
+    blocks_across = compptr->width_in_blocks;
+    h_samp_factor = compptr->h_samp_factor;
+    /* Count number of dummy blocks to be added at the right margin. */
+    ndummy = (int) (blocks_across % h_samp_factor);
+    if (ndummy > 0)
+      ndummy = h_samp_factor - ndummy;
+    forward_DCT = cinfo->fdct->forward_DCT[ci];
+    /* Perform DCT for all non-dummy blocks in this iMCU row.  Each call
+     * on forward_DCT processes a complete horizontal row of DCT blocks.
+     */
+    for (block_row = 0; block_row < block_rows; block_row++) {
+      thisblockrow = buffer[block_row];
+      (*forward_DCT) (cinfo, compptr, input_buf[ci], thisblockrow,
+		      (JDIMENSION) (block_row * compptr->DCT_v_scaled_size),
+		      (JDIMENSION) 0, blocks_across);
+      if (ndummy > 0) {
+	/* Create dummy blocks at the right edge of the image. */
+	thisblockrow += blocks_across; /* => first dummy block */
+	FMEMZERO((void FAR *) thisblockrow, ndummy * SIZEOF(JBLOCK));
+	lastDC = thisblockrow[-1][0];
+	for (bi = 0; bi < ndummy; bi++) {
+	  thisblockrow[bi][0] = lastDC;
+	}
+      }
+    }
+    /* If at end of image, create dummy block rows as needed.
+     * The tricky part here is that within each MCU, we want the DC values
+     * of the dummy blocks to match the last real block's DC value.
+     * This squeezes a few more bytes out of the resulting file...
+     */
+    if (coef->iMCU_row_num == last_iMCU_row) {
+      blocks_across += ndummy;	/* include lower right corner */
+      MCUs_across = blocks_across / h_samp_factor;
+      for (block_row = block_rows; block_row < compptr->v_samp_factor;
+	   block_row++) {
+	thisblockrow = buffer[block_row];
+	lastblockrow = buffer[block_row-1];
+	FMEMZERO((void FAR *) thisblockrow,
+		 (size_t) (blocks_across * SIZEOF(JBLOCK)));
+	for (MCUindex = 0; MCUindex < MCUs_across; MCUindex++) {
+	  lastDC = lastblockrow[h_samp_factor-1][0];
+	  for (bi = 0; bi < h_samp_factor; bi++) {
+	    thisblockrow[bi][0] = lastDC;
+	  }
+	  thisblockrow += h_samp_factor; /* advance to next MCU in row */
+	  lastblockrow += h_samp_factor;
+	}
+      }
+    }
+  }
+  /* NB: compress_output will increment iMCU_row_num if successful.
+   * A suspension return will result in redoing all the work above next time.
+   */
+
+  /* Emit data to the entropy encoder, sharing code with subsequent passes */
+  return compress_output(cinfo, input_buf);
+}
+
+
+/*
+ * Process some data in subsequent passes of a multi-pass case.
+ * We process the equivalent of one fully interleaved MCU row ("iMCU" row)
+ * per call, ie, v_samp_factor block rows for each component in the scan.
+ * The data is obtained from the virtual arrays and fed to the entropy coder.
+ * Returns TRUE if the iMCU row is completed, FALSE if suspended.
+ *
+ * NB: input_buf is ignored; it is likely to be a NULL pointer.
+ */
+
+METHODDEF(boolean)
+compress_output (j_compress_ptr cinfo, JSAMPIMAGE input_buf)
+{
+  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
+  JDIMENSION MCU_col_num;	/* index of current MCU within row */
+  int blkn, ci, xindex, yindex, yoffset;
+  JDIMENSION start_col;
+  JBLOCKARRAY buffer[MAX_COMPS_IN_SCAN];
+  JBLOCKROW buffer_ptr;
+  jpeg_component_info *compptr;
+
+  /* Align the virtual buffers for the components used in this scan.
+   * NB: during first pass, this is safe only because the buffers will
+   * already be aligned properly, so jmemmgr.c won't need to do any I/O.
+   */
+  for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+    compptr = cinfo->cur_comp_info[ci];
+    buffer[ci] = (*cinfo->mem->access_virt_barray)
+      ((j_common_ptr) cinfo, coef->whole_image[compptr->component_index],
+       coef->iMCU_row_num * compptr->v_samp_factor,
+       (JDIMENSION) compptr->v_samp_factor, FALSE);
+  }
+
+  /* Loop to process one whole iMCU row */
+  for (yoffset = coef->MCU_vert_offset; yoffset < coef->MCU_rows_per_iMCU_row;
+       yoffset++) {
+    for (MCU_col_num = coef->mcu_ctr; MCU_col_num < cinfo->MCUs_per_row;
+	 MCU_col_num++) {
+      /* Construct list of pointers to DCT blocks belonging to this MCU */
+      blkn = 0;			/* index of current DCT block within MCU */
+      for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+	compptr = cinfo->cur_comp_info[ci];
+	start_col = MCU_col_num * compptr->MCU_width;
+	for (yindex = 0; yindex < compptr->MCU_height; yindex++) {
+	  buffer_ptr = buffer[ci][yindex+yoffset] + start_col;
+	  for (xindex = 0; xindex < compptr->MCU_width; xindex++) {
+	    coef->MCU_buffer[blkn++] = buffer_ptr++;
+	  }
+	}
+      }
+      /* Try to write the MCU. */
+      if (! (*cinfo->entropy->encode_mcu) (cinfo, coef->MCU_buffer)) {
+	/* Suspension forced; update state counters and exit */
+	coef->MCU_vert_offset = yoffset;
+	coef->mcu_ctr = MCU_col_num;
+	return FALSE;
+      }
+    }
+    /* Completed an MCU row, but perhaps not an iMCU row */
+    coef->mcu_ctr = 0;
+  }
+  /* Completed the iMCU row, advance counters for next one */
+  coef->iMCU_row_num++;
+  start_iMCU_row(cinfo);
+  return TRUE;
+}
+
+#endif /* FULL_COEF_BUFFER_SUPPORTED */
+
+
+/*
+ * Initialize coefficient buffer controller.
+ */
+
+GLOBAL(void)
+jinit_c_coef_controller (j_compress_ptr cinfo, boolean need_full_buffer)
+{
+  my_coef_ptr coef;
+
+  coef = (my_coef_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				SIZEOF(my_coef_controller));
+  cinfo->coef = (struct jpeg_c_coef_controller *) coef;
+  coef->pub.start_pass = start_pass_coef;
+
+  /* Create the coefficient buffer. */
+  if (need_full_buffer) {
+#ifdef FULL_COEF_BUFFER_SUPPORTED
+    /* Allocate a full-image virtual array for each component, */
+    /* padded to a multiple of samp_factor DCT blocks in each direction. */
+    int ci;
+    jpeg_component_info *compptr;
+
+    for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+	 ci++, compptr++) {
+      coef->whole_image[ci] = (*cinfo->mem->request_virt_barray)
+	((j_common_ptr) cinfo, JPOOL_IMAGE, FALSE,
+	 (JDIMENSION) jround_up((long) compptr->width_in_blocks,
+				(long) compptr->h_samp_factor),
+	 (JDIMENSION) jround_up((long) compptr->height_in_blocks,
+				(long) compptr->v_samp_factor),
+	 (JDIMENSION) compptr->v_samp_factor);
+    }
+#else
+    ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
+#endif
+  } else {
+    /* We only need a single-MCU buffer. */
+    JBLOCKROW buffer;
+    int i;
+
+    buffer = (JBLOCKROW)
+      (*cinfo->mem->alloc_large) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				  C_MAX_BLOCKS_IN_MCU * SIZEOF(JBLOCK));
+    for (i = 0; i < C_MAX_BLOCKS_IN_MCU; i++) {
+      coef->MCU_buffer[i] = buffer + i;
+    }
+    coef->whole_image[0] = NULL; /* flag for no virtual arrays */
+  }
+}
diff --git a/plugins/AdvaImg/src/LibJPEG/jccolor.c b/plugins/AdvaImg/src/LibJPEG/jccolor.c
index 3e2d0e9277..daf66010df 100644
--- a/plugins/AdvaImg/src/LibJPEG/jccolor.c
+++ b/plugins/AdvaImg/src/LibJPEG/jccolor.c
@@ -1,490 +1,541 @@
-/*
- * jccolor.c
- *
- * Copyright (C) 1991-1996, Thomas G. Lane.
- * Modified 2011 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains input colorspace conversion routines.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-
-
-/* Private subobject */
-
-typedef struct {
-  struct jpeg_color_converter pub; /* public fields */
-
-  /* Private state for RGB->YCC conversion */
-  INT32 * rgb_ycc_tab;		/* => table for RGB to YCbCr conversion */
-} my_color_converter;
-
-typedef my_color_converter * my_cconvert_ptr;
-
-
-/**************** RGB -> YCbCr conversion: most common case **************/
-
-/*
- * YCbCr is defined per CCIR 601-1, except that Cb and Cr are
- * normalized to the range 0..MAXJSAMPLE rather than -0.5 .. 0.5.
- * The conversion equations to be implemented are therefore
- *	Y  =  0.29900 * R + 0.58700 * G + 0.11400 * B
- *	Cb = -0.16874 * R - 0.33126 * G + 0.50000 * B  + CENTERJSAMPLE
- *	Cr =  0.50000 * R - 0.41869 * G - 0.08131 * B  + CENTERJSAMPLE
- * (These numbers are derived from TIFF 6.0 section 21, dated 3-June-92.)
- * Note: older versions of the IJG code used a zero offset of MAXJSAMPLE/2,
- * rather than CENTERJSAMPLE, for Cb and Cr.  This gave equal positive and
- * negative swings for Cb/Cr, but meant that grayscale values (Cb=Cr=0)
- * were not represented exactly.  Now we sacrifice exact representation of
- * maximum red and maximum blue in order to get exact grayscales.
- *
- * To avoid floating-point arithmetic, we represent the fractional constants
- * as integers scaled up by 2^16 (about 4 digits precision); we have to divide
- * the products by 2^16, with appropriate rounding, to get the correct answer.
- *
- * For even more speed, we avoid doing any multiplications in the inner loop
- * by precalculating the constants times R,G,B for all possible values.
- * For 8-bit JSAMPLEs this is very reasonable (only 256 entries per table);
- * for 12-bit samples it is still acceptable.  It's not very reasonable for
- * 16-bit samples, but if you want lossless storage you shouldn't be changing
- * colorspace anyway.
- * The CENTERJSAMPLE offsets and the rounding fudge-factor of 0.5 are included
- * in the tables to save adding them separately in the inner loop.
- */
-
-#define SCALEBITS	16	/* speediest right-shift on some machines */
-#define CBCR_OFFSET	((INT32) CENTERJSAMPLE << SCALEBITS)
-#define ONE_HALF	((INT32) 1 << (SCALEBITS-1))
-#define FIX(x)		((INT32) ((x) * (1L<<SCALEBITS) + 0.5))
-
-/* We allocate one big table and divide it up into eight parts, instead of
- * doing eight alloc_small requests.  This lets us use a single table base
- * address, which can be held in a register in the inner loops on many
- * machines (more than can hold all eight addresses, anyway).
- */
-
-#define R_Y_OFF		0			/* offset to R => Y section */
-#define G_Y_OFF		(1*(MAXJSAMPLE+1))	/* offset to G => Y section */
-#define B_Y_OFF		(2*(MAXJSAMPLE+1))	/* etc. */
-#define R_CB_OFF	(3*(MAXJSAMPLE+1))
-#define G_CB_OFF	(4*(MAXJSAMPLE+1))
-#define B_CB_OFF	(5*(MAXJSAMPLE+1))
-#define R_CR_OFF	B_CB_OFF		/* B=>Cb, R=>Cr are the same */
-#define G_CR_OFF	(6*(MAXJSAMPLE+1))
-#define B_CR_OFF	(7*(MAXJSAMPLE+1))
-#define TABLE_SIZE	(8*(MAXJSAMPLE+1))
-
-
-/*
- * Initialize for RGB->YCC colorspace conversion.
- */
-
-METHODDEF(void)
-rgb_ycc_start (j_compress_ptr cinfo)
-{
-  my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
-  INT32 * rgb_ycc_tab;
-  INT32 i;
-
-  /* Allocate and fill in the conversion tables. */
-  cconvert->rgb_ycc_tab = rgb_ycc_tab = (INT32 *)
-    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				(TABLE_SIZE * SIZEOF(INT32)));
-
-  for (i = 0; i <= MAXJSAMPLE; i++) {
-    rgb_ycc_tab[i+R_Y_OFF] = FIX(0.29900) * i;
-    rgb_ycc_tab[i+G_Y_OFF] = FIX(0.58700) * i;
-    rgb_ycc_tab[i+B_Y_OFF] = FIX(0.11400) * i     + ONE_HALF;
-    rgb_ycc_tab[i+R_CB_OFF] = (-FIX(0.16874)) * i;
-    rgb_ycc_tab[i+G_CB_OFF] = (-FIX(0.33126)) * i;
-    /* We use a rounding fudge-factor of 0.5-epsilon for Cb and Cr.
-     * This ensures that the maximum output will round to MAXJSAMPLE
-     * not MAXJSAMPLE+1, and thus that we don't have to range-limit.
-     */
-    rgb_ycc_tab[i+B_CB_OFF] = FIX(0.50000) * i    + CBCR_OFFSET + ONE_HALF-1;
-/*  B=>Cb and R=>Cr tables are the same
-    rgb_ycc_tab[i+R_CR_OFF] = FIX(0.50000) * i    + CBCR_OFFSET + ONE_HALF-1;
-*/
-    rgb_ycc_tab[i+G_CR_OFF] = (-FIX(0.41869)) * i;
-    rgb_ycc_tab[i+B_CR_OFF] = (-FIX(0.08131)) * i;
-  }
-}
-
-
-/*
- * Convert some rows of samples to the JPEG colorspace.
- *
- * Note that we change from the application's interleaved-pixel format
- * to our internal noninterleaved, one-plane-per-component format.
- * The input buffer is therefore three times as wide as the output buffer.
- *
- * A starting row offset is provided only for the output buffer.  The caller
- * can easily adjust the passed input_buf value to accommodate any row
- * offset required on that side.
- */
-
-METHODDEF(void)
-rgb_ycc_convert (j_compress_ptr cinfo,
-		 JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
-		 JDIMENSION output_row, int num_rows)
-{
-  my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
-  register int r, g, b;
-  register INT32 * ctab = cconvert->rgb_ycc_tab;
-  register JSAMPROW inptr;
-  register JSAMPROW outptr0, outptr1, outptr2;
-  register JDIMENSION col;
-  JDIMENSION num_cols = cinfo->image_width;
-
-  while (--num_rows >= 0) {
-    inptr = *input_buf++;
-    outptr0 = output_buf[0][output_row];
-    outptr1 = output_buf[1][output_row];
-    outptr2 = output_buf[2][output_row];
-    output_row++;
-    for (col = 0; col < num_cols; col++) {
-      r = GETJSAMPLE(inptr[RGB_RED]);
-      g = GETJSAMPLE(inptr[RGB_GREEN]);
-      b = GETJSAMPLE(inptr[RGB_BLUE]);
-      inptr += RGB_PIXELSIZE;
-      /* If the inputs are 0..MAXJSAMPLE, the outputs of these equations
-       * must be too; we do not need an explicit range-limiting operation.
-       * Hence the value being shifted is never negative, and we don't
-       * need the general RIGHT_SHIFT macro.
-       */
-      /* Y */
-      outptr0[col] = (JSAMPLE)
-		((ctab[r+R_Y_OFF] + ctab[g+G_Y_OFF] + ctab[b+B_Y_OFF])
-		 >> SCALEBITS);
-      /* Cb */
-      outptr1[col] = (JSAMPLE)
-		((ctab[r+R_CB_OFF] + ctab[g+G_CB_OFF] + ctab[b+B_CB_OFF])
-		 >> SCALEBITS);
-      /* Cr */
-      outptr2[col] = (JSAMPLE)
-		((ctab[r+R_CR_OFF] + ctab[g+G_CR_OFF] + ctab[b+B_CR_OFF])
-		 >> SCALEBITS);
-    }
-  }
-}
-
-
-/**************** Cases other than RGB -> YCbCr **************/
-
-
-/*
- * Convert some rows of samples to the JPEG colorspace.
- * This version handles RGB->grayscale conversion, which is the same
- * as the RGB->Y portion of RGB->YCbCr.
- * We assume rgb_ycc_start has been called (we only use the Y tables).
- */
-
-METHODDEF(void)
-rgb_gray_convert (j_compress_ptr cinfo,
-		  JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
-		  JDIMENSION output_row, int num_rows)
-{
-  my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
-  register int r, g, b;
-  register INT32 * ctab = cconvert->rgb_ycc_tab;
-  register JSAMPROW inptr;
-  register JSAMPROW outptr;
-  register JDIMENSION col;
-  JDIMENSION num_cols = cinfo->image_width;
-
-  while (--num_rows >= 0) {
-    inptr = *input_buf++;
-    outptr = output_buf[0][output_row];
-    output_row++;
-    for (col = 0; col < num_cols; col++) {
-      r = GETJSAMPLE(inptr[RGB_RED]);
-      g = GETJSAMPLE(inptr[RGB_GREEN]);
-      b = GETJSAMPLE(inptr[RGB_BLUE]);
-      inptr += RGB_PIXELSIZE;
-      /* Y */
-      outptr[col] = (JSAMPLE)
-		((ctab[r+R_Y_OFF] + ctab[g+G_Y_OFF] + ctab[b+B_Y_OFF])
-		 >> SCALEBITS);
-    }
-  }
-}
-
-
-/*
- * Convert some rows of samples to the JPEG colorspace.
- * This version handles Adobe-style CMYK->YCCK conversion,
- * where we convert R=1-C, G=1-M, and B=1-Y to YCbCr using the same
- * conversion as above, while passing K (black) unchanged.
- * We assume rgb_ycc_start has been called.
- */
-
-METHODDEF(void)
-cmyk_ycck_convert (j_compress_ptr cinfo,
-		   JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
-		   JDIMENSION output_row, int num_rows)
-{
-  my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
-  register int r, g, b;
-  register INT32 * ctab = cconvert->rgb_ycc_tab;
-  register JSAMPROW inptr;
-  register JSAMPROW outptr0, outptr1, outptr2, outptr3;
-  register JDIMENSION col;
-  JDIMENSION num_cols = cinfo->image_width;
-
-  while (--num_rows >= 0) {
-    inptr = *input_buf++;
-    outptr0 = output_buf[0][output_row];
-    outptr1 = output_buf[1][output_row];
-    outptr2 = output_buf[2][output_row];
-    outptr3 = output_buf[3][output_row];
-    output_row++;
-    for (col = 0; col < num_cols; col++) {
-      r = MAXJSAMPLE - GETJSAMPLE(inptr[0]);
-      g = MAXJSAMPLE - GETJSAMPLE(inptr[1]);
-      b = MAXJSAMPLE - GETJSAMPLE(inptr[2]);
-      /* K passes through as-is */
-      outptr3[col] = inptr[3];	/* don't need GETJSAMPLE here */
-      inptr += 4;
-      /* If the inputs are 0..MAXJSAMPLE, the outputs of these equations
-       * must be too; we do not need an explicit range-limiting operation.
-       * Hence the value being shifted is never negative, and we don't
-       * need the general RIGHT_SHIFT macro.
-       */
-      /* Y */
-      outptr0[col] = (JSAMPLE)
-		((ctab[r+R_Y_OFF] + ctab[g+G_Y_OFF] + ctab[b+B_Y_OFF])
-		 >> SCALEBITS);
-      /* Cb */
-      outptr1[col] = (JSAMPLE)
-		((ctab[r+R_CB_OFF] + ctab[g+G_CB_OFF] + ctab[b+B_CB_OFF])
-		 >> SCALEBITS);
-      /* Cr */
-      outptr2[col] = (JSAMPLE)
-		((ctab[r+R_CR_OFF] + ctab[g+G_CR_OFF] + ctab[b+B_CR_OFF])
-		 >> SCALEBITS);
-    }
-  }
-}
-
-
-/*
- * Convert some rows of samples to the JPEG colorspace.
- * This version handles grayscale output with no conversion.
- * The source can be either plain grayscale or YCbCr (since Y == gray).
- */
-
-METHODDEF(void)
-grayscale_convert (j_compress_ptr cinfo,
-		   JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
-		   JDIMENSION output_row, int num_rows)
-{
-  register JSAMPROW inptr;
-  register JSAMPROW outptr;
-  register JDIMENSION col;
-  JDIMENSION num_cols = cinfo->image_width;
-  int instride = cinfo->input_components;
-
-  while (--num_rows >= 0) {
-    inptr = *input_buf++;
-    outptr = output_buf[0][output_row];
-    output_row++;
-    for (col = 0; col < num_cols; col++) {
-      outptr[col] = inptr[0];	/* don't need GETJSAMPLE() here */
-      inptr += instride;
-    }
-  }
-}
-
-
-/*
- * Convert some rows of samples to the JPEG colorspace.
- * No colorspace conversion, but change from interleaved
- * to separate-planes representation.
- */
-
-METHODDEF(void)
-rgb_convert (j_compress_ptr cinfo,
-	     JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
-	     JDIMENSION output_row, int num_rows)
-{
-  register JSAMPROW inptr;
-  register JSAMPROW outptr0, outptr1, outptr2;
-  register JDIMENSION col;
-  JDIMENSION num_cols = cinfo->image_width;
-
-  while (--num_rows >= 0) {
-    inptr = *input_buf++;
-    outptr0 = output_buf[0][output_row];
-    outptr1 = output_buf[1][output_row];
-    outptr2 = output_buf[2][output_row];
-    output_row++;
-    for (col = 0; col < num_cols; col++) {
-      /* We can dispense with GETJSAMPLE() here */
-      outptr0[col] = inptr[RGB_RED];
-      outptr1[col] = inptr[RGB_GREEN];
-      outptr2[col] = inptr[RGB_BLUE];
-      inptr += RGB_PIXELSIZE;
-    }
-  }
-}
-
-
-/*
- * Convert some rows of samples to the JPEG colorspace.
- * This version handles multi-component colorspaces without conversion.
- * We assume input_components == num_components.
- */
-
-METHODDEF(void)
-null_convert (j_compress_ptr cinfo,
-	      JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
-	      JDIMENSION output_row, int num_rows)
-{
-  register JSAMPROW inptr;
-  register JSAMPROW outptr;
-  register JDIMENSION col;
-  register int ci;
-  int nc = cinfo->num_components;
-  JDIMENSION num_cols = cinfo->image_width;
-
-  while (--num_rows >= 0) {
-    /* It seems fastest to make a separate pass for each component. */
-    for (ci = 0; ci < nc; ci++) {
-      inptr = *input_buf;
-      outptr = output_buf[ci][output_row];
-      for (col = 0; col < num_cols; col++) {
-	outptr[col] = inptr[ci]; /* don't need GETJSAMPLE() here */
-	inptr += nc;
-      }
-    }
-    input_buf++;
-    output_row++;
-  }
-}
-
-
-/*
- * Empty method for start_pass.
- */
-
-METHODDEF(void)
-null_method (j_compress_ptr cinfo)
-{
-  /* no work needed */
-}
-
-
-/*
- * Module initialization routine for input colorspace conversion.
- */
-
-GLOBAL(void)
-jinit_color_converter (j_compress_ptr cinfo)
-{
-  my_cconvert_ptr cconvert;
-
-  cconvert = (my_cconvert_ptr)
-    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				SIZEOF(my_color_converter));
-  cinfo->cconvert = (struct jpeg_color_converter *) cconvert;
-  /* set start_pass to null method until we find out differently */
-  cconvert->pub.start_pass = null_method;
-
-  /* Make sure input_components agrees with in_color_space */
-  switch (cinfo->in_color_space) {
-  case JCS_GRAYSCALE:
-    if (cinfo->input_components != 1)
-      ERREXIT(cinfo, JERR_BAD_IN_COLORSPACE);
-    break;
-
-  case JCS_RGB:
-    if (cinfo->input_components != RGB_PIXELSIZE)
-      ERREXIT(cinfo, JERR_BAD_IN_COLORSPACE);
-    break;
-
-  case JCS_YCbCr:
-    if (cinfo->input_components != 3)
-      ERREXIT(cinfo, JERR_BAD_IN_COLORSPACE);
-    break;
-
-  case JCS_CMYK:
-  case JCS_YCCK:
-    if (cinfo->input_components != 4)
-      ERREXIT(cinfo, JERR_BAD_IN_COLORSPACE);
-    break;
-
-  default:			/* JCS_UNKNOWN can be anything */
-    if (cinfo->input_components < 1)
-      ERREXIT(cinfo, JERR_BAD_IN_COLORSPACE);
-    break;
-  }
-
-  /* Check num_components, set conversion method based on requested space */
-  switch (cinfo->jpeg_color_space) {
-  case JCS_GRAYSCALE:
-    if (cinfo->num_components != 1)
-      ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
-    if (cinfo->in_color_space == JCS_GRAYSCALE ||
-	cinfo->in_color_space == JCS_YCbCr)
-      cconvert->pub.color_convert = grayscale_convert;
-    else if (cinfo->in_color_space == JCS_RGB) {
-      cconvert->pub.start_pass = rgb_ycc_start;
-      cconvert->pub.color_convert = rgb_gray_convert;
-    } else
-      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
-    break;
-
-  case JCS_RGB:
-    if (cinfo->num_components != 3)
-      ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
-    if (cinfo->in_color_space == JCS_RGB)
-      cconvert->pub.color_convert = rgb_convert;
-    else
-      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
-    break;
-
-  case JCS_YCbCr:
-    if (cinfo->num_components != 3)
-      ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
-    if (cinfo->in_color_space == JCS_RGB) {
-      cconvert->pub.start_pass = rgb_ycc_start;
-      cconvert->pub.color_convert = rgb_ycc_convert;
-    } else if (cinfo->in_color_space == JCS_YCbCr)
-      cconvert->pub.color_convert = null_convert;
-    else
-      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
-    break;
-
-  case JCS_CMYK:
-    if (cinfo->num_components != 4)
-      ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
-    if (cinfo->in_color_space == JCS_CMYK)
-      cconvert->pub.color_convert = null_convert;
-    else
-      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
-    break;
-
-  case JCS_YCCK:
-    if (cinfo->num_components != 4)
-      ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
-    if (cinfo->in_color_space == JCS_CMYK) {
-      cconvert->pub.start_pass = rgb_ycc_start;
-      cconvert->pub.color_convert = cmyk_ycck_convert;
-    } else if (cinfo->in_color_space == JCS_YCCK)
-      cconvert->pub.color_convert = null_convert;
-    else
-      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
-    break;
-
-  default:			/* allow null conversion of JCS_UNKNOWN */
-    if (cinfo->jpeg_color_space != cinfo->in_color_space ||
-	cinfo->num_components != cinfo->input_components)
-      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
-    cconvert->pub.color_convert = null_convert;
-    break;
-  }
-}
+/*
+ * jccolor.c
+ *
+ * Copyright (C) 1991-1996, Thomas G. Lane.
+ * Modified 2011-2012 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains input colorspace conversion routines.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/* Private subobject */
+
+typedef struct {
+  struct jpeg_color_converter pub; /* public fields */
+
+  /* Private state for RGB->YCC conversion */
+  INT32 * rgb_ycc_tab;		/* => table for RGB to YCbCr conversion */
+} my_color_converter;
+
+typedef my_color_converter * my_cconvert_ptr;
+
+
+/**************** RGB -> YCbCr conversion: most common case **************/
+
+/*
+ * YCbCr is defined per CCIR 601-1, except that Cb and Cr are
+ * normalized to the range 0..MAXJSAMPLE rather than -0.5 .. 0.5.
+ * The conversion equations to be implemented are therefore
+ *	Y  =  0.29900 * R + 0.58700 * G + 0.11400 * B
+ *	Cb = -0.16874 * R - 0.33126 * G + 0.50000 * B  + CENTERJSAMPLE
+ *	Cr =  0.50000 * R - 0.41869 * G - 0.08131 * B  + CENTERJSAMPLE
+ * (These numbers are derived from TIFF 6.0 section 21, dated 3-June-92.)
+ * Note: older versions of the IJG code used a zero offset of MAXJSAMPLE/2,
+ * rather than CENTERJSAMPLE, for Cb and Cr.  This gave equal positive and
+ * negative swings for Cb/Cr, but meant that grayscale values (Cb=Cr=0)
+ * were not represented exactly.  Now we sacrifice exact representation of
+ * maximum red and maximum blue in order to get exact grayscales.
+ *
+ * To avoid floating-point arithmetic, we represent the fractional constants
+ * as integers scaled up by 2^16 (about 4 digits precision); we have to divide
+ * the products by 2^16, with appropriate rounding, to get the correct answer.
+ *
+ * For even more speed, we avoid doing any multiplications in the inner loop
+ * by precalculating the constants times R,G,B for all possible values.
+ * For 8-bit JSAMPLEs this is very reasonable (only 256 entries per table);
+ * for 12-bit samples it is still acceptable.  It's not very reasonable for
+ * 16-bit samples, but if you want lossless storage you shouldn't be changing
+ * colorspace anyway.
+ * The CENTERJSAMPLE offsets and the rounding fudge-factor of 0.5 are included
+ * in the tables to save adding them separately in the inner loop.
+ */
+
+#define SCALEBITS	16	/* speediest right-shift on some machines */
+#define CBCR_OFFSET	((INT32) CENTERJSAMPLE << SCALEBITS)
+#define ONE_HALF	((INT32) 1 << (SCALEBITS-1))
+#define FIX(x)		((INT32) ((x) * (1L<<SCALEBITS) + 0.5))
+
+/* We allocate one big table and divide it up into eight parts, instead of
+ * doing eight alloc_small requests.  This lets us use a single table base
+ * address, which can be held in a register in the inner loops on many
+ * machines (more than can hold all eight addresses, anyway).
+ */
+
+#define R_Y_OFF		0			/* offset to R => Y section */
+#define G_Y_OFF		(1*(MAXJSAMPLE+1))	/* offset to G => Y section */
+#define B_Y_OFF		(2*(MAXJSAMPLE+1))	/* etc. */
+#define R_CB_OFF	(3*(MAXJSAMPLE+1))
+#define G_CB_OFF	(4*(MAXJSAMPLE+1))
+#define B_CB_OFF	(5*(MAXJSAMPLE+1))
+#define R_CR_OFF	B_CB_OFF		/* B=>Cb, R=>Cr are the same */
+#define G_CR_OFF	(6*(MAXJSAMPLE+1))
+#define B_CR_OFF	(7*(MAXJSAMPLE+1))
+#define TABLE_SIZE	(8*(MAXJSAMPLE+1))
+
+
+/*
+ * Initialize for RGB->YCC colorspace conversion.
+ */
+
+METHODDEF(void)
+rgb_ycc_start (j_compress_ptr cinfo)
+{
+  my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
+  INT32 * rgb_ycc_tab;
+  INT32 i;
+
+  /* Allocate and fill in the conversion tables. */
+  cconvert->rgb_ycc_tab = rgb_ycc_tab = (INT32 *)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				(TABLE_SIZE * SIZEOF(INT32)));
+
+  for (i = 0; i <= MAXJSAMPLE; i++) {
+    rgb_ycc_tab[i+R_Y_OFF] = FIX(0.29900) * i;
+    rgb_ycc_tab[i+G_Y_OFF] = FIX(0.58700) * i;
+    rgb_ycc_tab[i+B_Y_OFF] = FIX(0.11400) * i     + ONE_HALF;
+    rgb_ycc_tab[i+R_CB_OFF] = (-FIX(0.16874)) * i;
+    rgb_ycc_tab[i+G_CB_OFF] = (-FIX(0.33126)) * i;
+    /* We use a rounding fudge-factor of 0.5-epsilon for Cb and Cr.
+     * This ensures that the maximum output will round to MAXJSAMPLE
+     * not MAXJSAMPLE+1, and thus that we don't have to range-limit.
+     */
+    rgb_ycc_tab[i+B_CB_OFF] = FIX(0.50000) * i    + CBCR_OFFSET + ONE_HALF-1;
+/*  B=>Cb and R=>Cr tables are the same
+    rgb_ycc_tab[i+R_CR_OFF] = FIX(0.50000) * i    + CBCR_OFFSET + ONE_HALF-1;
+*/
+    rgb_ycc_tab[i+G_CR_OFF] = (-FIX(0.41869)) * i;
+    rgb_ycc_tab[i+B_CR_OFF] = (-FIX(0.08131)) * i;
+  }
+}
+
+
+/*
+ * Convert some rows of samples to the JPEG colorspace.
+ *
+ * Note that we change from the application's interleaved-pixel format
+ * to our internal noninterleaved, one-plane-per-component format.
+ * The input buffer is therefore three times as wide as the output buffer.
+ *
+ * A starting row offset is provided only for the output buffer.  The caller
+ * can easily adjust the passed input_buf value to accommodate any row
+ * offset required on that side.
+ */
+
+METHODDEF(void)
+rgb_ycc_convert (j_compress_ptr cinfo,
+		 JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+		 JDIMENSION output_row, int num_rows)
+{
+  my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
+  register INT32 * ctab = cconvert->rgb_ycc_tab;
+  register int r, g, b;
+  register JSAMPROW inptr;
+  register JSAMPROW outptr0, outptr1, outptr2;
+  register JDIMENSION col;
+  JDIMENSION num_cols = cinfo->image_width;
+
+  while (--num_rows >= 0) {
+    inptr = *input_buf++;
+    outptr0 = output_buf[0][output_row];
+    outptr1 = output_buf[1][output_row];
+    outptr2 = output_buf[2][output_row];
+    output_row++;
+    for (col = 0; col < num_cols; col++) {
+      r = GETJSAMPLE(inptr[RGB_RED]);
+      g = GETJSAMPLE(inptr[RGB_GREEN]);
+      b = GETJSAMPLE(inptr[RGB_BLUE]);
+      /* If the inputs are 0..MAXJSAMPLE, the outputs of these equations
+       * must be too; we do not need an explicit range-limiting operation.
+       * Hence the value being shifted is never negative, and we don't
+       * need the general RIGHT_SHIFT macro.
+       */
+      /* Y */
+      outptr0[col] = (JSAMPLE)
+		((ctab[r+R_Y_OFF] + ctab[g+G_Y_OFF] + ctab[b+B_Y_OFF])
+		 >> SCALEBITS);
+      /* Cb */
+      outptr1[col] = (JSAMPLE)
+		((ctab[r+R_CB_OFF] + ctab[g+G_CB_OFF] + ctab[b+B_CB_OFF])
+		 >> SCALEBITS);
+      /* Cr */
+      outptr2[col] = (JSAMPLE)
+		((ctab[r+R_CR_OFF] + ctab[g+G_CR_OFF] + ctab[b+B_CR_OFF])
+		 >> SCALEBITS);
+      inptr += RGB_PIXELSIZE;
+    }
+  }
+}
+
+
+/**************** Cases other than RGB -> YCbCr **************/
+
+
+/*
+ * Convert some rows of samples to the JPEG colorspace.
+ * This version handles RGB->grayscale conversion, which is the same
+ * as the RGB->Y portion of RGB->YCbCr.
+ * We assume rgb_ycc_start has been called (we only use the Y tables).
+ */
+
+METHODDEF(void)
+rgb_gray_convert (j_compress_ptr cinfo,
+		  JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+		  JDIMENSION output_row, int num_rows)
+{
+  my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
+  register INT32 * ctab = cconvert->rgb_ycc_tab;
+  register int r, g, b;
+  register JSAMPROW inptr;
+  register JSAMPROW outptr;
+  register JDIMENSION col;
+  JDIMENSION num_cols = cinfo->image_width;
+
+  while (--num_rows >= 0) {
+    inptr = *input_buf++;
+    outptr = output_buf[0][output_row++];
+    for (col = 0; col < num_cols; col++) {
+      r = GETJSAMPLE(inptr[RGB_RED]);
+      g = GETJSAMPLE(inptr[RGB_GREEN]);
+      b = GETJSAMPLE(inptr[RGB_BLUE]);
+      /* Y */
+      outptr[col] = (JSAMPLE)
+		((ctab[r+R_Y_OFF] + ctab[g+G_Y_OFF] + ctab[b+B_Y_OFF])
+		 >> SCALEBITS);
+      inptr += RGB_PIXELSIZE;
+    }
+  }
+}
+
+
+/*
+ * Convert some rows of samples to the JPEG colorspace.
+ * This version handles Adobe-style CMYK->YCCK conversion,
+ * where we convert R=1-C, G=1-M, and B=1-Y to YCbCr using the same
+ * conversion as above, while passing K (black) unchanged.
+ * We assume rgb_ycc_start has been called.
+ */
+
+METHODDEF(void)
+cmyk_ycck_convert (j_compress_ptr cinfo,
+		   JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+		   JDIMENSION output_row, int num_rows)
+{
+  my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
+  register INT32 * ctab = cconvert->rgb_ycc_tab;
+  register int r, g, b;
+  register JSAMPROW inptr;
+  register JSAMPROW outptr0, outptr1, outptr2, outptr3;
+  register JDIMENSION col;
+  JDIMENSION num_cols = cinfo->image_width;
+
+  while (--num_rows >= 0) {
+    inptr = *input_buf++;
+    outptr0 = output_buf[0][output_row];
+    outptr1 = output_buf[1][output_row];
+    outptr2 = output_buf[2][output_row];
+    outptr3 = output_buf[3][output_row];
+    output_row++;
+    for (col = 0; col < num_cols; col++) {
+      r = MAXJSAMPLE - GETJSAMPLE(inptr[0]);
+      g = MAXJSAMPLE - GETJSAMPLE(inptr[1]);
+      b = MAXJSAMPLE - GETJSAMPLE(inptr[2]);
+      /* K passes through as-is */
+      outptr3[col] = inptr[3];	/* don't need GETJSAMPLE here */
+      /* If the inputs are 0..MAXJSAMPLE, the outputs of these equations
+       * must be too; we do not need an explicit range-limiting operation.
+       * Hence the value being shifted is never negative, and we don't
+       * need the general RIGHT_SHIFT macro.
+       */
+      /* Y */
+      outptr0[col] = (JSAMPLE)
+		((ctab[r+R_Y_OFF] + ctab[g+G_Y_OFF] + ctab[b+B_Y_OFF])
+		 >> SCALEBITS);
+      /* Cb */
+      outptr1[col] = (JSAMPLE)
+		((ctab[r+R_CB_OFF] + ctab[g+G_CB_OFF] + ctab[b+B_CB_OFF])
+		 >> SCALEBITS);
+      /* Cr */
+      outptr2[col] = (JSAMPLE)
+		((ctab[r+R_CR_OFF] + ctab[g+G_CR_OFF] + ctab[b+B_CR_OFF])
+		 >> SCALEBITS);
+      inptr += 4;
+    }
+  }
+}
+
+
+/*
+ * Convert some rows of samples to the JPEG colorspace.
+ * [R,G,B] to [R-G,G,B-G] conversion with modulo calculation
+ * (forward reversible color transform).
+ */
+
+METHODDEF(void)
+rgb_rgb1_convert (j_compress_ptr cinfo,
+		  JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+		  JDIMENSION output_row, int num_rows)
+{
+  register int r, g, b;
+  register JSAMPROW inptr;
+  register JSAMPROW outptr0, outptr1, outptr2;
+  register JDIMENSION col;
+  JDIMENSION num_cols = cinfo->image_width;
+
+  while (--num_rows >= 0) {
+    inptr = *input_buf++;
+    outptr0 = output_buf[0][output_row];
+    outptr1 = output_buf[1][output_row];
+    outptr2 = output_buf[2][output_row];
+    output_row++;
+    for (col = 0; col < num_cols; col++) {
+      r = GETJSAMPLE(inptr[RGB_RED]);
+      g = GETJSAMPLE(inptr[RGB_GREEN]);
+      b = GETJSAMPLE(inptr[RGB_BLUE]);
+      /* Assume that MAXJSAMPLE+1 is a power of 2, so that the MOD
+       * (modulo) operator is equivalent to the bitmask operator AND.
+       */
+      outptr0[col] = (JSAMPLE) ((r - g + CENTERJSAMPLE) & MAXJSAMPLE);
+      outptr1[col] = (JSAMPLE) g;
+      outptr2[col] = (JSAMPLE) ((b - g + CENTERJSAMPLE) & MAXJSAMPLE);
+      inptr += RGB_PIXELSIZE;
+    }
+  }
+}
+
+
+/*
+ * Convert some rows of samples to the JPEG colorspace.
+ * This version handles grayscale output with no conversion.
+ * The source can be either plain grayscale or YCbCr (since Y == gray).
+ */
+
+METHODDEF(void)
+grayscale_convert (j_compress_ptr cinfo,
+		   JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+		   JDIMENSION output_row, int num_rows)
+{
+  int instride = cinfo->input_components;
+  register JSAMPROW inptr;
+  register JSAMPROW outptr;
+  register JDIMENSION col;
+  JDIMENSION num_cols = cinfo->image_width;
+
+  while (--num_rows >= 0) {
+    inptr = *input_buf++;
+    outptr = output_buf[0][output_row++];
+    for (col = 0; col < num_cols; col++) {
+      outptr[col] = inptr[0];	/* don't need GETJSAMPLE() here */
+      inptr += instride;
+    }
+  }
+}
+
+
+/*
+ * Convert some rows of samples to the JPEG colorspace.
+ * No colorspace conversion, but change from interleaved
+ * to separate-planes representation.
+ */
+
+METHODDEF(void)
+rgb_convert (j_compress_ptr cinfo,
+	     JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+	     JDIMENSION output_row, int num_rows)
+{
+  register JSAMPROW inptr;
+  register JSAMPROW outptr0, outptr1, outptr2;
+  register JDIMENSION col;
+  JDIMENSION num_cols = cinfo->image_width;
+
+  while (--num_rows >= 0) {
+    inptr = *input_buf++;
+    outptr0 = output_buf[0][output_row];
+    outptr1 = output_buf[1][output_row];
+    outptr2 = output_buf[2][output_row];
+    output_row++;
+    for (col = 0; col < num_cols; col++) {
+      /* We can dispense with GETJSAMPLE() here */
+      outptr0[col] = inptr[RGB_RED];
+      outptr1[col] = inptr[RGB_GREEN];
+      outptr2[col] = inptr[RGB_BLUE];
+      inptr += RGB_PIXELSIZE;
+    }
+  }
+}
+
+
+/*
+ * Convert some rows of samples to the JPEG colorspace.
+ * This version handles multi-component colorspaces without conversion.
+ * We assume input_components == num_components.
+ */
+
+METHODDEF(void)
+null_convert (j_compress_ptr cinfo,
+	      JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+	      JDIMENSION output_row, int num_rows)
+{
+  int ci;
+  register int nc = cinfo->num_components;
+  register JSAMPROW inptr;
+  register JSAMPROW outptr;
+  register JDIMENSION col;
+  JDIMENSION num_cols = cinfo->image_width;
+
+  while (--num_rows >= 0) {
+    /* It seems fastest to make a separate pass for each component. */
+    for (ci = 0; ci < nc; ci++) {
+      inptr = input_buf[0] + ci;
+      outptr = output_buf[ci][output_row];
+      for (col = 0; col < num_cols; col++) {
+	*outptr++ = *inptr;	/* don't need GETJSAMPLE() here */
+	inptr += nc;
+      }
+    }
+    input_buf++;
+    output_row++;
+  }
+}
+
+
+/*
+ * Empty method for start_pass.
+ */
+
+METHODDEF(void)
+null_method (j_compress_ptr cinfo)
+{
+  /* no work needed */
+}
+
+
+/*
+ * Module initialization routine for input colorspace conversion.
+ */
+
+GLOBAL(void)
+jinit_color_converter (j_compress_ptr cinfo)
+{
+  my_cconvert_ptr cconvert;
+
+  cconvert = (my_cconvert_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				SIZEOF(my_color_converter));
+  cinfo->cconvert = &cconvert->pub;
+  /* set start_pass to null method until we find out differently */
+  cconvert->pub.start_pass = null_method;
+
+  /* Make sure input_components agrees with in_color_space */
+  switch (cinfo->in_color_space) {
+  case JCS_GRAYSCALE:
+    if (cinfo->input_components != 1)
+      ERREXIT(cinfo, JERR_BAD_IN_COLORSPACE);
+    break;
+
+  case JCS_RGB:
+    if (cinfo->input_components != RGB_PIXELSIZE)
+      ERREXIT(cinfo, JERR_BAD_IN_COLORSPACE);
+    break;
+
+  case JCS_YCbCr:
+    if (cinfo->input_components != 3)
+      ERREXIT(cinfo, JERR_BAD_IN_COLORSPACE);
+    break;
+
+  case JCS_CMYK:
+  case JCS_YCCK:
+    if (cinfo->input_components != 4)
+      ERREXIT(cinfo, JERR_BAD_IN_COLORSPACE);
+    break;
+
+  default:			/* JCS_UNKNOWN can be anything */
+    if (cinfo->input_components < 1)
+      ERREXIT(cinfo, JERR_BAD_IN_COLORSPACE);
+    break;
+  }
+
+  /* Support color transform only for RGB colorspace */
+  if (cinfo->color_transform && cinfo->jpeg_color_space != JCS_RGB)
+    ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+
+  /* Check num_components, set conversion method based on requested space */
+  switch (cinfo->jpeg_color_space) {
+  case JCS_GRAYSCALE:
+    if (cinfo->num_components != 1)
+      ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
+    if (cinfo->in_color_space == JCS_GRAYSCALE ||
+	cinfo->in_color_space == JCS_YCbCr)
+      cconvert->pub.color_convert = grayscale_convert;
+    else if (cinfo->in_color_space == JCS_RGB) {
+      cconvert->pub.start_pass = rgb_ycc_start;
+      cconvert->pub.color_convert = rgb_gray_convert;
+    } else
+      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+    break;
+
+  case JCS_RGB:
+    if (cinfo->num_components != 3)
+      ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
+    if (cinfo->in_color_space == JCS_RGB) {
+      switch (cinfo->color_transform) {
+      case JCT_NONE:
+	cconvert->pub.color_convert = rgb_convert;
+	break;
+      case JCT_SUBTRACT_GREEN:
+	cconvert->pub.color_convert = rgb_rgb1_convert;
+	break;
+      default:
+	ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+	break;
+      }
+    } else
+      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+    break;
+
+  case JCS_YCbCr:
+    if (cinfo->num_components != 3)
+      ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
+    if (cinfo->in_color_space == JCS_RGB) {
+      cconvert->pub.start_pass = rgb_ycc_start;
+      cconvert->pub.color_convert = rgb_ycc_convert;
+    } else if (cinfo->in_color_space == JCS_YCbCr)
+      cconvert->pub.color_convert = null_convert;
+    else
+      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+    break;
+
+  case JCS_CMYK:
+    if (cinfo->num_components != 4)
+      ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
+    if (cinfo->in_color_space == JCS_CMYK)
+      cconvert->pub.color_convert = null_convert;
+    else
+      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+    break;
+
+  case JCS_YCCK:
+    if (cinfo->num_components != 4)
+      ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
+    if (cinfo->in_color_space == JCS_CMYK) {
+      cconvert->pub.start_pass = rgb_ycc_start;
+      cconvert->pub.color_convert = cmyk_ycck_convert;
+    } else if (cinfo->in_color_space == JCS_YCCK)
+      cconvert->pub.color_convert = null_convert;
+    else
+      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+    break;
+
+  default:			/* allow null conversion of JCS_UNKNOWN */
+    if (cinfo->jpeg_color_space != cinfo->in_color_space ||
+	cinfo->num_components != cinfo->input_components)
+      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+    cconvert->pub.color_convert = null_convert;
+    break;
+  }
+}
diff --git a/plugins/AdvaImg/src/LibJPEG/jcdctmgr.c b/plugins/AdvaImg/src/LibJPEG/jcdctmgr.c
index 0bbdbb685d..550b1a6e7c 100644
--- a/plugins/AdvaImg/src/LibJPEG/jcdctmgr.c
+++ b/plugins/AdvaImg/src/LibJPEG/jcdctmgr.c
@@ -1,482 +1,482 @@
-/*
- * jcdctmgr.c
- *
- * Copyright (C) 1994-1996, Thomas G. Lane.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains the forward-DCT management logic.
- * This code selects a particular DCT implementation to be used,
- * and it performs related housekeeping chores including coefficient
- * quantization.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-#include "jdct.h"		/* Private declarations for DCT subsystem */
-
-
-/* Private subobject for this module */
-
-typedef struct {
-  struct jpeg_forward_dct pub;	/* public fields */
-
-  /* Pointer to the DCT routine actually in use */
-  forward_DCT_method_ptr do_dct[MAX_COMPONENTS];
-
-  /* The actual post-DCT divisors --- not identical to the quant table
-   * entries, because of scaling (especially for an unnormalized DCT).
-   * Each table is given in normal array order.
-   */
-  DCTELEM * divisors[NUM_QUANT_TBLS];
-
-#ifdef DCT_FLOAT_SUPPORTED
-  /* Same as above for the floating-point case. */
-  float_DCT_method_ptr do_float_dct[MAX_COMPONENTS];
-  FAST_FLOAT * float_divisors[NUM_QUANT_TBLS];
-#endif
-} my_fdct_controller;
-
-typedef my_fdct_controller * my_fdct_ptr;
-
-
-/* The current scaled-DCT routines require ISLOW-style divisor tables,
- * so be sure to compile that code if either ISLOW or SCALING is requested.
- */
-#ifdef DCT_ISLOW_SUPPORTED
-#define PROVIDE_ISLOW_TABLES
-#else
-#ifdef DCT_SCALING_SUPPORTED
-#define PROVIDE_ISLOW_TABLES
-#endif
-#endif
-
-
-/*
- * Perform forward DCT on one or more blocks of a component.
- *
- * The input samples are taken from the sample_data[] array starting at
- * position start_row/start_col, and moving to the right for any additional
- * blocks. The quantized coefficients are returned in coef_blocks[].
- */
-
-METHODDEF(void)
-forward_DCT (j_compress_ptr cinfo, jpeg_component_info * compptr,
-	     JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
-	     JDIMENSION start_row, JDIMENSION start_col,
-	     JDIMENSION num_blocks)
-/* This version is used for integer DCT implementations. */
-{
-  /* This routine is heavily used, so it's worth coding it tightly. */
-  my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
-  forward_DCT_method_ptr do_dct = fdct->do_dct[compptr->component_index];
-  DCTELEM * divisors = fdct->divisors[compptr->quant_tbl_no];
-  DCTELEM workspace[DCTSIZE2];	/* work area for FDCT subroutine */
-  JDIMENSION bi;
-
-  sample_data += start_row;	/* fold in the vertical offset once */
-
-  for (bi = 0; bi < num_blocks; bi++, start_col += compptr->DCT_h_scaled_size) {
-    /* Perform the DCT */
-    (*do_dct) (workspace, sample_data, start_col);
-
-    /* Quantize/descale the coefficients, and store into coef_blocks[] */
-    { register DCTELEM temp, qval;
-      register int i;
-      register JCOEFPTR output_ptr = coef_blocks[bi];
-
-      for (i = 0; i < DCTSIZE2; i++) {
-	qval = divisors[i];
-	temp = workspace[i];
-	/* Divide the coefficient value by qval, ensuring proper rounding.
-	 * Since C does not specify the direction of rounding for negative
-	 * quotients, we have to force the dividend positive for portability.
-	 *
-	 * In most files, at least half of the output values will be zero
-	 * (at default quantization settings, more like three-quarters...)
-	 * so we should ensure that this case is fast.  On many machines,
-	 * a comparison is enough cheaper than a divide to make a special test
-	 * a win.  Since both inputs will be nonnegative, we need only test
-	 * for a < b to discover whether a/b is 0.
-	 * If your machine's division is fast enough, define FAST_DIVIDE.
-	 */
-#ifdef FAST_DIVIDE
-#define DIVIDE_BY(a,b)	a /= b
-#else
-#define DIVIDE_BY(a,b)	if (a >= b) a /= b; else a = 0
-#endif
-	if (temp < 0) {
-	  temp = -temp;
-	  temp += qval>>1;	/* for rounding */
-	  DIVIDE_BY(temp, qval);
-	  temp = -temp;
-	} else {
-	  temp += qval>>1;	/* for rounding */
-	  DIVIDE_BY(temp, qval);
-	}
-	output_ptr[i] = (JCOEF) temp;
-      }
-    }
-  }
-}
-
-
-#ifdef DCT_FLOAT_SUPPORTED
-
-METHODDEF(void)
-forward_DCT_float (j_compress_ptr cinfo, jpeg_component_info * compptr,
-		   JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
-		   JDIMENSION start_row, JDIMENSION start_col,
-		   JDIMENSION num_blocks)
-/* This version is used for floating-point DCT implementations. */
-{
-  /* This routine is heavily used, so it's worth coding it tightly. */
-  my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
-  float_DCT_method_ptr do_dct = fdct->do_float_dct[compptr->component_index];
-  FAST_FLOAT * divisors = fdct->float_divisors[compptr->quant_tbl_no];
-  FAST_FLOAT workspace[DCTSIZE2]; /* work area for FDCT subroutine */
-  JDIMENSION bi;
-
-  sample_data += start_row;	/* fold in the vertical offset once */
-
-  for (bi = 0; bi < num_blocks; bi++, start_col += compptr->DCT_h_scaled_size) {
-    /* Perform the DCT */
-    (*do_dct) (workspace, sample_data, start_col);
-
-    /* Quantize/descale the coefficients, and store into coef_blocks[] */
-    { register FAST_FLOAT temp;
-      register int i;
-      register JCOEFPTR output_ptr = coef_blocks[bi];
-
-      for (i = 0; i < DCTSIZE2; i++) {
-	/* Apply the quantization and scaling factor */
-	temp = workspace[i] * divisors[i];
-	/* Round to nearest integer.
-	 * Since C does not specify the direction of rounding for negative
-	 * quotients, we have to force the dividend positive for portability.
-	 * The maximum coefficient size is +-16K (for 12-bit data), so this
-	 * code should work for either 16-bit or 32-bit ints.
-	 */
-	output_ptr[i] = (JCOEF) ((int) (temp + (FAST_FLOAT) 16384.5) - 16384);
-      }
-    }
-  }
-}
-
-#endif /* DCT_FLOAT_SUPPORTED */
-
-
-/*
- * Initialize for a processing pass.
- * Verify that all referenced Q-tables are present, and set up
- * the divisor table for each one.
- * In the current implementation, DCT of all components is done during
- * the first pass, even if only some components will be output in the
- * first scan.  Hence all components should be examined here.
- */
-
-METHODDEF(void)
-start_pass_fdctmgr (j_compress_ptr cinfo)
-{
-  my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
-  int ci, qtblno, i;
-  jpeg_component_info *compptr;
-  int method = 0;
-  JQUANT_TBL * qtbl;
-  DCTELEM * dtbl;
-
-  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-       ci++, compptr++) {
-    /* Select the proper DCT routine for this component's scaling */
-    switch ((compptr->DCT_h_scaled_size << 8) + compptr->DCT_v_scaled_size) {
-#ifdef DCT_SCALING_SUPPORTED
-    case ((1 << 8) + 1):
-      fdct->do_dct[ci] = jpeg_fdct_1x1;
-      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
-      break;
-    case ((2 << 8) + 2):
-      fdct->do_dct[ci] = jpeg_fdct_2x2;
-      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
-      break;
-    case ((3 << 8) + 3):
-      fdct->do_dct[ci] = jpeg_fdct_3x3;
-      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
-      break;
-    case ((4 << 8) + 4):
-      fdct->do_dct[ci] = jpeg_fdct_4x4;
-      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
-      break;
-    case ((5 << 8) + 5):
-      fdct->do_dct[ci] = jpeg_fdct_5x5;
-      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
-      break;
-    case ((6 << 8) + 6):
-      fdct->do_dct[ci] = jpeg_fdct_6x6;
-      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
-      break;
-    case ((7 << 8) + 7):
-      fdct->do_dct[ci] = jpeg_fdct_7x7;
-      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
-      break;
-    case ((9 << 8) + 9):
-      fdct->do_dct[ci] = jpeg_fdct_9x9;
-      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
-      break;
-    case ((10 << 8) + 10):
-      fdct->do_dct[ci] = jpeg_fdct_10x10;
-      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
-      break;
-    case ((11 << 8) + 11):
-      fdct->do_dct[ci] = jpeg_fdct_11x11;
-      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
-      break;
-    case ((12 << 8) + 12):
-      fdct->do_dct[ci] = jpeg_fdct_12x12;
-      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
-      break;
-    case ((13 << 8) + 13):
-      fdct->do_dct[ci] = jpeg_fdct_13x13;
-      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
-      break;
-    case ((14 << 8) + 14):
-      fdct->do_dct[ci] = jpeg_fdct_14x14;
-      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
-      break;
-    case ((15 << 8) + 15):
-      fdct->do_dct[ci] = jpeg_fdct_15x15;
-      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
-      break;
-    case ((16 << 8) + 16):
-      fdct->do_dct[ci] = jpeg_fdct_16x16;
-      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
-      break;
-    case ((16 << 8) + 8):
-      fdct->do_dct[ci] = jpeg_fdct_16x8;
-      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
-      break;
-    case ((14 << 8) + 7):
-      fdct->do_dct[ci] = jpeg_fdct_14x7;
-      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
-      break;
-    case ((12 << 8) + 6):
-      fdct->do_dct[ci] = jpeg_fdct_12x6;
-      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
-      break;
-    case ((10 << 8) + 5):
-      fdct->do_dct[ci] = jpeg_fdct_10x5;
-      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
-      break;
-    case ((8 << 8) + 4):
-      fdct->do_dct[ci] = jpeg_fdct_8x4;
-      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
-      break;
-    case ((6 << 8) + 3):
-      fdct->do_dct[ci] = jpeg_fdct_6x3;
-      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
-      break;
-    case ((4 << 8) + 2):
-      fdct->do_dct[ci] = jpeg_fdct_4x2;
-      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
-      break;
-    case ((2 << 8) + 1):
-      fdct->do_dct[ci] = jpeg_fdct_2x1;
-      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
-      break;
-    case ((8 << 8) + 16):
-      fdct->do_dct[ci] = jpeg_fdct_8x16;
-      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
-      break;
-    case ((7 << 8) + 14):
-      fdct->do_dct[ci] = jpeg_fdct_7x14;
-      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
-      break;
-    case ((6 << 8) + 12):
-      fdct->do_dct[ci] = jpeg_fdct_6x12;
-      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
-      break;
-    case ((5 << 8) + 10):
-      fdct->do_dct[ci] = jpeg_fdct_5x10;
-      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
-      break;
-    case ((4 << 8) + 8):
-      fdct->do_dct[ci] = jpeg_fdct_4x8;
-      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
-      break;
-    case ((3 << 8) + 6):
-      fdct->do_dct[ci] = jpeg_fdct_3x6;
-      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
-      break;
-    case ((2 << 8) + 4):
-      fdct->do_dct[ci] = jpeg_fdct_2x4;
-      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
-      break;
-    case ((1 << 8) + 2):
-      fdct->do_dct[ci] = jpeg_fdct_1x2;
-      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
-      break;
-#endif
-    case ((DCTSIZE << 8) + DCTSIZE):
-      switch (cinfo->dct_method) {
-#ifdef DCT_ISLOW_SUPPORTED
-      case JDCT_ISLOW:
-	fdct->do_dct[ci] = jpeg_fdct_islow;
-	method = JDCT_ISLOW;
-	break;
-#endif
-#ifdef DCT_IFAST_SUPPORTED
-      case JDCT_IFAST:
-	fdct->do_dct[ci] = jpeg_fdct_ifast;
-	method = JDCT_IFAST;
-	break;
-#endif
-#ifdef DCT_FLOAT_SUPPORTED
-      case JDCT_FLOAT:
-	fdct->do_float_dct[ci] = jpeg_fdct_float;
-	method = JDCT_FLOAT;
-	break;
-#endif
-      default:
-	ERREXIT(cinfo, JERR_NOT_COMPILED);
-	break;
-      }
-      break;
-    default:
-      ERREXIT2(cinfo, JERR_BAD_DCTSIZE,
-	       compptr->DCT_h_scaled_size, compptr->DCT_v_scaled_size);
-      break;
-    }
-    qtblno = compptr->quant_tbl_no;
-    /* Make sure specified quantization table is present */
-    if (qtblno < 0 || qtblno >= NUM_QUANT_TBLS ||
-	cinfo->quant_tbl_ptrs[qtblno] == NULL)
-      ERREXIT1(cinfo, JERR_NO_QUANT_TABLE, qtblno);
-    qtbl = cinfo->quant_tbl_ptrs[qtblno];
-    /* Compute divisors for this quant table */
-    /* We may do this more than once for same table, but it's not a big deal */
-    switch (method) {
-#ifdef PROVIDE_ISLOW_TABLES
-    case JDCT_ISLOW:
-      /* For LL&M IDCT method, divisors are equal to raw quantization
-       * coefficients multiplied by 8 (to counteract scaling).
-       */
-      if (fdct->divisors[qtblno] == NULL) {
-	fdct->divisors[qtblno] = (DCTELEM *)
-	  (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				      DCTSIZE2 * SIZEOF(DCTELEM));
-      }
-      dtbl = fdct->divisors[qtblno];
-      for (i = 0; i < DCTSIZE2; i++) {
-	dtbl[i] = ((DCTELEM) qtbl->quantval[i]) << 3;
-      }
-      fdct->pub.forward_DCT[ci] = forward_DCT;
-      break;
-#endif
-#ifdef DCT_IFAST_SUPPORTED
-    case JDCT_IFAST:
-      {
-	/* For AA&N IDCT method, divisors are equal to quantization
-	 * coefficients scaled by scalefactor[row]*scalefactor[col], where
-	 *   scalefactor[0] = 1
-	 *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7
-	 * We apply a further scale factor of 8.
-	 */
-#define CONST_BITS 14
-	static const INT16 aanscales[DCTSIZE2] = {
-	  /* precomputed values scaled up by 14 bits */
-	  16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
-	  22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
-	  21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
-	  19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
-	  16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
-	  12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
-	   8867, 12299, 11585, 10426,  8867,  6967,  4799,  2446,
-	   4520,  6270,  5906,  5315,  4520,  3552,  2446,  1247
-	};
-	SHIFT_TEMPS
-
-	if (fdct->divisors[qtblno] == NULL) {
-	  fdct->divisors[qtblno] = (DCTELEM *)
-	    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-					DCTSIZE2 * SIZEOF(DCTELEM));
-	}
-	dtbl = fdct->divisors[qtblno];
-	for (i = 0; i < DCTSIZE2; i++) {
-	  dtbl[i] = (DCTELEM)
-	    DESCALE(MULTIPLY16V16((INT32) qtbl->quantval[i],
-				  (INT32) aanscales[i]),
-		    CONST_BITS-3);
-	}
-      }
-      fdct->pub.forward_DCT[ci] = forward_DCT;
-      break;
-#endif
-#ifdef DCT_FLOAT_SUPPORTED
-    case JDCT_FLOAT:
-      {
-	/* For float AA&N IDCT method, divisors are equal to quantization
-	 * coefficients scaled by scalefactor[row]*scalefactor[col], where
-	 *   scalefactor[0] = 1
-	 *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7
-	 * We apply a further scale factor of 8.
-	 * What's actually stored is 1/divisor so that the inner loop can
-	 * use a multiplication rather than a division.
-	 */
-	FAST_FLOAT * fdtbl;
-	int row, col;
-	static const double aanscalefactor[DCTSIZE] = {
-	  1.0, 1.387039845, 1.306562965, 1.175875602,
-	  1.0, 0.785694958, 0.541196100, 0.275899379
-	};
-
-	if (fdct->float_divisors[qtblno] == NULL) {
-	  fdct->float_divisors[qtblno] = (FAST_FLOAT *)
-	    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-					DCTSIZE2 * SIZEOF(FAST_FLOAT));
-	}
-	fdtbl = fdct->float_divisors[qtblno];
-	i = 0;
-	for (row = 0; row < DCTSIZE; row++) {
-	  for (col = 0; col < DCTSIZE; col++) {
-	    fdtbl[i] = (FAST_FLOAT)
-	      (1.0 / (((double) qtbl->quantval[i] *
-		       aanscalefactor[row] * aanscalefactor[col] * 8.0)));
-	    i++;
-	  }
-	}
-      }
-      fdct->pub.forward_DCT[ci] = forward_DCT_float;
-      break;
-#endif
-    default:
-      ERREXIT(cinfo, JERR_NOT_COMPILED);
-      break;
-    }
-  }
-}
-
-
-/*
- * Initialize FDCT manager.
- */
-
-GLOBAL(void)
-jinit_forward_dct (j_compress_ptr cinfo)
-{
-  my_fdct_ptr fdct;
-  int i;
-
-  fdct = (my_fdct_ptr)
-    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				SIZEOF(my_fdct_controller));
-  cinfo->fdct = (struct jpeg_forward_dct *) fdct;
-  fdct->pub.start_pass = start_pass_fdctmgr;
-
-  /* Mark divisor tables unallocated */
-  for (i = 0; i < NUM_QUANT_TBLS; i++) {
-    fdct->divisors[i] = NULL;
-#ifdef DCT_FLOAT_SUPPORTED
-    fdct->float_divisors[i] = NULL;
-#endif
-  }
-}
+/*
+ * jcdctmgr.c
+ *
+ * Copyright (C) 1994-1996, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains the forward-DCT management logic.
+ * This code selects a particular DCT implementation to be used,
+ * and it performs related housekeeping chores including coefficient
+ * quantization.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jdct.h"		/* Private declarations for DCT subsystem */
+
+
+/* Private subobject for this module */
+
+typedef struct {
+  struct jpeg_forward_dct pub;	/* public fields */
+
+  /* Pointer to the DCT routine actually in use */
+  forward_DCT_method_ptr do_dct[MAX_COMPONENTS];
+
+  /* The actual post-DCT divisors --- not identical to the quant table
+   * entries, because of scaling (especially for an unnormalized DCT).
+   * Each table is given in normal array order.
+   */
+  DCTELEM * divisors[NUM_QUANT_TBLS];
+
+#ifdef DCT_FLOAT_SUPPORTED
+  /* Same as above for the floating-point case. */
+  float_DCT_method_ptr do_float_dct[MAX_COMPONENTS];
+  FAST_FLOAT * float_divisors[NUM_QUANT_TBLS];
+#endif
+} my_fdct_controller;
+
+typedef my_fdct_controller * my_fdct_ptr;
+
+
+/* The current scaled-DCT routines require ISLOW-style divisor tables,
+ * so be sure to compile that code if either ISLOW or SCALING is requested.
+ */
+#ifdef DCT_ISLOW_SUPPORTED
+#define PROVIDE_ISLOW_TABLES
+#else
+#ifdef DCT_SCALING_SUPPORTED
+#define PROVIDE_ISLOW_TABLES
+#endif
+#endif
+
+
+/*
+ * Perform forward DCT on one or more blocks of a component.
+ *
+ * The input samples are taken from the sample_data[] array starting at
+ * position start_row/start_col, and moving to the right for any additional
+ * blocks. The quantized coefficients are returned in coef_blocks[].
+ */
+
+METHODDEF(void)
+forward_DCT (j_compress_ptr cinfo, jpeg_component_info * compptr,
+	     JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
+	     JDIMENSION start_row, JDIMENSION start_col,
+	     JDIMENSION num_blocks)
+/* This version is used for integer DCT implementations. */
+{
+  /* This routine is heavily used, so it's worth coding it tightly. */
+  my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
+  forward_DCT_method_ptr do_dct = fdct->do_dct[compptr->component_index];
+  DCTELEM * divisors = fdct->divisors[compptr->quant_tbl_no];
+  DCTELEM workspace[DCTSIZE2];	/* work area for FDCT subroutine */
+  JDIMENSION bi;
+
+  sample_data += start_row;	/* fold in the vertical offset once */
+
+  for (bi = 0; bi < num_blocks; bi++, start_col += compptr->DCT_h_scaled_size) {
+    /* Perform the DCT */
+    (*do_dct) (workspace, sample_data, start_col);
+
+    /* Quantize/descale the coefficients, and store into coef_blocks[] */
+    { register DCTELEM temp, qval;
+      register int i;
+      register JCOEFPTR output_ptr = coef_blocks[bi];
+
+      for (i = 0; i < DCTSIZE2; i++) {
+	qval = divisors[i];
+	temp = workspace[i];
+	/* Divide the coefficient value by qval, ensuring proper rounding.
+	 * Since C does not specify the direction of rounding for negative
+	 * quotients, we have to force the dividend positive for portability.
+	 *
+	 * In most files, at least half of the output values will be zero
+	 * (at default quantization settings, more like three-quarters...)
+	 * so we should ensure that this case is fast.  On many machines,
+	 * a comparison is enough cheaper than a divide to make a special test
+	 * a win.  Since both inputs will be nonnegative, we need only test
+	 * for a < b to discover whether a/b is 0.
+	 * If your machine's division is fast enough, define FAST_DIVIDE.
+	 */
+#ifdef FAST_DIVIDE
+#define DIVIDE_BY(a,b)	a /= b
+#else
+#define DIVIDE_BY(a,b)	if (a >= b) a /= b; else a = 0
+#endif
+	if (temp < 0) {
+	  temp = -temp;
+	  temp += qval>>1;	/* for rounding */
+	  DIVIDE_BY(temp, qval);
+	  temp = -temp;
+	} else {
+	  temp += qval>>1;	/* for rounding */
+	  DIVIDE_BY(temp, qval);
+	}
+	output_ptr[i] = (JCOEF) temp;
+      }
+    }
+  }
+}
+
+
+#ifdef DCT_FLOAT_SUPPORTED
+
+METHODDEF(void)
+forward_DCT_float (j_compress_ptr cinfo, jpeg_component_info * compptr,
+		   JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
+		   JDIMENSION start_row, JDIMENSION start_col,
+		   JDIMENSION num_blocks)
+/* This version is used for floating-point DCT implementations. */
+{
+  /* This routine is heavily used, so it's worth coding it tightly. */
+  my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
+  float_DCT_method_ptr do_dct = fdct->do_float_dct[compptr->component_index];
+  FAST_FLOAT * divisors = fdct->float_divisors[compptr->quant_tbl_no];
+  FAST_FLOAT workspace[DCTSIZE2]; /* work area for FDCT subroutine */
+  JDIMENSION bi;
+
+  sample_data += start_row;	/* fold in the vertical offset once */
+
+  for (bi = 0; bi < num_blocks; bi++, start_col += compptr->DCT_h_scaled_size) {
+    /* Perform the DCT */
+    (*do_dct) (workspace, sample_data, start_col);
+
+    /* Quantize/descale the coefficients, and store into coef_blocks[] */
+    { register FAST_FLOAT temp;
+      register int i;
+      register JCOEFPTR output_ptr = coef_blocks[bi];
+
+      for (i = 0; i < DCTSIZE2; i++) {
+	/* Apply the quantization and scaling factor */
+	temp = workspace[i] * divisors[i];
+	/* Round to nearest integer.
+	 * Since C does not specify the direction of rounding for negative
+	 * quotients, we have to force the dividend positive for portability.
+	 * The maximum coefficient size is +-16K (for 12-bit data), so this
+	 * code should work for either 16-bit or 32-bit ints.
+	 */
+	output_ptr[i] = (JCOEF) ((int) (temp + (FAST_FLOAT) 16384.5) - 16384);
+      }
+    }
+  }
+}
+
+#endif /* DCT_FLOAT_SUPPORTED */
+
+
+/*
+ * Initialize for a processing pass.
+ * Verify that all referenced Q-tables are present, and set up
+ * the divisor table for each one.
+ * In the current implementation, DCT of all components is done during
+ * the first pass, even if only some components will be output in the
+ * first scan.  Hence all components should be examined here.
+ */
+
+METHODDEF(void)
+start_pass_fdctmgr (j_compress_ptr cinfo)
+{
+  my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
+  int ci, qtblno, i;
+  jpeg_component_info *compptr;
+  int method = 0;
+  JQUANT_TBL * qtbl;
+  DCTELEM * dtbl;
+
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    /* Select the proper DCT routine for this component's scaling */
+    switch ((compptr->DCT_h_scaled_size << 8) + compptr->DCT_v_scaled_size) {
+#ifdef DCT_SCALING_SUPPORTED
+    case ((1 << 8) + 1):
+      fdct->do_dct[ci] = jpeg_fdct_1x1;
+      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
+      break;
+    case ((2 << 8) + 2):
+      fdct->do_dct[ci] = jpeg_fdct_2x2;
+      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
+      break;
+    case ((3 << 8) + 3):
+      fdct->do_dct[ci] = jpeg_fdct_3x3;
+      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
+      break;
+    case ((4 << 8) + 4):
+      fdct->do_dct[ci] = jpeg_fdct_4x4;
+      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
+      break;
+    case ((5 << 8) + 5):
+      fdct->do_dct[ci] = jpeg_fdct_5x5;
+      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
+      break;
+    case ((6 << 8) + 6):
+      fdct->do_dct[ci] = jpeg_fdct_6x6;
+      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
+      break;
+    case ((7 << 8) + 7):
+      fdct->do_dct[ci] = jpeg_fdct_7x7;
+      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
+      break;
+    case ((9 << 8) + 9):
+      fdct->do_dct[ci] = jpeg_fdct_9x9;
+      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
+      break;
+    case ((10 << 8) + 10):
+      fdct->do_dct[ci] = jpeg_fdct_10x10;
+      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
+      break;
+    case ((11 << 8) + 11):
+      fdct->do_dct[ci] = jpeg_fdct_11x11;
+      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
+      break;
+    case ((12 << 8) + 12):
+      fdct->do_dct[ci] = jpeg_fdct_12x12;
+      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
+      break;
+    case ((13 << 8) + 13):
+      fdct->do_dct[ci] = jpeg_fdct_13x13;
+      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
+      break;
+    case ((14 << 8) + 14):
+      fdct->do_dct[ci] = jpeg_fdct_14x14;
+      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
+      break;
+    case ((15 << 8) + 15):
+      fdct->do_dct[ci] = jpeg_fdct_15x15;
+      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
+      break;
+    case ((16 << 8) + 16):
+      fdct->do_dct[ci] = jpeg_fdct_16x16;
+      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
+      break;
+    case ((16 << 8) + 8):
+      fdct->do_dct[ci] = jpeg_fdct_16x8;
+      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
+      break;
+    case ((14 << 8) + 7):
+      fdct->do_dct[ci] = jpeg_fdct_14x7;
+      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
+      break;
+    case ((12 << 8) + 6):
+      fdct->do_dct[ci] = jpeg_fdct_12x6;
+      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
+      break;
+    case ((10 << 8) + 5):
+      fdct->do_dct[ci] = jpeg_fdct_10x5;
+      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
+      break;
+    case ((8 << 8) + 4):
+      fdct->do_dct[ci] = jpeg_fdct_8x4;
+      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
+      break;
+    case ((6 << 8) + 3):
+      fdct->do_dct[ci] = jpeg_fdct_6x3;
+      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
+      break;
+    case ((4 << 8) + 2):
+      fdct->do_dct[ci] = jpeg_fdct_4x2;
+      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
+      break;
+    case ((2 << 8) + 1):
+      fdct->do_dct[ci] = jpeg_fdct_2x1;
+      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
+      break;
+    case ((8 << 8) + 16):
+      fdct->do_dct[ci] = jpeg_fdct_8x16;
+      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
+      break;
+    case ((7 << 8) + 14):
+      fdct->do_dct[ci] = jpeg_fdct_7x14;
+      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
+      break;
+    case ((6 << 8) + 12):
+      fdct->do_dct[ci] = jpeg_fdct_6x12;
+      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
+      break;
+    case ((5 << 8) + 10):
+      fdct->do_dct[ci] = jpeg_fdct_5x10;
+      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
+      break;
+    case ((4 << 8) + 8):
+      fdct->do_dct[ci] = jpeg_fdct_4x8;
+      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
+      break;
+    case ((3 << 8) + 6):
+      fdct->do_dct[ci] = jpeg_fdct_3x6;
+      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
+      break;
+    case ((2 << 8) + 4):
+      fdct->do_dct[ci] = jpeg_fdct_2x4;
+      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
+      break;
+    case ((1 << 8) + 2):
+      fdct->do_dct[ci] = jpeg_fdct_1x2;
+      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
+      break;
+#endif
+    case ((DCTSIZE << 8) + DCTSIZE):
+      switch (cinfo->dct_method) {
+#ifdef DCT_ISLOW_SUPPORTED
+      case JDCT_ISLOW:
+	fdct->do_dct[ci] = jpeg_fdct_islow;
+	method = JDCT_ISLOW;
+	break;
+#endif
+#ifdef DCT_IFAST_SUPPORTED
+      case JDCT_IFAST:
+	fdct->do_dct[ci] = jpeg_fdct_ifast;
+	method = JDCT_IFAST;
+	break;
+#endif
+#ifdef DCT_FLOAT_SUPPORTED
+      case JDCT_FLOAT:
+	fdct->do_float_dct[ci] = jpeg_fdct_float;
+	method = JDCT_FLOAT;
+	break;
+#endif
+      default:
+	ERREXIT(cinfo, JERR_NOT_COMPILED);
+	break;
+      }
+      break;
+    default:
+      ERREXIT2(cinfo, JERR_BAD_DCTSIZE,
+	       compptr->DCT_h_scaled_size, compptr->DCT_v_scaled_size);
+      break;
+    }
+    qtblno = compptr->quant_tbl_no;
+    /* Make sure specified quantization table is present */
+    if (qtblno < 0 || qtblno >= NUM_QUANT_TBLS ||
+	cinfo->quant_tbl_ptrs[qtblno] == NULL)
+      ERREXIT1(cinfo, JERR_NO_QUANT_TABLE, qtblno);
+    qtbl = cinfo->quant_tbl_ptrs[qtblno];
+    /* Compute divisors for this quant table */
+    /* We may do this more than once for same table, but it's not a big deal */
+    switch (method) {
+#ifdef PROVIDE_ISLOW_TABLES
+    case JDCT_ISLOW:
+      /* For LL&M IDCT method, divisors are equal to raw quantization
+       * coefficients multiplied by 8 (to counteract scaling).
+       */
+      if (fdct->divisors[qtblno] == NULL) {
+	fdct->divisors[qtblno] = (DCTELEM *)
+	  (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				      DCTSIZE2 * SIZEOF(DCTELEM));
+      }
+      dtbl = fdct->divisors[qtblno];
+      for (i = 0; i < DCTSIZE2; i++) {
+	dtbl[i] = ((DCTELEM) qtbl->quantval[i]) << 3;
+      }
+      fdct->pub.forward_DCT[ci] = forward_DCT;
+      break;
+#endif
+#ifdef DCT_IFAST_SUPPORTED
+    case JDCT_IFAST:
+      {
+	/* For AA&N IDCT method, divisors are equal to quantization
+	 * coefficients scaled by scalefactor[row]*scalefactor[col], where
+	 *   scalefactor[0] = 1
+	 *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7
+	 * We apply a further scale factor of 8.
+	 */
+#define CONST_BITS 14
+	static const INT16 aanscales[DCTSIZE2] = {
+	  /* precomputed values scaled up by 14 bits */
+	  16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
+	  22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
+	  21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
+	  19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
+	  16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
+	  12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
+	   8867, 12299, 11585, 10426,  8867,  6967,  4799,  2446,
+	   4520,  6270,  5906,  5315,  4520,  3552,  2446,  1247
+	};
+	SHIFT_TEMPS
+
+	if (fdct->divisors[qtblno] == NULL) {
+	  fdct->divisors[qtblno] = (DCTELEM *)
+	    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+					DCTSIZE2 * SIZEOF(DCTELEM));
+	}
+	dtbl = fdct->divisors[qtblno];
+	for (i = 0; i < DCTSIZE2; i++) {
+	  dtbl[i] = (DCTELEM)
+	    DESCALE(MULTIPLY16V16((INT32) qtbl->quantval[i],
+				  (INT32) aanscales[i]),
+		    CONST_BITS-3);
+	}
+      }
+      fdct->pub.forward_DCT[ci] = forward_DCT;
+      break;
+#endif
+#ifdef DCT_FLOAT_SUPPORTED
+    case JDCT_FLOAT:
+      {
+	/* For float AA&N IDCT method, divisors are equal to quantization
+	 * coefficients scaled by scalefactor[row]*scalefactor[col], where
+	 *   scalefactor[0] = 1
+	 *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7
+	 * We apply a further scale factor of 8.
+	 * What's actually stored is 1/divisor so that the inner loop can
+	 * use a multiplication rather than a division.
+	 */
+	FAST_FLOAT * fdtbl;
+	int row, col;
+	static const double aanscalefactor[DCTSIZE] = {
+	  1.0, 1.387039845, 1.306562965, 1.175875602,
+	  1.0, 0.785694958, 0.541196100, 0.275899379
+	};
+
+	if (fdct->float_divisors[qtblno] == NULL) {
+	  fdct->float_divisors[qtblno] = (FAST_FLOAT *)
+	    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+					DCTSIZE2 * SIZEOF(FAST_FLOAT));
+	}
+	fdtbl = fdct->float_divisors[qtblno];
+	i = 0;
+	for (row = 0; row < DCTSIZE; row++) {
+	  for (col = 0; col < DCTSIZE; col++) {
+	    fdtbl[i] = (FAST_FLOAT)
+	      (1.0 / (((double) qtbl->quantval[i] *
+		       aanscalefactor[row] * aanscalefactor[col] * 8.0)));
+	    i++;
+	  }
+	}
+      }
+      fdct->pub.forward_DCT[ci] = forward_DCT_float;
+      break;
+#endif
+    default:
+      ERREXIT(cinfo, JERR_NOT_COMPILED);
+      break;
+    }
+  }
+}
+
+
+/*
+ * Initialize FDCT manager.
+ */
+
+GLOBAL(void)
+jinit_forward_dct (j_compress_ptr cinfo)
+{
+  my_fdct_ptr fdct;
+  int i;
+
+  fdct = (my_fdct_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				SIZEOF(my_fdct_controller));
+  cinfo->fdct = (struct jpeg_forward_dct *) fdct;
+  fdct->pub.start_pass = start_pass_fdctmgr;
+
+  /* Mark divisor tables unallocated */
+  for (i = 0; i < NUM_QUANT_TBLS; i++) {
+    fdct->divisors[i] = NULL;
+#ifdef DCT_FLOAT_SUPPORTED
+    fdct->float_divisors[i] = NULL;
+#endif
+  }
+}
diff --git a/plugins/AdvaImg/src/LibJPEG/jchuff.c b/plugins/AdvaImg/src/LibJPEG/jchuff.c
index 257d7aa1f5..4cbab438d5 100644
--- a/plugins/AdvaImg/src/LibJPEG/jchuff.c
+++ b/plugins/AdvaImg/src/LibJPEG/jchuff.c
@@ -1,1576 +1,1576 @@
-/*
- * jchuff.c
- *
- * Copyright (C) 1991-1997, Thomas G. Lane.
- * Modified 2006-2009 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains Huffman entropy encoding routines.
- * Both sequential and progressive modes are supported in this single module.
- *
- * Much of the complexity here has to do with supporting output suspension.
- * If the data destination module demands suspension, we want to be able to
- * back up to the start of the current MCU.  To do this, we copy state
- * variables into local working storage, and update them back to the
- * permanent JPEG objects only upon successful completion of an MCU.
- *
- * We do not support output suspension for the progressive JPEG mode, since
- * the library currently does not allow multiple-scan files to be written
- * with output suspension.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-
-
-/* The legal range of a DCT coefficient is
- *  -1024 .. +1023  for 8-bit data;
- * -16384 .. +16383 for 12-bit data.
- * Hence the magnitude should always fit in 10 or 14 bits respectively.
- */
-
-#if BITS_IN_JSAMPLE == 8
-#define MAX_COEF_BITS 10
-#else
-#define MAX_COEF_BITS 14
-#endif
-
-/* Derived data constructed for each Huffman table */
-
-typedef struct {
-  unsigned int ehufco[256];	/* code for each symbol */
-  char ehufsi[256];		/* length of code for each symbol */
-  /* If no code has been allocated for a symbol S, ehufsi[S] contains 0 */
-} c_derived_tbl;
-
-
-/* Expanded entropy encoder object for Huffman encoding.
- *
- * The savable_state subrecord contains fields that change within an MCU,
- * but must not be updated permanently until we complete the MCU.
- */
-
-typedef struct {
-  INT32 put_buffer;		/* current bit-accumulation buffer */
-  int put_bits;			/* # of bits now in it */
-  int last_dc_val[MAX_COMPS_IN_SCAN]; /* last DC coef for each component */
-} savable_state;
-
-/* This macro is to work around compilers with missing or broken
- * structure assignment.  You'll need to fix this code if you have
- * such a compiler and you change MAX_COMPS_IN_SCAN.
- */
-
-#ifndef NO_STRUCT_ASSIGN
-#define ASSIGN_STATE(dest,src)  ((dest) = (src))
-#else
-#if MAX_COMPS_IN_SCAN == 4
-#define ASSIGN_STATE(dest,src)  \
-	((dest).put_buffer = (src).put_buffer, \
-	 (dest).put_bits = (src).put_bits, \
-	 (dest).last_dc_val[0] = (src).last_dc_val[0], \
-	 (dest).last_dc_val[1] = (src).last_dc_val[1], \
-	 (dest).last_dc_val[2] = (src).last_dc_val[2], \
-	 (dest).last_dc_val[3] = (src).last_dc_val[3])
-#endif
-#endif
-
-
-typedef struct {
-  struct jpeg_entropy_encoder pub; /* public fields */
-
-  savable_state saved;		/* Bit buffer & DC state at start of MCU */
-
-  /* These fields are NOT loaded into local working state. */
-  unsigned int restarts_to_go;	/* MCUs left in this restart interval */
-  int next_restart_num;		/* next restart number to write (0-7) */
-
-  /* Pointers to derived tables (these workspaces have image lifespan) */
-  c_derived_tbl * dc_derived_tbls[NUM_HUFF_TBLS];
-  c_derived_tbl * ac_derived_tbls[NUM_HUFF_TBLS];
-
-  /* Statistics tables for optimization */
-  long * dc_count_ptrs[NUM_HUFF_TBLS];
-  long * ac_count_ptrs[NUM_HUFF_TBLS];
-
-  /* Following fields used only in progressive mode */
-
-  /* Mode flag: TRUE for optimization, FALSE for actual data output */
-  boolean gather_statistics;
-
-  /* next_output_byte/free_in_buffer are local copies of cinfo->dest fields.
-   */
-  JOCTET * next_output_byte;	/* => next byte to write in buffer */
-  size_t free_in_buffer;	/* # of byte spaces remaining in buffer */
-  j_compress_ptr cinfo;		/* link to cinfo (needed for dump_buffer) */
-
-  /* Coding status for AC components */
-  int ac_tbl_no;		/* the table number of the single component */
-  unsigned int EOBRUN;		/* run length of EOBs */
-  unsigned int BE;		/* # of buffered correction bits before MCU */
-  char * bit_buffer;		/* buffer for correction bits (1 per char) */
-  /* packing correction bits tightly would save some space but cost time... */
-} huff_entropy_encoder;
-
-typedef huff_entropy_encoder * huff_entropy_ptr;
-
-/* Working state while writing an MCU (sequential mode).
- * This struct contains all the fields that are needed by subroutines.
- */
-
-typedef struct {
-  JOCTET * next_output_byte;	/* => next byte to write in buffer */
-  size_t free_in_buffer;	/* # of byte spaces remaining in buffer */
-  savable_state cur;		/* Current bit buffer & DC state */
-  j_compress_ptr cinfo;		/* dump_buffer needs access to this */
-} working_state;
-
-/* MAX_CORR_BITS is the number of bits the AC refinement correction-bit
- * buffer can hold.  Larger sizes may slightly improve compression, but
- * 1000 is already well into the realm of overkill.
- * The minimum safe size is 64 bits.
- */
-
-#define MAX_CORR_BITS  1000	/* Max # of correction bits I can buffer */
-
-/* IRIGHT_SHIFT is like RIGHT_SHIFT, but works on int rather than INT32.
- * We assume that int right shift is unsigned if INT32 right shift is,
- * which should be safe.
- */
-
-#ifdef RIGHT_SHIFT_IS_UNSIGNED
-#define ISHIFT_TEMPS	int ishift_temp;
-#define IRIGHT_SHIFT(x,shft)  \
-	((ishift_temp = (x)) < 0 ? \
-	 (ishift_temp >> (shft)) | ((~0) << (16-(shft))) : \
-	 (ishift_temp >> (shft)))
-#else
-#define ISHIFT_TEMPS
-#define IRIGHT_SHIFT(x,shft)	((x) >> (shft))
-#endif
-
-
-/*
- * Compute the derived values for a Huffman table.
- * This routine also performs some validation checks on the table.
- */
-
-LOCAL(void)
-jpeg_make_c_derived_tbl (j_compress_ptr cinfo, boolean isDC, int tblno,
-			 c_derived_tbl ** pdtbl)
-{
-  JHUFF_TBL *htbl;
-  c_derived_tbl *dtbl;
-  int p, i, l, lastp, si, maxsymbol;
-  char huffsize[257];
-  unsigned int huffcode[257];
-  unsigned int code;
-
-  /* Note that huffsize[] and huffcode[] are filled in code-length order,
-   * paralleling the order of the symbols themselves in htbl->huffval[].
-   */
-
-  /* Find the input Huffman table */
-  if (tblno < 0 || tblno >= NUM_HUFF_TBLS)
-    ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, tblno);
-  htbl =
-    isDC ? cinfo->dc_huff_tbl_ptrs[tblno] : cinfo->ac_huff_tbl_ptrs[tblno];
-  if (htbl == NULL)
-    ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, tblno);
-
-  /* Allocate a workspace if we haven't already done so. */
-  if (*pdtbl == NULL)
-    *pdtbl = (c_derived_tbl *)
-      (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				  SIZEOF(c_derived_tbl));
-  dtbl = *pdtbl;
-  
-  /* Figure C.1: make table of Huffman code length for each symbol */
-
-  p = 0;
-  for (l = 1; l <= 16; l++) {
-    i = (int) htbl->bits[l];
-    if (i < 0 || p + i > 256)	/* protect against table overrun */
-      ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
-    while (i--)
-      huffsize[p++] = (char) l;
-  }
-  huffsize[p] = 0;
-  lastp = p;
-  
-  /* Figure C.2: generate the codes themselves */
-  /* We also validate that the counts represent a legal Huffman code tree. */
-
-  code = 0;
-  si = huffsize[0];
-  p = 0;
-  while (huffsize[p]) {
-    while (((int) huffsize[p]) == si) {
-      huffcode[p++] = code;
-      code++;
-    }
-    /* code is now 1 more than the last code used for codelength si; but
-     * it must still fit in si bits, since no code is allowed to be all ones.
-     */
-    if (((INT32) code) >= (((INT32) 1) << si))
-      ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
-    code <<= 1;
-    si++;
-  }
-  
-  /* Figure C.3: generate encoding tables */
-  /* These are code and size indexed by symbol value */
-
-  /* Set all codeless symbols to have code length 0;
-   * this lets us detect duplicate VAL entries here, and later
-   * allows emit_bits to detect any attempt to emit such symbols.
-   */
-  MEMZERO(dtbl->ehufsi, SIZEOF(dtbl->ehufsi));
-
-  /* This is also a convenient place to check for out-of-range
-   * and duplicated VAL entries.  We allow 0..255 for AC symbols
-   * but only 0..15 for DC.  (We could constrain them further
-   * based on data depth and mode, but this seems enough.)
-   */
-  maxsymbol = isDC ? 15 : 255;
-
-  for (p = 0; p < lastp; p++) {
-    i = htbl->huffval[p];
-    if (i < 0 || i > maxsymbol || dtbl->ehufsi[i])
-      ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
-    dtbl->ehufco[i] = huffcode[p];
-    dtbl->ehufsi[i] = huffsize[p];
-  }
-}
-
-
-/* Outputting bytes to the file.
- * NB: these must be called only when actually outputting,
- * that is, entropy->gather_statistics == FALSE.
- */
-
-/* Emit a byte, taking 'action' if must suspend. */
-#define emit_byte_s(state,val,action)  \
-	{ *(state)->next_output_byte++ = (JOCTET) (val);  \
-	  if (--(state)->free_in_buffer == 0)  \
-	    if (! dump_buffer_s(state))  \
-	      { action; } }
-
-/* Emit a byte */
-#define emit_byte_e(entropy,val)  \
-	{ *(entropy)->next_output_byte++ = (JOCTET) (val);  \
-	  if (--(entropy)->free_in_buffer == 0)  \
-	    dump_buffer_e(entropy); }
-
-
-LOCAL(boolean)
-dump_buffer_s (working_state * state)
-/* Empty the output buffer; return TRUE if successful, FALSE if must suspend */
-{
-  struct jpeg_destination_mgr * dest = state->cinfo->dest;
-
-  if (! (*dest->empty_output_buffer) (state->cinfo))
-    return FALSE;
-  /* After a successful buffer dump, must reset buffer pointers */
-  state->next_output_byte = dest->next_output_byte;
-  state->free_in_buffer = dest->free_in_buffer;
-  return TRUE;
-}
-
-
-LOCAL(void)
-dump_buffer_e (huff_entropy_ptr entropy)
-/* Empty the output buffer; we do not support suspension in this case. */
-{
-  struct jpeg_destination_mgr * dest = entropy->cinfo->dest;
-
-  if (! (*dest->empty_output_buffer) (entropy->cinfo))
-    ERREXIT(entropy->cinfo, JERR_CANT_SUSPEND);
-  /* After a successful buffer dump, must reset buffer pointers */
-  entropy->next_output_byte = dest->next_output_byte;
-  entropy->free_in_buffer = dest->free_in_buffer;
-}
-
-
-/* Outputting bits to the file */
-
-/* Only the right 24 bits of put_buffer are used; the valid bits are
- * left-justified in this part.  At most 16 bits can be passed to emit_bits
- * in one call, and we never retain more than 7 bits in put_buffer
- * between calls, so 24 bits are sufficient.
- */
-
-INLINE
-LOCAL(boolean)
-emit_bits_s (working_state * state, unsigned int code, int size)
-/* Emit some bits; return TRUE if successful, FALSE if must suspend */
-{
-  /* This routine is heavily used, so it's worth coding tightly. */
-  register INT32 put_buffer = (INT32) code;
-  register int put_bits = state->cur.put_bits;
-
-  /* if size is 0, caller used an invalid Huffman table entry */
-  if (size == 0)
-    ERREXIT(state->cinfo, JERR_HUFF_MISSING_CODE);
-
-  put_buffer &= (((INT32) 1)<<size) - 1; /* mask off any extra bits in code */
-  
-  put_bits += size;		/* new number of bits in buffer */
-  
-  put_buffer <<= 24 - put_bits; /* align incoming bits */
-
-  put_buffer |= state->cur.put_buffer; /* and merge with old buffer contents */
-  
-  while (put_bits >= 8) {
-    int c = (int) ((put_buffer >> 16) & 0xFF);
-    
-    emit_byte_s(state, c, return FALSE);
-    if (c == 0xFF) {		/* need to stuff a zero byte? */
-      emit_byte_s(state, 0, return FALSE);
-    }
-    put_buffer <<= 8;
-    put_bits -= 8;
-  }
-
-  state->cur.put_buffer = put_buffer; /* update state variables */
-  state->cur.put_bits = put_bits;
-
-  return TRUE;
-}
-
-
-INLINE
-LOCAL(void)
-emit_bits_e (huff_entropy_ptr entropy, unsigned int code, int size)
-/* Emit some bits, unless we are in gather mode */
-{
-  /* This routine is heavily used, so it's worth coding tightly. */
-  register INT32 put_buffer = (INT32) code;
-  register int put_bits = entropy->saved.put_bits;
-
-  /* if size is 0, caller used an invalid Huffman table entry */
-  if (size == 0)
-    ERREXIT(entropy->cinfo, JERR_HUFF_MISSING_CODE);
-
-  if (entropy->gather_statistics)
-    return;			/* do nothing if we're only getting stats */
-
-  put_buffer &= (((INT32) 1)<<size) - 1; /* mask off any extra bits in code */
-  
-  put_bits += size;		/* new number of bits in buffer */
-
-  put_buffer <<= 24 - put_bits; /* align incoming bits */
-
-  /* and merge with old buffer contents */
-  put_buffer |= entropy->saved.put_buffer;
-
-  while (put_bits >= 8) {
-    int c = (int) ((put_buffer >> 16) & 0xFF);
-
-    emit_byte_e(entropy, c);
-    if (c == 0xFF) {		/* need to stuff a zero byte? */
-      emit_byte_e(entropy, 0);
-    }
-    put_buffer <<= 8;
-    put_bits -= 8;
-  }
-
-  entropy->saved.put_buffer = put_buffer; /* update variables */
-  entropy->saved.put_bits = put_bits;
-}
-
-
-LOCAL(boolean)
-flush_bits_s (working_state * state)
-{
-  if (! emit_bits_s(state, 0x7F, 7)) /* fill any partial byte with ones */
-    return FALSE;
-  state->cur.put_buffer = 0;	     /* and reset bit-buffer to empty */
-  state->cur.put_bits = 0;
-  return TRUE;
-}
-
-
-LOCAL(void)
-flush_bits_e (huff_entropy_ptr entropy)
-{
-  emit_bits_e(entropy, 0x7F, 7); /* fill any partial byte with ones */
-  entropy->saved.put_buffer = 0; /* and reset bit-buffer to empty */
-  entropy->saved.put_bits = 0;
-}
-
-
-/*
- * Emit (or just count) a Huffman symbol.
- */
-
-INLINE
-LOCAL(void)
-emit_dc_symbol (huff_entropy_ptr entropy, int tbl_no, int symbol)
-{
-  if (entropy->gather_statistics)
-    entropy->dc_count_ptrs[tbl_no][symbol]++;
-  else {
-    c_derived_tbl * tbl = entropy->dc_derived_tbls[tbl_no];
-    emit_bits_e(entropy, tbl->ehufco[symbol], tbl->ehufsi[symbol]);
-  }
-}
-
-
-INLINE
-LOCAL(void)
-emit_ac_symbol (huff_entropy_ptr entropy, int tbl_no, int symbol)
-{
-  if (entropy->gather_statistics)
-    entropy->ac_count_ptrs[tbl_no][symbol]++;
-  else {
-    c_derived_tbl * tbl = entropy->ac_derived_tbls[tbl_no];
-    emit_bits_e(entropy, tbl->ehufco[symbol], tbl->ehufsi[symbol]);
-  }
-}
-
-
-/*
- * Emit bits from a correction bit buffer.
- */
-
-LOCAL(void)
-emit_buffered_bits (huff_entropy_ptr entropy, char * bufstart,
-		    unsigned int nbits)
-{
-  if (entropy->gather_statistics)
-    return;			/* no real work */
-
-  while (nbits > 0) {
-    emit_bits_e(entropy, (unsigned int) (*bufstart), 1);
-    bufstart++;
-    nbits--;
-  }
-}
-
-
-/*
- * Emit any pending EOBRUN symbol.
- */
-
-LOCAL(void)
-emit_eobrun (huff_entropy_ptr entropy)
-{
-  register int temp, nbits;
-
-  if (entropy->EOBRUN > 0) {	/* if there is any pending EOBRUN */
-    temp = entropy->EOBRUN;
-    nbits = 0;
-    while ((temp >>= 1))
-      nbits++;
-    /* safety check: shouldn't happen given limited correction-bit buffer */
-    if (nbits > 14)
-      ERREXIT(entropy->cinfo, JERR_HUFF_MISSING_CODE);
-
-    emit_ac_symbol(entropy, entropy->ac_tbl_no, nbits << 4);
-    if (nbits)
-      emit_bits_e(entropy, entropy->EOBRUN, nbits);
-
-    entropy->EOBRUN = 0;
-
-    /* Emit any buffered correction bits */
-    emit_buffered_bits(entropy, entropy->bit_buffer, entropy->BE);
-    entropy->BE = 0;
-  }
-}
-
-
-/*
- * Emit a restart marker & resynchronize predictions.
- */
-
-LOCAL(boolean)
-emit_restart_s (working_state * state, int restart_num)
-{
-  int ci;
-
-  if (! flush_bits_s(state))
-    return FALSE;
-
-  emit_byte_s(state, 0xFF, return FALSE);
-  emit_byte_s(state, JPEG_RST0 + restart_num, return FALSE);
-
-  /* Re-initialize DC predictions to 0 */
-  for (ci = 0; ci < state->cinfo->comps_in_scan; ci++)
-    state->cur.last_dc_val[ci] = 0;
-
-  /* The restart counter is not updated until we successfully write the MCU. */
-
-  return TRUE;
-}
-
-
-LOCAL(void)
-emit_restart_e (huff_entropy_ptr entropy, int restart_num)
-{
-  int ci;
-
-  emit_eobrun(entropy);
-
-  if (! entropy->gather_statistics) {
-    flush_bits_e(entropy);
-    emit_byte_e(entropy, 0xFF);
-    emit_byte_e(entropy, JPEG_RST0 + restart_num);
-  }
-
-  if (entropy->cinfo->Ss == 0) {
-    /* Re-initialize DC predictions to 0 */
-    for (ci = 0; ci < entropy->cinfo->comps_in_scan; ci++)
-      entropy->saved.last_dc_val[ci] = 0;
-  } else {
-    /* Re-initialize all AC-related fields to 0 */
-    entropy->EOBRUN = 0;
-    entropy->BE = 0;
-  }
-}
-
-
-/*
- * MCU encoding for DC initial scan (either spectral selection,
- * or first pass of successive approximation).
- */
-
-METHODDEF(boolean)
-encode_mcu_DC_first (j_compress_ptr cinfo, JBLOCKROW *MCU_data)
-{
-  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
-  register int temp, temp2;
-  register int nbits;
-  int blkn, ci;
-  int Al = cinfo->Al;
-  JBLOCKROW block;
-  jpeg_component_info * compptr;
-  ISHIFT_TEMPS
-
-  entropy->next_output_byte = cinfo->dest->next_output_byte;
-  entropy->free_in_buffer = cinfo->dest->free_in_buffer;
-
-  /* Emit restart marker if needed */
-  if (cinfo->restart_interval)
-    if (entropy->restarts_to_go == 0)
-      emit_restart_e(entropy, entropy->next_restart_num);
-
-  /* Encode the MCU data blocks */
-  for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
-    block = MCU_data[blkn];
-    ci = cinfo->MCU_membership[blkn];
-    compptr = cinfo->cur_comp_info[ci];
-
-    /* Compute the DC value after the required point transform by Al.
-     * This is simply an arithmetic right shift.
-     */
-    temp2 = IRIGHT_SHIFT((int) ((*block)[0]), Al);
-
-    /* DC differences are figured on the point-transformed values. */
-    temp = temp2 - entropy->saved.last_dc_val[ci];
-    entropy->saved.last_dc_val[ci] = temp2;
-
-    /* Encode the DC coefficient difference per section G.1.2.1 */
-    temp2 = temp;
-    if (temp < 0) {
-      temp = -temp;		/* temp is abs value of input */
-      /* For a negative input, want temp2 = bitwise complement of abs(input) */
-      /* This code assumes we are on a two's complement machine */
-      temp2--;
-    }
-    
-    /* Find the number of bits needed for the magnitude of the coefficient */
-    nbits = 0;
-    while (temp) {
-      nbits++;
-      temp >>= 1;
-    }
-    /* Check for out-of-range coefficient values.
-     * Since we're encoding a difference, the range limit is twice as much.
-     */
-    if (nbits > MAX_COEF_BITS+1)
-      ERREXIT(cinfo, JERR_BAD_DCT_COEF);
-    
-    /* Count/emit the Huffman-coded symbol for the number of bits */
-    emit_dc_symbol(entropy, compptr->dc_tbl_no, nbits);
-    
-    /* Emit that number of bits of the value, if positive, */
-    /* or the complement of its magnitude, if negative. */
-    if (nbits)			/* emit_bits rejects calls with size 0 */
-      emit_bits_e(entropy, (unsigned int) temp2, nbits);
-  }
-
-  cinfo->dest->next_output_byte = entropy->next_output_byte;
-  cinfo->dest->free_in_buffer = entropy->free_in_buffer;
-
-  /* Update restart-interval state too */
-  if (cinfo->restart_interval) {
-    if (entropy->restarts_to_go == 0) {
-      entropy->restarts_to_go = cinfo->restart_interval;
-      entropy->next_restart_num++;
-      entropy->next_restart_num &= 7;
-    }
-    entropy->restarts_to_go--;
-  }
-
-  return TRUE;
-}
-
-
-/*
- * MCU encoding for AC initial scan (either spectral selection,
- * or first pass of successive approximation).
- */
-
-METHODDEF(boolean)
-encode_mcu_AC_first (j_compress_ptr cinfo, JBLOCKROW *MCU_data)
-{
-  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
-  register int temp, temp2;
-  register int nbits;
-  register int r, k;
-  int Se, Al;
-  const int * natural_order;
-  JBLOCKROW block;
-
-  entropy->next_output_byte = cinfo->dest->next_output_byte;
-  entropy->free_in_buffer = cinfo->dest->free_in_buffer;
-
-  /* Emit restart marker if needed */
-  if (cinfo->restart_interval)
-    if (entropy->restarts_to_go == 0)
-      emit_restart_e(entropy, entropy->next_restart_num);
-
-  Se = cinfo->Se;
-  Al = cinfo->Al;
-  natural_order = cinfo->natural_order;
-
-  /* Encode the MCU data block */
-  block = MCU_data[0];
-
-  /* Encode the AC coefficients per section G.1.2.2, fig. G.3 */
-  
-  r = 0;			/* r = run length of zeros */
-   
-  for (k = cinfo->Ss; k <= Se; k++) {
-    if ((temp = (*block)[natural_order[k]]) == 0) {
-      r++;
-      continue;
-    }
-    /* We must apply the point transform by Al.  For AC coefficients this
-     * is an integer division with rounding towards 0.  To do this portably
-     * in C, we shift after obtaining the absolute value; so the code is
-     * interwoven with finding the abs value (temp) and output bits (temp2).
-     */
-    if (temp < 0) {
-      temp = -temp;		/* temp is abs value of input */
-      temp >>= Al;		/* apply the point transform */
-      /* For a negative coef, want temp2 = bitwise complement of abs(coef) */
-      temp2 = ~temp;
-    } else {
-      temp >>= Al;		/* apply the point transform */
-      temp2 = temp;
-    }
-    /* Watch out for case that nonzero coef is zero after point transform */
-    if (temp == 0) {
-      r++;
-      continue;
-    }
-
-    /* Emit any pending EOBRUN */
-    if (entropy->EOBRUN > 0)
-      emit_eobrun(entropy);
-    /* if run length > 15, must emit special run-length-16 codes (0xF0) */
-    while (r > 15) {
-      emit_ac_symbol(entropy, entropy->ac_tbl_no, 0xF0);
-      r -= 16;
-    }
-
-    /* Find the number of bits needed for the magnitude of the coefficient */
-    nbits = 1;			/* there must be at least one 1 bit */
-    while ((temp >>= 1))
-      nbits++;
-    /* Check for out-of-range coefficient values */
-    if (nbits > MAX_COEF_BITS)
-      ERREXIT(cinfo, JERR_BAD_DCT_COEF);
-
-    /* Count/emit Huffman symbol for run length / number of bits */
-    emit_ac_symbol(entropy, entropy->ac_tbl_no, (r << 4) + nbits);
-
-    /* Emit that number of bits of the value, if positive, */
-    /* or the complement of its magnitude, if negative. */
-    emit_bits_e(entropy, (unsigned int) temp2, nbits);
-
-    r = 0;			/* reset zero run length */
-  }
-
-  if (r > 0) {			/* If there are trailing zeroes, */
-    entropy->EOBRUN++;		/* count an EOB */
-    if (entropy->EOBRUN == 0x7FFF)
-      emit_eobrun(entropy);	/* force it out to avoid overflow */
-  }
-
-  cinfo->dest->next_output_byte = entropy->next_output_byte;
-  cinfo->dest->free_in_buffer = entropy->free_in_buffer;
-
-  /* Update restart-interval state too */
-  if (cinfo->restart_interval) {
-    if (entropy->restarts_to_go == 0) {
-      entropy->restarts_to_go = cinfo->restart_interval;
-      entropy->next_restart_num++;
-      entropy->next_restart_num &= 7;
-    }
-    entropy->restarts_to_go--;
-  }
-
-  return TRUE;
-}
-
-
-/*
- * MCU encoding for DC successive approximation refinement scan.
- * Note: we assume such scans can be multi-component, although the spec
- * is not very clear on the point.
- */
-
-METHODDEF(boolean)
-encode_mcu_DC_refine (j_compress_ptr cinfo, JBLOCKROW *MCU_data)
-{
-  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
-  register int temp;
-  int blkn;
-  int Al = cinfo->Al;
-  JBLOCKROW block;
-
-  entropy->next_output_byte = cinfo->dest->next_output_byte;
-  entropy->free_in_buffer = cinfo->dest->free_in_buffer;
-
-  /* Emit restart marker if needed */
-  if (cinfo->restart_interval)
-    if (entropy->restarts_to_go == 0)
-      emit_restart_e(entropy, entropy->next_restart_num);
-
-  /* Encode the MCU data blocks */
-  for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
-    block = MCU_data[blkn];
-
-    /* We simply emit the Al'th bit of the DC coefficient value. */
-    temp = (*block)[0];
-    emit_bits_e(entropy, (unsigned int) (temp >> Al), 1);
-  }
-
-  cinfo->dest->next_output_byte = entropy->next_output_byte;
-  cinfo->dest->free_in_buffer = entropy->free_in_buffer;
-
-  /* Update restart-interval state too */
-  if (cinfo->restart_interval) {
-    if (entropy->restarts_to_go == 0) {
-      entropy->restarts_to_go = cinfo->restart_interval;
-      entropy->next_restart_num++;
-      entropy->next_restart_num &= 7;
-    }
-    entropy->restarts_to_go--;
-  }
-
-  return TRUE;
-}
-
-
-/*
- * MCU encoding for AC successive approximation refinement scan.
- */
-
-METHODDEF(boolean)
-encode_mcu_AC_refine (j_compress_ptr cinfo, JBLOCKROW *MCU_data)
-{
-  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
-  register int temp;
-  register int r, k;
-  int EOB;
-  char *BR_buffer;
-  unsigned int BR;
-  int Se, Al;
-  const int * natural_order;
-  JBLOCKROW block;
-  int absvalues[DCTSIZE2];
-
-  entropy->next_output_byte = cinfo->dest->next_output_byte;
-  entropy->free_in_buffer = cinfo->dest->free_in_buffer;
-
-  /* Emit restart marker if needed */
-  if (cinfo->restart_interval)
-    if (entropy->restarts_to_go == 0)
-      emit_restart_e(entropy, entropy->next_restart_num);
-
-  Se = cinfo->Se;
-  Al = cinfo->Al;
-  natural_order = cinfo->natural_order;
-
-  /* Encode the MCU data block */
-  block = MCU_data[0];
-
-  /* It is convenient to make a pre-pass to determine the transformed
-   * coefficients' absolute values and the EOB position.
-   */
-  EOB = 0;
-  for (k = cinfo->Ss; k <= Se; k++) {
-    temp = (*block)[natural_order[k]];
-    /* We must apply the point transform by Al.  For AC coefficients this
-     * is an integer division with rounding towards 0.  To do this portably
-     * in C, we shift after obtaining the absolute value.
-     */
-    if (temp < 0)
-      temp = -temp;		/* temp is abs value of input */
-    temp >>= Al;		/* apply the point transform */
-    absvalues[k] = temp;	/* save abs value for main pass */
-    if (temp == 1)
-      EOB = k;			/* EOB = index of last newly-nonzero coef */
-  }
-
-  /* Encode the AC coefficients per section G.1.2.3, fig. G.7 */
-  
-  r = 0;			/* r = run length of zeros */
-  BR = 0;			/* BR = count of buffered bits added now */
-  BR_buffer = entropy->bit_buffer + entropy->BE; /* Append bits to buffer */
-
-  for (k = cinfo->Ss; k <= Se; k++) {
-    if ((temp = absvalues[k]) == 0) {
-      r++;
-      continue;
-    }
-
-    /* Emit any required ZRLs, but not if they can be folded into EOB */
-    while (r > 15 && k <= EOB) {
-      /* emit any pending EOBRUN and the BE correction bits */
-      emit_eobrun(entropy);
-      /* Emit ZRL */
-      emit_ac_symbol(entropy, entropy->ac_tbl_no, 0xF0);
-      r -= 16;
-      /* Emit buffered correction bits that must be associated with ZRL */
-      emit_buffered_bits(entropy, BR_buffer, BR);
-      BR_buffer = entropy->bit_buffer; /* BE bits are gone now */
-      BR = 0;
-    }
-
-    /* If the coef was previously nonzero, it only needs a correction bit.
-     * NOTE: a straight translation of the spec's figure G.7 would suggest
-     * that we also need to test r > 15.  But if r > 15, we can only get here
-     * if k > EOB, which implies that this coefficient is not 1.
-     */
-    if (temp > 1) {
-      /* The correction bit is the next bit of the absolute value. */
-      BR_buffer[BR++] = (char) (temp & 1);
-      continue;
-    }
-
-    /* Emit any pending EOBRUN and the BE correction bits */
-    emit_eobrun(entropy);
-
-    /* Count/emit Huffman symbol for run length / number of bits */
-    emit_ac_symbol(entropy, entropy->ac_tbl_no, (r << 4) + 1);
-
-    /* Emit output bit for newly-nonzero coef */
-    temp = ((*block)[natural_order[k]] < 0) ? 0 : 1;
-    emit_bits_e(entropy, (unsigned int) temp, 1);
-
-    /* Emit buffered correction bits that must be associated with this code */
-    emit_buffered_bits(entropy, BR_buffer, BR);
-    BR_buffer = entropy->bit_buffer; /* BE bits are gone now */
-    BR = 0;
-    r = 0;			/* reset zero run length */
-  }
-
-  if (r > 0 || BR > 0) {	/* If there are trailing zeroes, */
-    entropy->EOBRUN++;		/* count an EOB */
-    entropy->BE += BR;		/* concat my correction bits to older ones */
-    /* We force out the EOB if we risk either:
-     * 1. overflow of the EOB counter;
-     * 2. overflow of the correction bit buffer during the next MCU.
-     */
-    if (entropy->EOBRUN == 0x7FFF || entropy->BE > (MAX_CORR_BITS-DCTSIZE2+1))
-      emit_eobrun(entropy);
-  }
-
-  cinfo->dest->next_output_byte = entropy->next_output_byte;
-  cinfo->dest->free_in_buffer = entropy->free_in_buffer;
-
-  /* Update restart-interval state too */
-  if (cinfo->restart_interval) {
-    if (entropy->restarts_to_go == 0) {
-      entropy->restarts_to_go = cinfo->restart_interval;
-      entropy->next_restart_num++;
-      entropy->next_restart_num &= 7;
-    }
-    entropy->restarts_to_go--;
-  }
-
-  return TRUE;
-}
-
-
-/* Encode a single block's worth of coefficients */
-
-LOCAL(boolean)
-encode_one_block (working_state * state, JCOEFPTR block, int last_dc_val,
-		  c_derived_tbl *dctbl, c_derived_tbl *actbl)
-{
-  register int temp, temp2;
-  register int nbits;
-  register int k, r, i;
-  int Se = state->cinfo->lim_Se;
-  const int * natural_order = state->cinfo->natural_order;
-
-  /* Encode the DC coefficient difference per section F.1.2.1 */
-
-  temp = temp2 = block[0] - last_dc_val;
-
-  if (temp < 0) {
-    temp = -temp;		/* temp is abs value of input */
-    /* For a negative input, want temp2 = bitwise complement of abs(input) */
-    /* This code assumes we are on a two's complement machine */
-    temp2--;
-  }
-
-  /* Find the number of bits needed for the magnitude of the coefficient */
-  nbits = 0;
-  while (temp) {
-    nbits++;
-    temp >>= 1;
-  }
-  /* Check for out-of-range coefficient values.
-   * Since we're encoding a difference, the range limit is twice as much.
-   */
-  if (nbits > MAX_COEF_BITS+1)
-    ERREXIT(state->cinfo, JERR_BAD_DCT_COEF);
-
-  /* Emit the Huffman-coded symbol for the number of bits */
-  if (! emit_bits_s(state, dctbl->ehufco[nbits], dctbl->ehufsi[nbits]))
-    return FALSE;
-
-  /* Emit that number of bits of the value, if positive, */
-  /* or the complement of its magnitude, if negative. */
-  if (nbits)			/* emit_bits rejects calls with size 0 */
-    if (! emit_bits_s(state, (unsigned int) temp2, nbits))
-      return FALSE;
-
-  /* Encode the AC coefficients per section F.1.2.2 */
-
-  r = 0;			/* r = run length of zeros */
-
-  for (k = 1; k <= Se; k++) {
-    if ((temp = block[natural_order[k]]) == 0) {
-      r++;
-    } else {
-      /* if run length > 15, must emit special run-length-16 codes (0xF0) */
-      while (r > 15) {
-	if (! emit_bits_s(state, actbl->ehufco[0xF0], actbl->ehufsi[0xF0]))
-	  return FALSE;
-	r -= 16;
-      }
-
-      temp2 = temp;
-      if (temp < 0) {
-	temp = -temp;		/* temp is abs value of input */
-	/* This code assumes we are on a two's complement machine */
-	temp2--;
-      }
-
-      /* Find the number of bits needed for the magnitude of the coefficient */
-      nbits = 1;		/* there must be at least one 1 bit */
-      while ((temp >>= 1))
-	nbits++;
-      /* Check for out-of-range coefficient values */
-      if (nbits > MAX_COEF_BITS)
-	ERREXIT(state->cinfo, JERR_BAD_DCT_COEF);
-
-      /* Emit Huffman symbol for run length / number of bits */
-      i = (r << 4) + nbits;
-      if (! emit_bits_s(state, actbl->ehufco[i], actbl->ehufsi[i]))
-	return FALSE;
-
-      /* Emit that number of bits of the value, if positive, */
-      /* or the complement of its magnitude, if negative. */
-      if (! emit_bits_s(state, (unsigned int) temp2, nbits))
-	return FALSE;
-
-      r = 0;
-    }
-  }
-
-  /* If the last coef(s) were zero, emit an end-of-block code */
-  if (r > 0)
-    if (! emit_bits_s(state, actbl->ehufco[0], actbl->ehufsi[0]))
-      return FALSE;
-
-  return TRUE;
-}
-
-
-/*
- * Encode and output one MCU's worth of Huffman-compressed coefficients.
- */
-
-METHODDEF(boolean)
-encode_mcu_huff (j_compress_ptr cinfo, JBLOCKROW *MCU_data)
-{
-  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
-  working_state state;
-  int blkn, ci;
-  jpeg_component_info * compptr;
-
-  /* Load up working state */
-  state.next_output_byte = cinfo->dest->next_output_byte;
-  state.free_in_buffer = cinfo->dest->free_in_buffer;
-  ASSIGN_STATE(state.cur, entropy->saved);
-  state.cinfo = cinfo;
-
-  /* Emit restart marker if needed */
-  if (cinfo->restart_interval) {
-    if (entropy->restarts_to_go == 0)
-      if (! emit_restart_s(&state, entropy->next_restart_num))
-	return FALSE;
-  }
-
-  /* Encode the MCU data blocks */
-  for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
-    ci = cinfo->MCU_membership[blkn];
-    compptr = cinfo->cur_comp_info[ci];
-    if (! encode_one_block(&state,
-			   MCU_data[blkn][0], state.cur.last_dc_val[ci],
-			   entropy->dc_derived_tbls[compptr->dc_tbl_no],
-			   entropy->ac_derived_tbls[compptr->ac_tbl_no]))
-      return FALSE;
-    /* Update last_dc_val */
-    state.cur.last_dc_val[ci] = MCU_data[blkn][0][0];
-  }
-
-  /* Completed MCU, so update state */
-  cinfo->dest->next_output_byte = state.next_output_byte;
-  cinfo->dest->free_in_buffer = state.free_in_buffer;
-  ASSIGN_STATE(entropy->saved, state.cur);
-
-  /* Update restart-interval state too */
-  if (cinfo->restart_interval) {
-    if (entropy->restarts_to_go == 0) {
-      entropy->restarts_to_go = cinfo->restart_interval;
-      entropy->next_restart_num++;
-      entropy->next_restart_num &= 7;
-    }
-    entropy->restarts_to_go--;
-  }
-
-  return TRUE;
-}
-
-
-/*
- * Finish up at the end of a Huffman-compressed scan.
- */
-
-METHODDEF(void)
-finish_pass_huff (j_compress_ptr cinfo)
-{
-  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
-  working_state state;
-
-  if (cinfo->progressive_mode) {
-    entropy->next_output_byte = cinfo->dest->next_output_byte;
-    entropy->free_in_buffer = cinfo->dest->free_in_buffer;
-
-    /* Flush out any buffered data */
-    emit_eobrun(entropy);
-    flush_bits_e(entropy);
-
-    cinfo->dest->next_output_byte = entropy->next_output_byte;
-    cinfo->dest->free_in_buffer = entropy->free_in_buffer;
-  } else {
-    /* Load up working state ... flush_bits needs it */
-    state.next_output_byte = cinfo->dest->next_output_byte;
-    state.free_in_buffer = cinfo->dest->free_in_buffer;
-    ASSIGN_STATE(state.cur, entropy->saved);
-    state.cinfo = cinfo;
-
-    /* Flush out the last data */
-    if (! flush_bits_s(&state))
-      ERREXIT(cinfo, JERR_CANT_SUSPEND);
-
-    /* Update state */
-    cinfo->dest->next_output_byte = state.next_output_byte;
-    cinfo->dest->free_in_buffer = state.free_in_buffer;
-    ASSIGN_STATE(entropy->saved, state.cur);
-  }
-}
-
-
-/*
- * Huffman coding optimization.
- *
- * We first scan the supplied data and count the number of uses of each symbol
- * that is to be Huffman-coded. (This process MUST agree with the code above.)
- * Then we build a Huffman coding tree for the observed counts.
- * Symbols which are not needed at all for the particular image are not
- * assigned any code, which saves space in the DHT marker as well as in
- * the compressed data.
- */
-
-
-/* Process a single block's worth of coefficients */
-
-LOCAL(void)
-htest_one_block (j_compress_ptr cinfo, JCOEFPTR block, int last_dc_val,
-		 long dc_counts[], long ac_counts[])
-{
-  register int temp;
-  register int nbits;
-  register int k, r;
-  int Se = cinfo->lim_Se;
-  const int * natural_order = cinfo->natural_order;
-  
-  /* Encode the DC coefficient difference per section F.1.2.1 */
-  
-  temp = block[0] - last_dc_val;
-  if (temp < 0)
-    temp = -temp;
-  
-  /* Find the number of bits needed for the magnitude of the coefficient */
-  nbits = 0;
-  while (temp) {
-    nbits++;
-    temp >>= 1;
-  }
-  /* Check for out-of-range coefficient values.
-   * Since we're encoding a difference, the range limit is twice as much.
-   */
-  if (nbits > MAX_COEF_BITS+1)
-    ERREXIT(cinfo, JERR_BAD_DCT_COEF);
-
-  /* Count the Huffman symbol for the number of bits */
-  dc_counts[nbits]++;
-  
-  /* Encode the AC coefficients per section F.1.2.2 */
-  
-  r = 0;			/* r = run length of zeros */
-  
-  for (k = 1; k <= Se; k++) {
-    if ((temp = block[natural_order[k]]) == 0) {
-      r++;
-    } else {
-      /* if run length > 15, must emit special run-length-16 codes (0xF0) */
-      while (r > 15) {
-	ac_counts[0xF0]++;
-	r -= 16;
-      }
-      
-      /* Find the number of bits needed for the magnitude of the coefficient */
-      if (temp < 0)
-	temp = -temp;
-      
-      /* Find the number of bits needed for the magnitude of the coefficient */
-      nbits = 1;		/* there must be at least one 1 bit */
-      while ((temp >>= 1))
-	nbits++;
-      /* Check for out-of-range coefficient values */
-      if (nbits > MAX_COEF_BITS)
-	ERREXIT(cinfo, JERR_BAD_DCT_COEF);
-      
-      /* Count Huffman symbol for run length / number of bits */
-      ac_counts[(r << 4) + nbits]++;
-      
-      r = 0;
-    }
-  }
-
-  /* If the last coef(s) were zero, emit an end-of-block code */
-  if (r > 0)
-    ac_counts[0]++;
-}
-
-
-/*
- * Trial-encode one MCU's worth of Huffman-compressed coefficients.
- * No data is actually output, so no suspension return is possible.
- */
-
-METHODDEF(boolean)
-encode_mcu_gather (j_compress_ptr cinfo, JBLOCKROW *MCU_data)
-{
-  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
-  int blkn, ci;
-  jpeg_component_info * compptr;
-
-  /* Take care of restart intervals if needed */
-  if (cinfo->restart_interval) {
-    if (entropy->restarts_to_go == 0) {
-      /* Re-initialize DC predictions to 0 */
-      for (ci = 0; ci < cinfo->comps_in_scan; ci++)
-	entropy->saved.last_dc_val[ci] = 0;
-      /* Update restart state */
-      entropy->restarts_to_go = cinfo->restart_interval;
-    }
-    entropy->restarts_to_go--;
-  }
-
-  for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
-    ci = cinfo->MCU_membership[blkn];
-    compptr = cinfo->cur_comp_info[ci];
-    htest_one_block(cinfo, MCU_data[blkn][0], entropy->saved.last_dc_val[ci],
-		    entropy->dc_count_ptrs[compptr->dc_tbl_no],
-		    entropy->ac_count_ptrs[compptr->ac_tbl_no]);
-    entropy->saved.last_dc_val[ci] = MCU_data[blkn][0][0];
-  }
-
-  return TRUE;
-}
-
-
-/*
- * Generate the best Huffman code table for the given counts, fill htbl.
- *
- * The JPEG standard requires that no symbol be assigned a codeword of all
- * one bits (so that padding bits added at the end of a compressed segment
- * can't look like a valid code).  Because of the canonical ordering of
- * codewords, this just means that there must be an unused slot in the
- * longest codeword length category.  Section K.2 of the JPEG spec suggests
- * reserving such a slot by pretending that symbol 256 is a valid symbol
- * with count 1.  In theory that's not optimal; giving it count zero but
- * including it in the symbol set anyway should give a better Huffman code.
- * But the theoretically better code actually seems to come out worse in
- * practice, because it produces more all-ones bytes (which incur stuffed
- * zero bytes in the final file).  In any case the difference is tiny.
- *
- * The JPEG standard requires Huffman codes to be no more than 16 bits long.
- * If some symbols have a very small but nonzero probability, the Huffman tree
- * must be adjusted to meet the code length restriction.  We currently use
- * the adjustment method suggested in JPEG section K.2.  This method is *not*
- * optimal; it may not choose the best possible limited-length code.  But
- * typically only very-low-frequency symbols will be given less-than-optimal
- * lengths, so the code is almost optimal.  Experimental comparisons against
- * an optimal limited-length-code algorithm indicate that the difference is
- * microscopic --- usually less than a hundredth of a percent of total size.
- * So the extra complexity of an optimal algorithm doesn't seem worthwhile.
- */
-
-LOCAL(void)
-jpeg_gen_optimal_table (j_compress_ptr cinfo, JHUFF_TBL * htbl, long freq[])
-{
-#define MAX_CLEN 32		/* assumed maximum initial code length */
-  UINT8 bits[MAX_CLEN+1];	/* bits[k] = # of symbols with code length k */
-  int codesize[257];		/* codesize[k] = code length of symbol k */
-  int others[257];		/* next symbol in current branch of tree */
-  int c1, c2;
-  int p, i, j;
-  long v;
-
-  /* This algorithm is explained in section K.2 of the JPEG standard */
-
-  MEMZERO(bits, SIZEOF(bits));
-  MEMZERO(codesize, SIZEOF(codesize));
-  for (i = 0; i < 257; i++)
-    others[i] = -1;		/* init links to empty */
-  
-  freq[256] = 1;		/* make sure 256 has a nonzero count */
-  /* Including the pseudo-symbol 256 in the Huffman procedure guarantees
-   * that no real symbol is given code-value of all ones, because 256
-   * will be placed last in the largest codeword category.
-   */
-
-  /* Huffman's basic algorithm to assign optimal code lengths to symbols */
-
-  for (;;) {
-    /* Find the smallest nonzero frequency, set c1 = its symbol */
-    /* In case of ties, take the larger symbol number */
-    c1 = -1;
-    v = 1000000000L;
-    for (i = 0; i <= 256; i++) {
-      if (freq[i] && freq[i] <= v) {
-	v = freq[i];
-	c1 = i;
-      }
-    }
-
-    /* Find the next smallest nonzero frequency, set c2 = its symbol */
-    /* In case of ties, take the larger symbol number */
-    c2 = -1;
-    v = 1000000000L;
-    for (i = 0; i <= 256; i++) {
-      if (freq[i] && freq[i] <= v && i != c1) {
-	v = freq[i];
-	c2 = i;
-      }
-    }
-
-    /* Done if we've merged everything into one frequency */
-    if (c2 < 0)
-      break;
-    
-    /* Else merge the two counts/trees */
-    freq[c1] += freq[c2];
-    freq[c2] = 0;
-
-    /* Increment the codesize of everything in c1's tree branch */
-    codesize[c1]++;
-    while (others[c1] >= 0) {
-      c1 = others[c1];
-      codesize[c1]++;
-    }
-    
-    others[c1] = c2;		/* chain c2 onto c1's tree branch */
-    
-    /* Increment the codesize of everything in c2's tree branch */
-    codesize[c2]++;
-    while (others[c2] >= 0) {
-      c2 = others[c2];
-      codesize[c2]++;
-    }
-  }
-
-  /* Now count the number of symbols of each code length */
-  for (i = 0; i <= 256; i++) {
-    if (codesize[i]) {
-      /* The JPEG standard seems to think that this can't happen, */
-      /* but I'm paranoid... */
-      if (codesize[i] > MAX_CLEN)
-	ERREXIT(cinfo, JERR_HUFF_CLEN_OVERFLOW);
-
-      bits[codesize[i]]++;
-    }
-  }
-
-  /* JPEG doesn't allow symbols with code lengths over 16 bits, so if the pure
-   * Huffman procedure assigned any such lengths, we must adjust the coding.
-   * Here is what the JPEG spec says about how this next bit works:
-   * Since symbols are paired for the longest Huffman code, the symbols are
-   * removed from this length category two at a time.  The prefix for the pair
-   * (which is one bit shorter) is allocated to one of the pair; then,
-   * skipping the BITS entry for that prefix length, a code word from the next
-   * shortest nonzero BITS entry is converted into a prefix for two code words
-   * one bit longer.
-   */
-  
-  for (i = MAX_CLEN; i > 16; i--) {
-    while (bits[i] > 0) {
-      j = i - 2;		/* find length of new prefix to be used */
-      while (bits[j] == 0)
-	j--;
-      
-      bits[i] -= 2;		/* remove two symbols */
-      bits[i-1]++;		/* one goes in this length */
-      bits[j+1] += 2;		/* two new symbols in this length */
-      bits[j]--;		/* symbol of this length is now a prefix */
-    }
-  }
-
-  /* Remove the count for the pseudo-symbol 256 from the largest codelength */
-  while (bits[i] == 0)		/* find largest codelength still in use */
-    i--;
-  bits[i]--;
-  
-  /* Return final symbol counts (only for lengths 0..16) */
-  MEMCOPY(htbl->bits, bits, SIZEOF(htbl->bits));
-  
-  /* Return a list of the symbols sorted by code length */
-  /* It's not real clear to me why we don't need to consider the codelength
-   * changes made above, but the JPEG spec seems to think this works.
-   */
-  p = 0;
-  for (i = 1; i <= MAX_CLEN; i++) {
-    for (j = 0; j <= 255; j++) {
-      if (codesize[j] == i) {
-	htbl->huffval[p] = (UINT8) j;
-	p++;
-      }
-    }
-  }
-
-  /* Set sent_table FALSE so updated table will be written to JPEG file. */
-  htbl->sent_table = FALSE;
-}
-
-
-/*
- * Finish up a statistics-gathering pass and create the new Huffman tables.
- */
-
-METHODDEF(void)
-finish_pass_gather (j_compress_ptr cinfo)
-{
-  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
-  int ci, tbl;
-  jpeg_component_info * compptr;
-  JHUFF_TBL **htblptr;
-  boolean did_dc[NUM_HUFF_TBLS];
-  boolean did_ac[NUM_HUFF_TBLS];
-
-  /* It's important not to apply jpeg_gen_optimal_table more than once
-   * per table, because it clobbers the input frequency counts!
-   */
-  if (cinfo->progressive_mode)
-    /* Flush out buffered data (all we care about is counting the EOB symbol) */
-    emit_eobrun(entropy);
-
-  MEMZERO(did_dc, SIZEOF(did_dc));
-  MEMZERO(did_ac, SIZEOF(did_ac));
-
-  for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
-    compptr = cinfo->cur_comp_info[ci];
-    /* DC needs no table for refinement scan */
-    if (cinfo->Ss == 0 && cinfo->Ah == 0) {
-      tbl = compptr->dc_tbl_no;
-      if (! did_dc[tbl]) {
-	htblptr = & cinfo->dc_huff_tbl_ptrs[tbl];
-	if (*htblptr == NULL)
-	  *htblptr = jpeg_alloc_huff_table((j_common_ptr) cinfo);
-	jpeg_gen_optimal_table(cinfo, *htblptr, entropy->dc_count_ptrs[tbl]);
-	did_dc[tbl] = TRUE;
-      }
-    }
-    /* AC needs no table when not present */
-    if (cinfo->Se) {
-      tbl = compptr->ac_tbl_no;
-      if (! did_ac[tbl]) {
-	htblptr = & cinfo->ac_huff_tbl_ptrs[tbl];
-	if (*htblptr == NULL)
-	  *htblptr = jpeg_alloc_huff_table((j_common_ptr) cinfo);
-	jpeg_gen_optimal_table(cinfo, *htblptr, entropy->ac_count_ptrs[tbl]);
-	did_ac[tbl] = TRUE;
-      }
-    }
-  }
-}
-
-
-/*
- * Initialize for a Huffman-compressed scan.
- * If gather_statistics is TRUE, we do not output anything during the scan,
- * just count the Huffman symbols used and generate Huffman code tables.
- */
-
-METHODDEF(void)
-start_pass_huff (j_compress_ptr cinfo, boolean gather_statistics)
-{
-  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
-  int ci, tbl;
-  jpeg_component_info * compptr;
-
-  if (gather_statistics)
-    entropy->pub.finish_pass = finish_pass_gather;
-  else
-    entropy->pub.finish_pass = finish_pass_huff;
-
-  if (cinfo->progressive_mode) {
-    entropy->cinfo = cinfo;
-    entropy->gather_statistics = gather_statistics;
-
-    /* We assume jcmaster.c already validated the scan parameters. */
-
-    /* Select execution routine */
-    if (cinfo->Ah == 0) {
-      if (cinfo->Ss == 0)
-	entropy->pub.encode_mcu = encode_mcu_DC_first;
-      else
-	entropy->pub.encode_mcu = encode_mcu_AC_first;
-    } else {
-      if (cinfo->Ss == 0)
-	entropy->pub.encode_mcu = encode_mcu_DC_refine;
-      else {
-	entropy->pub.encode_mcu = encode_mcu_AC_refine;
-	/* AC refinement needs a correction bit buffer */
-	if (entropy->bit_buffer == NULL)
-	  entropy->bit_buffer = (char *)
-	    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-					MAX_CORR_BITS * SIZEOF(char));
-      }
-    }
-
-    /* Initialize AC stuff */
-    entropy->ac_tbl_no = cinfo->cur_comp_info[0]->ac_tbl_no;
-    entropy->EOBRUN = 0;
-    entropy->BE = 0;
-  } else {
-    if (gather_statistics)
-      entropy->pub.encode_mcu = encode_mcu_gather;
-    else
-      entropy->pub.encode_mcu = encode_mcu_huff;
-  }
-
-  for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
-    compptr = cinfo->cur_comp_info[ci];
-    /* DC needs no table for refinement scan */
-    if (cinfo->Ss == 0 && cinfo->Ah == 0) {
-      tbl = compptr->dc_tbl_no;
-      if (gather_statistics) {
-	/* Check for invalid table index */
-	/* (make_c_derived_tbl does this in the other path) */
-	if (tbl < 0 || tbl >= NUM_HUFF_TBLS)
-	  ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, tbl);
-	/* Allocate and zero the statistics tables */
-	/* Note that jpeg_gen_optimal_table expects 257 entries in each table! */
-	if (entropy->dc_count_ptrs[tbl] == NULL)
-	  entropy->dc_count_ptrs[tbl] = (long *)
-	    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-					257 * SIZEOF(long));
-	MEMZERO(entropy->dc_count_ptrs[tbl], 257 * SIZEOF(long));
-      } else {
-	/* Compute derived values for Huffman tables */
-	/* We may do this more than once for a table, but it's not expensive */
-	jpeg_make_c_derived_tbl(cinfo, TRUE, tbl,
-				& entropy->dc_derived_tbls[tbl]);
-      }
-      /* Initialize DC predictions to 0 */
-      entropy->saved.last_dc_val[ci] = 0;
-    }
-    /* AC needs no table when not present */
-    if (cinfo->Se) {
-      tbl = compptr->ac_tbl_no;
-      if (gather_statistics) {
-	if (tbl < 0 || tbl >= NUM_HUFF_TBLS)
-	  ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, tbl);
-	if (entropy->ac_count_ptrs[tbl] == NULL)
-	  entropy->ac_count_ptrs[tbl] = (long *)
-	    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-					257 * SIZEOF(long));
-	MEMZERO(entropy->ac_count_ptrs[tbl], 257 * SIZEOF(long));
-      } else {
-	jpeg_make_c_derived_tbl(cinfo, FALSE, tbl,
-				& entropy->ac_derived_tbls[tbl]);
-      }
-    }
-  }
-
-  /* Initialize bit buffer to empty */
-  entropy->saved.put_buffer = 0;
-  entropy->saved.put_bits = 0;
-
-  /* Initialize restart stuff */
-  entropy->restarts_to_go = cinfo->restart_interval;
-  entropy->next_restart_num = 0;
-}
-
-
-/*
- * Module initialization routine for Huffman entropy encoding.
- */
-
-GLOBAL(void)
-jinit_huff_encoder (j_compress_ptr cinfo)
-{
-  huff_entropy_ptr entropy;
-  int i;
-
-  entropy = (huff_entropy_ptr)
-    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				SIZEOF(huff_entropy_encoder));
-  cinfo->entropy = (struct jpeg_entropy_encoder *) entropy;
-  entropy->pub.start_pass = start_pass_huff;
-
-  /* Mark tables unallocated */
-  for (i = 0; i < NUM_HUFF_TBLS; i++) {
-    entropy->dc_derived_tbls[i] = entropy->ac_derived_tbls[i] = NULL;
-    entropy->dc_count_ptrs[i] = entropy->ac_count_ptrs[i] = NULL;
-  }
-
-  if (cinfo->progressive_mode)
-    entropy->bit_buffer = NULL;	/* needed only in AC refinement scan */
-}
+/*
+ * jchuff.c
+ *
+ * Copyright (C) 1991-1997, Thomas G. Lane.
+ * Modified 2006-2009 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains Huffman entropy encoding routines.
+ * Both sequential and progressive modes are supported in this single module.
+ *
+ * Much of the complexity here has to do with supporting output suspension.
+ * If the data destination module demands suspension, we want to be able to
+ * back up to the start of the current MCU.  To do this, we copy state
+ * variables into local working storage, and update them back to the
+ * permanent JPEG objects only upon successful completion of an MCU.
+ *
+ * We do not support output suspension for the progressive JPEG mode, since
+ * the library currently does not allow multiple-scan files to be written
+ * with output suspension.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/* The legal range of a DCT coefficient is
+ *  -1024 .. +1023  for 8-bit data;
+ * -16384 .. +16383 for 12-bit data.
+ * Hence the magnitude should always fit in 10 or 14 bits respectively.
+ */
+
+#if BITS_IN_JSAMPLE == 8
+#define MAX_COEF_BITS 10
+#else
+#define MAX_COEF_BITS 14
+#endif
+
+/* Derived data constructed for each Huffman table */
+
+typedef struct {
+  unsigned int ehufco[256];	/* code for each symbol */
+  char ehufsi[256];		/* length of code for each symbol */
+  /* If no code has been allocated for a symbol S, ehufsi[S] contains 0 */
+} c_derived_tbl;
+
+
+/* Expanded entropy encoder object for Huffman encoding.
+ *
+ * The savable_state subrecord contains fields that change within an MCU,
+ * but must not be updated permanently until we complete the MCU.
+ */
+
+typedef struct {
+  INT32 put_buffer;		/* current bit-accumulation buffer */
+  int put_bits;			/* # of bits now in it */
+  int last_dc_val[MAX_COMPS_IN_SCAN]; /* last DC coef for each component */
+} savable_state;
+
+/* This macro is to work around compilers with missing or broken
+ * structure assignment.  You'll need to fix this code if you have
+ * such a compiler and you change MAX_COMPS_IN_SCAN.
+ */
+
+#ifndef NO_STRUCT_ASSIGN
+#define ASSIGN_STATE(dest,src)  ((dest) = (src))
+#else
+#if MAX_COMPS_IN_SCAN == 4
+#define ASSIGN_STATE(dest,src)  \
+	((dest).put_buffer = (src).put_buffer, \
+	 (dest).put_bits = (src).put_bits, \
+	 (dest).last_dc_val[0] = (src).last_dc_val[0], \
+	 (dest).last_dc_val[1] = (src).last_dc_val[1], \
+	 (dest).last_dc_val[2] = (src).last_dc_val[2], \
+	 (dest).last_dc_val[3] = (src).last_dc_val[3])
+#endif
+#endif
+
+
+typedef struct {
+  struct jpeg_entropy_encoder pub; /* public fields */
+
+  savable_state saved;		/* Bit buffer & DC state at start of MCU */
+
+  /* These fields are NOT loaded into local working state. */
+  unsigned int restarts_to_go;	/* MCUs left in this restart interval */
+  int next_restart_num;		/* next restart number to write (0-7) */
+
+  /* Pointers to derived tables (these workspaces have image lifespan) */
+  c_derived_tbl * dc_derived_tbls[NUM_HUFF_TBLS];
+  c_derived_tbl * ac_derived_tbls[NUM_HUFF_TBLS];
+
+  /* Statistics tables for optimization */
+  long * dc_count_ptrs[NUM_HUFF_TBLS];
+  long * ac_count_ptrs[NUM_HUFF_TBLS];
+
+  /* Following fields used only in progressive mode */
+
+  /* Mode flag: TRUE for optimization, FALSE for actual data output */
+  boolean gather_statistics;
+
+  /* next_output_byte/free_in_buffer are local copies of cinfo->dest fields.
+   */
+  JOCTET * next_output_byte;	/* => next byte to write in buffer */
+  size_t free_in_buffer;	/* # of byte spaces remaining in buffer */
+  j_compress_ptr cinfo;		/* link to cinfo (needed for dump_buffer) */
+
+  /* Coding status for AC components */
+  int ac_tbl_no;		/* the table number of the single component */
+  unsigned int EOBRUN;		/* run length of EOBs */
+  unsigned int BE;		/* # of buffered correction bits before MCU */
+  char * bit_buffer;		/* buffer for correction bits (1 per char) */
+  /* packing correction bits tightly would save some space but cost time... */
+} huff_entropy_encoder;
+
+typedef huff_entropy_encoder * huff_entropy_ptr;
+
+/* Working state while writing an MCU (sequential mode).
+ * This struct contains all the fields that are needed by subroutines.
+ */
+
+typedef struct {
+  JOCTET * next_output_byte;	/* => next byte to write in buffer */
+  size_t free_in_buffer;	/* # of byte spaces remaining in buffer */
+  savable_state cur;		/* Current bit buffer & DC state */
+  j_compress_ptr cinfo;		/* dump_buffer needs access to this */
+} working_state;
+
+/* MAX_CORR_BITS is the number of bits the AC refinement correction-bit
+ * buffer can hold.  Larger sizes may slightly improve compression, but
+ * 1000 is already well into the realm of overkill.
+ * The minimum safe size is 64 bits.
+ */
+
+#define MAX_CORR_BITS  1000	/* Max # of correction bits I can buffer */
+
+/* IRIGHT_SHIFT is like RIGHT_SHIFT, but works on int rather than INT32.
+ * We assume that int right shift is unsigned if INT32 right shift is,
+ * which should be safe.
+ */
+
+#ifdef RIGHT_SHIFT_IS_UNSIGNED
+#define ISHIFT_TEMPS	int ishift_temp;
+#define IRIGHT_SHIFT(x,shft)  \
+	((ishift_temp = (x)) < 0 ? \
+	 (ishift_temp >> (shft)) | ((~0) << (16-(shft))) : \
+	 (ishift_temp >> (shft)))
+#else
+#define ISHIFT_TEMPS
+#define IRIGHT_SHIFT(x,shft)	((x) >> (shft))
+#endif
+
+
+/*
+ * Compute the derived values for a Huffman table.
+ * This routine also performs some validation checks on the table.
+ */
+
+LOCAL(void)
+jpeg_make_c_derived_tbl (j_compress_ptr cinfo, boolean isDC, int tblno,
+			 c_derived_tbl ** pdtbl)
+{
+  JHUFF_TBL *htbl;
+  c_derived_tbl *dtbl;
+  int p, i, l, lastp, si, maxsymbol;
+  char huffsize[257];
+  unsigned int huffcode[257];
+  unsigned int code;
+
+  /* Note that huffsize[] and huffcode[] are filled in code-length order,
+   * paralleling the order of the symbols themselves in htbl->huffval[].
+   */
+
+  /* Find the input Huffman table */
+  if (tblno < 0 || tblno >= NUM_HUFF_TBLS)
+    ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, tblno);
+  htbl =
+    isDC ? cinfo->dc_huff_tbl_ptrs[tblno] : cinfo->ac_huff_tbl_ptrs[tblno];
+  if (htbl == NULL)
+    ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, tblno);
+
+  /* Allocate a workspace if we haven't already done so. */
+  if (*pdtbl == NULL)
+    *pdtbl = (c_derived_tbl *)
+      (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				  SIZEOF(c_derived_tbl));
+  dtbl = *pdtbl;
+  
+  /* Figure C.1: make table of Huffman code length for each symbol */
+
+  p = 0;
+  for (l = 1; l <= 16; l++) {
+    i = (int) htbl->bits[l];
+    if (i < 0 || p + i > 256)	/* protect against table overrun */
+      ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
+    while (i--)
+      huffsize[p++] = (char) l;
+  }
+  huffsize[p] = 0;
+  lastp = p;
+  
+  /* Figure C.2: generate the codes themselves */
+  /* We also validate that the counts represent a legal Huffman code tree. */
+
+  code = 0;
+  si = huffsize[0];
+  p = 0;
+  while (huffsize[p]) {
+    while (((int) huffsize[p]) == si) {
+      huffcode[p++] = code;
+      code++;
+    }
+    /* code is now 1 more than the last code used for codelength si; but
+     * it must still fit in si bits, since no code is allowed to be all ones.
+     */
+    if (((INT32) code) >= (((INT32) 1) << si))
+      ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
+    code <<= 1;
+    si++;
+  }
+  
+  /* Figure C.3: generate encoding tables */
+  /* These are code and size indexed by symbol value */
+
+  /* Set all codeless symbols to have code length 0;
+   * this lets us detect duplicate VAL entries here, and later
+   * allows emit_bits to detect any attempt to emit such symbols.
+   */
+  MEMZERO(dtbl->ehufsi, SIZEOF(dtbl->ehufsi));
+
+  /* This is also a convenient place to check for out-of-range
+   * and duplicated VAL entries.  We allow 0..255 for AC symbols
+   * but only 0..15 for DC.  (We could constrain them further
+   * based on data depth and mode, but this seems enough.)
+   */
+  maxsymbol = isDC ? 15 : 255;
+
+  for (p = 0; p < lastp; p++) {
+    i = htbl->huffval[p];
+    if (i < 0 || i > maxsymbol || dtbl->ehufsi[i])
+      ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
+    dtbl->ehufco[i] = huffcode[p];
+    dtbl->ehufsi[i] = huffsize[p];
+  }
+}
+
+
+/* Outputting bytes to the file.
+ * NB: these must be called only when actually outputting,
+ * that is, entropy->gather_statistics == FALSE.
+ */
+
+/* Emit a byte, taking 'action' if must suspend. */
+#define emit_byte_s(state,val,action)  \
+	{ *(state)->next_output_byte++ = (JOCTET) (val);  \
+	  if (--(state)->free_in_buffer == 0)  \
+	    if (! dump_buffer_s(state))  \
+	      { action; } }
+
+/* Emit a byte */
+#define emit_byte_e(entropy,val)  \
+	{ *(entropy)->next_output_byte++ = (JOCTET) (val);  \
+	  if (--(entropy)->free_in_buffer == 0)  \
+	    dump_buffer_e(entropy); }
+
+
+LOCAL(boolean)
+dump_buffer_s (working_state * state)
+/* Empty the output buffer; return TRUE if successful, FALSE if must suspend */
+{
+  struct jpeg_destination_mgr * dest = state->cinfo->dest;
+
+  if (! (*dest->empty_output_buffer) (state->cinfo))
+    return FALSE;
+  /* After a successful buffer dump, must reset buffer pointers */
+  state->next_output_byte = dest->next_output_byte;
+  state->free_in_buffer = dest->free_in_buffer;
+  return TRUE;
+}
+
+
+LOCAL(void)
+dump_buffer_e (huff_entropy_ptr entropy)
+/* Empty the output buffer; we do not support suspension in this case. */
+{
+  struct jpeg_destination_mgr * dest = entropy->cinfo->dest;
+
+  if (! (*dest->empty_output_buffer) (entropy->cinfo))
+    ERREXIT(entropy->cinfo, JERR_CANT_SUSPEND);
+  /* After a successful buffer dump, must reset buffer pointers */
+  entropy->next_output_byte = dest->next_output_byte;
+  entropy->free_in_buffer = dest->free_in_buffer;
+}
+
+
+/* Outputting bits to the file */
+
+/* Only the right 24 bits of put_buffer are used; the valid bits are
+ * left-justified in this part.  At most 16 bits can be passed to emit_bits
+ * in one call, and we never retain more than 7 bits in put_buffer
+ * between calls, so 24 bits are sufficient.
+ */
+
+INLINE
+LOCAL(boolean)
+emit_bits_s (working_state * state, unsigned int code, int size)
+/* Emit some bits; return TRUE if successful, FALSE if must suspend */
+{
+  /* This routine is heavily used, so it's worth coding tightly. */
+  register INT32 put_buffer = (INT32) code;
+  register int put_bits = state->cur.put_bits;
+
+  /* if size is 0, caller used an invalid Huffman table entry */
+  if (size == 0)
+    ERREXIT(state->cinfo, JERR_HUFF_MISSING_CODE);
+
+  put_buffer &= (((INT32) 1)<<size) - 1; /* mask off any extra bits in code */
+  
+  put_bits += size;		/* new number of bits in buffer */
+  
+  put_buffer <<= 24 - put_bits; /* align incoming bits */
+
+  put_buffer |= state->cur.put_buffer; /* and merge with old buffer contents */
+  
+  while (put_bits >= 8) {
+    int c = (int) ((put_buffer >> 16) & 0xFF);
+    
+    emit_byte_s(state, c, return FALSE);
+    if (c == 0xFF) {		/* need to stuff a zero byte? */
+      emit_byte_s(state, 0, return FALSE);
+    }
+    put_buffer <<= 8;
+    put_bits -= 8;
+  }
+
+  state->cur.put_buffer = put_buffer; /* update state variables */
+  state->cur.put_bits = put_bits;
+
+  return TRUE;
+}
+
+
+INLINE
+LOCAL(void)
+emit_bits_e (huff_entropy_ptr entropy, unsigned int code, int size)
+/* Emit some bits, unless we are in gather mode */
+{
+  /* This routine is heavily used, so it's worth coding tightly. */
+  register INT32 put_buffer = (INT32) code;
+  register int put_bits = entropy->saved.put_bits;
+
+  /* if size is 0, caller used an invalid Huffman table entry */
+  if (size == 0)
+    ERREXIT(entropy->cinfo, JERR_HUFF_MISSING_CODE);
+
+  if (entropy->gather_statistics)
+    return;			/* do nothing if we're only getting stats */
+
+  put_buffer &= (((INT32) 1)<<size) - 1; /* mask off any extra bits in code */
+  
+  put_bits += size;		/* new number of bits in buffer */
+
+  put_buffer <<= 24 - put_bits; /* align incoming bits */
+
+  /* and merge with old buffer contents */
+  put_buffer |= entropy->saved.put_buffer;
+
+  while (put_bits >= 8) {
+    int c = (int) ((put_buffer >> 16) & 0xFF);
+
+    emit_byte_e(entropy, c);
+    if (c == 0xFF) {		/* need to stuff a zero byte? */
+      emit_byte_e(entropy, 0);
+    }
+    put_buffer <<= 8;
+    put_bits -= 8;
+  }
+
+  entropy->saved.put_buffer = put_buffer; /* update variables */
+  entropy->saved.put_bits = put_bits;
+}
+
+
+LOCAL(boolean)
+flush_bits_s (working_state * state)
+{
+  if (! emit_bits_s(state, 0x7F, 7)) /* fill any partial byte with ones */
+    return FALSE;
+  state->cur.put_buffer = 0;	     /* and reset bit-buffer to empty */
+  state->cur.put_bits = 0;
+  return TRUE;
+}
+
+
+LOCAL(void)
+flush_bits_e (huff_entropy_ptr entropy)
+{
+  emit_bits_e(entropy, 0x7F, 7); /* fill any partial byte with ones */
+  entropy->saved.put_buffer = 0; /* and reset bit-buffer to empty */
+  entropy->saved.put_bits = 0;
+}
+
+
+/*
+ * Emit (or just count) a Huffman symbol.
+ */
+
+INLINE
+LOCAL(void)
+emit_dc_symbol (huff_entropy_ptr entropy, int tbl_no, int symbol)
+{
+  if (entropy->gather_statistics)
+    entropy->dc_count_ptrs[tbl_no][symbol]++;
+  else {
+    c_derived_tbl * tbl = entropy->dc_derived_tbls[tbl_no];
+    emit_bits_e(entropy, tbl->ehufco[symbol], tbl->ehufsi[symbol]);
+  }
+}
+
+
+INLINE
+LOCAL(void)
+emit_ac_symbol (huff_entropy_ptr entropy, int tbl_no, int symbol)
+{
+  if (entropy->gather_statistics)
+    entropy->ac_count_ptrs[tbl_no][symbol]++;
+  else {
+    c_derived_tbl * tbl = entropy->ac_derived_tbls[tbl_no];
+    emit_bits_e(entropy, tbl->ehufco[symbol], tbl->ehufsi[symbol]);
+  }
+}
+
+
+/*
+ * Emit bits from a correction bit buffer.
+ */
+
+LOCAL(void)
+emit_buffered_bits (huff_entropy_ptr entropy, char * bufstart,
+		    unsigned int nbits)
+{
+  if (entropy->gather_statistics)
+    return;			/* no real work */
+
+  while (nbits > 0) {
+    emit_bits_e(entropy, (unsigned int) (*bufstart), 1);
+    bufstart++;
+    nbits--;
+  }
+}
+
+
+/*
+ * Emit any pending EOBRUN symbol.
+ */
+
+LOCAL(void)
+emit_eobrun (huff_entropy_ptr entropy)
+{
+  register int temp, nbits;
+
+  if (entropy->EOBRUN > 0) {	/* if there is any pending EOBRUN */
+    temp = entropy->EOBRUN;
+    nbits = 0;
+    while ((temp >>= 1))
+      nbits++;
+    /* safety check: shouldn't happen given limited correction-bit buffer */
+    if (nbits > 14)
+      ERREXIT(entropy->cinfo, JERR_HUFF_MISSING_CODE);
+
+    emit_ac_symbol(entropy, entropy->ac_tbl_no, nbits << 4);
+    if (nbits)
+      emit_bits_e(entropy, entropy->EOBRUN, nbits);
+
+    entropy->EOBRUN = 0;
+
+    /* Emit any buffered correction bits */
+    emit_buffered_bits(entropy, entropy->bit_buffer, entropy->BE);
+    entropy->BE = 0;
+  }
+}
+
+
+/*
+ * Emit a restart marker & resynchronize predictions.
+ */
+
+LOCAL(boolean)
+emit_restart_s (working_state * state, int restart_num)
+{
+  int ci;
+
+  if (! flush_bits_s(state))
+    return FALSE;
+
+  emit_byte_s(state, 0xFF, return FALSE);
+  emit_byte_s(state, JPEG_RST0 + restart_num, return FALSE);
+
+  /* Re-initialize DC predictions to 0 */
+  for (ci = 0; ci < state->cinfo->comps_in_scan; ci++)
+    state->cur.last_dc_val[ci] = 0;
+
+  /* The restart counter is not updated until we successfully write the MCU. */
+
+  return TRUE;
+}
+
+
+LOCAL(void)
+emit_restart_e (huff_entropy_ptr entropy, int restart_num)
+{
+  int ci;
+
+  emit_eobrun(entropy);
+
+  if (! entropy->gather_statistics) {
+    flush_bits_e(entropy);
+    emit_byte_e(entropy, 0xFF);
+    emit_byte_e(entropy, JPEG_RST0 + restart_num);
+  }
+
+  if (entropy->cinfo->Ss == 0) {
+    /* Re-initialize DC predictions to 0 */
+    for (ci = 0; ci < entropy->cinfo->comps_in_scan; ci++)
+      entropy->saved.last_dc_val[ci] = 0;
+  } else {
+    /* Re-initialize all AC-related fields to 0 */
+    entropy->EOBRUN = 0;
+    entropy->BE = 0;
+  }
+}
+
+
+/*
+ * MCU encoding for DC initial scan (either spectral selection,
+ * or first pass of successive approximation).
+ */
+
+METHODDEF(boolean)
+encode_mcu_DC_first (j_compress_ptr cinfo, JBLOCKROW *MCU_data)
+{
+  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
+  register int temp, temp2;
+  register int nbits;
+  int blkn, ci;
+  int Al = cinfo->Al;
+  JBLOCKROW block;
+  jpeg_component_info * compptr;
+  ISHIFT_TEMPS
+
+  entropy->next_output_byte = cinfo->dest->next_output_byte;
+  entropy->free_in_buffer = cinfo->dest->free_in_buffer;
+
+  /* Emit restart marker if needed */
+  if (cinfo->restart_interval)
+    if (entropy->restarts_to_go == 0)
+      emit_restart_e(entropy, entropy->next_restart_num);
+
+  /* Encode the MCU data blocks */
+  for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
+    block = MCU_data[blkn];
+    ci = cinfo->MCU_membership[blkn];
+    compptr = cinfo->cur_comp_info[ci];
+
+    /* Compute the DC value after the required point transform by Al.
+     * This is simply an arithmetic right shift.
+     */
+    temp2 = IRIGHT_SHIFT((int) ((*block)[0]), Al);
+
+    /* DC differences are figured on the point-transformed values. */
+    temp = temp2 - entropy->saved.last_dc_val[ci];
+    entropy->saved.last_dc_val[ci] = temp2;
+
+    /* Encode the DC coefficient difference per section G.1.2.1 */
+    temp2 = temp;
+    if (temp < 0) {
+      temp = -temp;		/* temp is abs value of input */
+      /* For a negative input, want temp2 = bitwise complement of abs(input) */
+      /* This code assumes we are on a two's complement machine */
+      temp2--;
+    }
+    
+    /* Find the number of bits needed for the magnitude of the coefficient */
+    nbits = 0;
+    while (temp) {
+      nbits++;
+      temp >>= 1;
+    }
+    /* Check for out-of-range coefficient values.
+     * Since we're encoding a difference, the range limit is twice as much.
+     */
+    if (nbits > MAX_COEF_BITS+1)
+      ERREXIT(cinfo, JERR_BAD_DCT_COEF);
+    
+    /* Count/emit the Huffman-coded symbol for the number of bits */
+    emit_dc_symbol(entropy, compptr->dc_tbl_no, nbits);
+    
+    /* Emit that number of bits of the value, if positive, */
+    /* or the complement of its magnitude, if negative. */
+    if (nbits)			/* emit_bits rejects calls with size 0 */
+      emit_bits_e(entropy, (unsigned int) temp2, nbits);
+  }
+
+  cinfo->dest->next_output_byte = entropy->next_output_byte;
+  cinfo->dest->free_in_buffer = entropy->free_in_buffer;
+
+  /* Update restart-interval state too */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0) {
+      entropy->restarts_to_go = cinfo->restart_interval;
+      entropy->next_restart_num++;
+      entropy->next_restart_num &= 7;
+    }
+    entropy->restarts_to_go--;
+  }
+
+  return TRUE;
+}
+
+
+/*
+ * MCU encoding for AC initial scan (either spectral selection,
+ * or first pass of successive approximation).
+ */
+
+METHODDEF(boolean)
+encode_mcu_AC_first (j_compress_ptr cinfo, JBLOCKROW *MCU_data)
+{
+  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
+  register int temp, temp2;
+  register int nbits;
+  register int r, k;
+  int Se, Al;
+  const int * natural_order;
+  JBLOCKROW block;
+
+  entropy->next_output_byte = cinfo->dest->next_output_byte;
+  entropy->free_in_buffer = cinfo->dest->free_in_buffer;
+
+  /* Emit restart marker if needed */
+  if (cinfo->restart_interval)
+    if (entropy->restarts_to_go == 0)
+      emit_restart_e(entropy, entropy->next_restart_num);
+
+  Se = cinfo->Se;
+  Al = cinfo->Al;
+  natural_order = cinfo->natural_order;
+
+  /* Encode the MCU data block */
+  block = MCU_data[0];
+
+  /* Encode the AC coefficients per section G.1.2.2, fig. G.3 */
+  
+  r = 0;			/* r = run length of zeros */
+   
+  for (k = cinfo->Ss; k <= Se; k++) {
+    if ((temp = (*block)[natural_order[k]]) == 0) {
+      r++;
+      continue;
+    }
+    /* We must apply the point transform by Al.  For AC coefficients this
+     * is an integer division with rounding towards 0.  To do this portably
+     * in C, we shift after obtaining the absolute value; so the code is
+     * interwoven with finding the abs value (temp) and output bits (temp2).
+     */
+    if (temp < 0) {
+      temp = -temp;		/* temp is abs value of input */
+      temp >>= Al;		/* apply the point transform */
+      /* For a negative coef, want temp2 = bitwise complement of abs(coef) */
+      temp2 = ~temp;
+    } else {
+      temp >>= Al;		/* apply the point transform */
+      temp2 = temp;
+    }
+    /* Watch out for case that nonzero coef is zero after point transform */
+    if (temp == 0) {
+      r++;
+      continue;
+    }
+
+    /* Emit any pending EOBRUN */
+    if (entropy->EOBRUN > 0)
+      emit_eobrun(entropy);
+    /* if run length > 15, must emit special run-length-16 codes (0xF0) */
+    while (r > 15) {
+      emit_ac_symbol(entropy, entropy->ac_tbl_no, 0xF0);
+      r -= 16;
+    }
+
+    /* Find the number of bits needed for the magnitude of the coefficient */
+    nbits = 1;			/* there must be at least one 1 bit */
+    while ((temp >>= 1))
+      nbits++;
+    /* Check for out-of-range coefficient values */
+    if (nbits > MAX_COEF_BITS)
+      ERREXIT(cinfo, JERR_BAD_DCT_COEF);
+
+    /* Count/emit Huffman symbol for run length / number of bits */
+    emit_ac_symbol(entropy, entropy->ac_tbl_no, (r << 4) + nbits);
+
+    /* Emit that number of bits of the value, if positive, */
+    /* or the complement of its magnitude, if negative. */
+    emit_bits_e(entropy, (unsigned int) temp2, nbits);
+
+    r = 0;			/* reset zero run length */
+  }
+
+  if (r > 0) {			/* If there are trailing zeroes, */
+    entropy->EOBRUN++;		/* count an EOB */
+    if (entropy->EOBRUN == 0x7FFF)
+      emit_eobrun(entropy);	/* force it out to avoid overflow */
+  }
+
+  cinfo->dest->next_output_byte = entropy->next_output_byte;
+  cinfo->dest->free_in_buffer = entropy->free_in_buffer;
+
+  /* Update restart-interval state too */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0) {
+      entropy->restarts_to_go = cinfo->restart_interval;
+      entropy->next_restart_num++;
+      entropy->next_restart_num &= 7;
+    }
+    entropy->restarts_to_go--;
+  }
+
+  return TRUE;
+}
+
+
+/*
+ * MCU encoding for DC successive approximation refinement scan.
+ * Note: we assume such scans can be multi-component, although the spec
+ * is not very clear on the point.
+ */
+
+METHODDEF(boolean)
+encode_mcu_DC_refine (j_compress_ptr cinfo, JBLOCKROW *MCU_data)
+{
+  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
+  register int temp;
+  int blkn;
+  int Al = cinfo->Al;
+  JBLOCKROW block;
+
+  entropy->next_output_byte = cinfo->dest->next_output_byte;
+  entropy->free_in_buffer = cinfo->dest->free_in_buffer;
+
+  /* Emit restart marker if needed */
+  if (cinfo->restart_interval)
+    if (entropy->restarts_to_go == 0)
+      emit_restart_e(entropy, entropy->next_restart_num);
+
+  /* Encode the MCU data blocks */
+  for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
+    block = MCU_data[blkn];
+
+    /* We simply emit the Al'th bit of the DC coefficient value. */
+    temp = (*block)[0];
+    emit_bits_e(entropy, (unsigned int) (temp >> Al), 1);
+  }
+
+  cinfo->dest->next_output_byte = entropy->next_output_byte;
+  cinfo->dest->free_in_buffer = entropy->free_in_buffer;
+
+  /* Update restart-interval state too */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0) {
+      entropy->restarts_to_go = cinfo->restart_interval;
+      entropy->next_restart_num++;
+      entropy->next_restart_num &= 7;
+    }
+    entropy->restarts_to_go--;
+  }
+
+  return TRUE;
+}
+
+
+/*
+ * MCU encoding for AC successive approximation refinement scan.
+ */
+
+METHODDEF(boolean)
+encode_mcu_AC_refine (j_compress_ptr cinfo, JBLOCKROW *MCU_data)
+{
+  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
+  register int temp;
+  register int r, k;
+  int EOB;
+  char *BR_buffer;
+  unsigned int BR;
+  int Se, Al;
+  const int * natural_order;
+  JBLOCKROW block;
+  int absvalues[DCTSIZE2];
+
+  entropy->next_output_byte = cinfo->dest->next_output_byte;
+  entropy->free_in_buffer = cinfo->dest->free_in_buffer;
+
+  /* Emit restart marker if needed */
+  if (cinfo->restart_interval)
+    if (entropy->restarts_to_go == 0)
+      emit_restart_e(entropy, entropy->next_restart_num);
+
+  Se = cinfo->Se;
+  Al = cinfo->Al;
+  natural_order = cinfo->natural_order;
+
+  /* Encode the MCU data block */
+  block = MCU_data[0];
+
+  /* It is convenient to make a pre-pass to determine the transformed
+   * coefficients' absolute values and the EOB position.
+   */
+  EOB = 0;
+  for (k = cinfo->Ss; k <= Se; k++) {
+    temp = (*block)[natural_order[k]];
+    /* We must apply the point transform by Al.  For AC coefficients this
+     * is an integer division with rounding towards 0.  To do this portably
+     * in C, we shift after obtaining the absolute value.
+     */
+    if (temp < 0)
+      temp = -temp;		/* temp is abs value of input */
+    temp >>= Al;		/* apply the point transform */
+    absvalues[k] = temp;	/* save abs value for main pass */
+    if (temp == 1)
+      EOB = k;			/* EOB = index of last newly-nonzero coef */
+  }
+
+  /* Encode the AC coefficients per section G.1.2.3, fig. G.7 */
+  
+  r = 0;			/* r = run length of zeros */
+  BR = 0;			/* BR = count of buffered bits added now */
+  BR_buffer = entropy->bit_buffer + entropy->BE; /* Append bits to buffer */
+
+  for (k = cinfo->Ss; k <= Se; k++) {
+    if ((temp = absvalues[k]) == 0) {
+      r++;
+      continue;
+    }
+
+    /* Emit any required ZRLs, but not if they can be folded into EOB */
+    while (r > 15 && k <= EOB) {
+      /* emit any pending EOBRUN and the BE correction bits */
+      emit_eobrun(entropy);
+      /* Emit ZRL */
+      emit_ac_symbol(entropy, entropy->ac_tbl_no, 0xF0);
+      r -= 16;
+      /* Emit buffered correction bits that must be associated with ZRL */
+      emit_buffered_bits(entropy, BR_buffer, BR);
+      BR_buffer = entropy->bit_buffer; /* BE bits are gone now */
+      BR = 0;
+    }
+
+    /* If the coef was previously nonzero, it only needs a correction bit.
+     * NOTE: a straight translation of the spec's figure G.7 would suggest
+     * that we also need to test r > 15.  But if r > 15, we can only get here
+     * if k > EOB, which implies that this coefficient is not 1.
+     */
+    if (temp > 1) {
+      /* The correction bit is the next bit of the absolute value. */
+      BR_buffer[BR++] = (char) (temp & 1);
+      continue;
+    }
+
+    /* Emit any pending EOBRUN and the BE correction bits */
+    emit_eobrun(entropy);
+
+    /* Count/emit Huffman symbol for run length / number of bits */
+    emit_ac_symbol(entropy, entropy->ac_tbl_no, (r << 4) + 1);
+
+    /* Emit output bit for newly-nonzero coef */
+    temp = ((*block)[natural_order[k]] < 0) ? 0 : 1;
+    emit_bits_e(entropy, (unsigned int) temp, 1);
+
+    /* Emit buffered correction bits that must be associated with this code */
+    emit_buffered_bits(entropy, BR_buffer, BR);
+    BR_buffer = entropy->bit_buffer; /* BE bits are gone now */
+    BR = 0;
+    r = 0;			/* reset zero run length */
+  }
+
+  if (r > 0 || BR > 0) {	/* If there are trailing zeroes, */
+    entropy->EOBRUN++;		/* count an EOB */
+    entropy->BE += BR;		/* concat my correction bits to older ones */
+    /* We force out the EOB if we risk either:
+     * 1. overflow of the EOB counter;
+     * 2. overflow of the correction bit buffer during the next MCU.
+     */
+    if (entropy->EOBRUN == 0x7FFF || entropy->BE > (MAX_CORR_BITS-DCTSIZE2+1))
+      emit_eobrun(entropy);
+  }
+
+  cinfo->dest->next_output_byte = entropy->next_output_byte;
+  cinfo->dest->free_in_buffer = entropy->free_in_buffer;
+
+  /* Update restart-interval state too */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0) {
+      entropy->restarts_to_go = cinfo->restart_interval;
+      entropy->next_restart_num++;
+      entropy->next_restart_num &= 7;
+    }
+    entropy->restarts_to_go--;
+  }
+
+  return TRUE;
+}
+
+
+/* Encode a single block's worth of coefficients */
+
+LOCAL(boolean)
+encode_one_block (working_state * state, JCOEFPTR block, int last_dc_val,
+		  c_derived_tbl *dctbl, c_derived_tbl *actbl)
+{
+  register int temp, temp2;
+  register int nbits;
+  register int k, r, i;
+  int Se = state->cinfo->lim_Se;
+  const int * natural_order = state->cinfo->natural_order;
+
+  /* Encode the DC coefficient difference per section F.1.2.1 */
+
+  temp = temp2 = block[0] - last_dc_val;
+
+  if (temp < 0) {
+    temp = -temp;		/* temp is abs value of input */
+    /* For a negative input, want temp2 = bitwise complement of abs(input) */
+    /* This code assumes we are on a two's complement machine */
+    temp2--;
+  }
+
+  /* Find the number of bits needed for the magnitude of the coefficient */
+  nbits = 0;
+  while (temp) {
+    nbits++;
+    temp >>= 1;
+  }
+  /* Check for out-of-range coefficient values.
+   * Since we're encoding a difference, the range limit is twice as much.
+   */
+  if (nbits > MAX_COEF_BITS+1)
+    ERREXIT(state->cinfo, JERR_BAD_DCT_COEF);
+
+  /* Emit the Huffman-coded symbol for the number of bits */
+  if (! emit_bits_s(state, dctbl->ehufco[nbits], dctbl->ehufsi[nbits]))
+    return FALSE;
+
+  /* Emit that number of bits of the value, if positive, */
+  /* or the complement of its magnitude, if negative. */
+  if (nbits)			/* emit_bits rejects calls with size 0 */
+    if (! emit_bits_s(state, (unsigned int) temp2, nbits))
+      return FALSE;
+
+  /* Encode the AC coefficients per section F.1.2.2 */
+
+  r = 0;			/* r = run length of zeros */
+
+  for (k = 1; k <= Se; k++) {
+    if ((temp = block[natural_order[k]]) == 0) {
+      r++;
+    } else {
+      /* if run length > 15, must emit special run-length-16 codes (0xF0) */
+      while (r > 15) {
+	if (! emit_bits_s(state, actbl->ehufco[0xF0], actbl->ehufsi[0xF0]))
+	  return FALSE;
+	r -= 16;
+      }
+
+      temp2 = temp;
+      if (temp < 0) {
+	temp = -temp;		/* temp is abs value of input */
+	/* This code assumes we are on a two's complement machine */
+	temp2--;
+      }
+
+      /* Find the number of bits needed for the magnitude of the coefficient */
+      nbits = 1;		/* there must be at least one 1 bit */
+      while ((temp >>= 1))
+	nbits++;
+      /* Check for out-of-range coefficient values */
+      if (nbits > MAX_COEF_BITS)
+	ERREXIT(state->cinfo, JERR_BAD_DCT_COEF);
+
+      /* Emit Huffman symbol for run length / number of bits */
+      i = (r << 4) + nbits;
+      if (! emit_bits_s(state, actbl->ehufco[i], actbl->ehufsi[i]))
+	return FALSE;
+
+      /* Emit that number of bits of the value, if positive, */
+      /* or the complement of its magnitude, if negative. */
+      if (! emit_bits_s(state, (unsigned int) temp2, nbits))
+	return FALSE;
+
+      r = 0;
+    }
+  }
+
+  /* If the last coef(s) were zero, emit an end-of-block code */
+  if (r > 0)
+    if (! emit_bits_s(state, actbl->ehufco[0], actbl->ehufsi[0]))
+      return FALSE;
+
+  return TRUE;
+}
+
+
+/*
+ * Encode and output one MCU's worth of Huffman-compressed coefficients.
+ */
+
+METHODDEF(boolean)
+encode_mcu_huff (j_compress_ptr cinfo, JBLOCKROW *MCU_data)
+{
+  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
+  working_state state;
+  int blkn, ci;
+  jpeg_component_info * compptr;
+
+  /* Load up working state */
+  state.next_output_byte = cinfo->dest->next_output_byte;
+  state.free_in_buffer = cinfo->dest->free_in_buffer;
+  ASSIGN_STATE(state.cur, entropy->saved);
+  state.cinfo = cinfo;
+
+  /* Emit restart marker if needed */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0)
+      if (! emit_restart_s(&state, entropy->next_restart_num))
+	return FALSE;
+  }
+
+  /* Encode the MCU data blocks */
+  for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
+    ci = cinfo->MCU_membership[blkn];
+    compptr = cinfo->cur_comp_info[ci];
+    if (! encode_one_block(&state,
+			   MCU_data[blkn][0], state.cur.last_dc_val[ci],
+			   entropy->dc_derived_tbls[compptr->dc_tbl_no],
+			   entropy->ac_derived_tbls[compptr->ac_tbl_no]))
+      return FALSE;
+    /* Update last_dc_val */
+    state.cur.last_dc_val[ci] = MCU_data[blkn][0][0];
+  }
+
+  /* Completed MCU, so update state */
+  cinfo->dest->next_output_byte = state.next_output_byte;
+  cinfo->dest->free_in_buffer = state.free_in_buffer;
+  ASSIGN_STATE(entropy->saved, state.cur);
+
+  /* Update restart-interval state too */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0) {
+      entropy->restarts_to_go = cinfo->restart_interval;
+      entropy->next_restart_num++;
+      entropy->next_restart_num &= 7;
+    }
+    entropy->restarts_to_go--;
+  }
+
+  return TRUE;
+}
+
+
+/*
+ * Finish up at the end of a Huffman-compressed scan.
+ */
+
+METHODDEF(void)
+finish_pass_huff (j_compress_ptr cinfo)
+{
+  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
+  working_state state;
+
+  if (cinfo->progressive_mode) {
+    entropy->next_output_byte = cinfo->dest->next_output_byte;
+    entropy->free_in_buffer = cinfo->dest->free_in_buffer;
+
+    /* Flush out any buffered data */
+    emit_eobrun(entropy);
+    flush_bits_e(entropy);
+
+    cinfo->dest->next_output_byte = entropy->next_output_byte;
+    cinfo->dest->free_in_buffer = entropy->free_in_buffer;
+  } else {
+    /* Load up working state ... flush_bits needs it */
+    state.next_output_byte = cinfo->dest->next_output_byte;
+    state.free_in_buffer = cinfo->dest->free_in_buffer;
+    ASSIGN_STATE(state.cur, entropy->saved);
+    state.cinfo = cinfo;
+
+    /* Flush out the last data */
+    if (! flush_bits_s(&state))
+      ERREXIT(cinfo, JERR_CANT_SUSPEND);
+
+    /* Update state */
+    cinfo->dest->next_output_byte = state.next_output_byte;
+    cinfo->dest->free_in_buffer = state.free_in_buffer;
+    ASSIGN_STATE(entropy->saved, state.cur);
+  }
+}
+
+
+/*
+ * Huffman coding optimization.
+ *
+ * We first scan the supplied data and count the number of uses of each symbol
+ * that is to be Huffman-coded. (This process MUST agree with the code above.)
+ * Then we build a Huffman coding tree for the observed counts.
+ * Symbols which are not needed at all for the particular image are not
+ * assigned any code, which saves space in the DHT marker as well as in
+ * the compressed data.
+ */
+
+
+/* Process a single block's worth of coefficients */
+
+LOCAL(void)
+htest_one_block (j_compress_ptr cinfo, JCOEFPTR block, int last_dc_val,
+		 long dc_counts[], long ac_counts[])
+{
+  register int temp;
+  register int nbits;
+  register int k, r;
+  int Se = cinfo->lim_Se;
+  const int * natural_order = cinfo->natural_order;
+  
+  /* Encode the DC coefficient difference per section F.1.2.1 */
+  
+  temp = block[0] - last_dc_val;
+  if (temp < 0)
+    temp = -temp;
+  
+  /* Find the number of bits needed for the magnitude of the coefficient */
+  nbits = 0;
+  while (temp) {
+    nbits++;
+    temp >>= 1;
+  }
+  /* Check for out-of-range coefficient values.
+   * Since we're encoding a difference, the range limit is twice as much.
+   */
+  if (nbits > MAX_COEF_BITS+1)
+    ERREXIT(cinfo, JERR_BAD_DCT_COEF);
+
+  /* Count the Huffman symbol for the number of bits */
+  dc_counts[nbits]++;
+  
+  /* Encode the AC coefficients per section F.1.2.2 */
+  
+  r = 0;			/* r = run length of zeros */
+  
+  for (k = 1; k <= Se; k++) {
+    if ((temp = block[natural_order[k]]) == 0) {
+      r++;
+    } else {
+      /* if run length > 15, must emit special run-length-16 codes (0xF0) */
+      while (r > 15) {
+	ac_counts[0xF0]++;
+	r -= 16;
+      }
+      
+      /* Find the number of bits needed for the magnitude of the coefficient */
+      if (temp < 0)
+	temp = -temp;
+      
+      /* Find the number of bits needed for the magnitude of the coefficient */
+      nbits = 1;		/* there must be at least one 1 bit */
+      while ((temp >>= 1))
+	nbits++;
+      /* Check for out-of-range coefficient values */
+      if (nbits > MAX_COEF_BITS)
+	ERREXIT(cinfo, JERR_BAD_DCT_COEF);
+      
+      /* Count Huffman symbol for run length / number of bits */
+      ac_counts[(r << 4) + nbits]++;
+      
+      r = 0;
+    }
+  }
+
+  /* If the last coef(s) were zero, emit an end-of-block code */
+  if (r > 0)
+    ac_counts[0]++;
+}
+
+
+/*
+ * Trial-encode one MCU's worth of Huffman-compressed coefficients.
+ * No data is actually output, so no suspension return is possible.
+ */
+
+METHODDEF(boolean)
+encode_mcu_gather (j_compress_ptr cinfo, JBLOCKROW *MCU_data)
+{
+  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
+  int blkn, ci;
+  jpeg_component_info * compptr;
+
+  /* Take care of restart intervals if needed */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0) {
+      /* Re-initialize DC predictions to 0 */
+      for (ci = 0; ci < cinfo->comps_in_scan; ci++)
+	entropy->saved.last_dc_val[ci] = 0;
+      /* Update restart state */
+      entropy->restarts_to_go = cinfo->restart_interval;
+    }
+    entropy->restarts_to_go--;
+  }
+
+  for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
+    ci = cinfo->MCU_membership[blkn];
+    compptr = cinfo->cur_comp_info[ci];
+    htest_one_block(cinfo, MCU_data[blkn][0], entropy->saved.last_dc_val[ci],
+		    entropy->dc_count_ptrs[compptr->dc_tbl_no],
+		    entropy->ac_count_ptrs[compptr->ac_tbl_no]);
+    entropy->saved.last_dc_val[ci] = MCU_data[blkn][0][0];
+  }
+
+  return TRUE;
+}
+
+
+/*
+ * Generate the best Huffman code table for the given counts, fill htbl.
+ *
+ * The JPEG standard requires that no symbol be assigned a codeword of all
+ * one bits (so that padding bits added at the end of a compressed segment
+ * can't look like a valid code).  Because of the canonical ordering of
+ * codewords, this just means that there must be an unused slot in the
+ * longest codeword length category.  Section K.2 of the JPEG spec suggests
+ * reserving such a slot by pretending that symbol 256 is a valid symbol
+ * with count 1.  In theory that's not optimal; giving it count zero but
+ * including it in the symbol set anyway should give a better Huffman code.
+ * But the theoretically better code actually seems to come out worse in
+ * practice, because it produces more all-ones bytes (which incur stuffed
+ * zero bytes in the final file).  In any case the difference is tiny.
+ *
+ * The JPEG standard requires Huffman codes to be no more than 16 bits long.
+ * If some symbols have a very small but nonzero probability, the Huffman tree
+ * must be adjusted to meet the code length restriction.  We currently use
+ * the adjustment method suggested in JPEG section K.2.  This method is *not*
+ * optimal; it may not choose the best possible limited-length code.  But
+ * typically only very-low-frequency symbols will be given less-than-optimal
+ * lengths, so the code is almost optimal.  Experimental comparisons against
+ * an optimal limited-length-code algorithm indicate that the difference is
+ * microscopic --- usually less than a hundredth of a percent of total size.
+ * So the extra complexity of an optimal algorithm doesn't seem worthwhile.
+ */
+
+LOCAL(void)
+jpeg_gen_optimal_table (j_compress_ptr cinfo, JHUFF_TBL * htbl, long freq[])
+{
+#define MAX_CLEN 32		/* assumed maximum initial code length */
+  UINT8 bits[MAX_CLEN+1];	/* bits[k] = # of symbols with code length k */
+  int codesize[257];		/* codesize[k] = code length of symbol k */
+  int others[257];		/* next symbol in current branch of tree */
+  int c1, c2;
+  int p, i, j;
+  long v;
+
+  /* This algorithm is explained in section K.2 of the JPEG standard */
+
+  MEMZERO(bits, SIZEOF(bits));
+  MEMZERO(codesize, SIZEOF(codesize));
+  for (i = 0; i < 257; i++)
+    others[i] = -1;		/* init links to empty */
+  
+  freq[256] = 1;		/* make sure 256 has a nonzero count */
+  /* Including the pseudo-symbol 256 in the Huffman procedure guarantees
+   * that no real symbol is given code-value of all ones, because 256
+   * will be placed last in the largest codeword category.
+   */
+
+  /* Huffman's basic algorithm to assign optimal code lengths to symbols */
+
+  for (;;) {
+    /* Find the smallest nonzero frequency, set c1 = its symbol */
+    /* In case of ties, take the larger symbol number */
+    c1 = -1;
+    v = 1000000000L;
+    for (i = 0; i <= 256; i++) {
+      if (freq[i] && freq[i] <= v) {
+	v = freq[i];
+	c1 = i;
+      }
+    }
+
+    /* Find the next smallest nonzero frequency, set c2 = its symbol */
+    /* In case of ties, take the larger symbol number */
+    c2 = -1;
+    v = 1000000000L;
+    for (i = 0; i <= 256; i++) {
+      if (freq[i] && freq[i] <= v && i != c1) {
+	v = freq[i];
+	c2 = i;
+      }
+    }
+
+    /* Done if we've merged everything into one frequency */
+    if (c2 < 0)
+      break;
+    
+    /* Else merge the two counts/trees */
+    freq[c1] += freq[c2];
+    freq[c2] = 0;
+
+    /* Increment the codesize of everything in c1's tree branch */
+    codesize[c1]++;
+    while (others[c1] >= 0) {
+      c1 = others[c1];
+      codesize[c1]++;
+    }
+    
+    others[c1] = c2;		/* chain c2 onto c1's tree branch */
+    
+    /* Increment the codesize of everything in c2's tree branch */
+    codesize[c2]++;
+    while (others[c2] >= 0) {
+      c2 = others[c2];
+      codesize[c2]++;
+    }
+  }
+
+  /* Now count the number of symbols of each code length */
+  for (i = 0; i <= 256; i++) {
+    if (codesize[i]) {
+      /* The JPEG standard seems to think that this can't happen, */
+      /* but I'm paranoid... */
+      if (codesize[i] > MAX_CLEN)
+	ERREXIT(cinfo, JERR_HUFF_CLEN_OVERFLOW);
+
+      bits[codesize[i]]++;
+    }
+  }
+
+  /* JPEG doesn't allow symbols with code lengths over 16 bits, so if the pure
+   * Huffman procedure assigned any such lengths, we must adjust the coding.
+   * Here is what the JPEG spec says about how this next bit works:
+   * Since symbols are paired for the longest Huffman code, the symbols are
+   * removed from this length category two at a time.  The prefix for the pair
+   * (which is one bit shorter) is allocated to one of the pair; then,
+   * skipping the BITS entry for that prefix length, a code word from the next
+   * shortest nonzero BITS entry is converted into a prefix for two code words
+   * one bit longer.
+   */
+  
+  for (i = MAX_CLEN; i > 16; i--) {
+    while (bits[i] > 0) {
+      j = i - 2;		/* find length of new prefix to be used */
+      while (bits[j] == 0)
+	j--;
+      
+      bits[i] -= 2;		/* remove two symbols */
+      bits[i-1]++;		/* one goes in this length */
+      bits[j+1] += 2;		/* two new symbols in this length */
+      bits[j]--;		/* symbol of this length is now a prefix */
+    }
+  }
+
+  /* Remove the count for the pseudo-symbol 256 from the largest codelength */
+  while (bits[i] == 0)		/* find largest codelength still in use */
+    i--;
+  bits[i]--;
+  
+  /* Return final symbol counts (only for lengths 0..16) */
+  MEMCOPY(htbl->bits, bits, SIZEOF(htbl->bits));
+  
+  /* Return a list of the symbols sorted by code length */
+  /* It's not real clear to me why we don't need to consider the codelength
+   * changes made above, but the JPEG spec seems to think this works.
+   */
+  p = 0;
+  for (i = 1; i <= MAX_CLEN; i++) {
+    for (j = 0; j <= 255; j++) {
+      if (codesize[j] == i) {
+	htbl->huffval[p] = (UINT8) j;
+	p++;
+      }
+    }
+  }
+
+  /* Set sent_table FALSE so updated table will be written to JPEG file. */
+  htbl->sent_table = FALSE;
+}
+
+
+/*
+ * Finish up a statistics-gathering pass and create the new Huffman tables.
+ */
+
+METHODDEF(void)
+finish_pass_gather (j_compress_ptr cinfo)
+{
+  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
+  int ci, tbl;
+  jpeg_component_info * compptr;
+  JHUFF_TBL **htblptr;
+  boolean did_dc[NUM_HUFF_TBLS];
+  boolean did_ac[NUM_HUFF_TBLS];
+
+  /* It's important not to apply jpeg_gen_optimal_table more than once
+   * per table, because it clobbers the input frequency counts!
+   */
+  if (cinfo->progressive_mode)
+    /* Flush out buffered data (all we care about is counting the EOB symbol) */
+    emit_eobrun(entropy);
+
+  MEMZERO(did_dc, SIZEOF(did_dc));
+  MEMZERO(did_ac, SIZEOF(did_ac));
+
+  for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+    compptr = cinfo->cur_comp_info[ci];
+    /* DC needs no table for refinement scan */
+    if (cinfo->Ss == 0 && cinfo->Ah == 0) {
+      tbl = compptr->dc_tbl_no;
+      if (! did_dc[tbl]) {
+	htblptr = & cinfo->dc_huff_tbl_ptrs[tbl];
+	if (*htblptr == NULL)
+	  *htblptr = jpeg_alloc_huff_table((j_common_ptr) cinfo);
+	jpeg_gen_optimal_table(cinfo, *htblptr, entropy->dc_count_ptrs[tbl]);
+	did_dc[tbl] = TRUE;
+      }
+    }
+    /* AC needs no table when not present */
+    if (cinfo->Se) {
+      tbl = compptr->ac_tbl_no;
+      if (! did_ac[tbl]) {
+	htblptr = & cinfo->ac_huff_tbl_ptrs[tbl];
+	if (*htblptr == NULL)
+	  *htblptr = jpeg_alloc_huff_table((j_common_ptr) cinfo);
+	jpeg_gen_optimal_table(cinfo, *htblptr, entropy->ac_count_ptrs[tbl]);
+	did_ac[tbl] = TRUE;
+      }
+    }
+  }
+}
+
+
+/*
+ * Initialize for a Huffman-compressed scan.
+ * If gather_statistics is TRUE, we do not output anything during the scan,
+ * just count the Huffman symbols used and generate Huffman code tables.
+ */
+
+METHODDEF(void)
+start_pass_huff (j_compress_ptr cinfo, boolean gather_statistics)
+{
+  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
+  int ci, tbl;
+  jpeg_component_info * compptr;
+
+  if (gather_statistics)
+    entropy->pub.finish_pass = finish_pass_gather;
+  else
+    entropy->pub.finish_pass = finish_pass_huff;
+
+  if (cinfo->progressive_mode) {
+    entropy->cinfo = cinfo;
+    entropy->gather_statistics = gather_statistics;
+
+    /* We assume jcmaster.c already validated the scan parameters. */
+
+    /* Select execution routine */
+    if (cinfo->Ah == 0) {
+      if (cinfo->Ss == 0)
+	entropy->pub.encode_mcu = encode_mcu_DC_first;
+      else
+	entropy->pub.encode_mcu = encode_mcu_AC_first;
+    } else {
+      if (cinfo->Ss == 0)
+	entropy->pub.encode_mcu = encode_mcu_DC_refine;
+      else {
+	entropy->pub.encode_mcu = encode_mcu_AC_refine;
+	/* AC refinement needs a correction bit buffer */
+	if (entropy->bit_buffer == NULL)
+	  entropy->bit_buffer = (char *)
+	    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+					MAX_CORR_BITS * SIZEOF(char));
+      }
+    }
+
+    /* Initialize AC stuff */
+    entropy->ac_tbl_no = cinfo->cur_comp_info[0]->ac_tbl_no;
+    entropy->EOBRUN = 0;
+    entropy->BE = 0;
+  } else {
+    if (gather_statistics)
+      entropy->pub.encode_mcu = encode_mcu_gather;
+    else
+      entropy->pub.encode_mcu = encode_mcu_huff;
+  }
+
+  for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+    compptr = cinfo->cur_comp_info[ci];
+    /* DC needs no table for refinement scan */
+    if (cinfo->Ss == 0 && cinfo->Ah == 0) {
+      tbl = compptr->dc_tbl_no;
+      if (gather_statistics) {
+	/* Check for invalid table index */
+	/* (make_c_derived_tbl does this in the other path) */
+	if (tbl < 0 || tbl >= NUM_HUFF_TBLS)
+	  ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, tbl);
+	/* Allocate and zero the statistics tables */
+	/* Note that jpeg_gen_optimal_table expects 257 entries in each table! */
+	if (entropy->dc_count_ptrs[tbl] == NULL)
+	  entropy->dc_count_ptrs[tbl] = (long *)
+	    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+					257 * SIZEOF(long));
+	MEMZERO(entropy->dc_count_ptrs[tbl], 257 * SIZEOF(long));
+      } else {
+	/* Compute derived values for Huffman tables */
+	/* We may do this more than once for a table, but it's not expensive */
+	jpeg_make_c_derived_tbl(cinfo, TRUE, tbl,
+				& entropy->dc_derived_tbls[tbl]);
+      }
+      /* Initialize DC predictions to 0 */
+      entropy->saved.last_dc_val[ci] = 0;
+    }
+    /* AC needs no table when not present */
+    if (cinfo->Se) {
+      tbl = compptr->ac_tbl_no;
+      if (gather_statistics) {
+	if (tbl < 0 || tbl >= NUM_HUFF_TBLS)
+	  ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, tbl);
+	if (entropy->ac_count_ptrs[tbl] == NULL)
+	  entropy->ac_count_ptrs[tbl] = (long *)
+	    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+					257 * SIZEOF(long));
+	MEMZERO(entropy->ac_count_ptrs[tbl], 257 * SIZEOF(long));
+      } else {
+	jpeg_make_c_derived_tbl(cinfo, FALSE, tbl,
+				& entropy->ac_derived_tbls[tbl]);
+      }
+    }
+  }
+
+  /* Initialize bit buffer to empty */
+  entropy->saved.put_buffer = 0;
+  entropy->saved.put_bits = 0;
+
+  /* Initialize restart stuff */
+  entropy->restarts_to_go = cinfo->restart_interval;
+  entropy->next_restart_num = 0;
+}
+
+
+/*
+ * Module initialization routine for Huffman entropy encoding.
+ */
+
+GLOBAL(void)
+jinit_huff_encoder (j_compress_ptr cinfo)
+{
+  huff_entropy_ptr entropy;
+  int i;
+
+  entropy = (huff_entropy_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				SIZEOF(huff_entropy_encoder));
+  cinfo->entropy = (struct jpeg_entropy_encoder *) entropy;
+  entropy->pub.start_pass = start_pass_huff;
+
+  /* Mark tables unallocated */
+  for (i = 0; i < NUM_HUFF_TBLS; i++) {
+    entropy->dc_derived_tbls[i] = entropy->ac_derived_tbls[i] = NULL;
+    entropy->dc_count_ptrs[i] = entropy->ac_count_ptrs[i] = NULL;
+  }
+
+  if (cinfo->progressive_mode)
+    entropy->bit_buffer = NULL;	/* needed only in AC refinement scan */
+}
diff --git a/plugins/AdvaImg/src/LibJPEG/jcinit.c b/plugins/AdvaImg/src/LibJPEG/jcinit.c
index 0ba310f217..f7aa89fdb3 100644
--- a/plugins/AdvaImg/src/LibJPEG/jcinit.c
+++ b/plugins/AdvaImg/src/LibJPEG/jcinit.c
@@ -1,65 +1,65 @@
-/*
- * jcinit.c
- *
- * Copyright (C) 1991-1997, Thomas G. Lane.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains initialization logic for the JPEG compressor.
- * This routine is in charge of selecting the modules to be executed and
- * making an initialization call to each one.
- *
- * Logically, this code belongs in jcmaster.c.  It's split out because
- * linking this routine implies linking the entire compression library.
- * For a transcoding-only application, we want to be able to use jcmaster.c
- * without linking in the whole library.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-
-
-/*
- * Master selection of compression modules.
- * This is done once at the start of processing an image.  We determine
- * which modules will be used and give them appropriate initialization calls.
- */
-
-GLOBAL(void)
-jinit_compress_master (j_compress_ptr cinfo)
-{
-  /* Initialize master control (includes parameter checking/processing) */
-  jinit_c_master_control(cinfo, FALSE /* full compression */);
-
-  /* Preprocessing */
-  if (! cinfo->raw_data_in) {
-    jinit_color_converter(cinfo);
-    jinit_downsampler(cinfo);
-    jinit_c_prep_controller(cinfo, FALSE /* never need full buffer here */);
-  }
-  /* Forward DCT */
-  jinit_forward_dct(cinfo);
-  /* Entropy encoding: either Huffman or arithmetic coding. */
-  if (cinfo->arith_code)
-    jinit_arith_encoder(cinfo);
-  else {
-    jinit_huff_encoder(cinfo);
-  }
-
-  /* Need a full-image coefficient buffer in any multi-pass mode. */
-  jinit_c_coef_controller(cinfo,
-		(boolean) (cinfo->num_scans > 1 || cinfo->optimize_coding));
-  jinit_c_main_controller(cinfo, FALSE /* never need full buffer here */);
-
-  jinit_marker_writer(cinfo);
-
-  /* We can now tell the memory manager to allocate virtual arrays. */
-  (*cinfo->mem->realize_virt_arrays) ((j_common_ptr) cinfo);
-
-  /* Write the datastream header (SOI) immediately.
-   * Frame and scan headers are postponed till later.
-   * This lets application insert special markers after the SOI.
-   */
-  (*cinfo->marker->write_file_header) (cinfo);
-}
+/*
+ * jcinit.c
+ *
+ * Copyright (C) 1991-1997, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains initialization logic for the JPEG compressor.
+ * This routine is in charge of selecting the modules to be executed and
+ * making an initialization call to each one.
+ *
+ * Logically, this code belongs in jcmaster.c.  It's split out because
+ * linking this routine implies linking the entire compression library.
+ * For a transcoding-only application, we want to be able to use jcmaster.c
+ * without linking in the whole library.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/*
+ * Master selection of compression modules.
+ * This is done once at the start of processing an image.  We determine
+ * which modules will be used and give them appropriate initialization calls.
+ */
+
+GLOBAL(void)
+jinit_compress_master (j_compress_ptr cinfo)
+{
+  /* Initialize master control (includes parameter checking/processing) */
+  jinit_c_master_control(cinfo, FALSE /* full compression */);
+
+  /* Preprocessing */
+  if (! cinfo->raw_data_in) {
+    jinit_color_converter(cinfo);
+    jinit_downsampler(cinfo);
+    jinit_c_prep_controller(cinfo, FALSE /* never need full buffer here */);
+  }
+  /* Forward DCT */
+  jinit_forward_dct(cinfo);
+  /* Entropy encoding: either Huffman or arithmetic coding. */
+  if (cinfo->arith_code)
+    jinit_arith_encoder(cinfo);
+  else {
+    jinit_huff_encoder(cinfo);
+  }
+
+  /* Need a full-image coefficient buffer in any multi-pass mode. */
+  jinit_c_coef_controller(cinfo,
+		(boolean) (cinfo->num_scans > 1 || cinfo->optimize_coding));
+  jinit_c_main_controller(cinfo, FALSE /* never need full buffer here */);
+
+  jinit_marker_writer(cinfo);
+
+  /* We can now tell the memory manager to allocate virtual arrays. */
+  (*cinfo->mem->realize_virt_arrays) ((j_common_ptr) cinfo);
+
+  /* Write the datastream header (SOI) immediately.
+   * Frame and scan headers are postponed till later.
+   * This lets application insert special markers after the SOI.
+   */
+  (*cinfo->marker->write_file_header) (cinfo);
+}
diff --git a/plugins/AdvaImg/src/LibJPEG/jcmainct.c b/plugins/AdvaImg/src/LibJPEG/jcmainct.c
index 7de75d1675..29d53a22ea 100644
--- a/plugins/AdvaImg/src/LibJPEG/jcmainct.c
+++ b/plugins/AdvaImg/src/LibJPEG/jcmainct.c
@@ -1,293 +1,297 @@
-/*
- * jcmainct.c
- *
- * Copyright (C) 1994-1996, Thomas G. Lane.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains the main buffer controller for compression.
- * The main buffer lies between the pre-processor and the JPEG
- * compressor proper; it holds downsampled data in the JPEG colorspace.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-
-
-/* Note: currently, there is no operating mode in which a full-image buffer
- * is needed at this step.  If there were, that mode could not be used with
- * "raw data" input, since this module is bypassed in that case.  However,
- * we've left the code here for possible use in special applications.
- */
-#undef FULL_MAIN_BUFFER_SUPPORTED
-
-
-/* Private buffer controller object */
-
-typedef struct {
-  struct jpeg_c_main_controller pub; /* public fields */
-
-  JDIMENSION cur_iMCU_row;	/* number of current iMCU row */
-  JDIMENSION rowgroup_ctr;	/* counts row groups received in iMCU row */
-  boolean suspended;		/* remember if we suspended output */
-  J_BUF_MODE pass_mode;		/* current operating mode */
-
-  /* If using just a strip buffer, this points to the entire set of buffers
-   * (we allocate one for each component).  In the full-image case, this
-   * points to the currently accessible strips of the virtual arrays.
-   */
-  JSAMPARRAY buffer[MAX_COMPONENTS];
-
-#ifdef FULL_MAIN_BUFFER_SUPPORTED
-  /* If using full-image storage, this array holds pointers to virtual-array
-   * control blocks for each component.  Unused if not full-image storage.
-   */
-  jvirt_sarray_ptr whole_image[MAX_COMPONENTS];
-#endif
-} my_main_controller;
-
-typedef my_main_controller * my_main_ptr;
-
-
-/* Forward declarations */
-METHODDEF(void) process_data_simple_main
-	JPP((j_compress_ptr cinfo, JSAMPARRAY input_buf,
-	     JDIMENSION *in_row_ctr, JDIMENSION in_rows_avail));
-#ifdef FULL_MAIN_BUFFER_SUPPORTED
-METHODDEF(void) process_data_buffer_main
-	JPP((j_compress_ptr cinfo, JSAMPARRAY input_buf,
-	     JDIMENSION *in_row_ctr, JDIMENSION in_rows_avail));
-#endif
-
-
-/*
- * Initialize for a processing pass.
- */
-
-METHODDEF(void)
-start_pass_main (j_compress_ptr cinfo, J_BUF_MODE pass_mode)
-{
-  my_main_ptr main = (my_main_ptr) cinfo->main;
-
-  /* Do nothing in raw-data mode. */
-  if (cinfo->raw_data_in)
-    return;
-
-  main->cur_iMCU_row = 0;	/* initialize counters */
-  main->rowgroup_ctr = 0;
-  main->suspended = FALSE;
-  main->pass_mode = pass_mode;	/* save mode for use by process_data */
-
-  switch (pass_mode) {
-  case JBUF_PASS_THRU:
-#ifdef FULL_MAIN_BUFFER_SUPPORTED
-    if (main->whole_image[0] != NULL)
-      ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
-#endif
-    main->pub.process_data = process_data_simple_main;
-    break;
-#ifdef FULL_MAIN_BUFFER_SUPPORTED
-  case JBUF_SAVE_SOURCE:
-  case JBUF_CRANK_DEST:
-  case JBUF_SAVE_AND_PASS:
-    if (main->whole_image[0] == NULL)
-      ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
-    main->pub.process_data = process_data_buffer_main;
-    break;
-#endif
-  default:
-    ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
-    break;
-  }
-}
-
-
-/*
- * Process some data.
- * This routine handles the simple pass-through mode,
- * where we have only a strip buffer.
- */
-
-METHODDEF(void)
-process_data_simple_main (j_compress_ptr cinfo,
-			  JSAMPARRAY input_buf, JDIMENSION *in_row_ctr,
-			  JDIMENSION in_rows_avail)
-{
-  my_main_ptr main = (my_main_ptr) cinfo->main;
-
-  while (main->cur_iMCU_row < cinfo->total_iMCU_rows) {
-    /* Read input data if we haven't filled the main buffer yet */
-    if (main->rowgroup_ctr < (JDIMENSION) cinfo->min_DCT_v_scaled_size)
-      (*cinfo->prep->pre_process_data) (cinfo,
-					input_buf, in_row_ctr, in_rows_avail,
-					main->buffer, &main->rowgroup_ctr,
-					(JDIMENSION) cinfo->min_DCT_v_scaled_size);
-
-    /* If we don't have a full iMCU row buffered, return to application for
-     * more data.  Note that preprocessor will always pad to fill the iMCU row
-     * at the bottom of the image.
-     */
-    if (main->rowgroup_ctr != (JDIMENSION) cinfo->min_DCT_v_scaled_size)
-      return;
-
-    /* Send the completed row to the compressor */
-    if (! (*cinfo->coef->compress_data) (cinfo, main->buffer)) {
-      /* If compressor did not consume the whole row, then we must need to
-       * suspend processing and return to the application.  In this situation
-       * we pretend we didn't yet consume the last input row; otherwise, if
-       * it happened to be the last row of the image, the application would
-       * think we were done.
-       */
-      if (! main->suspended) {
-	(*in_row_ctr)--;
-	main->suspended = TRUE;
-      }
-      return;
-    }
-    /* We did finish the row.  Undo our little suspension hack if a previous
-     * call suspended; then mark the main buffer empty.
-     */
-    if (main->suspended) {
-      (*in_row_ctr)++;
-      main->suspended = FALSE;
-    }
-    main->rowgroup_ctr = 0;
-    main->cur_iMCU_row++;
-  }
-}
-
-
-#ifdef FULL_MAIN_BUFFER_SUPPORTED
-
-/*
- * Process some data.
- * This routine handles all of the modes that use a full-size buffer.
- */
-
-METHODDEF(void)
-process_data_buffer_main (j_compress_ptr cinfo,
-			  JSAMPARRAY input_buf, JDIMENSION *in_row_ctr,
-			  JDIMENSION in_rows_avail)
-{
-  my_main_ptr main = (my_main_ptr) cinfo->main;
-  int ci;
-  jpeg_component_info *compptr;
-  boolean writing = (main->pass_mode != JBUF_CRANK_DEST);
-
-  while (main->cur_iMCU_row < cinfo->total_iMCU_rows) {
-    /* Realign the virtual buffers if at the start of an iMCU row. */
-    if (main->rowgroup_ctr == 0) {
-      for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-	   ci++, compptr++) {
-	main->buffer[ci] = (*cinfo->mem->access_virt_sarray)
-	  ((j_common_ptr) cinfo, main->whole_image[ci],
-	   main->cur_iMCU_row * (compptr->v_samp_factor * DCTSIZE),
-	   (JDIMENSION) (compptr->v_samp_factor * DCTSIZE), writing);
-      }
-      /* In a read pass, pretend we just read some source data. */
-      if (! writing) {
-	*in_row_ctr += cinfo->max_v_samp_factor * DCTSIZE;
-	main->rowgroup_ctr = DCTSIZE;
-      }
-    }
-
-    /* If a write pass, read input data until the current iMCU row is full. */
-    /* Note: preprocessor will pad if necessary to fill the last iMCU row. */
-    if (writing) {
-      (*cinfo->prep->pre_process_data) (cinfo,
-					input_buf, in_row_ctr, in_rows_avail,
-					main->buffer, &main->rowgroup_ctr,
-					(JDIMENSION) DCTSIZE);
-      /* Return to application if we need more data to fill the iMCU row. */
-      if (main->rowgroup_ctr < DCTSIZE)
-	return;
-    }
-
-    /* Emit data, unless this is a sink-only pass. */
-    if (main->pass_mode != JBUF_SAVE_SOURCE) {
-      if (! (*cinfo->coef->compress_data) (cinfo, main->buffer)) {
-	/* If compressor did not consume the whole row, then we must need to
-	 * suspend processing and return to the application.  In this situation
-	 * we pretend we didn't yet consume the last input row; otherwise, if
-	 * it happened to be the last row of the image, the application would
-	 * think we were done.
-	 */
-	if (! main->suspended) {
-	  (*in_row_ctr)--;
-	  main->suspended = TRUE;
-	}
-	return;
-      }
-      /* We did finish the row.  Undo our little suspension hack if a previous
-       * call suspended; then mark the main buffer empty.
-       */
-      if (main->suspended) {
-	(*in_row_ctr)++;
-	main->suspended = FALSE;
-      }
-    }
-
-    /* If get here, we are done with this iMCU row.  Mark buffer empty. */
-    main->rowgroup_ctr = 0;
-    main->cur_iMCU_row++;
-  }
-}
-
-#endif /* FULL_MAIN_BUFFER_SUPPORTED */
-
-
-/*
- * Initialize main buffer controller.
- */
-
-GLOBAL(void)
-jinit_c_main_controller (j_compress_ptr cinfo, boolean need_full_buffer)
-{
-  my_main_ptr main;
-  int ci;
-  jpeg_component_info *compptr;
-
-  main = (my_main_ptr)
-    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				SIZEOF(my_main_controller));
-  cinfo->main = (struct jpeg_c_main_controller *) main;
-  main->pub.start_pass = start_pass_main;
-
-  /* We don't need to create a buffer in raw-data mode. */
-  if (cinfo->raw_data_in)
-    return;
-
-  /* Create the buffer.  It holds downsampled data, so each component
-   * may be of a different size.
-   */
-  if (need_full_buffer) {
-#ifdef FULL_MAIN_BUFFER_SUPPORTED
-    /* Allocate a full-image virtual array for each component */
-    /* Note we pad the bottom to a multiple of the iMCU height */
-    for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-	 ci++, compptr++) {
-      main->whole_image[ci] = (*cinfo->mem->request_virt_sarray)
-	((j_common_ptr) cinfo, JPOOL_IMAGE, FALSE,
-	 compptr->width_in_blocks * compptr->DCT_h_scaled_size,
-	 (JDIMENSION) jround_up((long) compptr->height_in_blocks,
-				(long) compptr->v_samp_factor) * DCTSIZE,
-	 (JDIMENSION) (compptr->v_samp_factor * compptr->DCT_v_scaled_size));
-    }
-#else
-    ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
-#endif
-  } else {
-#ifdef FULL_MAIN_BUFFER_SUPPORTED
-    main->whole_image[0] = NULL; /* flag for no virtual arrays */
-#endif
-    /* Allocate a strip buffer for each component */
-    for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-	 ci++, compptr++) {
-      main->buffer[ci] = (*cinfo->mem->alloc_sarray)
-	((j_common_ptr) cinfo, JPOOL_IMAGE,
-	 compptr->width_in_blocks * compptr->DCT_h_scaled_size,
-	 (JDIMENSION) (compptr->v_samp_factor * compptr->DCT_v_scaled_size));
-    }
-  }
-}
+/*
+ * jcmainct.c
+ *
+ * Copyright (C) 1994-1996, Thomas G. Lane.
+ * Modified 2003-2012 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains the main buffer controller for compression.
+ * The main buffer lies between the pre-processor and the JPEG
+ * compressor proper; it holds downsampled data in the JPEG colorspace.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/* Note: currently, there is no operating mode in which a full-image buffer
+ * is needed at this step.  If there were, that mode could not be used with
+ * "raw data" input, since this module is bypassed in that case.  However,
+ * we've left the code here for possible use in special applications.
+ */
+#undef FULL_MAIN_BUFFER_SUPPORTED
+
+
+/* Private buffer controller object */
+
+typedef struct {
+  struct jpeg_c_main_controller pub; /* public fields */
+
+  JDIMENSION cur_iMCU_row;	/* number of current iMCU row */
+  JDIMENSION rowgroup_ctr;	/* counts row groups received in iMCU row */
+  boolean suspended;		/* remember if we suspended output */
+  J_BUF_MODE pass_mode;		/* current operating mode */
+
+  /* If using just a strip buffer, this points to the entire set of buffers
+   * (we allocate one for each component).  In the full-image case, this
+   * points to the currently accessible strips of the virtual arrays.
+   */
+  JSAMPARRAY buffer[MAX_COMPONENTS];
+
+#ifdef FULL_MAIN_BUFFER_SUPPORTED
+  /* If using full-image storage, this array holds pointers to virtual-array
+   * control blocks for each component.  Unused if not full-image storage.
+   */
+  jvirt_sarray_ptr whole_image[MAX_COMPONENTS];
+#endif
+} my_main_controller;
+
+typedef my_main_controller * my_main_ptr;
+
+
+/* Forward declarations */
+METHODDEF(void) process_data_simple_main
+	JPP((j_compress_ptr cinfo, JSAMPARRAY input_buf,
+	     JDIMENSION *in_row_ctr, JDIMENSION in_rows_avail));
+#ifdef FULL_MAIN_BUFFER_SUPPORTED
+METHODDEF(void) process_data_buffer_main
+	JPP((j_compress_ptr cinfo, JSAMPARRAY input_buf,
+	     JDIMENSION *in_row_ctr, JDIMENSION in_rows_avail));
+#endif
+
+
+/*
+ * Initialize for a processing pass.
+ */
+
+METHODDEF(void)
+start_pass_main (j_compress_ptr cinfo, J_BUF_MODE pass_mode)
+{
+  my_main_ptr mainp = (my_main_ptr) cinfo->main;
+
+  /* Do nothing in raw-data mode. */
+  if (cinfo->raw_data_in)
+    return;
+
+  mainp->cur_iMCU_row = 0;	/* initialize counters */
+  mainp->rowgroup_ctr = 0;
+  mainp->suspended = FALSE;
+  mainp->pass_mode = pass_mode;	/* save mode for use by process_data */
+
+  switch (pass_mode) {
+  case JBUF_PASS_THRU:
+#ifdef FULL_MAIN_BUFFER_SUPPORTED
+    if (mainp->whole_image[0] != NULL)
+      ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
+#endif
+    mainp->pub.process_data = process_data_simple_main;
+    break;
+#ifdef FULL_MAIN_BUFFER_SUPPORTED
+  case JBUF_SAVE_SOURCE:
+  case JBUF_CRANK_DEST:
+  case JBUF_SAVE_AND_PASS:
+    if (mainp->whole_image[0] == NULL)
+      ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
+    mainp->pub.process_data = process_data_buffer_main;
+    break;
+#endif
+  default:
+    ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
+    break;
+  }
+}
+
+
+/*
+ * Process some data.
+ * This routine handles the simple pass-through mode,
+ * where we have only a strip buffer.
+ */
+
+METHODDEF(void)
+process_data_simple_main (j_compress_ptr cinfo,
+			  JSAMPARRAY input_buf, JDIMENSION *in_row_ctr,
+			  JDIMENSION in_rows_avail)
+{
+  my_main_ptr mainp = (my_main_ptr) cinfo->main;
+
+  while (mainp->cur_iMCU_row < cinfo->total_iMCU_rows) {
+    /* Read input data if we haven't filled the main buffer yet */
+    if (mainp->rowgroup_ctr < (JDIMENSION) cinfo->min_DCT_v_scaled_size)
+      (*cinfo->prep->pre_process_data) (cinfo,
+					input_buf, in_row_ctr, in_rows_avail,
+					mainp->buffer, &mainp->rowgroup_ctr,
+					(JDIMENSION) cinfo->min_DCT_v_scaled_size);
+
+    /* If we don't have a full iMCU row buffered, return to application for
+     * more data.  Note that preprocessor will always pad to fill the iMCU row
+     * at the bottom of the image.
+     */
+    if (mainp->rowgroup_ctr != (JDIMENSION) cinfo->min_DCT_v_scaled_size)
+      return;
+
+    /* Send the completed row to the compressor */
+    if (! (*cinfo->coef->compress_data) (cinfo, mainp->buffer)) {
+      /* If compressor did not consume the whole row, then we must need to
+       * suspend processing and return to the application.  In this situation
+       * we pretend we didn't yet consume the last input row; otherwise, if
+       * it happened to be the last row of the image, the application would
+       * think we were done.
+       */
+      if (! mainp->suspended) {
+	(*in_row_ctr)--;
+	mainp->suspended = TRUE;
+      }
+      return;
+    }
+    /* We did finish the row.  Undo our little suspension hack if a previous
+     * call suspended; then mark the main buffer empty.
+     */
+    if (mainp->suspended) {
+      (*in_row_ctr)++;
+      mainp->suspended = FALSE;
+    }
+    mainp->rowgroup_ctr = 0;
+    mainp->cur_iMCU_row++;
+  }
+}
+
+
+#ifdef FULL_MAIN_BUFFER_SUPPORTED
+
+/*
+ * Process some data.
+ * This routine handles all of the modes that use a full-size buffer.
+ */
+
+METHODDEF(void)
+process_data_buffer_main (j_compress_ptr cinfo,
+			  JSAMPARRAY input_buf, JDIMENSION *in_row_ctr,
+			  JDIMENSION in_rows_avail)
+{
+  my_main_ptr mainp = (my_main_ptr) cinfo->main;
+  int ci;
+  jpeg_component_info *compptr;
+  boolean writing = (mainp->pass_mode != JBUF_CRANK_DEST);
+
+  while (mainp->cur_iMCU_row < cinfo->total_iMCU_rows) {
+    /* Realign the virtual buffers if at the start of an iMCU row. */
+    if (mainp->rowgroup_ctr == 0) {
+      for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+	   ci++, compptr++) {
+	mainp->buffer[ci] = (*cinfo->mem->access_virt_sarray)
+	  ((j_common_ptr) cinfo, mainp->whole_image[ci], mainp->cur_iMCU_row *
+	   ((JDIMENSION) (compptr->v_samp_factor * cinfo->min_DCT_v_scaled_size)),
+	   (JDIMENSION) (compptr->v_samp_factor * cinfo->min_DCT_v_scaled_size),
+	   writing);
+      }
+      /* In a read pass, pretend we just read some source data. */
+      if (! writing) {
+	*in_row_ctr += (JDIMENSION)
+	  (cinfo->max_v_samp_factor * cinfo->min_DCT_v_scaled_size);
+	mainp->rowgroup_ctr = (JDIMENSION) cinfo->min_DCT_v_scaled_size;
+      }
+    }
+
+    /* If a write pass, read input data until the current iMCU row is full. */
+    /* Note: preprocessor will pad if necessary to fill the last iMCU row. */
+    if (writing) {
+      (*cinfo->prep->pre_process_data) (cinfo,
+					input_buf, in_row_ctr, in_rows_avail,
+					mainp->buffer, &mainp->rowgroup_ctr,
+					(JDIMENSION) cinfo->min_DCT_v_scaled_size);
+      /* Return to application if we need more data to fill the iMCU row. */
+      if (mainp->rowgroup_ctr < (JDIMENSION) cinfo->min_DCT_v_scaled_size)
+	return;
+    }
+
+    /* Emit data, unless this is a sink-only pass. */
+    if (mainp->pass_mode != JBUF_SAVE_SOURCE) {
+      if (! (*cinfo->coef->compress_data) (cinfo, mainp->buffer)) {
+	/* If compressor did not consume the whole row, then we must need to
+	 * suspend processing and return to the application.  In this situation
+	 * we pretend we didn't yet consume the last input row; otherwise, if
+	 * it happened to be the last row of the image, the application would
+	 * think we were done.
+	 */
+	if (! mainp->suspended) {
+	  (*in_row_ctr)--;
+	  mainp->suspended = TRUE;
+	}
+	return;
+      }
+      /* We did finish the row.  Undo our little suspension hack if a previous
+       * call suspended; then mark the main buffer empty.
+       */
+      if (mainp->suspended) {
+	(*in_row_ctr)++;
+	mainp->suspended = FALSE;
+      }
+    }
+
+    /* If get here, we are done with this iMCU row.  Mark buffer empty. */
+    mainp->rowgroup_ctr = 0;
+    mainp->cur_iMCU_row++;
+  }
+}
+
+#endif /* FULL_MAIN_BUFFER_SUPPORTED */
+
+
+/*
+ * Initialize main buffer controller.
+ */
+
+GLOBAL(void)
+jinit_c_main_controller (j_compress_ptr cinfo, boolean need_full_buffer)
+{
+  my_main_ptr mainp;
+  int ci;
+  jpeg_component_info *compptr;
+
+  mainp = (my_main_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				SIZEOF(my_main_controller));
+  cinfo->main = &mainp->pub;
+  mainp->pub.start_pass = start_pass_main;
+
+  /* We don't need to create a buffer in raw-data mode. */
+  if (cinfo->raw_data_in)
+    return;
+
+  /* Create the buffer.  It holds downsampled data, so each component
+   * may be of a different size.
+   */
+  if (need_full_buffer) {
+#ifdef FULL_MAIN_BUFFER_SUPPORTED
+    /* Allocate a full-image virtual array for each component */
+    /* Note we pad the bottom to a multiple of the iMCU height */
+    for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+	 ci++, compptr++) {
+      mainp->whole_image[ci] = (*cinfo->mem->request_virt_sarray)
+	((j_common_ptr) cinfo, JPOOL_IMAGE, FALSE,
+	 compptr->width_in_blocks * ((JDIMENSION) compptr->DCT_h_scaled_size),
+	 ((JDIMENSION) jround_up((long) compptr->height_in_blocks,
+				 (long) compptr->v_samp_factor)) *
+	 ((JDIMENSION) cinfo->min_DCT_v_scaled_size),
+	 (JDIMENSION) (compptr->v_samp_factor * compptr->DCT_v_scaled_size));
+    }
+#else
+    ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
+#endif
+  } else {
+#ifdef FULL_MAIN_BUFFER_SUPPORTED
+    mainp->whole_image[0] = NULL; /* flag for no virtual arrays */
+#endif
+    /* Allocate a strip buffer for each component */
+    for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+	 ci++, compptr++) {
+      mainp->buffer[ci] = (*cinfo->mem->alloc_sarray)
+	((j_common_ptr) cinfo, JPOOL_IMAGE,
+	 compptr->width_in_blocks * ((JDIMENSION) compptr->DCT_h_scaled_size),
+	 (JDIMENSION) (compptr->v_samp_factor * compptr->DCT_v_scaled_size));
+    }
+  }
+}
diff --git a/plugins/AdvaImg/src/LibJPEG/jcmarker.c b/plugins/AdvaImg/src/LibJPEG/jcmarker.c
index 606c19af39..84fd20db31 100644
--- a/plugins/AdvaImg/src/LibJPEG/jcmarker.c
+++ b/plugins/AdvaImg/src/LibJPEG/jcmarker.c
@@ -1,682 +1,719 @@
-/*
- * jcmarker.c
- *
- * Copyright (C) 1991-1998, Thomas G. Lane.
- * Modified 2003-2010 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains routines to write JPEG datastream markers.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-
-
-typedef enum {			/* JPEG marker codes */
-  M_SOF0  = 0xc0,
-  M_SOF1  = 0xc1,
-  M_SOF2  = 0xc2,
-  M_SOF3  = 0xc3,
-  
-  M_SOF5  = 0xc5,
-  M_SOF6  = 0xc6,
-  M_SOF7  = 0xc7,
-  
-  M_JPG   = 0xc8,
-  M_SOF9  = 0xc9,
-  M_SOF10 = 0xca,
-  M_SOF11 = 0xcb,
-  
-  M_SOF13 = 0xcd,
-  M_SOF14 = 0xce,
-  M_SOF15 = 0xcf,
-  
-  M_DHT   = 0xc4,
-  
-  M_DAC   = 0xcc,
-  
-  M_RST0  = 0xd0,
-  M_RST1  = 0xd1,
-  M_RST2  = 0xd2,
-  M_RST3  = 0xd3,
-  M_RST4  = 0xd4,
-  M_RST5  = 0xd5,
-  M_RST6  = 0xd6,
-  M_RST7  = 0xd7,
-  
-  M_SOI   = 0xd8,
-  M_EOI   = 0xd9,
-  M_SOS   = 0xda,
-  M_DQT   = 0xdb,
-  M_DNL   = 0xdc,
-  M_DRI   = 0xdd,
-  M_DHP   = 0xde,
-  M_EXP   = 0xdf,
-  
-  M_APP0  = 0xe0,
-  M_APP1  = 0xe1,
-  M_APP2  = 0xe2,
-  M_APP3  = 0xe3,
-  M_APP4  = 0xe4,
-  M_APP5  = 0xe5,
-  M_APP6  = 0xe6,
-  M_APP7  = 0xe7,
-  M_APP8  = 0xe8,
-  M_APP9  = 0xe9,
-  M_APP10 = 0xea,
-  M_APP11 = 0xeb,
-  M_APP12 = 0xec,
-  M_APP13 = 0xed,
-  M_APP14 = 0xee,
-  M_APP15 = 0xef,
-  
-  M_JPG0  = 0xf0,
-  M_JPG13 = 0xfd,
-  M_COM   = 0xfe,
-  
-  M_TEM   = 0x01,
-  
-  M_ERROR = 0x100
-} JPEG_MARKER;
-
-
-/* Private state */
-
-typedef struct {
-  struct jpeg_marker_writer pub; /* public fields */
-
-  unsigned int last_restart_interval; /* last DRI value emitted; 0 after SOI */
-} my_marker_writer;
-
-typedef my_marker_writer * my_marker_ptr;
-
-
-/*
- * Basic output routines.
- *
- * Note that we do not support suspension while writing a marker.
- * Therefore, an application using suspension must ensure that there is
- * enough buffer space for the initial markers (typ. 600-700 bytes) before
- * calling jpeg_start_compress, and enough space to write the trailing EOI
- * (a few bytes) before calling jpeg_finish_compress.  Multipass compression
- * modes are not supported at all with suspension, so those two are the only
- * points where markers will be written.
- */
-
-LOCAL(void)
-emit_byte (j_compress_ptr cinfo, int val)
-/* Emit a byte */
-{
-  struct jpeg_destination_mgr * dest = cinfo->dest;
-
-  *(dest->next_output_byte)++ = (JOCTET) val;
-  if (--dest->free_in_buffer == 0) {
-    if (! (*dest->empty_output_buffer) (cinfo))
-      ERREXIT(cinfo, JERR_CANT_SUSPEND);
-  }
-}
-
-
-LOCAL(void)
-emit_marker (j_compress_ptr cinfo, JPEG_MARKER mark)
-/* Emit a marker code */
-{
-  emit_byte(cinfo, 0xFF);
-  emit_byte(cinfo, (int) mark);
-}
-
-
-LOCAL(void)
-emit_2bytes (j_compress_ptr cinfo, int value)
-/* Emit a 2-byte integer; these are always MSB first in JPEG files */
-{
-  emit_byte(cinfo, (value >> 8) & 0xFF);
-  emit_byte(cinfo, value & 0xFF);
-}
-
-
-/*
- * Routines to write specific marker types.
- */
-
-LOCAL(int)
-emit_dqt (j_compress_ptr cinfo, int index)
-/* Emit a DQT marker */
-/* Returns the precision used (0 = 8bits, 1 = 16bits) for baseline checking */
-{
-  JQUANT_TBL * qtbl = cinfo->quant_tbl_ptrs[index];
-  int prec;
-  int i;
-
-  if (qtbl == NULL)
-    ERREXIT1(cinfo, JERR_NO_QUANT_TABLE, index);
-
-  prec = 0;
-  for (i = 0; i <= cinfo->lim_Se; i++) {
-    if (qtbl->quantval[cinfo->natural_order[i]] > 255)
-      prec = 1;
-  }
-
-  if (! qtbl->sent_table) {
-    emit_marker(cinfo, M_DQT);
-
-    emit_2bytes(cinfo,
-      prec ? cinfo->lim_Se * 2 + 2 + 1 + 2 : cinfo->lim_Se + 1 + 1 + 2);
-
-    emit_byte(cinfo, index + (prec<<4));
-
-    for (i = 0; i <= cinfo->lim_Se; i++) {
-      /* The table entries must be emitted in zigzag order. */
-      unsigned int qval = qtbl->quantval[cinfo->natural_order[i]];
-      if (prec)
-	emit_byte(cinfo, (int) (qval >> 8));
-      emit_byte(cinfo, (int) (qval & 0xFF));
-    }
-
-    qtbl->sent_table = TRUE;
-  }
-
-  return prec;
-}
-
-
-LOCAL(void)
-emit_dht (j_compress_ptr cinfo, int index, boolean is_ac)
-/* Emit a DHT marker */
-{
-  JHUFF_TBL * htbl;
-  int length, i;
-  
-  if (is_ac) {
-    htbl = cinfo->ac_huff_tbl_ptrs[index];
-    index += 0x10;		/* output index has AC bit set */
-  } else {
-    htbl = cinfo->dc_huff_tbl_ptrs[index];
-  }
-
-  if (htbl == NULL)
-    ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, index);
-  
-  if (! htbl->sent_table) {
-    emit_marker(cinfo, M_DHT);
-    
-    length = 0;
-    for (i = 1; i <= 16; i++)
-      length += htbl->bits[i];
-    
-    emit_2bytes(cinfo, length + 2 + 1 + 16);
-    emit_byte(cinfo, index);
-    
-    for (i = 1; i <= 16; i++)
-      emit_byte(cinfo, htbl->bits[i]);
-    
-    for (i = 0; i < length; i++)
-      emit_byte(cinfo, htbl->huffval[i]);
-    
-    htbl->sent_table = TRUE;
-  }
-}
-
-
-LOCAL(void)
-emit_dac (j_compress_ptr cinfo)
-/* Emit a DAC marker */
-/* Since the useful info is so small, we want to emit all the tables in */
-/* one DAC marker.  Therefore this routine does its own scan of the table. */
-{
-#ifdef C_ARITH_CODING_SUPPORTED
-  char dc_in_use[NUM_ARITH_TBLS];
-  char ac_in_use[NUM_ARITH_TBLS];
-  int length, i;
-  jpeg_component_info *compptr;
-
-  for (i = 0; i < NUM_ARITH_TBLS; i++)
-    dc_in_use[i] = ac_in_use[i] = 0;
-
-  for (i = 0; i < cinfo->comps_in_scan; i++) {
-    compptr = cinfo->cur_comp_info[i];
-    /* DC needs no table for refinement scan */
-    if (cinfo->Ss == 0 && cinfo->Ah == 0)
-      dc_in_use[compptr->dc_tbl_no] = 1;
-    /* AC needs no table when not present */
-    if (cinfo->Se)
-      ac_in_use[compptr->ac_tbl_no] = 1;
-  }
-
-  length = 0;
-  for (i = 0; i < NUM_ARITH_TBLS; i++)
-    length += dc_in_use[i] + ac_in_use[i];
-
-  if (length) {
-    emit_marker(cinfo, M_DAC);
-
-    emit_2bytes(cinfo, length*2 + 2);
-
-    for (i = 0; i < NUM_ARITH_TBLS; i++) {
-      if (dc_in_use[i]) {
-	emit_byte(cinfo, i);
-	emit_byte(cinfo, cinfo->arith_dc_L[i] + (cinfo->arith_dc_U[i]<<4));
-      }
-      if (ac_in_use[i]) {
-	emit_byte(cinfo, i + 0x10);
-	emit_byte(cinfo, cinfo->arith_ac_K[i]);
-      }
-    }
-  }
-#endif /* C_ARITH_CODING_SUPPORTED */
-}
-
-
-LOCAL(void)
-emit_dri (j_compress_ptr cinfo)
-/* Emit a DRI marker */
-{
-  emit_marker(cinfo, M_DRI);
-  
-  emit_2bytes(cinfo, 4);	/* fixed length */
-
-  emit_2bytes(cinfo, (int) cinfo->restart_interval);
-}
-
-
-LOCAL(void)
-emit_sof (j_compress_ptr cinfo, JPEG_MARKER code)
-/* Emit a SOF marker */
-{
-  int ci;
-  jpeg_component_info *compptr;
-  
-  emit_marker(cinfo, code);
-  
-  emit_2bytes(cinfo, 3 * cinfo->num_components + 2 + 5 + 1); /* length */
-
-  /* Make sure image isn't bigger than SOF field can handle */
-  if ((long) cinfo->jpeg_height > 65535L ||
-      (long) cinfo->jpeg_width > 65535L)
-    ERREXIT1(cinfo, JERR_IMAGE_TOO_BIG, (unsigned int) 65535);
-
-  emit_byte(cinfo, cinfo->data_precision);
-  emit_2bytes(cinfo, (int) cinfo->jpeg_height);
-  emit_2bytes(cinfo, (int) cinfo->jpeg_width);
-
-  emit_byte(cinfo, cinfo->num_components);
-
-  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-       ci++, compptr++) {
-    emit_byte(cinfo, compptr->component_id);
-    emit_byte(cinfo, (compptr->h_samp_factor << 4) + compptr->v_samp_factor);
-    emit_byte(cinfo, compptr->quant_tbl_no);
-  }
-}
-
-
-LOCAL(void)
-emit_sos (j_compress_ptr cinfo)
-/* Emit a SOS marker */
-{
-  int i, td, ta;
-  jpeg_component_info *compptr;
-  
-  emit_marker(cinfo, M_SOS);
-  
-  emit_2bytes(cinfo, 2 * cinfo->comps_in_scan + 2 + 1 + 3); /* length */
-  
-  emit_byte(cinfo, cinfo->comps_in_scan);
-  
-  for (i = 0; i < cinfo->comps_in_scan; i++) {
-    compptr = cinfo->cur_comp_info[i];
-    emit_byte(cinfo, compptr->component_id);
-
-    /* We emit 0 for unused field(s); this is recommended by the P&M text
-     * but does not seem to be specified in the standard.
-     */
-
-    /* DC needs no table for refinement scan */
-    td = cinfo->Ss == 0 && cinfo->Ah == 0 ? compptr->dc_tbl_no : 0;
-    /* AC needs no table when not present */
-    ta = cinfo->Se ? compptr->ac_tbl_no : 0;
-
-    emit_byte(cinfo, (td << 4) + ta);
-  }
-
-  emit_byte(cinfo, cinfo->Ss);
-  emit_byte(cinfo, cinfo->Se);
-  emit_byte(cinfo, (cinfo->Ah << 4) + cinfo->Al);
-}
-
-
-LOCAL(void)
-emit_pseudo_sos (j_compress_ptr cinfo)
-/* Emit a pseudo SOS marker */
-{
-  emit_marker(cinfo, M_SOS);
-  
-  emit_2bytes(cinfo, 2 + 1 + 3); /* length */
-  
-  emit_byte(cinfo, 0); /* Ns */
-
-  emit_byte(cinfo, 0); /* Ss */
-  emit_byte(cinfo, cinfo->block_size * cinfo->block_size - 1); /* Se */
-  emit_byte(cinfo, 0); /* Ah/Al */
-}
-
-
-LOCAL(void)
-emit_jfif_app0 (j_compress_ptr cinfo)
-/* Emit a JFIF-compliant APP0 marker */
-{
-  /*
-   * Length of APP0 block	(2 bytes)
-   * Block ID			(4 bytes - ASCII "JFIF")
-   * Zero byte			(1 byte to terminate the ID string)
-   * Version Major, Minor	(2 bytes - major first)
-   * Units			(1 byte - 0x00 = none, 0x01 = inch, 0x02 = cm)
-   * Xdpu			(2 bytes - dots per unit horizontal)
-   * Ydpu			(2 bytes - dots per unit vertical)
-   * Thumbnail X size		(1 byte)
-   * Thumbnail Y size		(1 byte)
-   */
-  
-  emit_marker(cinfo, M_APP0);
-  
-  emit_2bytes(cinfo, 2 + 4 + 1 + 2 + 1 + 2 + 2 + 1 + 1); /* length */
-
-  emit_byte(cinfo, 0x4A);	/* Identifier: ASCII "JFIF" */
-  emit_byte(cinfo, 0x46);
-  emit_byte(cinfo, 0x49);
-  emit_byte(cinfo, 0x46);
-  emit_byte(cinfo, 0);
-  emit_byte(cinfo, cinfo->JFIF_major_version); /* Version fields */
-  emit_byte(cinfo, cinfo->JFIF_minor_version);
-  emit_byte(cinfo, cinfo->density_unit); /* Pixel size information */
-  emit_2bytes(cinfo, (int) cinfo->X_density);
-  emit_2bytes(cinfo, (int) cinfo->Y_density);
-  emit_byte(cinfo, 0);		/* No thumbnail image */
-  emit_byte(cinfo, 0);
-}
-
-
-LOCAL(void)
-emit_adobe_app14 (j_compress_ptr cinfo)
-/* Emit an Adobe APP14 marker */
-{
-  /*
-   * Length of APP14 block	(2 bytes)
-   * Block ID			(5 bytes - ASCII "Adobe")
-   * Version Number		(2 bytes - currently 100)
-   * Flags0			(2 bytes - currently 0)
-   * Flags1			(2 bytes - currently 0)
-   * Color transform		(1 byte)
-   *
-   * Although Adobe TN 5116 mentions Version = 101, all the Adobe files
-   * now in circulation seem to use Version = 100, so that's what we write.
-   *
-   * We write the color transform byte as 1 if the JPEG color space is
-   * YCbCr, 2 if it's YCCK, 0 otherwise.  Adobe's definition has to do with
-   * whether the encoder performed a transformation, which is pretty useless.
-   */
-  
-  emit_marker(cinfo, M_APP14);
-  
-  emit_2bytes(cinfo, 2 + 5 + 2 + 2 + 2 + 1); /* length */
-
-  emit_byte(cinfo, 0x41);	/* Identifier: ASCII "Adobe" */
-  emit_byte(cinfo, 0x64);
-  emit_byte(cinfo, 0x6F);
-  emit_byte(cinfo, 0x62);
-  emit_byte(cinfo, 0x65);
-  emit_2bytes(cinfo, 100);	/* Version */
-  emit_2bytes(cinfo, 0);	/* Flags0 */
-  emit_2bytes(cinfo, 0);	/* Flags1 */
-  switch (cinfo->jpeg_color_space) {
-  case JCS_YCbCr:
-    emit_byte(cinfo, 1);	/* Color transform = 1 */
-    break;
-  case JCS_YCCK:
-    emit_byte(cinfo, 2);	/* Color transform = 2 */
-    break;
-  default:
-    emit_byte(cinfo, 0);	/* Color transform = 0 */
-    break;
-  }
-}
-
-
-/*
- * These routines allow writing an arbitrary marker with parameters.
- * The only intended use is to emit COM or APPn markers after calling
- * write_file_header and before calling write_frame_header.
- * Other uses are not guaranteed to produce desirable results.
- * Counting the parameter bytes properly is the caller's responsibility.
- */
-
-METHODDEF(void)
-write_marker_header (j_compress_ptr cinfo, int marker, unsigned int datalen)
-/* Emit an arbitrary marker header */
-{
-  if (datalen > (unsigned int) 65533)		/* safety check */
-    ERREXIT(cinfo, JERR_BAD_LENGTH);
-
-  emit_marker(cinfo, (JPEG_MARKER) marker);
-
-  emit_2bytes(cinfo, (int) (datalen + 2));	/* total length */
-}
-
-METHODDEF(void)
-write_marker_byte (j_compress_ptr cinfo, int val)
-/* Emit one byte of marker parameters following write_marker_header */
-{
-  emit_byte(cinfo, val);
-}
-
-
-/*
- * Write datastream header.
- * This consists of an SOI and optional APPn markers.
- * We recommend use of the JFIF marker, but not the Adobe marker,
- * when using YCbCr or grayscale data.  The JFIF marker should NOT
- * be used for any other JPEG colorspace.  The Adobe marker is helpful
- * to distinguish RGB, CMYK, and YCCK colorspaces.
- * Note that an application can write additional header markers after
- * jpeg_start_compress returns.
- */
-
-METHODDEF(void)
-write_file_header (j_compress_ptr cinfo)
-{
-  my_marker_ptr marker = (my_marker_ptr) cinfo->marker;
-
-  emit_marker(cinfo, M_SOI);	/* first the SOI */
-
-  /* SOI is defined to reset restart interval to 0 */
-  marker->last_restart_interval = 0;
-
-  if (cinfo->write_JFIF_header)	/* next an optional JFIF APP0 */
-    emit_jfif_app0(cinfo);
-  if (cinfo->write_Adobe_marker) /* next an optional Adobe APP14 */
-    emit_adobe_app14(cinfo);
-}
-
-
-/*
- * Write frame header.
- * This consists of DQT and SOFn markers, and a conditional pseudo SOS marker.
- * Note that we do not emit the SOF until we have emitted the DQT(s).
- * This avoids compatibility problems with incorrect implementations that
- * try to error-check the quant table numbers as soon as they see the SOF.
- */
-
-METHODDEF(void)
-write_frame_header (j_compress_ptr cinfo)
-{
-  int ci, prec;
-  boolean is_baseline;
-  jpeg_component_info *compptr;
-  
-  /* Emit DQT for each quantization table.
-   * Note that emit_dqt() suppresses any duplicate tables.
-   */
-  prec = 0;
-  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-       ci++, compptr++) {
-    prec += emit_dqt(cinfo, compptr->quant_tbl_no);
-  }
-  /* now prec is nonzero iff there are any 16-bit quant tables. */
-
-  /* Check for a non-baseline specification.
-   * Note we assume that Huffman table numbers won't be changed later.
-   */
-  if (cinfo->arith_code || cinfo->progressive_mode ||
-      cinfo->data_precision != 8 || cinfo->block_size != DCTSIZE) {
-    is_baseline = FALSE;
-  } else {
-    is_baseline = TRUE;
-    for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-	 ci++, compptr++) {
-      if (compptr->dc_tbl_no > 1 || compptr->ac_tbl_no > 1)
-	is_baseline = FALSE;
-    }
-    if (prec && is_baseline) {
-      is_baseline = FALSE;
-      /* If it's baseline except for quantizer size, warn the user */
-      TRACEMS(cinfo, 0, JTRC_16BIT_TABLES);
-    }
-  }
-
-  /* Emit the proper SOF marker */
-  if (cinfo->arith_code) {
-    if (cinfo->progressive_mode)
-      emit_sof(cinfo, M_SOF10); /* SOF code for progressive arithmetic */
-    else
-      emit_sof(cinfo, M_SOF9);  /* SOF code for sequential arithmetic */
-  } else {
-    if (cinfo->progressive_mode)
-      emit_sof(cinfo, M_SOF2);	/* SOF code for progressive Huffman */
-    else if (is_baseline)
-      emit_sof(cinfo, M_SOF0);	/* SOF code for baseline implementation */
-    else
-      emit_sof(cinfo, M_SOF1);	/* SOF code for non-baseline Huffman file */
-  }
-
-  /* Check to emit pseudo SOS marker */
-  if (cinfo->progressive_mode && cinfo->block_size != DCTSIZE)
-    emit_pseudo_sos(cinfo);
-}
-
-
-/*
- * Write scan header.
- * This consists of DHT or DAC markers, optional DRI, and SOS.
- * Compressed data will be written following the SOS.
- */
-
-METHODDEF(void)
-write_scan_header (j_compress_ptr cinfo)
-{
-  my_marker_ptr marker = (my_marker_ptr) cinfo->marker;
-  int i;
-  jpeg_component_info *compptr;
-
-  if (cinfo->arith_code) {
-    /* Emit arith conditioning info.  We may have some duplication
-     * if the file has multiple scans, but it's so small it's hardly
-     * worth worrying about.
-     */
-    emit_dac(cinfo);
-  } else {
-    /* Emit Huffman tables.
-     * Note that emit_dht() suppresses any duplicate tables.
-     */
-    for (i = 0; i < cinfo->comps_in_scan; i++) {
-      compptr = cinfo->cur_comp_info[i];
-      /* DC needs no table for refinement scan */
-      if (cinfo->Ss == 0 && cinfo->Ah == 0)
-	emit_dht(cinfo, compptr->dc_tbl_no, FALSE);
-      /* AC needs no table when not present */
-      if (cinfo->Se)
-	emit_dht(cinfo, compptr->ac_tbl_no, TRUE);
-    }
-  }
-
-  /* Emit DRI if required --- note that DRI value could change for each scan.
-   * We avoid wasting space with unnecessary DRIs, however.
-   */
-  if (cinfo->restart_interval != marker->last_restart_interval) {
-    emit_dri(cinfo);
-    marker->last_restart_interval = cinfo->restart_interval;
-  }
-
-  emit_sos(cinfo);
-}
-
-
-/*
- * Write datastream trailer.
- */
-
-METHODDEF(void)
-write_file_trailer (j_compress_ptr cinfo)
-{
-  emit_marker(cinfo, M_EOI);
-}
-
-
-/*
- * Write an abbreviated table-specification datastream.
- * This consists of SOI, DQT and DHT tables, and EOI.
- * Any table that is defined and not marked sent_table = TRUE will be
- * emitted.  Note that all tables will be marked sent_table = TRUE at exit.
- */
-
-METHODDEF(void)
-write_tables_only (j_compress_ptr cinfo)
-{
-  int i;
-
-  emit_marker(cinfo, M_SOI);
-
-  for (i = 0; i < NUM_QUANT_TBLS; i++) {
-    if (cinfo->quant_tbl_ptrs[i] != NULL)
-      (void) emit_dqt(cinfo, i);
-  }
-
-  if (! cinfo->arith_code) {
-    for (i = 0; i < NUM_HUFF_TBLS; i++) {
-      if (cinfo->dc_huff_tbl_ptrs[i] != NULL)
-	emit_dht(cinfo, i, FALSE);
-      if (cinfo->ac_huff_tbl_ptrs[i] != NULL)
-	emit_dht(cinfo, i, TRUE);
-    }
-  }
-
-  emit_marker(cinfo, M_EOI);
-}
-
-
-/*
- * Initialize the marker writer module.
- */
-
-GLOBAL(void)
-jinit_marker_writer (j_compress_ptr cinfo)
-{
-  my_marker_ptr marker;
-
-  /* Create the subobject */
-  marker = (my_marker_ptr)
-    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				SIZEOF(my_marker_writer));
-  cinfo->marker = (struct jpeg_marker_writer *) marker;
-  /* Initialize method pointers */
-  marker->pub.write_file_header = write_file_header;
-  marker->pub.write_frame_header = write_frame_header;
-  marker->pub.write_scan_header = write_scan_header;
-  marker->pub.write_file_trailer = write_file_trailer;
-  marker->pub.write_tables_only = write_tables_only;
-  marker->pub.write_marker_header = write_marker_header;
-  marker->pub.write_marker_byte = write_marker_byte;
-  /* Initialize private state */
-  marker->last_restart_interval = 0;
-}
+/*
+ * jcmarker.c
+ *
+ * Copyright (C) 1991-1998, Thomas G. Lane.
+ * Modified 2003-2012 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains routines to write JPEG datastream markers.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+typedef enum {			/* JPEG marker codes */
+  M_SOF0  = 0xc0,
+  M_SOF1  = 0xc1,
+  M_SOF2  = 0xc2,
+  M_SOF3  = 0xc3,
+
+  M_SOF5  = 0xc5,
+  M_SOF6  = 0xc6,
+  M_SOF7  = 0xc7,
+
+  M_JPG   = 0xc8,
+  M_SOF9  = 0xc9,
+  M_SOF10 = 0xca,
+  M_SOF11 = 0xcb,
+
+  M_SOF13 = 0xcd,
+  M_SOF14 = 0xce,
+  M_SOF15 = 0xcf,
+
+  M_DHT   = 0xc4,
+
+  M_DAC   = 0xcc,
+
+  M_RST0  = 0xd0,
+  M_RST1  = 0xd1,
+  M_RST2  = 0xd2,
+  M_RST3  = 0xd3,
+  M_RST4  = 0xd4,
+  M_RST5  = 0xd5,
+  M_RST6  = 0xd6,
+  M_RST7  = 0xd7,
+
+  M_SOI   = 0xd8,
+  M_EOI   = 0xd9,
+  M_SOS   = 0xda,
+  M_DQT   = 0xdb,
+  M_DNL   = 0xdc,
+  M_DRI   = 0xdd,
+  M_DHP   = 0xde,
+  M_EXP   = 0xdf,
+
+  M_APP0  = 0xe0,
+  M_APP1  = 0xe1,
+  M_APP2  = 0xe2,
+  M_APP3  = 0xe3,
+  M_APP4  = 0xe4,
+  M_APP5  = 0xe5,
+  M_APP6  = 0xe6,
+  M_APP7  = 0xe7,
+  M_APP8  = 0xe8,
+  M_APP9  = 0xe9,
+  M_APP10 = 0xea,
+  M_APP11 = 0xeb,
+  M_APP12 = 0xec,
+  M_APP13 = 0xed,
+  M_APP14 = 0xee,
+  M_APP15 = 0xef,
+
+  M_JPG0  = 0xf0,
+  M_JPG8  = 0xf8,
+  M_JPG13 = 0xfd,
+  M_COM   = 0xfe,
+
+  M_TEM   = 0x01,
+
+  M_ERROR = 0x100
+} JPEG_MARKER;
+
+
+/* Private state */
+
+typedef struct {
+  struct jpeg_marker_writer pub; /* public fields */
+
+  unsigned int last_restart_interval; /* last DRI value emitted; 0 after SOI */
+} my_marker_writer;
+
+typedef my_marker_writer * my_marker_ptr;
+
+
+/*
+ * Basic output routines.
+ *
+ * Note that we do not support suspension while writing a marker.
+ * Therefore, an application using suspension must ensure that there is
+ * enough buffer space for the initial markers (typ. 600-700 bytes) before
+ * calling jpeg_start_compress, and enough space to write the trailing EOI
+ * (a few bytes) before calling jpeg_finish_compress.  Multipass compression
+ * modes are not supported at all with suspension, so those two are the only
+ * points where markers will be written.
+ */
+
+LOCAL(void)
+emit_byte (j_compress_ptr cinfo, int val)
+/* Emit a byte */
+{
+  struct jpeg_destination_mgr * dest = cinfo->dest;
+
+  *(dest->next_output_byte)++ = (JOCTET) val;
+  if (--dest->free_in_buffer == 0) {
+    if (! (*dest->empty_output_buffer) (cinfo))
+      ERREXIT(cinfo, JERR_CANT_SUSPEND);
+  }
+}
+
+
+LOCAL(void)
+emit_marker (j_compress_ptr cinfo, JPEG_MARKER mark)
+/* Emit a marker code */
+{
+  emit_byte(cinfo, 0xFF);
+  emit_byte(cinfo, (int) mark);
+}
+
+
+LOCAL(void)
+emit_2bytes (j_compress_ptr cinfo, int value)
+/* Emit a 2-byte integer; these are always MSB first in JPEG files */
+{
+  emit_byte(cinfo, (value >> 8) & 0xFF);
+  emit_byte(cinfo, value & 0xFF);
+}
+
+
+/*
+ * Routines to write specific marker types.
+ */
+
+LOCAL(int)
+emit_dqt (j_compress_ptr cinfo, int index)
+/* Emit a DQT marker */
+/* Returns the precision used (0 = 8bits, 1 = 16bits) for baseline checking */
+{
+  JQUANT_TBL * qtbl = cinfo->quant_tbl_ptrs[index];
+  int prec;
+  int i;
+
+  if (qtbl == NULL)
+    ERREXIT1(cinfo, JERR_NO_QUANT_TABLE, index);
+
+  prec = 0;
+  for (i = 0; i <= cinfo->lim_Se; i++) {
+    if (qtbl->quantval[cinfo->natural_order[i]] > 255)
+      prec = 1;
+  }
+
+  if (! qtbl->sent_table) {
+    emit_marker(cinfo, M_DQT);
+
+    emit_2bytes(cinfo,
+      prec ? cinfo->lim_Se * 2 + 2 + 1 + 2 : cinfo->lim_Se + 1 + 1 + 2);
+
+    emit_byte(cinfo, index + (prec<<4));
+
+    for (i = 0; i <= cinfo->lim_Se; i++) {
+      /* The table entries must be emitted in zigzag order. */
+      unsigned int qval = qtbl->quantval[cinfo->natural_order[i]];
+      if (prec)
+	emit_byte(cinfo, (int) (qval >> 8));
+      emit_byte(cinfo, (int) (qval & 0xFF));
+    }
+
+    qtbl->sent_table = TRUE;
+  }
+
+  return prec;
+}
+
+
+LOCAL(void)
+emit_dht (j_compress_ptr cinfo, int index, boolean is_ac)
+/* Emit a DHT marker */
+{
+  JHUFF_TBL * htbl;
+  int length, i;
+  
+  if (is_ac) {
+    htbl = cinfo->ac_huff_tbl_ptrs[index];
+    index += 0x10;		/* output index has AC bit set */
+  } else {
+    htbl = cinfo->dc_huff_tbl_ptrs[index];
+  }
+
+  if (htbl == NULL)
+    ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, index);
+  
+  if (! htbl->sent_table) {
+    emit_marker(cinfo, M_DHT);
+    
+    length = 0;
+    for (i = 1; i <= 16; i++)
+      length += htbl->bits[i];
+    
+    emit_2bytes(cinfo, length + 2 + 1 + 16);
+    emit_byte(cinfo, index);
+    
+    for (i = 1; i <= 16; i++)
+      emit_byte(cinfo, htbl->bits[i]);
+    
+    for (i = 0; i < length; i++)
+      emit_byte(cinfo, htbl->huffval[i]);
+    
+    htbl->sent_table = TRUE;
+  }
+}
+
+
+LOCAL(void)
+emit_dac (j_compress_ptr cinfo)
+/* Emit a DAC marker */
+/* Since the useful info is so small, we want to emit all the tables in */
+/* one DAC marker.  Therefore this routine does its own scan of the table. */
+{
+#ifdef C_ARITH_CODING_SUPPORTED
+  char dc_in_use[NUM_ARITH_TBLS];
+  char ac_in_use[NUM_ARITH_TBLS];
+  int length, i;
+  jpeg_component_info *compptr;
+
+  for (i = 0; i < NUM_ARITH_TBLS; i++)
+    dc_in_use[i] = ac_in_use[i] = 0;
+
+  for (i = 0; i < cinfo->comps_in_scan; i++) {
+    compptr = cinfo->cur_comp_info[i];
+    /* DC needs no table for refinement scan */
+    if (cinfo->Ss == 0 && cinfo->Ah == 0)
+      dc_in_use[compptr->dc_tbl_no] = 1;
+    /* AC needs no table when not present */
+    if (cinfo->Se)
+      ac_in_use[compptr->ac_tbl_no] = 1;
+  }
+
+  length = 0;
+  for (i = 0; i < NUM_ARITH_TBLS; i++)
+    length += dc_in_use[i] + ac_in_use[i];
+
+  if (length) {
+    emit_marker(cinfo, M_DAC);
+
+    emit_2bytes(cinfo, length*2 + 2);
+
+    for (i = 0; i < NUM_ARITH_TBLS; i++) {
+      if (dc_in_use[i]) {
+	emit_byte(cinfo, i);
+	emit_byte(cinfo, cinfo->arith_dc_L[i] + (cinfo->arith_dc_U[i]<<4));
+      }
+      if (ac_in_use[i]) {
+	emit_byte(cinfo, i + 0x10);
+	emit_byte(cinfo, cinfo->arith_ac_K[i]);
+      }
+    }
+  }
+#endif /* C_ARITH_CODING_SUPPORTED */
+}
+
+
+LOCAL(void)
+emit_dri (j_compress_ptr cinfo)
+/* Emit a DRI marker */
+{
+  emit_marker(cinfo, M_DRI);
+  
+  emit_2bytes(cinfo, 4);	/* fixed length */
+
+  emit_2bytes(cinfo, (int) cinfo->restart_interval);
+}
+
+
+LOCAL(void)
+emit_lse_ict (j_compress_ptr cinfo)
+/* Emit an LSE inverse color transform specification marker */
+{
+  /* Support only 1 transform */
+  if (cinfo->color_transform != JCT_SUBTRACT_GREEN ||
+      cinfo->num_components < 3)
+    ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+
+  emit_marker(cinfo, M_JPG8);
+  
+  emit_2bytes(cinfo, 24);	/* fixed length */
+
+  emit_byte(cinfo, 0x0D);	/* ID inverse transform specification */
+  emit_2bytes(cinfo, MAXJSAMPLE);	/* MAXTRANS */
+  emit_byte(cinfo, 3);		/* Nt=3 */
+  emit_byte(cinfo, cinfo->comp_info[1].component_id);
+  emit_byte(cinfo, cinfo->comp_info[0].component_id);
+  emit_byte(cinfo, cinfo->comp_info[2].component_id);
+  emit_byte(cinfo, 0x80);	/* F1: CENTER1=1, NORM1=0 */
+  emit_2bytes(cinfo, 0);	/* A(1,1)=0 */
+  emit_2bytes(cinfo, 0);	/* A(1,2)=0 */
+  emit_byte(cinfo, 0);		/* F2: CENTER2=0, NORM2=0 */
+  emit_2bytes(cinfo, 1);	/* A(2,1)=1 */
+  emit_2bytes(cinfo, 0);	/* A(2,2)=0 */
+  emit_byte(cinfo, 0);		/* F3: CENTER3=0, NORM3=0 */
+  emit_2bytes(cinfo, 1);	/* A(3,1)=1 */
+  emit_2bytes(cinfo, 0);	/* A(3,2)=0 */
+}
+
+
+LOCAL(void)
+emit_sof (j_compress_ptr cinfo, JPEG_MARKER code)
+/* Emit a SOF marker */
+{
+  int ci;
+  jpeg_component_info *compptr;
+  
+  emit_marker(cinfo, code);
+  
+  emit_2bytes(cinfo, 3 * cinfo->num_components + 2 + 5 + 1); /* length */
+
+  /* Make sure image isn't bigger than SOF field can handle */
+  if ((long) cinfo->jpeg_height > 65535L ||
+      (long) cinfo->jpeg_width > 65535L)
+    ERREXIT1(cinfo, JERR_IMAGE_TOO_BIG, (unsigned int) 65535);
+
+  emit_byte(cinfo, cinfo->data_precision);
+  emit_2bytes(cinfo, (int) cinfo->jpeg_height);
+  emit_2bytes(cinfo, (int) cinfo->jpeg_width);
+
+  emit_byte(cinfo, cinfo->num_components);
+
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    emit_byte(cinfo, compptr->component_id);
+    emit_byte(cinfo, (compptr->h_samp_factor << 4) + compptr->v_samp_factor);
+    emit_byte(cinfo, compptr->quant_tbl_no);
+  }
+}
+
+
+LOCAL(void)
+emit_sos (j_compress_ptr cinfo)
+/* Emit a SOS marker */
+{
+  int i, td, ta;
+  jpeg_component_info *compptr;
+  
+  emit_marker(cinfo, M_SOS);
+  
+  emit_2bytes(cinfo, 2 * cinfo->comps_in_scan + 2 + 1 + 3); /* length */
+  
+  emit_byte(cinfo, cinfo->comps_in_scan);
+  
+  for (i = 0; i < cinfo->comps_in_scan; i++) {
+    compptr = cinfo->cur_comp_info[i];
+    emit_byte(cinfo, compptr->component_id);
+
+    /* We emit 0 for unused field(s); this is recommended by the P&M text
+     * but does not seem to be specified in the standard.
+     */
+
+    /* DC needs no table for refinement scan */
+    td = cinfo->Ss == 0 && cinfo->Ah == 0 ? compptr->dc_tbl_no : 0;
+    /* AC needs no table when not present */
+    ta = cinfo->Se ? compptr->ac_tbl_no : 0;
+
+    emit_byte(cinfo, (td << 4) + ta);
+  }
+
+  emit_byte(cinfo, cinfo->Ss);
+  emit_byte(cinfo, cinfo->Se);
+  emit_byte(cinfo, (cinfo->Ah << 4) + cinfo->Al);
+}
+
+
+LOCAL(void)
+emit_pseudo_sos (j_compress_ptr cinfo)
+/* Emit a pseudo SOS marker */
+{
+  emit_marker(cinfo, M_SOS);
+  
+  emit_2bytes(cinfo, 2 + 1 + 3); /* length */
+  
+  emit_byte(cinfo, 0); /* Ns */
+
+  emit_byte(cinfo, 0); /* Ss */
+  emit_byte(cinfo, cinfo->block_size * cinfo->block_size - 1); /* Se */
+  emit_byte(cinfo, 0); /* Ah/Al */
+}
+
+
+LOCAL(void)
+emit_jfif_app0 (j_compress_ptr cinfo)
+/* Emit a JFIF-compliant APP0 marker */
+{
+  /*
+   * Length of APP0 block	(2 bytes)
+   * Block ID			(4 bytes - ASCII "JFIF")
+   * Zero byte			(1 byte to terminate the ID string)
+   * Version Major, Minor	(2 bytes - major first)
+   * Units			(1 byte - 0x00 = none, 0x01 = inch, 0x02 = cm)
+   * Xdpu			(2 bytes - dots per unit horizontal)
+   * Ydpu			(2 bytes - dots per unit vertical)
+   * Thumbnail X size		(1 byte)
+   * Thumbnail Y size		(1 byte)
+   */
+  
+  emit_marker(cinfo, M_APP0);
+  
+  emit_2bytes(cinfo, 2 + 4 + 1 + 2 + 1 + 2 + 2 + 1 + 1); /* length */
+
+  emit_byte(cinfo, 0x4A);	/* Identifier: ASCII "JFIF" */
+  emit_byte(cinfo, 0x46);
+  emit_byte(cinfo, 0x49);
+  emit_byte(cinfo, 0x46);
+  emit_byte(cinfo, 0);
+  emit_byte(cinfo, cinfo->JFIF_major_version); /* Version fields */
+  emit_byte(cinfo, cinfo->JFIF_minor_version);
+  emit_byte(cinfo, cinfo->density_unit); /* Pixel size information */
+  emit_2bytes(cinfo, (int) cinfo->X_density);
+  emit_2bytes(cinfo, (int) cinfo->Y_density);
+  emit_byte(cinfo, 0);		/* No thumbnail image */
+  emit_byte(cinfo, 0);
+}
+
+
+LOCAL(void)
+emit_adobe_app14 (j_compress_ptr cinfo)
+/* Emit an Adobe APP14 marker */
+{
+  /*
+   * Length of APP14 block	(2 bytes)
+   * Block ID			(5 bytes - ASCII "Adobe")
+   * Version Number		(2 bytes - currently 100)
+   * Flags0			(2 bytes - currently 0)
+   * Flags1			(2 bytes - currently 0)
+   * Color transform		(1 byte)
+   *
+   * Although Adobe TN 5116 mentions Version = 101, all the Adobe files
+   * now in circulation seem to use Version = 100, so that's what we write.
+   *
+   * We write the color transform byte as 1 if the JPEG color space is
+   * YCbCr, 2 if it's YCCK, 0 otherwise.  Adobe's definition has to do with
+   * whether the encoder performed a transformation, which is pretty useless.
+   */
+  
+  emit_marker(cinfo, M_APP14);
+  
+  emit_2bytes(cinfo, 2 + 5 + 2 + 2 + 2 + 1); /* length */
+
+  emit_byte(cinfo, 0x41);	/* Identifier: ASCII "Adobe" */
+  emit_byte(cinfo, 0x64);
+  emit_byte(cinfo, 0x6F);
+  emit_byte(cinfo, 0x62);
+  emit_byte(cinfo, 0x65);
+  emit_2bytes(cinfo, 100);	/* Version */
+  emit_2bytes(cinfo, 0);	/* Flags0 */
+  emit_2bytes(cinfo, 0);	/* Flags1 */
+  switch (cinfo->jpeg_color_space) {
+  case JCS_YCbCr:
+    emit_byte(cinfo, 1);	/* Color transform = 1 */
+    break;
+  case JCS_YCCK:
+    emit_byte(cinfo, 2);	/* Color transform = 2 */
+    break;
+  default:
+    emit_byte(cinfo, 0);	/* Color transform = 0 */
+    break;
+  }
+}
+
+
+/*
+ * These routines allow writing an arbitrary marker with parameters.
+ * The only intended use is to emit COM or APPn markers after calling
+ * write_file_header and before calling write_frame_header.
+ * Other uses are not guaranteed to produce desirable results.
+ * Counting the parameter bytes properly is the caller's responsibility.
+ */
+
+METHODDEF(void)
+write_marker_header (j_compress_ptr cinfo, int marker, unsigned int datalen)
+/* Emit an arbitrary marker header */
+{
+  if (datalen > (unsigned int) 65533)		/* safety check */
+    ERREXIT(cinfo, JERR_BAD_LENGTH);
+
+  emit_marker(cinfo, (JPEG_MARKER) marker);
+
+  emit_2bytes(cinfo, (int) (datalen + 2));	/* total length */
+}
+
+METHODDEF(void)
+write_marker_byte (j_compress_ptr cinfo, int val)
+/* Emit one byte of marker parameters following write_marker_header */
+{
+  emit_byte(cinfo, val);
+}
+
+
+/*
+ * Write datastream header.
+ * This consists of an SOI and optional APPn markers.
+ * We recommend use of the JFIF marker, but not the Adobe marker,
+ * when using YCbCr or grayscale data.  The JFIF marker should NOT
+ * be used for any other JPEG colorspace.  The Adobe marker is helpful
+ * to distinguish RGB, CMYK, and YCCK colorspaces.
+ * Note that an application can write additional header markers after
+ * jpeg_start_compress returns.
+ */
+
+METHODDEF(void)
+write_file_header (j_compress_ptr cinfo)
+{
+  my_marker_ptr marker = (my_marker_ptr) cinfo->marker;
+
+  emit_marker(cinfo, M_SOI);	/* first the SOI */
+
+  /* SOI is defined to reset restart interval to 0 */
+  marker->last_restart_interval = 0;
+
+  if (cinfo->write_JFIF_header)	/* next an optional JFIF APP0 */
+    emit_jfif_app0(cinfo);
+  if (cinfo->write_Adobe_marker) /* next an optional Adobe APP14 */
+    emit_adobe_app14(cinfo);
+}
+
+
+/*
+ * Write frame header.
+ * This consists of DQT and SOFn markers,
+ * a conditional LSE marker and a conditional pseudo SOS marker.
+ * Note that we do not emit the SOF until we have emitted the DQT(s).
+ * This avoids compatibility problems with incorrect implementations that
+ * try to error-check the quant table numbers as soon as they see the SOF.
+ */
+
+METHODDEF(void)
+write_frame_header (j_compress_ptr cinfo)
+{
+  int ci, prec;
+  boolean is_baseline;
+  jpeg_component_info *compptr;
+  
+  /* Emit DQT for each quantization table.
+   * Note that emit_dqt() suppresses any duplicate tables.
+   */
+  prec = 0;
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    prec += emit_dqt(cinfo, compptr->quant_tbl_no);
+  }
+  /* now prec is nonzero iff there are any 16-bit quant tables. */
+
+  /* Check for a non-baseline specification.
+   * Note we assume that Huffman table numbers won't be changed later.
+   */
+  if (cinfo->arith_code || cinfo->progressive_mode ||
+      cinfo->data_precision != 8 || cinfo->block_size != DCTSIZE) {
+    is_baseline = FALSE;
+  } else {
+    is_baseline = TRUE;
+    for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+	 ci++, compptr++) {
+      if (compptr->dc_tbl_no > 1 || compptr->ac_tbl_no > 1)
+	is_baseline = FALSE;
+    }
+    if (prec && is_baseline) {
+      is_baseline = FALSE;
+      /* If it's baseline except for quantizer size, warn the user */
+      TRACEMS(cinfo, 0, JTRC_16BIT_TABLES);
+    }
+  }
+
+  /* Emit the proper SOF marker */
+  if (cinfo->arith_code) {
+    if (cinfo->progressive_mode)
+      emit_sof(cinfo, M_SOF10); /* SOF code for progressive arithmetic */
+    else
+      emit_sof(cinfo, M_SOF9);  /* SOF code for sequential arithmetic */
+  } else {
+    if (cinfo->progressive_mode)
+      emit_sof(cinfo, M_SOF2);	/* SOF code for progressive Huffman */
+    else if (is_baseline)
+      emit_sof(cinfo, M_SOF0);	/* SOF code for baseline implementation */
+    else
+      emit_sof(cinfo, M_SOF1);	/* SOF code for non-baseline Huffman file */
+  }
+
+  /* Check to emit LSE inverse color transform specification marker */
+  if (cinfo->color_transform)
+    emit_lse_ict(cinfo);
+
+  /* Check to emit pseudo SOS marker */
+  if (cinfo->progressive_mode && cinfo->block_size != DCTSIZE)
+    emit_pseudo_sos(cinfo);
+}
+
+
+/*
+ * Write scan header.
+ * This consists of DHT or DAC markers, optional DRI, and SOS.
+ * Compressed data will be written following the SOS.
+ */
+
+METHODDEF(void)
+write_scan_header (j_compress_ptr cinfo)
+{
+  my_marker_ptr marker = (my_marker_ptr) cinfo->marker;
+  int i;
+  jpeg_component_info *compptr;
+
+  if (cinfo->arith_code) {
+    /* Emit arith conditioning info.  We may have some duplication
+     * if the file has multiple scans, but it's so small it's hardly
+     * worth worrying about.
+     */
+    emit_dac(cinfo);
+  } else {
+    /* Emit Huffman tables.
+     * Note that emit_dht() suppresses any duplicate tables.
+     */
+    for (i = 0; i < cinfo->comps_in_scan; i++) {
+      compptr = cinfo->cur_comp_info[i];
+      /* DC needs no table for refinement scan */
+      if (cinfo->Ss == 0 && cinfo->Ah == 0)
+	emit_dht(cinfo, compptr->dc_tbl_no, FALSE);
+      /* AC needs no table when not present */
+      if (cinfo->Se)
+	emit_dht(cinfo, compptr->ac_tbl_no, TRUE);
+    }
+  }
+
+  /* Emit DRI if required --- note that DRI value could change for each scan.
+   * We avoid wasting space with unnecessary DRIs, however.
+   */
+  if (cinfo->restart_interval != marker->last_restart_interval) {
+    emit_dri(cinfo);
+    marker->last_restart_interval = cinfo->restart_interval;
+  }
+
+  emit_sos(cinfo);
+}
+
+
+/*
+ * Write datastream trailer.
+ */
+
+METHODDEF(void)
+write_file_trailer (j_compress_ptr cinfo)
+{
+  emit_marker(cinfo, M_EOI);
+}
+
+
+/*
+ * Write an abbreviated table-specification datastream.
+ * This consists of SOI, DQT and DHT tables, and EOI.
+ * Any table that is defined and not marked sent_table = TRUE will be
+ * emitted.  Note that all tables will be marked sent_table = TRUE at exit.
+ */
+
+METHODDEF(void)
+write_tables_only (j_compress_ptr cinfo)
+{
+  int i;
+
+  emit_marker(cinfo, M_SOI);
+
+  for (i = 0; i < NUM_QUANT_TBLS; i++) {
+    if (cinfo->quant_tbl_ptrs[i] != NULL)
+      (void) emit_dqt(cinfo, i);
+  }
+
+  if (! cinfo->arith_code) {
+    for (i = 0; i < NUM_HUFF_TBLS; i++) {
+      if (cinfo->dc_huff_tbl_ptrs[i] != NULL)
+	emit_dht(cinfo, i, FALSE);
+      if (cinfo->ac_huff_tbl_ptrs[i] != NULL)
+	emit_dht(cinfo, i, TRUE);
+    }
+  }
+
+  emit_marker(cinfo, M_EOI);
+}
+
+
+/*
+ * Initialize the marker writer module.
+ */
+
+GLOBAL(void)
+jinit_marker_writer (j_compress_ptr cinfo)
+{
+  my_marker_ptr marker;
+
+  /* Create the subobject */
+  marker = (my_marker_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				SIZEOF(my_marker_writer));
+  cinfo->marker = &marker->pub;
+  /* Initialize method pointers */
+  marker->pub.write_file_header = write_file_header;
+  marker->pub.write_frame_header = write_frame_header;
+  marker->pub.write_scan_header = write_scan_header;
+  marker->pub.write_file_trailer = write_file_trailer;
+  marker->pub.write_tables_only = write_tables_only;
+  marker->pub.write_marker_header = write_marker_header;
+  marker->pub.write_marker_byte = write_marker_byte;
+  /* Initialize private state */
+  marker->last_restart_interval = 0;
+}
diff --git a/plugins/AdvaImg/src/LibJPEG/jcmaster.c b/plugins/AdvaImg/src/LibJPEG/jcmaster.c
index caf80a53b3..ef73194b8b 100644
--- a/plugins/AdvaImg/src/LibJPEG/jcmaster.c
+++ b/plugins/AdvaImg/src/LibJPEG/jcmaster.c
@@ -1,858 +1,858 @@
-/*
- * jcmaster.c
- *
- * Copyright (C) 1991-1997, Thomas G. Lane.
- * Modified 2003-2011 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains master control logic for the JPEG compressor.
- * These routines are concerned with parameter validation, initial setup,
- * and inter-pass control (determining the number of passes and the work 
- * to be done in each pass).
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-
-
-/* Private state */
-
-typedef enum {
-	main_pass,		/* input data, also do first output step */
-	huff_opt_pass,		/* Huffman code optimization pass */
-	output_pass		/* data output pass */
-} c_pass_type;
-
-typedef struct {
-  struct jpeg_comp_master pub;	/* public fields */
-
-  c_pass_type pass_type;	/* the type of the current pass */
-
-  int pass_number;		/* # of passes completed */
-  int total_passes;		/* total # of passes needed */
-
-  int scan_number;		/* current index in scan_info[] */
-} my_comp_master;
-
-typedef my_comp_master * my_master_ptr;
-
-
-/*
- * Support routines that do various essential calculations.
- */
-
-/*
- * Compute JPEG image dimensions and related values.
- * NOTE: this is exported for possible use by application.
- * Hence it mustn't do anything that can't be done twice.
- */
-
-GLOBAL(void)
-jpeg_calc_jpeg_dimensions (j_compress_ptr cinfo)
-/* Do computations that are needed before master selection phase */
-{
-#ifdef DCT_SCALING_SUPPORTED
-
-  /* Sanity check on input image dimensions to prevent overflow in
-   * following calculation.
-   * We do check jpeg_width and jpeg_height in initial_setup below,
-   * but image_width and image_height can come from arbitrary data,
-   * and we need some space for multiplication by block_size.
-   */
-  if (((long) cinfo->image_width >> 24) || ((long) cinfo->image_height >> 24))
-    ERREXIT1(cinfo, JERR_IMAGE_TOO_BIG, (unsigned int) JPEG_MAX_DIMENSION);
-
-  /* Compute actual JPEG image dimensions and DCT scaling choices. */
-  if (cinfo->scale_num >= cinfo->scale_denom * cinfo->block_size) {
-    /* Provide block_size/1 scaling */
-    cinfo->jpeg_width = cinfo->image_width * cinfo->block_size;
-    cinfo->jpeg_height = cinfo->image_height * cinfo->block_size;
-    cinfo->min_DCT_h_scaled_size = 1;
-    cinfo->min_DCT_v_scaled_size = 1;
-  } else if (cinfo->scale_num * 2 >= cinfo->scale_denom * cinfo->block_size) {
-    /* Provide block_size/2 scaling */
-    cinfo->jpeg_width = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_width * cinfo->block_size, 2L);
-    cinfo->jpeg_height = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_height * cinfo->block_size, 2L);
-    cinfo->min_DCT_h_scaled_size = 2;
-    cinfo->min_DCT_v_scaled_size = 2;
-  } else if (cinfo->scale_num * 3 >= cinfo->scale_denom * cinfo->block_size) {
-    /* Provide block_size/3 scaling */
-    cinfo->jpeg_width = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_width * cinfo->block_size, 3L);
-    cinfo->jpeg_height = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_height * cinfo->block_size, 3L);
-    cinfo->min_DCT_h_scaled_size = 3;
-    cinfo->min_DCT_v_scaled_size = 3;
-  } else if (cinfo->scale_num * 4 >= cinfo->scale_denom * cinfo->block_size) {
-    /* Provide block_size/4 scaling */
-    cinfo->jpeg_width = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_width * cinfo->block_size, 4L);
-    cinfo->jpeg_height = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_height * cinfo->block_size, 4L);
-    cinfo->min_DCT_h_scaled_size = 4;
-    cinfo->min_DCT_v_scaled_size = 4;
-  } else if (cinfo->scale_num * 5 >= cinfo->scale_denom * cinfo->block_size) {
-    /* Provide block_size/5 scaling */
-    cinfo->jpeg_width = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_width * cinfo->block_size, 5L);
-    cinfo->jpeg_height = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_height * cinfo->block_size, 5L);
-    cinfo->min_DCT_h_scaled_size = 5;
-    cinfo->min_DCT_v_scaled_size = 5;
-  } else if (cinfo->scale_num * 6 >= cinfo->scale_denom * cinfo->block_size) {
-    /* Provide block_size/6 scaling */
-    cinfo->jpeg_width = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_width * cinfo->block_size, 6L);
-    cinfo->jpeg_height = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_height * cinfo->block_size, 6L);
-    cinfo->min_DCT_h_scaled_size = 6;
-    cinfo->min_DCT_v_scaled_size = 6;
-  } else if (cinfo->scale_num * 7 >= cinfo->scale_denom * cinfo->block_size) {
-    /* Provide block_size/7 scaling */
-    cinfo->jpeg_width = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_width * cinfo->block_size, 7L);
-    cinfo->jpeg_height = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_height * cinfo->block_size, 7L);
-    cinfo->min_DCT_h_scaled_size = 7;
-    cinfo->min_DCT_v_scaled_size = 7;
-  } else if (cinfo->scale_num * 8 >= cinfo->scale_denom * cinfo->block_size) {
-    /* Provide block_size/8 scaling */
-    cinfo->jpeg_width = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_width * cinfo->block_size, 8L);
-    cinfo->jpeg_height = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_height * cinfo->block_size, 8L);
-    cinfo->min_DCT_h_scaled_size = 8;
-    cinfo->min_DCT_v_scaled_size = 8;
-  } else if (cinfo->scale_num * 9 >= cinfo->scale_denom * cinfo->block_size) {
-    /* Provide block_size/9 scaling */
-    cinfo->jpeg_width = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_width * cinfo->block_size, 9L);
-    cinfo->jpeg_height = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_height * cinfo->block_size, 9L);
-    cinfo->min_DCT_h_scaled_size = 9;
-    cinfo->min_DCT_v_scaled_size = 9;
-  } else if (cinfo->scale_num * 10 >= cinfo->scale_denom * cinfo->block_size) {
-    /* Provide block_size/10 scaling */
-    cinfo->jpeg_width = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_width * cinfo->block_size, 10L);
-    cinfo->jpeg_height = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_height * cinfo->block_size, 10L);
-    cinfo->min_DCT_h_scaled_size = 10;
-    cinfo->min_DCT_v_scaled_size = 10;
-  } else if (cinfo->scale_num * 11 >= cinfo->scale_denom * cinfo->block_size) {
-    /* Provide block_size/11 scaling */
-    cinfo->jpeg_width = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_width * cinfo->block_size, 11L);
-    cinfo->jpeg_height = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_height * cinfo->block_size, 11L);
-    cinfo->min_DCT_h_scaled_size = 11;
-    cinfo->min_DCT_v_scaled_size = 11;
-  } else if (cinfo->scale_num * 12 >= cinfo->scale_denom * cinfo->block_size) {
-    /* Provide block_size/12 scaling */
-    cinfo->jpeg_width = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_width * cinfo->block_size, 12L);
-    cinfo->jpeg_height = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_height * cinfo->block_size, 12L);
-    cinfo->min_DCT_h_scaled_size = 12;
-    cinfo->min_DCT_v_scaled_size = 12;
-  } else if (cinfo->scale_num * 13 >= cinfo->scale_denom * cinfo->block_size) {
-    /* Provide block_size/13 scaling */
-    cinfo->jpeg_width = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_width * cinfo->block_size, 13L);
-    cinfo->jpeg_height = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_height * cinfo->block_size, 13L);
-    cinfo->min_DCT_h_scaled_size = 13;
-    cinfo->min_DCT_v_scaled_size = 13;
-  } else if (cinfo->scale_num * 14 >= cinfo->scale_denom * cinfo->block_size) {
-    /* Provide block_size/14 scaling */
-    cinfo->jpeg_width = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_width * cinfo->block_size, 14L);
-    cinfo->jpeg_height = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_height * cinfo->block_size, 14L);
-    cinfo->min_DCT_h_scaled_size = 14;
-    cinfo->min_DCT_v_scaled_size = 14;
-  } else if (cinfo->scale_num * 15 >= cinfo->scale_denom * cinfo->block_size) {
-    /* Provide block_size/15 scaling */
-    cinfo->jpeg_width = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_width * cinfo->block_size, 15L);
-    cinfo->jpeg_height = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_height * cinfo->block_size, 15L);
-    cinfo->min_DCT_h_scaled_size = 15;
-    cinfo->min_DCT_v_scaled_size = 15;
-  } else {
-    /* Provide block_size/16 scaling */
-    cinfo->jpeg_width = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_width * cinfo->block_size, 16L);
-    cinfo->jpeg_height = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_height * cinfo->block_size, 16L);
-    cinfo->min_DCT_h_scaled_size = 16;
-    cinfo->min_DCT_v_scaled_size = 16;
-  }
-
-#else /* !DCT_SCALING_SUPPORTED */
-
-  /* Hardwire it to "no scaling" */
-  cinfo->jpeg_width = cinfo->image_width;
-  cinfo->jpeg_height = cinfo->image_height;
-  cinfo->min_DCT_h_scaled_size = DCTSIZE;
-  cinfo->min_DCT_v_scaled_size = DCTSIZE;
-
-#endif /* DCT_SCALING_SUPPORTED */
-}
-
-
-LOCAL(void)
-jpeg_calc_trans_dimensions (j_compress_ptr cinfo)
-{
-  if (cinfo->min_DCT_h_scaled_size != cinfo->min_DCT_v_scaled_size)
-    ERREXIT2(cinfo, JERR_BAD_DCTSIZE,
-	     cinfo->min_DCT_h_scaled_size, cinfo->min_DCT_v_scaled_size);
-
-  cinfo->block_size = cinfo->min_DCT_h_scaled_size;
-}
-
-
-LOCAL(void)
-initial_setup (j_compress_ptr cinfo, boolean transcode_only)
-/* Do computations that are needed before master selection phase */
-{
-  int ci, ssize;
-  jpeg_component_info *compptr;
-  long samplesperrow;
-  JDIMENSION jd_samplesperrow;
-
-  if (transcode_only)
-    jpeg_calc_trans_dimensions(cinfo);
-  else
-    jpeg_calc_jpeg_dimensions(cinfo);
-
-  /* Sanity check on block_size */
-  if (cinfo->block_size < 1 || cinfo->block_size > 16)
-    ERREXIT2(cinfo, JERR_BAD_DCTSIZE, cinfo->block_size, cinfo->block_size);
-
-  /* Derive natural_order from block_size */
-  switch (cinfo->block_size) {
-  case 2: cinfo->natural_order = jpeg_natural_order2; break;
-  case 3: cinfo->natural_order = jpeg_natural_order3; break;
-  case 4: cinfo->natural_order = jpeg_natural_order4; break;
-  case 5: cinfo->natural_order = jpeg_natural_order5; break;
-  case 6: cinfo->natural_order = jpeg_natural_order6; break;
-  case 7: cinfo->natural_order = jpeg_natural_order7; break;
-  default: cinfo->natural_order = jpeg_natural_order; break;
-  }
-
-  /* Derive lim_Se from block_size */
-  cinfo->lim_Se = cinfo->block_size < DCTSIZE ?
-    cinfo->block_size * cinfo->block_size - 1 : DCTSIZE2-1;
-
-  /* Sanity check on image dimensions */
-  if (cinfo->jpeg_height <= 0 || cinfo->jpeg_width <= 0 ||
-      cinfo->num_components <= 0 || cinfo->input_components <= 0)
-    ERREXIT(cinfo, JERR_EMPTY_IMAGE);
-
-  /* Make sure image isn't bigger than I can handle */
-  if ((long) cinfo->jpeg_height > (long) JPEG_MAX_DIMENSION ||
-      (long) cinfo->jpeg_width > (long) JPEG_MAX_DIMENSION)
-    ERREXIT1(cinfo, JERR_IMAGE_TOO_BIG, (unsigned int) JPEG_MAX_DIMENSION);
-
-  /* Width of an input scanline must be representable as JDIMENSION. */
-  samplesperrow = (long) cinfo->image_width * (long) cinfo->input_components;
-  jd_samplesperrow = (JDIMENSION) samplesperrow;
-  if ((long) jd_samplesperrow != samplesperrow)
-    ERREXIT(cinfo, JERR_WIDTH_OVERFLOW);
-
-  /* For now, precision must match compiled-in value... */
-  if (cinfo->data_precision != BITS_IN_JSAMPLE)
-    ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
-
-  /* Check that number of components won't exceed internal array sizes */
-  if (cinfo->num_components > MAX_COMPONENTS)
-    ERREXIT2(cinfo, JERR_COMPONENT_COUNT, cinfo->num_components,
-	     MAX_COMPONENTS);
-
-  /* Compute maximum sampling factors; check factor validity */
-  cinfo->max_h_samp_factor = 1;
-  cinfo->max_v_samp_factor = 1;
-  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-       ci++, compptr++) {
-    if (compptr->h_samp_factor<=0 || compptr->h_samp_factor>MAX_SAMP_FACTOR ||
-	compptr->v_samp_factor<=0 || compptr->v_samp_factor>MAX_SAMP_FACTOR)
-      ERREXIT(cinfo, JERR_BAD_SAMPLING);
-    cinfo->max_h_samp_factor = MAX(cinfo->max_h_samp_factor,
-				   compptr->h_samp_factor);
-    cinfo->max_v_samp_factor = MAX(cinfo->max_v_samp_factor,
-				   compptr->v_samp_factor);
-  }
-
-  /* Compute dimensions of components */
-  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-       ci++, compptr++) {
-    /* Fill in the correct component_index value; don't rely on application */
-    compptr->component_index = ci;
-    /* In selecting the actual DCT scaling for each component, we try to
-     * scale down the chroma components via DCT scaling rather than downsampling.
-     * This saves time if the downsampler gets to use 1:1 scaling.
-     * Note this code adapts subsampling ratios which are powers of 2.
-     */
-    ssize = 1;
-#ifdef DCT_SCALING_SUPPORTED
-    while (cinfo->min_DCT_h_scaled_size * ssize <=
-	   (cinfo->do_fancy_downsampling ? DCTSIZE : DCTSIZE / 2) &&
-	   (cinfo->max_h_samp_factor % (compptr->h_samp_factor * ssize * 2)) == 0) {
-      ssize = ssize * 2;
-    }
-#endif
-    compptr->DCT_h_scaled_size = cinfo->min_DCT_h_scaled_size * ssize;
-    ssize = 1;
-#ifdef DCT_SCALING_SUPPORTED
-    while (cinfo->min_DCT_v_scaled_size * ssize <=
-	   (cinfo->do_fancy_downsampling ? DCTSIZE : DCTSIZE / 2) &&
-	   (cinfo->max_v_samp_factor % (compptr->v_samp_factor * ssize * 2)) == 0) {
-      ssize = ssize * 2;
-    }
-#endif
-    compptr->DCT_v_scaled_size = cinfo->min_DCT_v_scaled_size * ssize;
-
-    /* We don't support DCT ratios larger than 2. */
-    if (compptr->DCT_h_scaled_size > compptr->DCT_v_scaled_size * 2)
-	compptr->DCT_h_scaled_size = compptr->DCT_v_scaled_size * 2;
-    else if (compptr->DCT_v_scaled_size > compptr->DCT_h_scaled_size * 2)
-	compptr->DCT_v_scaled_size = compptr->DCT_h_scaled_size * 2;
-
-    /* Size in DCT blocks */
-    compptr->width_in_blocks = (JDIMENSION)
-      jdiv_round_up((long) cinfo->jpeg_width * (long) compptr->h_samp_factor,
-		    (long) (cinfo->max_h_samp_factor * cinfo->block_size));
-    compptr->height_in_blocks = (JDIMENSION)
-      jdiv_round_up((long) cinfo->jpeg_height * (long) compptr->v_samp_factor,
-		    (long) (cinfo->max_v_samp_factor * cinfo->block_size));
-    /* Size in samples */
-    compptr->downsampled_width = (JDIMENSION)
-      jdiv_round_up((long) cinfo->jpeg_width *
-		    (long) (compptr->h_samp_factor * compptr->DCT_h_scaled_size),
-		    (long) (cinfo->max_h_samp_factor * cinfo->block_size));
-    compptr->downsampled_height = (JDIMENSION)
-      jdiv_round_up((long) cinfo->jpeg_height *
-		    (long) (compptr->v_samp_factor * compptr->DCT_v_scaled_size),
-		    (long) (cinfo->max_v_samp_factor * cinfo->block_size));
-    /* Mark component needed (this flag isn't actually used for compression) */
-    compptr->component_needed = TRUE;
-  }
-
-  /* Compute number of fully interleaved MCU rows (number of times that
-   * main controller will call coefficient controller).
-   */
-  cinfo->total_iMCU_rows = (JDIMENSION)
-    jdiv_round_up((long) cinfo->jpeg_height,
-		  (long) (cinfo->max_v_samp_factor * cinfo->block_size));
-}
-
-
-#ifdef C_MULTISCAN_FILES_SUPPORTED
-
-LOCAL(void)
-validate_script (j_compress_ptr cinfo)
-/* Verify that the scan script in cinfo->scan_info[] is valid; also
- * determine whether it uses progressive JPEG, and set cinfo->progressive_mode.
- */
-{
-  const jpeg_scan_info * scanptr;
-  int scanno, ncomps, ci, coefi, thisi;
-  int Ss, Se, Ah, Al;
-  boolean component_sent[MAX_COMPONENTS];
-#ifdef C_PROGRESSIVE_SUPPORTED
-  int * last_bitpos_ptr;
-  int last_bitpos[MAX_COMPONENTS][DCTSIZE2];
-  /* -1 until that coefficient has been seen; then last Al for it */
-#endif
-
-  if (cinfo->num_scans <= 0)
-    ERREXIT1(cinfo, JERR_BAD_SCAN_SCRIPT, 0);
-
-  /* For sequential JPEG, all scans must have Ss=0, Se=DCTSIZE2-1;
-   * for progressive JPEG, no scan can have this.
-   */
-  scanptr = cinfo->scan_info;
-  if (scanptr->Ss != 0 || scanptr->Se != DCTSIZE2-1) {
-#ifdef C_PROGRESSIVE_SUPPORTED
-    cinfo->progressive_mode = TRUE;
-    last_bitpos_ptr = & last_bitpos[0][0];
-    for (ci = 0; ci < cinfo->num_components; ci++) 
-      for (coefi = 0; coefi < DCTSIZE2; coefi++)
-	*last_bitpos_ptr++ = -1;
-#else
-    ERREXIT(cinfo, JERR_NOT_COMPILED);
-#endif
-  } else {
-    cinfo->progressive_mode = FALSE;
-    for (ci = 0; ci < cinfo->num_components; ci++) 
-      component_sent[ci] = FALSE;
-  }
-
-  for (scanno = 1; scanno <= cinfo->num_scans; scanptr++, scanno++) {
-    /* Validate component indexes */
-    ncomps = scanptr->comps_in_scan;
-    if (ncomps <= 0 || ncomps > MAX_COMPS_IN_SCAN)
-      ERREXIT2(cinfo, JERR_COMPONENT_COUNT, ncomps, MAX_COMPS_IN_SCAN);
-    for (ci = 0; ci < ncomps; ci++) {
-      thisi = scanptr->component_index[ci];
-      if (thisi < 0 || thisi >= cinfo->num_components)
-	ERREXIT1(cinfo, JERR_BAD_SCAN_SCRIPT, scanno);
-      /* Components must appear in SOF order within each scan */
-      if (ci > 0 && thisi <= scanptr->component_index[ci-1])
-	ERREXIT1(cinfo, JERR_BAD_SCAN_SCRIPT, scanno);
-    }
-    /* Validate progression parameters */
-    Ss = scanptr->Ss;
-    Se = scanptr->Se;
-    Ah = scanptr->Ah;
-    Al = scanptr->Al;
-    if (cinfo->progressive_mode) {
-#ifdef C_PROGRESSIVE_SUPPORTED
-      /* The JPEG spec simply gives the ranges 0..13 for Ah and Al, but that
-       * seems wrong: the upper bound ought to depend on data precision.
-       * Perhaps they really meant 0..N+1 for N-bit precision.
-       * Here we allow 0..10 for 8-bit data; Al larger than 10 results in
-       * out-of-range reconstructed DC values during the first DC scan,
-       * which might cause problems for some decoders.
-       */
-#if BITS_IN_JSAMPLE == 8
-#define MAX_AH_AL 10
-#else
-#define MAX_AH_AL 13
-#endif
-      if (Ss < 0 || Ss >= DCTSIZE2 || Se < Ss || Se >= DCTSIZE2 ||
-	  Ah < 0 || Ah > MAX_AH_AL || Al < 0 || Al > MAX_AH_AL)
-	ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno);
-      if (Ss == 0) {
-	if (Se != 0)		/* DC and AC together not OK */
-	  ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno);
-      } else {
-	if (ncomps != 1)	/* AC scans must be for only one component */
-	  ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno);
-      }
-      for (ci = 0; ci < ncomps; ci++) {
-	last_bitpos_ptr = & last_bitpos[scanptr->component_index[ci]][0];
-	if (Ss != 0 && last_bitpos_ptr[0] < 0) /* AC without prior DC scan */
-	  ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno);
-	for (coefi = Ss; coefi <= Se; coefi++) {
-	  if (last_bitpos_ptr[coefi] < 0) {
-	    /* first scan of this coefficient */
-	    if (Ah != 0)
-	      ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno);
-	  } else {
-	    /* not first scan */
-	    if (Ah != last_bitpos_ptr[coefi] || Al != Ah-1)
-	      ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno);
-	  }
-	  last_bitpos_ptr[coefi] = Al;
-	}
-      }
-#endif
-    } else {
-      /* For sequential JPEG, all progression parameters must be these: */
-      if (Ss != 0 || Se != DCTSIZE2-1 || Ah != 0 || Al != 0)
-	ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno);
-      /* Make sure components are not sent twice */
-      for (ci = 0; ci < ncomps; ci++) {
-	thisi = scanptr->component_index[ci];
-	if (component_sent[thisi])
-	  ERREXIT1(cinfo, JERR_BAD_SCAN_SCRIPT, scanno);
-	component_sent[thisi] = TRUE;
-      }
-    }
-  }
-
-  /* Now verify that everything got sent. */
-  if (cinfo->progressive_mode) {
-#ifdef C_PROGRESSIVE_SUPPORTED
-    /* For progressive mode, we only check that at least some DC data
-     * got sent for each component; the spec does not require that all bits
-     * of all coefficients be transmitted.  Would it be wiser to enforce
-     * transmission of all coefficient bits??
-     */
-    for (ci = 0; ci < cinfo->num_components; ci++) {
-      if (last_bitpos[ci][0] < 0)
-	ERREXIT(cinfo, JERR_MISSING_DATA);
-    }
-#endif
-  } else {
-    for (ci = 0; ci < cinfo->num_components; ci++) {
-      if (! component_sent[ci])
-	ERREXIT(cinfo, JERR_MISSING_DATA);
-    }
-  }
-}
-
-
-LOCAL(void)
-reduce_script (j_compress_ptr cinfo)
-/* Adapt scan script for use with reduced block size;
- * assume that script has been validated before.
- */
-{
-  jpeg_scan_info * scanptr;
-  int idxout, idxin;
-
-  /* Circumvent const declaration for this function */
-  scanptr = (jpeg_scan_info *) cinfo->scan_info;
-  idxout = 0;
-
-  for (idxin = 0; idxin < cinfo->num_scans; idxin++) {
-    /* After skipping, idxout becomes smaller than idxin */
-    if (idxin != idxout)
-      /* Copy rest of data;
-       * note we stay in given chunk of allocated memory.
-       */
-      scanptr[idxout] = scanptr[idxin];
-    if (scanptr[idxout].Ss > cinfo->lim_Se)
-      /* Entire scan out of range - skip this entry */
-      continue;
-    if (scanptr[idxout].Se > cinfo->lim_Se)
-      /* Limit scan to end of block */
-      scanptr[idxout].Se = cinfo->lim_Se;
-    idxout++;
-  }
-
-  cinfo->num_scans = idxout;
-}
-
-#endif /* C_MULTISCAN_FILES_SUPPORTED */
-
-
-LOCAL(void)
-select_scan_parameters (j_compress_ptr cinfo)
-/* Set up the scan parameters for the current scan */
-{
-  int ci;
-
-#ifdef C_MULTISCAN_FILES_SUPPORTED
-  if (cinfo->scan_info != NULL) {
-    /* Prepare for current scan --- the script is already validated */
-    my_master_ptr master = (my_master_ptr) cinfo->master;
-    const jpeg_scan_info * scanptr = cinfo->scan_info + master->scan_number;
-
-    cinfo->comps_in_scan = scanptr->comps_in_scan;
-    for (ci = 0; ci < scanptr->comps_in_scan; ci++) {
-      cinfo->cur_comp_info[ci] =
-	&cinfo->comp_info[scanptr->component_index[ci]];
-    }
-    if (cinfo->progressive_mode) {
-      cinfo->Ss = scanptr->Ss;
-      cinfo->Se = scanptr->Se;
-      cinfo->Ah = scanptr->Ah;
-      cinfo->Al = scanptr->Al;
-      return;
-    }
-  }
-  else
-#endif
-  {
-    /* Prepare for single sequential-JPEG scan containing all components */
-    if (cinfo->num_components > MAX_COMPS_IN_SCAN)
-      ERREXIT2(cinfo, JERR_COMPONENT_COUNT, cinfo->num_components,
-	       MAX_COMPS_IN_SCAN);
-    cinfo->comps_in_scan = cinfo->num_components;
-    for (ci = 0; ci < cinfo->num_components; ci++) {
-      cinfo->cur_comp_info[ci] = &cinfo->comp_info[ci];
-    }
-  }
-  cinfo->Ss = 0;
-  cinfo->Se = cinfo->block_size * cinfo->block_size - 1;
-  cinfo->Ah = 0;
-  cinfo->Al = 0;
-}
-
-
-LOCAL(void)
-per_scan_setup (j_compress_ptr cinfo)
-/* Do computations that are needed before processing a JPEG scan */
-/* cinfo->comps_in_scan and cinfo->cur_comp_info[] are already set */
-{
-  int ci, mcublks, tmp;
-  jpeg_component_info *compptr;
-  
-  if (cinfo->comps_in_scan == 1) {
-    
-    /* Noninterleaved (single-component) scan */
-    compptr = cinfo->cur_comp_info[0];
-    
-    /* Overall image size in MCUs */
-    cinfo->MCUs_per_row = compptr->width_in_blocks;
-    cinfo->MCU_rows_in_scan = compptr->height_in_blocks;
-    
-    /* For noninterleaved scan, always one block per MCU */
-    compptr->MCU_width = 1;
-    compptr->MCU_height = 1;
-    compptr->MCU_blocks = 1;
-    compptr->MCU_sample_width = compptr->DCT_h_scaled_size;
-    compptr->last_col_width = 1;
-    /* For noninterleaved scans, it is convenient to define last_row_height
-     * as the number of block rows present in the last iMCU row.
-     */
-    tmp = (int) (compptr->height_in_blocks % compptr->v_samp_factor);
-    if (tmp == 0) tmp = compptr->v_samp_factor;
-    compptr->last_row_height = tmp;
-    
-    /* Prepare array describing MCU composition */
-    cinfo->blocks_in_MCU = 1;
-    cinfo->MCU_membership[0] = 0;
-    
-  } else {
-    
-    /* Interleaved (multi-component) scan */
-    if (cinfo->comps_in_scan <= 0 || cinfo->comps_in_scan > MAX_COMPS_IN_SCAN)
-      ERREXIT2(cinfo, JERR_COMPONENT_COUNT, cinfo->comps_in_scan,
-	       MAX_COMPS_IN_SCAN);
-    
-    /* Overall image size in MCUs */
-    cinfo->MCUs_per_row = (JDIMENSION)
-      jdiv_round_up((long) cinfo->jpeg_width,
-		    (long) (cinfo->max_h_samp_factor * cinfo->block_size));
-    cinfo->MCU_rows_in_scan = (JDIMENSION)
-      jdiv_round_up((long) cinfo->jpeg_height,
-		    (long) (cinfo->max_v_samp_factor * cinfo->block_size));
-    
-    cinfo->blocks_in_MCU = 0;
-    
-    for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
-      compptr = cinfo->cur_comp_info[ci];
-      /* Sampling factors give # of blocks of component in each MCU */
-      compptr->MCU_width = compptr->h_samp_factor;
-      compptr->MCU_height = compptr->v_samp_factor;
-      compptr->MCU_blocks = compptr->MCU_width * compptr->MCU_height;
-      compptr->MCU_sample_width = compptr->MCU_width * compptr->DCT_h_scaled_size;
-      /* Figure number of non-dummy blocks in last MCU column & row */
-      tmp = (int) (compptr->width_in_blocks % compptr->MCU_width);
-      if (tmp == 0) tmp = compptr->MCU_width;
-      compptr->last_col_width = tmp;
-      tmp = (int) (compptr->height_in_blocks % compptr->MCU_height);
-      if (tmp == 0) tmp = compptr->MCU_height;
-      compptr->last_row_height = tmp;
-      /* Prepare array describing MCU composition */
-      mcublks = compptr->MCU_blocks;
-      if (cinfo->blocks_in_MCU + mcublks > C_MAX_BLOCKS_IN_MCU)
-	ERREXIT(cinfo, JERR_BAD_MCU_SIZE);
-      while (mcublks-- > 0) {
-	cinfo->MCU_membership[cinfo->blocks_in_MCU++] = ci;
-      }
-    }
-    
-  }
-
-  /* Convert restart specified in rows to actual MCU count. */
-  /* Note that count must fit in 16 bits, so we provide limiting. */
-  if (cinfo->restart_in_rows > 0) {
-    long nominal = (long) cinfo->restart_in_rows * (long) cinfo->MCUs_per_row;
-    cinfo->restart_interval = (unsigned int) MIN(nominal, 65535L);
-  }
-}
-
-
-/*
- * Per-pass setup.
- * This is called at the beginning of each pass.  We determine which modules
- * will be active during this pass and give them appropriate start_pass calls.
- * We also set is_last_pass to indicate whether any more passes will be
- * required.
- */
-
-METHODDEF(void)
-prepare_for_pass (j_compress_ptr cinfo)
-{
-  my_master_ptr master = (my_master_ptr) cinfo->master;
-
-  switch (master->pass_type) {
-  case main_pass:
-    /* Initial pass: will collect input data, and do either Huffman
-     * optimization or data output for the first scan.
-     */
-    select_scan_parameters(cinfo);
-    per_scan_setup(cinfo);
-    if (! cinfo->raw_data_in) {
-      (*cinfo->cconvert->start_pass) (cinfo);
-      (*cinfo->downsample->start_pass) (cinfo);
-      (*cinfo->prep->start_pass) (cinfo, JBUF_PASS_THRU);
-    }
-    (*cinfo->fdct->start_pass) (cinfo);
-    (*cinfo->entropy->start_pass) (cinfo, cinfo->optimize_coding);
-    (*cinfo->coef->start_pass) (cinfo,
-				(master->total_passes > 1 ?
-				 JBUF_SAVE_AND_PASS : JBUF_PASS_THRU));
-    (*cinfo->main->start_pass) (cinfo, JBUF_PASS_THRU);
-    if (cinfo->optimize_coding) {
-      /* No immediate data output; postpone writing frame/scan headers */
-      master->pub.call_pass_startup = FALSE;
-    } else {
-      /* Will write frame/scan headers at first jpeg_write_scanlines call */
-      master->pub.call_pass_startup = TRUE;
-    }
-    break;
-#ifdef ENTROPY_OPT_SUPPORTED
-  case huff_opt_pass:
-    /* Do Huffman optimization for a scan after the first one. */
-    select_scan_parameters(cinfo);
-    per_scan_setup(cinfo);
-    if (cinfo->Ss != 0 || cinfo->Ah == 0) {
-      (*cinfo->entropy->start_pass) (cinfo, TRUE);
-      (*cinfo->coef->start_pass) (cinfo, JBUF_CRANK_DEST);
-      master->pub.call_pass_startup = FALSE;
-      break;
-    }
-    /* Special case: Huffman DC refinement scans need no Huffman table
-     * and therefore we can skip the optimization pass for them.
-     */
-    master->pass_type = output_pass;
-    master->pass_number++;
-    /*FALLTHROUGH*/
-#endif
-  case output_pass:
-    /* Do a data-output pass. */
-    /* We need not repeat per-scan setup if prior optimization pass did it. */
-    if (! cinfo->optimize_coding) {
-      select_scan_parameters(cinfo);
-      per_scan_setup(cinfo);
-    }
-    (*cinfo->entropy->start_pass) (cinfo, FALSE);
-    (*cinfo->coef->start_pass) (cinfo, JBUF_CRANK_DEST);
-    /* We emit frame/scan headers now */
-    if (master->scan_number == 0)
-      (*cinfo->marker->write_frame_header) (cinfo);
-    (*cinfo->marker->write_scan_header) (cinfo);
-    master->pub.call_pass_startup = FALSE;
-    break;
-  default:
-    ERREXIT(cinfo, JERR_NOT_COMPILED);
-  }
-
-  master->pub.is_last_pass = (master->pass_number == master->total_passes-1);
-
-  /* Set up progress monitor's pass info if present */
-  if (cinfo->progress != NULL) {
-    cinfo->progress->completed_passes = master->pass_number;
-    cinfo->progress->total_passes = master->total_passes;
-  }
-}
-
-
-/*
- * Special start-of-pass hook.
- * This is called by jpeg_write_scanlines if call_pass_startup is TRUE.
- * In single-pass processing, we need this hook because we don't want to
- * write frame/scan headers during jpeg_start_compress; we want to let the
- * application write COM markers etc. between jpeg_start_compress and the
- * jpeg_write_scanlines loop.
- * In multi-pass processing, this routine is not used.
- */
-
-METHODDEF(void)
-pass_startup (j_compress_ptr cinfo)
-{
-  cinfo->master->call_pass_startup = FALSE; /* reset flag so call only once */
-
-  (*cinfo->marker->write_frame_header) (cinfo);
-  (*cinfo->marker->write_scan_header) (cinfo);
-}
-
-
-/*
- * Finish up at end of pass.
- */
-
-METHODDEF(void)
-finish_pass_master (j_compress_ptr cinfo)
-{
-  my_master_ptr master = (my_master_ptr) cinfo->master;
-
-  /* The entropy coder always needs an end-of-pass call,
-   * either to analyze statistics or to flush its output buffer.
-   */
-  (*cinfo->entropy->finish_pass) (cinfo);
-
-  /* Update state for next pass */
-  switch (master->pass_type) {
-  case main_pass:
-    /* next pass is either output of scan 0 (after optimization)
-     * or output of scan 1 (if no optimization).
-     */
-    master->pass_type = output_pass;
-    if (! cinfo->optimize_coding)
-      master->scan_number++;
-    break;
-  case huff_opt_pass:
-    /* next pass is always output of current scan */
-    master->pass_type = output_pass;
-    break;
-  case output_pass:
-    /* next pass is either optimization or output of next scan */
-    if (cinfo->optimize_coding)
-      master->pass_type = huff_opt_pass;
-    master->scan_number++;
-    break;
-  }
-
-  master->pass_number++;
-}
-
-
-/*
- * Initialize master compression control.
- */
-
-GLOBAL(void)
-jinit_c_master_control (j_compress_ptr cinfo, boolean transcode_only)
-{
-  my_master_ptr master;
-
-  master = (my_master_ptr)
-      (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				  SIZEOF(my_comp_master));
-  cinfo->master = (struct jpeg_comp_master *) master;
-  master->pub.prepare_for_pass = prepare_for_pass;
-  master->pub.pass_startup = pass_startup;
-  master->pub.finish_pass = finish_pass_master;
-  master->pub.is_last_pass = FALSE;
-
-  /* Validate parameters, determine derived values */
-  initial_setup(cinfo, transcode_only);
-
-  if (cinfo->scan_info != NULL) {
-#ifdef C_MULTISCAN_FILES_SUPPORTED
-    validate_script(cinfo);
-    if (cinfo->block_size < DCTSIZE)
-      reduce_script(cinfo);
-#else
-    ERREXIT(cinfo, JERR_NOT_COMPILED);
-#endif
-  } else {
-    cinfo->progressive_mode = FALSE;
-    cinfo->num_scans = 1;
-  }
-
-  if ((cinfo->progressive_mode || cinfo->block_size < DCTSIZE) &&
-      !cinfo->arith_code)			/*  TEMPORARY HACK ??? */
-    /* assume default tables no good for progressive or downscale mode */
-    cinfo->optimize_coding = TRUE;
-
-  /* Initialize my private state */
-  if (transcode_only) {
-    /* no main pass in transcoding */
-    if (cinfo->optimize_coding)
-      master->pass_type = huff_opt_pass;
-    else
-      master->pass_type = output_pass;
-  } else {
-    /* for normal compression, first pass is always this type: */
-    master->pass_type = main_pass;
-  }
-  master->scan_number = 0;
-  master->pass_number = 0;
-  if (cinfo->optimize_coding)
-    master->total_passes = cinfo->num_scans * 2;
-  else
-    master->total_passes = cinfo->num_scans;
-}
+/*
+ * jcmaster.c
+ *
+ * Copyright (C) 1991-1997, Thomas G. Lane.
+ * Modified 2003-2011 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains master control logic for the JPEG compressor.
+ * These routines are concerned with parameter validation, initial setup,
+ * and inter-pass control (determining the number of passes and the work 
+ * to be done in each pass).
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/* Private state */
+
+typedef enum {
+	main_pass,		/* input data, also do first output step */
+	huff_opt_pass,		/* Huffman code optimization pass */
+	output_pass		/* data output pass */
+} c_pass_type;
+
+typedef struct {
+  struct jpeg_comp_master pub;	/* public fields */
+
+  c_pass_type pass_type;	/* the type of the current pass */
+
+  int pass_number;		/* # of passes completed */
+  int total_passes;		/* total # of passes needed */
+
+  int scan_number;		/* current index in scan_info[] */
+} my_comp_master;
+
+typedef my_comp_master * my_master_ptr;
+
+
+/*
+ * Support routines that do various essential calculations.
+ */
+
+/*
+ * Compute JPEG image dimensions and related values.
+ * NOTE: this is exported for possible use by application.
+ * Hence it mustn't do anything that can't be done twice.
+ */
+
+GLOBAL(void)
+jpeg_calc_jpeg_dimensions (j_compress_ptr cinfo)
+/* Do computations that are needed before master selection phase */
+{
+#ifdef DCT_SCALING_SUPPORTED
+
+  /* Sanity check on input image dimensions to prevent overflow in
+   * following calculation.
+   * We do check jpeg_width and jpeg_height in initial_setup below,
+   * but image_width and image_height can come from arbitrary data,
+   * and we need some space for multiplication by block_size.
+   */
+  if (((long) cinfo->image_width >> 24) || ((long) cinfo->image_height >> 24))
+    ERREXIT1(cinfo, JERR_IMAGE_TOO_BIG, (unsigned int) JPEG_MAX_DIMENSION);
+
+  /* Compute actual JPEG image dimensions and DCT scaling choices. */
+  if (cinfo->scale_num >= cinfo->scale_denom * cinfo->block_size) {
+    /* Provide block_size/1 scaling */
+    cinfo->jpeg_width = cinfo->image_width * cinfo->block_size;
+    cinfo->jpeg_height = cinfo->image_height * cinfo->block_size;
+    cinfo->min_DCT_h_scaled_size = 1;
+    cinfo->min_DCT_v_scaled_size = 1;
+  } else if (cinfo->scale_num * 2 >= cinfo->scale_denom * cinfo->block_size) {
+    /* Provide block_size/2 scaling */
+    cinfo->jpeg_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * cinfo->block_size, 2L);
+    cinfo->jpeg_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * cinfo->block_size, 2L);
+    cinfo->min_DCT_h_scaled_size = 2;
+    cinfo->min_DCT_v_scaled_size = 2;
+  } else if (cinfo->scale_num * 3 >= cinfo->scale_denom * cinfo->block_size) {
+    /* Provide block_size/3 scaling */
+    cinfo->jpeg_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * cinfo->block_size, 3L);
+    cinfo->jpeg_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * cinfo->block_size, 3L);
+    cinfo->min_DCT_h_scaled_size = 3;
+    cinfo->min_DCT_v_scaled_size = 3;
+  } else if (cinfo->scale_num * 4 >= cinfo->scale_denom * cinfo->block_size) {
+    /* Provide block_size/4 scaling */
+    cinfo->jpeg_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * cinfo->block_size, 4L);
+    cinfo->jpeg_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * cinfo->block_size, 4L);
+    cinfo->min_DCT_h_scaled_size = 4;
+    cinfo->min_DCT_v_scaled_size = 4;
+  } else if (cinfo->scale_num * 5 >= cinfo->scale_denom * cinfo->block_size) {
+    /* Provide block_size/5 scaling */
+    cinfo->jpeg_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * cinfo->block_size, 5L);
+    cinfo->jpeg_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * cinfo->block_size, 5L);
+    cinfo->min_DCT_h_scaled_size = 5;
+    cinfo->min_DCT_v_scaled_size = 5;
+  } else if (cinfo->scale_num * 6 >= cinfo->scale_denom * cinfo->block_size) {
+    /* Provide block_size/6 scaling */
+    cinfo->jpeg_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * cinfo->block_size, 6L);
+    cinfo->jpeg_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * cinfo->block_size, 6L);
+    cinfo->min_DCT_h_scaled_size = 6;
+    cinfo->min_DCT_v_scaled_size = 6;
+  } else if (cinfo->scale_num * 7 >= cinfo->scale_denom * cinfo->block_size) {
+    /* Provide block_size/7 scaling */
+    cinfo->jpeg_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * cinfo->block_size, 7L);
+    cinfo->jpeg_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * cinfo->block_size, 7L);
+    cinfo->min_DCT_h_scaled_size = 7;
+    cinfo->min_DCT_v_scaled_size = 7;
+  } else if (cinfo->scale_num * 8 >= cinfo->scale_denom * cinfo->block_size) {
+    /* Provide block_size/8 scaling */
+    cinfo->jpeg_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * cinfo->block_size, 8L);
+    cinfo->jpeg_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * cinfo->block_size, 8L);
+    cinfo->min_DCT_h_scaled_size = 8;
+    cinfo->min_DCT_v_scaled_size = 8;
+  } else if (cinfo->scale_num * 9 >= cinfo->scale_denom * cinfo->block_size) {
+    /* Provide block_size/9 scaling */
+    cinfo->jpeg_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * cinfo->block_size, 9L);
+    cinfo->jpeg_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * cinfo->block_size, 9L);
+    cinfo->min_DCT_h_scaled_size = 9;
+    cinfo->min_DCT_v_scaled_size = 9;
+  } else if (cinfo->scale_num * 10 >= cinfo->scale_denom * cinfo->block_size) {
+    /* Provide block_size/10 scaling */
+    cinfo->jpeg_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * cinfo->block_size, 10L);
+    cinfo->jpeg_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * cinfo->block_size, 10L);
+    cinfo->min_DCT_h_scaled_size = 10;
+    cinfo->min_DCT_v_scaled_size = 10;
+  } else if (cinfo->scale_num * 11 >= cinfo->scale_denom * cinfo->block_size) {
+    /* Provide block_size/11 scaling */
+    cinfo->jpeg_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * cinfo->block_size, 11L);
+    cinfo->jpeg_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * cinfo->block_size, 11L);
+    cinfo->min_DCT_h_scaled_size = 11;
+    cinfo->min_DCT_v_scaled_size = 11;
+  } else if (cinfo->scale_num * 12 >= cinfo->scale_denom * cinfo->block_size) {
+    /* Provide block_size/12 scaling */
+    cinfo->jpeg_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * cinfo->block_size, 12L);
+    cinfo->jpeg_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * cinfo->block_size, 12L);
+    cinfo->min_DCT_h_scaled_size = 12;
+    cinfo->min_DCT_v_scaled_size = 12;
+  } else if (cinfo->scale_num * 13 >= cinfo->scale_denom * cinfo->block_size) {
+    /* Provide block_size/13 scaling */
+    cinfo->jpeg_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * cinfo->block_size, 13L);
+    cinfo->jpeg_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * cinfo->block_size, 13L);
+    cinfo->min_DCT_h_scaled_size = 13;
+    cinfo->min_DCT_v_scaled_size = 13;
+  } else if (cinfo->scale_num * 14 >= cinfo->scale_denom * cinfo->block_size) {
+    /* Provide block_size/14 scaling */
+    cinfo->jpeg_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * cinfo->block_size, 14L);
+    cinfo->jpeg_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * cinfo->block_size, 14L);
+    cinfo->min_DCT_h_scaled_size = 14;
+    cinfo->min_DCT_v_scaled_size = 14;
+  } else if (cinfo->scale_num * 15 >= cinfo->scale_denom * cinfo->block_size) {
+    /* Provide block_size/15 scaling */
+    cinfo->jpeg_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * cinfo->block_size, 15L);
+    cinfo->jpeg_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * cinfo->block_size, 15L);
+    cinfo->min_DCT_h_scaled_size = 15;
+    cinfo->min_DCT_v_scaled_size = 15;
+  } else {
+    /* Provide block_size/16 scaling */
+    cinfo->jpeg_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * cinfo->block_size, 16L);
+    cinfo->jpeg_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * cinfo->block_size, 16L);
+    cinfo->min_DCT_h_scaled_size = 16;
+    cinfo->min_DCT_v_scaled_size = 16;
+  }
+
+#else /* !DCT_SCALING_SUPPORTED */
+
+  /* Hardwire it to "no scaling" */
+  cinfo->jpeg_width = cinfo->image_width;
+  cinfo->jpeg_height = cinfo->image_height;
+  cinfo->min_DCT_h_scaled_size = DCTSIZE;
+  cinfo->min_DCT_v_scaled_size = DCTSIZE;
+
+#endif /* DCT_SCALING_SUPPORTED */
+}
+
+
+LOCAL(void)
+jpeg_calc_trans_dimensions (j_compress_ptr cinfo)
+{
+  if (cinfo->min_DCT_h_scaled_size != cinfo->min_DCT_v_scaled_size)
+    ERREXIT2(cinfo, JERR_BAD_DCTSIZE,
+	     cinfo->min_DCT_h_scaled_size, cinfo->min_DCT_v_scaled_size);
+
+  cinfo->block_size = cinfo->min_DCT_h_scaled_size;
+}
+
+
+LOCAL(void)
+initial_setup (j_compress_ptr cinfo, boolean transcode_only)
+/* Do computations that are needed before master selection phase */
+{
+  int ci, ssize;
+  jpeg_component_info *compptr;
+  long samplesperrow;
+  JDIMENSION jd_samplesperrow;
+
+  if (transcode_only)
+    jpeg_calc_trans_dimensions(cinfo);
+  else
+    jpeg_calc_jpeg_dimensions(cinfo);
+
+  /* Sanity check on block_size */
+  if (cinfo->block_size < 1 || cinfo->block_size > 16)
+    ERREXIT2(cinfo, JERR_BAD_DCTSIZE, cinfo->block_size, cinfo->block_size);
+
+  /* Derive natural_order from block_size */
+  switch (cinfo->block_size) {
+  case 2: cinfo->natural_order = jpeg_natural_order2; break;
+  case 3: cinfo->natural_order = jpeg_natural_order3; break;
+  case 4: cinfo->natural_order = jpeg_natural_order4; break;
+  case 5: cinfo->natural_order = jpeg_natural_order5; break;
+  case 6: cinfo->natural_order = jpeg_natural_order6; break;
+  case 7: cinfo->natural_order = jpeg_natural_order7; break;
+  default: cinfo->natural_order = jpeg_natural_order; break;
+  }
+
+  /* Derive lim_Se from block_size */
+  cinfo->lim_Se = cinfo->block_size < DCTSIZE ?
+    cinfo->block_size * cinfo->block_size - 1 : DCTSIZE2-1;
+
+  /* Sanity check on image dimensions */
+  if (cinfo->jpeg_height <= 0 || cinfo->jpeg_width <= 0 ||
+      cinfo->num_components <= 0 || cinfo->input_components <= 0)
+    ERREXIT(cinfo, JERR_EMPTY_IMAGE);
+
+  /* Make sure image isn't bigger than I can handle */
+  if ((long) cinfo->jpeg_height > (long) JPEG_MAX_DIMENSION ||
+      (long) cinfo->jpeg_width > (long) JPEG_MAX_DIMENSION)
+    ERREXIT1(cinfo, JERR_IMAGE_TOO_BIG, (unsigned int) JPEG_MAX_DIMENSION);
+
+  /* Width of an input scanline must be representable as JDIMENSION. */
+  samplesperrow = (long) cinfo->image_width * (long) cinfo->input_components;
+  jd_samplesperrow = (JDIMENSION) samplesperrow;
+  if ((long) jd_samplesperrow != samplesperrow)
+    ERREXIT(cinfo, JERR_WIDTH_OVERFLOW);
+
+  /* For now, precision must match compiled-in value... */
+  if (cinfo->data_precision != BITS_IN_JSAMPLE)
+    ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
+
+  /* Check that number of components won't exceed internal array sizes */
+  if (cinfo->num_components > MAX_COMPONENTS)
+    ERREXIT2(cinfo, JERR_COMPONENT_COUNT, cinfo->num_components,
+	     MAX_COMPONENTS);
+
+  /* Compute maximum sampling factors; check factor validity */
+  cinfo->max_h_samp_factor = 1;
+  cinfo->max_v_samp_factor = 1;
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    if (compptr->h_samp_factor<=0 || compptr->h_samp_factor>MAX_SAMP_FACTOR ||
+	compptr->v_samp_factor<=0 || compptr->v_samp_factor>MAX_SAMP_FACTOR)
+      ERREXIT(cinfo, JERR_BAD_SAMPLING);
+    cinfo->max_h_samp_factor = MAX(cinfo->max_h_samp_factor,
+				   compptr->h_samp_factor);
+    cinfo->max_v_samp_factor = MAX(cinfo->max_v_samp_factor,
+				   compptr->v_samp_factor);
+  }
+
+  /* Compute dimensions of components */
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    /* Fill in the correct component_index value; don't rely on application */
+    compptr->component_index = ci;
+    /* In selecting the actual DCT scaling for each component, we try to
+     * scale down the chroma components via DCT scaling rather than downsampling.
+     * This saves time if the downsampler gets to use 1:1 scaling.
+     * Note this code adapts subsampling ratios which are powers of 2.
+     */
+    ssize = 1;
+#ifdef DCT_SCALING_SUPPORTED
+    while (cinfo->min_DCT_h_scaled_size * ssize <=
+	   (cinfo->do_fancy_downsampling ? DCTSIZE : DCTSIZE / 2) &&
+	   (cinfo->max_h_samp_factor % (compptr->h_samp_factor * ssize * 2)) == 0) {
+      ssize = ssize * 2;
+    }
+#endif
+    compptr->DCT_h_scaled_size = cinfo->min_DCT_h_scaled_size * ssize;
+    ssize = 1;
+#ifdef DCT_SCALING_SUPPORTED
+    while (cinfo->min_DCT_v_scaled_size * ssize <=
+	   (cinfo->do_fancy_downsampling ? DCTSIZE : DCTSIZE / 2) &&
+	   (cinfo->max_v_samp_factor % (compptr->v_samp_factor * ssize * 2)) == 0) {
+      ssize = ssize * 2;
+    }
+#endif
+    compptr->DCT_v_scaled_size = cinfo->min_DCT_v_scaled_size * ssize;
+
+    /* We don't support DCT ratios larger than 2. */
+    if (compptr->DCT_h_scaled_size > compptr->DCT_v_scaled_size * 2)
+	compptr->DCT_h_scaled_size = compptr->DCT_v_scaled_size * 2;
+    else if (compptr->DCT_v_scaled_size > compptr->DCT_h_scaled_size * 2)
+	compptr->DCT_v_scaled_size = compptr->DCT_h_scaled_size * 2;
+
+    /* Size in DCT blocks */
+    compptr->width_in_blocks = (JDIMENSION)
+      jdiv_round_up((long) cinfo->jpeg_width * (long) compptr->h_samp_factor,
+		    (long) (cinfo->max_h_samp_factor * cinfo->block_size));
+    compptr->height_in_blocks = (JDIMENSION)
+      jdiv_round_up((long) cinfo->jpeg_height * (long) compptr->v_samp_factor,
+		    (long) (cinfo->max_v_samp_factor * cinfo->block_size));
+    /* Size in samples */
+    compptr->downsampled_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->jpeg_width *
+		    (long) (compptr->h_samp_factor * compptr->DCT_h_scaled_size),
+		    (long) (cinfo->max_h_samp_factor * cinfo->block_size));
+    compptr->downsampled_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->jpeg_height *
+		    (long) (compptr->v_samp_factor * compptr->DCT_v_scaled_size),
+		    (long) (cinfo->max_v_samp_factor * cinfo->block_size));
+    /* Mark component needed (this flag isn't actually used for compression) */
+    compptr->component_needed = TRUE;
+  }
+
+  /* Compute number of fully interleaved MCU rows (number of times that
+   * main controller will call coefficient controller).
+   */
+  cinfo->total_iMCU_rows = (JDIMENSION)
+    jdiv_round_up((long) cinfo->jpeg_height,
+		  (long) (cinfo->max_v_samp_factor * cinfo->block_size));
+}
+
+
+#ifdef C_MULTISCAN_FILES_SUPPORTED
+
+LOCAL(void)
+validate_script (j_compress_ptr cinfo)
+/* Verify that the scan script in cinfo->scan_info[] is valid; also
+ * determine whether it uses progressive JPEG, and set cinfo->progressive_mode.
+ */
+{
+  const jpeg_scan_info * scanptr;
+  int scanno, ncomps, ci, coefi, thisi;
+  int Ss, Se, Ah, Al;
+  boolean component_sent[MAX_COMPONENTS];
+#ifdef C_PROGRESSIVE_SUPPORTED
+  int * last_bitpos_ptr;
+  int last_bitpos[MAX_COMPONENTS][DCTSIZE2];
+  /* -1 until that coefficient has been seen; then last Al for it */
+#endif
+
+  if (cinfo->num_scans <= 0)
+    ERREXIT1(cinfo, JERR_BAD_SCAN_SCRIPT, 0);
+
+  /* For sequential JPEG, all scans must have Ss=0, Se=DCTSIZE2-1;
+   * for progressive JPEG, no scan can have this.
+   */
+  scanptr = cinfo->scan_info;
+  if (scanptr->Ss != 0 || scanptr->Se != DCTSIZE2-1) {
+#ifdef C_PROGRESSIVE_SUPPORTED
+    cinfo->progressive_mode = TRUE;
+    last_bitpos_ptr = & last_bitpos[0][0];
+    for (ci = 0; ci < cinfo->num_components; ci++) 
+      for (coefi = 0; coefi < DCTSIZE2; coefi++)
+	*last_bitpos_ptr++ = -1;
+#else
+    ERREXIT(cinfo, JERR_NOT_COMPILED);
+#endif
+  } else {
+    cinfo->progressive_mode = FALSE;
+    for (ci = 0; ci < cinfo->num_components; ci++) 
+      component_sent[ci] = FALSE;
+  }
+
+  for (scanno = 1; scanno <= cinfo->num_scans; scanptr++, scanno++) {
+    /* Validate component indexes */
+    ncomps = scanptr->comps_in_scan;
+    if (ncomps <= 0 || ncomps > MAX_COMPS_IN_SCAN)
+      ERREXIT2(cinfo, JERR_COMPONENT_COUNT, ncomps, MAX_COMPS_IN_SCAN);
+    for (ci = 0; ci < ncomps; ci++) {
+      thisi = scanptr->component_index[ci];
+      if (thisi < 0 || thisi >= cinfo->num_components)
+	ERREXIT1(cinfo, JERR_BAD_SCAN_SCRIPT, scanno);
+      /* Components must appear in SOF order within each scan */
+      if (ci > 0 && thisi <= scanptr->component_index[ci-1])
+	ERREXIT1(cinfo, JERR_BAD_SCAN_SCRIPT, scanno);
+    }
+    /* Validate progression parameters */
+    Ss = scanptr->Ss;
+    Se = scanptr->Se;
+    Ah = scanptr->Ah;
+    Al = scanptr->Al;
+    if (cinfo->progressive_mode) {
+#ifdef C_PROGRESSIVE_SUPPORTED
+      /* The JPEG spec simply gives the ranges 0..13 for Ah and Al, but that
+       * seems wrong: the upper bound ought to depend on data precision.
+       * Perhaps they really meant 0..N+1 for N-bit precision.
+       * Here we allow 0..10 for 8-bit data; Al larger than 10 results in
+       * out-of-range reconstructed DC values during the first DC scan,
+       * which might cause problems for some decoders.
+       */
+#if BITS_IN_JSAMPLE == 8
+#define MAX_AH_AL 10
+#else
+#define MAX_AH_AL 13
+#endif
+      if (Ss < 0 || Ss >= DCTSIZE2 || Se < Ss || Se >= DCTSIZE2 ||
+	  Ah < 0 || Ah > MAX_AH_AL || Al < 0 || Al > MAX_AH_AL)
+	ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno);
+      if (Ss == 0) {
+	if (Se != 0)		/* DC and AC together not OK */
+	  ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno);
+      } else {
+	if (ncomps != 1)	/* AC scans must be for only one component */
+	  ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno);
+      }
+      for (ci = 0; ci < ncomps; ci++) {
+	last_bitpos_ptr = & last_bitpos[scanptr->component_index[ci]][0];
+	if (Ss != 0 && last_bitpos_ptr[0] < 0) /* AC without prior DC scan */
+	  ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno);
+	for (coefi = Ss; coefi <= Se; coefi++) {
+	  if (last_bitpos_ptr[coefi] < 0) {
+	    /* first scan of this coefficient */
+	    if (Ah != 0)
+	      ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno);
+	  } else {
+	    /* not first scan */
+	    if (Ah != last_bitpos_ptr[coefi] || Al != Ah-1)
+	      ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno);
+	  }
+	  last_bitpos_ptr[coefi] = Al;
+	}
+      }
+#endif
+    } else {
+      /* For sequential JPEG, all progression parameters must be these: */
+      if (Ss != 0 || Se != DCTSIZE2-1 || Ah != 0 || Al != 0)
+	ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno);
+      /* Make sure components are not sent twice */
+      for (ci = 0; ci < ncomps; ci++) {
+	thisi = scanptr->component_index[ci];
+	if (component_sent[thisi])
+	  ERREXIT1(cinfo, JERR_BAD_SCAN_SCRIPT, scanno);
+	component_sent[thisi] = TRUE;
+      }
+    }
+  }
+
+  /* Now verify that everything got sent. */
+  if (cinfo->progressive_mode) {
+#ifdef C_PROGRESSIVE_SUPPORTED
+    /* For progressive mode, we only check that at least some DC data
+     * got sent for each component; the spec does not require that all bits
+     * of all coefficients be transmitted.  Would it be wiser to enforce
+     * transmission of all coefficient bits??
+     */
+    for (ci = 0; ci < cinfo->num_components; ci++) {
+      if (last_bitpos[ci][0] < 0)
+	ERREXIT(cinfo, JERR_MISSING_DATA);
+    }
+#endif
+  } else {
+    for (ci = 0; ci < cinfo->num_components; ci++) {
+      if (! component_sent[ci])
+	ERREXIT(cinfo, JERR_MISSING_DATA);
+    }
+  }
+}
+
+
+LOCAL(void)
+reduce_script (j_compress_ptr cinfo)
+/* Adapt scan script for use with reduced block size;
+ * assume that script has been validated before.
+ */
+{
+  jpeg_scan_info * scanptr;
+  int idxout, idxin;
+
+  /* Circumvent const declaration for this function */
+  scanptr = (jpeg_scan_info *) cinfo->scan_info;
+  idxout = 0;
+
+  for (idxin = 0; idxin < cinfo->num_scans; idxin++) {
+    /* After skipping, idxout becomes smaller than idxin */
+    if (idxin != idxout)
+      /* Copy rest of data;
+       * note we stay in given chunk of allocated memory.
+       */
+      scanptr[idxout] = scanptr[idxin];
+    if (scanptr[idxout].Ss > cinfo->lim_Se)
+      /* Entire scan out of range - skip this entry */
+      continue;
+    if (scanptr[idxout].Se > cinfo->lim_Se)
+      /* Limit scan to end of block */
+      scanptr[idxout].Se = cinfo->lim_Se;
+    idxout++;
+  }
+
+  cinfo->num_scans = idxout;
+}
+
+#endif /* C_MULTISCAN_FILES_SUPPORTED */
+
+
+LOCAL(void)
+select_scan_parameters (j_compress_ptr cinfo)
+/* Set up the scan parameters for the current scan */
+{
+  int ci;
+
+#ifdef C_MULTISCAN_FILES_SUPPORTED
+  if (cinfo->scan_info != NULL) {
+    /* Prepare for current scan --- the script is already validated */
+    my_master_ptr master = (my_master_ptr) cinfo->master;
+    const jpeg_scan_info * scanptr = cinfo->scan_info + master->scan_number;
+
+    cinfo->comps_in_scan = scanptr->comps_in_scan;
+    for (ci = 0; ci < scanptr->comps_in_scan; ci++) {
+      cinfo->cur_comp_info[ci] =
+	&cinfo->comp_info[scanptr->component_index[ci]];
+    }
+    if (cinfo->progressive_mode) {
+      cinfo->Ss = scanptr->Ss;
+      cinfo->Se = scanptr->Se;
+      cinfo->Ah = scanptr->Ah;
+      cinfo->Al = scanptr->Al;
+      return;
+    }
+  }
+  else
+#endif
+  {
+    /* Prepare for single sequential-JPEG scan containing all components */
+    if (cinfo->num_components > MAX_COMPS_IN_SCAN)
+      ERREXIT2(cinfo, JERR_COMPONENT_COUNT, cinfo->num_components,
+	       MAX_COMPS_IN_SCAN);
+    cinfo->comps_in_scan = cinfo->num_components;
+    for (ci = 0; ci < cinfo->num_components; ci++) {
+      cinfo->cur_comp_info[ci] = &cinfo->comp_info[ci];
+    }
+  }
+  cinfo->Ss = 0;
+  cinfo->Se = cinfo->block_size * cinfo->block_size - 1;
+  cinfo->Ah = 0;
+  cinfo->Al = 0;
+}
+
+
+LOCAL(void)
+per_scan_setup (j_compress_ptr cinfo)
+/* Do computations that are needed before processing a JPEG scan */
+/* cinfo->comps_in_scan and cinfo->cur_comp_info[] are already set */
+{
+  int ci, mcublks, tmp;
+  jpeg_component_info *compptr;
+  
+  if (cinfo->comps_in_scan == 1) {
+    
+    /* Noninterleaved (single-component) scan */
+    compptr = cinfo->cur_comp_info[0];
+    
+    /* Overall image size in MCUs */
+    cinfo->MCUs_per_row = compptr->width_in_blocks;
+    cinfo->MCU_rows_in_scan = compptr->height_in_blocks;
+    
+    /* For noninterleaved scan, always one block per MCU */
+    compptr->MCU_width = 1;
+    compptr->MCU_height = 1;
+    compptr->MCU_blocks = 1;
+    compptr->MCU_sample_width = compptr->DCT_h_scaled_size;
+    compptr->last_col_width = 1;
+    /* For noninterleaved scans, it is convenient to define last_row_height
+     * as the number of block rows present in the last iMCU row.
+     */
+    tmp = (int) (compptr->height_in_blocks % compptr->v_samp_factor);
+    if (tmp == 0) tmp = compptr->v_samp_factor;
+    compptr->last_row_height = tmp;
+    
+    /* Prepare array describing MCU composition */
+    cinfo->blocks_in_MCU = 1;
+    cinfo->MCU_membership[0] = 0;
+    
+  } else {
+    
+    /* Interleaved (multi-component) scan */
+    if (cinfo->comps_in_scan <= 0 || cinfo->comps_in_scan > MAX_COMPS_IN_SCAN)
+      ERREXIT2(cinfo, JERR_COMPONENT_COUNT, cinfo->comps_in_scan,
+	       MAX_COMPS_IN_SCAN);
+    
+    /* Overall image size in MCUs */
+    cinfo->MCUs_per_row = (JDIMENSION)
+      jdiv_round_up((long) cinfo->jpeg_width,
+		    (long) (cinfo->max_h_samp_factor * cinfo->block_size));
+    cinfo->MCU_rows_in_scan = (JDIMENSION)
+      jdiv_round_up((long) cinfo->jpeg_height,
+		    (long) (cinfo->max_v_samp_factor * cinfo->block_size));
+    
+    cinfo->blocks_in_MCU = 0;
+    
+    for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+      compptr = cinfo->cur_comp_info[ci];
+      /* Sampling factors give # of blocks of component in each MCU */
+      compptr->MCU_width = compptr->h_samp_factor;
+      compptr->MCU_height = compptr->v_samp_factor;
+      compptr->MCU_blocks = compptr->MCU_width * compptr->MCU_height;
+      compptr->MCU_sample_width = compptr->MCU_width * compptr->DCT_h_scaled_size;
+      /* Figure number of non-dummy blocks in last MCU column & row */
+      tmp = (int) (compptr->width_in_blocks % compptr->MCU_width);
+      if (tmp == 0) tmp = compptr->MCU_width;
+      compptr->last_col_width = tmp;
+      tmp = (int) (compptr->height_in_blocks % compptr->MCU_height);
+      if (tmp == 0) tmp = compptr->MCU_height;
+      compptr->last_row_height = tmp;
+      /* Prepare array describing MCU composition */
+      mcublks = compptr->MCU_blocks;
+      if (cinfo->blocks_in_MCU + mcublks > C_MAX_BLOCKS_IN_MCU)
+	ERREXIT(cinfo, JERR_BAD_MCU_SIZE);
+      while (mcublks-- > 0) {
+	cinfo->MCU_membership[cinfo->blocks_in_MCU++] = ci;
+      }
+    }
+    
+  }
+
+  /* Convert restart specified in rows to actual MCU count. */
+  /* Note that count must fit in 16 bits, so we provide limiting. */
+  if (cinfo->restart_in_rows > 0) {
+    long nominal = (long) cinfo->restart_in_rows * (long) cinfo->MCUs_per_row;
+    cinfo->restart_interval = (unsigned int) MIN(nominal, 65535L);
+  }
+}
+
+
+/*
+ * Per-pass setup.
+ * This is called at the beginning of each pass.  We determine which modules
+ * will be active during this pass and give them appropriate start_pass calls.
+ * We also set is_last_pass to indicate whether any more passes will be
+ * required.
+ */
+
+METHODDEF(void)
+prepare_for_pass (j_compress_ptr cinfo)
+{
+  my_master_ptr master = (my_master_ptr) cinfo->master;
+
+  switch (master->pass_type) {
+  case main_pass:
+    /* Initial pass: will collect input data, and do either Huffman
+     * optimization or data output for the first scan.
+     */
+    select_scan_parameters(cinfo);
+    per_scan_setup(cinfo);
+    if (! cinfo->raw_data_in) {
+      (*cinfo->cconvert->start_pass) (cinfo);
+      (*cinfo->downsample->start_pass) (cinfo);
+      (*cinfo->prep->start_pass) (cinfo, JBUF_PASS_THRU);
+    }
+    (*cinfo->fdct->start_pass) (cinfo);
+    (*cinfo->entropy->start_pass) (cinfo, cinfo->optimize_coding);
+    (*cinfo->coef->start_pass) (cinfo,
+				(master->total_passes > 1 ?
+				 JBUF_SAVE_AND_PASS : JBUF_PASS_THRU));
+    (*cinfo->main->start_pass) (cinfo, JBUF_PASS_THRU);
+    if (cinfo->optimize_coding) {
+      /* No immediate data output; postpone writing frame/scan headers */
+      master->pub.call_pass_startup = FALSE;
+    } else {
+      /* Will write frame/scan headers at first jpeg_write_scanlines call */
+      master->pub.call_pass_startup = TRUE;
+    }
+    break;
+#ifdef ENTROPY_OPT_SUPPORTED
+  case huff_opt_pass:
+    /* Do Huffman optimization for a scan after the first one. */
+    select_scan_parameters(cinfo);
+    per_scan_setup(cinfo);
+    if (cinfo->Ss != 0 || cinfo->Ah == 0) {
+      (*cinfo->entropy->start_pass) (cinfo, TRUE);
+      (*cinfo->coef->start_pass) (cinfo, JBUF_CRANK_DEST);
+      master->pub.call_pass_startup = FALSE;
+      break;
+    }
+    /* Special case: Huffman DC refinement scans need no Huffman table
+     * and therefore we can skip the optimization pass for them.
+     */
+    master->pass_type = output_pass;
+    master->pass_number++;
+    /*FALLTHROUGH*/
+#endif
+  case output_pass:
+    /* Do a data-output pass. */
+    /* We need not repeat per-scan setup if prior optimization pass did it. */
+    if (! cinfo->optimize_coding) {
+      select_scan_parameters(cinfo);
+      per_scan_setup(cinfo);
+    }
+    (*cinfo->entropy->start_pass) (cinfo, FALSE);
+    (*cinfo->coef->start_pass) (cinfo, JBUF_CRANK_DEST);
+    /* We emit frame/scan headers now */
+    if (master->scan_number == 0)
+      (*cinfo->marker->write_frame_header) (cinfo);
+    (*cinfo->marker->write_scan_header) (cinfo);
+    master->pub.call_pass_startup = FALSE;
+    break;
+  default:
+    ERREXIT(cinfo, JERR_NOT_COMPILED);
+  }
+
+  master->pub.is_last_pass = (master->pass_number == master->total_passes-1);
+
+  /* Set up progress monitor's pass info if present */
+  if (cinfo->progress != NULL) {
+    cinfo->progress->completed_passes = master->pass_number;
+    cinfo->progress->total_passes = master->total_passes;
+  }
+}
+
+
+/*
+ * Special start-of-pass hook.
+ * This is called by jpeg_write_scanlines if call_pass_startup is TRUE.
+ * In single-pass processing, we need this hook because we don't want to
+ * write frame/scan headers during jpeg_start_compress; we want to let the
+ * application write COM markers etc. between jpeg_start_compress and the
+ * jpeg_write_scanlines loop.
+ * In multi-pass processing, this routine is not used.
+ */
+
+METHODDEF(void)
+pass_startup (j_compress_ptr cinfo)
+{
+  cinfo->master->call_pass_startup = FALSE; /* reset flag so call only once */
+
+  (*cinfo->marker->write_frame_header) (cinfo);
+  (*cinfo->marker->write_scan_header) (cinfo);
+}
+
+
+/*
+ * Finish up at end of pass.
+ */
+
+METHODDEF(void)
+finish_pass_master (j_compress_ptr cinfo)
+{
+  my_master_ptr master = (my_master_ptr) cinfo->master;
+
+  /* The entropy coder always needs an end-of-pass call,
+   * either to analyze statistics or to flush its output buffer.
+   */
+  (*cinfo->entropy->finish_pass) (cinfo);
+
+  /* Update state for next pass */
+  switch (master->pass_type) {
+  case main_pass:
+    /* next pass is either output of scan 0 (after optimization)
+     * or output of scan 1 (if no optimization).
+     */
+    master->pass_type = output_pass;
+    if (! cinfo->optimize_coding)
+      master->scan_number++;
+    break;
+  case huff_opt_pass:
+    /* next pass is always output of current scan */
+    master->pass_type = output_pass;
+    break;
+  case output_pass:
+    /* next pass is either optimization or output of next scan */
+    if (cinfo->optimize_coding)
+      master->pass_type = huff_opt_pass;
+    master->scan_number++;
+    break;
+  }
+
+  master->pass_number++;
+}
+
+
+/*
+ * Initialize master compression control.
+ */
+
+GLOBAL(void)
+jinit_c_master_control (j_compress_ptr cinfo, boolean transcode_only)
+{
+  my_master_ptr master;
+
+  master = (my_master_ptr)
+      (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				  SIZEOF(my_comp_master));
+  cinfo->master = (struct jpeg_comp_master *) master;
+  master->pub.prepare_for_pass = prepare_for_pass;
+  master->pub.pass_startup = pass_startup;
+  master->pub.finish_pass = finish_pass_master;
+  master->pub.is_last_pass = FALSE;
+
+  /* Validate parameters, determine derived values */
+  initial_setup(cinfo, transcode_only);
+
+  if (cinfo->scan_info != NULL) {
+#ifdef C_MULTISCAN_FILES_SUPPORTED
+    validate_script(cinfo);
+    if (cinfo->block_size < DCTSIZE)
+      reduce_script(cinfo);
+#else
+    ERREXIT(cinfo, JERR_NOT_COMPILED);
+#endif
+  } else {
+    cinfo->progressive_mode = FALSE;
+    cinfo->num_scans = 1;
+  }
+
+  if ((cinfo->progressive_mode || cinfo->block_size < DCTSIZE) &&
+      !cinfo->arith_code)			/*  TEMPORARY HACK ??? */
+    /* assume default tables no good for progressive or downscale mode */
+    cinfo->optimize_coding = TRUE;
+
+  /* Initialize my private state */
+  if (transcode_only) {
+    /* no main pass in transcoding */
+    if (cinfo->optimize_coding)
+      master->pass_type = huff_opt_pass;
+    else
+      master->pass_type = output_pass;
+  } else {
+    /* for normal compression, first pass is always this type: */
+    master->pass_type = main_pass;
+  }
+  master->scan_number = 0;
+  master->pass_number = 0;
+  if (cinfo->optimize_coding)
+    master->total_passes = cinfo->num_scans * 2;
+  else
+    master->total_passes = cinfo->num_scans;
+}
diff --git a/plugins/AdvaImg/src/LibJPEG/jcomapi.c b/plugins/AdvaImg/src/LibJPEG/jcomapi.c
index 9b1fa7568a..1b1a340c1c 100644
--- a/plugins/AdvaImg/src/LibJPEG/jcomapi.c
+++ b/plugins/AdvaImg/src/LibJPEG/jcomapi.c
@@ -1,106 +1,106 @@
-/*
- * jcomapi.c
- *
- * Copyright (C) 1994-1997, Thomas G. Lane.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains application interface routines that are used for both
- * compression and decompression.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-
-
-/*
- * Abort processing of a JPEG compression or decompression operation,
- * but don't destroy the object itself.
- *
- * For this, we merely clean up all the nonpermanent memory pools.
- * Note that temp files (virtual arrays) are not allowed to belong to
- * the permanent pool, so we will be able to close all temp files here.
- * Closing a data source or destination, if necessary, is the application's
- * responsibility.
- */
-
-GLOBAL(void)
-jpeg_abort (j_common_ptr cinfo)
-{
-  int pool;
-
-  /* Do nothing if called on a not-initialized or destroyed JPEG object. */
-  if (cinfo->mem == NULL)
-    return;
-
-  /* Releasing pools in reverse order might help avoid fragmentation
-   * with some (brain-damaged) malloc libraries.
-   */
-  for (pool = JPOOL_NUMPOOLS-1; pool > JPOOL_PERMANENT; pool--) {
-    (*cinfo->mem->free_pool) (cinfo, pool);
-  }
-
-  /* Reset overall state for possible reuse of object */
-  if (cinfo->is_decompressor) {
-    cinfo->global_state = DSTATE_START;
-    /* Try to keep application from accessing now-deleted marker list.
-     * A bit kludgy to do it here, but this is the most central place.
-     */
-    ((j_decompress_ptr) cinfo)->marker_list = NULL;
-  } else {
-    cinfo->global_state = CSTATE_START;
-  }
-}
-
-
-/*
- * Destruction of a JPEG object.
- *
- * Everything gets deallocated except the master jpeg_compress_struct itself
- * and the error manager struct.  Both of these are supplied by the application
- * and must be freed, if necessary, by the application.  (Often they are on
- * the stack and so don't need to be freed anyway.)
- * Closing a data source or destination, if necessary, is the application's
- * responsibility.
- */
-
-GLOBAL(void)
-jpeg_destroy (j_common_ptr cinfo)
-{
-  /* We need only tell the memory manager to release everything. */
-  /* NB: mem pointer is NULL if memory mgr failed to initialize. */
-  if (cinfo->mem != NULL)
-    (*cinfo->mem->self_destruct) (cinfo);
-  cinfo->mem = NULL;		/* be safe if jpeg_destroy is called twice */
-  cinfo->global_state = 0;	/* mark it destroyed */
-}
-
-
-/*
- * Convenience routines for allocating quantization and Huffman tables.
- * (Would jutils.c be a more reasonable place to put these?)
- */
-
-GLOBAL(JQUANT_TBL *)
-jpeg_alloc_quant_table (j_common_ptr cinfo)
-{
-  JQUANT_TBL *tbl;
-
-  tbl = (JQUANT_TBL *)
-    (*cinfo->mem->alloc_small) (cinfo, JPOOL_PERMANENT, SIZEOF(JQUANT_TBL));
-  tbl->sent_table = FALSE;	/* make sure this is false in any new table */
-  return tbl;
-}
-
-
-GLOBAL(JHUFF_TBL *)
-jpeg_alloc_huff_table (j_common_ptr cinfo)
-{
-  JHUFF_TBL *tbl;
-
-  tbl = (JHUFF_TBL *)
-    (*cinfo->mem->alloc_small) (cinfo, JPOOL_PERMANENT, SIZEOF(JHUFF_TBL));
-  tbl->sent_table = FALSE;	/* make sure this is false in any new table */
-  return tbl;
-}
+/*
+ * jcomapi.c
+ *
+ * Copyright (C) 1994-1997, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains application interface routines that are used for both
+ * compression and decompression.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/*
+ * Abort processing of a JPEG compression or decompression operation,
+ * but don't destroy the object itself.
+ *
+ * For this, we merely clean up all the nonpermanent memory pools.
+ * Note that temp files (virtual arrays) are not allowed to belong to
+ * the permanent pool, so we will be able to close all temp files here.
+ * Closing a data source or destination, if necessary, is the application's
+ * responsibility.
+ */
+
+GLOBAL(void)
+jpeg_abort (j_common_ptr cinfo)
+{
+  int pool;
+
+  /* Do nothing if called on a not-initialized or destroyed JPEG object. */
+  if (cinfo->mem == NULL)
+    return;
+
+  /* Releasing pools in reverse order might help avoid fragmentation
+   * with some (brain-damaged) malloc libraries.
+   */
+  for (pool = JPOOL_NUMPOOLS-1; pool > JPOOL_PERMANENT; pool--) {
+    (*cinfo->mem->free_pool) (cinfo, pool);
+  }
+
+  /* Reset overall state for possible reuse of object */
+  if (cinfo->is_decompressor) {
+    cinfo->global_state = DSTATE_START;
+    /* Try to keep application from accessing now-deleted marker list.
+     * A bit kludgy to do it here, but this is the most central place.
+     */
+    ((j_decompress_ptr) cinfo)->marker_list = NULL;
+  } else {
+    cinfo->global_state = CSTATE_START;
+  }
+}
+
+
+/*
+ * Destruction of a JPEG object.
+ *
+ * Everything gets deallocated except the master jpeg_compress_struct itself
+ * and the error manager struct.  Both of these are supplied by the application
+ * and must be freed, if necessary, by the application.  (Often they are on
+ * the stack and so don't need to be freed anyway.)
+ * Closing a data source or destination, if necessary, is the application's
+ * responsibility.
+ */
+
+GLOBAL(void)
+jpeg_destroy (j_common_ptr cinfo)
+{
+  /* We need only tell the memory manager to release everything. */
+  /* NB: mem pointer is NULL if memory mgr failed to initialize. */
+  if (cinfo->mem != NULL)
+    (*cinfo->mem->self_destruct) (cinfo);
+  cinfo->mem = NULL;		/* be safe if jpeg_destroy is called twice */
+  cinfo->global_state = 0;	/* mark it destroyed */
+}
+
+
+/*
+ * Convenience routines for allocating quantization and Huffman tables.
+ * (Would jutils.c be a more reasonable place to put these?)
+ */
+
+GLOBAL(JQUANT_TBL *)
+jpeg_alloc_quant_table (j_common_ptr cinfo)
+{
+  JQUANT_TBL *tbl;
+
+  tbl = (JQUANT_TBL *)
+    (*cinfo->mem->alloc_small) (cinfo, JPOOL_PERMANENT, SIZEOF(JQUANT_TBL));
+  tbl->sent_table = FALSE;	/* make sure this is false in any new table */
+  return tbl;
+}
+
+
+GLOBAL(JHUFF_TBL *)
+jpeg_alloc_huff_table (j_common_ptr cinfo)
+{
+  JHUFF_TBL *tbl;
+
+  tbl = (JHUFF_TBL *)
+    (*cinfo->mem->alloc_small) (cinfo, JPOOL_PERMANENT, SIZEOF(JHUFF_TBL));
+  tbl->sent_table = FALSE;	/* make sure this is false in any new table */
+  return tbl;
+}
diff --git a/plugins/AdvaImg/src/LibJPEG/jconfig.h b/plugins/AdvaImg/src/LibJPEG/jconfig.h
index 44e5c52ee5..4a321ed276 100644
--- a/plugins/AdvaImg/src/LibJPEG/jconfig.h
+++ b/plugins/AdvaImg/src/LibJPEG/jconfig.h
@@ -1,47 +1,45 @@
-/* jconfig.vc --- jconfig.h for Microsoft Visual C++ on Windows 95 or NT. */
-/* see jconfig.doc for explanations */
-
-#define HAVE_PROTOTYPES
-#define HAVE_UNSIGNED_CHAR
-#define HAVE_UNSIGNED_SHORT
-/* #define void char */
-/* #define const */
-#undef CHAR_IS_UNSIGNED
-#define HAVE_STDDEF_H
-#ifndef HAVE_STDLIB_H
-#define HAVE_STDLIB_H
-#endif
-#undef NEED_BSD_STRINGS
-#undef NEED_SYS_TYPES_H
-#undef NEED_FAR_POINTERS	/* we presume a 32-bit flat memory model */
-#undef NEED_SHORT_EXTERNAL_NAMES
-#undef INCOMPLETE_TYPES_BROKEN
-
-/* Define "boolean" as unsigned char, not int, per Windows custom */
-#ifndef __RPCNDR_H__		/* don't conflict if rpcndr.h already read */
-typedef unsigned char boolean;
-#endif
-#define HAVE_BOOLEAN		/* prevent jmorecfg.h from redefining it */
-
-
-#ifdef JPEG_INTERNALS
-
-#undef RIGHT_SHIFT_IS_UNSIGNED
-
-#endif /* JPEG_INTERNALS */
-
-#ifdef JPEG_CJPEG_DJPEG
-
-#define BMP_SUPPORTED		/* BMP image file format */
-#define GIF_SUPPORTED		/* GIF image file format */
-#define PPM_SUPPORTED		/* PBMPLUS PPM/PGM image file format */
-#undef RLE_SUPPORTED		/* Utah RLE image file format */
-#define TARGA_SUPPORTED		/* Targa image file format */
-
-#define TWO_FILE_COMMANDLINE	/* optional */
-#define USE_SETMODE		/* Microsoft has setmode() */
-#undef NEED_SIGNAL_CATCHER
-#undef DONT_USE_B_MODE
-#undef PROGRESS_REPORT		/* optional */
-
-#endif /* JPEG_CJPEG_DJPEG */
+/* jconfig.vc --- jconfig.h for Microsoft Visual C++ on Windows 95 or NT. */
+/* see jconfig.txt for explanations */
+
+#define HAVE_PROTOTYPES
+#define HAVE_UNSIGNED_CHAR
+#define HAVE_UNSIGNED_SHORT
+/* #define void char */
+/* #define const */
+#undef CHAR_IS_UNSIGNED
+#define HAVE_STDDEF_H
+#define HAVE_STDLIB_H
+#undef NEED_BSD_STRINGS
+#undef NEED_SYS_TYPES_H
+#undef NEED_FAR_POINTERS	/* we presume a 32-bit flat memory model */
+#undef NEED_SHORT_EXTERNAL_NAMES
+#undef INCOMPLETE_TYPES_BROKEN
+
+/* Define "boolean" as unsigned char, not enum, per Windows custom */
+#ifndef __RPCNDR_H__		/* don't conflict if rpcndr.h already read */
+typedef unsigned char boolean;
+#endif
+#define HAVE_BOOLEAN		/* prevent jmorecfg.h from redefining it */
+
+
+#ifdef JPEG_INTERNALS
+
+#undef RIGHT_SHIFT_IS_UNSIGNED
+
+#endif /* JPEG_INTERNALS */
+
+#ifdef JPEG_CJPEG_DJPEG
+
+#define BMP_SUPPORTED		/* BMP image file format */
+#define GIF_SUPPORTED		/* GIF image file format */
+#define PPM_SUPPORTED		/* PBMPLUS PPM/PGM image file format */
+#undef RLE_SUPPORTED		/* Utah RLE image file format */
+#define TARGA_SUPPORTED		/* Targa image file format */
+
+#define TWO_FILE_COMMANDLINE	/* optional */
+#define USE_SETMODE		/* Microsoft has setmode() */
+#undef NEED_SIGNAL_CATCHER
+#undef DONT_USE_B_MODE
+#undef PROGRESS_REPORT		/* optional */
+
+#endif /* JPEG_CJPEG_DJPEG */
diff --git a/plugins/AdvaImg/src/LibJPEG/jcparam.c b/plugins/AdvaImg/src/LibJPEG/jcparam.c
index c5e85dda55..f440bc9d0e 100644
--- a/plugins/AdvaImg/src/LibJPEG/jcparam.c
+++ b/plugins/AdvaImg/src/LibJPEG/jcparam.c
@@ -1,632 +1,637 @@
-/*
- * jcparam.c
- *
- * Copyright (C) 1991-1998, Thomas G. Lane.
- * Modified 2003-2008 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains optional default-setting code for the JPEG compressor.
- * Applications do not have to use this file, but those that don't use it
- * must know a lot more about the innards of the JPEG code.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-
-
-/*
- * Quantization table setup routines
- */
-
-GLOBAL(void)
-jpeg_add_quant_table (j_compress_ptr cinfo, int which_tbl,
-		      const unsigned int *basic_table,
-		      int scale_factor, boolean force_baseline)
-/* Define a quantization table equal to the basic_table times
- * a scale factor (given as a percentage).
- * If force_baseline is TRUE, the computed quantization table entries
- * are limited to 1..255 for JPEG baseline compatibility.
- */
-{
-  JQUANT_TBL ** qtblptr;
-  int i;
-  long temp;
-
-  /* Safety check to ensure start_compress not called yet. */
-  if (cinfo->global_state != CSTATE_START)
-    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
-
-  if (which_tbl < 0 || which_tbl >= NUM_QUANT_TBLS)
-    ERREXIT1(cinfo, JERR_DQT_INDEX, which_tbl);
-
-  qtblptr = & cinfo->quant_tbl_ptrs[which_tbl];
-
-  if (*qtblptr == NULL)
-    *qtblptr = jpeg_alloc_quant_table((j_common_ptr) cinfo);
-
-  for (i = 0; i < DCTSIZE2; i++) {
-    temp = ((long) basic_table[i] * scale_factor + 50L) / 100L;
-    /* limit the values to the valid range */
-    if (temp <= 0L) temp = 1L;
-    if (temp > 32767L) temp = 32767L; /* max quantizer needed for 12 bits */
-    if (force_baseline && temp > 255L)
-      temp = 255L;		/* limit to baseline range if requested */
-    (*qtblptr)->quantval[i] = (UINT16) temp;
-  }
-
-  /* Initialize sent_table FALSE so table will be written to JPEG file. */
-  (*qtblptr)->sent_table = FALSE;
-}
-
-
-/* These are the sample quantization tables given in JPEG spec section K.1.
- * The spec says that the values given produce "good" quality, and
- * when divided by 2, "very good" quality.
- */
-static const unsigned int std_luminance_quant_tbl[DCTSIZE2] = {
-  16,  11,  10,  16,  24,  40,  51,  61,
-  12,  12,  14,  19,  26,  58,  60,  55,
-  14,  13,  16,  24,  40,  57,  69,  56,
-  14,  17,  22,  29,  51,  87,  80,  62,
-  18,  22,  37,  56,  68, 109, 103,  77,
-  24,  35,  55,  64,  81, 104, 113,  92,
-  49,  64,  78,  87, 103, 121, 120, 101,
-  72,  92,  95,  98, 112, 100, 103,  99
-};
-static const unsigned int std_chrominance_quant_tbl[DCTSIZE2] = {
-  17,  18,  24,  47,  99,  99,  99,  99,
-  18,  21,  26,  66,  99,  99,  99,  99,
-  24,  26,  56,  99,  99,  99,  99,  99,
-  47,  66,  99,  99,  99,  99,  99,  99,
-  99,  99,  99,  99,  99,  99,  99,  99,
-  99,  99,  99,  99,  99,  99,  99,  99,
-  99,  99,  99,  99,  99,  99,  99,  99,
-  99,  99,  99,  99,  99,  99,  99,  99
-};
-
-
-GLOBAL(void)
-jpeg_default_qtables (j_compress_ptr cinfo, boolean force_baseline)
-/* Set or change the 'quality' (quantization) setting, using default tables
- * and straight percentage-scaling quality scales.
- * This entry point allows different scalings for luminance and chrominance.
- */
-{
-  /* Set up two quantization tables using the specified scaling */
-  jpeg_add_quant_table(cinfo, 0, std_luminance_quant_tbl,
-		       cinfo->q_scale_factor[0], force_baseline);
-  jpeg_add_quant_table(cinfo, 1, std_chrominance_quant_tbl,
-		       cinfo->q_scale_factor[1], force_baseline);
-}
-
-
-GLOBAL(void)
-jpeg_set_linear_quality (j_compress_ptr cinfo, int scale_factor,
-			 boolean force_baseline)
-/* Set or change the 'quality' (quantization) setting, using default tables
- * and a straight percentage-scaling quality scale.  In most cases it's better
- * to use jpeg_set_quality (below); this entry point is provided for
- * applications that insist on a linear percentage scaling.
- */
-{
-  /* Set up two quantization tables using the specified scaling */
-  jpeg_add_quant_table(cinfo, 0, std_luminance_quant_tbl,
-		       scale_factor, force_baseline);
-  jpeg_add_quant_table(cinfo, 1, std_chrominance_quant_tbl,
-		       scale_factor, force_baseline);
-}
-
-
-GLOBAL(int)
-jpeg_quality_scaling (int quality)
-/* Convert a user-specified quality rating to a percentage scaling factor
- * for an underlying quantization table, using our recommended scaling curve.
- * The input 'quality' factor should be 0 (terrible) to 100 (very good).
- */
-{
-  /* Safety limit on quality factor.  Convert 0 to 1 to avoid zero divide. */
-  if (quality <= 0) quality = 1;
-  if (quality > 100) quality = 100;
-
-  /* The basic table is used as-is (scaling 100) for a quality of 50.
-   * Qualities 50..100 are converted to scaling percentage 200 - 2*Q;
-   * note that at Q=100 the scaling is 0, which will cause jpeg_add_quant_table
-   * to make all the table entries 1 (hence, minimum quantization loss).
-   * Qualities 1..50 are converted to scaling percentage 5000/Q.
-   */
-  if (quality < 50)
-    quality = 5000 / quality;
-  else
-    quality = 200 - quality*2;
-
-  return quality;
-}
-
-
-GLOBAL(void)
-jpeg_set_quality (j_compress_ptr cinfo, int quality, boolean force_baseline)
-/* Set or change the 'quality' (quantization) setting, using default tables.
- * This is the standard quality-adjusting entry point for typical user
- * interfaces; only those who want detailed control over quantization tables
- * would use the preceding three routines directly.
- */
-{
-  /* Convert user 0-100 rating to percentage scaling */
-  quality = jpeg_quality_scaling(quality);
-
-  /* Set up standard quality tables */
-  jpeg_set_linear_quality(cinfo, quality, force_baseline);
-}
-
-
-/*
- * Huffman table setup routines
- */
-
-LOCAL(void)
-add_huff_table (j_compress_ptr cinfo,
-		JHUFF_TBL **htblptr, const UINT8 *bits, const UINT8 *val)
-/* Define a Huffman table */
-{
-  int nsymbols, len;
-
-  if (*htblptr == NULL)
-    *htblptr = jpeg_alloc_huff_table((j_common_ptr) cinfo);
-
-  /* Copy the number-of-symbols-of-each-code-length counts */
-  MEMCOPY((*htblptr)->bits, bits, SIZEOF((*htblptr)->bits));
-
-  /* Validate the counts.  We do this here mainly so we can copy the right
-   * number of symbols from the val[] array, without risking marching off
-   * the end of memory.  jchuff.c will do a more thorough test later.
-   */
-  nsymbols = 0;
-  for (len = 1; len <= 16; len++)
-    nsymbols += bits[len];
-  if (nsymbols < 1 || nsymbols > 256)
-    ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
-
-  MEMCOPY((*htblptr)->huffval, val, nsymbols * SIZEOF(UINT8));
-
-  /* Initialize sent_table FALSE so table will be written to JPEG file. */
-  (*htblptr)->sent_table = FALSE;
-}
-
-
-LOCAL(void)
-std_huff_tables (j_compress_ptr cinfo)
-/* Set up the standard Huffman tables (cf. JPEG standard section K.3) */
-/* IMPORTANT: these are only valid for 8-bit data precision! */
-{
-  static const UINT8 bits_dc_luminance[17] =
-    { /* 0-base */ 0, 0, 1, 5, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0 };
-  static const UINT8 val_dc_luminance[] =
-    { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 };
-  
-  static const UINT8 bits_dc_chrominance[17] =
-    { /* 0-base */ 0, 0, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0 };
-  static const UINT8 val_dc_chrominance[] =
-    { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 };
-  
-  static const UINT8 bits_ac_luminance[17] =
-    { /* 0-base */ 0, 0, 2, 1, 3, 3, 2, 4, 3, 5, 5, 4, 4, 0, 0, 1, 0x7d };
-  static const UINT8 val_ac_luminance[] =
-    { 0x01, 0x02, 0x03, 0x00, 0x04, 0x11, 0x05, 0x12,
-      0x21, 0x31, 0x41, 0x06, 0x13, 0x51, 0x61, 0x07,
-      0x22, 0x71, 0x14, 0x32, 0x81, 0x91, 0xa1, 0x08,
-      0x23, 0x42, 0xb1, 0xc1, 0x15, 0x52, 0xd1, 0xf0,
-      0x24, 0x33, 0x62, 0x72, 0x82, 0x09, 0x0a, 0x16,
-      0x17, 0x18, 0x19, 0x1a, 0x25, 0x26, 0x27, 0x28,
-      0x29, 0x2a, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39,
-      0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49,
-      0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59,
-      0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69,
-      0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79,
-      0x7a, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89,
-      0x8a, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98,
-      0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
-      0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6,
-      0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3, 0xc4, 0xc5,
-      0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2, 0xd3, 0xd4,
-      0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xe1, 0xe2,
-      0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea,
-      0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8,
-      0xf9, 0xfa };
-  
-  static const UINT8 bits_ac_chrominance[17] =
-    { /* 0-base */ 0, 0, 2, 1, 2, 4, 4, 3, 4, 7, 5, 4, 4, 0, 1, 2, 0x77 };
-  static const UINT8 val_ac_chrominance[] =
-    { 0x00, 0x01, 0x02, 0x03, 0x11, 0x04, 0x05, 0x21,
-      0x31, 0x06, 0x12, 0x41, 0x51, 0x07, 0x61, 0x71,
-      0x13, 0x22, 0x32, 0x81, 0x08, 0x14, 0x42, 0x91,
-      0xa1, 0xb1, 0xc1, 0x09, 0x23, 0x33, 0x52, 0xf0,
-      0x15, 0x62, 0x72, 0xd1, 0x0a, 0x16, 0x24, 0x34,
-      0xe1, 0x25, 0xf1, 0x17, 0x18, 0x19, 0x1a, 0x26,
-      0x27, 0x28, 0x29, 0x2a, 0x35, 0x36, 0x37, 0x38,
-      0x39, 0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48,
-      0x49, 0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,
-      0x59, 0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68,
-      0x69, 0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
-      0x79, 0x7a, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
-      0x88, 0x89, 0x8a, 0x92, 0x93, 0x94, 0x95, 0x96,
-      0x97, 0x98, 0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5,
-      0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4,
-      0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3,
-      0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2,
-      0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda,
-      0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9,
-      0xea, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8,
-      0xf9, 0xfa };
-  
-  add_huff_table(cinfo, &cinfo->dc_huff_tbl_ptrs[0],
-		 bits_dc_luminance, val_dc_luminance);
-  add_huff_table(cinfo, &cinfo->ac_huff_tbl_ptrs[0],
-		 bits_ac_luminance, val_ac_luminance);
-  add_huff_table(cinfo, &cinfo->dc_huff_tbl_ptrs[1],
-		 bits_dc_chrominance, val_dc_chrominance);
-  add_huff_table(cinfo, &cinfo->ac_huff_tbl_ptrs[1],
-		 bits_ac_chrominance, val_ac_chrominance);
-}
-
-
-/*
- * Default parameter setup for compression.
- *
- * Applications that don't choose to use this routine must do their
- * own setup of all these parameters.  Alternately, you can call this
- * to establish defaults and then alter parameters selectively.  This
- * is the recommended approach since, if we add any new parameters,
- * your code will still work (they'll be set to reasonable defaults).
- */
-
-GLOBAL(void)
-jpeg_set_defaults (j_compress_ptr cinfo)
-{
-  int i;
-
-  /* Safety check to ensure start_compress not called yet. */
-  if (cinfo->global_state != CSTATE_START)
-    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
-
-  /* Allocate comp_info array large enough for maximum component count.
-   * Array is made permanent in case application wants to compress
-   * multiple images at same param settings.
-   */
-  if (cinfo->comp_info == NULL)
-    cinfo->comp_info = (jpeg_component_info *)
-      (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_PERMANENT,
-				  MAX_COMPONENTS * SIZEOF(jpeg_component_info));
-
-  /* Initialize everything not dependent on the color space */
-
-  cinfo->scale_num = 1;		/* 1:1 scaling */
-  cinfo->scale_denom = 1;
-  cinfo->data_precision = BITS_IN_JSAMPLE;
-  /* Set up two quantization tables using default quality of 75 */
-  jpeg_set_quality(cinfo, 75, TRUE);
-  /* Set up two Huffman tables */
-  std_huff_tables(cinfo);
-
-  /* Initialize default arithmetic coding conditioning */
-  for (i = 0; i < NUM_ARITH_TBLS; i++) {
-    cinfo->arith_dc_L[i] = 0;
-    cinfo->arith_dc_U[i] = 1;
-    cinfo->arith_ac_K[i] = 5;
-  }
-
-  /* Default is no multiple-scan output */
-  cinfo->scan_info = NULL;
-  cinfo->num_scans = 0;
-
-  /* Expect normal source image, not raw downsampled data */
-  cinfo->raw_data_in = FALSE;
-
-  /* Use Huffman coding, not arithmetic coding, by default */
-  cinfo->arith_code = FALSE;
-
-  /* By default, don't do extra passes to optimize entropy coding */
-  cinfo->optimize_coding = FALSE;
-  /* The standard Huffman tables are only valid for 8-bit data precision.
-   * If the precision is higher, force optimization on so that usable
-   * tables will be computed.  This test can be removed if default tables
-   * are supplied that are valid for the desired precision.
-   */
-  if (cinfo->data_precision > 8)
-    cinfo->optimize_coding = TRUE;
-
-  /* By default, use the simpler non-cosited sampling alignment */
-  cinfo->CCIR601_sampling = FALSE;
-
-  /* By default, apply fancy downsampling */
-  cinfo->do_fancy_downsampling = TRUE;
-
-  /* No input smoothing */
-  cinfo->smoothing_factor = 0;
-
-  /* DCT algorithm preference */
-  cinfo->dct_method = JDCT_DEFAULT;
-
-  /* No restart markers */
-  cinfo->restart_interval = 0;
-  cinfo->restart_in_rows = 0;
-
-  /* Fill in default JFIF marker parameters.  Note that whether the marker
-   * will actually be written is determined by jpeg_set_colorspace.
-   *
-   * By default, the library emits JFIF version code 1.01.
-   * An application that wants to emit JFIF 1.02 extension markers should set
-   * JFIF_minor_version to 2.  We could probably get away with just defaulting
-   * to 1.02, but there may still be some decoders in use that will complain
-   * about that; saying 1.01 should minimize compatibility problems.
-   */
-  cinfo->JFIF_major_version = 1; /* Default JFIF version = 1.01 */
-  cinfo->JFIF_minor_version = 1;
-  cinfo->density_unit = 0;	/* Pixel size is unknown by default */
-  cinfo->X_density = 1;		/* Pixel aspect ratio is square by default */
-  cinfo->Y_density = 1;
-
-  /* Choose JPEG colorspace based on input space, set defaults accordingly */
-
-  jpeg_default_colorspace(cinfo);
-}
-
-
-/*
- * Select an appropriate JPEG colorspace for in_color_space.
- */
-
-GLOBAL(void)
-jpeg_default_colorspace (j_compress_ptr cinfo)
-{
-  switch (cinfo->in_color_space) {
-  case JCS_GRAYSCALE:
-    jpeg_set_colorspace(cinfo, JCS_GRAYSCALE);
-    break;
-  case JCS_RGB:
-    jpeg_set_colorspace(cinfo, JCS_YCbCr);
-    break;
-  case JCS_YCbCr:
-    jpeg_set_colorspace(cinfo, JCS_YCbCr);
-    break;
-  case JCS_CMYK:
-    jpeg_set_colorspace(cinfo, JCS_CMYK); /* By default, no translation */
-    break;
-  case JCS_YCCK:
-    jpeg_set_colorspace(cinfo, JCS_YCCK);
-    break;
-  case JCS_UNKNOWN:
-    jpeg_set_colorspace(cinfo, JCS_UNKNOWN);
-    break;
-  default:
-    ERREXIT(cinfo, JERR_BAD_IN_COLORSPACE);
-  }
-}
-
-
-/*
- * Set the JPEG colorspace, and choose colorspace-dependent default values.
- */
-
-GLOBAL(void)
-jpeg_set_colorspace (j_compress_ptr cinfo, J_COLOR_SPACE colorspace)
-{
-  jpeg_component_info * compptr;
-  int ci;
-
-#define SET_COMP(index,id,hsamp,vsamp,quant,dctbl,actbl)  \
-  (compptr = &cinfo->comp_info[index], \
-   compptr->component_id = (id), \
-   compptr->h_samp_factor = (hsamp), \
-   compptr->v_samp_factor = (vsamp), \
-   compptr->quant_tbl_no = (quant), \
-   compptr->dc_tbl_no = (dctbl), \
-   compptr->ac_tbl_no = (actbl) )
-
-  /* Safety check to ensure start_compress not called yet. */
-  if (cinfo->global_state != CSTATE_START)
-    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
-
-  /* For all colorspaces, we use Q and Huff tables 0 for luminance components,
-   * tables 1 for chrominance components.
-   */
-
-  cinfo->jpeg_color_space = colorspace;
-
-  cinfo->write_JFIF_header = FALSE; /* No marker for non-JFIF colorspaces */
-  cinfo->write_Adobe_marker = FALSE; /* write no Adobe marker by default */
-
-  switch (colorspace) {
-  case JCS_GRAYSCALE:
-    cinfo->write_JFIF_header = TRUE; /* Write a JFIF marker */
-    cinfo->num_components = 1;
-    /* JFIF specifies component ID 1 */
-    SET_COMP(0, 1, 1,1, 0, 0,0);
-    break;
-  case JCS_RGB:
-    cinfo->write_Adobe_marker = TRUE; /* write Adobe marker to flag RGB */
-    cinfo->num_components = 3;
-    SET_COMP(0, 0x52 /* 'R' */, 1,1, 0, 0,0);
-    SET_COMP(1, 0x47 /* 'G' */, 1,1, 0, 0,0);
-    SET_COMP(2, 0x42 /* 'B' */, 1,1, 0, 0,0);
-    break;
-  case JCS_YCbCr:
-    cinfo->write_JFIF_header = TRUE; /* Write a JFIF marker */
-    cinfo->num_components = 3;
-    /* JFIF specifies component IDs 1,2,3 */
-    /* We default to 2x2 subsamples of chrominance */
-    SET_COMP(0, 1, 2,2, 0, 0,0);
-    SET_COMP(1, 2, 1,1, 1, 1,1);
-    SET_COMP(2, 3, 1,1, 1, 1,1);
-    break;
-  case JCS_CMYK:
-    cinfo->write_Adobe_marker = TRUE; /* write Adobe marker to flag CMYK */
-    cinfo->num_components = 4;
-    SET_COMP(0, 0x43 /* 'C' */, 1,1, 0, 0,0);
-    SET_COMP(1, 0x4D /* 'M' */, 1,1, 0, 0,0);
-    SET_COMP(2, 0x59 /* 'Y' */, 1,1, 0, 0,0);
-    SET_COMP(3, 0x4B /* 'K' */, 1,1, 0, 0,0);
-    break;
-  case JCS_YCCK:
-    cinfo->write_Adobe_marker = TRUE; /* write Adobe marker to flag YCCK */
-    cinfo->num_components = 4;
-    SET_COMP(0, 1, 2,2, 0, 0,0);
-    SET_COMP(1, 2, 1,1, 1, 1,1);
-    SET_COMP(2, 3, 1,1, 1, 1,1);
-    SET_COMP(3, 4, 2,2, 0, 0,0);
-    break;
-  case JCS_UNKNOWN:
-    cinfo->num_components = cinfo->input_components;
-    if (cinfo->num_components < 1 || cinfo->num_components > MAX_COMPONENTS)
-      ERREXIT2(cinfo, JERR_COMPONENT_COUNT, cinfo->num_components,
-	       MAX_COMPONENTS);
-    for (ci = 0; ci < cinfo->num_components; ci++) {
-      SET_COMP(ci, ci, 1,1, 0, 0,0);
-    }
-    break;
-  default:
-    ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
-  }
-}
-
-
-#ifdef C_PROGRESSIVE_SUPPORTED
-
-LOCAL(jpeg_scan_info *)
-fill_a_scan (jpeg_scan_info * scanptr, int ci,
-	     int Ss, int Se, int Ah, int Al)
-/* Support routine: generate one scan for specified component */
-{
-  scanptr->comps_in_scan = 1;
-  scanptr->component_index[0] = ci;
-  scanptr->Ss = Ss;
-  scanptr->Se = Se;
-  scanptr->Ah = Ah;
-  scanptr->Al = Al;
-  scanptr++;
-  return scanptr;
-}
-
-LOCAL(jpeg_scan_info *)
-fill_scans (jpeg_scan_info * scanptr, int ncomps,
-	    int Ss, int Se, int Ah, int Al)
-/* Support routine: generate one scan for each component */
-{
-  int ci;
-
-  for (ci = 0; ci < ncomps; ci++) {
-    scanptr->comps_in_scan = 1;
-    scanptr->component_index[0] = ci;
-    scanptr->Ss = Ss;
-    scanptr->Se = Se;
-    scanptr->Ah = Ah;
-    scanptr->Al = Al;
-    scanptr++;
-  }
-  return scanptr;
-}
-
-LOCAL(jpeg_scan_info *)
-fill_dc_scans (jpeg_scan_info * scanptr, int ncomps, int Ah, int Al)
-/* Support routine: generate interleaved DC scan if possible, else N scans */
-{
-  int ci;
-
-  if (ncomps <= MAX_COMPS_IN_SCAN) {
-    /* Single interleaved DC scan */
-    scanptr->comps_in_scan = ncomps;
-    for (ci = 0; ci < ncomps; ci++)
-      scanptr->component_index[ci] = ci;
-    scanptr->Ss = scanptr->Se = 0;
-    scanptr->Ah = Ah;
-    scanptr->Al = Al;
-    scanptr++;
-  } else {
-    /* Noninterleaved DC scan for each component */
-    scanptr = fill_scans(scanptr, ncomps, 0, 0, Ah, Al);
-  }
-  return scanptr;
-}
-
-
-/*
- * Create a recommended progressive-JPEG script.
- * cinfo->num_components and cinfo->jpeg_color_space must be correct.
- */
-
-GLOBAL(void)
-jpeg_simple_progression (j_compress_ptr cinfo)
-{
-  int ncomps = cinfo->num_components;
-  int nscans;
-  jpeg_scan_info * scanptr;
-
-  /* Safety check to ensure start_compress not called yet. */
-  if (cinfo->global_state != CSTATE_START)
-    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
-
-  /* Figure space needed for script.  Calculation must match code below! */
-  if (ncomps == 3 && cinfo->jpeg_color_space == JCS_YCbCr) {
-    /* Custom script for YCbCr color images. */
-    nscans = 10;
-  } else {
-    /* All-purpose script for other color spaces. */
-    if (ncomps > MAX_COMPS_IN_SCAN)
-      nscans = 6 * ncomps;	/* 2 DC + 4 AC scans per component */
-    else
-      nscans = 2 + 4 * ncomps;	/* 2 DC scans; 4 AC scans per component */
-  }
-
-  /* Allocate space for script.
-   * We need to put it in the permanent pool in case the application performs
-   * multiple compressions without changing the settings.  To avoid a memory
-   * leak if jpeg_simple_progression is called repeatedly for the same JPEG
-   * object, we try to re-use previously allocated space, and we allocate
-   * enough space to handle YCbCr even if initially asked for grayscale.
-   */
-  if (cinfo->script_space == NULL || cinfo->script_space_size < nscans) {
-    cinfo->script_space_size = MAX(nscans, 10);
-    cinfo->script_space = (jpeg_scan_info *)
-      (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_PERMANENT,
-			cinfo->script_space_size * SIZEOF(jpeg_scan_info));
-  }
-  scanptr = cinfo->script_space;
-  cinfo->scan_info = scanptr;
-  cinfo->num_scans = nscans;
-
-  if (ncomps == 3 && cinfo->jpeg_color_space == JCS_YCbCr) {
-    /* Custom script for YCbCr color images. */
-    /* Initial DC scan */
-    scanptr = fill_dc_scans(scanptr, ncomps, 0, 1);
-    /* Initial AC scan: get some luma data out in a hurry */
-    scanptr = fill_a_scan(scanptr, 0, 1, 5, 0, 2);
-    /* Chroma data is too small to be worth expending many scans on */
-    scanptr = fill_a_scan(scanptr, 2, 1, 63, 0, 1);
-    scanptr = fill_a_scan(scanptr, 1, 1, 63, 0, 1);
-    /* Complete spectral selection for luma AC */
-    scanptr = fill_a_scan(scanptr, 0, 6, 63, 0, 2);
-    /* Refine next bit of luma AC */
-    scanptr = fill_a_scan(scanptr, 0, 1, 63, 2, 1);
-    /* Finish DC successive approximation */
-    scanptr = fill_dc_scans(scanptr, ncomps, 1, 0);
-    /* Finish AC successive approximation */
-    scanptr = fill_a_scan(scanptr, 2, 1, 63, 1, 0);
-    scanptr = fill_a_scan(scanptr, 1, 1, 63, 1, 0);
-    /* Luma bottom bit comes last since it's usually largest scan */
-    scanptr = fill_a_scan(scanptr, 0, 1, 63, 1, 0);
-  } else {
-    /* All-purpose script for other color spaces. */
-    /* Successive approximation first pass */
-    scanptr = fill_dc_scans(scanptr, ncomps, 0, 1);
-    scanptr = fill_scans(scanptr, ncomps, 1, 5, 0, 2);
-    scanptr = fill_scans(scanptr, ncomps, 6, 63, 0, 2);
-    /* Successive approximation second pass */
-    scanptr = fill_scans(scanptr, ncomps, 1, 63, 2, 1);
-    /* Successive approximation final pass */
-    scanptr = fill_dc_scans(scanptr, ncomps, 1, 0);
-    scanptr = fill_scans(scanptr, ncomps, 1, 63, 1, 0);
-  }
-}
-
-#endif /* C_PROGRESSIVE_SUPPORTED */
+/*
+ * jcparam.c
+ *
+ * Copyright (C) 1991-1998, Thomas G. Lane.
+ * Modified 2003-2012 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains optional default-setting code for the JPEG compressor.
+ * Applications do not have to use this file, but those that don't use it
+ * must know a lot more about the innards of the JPEG code.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/*
+ * Quantization table setup routines
+ */
+
+GLOBAL(void)
+jpeg_add_quant_table (j_compress_ptr cinfo, int which_tbl,
+		      const unsigned int *basic_table,
+		      int scale_factor, boolean force_baseline)
+/* Define a quantization table equal to the basic_table times
+ * a scale factor (given as a percentage).
+ * If force_baseline is TRUE, the computed quantization table entries
+ * are limited to 1..255 for JPEG baseline compatibility.
+ */
+{
+  JQUANT_TBL ** qtblptr;
+  int i;
+  long temp;
+
+  /* Safety check to ensure start_compress not called yet. */
+  if (cinfo->global_state != CSTATE_START)
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+
+  if (which_tbl < 0 || which_tbl >= NUM_QUANT_TBLS)
+    ERREXIT1(cinfo, JERR_DQT_INDEX, which_tbl);
+
+  qtblptr = & cinfo->quant_tbl_ptrs[which_tbl];
+
+  if (*qtblptr == NULL)
+    *qtblptr = jpeg_alloc_quant_table((j_common_ptr) cinfo);
+
+  for (i = 0; i < DCTSIZE2; i++) {
+    temp = ((long) basic_table[i] * scale_factor + 50L) / 100L;
+    /* limit the values to the valid range */
+    if (temp <= 0L) temp = 1L;
+    if (temp > 32767L) temp = 32767L; /* max quantizer needed for 12 bits */
+    if (force_baseline && temp > 255L)
+      temp = 255L;		/* limit to baseline range if requested */
+    (*qtblptr)->quantval[i] = (UINT16) temp;
+  }
+
+  /* Initialize sent_table FALSE so table will be written to JPEG file. */
+  (*qtblptr)->sent_table = FALSE;
+}
+
+
+/* These are the sample quantization tables given in JPEG spec section K.1.
+ * The spec says that the values given produce "good" quality, and
+ * when divided by 2, "very good" quality.
+ */
+static const unsigned int std_luminance_quant_tbl[DCTSIZE2] = {
+  16,  11,  10,  16,  24,  40,  51,  61,
+  12,  12,  14,  19,  26,  58,  60,  55,
+  14,  13,  16,  24,  40,  57,  69,  56,
+  14,  17,  22,  29,  51,  87,  80,  62,
+  18,  22,  37,  56,  68, 109, 103,  77,
+  24,  35,  55,  64,  81, 104, 113,  92,
+  49,  64,  78,  87, 103, 121, 120, 101,
+  72,  92,  95,  98, 112, 100, 103,  99
+};
+static const unsigned int std_chrominance_quant_tbl[DCTSIZE2] = {
+  17,  18,  24,  47,  99,  99,  99,  99,
+  18,  21,  26,  66,  99,  99,  99,  99,
+  24,  26,  56,  99,  99,  99,  99,  99,
+  47,  66,  99,  99,  99,  99,  99,  99,
+  99,  99,  99,  99,  99,  99,  99,  99,
+  99,  99,  99,  99,  99,  99,  99,  99,
+  99,  99,  99,  99,  99,  99,  99,  99,
+  99,  99,  99,  99,  99,  99,  99,  99
+};
+
+
+GLOBAL(void)
+jpeg_default_qtables (j_compress_ptr cinfo, boolean force_baseline)
+/* Set or change the 'quality' (quantization) setting, using default tables
+ * and straight percentage-scaling quality scales.
+ * This entry point allows different scalings for luminance and chrominance.
+ */
+{
+  /* Set up two quantization tables using the specified scaling */
+  jpeg_add_quant_table(cinfo, 0, std_luminance_quant_tbl,
+		       cinfo->q_scale_factor[0], force_baseline);
+  jpeg_add_quant_table(cinfo, 1, std_chrominance_quant_tbl,
+		       cinfo->q_scale_factor[1], force_baseline);
+}
+
+
+GLOBAL(void)
+jpeg_set_linear_quality (j_compress_ptr cinfo, int scale_factor,
+			 boolean force_baseline)
+/* Set or change the 'quality' (quantization) setting, using default tables
+ * and a straight percentage-scaling quality scale.  In most cases it's better
+ * to use jpeg_set_quality (below); this entry point is provided for
+ * applications that insist on a linear percentage scaling.
+ */
+{
+  /* Set up two quantization tables using the specified scaling */
+  jpeg_add_quant_table(cinfo, 0, std_luminance_quant_tbl,
+		       scale_factor, force_baseline);
+  jpeg_add_quant_table(cinfo, 1, std_chrominance_quant_tbl,
+		       scale_factor, force_baseline);
+}
+
+
+GLOBAL(int)
+jpeg_quality_scaling (int quality)
+/* Convert a user-specified quality rating to a percentage scaling factor
+ * for an underlying quantization table, using our recommended scaling curve.
+ * The input 'quality' factor should be 0 (terrible) to 100 (very good).
+ */
+{
+  /* Safety limit on quality factor.  Convert 0 to 1 to avoid zero divide. */
+  if (quality <= 0) quality = 1;
+  if (quality > 100) quality = 100;
+
+  /* The basic table is used as-is (scaling 100) for a quality of 50.
+   * Qualities 50..100 are converted to scaling percentage 200 - 2*Q;
+   * note that at Q=100 the scaling is 0, which will cause jpeg_add_quant_table
+   * to make all the table entries 1 (hence, minimum quantization loss).
+   * Qualities 1..50 are converted to scaling percentage 5000/Q.
+   */
+  if (quality < 50)
+    quality = 5000 / quality;
+  else
+    quality = 200 - quality*2;
+
+  return quality;
+}
+
+
+GLOBAL(void)
+jpeg_set_quality (j_compress_ptr cinfo, int quality, boolean force_baseline)
+/* Set or change the 'quality' (quantization) setting, using default tables.
+ * This is the standard quality-adjusting entry point for typical user
+ * interfaces; only those who want detailed control over quantization tables
+ * would use the preceding routines directly.
+ */
+{
+  /* Convert user 0-100 rating to percentage scaling */
+  quality = jpeg_quality_scaling(quality);
+
+  /* Set up standard quality tables */
+  jpeg_set_linear_quality(cinfo, quality, force_baseline);
+}
+
+
+/*
+ * Huffman table setup routines
+ */
+
+LOCAL(void)
+add_huff_table (j_compress_ptr cinfo,
+		JHUFF_TBL **htblptr, const UINT8 *bits, const UINT8 *val)
+/* Define a Huffman table */
+{
+  int nsymbols, len;
+
+  if (*htblptr == NULL)
+    *htblptr = jpeg_alloc_huff_table((j_common_ptr) cinfo);
+
+  /* Copy the number-of-symbols-of-each-code-length counts */
+  MEMCOPY((*htblptr)->bits, bits, SIZEOF((*htblptr)->bits));
+
+  /* Validate the counts.  We do this here mainly so we can copy the right
+   * number of symbols from the val[] array, without risking marching off
+   * the end of memory.  jchuff.c will do a more thorough test later.
+   */
+  nsymbols = 0;
+  for (len = 1; len <= 16; len++)
+    nsymbols += bits[len];
+  if (nsymbols < 1 || nsymbols > 256)
+    ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
+
+  MEMCOPY((*htblptr)->huffval, val, nsymbols * SIZEOF(UINT8));
+
+  /* Initialize sent_table FALSE so table will be written to JPEG file. */
+  (*htblptr)->sent_table = FALSE;
+}
+
+
+LOCAL(void)
+std_huff_tables (j_compress_ptr cinfo)
+/* Set up the standard Huffman tables (cf. JPEG standard section K.3) */
+/* IMPORTANT: these are only valid for 8-bit data precision! */
+{
+  static const UINT8 bits_dc_luminance[17] =
+    { /* 0-base */ 0, 0, 1, 5, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0 };
+  static const UINT8 val_dc_luminance[] =
+    { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 };
+  
+  static const UINT8 bits_dc_chrominance[17] =
+    { /* 0-base */ 0, 0, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0 };
+  static const UINT8 val_dc_chrominance[] =
+    { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 };
+  
+  static const UINT8 bits_ac_luminance[17] =
+    { /* 0-base */ 0, 0, 2, 1, 3, 3, 2, 4, 3, 5, 5, 4, 4, 0, 0, 1, 0x7d };
+  static const UINT8 val_ac_luminance[] =
+    { 0x01, 0x02, 0x03, 0x00, 0x04, 0x11, 0x05, 0x12,
+      0x21, 0x31, 0x41, 0x06, 0x13, 0x51, 0x61, 0x07,
+      0x22, 0x71, 0x14, 0x32, 0x81, 0x91, 0xa1, 0x08,
+      0x23, 0x42, 0xb1, 0xc1, 0x15, 0x52, 0xd1, 0xf0,
+      0x24, 0x33, 0x62, 0x72, 0x82, 0x09, 0x0a, 0x16,
+      0x17, 0x18, 0x19, 0x1a, 0x25, 0x26, 0x27, 0x28,
+      0x29, 0x2a, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39,
+      0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49,
+      0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59,
+      0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69,
+      0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79,
+      0x7a, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89,
+      0x8a, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98,
+      0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
+      0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6,
+      0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3, 0xc4, 0xc5,
+      0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2, 0xd3, 0xd4,
+      0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xe1, 0xe2,
+      0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea,
+      0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8,
+      0xf9, 0xfa };
+  
+  static const UINT8 bits_ac_chrominance[17] =
+    { /* 0-base */ 0, 0, 2, 1, 2, 4, 4, 3, 4, 7, 5, 4, 4, 0, 1, 2, 0x77 };
+  static const UINT8 val_ac_chrominance[] =
+    { 0x00, 0x01, 0x02, 0x03, 0x11, 0x04, 0x05, 0x21,
+      0x31, 0x06, 0x12, 0x41, 0x51, 0x07, 0x61, 0x71,
+      0x13, 0x22, 0x32, 0x81, 0x08, 0x14, 0x42, 0x91,
+      0xa1, 0xb1, 0xc1, 0x09, 0x23, 0x33, 0x52, 0xf0,
+      0x15, 0x62, 0x72, 0xd1, 0x0a, 0x16, 0x24, 0x34,
+      0xe1, 0x25, 0xf1, 0x17, 0x18, 0x19, 0x1a, 0x26,
+      0x27, 0x28, 0x29, 0x2a, 0x35, 0x36, 0x37, 0x38,
+      0x39, 0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48,
+      0x49, 0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,
+      0x59, 0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68,
+      0x69, 0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
+      0x79, 0x7a, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+      0x88, 0x89, 0x8a, 0x92, 0x93, 0x94, 0x95, 0x96,
+      0x97, 0x98, 0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5,
+      0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4,
+      0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3,
+      0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2,
+      0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda,
+      0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9,
+      0xea, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8,
+      0xf9, 0xfa };
+  
+  add_huff_table(cinfo, &cinfo->dc_huff_tbl_ptrs[0],
+		 bits_dc_luminance, val_dc_luminance);
+  add_huff_table(cinfo, &cinfo->ac_huff_tbl_ptrs[0],
+		 bits_ac_luminance, val_ac_luminance);
+  add_huff_table(cinfo, &cinfo->dc_huff_tbl_ptrs[1],
+		 bits_dc_chrominance, val_dc_chrominance);
+  add_huff_table(cinfo, &cinfo->ac_huff_tbl_ptrs[1],
+		 bits_ac_chrominance, val_ac_chrominance);
+}
+
+
+/*
+ * Default parameter setup for compression.
+ *
+ * Applications that don't choose to use this routine must do their
+ * own setup of all these parameters.  Alternately, you can call this
+ * to establish defaults and then alter parameters selectively.  This
+ * is the recommended approach since, if we add any new parameters,
+ * your code will still work (they'll be set to reasonable defaults).
+ */
+
+GLOBAL(void)
+jpeg_set_defaults (j_compress_ptr cinfo)
+{
+  int i;
+
+  /* Safety check to ensure start_compress not called yet. */
+  if (cinfo->global_state != CSTATE_START)
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+
+  /* Allocate comp_info array large enough for maximum component count.
+   * Array is made permanent in case application wants to compress
+   * multiple images at same param settings.
+   */
+  if (cinfo->comp_info == NULL)
+    cinfo->comp_info = (jpeg_component_info *)
+      (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_PERMANENT,
+				  MAX_COMPONENTS * SIZEOF(jpeg_component_info));
+
+  /* Initialize everything not dependent on the color space */
+
+  cinfo->scale_num = 1;		/* 1:1 scaling */
+  cinfo->scale_denom = 1;
+  cinfo->data_precision = BITS_IN_JSAMPLE;
+  /* Set up two quantization tables using default quality of 75 */
+  jpeg_set_quality(cinfo, 75, TRUE);
+  /* Set up two Huffman tables */
+  std_huff_tables(cinfo);
+
+  /* Initialize default arithmetic coding conditioning */
+  for (i = 0; i < NUM_ARITH_TBLS; i++) {
+    cinfo->arith_dc_L[i] = 0;
+    cinfo->arith_dc_U[i] = 1;
+    cinfo->arith_ac_K[i] = 5;
+  }
+
+  /* Default is no multiple-scan output */
+  cinfo->scan_info = NULL;
+  cinfo->num_scans = 0;
+
+  /* Expect normal source image, not raw downsampled data */
+  cinfo->raw_data_in = FALSE;
+
+  /* Use Huffman coding, not arithmetic coding, by default */
+  cinfo->arith_code = FALSE;
+
+  /* By default, don't do extra passes to optimize entropy coding */
+  cinfo->optimize_coding = FALSE;
+  /* The standard Huffman tables are only valid for 8-bit data precision.
+   * If the precision is higher, force optimization on so that usable
+   * tables will be computed.  This test can be removed if default tables
+   * are supplied that are valid for the desired precision.
+   */
+  if (cinfo->data_precision > 8)
+    cinfo->optimize_coding = TRUE;
+
+  /* By default, use the simpler non-cosited sampling alignment */
+  cinfo->CCIR601_sampling = FALSE;
+
+  /* By default, apply fancy downsampling */
+  cinfo->do_fancy_downsampling = TRUE;
+
+  /* No input smoothing */
+  cinfo->smoothing_factor = 0;
+
+  /* DCT algorithm preference */
+  cinfo->dct_method = JDCT_DEFAULT;
+
+  /* No restart markers */
+  cinfo->restart_interval = 0;
+  cinfo->restart_in_rows = 0;
+
+  /* Fill in default JFIF marker parameters.  Note that whether the marker
+   * will actually be written is determined by jpeg_set_colorspace.
+   *
+   * By default, the library emits JFIF version code 1.01.
+   * An application that wants to emit JFIF 1.02 extension markers should set
+   * JFIF_minor_version to 2.  We could probably get away with just defaulting
+   * to 1.02, but there may still be some decoders in use that will complain
+   * about that; saying 1.01 should minimize compatibility problems.
+   */
+  cinfo->JFIF_major_version = 1; /* Default JFIF version = 1.01 */
+  cinfo->JFIF_minor_version = 1;
+  cinfo->density_unit = 0;	/* Pixel size is unknown by default */
+  cinfo->X_density = 1;		/* Pixel aspect ratio is square by default */
+  cinfo->Y_density = 1;
+
+  /* No color transform */
+  cinfo->color_transform = JCT_NONE;
+
+  /* Choose JPEG colorspace based on input space, set defaults accordingly */
+
+  jpeg_default_colorspace(cinfo);
+}
+
+
+/*
+ * Select an appropriate JPEG colorspace for in_color_space.
+ */
+
+GLOBAL(void)
+jpeg_default_colorspace (j_compress_ptr cinfo)
+{
+  switch (cinfo->in_color_space) {
+  case JCS_GRAYSCALE:
+    jpeg_set_colorspace(cinfo, JCS_GRAYSCALE);
+    break;
+  case JCS_RGB:
+    jpeg_set_colorspace(cinfo, JCS_YCbCr);
+    break;
+  case JCS_YCbCr:
+    jpeg_set_colorspace(cinfo, JCS_YCbCr);
+    break;
+  case JCS_CMYK:
+    jpeg_set_colorspace(cinfo, JCS_CMYK); /* By default, no translation */
+    break;
+  case JCS_YCCK:
+    jpeg_set_colorspace(cinfo, JCS_YCCK);
+    break;
+  case JCS_UNKNOWN:
+    jpeg_set_colorspace(cinfo, JCS_UNKNOWN);
+    break;
+  default:
+    ERREXIT(cinfo, JERR_BAD_IN_COLORSPACE);
+  }
+}
+
+
+/*
+ * Set the JPEG colorspace, and choose colorspace-dependent default values.
+ */
+
+GLOBAL(void)
+jpeg_set_colorspace (j_compress_ptr cinfo, J_COLOR_SPACE colorspace)
+{
+  jpeg_component_info * compptr;
+  int ci;
+
+#define SET_COMP(index,id,hsamp,vsamp,quant,dctbl,actbl)  \
+  (compptr = &cinfo->comp_info[index], \
+   compptr->component_id = (id), \
+   compptr->h_samp_factor = (hsamp), \
+   compptr->v_samp_factor = (vsamp), \
+   compptr->quant_tbl_no = (quant), \
+   compptr->dc_tbl_no = (dctbl), \
+   compptr->ac_tbl_no = (actbl) )
+
+  /* Safety check to ensure start_compress not called yet. */
+  if (cinfo->global_state != CSTATE_START)
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+
+  /* For all colorspaces, we use Q and Huff tables 0 for luminance components,
+   * tables 1 for chrominance components.
+   */
+
+  cinfo->jpeg_color_space = colorspace;
+
+  cinfo->write_JFIF_header = FALSE; /* No marker for non-JFIF colorspaces */
+  cinfo->write_Adobe_marker = FALSE; /* write no Adobe marker by default */
+
+  switch (colorspace) {
+  case JCS_GRAYSCALE:
+    cinfo->write_JFIF_header = TRUE; /* Write a JFIF marker */
+    cinfo->num_components = 1;
+    /* JFIF specifies component ID 1 */
+    SET_COMP(0, 1, 1,1, 0, 0,0);
+    break;
+  case JCS_RGB:
+    cinfo->write_Adobe_marker = TRUE; /* write Adobe marker to flag RGB */
+    cinfo->num_components = 3;
+    SET_COMP(0, 0x52 /* 'R' */, 1,1, 0, 0,0);
+    SET_COMP(1, 0x47 /* 'G' */, 1,1, 0,
+		cinfo->color_transform == JCT_SUBTRACT_GREEN ? 1 : 0,
+		cinfo->color_transform == JCT_SUBTRACT_GREEN ? 1 : 0);
+    SET_COMP(2, 0x42 /* 'B' */, 1,1, 0, 0,0);
+    break;
+  case JCS_YCbCr:
+    cinfo->write_JFIF_header = TRUE; /* Write a JFIF marker */
+    cinfo->num_components = 3;
+    /* JFIF specifies component IDs 1,2,3 */
+    /* We default to 2x2 subsamples of chrominance */
+    SET_COMP(0, 1, 2,2, 0, 0,0);
+    SET_COMP(1, 2, 1,1, 1, 1,1);
+    SET_COMP(2, 3, 1,1, 1, 1,1);
+    break;
+  case JCS_CMYK:
+    cinfo->write_Adobe_marker = TRUE; /* write Adobe marker to flag CMYK */
+    cinfo->num_components = 4;
+    SET_COMP(0, 0x43 /* 'C' */, 1,1, 0, 0,0);
+    SET_COMP(1, 0x4D /* 'M' */, 1,1, 0, 0,0);
+    SET_COMP(2, 0x59 /* 'Y' */, 1,1, 0, 0,0);
+    SET_COMP(3, 0x4B /* 'K' */, 1,1, 0, 0,0);
+    break;
+  case JCS_YCCK:
+    cinfo->write_Adobe_marker = TRUE; /* write Adobe marker to flag YCCK */
+    cinfo->num_components = 4;
+    SET_COMP(0, 1, 2,2, 0, 0,0);
+    SET_COMP(1, 2, 1,1, 1, 1,1);
+    SET_COMP(2, 3, 1,1, 1, 1,1);
+    SET_COMP(3, 4, 2,2, 0, 0,0);
+    break;
+  case JCS_UNKNOWN:
+    cinfo->num_components = cinfo->input_components;
+    if (cinfo->num_components < 1 || cinfo->num_components > MAX_COMPONENTS)
+      ERREXIT2(cinfo, JERR_COMPONENT_COUNT, cinfo->num_components,
+	       MAX_COMPONENTS);
+    for (ci = 0; ci < cinfo->num_components; ci++) {
+      SET_COMP(ci, ci, 1,1, 0, 0,0);
+    }
+    break;
+  default:
+    ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
+  }
+}
+
+
+#ifdef C_PROGRESSIVE_SUPPORTED
+
+LOCAL(jpeg_scan_info *)
+fill_a_scan (jpeg_scan_info * scanptr, int ci,
+	     int Ss, int Se, int Ah, int Al)
+/* Support routine: generate one scan for specified component */
+{
+  scanptr->comps_in_scan = 1;
+  scanptr->component_index[0] = ci;
+  scanptr->Ss = Ss;
+  scanptr->Se = Se;
+  scanptr->Ah = Ah;
+  scanptr->Al = Al;
+  scanptr++;
+  return scanptr;
+}
+
+LOCAL(jpeg_scan_info *)
+fill_scans (jpeg_scan_info * scanptr, int ncomps,
+	    int Ss, int Se, int Ah, int Al)
+/* Support routine: generate one scan for each component */
+{
+  int ci;
+
+  for (ci = 0; ci < ncomps; ci++) {
+    scanptr->comps_in_scan = 1;
+    scanptr->component_index[0] = ci;
+    scanptr->Ss = Ss;
+    scanptr->Se = Se;
+    scanptr->Ah = Ah;
+    scanptr->Al = Al;
+    scanptr++;
+  }
+  return scanptr;
+}
+
+LOCAL(jpeg_scan_info *)
+fill_dc_scans (jpeg_scan_info * scanptr, int ncomps, int Ah, int Al)
+/* Support routine: generate interleaved DC scan if possible, else N scans */
+{
+  int ci;
+
+  if (ncomps <= MAX_COMPS_IN_SCAN) {
+    /* Single interleaved DC scan */
+    scanptr->comps_in_scan = ncomps;
+    for (ci = 0; ci < ncomps; ci++)
+      scanptr->component_index[ci] = ci;
+    scanptr->Ss = scanptr->Se = 0;
+    scanptr->Ah = Ah;
+    scanptr->Al = Al;
+    scanptr++;
+  } else {
+    /* Noninterleaved DC scan for each component */
+    scanptr = fill_scans(scanptr, ncomps, 0, 0, Ah, Al);
+  }
+  return scanptr;
+}
+
+
+/*
+ * Create a recommended progressive-JPEG script.
+ * cinfo->num_components and cinfo->jpeg_color_space must be correct.
+ */
+
+GLOBAL(void)
+jpeg_simple_progression (j_compress_ptr cinfo)
+{
+  int ncomps = cinfo->num_components;
+  int nscans;
+  jpeg_scan_info * scanptr;
+
+  /* Safety check to ensure start_compress not called yet. */
+  if (cinfo->global_state != CSTATE_START)
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+
+  /* Figure space needed for script.  Calculation must match code below! */
+  if (ncomps == 3 && cinfo->jpeg_color_space == JCS_YCbCr) {
+    /* Custom script for YCbCr color images. */
+    nscans = 10;
+  } else {
+    /* All-purpose script for other color spaces. */
+    if (ncomps > MAX_COMPS_IN_SCAN)
+      nscans = 6 * ncomps;	/* 2 DC + 4 AC scans per component */
+    else
+      nscans = 2 + 4 * ncomps;	/* 2 DC scans; 4 AC scans per component */
+  }
+
+  /* Allocate space for script.
+   * We need to put it in the permanent pool in case the application performs
+   * multiple compressions without changing the settings.  To avoid a memory
+   * leak if jpeg_simple_progression is called repeatedly for the same JPEG
+   * object, we try to re-use previously allocated space, and we allocate
+   * enough space to handle YCbCr even if initially asked for grayscale.
+   */
+  if (cinfo->script_space == NULL || cinfo->script_space_size < nscans) {
+    cinfo->script_space_size = MAX(nscans, 10);
+    cinfo->script_space = (jpeg_scan_info *)
+      (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_PERMANENT,
+			cinfo->script_space_size * SIZEOF(jpeg_scan_info));
+  }
+  scanptr = cinfo->script_space;
+  cinfo->scan_info = scanptr;
+  cinfo->num_scans = nscans;
+
+  if (ncomps == 3 && cinfo->jpeg_color_space == JCS_YCbCr) {
+    /* Custom script for YCbCr color images. */
+    /* Initial DC scan */
+    scanptr = fill_dc_scans(scanptr, ncomps, 0, 1);
+    /* Initial AC scan: get some luma data out in a hurry */
+    scanptr = fill_a_scan(scanptr, 0, 1, 5, 0, 2);
+    /* Chroma data is too small to be worth expending many scans on */
+    scanptr = fill_a_scan(scanptr, 2, 1, 63, 0, 1);
+    scanptr = fill_a_scan(scanptr, 1, 1, 63, 0, 1);
+    /* Complete spectral selection for luma AC */
+    scanptr = fill_a_scan(scanptr, 0, 6, 63, 0, 2);
+    /* Refine next bit of luma AC */
+    scanptr = fill_a_scan(scanptr, 0, 1, 63, 2, 1);
+    /* Finish DC successive approximation */
+    scanptr = fill_dc_scans(scanptr, ncomps, 1, 0);
+    /* Finish AC successive approximation */
+    scanptr = fill_a_scan(scanptr, 2, 1, 63, 1, 0);
+    scanptr = fill_a_scan(scanptr, 1, 1, 63, 1, 0);
+    /* Luma bottom bit comes last since it's usually largest scan */
+    scanptr = fill_a_scan(scanptr, 0, 1, 63, 1, 0);
+  } else {
+    /* All-purpose script for other color spaces. */
+    /* Successive approximation first pass */
+    scanptr = fill_dc_scans(scanptr, ncomps, 0, 1);
+    scanptr = fill_scans(scanptr, ncomps, 1, 5, 0, 2);
+    scanptr = fill_scans(scanptr, ncomps, 6, 63, 0, 2);
+    /* Successive approximation second pass */
+    scanptr = fill_scans(scanptr, ncomps, 1, 63, 2, 1);
+    /* Successive approximation final pass */
+    scanptr = fill_dc_scans(scanptr, ncomps, 1, 0);
+    scanptr = fill_scans(scanptr, ncomps, 1, 63, 1, 0);
+  }
+}
+
+#endif /* C_PROGRESSIVE_SUPPORTED */
diff --git a/plugins/AdvaImg/src/LibJPEG/jcprepct.c b/plugins/AdvaImg/src/LibJPEG/jcprepct.c
index be44cc4b45..00101e0b57 100644
--- a/plugins/AdvaImg/src/LibJPEG/jcprepct.c
+++ b/plugins/AdvaImg/src/LibJPEG/jcprepct.c
@@ -1,358 +1,358 @@
-/*
- * jcprepct.c
- *
- * Copyright (C) 1994-1996, Thomas G. Lane.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains the compression preprocessing controller.
- * This controller manages the color conversion, downsampling,
- * and edge expansion steps.
- *
- * Most of the complexity here is associated with buffering input rows
- * as required by the downsampler.  See the comments at the head of
- * jcsample.c for the downsampler's needs.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-
-
-/* At present, jcsample.c can request context rows only for smoothing.
- * In the future, we might also need context rows for CCIR601 sampling
- * or other more-complex downsampling procedures.  The code to support
- * context rows should be compiled only if needed.
- */
-#ifdef INPUT_SMOOTHING_SUPPORTED
-#define CONTEXT_ROWS_SUPPORTED
-#endif
-
-
-/*
- * For the simple (no-context-row) case, we just need to buffer one
- * row group's worth of pixels for the downsampling step.  At the bottom of
- * the image, we pad to a full row group by replicating the last pixel row.
- * The downsampler's last output row is then replicated if needed to pad
- * out to a full iMCU row.
- *
- * When providing context rows, we must buffer three row groups' worth of
- * pixels.  Three row groups are physically allocated, but the row pointer
- * arrays are made five row groups high, with the extra pointers above and
- * below "wrapping around" to point to the last and first real row groups.
- * This allows the downsampler to access the proper context rows.
- * At the top and bottom of the image, we create dummy context rows by
- * copying the first or last real pixel row.  This copying could be avoided
- * by pointer hacking as is done in jdmainct.c, but it doesn't seem worth the
- * trouble on the compression side.
- */
-
-
-/* Private buffer controller object */
-
-typedef struct {
-  struct jpeg_c_prep_controller pub; /* public fields */
-
-  /* Downsampling input buffer.  This buffer holds color-converted data
-   * until we have enough to do a downsample step.
-   */
-  JSAMPARRAY color_buf[MAX_COMPONENTS];
-
-  JDIMENSION rows_to_go;	/* counts rows remaining in source image */
-  int next_buf_row;		/* index of next row to store in color_buf */
-
-#ifdef CONTEXT_ROWS_SUPPORTED	/* only needed for context case */
-  int this_row_group;		/* starting row index of group to process */
-  int next_buf_stop;		/* downsample when we reach this index */
-#endif
-} my_prep_controller;
-
-typedef my_prep_controller * my_prep_ptr;
-
-
-/*
- * Initialize for a processing pass.
- */
-
-METHODDEF(void)
-start_pass_prep (j_compress_ptr cinfo, J_BUF_MODE pass_mode)
-{
-  my_prep_ptr prep = (my_prep_ptr) cinfo->prep;
-
-  if (pass_mode != JBUF_PASS_THRU)
-    ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
-
-  /* Initialize total-height counter for detecting bottom of image */
-  prep->rows_to_go = cinfo->image_height;
-  /* Mark the conversion buffer empty */
-  prep->next_buf_row = 0;
-#ifdef CONTEXT_ROWS_SUPPORTED
-  /* Preset additional state variables for context mode.
-   * These aren't used in non-context mode, so we needn't test which mode.
-   */
-  prep->this_row_group = 0;
-  /* Set next_buf_stop to stop after two row groups have been read in. */
-  prep->next_buf_stop = 2 * cinfo->max_v_samp_factor;
-#endif
-}
-
-
-/*
- * Expand an image vertically from height input_rows to height output_rows,
- * by duplicating the bottom row.
- */
-
-LOCAL(void)
-expand_bottom_edge (JSAMPARRAY image_data, JDIMENSION num_cols,
-		    int input_rows, int output_rows)
-{
-  register int row;
-
-  for (row = input_rows; row < output_rows; row++) {
-    jcopy_sample_rows(image_data, input_rows-1, image_data, row,
-		      1, num_cols);
-  }
-}
-
-
-/*
- * Process some data in the simple no-context case.
- *
- * Preprocessor output data is counted in "row groups".  A row group
- * is defined to be v_samp_factor sample rows of each component.
- * Downsampling will produce this much data from each max_v_samp_factor
- * input rows.
- */
-
-METHODDEF(void)
-pre_process_data (j_compress_ptr cinfo,
-		  JSAMPARRAY input_buf, JDIMENSION *in_row_ctr,
-		  JDIMENSION in_rows_avail,
-		  JSAMPIMAGE output_buf, JDIMENSION *out_row_group_ctr,
-		  JDIMENSION out_row_groups_avail)
-{
-  my_prep_ptr prep = (my_prep_ptr) cinfo->prep;
-  int numrows, ci;
-  JDIMENSION inrows;
-  jpeg_component_info * compptr;
-
-  while (*in_row_ctr < in_rows_avail &&
-	 *out_row_group_ctr < out_row_groups_avail) {
-    /* Do color conversion to fill the conversion buffer. */
-    inrows = in_rows_avail - *in_row_ctr;
-    numrows = cinfo->max_v_samp_factor - prep->next_buf_row;
-    numrows = (int) MIN((JDIMENSION) numrows, inrows);
-    (*cinfo->cconvert->color_convert) (cinfo, input_buf + *in_row_ctr,
-				       prep->color_buf,
-				       (JDIMENSION) prep->next_buf_row,
-				       numrows);
-    *in_row_ctr += numrows;
-    prep->next_buf_row += numrows;
-    prep->rows_to_go -= numrows;
-    /* If at bottom of image, pad to fill the conversion buffer. */
-    if (prep->rows_to_go == 0 &&
-	prep->next_buf_row < cinfo->max_v_samp_factor) {
-      for (ci = 0; ci < cinfo->num_components; ci++) {
-	expand_bottom_edge(prep->color_buf[ci], cinfo->image_width,
-			   prep->next_buf_row, cinfo->max_v_samp_factor);
-      }
-      prep->next_buf_row = cinfo->max_v_samp_factor;
-    }
-    /* If we've filled the conversion buffer, empty it. */
-    if (prep->next_buf_row == cinfo->max_v_samp_factor) {
-      (*cinfo->downsample->downsample) (cinfo,
-					prep->color_buf, (JDIMENSION) 0,
-					output_buf, *out_row_group_ctr);
-      prep->next_buf_row = 0;
-      (*out_row_group_ctr)++;
-    }
-    /* If at bottom of image, pad the output to a full iMCU height.
-     * Note we assume the caller is providing a one-iMCU-height output buffer!
-     */
-    if (prep->rows_to_go == 0 &&
-	*out_row_group_ctr < out_row_groups_avail) {
-      for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-	   ci++, compptr++) {
-	numrows = (compptr->v_samp_factor * compptr->DCT_v_scaled_size) /
-		  cinfo->min_DCT_v_scaled_size;
-	expand_bottom_edge(output_buf[ci],
-			   compptr->width_in_blocks * compptr->DCT_h_scaled_size,
-			   (int) (*out_row_group_ctr * numrows),
-			   (int) (out_row_groups_avail * numrows));
-      }
-      *out_row_group_ctr = out_row_groups_avail;
-      break;			/* can exit outer loop without test */
-    }
-  }
-}
-
-
-#ifdef CONTEXT_ROWS_SUPPORTED
-
-/*
- * Process some data in the context case.
- */
-
-METHODDEF(void)
-pre_process_context (j_compress_ptr cinfo,
-		     JSAMPARRAY input_buf, JDIMENSION *in_row_ctr,
-		     JDIMENSION in_rows_avail,
-		     JSAMPIMAGE output_buf, JDIMENSION *out_row_group_ctr,
-		     JDIMENSION out_row_groups_avail)
-{
-  my_prep_ptr prep = (my_prep_ptr) cinfo->prep;
-  int numrows, ci;
-  int buf_height = cinfo->max_v_samp_factor * 3;
-  JDIMENSION inrows;
-
-  while (*out_row_group_ctr < out_row_groups_avail) {
-    if (*in_row_ctr < in_rows_avail) {
-      /* Do color conversion to fill the conversion buffer. */
-      inrows = in_rows_avail - *in_row_ctr;
-      numrows = prep->next_buf_stop - prep->next_buf_row;
-      numrows = (int) MIN((JDIMENSION) numrows, inrows);
-      (*cinfo->cconvert->color_convert) (cinfo, input_buf + *in_row_ctr,
-					 prep->color_buf,
-					 (JDIMENSION) prep->next_buf_row,
-					 numrows);
-      /* Pad at top of image, if first time through */
-      if (prep->rows_to_go == cinfo->image_height) {
-	for (ci = 0; ci < cinfo->num_components; ci++) {
-	  int row;
-	  for (row = 1; row <= cinfo->max_v_samp_factor; row++) {
-	    jcopy_sample_rows(prep->color_buf[ci], 0,
-			      prep->color_buf[ci], -row,
-			      1, cinfo->image_width);
-	  }
-	}
-      }
-      *in_row_ctr += numrows;
-      prep->next_buf_row += numrows;
-      prep->rows_to_go -= numrows;
-    } else {
-      /* Return for more data, unless we are at the bottom of the image. */
-      if (prep->rows_to_go != 0)
-	break;
-      /* When at bottom of image, pad to fill the conversion buffer. */
-      if (prep->next_buf_row < prep->next_buf_stop) {
-	for (ci = 0; ci < cinfo->num_components; ci++) {
-	  expand_bottom_edge(prep->color_buf[ci], cinfo->image_width,
-			     prep->next_buf_row, prep->next_buf_stop);
-	}
-	prep->next_buf_row = prep->next_buf_stop;
-      }
-    }
-    /* If we've gotten enough data, downsample a row group. */
-    if (prep->next_buf_row == prep->next_buf_stop) {
-      (*cinfo->downsample->downsample) (cinfo,
-					prep->color_buf,
-					(JDIMENSION) prep->this_row_group,
-					output_buf, *out_row_group_ctr);
-      (*out_row_group_ctr)++;
-      /* Advance pointers with wraparound as necessary. */
-      prep->this_row_group += cinfo->max_v_samp_factor;
-      if (prep->this_row_group >= buf_height)
-	prep->this_row_group = 0;
-      if (prep->next_buf_row >= buf_height)
-	prep->next_buf_row = 0;
-      prep->next_buf_stop = prep->next_buf_row + cinfo->max_v_samp_factor;
-    }
-  }
-}
-
-
-/*
- * Create the wrapped-around downsampling input buffer needed for context mode.
- */
-
-LOCAL(void)
-create_context_buffer (j_compress_ptr cinfo)
-{
-  my_prep_ptr prep = (my_prep_ptr) cinfo->prep;
-  int rgroup_height = cinfo->max_v_samp_factor;
-  int ci, i;
-  jpeg_component_info * compptr;
-  JSAMPARRAY true_buffer, fake_buffer;
-
-  /* Grab enough space for fake row pointers for all the components;
-   * we need five row groups' worth of pointers for each component.
-   */
-  fake_buffer = (JSAMPARRAY)
-    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				(cinfo->num_components * 5 * rgroup_height) *
-				SIZEOF(JSAMPROW));
-
-  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-       ci++, compptr++) {
-    /* Allocate the actual buffer space (3 row groups) for this component.
-     * We make the buffer wide enough to allow the downsampler to edge-expand
-     * horizontally within the buffer, if it so chooses.
-     */
-    true_buffer = (*cinfo->mem->alloc_sarray)
-      ((j_common_ptr) cinfo, JPOOL_IMAGE,
-       (JDIMENSION) (((long) compptr->width_in_blocks *
-		      cinfo->min_DCT_h_scaled_size *
-		      cinfo->max_h_samp_factor) / compptr->h_samp_factor),
-       (JDIMENSION) (3 * rgroup_height));
-    /* Copy true buffer row pointers into the middle of the fake row array */
-    MEMCOPY(fake_buffer + rgroup_height, true_buffer,
-	    3 * rgroup_height * SIZEOF(JSAMPROW));
-    /* Fill in the above and below wraparound pointers */
-    for (i = 0; i < rgroup_height; i++) {
-      fake_buffer[i] = true_buffer[2 * rgroup_height + i];
-      fake_buffer[4 * rgroup_height + i] = true_buffer[i];
-    }
-    prep->color_buf[ci] = fake_buffer + rgroup_height;
-    fake_buffer += 5 * rgroup_height; /* point to space for next component */
-  }
-}
-
-#endif /* CONTEXT_ROWS_SUPPORTED */
-
-
-/*
- * Initialize preprocessing controller.
- */
-
-GLOBAL(void)
-jinit_c_prep_controller (j_compress_ptr cinfo, boolean need_full_buffer)
-{
-  my_prep_ptr prep;
-  int ci;
-  jpeg_component_info * compptr;
-
-  if (need_full_buffer)		/* safety check */
-    ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
-
-  prep = (my_prep_ptr)
-    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				SIZEOF(my_prep_controller));
-  cinfo->prep = (struct jpeg_c_prep_controller *) prep;
-  prep->pub.start_pass = start_pass_prep;
-
-  /* Allocate the color conversion buffer.
-   * We make the buffer wide enough to allow the downsampler to edge-expand
-   * horizontally within the buffer, if it so chooses.
-   */
-  if (cinfo->downsample->need_context_rows) {
-    /* Set up to provide context rows */
-#ifdef CONTEXT_ROWS_SUPPORTED
-    prep->pub.pre_process_data = pre_process_context;
-    create_context_buffer(cinfo);
-#else
-    ERREXIT(cinfo, JERR_NOT_COMPILED);
-#endif
-  } else {
-    /* No context, just make it tall enough for one row group */
-    prep->pub.pre_process_data = pre_process_data;
-    for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-	 ci++, compptr++) {
-      prep->color_buf[ci] = (*cinfo->mem->alloc_sarray)
-	((j_common_ptr) cinfo, JPOOL_IMAGE,
-	 (JDIMENSION) (((long) compptr->width_in_blocks *
-			cinfo->min_DCT_h_scaled_size *
-			cinfo->max_h_samp_factor) / compptr->h_samp_factor),
-	 (JDIMENSION) cinfo->max_v_samp_factor);
-    }
-  }
-}
+/*
+ * jcprepct.c
+ *
+ * Copyright (C) 1994-1996, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains the compression preprocessing controller.
+ * This controller manages the color conversion, downsampling,
+ * and edge expansion steps.
+ *
+ * Most of the complexity here is associated with buffering input rows
+ * as required by the downsampler.  See the comments at the head of
+ * jcsample.c for the downsampler's needs.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/* At present, jcsample.c can request context rows only for smoothing.
+ * In the future, we might also need context rows for CCIR601 sampling
+ * or other more-complex downsampling procedures.  The code to support
+ * context rows should be compiled only if needed.
+ */
+#ifdef INPUT_SMOOTHING_SUPPORTED
+#define CONTEXT_ROWS_SUPPORTED
+#endif
+
+
+/*
+ * For the simple (no-context-row) case, we just need to buffer one
+ * row group's worth of pixels for the downsampling step.  At the bottom of
+ * the image, we pad to a full row group by replicating the last pixel row.
+ * The downsampler's last output row is then replicated if needed to pad
+ * out to a full iMCU row.
+ *
+ * When providing context rows, we must buffer three row groups' worth of
+ * pixels.  Three row groups are physically allocated, but the row pointer
+ * arrays are made five row groups high, with the extra pointers above and
+ * below "wrapping around" to point to the last and first real row groups.
+ * This allows the downsampler to access the proper context rows.
+ * At the top and bottom of the image, we create dummy context rows by
+ * copying the first or last real pixel row.  This copying could be avoided
+ * by pointer hacking as is done in jdmainct.c, but it doesn't seem worth the
+ * trouble on the compression side.
+ */
+
+
+/* Private buffer controller object */
+
+typedef struct {
+  struct jpeg_c_prep_controller pub; /* public fields */
+
+  /* Downsampling input buffer.  This buffer holds color-converted data
+   * until we have enough to do a downsample step.
+   */
+  JSAMPARRAY color_buf[MAX_COMPONENTS];
+
+  JDIMENSION rows_to_go;	/* counts rows remaining in source image */
+  int next_buf_row;		/* index of next row to store in color_buf */
+
+#ifdef CONTEXT_ROWS_SUPPORTED	/* only needed for context case */
+  int this_row_group;		/* starting row index of group to process */
+  int next_buf_stop;		/* downsample when we reach this index */
+#endif
+} my_prep_controller;
+
+typedef my_prep_controller * my_prep_ptr;
+
+
+/*
+ * Initialize for a processing pass.
+ */
+
+METHODDEF(void)
+start_pass_prep (j_compress_ptr cinfo, J_BUF_MODE pass_mode)
+{
+  my_prep_ptr prep = (my_prep_ptr) cinfo->prep;
+
+  if (pass_mode != JBUF_PASS_THRU)
+    ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
+
+  /* Initialize total-height counter for detecting bottom of image */
+  prep->rows_to_go = cinfo->image_height;
+  /* Mark the conversion buffer empty */
+  prep->next_buf_row = 0;
+#ifdef CONTEXT_ROWS_SUPPORTED
+  /* Preset additional state variables for context mode.
+   * These aren't used in non-context mode, so we needn't test which mode.
+   */
+  prep->this_row_group = 0;
+  /* Set next_buf_stop to stop after two row groups have been read in. */
+  prep->next_buf_stop = 2 * cinfo->max_v_samp_factor;
+#endif
+}
+
+
+/*
+ * Expand an image vertically from height input_rows to height output_rows,
+ * by duplicating the bottom row.
+ */
+
+LOCAL(void)
+expand_bottom_edge (JSAMPARRAY image_data, JDIMENSION num_cols,
+		    int input_rows, int output_rows)
+{
+  register int row;
+
+  for (row = input_rows; row < output_rows; row++) {
+    jcopy_sample_rows(image_data, input_rows-1, image_data, row,
+		      1, num_cols);
+  }
+}
+
+
+/*
+ * Process some data in the simple no-context case.
+ *
+ * Preprocessor output data is counted in "row groups".  A row group
+ * is defined to be v_samp_factor sample rows of each component.
+ * Downsampling will produce this much data from each max_v_samp_factor
+ * input rows.
+ */
+
+METHODDEF(void)
+pre_process_data (j_compress_ptr cinfo,
+		  JSAMPARRAY input_buf, JDIMENSION *in_row_ctr,
+		  JDIMENSION in_rows_avail,
+		  JSAMPIMAGE output_buf, JDIMENSION *out_row_group_ctr,
+		  JDIMENSION out_row_groups_avail)
+{
+  my_prep_ptr prep = (my_prep_ptr) cinfo->prep;
+  int numrows, ci;
+  JDIMENSION inrows;
+  jpeg_component_info * compptr;
+
+  while (*in_row_ctr < in_rows_avail &&
+	 *out_row_group_ctr < out_row_groups_avail) {
+    /* Do color conversion to fill the conversion buffer. */
+    inrows = in_rows_avail - *in_row_ctr;
+    numrows = cinfo->max_v_samp_factor - prep->next_buf_row;
+    numrows = (int) MIN((JDIMENSION) numrows, inrows);
+    (*cinfo->cconvert->color_convert) (cinfo, input_buf + *in_row_ctr,
+				       prep->color_buf,
+				       (JDIMENSION) prep->next_buf_row,
+				       numrows);
+    *in_row_ctr += numrows;
+    prep->next_buf_row += numrows;
+    prep->rows_to_go -= numrows;
+    /* If at bottom of image, pad to fill the conversion buffer. */
+    if (prep->rows_to_go == 0 &&
+	prep->next_buf_row < cinfo->max_v_samp_factor) {
+      for (ci = 0; ci < cinfo->num_components; ci++) {
+	expand_bottom_edge(prep->color_buf[ci], cinfo->image_width,
+			   prep->next_buf_row, cinfo->max_v_samp_factor);
+      }
+      prep->next_buf_row = cinfo->max_v_samp_factor;
+    }
+    /* If we've filled the conversion buffer, empty it. */
+    if (prep->next_buf_row == cinfo->max_v_samp_factor) {
+      (*cinfo->downsample->downsample) (cinfo,
+					prep->color_buf, (JDIMENSION) 0,
+					output_buf, *out_row_group_ctr);
+      prep->next_buf_row = 0;
+      (*out_row_group_ctr)++;
+    }
+    /* If at bottom of image, pad the output to a full iMCU height.
+     * Note we assume the caller is providing a one-iMCU-height output buffer!
+     */
+    if (prep->rows_to_go == 0 &&
+	*out_row_group_ctr < out_row_groups_avail) {
+      for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+	   ci++, compptr++) {
+	numrows = (compptr->v_samp_factor * compptr->DCT_v_scaled_size) /
+		  cinfo->min_DCT_v_scaled_size;
+	expand_bottom_edge(output_buf[ci],
+			   compptr->width_in_blocks * compptr->DCT_h_scaled_size,
+			   (int) (*out_row_group_ctr * numrows),
+			   (int) (out_row_groups_avail * numrows));
+      }
+      *out_row_group_ctr = out_row_groups_avail;
+      break;			/* can exit outer loop without test */
+    }
+  }
+}
+
+
+#ifdef CONTEXT_ROWS_SUPPORTED
+
+/*
+ * Process some data in the context case.
+ */
+
+METHODDEF(void)
+pre_process_context (j_compress_ptr cinfo,
+		     JSAMPARRAY input_buf, JDIMENSION *in_row_ctr,
+		     JDIMENSION in_rows_avail,
+		     JSAMPIMAGE output_buf, JDIMENSION *out_row_group_ctr,
+		     JDIMENSION out_row_groups_avail)
+{
+  my_prep_ptr prep = (my_prep_ptr) cinfo->prep;
+  int numrows, ci;
+  int buf_height = cinfo->max_v_samp_factor * 3;
+  JDIMENSION inrows;
+
+  while (*out_row_group_ctr < out_row_groups_avail) {
+    if (*in_row_ctr < in_rows_avail) {
+      /* Do color conversion to fill the conversion buffer. */
+      inrows = in_rows_avail - *in_row_ctr;
+      numrows = prep->next_buf_stop - prep->next_buf_row;
+      numrows = (int) MIN((JDIMENSION) numrows, inrows);
+      (*cinfo->cconvert->color_convert) (cinfo, input_buf + *in_row_ctr,
+					 prep->color_buf,
+					 (JDIMENSION) prep->next_buf_row,
+					 numrows);
+      /* Pad at top of image, if first time through */
+      if (prep->rows_to_go == cinfo->image_height) {
+	for (ci = 0; ci < cinfo->num_components; ci++) {
+	  int row;
+	  for (row = 1; row <= cinfo->max_v_samp_factor; row++) {
+	    jcopy_sample_rows(prep->color_buf[ci], 0,
+			      prep->color_buf[ci], -row,
+			      1, cinfo->image_width);
+	  }
+	}
+      }
+      *in_row_ctr += numrows;
+      prep->next_buf_row += numrows;
+      prep->rows_to_go -= numrows;
+    } else {
+      /* Return for more data, unless we are at the bottom of the image. */
+      if (prep->rows_to_go != 0)
+	break;
+      /* When at bottom of image, pad to fill the conversion buffer. */
+      if (prep->next_buf_row < prep->next_buf_stop) {
+	for (ci = 0; ci < cinfo->num_components; ci++) {
+	  expand_bottom_edge(prep->color_buf[ci], cinfo->image_width,
+			     prep->next_buf_row, prep->next_buf_stop);
+	}
+	prep->next_buf_row = prep->next_buf_stop;
+      }
+    }
+    /* If we've gotten enough data, downsample a row group. */
+    if (prep->next_buf_row == prep->next_buf_stop) {
+      (*cinfo->downsample->downsample) (cinfo,
+					prep->color_buf,
+					(JDIMENSION) prep->this_row_group,
+					output_buf, *out_row_group_ctr);
+      (*out_row_group_ctr)++;
+      /* Advance pointers with wraparound as necessary. */
+      prep->this_row_group += cinfo->max_v_samp_factor;
+      if (prep->this_row_group >= buf_height)
+	prep->this_row_group = 0;
+      if (prep->next_buf_row >= buf_height)
+	prep->next_buf_row = 0;
+      prep->next_buf_stop = prep->next_buf_row + cinfo->max_v_samp_factor;
+    }
+  }
+}
+
+
+/*
+ * Create the wrapped-around downsampling input buffer needed for context mode.
+ */
+
+LOCAL(void)
+create_context_buffer (j_compress_ptr cinfo)
+{
+  my_prep_ptr prep = (my_prep_ptr) cinfo->prep;
+  int rgroup_height = cinfo->max_v_samp_factor;
+  int ci, i;
+  jpeg_component_info * compptr;
+  JSAMPARRAY true_buffer, fake_buffer;
+
+  /* Grab enough space for fake row pointers for all the components;
+   * we need five row groups' worth of pointers for each component.
+   */
+  fake_buffer = (JSAMPARRAY)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				(cinfo->num_components * 5 * rgroup_height) *
+				SIZEOF(JSAMPROW));
+
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    /* Allocate the actual buffer space (3 row groups) for this component.
+     * We make the buffer wide enough to allow the downsampler to edge-expand
+     * horizontally within the buffer, if it so chooses.
+     */
+    true_buffer = (*cinfo->mem->alloc_sarray)
+      ((j_common_ptr) cinfo, JPOOL_IMAGE,
+       (JDIMENSION) (((long) compptr->width_in_blocks *
+		      cinfo->min_DCT_h_scaled_size *
+		      cinfo->max_h_samp_factor) / compptr->h_samp_factor),
+       (JDIMENSION) (3 * rgroup_height));
+    /* Copy true buffer row pointers into the middle of the fake row array */
+    MEMCOPY(fake_buffer + rgroup_height, true_buffer,
+	    3 * rgroup_height * SIZEOF(JSAMPROW));
+    /* Fill in the above and below wraparound pointers */
+    for (i = 0; i < rgroup_height; i++) {
+      fake_buffer[i] = true_buffer[2 * rgroup_height + i];
+      fake_buffer[4 * rgroup_height + i] = true_buffer[i];
+    }
+    prep->color_buf[ci] = fake_buffer + rgroup_height;
+    fake_buffer += 5 * rgroup_height; /* point to space for next component */
+  }
+}
+
+#endif /* CONTEXT_ROWS_SUPPORTED */
+
+
+/*
+ * Initialize preprocessing controller.
+ */
+
+GLOBAL(void)
+jinit_c_prep_controller (j_compress_ptr cinfo, boolean need_full_buffer)
+{
+  my_prep_ptr prep;
+  int ci;
+  jpeg_component_info * compptr;
+
+  if (need_full_buffer)		/* safety check */
+    ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
+
+  prep = (my_prep_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				SIZEOF(my_prep_controller));
+  cinfo->prep = (struct jpeg_c_prep_controller *) prep;
+  prep->pub.start_pass = start_pass_prep;
+
+  /* Allocate the color conversion buffer.
+   * We make the buffer wide enough to allow the downsampler to edge-expand
+   * horizontally within the buffer, if it so chooses.
+   */
+  if (cinfo->downsample->need_context_rows) {
+    /* Set up to provide context rows */
+#ifdef CONTEXT_ROWS_SUPPORTED
+    prep->pub.pre_process_data = pre_process_context;
+    create_context_buffer(cinfo);
+#else
+    ERREXIT(cinfo, JERR_NOT_COMPILED);
+#endif
+  } else {
+    /* No context, just make it tall enough for one row group */
+    prep->pub.pre_process_data = pre_process_data;
+    for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+	 ci++, compptr++) {
+      prep->color_buf[ci] = (*cinfo->mem->alloc_sarray)
+	((j_common_ptr) cinfo, JPOOL_IMAGE,
+	 (JDIMENSION) (((long) compptr->width_in_blocks *
+			cinfo->min_DCT_h_scaled_size *
+			cinfo->max_h_samp_factor) / compptr->h_samp_factor),
+	 (JDIMENSION) cinfo->max_v_samp_factor);
+    }
+  }
+}
diff --git a/plugins/AdvaImg/src/LibJPEG/jcsample.c b/plugins/AdvaImg/src/LibJPEG/jcsample.c
index 4d36f85f35..1aef8a6fc7 100644
--- a/plugins/AdvaImg/src/LibJPEG/jcsample.c
+++ b/plugins/AdvaImg/src/LibJPEG/jcsample.c
@@ -1,545 +1,545 @@
-/*
- * jcsample.c
- *
- * Copyright (C) 1991-1996, Thomas G. Lane.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains downsampling routines.
- *
- * Downsampling input data is counted in "row groups".  A row group
- * is defined to be max_v_samp_factor pixel rows of each component,
- * from which the downsampler produces v_samp_factor sample rows.
- * A single row group is processed in each call to the downsampler module.
- *
- * The downsampler is responsible for edge-expansion of its output data
- * to fill an integral number of DCT blocks horizontally.  The source buffer
- * may be modified if it is helpful for this purpose (the source buffer is
- * allocated wide enough to correspond to the desired output width).
- * The caller (the prep controller) is responsible for vertical padding.
- *
- * The downsampler may request "context rows" by setting need_context_rows
- * during startup.  In this case, the input arrays will contain at least
- * one row group's worth of pixels above and below the passed-in data;
- * the caller will create dummy rows at image top and bottom by replicating
- * the first or last real pixel row.
- *
- * An excellent reference for image resampling is
- *   Digital Image Warping, George Wolberg, 1990.
- *   Pub. by IEEE Computer Society Press, Los Alamitos, CA. ISBN 0-8186-8944-7.
- *
- * The downsampling algorithm used here is a simple average of the source
- * pixels covered by the output pixel.  The hi-falutin sampling literature
- * refers to this as a "box filter".  In general the characteristics of a box
- * filter are not very good, but for the specific cases we normally use (1:1
- * and 2:1 ratios) the box is equivalent to a "triangle filter" which is not
- * nearly so bad.  If you intend to use other sampling ratios, you'd be well
- * advised to improve this code.
- *
- * A simple input-smoothing capability is provided.  This is mainly intended
- * for cleaning up color-dithered GIF input files (if you find it inadequate,
- * we suggest using an external filtering program such as pnmconvol).  When
- * enabled, each input pixel P is replaced by a weighted sum of itself and its
- * eight neighbors.  P's weight is 1-8*SF and each neighbor's weight is SF,
- * where SF = (smoothing_factor / 1024).
- * Currently, smoothing is only supported for 2h2v sampling factors.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-
-
-/* Pointer to routine to downsample a single component */
-typedef JMETHOD(void, downsample1_ptr,
-		(j_compress_ptr cinfo, jpeg_component_info * compptr,
-		 JSAMPARRAY input_data, JSAMPARRAY output_data));
-
-/* Private subobject */
-
-typedef struct {
-  struct jpeg_downsampler pub;	/* public fields */
-
-  /* Downsampling method pointers, one per component */
-  downsample1_ptr methods[MAX_COMPONENTS];
-
-  /* Height of an output row group for each component. */
-  int rowgroup_height[MAX_COMPONENTS];
-
-  /* These arrays save pixel expansion factors so that int_downsample need not
-   * recompute them each time.  They are unused for other downsampling methods.
-   */
-  UINT8 h_expand[MAX_COMPONENTS];
-  UINT8 v_expand[MAX_COMPONENTS];
-} my_downsampler;
-
-typedef my_downsampler * my_downsample_ptr;
-
-
-/*
- * Initialize for a downsampling pass.
- */
-
-METHODDEF(void)
-start_pass_downsample (j_compress_ptr cinfo)
-{
-  /* no work for now */
-}
-
-
-/*
- * Expand a component horizontally from width input_cols to width output_cols,
- * by duplicating the rightmost samples.
- */
-
-LOCAL(void)
-expand_right_edge (JSAMPARRAY image_data, int num_rows,
-		   JDIMENSION input_cols, JDIMENSION output_cols)
-{
-  register JSAMPROW ptr;
-  register JSAMPLE pixval;
-  register int count;
-  int row;
-  int numcols = (int) (output_cols - input_cols);
-
-  if (numcols > 0) {
-    for (row = 0; row < num_rows; row++) {
-      ptr = image_data[row] + input_cols;
-      pixval = ptr[-1];		/* don't need GETJSAMPLE() here */
-      for (count = numcols; count > 0; count--)
-	*ptr++ = pixval;
-    }
-  }
-}
-
-
-/*
- * Do downsampling for a whole row group (all components).
- *
- * In this version we simply downsample each component independently.
- */
-
-METHODDEF(void)
-sep_downsample (j_compress_ptr cinfo,
-		JSAMPIMAGE input_buf, JDIMENSION in_row_index,
-		JSAMPIMAGE output_buf, JDIMENSION out_row_group_index)
-{
-  my_downsample_ptr downsample = (my_downsample_ptr) cinfo->downsample;
-  int ci;
-  jpeg_component_info * compptr;
-  JSAMPARRAY in_ptr, out_ptr;
-
-  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-       ci++, compptr++) {
-    in_ptr = input_buf[ci] + in_row_index;
-    out_ptr = output_buf[ci] +
-	      (out_row_group_index * downsample->rowgroup_height[ci]);
-    (*downsample->methods[ci]) (cinfo, compptr, in_ptr, out_ptr);
-  }
-}
-
-
-/*
- * Downsample pixel values of a single component.
- * One row group is processed per call.
- * This version handles arbitrary integral sampling ratios, without smoothing.
- * Note that this version is not actually used for customary sampling ratios.
- */
-
-METHODDEF(void)
-int_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
-		JSAMPARRAY input_data, JSAMPARRAY output_data)
-{
-  my_downsample_ptr downsample = (my_downsample_ptr) cinfo->downsample;
-  int inrow, outrow, h_expand, v_expand, numpix, numpix2, h, v;
-  JDIMENSION outcol, outcol_h;	/* outcol_h == outcol*h_expand */
-  JDIMENSION output_cols = compptr->width_in_blocks * compptr->DCT_h_scaled_size;
-  JSAMPROW inptr, outptr;
-  INT32 outvalue;
-
-  h_expand = downsample->h_expand[compptr->component_index];
-  v_expand = downsample->v_expand[compptr->component_index];
-  numpix = h_expand * v_expand;
-  numpix2 = numpix/2;
-
-  /* Expand input data enough to let all the output samples be generated
-   * by the standard loop.  Special-casing padded output would be more
-   * efficient.
-   */
-  expand_right_edge(input_data, cinfo->max_v_samp_factor,
-		    cinfo->image_width, output_cols * h_expand);
-
-  inrow = outrow = 0;
-  while (inrow < cinfo->max_v_samp_factor) {
-    outptr = output_data[outrow];
-    for (outcol = 0, outcol_h = 0; outcol < output_cols;
-	 outcol++, outcol_h += h_expand) {
-      outvalue = 0;
-      for (v = 0; v < v_expand; v++) {
-	inptr = input_data[inrow+v] + outcol_h;
-	for (h = 0; h < h_expand; h++) {
-	  outvalue += (INT32) GETJSAMPLE(*inptr++);
-	}
-      }
-      *outptr++ = (JSAMPLE) ((outvalue + numpix2) / numpix);
-    }
-    inrow += v_expand;
-    outrow++;
-  }
-}
-
-
-/*
- * Downsample pixel values of a single component.
- * This version handles the special case of a full-size component,
- * without smoothing.
- */
-
-METHODDEF(void)
-fullsize_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
-		     JSAMPARRAY input_data, JSAMPARRAY output_data)
-{
-  /* Copy the data */
-  jcopy_sample_rows(input_data, 0, output_data, 0,
-		    cinfo->max_v_samp_factor, cinfo->image_width);
-  /* Edge-expand */
-  expand_right_edge(output_data, cinfo->max_v_samp_factor, cinfo->image_width,
-		    compptr->width_in_blocks * compptr->DCT_h_scaled_size);
-}
-
-
-/*
- * Downsample pixel values of a single component.
- * This version handles the common case of 2:1 horizontal and 1:1 vertical,
- * without smoothing.
- *
- * A note about the "bias" calculations: when rounding fractional values to
- * integer, we do not want to always round 0.5 up to the next integer.
- * If we did that, we'd introduce a noticeable bias towards larger values.
- * Instead, this code is arranged so that 0.5 will be rounded up or down at
- * alternate pixel locations (a simple ordered dither pattern).
- */
-
-METHODDEF(void)
-h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
-		 JSAMPARRAY input_data, JSAMPARRAY output_data)
-{
-  int inrow;
-  JDIMENSION outcol;
-  JDIMENSION output_cols = compptr->width_in_blocks * compptr->DCT_h_scaled_size;
-  register JSAMPROW inptr, outptr;
-  register int bias;
-
-  /* Expand input data enough to let all the output samples be generated
-   * by the standard loop.  Special-casing padded output would be more
-   * efficient.
-   */
-  expand_right_edge(input_data, cinfo->max_v_samp_factor,
-		    cinfo->image_width, output_cols * 2);
-
-  for (inrow = 0; inrow < cinfo->max_v_samp_factor; inrow++) {
-    outptr = output_data[inrow];
-    inptr = input_data[inrow];
-    bias = 0;			/* bias = 0,1,0,1,... for successive samples */
-    for (outcol = 0; outcol < output_cols; outcol++) {
-      *outptr++ = (JSAMPLE) ((GETJSAMPLE(*inptr) + GETJSAMPLE(inptr[1])
-			      + bias) >> 1);
-      bias ^= 1;		/* 0=>1, 1=>0 */
-      inptr += 2;
-    }
-  }
-}
-
-
-/*
- * Downsample pixel values of a single component.
- * This version handles the standard case of 2:1 horizontal and 2:1 vertical,
- * without smoothing.
- */
-
-METHODDEF(void)
-h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
-		 JSAMPARRAY input_data, JSAMPARRAY output_data)
-{
-  int inrow, outrow;
-  JDIMENSION outcol;
-  JDIMENSION output_cols = compptr->width_in_blocks * compptr->DCT_h_scaled_size;
-  register JSAMPROW inptr0, inptr1, outptr;
-  register int bias;
-
-  /* Expand input data enough to let all the output samples be generated
-   * by the standard loop.  Special-casing padded output would be more
-   * efficient.
-   */
-  expand_right_edge(input_data, cinfo->max_v_samp_factor,
-		    cinfo->image_width, output_cols * 2);
-
-  inrow = outrow = 0;
-  while (inrow < cinfo->max_v_samp_factor) {
-    outptr = output_data[outrow];
-    inptr0 = input_data[inrow];
-    inptr1 = input_data[inrow+1];
-    bias = 1;			/* bias = 1,2,1,2,... for successive samples */
-    for (outcol = 0; outcol < output_cols; outcol++) {
-      *outptr++ = (JSAMPLE) ((GETJSAMPLE(*inptr0) + GETJSAMPLE(inptr0[1]) +
-			      GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[1])
-			      + bias) >> 2);
-      bias ^= 3;		/* 1=>2, 2=>1 */
-      inptr0 += 2; inptr1 += 2;
-    }
-    inrow += 2;
-    outrow++;
-  }
-}
-
-
-#ifdef INPUT_SMOOTHING_SUPPORTED
-
-/*
- * Downsample pixel values of a single component.
- * This version handles the standard case of 2:1 horizontal and 2:1 vertical,
- * with smoothing.  One row of context is required.
- */
-
-METHODDEF(void)
-h2v2_smooth_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
-			JSAMPARRAY input_data, JSAMPARRAY output_data)
-{
-  int inrow, outrow;
-  JDIMENSION colctr;
-  JDIMENSION output_cols = compptr->width_in_blocks * compptr->DCT_h_scaled_size;
-  register JSAMPROW inptr0, inptr1, above_ptr, below_ptr, outptr;
-  INT32 membersum, neighsum, memberscale, neighscale;
-
-  /* Expand input data enough to let all the output samples be generated
-   * by the standard loop.  Special-casing padded output would be more
-   * efficient.
-   */
-  expand_right_edge(input_data - 1, cinfo->max_v_samp_factor + 2,
-		    cinfo->image_width, output_cols * 2);
-
-  /* We don't bother to form the individual "smoothed" input pixel values;
-   * we can directly compute the output which is the average of the four
-   * smoothed values.  Each of the four member pixels contributes a fraction
-   * (1-8*SF) to its own smoothed image and a fraction SF to each of the three
-   * other smoothed pixels, therefore a total fraction (1-5*SF)/4 to the final
-   * output.  The four corner-adjacent neighbor pixels contribute a fraction
-   * SF to just one smoothed pixel, or SF/4 to the final output; while the
-   * eight edge-adjacent neighbors contribute SF to each of two smoothed
-   * pixels, or SF/2 overall.  In order to use integer arithmetic, these
-   * factors are scaled by 2^16 = 65536.
-   * Also recall that SF = smoothing_factor / 1024.
-   */
-
-  memberscale = 16384 - cinfo->smoothing_factor * 80; /* scaled (1-5*SF)/4 */
-  neighscale = cinfo->smoothing_factor * 16; /* scaled SF/4 */
-
-  inrow = outrow = 0;
-  while (inrow < cinfo->max_v_samp_factor) {
-    outptr = output_data[outrow];
-    inptr0 = input_data[inrow];
-    inptr1 = input_data[inrow+1];
-    above_ptr = input_data[inrow-1];
-    below_ptr = input_data[inrow+2];
-
-    /* Special case for first column: pretend column -1 is same as column 0 */
-    membersum = GETJSAMPLE(*inptr0) + GETJSAMPLE(inptr0[1]) +
-		GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[1]);
-    neighsum = GETJSAMPLE(*above_ptr) + GETJSAMPLE(above_ptr[1]) +
-	       GETJSAMPLE(*below_ptr) + GETJSAMPLE(below_ptr[1]) +
-	       GETJSAMPLE(*inptr0) + GETJSAMPLE(inptr0[2]) +
-	       GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[2]);
-    neighsum += neighsum;
-    neighsum += GETJSAMPLE(*above_ptr) + GETJSAMPLE(above_ptr[2]) +
-		GETJSAMPLE(*below_ptr) + GETJSAMPLE(below_ptr[2]);
-    membersum = membersum * memberscale + neighsum * neighscale;
-    *outptr++ = (JSAMPLE) ((membersum + 32768) >> 16);
-    inptr0 += 2; inptr1 += 2; above_ptr += 2; below_ptr += 2;
-
-    for (colctr = output_cols - 2; colctr > 0; colctr--) {
-      /* sum of pixels directly mapped to this output element */
-      membersum = GETJSAMPLE(*inptr0) + GETJSAMPLE(inptr0[1]) +
-		  GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[1]);
-      /* sum of edge-neighbor pixels */
-      neighsum = GETJSAMPLE(*above_ptr) + GETJSAMPLE(above_ptr[1]) +
-		 GETJSAMPLE(*below_ptr) + GETJSAMPLE(below_ptr[1]) +
-		 GETJSAMPLE(inptr0[-1]) + GETJSAMPLE(inptr0[2]) +
-		 GETJSAMPLE(inptr1[-1]) + GETJSAMPLE(inptr1[2]);
-      /* The edge-neighbors count twice as much as corner-neighbors */
-      neighsum += neighsum;
-      /* Add in the corner-neighbors */
-      neighsum += GETJSAMPLE(above_ptr[-1]) + GETJSAMPLE(above_ptr[2]) +
-		  GETJSAMPLE(below_ptr[-1]) + GETJSAMPLE(below_ptr[2]);
-      /* form final output scaled up by 2^16 */
-      membersum = membersum * memberscale + neighsum * neighscale;
-      /* round, descale and output it */
-      *outptr++ = (JSAMPLE) ((membersum + 32768) >> 16);
-      inptr0 += 2; inptr1 += 2; above_ptr += 2; below_ptr += 2;
-    }
-
-    /* Special case for last column */
-    membersum = GETJSAMPLE(*inptr0) + GETJSAMPLE(inptr0[1]) +
-		GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[1]);
-    neighsum = GETJSAMPLE(*above_ptr) + GETJSAMPLE(above_ptr[1]) +
-	       GETJSAMPLE(*below_ptr) + GETJSAMPLE(below_ptr[1]) +
-	       GETJSAMPLE(inptr0[-1]) + GETJSAMPLE(inptr0[1]) +
-	       GETJSAMPLE(inptr1[-1]) + GETJSAMPLE(inptr1[1]);
-    neighsum += neighsum;
-    neighsum += GETJSAMPLE(above_ptr[-1]) + GETJSAMPLE(above_ptr[1]) +
-		GETJSAMPLE(below_ptr[-1]) + GETJSAMPLE(below_ptr[1]);
-    membersum = membersum * memberscale + neighsum * neighscale;
-    *outptr = (JSAMPLE) ((membersum + 32768) >> 16);
-
-    inrow += 2;
-    outrow++;
-  }
-}
-
-
-/*
- * Downsample pixel values of a single component.
- * This version handles the special case of a full-size component,
- * with smoothing.  One row of context is required.
- */
-
-METHODDEF(void)
-fullsize_smooth_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr,
-			    JSAMPARRAY input_data, JSAMPARRAY output_data)
-{
-  int inrow;
-  JDIMENSION colctr;
-  JDIMENSION output_cols = compptr->width_in_blocks * compptr->DCT_h_scaled_size;
-  register JSAMPROW inptr, above_ptr, below_ptr, outptr;
-  INT32 membersum, neighsum, memberscale, neighscale;
-  int colsum, lastcolsum, nextcolsum;
-
-  /* Expand input data enough to let all the output samples be generated
-   * by the standard loop.  Special-casing padded output would be more
-   * efficient.
-   */
-  expand_right_edge(input_data - 1, cinfo->max_v_samp_factor + 2,
-		    cinfo->image_width, output_cols);
-
-  /* Each of the eight neighbor pixels contributes a fraction SF to the
-   * smoothed pixel, while the main pixel contributes (1-8*SF).  In order
-   * to use integer arithmetic, these factors are multiplied by 2^16 = 65536.
-   * Also recall that SF = smoothing_factor / 1024.
-   */
-
-  memberscale = 65536L - cinfo->smoothing_factor * 512L; /* scaled 1-8*SF */
-  neighscale = cinfo->smoothing_factor * 64; /* scaled SF */
-
-  for (inrow = 0; inrow < cinfo->max_v_samp_factor; inrow++) {
-    outptr = output_data[inrow];
-    inptr = input_data[inrow];
-    above_ptr = input_data[inrow-1];
-    below_ptr = input_data[inrow+1];
-
-    /* Special case for first column */
-    colsum = GETJSAMPLE(*above_ptr++) + GETJSAMPLE(*below_ptr++) +
-	     GETJSAMPLE(*inptr);
-    membersum = GETJSAMPLE(*inptr++);
-    nextcolsum = GETJSAMPLE(*above_ptr) + GETJSAMPLE(*below_ptr) +
-		 GETJSAMPLE(*inptr);
-    neighsum = colsum + (colsum - membersum) + nextcolsum;
-    membersum = membersum * memberscale + neighsum * neighscale;
-    *outptr++ = (JSAMPLE) ((membersum + 32768) >> 16);
-    lastcolsum = colsum; colsum = nextcolsum;
-
-    for (colctr = output_cols - 2; colctr > 0; colctr--) {
-      membersum = GETJSAMPLE(*inptr++);
-      above_ptr++; below_ptr++;
-      nextcolsum = GETJSAMPLE(*above_ptr) + GETJSAMPLE(*below_ptr) +
-		   GETJSAMPLE(*inptr);
-      neighsum = lastcolsum + (colsum - membersum) + nextcolsum;
-      membersum = membersum * memberscale + neighsum * neighscale;
-      *outptr++ = (JSAMPLE) ((membersum + 32768) >> 16);
-      lastcolsum = colsum; colsum = nextcolsum;
-    }
-
-    /* Special case for last column */
-    membersum = GETJSAMPLE(*inptr);
-    neighsum = lastcolsum + (colsum - membersum) + colsum;
-    membersum = membersum * memberscale + neighsum * neighscale;
-    *outptr = (JSAMPLE) ((membersum + 32768) >> 16);
-
-  }
-}
-
-#endif /* INPUT_SMOOTHING_SUPPORTED */
-
-
-/*
- * Module initialization routine for downsampling.
- * Note that we must select a routine for each component.
- */
-
-GLOBAL(void)
-jinit_downsampler (j_compress_ptr cinfo)
-{
-  my_downsample_ptr downsample;
-  int ci;
-  jpeg_component_info * compptr;
-  boolean smoothok = TRUE;
-  int h_in_group, v_in_group, h_out_group, v_out_group;
-
-  downsample = (my_downsample_ptr)
-    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				SIZEOF(my_downsampler));
-  cinfo->downsample = (struct jpeg_downsampler *) downsample;
-  downsample->pub.start_pass = start_pass_downsample;
-  downsample->pub.downsample = sep_downsample;
-  downsample->pub.need_context_rows = FALSE;
-
-  if (cinfo->CCIR601_sampling)
-    ERREXIT(cinfo, JERR_CCIR601_NOTIMPL);
-
-  /* Verify we can handle the sampling factors, and set up method pointers */
-  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-       ci++, compptr++) {
-    /* Compute size of an "output group" for DCT scaling.  This many samples
-     * are to be converted from max_h_samp_factor * max_v_samp_factor pixels.
-     */
-    h_out_group = (compptr->h_samp_factor * compptr->DCT_h_scaled_size) /
-		  cinfo->min_DCT_h_scaled_size;
-    v_out_group = (compptr->v_samp_factor * compptr->DCT_v_scaled_size) /
-		  cinfo->min_DCT_v_scaled_size;
-    h_in_group = cinfo->max_h_samp_factor;
-    v_in_group = cinfo->max_v_samp_factor;
-    downsample->rowgroup_height[ci] = v_out_group; /* save for use later */
-    if (h_in_group == h_out_group && v_in_group == v_out_group) {
-#ifdef INPUT_SMOOTHING_SUPPORTED
-      if (cinfo->smoothing_factor) {
-	downsample->methods[ci] = fullsize_smooth_downsample;
-	downsample->pub.need_context_rows = TRUE;
-      } else
-#endif
-	downsample->methods[ci] = fullsize_downsample;
-    } else if (h_in_group == h_out_group * 2 &&
-	       v_in_group == v_out_group) {
-      smoothok = FALSE;
-      downsample->methods[ci] = h2v1_downsample;
-    } else if (h_in_group == h_out_group * 2 &&
-	       v_in_group == v_out_group * 2) {
-#ifdef INPUT_SMOOTHING_SUPPORTED
-      if (cinfo->smoothing_factor) {
-	downsample->methods[ci] = h2v2_smooth_downsample;
-	downsample->pub.need_context_rows = TRUE;
-      } else
-#endif
-	downsample->methods[ci] = h2v2_downsample;
-    } else if ((h_in_group % h_out_group) == 0 &&
-	       (v_in_group % v_out_group) == 0) {
-      smoothok = FALSE;
-      downsample->methods[ci] = int_downsample;
-      downsample->h_expand[ci] = (UINT8) (h_in_group / h_out_group);
-      downsample->v_expand[ci] = (UINT8) (v_in_group / v_out_group);
-    } else
-      ERREXIT(cinfo, JERR_FRACT_SAMPLE_NOTIMPL);
-  }
-
-#ifdef INPUT_SMOOTHING_SUPPORTED
-  if (cinfo->smoothing_factor && !smoothok)
-    TRACEMS(cinfo, 0, JTRC_SMOOTH_NOTIMPL);
-#endif
-}
+/*
+ * jcsample.c
+ *
+ * Copyright (C) 1991-1996, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains downsampling routines.
+ *
+ * Downsampling input data is counted in "row groups".  A row group
+ * is defined to be max_v_samp_factor pixel rows of each component,
+ * from which the downsampler produces v_samp_factor sample rows.
+ * A single row group is processed in each call to the downsampler module.
+ *
+ * The downsampler is responsible for edge-expansion of its output data
+ * to fill an integral number of DCT blocks horizontally.  The source buffer
+ * may be modified if it is helpful for this purpose (the source buffer is
+ * allocated wide enough to correspond to the desired output width).
+ * The caller (the prep controller) is responsible for vertical padding.
+ *
+ * The downsampler may request "context rows" by setting need_context_rows
+ * during startup.  In this case, the input arrays will contain at least
+ * one row group's worth of pixels above and below the passed-in data;
+ * the caller will create dummy rows at image top and bottom by replicating
+ * the first or last real pixel row.
+ *
+ * An excellent reference for image resampling is
+ *   Digital Image Warping, George Wolberg, 1990.
+ *   Pub. by IEEE Computer Society Press, Los Alamitos, CA. ISBN 0-8186-8944-7.
+ *
+ * The downsampling algorithm used here is a simple average of the source
+ * pixels covered by the output pixel.  The hi-falutin sampling literature
+ * refers to this as a "box filter".  In general the characteristics of a box
+ * filter are not very good, but for the specific cases we normally use (1:1
+ * and 2:1 ratios) the box is equivalent to a "triangle filter" which is not
+ * nearly so bad.  If you intend to use other sampling ratios, you'd be well
+ * advised to improve this code.
+ *
+ * A simple input-smoothing capability is provided.  This is mainly intended
+ * for cleaning up color-dithered GIF input files (if you find it inadequate,
+ * we suggest using an external filtering program such as pnmconvol).  When
+ * enabled, each input pixel P is replaced by a weighted sum of itself and its
+ * eight neighbors.  P's weight is 1-8*SF and each neighbor's weight is SF,
+ * where SF = (smoothing_factor / 1024).
+ * Currently, smoothing is only supported for 2h2v sampling factors.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/* Pointer to routine to downsample a single component */
+typedef JMETHOD(void, downsample1_ptr,
+		(j_compress_ptr cinfo, jpeg_component_info * compptr,
+		 JSAMPARRAY input_data, JSAMPARRAY output_data));
+
+/* Private subobject */
+
+typedef struct {
+  struct jpeg_downsampler pub;	/* public fields */
+
+  /* Downsampling method pointers, one per component */
+  downsample1_ptr methods[MAX_COMPONENTS];
+
+  /* Height of an output row group for each component. */
+  int rowgroup_height[MAX_COMPONENTS];
+
+  /* These arrays save pixel expansion factors so that int_downsample need not
+   * recompute them each time.  They are unused for other downsampling methods.
+   */
+  UINT8 h_expand[MAX_COMPONENTS];
+  UINT8 v_expand[MAX_COMPONENTS];
+} my_downsampler;
+
+typedef my_downsampler * my_downsample_ptr;
+
+
+/*
+ * Initialize for a downsampling pass.
+ */
+
+METHODDEF(void)
+start_pass_downsample (j_compress_ptr cinfo)
+{
+  /* no work for now */
+}
+
+
+/*
+ * Expand a component horizontally from width input_cols to width output_cols,
+ * by duplicating the rightmost samples.
+ */
+
+LOCAL(void)
+expand_right_edge (JSAMPARRAY image_data, int num_rows,
+		   JDIMENSION input_cols, JDIMENSION output_cols)
+{
+  register JSAMPROW ptr;
+  register JSAMPLE pixval;
+  register int count;
+  int row;
+  int numcols = (int) (output_cols - input_cols);
+
+  if (numcols > 0) {
+    for (row = 0; row < num_rows; row++) {
+      ptr = image_data[row] + input_cols;
+      pixval = ptr[-1];		/* don't need GETJSAMPLE() here */
+      for (count = numcols; count > 0; count--)
+	*ptr++ = pixval;
+    }
+  }
+}
+
+
+/*
+ * Do downsampling for a whole row group (all components).
+ *
+ * In this version we simply downsample each component independently.
+ */
+
+METHODDEF(void)
+sep_downsample (j_compress_ptr cinfo,
+		JSAMPIMAGE input_buf, JDIMENSION in_row_index,
+		JSAMPIMAGE output_buf, JDIMENSION out_row_group_index)
+{
+  my_downsample_ptr downsample = (my_downsample_ptr) cinfo->downsample;
+  int ci;
+  jpeg_component_info * compptr;
+  JSAMPARRAY in_ptr, out_ptr;
+
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    in_ptr = input_buf[ci] + in_row_index;
+    out_ptr = output_buf[ci] +
+	      (out_row_group_index * downsample->rowgroup_height[ci]);
+    (*downsample->methods[ci]) (cinfo, compptr, in_ptr, out_ptr);
+  }
+}
+
+
+/*
+ * Downsample pixel values of a single component.
+ * One row group is processed per call.
+ * This version handles arbitrary integral sampling ratios, without smoothing.
+ * Note that this version is not actually used for customary sampling ratios.
+ */
+
+METHODDEF(void)
+int_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
+		JSAMPARRAY input_data, JSAMPARRAY output_data)
+{
+  my_downsample_ptr downsample = (my_downsample_ptr) cinfo->downsample;
+  int inrow, outrow, h_expand, v_expand, numpix, numpix2, h, v;
+  JDIMENSION outcol, outcol_h;	/* outcol_h == outcol*h_expand */
+  JDIMENSION output_cols = compptr->width_in_blocks * compptr->DCT_h_scaled_size;
+  JSAMPROW inptr, outptr;
+  INT32 outvalue;
+
+  h_expand = downsample->h_expand[compptr->component_index];
+  v_expand = downsample->v_expand[compptr->component_index];
+  numpix = h_expand * v_expand;
+  numpix2 = numpix/2;
+
+  /* Expand input data enough to let all the output samples be generated
+   * by the standard loop.  Special-casing padded output would be more
+   * efficient.
+   */
+  expand_right_edge(input_data, cinfo->max_v_samp_factor,
+		    cinfo->image_width, output_cols * h_expand);
+
+  inrow = outrow = 0;
+  while (inrow < cinfo->max_v_samp_factor) {
+    outptr = output_data[outrow];
+    for (outcol = 0, outcol_h = 0; outcol < output_cols;
+	 outcol++, outcol_h += h_expand) {
+      outvalue = 0;
+      for (v = 0; v < v_expand; v++) {
+	inptr = input_data[inrow+v] + outcol_h;
+	for (h = 0; h < h_expand; h++) {
+	  outvalue += (INT32) GETJSAMPLE(*inptr++);
+	}
+      }
+      *outptr++ = (JSAMPLE) ((outvalue + numpix2) / numpix);
+    }
+    inrow += v_expand;
+    outrow++;
+  }
+}
+
+
+/*
+ * Downsample pixel values of a single component.
+ * This version handles the special case of a full-size component,
+ * without smoothing.
+ */
+
+METHODDEF(void)
+fullsize_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
+		     JSAMPARRAY input_data, JSAMPARRAY output_data)
+{
+  /* Copy the data */
+  jcopy_sample_rows(input_data, 0, output_data, 0,
+		    cinfo->max_v_samp_factor, cinfo->image_width);
+  /* Edge-expand */
+  expand_right_edge(output_data, cinfo->max_v_samp_factor, cinfo->image_width,
+		    compptr->width_in_blocks * compptr->DCT_h_scaled_size);
+}
+
+
+/*
+ * Downsample pixel values of a single component.
+ * This version handles the common case of 2:1 horizontal and 1:1 vertical,
+ * without smoothing.
+ *
+ * A note about the "bias" calculations: when rounding fractional values to
+ * integer, we do not want to always round 0.5 up to the next integer.
+ * If we did that, we'd introduce a noticeable bias towards larger values.
+ * Instead, this code is arranged so that 0.5 will be rounded up or down at
+ * alternate pixel locations (a simple ordered dither pattern).
+ */
+
+METHODDEF(void)
+h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
+		 JSAMPARRAY input_data, JSAMPARRAY output_data)
+{
+  int inrow;
+  JDIMENSION outcol;
+  JDIMENSION output_cols = compptr->width_in_blocks * compptr->DCT_h_scaled_size;
+  register JSAMPROW inptr, outptr;
+  register int bias;
+
+  /* Expand input data enough to let all the output samples be generated
+   * by the standard loop.  Special-casing padded output would be more
+   * efficient.
+   */
+  expand_right_edge(input_data, cinfo->max_v_samp_factor,
+		    cinfo->image_width, output_cols * 2);
+
+  for (inrow = 0; inrow < cinfo->max_v_samp_factor; inrow++) {
+    outptr = output_data[inrow];
+    inptr = input_data[inrow];
+    bias = 0;			/* bias = 0,1,0,1,... for successive samples */
+    for (outcol = 0; outcol < output_cols; outcol++) {
+      *outptr++ = (JSAMPLE) ((GETJSAMPLE(*inptr) + GETJSAMPLE(inptr[1])
+			      + bias) >> 1);
+      bias ^= 1;		/* 0=>1, 1=>0 */
+      inptr += 2;
+    }
+  }
+}
+
+
+/*
+ * Downsample pixel values of a single component.
+ * This version handles the standard case of 2:1 horizontal and 2:1 vertical,
+ * without smoothing.
+ */
+
+METHODDEF(void)
+h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
+		 JSAMPARRAY input_data, JSAMPARRAY output_data)
+{
+  int inrow, outrow;
+  JDIMENSION outcol;
+  JDIMENSION output_cols = compptr->width_in_blocks * compptr->DCT_h_scaled_size;
+  register JSAMPROW inptr0, inptr1, outptr;
+  register int bias;
+
+  /* Expand input data enough to let all the output samples be generated
+   * by the standard loop.  Special-casing padded output would be more
+   * efficient.
+   */
+  expand_right_edge(input_data, cinfo->max_v_samp_factor,
+		    cinfo->image_width, output_cols * 2);
+
+  inrow = outrow = 0;
+  while (inrow < cinfo->max_v_samp_factor) {
+    outptr = output_data[outrow];
+    inptr0 = input_data[inrow];
+    inptr1 = input_data[inrow+1];
+    bias = 1;			/* bias = 1,2,1,2,... for successive samples */
+    for (outcol = 0; outcol < output_cols; outcol++) {
+      *outptr++ = (JSAMPLE) ((GETJSAMPLE(*inptr0) + GETJSAMPLE(inptr0[1]) +
+			      GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[1])
+			      + bias) >> 2);
+      bias ^= 3;		/* 1=>2, 2=>1 */
+      inptr0 += 2; inptr1 += 2;
+    }
+    inrow += 2;
+    outrow++;
+  }
+}
+
+
+#ifdef INPUT_SMOOTHING_SUPPORTED
+
+/*
+ * Downsample pixel values of a single component.
+ * This version handles the standard case of 2:1 horizontal and 2:1 vertical,
+ * with smoothing.  One row of context is required.
+ */
+
+METHODDEF(void)
+h2v2_smooth_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
+			JSAMPARRAY input_data, JSAMPARRAY output_data)
+{
+  int inrow, outrow;
+  JDIMENSION colctr;
+  JDIMENSION output_cols = compptr->width_in_blocks * compptr->DCT_h_scaled_size;
+  register JSAMPROW inptr0, inptr1, above_ptr, below_ptr, outptr;
+  INT32 membersum, neighsum, memberscale, neighscale;
+
+  /* Expand input data enough to let all the output samples be generated
+   * by the standard loop.  Special-casing padded output would be more
+   * efficient.
+   */
+  expand_right_edge(input_data - 1, cinfo->max_v_samp_factor + 2,
+		    cinfo->image_width, output_cols * 2);
+
+  /* We don't bother to form the individual "smoothed" input pixel values;
+   * we can directly compute the output which is the average of the four
+   * smoothed values.  Each of the four member pixels contributes a fraction
+   * (1-8*SF) to its own smoothed image and a fraction SF to each of the three
+   * other smoothed pixels, therefore a total fraction (1-5*SF)/4 to the final
+   * output.  The four corner-adjacent neighbor pixels contribute a fraction
+   * SF to just one smoothed pixel, or SF/4 to the final output; while the
+   * eight edge-adjacent neighbors contribute SF to each of two smoothed
+   * pixels, or SF/2 overall.  In order to use integer arithmetic, these
+   * factors are scaled by 2^16 = 65536.
+   * Also recall that SF = smoothing_factor / 1024.
+   */
+
+  memberscale = 16384 - cinfo->smoothing_factor * 80; /* scaled (1-5*SF)/4 */
+  neighscale = cinfo->smoothing_factor * 16; /* scaled SF/4 */
+
+  inrow = outrow = 0;
+  while (inrow < cinfo->max_v_samp_factor) {
+    outptr = output_data[outrow];
+    inptr0 = input_data[inrow];
+    inptr1 = input_data[inrow+1];
+    above_ptr = input_data[inrow-1];
+    below_ptr = input_data[inrow+2];
+
+    /* Special case for first column: pretend column -1 is same as column 0 */
+    membersum = GETJSAMPLE(*inptr0) + GETJSAMPLE(inptr0[1]) +
+		GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[1]);
+    neighsum = GETJSAMPLE(*above_ptr) + GETJSAMPLE(above_ptr[1]) +
+	       GETJSAMPLE(*below_ptr) + GETJSAMPLE(below_ptr[1]) +
+	       GETJSAMPLE(*inptr0) + GETJSAMPLE(inptr0[2]) +
+	       GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[2]);
+    neighsum += neighsum;
+    neighsum += GETJSAMPLE(*above_ptr) + GETJSAMPLE(above_ptr[2]) +
+		GETJSAMPLE(*below_ptr) + GETJSAMPLE(below_ptr[2]);
+    membersum = membersum * memberscale + neighsum * neighscale;
+    *outptr++ = (JSAMPLE) ((membersum + 32768) >> 16);
+    inptr0 += 2; inptr1 += 2; above_ptr += 2; below_ptr += 2;
+
+    for (colctr = output_cols - 2; colctr > 0; colctr--) {
+      /* sum of pixels directly mapped to this output element */
+      membersum = GETJSAMPLE(*inptr0) + GETJSAMPLE(inptr0[1]) +
+		  GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[1]);
+      /* sum of edge-neighbor pixels */
+      neighsum = GETJSAMPLE(*above_ptr) + GETJSAMPLE(above_ptr[1]) +
+		 GETJSAMPLE(*below_ptr) + GETJSAMPLE(below_ptr[1]) +
+		 GETJSAMPLE(inptr0[-1]) + GETJSAMPLE(inptr0[2]) +
+		 GETJSAMPLE(inptr1[-1]) + GETJSAMPLE(inptr1[2]);
+      /* The edge-neighbors count twice as much as corner-neighbors */
+      neighsum += neighsum;
+      /* Add in the corner-neighbors */
+      neighsum += GETJSAMPLE(above_ptr[-1]) + GETJSAMPLE(above_ptr[2]) +
+		  GETJSAMPLE(below_ptr[-1]) + GETJSAMPLE(below_ptr[2]);
+      /* form final output scaled up by 2^16 */
+      membersum = membersum * memberscale + neighsum * neighscale;
+      /* round, descale and output it */
+      *outptr++ = (JSAMPLE) ((membersum + 32768) >> 16);
+      inptr0 += 2; inptr1 += 2; above_ptr += 2; below_ptr += 2;
+    }
+
+    /* Special case for last column */
+    membersum = GETJSAMPLE(*inptr0) + GETJSAMPLE(inptr0[1]) +
+		GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[1]);
+    neighsum = GETJSAMPLE(*above_ptr) + GETJSAMPLE(above_ptr[1]) +
+	       GETJSAMPLE(*below_ptr) + GETJSAMPLE(below_ptr[1]) +
+	       GETJSAMPLE(inptr0[-1]) + GETJSAMPLE(inptr0[1]) +
+	       GETJSAMPLE(inptr1[-1]) + GETJSAMPLE(inptr1[1]);
+    neighsum += neighsum;
+    neighsum += GETJSAMPLE(above_ptr[-1]) + GETJSAMPLE(above_ptr[1]) +
+		GETJSAMPLE(below_ptr[-1]) + GETJSAMPLE(below_ptr[1]);
+    membersum = membersum * memberscale + neighsum * neighscale;
+    *outptr = (JSAMPLE) ((membersum + 32768) >> 16);
+
+    inrow += 2;
+    outrow++;
+  }
+}
+
+
+/*
+ * Downsample pixel values of a single component.
+ * This version handles the special case of a full-size component,
+ * with smoothing.  One row of context is required.
+ */
+
+METHODDEF(void)
+fullsize_smooth_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr,
+			    JSAMPARRAY input_data, JSAMPARRAY output_data)
+{
+  int inrow;
+  JDIMENSION colctr;
+  JDIMENSION output_cols = compptr->width_in_blocks * compptr->DCT_h_scaled_size;
+  register JSAMPROW inptr, above_ptr, below_ptr, outptr;
+  INT32 membersum, neighsum, memberscale, neighscale;
+  int colsum, lastcolsum, nextcolsum;
+
+  /* Expand input data enough to let all the output samples be generated
+   * by the standard loop.  Special-casing padded output would be more
+   * efficient.
+   */
+  expand_right_edge(input_data - 1, cinfo->max_v_samp_factor + 2,
+		    cinfo->image_width, output_cols);
+
+  /* Each of the eight neighbor pixels contributes a fraction SF to the
+   * smoothed pixel, while the main pixel contributes (1-8*SF).  In order
+   * to use integer arithmetic, these factors are multiplied by 2^16 = 65536.
+   * Also recall that SF = smoothing_factor / 1024.
+   */
+
+  memberscale = 65536L - cinfo->smoothing_factor * 512L; /* scaled 1-8*SF */
+  neighscale = cinfo->smoothing_factor * 64; /* scaled SF */
+
+  for (inrow = 0; inrow < cinfo->max_v_samp_factor; inrow++) {
+    outptr = output_data[inrow];
+    inptr = input_data[inrow];
+    above_ptr = input_data[inrow-1];
+    below_ptr = input_data[inrow+1];
+
+    /* Special case for first column */
+    colsum = GETJSAMPLE(*above_ptr++) + GETJSAMPLE(*below_ptr++) +
+	     GETJSAMPLE(*inptr);
+    membersum = GETJSAMPLE(*inptr++);
+    nextcolsum = GETJSAMPLE(*above_ptr) + GETJSAMPLE(*below_ptr) +
+		 GETJSAMPLE(*inptr);
+    neighsum = colsum + (colsum - membersum) + nextcolsum;
+    membersum = membersum * memberscale + neighsum * neighscale;
+    *outptr++ = (JSAMPLE) ((membersum + 32768) >> 16);
+    lastcolsum = colsum; colsum = nextcolsum;
+
+    for (colctr = output_cols - 2; colctr > 0; colctr--) {
+      membersum = GETJSAMPLE(*inptr++);
+      above_ptr++; below_ptr++;
+      nextcolsum = GETJSAMPLE(*above_ptr) + GETJSAMPLE(*below_ptr) +
+		   GETJSAMPLE(*inptr);
+      neighsum = lastcolsum + (colsum - membersum) + nextcolsum;
+      membersum = membersum * memberscale + neighsum * neighscale;
+      *outptr++ = (JSAMPLE) ((membersum + 32768) >> 16);
+      lastcolsum = colsum; colsum = nextcolsum;
+    }
+
+    /* Special case for last column */
+    membersum = GETJSAMPLE(*inptr);
+    neighsum = lastcolsum + (colsum - membersum) + colsum;
+    membersum = membersum * memberscale + neighsum * neighscale;
+    *outptr = (JSAMPLE) ((membersum + 32768) >> 16);
+
+  }
+}
+
+#endif /* INPUT_SMOOTHING_SUPPORTED */
+
+
+/*
+ * Module initialization routine for downsampling.
+ * Note that we must select a routine for each component.
+ */
+
+GLOBAL(void)
+jinit_downsampler (j_compress_ptr cinfo)
+{
+  my_downsample_ptr downsample;
+  int ci;
+  jpeg_component_info * compptr;
+  boolean smoothok = TRUE;
+  int h_in_group, v_in_group, h_out_group, v_out_group;
+
+  downsample = (my_downsample_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				SIZEOF(my_downsampler));
+  cinfo->downsample = (struct jpeg_downsampler *) downsample;
+  downsample->pub.start_pass = start_pass_downsample;
+  downsample->pub.downsample = sep_downsample;
+  downsample->pub.need_context_rows = FALSE;
+
+  if (cinfo->CCIR601_sampling)
+    ERREXIT(cinfo, JERR_CCIR601_NOTIMPL);
+
+  /* Verify we can handle the sampling factors, and set up method pointers */
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    /* Compute size of an "output group" for DCT scaling.  This many samples
+     * are to be converted from max_h_samp_factor * max_v_samp_factor pixels.
+     */
+    h_out_group = (compptr->h_samp_factor * compptr->DCT_h_scaled_size) /
+		  cinfo->min_DCT_h_scaled_size;
+    v_out_group = (compptr->v_samp_factor * compptr->DCT_v_scaled_size) /
+		  cinfo->min_DCT_v_scaled_size;
+    h_in_group = cinfo->max_h_samp_factor;
+    v_in_group = cinfo->max_v_samp_factor;
+    downsample->rowgroup_height[ci] = v_out_group; /* save for use later */
+    if (h_in_group == h_out_group && v_in_group == v_out_group) {
+#ifdef INPUT_SMOOTHING_SUPPORTED
+      if (cinfo->smoothing_factor) {
+	downsample->methods[ci] = fullsize_smooth_downsample;
+	downsample->pub.need_context_rows = TRUE;
+      } else
+#endif
+	downsample->methods[ci] = fullsize_downsample;
+    } else if (h_in_group == h_out_group * 2 &&
+	       v_in_group == v_out_group) {
+      smoothok = FALSE;
+      downsample->methods[ci] = h2v1_downsample;
+    } else if (h_in_group == h_out_group * 2 &&
+	       v_in_group == v_out_group * 2) {
+#ifdef INPUT_SMOOTHING_SUPPORTED
+      if (cinfo->smoothing_factor) {
+	downsample->methods[ci] = h2v2_smooth_downsample;
+	downsample->pub.need_context_rows = TRUE;
+      } else
+#endif
+	downsample->methods[ci] = h2v2_downsample;
+    } else if ((h_in_group % h_out_group) == 0 &&
+	       (v_in_group % v_out_group) == 0) {
+      smoothok = FALSE;
+      downsample->methods[ci] = int_downsample;
+      downsample->h_expand[ci] = (UINT8) (h_in_group / h_out_group);
+      downsample->v_expand[ci] = (UINT8) (v_in_group / v_out_group);
+    } else
+      ERREXIT(cinfo, JERR_FRACT_SAMPLE_NOTIMPL);
+  }
+
+#ifdef INPUT_SMOOTHING_SUPPORTED
+  if (cinfo->smoothing_factor && !smoothok)
+    TRACEMS(cinfo, 0, JTRC_SMOOTH_NOTIMPL);
+#endif
+}
diff --git a/plugins/AdvaImg/src/LibJPEG/jctrans.c b/plugins/AdvaImg/src/LibJPEG/jctrans.c
index f7d7b81491..9dc9e5f3a7 100644
--- a/plugins/AdvaImg/src/LibJPEG/jctrans.c
+++ b/plugins/AdvaImg/src/LibJPEG/jctrans.c
@@ -1,382 +1,385 @@
-/*
- * jctrans.c
- *
- * Copyright (C) 1995-1998, Thomas G. Lane.
- * Modified 2000-2011 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains library routines for transcoding compression,
- * that is, writing raw DCT coefficient arrays to an output JPEG file.
- * The routines in jcapimin.c will also be needed by a transcoder.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-
-
-/* Forward declarations */
-LOCAL(void) transencode_master_selection
-	JPP((j_compress_ptr cinfo, jvirt_barray_ptr * coef_arrays));
-LOCAL(void) transencode_coef_controller
-	JPP((j_compress_ptr cinfo, jvirt_barray_ptr * coef_arrays));
-
-
-/*
- * Compression initialization for writing raw-coefficient data.
- * Before calling this, all parameters and a data destination must be set up.
- * Call jpeg_finish_compress() to actually write the data.
- *
- * The number of passed virtual arrays must match cinfo->num_components.
- * Note that the virtual arrays need not be filled or even realized at
- * the time write_coefficients is called; indeed, if the virtual arrays
- * were requested from this compression object's memory manager, they
- * typically will be realized during this routine and filled afterwards.
- */
-
-GLOBAL(void)
-jpeg_write_coefficients (j_compress_ptr cinfo, jvirt_barray_ptr * coef_arrays)
-{
-  if (cinfo->global_state != CSTATE_START)
-    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
-  /* Mark all tables to be written */
-  jpeg_suppress_tables(cinfo, FALSE);
-  /* (Re)initialize error mgr and destination modules */
-  (*cinfo->err->reset_error_mgr) ((j_common_ptr) cinfo);
-  (*cinfo->dest->init_destination) (cinfo);
-  /* Perform master selection of active modules */
-  transencode_master_selection(cinfo, coef_arrays);
-  /* Wait for jpeg_finish_compress() call */
-  cinfo->next_scanline = 0;	/* so jpeg_write_marker works */
-  cinfo->global_state = CSTATE_WRCOEFS;
-}
-
-
-/*
- * Initialize the compression object with default parameters,
- * then copy from the source object all parameters needed for lossless
- * transcoding.  Parameters that can be varied without loss (such as
- * scan script and Huffman optimization) are left in their default states.
- */
-
-GLOBAL(void)
-jpeg_copy_critical_parameters (j_decompress_ptr srcinfo,
-			       j_compress_ptr dstinfo)
-{
-  JQUANT_TBL ** qtblptr;
-  jpeg_component_info *incomp, *outcomp;
-  JQUANT_TBL *c_quant, *slot_quant;
-  int tblno, ci, coefi;
-
-  /* Safety check to ensure start_compress not called yet. */
-  if (dstinfo->global_state != CSTATE_START)
-    ERREXIT1(dstinfo, JERR_BAD_STATE, dstinfo->global_state);
-  /* Copy fundamental image dimensions */
-  dstinfo->image_width = srcinfo->image_width;
-  dstinfo->image_height = srcinfo->image_height;
-  dstinfo->input_components = srcinfo->num_components;
-  dstinfo->in_color_space = srcinfo->jpeg_color_space;
-  dstinfo->jpeg_width = srcinfo->output_width;
-  dstinfo->jpeg_height = srcinfo->output_height;
-  dstinfo->min_DCT_h_scaled_size = srcinfo->min_DCT_h_scaled_size;
-  dstinfo->min_DCT_v_scaled_size = srcinfo->min_DCT_v_scaled_size;
-  /* Initialize all parameters to default values */
-  jpeg_set_defaults(dstinfo);
-  /* jpeg_set_defaults may choose wrong colorspace, eg YCbCr if input is RGB.
-   * Fix it to get the right header markers for the image colorspace.
-   */
-  jpeg_set_colorspace(dstinfo, srcinfo->jpeg_color_space);
-  dstinfo->data_precision = srcinfo->data_precision;
-  dstinfo->CCIR601_sampling = srcinfo->CCIR601_sampling;
-  /* Copy the source's quantization tables. */
-  for (tblno = 0; tblno < NUM_QUANT_TBLS; tblno++) {
-    if (srcinfo->quant_tbl_ptrs[tblno] != NULL) {
-      qtblptr = & dstinfo->quant_tbl_ptrs[tblno];
-      if (*qtblptr == NULL)
-	*qtblptr = jpeg_alloc_quant_table((j_common_ptr) dstinfo);
-      MEMCOPY((*qtblptr)->quantval,
-	      srcinfo->quant_tbl_ptrs[tblno]->quantval,
-	      SIZEOF((*qtblptr)->quantval));
-      (*qtblptr)->sent_table = FALSE;
-    }
-  }
-  /* Copy the source's per-component info.
-   * Note we assume jpeg_set_defaults has allocated the dest comp_info array.
-   */
-  dstinfo->num_components = srcinfo->num_components;
-  if (dstinfo->num_components < 1 || dstinfo->num_components > MAX_COMPONENTS)
-    ERREXIT2(dstinfo, JERR_COMPONENT_COUNT, dstinfo->num_components,
-	     MAX_COMPONENTS);
-  for (ci = 0, incomp = srcinfo->comp_info, outcomp = dstinfo->comp_info;
-       ci < dstinfo->num_components; ci++, incomp++, outcomp++) {
-    outcomp->component_id = incomp->component_id;
-    outcomp->h_samp_factor = incomp->h_samp_factor;
-    outcomp->v_samp_factor = incomp->v_samp_factor;
-    outcomp->quant_tbl_no = incomp->quant_tbl_no;
-    /* Make sure saved quantization table for component matches the qtable
-     * slot.  If not, the input file re-used this qtable slot.
-     * IJG encoder currently cannot duplicate this.
-     */
-    tblno = outcomp->quant_tbl_no;
-    if (tblno < 0 || tblno >= NUM_QUANT_TBLS ||
-	srcinfo->quant_tbl_ptrs[tblno] == NULL)
-      ERREXIT1(dstinfo, JERR_NO_QUANT_TABLE, tblno);
-    slot_quant = srcinfo->quant_tbl_ptrs[tblno];
-    c_quant = incomp->quant_table;
-    if (c_quant != NULL) {
-      for (coefi = 0; coefi < DCTSIZE2; coefi++) {
-	if (c_quant->quantval[coefi] != slot_quant->quantval[coefi])
-	  ERREXIT1(dstinfo, JERR_MISMATCHED_QUANT_TABLE, tblno);
-      }
-    }
-    /* Note: we do not copy the source's Huffman table assignments;
-     * instead we rely on jpeg_set_colorspace to have made a suitable choice.
-     */
-  }
-  /* Also copy JFIF version and resolution information, if available.
-   * Strictly speaking this isn't "critical" info, but it's nearly
-   * always appropriate to copy it if available.  In particular,
-   * if the application chooses to copy JFIF 1.02 extension markers from
-   * the source file, we need to copy the version to make sure we don't
-   * emit a file that has 1.02 extensions but a claimed version of 1.01.
-   * We will *not*, however, copy version info from mislabeled "2.01" files.
-   */
-  if (srcinfo->saw_JFIF_marker) {
-    if (srcinfo->JFIF_major_version == 1) {
-      dstinfo->JFIF_major_version = srcinfo->JFIF_major_version;
-      dstinfo->JFIF_minor_version = srcinfo->JFIF_minor_version;
-    }
-    dstinfo->density_unit = srcinfo->density_unit;
-    dstinfo->X_density = srcinfo->X_density;
-    dstinfo->Y_density = srcinfo->Y_density;
-  }
-}
-
-
-/*
- * Master selection of compression modules for transcoding.
- * This substitutes for jcinit.c's initialization of the full compressor.
- */
-
-LOCAL(void)
-transencode_master_selection (j_compress_ptr cinfo,
-			      jvirt_barray_ptr * coef_arrays)
-{
-  /* Initialize master control (includes parameter checking/processing) */
-  jinit_c_master_control(cinfo, TRUE /* transcode only */);
-
-  /* Entropy encoding: either Huffman or arithmetic coding. */
-  if (cinfo->arith_code)
-    jinit_arith_encoder(cinfo);
-  else {
-    jinit_huff_encoder(cinfo);
-  }
-
-  /* We need a special coefficient buffer controller. */
-  transencode_coef_controller(cinfo, coef_arrays);
-
-  jinit_marker_writer(cinfo);
-
-  /* We can now tell the memory manager to allocate virtual arrays. */
-  (*cinfo->mem->realize_virt_arrays) ((j_common_ptr) cinfo);
-
-  /* Write the datastream header (SOI, JFIF) immediately.
-   * Frame and scan headers are postponed till later.
-   * This lets application insert special markers after the SOI.
-   */
-  (*cinfo->marker->write_file_header) (cinfo);
-}
-
-
-/*
- * The rest of this file is a special implementation of the coefficient
- * buffer controller.  This is similar to jccoefct.c, but it handles only
- * output from presupplied virtual arrays.  Furthermore, we generate any
- * dummy padding blocks on-the-fly rather than expecting them to be present
- * in the arrays.
- */
-
-/* Private buffer controller object */
-
-typedef struct {
-  struct jpeg_c_coef_controller pub; /* public fields */
-
-  JDIMENSION iMCU_row_num;	/* iMCU row # within image */
-  JDIMENSION mcu_ctr;		/* counts MCUs processed in current row */
-  int MCU_vert_offset;		/* counts MCU rows within iMCU row */
-  int MCU_rows_per_iMCU_row;	/* number of such rows needed */
-
-  /* Virtual block array for each component. */
-  jvirt_barray_ptr * whole_image;
-
-  /* Workspace for constructing dummy blocks at right/bottom edges. */
-  JBLOCKROW dummy_buffer[C_MAX_BLOCKS_IN_MCU];
-} my_coef_controller;
-
-typedef my_coef_controller * my_coef_ptr;
-
-
-LOCAL(void)
-start_iMCU_row (j_compress_ptr cinfo)
-/* Reset within-iMCU-row counters for a new row */
-{
-  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
-
-  /* In an interleaved scan, an MCU row is the same as an iMCU row.
-   * In a noninterleaved scan, an iMCU row has v_samp_factor MCU rows.
-   * But at the bottom of the image, process only what's left.
-   */
-  if (cinfo->comps_in_scan > 1) {
-    coef->MCU_rows_per_iMCU_row = 1;
-  } else {
-    if (coef->iMCU_row_num < (cinfo->total_iMCU_rows-1))
-      coef->MCU_rows_per_iMCU_row = cinfo->cur_comp_info[0]->v_samp_factor;
-    else
-      coef->MCU_rows_per_iMCU_row = cinfo->cur_comp_info[0]->last_row_height;
-  }
-
-  coef->mcu_ctr = 0;
-  coef->MCU_vert_offset = 0;
-}
-
-
-/*
- * Initialize for a processing pass.
- */
-
-METHODDEF(void)
-start_pass_coef (j_compress_ptr cinfo, J_BUF_MODE pass_mode)
-{
-  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
-
-  if (pass_mode != JBUF_CRANK_DEST)
-    ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
-
-  coef->iMCU_row_num = 0;
-  start_iMCU_row(cinfo);
-}
-
-
-/*
- * Process some data.
- * We process the equivalent of one fully interleaved MCU row ("iMCU" row)
- * per call, ie, v_samp_factor block rows for each component in the scan.
- * The data is obtained from the virtual arrays and fed to the entropy coder.
- * Returns TRUE if the iMCU row is completed, FALSE if suspended.
- *
- * NB: input_buf is ignored; it is likely to be a NULL pointer.
- */
-
-METHODDEF(boolean)
-compress_output (j_compress_ptr cinfo, JSAMPIMAGE input_buf)
-{
-  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
-  JDIMENSION MCU_col_num;	/* index of current MCU within row */
-  JDIMENSION last_MCU_col = cinfo->MCUs_per_row - 1;
-  JDIMENSION last_iMCU_row = cinfo->total_iMCU_rows - 1;
-  int blkn, ci, xindex, yindex, yoffset, blockcnt;
-  JDIMENSION start_col;
-  JBLOCKARRAY buffer[MAX_COMPS_IN_SCAN];
-  JBLOCKROW MCU_buffer[C_MAX_BLOCKS_IN_MCU];
-  JBLOCKROW buffer_ptr;
-  jpeg_component_info *compptr;
-
-  /* Align the virtual buffers for the components used in this scan. */
-  for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
-    compptr = cinfo->cur_comp_info[ci];
-    buffer[ci] = (*cinfo->mem->access_virt_barray)
-      ((j_common_ptr) cinfo, coef->whole_image[compptr->component_index],
-       coef->iMCU_row_num * compptr->v_samp_factor,
-       (JDIMENSION) compptr->v_samp_factor, FALSE);
-  }
-
-  /* Loop to process one whole iMCU row */
-  for (yoffset = coef->MCU_vert_offset; yoffset < coef->MCU_rows_per_iMCU_row;
-       yoffset++) {
-    for (MCU_col_num = coef->mcu_ctr; MCU_col_num < cinfo->MCUs_per_row;
-	 MCU_col_num++) {
-      /* Construct list of pointers to DCT blocks belonging to this MCU */
-      blkn = 0;			/* index of current DCT block within MCU */
-      for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
-	compptr = cinfo->cur_comp_info[ci];
-	start_col = MCU_col_num * compptr->MCU_width;
-	blockcnt = (MCU_col_num < last_MCU_col) ? compptr->MCU_width
-						: compptr->last_col_width;
-	for (yindex = 0; yindex < compptr->MCU_height; yindex++) {
-	  if (coef->iMCU_row_num < last_iMCU_row ||
-	      yindex+yoffset < compptr->last_row_height) {
-	    /* Fill in pointers to real blocks in this row */
-	    buffer_ptr = buffer[ci][yindex+yoffset] + start_col;
-	    for (xindex = 0; xindex < blockcnt; xindex++)
-	      MCU_buffer[blkn++] = buffer_ptr++;
-	  } else {
-	    /* At bottom of image, need a whole row of dummy blocks */
-	    xindex = 0;
-	  }
-	  /* Fill in any dummy blocks needed in this row.
-	   * Dummy blocks are filled in the same way as in jccoefct.c:
-	   * all zeroes in the AC entries, DC entries equal to previous
-	   * block's DC value.  The init routine has already zeroed the
-	   * AC entries, so we need only set the DC entries correctly.
-	   */
-	  for (; xindex < compptr->MCU_width; xindex++) {
-	    MCU_buffer[blkn] = coef->dummy_buffer[blkn];
-	    MCU_buffer[blkn][0][0] = MCU_buffer[blkn-1][0][0];
-	    blkn++;
-	  }
-	}
-      }
-      /* Try to write the MCU. */
-      if (! (*cinfo->entropy->encode_mcu) (cinfo, MCU_buffer)) {
-	/* Suspension forced; update state counters and exit */
-	coef->MCU_vert_offset = yoffset;
-	coef->mcu_ctr = MCU_col_num;
-	return FALSE;
-      }
-    }
-    /* Completed an MCU row, but perhaps not an iMCU row */
-    coef->mcu_ctr = 0;
-  }
-  /* Completed the iMCU row, advance counters for next one */
-  coef->iMCU_row_num++;
-  start_iMCU_row(cinfo);
-  return TRUE;
-}
-
-
-/*
- * Initialize coefficient buffer controller.
- *
- * Each passed coefficient array must be the right size for that
- * coefficient: width_in_blocks wide and height_in_blocks high,
- * with unitheight at least v_samp_factor.
- */
-
-LOCAL(void)
-transencode_coef_controller (j_compress_ptr cinfo,
-			     jvirt_barray_ptr * coef_arrays)
-{
-  my_coef_ptr coef;
-  JBLOCKROW buffer;
-  int i;
-
-  coef = (my_coef_ptr)
-    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				SIZEOF(my_coef_controller));
-  cinfo->coef = (struct jpeg_c_coef_controller *) coef;
-  coef->pub.start_pass = start_pass_coef;
-  coef->pub.compress_data = compress_output;
-
-  /* Save pointer to virtual arrays */
-  coef->whole_image = coef_arrays;
-
-  /* Allocate and pre-zero space for dummy DCT blocks. */
-  buffer = (JBLOCKROW)
-    (*cinfo->mem->alloc_large) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				C_MAX_BLOCKS_IN_MCU * SIZEOF(JBLOCK));
-  FMEMZERO((void FAR *) buffer, C_MAX_BLOCKS_IN_MCU * SIZEOF(JBLOCK));
-  for (i = 0; i < C_MAX_BLOCKS_IN_MCU; i++) {
-    coef->dummy_buffer[i] = buffer + i;
-  }
-}
+/*
+ * jctrans.c
+ *
+ * Copyright (C) 1995-1998, Thomas G. Lane.
+ * Modified 2000-2012 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains library routines for transcoding compression,
+ * that is, writing raw DCT coefficient arrays to an output JPEG file.
+ * The routines in jcapimin.c will also be needed by a transcoder.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/* Forward declarations */
+LOCAL(void) transencode_master_selection
+	JPP((j_compress_ptr cinfo, jvirt_barray_ptr * coef_arrays));
+LOCAL(void) transencode_coef_controller
+	JPP((j_compress_ptr cinfo, jvirt_barray_ptr * coef_arrays));
+
+
+/*
+ * Compression initialization for writing raw-coefficient data.
+ * Before calling this, all parameters and a data destination must be set up.
+ * Call jpeg_finish_compress() to actually write the data.
+ *
+ * The number of passed virtual arrays must match cinfo->num_components.
+ * Note that the virtual arrays need not be filled or even realized at
+ * the time write_coefficients is called; indeed, if the virtual arrays
+ * were requested from this compression object's memory manager, they
+ * typically will be realized during this routine and filled afterwards.
+ */
+
+GLOBAL(void)
+jpeg_write_coefficients (j_compress_ptr cinfo, jvirt_barray_ptr * coef_arrays)
+{
+  if (cinfo->global_state != CSTATE_START)
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+  /* Mark all tables to be written */
+  jpeg_suppress_tables(cinfo, FALSE);
+  /* (Re)initialize error mgr and destination modules */
+  (*cinfo->err->reset_error_mgr) ((j_common_ptr) cinfo);
+  (*cinfo->dest->init_destination) (cinfo);
+  /* Perform master selection of active modules */
+  transencode_master_selection(cinfo, coef_arrays);
+  /* Wait for jpeg_finish_compress() call */
+  cinfo->next_scanline = 0;	/* so jpeg_write_marker works */
+  cinfo->global_state = CSTATE_WRCOEFS;
+}
+
+
+/*
+ * Initialize the compression object with default parameters,
+ * then copy from the source object all parameters needed for lossless
+ * transcoding.  Parameters that can be varied without loss (such as
+ * scan script and Huffman optimization) are left in their default states.
+ */
+
+GLOBAL(void)
+jpeg_copy_critical_parameters (j_decompress_ptr srcinfo,
+			       j_compress_ptr dstinfo)
+{
+  JQUANT_TBL ** qtblptr;
+  jpeg_component_info *incomp, *outcomp;
+  JQUANT_TBL *c_quant, *slot_quant;
+  int tblno, ci, coefi;
+
+  /* Safety check to ensure start_compress not called yet. */
+  if (dstinfo->global_state != CSTATE_START)
+    ERREXIT1(dstinfo, JERR_BAD_STATE, dstinfo->global_state);
+  /* Copy fundamental image dimensions */
+  dstinfo->image_width = srcinfo->image_width;
+  dstinfo->image_height = srcinfo->image_height;
+  dstinfo->input_components = srcinfo->num_components;
+  dstinfo->in_color_space = srcinfo->jpeg_color_space;
+  dstinfo->jpeg_width = srcinfo->output_width;
+  dstinfo->jpeg_height = srcinfo->output_height;
+  dstinfo->min_DCT_h_scaled_size = srcinfo->min_DCT_h_scaled_size;
+  dstinfo->min_DCT_v_scaled_size = srcinfo->min_DCT_v_scaled_size;
+  /* Initialize all parameters to default values */
+  jpeg_set_defaults(dstinfo);
+  /* jpeg_set_defaults may choose wrong colorspace, eg YCbCr if input is RGB.
+   * Fix it to get the right header markers for the image colorspace.
+   * Note: Entropy table assignment in jpeg_set_colorspace depends
+   * on color_transform.
+   */
+  dstinfo->color_transform = srcinfo->color_transform;
+  jpeg_set_colorspace(dstinfo, srcinfo->jpeg_color_space);
+  dstinfo->data_precision = srcinfo->data_precision;
+  dstinfo->CCIR601_sampling = srcinfo->CCIR601_sampling;
+  /* Copy the source's quantization tables. */
+  for (tblno = 0; tblno < NUM_QUANT_TBLS; tblno++) {
+    if (srcinfo->quant_tbl_ptrs[tblno] != NULL) {
+      qtblptr = & dstinfo->quant_tbl_ptrs[tblno];
+      if (*qtblptr == NULL)
+	*qtblptr = jpeg_alloc_quant_table((j_common_ptr) dstinfo);
+      MEMCOPY((*qtblptr)->quantval,
+	      srcinfo->quant_tbl_ptrs[tblno]->quantval,
+	      SIZEOF((*qtblptr)->quantval));
+      (*qtblptr)->sent_table = FALSE;
+    }
+  }
+  /* Copy the source's per-component info.
+   * Note we assume jpeg_set_defaults has allocated the dest comp_info array.
+   */
+  dstinfo->num_components = srcinfo->num_components;
+  if (dstinfo->num_components < 1 || dstinfo->num_components > MAX_COMPONENTS)
+    ERREXIT2(dstinfo, JERR_COMPONENT_COUNT, dstinfo->num_components,
+	     MAX_COMPONENTS);
+  for (ci = 0, incomp = srcinfo->comp_info, outcomp = dstinfo->comp_info;
+       ci < dstinfo->num_components; ci++, incomp++, outcomp++) {
+    outcomp->component_id = incomp->component_id;
+    outcomp->h_samp_factor = incomp->h_samp_factor;
+    outcomp->v_samp_factor = incomp->v_samp_factor;
+    outcomp->quant_tbl_no = incomp->quant_tbl_no;
+    /* Make sure saved quantization table for component matches the qtable
+     * slot.  If not, the input file re-used this qtable slot.
+     * IJG encoder currently cannot duplicate this.
+     */
+    tblno = outcomp->quant_tbl_no;
+    if (tblno < 0 || tblno >= NUM_QUANT_TBLS ||
+	srcinfo->quant_tbl_ptrs[tblno] == NULL)
+      ERREXIT1(dstinfo, JERR_NO_QUANT_TABLE, tblno);
+    slot_quant = srcinfo->quant_tbl_ptrs[tblno];
+    c_quant = incomp->quant_table;
+    if (c_quant != NULL) {
+      for (coefi = 0; coefi < DCTSIZE2; coefi++) {
+	if (c_quant->quantval[coefi] != slot_quant->quantval[coefi])
+	  ERREXIT1(dstinfo, JERR_MISMATCHED_QUANT_TABLE, tblno);
+      }
+    }
+    /* Note: we do not copy the source's entropy table assignments;
+     * instead we rely on jpeg_set_colorspace to have made a suitable choice.
+     */
+  }
+  /* Also copy JFIF version and resolution information, if available.
+   * Strictly speaking this isn't "critical" info, but it's nearly
+   * always appropriate to copy it if available.  In particular,
+   * if the application chooses to copy JFIF 1.02 extension markers from
+   * the source file, we need to copy the version to make sure we don't
+   * emit a file that has 1.02 extensions but a claimed version of 1.01.
+   * We will *not*, however, copy version info from mislabeled "2.01" files.
+   */
+  if (srcinfo->saw_JFIF_marker) {
+    if (srcinfo->JFIF_major_version == 1) {
+      dstinfo->JFIF_major_version = srcinfo->JFIF_major_version;
+      dstinfo->JFIF_minor_version = srcinfo->JFIF_minor_version;
+    }
+    dstinfo->density_unit = srcinfo->density_unit;
+    dstinfo->X_density = srcinfo->X_density;
+    dstinfo->Y_density = srcinfo->Y_density;
+  }
+}
+
+
+/*
+ * Master selection of compression modules for transcoding.
+ * This substitutes for jcinit.c's initialization of the full compressor.
+ */
+
+LOCAL(void)
+transencode_master_selection (j_compress_ptr cinfo,
+			      jvirt_barray_ptr * coef_arrays)
+{
+  /* Initialize master control (includes parameter checking/processing) */
+  jinit_c_master_control(cinfo, TRUE /* transcode only */);
+
+  /* Entropy encoding: either Huffman or arithmetic coding. */
+  if (cinfo->arith_code)
+    jinit_arith_encoder(cinfo);
+  else {
+    jinit_huff_encoder(cinfo);
+  }
+
+  /* We need a special coefficient buffer controller. */
+  transencode_coef_controller(cinfo, coef_arrays);
+
+  jinit_marker_writer(cinfo);
+
+  /* We can now tell the memory manager to allocate virtual arrays. */
+  (*cinfo->mem->realize_virt_arrays) ((j_common_ptr) cinfo);
+
+  /* Write the datastream header (SOI, JFIF) immediately.
+   * Frame and scan headers are postponed till later.
+   * This lets application insert special markers after the SOI.
+   */
+  (*cinfo->marker->write_file_header) (cinfo);
+}
+
+
+/*
+ * The rest of this file is a special implementation of the coefficient
+ * buffer controller.  This is similar to jccoefct.c, but it handles only
+ * output from presupplied virtual arrays.  Furthermore, we generate any
+ * dummy padding blocks on-the-fly rather than expecting them to be present
+ * in the arrays.
+ */
+
+/* Private buffer controller object */
+
+typedef struct {
+  struct jpeg_c_coef_controller pub; /* public fields */
+
+  JDIMENSION iMCU_row_num;	/* iMCU row # within image */
+  JDIMENSION mcu_ctr;		/* counts MCUs processed in current row */
+  int MCU_vert_offset;		/* counts MCU rows within iMCU row */
+  int MCU_rows_per_iMCU_row;	/* number of such rows needed */
+
+  /* Virtual block array for each component. */
+  jvirt_barray_ptr * whole_image;
+
+  /* Workspace for constructing dummy blocks at right/bottom edges. */
+  JBLOCKROW dummy_buffer[C_MAX_BLOCKS_IN_MCU];
+} my_coef_controller;
+
+typedef my_coef_controller * my_coef_ptr;
+
+
+LOCAL(void)
+start_iMCU_row (j_compress_ptr cinfo)
+/* Reset within-iMCU-row counters for a new row */
+{
+  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
+
+  /* In an interleaved scan, an MCU row is the same as an iMCU row.
+   * In a noninterleaved scan, an iMCU row has v_samp_factor MCU rows.
+   * But at the bottom of the image, process only what's left.
+   */
+  if (cinfo->comps_in_scan > 1) {
+    coef->MCU_rows_per_iMCU_row = 1;
+  } else {
+    if (coef->iMCU_row_num < (cinfo->total_iMCU_rows-1))
+      coef->MCU_rows_per_iMCU_row = cinfo->cur_comp_info[0]->v_samp_factor;
+    else
+      coef->MCU_rows_per_iMCU_row = cinfo->cur_comp_info[0]->last_row_height;
+  }
+
+  coef->mcu_ctr = 0;
+  coef->MCU_vert_offset = 0;
+}
+
+
+/*
+ * Initialize for a processing pass.
+ */
+
+METHODDEF(void)
+start_pass_coef (j_compress_ptr cinfo, J_BUF_MODE pass_mode)
+{
+  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
+
+  if (pass_mode != JBUF_CRANK_DEST)
+    ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
+
+  coef->iMCU_row_num = 0;
+  start_iMCU_row(cinfo);
+}
+
+
+/*
+ * Process some data.
+ * We process the equivalent of one fully interleaved MCU row ("iMCU" row)
+ * per call, ie, v_samp_factor block rows for each component in the scan.
+ * The data is obtained from the virtual arrays and fed to the entropy coder.
+ * Returns TRUE if the iMCU row is completed, FALSE if suspended.
+ *
+ * NB: input_buf is ignored; it is likely to be a NULL pointer.
+ */
+
+METHODDEF(boolean)
+compress_output (j_compress_ptr cinfo, JSAMPIMAGE input_buf)
+{
+  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
+  JDIMENSION MCU_col_num;	/* index of current MCU within row */
+  JDIMENSION last_MCU_col = cinfo->MCUs_per_row - 1;
+  JDIMENSION last_iMCU_row = cinfo->total_iMCU_rows - 1;
+  int blkn, ci, xindex, yindex, yoffset, blockcnt;
+  JDIMENSION start_col;
+  JBLOCKARRAY buffer[MAX_COMPS_IN_SCAN];
+  JBLOCKROW MCU_buffer[C_MAX_BLOCKS_IN_MCU];
+  JBLOCKROW buffer_ptr;
+  jpeg_component_info *compptr;
+
+  /* Align the virtual buffers for the components used in this scan. */
+  for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+    compptr = cinfo->cur_comp_info[ci];
+    buffer[ci] = (*cinfo->mem->access_virt_barray)
+      ((j_common_ptr) cinfo, coef->whole_image[compptr->component_index],
+       coef->iMCU_row_num * compptr->v_samp_factor,
+       (JDIMENSION) compptr->v_samp_factor, FALSE);
+  }
+
+  /* Loop to process one whole iMCU row */
+  for (yoffset = coef->MCU_vert_offset; yoffset < coef->MCU_rows_per_iMCU_row;
+       yoffset++) {
+    for (MCU_col_num = coef->mcu_ctr; MCU_col_num < cinfo->MCUs_per_row;
+	 MCU_col_num++) {
+      /* Construct list of pointers to DCT blocks belonging to this MCU */
+      blkn = 0;			/* index of current DCT block within MCU */
+      for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+	compptr = cinfo->cur_comp_info[ci];
+	start_col = MCU_col_num * compptr->MCU_width;
+	blockcnt = (MCU_col_num < last_MCU_col) ? compptr->MCU_width
+						: compptr->last_col_width;
+	for (yindex = 0; yindex < compptr->MCU_height; yindex++) {
+	  if (coef->iMCU_row_num < last_iMCU_row ||
+	      yindex+yoffset < compptr->last_row_height) {
+	    /* Fill in pointers to real blocks in this row */
+	    buffer_ptr = buffer[ci][yindex+yoffset] + start_col;
+	    for (xindex = 0; xindex < blockcnt; xindex++)
+	      MCU_buffer[blkn++] = buffer_ptr++;
+	  } else {
+	    /* At bottom of image, need a whole row of dummy blocks */
+	    xindex = 0;
+	  }
+	  /* Fill in any dummy blocks needed in this row.
+	   * Dummy blocks are filled in the same way as in jccoefct.c:
+	   * all zeroes in the AC entries, DC entries equal to previous
+	   * block's DC value.  The init routine has already zeroed the
+	   * AC entries, so we need only set the DC entries correctly.
+	   */
+	  for (; xindex < compptr->MCU_width; xindex++) {
+	    MCU_buffer[blkn] = coef->dummy_buffer[blkn];
+	    MCU_buffer[blkn][0][0] = MCU_buffer[blkn-1][0][0];
+	    blkn++;
+	  }
+	}
+      }
+      /* Try to write the MCU. */
+      if (! (*cinfo->entropy->encode_mcu) (cinfo, MCU_buffer)) {
+	/* Suspension forced; update state counters and exit */
+	coef->MCU_vert_offset = yoffset;
+	coef->mcu_ctr = MCU_col_num;
+	return FALSE;
+      }
+    }
+    /* Completed an MCU row, but perhaps not an iMCU row */
+    coef->mcu_ctr = 0;
+  }
+  /* Completed the iMCU row, advance counters for next one */
+  coef->iMCU_row_num++;
+  start_iMCU_row(cinfo);
+  return TRUE;
+}
+
+
+/*
+ * Initialize coefficient buffer controller.
+ *
+ * Each passed coefficient array must be the right size for that
+ * coefficient: width_in_blocks wide and height_in_blocks high,
+ * with unitheight at least v_samp_factor.
+ */
+
+LOCAL(void)
+transencode_coef_controller (j_compress_ptr cinfo,
+			     jvirt_barray_ptr * coef_arrays)
+{
+  my_coef_ptr coef;
+  JBLOCKROW buffer;
+  int i;
+
+  coef = (my_coef_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				SIZEOF(my_coef_controller));
+  cinfo->coef = &coef->pub;
+  coef->pub.start_pass = start_pass_coef;
+  coef->pub.compress_data = compress_output;
+
+  /* Save pointer to virtual arrays */
+  coef->whole_image = coef_arrays;
+
+  /* Allocate and pre-zero space for dummy DCT blocks. */
+  buffer = (JBLOCKROW)
+    (*cinfo->mem->alloc_large) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				C_MAX_BLOCKS_IN_MCU * SIZEOF(JBLOCK));
+  FMEMZERO((void FAR *) buffer, C_MAX_BLOCKS_IN_MCU * SIZEOF(JBLOCK));
+  for (i = 0; i < C_MAX_BLOCKS_IN_MCU; i++) {
+    coef->dummy_buffer[i] = buffer + i;
+  }
+}
diff --git a/plugins/AdvaImg/src/LibJPEG/jdapimin.c b/plugins/AdvaImg/src/LibJPEG/jdapimin.c
index 7f1ce4c05b..65f8a491bd 100644
--- a/plugins/AdvaImg/src/LibJPEG/jdapimin.c
+++ b/plugins/AdvaImg/src/LibJPEG/jdapimin.c
@@ -1,396 +1,396 @@
-/*
- * jdapimin.c
- *
- * Copyright (C) 1994-1998, Thomas G. Lane.
- * Modified 2009 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains application interface code for the decompression half
- * of the JPEG library.  These are the "minimum" API routines that may be
- * needed in either the normal full-decompression case or the
- * transcoding-only case.
- *
- * Most of the routines intended to be called directly by an application
- * are in this file or in jdapistd.c.  But also see jcomapi.c for routines
- * shared by compression and decompression, and jdtrans.c for the transcoding
- * case.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-
-
-/*
- * Initialization of a JPEG decompression object.
- * The error manager must already be set up (in case memory manager fails).
- */
-
-GLOBAL(void)
-jpeg_CreateDecompress (j_decompress_ptr cinfo, int version, size_t structsize)
-{
-  int i;
-
-  /* Guard against version mismatches between library and caller. */
-  cinfo->mem = NULL;		/* so jpeg_destroy knows mem mgr not called */
-  if (version != JPEG_LIB_VERSION)
-    ERREXIT2(cinfo, JERR_BAD_LIB_VERSION, JPEG_LIB_VERSION, version);
-  if (structsize != SIZEOF(struct jpeg_decompress_struct))
-    ERREXIT2(cinfo, JERR_BAD_STRUCT_SIZE, 
-	     (int) SIZEOF(struct jpeg_decompress_struct), (int) structsize);
-
-  /* For debugging purposes, we zero the whole master structure.
-   * But the application has already set the err pointer, and may have set
-   * client_data, so we have to save and restore those fields.
-   * Note: if application hasn't set client_data, tools like Purify may
-   * complain here.
-   */
-  {
-    struct jpeg_error_mgr * err = cinfo->err;
-    void * client_data = cinfo->client_data; /* ignore Purify complaint here */
-    MEMZERO(cinfo, SIZEOF(struct jpeg_decompress_struct));
-    cinfo->err = err;
-    cinfo->client_data = client_data;
-  }
-  cinfo->is_decompressor = TRUE;
-
-  /* Initialize a memory manager instance for this object */
-  jinit_memory_mgr((j_common_ptr) cinfo);
-
-  /* Zero out pointers to permanent structures. */
-  cinfo->progress = NULL;
-  cinfo->src = NULL;
-
-  for (i = 0; i < NUM_QUANT_TBLS; i++)
-    cinfo->quant_tbl_ptrs[i] = NULL;
-
-  for (i = 0; i < NUM_HUFF_TBLS; i++) {
-    cinfo->dc_huff_tbl_ptrs[i] = NULL;
-    cinfo->ac_huff_tbl_ptrs[i] = NULL;
-  }
-
-  /* Initialize marker processor so application can override methods
-   * for COM, APPn markers before calling jpeg_read_header.
-   */
-  cinfo->marker_list = NULL;
-  jinit_marker_reader(cinfo);
-
-  /* And initialize the overall input controller. */
-  jinit_input_controller(cinfo);
-
-  /* OK, I'm ready */
-  cinfo->global_state = DSTATE_START;
-}
-
-
-/*
- * Destruction of a JPEG decompression object
- */
-
-GLOBAL(void)
-jpeg_destroy_decompress (j_decompress_ptr cinfo)
-{
-  jpeg_destroy((j_common_ptr) cinfo); /* use common routine */
-}
-
-
-/*
- * Abort processing of a JPEG decompression operation,
- * but don't destroy the object itself.
- */
-
-GLOBAL(void)
-jpeg_abort_decompress (j_decompress_ptr cinfo)
-{
-  jpeg_abort((j_common_ptr) cinfo); /* use common routine */
-}
-
-
-/*
- * Set default decompression parameters.
- */
-
-LOCAL(void)
-default_decompress_parms (j_decompress_ptr cinfo)
-{
-  /* Guess the input colorspace, and set output colorspace accordingly. */
-  /* (Wish JPEG committee had provided a real way to specify this...) */
-  /* Note application may override our guesses. */
-  switch (cinfo->num_components) {
-  case 1:
-    cinfo->jpeg_color_space = JCS_GRAYSCALE;
-    cinfo->out_color_space = JCS_GRAYSCALE;
-    break;
-    
-  case 3:
-    if (cinfo->saw_JFIF_marker) {
-      cinfo->jpeg_color_space = JCS_YCbCr; /* JFIF implies YCbCr */
-    } else if (cinfo->saw_Adobe_marker) {
-      switch (cinfo->Adobe_transform) {
-      case 0:
-	cinfo->jpeg_color_space = JCS_RGB;
-	break;
-      case 1:
-	cinfo->jpeg_color_space = JCS_YCbCr;
-	break;
-      default:
-	WARNMS1(cinfo, JWRN_ADOBE_XFORM, cinfo->Adobe_transform);
-	cinfo->jpeg_color_space = JCS_YCbCr; /* assume it's YCbCr */
-	break;
-      }
-    } else {
-      /* Saw no special markers, try to guess from the component IDs */
-      int cid0 = cinfo->comp_info[0].component_id;
-      int cid1 = cinfo->comp_info[1].component_id;
-      int cid2 = cinfo->comp_info[2].component_id;
-
-      if (cid0 == 1 && cid1 == 2 && cid2 == 3)
-	cinfo->jpeg_color_space = JCS_YCbCr; /* assume JFIF w/out marker */
-      else if (cid0 == 82 && cid1 == 71 && cid2 == 66)
-	cinfo->jpeg_color_space = JCS_RGB; /* ASCII 'R', 'G', 'B' */
-      else {
-	TRACEMS3(cinfo, 1, JTRC_UNKNOWN_IDS, cid0, cid1, cid2);
-	cinfo->jpeg_color_space = JCS_YCbCr; /* assume it's YCbCr */
-      }
-    }
-    /* Always guess RGB is proper output colorspace. */
-    cinfo->out_color_space = JCS_RGB;
-    break;
-    
-  case 4:
-    if (cinfo->saw_Adobe_marker) {
-      switch (cinfo->Adobe_transform) {
-      case 0:
-	cinfo->jpeg_color_space = JCS_CMYK;
-	break;
-      case 2:
-	cinfo->jpeg_color_space = JCS_YCCK;
-	break;
-      default:
-	WARNMS1(cinfo, JWRN_ADOBE_XFORM, cinfo->Adobe_transform);
-	cinfo->jpeg_color_space = JCS_YCCK; /* assume it's YCCK */
-	break;
-      }
-    } else {
-      /* No special markers, assume straight CMYK. */
-      cinfo->jpeg_color_space = JCS_CMYK;
-    }
-    cinfo->out_color_space = JCS_CMYK;
-    break;
-    
-  default:
-    cinfo->jpeg_color_space = JCS_UNKNOWN;
-    cinfo->out_color_space = JCS_UNKNOWN;
-    break;
-  }
-
-  /* Set defaults for other decompression parameters. */
-  cinfo->scale_num = cinfo->block_size;		/* 1:1 scaling */
-  cinfo->scale_denom = cinfo->block_size;
-  cinfo->output_gamma = 1.0;
-  cinfo->buffered_image = FALSE;
-  cinfo->raw_data_out = FALSE;
-  cinfo->dct_method = JDCT_DEFAULT;
-  cinfo->do_fancy_upsampling = TRUE;
-  cinfo->do_block_smoothing = TRUE;
-  cinfo->quantize_colors = FALSE;
-  /* We set these in case application only sets quantize_colors. */
-  cinfo->dither_mode = JDITHER_FS;
-#ifdef QUANT_2PASS_SUPPORTED
-  cinfo->two_pass_quantize = TRUE;
-#else
-  cinfo->two_pass_quantize = FALSE;
-#endif
-  cinfo->desired_number_of_colors = 256;
-  cinfo->colormap = NULL;
-  /* Initialize for no mode change in buffered-image mode. */
-  cinfo->enable_1pass_quant = FALSE;
-  cinfo->enable_external_quant = FALSE;
-  cinfo->enable_2pass_quant = FALSE;
-}
-
-
-/*
- * Decompression startup: read start of JPEG datastream to see what's there.
- * Need only initialize JPEG object and supply a data source before calling.
- *
- * This routine will read as far as the first SOS marker (ie, actual start of
- * compressed data), and will save all tables and parameters in the JPEG
- * object.  It will also initialize the decompression parameters to default
- * values, and finally return JPEG_HEADER_OK.  On return, the application may
- * adjust the decompression parameters and then call jpeg_start_decompress.
- * (Or, if the application only wanted to determine the image parameters,
- * the data need not be decompressed.  In that case, call jpeg_abort or
- * jpeg_destroy to release any temporary space.)
- * If an abbreviated (tables only) datastream is presented, the routine will
- * return JPEG_HEADER_TABLES_ONLY upon reaching EOI.  The application may then
- * re-use the JPEG object to read the abbreviated image datastream(s).
- * It is unnecessary (but OK) to call jpeg_abort in this case.
- * The JPEG_SUSPENDED return code only occurs if the data source module
- * requests suspension of the decompressor.  In this case the application
- * should load more source data and then re-call jpeg_read_header to resume
- * processing.
- * If a non-suspending data source is used and require_image is TRUE, then the
- * return code need not be inspected since only JPEG_HEADER_OK is possible.
- *
- * This routine is now just a front end to jpeg_consume_input, with some
- * extra error checking.
- */
-
-GLOBAL(int)
-jpeg_read_header (j_decompress_ptr cinfo, boolean require_image)
-{
-  int retcode;
-
-  if (cinfo->global_state != DSTATE_START &&
-      cinfo->global_state != DSTATE_INHEADER)
-    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
-
-  retcode = jpeg_consume_input(cinfo);
-
-  switch (retcode) {
-  case JPEG_REACHED_SOS:
-    retcode = JPEG_HEADER_OK;
-    break;
-  case JPEG_REACHED_EOI:
-    if (require_image)		/* Complain if application wanted an image */
-      ERREXIT(cinfo, JERR_NO_IMAGE);
-    /* Reset to start state; it would be safer to require the application to
-     * call jpeg_abort, but we can't change it now for compatibility reasons.
-     * A side effect is to free any temporary memory (there shouldn't be any).
-     */
-    jpeg_abort((j_common_ptr) cinfo); /* sets state = DSTATE_START */
-    retcode = JPEG_HEADER_TABLES_ONLY;
-    break;
-  case JPEG_SUSPENDED:
-    /* no work */
-    break;
-  }
-
-  return retcode;
-}
-
-
-/*
- * Consume data in advance of what the decompressor requires.
- * This can be called at any time once the decompressor object has
- * been created and a data source has been set up.
- *
- * This routine is essentially a state machine that handles a couple
- * of critical state-transition actions, namely initial setup and
- * transition from header scanning to ready-for-start_decompress.
- * All the actual input is done via the input controller's consume_input
- * method.
- */
-
-GLOBAL(int)
-jpeg_consume_input (j_decompress_ptr cinfo)
-{
-  int retcode = JPEG_SUSPENDED;
-
-  /* NB: every possible DSTATE value should be listed in this switch */
-  switch (cinfo->global_state) {
-  case DSTATE_START:
-    /* Start-of-datastream actions: reset appropriate modules */
-    (*cinfo->inputctl->reset_input_controller) (cinfo);
-    /* Initialize application's data source module */
-    (*cinfo->src->init_source) (cinfo);
-    cinfo->global_state = DSTATE_INHEADER;
-    /*FALLTHROUGH*/
-  case DSTATE_INHEADER:
-    retcode = (*cinfo->inputctl->consume_input) (cinfo);
-    if (retcode == JPEG_REACHED_SOS) { /* Found SOS, prepare to decompress */
-      /* Set up default parameters based on header data */
-      default_decompress_parms(cinfo);
-      /* Set global state: ready for start_decompress */
-      cinfo->global_state = DSTATE_READY;
-    }
-    break;
-  case DSTATE_READY:
-    /* Can't advance past first SOS until start_decompress is called */
-    retcode = JPEG_REACHED_SOS;
-    break;
-  case DSTATE_PRELOAD:
-  case DSTATE_PRESCAN:
-  case DSTATE_SCANNING:
-  case DSTATE_RAW_OK:
-  case DSTATE_BUFIMAGE:
-  case DSTATE_BUFPOST:
-  case DSTATE_STOPPING:
-    retcode = (*cinfo->inputctl->consume_input) (cinfo);
-    break;
-  default:
-    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
-  }
-  return retcode;
-}
-
-
-/*
- * Have we finished reading the input file?
- */
-
-GLOBAL(boolean)
-jpeg_input_complete (j_decompress_ptr cinfo)
-{
-  /* Check for valid jpeg object */
-  if (cinfo->global_state < DSTATE_START ||
-      cinfo->global_state > DSTATE_STOPPING)
-    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
-  return cinfo->inputctl->eoi_reached;
-}
-
-
-/*
- * Is there more than one scan?
- */
-
-GLOBAL(boolean)
-jpeg_has_multiple_scans (j_decompress_ptr cinfo)
-{
-  /* Only valid after jpeg_read_header completes */
-  if (cinfo->global_state < DSTATE_READY ||
-      cinfo->global_state > DSTATE_STOPPING)
-    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
-  return cinfo->inputctl->has_multiple_scans;
-}
-
-
-/*
- * Finish JPEG decompression.
- *
- * This will normally just verify the file trailer and release temp storage.
- *
- * Returns FALSE if suspended.  The return value need be inspected only if
- * a suspending data source is used.
- */
-
-GLOBAL(boolean)
-jpeg_finish_decompress (j_decompress_ptr cinfo)
-{
-  if ((cinfo->global_state == DSTATE_SCANNING ||
-       cinfo->global_state == DSTATE_RAW_OK) && ! cinfo->buffered_image) {
-    /* Terminate final pass of non-buffered mode */
-    if (cinfo->output_scanline < cinfo->output_height)
-      ERREXIT(cinfo, JERR_TOO_LITTLE_DATA);
-    (*cinfo->master->finish_output_pass) (cinfo);
-    cinfo->global_state = DSTATE_STOPPING;
-  } else if (cinfo->global_state == DSTATE_BUFIMAGE) {
-    /* Finishing after a buffered-image operation */
-    cinfo->global_state = DSTATE_STOPPING;
-  } else if (cinfo->global_state != DSTATE_STOPPING) {
-    /* STOPPING = repeat call after a suspension, anything else is error */
-    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
-  }
-  /* Read until EOI */
-  while (! cinfo->inputctl->eoi_reached) {
-    if ((*cinfo->inputctl->consume_input) (cinfo) == JPEG_SUSPENDED)
-      return FALSE;		/* Suspend, come back later */
-  }
-  /* Do final cleanup */
-  (*cinfo->src->term_source) (cinfo);
-  /* We can use jpeg_abort to release memory and reset global_state */
-  jpeg_abort((j_common_ptr) cinfo);
-  return TRUE;
-}
+/*
+ * jdapimin.c
+ *
+ * Copyright (C) 1994-1998, Thomas G. Lane.
+ * Modified 2009 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains application interface code for the decompression half
+ * of the JPEG library.  These are the "minimum" API routines that may be
+ * needed in either the normal full-decompression case or the
+ * transcoding-only case.
+ *
+ * Most of the routines intended to be called directly by an application
+ * are in this file or in jdapistd.c.  But also see jcomapi.c for routines
+ * shared by compression and decompression, and jdtrans.c for the transcoding
+ * case.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/*
+ * Initialization of a JPEG decompression object.
+ * The error manager must already be set up (in case memory manager fails).
+ */
+
+GLOBAL(void)
+jpeg_CreateDecompress (j_decompress_ptr cinfo, int version, size_t structsize)
+{
+  int i;
+
+  /* Guard against version mismatches between library and caller. */
+  cinfo->mem = NULL;		/* so jpeg_destroy knows mem mgr not called */
+  if (version != JPEG_LIB_VERSION)
+    ERREXIT2(cinfo, JERR_BAD_LIB_VERSION, JPEG_LIB_VERSION, version);
+  if (structsize != SIZEOF(struct jpeg_decompress_struct))
+    ERREXIT2(cinfo, JERR_BAD_STRUCT_SIZE, 
+	     (int) SIZEOF(struct jpeg_decompress_struct), (int) structsize);
+
+  /* For debugging purposes, we zero the whole master structure.
+   * But the application has already set the err pointer, and may have set
+   * client_data, so we have to save and restore those fields.
+   * Note: if application hasn't set client_data, tools like Purify may
+   * complain here.
+   */
+  {
+    struct jpeg_error_mgr * err = cinfo->err;
+    void * client_data = cinfo->client_data; /* ignore Purify complaint here */
+    MEMZERO(cinfo, SIZEOF(struct jpeg_decompress_struct));
+    cinfo->err = err;
+    cinfo->client_data = client_data;
+  }
+  cinfo->is_decompressor = TRUE;
+
+  /* Initialize a memory manager instance for this object */
+  jinit_memory_mgr((j_common_ptr) cinfo);
+
+  /* Zero out pointers to permanent structures. */
+  cinfo->progress = NULL;
+  cinfo->src = NULL;
+
+  for (i = 0; i < NUM_QUANT_TBLS; i++)
+    cinfo->quant_tbl_ptrs[i] = NULL;
+
+  for (i = 0; i < NUM_HUFF_TBLS; i++) {
+    cinfo->dc_huff_tbl_ptrs[i] = NULL;
+    cinfo->ac_huff_tbl_ptrs[i] = NULL;
+  }
+
+  /* Initialize marker processor so application can override methods
+   * for COM, APPn markers before calling jpeg_read_header.
+   */
+  cinfo->marker_list = NULL;
+  jinit_marker_reader(cinfo);
+
+  /* And initialize the overall input controller. */
+  jinit_input_controller(cinfo);
+
+  /* OK, I'm ready */
+  cinfo->global_state = DSTATE_START;
+}
+
+
+/*
+ * Destruction of a JPEG decompression object
+ */
+
+GLOBAL(void)
+jpeg_destroy_decompress (j_decompress_ptr cinfo)
+{
+  jpeg_destroy((j_common_ptr) cinfo); /* use common routine */
+}
+
+
+/*
+ * Abort processing of a JPEG decompression operation,
+ * but don't destroy the object itself.
+ */
+
+GLOBAL(void)
+jpeg_abort_decompress (j_decompress_ptr cinfo)
+{
+  jpeg_abort((j_common_ptr) cinfo); /* use common routine */
+}
+
+
+/*
+ * Set default decompression parameters.
+ */
+
+LOCAL(void)
+default_decompress_parms (j_decompress_ptr cinfo)
+{
+  /* Guess the input colorspace, and set output colorspace accordingly. */
+  /* (Wish JPEG committee had provided a real way to specify this...) */
+  /* Note application may override our guesses. */
+  switch (cinfo->num_components) {
+  case 1:
+    cinfo->jpeg_color_space = JCS_GRAYSCALE;
+    cinfo->out_color_space = JCS_GRAYSCALE;
+    break;
+    
+  case 3:
+    if (cinfo->saw_JFIF_marker) {
+      cinfo->jpeg_color_space = JCS_YCbCr; /* JFIF implies YCbCr */
+    } else if (cinfo->saw_Adobe_marker) {
+      switch (cinfo->Adobe_transform) {
+      case 0:
+	cinfo->jpeg_color_space = JCS_RGB;
+	break;
+      case 1:
+	cinfo->jpeg_color_space = JCS_YCbCr;
+	break;
+      default:
+	WARNMS1(cinfo, JWRN_ADOBE_XFORM, cinfo->Adobe_transform);
+	cinfo->jpeg_color_space = JCS_YCbCr; /* assume it's YCbCr */
+	break;
+      }
+    } else {
+      /* Saw no special markers, try to guess from the component IDs */
+      int cid0 = cinfo->comp_info[0].component_id;
+      int cid1 = cinfo->comp_info[1].component_id;
+      int cid2 = cinfo->comp_info[2].component_id;
+
+      if (cid0 == 1 && cid1 == 2 && cid2 == 3)
+	cinfo->jpeg_color_space = JCS_YCbCr; /* assume JFIF w/out marker */
+      else if (cid0 == 82 && cid1 == 71 && cid2 == 66)
+	cinfo->jpeg_color_space = JCS_RGB; /* ASCII 'R', 'G', 'B' */
+      else {
+	TRACEMS3(cinfo, 1, JTRC_UNKNOWN_IDS, cid0, cid1, cid2);
+	cinfo->jpeg_color_space = JCS_YCbCr; /* assume it's YCbCr */
+      }
+    }
+    /* Always guess RGB is proper output colorspace. */
+    cinfo->out_color_space = JCS_RGB;
+    break;
+    
+  case 4:
+    if (cinfo->saw_Adobe_marker) {
+      switch (cinfo->Adobe_transform) {
+      case 0:
+	cinfo->jpeg_color_space = JCS_CMYK;
+	break;
+      case 2:
+	cinfo->jpeg_color_space = JCS_YCCK;
+	break;
+      default:
+	WARNMS1(cinfo, JWRN_ADOBE_XFORM, cinfo->Adobe_transform);
+	cinfo->jpeg_color_space = JCS_YCCK; /* assume it's YCCK */
+	break;
+      }
+    } else {
+      /* No special markers, assume straight CMYK. */
+      cinfo->jpeg_color_space = JCS_CMYK;
+    }
+    cinfo->out_color_space = JCS_CMYK;
+    break;
+    
+  default:
+    cinfo->jpeg_color_space = JCS_UNKNOWN;
+    cinfo->out_color_space = JCS_UNKNOWN;
+    break;
+  }
+
+  /* Set defaults for other decompression parameters. */
+  cinfo->scale_num = cinfo->block_size;		/* 1:1 scaling */
+  cinfo->scale_denom = cinfo->block_size;
+  cinfo->output_gamma = 1.0;
+  cinfo->buffered_image = FALSE;
+  cinfo->raw_data_out = FALSE;
+  cinfo->dct_method = JDCT_DEFAULT;
+  cinfo->do_fancy_upsampling = TRUE;
+  cinfo->do_block_smoothing = TRUE;
+  cinfo->quantize_colors = FALSE;
+  /* We set these in case application only sets quantize_colors. */
+  cinfo->dither_mode = JDITHER_FS;
+#ifdef QUANT_2PASS_SUPPORTED
+  cinfo->two_pass_quantize = TRUE;
+#else
+  cinfo->two_pass_quantize = FALSE;
+#endif
+  cinfo->desired_number_of_colors = 256;
+  cinfo->colormap = NULL;
+  /* Initialize for no mode change in buffered-image mode. */
+  cinfo->enable_1pass_quant = FALSE;
+  cinfo->enable_external_quant = FALSE;
+  cinfo->enable_2pass_quant = FALSE;
+}
+
+
+/*
+ * Decompression startup: read start of JPEG datastream to see what's there.
+ * Need only initialize JPEG object and supply a data source before calling.
+ *
+ * This routine will read as far as the first SOS marker (ie, actual start of
+ * compressed data), and will save all tables and parameters in the JPEG
+ * object.  It will also initialize the decompression parameters to default
+ * values, and finally return JPEG_HEADER_OK.  On return, the application may
+ * adjust the decompression parameters and then call jpeg_start_decompress.
+ * (Or, if the application only wanted to determine the image parameters,
+ * the data need not be decompressed.  In that case, call jpeg_abort or
+ * jpeg_destroy to release any temporary space.)
+ * If an abbreviated (tables only) datastream is presented, the routine will
+ * return JPEG_HEADER_TABLES_ONLY upon reaching EOI.  The application may then
+ * re-use the JPEG object to read the abbreviated image datastream(s).
+ * It is unnecessary (but OK) to call jpeg_abort in this case.
+ * The JPEG_SUSPENDED return code only occurs if the data source module
+ * requests suspension of the decompressor.  In this case the application
+ * should load more source data and then re-call jpeg_read_header to resume
+ * processing.
+ * If a non-suspending data source is used and require_image is TRUE, then the
+ * return code need not be inspected since only JPEG_HEADER_OK is possible.
+ *
+ * This routine is now just a front end to jpeg_consume_input, with some
+ * extra error checking.
+ */
+
+GLOBAL(int)
+jpeg_read_header (j_decompress_ptr cinfo, boolean require_image)
+{
+  int retcode;
+
+  if (cinfo->global_state != DSTATE_START &&
+      cinfo->global_state != DSTATE_INHEADER)
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+
+  retcode = jpeg_consume_input(cinfo);
+
+  switch (retcode) {
+  case JPEG_REACHED_SOS:
+    retcode = JPEG_HEADER_OK;
+    break;
+  case JPEG_REACHED_EOI:
+    if (require_image)		/* Complain if application wanted an image */
+      ERREXIT(cinfo, JERR_NO_IMAGE);
+    /* Reset to start state; it would be safer to require the application to
+     * call jpeg_abort, but we can't change it now for compatibility reasons.
+     * A side effect is to free any temporary memory (there shouldn't be any).
+     */
+    jpeg_abort((j_common_ptr) cinfo); /* sets state = DSTATE_START */
+    retcode = JPEG_HEADER_TABLES_ONLY;
+    break;
+  case JPEG_SUSPENDED:
+    /* no work */
+    break;
+  }
+
+  return retcode;
+}
+
+
+/*
+ * Consume data in advance of what the decompressor requires.
+ * This can be called at any time once the decompressor object has
+ * been created and a data source has been set up.
+ *
+ * This routine is essentially a state machine that handles a couple
+ * of critical state-transition actions, namely initial setup and
+ * transition from header scanning to ready-for-start_decompress.
+ * All the actual input is done via the input controller's consume_input
+ * method.
+ */
+
+GLOBAL(int)
+jpeg_consume_input (j_decompress_ptr cinfo)
+{
+  int retcode = JPEG_SUSPENDED;
+
+  /* NB: every possible DSTATE value should be listed in this switch */
+  switch (cinfo->global_state) {
+  case DSTATE_START:
+    /* Start-of-datastream actions: reset appropriate modules */
+    (*cinfo->inputctl->reset_input_controller) (cinfo);
+    /* Initialize application's data source module */
+    (*cinfo->src->init_source) (cinfo);
+    cinfo->global_state = DSTATE_INHEADER;
+    /*FALLTHROUGH*/
+  case DSTATE_INHEADER:
+    retcode = (*cinfo->inputctl->consume_input) (cinfo);
+    if (retcode == JPEG_REACHED_SOS) { /* Found SOS, prepare to decompress */
+      /* Set up default parameters based on header data */
+      default_decompress_parms(cinfo);
+      /* Set global state: ready for start_decompress */
+      cinfo->global_state = DSTATE_READY;
+    }
+    break;
+  case DSTATE_READY:
+    /* Can't advance past first SOS until start_decompress is called */
+    retcode = JPEG_REACHED_SOS;
+    break;
+  case DSTATE_PRELOAD:
+  case DSTATE_PRESCAN:
+  case DSTATE_SCANNING:
+  case DSTATE_RAW_OK:
+  case DSTATE_BUFIMAGE:
+  case DSTATE_BUFPOST:
+  case DSTATE_STOPPING:
+    retcode = (*cinfo->inputctl->consume_input) (cinfo);
+    break;
+  default:
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+  }
+  return retcode;
+}
+
+
+/*
+ * Have we finished reading the input file?
+ */
+
+GLOBAL(boolean)
+jpeg_input_complete (j_decompress_ptr cinfo)
+{
+  /* Check for valid jpeg object */
+  if (cinfo->global_state < DSTATE_START ||
+      cinfo->global_state > DSTATE_STOPPING)
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+  return cinfo->inputctl->eoi_reached;
+}
+
+
+/*
+ * Is there more than one scan?
+ */
+
+GLOBAL(boolean)
+jpeg_has_multiple_scans (j_decompress_ptr cinfo)
+{
+  /* Only valid after jpeg_read_header completes */
+  if (cinfo->global_state < DSTATE_READY ||
+      cinfo->global_state > DSTATE_STOPPING)
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+  return cinfo->inputctl->has_multiple_scans;
+}
+
+
+/*
+ * Finish JPEG decompression.
+ *
+ * This will normally just verify the file trailer and release temp storage.
+ *
+ * Returns FALSE if suspended.  The return value need be inspected only if
+ * a suspending data source is used.
+ */
+
+GLOBAL(boolean)
+jpeg_finish_decompress (j_decompress_ptr cinfo)
+{
+  if ((cinfo->global_state == DSTATE_SCANNING ||
+       cinfo->global_state == DSTATE_RAW_OK) && ! cinfo->buffered_image) {
+    /* Terminate final pass of non-buffered mode */
+    if (cinfo->output_scanline < cinfo->output_height)
+      ERREXIT(cinfo, JERR_TOO_LITTLE_DATA);
+    (*cinfo->master->finish_output_pass) (cinfo);
+    cinfo->global_state = DSTATE_STOPPING;
+  } else if (cinfo->global_state == DSTATE_BUFIMAGE) {
+    /* Finishing after a buffered-image operation */
+    cinfo->global_state = DSTATE_STOPPING;
+  } else if (cinfo->global_state != DSTATE_STOPPING) {
+    /* STOPPING = repeat call after a suspension, anything else is error */
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+  }
+  /* Read until EOI */
+  while (! cinfo->inputctl->eoi_reached) {
+    if ((*cinfo->inputctl->consume_input) (cinfo) == JPEG_SUSPENDED)
+      return FALSE;		/* Suspend, come back later */
+  }
+  /* Do final cleanup */
+  (*cinfo->src->term_source) (cinfo);
+  /* We can use jpeg_abort to release memory and reset global_state */
+  jpeg_abort((j_common_ptr) cinfo);
+  return TRUE;
+}
diff --git a/plugins/AdvaImg/src/LibJPEG/jdapistd.c b/plugins/AdvaImg/src/LibJPEG/jdapistd.c
index 9d74537772..e81bd67dd7 100644
--- a/plugins/AdvaImg/src/LibJPEG/jdapistd.c
+++ b/plugins/AdvaImg/src/LibJPEG/jdapistd.c
@@ -1,275 +1,275 @@
-/*
- * jdapistd.c
- *
- * Copyright (C) 1994-1996, Thomas G. Lane.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains application interface code for the decompression half
- * of the JPEG library.  These are the "standard" API routines that are
- * used in the normal full-decompression case.  They are not used by a
- * transcoding-only application.  Note that if an application links in
- * jpeg_start_decompress, it will end up linking in the entire decompressor.
- * We thus must separate this file from jdapimin.c to avoid linking the
- * whole decompression library into a transcoder.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-
-
-/* Forward declarations */
-LOCAL(boolean) output_pass_setup JPP((j_decompress_ptr cinfo));
-
-
-/*
- * Decompression initialization.
- * jpeg_read_header must be completed before calling this.
- *
- * If a multipass operating mode was selected, this will do all but the
- * last pass, and thus may take a great deal of time.
- *
- * Returns FALSE if suspended.  The return value need be inspected only if
- * a suspending data source is used.
- */
-
-GLOBAL(boolean)
-jpeg_start_decompress (j_decompress_ptr cinfo)
-{
-  if (cinfo->global_state == DSTATE_READY) {
-    /* First call: initialize master control, select active modules */
-    jinit_master_decompress(cinfo);
-    if (cinfo->buffered_image) {
-      /* No more work here; expecting jpeg_start_output next */
-      cinfo->global_state = DSTATE_BUFIMAGE;
-      return TRUE;
-    }
-    cinfo->global_state = DSTATE_PRELOAD;
-  }
-  if (cinfo->global_state == DSTATE_PRELOAD) {
-    /* If file has multiple scans, absorb them all into the coef buffer */
-    if (cinfo->inputctl->has_multiple_scans) {
-#ifdef D_MULTISCAN_FILES_SUPPORTED
-      for (;;) {
-	int retcode;
-	/* Call progress monitor hook if present */
-	if (cinfo->progress != NULL)
-	  (*cinfo->progress->progress_monitor) ((j_common_ptr) cinfo);
-	/* Absorb some more input */
-	retcode = (*cinfo->inputctl->consume_input) (cinfo);
-	if (retcode == JPEG_SUSPENDED)
-	  return FALSE;
-	if (retcode == JPEG_REACHED_EOI)
-	  break;
-	/* Advance progress counter if appropriate */
-	if (cinfo->progress != NULL &&
-	    (retcode == JPEG_ROW_COMPLETED || retcode == JPEG_REACHED_SOS)) {
-	  if (++cinfo->progress->pass_counter >= cinfo->progress->pass_limit) {
-	    /* jdmaster underestimated number of scans; ratchet up one scan */
-	    cinfo->progress->pass_limit += (long) cinfo->total_iMCU_rows;
-	  }
-	}
-      }
-#else
-      ERREXIT(cinfo, JERR_NOT_COMPILED);
-#endif /* D_MULTISCAN_FILES_SUPPORTED */
-    }
-    cinfo->output_scan_number = cinfo->input_scan_number;
-  } else if (cinfo->global_state != DSTATE_PRESCAN)
-    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
-  /* Perform any dummy output passes, and set up for the final pass */
-  return output_pass_setup(cinfo);
-}
-
-
-/*
- * Set up for an output pass, and perform any dummy pass(es) needed.
- * Common subroutine for jpeg_start_decompress and jpeg_start_output.
- * Entry: global_state = DSTATE_PRESCAN only if previously suspended.
- * Exit: If done, returns TRUE and sets global_state for proper output mode.
- *       If suspended, returns FALSE and sets global_state = DSTATE_PRESCAN.
- */
-
-LOCAL(boolean)
-output_pass_setup (j_decompress_ptr cinfo)
-{
-  if (cinfo->global_state != DSTATE_PRESCAN) {
-    /* First call: do pass setup */
-    (*cinfo->master->prepare_for_output_pass) (cinfo);
-    cinfo->output_scanline = 0;
-    cinfo->global_state = DSTATE_PRESCAN;
-  }
-  /* Loop over any required dummy passes */
-  while (cinfo->master->is_dummy_pass) {
-#ifdef QUANT_2PASS_SUPPORTED
-    /* Crank through the dummy pass */
-    while (cinfo->output_scanline < cinfo->output_height) {
-      JDIMENSION last_scanline;
-      /* Call progress monitor hook if present */
-      if (cinfo->progress != NULL) {
-	cinfo->progress->pass_counter = (long) cinfo->output_scanline;
-	cinfo->progress->pass_limit = (long) cinfo->output_height;
-	(*cinfo->progress->progress_monitor) ((j_common_ptr) cinfo);
-      }
-      /* Process some data */
-      last_scanline = cinfo->output_scanline;
-      (*cinfo->main->process_data) (cinfo, (JSAMPARRAY) NULL,
-				    &cinfo->output_scanline, (JDIMENSION) 0);
-      if (cinfo->output_scanline == last_scanline)
-	return FALSE;		/* No progress made, must suspend */
-    }
-    /* Finish up dummy pass, and set up for another one */
-    (*cinfo->master->finish_output_pass) (cinfo);
-    (*cinfo->master->prepare_for_output_pass) (cinfo);
-    cinfo->output_scanline = 0;
-#else
-    ERREXIT(cinfo, JERR_NOT_COMPILED);
-#endif /* QUANT_2PASS_SUPPORTED */
-  }
-  /* Ready for application to drive output pass through
-   * jpeg_read_scanlines or jpeg_read_raw_data.
-   */
-  cinfo->global_state = cinfo->raw_data_out ? DSTATE_RAW_OK : DSTATE_SCANNING;
-  return TRUE;
-}
-
-
-/*
- * Read some scanlines of data from the JPEG decompressor.
- *
- * The return value will be the number of lines actually read.
- * This may be less than the number requested in several cases,
- * including bottom of image, data source suspension, and operating
- * modes that emit multiple scanlines at a time.
- *
- * Note: we warn about excess calls to jpeg_read_scanlines() since
- * this likely signals an application programmer error.  However,
- * an oversize buffer (max_lines > scanlines remaining) is not an error.
- */
-
-GLOBAL(JDIMENSION)
-jpeg_read_scanlines (j_decompress_ptr cinfo, JSAMPARRAY scanlines,
-		     JDIMENSION max_lines)
-{
-  JDIMENSION row_ctr;
-
-  if (cinfo->global_state != DSTATE_SCANNING)
-    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
-  if (cinfo->output_scanline >= cinfo->output_height) {
-    WARNMS(cinfo, JWRN_TOO_MUCH_DATA);
-    return 0;
-  }
-
-  /* Call progress monitor hook if present */
-  if (cinfo->progress != NULL) {
-    cinfo->progress->pass_counter = (long) cinfo->output_scanline;
-    cinfo->progress->pass_limit = (long) cinfo->output_height;
-    (*cinfo->progress->progress_monitor) ((j_common_ptr) cinfo);
-  }
-
-  /* Process some data */
-  row_ctr = 0;
-  (*cinfo->main->process_data) (cinfo, scanlines, &row_ctr, max_lines);
-  cinfo->output_scanline += row_ctr;
-  return row_ctr;
-}
-
-
-/*
- * Alternate entry point to read raw data.
- * Processes exactly one iMCU row per call, unless suspended.
- */
-
-GLOBAL(JDIMENSION)
-jpeg_read_raw_data (j_decompress_ptr cinfo, JSAMPIMAGE data,
-		    JDIMENSION max_lines)
-{
-  JDIMENSION lines_per_iMCU_row;
-
-  if (cinfo->global_state != DSTATE_RAW_OK)
-    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
-  if (cinfo->output_scanline >= cinfo->output_height) {
-    WARNMS(cinfo, JWRN_TOO_MUCH_DATA);
-    return 0;
-  }
-
-  /* Call progress monitor hook if present */
-  if (cinfo->progress != NULL) {
-    cinfo->progress->pass_counter = (long) cinfo->output_scanline;
-    cinfo->progress->pass_limit = (long) cinfo->output_height;
-    (*cinfo->progress->progress_monitor) ((j_common_ptr) cinfo);
-  }
-
-  /* Verify that at least one iMCU row can be returned. */
-  lines_per_iMCU_row = cinfo->max_v_samp_factor * cinfo->min_DCT_v_scaled_size;
-  if (max_lines < lines_per_iMCU_row)
-    ERREXIT(cinfo, JERR_BUFFER_SIZE);
-
-  /* Decompress directly into user's buffer. */
-  if (! (*cinfo->coef->decompress_data) (cinfo, data))
-    return 0;			/* suspension forced, can do nothing more */
-
-  /* OK, we processed one iMCU row. */
-  cinfo->output_scanline += lines_per_iMCU_row;
-  return lines_per_iMCU_row;
-}
-
-
-/* Additional entry points for buffered-image mode. */
-
-#ifdef D_MULTISCAN_FILES_SUPPORTED
-
-/*
- * Initialize for an output pass in buffered-image mode.
- */
-
-GLOBAL(boolean)
-jpeg_start_output (j_decompress_ptr cinfo, int scan_number)
-{
-  if (cinfo->global_state != DSTATE_BUFIMAGE &&
-      cinfo->global_state != DSTATE_PRESCAN)
-    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
-  /* Limit scan number to valid range */
-  if (scan_number <= 0)
-    scan_number = 1;
-  if (cinfo->inputctl->eoi_reached &&
-      scan_number > cinfo->input_scan_number)
-    scan_number = cinfo->input_scan_number;
-  cinfo->output_scan_number = scan_number;
-  /* Perform any dummy output passes, and set up for the real pass */
-  return output_pass_setup(cinfo);
-}
-
-
-/*
- * Finish up after an output pass in buffered-image mode.
- *
- * Returns FALSE if suspended.  The return value need be inspected only if
- * a suspending data source is used.
- */
-
-GLOBAL(boolean)
-jpeg_finish_output (j_decompress_ptr cinfo)
-{
-  if ((cinfo->global_state == DSTATE_SCANNING ||
-       cinfo->global_state == DSTATE_RAW_OK) && cinfo->buffered_image) {
-    /* Terminate this pass. */
-    /* We do not require the whole pass to have been completed. */
-    (*cinfo->master->finish_output_pass) (cinfo);
-    cinfo->global_state = DSTATE_BUFPOST;
-  } else if (cinfo->global_state != DSTATE_BUFPOST) {
-    /* BUFPOST = repeat call after a suspension, anything else is error */
-    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
-  }
-  /* Read markers looking for SOS or EOI */
-  while (cinfo->input_scan_number <= cinfo->output_scan_number &&
-	 ! cinfo->inputctl->eoi_reached) {
-    if ((*cinfo->inputctl->consume_input) (cinfo) == JPEG_SUSPENDED)
-      return FALSE;		/* Suspend, come back later */
-  }
-  cinfo->global_state = DSTATE_BUFIMAGE;
-  return TRUE;
-}
-
-#endif /* D_MULTISCAN_FILES_SUPPORTED */
+/*
+ * jdapistd.c
+ *
+ * Copyright (C) 1994-1996, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains application interface code for the decompression half
+ * of the JPEG library.  These are the "standard" API routines that are
+ * used in the normal full-decompression case.  They are not used by a
+ * transcoding-only application.  Note that if an application links in
+ * jpeg_start_decompress, it will end up linking in the entire decompressor.
+ * We thus must separate this file from jdapimin.c to avoid linking the
+ * whole decompression library into a transcoder.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/* Forward declarations */
+LOCAL(boolean) output_pass_setup JPP((j_decompress_ptr cinfo));
+
+
+/*
+ * Decompression initialization.
+ * jpeg_read_header must be completed before calling this.
+ *
+ * If a multipass operating mode was selected, this will do all but the
+ * last pass, and thus may take a great deal of time.
+ *
+ * Returns FALSE if suspended.  The return value need be inspected only if
+ * a suspending data source is used.
+ */
+
+GLOBAL(boolean)
+jpeg_start_decompress (j_decompress_ptr cinfo)
+{
+  if (cinfo->global_state == DSTATE_READY) {
+    /* First call: initialize master control, select active modules */
+    jinit_master_decompress(cinfo);
+    if (cinfo->buffered_image) {
+      /* No more work here; expecting jpeg_start_output next */
+      cinfo->global_state = DSTATE_BUFIMAGE;
+      return TRUE;
+    }
+    cinfo->global_state = DSTATE_PRELOAD;
+  }
+  if (cinfo->global_state == DSTATE_PRELOAD) {
+    /* If file has multiple scans, absorb them all into the coef buffer */
+    if (cinfo->inputctl->has_multiple_scans) {
+#ifdef D_MULTISCAN_FILES_SUPPORTED
+      for (;;) {
+	int retcode;
+	/* Call progress monitor hook if present */
+	if (cinfo->progress != NULL)
+	  (*cinfo->progress->progress_monitor) ((j_common_ptr) cinfo);
+	/* Absorb some more input */
+	retcode = (*cinfo->inputctl->consume_input) (cinfo);
+	if (retcode == JPEG_SUSPENDED)
+	  return FALSE;
+	if (retcode == JPEG_REACHED_EOI)
+	  break;
+	/* Advance progress counter if appropriate */
+	if (cinfo->progress != NULL &&
+	    (retcode == JPEG_ROW_COMPLETED || retcode == JPEG_REACHED_SOS)) {
+	  if (++cinfo->progress->pass_counter >= cinfo->progress->pass_limit) {
+	    /* jdmaster underestimated number of scans; ratchet up one scan */
+	    cinfo->progress->pass_limit += (long) cinfo->total_iMCU_rows;
+	  }
+	}
+      }
+#else
+      ERREXIT(cinfo, JERR_NOT_COMPILED);
+#endif /* D_MULTISCAN_FILES_SUPPORTED */
+    }
+    cinfo->output_scan_number = cinfo->input_scan_number;
+  } else if (cinfo->global_state != DSTATE_PRESCAN)
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+  /* Perform any dummy output passes, and set up for the final pass */
+  return output_pass_setup(cinfo);
+}
+
+
+/*
+ * Set up for an output pass, and perform any dummy pass(es) needed.
+ * Common subroutine for jpeg_start_decompress and jpeg_start_output.
+ * Entry: global_state = DSTATE_PRESCAN only if previously suspended.
+ * Exit: If done, returns TRUE and sets global_state for proper output mode.
+ *       If suspended, returns FALSE and sets global_state = DSTATE_PRESCAN.
+ */
+
+LOCAL(boolean)
+output_pass_setup (j_decompress_ptr cinfo)
+{
+  if (cinfo->global_state != DSTATE_PRESCAN) {
+    /* First call: do pass setup */
+    (*cinfo->master->prepare_for_output_pass) (cinfo);
+    cinfo->output_scanline = 0;
+    cinfo->global_state = DSTATE_PRESCAN;
+  }
+  /* Loop over any required dummy passes */
+  while (cinfo->master->is_dummy_pass) {
+#ifdef QUANT_2PASS_SUPPORTED
+    /* Crank through the dummy pass */
+    while (cinfo->output_scanline < cinfo->output_height) {
+      JDIMENSION last_scanline;
+      /* Call progress monitor hook if present */
+      if (cinfo->progress != NULL) {
+	cinfo->progress->pass_counter = (long) cinfo->output_scanline;
+	cinfo->progress->pass_limit = (long) cinfo->output_height;
+	(*cinfo->progress->progress_monitor) ((j_common_ptr) cinfo);
+      }
+      /* Process some data */
+      last_scanline = cinfo->output_scanline;
+      (*cinfo->main->process_data) (cinfo, (JSAMPARRAY) NULL,
+				    &cinfo->output_scanline, (JDIMENSION) 0);
+      if (cinfo->output_scanline == last_scanline)
+	return FALSE;		/* No progress made, must suspend */
+    }
+    /* Finish up dummy pass, and set up for another one */
+    (*cinfo->master->finish_output_pass) (cinfo);
+    (*cinfo->master->prepare_for_output_pass) (cinfo);
+    cinfo->output_scanline = 0;
+#else
+    ERREXIT(cinfo, JERR_NOT_COMPILED);
+#endif /* QUANT_2PASS_SUPPORTED */
+  }
+  /* Ready for application to drive output pass through
+   * jpeg_read_scanlines or jpeg_read_raw_data.
+   */
+  cinfo->global_state = cinfo->raw_data_out ? DSTATE_RAW_OK : DSTATE_SCANNING;
+  return TRUE;
+}
+
+
+/*
+ * Read some scanlines of data from the JPEG decompressor.
+ *
+ * The return value will be the number of lines actually read.
+ * This may be less than the number requested in several cases,
+ * including bottom of image, data source suspension, and operating
+ * modes that emit multiple scanlines at a time.
+ *
+ * Note: we warn about excess calls to jpeg_read_scanlines() since
+ * this likely signals an application programmer error.  However,
+ * an oversize buffer (max_lines > scanlines remaining) is not an error.
+ */
+
+GLOBAL(JDIMENSION)
+jpeg_read_scanlines (j_decompress_ptr cinfo, JSAMPARRAY scanlines,
+		     JDIMENSION max_lines)
+{
+  JDIMENSION row_ctr;
+
+  if (cinfo->global_state != DSTATE_SCANNING)
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+  if (cinfo->output_scanline >= cinfo->output_height) {
+    WARNMS(cinfo, JWRN_TOO_MUCH_DATA);
+    return 0;
+  }
+
+  /* Call progress monitor hook if present */
+  if (cinfo->progress != NULL) {
+    cinfo->progress->pass_counter = (long) cinfo->output_scanline;
+    cinfo->progress->pass_limit = (long) cinfo->output_height;
+    (*cinfo->progress->progress_monitor) ((j_common_ptr) cinfo);
+  }
+
+  /* Process some data */
+  row_ctr = 0;
+  (*cinfo->main->process_data) (cinfo, scanlines, &row_ctr, max_lines);
+  cinfo->output_scanline += row_ctr;
+  return row_ctr;
+}
+
+
+/*
+ * Alternate entry point to read raw data.
+ * Processes exactly one iMCU row per call, unless suspended.
+ */
+
+GLOBAL(JDIMENSION)
+jpeg_read_raw_data (j_decompress_ptr cinfo, JSAMPIMAGE data,
+		    JDIMENSION max_lines)
+{
+  JDIMENSION lines_per_iMCU_row;
+
+  if (cinfo->global_state != DSTATE_RAW_OK)
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+  if (cinfo->output_scanline >= cinfo->output_height) {
+    WARNMS(cinfo, JWRN_TOO_MUCH_DATA);
+    return 0;
+  }
+
+  /* Call progress monitor hook if present */
+  if (cinfo->progress != NULL) {
+    cinfo->progress->pass_counter = (long) cinfo->output_scanline;
+    cinfo->progress->pass_limit = (long) cinfo->output_height;
+    (*cinfo->progress->progress_monitor) ((j_common_ptr) cinfo);
+  }
+
+  /* Verify that at least one iMCU row can be returned. */
+  lines_per_iMCU_row = cinfo->max_v_samp_factor * cinfo->min_DCT_v_scaled_size;
+  if (max_lines < lines_per_iMCU_row)
+    ERREXIT(cinfo, JERR_BUFFER_SIZE);
+
+  /* Decompress directly into user's buffer. */
+  if (! (*cinfo->coef->decompress_data) (cinfo, data))
+    return 0;			/* suspension forced, can do nothing more */
+
+  /* OK, we processed one iMCU row. */
+  cinfo->output_scanline += lines_per_iMCU_row;
+  return lines_per_iMCU_row;
+}
+
+
+/* Additional entry points for buffered-image mode. */
+
+#ifdef D_MULTISCAN_FILES_SUPPORTED
+
+/*
+ * Initialize for an output pass in buffered-image mode.
+ */
+
+GLOBAL(boolean)
+jpeg_start_output (j_decompress_ptr cinfo, int scan_number)
+{
+  if (cinfo->global_state != DSTATE_BUFIMAGE &&
+      cinfo->global_state != DSTATE_PRESCAN)
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+  /* Limit scan number to valid range */
+  if (scan_number <= 0)
+    scan_number = 1;
+  if (cinfo->inputctl->eoi_reached &&
+      scan_number > cinfo->input_scan_number)
+    scan_number = cinfo->input_scan_number;
+  cinfo->output_scan_number = scan_number;
+  /* Perform any dummy output passes, and set up for the real pass */
+  return output_pass_setup(cinfo);
+}
+
+
+/*
+ * Finish up after an output pass in buffered-image mode.
+ *
+ * Returns FALSE if suspended.  The return value need be inspected only if
+ * a suspending data source is used.
+ */
+
+GLOBAL(boolean)
+jpeg_finish_output (j_decompress_ptr cinfo)
+{
+  if ((cinfo->global_state == DSTATE_SCANNING ||
+       cinfo->global_state == DSTATE_RAW_OK) && cinfo->buffered_image) {
+    /* Terminate this pass. */
+    /* We do not require the whole pass to have been completed. */
+    (*cinfo->master->finish_output_pass) (cinfo);
+    cinfo->global_state = DSTATE_BUFPOST;
+  } else if (cinfo->global_state != DSTATE_BUFPOST) {
+    /* BUFPOST = repeat call after a suspension, anything else is error */
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+  }
+  /* Read markers looking for SOS or EOI */
+  while (cinfo->input_scan_number <= cinfo->output_scan_number &&
+	 ! cinfo->inputctl->eoi_reached) {
+    if ((*cinfo->inputctl->consume_input) (cinfo) == JPEG_SUSPENDED)
+      return FALSE;		/* Suspend, come back later */
+  }
+  cinfo->global_state = DSTATE_BUFIMAGE;
+  return TRUE;
+}
+
+#endif /* D_MULTISCAN_FILES_SUPPORTED */
diff --git a/plugins/AdvaImg/src/LibJPEG/jdarith.c b/plugins/AdvaImg/src/LibJPEG/jdarith.c
index 092f8af5fe..5cd294260d 100644
--- a/plugins/AdvaImg/src/LibJPEG/jdarith.c
+++ b/plugins/AdvaImg/src/LibJPEG/jdarith.c
@@ -1,776 +1,782 @@
-/*
- * jdarith.c
- *
- * Developed 1997-2011 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains portable arithmetic entropy decoding routines for JPEG
- * (implementing the ISO/IEC IS 10918-1 and CCITT Recommendation ITU-T T.81).
- *
- * Both sequential and progressive modes are supported in this single module.
- *
- * Suspension is not currently supported in this module.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-
-
-/* Expanded entropy decoder object for arithmetic decoding. */
-
-typedef struct {
-  struct jpeg_entropy_decoder pub; /* public fields */
-
-  INT32 c;       /* C register, base of coding interval + input bit buffer */
-  INT32 a;               /* A register, normalized size of coding interval */
-  int ct;     /* bit shift counter, # of bits left in bit buffer part of C */
-                                                         /* init: ct = -16 */
-                                                         /* run: ct = 0..7 */
-                                                         /* error: ct = -1 */
-  int last_dc_val[MAX_COMPS_IN_SCAN]; /* last DC coef for each component */
-  int dc_context[MAX_COMPS_IN_SCAN]; /* context index for DC conditioning */
-
-  unsigned int restarts_to_go;	/* MCUs left in this restart interval */
-
-  /* Pointers to statistics areas (these workspaces have image lifespan) */
-  unsigned char * dc_stats[NUM_ARITH_TBLS];
-  unsigned char * ac_stats[NUM_ARITH_TBLS];
-
-  /* Statistics bin for coding with fixed probability 0.5 */
-  unsigned char fixed_bin[4];
-} arith_entropy_decoder;
-
-typedef arith_entropy_decoder * arith_entropy_ptr;
-
-/* The following two definitions specify the allocation chunk size
- * for the statistics area.
- * According to sections F.1.4.4.1.3 and F.1.4.4.2, we need at least
- * 49 statistics bins for DC, and 245 statistics bins for AC coding.
- *
- * We use a compact representation with 1 byte per statistics bin,
- * thus the numbers directly represent byte sizes.
- * This 1 byte per statistics bin contains the meaning of the MPS
- * (more probable symbol) in the highest bit (mask 0x80), and the
- * index into the probability estimation state machine table
- * in the lower bits (mask 0x7F).
- */
-
-#define DC_STAT_BINS 64
-#define AC_STAT_BINS 256
-
-
-LOCAL(int)
-get_byte (j_decompress_ptr cinfo)
-/* Read next input byte; we do not support suspension in this module. */
-{
-  struct jpeg_source_mgr * src = cinfo->src;
-
-  if (src->bytes_in_buffer == 0)
-    if (! (*src->fill_input_buffer) (cinfo))
-      ERREXIT(cinfo, JERR_CANT_SUSPEND);
-  src->bytes_in_buffer--;
-  return GETJOCTET(*src->next_input_byte++);
-}
-
-
-/*
- * The core arithmetic decoding routine (common in JPEG and JBIG).
- * This needs to go as fast as possible.
- * Machine-dependent optimization facilities
- * are not utilized in this portable implementation.
- * However, this code should be fairly efficient and
- * may be a good base for further optimizations anyway.
- *
- * Return value is 0 or 1 (binary decision).
- *
- * Note: I've changed the handling of the code base & bit
- * buffer register C compared to other implementations
- * based on the standards layout & procedures.
- * While it also contains both the actual base of the
- * coding interval (16 bits) and the next-bits buffer,
- * the cut-point between these two parts is floating
- * (instead of fixed) with the bit shift counter CT.
- * Thus, we also need only one (variable instead of
- * fixed size) shift for the LPS/MPS decision, and
- * we can get away with any renormalization update
- * of C (except for new data insertion, of course).
- *
- * I've also introduced a new scheme for accessing
- * the probability estimation state machine table,
- * derived from Markus Kuhn's JBIG implementation.
- */
-
-LOCAL(int)
-arith_decode (j_decompress_ptr cinfo, unsigned char *st)
-{
-  register arith_entropy_ptr e = (arith_entropy_ptr) cinfo->entropy;
-  register unsigned char nl, nm;
-  register INT32 qe, temp;
-  register int sv, data;
-
-  /* Renormalization & data input per section D.2.6 */
-  while (e->a < 0x8000L) {
-    if (--e->ct < 0) {
-      /* Need to fetch next data byte */
-      if (cinfo->unread_marker)
-	data = 0;		/* stuff zero data */
-      else {
-	data = get_byte(cinfo);	/* read next input byte */
-	if (data == 0xFF) {	/* zero stuff or marker code */
-	  do data = get_byte(cinfo);
-	  while (data == 0xFF);	/* swallow extra 0xFF bytes */
-	  if (data == 0)
-	    data = 0xFF;	/* discard stuffed zero byte */
-	  else {
-	    /* Note: Different from the Huffman decoder, hitting
-	     * a marker while processing the compressed data
-	     * segment is legal in arithmetic coding.
-	     * The convention is to supply zero data
-	     * then until decoding is complete.
-	     */
-	    cinfo->unread_marker = data;
-	    data = 0;
-	  }
-	}
-      }
-      e->c = (e->c << 8) | data; /* insert data into C register */
-      if ((e->ct += 8) < 0)	 /* update bit shift counter */
-	/* Need more initial bytes */
-	if (++e->ct == 0)
-	  /* Got 2 initial bytes -> re-init A and exit loop */
-	  e->a = 0x8000L; /* => e->a = 0x10000L after loop exit */
-    }
-    e->a <<= 1;
-  }
-
-  /* Fetch values from our compact representation of Table D.3(D.2):
-   * Qe values and probability estimation state machine
-   */
-  sv = *st;
-  qe = jpeg_aritab[sv & 0x7F];	/* => Qe_Value */
-  nl = qe & 0xFF; qe >>= 8;	/* Next_Index_LPS + Switch_MPS */
-  nm = qe & 0xFF; qe >>= 8;	/* Next_Index_MPS */
-
-  /* Decode & estimation procedures per sections D.2.4 & D.2.5 */
-  temp = e->a - qe;
-  e->a = temp;
-  temp <<= e->ct;
-  if (e->c >= temp) {
-    e->c -= temp;
-    /* Conditional LPS (less probable symbol) exchange */
-    if (e->a < qe) {
-      e->a = qe;
-      *st = (sv & 0x80) ^ nm;	/* Estimate_after_MPS */
-    } else {
-      e->a = qe;
-      *st = (sv & 0x80) ^ nl;	/* Estimate_after_LPS */
-      sv ^= 0x80;		/* Exchange LPS/MPS */
-    }
-  } else if (e->a < 0x8000L) {
-    /* Conditional MPS (more probable symbol) exchange */
-    if (e->a < qe) {
-      *st = (sv & 0x80) ^ nl;	/* Estimate_after_LPS */
-      sv ^= 0x80;		/* Exchange LPS/MPS */
-    } else {
-      *st = (sv & 0x80) ^ nm;	/* Estimate_after_MPS */
-    }
-  }
-
-  return sv >> 7;
-}
-
-
-/*
- * Check for a restart marker & resynchronize decoder.
- */
-
-LOCAL(void)
-process_restart (j_decompress_ptr cinfo)
-{
-  arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy;
-  int ci;
-  jpeg_component_info * compptr;
-
-  /* Advance past the RSTn marker */
-  if (! (*cinfo->marker->read_restart_marker) (cinfo))
-    ERREXIT(cinfo, JERR_CANT_SUSPEND);
-
-  /* Re-initialize statistics areas */
-  for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
-    compptr = cinfo->cur_comp_info[ci];
-    if (! cinfo->progressive_mode || (cinfo->Ss == 0 && cinfo->Ah == 0)) {
-      MEMZERO(entropy->dc_stats[compptr->dc_tbl_no], DC_STAT_BINS);
-      /* Reset DC predictions to 0 */
-      entropy->last_dc_val[ci] = 0;
-      entropy->dc_context[ci] = 0;
-    }
-    if ((! cinfo->progressive_mode && cinfo->lim_Se) ||
-	(cinfo->progressive_mode && cinfo->Ss)) {
-      MEMZERO(entropy->ac_stats[compptr->ac_tbl_no], AC_STAT_BINS);
-    }
-  }
-
-  /* Reset arithmetic decoding variables */
-  entropy->c = 0;
-  entropy->a = 0;
-  entropy->ct = -16;	/* force reading 2 initial bytes to fill C */
-
-  /* Reset restart counter */
-  entropy->restarts_to_go = cinfo->restart_interval;
-}
-
-
-/*
- * Arithmetic MCU decoding.
- * Each of these routines decodes and returns one MCU's worth of
- * arithmetic-compressed coefficients.
- * The coefficients are reordered from zigzag order into natural array order,
- * but are not dequantized.
- *
- * The i'th block of the MCU is stored into the block pointed to by
- * MCU_data[i].  WE ASSUME THIS AREA IS INITIALLY ZEROED BY THE CALLER.
- */
-
-/*
- * MCU decoding for DC initial scan (either spectral selection,
- * or first pass of successive approximation).
- */
-
-METHODDEF(boolean)
-decode_mcu_DC_first (j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
-{
-  arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy;
-  JBLOCKROW block;
-  unsigned char *st;
-  int blkn, ci, tbl, sign;
-  int v, m;
-
-  /* Process restart marker if needed */
-  if (cinfo->restart_interval) {
-    if (entropy->restarts_to_go == 0)
-      process_restart(cinfo);
-    entropy->restarts_to_go--;
-  }
-
-  if (entropy->ct == -1) return TRUE;	/* if error do nothing */
-
-  /* Outer loop handles each block in the MCU */
-
-  for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
-    block = MCU_data[blkn];
-    ci = cinfo->MCU_membership[blkn];
-    tbl = cinfo->cur_comp_info[ci]->dc_tbl_no;
-
-    /* Sections F.2.4.1 & F.1.4.4.1: Decoding of DC coefficients */
-
-    /* Table F.4: Point to statistics bin S0 for DC coefficient coding */
-    st = entropy->dc_stats[tbl] + entropy->dc_context[ci];
-
-    /* Figure F.19: Decode_DC_DIFF */
-    if (arith_decode(cinfo, st) == 0)
-      entropy->dc_context[ci] = 0;
-    else {
-      /* Figure F.21: Decoding nonzero value v */
-      /* Figure F.22: Decoding the sign of v */
-      sign = arith_decode(cinfo, st + 1);
-      st += 2; st += sign;
-      /* Figure F.23: Decoding the magnitude category of v */
-      if ((m = arith_decode(cinfo, st)) != 0) {
-	st = entropy->dc_stats[tbl] + 20;	/* Table F.4: X1 = 20 */
-	while (arith_decode(cinfo, st)) {
-	  if ((m <<= 1) == 0x8000) {
-	    WARNMS(cinfo, JWRN_ARITH_BAD_CODE);
-	    entropy->ct = -1;			/* magnitude overflow */
-	    return TRUE;
-	  }
-	  st += 1;
-	}
-      }
-      /* Section F.1.4.4.1.2: Establish dc_context conditioning category */
-      if (m < (int) ((1L << cinfo->arith_dc_L[tbl]) >> 1))
-	entropy->dc_context[ci] = 0;		   /* zero diff category */
-      else if (m > (int) ((1L << cinfo->arith_dc_U[tbl]) >> 1))
-	entropy->dc_context[ci] = 12 + (sign * 4); /* large diff category */
-      else
-	entropy->dc_context[ci] = 4 + (sign * 4);  /* small diff category */
-      v = m;
-      /* Figure F.24: Decoding the magnitude bit pattern of v */
-      st += 14;
-      while (m >>= 1)
-	if (arith_decode(cinfo, st)) v |= m;
-      v += 1; if (sign) v = -v;
-      entropy->last_dc_val[ci] += v;
-    }
-
-    /* Scale and output the DC coefficient (assumes jpeg_natural_order[0]=0) */
-    (*block)[0] = (JCOEF) (entropy->last_dc_val[ci] << cinfo->Al);
-  }
-
-  return TRUE;
-}
-
-
-/*
- * MCU decoding for AC initial scan (either spectral selection,
- * or first pass of successive approximation).
- */
-
-METHODDEF(boolean)
-decode_mcu_AC_first (j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
-{
-  arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy;
-  JBLOCKROW block;
-  unsigned char *st;
-  int tbl, sign, k;
-  int v, m;
-  const int * natural_order;
-
-  /* Process restart marker if needed */
-  if (cinfo->restart_interval) {
-    if (entropy->restarts_to_go == 0)
-      process_restart(cinfo);
-    entropy->restarts_to_go--;
-  }
-
-  if (entropy->ct == -1) return TRUE;	/* if error do nothing */
-
-  natural_order = cinfo->natural_order;
-
-  /* There is always only one block per MCU */
-  block = MCU_data[0];
-  tbl = cinfo->cur_comp_info[0]->ac_tbl_no;
-
-  /* Sections F.2.4.2 & F.1.4.4.2: Decoding of AC coefficients */
-
-  /* Figure F.20: Decode_AC_coefficients */
-  for (k = cinfo->Ss; k <= cinfo->Se; k++) {
-    st = entropy->ac_stats[tbl] + 3 * (k - 1);
-    if (arith_decode(cinfo, st)) break;		/* EOB flag */
-    while (arith_decode(cinfo, st + 1) == 0) {
-      st += 3; k++;
-      if (k > cinfo->Se) {
-	WARNMS(cinfo, JWRN_ARITH_BAD_CODE);
-	entropy->ct = -1;			/* spectral overflow */
-	return TRUE;
-      }
-    }
-    /* Figure F.21: Decoding nonzero value v */
-    /* Figure F.22: Decoding the sign of v */
-    sign = arith_decode(cinfo, entropy->fixed_bin);
-    st += 2;
-    /* Figure F.23: Decoding the magnitude category of v */
-    if ((m = arith_decode(cinfo, st)) != 0) {
-      if (arith_decode(cinfo, st)) {
-	m <<= 1;
-	st = entropy->ac_stats[tbl] +
-	     (k <= cinfo->arith_ac_K[tbl] ? 189 : 217);
-	while (arith_decode(cinfo, st)) {
-	  if ((m <<= 1) == 0x8000) {
-	    WARNMS(cinfo, JWRN_ARITH_BAD_CODE);
-	    entropy->ct = -1;			/* magnitude overflow */
-	    return TRUE;
-	  }
-	  st += 1;
-	}
-      }
-    }
-    v = m;
-    /* Figure F.24: Decoding the magnitude bit pattern of v */
-    st += 14;
-    while (m >>= 1)
-      if (arith_decode(cinfo, st)) v |= m;
-    v += 1; if (sign) v = -v;
-    /* Scale and output coefficient in natural (dezigzagged) order */
-    (*block)[natural_order[k]] = (JCOEF) (v << cinfo->Al);
-  }
-
-  return TRUE;
-}
-
-
-/*
- * MCU decoding for DC successive approximation refinement scan.
- */
-
-METHODDEF(boolean)
-decode_mcu_DC_refine (j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
-{
-  arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy;
-  unsigned char *st;
-  int p1, blkn;
-
-  /* Process restart marker if needed */
-  if (cinfo->restart_interval) {
-    if (entropy->restarts_to_go == 0)
-      process_restart(cinfo);
-    entropy->restarts_to_go--;
-  }
-
-  st = entropy->fixed_bin;	/* use fixed probability estimation */
-  p1 = 1 << cinfo->Al;		/* 1 in the bit position being coded */
-
-  /* Outer loop handles each block in the MCU */
-
-  for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
-    /* Encoded data is simply the next bit of the two's-complement DC value */
-    if (arith_decode(cinfo, st))
-      MCU_data[blkn][0][0] |= p1;
-  }
-
-  return TRUE;
-}
-
-
-/*
- * MCU decoding for AC successive approximation refinement scan.
- */
-
-METHODDEF(boolean)
-decode_mcu_AC_refine (j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
-{
-  arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy;
-  JBLOCKROW block;
-  JCOEFPTR thiscoef;
-  unsigned char *st;
-  int tbl, k, kex;
-  int p1, m1;
-  const int * natural_order;
-
-  /* Process restart marker if needed */
-  if (cinfo->restart_interval) {
-    if (entropy->restarts_to_go == 0)
-      process_restart(cinfo);
-    entropy->restarts_to_go--;
-  }
-
-  if (entropy->ct == -1) return TRUE;	/* if error do nothing */
-
-  natural_order = cinfo->natural_order;
-
-  /* There is always only one block per MCU */
-  block = MCU_data[0];
-  tbl = cinfo->cur_comp_info[0]->ac_tbl_no;
-
-  p1 = 1 << cinfo->Al;		/* 1 in the bit position being coded */
-  m1 = (-1) << cinfo->Al;	/* -1 in the bit position being coded */
-
-  /* Establish EOBx (previous stage end-of-block) index */
-  for (kex = cinfo->Se; kex > 0; kex--)
-    if ((*block)[natural_order[kex]]) break;
-
-  for (k = cinfo->Ss; k <= cinfo->Se; k++) {
-    st = entropy->ac_stats[tbl] + 3 * (k - 1);
-    if (k > kex)
-      if (arith_decode(cinfo, st)) break;	/* EOB flag */
-    for (;;) {
-      thiscoef = *block + natural_order[k];
-      if (*thiscoef) {				/* previously nonzero coef */
-	if (arith_decode(cinfo, st + 2)) {
-	  if (*thiscoef < 0)
-	    *thiscoef += m1;
-	  else
-	    *thiscoef += p1;
-	}
-	break;
-      }
-      if (arith_decode(cinfo, st + 1)) {	/* newly nonzero coef */
-	if (arith_decode(cinfo, entropy->fixed_bin))
-	  *thiscoef = m1;
-	else
-	  *thiscoef = p1;
-	break;
-      }
-      st += 3; k++;
-      if (k > cinfo->Se) {
-	WARNMS(cinfo, JWRN_ARITH_BAD_CODE);
-	entropy->ct = -1;			/* spectral overflow */
-	return TRUE;
-      }
-    }
-  }
-
-  return TRUE;
-}
-
-
-/*
- * Decode one MCU's worth of arithmetic-compressed coefficients.
- */
-
-METHODDEF(boolean)
-decode_mcu (j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
-{
-  arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy;
-  jpeg_component_info * compptr;
-  JBLOCKROW block;
-  unsigned char *st;
-  int blkn, ci, tbl, sign, k;
-  int v, m;
-  const int * natural_order;
-
-  /* Process restart marker if needed */
-  if (cinfo->restart_interval) {
-    if (entropy->restarts_to_go == 0)
-      process_restart(cinfo);
-    entropy->restarts_to_go--;
-  }
-
-  if (entropy->ct == -1) return TRUE;	/* if error do nothing */
-
-  natural_order = cinfo->natural_order;
-
-  /* Outer loop handles each block in the MCU */
-
-  for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
-    block = MCU_data[blkn];
-    ci = cinfo->MCU_membership[blkn];
-    compptr = cinfo->cur_comp_info[ci];
-
-    /* Sections F.2.4.1 & F.1.4.4.1: Decoding of DC coefficients */
-
-    tbl = compptr->dc_tbl_no;
-
-    /* Table F.4: Point to statistics bin S0 for DC coefficient coding */
-    st = entropy->dc_stats[tbl] + entropy->dc_context[ci];
-
-    /* Figure F.19: Decode_DC_DIFF */
-    if (arith_decode(cinfo, st) == 0)
-      entropy->dc_context[ci] = 0;
-    else {
-      /* Figure F.21: Decoding nonzero value v */
-      /* Figure F.22: Decoding the sign of v */
-      sign = arith_decode(cinfo, st + 1);
-      st += 2; st += sign;
-      /* Figure F.23: Decoding the magnitude category of v */
-      if ((m = arith_decode(cinfo, st)) != 0) {
-	st = entropy->dc_stats[tbl] + 20;	/* Table F.4: X1 = 20 */
-	while (arith_decode(cinfo, st)) {
-	  if ((m <<= 1) == 0x8000) {
-	    WARNMS(cinfo, JWRN_ARITH_BAD_CODE);
-	    entropy->ct = -1;			/* magnitude overflow */
-	    return TRUE;
-	  }
-	  st += 1;
-	}
-      }
-      /* Section F.1.4.4.1.2: Establish dc_context conditioning category */
-      if (m < (int) ((1L << cinfo->arith_dc_L[tbl]) >> 1))
-	entropy->dc_context[ci] = 0;		   /* zero diff category */
-      else if (m > (int) ((1L << cinfo->arith_dc_U[tbl]) >> 1))
-	entropy->dc_context[ci] = 12 + (sign * 4); /* large diff category */
-      else
-	entropy->dc_context[ci] = 4 + (sign * 4);  /* small diff category */
-      v = m;
-      /* Figure F.24: Decoding the magnitude bit pattern of v */
-      st += 14;
-      while (m >>= 1)
-	if (arith_decode(cinfo, st)) v |= m;
-      v += 1; if (sign) v = -v;
-      entropy->last_dc_val[ci] += v;
-    }
-
-    (*block)[0] = (JCOEF) entropy->last_dc_val[ci];
-
-    /* Sections F.2.4.2 & F.1.4.4.2: Decoding of AC coefficients */
-
-    if (cinfo->lim_Se == 0) continue;
-    tbl = compptr->ac_tbl_no;
-    k = 0;
-
-    /* Figure F.20: Decode_AC_coefficients */
-    do {
-      st = entropy->ac_stats[tbl] + 3 * k;
-      if (arith_decode(cinfo, st)) break;	/* EOB flag */
-      for (;;) {
-	k++;
-	if (arith_decode(cinfo, st + 1)) break;
-	st += 3;
-	if (k >= cinfo->lim_Se) {
-	  WARNMS(cinfo, JWRN_ARITH_BAD_CODE);
-	  entropy->ct = -1;			/* spectral overflow */
-	  return TRUE;
-	}
-      }
-      /* Figure F.21: Decoding nonzero value v */
-      /* Figure F.22: Decoding the sign of v */
-      sign = arith_decode(cinfo, entropy->fixed_bin);
-      st += 2;
-      /* Figure F.23: Decoding the magnitude category of v */
-      if ((m = arith_decode(cinfo, st)) != 0) {
-	if (arith_decode(cinfo, st)) {
-	  m <<= 1;
-	  st = entropy->ac_stats[tbl] +
-	       (k <= cinfo->arith_ac_K[tbl] ? 189 : 217);
-	  while (arith_decode(cinfo, st)) {
-	    if ((m <<= 1) == 0x8000) {
-	      WARNMS(cinfo, JWRN_ARITH_BAD_CODE);
-	      entropy->ct = -1;			/* magnitude overflow */
-	      return TRUE;
-	    }
-	    st += 1;
-	  }
-	}
-      }
-      v = m;
-      /* Figure F.24: Decoding the magnitude bit pattern of v */
-      st += 14;
-      while (m >>= 1)
-	if (arith_decode(cinfo, st)) v |= m;
-      v += 1; if (sign) v = -v;
-      (*block)[natural_order[k]] = (JCOEF) v;
-    } while (k < cinfo->lim_Se);
-  }
-
-  return TRUE;
-}
-
-
-/*
- * Initialize for an arithmetic-compressed scan.
- */
-
-METHODDEF(void)
-start_pass (j_decompress_ptr cinfo)
-{
-  arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy;
-  int ci, tbl;
-  jpeg_component_info * compptr;
-
-  if (cinfo->progressive_mode) {
-    /* Validate progressive scan parameters */
-    if (cinfo->Ss == 0) {
-      if (cinfo->Se != 0)
-	goto bad;
-    } else {
-      /* need not check Ss/Se < 0 since they came from unsigned bytes */
-      if (cinfo->Se < cinfo->Ss || cinfo->Se > cinfo->lim_Se)
-	goto bad;
-      /* AC scans may have only one component */
-      if (cinfo->comps_in_scan != 1)
-	goto bad;
-    }
-    if (cinfo->Ah != 0) {
-      /* Successive approximation refinement scan: must have Al = Ah-1. */
-      if (cinfo->Ah-1 != cinfo->Al)
-	goto bad;
-    }
-    if (cinfo->Al > 13) {	/* need not check for < 0 */
-      bad:
-      ERREXIT4(cinfo, JERR_BAD_PROGRESSION,
-	       cinfo->Ss, cinfo->Se, cinfo->Ah, cinfo->Al);
-    }
-    /* Update progression status, and verify that scan order is legal.
-     * Note that inter-scan inconsistencies are treated as warnings
-     * not fatal errors ... not clear if this is right way to behave.
-     */
-    for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
-      int coefi, cindex = cinfo->cur_comp_info[ci]->component_index;
-      int *coef_bit_ptr = & cinfo->coef_bits[cindex][0];
-      if (cinfo->Ss && coef_bit_ptr[0] < 0) /* AC without prior DC scan */
-	WARNMS2(cinfo, JWRN_BOGUS_PROGRESSION, cindex, 0);
-      for (coefi = cinfo->Ss; coefi <= cinfo->Se; coefi++) {
-	int expected = (coef_bit_ptr[coefi] < 0) ? 0 : coef_bit_ptr[coefi];
-	if (cinfo->Ah != expected)
-	  WARNMS2(cinfo, JWRN_BOGUS_PROGRESSION, cindex, coefi);
-	coef_bit_ptr[coefi] = cinfo->Al;
-      }
-    }
-    /* Select MCU decoding routine */
-    if (cinfo->Ah == 0) {
-      if (cinfo->Ss == 0)
-	entropy->pub.decode_mcu = decode_mcu_DC_first;
-      else
-	entropy->pub.decode_mcu = decode_mcu_AC_first;
-    } else {
-      if (cinfo->Ss == 0)
-	entropy->pub.decode_mcu = decode_mcu_DC_refine;
-      else
-	entropy->pub.decode_mcu = decode_mcu_AC_refine;
-    }
-  } else {
-    /* Check that the scan parameters Ss, Se, Ah/Al are OK for sequential JPEG.
-     * This ought to be an error condition, but we make it a warning.
-     */
-    if (cinfo->Ss != 0 || cinfo->Ah != 0 || cinfo->Al != 0 ||
-	(cinfo->Se < DCTSIZE2 && cinfo->Se != cinfo->lim_Se))
-      WARNMS(cinfo, JWRN_NOT_SEQUENTIAL);
-    /* Select MCU decoding routine */
-    entropy->pub.decode_mcu = decode_mcu;
-  }
-
-  /* Allocate & initialize requested statistics areas */
-  for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
-    compptr = cinfo->cur_comp_info[ci];
-    if (! cinfo->progressive_mode || (cinfo->Ss == 0 && cinfo->Ah == 0)) {
-      tbl = compptr->dc_tbl_no;
-      if (tbl < 0 || tbl >= NUM_ARITH_TBLS)
-	ERREXIT1(cinfo, JERR_NO_ARITH_TABLE, tbl);
-      if (entropy->dc_stats[tbl] == NULL)
-	entropy->dc_stats[tbl] = (unsigned char *) (*cinfo->mem->alloc_small)
-	  ((j_common_ptr) cinfo, JPOOL_IMAGE, DC_STAT_BINS);
-      MEMZERO(entropy->dc_stats[tbl], DC_STAT_BINS);
-      /* Initialize DC predictions to 0 */
-      entropy->last_dc_val[ci] = 0;
-      entropy->dc_context[ci] = 0;
-    }
-    if ((! cinfo->progressive_mode && cinfo->lim_Se) ||
-	(cinfo->progressive_mode && cinfo->Ss)) {
-      tbl = compptr->ac_tbl_no;
-      if (tbl < 0 || tbl >= NUM_ARITH_TBLS)
-	ERREXIT1(cinfo, JERR_NO_ARITH_TABLE, tbl);
-      if (entropy->ac_stats[tbl] == NULL)
-	entropy->ac_stats[tbl] = (unsigned char *) (*cinfo->mem->alloc_small)
-	  ((j_common_ptr) cinfo, JPOOL_IMAGE, AC_STAT_BINS);
-      MEMZERO(entropy->ac_stats[tbl], AC_STAT_BINS);
-    }
-  }
-
-  /* Initialize arithmetic decoding variables */
-  entropy->c = 0;
-  entropy->a = 0;
-  entropy->ct = -16;	/* force reading 2 initial bytes to fill C */
-
-  /* Initialize restart counter */
-  entropy->restarts_to_go = cinfo->restart_interval;
-}
-
-
-/*
- * Module initialization routine for arithmetic entropy decoding.
- */
-
-GLOBAL(void)
-jinit_arith_decoder (j_decompress_ptr cinfo)
-{
-  arith_entropy_ptr entropy;
-  int i;
-
-  entropy = (arith_entropy_ptr)
-    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				SIZEOF(arith_entropy_decoder));
-  cinfo->entropy = (struct jpeg_entropy_decoder *) entropy;
-  entropy->pub.start_pass = start_pass;
-
-  /* Mark tables unallocated */
-  for (i = 0; i < NUM_ARITH_TBLS; i++) {
-    entropy->dc_stats[i] = NULL;
-    entropy->ac_stats[i] = NULL;
-  }
-
-  /* Initialize index for fixed probability estimation */
-  entropy->fixed_bin[0] = 113;
-
-  if (cinfo->progressive_mode) {
-    /* Create progression status table */
-    int *coef_bit_ptr, ci;
-    cinfo->coef_bits = (int (*)[DCTSIZE2])
-      (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				  cinfo->num_components*DCTSIZE2*SIZEOF(int));
-    coef_bit_ptr = & cinfo->coef_bits[0][0];
-    for (ci = 0; ci < cinfo->num_components; ci++) 
-      for (i = 0; i < DCTSIZE2; i++)
-	*coef_bit_ptr++ = -1;
-  }
-}
+/*
+ * jdarith.c
+ *
+ * Developed 1997-2012 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains portable arithmetic entropy decoding routines for JPEG
+ * (implementing the ISO/IEC IS 10918-1 and CCITT Recommendation ITU-T T.81).
+ *
+ * Both sequential and progressive modes are supported in this single module.
+ *
+ * Suspension is not currently supported in this module.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/* Expanded entropy decoder object for arithmetic decoding. */
+
+typedef struct {
+  struct jpeg_entropy_decoder pub; /* public fields */
+
+  INT32 c;       /* C register, base of coding interval + input bit buffer */
+  INT32 a;               /* A register, normalized size of coding interval */
+  int ct;     /* bit shift counter, # of bits left in bit buffer part of C */
+                                                         /* init: ct = -16 */
+                                                         /* run: ct = 0..7 */
+                                                         /* error: ct = -1 */
+  int last_dc_val[MAX_COMPS_IN_SCAN]; /* last DC coef for each component */
+  int dc_context[MAX_COMPS_IN_SCAN]; /* context index for DC conditioning */
+
+  unsigned int restarts_to_go;	/* MCUs left in this restart interval */
+
+  /* Pointers to statistics areas (these workspaces have image lifespan) */
+  unsigned char * dc_stats[NUM_ARITH_TBLS];
+  unsigned char * ac_stats[NUM_ARITH_TBLS];
+
+  /* Statistics bin for coding with fixed probability 0.5 */
+  unsigned char fixed_bin[4];
+} arith_entropy_decoder;
+
+typedef arith_entropy_decoder * arith_entropy_ptr;
+
+/* The following two definitions specify the allocation chunk size
+ * for the statistics area.
+ * According to sections F.1.4.4.1.3 and F.1.4.4.2, we need at least
+ * 49 statistics bins for DC, and 245 statistics bins for AC coding.
+ *
+ * We use a compact representation with 1 byte per statistics bin,
+ * thus the numbers directly represent byte sizes.
+ * This 1 byte per statistics bin contains the meaning of the MPS
+ * (more probable symbol) in the highest bit (mask 0x80), and the
+ * index into the probability estimation state machine table
+ * in the lower bits (mask 0x7F).
+ */
+
+#define DC_STAT_BINS 64
+#define AC_STAT_BINS 256
+
+
+LOCAL(int)
+get_byte (j_decompress_ptr cinfo)
+/* Read next input byte; we do not support suspension in this module. */
+{
+  struct jpeg_source_mgr * src = cinfo->src;
+
+  if (src->bytes_in_buffer == 0)
+    if (! (*src->fill_input_buffer) (cinfo))
+      ERREXIT(cinfo, JERR_CANT_SUSPEND);
+  src->bytes_in_buffer--;
+  return GETJOCTET(*src->next_input_byte++);
+}
+
+
+/*
+ * The core arithmetic decoding routine (common in JPEG and JBIG).
+ * This needs to go as fast as possible.
+ * Machine-dependent optimization facilities
+ * are not utilized in this portable implementation.
+ * However, this code should be fairly efficient and
+ * may be a good base for further optimizations anyway.
+ *
+ * Return value is 0 or 1 (binary decision).
+ *
+ * Note: I've changed the handling of the code base & bit
+ * buffer register C compared to other implementations
+ * based on the standards layout & procedures.
+ * While it also contains both the actual base of the
+ * coding interval (16 bits) and the next-bits buffer,
+ * the cut-point between these two parts is floating
+ * (instead of fixed) with the bit shift counter CT.
+ * Thus, we also need only one (variable instead of
+ * fixed size) shift for the LPS/MPS decision, and
+ * we can get away with any renormalization update
+ * of C (except for new data insertion, of course).
+ *
+ * I've also introduced a new scheme for accessing
+ * the probability estimation state machine table,
+ * derived from Markus Kuhn's JBIG implementation.
+ */
+
+LOCAL(int)
+arith_decode (j_decompress_ptr cinfo, unsigned char *st)
+{
+  register arith_entropy_ptr e = (arith_entropy_ptr) cinfo->entropy;
+  register unsigned char nl, nm;
+  register INT32 qe, temp;
+  register int sv, data;
+
+  /* Renormalization & data input per section D.2.6 */
+  while (e->a < 0x8000L) {
+    if (--e->ct < 0) {
+      /* Need to fetch next data byte */
+      if (cinfo->unread_marker)
+	data = 0;		/* stuff zero data */
+      else {
+	data = get_byte(cinfo);	/* read next input byte */
+	if (data == 0xFF) {	/* zero stuff or marker code */
+	  do data = get_byte(cinfo);
+	  while (data == 0xFF);	/* swallow extra 0xFF bytes */
+	  if (data == 0)
+	    data = 0xFF;	/* discard stuffed zero byte */
+	  else {
+	    /* Note: Different from the Huffman decoder, hitting
+	     * a marker while processing the compressed data
+	     * segment is legal in arithmetic coding.
+	     * The convention is to supply zero data
+	     * then until decoding is complete.
+	     */
+	    cinfo->unread_marker = data;
+	    data = 0;
+	  }
+	}
+      }
+      e->c = (e->c << 8) | data; /* insert data into C register */
+      if ((e->ct += 8) < 0)	 /* update bit shift counter */
+	/* Need more initial bytes */
+	if (++e->ct == 0)
+	  /* Got 2 initial bytes -> re-init A and exit loop */
+	  e->a = 0x8000L; /* => e->a = 0x10000L after loop exit */
+    }
+    e->a <<= 1;
+  }
+
+  /* Fetch values from our compact representation of Table D.3(D.2):
+   * Qe values and probability estimation state machine
+   */
+  sv = *st;
+  qe = jpeg_aritab[sv & 0x7F];	/* => Qe_Value */
+  nl = qe & 0xFF; qe >>= 8;	/* Next_Index_LPS + Switch_MPS */
+  nm = qe & 0xFF; qe >>= 8;	/* Next_Index_MPS */
+
+  /* Decode & estimation procedures per sections D.2.4 & D.2.5 */
+  temp = e->a - qe;
+  e->a = temp;
+  temp <<= e->ct;
+  if (e->c >= temp) {
+    e->c -= temp;
+    /* Conditional LPS (less probable symbol) exchange */
+    if (e->a < qe) {
+      e->a = qe;
+      *st = (sv & 0x80) ^ nm;	/* Estimate_after_MPS */
+    } else {
+      e->a = qe;
+      *st = (sv & 0x80) ^ nl;	/* Estimate_after_LPS */
+      sv ^= 0x80;		/* Exchange LPS/MPS */
+    }
+  } else if (e->a < 0x8000L) {
+    /* Conditional MPS (more probable symbol) exchange */
+    if (e->a < qe) {
+      *st = (sv & 0x80) ^ nl;	/* Estimate_after_LPS */
+      sv ^= 0x80;		/* Exchange LPS/MPS */
+    } else {
+      *st = (sv & 0x80) ^ nm;	/* Estimate_after_MPS */
+    }
+  }
+
+  return sv >> 7;
+}
+
+
+/*
+ * Check for a restart marker & resynchronize decoder.
+ */
+
+LOCAL(void)
+process_restart (j_decompress_ptr cinfo)
+{
+  arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy;
+  int ci;
+  jpeg_component_info * compptr;
+
+  /* Advance past the RSTn marker */
+  if (! (*cinfo->marker->read_restart_marker) (cinfo))
+    ERREXIT(cinfo, JERR_CANT_SUSPEND);
+
+  /* Re-initialize statistics areas */
+  for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+    compptr = cinfo->cur_comp_info[ci];
+    if (! cinfo->progressive_mode || (cinfo->Ss == 0 && cinfo->Ah == 0)) {
+      MEMZERO(entropy->dc_stats[compptr->dc_tbl_no], DC_STAT_BINS);
+      /* Reset DC predictions to 0 */
+      entropy->last_dc_val[ci] = 0;
+      entropy->dc_context[ci] = 0;
+    }
+    if ((! cinfo->progressive_mode && cinfo->lim_Se) ||
+	(cinfo->progressive_mode && cinfo->Ss)) {
+      MEMZERO(entropy->ac_stats[compptr->ac_tbl_no], AC_STAT_BINS);
+    }
+  }
+
+  /* Reset arithmetic decoding variables */
+  entropy->c = 0;
+  entropy->a = 0;
+  entropy->ct = -16;	/* force reading 2 initial bytes to fill C */
+
+  /* Reset restart counter */
+  entropy->restarts_to_go = cinfo->restart_interval;
+}
+
+
+/*
+ * Arithmetic MCU decoding.
+ * Each of these routines decodes and returns one MCU's worth of
+ * arithmetic-compressed coefficients.
+ * The coefficients are reordered from zigzag order into natural array order,
+ * but are not dequantized.
+ *
+ * The i'th block of the MCU is stored into the block pointed to by
+ * MCU_data[i].  WE ASSUME THIS AREA IS INITIALLY ZEROED BY THE CALLER.
+ */
+
+/*
+ * MCU decoding for DC initial scan (either spectral selection,
+ * or first pass of successive approximation).
+ */
+
+METHODDEF(boolean)
+decode_mcu_DC_first (j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
+{
+  arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy;
+  JBLOCKROW block;
+  unsigned char *st;
+  int blkn, ci, tbl, sign;
+  int v, m;
+
+  /* Process restart marker if needed */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0)
+      process_restart(cinfo);
+    entropy->restarts_to_go--;
+  }
+
+  if (entropy->ct == -1) return TRUE;	/* if error do nothing */
+
+  /* Outer loop handles each block in the MCU */
+
+  for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
+    block = MCU_data[blkn];
+    ci = cinfo->MCU_membership[blkn];
+    tbl = cinfo->cur_comp_info[ci]->dc_tbl_no;
+
+    /* Sections F.2.4.1 & F.1.4.4.1: Decoding of DC coefficients */
+
+    /* Table F.4: Point to statistics bin S0 for DC coefficient coding */
+    st = entropy->dc_stats[tbl] + entropy->dc_context[ci];
+
+    /* Figure F.19: Decode_DC_DIFF */
+    if (arith_decode(cinfo, st) == 0)
+      entropy->dc_context[ci] = 0;
+    else {
+      /* Figure F.21: Decoding nonzero value v */
+      /* Figure F.22: Decoding the sign of v */
+      sign = arith_decode(cinfo, st + 1);
+      st += 2; st += sign;
+      /* Figure F.23: Decoding the magnitude category of v */
+      if ((m = arith_decode(cinfo, st)) != 0) {
+	st = entropy->dc_stats[tbl] + 20;	/* Table F.4: X1 = 20 */
+	while (arith_decode(cinfo, st)) {
+	  if ((m <<= 1) == 0x8000) {
+	    WARNMS(cinfo, JWRN_ARITH_BAD_CODE);
+	    entropy->ct = -1;			/* magnitude overflow */
+	    return TRUE;
+	  }
+	  st += 1;
+	}
+      }
+      /* Section F.1.4.4.1.2: Establish dc_context conditioning category */
+      if (m < (int) ((1L << cinfo->arith_dc_L[tbl]) >> 1))
+	entropy->dc_context[ci] = 0;		   /* zero diff category */
+      else if (m > (int) ((1L << cinfo->arith_dc_U[tbl]) >> 1))
+	entropy->dc_context[ci] = 12 + (sign * 4); /* large diff category */
+      else
+	entropy->dc_context[ci] = 4 + (sign * 4);  /* small diff category */
+      v = m;
+      /* Figure F.24: Decoding the magnitude bit pattern of v */
+      st += 14;
+      while (m >>= 1)
+	if (arith_decode(cinfo, st)) v |= m;
+      v += 1; if (sign) v = -v;
+      entropy->last_dc_val[ci] += v;
+    }
+
+    /* Scale and output the DC coefficient (assumes jpeg_natural_order[0]=0) */
+    (*block)[0] = (JCOEF) (entropy->last_dc_val[ci] << cinfo->Al);
+  }
+
+  return TRUE;
+}
+
+
+/*
+ * MCU decoding for AC initial scan (either spectral selection,
+ * or first pass of successive approximation).
+ */
+
+METHODDEF(boolean)
+decode_mcu_AC_first (j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
+{
+  arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy;
+  JBLOCKROW block;
+  unsigned char *st;
+  int tbl, sign, k;
+  int v, m;
+  const int * natural_order;
+
+  /* Process restart marker if needed */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0)
+      process_restart(cinfo);
+    entropy->restarts_to_go--;
+  }
+
+  if (entropy->ct == -1) return TRUE;	/* if error do nothing */
+
+  natural_order = cinfo->natural_order;
+
+  /* There is always only one block per MCU */
+  block = MCU_data[0];
+  tbl = cinfo->cur_comp_info[0]->ac_tbl_no;
+
+  /* Sections F.2.4.2 & F.1.4.4.2: Decoding of AC coefficients */
+
+  /* Figure F.20: Decode_AC_coefficients */
+  k = cinfo->Ss - 1;
+  do {
+    st = entropy->ac_stats[tbl] + 3 * k;
+    if (arith_decode(cinfo, st)) break;		/* EOB flag */
+    for (;;) {
+      k++;
+      if (arith_decode(cinfo, st + 1)) break;
+      st += 3;
+      if (k >= cinfo->Se) {
+	WARNMS(cinfo, JWRN_ARITH_BAD_CODE);
+	entropy->ct = -1;			/* spectral overflow */
+	return TRUE;
+      }
+    }
+    /* Figure F.21: Decoding nonzero value v */
+    /* Figure F.22: Decoding the sign of v */
+    sign = arith_decode(cinfo, entropy->fixed_bin);
+    st += 2;
+    /* Figure F.23: Decoding the magnitude category of v */
+    if ((m = arith_decode(cinfo, st)) != 0) {
+      if (arith_decode(cinfo, st)) {
+	m <<= 1;
+	st = entropy->ac_stats[tbl] +
+	     (k <= cinfo->arith_ac_K[tbl] ? 189 : 217);
+	while (arith_decode(cinfo, st)) {
+	  if ((m <<= 1) == 0x8000) {
+	    WARNMS(cinfo, JWRN_ARITH_BAD_CODE);
+	    entropy->ct = -1;			/* magnitude overflow */
+	    return TRUE;
+	  }
+	  st += 1;
+	}
+      }
+    }
+    v = m;
+    /* Figure F.24: Decoding the magnitude bit pattern of v */
+    st += 14;
+    while (m >>= 1)
+      if (arith_decode(cinfo, st)) v |= m;
+    v += 1; if (sign) v = -v;
+    /* Scale and output coefficient in natural (dezigzagged) order */
+    (*block)[natural_order[k]] = (JCOEF) (v << cinfo->Al);
+  } while (k < cinfo->Se);
+
+  return TRUE;
+}
+
+
+/*
+ * MCU decoding for DC successive approximation refinement scan.
+ */
+
+METHODDEF(boolean)
+decode_mcu_DC_refine (j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
+{
+  arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy;
+  unsigned char *st;
+  int p1, blkn;
+
+  /* Process restart marker if needed */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0)
+      process_restart(cinfo);
+    entropy->restarts_to_go--;
+  }
+
+  st = entropy->fixed_bin;	/* use fixed probability estimation */
+  p1 = 1 << cinfo->Al;		/* 1 in the bit position being coded */
+
+  /* Outer loop handles each block in the MCU */
+
+  for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
+    /* Encoded data is simply the next bit of the two's-complement DC value */
+    if (arith_decode(cinfo, st))
+      MCU_data[blkn][0][0] |= p1;
+  }
+
+  return TRUE;
+}
+
+
+/*
+ * MCU decoding for AC successive approximation refinement scan.
+ */
+
+METHODDEF(boolean)
+decode_mcu_AC_refine (j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
+{
+  arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy;
+  JBLOCKROW block;
+  JCOEFPTR thiscoef;
+  unsigned char *st;
+  int tbl, k, kex;
+  int p1, m1;
+  const int * natural_order;
+
+  /* Process restart marker if needed */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0)
+      process_restart(cinfo);
+    entropy->restarts_to_go--;
+  }
+
+  if (entropy->ct == -1) return TRUE;	/* if error do nothing */
+
+  natural_order = cinfo->natural_order;
+
+  /* There is always only one block per MCU */
+  block = MCU_data[0];
+  tbl = cinfo->cur_comp_info[0]->ac_tbl_no;
+
+  p1 = 1 << cinfo->Al;		/* 1 in the bit position being coded */
+  m1 = (-1) << cinfo->Al;	/* -1 in the bit position being coded */
+
+  /* Establish EOBx (previous stage end-of-block) index */
+  kex = cinfo->Se;
+  do {
+    if ((*block)[natural_order[kex]]) break;
+  } while (--kex);
+
+  k = cinfo->Ss - 1;
+  do {
+    st = entropy->ac_stats[tbl] + 3 * k;
+    if (k >= kex)
+      if (arith_decode(cinfo, st)) break;	/* EOB flag */
+    for (;;) {
+      thiscoef = *block + natural_order[++k];
+      if (*thiscoef) {				/* previously nonzero coef */
+	if (arith_decode(cinfo, st + 2)) {
+	  if (*thiscoef < 0)
+	    *thiscoef += m1;
+	  else
+	    *thiscoef += p1;
+	}
+	break;
+      }
+      if (arith_decode(cinfo, st + 1)) {	/* newly nonzero coef */
+	if (arith_decode(cinfo, entropy->fixed_bin))
+	  *thiscoef = m1;
+	else
+	  *thiscoef = p1;
+	break;
+      }
+      st += 3;
+      if (k >= cinfo->Se) {
+	WARNMS(cinfo, JWRN_ARITH_BAD_CODE);
+	entropy->ct = -1;			/* spectral overflow */
+	return TRUE;
+      }
+    }
+  } while (k < cinfo->Se);
+
+  return TRUE;
+}
+
+
+/*
+ * Decode one MCU's worth of arithmetic-compressed coefficients.
+ */
+
+METHODDEF(boolean)
+decode_mcu (j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
+{
+  arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy;
+  jpeg_component_info * compptr;
+  JBLOCKROW block;
+  unsigned char *st;
+  int blkn, ci, tbl, sign, k;
+  int v, m;
+  const int * natural_order;
+
+  /* Process restart marker if needed */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0)
+      process_restart(cinfo);
+    entropy->restarts_to_go--;
+  }
+
+  if (entropy->ct == -1) return TRUE;	/* if error do nothing */
+
+  natural_order = cinfo->natural_order;
+
+  /* Outer loop handles each block in the MCU */
+
+  for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
+    block = MCU_data[blkn];
+    ci = cinfo->MCU_membership[blkn];
+    compptr = cinfo->cur_comp_info[ci];
+
+    /* Sections F.2.4.1 & F.1.4.4.1: Decoding of DC coefficients */
+
+    tbl = compptr->dc_tbl_no;
+
+    /* Table F.4: Point to statistics bin S0 for DC coefficient coding */
+    st = entropy->dc_stats[tbl] + entropy->dc_context[ci];
+
+    /* Figure F.19: Decode_DC_DIFF */
+    if (arith_decode(cinfo, st) == 0)
+      entropy->dc_context[ci] = 0;
+    else {
+      /* Figure F.21: Decoding nonzero value v */
+      /* Figure F.22: Decoding the sign of v */
+      sign = arith_decode(cinfo, st + 1);
+      st += 2; st += sign;
+      /* Figure F.23: Decoding the magnitude category of v */
+      if ((m = arith_decode(cinfo, st)) != 0) {
+	st = entropy->dc_stats[tbl] + 20;	/* Table F.4: X1 = 20 */
+	while (arith_decode(cinfo, st)) {
+	  if ((m <<= 1) == 0x8000) {
+	    WARNMS(cinfo, JWRN_ARITH_BAD_CODE);
+	    entropy->ct = -1;			/* magnitude overflow */
+	    return TRUE;
+	  }
+	  st += 1;
+	}
+      }
+      /* Section F.1.4.4.1.2: Establish dc_context conditioning category */
+      if (m < (int) ((1L << cinfo->arith_dc_L[tbl]) >> 1))
+	entropy->dc_context[ci] = 0;		   /* zero diff category */
+      else if (m > (int) ((1L << cinfo->arith_dc_U[tbl]) >> 1))
+	entropy->dc_context[ci] = 12 + (sign * 4); /* large diff category */
+      else
+	entropy->dc_context[ci] = 4 + (sign * 4);  /* small diff category */
+      v = m;
+      /* Figure F.24: Decoding the magnitude bit pattern of v */
+      st += 14;
+      while (m >>= 1)
+	if (arith_decode(cinfo, st)) v |= m;
+      v += 1; if (sign) v = -v;
+      entropy->last_dc_val[ci] += v;
+    }
+
+    (*block)[0] = (JCOEF) entropy->last_dc_val[ci];
+
+    /* Sections F.2.4.2 & F.1.4.4.2: Decoding of AC coefficients */
+
+    if (cinfo->lim_Se == 0) continue;
+    tbl = compptr->ac_tbl_no;
+    k = 0;
+
+    /* Figure F.20: Decode_AC_coefficients */
+    do {
+      st = entropy->ac_stats[tbl] + 3 * k;
+      if (arith_decode(cinfo, st)) break;	/* EOB flag */
+      for (;;) {
+	k++;
+	if (arith_decode(cinfo, st + 1)) break;
+	st += 3;
+	if (k >= cinfo->lim_Se) {
+	  WARNMS(cinfo, JWRN_ARITH_BAD_CODE);
+	  entropy->ct = -1;			/* spectral overflow */
+	  return TRUE;
+	}
+      }
+      /* Figure F.21: Decoding nonzero value v */
+      /* Figure F.22: Decoding the sign of v */
+      sign = arith_decode(cinfo, entropy->fixed_bin);
+      st += 2;
+      /* Figure F.23: Decoding the magnitude category of v */
+      if ((m = arith_decode(cinfo, st)) != 0) {
+	if (arith_decode(cinfo, st)) {
+	  m <<= 1;
+	  st = entropy->ac_stats[tbl] +
+	       (k <= cinfo->arith_ac_K[tbl] ? 189 : 217);
+	  while (arith_decode(cinfo, st)) {
+	    if ((m <<= 1) == 0x8000) {
+	      WARNMS(cinfo, JWRN_ARITH_BAD_CODE);
+	      entropy->ct = -1;			/* magnitude overflow */
+	      return TRUE;
+	    }
+	    st += 1;
+	  }
+	}
+      }
+      v = m;
+      /* Figure F.24: Decoding the magnitude bit pattern of v */
+      st += 14;
+      while (m >>= 1)
+	if (arith_decode(cinfo, st)) v |= m;
+      v += 1; if (sign) v = -v;
+      (*block)[natural_order[k]] = (JCOEF) v;
+    } while (k < cinfo->lim_Se);
+  }
+
+  return TRUE;
+}
+
+
+/*
+ * Initialize for an arithmetic-compressed scan.
+ */
+
+METHODDEF(void)
+start_pass (j_decompress_ptr cinfo)
+{
+  arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy;
+  int ci, tbl;
+  jpeg_component_info * compptr;
+
+  if (cinfo->progressive_mode) {
+    /* Validate progressive scan parameters */
+    if (cinfo->Ss == 0) {
+      if (cinfo->Se != 0)
+	goto bad;
+    } else {
+      /* need not check Ss/Se < 0 since they came from unsigned bytes */
+      if (cinfo->Se < cinfo->Ss || cinfo->Se > cinfo->lim_Se)
+	goto bad;
+      /* AC scans may have only one component */
+      if (cinfo->comps_in_scan != 1)
+	goto bad;
+    }
+    if (cinfo->Ah != 0) {
+      /* Successive approximation refinement scan: must have Al = Ah-1. */
+      if (cinfo->Ah-1 != cinfo->Al)
+	goto bad;
+    }
+    if (cinfo->Al > 13) {	/* need not check for < 0 */
+      bad:
+      ERREXIT4(cinfo, JERR_BAD_PROGRESSION,
+	       cinfo->Ss, cinfo->Se, cinfo->Ah, cinfo->Al);
+    }
+    /* Update progression status, and verify that scan order is legal.
+     * Note that inter-scan inconsistencies are treated as warnings
+     * not fatal errors ... not clear if this is right way to behave.
+     */
+    for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+      int coefi, cindex = cinfo->cur_comp_info[ci]->component_index;
+      int *coef_bit_ptr = & cinfo->coef_bits[cindex][0];
+      if (cinfo->Ss && coef_bit_ptr[0] < 0) /* AC without prior DC scan */
+	WARNMS2(cinfo, JWRN_BOGUS_PROGRESSION, cindex, 0);
+      for (coefi = cinfo->Ss; coefi <= cinfo->Se; coefi++) {
+	int expected = (coef_bit_ptr[coefi] < 0) ? 0 : coef_bit_ptr[coefi];
+	if (cinfo->Ah != expected)
+	  WARNMS2(cinfo, JWRN_BOGUS_PROGRESSION, cindex, coefi);
+	coef_bit_ptr[coefi] = cinfo->Al;
+      }
+    }
+    /* Select MCU decoding routine */
+    if (cinfo->Ah == 0) {
+      if (cinfo->Ss == 0)
+	entropy->pub.decode_mcu = decode_mcu_DC_first;
+      else
+	entropy->pub.decode_mcu = decode_mcu_AC_first;
+    } else {
+      if (cinfo->Ss == 0)
+	entropy->pub.decode_mcu = decode_mcu_DC_refine;
+      else
+	entropy->pub.decode_mcu = decode_mcu_AC_refine;
+    }
+  } else {
+    /* Check that the scan parameters Ss, Se, Ah/Al are OK for sequential JPEG.
+     * This ought to be an error condition, but we make it a warning.
+     */
+    if (cinfo->Ss != 0 || cinfo->Ah != 0 || cinfo->Al != 0 ||
+	(cinfo->Se < DCTSIZE2 && cinfo->Se != cinfo->lim_Se))
+      WARNMS(cinfo, JWRN_NOT_SEQUENTIAL);
+    /* Select MCU decoding routine */
+    entropy->pub.decode_mcu = decode_mcu;
+  }
+
+  /* Allocate & initialize requested statistics areas */
+  for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+    compptr = cinfo->cur_comp_info[ci];
+    if (! cinfo->progressive_mode || (cinfo->Ss == 0 && cinfo->Ah == 0)) {
+      tbl = compptr->dc_tbl_no;
+      if (tbl < 0 || tbl >= NUM_ARITH_TBLS)
+	ERREXIT1(cinfo, JERR_NO_ARITH_TABLE, tbl);
+      if (entropy->dc_stats[tbl] == NULL)
+	entropy->dc_stats[tbl] = (unsigned char *) (*cinfo->mem->alloc_small)
+	  ((j_common_ptr) cinfo, JPOOL_IMAGE, DC_STAT_BINS);
+      MEMZERO(entropy->dc_stats[tbl], DC_STAT_BINS);
+      /* Initialize DC predictions to 0 */
+      entropy->last_dc_val[ci] = 0;
+      entropy->dc_context[ci] = 0;
+    }
+    if ((! cinfo->progressive_mode && cinfo->lim_Se) ||
+	(cinfo->progressive_mode && cinfo->Ss)) {
+      tbl = compptr->ac_tbl_no;
+      if (tbl < 0 || tbl >= NUM_ARITH_TBLS)
+	ERREXIT1(cinfo, JERR_NO_ARITH_TABLE, tbl);
+      if (entropy->ac_stats[tbl] == NULL)
+	entropy->ac_stats[tbl] = (unsigned char *) (*cinfo->mem->alloc_small)
+	  ((j_common_ptr) cinfo, JPOOL_IMAGE, AC_STAT_BINS);
+      MEMZERO(entropy->ac_stats[tbl], AC_STAT_BINS);
+    }
+  }
+
+  /* Initialize arithmetic decoding variables */
+  entropy->c = 0;
+  entropy->a = 0;
+  entropy->ct = -16;	/* force reading 2 initial bytes to fill C */
+
+  /* Initialize restart counter */
+  entropy->restarts_to_go = cinfo->restart_interval;
+}
+
+
+/*
+ * Module initialization routine for arithmetic entropy decoding.
+ */
+
+GLOBAL(void)
+jinit_arith_decoder (j_decompress_ptr cinfo)
+{
+  arith_entropy_ptr entropy;
+  int i;
+
+  entropy = (arith_entropy_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				SIZEOF(arith_entropy_decoder));
+  cinfo->entropy = &entropy->pub;
+  entropy->pub.start_pass = start_pass;
+
+  /* Mark tables unallocated */
+  for (i = 0; i < NUM_ARITH_TBLS; i++) {
+    entropy->dc_stats[i] = NULL;
+    entropy->ac_stats[i] = NULL;
+  }
+
+  /* Initialize index for fixed probability estimation */
+  entropy->fixed_bin[0] = 113;
+
+  if (cinfo->progressive_mode) {
+    /* Create progression status table */
+    int *coef_bit_ptr, ci;
+    cinfo->coef_bits = (int (*)[DCTSIZE2])
+      (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				  cinfo->num_components*DCTSIZE2*SIZEOF(int));
+    coef_bit_ptr = & cinfo->coef_bits[0][0];
+    for (ci = 0; ci < cinfo->num_components; ci++) 
+      for (i = 0; i < DCTSIZE2; i++)
+	*coef_bit_ptr++ = -1;
+  }
+}
diff --git a/plugins/AdvaImg/src/LibJPEG/jdatadst.c b/plugins/AdvaImg/src/LibJPEG/jdatadst.c
index 6981fb73de..677e46346b 100644
--- a/plugins/AdvaImg/src/LibJPEG/jdatadst.c
+++ b/plugins/AdvaImg/src/LibJPEG/jdatadst.c
@@ -1,267 +1,270 @@
-/*
- * jdatadst.c
- *
- * Copyright (C) 1994-1996, Thomas G. Lane.
- * Modified 2009-2012 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains compression data destination routines for the case of
- * emitting JPEG data to memory or to a file (or any stdio stream).
- * While these routines are sufficient for most applications,
- * some will want to use a different destination manager.
- * IMPORTANT: we assume that fwrite() will correctly transcribe an array of
- * JOCTETs into 8-bit-wide elements on external storage.  If char is wider
- * than 8 bits on your machine, you may need to do some tweaking.
- */
-
-/* this is not a core library module, so it doesn't define JPEG_INTERNALS */
-#include "jinclude.h"
-#include "jpeglib.h"
-#include "jerror.h"
-
-#ifndef HAVE_STDLIB_H		/* <stdlib.h> should declare malloc(),free() */
-extern void * malloc JPP((size_t size));
-extern void free JPP((void *ptr));
-#endif
-
-
-/* Expanded data destination object for stdio output */
-
-typedef struct {
-  struct jpeg_destination_mgr pub; /* public fields */
-
-  FILE * outfile;		/* target stream */
-  JOCTET * buffer;		/* start of buffer */
-} my_destination_mgr;
-
-typedef my_destination_mgr * my_dest_ptr;
-
-#define OUTPUT_BUF_SIZE  4096	/* choose an efficiently fwrite'able size */
-
-
-/* Expanded data destination object for memory output */
-
-typedef struct {
-  struct jpeg_destination_mgr pub; /* public fields */
-
-  unsigned char ** outbuffer;	/* target buffer */
-  unsigned long * outsize;
-  unsigned char * newbuffer;	/* newly allocated buffer */
-  JOCTET * buffer;		/* start of buffer */
-  size_t bufsize;
-} my_mem_destination_mgr;
-
-typedef my_mem_destination_mgr * my_mem_dest_ptr;
-
-
-/*
- * Initialize destination --- called by jpeg_start_compress
- * before any data is actually written.
- */
-
-METHODDEF(void)
-init_destination (j_compress_ptr cinfo)
-{
-  my_dest_ptr dest = (my_dest_ptr) cinfo->dest;
-
-  /* Allocate the output buffer --- it will be released when done with image */
-  dest->buffer = (JOCTET *)
-      (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				  OUTPUT_BUF_SIZE * SIZEOF(JOCTET));
-
-  dest->pub.next_output_byte = dest->buffer;
-  dest->pub.free_in_buffer = OUTPUT_BUF_SIZE;
-}
-
-METHODDEF(void)
-init_mem_destination (j_compress_ptr cinfo)
-{
-  /* no work necessary here */
-}
-
-
-/*
- * Empty the output buffer --- called whenever buffer fills up.
- *
- * In typical applications, this should write the entire output buffer
- * (ignoring the current state of next_output_byte & free_in_buffer),
- * reset the pointer & count to the start of the buffer, and return TRUE
- * indicating that the buffer has been dumped.
- *
- * In applications that need to be able to suspend compression due to output
- * overrun, a FALSE return indicates that the buffer cannot be emptied now.
- * In this situation, the compressor will return to its caller (possibly with
- * an indication that it has not accepted all the supplied scanlines).  The
- * application should resume compression after it has made more room in the
- * output buffer.  Note that there are substantial restrictions on the use of
- * suspension --- see the documentation.
- *
- * When suspending, the compressor will back up to a convenient restart point
- * (typically the start of the current MCU). next_output_byte & free_in_buffer
- * indicate where the restart point will be if the current call returns FALSE.
- * Data beyond this point will be regenerated after resumption, so do not
- * write it out when emptying the buffer externally.
- */
-
-METHODDEF(boolean)
-empty_output_buffer (j_compress_ptr cinfo)
-{
-  my_dest_ptr dest = (my_dest_ptr) cinfo->dest;
-
-  if (JFWRITE(dest->outfile, dest->buffer, OUTPUT_BUF_SIZE) !=
-      (size_t) OUTPUT_BUF_SIZE)
-    ERREXIT(cinfo, JERR_FILE_WRITE);
-
-  dest->pub.next_output_byte = dest->buffer;
-  dest->pub.free_in_buffer = OUTPUT_BUF_SIZE;
-
-  return TRUE;
-}
-
-METHODDEF(boolean)
-empty_mem_output_buffer (j_compress_ptr cinfo)
-{
-  size_t nextsize;
-  JOCTET * nextbuffer;
-  my_mem_dest_ptr dest = (my_mem_dest_ptr) cinfo->dest;
-
-  /* Try to allocate new buffer with double size */
-  nextsize = dest->bufsize * 2;
-  nextbuffer = (JOCTET *) malloc(nextsize);
-
-  if (nextbuffer == NULL)
-    ERREXIT1(cinfo, JERR_OUT_OF_MEMORY, 10);
-
-  MEMCOPY(nextbuffer, dest->buffer, dest->bufsize);
-
-  if (dest->newbuffer != NULL)
-    free(dest->newbuffer);
-
-  dest->newbuffer = nextbuffer;
-
-  dest->pub.next_output_byte = nextbuffer + dest->bufsize;
-  dest->pub.free_in_buffer = dest->bufsize;
-
-  dest->buffer = nextbuffer;
-  dest->bufsize = nextsize;
-
-  return TRUE;
-}
-
-
-/*
- * Terminate destination --- called by jpeg_finish_compress
- * after all data has been written.  Usually needs to flush buffer.
- *
- * NB: *not* called by jpeg_abort or jpeg_destroy; surrounding
- * application must deal with any cleanup that should happen even
- * for error exit.
- */
-
-METHODDEF(void)
-term_destination (j_compress_ptr cinfo)
-{
-  my_dest_ptr dest = (my_dest_ptr) cinfo->dest;
-  size_t datacount = OUTPUT_BUF_SIZE - dest->pub.free_in_buffer;
-
-  /* Write any data remaining in the buffer */
-  if (datacount > 0) {
-    if (JFWRITE(dest->outfile, dest->buffer, datacount) != datacount)
-      ERREXIT(cinfo, JERR_FILE_WRITE);
-  }
-  fflush(dest->outfile);
-  /* Make sure we wrote the output file OK */
-  if (ferror(dest->outfile))
-    ERREXIT(cinfo, JERR_FILE_WRITE);
-}
-
-METHODDEF(void)
-term_mem_destination (j_compress_ptr cinfo)
-{
-  my_mem_dest_ptr dest = (my_mem_dest_ptr) cinfo->dest;
-
-  *dest->outbuffer = dest->buffer;
-  *dest->outsize = dest->bufsize - dest->pub.free_in_buffer;
-}
-
-
-/*
- * Prepare for output to a stdio stream.
- * The caller must have already opened the stream, and is responsible
- * for closing it after finishing compression.
- */
-
-GLOBAL(void)
-jpeg_stdio_dest (j_compress_ptr cinfo, FILE * outfile)
-{
-  my_dest_ptr dest;
-
-  /* The destination object is made permanent so that multiple JPEG images
-   * can be written to the same file without re-executing jpeg_stdio_dest.
-   * This makes it dangerous to use this manager and a different destination
-   * manager serially with the same JPEG object, because their private object
-   * sizes may be different.  Caveat programmer.
-   */
-  if (cinfo->dest == NULL) {	/* first time for this JPEG object? */
-    cinfo->dest = (struct jpeg_destination_mgr *)
-      (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_PERMANENT,
-				  SIZEOF(my_destination_mgr));
-  }
-
-  dest = (my_dest_ptr) cinfo->dest;
-  dest->pub.init_destination = init_destination;
-  dest->pub.empty_output_buffer = empty_output_buffer;
-  dest->pub.term_destination = term_destination;
-  dest->outfile = outfile;
-}
-
-
-/*
- * Prepare for output to a memory buffer.
- * The caller may supply an own initial buffer with appropriate size.
- * Otherwise, or when the actual data output exceeds the given size,
- * the library adapts the buffer size as necessary.
- * The standard library functions malloc/free are used for allocating
- * larger memory, so the buffer is available to the application after
- * finishing compression, and then the application is responsible for
- * freeing the requested memory.
- */
-
-GLOBAL(void)
-jpeg_mem_dest (j_compress_ptr cinfo,
-	       unsigned char ** outbuffer, unsigned long * outsize)
-{
-  my_mem_dest_ptr dest;
-
-  if (outbuffer == NULL || outsize == NULL)	/* sanity check */
-    ERREXIT(cinfo, JERR_BUFFER_SIZE);
-
-  /* The destination object is made permanent so that multiple JPEG images
-   * can be written to the same buffer without re-executing jpeg_mem_dest.
-   */
-  if (cinfo->dest == NULL) {	/* first time for this JPEG object? */
-    cinfo->dest = (struct jpeg_destination_mgr *)
-      (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_PERMANENT,
-				  SIZEOF(my_mem_destination_mgr));
-  }
-
-  dest = (my_mem_dest_ptr) cinfo->dest;
-  dest->pub.init_destination = init_mem_destination;
-  dest->pub.empty_output_buffer = empty_mem_output_buffer;
-  dest->pub.term_destination = term_mem_destination;
-  dest->outbuffer = outbuffer;
-  dest->outsize = outsize;
-  dest->newbuffer = NULL;
-
-  if (*outbuffer == NULL || *outsize == 0) {
-    /* Allocate initial buffer */
-    dest->newbuffer = *outbuffer = (unsigned char *) malloc(OUTPUT_BUF_SIZE);
-    if (dest->newbuffer == NULL)
-      ERREXIT1(cinfo, JERR_OUT_OF_MEMORY, 10);
-    *outsize = OUTPUT_BUF_SIZE;
-  }
-
-  dest->pub.next_output_byte = dest->buffer = *outbuffer;
-  dest->pub.free_in_buffer = dest->bufsize = *outsize;
-}
+/*
+ * jdatadst.c
+ *
+ * Copyright (C) 1994-1996, Thomas G. Lane.
+ * Modified 2009-2012 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains compression data destination routines for the case of
+ * emitting JPEG data to memory or to a file (or any stdio stream).
+ * While these routines are sufficient for most applications,
+ * some will want to use a different destination manager.
+ * IMPORTANT: we assume that fwrite() will correctly transcribe an array of
+ * JOCTETs into 8-bit-wide elements on external storage.  If char is wider
+ * than 8 bits on your machine, you may need to do some tweaking.
+ */
+
+/* this is not a core library module, so it doesn't define JPEG_INTERNALS */
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jerror.h"
+
+#ifndef HAVE_STDLIB_H		/* <stdlib.h> should declare malloc(),free() */
+extern void * malloc JPP((size_t size));
+extern void free JPP((void *ptr));
+#endif
+
+
+/* Expanded data destination object for stdio output */
+
+typedef struct {
+  struct jpeg_destination_mgr pub; /* public fields */
+
+  FILE * outfile;		/* target stream */
+  JOCTET * buffer;		/* start of buffer */
+} my_destination_mgr;
+
+typedef my_destination_mgr * my_dest_ptr;
+
+#define OUTPUT_BUF_SIZE  4096	/* choose an efficiently fwrite'able size */
+
+
+/* Expanded data destination object for memory output */
+
+typedef struct {
+  struct jpeg_destination_mgr pub; /* public fields */
+
+  unsigned char ** outbuffer;	/* target buffer */
+  unsigned long * outsize;
+  unsigned char * newbuffer;	/* newly allocated buffer */
+  JOCTET * buffer;		/* start of buffer */
+  size_t bufsize;
+} my_mem_destination_mgr;
+
+typedef my_mem_destination_mgr * my_mem_dest_ptr;
+
+
+/*
+ * Initialize destination --- called by jpeg_start_compress
+ * before any data is actually written.
+ */
+
+METHODDEF(void)
+init_destination (j_compress_ptr cinfo)
+{
+  my_dest_ptr dest = (my_dest_ptr) cinfo->dest;
+
+  /* Allocate the output buffer --- it will be released when done with image */
+  dest->buffer = (JOCTET *)
+      (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				  OUTPUT_BUF_SIZE * SIZEOF(JOCTET));
+
+  dest->pub.next_output_byte = dest->buffer;
+  dest->pub.free_in_buffer = OUTPUT_BUF_SIZE;
+}
+
+METHODDEF(void)
+init_mem_destination (j_compress_ptr cinfo)
+{
+  /* no work necessary here */
+}
+
+
+/*
+ * Empty the output buffer --- called whenever buffer fills up.
+ *
+ * In typical applications, this should write the entire output buffer
+ * (ignoring the current state of next_output_byte & free_in_buffer),
+ * reset the pointer & count to the start of the buffer, and return TRUE
+ * indicating that the buffer has been dumped.
+ *
+ * In applications that need to be able to suspend compression due to output
+ * overrun, a FALSE return indicates that the buffer cannot be emptied now.
+ * In this situation, the compressor will return to its caller (possibly with
+ * an indication that it has not accepted all the supplied scanlines).  The
+ * application should resume compression after it has made more room in the
+ * output buffer.  Note that there are substantial restrictions on the use of
+ * suspension --- see the documentation.
+ *
+ * When suspending, the compressor will back up to a convenient restart point
+ * (typically the start of the current MCU). next_output_byte & free_in_buffer
+ * indicate where the restart point will be if the current call returns FALSE.
+ * Data beyond this point will be regenerated after resumption, so do not
+ * write it out when emptying the buffer externally.
+ */
+
+METHODDEF(boolean)
+empty_output_buffer (j_compress_ptr cinfo)
+{
+  my_dest_ptr dest = (my_dest_ptr) cinfo->dest;
+
+  if (JFWRITE(dest->outfile, dest->buffer, OUTPUT_BUF_SIZE) !=
+      (size_t) OUTPUT_BUF_SIZE)
+    ERREXIT(cinfo, JERR_FILE_WRITE);
+
+  dest->pub.next_output_byte = dest->buffer;
+  dest->pub.free_in_buffer = OUTPUT_BUF_SIZE;
+
+  return TRUE;
+}
+
+METHODDEF(boolean)
+empty_mem_output_buffer (j_compress_ptr cinfo)
+{
+  size_t nextsize;
+  JOCTET * nextbuffer;
+  my_mem_dest_ptr dest = (my_mem_dest_ptr) cinfo->dest;
+
+  /* Try to allocate new buffer with double size */
+  nextsize = dest->bufsize * 2;
+  nextbuffer = (JOCTET *) malloc(nextsize);
+
+  if (nextbuffer == NULL)
+    ERREXIT1(cinfo, JERR_OUT_OF_MEMORY, 10);
+
+  MEMCOPY(nextbuffer, dest->buffer, dest->bufsize);
+
+  if (dest->newbuffer != NULL)
+    free(dest->newbuffer);
+
+  dest->newbuffer = nextbuffer;
+
+  dest->pub.next_output_byte = nextbuffer + dest->bufsize;
+  dest->pub.free_in_buffer = dest->bufsize;
+
+  dest->buffer = nextbuffer;
+  dest->bufsize = nextsize;
+
+  return TRUE;
+}
+
+
+/*
+ * Terminate destination --- called by jpeg_finish_compress
+ * after all data has been written.  Usually needs to flush buffer.
+ *
+ * NB: *not* called by jpeg_abort or jpeg_destroy; surrounding
+ * application must deal with any cleanup that should happen even
+ * for error exit.
+ */
+
+METHODDEF(void)
+term_destination (j_compress_ptr cinfo)
+{
+  my_dest_ptr dest = (my_dest_ptr) cinfo->dest;
+  size_t datacount = OUTPUT_BUF_SIZE - dest->pub.free_in_buffer;
+
+  /* Write any data remaining in the buffer */
+  if (datacount > 0) {
+    if (JFWRITE(dest->outfile, dest->buffer, datacount) != datacount)
+      ERREXIT(cinfo, JERR_FILE_WRITE);
+  }
+  fflush(dest->outfile);
+  /* Make sure we wrote the output file OK */
+  if (ferror(dest->outfile))
+    ERREXIT(cinfo, JERR_FILE_WRITE);
+}
+
+METHODDEF(void)
+term_mem_destination (j_compress_ptr cinfo)
+{
+  my_mem_dest_ptr dest = (my_mem_dest_ptr) cinfo->dest;
+
+  *dest->outbuffer = dest->buffer;
+  *dest->outsize = dest->bufsize - dest->pub.free_in_buffer;
+}
+
+
+/*
+ * Prepare for output to a stdio stream.
+ * The caller must have already opened the stream, and is responsible
+ * for closing it after finishing compression.
+ */
+
+GLOBAL(void)
+jpeg_stdio_dest (j_compress_ptr cinfo, FILE * outfile)
+{
+  my_dest_ptr dest;
+
+  /* The destination object is made permanent so that multiple JPEG images
+   * can be written to the same file without re-executing jpeg_stdio_dest.
+   * This makes it dangerous to use this manager and a different destination
+   * manager serially with the same JPEG object, because their private object
+   * sizes may be different.  Caveat programmer.
+   */
+  if (cinfo->dest == NULL) {	/* first time for this JPEG object? */
+    cinfo->dest = (struct jpeg_destination_mgr *)
+      (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_PERMANENT,
+				  SIZEOF(my_destination_mgr));
+  }
+
+  dest = (my_dest_ptr) cinfo->dest;
+  dest->pub.init_destination = init_destination;
+  dest->pub.empty_output_buffer = empty_output_buffer;
+  dest->pub.term_destination = term_destination;
+  dest->outfile = outfile;
+}
+
+
+/*
+ * Prepare for output to a memory buffer.
+ * The caller may supply an own initial buffer with appropriate size.
+ * Otherwise, or when the actual data output exceeds the given size,
+ * the library adapts the buffer size as necessary.
+ * The standard library functions malloc/free are used for allocating
+ * larger memory, so the buffer is available to the application after
+ * finishing compression, and then the application is responsible for
+ * freeing the requested memory.
+ * Note:  An initial buffer supplied by the caller is expected to be
+ * managed by the application.  The library does not free such buffer
+ * when allocating a larger buffer.
+ */
+
+GLOBAL(void)
+jpeg_mem_dest (j_compress_ptr cinfo,
+	       unsigned char ** outbuffer, unsigned long * outsize)
+{
+  my_mem_dest_ptr dest;
+
+  if (outbuffer == NULL || outsize == NULL)	/* sanity check */
+    ERREXIT(cinfo, JERR_BUFFER_SIZE);
+
+  /* The destination object is made permanent so that multiple JPEG images
+   * can be written to the same buffer without re-executing jpeg_mem_dest.
+   */
+  if (cinfo->dest == NULL) {	/* first time for this JPEG object? */
+    cinfo->dest = (struct jpeg_destination_mgr *)
+      (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_PERMANENT,
+				  SIZEOF(my_mem_destination_mgr));
+  }
+
+  dest = (my_mem_dest_ptr) cinfo->dest;
+  dest->pub.init_destination = init_mem_destination;
+  dest->pub.empty_output_buffer = empty_mem_output_buffer;
+  dest->pub.term_destination = term_mem_destination;
+  dest->outbuffer = outbuffer;
+  dest->outsize = outsize;
+  dest->newbuffer = NULL;
+
+  if (*outbuffer == NULL || *outsize == 0) {
+    /* Allocate initial buffer */
+    dest->newbuffer = *outbuffer = (unsigned char *) malloc(OUTPUT_BUF_SIZE);
+    if (dest->newbuffer == NULL)
+      ERREXIT1(cinfo, JERR_OUT_OF_MEMORY, 10);
+    *outsize = OUTPUT_BUF_SIZE;
+  }
+
+  dest->pub.next_output_byte = dest->buffer = *outbuffer;
+  dest->pub.free_in_buffer = dest->bufsize = *outsize;
+}
diff --git a/plugins/AdvaImg/src/LibJPEG/jdatasrc.c b/plugins/AdvaImg/src/LibJPEG/jdatasrc.c
index 7be59a88a1..e8bca76cdb 100644
--- a/plugins/AdvaImg/src/LibJPEG/jdatasrc.c
+++ b/plugins/AdvaImg/src/LibJPEG/jdatasrc.c
@@ -1,275 +1,275 @@
-/*
- * jdatasrc.c
- *
- * Copyright (C) 1994-1996, Thomas G. Lane.
- * Modified 2009-2011 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains decompression data source routines for the case of
- * reading JPEG data from memory or from a file (or any stdio stream).
- * While these routines are sufficient for most applications,
- * some will want to use a different source manager.
- * IMPORTANT: we assume that fread() will correctly transcribe an array of
- * JOCTETs from 8-bit-wide elements on external storage.  If char is wider
- * than 8 bits on your machine, you may need to do some tweaking.
- */
-
-/* this is not a core library module, so it doesn't define JPEG_INTERNALS */
-#include "jinclude.h"
-#include "jpeglib.h"
-#include "jerror.h"
-
-
-/* Expanded data source object for stdio input */
-
-typedef struct {
-  struct jpeg_source_mgr pub;	/* public fields */
-
-  FILE * infile;		/* source stream */
-  JOCTET * buffer;		/* start of buffer */
-  boolean start_of_file;	/* have we gotten any data yet? */
-} my_source_mgr;
-
-typedef my_source_mgr * my_src_ptr;
-
-#define INPUT_BUF_SIZE  4096	/* choose an efficiently fread'able size */
-
-
-/*
- * Initialize source --- called by jpeg_read_header
- * before any data is actually read.
- */
-
-METHODDEF(void)
-init_source (j_decompress_ptr cinfo)
-{
-  my_src_ptr src = (my_src_ptr) cinfo->src;
-
-  /* We reset the empty-input-file flag for each image,
-   * but we don't clear the input buffer.
-   * This is correct behavior for reading a series of images from one source.
-   */
-  src->start_of_file = TRUE;
-}
-
-METHODDEF(void)
-init_mem_source (j_decompress_ptr cinfo)
-{
-  /* no work necessary here */
-}
-
-
-/*
- * Fill the input buffer --- called whenever buffer is emptied.
- *
- * In typical applications, this should read fresh data into the buffer
- * (ignoring the current state of next_input_byte & bytes_in_buffer),
- * reset the pointer & count to the start of the buffer, and return TRUE
- * indicating that the buffer has been reloaded.  It is not necessary to
- * fill the buffer entirely, only to obtain at least one more byte.
- *
- * There is no such thing as an EOF return.  If the end of the file has been
- * reached, the routine has a choice of ERREXIT() or inserting fake data into
- * the buffer.  In most cases, generating a warning message and inserting a
- * fake EOI marker is the best course of action --- this will allow the
- * decompressor to output however much of the image is there.  However,
- * the resulting error message is misleading if the real problem is an empty
- * input file, so we handle that case specially.
- *
- * In applications that need to be able to suspend compression due to input
- * not being available yet, a FALSE return indicates that no more data can be
- * obtained right now, but more may be forthcoming later.  In this situation,
- * the decompressor will return to its caller (with an indication of the
- * number of scanlines it has read, if any).  The application should resume
- * decompression after it has loaded more data into the input buffer.  Note
- * that there are substantial restrictions on the use of suspension --- see
- * the documentation.
- *
- * When suspending, the decompressor will back up to a convenient restart point
- * (typically the start of the current MCU). next_input_byte & bytes_in_buffer
- * indicate where the restart point will be if the current call returns FALSE.
- * Data beyond this point must be rescanned after resumption, so move it to
- * the front of the buffer rather than discarding it.
- */
-
-METHODDEF(boolean)
-fill_input_buffer (j_decompress_ptr cinfo)
-{
-  my_src_ptr src = (my_src_ptr) cinfo->src;
-  size_t nbytes;
-
-  nbytes = JFREAD(src->infile, src->buffer, INPUT_BUF_SIZE);
-
-  if (nbytes <= 0) {
-    if (src->start_of_file)	/* Treat empty input file as fatal error */
-      ERREXIT(cinfo, JERR_INPUT_EMPTY);
-    WARNMS(cinfo, JWRN_JPEG_EOF);
-    /* Insert a fake EOI marker */
-    src->buffer[0] = (JOCTET) 0xFF;
-    src->buffer[1] = (JOCTET) JPEG_EOI;
-    nbytes = 2;
-  }
-
-  src->pub.next_input_byte = src->buffer;
-  src->pub.bytes_in_buffer = nbytes;
-  src->start_of_file = FALSE;
-
-  return TRUE;
-}
-
-METHODDEF(boolean)
-fill_mem_input_buffer (j_decompress_ptr cinfo)
-{
-  static const JOCTET mybuffer[4] = {
-    (JOCTET) 0xFF, (JOCTET) JPEG_EOI, 0, 0
-  };
-
-  /* The whole JPEG data is expected to reside in the supplied memory
-   * buffer, so any request for more data beyond the given buffer size
-   * is treated as an error.
-   */
-  WARNMS(cinfo, JWRN_JPEG_EOF);
-
-  /* Insert a fake EOI marker */
-
-  cinfo->src->next_input_byte = mybuffer;
-  cinfo->src->bytes_in_buffer = 2;
-
-  return TRUE;
-}
-
-
-/*
- * Skip data --- used to skip over a potentially large amount of
- * uninteresting data (such as an APPn marker).
- *
- * Writers of suspendable-input applications must note that skip_input_data
- * is not granted the right to give a suspension return.  If the skip extends
- * beyond the data currently in the buffer, the buffer can be marked empty so
- * that the next read will cause a fill_input_buffer call that can suspend.
- * Arranging for additional bytes to be discarded before reloading the input
- * buffer is the application writer's problem.
- */
-
-METHODDEF(void)
-skip_input_data (j_decompress_ptr cinfo, long num_bytes)
-{
-  struct jpeg_source_mgr * src = cinfo->src;
-
-  /* Just a dumb implementation for now.  Could use fseek() except
-   * it doesn't work on pipes.  Not clear that being smart is worth
-   * any trouble anyway --- large skips are infrequent.
-   */
-  if (num_bytes > 0) {
-    while (num_bytes > (long) src->bytes_in_buffer) {
-      num_bytes -= (long) src->bytes_in_buffer;
-      (void) (*src->fill_input_buffer) (cinfo);
-      /* note we assume that fill_input_buffer will never return FALSE,
-       * so suspension need not be handled.
-       */
-    }
-    src->next_input_byte += (size_t) num_bytes;
-    src->bytes_in_buffer -= (size_t) num_bytes;
-  }
-}
-
-
-/*
- * An additional method that can be provided by data source modules is the
- * resync_to_restart method for error recovery in the presence of RST markers.
- * For the moment, this source module just uses the default resync method
- * provided by the JPEG library.  That method assumes that no backtracking
- * is possible.
- */
-
-
-/*
- * Terminate source --- called by jpeg_finish_decompress
- * after all data has been read.  Often a no-op.
- *
- * NB: *not* called by jpeg_abort or jpeg_destroy; surrounding
- * application must deal with any cleanup that should happen even
- * for error exit.
- */
-
-METHODDEF(void)
-term_source (j_decompress_ptr cinfo)
-{
-  /* no work necessary here */
-}
-
-
-/*
- * Prepare for input from a stdio stream.
- * The caller must have already opened the stream, and is responsible
- * for closing it after finishing decompression.
- */
-
-GLOBAL(void)
-jpeg_stdio_src (j_decompress_ptr cinfo, FILE * infile)
-{
-  my_src_ptr src;
-
-  /* The source object and input buffer are made permanent so that a series
-   * of JPEG images can be read from the same file by calling jpeg_stdio_src
-   * only before the first one.  (If we discarded the buffer at the end of
-   * one image, we'd likely lose the start of the next one.)
-   * This makes it unsafe to use this manager and a different source
-   * manager serially with the same JPEG object.  Caveat programmer.
-   */
-  if (cinfo->src == NULL) {	/* first time for this JPEG object? */
-    cinfo->src = (struct jpeg_source_mgr *)
-      (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_PERMANENT,
-				  SIZEOF(my_source_mgr));
-    src = (my_src_ptr) cinfo->src;
-    src->buffer = (JOCTET *)
-      (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_PERMANENT,
-				  INPUT_BUF_SIZE * SIZEOF(JOCTET));
-  }
-
-  src = (my_src_ptr) cinfo->src;
-  src->pub.init_source = init_source;
-  src->pub.fill_input_buffer = fill_input_buffer;
-  src->pub.skip_input_data = skip_input_data;
-  src->pub.resync_to_restart = jpeg_resync_to_restart; /* use default method */
-  src->pub.term_source = term_source;
-  src->infile = infile;
-  src->pub.bytes_in_buffer = 0; /* forces fill_input_buffer on first read */
-  src->pub.next_input_byte = NULL; /* until buffer loaded */
-}
-
-
-/*
- * Prepare for input from a supplied memory buffer.
- * The buffer must contain the whole JPEG data.
- */
-
-GLOBAL(void)
-jpeg_mem_src (j_decompress_ptr cinfo,
-	      unsigned char * inbuffer, unsigned long insize)
-{
-  struct jpeg_source_mgr * src;
-
-  if (inbuffer == NULL || insize == 0)	/* Treat empty input as fatal error */
-    ERREXIT(cinfo, JERR_INPUT_EMPTY);
-
-  /* The source object is made permanent so that a series of JPEG images
-   * can be read from the same buffer by calling jpeg_mem_src only before
-   * the first one.
-   */
-  if (cinfo->src == NULL) {	/* first time for this JPEG object? */
-    cinfo->src = (struct jpeg_source_mgr *)
-      (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_PERMANENT,
-				  SIZEOF(struct jpeg_source_mgr));
-  }
-
-  src = cinfo->src;
-  src->init_source = init_mem_source;
-  src->fill_input_buffer = fill_mem_input_buffer;
-  src->skip_input_data = skip_input_data;
-  src->resync_to_restart = jpeg_resync_to_restart; /* use default method */
-  src->term_source = term_source;
-  src->bytes_in_buffer = (size_t) insize;
-  src->next_input_byte = (JOCTET *) inbuffer;
-}
+/*
+ * jdatasrc.c
+ *
+ * Copyright (C) 1994-1996, Thomas G. Lane.
+ * Modified 2009-2011 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains decompression data source routines for the case of
+ * reading JPEG data from memory or from a file (or any stdio stream).
+ * While these routines are sufficient for most applications,
+ * some will want to use a different source manager.
+ * IMPORTANT: we assume that fread() will correctly transcribe an array of
+ * JOCTETs from 8-bit-wide elements on external storage.  If char is wider
+ * than 8 bits on your machine, you may need to do some tweaking.
+ */
+
+/* this is not a core library module, so it doesn't define JPEG_INTERNALS */
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jerror.h"
+
+
+/* Expanded data source object for stdio input */
+
+typedef struct {
+  struct jpeg_source_mgr pub;	/* public fields */
+
+  FILE * infile;		/* source stream */
+  JOCTET * buffer;		/* start of buffer */
+  boolean start_of_file;	/* have we gotten any data yet? */
+} my_source_mgr;
+
+typedef my_source_mgr * my_src_ptr;
+
+#define INPUT_BUF_SIZE  4096	/* choose an efficiently fread'able size */
+
+
+/*
+ * Initialize source --- called by jpeg_read_header
+ * before any data is actually read.
+ */
+
+METHODDEF(void)
+init_source (j_decompress_ptr cinfo)
+{
+  my_src_ptr src = (my_src_ptr) cinfo->src;
+
+  /* We reset the empty-input-file flag for each image,
+   * but we don't clear the input buffer.
+   * This is correct behavior for reading a series of images from one source.
+   */
+  src->start_of_file = TRUE;
+}
+
+METHODDEF(void)
+init_mem_source (j_decompress_ptr cinfo)
+{
+  /* no work necessary here */
+}
+
+
+/*
+ * Fill the input buffer --- called whenever buffer is emptied.
+ *
+ * In typical applications, this should read fresh data into the buffer
+ * (ignoring the current state of next_input_byte & bytes_in_buffer),
+ * reset the pointer & count to the start of the buffer, and return TRUE
+ * indicating that the buffer has been reloaded.  It is not necessary to
+ * fill the buffer entirely, only to obtain at least one more byte.
+ *
+ * There is no such thing as an EOF return.  If the end of the file has been
+ * reached, the routine has a choice of ERREXIT() or inserting fake data into
+ * the buffer.  In most cases, generating a warning message and inserting a
+ * fake EOI marker is the best course of action --- this will allow the
+ * decompressor to output however much of the image is there.  However,
+ * the resulting error message is misleading if the real problem is an empty
+ * input file, so we handle that case specially.
+ *
+ * In applications that need to be able to suspend compression due to input
+ * not being available yet, a FALSE return indicates that no more data can be
+ * obtained right now, but more may be forthcoming later.  In this situation,
+ * the decompressor will return to its caller (with an indication of the
+ * number of scanlines it has read, if any).  The application should resume
+ * decompression after it has loaded more data into the input buffer.  Note
+ * that there are substantial restrictions on the use of suspension --- see
+ * the documentation.
+ *
+ * When suspending, the decompressor will back up to a convenient restart point
+ * (typically the start of the current MCU). next_input_byte & bytes_in_buffer
+ * indicate where the restart point will be if the current call returns FALSE.
+ * Data beyond this point must be rescanned after resumption, so move it to
+ * the front of the buffer rather than discarding it.
+ */
+
+METHODDEF(boolean)
+fill_input_buffer (j_decompress_ptr cinfo)
+{
+  my_src_ptr src = (my_src_ptr) cinfo->src;
+  size_t nbytes;
+
+  nbytes = JFREAD(src->infile, src->buffer, INPUT_BUF_SIZE);
+
+  if (nbytes <= 0) {
+    if (src->start_of_file)	/* Treat empty input file as fatal error */
+      ERREXIT(cinfo, JERR_INPUT_EMPTY);
+    WARNMS(cinfo, JWRN_JPEG_EOF);
+    /* Insert a fake EOI marker */
+    src->buffer[0] = (JOCTET) 0xFF;
+    src->buffer[1] = (JOCTET) JPEG_EOI;
+    nbytes = 2;
+  }
+
+  src->pub.next_input_byte = src->buffer;
+  src->pub.bytes_in_buffer = nbytes;
+  src->start_of_file = FALSE;
+
+  return TRUE;
+}
+
+METHODDEF(boolean)
+fill_mem_input_buffer (j_decompress_ptr cinfo)
+{
+  static const JOCTET mybuffer[4] = {
+    (JOCTET) 0xFF, (JOCTET) JPEG_EOI, 0, 0
+  };
+
+  /* The whole JPEG data is expected to reside in the supplied memory
+   * buffer, so any request for more data beyond the given buffer size
+   * is treated as an error.
+   */
+  WARNMS(cinfo, JWRN_JPEG_EOF);
+
+  /* Insert a fake EOI marker */
+
+  cinfo->src->next_input_byte = mybuffer;
+  cinfo->src->bytes_in_buffer = 2;
+
+  return TRUE;
+}
+
+
+/*
+ * Skip data --- used to skip over a potentially large amount of
+ * uninteresting data (such as an APPn marker).
+ *
+ * Writers of suspendable-input applications must note that skip_input_data
+ * is not granted the right to give a suspension return.  If the skip extends
+ * beyond the data currently in the buffer, the buffer can be marked empty so
+ * that the next read will cause a fill_input_buffer call that can suspend.
+ * Arranging for additional bytes to be discarded before reloading the input
+ * buffer is the application writer's problem.
+ */
+
+METHODDEF(void)
+skip_input_data (j_decompress_ptr cinfo, long num_bytes)
+{
+  struct jpeg_source_mgr * src = cinfo->src;
+
+  /* Just a dumb implementation for now.  Could use fseek() except
+   * it doesn't work on pipes.  Not clear that being smart is worth
+   * any trouble anyway --- large skips are infrequent.
+   */
+  if (num_bytes > 0) {
+    while (num_bytes > (long) src->bytes_in_buffer) {
+      num_bytes -= (long) src->bytes_in_buffer;
+      (void) (*src->fill_input_buffer) (cinfo);
+      /* note we assume that fill_input_buffer will never return FALSE,
+       * so suspension need not be handled.
+       */
+    }
+    src->next_input_byte += (size_t) num_bytes;
+    src->bytes_in_buffer -= (size_t) num_bytes;
+  }
+}
+
+
+/*
+ * An additional method that can be provided by data source modules is the
+ * resync_to_restart method for error recovery in the presence of RST markers.
+ * For the moment, this source module just uses the default resync method
+ * provided by the JPEG library.  That method assumes that no backtracking
+ * is possible.
+ */
+
+
+/*
+ * Terminate source --- called by jpeg_finish_decompress
+ * after all data has been read.  Often a no-op.
+ *
+ * NB: *not* called by jpeg_abort or jpeg_destroy; surrounding
+ * application must deal with any cleanup that should happen even
+ * for error exit.
+ */
+
+METHODDEF(void)
+term_source (j_decompress_ptr cinfo)
+{
+  /* no work necessary here */
+}
+
+
+/*
+ * Prepare for input from a stdio stream.
+ * The caller must have already opened the stream, and is responsible
+ * for closing it after finishing decompression.
+ */
+
+GLOBAL(void)
+jpeg_stdio_src (j_decompress_ptr cinfo, FILE * infile)
+{
+  my_src_ptr src;
+
+  /* The source object and input buffer are made permanent so that a series
+   * of JPEG images can be read from the same file by calling jpeg_stdio_src
+   * only before the first one.  (If we discarded the buffer at the end of
+   * one image, we'd likely lose the start of the next one.)
+   * This makes it unsafe to use this manager and a different source
+   * manager serially with the same JPEG object.  Caveat programmer.
+   */
+  if (cinfo->src == NULL) {	/* first time for this JPEG object? */
+    cinfo->src = (struct jpeg_source_mgr *)
+      (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_PERMANENT,
+				  SIZEOF(my_source_mgr));
+    src = (my_src_ptr) cinfo->src;
+    src->buffer = (JOCTET *)
+      (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_PERMANENT,
+				  INPUT_BUF_SIZE * SIZEOF(JOCTET));
+  }
+
+  src = (my_src_ptr) cinfo->src;
+  src->pub.init_source = init_source;
+  src->pub.fill_input_buffer = fill_input_buffer;
+  src->pub.skip_input_data = skip_input_data;
+  src->pub.resync_to_restart = jpeg_resync_to_restart; /* use default method */
+  src->pub.term_source = term_source;
+  src->infile = infile;
+  src->pub.bytes_in_buffer = 0; /* forces fill_input_buffer on first read */
+  src->pub.next_input_byte = NULL; /* until buffer loaded */
+}
+
+
+/*
+ * Prepare for input from a supplied memory buffer.
+ * The buffer must contain the whole JPEG data.
+ */
+
+GLOBAL(void)
+jpeg_mem_src (j_decompress_ptr cinfo,
+	      unsigned char * inbuffer, unsigned long insize)
+{
+  struct jpeg_source_mgr * src;
+
+  if (inbuffer == NULL || insize == 0)	/* Treat empty input as fatal error */
+    ERREXIT(cinfo, JERR_INPUT_EMPTY);
+
+  /* The source object is made permanent so that a series of JPEG images
+   * can be read from the same buffer by calling jpeg_mem_src only before
+   * the first one.
+   */
+  if (cinfo->src == NULL) {	/* first time for this JPEG object? */
+    cinfo->src = (struct jpeg_source_mgr *)
+      (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_PERMANENT,
+				  SIZEOF(struct jpeg_source_mgr));
+  }
+
+  src = cinfo->src;
+  src->init_source = init_mem_source;
+  src->fill_input_buffer = fill_mem_input_buffer;
+  src->skip_input_data = skip_input_data;
+  src->resync_to_restart = jpeg_resync_to_restart; /* use default method */
+  src->term_source = term_source;
+  src->bytes_in_buffer = (size_t) insize;
+  src->next_input_byte = (JOCTET *) inbuffer;
+}
diff --git a/plugins/AdvaImg/src/LibJPEG/jdcoefct.c b/plugins/AdvaImg/src/LibJPEG/jdcoefct.c
index ed02fc378f..75ee51f0f2 100644
--- a/plugins/AdvaImg/src/LibJPEG/jdcoefct.c
+++ b/plugins/AdvaImg/src/LibJPEG/jdcoefct.c
@@ -1,741 +1,741 @@
-/*
- * jdcoefct.c
- *
- * Copyright (C) 1994-1997, Thomas G. Lane.
- * Modified 2002-2011 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains the coefficient buffer controller for decompression.
- * This controller is the top level of the JPEG decompressor proper.
- * The coefficient buffer lies between entropy decoding and inverse-DCT steps.
- *
- * In buffered-image mode, this controller is the interface between
- * input-oriented processing and output-oriented processing.
- * Also, the input side (only) is used when reading a file for transcoding.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-
-/* Block smoothing is only applicable for progressive JPEG, so: */
-#ifndef D_PROGRESSIVE_SUPPORTED
-#undef BLOCK_SMOOTHING_SUPPORTED
-#endif
-
-/* Private buffer controller object */
-
-typedef struct {
-  struct jpeg_d_coef_controller pub; /* public fields */
-
-  /* These variables keep track of the current location of the input side. */
-  /* cinfo->input_iMCU_row is also used for this. */
-  JDIMENSION MCU_ctr;		/* counts MCUs processed in current row */
-  int MCU_vert_offset;		/* counts MCU rows within iMCU row */
-  int MCU_rows_per_iMCU_row;	/* number of such rows needed */
-
-  /* The output side's location is represented by cinfo->output_iMCU_row. */
-
-  /* In single-pass modes, it's sufficient to buffer just one MCU.
-   * We allocate a workspace of D_MAX_BLOCKS_IN_MCU coefficient blocks,
-   * and let the entropy decoder write into that workspace each time.
-   * (On 80x86, the workspace is FAR even though it's not really very big;
-   * this is to keep the module interfaces unchanged when a large coefficient
-   * buffer is necessary.)
-   * In multi-pass modes, this array points to the current MCU's blocks
-   * within the virtual arrays; it is used only by the input side.
-   */
-  JBLOCKROW MCU_buffer[D_MAX_BLOCKS_IN_MCU];
-
-#ifdef D_MULTISCAN_FILES_SUPPORTED
-  /* In multi-pass modes, we need a virtual block array for each component. */
-  jvirt_barray_ptr whole_image[MAX_COMPONENTS];
-#endif
-
-#ifdef BLOCK_SMOOTHING_SUPPORTED
-  /* When doing block smoothing, we latch coefficient Al values here */
-  int * coef_bits_latch;
-#define SAVED_COEFS  6		/* we save coef_bits[0..5] */
-#endif
-} my_coef_controller;
-
-typedef my_coef_controller * my_coef_ptr;
-
-/* Forward declarations */
-METHODDEF(int) decompress_onepass
-	JPP((j_decompress_ptr cinfo, JSAMPIMAGE output_buf));
-#ifdef D_MULTISCAN_FILES_SUPPORTED
-METHODDEF(int) decompress_data
-	JPP((j_decompress_ptr cinfo, JSAMPIMAGE output_buf));
-#endif
-#ifdef BLOCK_SMOOTHING_SUPPORTED
-LOCAL(boolean) smoothing_ok JPP((j_decompress_ptr cinfo));
-METHODDEF(int) decompress_smooth_data
-	JPP((j_decompress_ptr cinfo, JSAMPIMAGE output_buf));
-#endif
-
-
-LOCAL(void)
-start_iMCU_row (j_decompress_ptr cinfo)
-/* Reset within-iMCU-row counters for a new row (input side) */
-{
-  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
-
-  /* In an interleaved scan, an MCU row is the same as an iMCU row.
-   * In a noninterleaved scan, an iMCU row has v_samp_factor MCU rows.
-   * But at the bottom of the image, process only what's left.
-   */
-  if (cinfo->comps_in_scan > 1) {
-    coef->MCU_rows_per_iMCU_row = 1;
-  } else {
-    if (cinfo->input_iMCU_row < (cinfo->total_iMCU_rows-1))
-      coef->MCU_rows_per_iMCU_row = cinfo->cur_comp_info[0]->v_samp_factor;
-    else
-      coef->MCU_rows_per_iMCU_row = cinfo->cur_comp_info[0]->last_row_height;
-  }
-
-  coef->MCU_ctr = 0;
-  coef->MCU_vert_offset = 0;
-}
-
-
-/*
- * Initialize for an input processing pass.
- */
-
-METHODDEF(void)
-start_input_pass (j_decompress_ptr cinfo)
-{
-  cinfo->input_iMCU_row = 0;
-  start_iMCU_row(cinfo);
-}
-
-
-/*
- * Initialize for an output processing pass.
- */
-
-METHODDEF(void)
-start_output_pass (j_decompress_ptr cinfo)
-{
-#ifdef BLOCK_SMOOTHING_SUPPORTED
-  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
-
-  /* If multipass, check to see whether to use block smoothing on this pass */
-  if (coef->pub.coef_arrays != NULL) {
-    if (cinfo->do_block_smoothing && smoothing_ok(cinfo))
-      coef->pub.decompress_data = decompress_smooth_data;
-    else
-      coef->pub.decompress_data = decompress_data;
-  }
-#endif
-  cinfo->output_iMCU_row = 0;
-}
-
-
-/*
- * Decompress and return some data in the single-pass case.
- * Always attempts to emit one fully interleaved MCU row ("iMCU" row).
- * Input and output must run in lockstep since we have only a one-MCU buffer.
- * Return value is JPEG_ROW_COMPLETED, JPEG_SCAN_COMPLETED, or JPEG_SUSPENDED.
- *
- * NB: output_buf contains a plane for each component in image,
- * which we index according to the component's SOF position.
- */
-
-METHODDEF(int)
-decompress_onepass (j_decompress_ptr cinfo, JSAMPIMAGE output_buf)
-{
-  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
-  JDIMENSION MCU_col_num;	/* index of current MCU within row */
-  JDIMENSION last_MCU_col = cinfo->MCUs_per_row - 1;
-  JDIMENSION last_iMCU_row = cinfo->total_iMCU_rows - 1;
-  int blkn, ci, xindex, yindex, yoffset, useful_width;
-  JSAMPARRAY output_ptr;
-  JDIMENSION start_col, output_col;
-  jpeg_component_info *compptr;
-  inverse_DCT_method_ptr inverse_DCT;
-
-  /* Loop to process as much as one whole iMCU row */
-  for (yoffset = coef->MCU_vert_offset; yoffset < coef->MCU_rows_per_iMCU_row;
-       yoffset++) {
-    for (MCU_col_num = coef->MCU_ctr; MCU_col_num <= last_MCU_col;
-	 MCU_col_num++) {
-      /* Try to fetch an MCU.  Entropy decoder expects buffer to be zeroed. */
-      if (cinfo->lim_Se)	/* can bypass in DC only case */
-	FMEMZERO((void FAR *) coef->MCU_buffer[0],
-		 (size_t) (cinfo->blocks_in_MCU * SIZEOF(JBLOCK)));
-      if (! (*cinfo->entropy->decode_mcu) (cinfo, coef->MCU_buffer)) {
-	/* Suspension forced; update state counters and exit */
-	coef->MCU_vert_offset = yoffset;
-	coef->MCU_ctr = MCU_col_num;
-	return JPEG_SUSPENDED;
-      }
-      /* Determine where data should go in output_buf and do the IDCT thing.
-       * We skip dummy blocks at the right and bottom edges (but blkn gets
-       * incremented past them!).  Note the inner loop relies on having
-       * allocated the MCU_buffer[] blocks sequentially.
-       */
-      blkn = 0;			/* index of current DCT block within MCU */
-      for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
-	compptr = cinfo->cur_comp_info[ci];
-	/* Don't bother to IDCT an uninteresting component. */
-	if (! compptr->component_needed) {
-	  blkn += compptr->MCU_blocks;
-	  continue;
-	}
-	inverse_DCT = cinfo->idct->inverse_DCT[compptr->component_index];
-	useful_width = (MCU_col_num < last_MCU_col) ? compptr->MCU_width
-						    : compptr->last_col_width;
-	output_ptr = output_buf[compptr->component_index] +
-	  yoffset * compptr->DCT_v_scaled_size;
-	start_col = MCU_col_num * compptr->MCU_sample_width;
-	for (yindex = 0; yindex < compptr->MCU_height; yindex++) {
-	  if (cinfo->input_iMCU_row < last_iMCU_row ||
-	      yoffset+yindex < compptr->last_row_height) {
-	    output_col = start_col;
-	    for (xindex = 0; xindex < useful_width; xindex++) {
-	      (*inverse_DCT) (cinfo, compptr,
-			      (JCOEFPTR) coef->MCU_buffer[blkn+xindex],
-			      output_ptr, output_col);
-	      output_col += compptr->DCT_h_scaled_size;
-	    }
-	  }
-	  blkn += compptr->MCU_width;
-	  output_ptr += compptr->DCT_v_scaled_size;
-	}
-      }
-    }
-    /* Completed an MCU row, but perhaps not an iMCU row */
-    coef->MCU_ctr = 0;
-  }
-  /* Completed the iMCU row, advance counters for next one */
-  cinfo->output_iMCU_row++;
-  if (++(cinfo->input_iMCU_row) < cinfo->total_iMCU_rows) {
-    start_iMCU_row(cinfo);
-    return JPEG_ROW_COMPLETED;
-  }
-  /* Completed the scan */
-  (*cinfo->inputctl->finish_input_pass) (cinfo);
-  return JPEG_SCAN_COMPLETED;
-}
-
-
-/*
- * Dummy consume-input routine for single-pass operation.
- */
-
-METHODDEF(int)
-dummy_consume_data (j_decompress_ptr cinfo)
-{
-  return JPEG_SUSPENDED;	/* Always indicate nothing was done */
-}
-
-
-#ifdef D_MULTISCAN_FILES_SUPPORTED
-
-/*
- * Consume input data and store it in the full-image coefficient buffer.
- * We read as much as one fully interleaved MCU row ("iMCU" row) per call,
- * ie, v_samp_factor block rows for each component in the scan.
- * Return value is JPEG_ROW_COMPLETED, JPEG_SCAN_COMPLETED, or JPEG_SUSPENDED.
- */
-
-METHODDEF(int)
-consume_data (j_decompress_ptr cinfo)
-{
-  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
-  JDIMENSION MCU_col_num;	/* index of current MCU within row */
-  int blkn, ci, xindex, yindex, yoffset;
-  JDIMENSION start_col;
-  JBLOCKARRAY buffer[MAX_COMPS_IN_SCAN];
-  JBLOCKROW buffer_ptr;
-  jpeg_component_info *compptr;
-
-  /* Align the virtual buffers for the components used in this scan. */
-  for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
-    compptr = cinfo->cur_comp_info[ci];
-    buffer[ci] = (*cinfo->mem->access_virt_barray)
-      ((j_common_ptr) cinfo, coef->whole_image[compptr->component_index],
-       cinfo->input_iMCU_row * compptr->v_samp_factor,
-       (JDIMENSION) compptr->v_samp_factor, TRUE);
-    /* Note: entropy decoder expects buffer to be zeroed,
-     * but this is handled automatically by the memory manager
-     * because we requested a pre-zeroed array.
-     */
-  }
-
-  /* Loop to process one whole iMCU row */
-  for (yoffset = coef->MCU_vert_offset; yoffset < coef->MCU_rows_per_iMCU_row;
-       yoffset++) {
-    for (MCU_col_num = coef->MCU_ctr; MCU_col_num < cinfo->MCUs_per_row;
-	 MCU_col_num++) {
-      /* Construct list of pointers to DCT blocks belonging to this MCU */
-      blkn = 0;			/* index of current DCT block within MCU */
-      for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
-	compptr = cinfo->cur_comp_info[ci];
-	start_col = MCU_col_num * compptr->MCU_width;
-	for (yindex = 0; yindex < compptr->MCU_height; yindex++) {
-	  buffer_ptr = buffer[ci][yindex+yoffset] + start_col;
-	  for (xindex = 0; xindex < compptr->MCU_width; xindex++) {
-	    coef->MCU_buffer[blkn++] = buffer_ptr++;
-	  }
-	}
-      }
-      /* Try to fetch the MCU. */
-      if (! (*cinfo->entropy->decode_mcu) (cinfo, coef->MCU_buffer)) {
-	/* Suspension forced; update state counters and exit */
-	coef->MCU_vert_offset = yoffset;
-	coef->MCU_ctr = MCU_col_num;
-	return JPEG_SUSPENDED;
-      }
-    }
-    /* Completed an MCU row, but perhaps not an iMCU row */
-    coef->MCU_ctr = 0;
-  }
-  /* Completed the iMCU row, advance counters for next one */
-  if (++(cinfo->input_iMCU_row) < cinfo->total_iMCU_rows) {
-    start_iMCU_row(cinfo);
-    return JPEG_ROW_COMPLETED;
-  }
-  /* Completed the scan */
-  (*cinfo->inputctl->finish_input_pass) (cinfo);
-  return JPEG_SCAN_COMPLETED;
-}
-
-
-/*
- * Decompress and return some data in the multi-pass case.
- * Always attempts to emit one fully interleaved MCU row ("iMCU" row).
- * Return value is JPEG_ROW_COMPLETED, JPEG_SCAN_COMPLETED, or JPEG_SUSPENDED.
- *
- * NB: output_buf contains a plane for each component in image.
- */
-
-METHODDEF(int)
-decompress_data (j_decompress_ptr cinfo, JSAMPIMAGE output_buf)
-{
-  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
-  JDIMENSION last_iMCU_row = cinfo->total_iMCU_rows - 1;
-  JDIMENSION block_num;
-  int ci, block_row, block_rows;
-  JBLOCKARRAY buffer;
-  JBLOCKROW buffer_ptr;
-  JSAMPARRAY output_ptr;
-  JDIMENSION output_col;
-  jpeg_component_info *compptr;
-  inverse_DCT_method_ptr inverse_DCT;
-
-  /* Force some input to be done if we are getting ahead of the input. */
-  while (cinfo->input_scan_number < cinfo->output_scan_number ||
-	 (cinfo->input_scan_number == cinfo->output_scan_number &&
-	  cinfo->input_iMCU_row <= cinfo->output_iMCU_row)) {
-    if ((*cinfo->inputctl->consume_input)(cinfo) == JPEG_SUSPENDED)
-      return JPEG_SUSPENDED;
-  }
-
-  /* OK, output from the virtual arrays. */
-  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-       ci++, compptr++) {
-    /* Don't bother to IDCT an uninteresting component. */
-    if (! compptr->component_needed)
-      continue;
-    /* Align the virtual buffer for this component. */
-    buffer = (*cinfo->mem->access_virt_barray)
-      ((j_common_ptr) cinfo, coef->whole_image[ci],
-       cinfo->output_iMCU_row * compptr->v_samp_factor,
-       (JDIMENSION) compptr->v_samp_factor, FALSE);
-    /* Count non-dummy DCT block rows in this iMCU row. */
-    if (cinfo->output_iMCU_row < last_iMCU_row)
-      block_rows = compptr->v_samp_factor;
-    else {
-      /* NB: can't use last_row_height here; it is input-side-dependent! */
-      block_rows = (int) (compptr->height_in_blocks % compptr->v_samp_factor);
-      if (block_rows == 0) block_rows = compptr->v_samp_factor;
-    }
-    inverse_DCT = cinfo->idct->inverse_DCT[ci];
-    output_ptr = output_buf[ci];
-    /* Loop over all DCT blocks to be processed. */
-    for (block_row = 0; block_row < block_rows; block_row++) {
-      buffer_ptr = buffer[block_row];
-      output_col = 0;
-      for (block_num = 0; block_num < compptr->width_in_blocks; block_num++) {
-	(*inverse_DCT) (cinfo, compptr, (JCOEFPTR) buffer_ptr,
-			output_ptr, output_col);
-	buffer_ptr++;
-	output_col += compptr->DCT_h_scaled_size;
-      }
-      output_ptr += compptr->DCT_v_scaled_size;
-    }
-  }
-
-  if (++(cinfo->output_iMCU_row) < cinfo->total_iMCU_rows)
-    return JPEG_ROW_COMPLETED;
-  return JPEG_SCAN_COMPLETED;
-}
-
-#endif /* D_MULTISCAN_FILES_SUPPORTED */
-
-
-#ifdef BLOCK_SMOOTHING_SUPPORTED
-
-/*
- * This code applies interblock smoothing as described by section K.8
- * of the JPEG standard: the first 5 AC coefficients are estimated from
- * the DC values of a DCT block and its 8 neighboring blocks.
- * We apply smoothing only for progressive JPEG decoding, and only if
- * the coefficients it can estimate are not yet known to full precision.
- */
-
-/* Natural-order array positions of the first 5 zigzag-order coefficients */
-#define Q01_POS  1
-#define Q10_POS  8
-#define Q20_POS  16
-#define Q11_POS  9
-#define Q02_POS  2
-
-/*
- * Determine whether block smoothing is applicable and safe.
- * We also latch the current states of the coef_bits[] entries for the
- * AC coefficients; otherwise, if the input side of the decompressor
- * advances into a new scan, we might think the coefficients are known
- * more accurately than they really are.
- */
-
-LOCAL(boolean)
-smoothing_ok (j_decompress_ptr cinfo)
-{
-  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
-  boolean smoothing_useful = FALSE;
-  int ci, coefi;
-  jpeg_component_info *compptr;
-  JQUANT_TBL * qtable;
-  int * coef_bits;
-  int * coef_bits_latch;
-
-  if (! cinfo->progressive_mode || cinfo->coef_bits == NULL)
-    return FALSE;
-
-  /* Allocate latch area if not already done */
-  if (coef->coef_bits_latch == NULL)
-    coef->coef_bits_latch = (int *)
-      (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				  cinfo->num_components *
-				  (SAVED_COEFS * SIZEOF(int)));
-  coef_bits_latch = coef->coef_bits_latch;
-
-  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-       ci++, compptr++) {
-    /* All components' quantization values must already be latched. */
-    if ((qtable = compptr->quant_table) == NULL)
-      return FALSE;
-    /* Verify DC & first 5 AC quantizers are nonzero to avoid zero-divide. */
-    if (qtable->quantval[0] == 0 ||
-	qtable->quantval[Q01_POS] == 0 ||
-	qtable->quantval[Q10_POS] == 0 ||
-	qtable->quantval[Q20_POS] == 0 ||
-	qtable->quantval[Q11_POS] == 0 ||
-	qtable->quantval[Q02_POS] == 0)
-      return FALSE;
-    /* DC values must be at least partly known for all components. */
-    coef_bits = cinfo->coef_bits[ci];
-    if (coef_bits[0] < 0)
-      return FALSE;
-    /* Block smoothing is helpful if some AC coefficients remain inaccurate. */
-    for (coefi = 1; coefi <= 5; coefi++) {
-      coef_bits_latch[coefi] = coef_bits[coefi];
-      if (coef_bits[coefi] != 0)
-	smoothing_useful = TRUE;
-    }
-    coef_bits_latch += SAVED_COEFS;
-  }
-
-  return smoothing_useful;
-}
-
-
-/*
- * Variant of decompress_data for use when doing block smoothing.
- */
-
-METHODDEF(int)
-decompress_smooth_data (j_decompress_ptr cinfo, JSAMPIMAGE output_buf)
-{
-  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
-  JDIMENSION last_iMCU_row = cinfo->total_iMCU_rows - 1;
-  JDIMENSION block_num, last_block_column;
-  int ci, block_row, block_rows, access_rows;
-  JBLOCKARRAY buffer;
-  JBLOCKROW buffer_ptr, prev_block_row, next_block_row;
-  JSAMPARRAY output_ptr;
-  JDIMENSION output_col;
-  jpeg_component_info *compptr;
-  inverse_DCT_method_ptr inverse_DCT;
-  boolean first_row, last_row;
-  JBLOCK workspace;
-  int *coef_bits;
-  JQUANT_TBL *quanttbl;
-  INT32 Q00,Q01,Q02,Q10,Q11,Q20, num;
-  int DC1,DC2,DC3,DC4,DC5,DC6,DC7,DC8,DC9;
-  int Al, pred;
-
-  /* Force some input to be done if we are getting ahead of the input. */
-  while (cinfo->input_scan_number <= cinfo->output_scan_number &&
-	 ! cinfo->inputctl->eoi_reached) {
-    if (cinfo->input_scan_number == cinfo->output_scan_number) {
-      /* If input is working on current scan, we ordinarily want it to
-       * have completed the current row.  But if input scan is DC,
-       * we want it to keep one row ahead so that next block row's DC
-       * values are up to date.
-       */
-      JDIMENSION delta = (cinfo->Ss == 0) ? 1 : 0;
-      if (cinfo->input_iMCU_row > cinfo->output_iMCU_row+delta)
-	break;
-    }
-    if ((*cinfo->inputctl->consume_input)(cinfo) == JPEG_SUSPENDED)
-      return JPEG_SUSPENDED;
-  }
-
-  /* OK, output from the virtual arrays. */
-  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-       ci++, compptr++) {
-    /* Don't bother to IDCT an uninteresting component. */
-    if (! compptr->component_needed)
-      continue;
-    /* Count non-dummy DCT block rows in this iMCU row. */
-    if (cinfo->output_iMCU_row < last_iMCU_row) {
-      block_rows = compptr->v_samp_factor;
-      access_rows = block_rows * 2; /* this and next iMCU row */
-      last_row = FALSE;
-    } else {
-      /* NB: can't use last_row_height here; it is input-side-dependent! */
-      block_rows = (int) (compptr->height_in_blocks % compptr->v_samp_factor);
-      if (block_rows == 0) block_rows = compptr->v_samp_factor;
-      access_rows = block_rows; /* this iMCU row only */
-      last_row = TRUE;
-    }
-    /* Align the virtual buffer for this component. */
-    if (cinfo->output_iMCU_row > 0) {
-      access_rows += compptr->v_samp_factor; /* prior iMCU row too */
-      buffer = (*cinfo->mem->access_virt_barray)
-	((j_common_ptr) cinfo, coef->whole_image[ci],
-	 (cinfo->output_iMCU_row - 1) * compptr->v_samp_factor,
-	 (JDIMENSION) access_rows, FALSE);
-      buffer += compptr->v_samp_factor;	/* point to current iMCU row */
-      first_row = FALSE;
-    } else {
-      buffer = (*cinfo->mem->access_virt_barray)
-	((j_common_ptr) cinfo, coef->whole_image[ci],
-	 (JDIMENSION) 0, (JDIMENSION) access_rows, FALSE);
-      first_row = TRUE;
-    }
-    /* Fetch component-dependent info */
-    coef_bits = coef->coef_bits_latch + (ci * SAVED_COEFS);
-    quanttbl = compptr->quant_table;
-    Q00 = quanttbl->quantval[0];
-    Q01 = quanttbl->quantval[Q01_POS];
-    Q10 = quanttbl->quantval[Q10_POS];
-    Q20 = quanttbl->quantval[Q20_POS];
-    Q11 = quanttbl->quantval[Q11_POS];
-    Q02 = quanttbl->quantval[Q02_POS];
-    inverse_DCT = cinfo->idct->inverse_DCT[ci];
-    output_ptr = output_buf[ci];
-    /* Loop over all DCT blocks to be processed. */
-    for (block_row = 0; block_row < block_rows; block_row++) {
-      buffer_ptr = buffer[block_row];
-      if (first_row && block_row == 0)
-	prev_block_row = buffer_ptr;
-      else
-	prev_block_row = buffer[block_row-1];
-      if (last_row && block_row == block_rows-1)
-	next_block_row = buffer_ptr;
-      else
-	next_block_row = buffer[block_row+1];
-      /* We fetch the surrounding DC values using a sliding-register approach.
-       * Initialize all nine here so as to do the right thing on narrow pics.
-       */
-      DC1 = DC2 = DC3 = (int) prev_block_row[0][0];
-      DC4 = DC5 = DC6 = (int) buffer_ptr[0][0];
-      DC7 = DC8 = DC9 = (int) next_block_row[0][0];
-      output_col = 0;
-      last_block_column = compptr->width_in_blocks - 1;
-      for (block_num = 0; block_num <= last_block_column; block_num++) {
-	/* Fetch current DCT block into workspace so we can modify it. */
-	jcopy_block_row(buffer_ptr, (JBLOCKROW) workspace, (JDIMENSION) 1);
-	/* Update DC values */
-	if (block_num < last_block_column) {
-	  DC3 = (int) prev_block_row[1][0];
-	  DC6 = (int) buffer_ptr[1][0];
-	  DC9 = (int) next_block_row[1][0];
-	}
-	/* Compute coefficient estimates per K.8.
-	 * An estimate is applied only if coefficient is still zero,
-	 * and is not known to be fully accurate.
-	 */
-	/* AC01 */
-	if ((Al=coef_bits[1]) != 0 && workspace[1] == 0) {
-	  num = 36 * Q00 * (DC4 - DC6);
-	  if (num >= 0) {
-	    pred = (int) (((Q01<<7) + num) / (Q01<<8));
-	    if (Al > 0 && pred >= (1<<Al))
-	      pred = (1<<Al)-1;
-	  } else {
-	    pred = (int) (((Q01<<7) - num) / (Q01<<8));
-	    if (Al > 0 && pred >= (1<<Al))
-	      pred = (1<<Al)-1;
-	    pred = -pred;
-	  }
-	  workspace[1] = (JCOEF) pred;
-	}
-	/* AC10 */
-	if ((Al=coef_bits[2]) != 0 && workspace[8] == 0) {
-	  num = 36 * Q00 * (DC2 - DC8);
-	  if (num >= 0) {
-	    pred = (int) (((Q10<<7) + num) / (Q10<<8));
-	    if (Al > 0 && pred >= (1<<Al))
-	      pred = (1<<Al)-1;
-	  } else {
-	    pred = (int) (((Q10<<7) - num) / (Q10<<8));
-	    if (Al > 0 && pred >= (1<<Al))
-	      pred = (1<<Al)-1;
-	    pred = -pred;
-	  }
-	  workspace[8] = (JCOEF) pred;
-	}
-	/* AC20 */
-	if ((Al=coef_bits[3]) != 0 && workspace[16] == 0) {
-	  num = 9 * Q00 * (DC2 + DC8 - 2*DC5);
-	  if (num >= 0) {
-	    pred = (int) (((Q20<<7) + num) / (Q20<<8));
-	    if (Al > 0 && pred >= (1<<Al))
-	      pred = (1<<Al)-1;
-	  } else {
-	    pred = (int) (((Q20<<7) - num) / (Q20<<8));
-	    if (Al > 0 && pred >= (1<<Al))
-	      pred = (1<<Al)-1;
-	    pred = -pred;
-	  }
-	  workspace[16] = (JCOEF) pred;
-	}
-	/* AC11 */
-	if ((Al=coef_bits[4]) != 0 && workspace[9] == 0) {
-	  num = 5 * Q00 * (DC1 - DC3 - DC7 + DC9);
-	  if (num >= 0) {
-	    pred = (int) (((Q11<<7) + num) / (Q11<<8));
-	    if (Al > 0 && pred >= (1<<Al))
-	      pred = (1<<Al)-1;
-	  } else {
-	    pred = (int) (((Q11<<7) - num) / (Q11<<8));
-	    if (Al > 0 && pred >= (1<<Al))
-	      pred = (1<<Al)-1;
-	    pred = -pred;
-	  }
-	  workspace[9] = (JCOEF) pred;
-	}
-	/* AC02 */
-	if ((Al=coef_bits[5]) != 0 && workspace[2] == 0) {
-	  num = 9 * Q00 * (DC4 + DC6 - 2*DC5);
-	  if (num >= 0) {
-	    pred = (int) (((Q02<<7) + num) / (Q02<<8));
-	    if (Al > 0 && pred >= (1<<Al))
-	      pred = (1<<Al)-1;
-	  } else {
-	    pred = (int) (((Q02<<7) - num) / (Q02<<8));
-	    if (Al > 0 && pred >= (1<<Al))
-	      pred = (1<<Al)-1;
-	    pred = -pred;
-	  }
-	  workspace[2] = (JCOEF) pred;
-	}
-	/* OK, do the IDCT */
-	(*inverse_DCT) (cinfo, compptr, (JCOEFPTR) workspace,
-			output_ptr, output_col);
-	/* Advance for next column */
-	DC1 = DC2; DC2 = DC3;
-	DC4 = DC5; DC5 = DC6;
-	DC7 = DC8; DC8 = DC9;
-	buffer_ptr++, prev_block_row++, next_block_row++;
-	output_col += compptr->DCT_h_scaled_size;
-      }
-      output_ptr += compptr->DCT_v_scaled_size;
-    }
-  }
-
-  if (++(cinfo->output_iMCU_row) < cinfo->total_iMCU_rows)
-    return JPEG_ROW_COMPLETED;
-  return JPEG_SCAN_COMPLETED;
-}
-
-#endif /* BLOCK_SMOOTHING_SUPPORTED */
-
-
-/*
- * Initialize coefficient buffer controller.
- */
-
-GLOBAL(void)
-jinit_d_coef_controller (j_decompress_ptr cinfo, boolean need_full_buffer)
-{
-  my_coef_ptr coef;
-
-  coef = (my_coef_ptr)
-    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				SIZEOF(my_coef_controller));
-  cinfo->coef = (struct jpeg_d_coef_controller *) coef;
-  coef->pub.start_input_pass = start_input_pass;
-  coef->pub.start_output_pass = start_output_pass;
-#ifdef BLOCK_SMOOTHING_SUPPORTED
-  coef->coef_bits_latch = NULL;
-#endif
-
-  /* Create the coefficient buffer. */
-  if (need_full_buffer) {
-#ifdef D_MULTISCAN_FILES_SUPPORTED
-    /* Allocate a full-image virtual array for each component, */
-    /* padded to a multiple of samp_factor DCT blocks in each direction. */
-    /* Note we ask for a pre-zeroed array. */
-    int ci, access_rows;
-    jpeg_component_info *compptr;
-
-    for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-	 ci++, compptr++) {
-      access_rows = compptr->v_samp_factor;
-#ifdef BLOCK_SMOOTHING_SUPPORTED
-      /* If block smoothing could be used, need a bigger window */
-      if (cinfo->progressive_mode)
-	access_rows *= 3;
-#endif
-      coef->whole_image[ci] = (*cinfo->mem->request_virt_barray)
-	((j_common_ptr) cinfo, JPOOL_IMAGE, TRUE,
-	 (JDIMENSION) jround_up((long) compptr->width_in_blocks,
-				(long) compptr->h_samp_factor),
-	 (JDIMENSION) jround_up((long) compptr->height_in_blocks,
-				(long) compptr->v_samp_factor),
-	 (JDIMENSION) access_rows);
-    }
-    coef->pub.consume_data = consume_data;
-    coef->pub.decompress_data = decompress_data;
-    coef->pub.coef_arrays = coef->whole_image; /* link to virtual arrays */
-#else
-    ERREXIT(cinfo, JERR_NOT_COMPILED);
-#endif
-  } else {
-    /* We only need a single-MCU buffer. */
-    JBLOCKROW buffer;
-    int i;
-
-    buffer = (JBLOCKROW)
-      (*cinfo->mem->alloc_large) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				  D_MAX_BLOCKS_IN_MCU * SIZEOF(JBLOCK));
-    for (i = 0; i < D_MAX_BLOCKS_IN_MCU; i++) {
-      coef->MCU_buffer[i] = buffer + i;
-    }
-    if (cinfo->lim_Se == 0)	/* DC only case: want to bypass later */
-      FMEMZERO((void FAR *) buffer,
-	       (size_t) (D_MAX_BLOCKS_IN_MCU * SIZEOF(JBLOCK)));
-    coef->pub.consume_data = dummy_consume_data;
-    coef->pub.decompress_data = decompress_onepass;
-    coef->pub.coef_arrays = NULL; /* flag for no virtual arrays */
-  }
-}
+/*
+ * jdcoefct.c
+ *
+ * Copyright (C) 1994-1997, Thomas G. Lane.
+ * Modified 2002-2011 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains the coefficient buffer controller for decompression.
+ * This controller is the top level of the JPEG decompressor proper.
+ * The coefficient buffer lies between entropy decoding and inverse-DCT steps.
+ *
+ * In buffered-image mode, this controller is the interface between
+ * input-oriented processing and output-oriented processing.
+ * Also, the input side (only) is used when reading a file for transcoding.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+/* Block smoothing is only applicable for progressive JPEG, so: */
+#ifndef D_PROGRESSIVE_SUPPORTED
+#undef BLOCK_SMOOTHING_SUPPORTED
+#endif
+
+/* Private buffer controller object */
+
+typedef struct {
+  struct jpeg_d_coef_controller pub; /* public fields */
+
+  /* These variables keep track of the current location of the input side. */
+  /* cinfo->input_iMCU_row is also used for this. */
+  JDIMENSION MCU_ctr;		/* counts MCUs processed in current row */
+  int MCU_vert_offset;		/* counts MCU rows within iMCU row */
+  int MCU_rows_per_iMCU_row;	/* number of such rows needed */
+
+  /* The output side's location is represented by cinfo->output_iMCU_row. */
+
+  /* In single-pass modes, it's sufficient to buffer just one MCU.
+   * We allocate a workspace of D_MAX_BLOCKS_IN_MCU coefficient blocks,
+   * and let the entropy decoder write into that workspace each time.
+   * (On 80x86, the workspace is FAR even though it's not really very big;
+   * this is to keep the module interfaces unchanged when a large coefficient
+   * buffer is necessary.)
+   * In multi-pass modes, this array points to the current MCU's blocks
+   * within the virtual arrays; it is used only by the input side.
+   */
+  JBLOCKROW MCU_buffer[D_MAX_BLOCKS_IN_MCU];
+
+#ifdef D_MULTISCAN_FILES_SUPPORTED
+  /* In multi-pass modes, we need a virtual block array for each component. */
+  jvirt_barray_ptr whole_image[MAX_COMPONENTS];
+#endif
+
+#ifdef BLOCK_SMOOTHING_SUPPORTED
+  /* When doing block smoothing, we latch coefficient Al values here */
+  int * coef_bits_latch;
+#define SAVED_COEFS  6		/* we save coef_bits[0..5] */
+#endif
+} my_coef_controller;
+
+typedef my_coef_controller * my_coef_ptr;
+
+/* Forward declarations */
+METHODDEF(int) decompress_onepass
+	JPP((j_decompress_ptr cinfo, JSAMPIMAGE output_buf));
+#ifdef D_MULTISCAN_FILES_SUPPORTED
+METHODDEF(int) decompress_data
+	JPP((j_decompress_ptr cinfo, JSAMPIMAGE output_buf));
+#endif
+#ifdef BLOCK_SMOOTHING_SUPPORTED
+LOCAL(boolean) smoothing_ok JPP((j_decompress_ptr cinfo));
+METHODDEF(int) decompress_smooth_data
+	JPP((j_decompress_ptr cinfo, JSAMPIMAGE output_buf));
+#endif
+
+
+LOCAL(void)
+start_iMCU_row (j_decompress_ptr cinfo)
+/* Reset within-iMCU-row counters for a new row (input side) */
+{
+  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
+
+  /* In an interleaved scan, an MCU row is the same as an iMCU row.
+   * In a noninterleaved scan, an iMCU row has v_samp_factor MCU rows.
+   * But at the bottom of the image, process only what's left.
+   */
+  if (cinfo->comps_in_scan > 1) {
+    coef->MCU_rows_per_iMCU_row = 1;
+  } else {
+    if (cinfo->input_iMCU_row < (cinfo->total_iMCU_rows-1))
+      coef->MCU_rows_per_iMCU_row = cinfo->cur_comp_info[0]->v_samp_factor;
+    else
+      coef->MCU_rows_per_iMCU_row = cinfo->cur_comp_info[0]->last_row_height;
+  }
+
+  coef->MCU_ctr = 0;
+  coef->MCU_vert_offset = 0;
+}
+
+
+/*
+ * Initialize for an input processing pass.
+ */
+
+METHODDEF(void)
+start_input_pass (j_decompress_ptr cinfo)
+{
+  cinfo->input_iMCU_row = 0;
+  start_iMCU_row(cinfo);
+}
+
+
+/*
+ * Initialize for an output processing pass.
+ */
+
+METHODDEF(void)
+start_output_pass (j_decompress_ptr cinfo)
+{
+#ifdef BLOCK_SMOOTHING_SUPPORTED
+  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
+
+  /* If multipass, check to see whether to use block smoothing on this pass */
+  if (coef->pub.coef_arrays != NULL) {
+    if (cinfo->do_block_smoothing && smoothing_ok(cinfo))
+      coef->pub.decompress_data = decompress_smooth_data;
+    else
+      coef->pub.decompress_data = decompress_data;
+  }
+#endif
+  cinfo->output_iMCU_row = 0;
+}
+
+
+/*
+ * Decompress and return some data in the single-pass case.
+ * Always attempts to emit one fully interleaved MCU row ("iMCU" row).
+ * Input and output must run in lockstep since we have only a one-MCU buffer.
+ * Return value is JPEG_ROW_COMPLETED, JPEG_SCAN_COMPLETED, or JPEG_SUSPENDED.
+ *
+ * NB: output_buf contains a plane for each component in image,
+ * which we index according to the component's SOF position.
+ */
+
+METHODDEF(int)
+decompress_onepass (j_decompress_ptr cinfo, JSAMPIMAGE output_buf)
+{
+  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
+  JDIMENSION MCU_col_num;	/* index of current MCU within row */
+  JDIMENSION last_MCU_col = cinfo->MCUs_per_row - 1;
+  JDIMENSION last_iMCU_row = cinfo->total_iMCU_rows - 1;
+  int blkn, ci, xindex, yindex, yoffset, useful_width;
+  JSAMPARRAY output_ptr;
+  JDIMENSION start_col, output_col;
+  jpeg_component_info *compptr;
+  inverse_DCT_method_ptr inverse_DCT;
+
+  /* Loop to process as much as one whole iMCU row */
+  for (yoffset = coef->MCU_vert_offset; yoffset < coef->MCU_rows_per_iMCU_row;
+       yoffset++) {
+    for (MCU_col_num = coef->MCU_ctr; MCU_col_num <= last_MCU_col;
+	 MCU_col_num++) {
+      /* Try to fetch an MCU.  Entropy decoder expects buffer to be zeroed. */
+      if (cinfo->lim_Se)	/* can bypass in DC only case */
+	FMEMZERO((void FAR *) coef->MCU_buffer[0],
+		 (size_t) (cinfo->blocks_in_MCU * SIZEOF(JBLOCK)));
+      if (! (*cinfo->entropy->decode_mcu) (cinfo, coef->MCU_buffer)) {
+	/* Suspension forced; update state counters and exit */
+	coef->MCU_vert_offset = yoffset;
+	coef->MCU_ctr = MCU_col_num;
+	return JPEG_SUSPENDED;
+      }
+      /* Determine where data should go in output_buf and do the IDCT thing.
+       * We skip dummy blocks at the right and bottom edges (but blkn gets
+       * incremented past them!).  Note the inner loop relies on having
+       * allocated the MCU_buffer[] blocks sequentially.
+       */
+      blkn = 0;			/* index of current DCT block within MCU */
+      for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+	compptr = cinfo->cur_comp_info[ci];
+	/* Don't bother to IDCT an uninteresting component. */
+	if (! compptr->component_needed) {
+	  blkn += compptr->MCU_blocks;
+	  continue;
+	}
+	inverse_DCT = cinfo->idct->inverse_DCT[compptr->component_index];
+	useful_width = (MCU_col_num < last_MCU_col) ? compptr->MCU_width
+						    : compptr->last_col_width;
+	output_ptr = output_buf[compptr->component_index] +
+	  yoffset * compptr->DCT_v_scaled_size;
+	start_col = MCU_col_num * compptr->MCU_sample_width;
+	for (yindex = 0; yindex < compptr->MCU_height; yindex++) {
+	  if (cinfo->input_iMCU_row < last_iMCU_row ||
+	      yoffset+yindex < compptr->last_row_height) {
+	    output_col = start_col;
+	    for (xindex = 0; xindex < useful_width; xindex++) {
+	      (*inverse_DCT) (cinfo, compptr,
+			      (JCOEFPTR) coef->MCU_buffer[blkn+xindex],
+			      output_ptr, output_col);
+	      output_col += compptr->DCT_h_scaled_size;
+	    }
+	  }
+	  blkn += compptr->MCU_width;
+	  output_ptr += compptr->DCT_v_scaled_size;
+	}
+      }
+    }
+    /* Completed an MCU row, but perhaps not an iMCU row */
+    coef->MCU_ctr = 0;
+  }
+  /* Completed the iMCU row, advance counters for next one */
+  cinfo->output_iMCU_row++;
+  if (++(cinfo->input_iMCU_row) < cinfo->total_iMCU_rows) {
+    start_iMCU_row(cinfo);
+    return JPEG_ROW_COMPLETED;
+  }
+  /* Completed the scan */
+  (*cinfo->inputctl->finish_input_pass) (cinfo);
+  return JPEG_SCAN_COMPLETED;
+}
+
+
+/*
+ * Dummy consume-input routine for single-pass operation.
+ */
+
+METHODDEF(int)
+dummy_consume_data (j_decompress_ptr cinfo)
+{
+  return JPEG_SUSPENDED;	/* Always indicate nothing was done */
+}
+
+
+#ifdef D_MULTISCAN_FILES_SUPPORTED
+
+/*
+ * Consume input data and store it in the full-image coefficient buffer.
+ * We read as much as one fully interleaved MCU row ("iMCU" row) per call,
+ * ie, v_samp_factor block rows for each component in the scan.
+ * Return value is JPEG_ROW_COMPLETED, JPEG_SCAN_COMPLETED, or JPEG_SUSPENDED.
+ */
+
+METHODDEF(int)
+consume_data (j_decompress_ptr cinfo)
+{
+  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
+  JDIMENSION MCU_col_num;	/* index of current MCU within row */
+  int blkn, ci, xindex, yindex, yoffset;
+  JDIMENSION start_col;
+  JBLOCKARRAY buffer[MAX_COMPS_IN_SCAN];
+  JBLOCKROW buffer_ptr;
+  jpeg_component_info *compptr;
+
+  /* Align the virtual buffers for the components used in this scan. */
+  for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+    compptr = cinfo->cur_comp_info[ci];
+    buffer[ci] = (*cinfo->mem->access_virt_barray)
+      ((j_common_ptr) cinfo, coef->whole_image[compptr->component_index],
+       cinfo->input_iMCU_row * compptr->v_samp_factor,
+       (JDIMENSION) compptr->v_samp_factor, TRUE);
+    /* Note: entropy decoder expects buffer to be zeroed,
+     * but this is handled automatically by the memory manager
+     * because we requested a pre-zeroed array.
+     */
+  }
+
+  /* Loop to process one whole iMCU row */
+  for (yoffset = coef->MCU_vert_offset; yoffset < coef->MCU_rows_per_iMCU_row;
+       yoffset++) {
+    for (MCU_col_num = coef->MCU_ctr; MCU_col_num < cinfo->MCUs_per_row;
+	 MCU_col_num++) {
+      /* Construct list of pointers to DCT blocks belonging to this MCU */
+      blkn = 0;			/* index of current DCT block within MCU */
+      for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+	compptr = cinfo->cur_comp_info[ci];
+	start_col = MCU_col_num * compptr->MCU_width;
+	for (yindex = 0; yindex < compptr->MCU_height; yindex++) {
+	  buffer_ptr = buffer[ci][yindex+yoffset] + start_col;
+	  for (xindex = 0; xindex < compptr->MCU_width; xindex++) {
+	    coef->MCU_buffer[blkn++] = buffer_ptr++;
+	  }
+	}
+      }
+      /* Try to fetch the MCU. */
+      if (! (*cinfo->entropy->decode_mcu) (cinfo, coef->MCU_buffer)) {
+	/* Suspension forced; update state counters and exit */
+	coef->MCU_vert_offset = yoffset;
+	coef->MCU_ctr = MCU_col_num;
+	return JPEG_SUSPENDED;
+      }
+    }
+    /* Completed an MCU row, but perhaps not an iMCU row */
+    coef->MCU_ctr = 0;
+  }
+  /* Completed the iMCU row, advance counters for next one */
+  if (++(cinfo->input_iMCU_row) < cinfo->total_iMCU_rows) {
+    start_iMCU_row(cinfo);
+    return JPEG_ROW_COMPLETED;
+  }
+  /* Completed the scan */
+  (*cinfo->inputctl->finish_input_pass) (cinfo);
+  return JPEG_SCAN_COMPLETED;
+}
+
+
+/*
+ * Decompress and return some data in the multi-pass case.
+ * Always attempts to emit one fully interleaved MCU row ("iMCU" row).
+ * Return value is JPEG_ROW_COMPLETED, JPEG_SCAN_COMPLETED, or JPEG_SUSPENDED.
+ *
+ * NB: output_buf contains a plane for each component in image.
+ */
+
+METHODDEF(int)
+decompress_data (j_decompress_ptr cinfo, JSAMPIMAGE output_buf)
+{
+  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
+  JDIMENSION last_iMCU_row = cinfo->total_iMCU_rows - 1;
+  JDIMENSION block_num;
+  int ci, block_row, block_rows;
+  JBLOCKARRAY buffer;
+  JBLOCKROW buffer_ptr;
+  JSAMPARRAY output_ptr;
+  JDIMENSION output_col;
+  jpeg_component_info *compptr;
+  inverse_DCT_method_ptr inverse_DCT;
+
+  /* Force some input to be done if we are getting ahead of the input. */
+  while (cinfo->input_scan_number < cinfo->output_scan_number ||
+	 (cinfo->input_scan_number == cinfo->output_scan_number &&
+	  cinfo->input_iMCU_row <= cinfo->output_iMCU_row)) {
+    if ((*cinfo->inputctl->consume_input)(cinfo) == JPEG_SUSPENDED)
+      return JPEG_SUSPENDED;
+  }
+
+  /* OK, output from the virtual arrays. */
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    /* Don't bother to IDCT an uninteresting component. */
+    if (! compptr->component_needed)
+      continue;
+    /* Align the virtual buffer for this component. */
+    buffer = (*cinfo->mem->access_virt_barray)
+      ((j_common_ptr) cinfo, coef->whole_image[ci],
+       cinfo->output_iMCU_row * compptr->v_samp_factor,
+       (JDIMENSION) compptr->v_samp_factor, FALSE);
+    /* Count non-dummy DCT block rows in this iMCU row. */
+    if (cinfo->output_iMCU_row < last_iMCU_row)
+      block_rows = compptr->v_samp_factor;
+    else {
+      /* NB: can't use last_row_height here; it is input-side-dependent! */
+      block_rows = (int) (compptr->height_in_blocks % compptr->v_samp_factor);
+      if (block_rows == 0) block_rows = compptr->v_samp_factor;
+    }
+    inverse_DCT = cinfo->idct->inverse_DCT[ci];
+    output_ptr = output_buf[ci];
+    /* Loop over all DCT blocks to be processed. */
+    for (block_row = 0; block_row < block_rows; block_row++) {
+      buffer_ptr = buffer[block_row];
+      output_col = 0;
+      for (block_num = 0; block_num < compptr->width_in_blocks; block_num++) {
+	(*inverse_DCT) (cinfo, compptr, (JCOEFPTR) buffer_ptr,
+			output_ptr, output_col);
+	buffer_ptr++;
+	output_col += compptr->DCT_h_scaled_size;
+      }
+      output_ptr += compptr->DCT_v_scaled_size;
+    }
+  }
+
+  if (++(cinfo->output_iMCU_row) < cinfo->total_iMCU_rows)
+    return JPEG_ROW_COMPLETED;
+  return JPEG_SCAN_COMPLETED;
+}
+
+#endif /* D_MULTISCAN_FILES_SUPPORTED */
+
+
+#ifdef BLOCK_SMOOTHING_SUPPORTED
+
+/*
+ * This code applies interblock smoothing as described by section K.8
+ * of the JPEG standard: the first 5 AC coefficients are estimated from
+ * the DC values of a DCT block and its 8 neighboring blocks.
+ * We apply smoothing only for progressive JPEG decoding, and only if
+ * the coefficients it can estimate are not yet known to full precision.
+ */
+
+/* Natural-order array positions of the first 5 zigzag-order coefficients */
+#define Q01_POS  1
+#define Q10_POS  8
+#define Q20_POS  16
+#define Q11_POS  9
+#define Q02_POS  2
+
+/*
+ * Determine whether block smoothing is applicable and safe.
+ * We also latch the current states of the coef_bits[] entries for the
+ * AC coefficients; otherwise, if the input side of the decompressor
+ * advances into a new scan, we might think the coefficients are known
+ * more accurately than they really are.
+ */
+
+LOCAL(boolean)
+smoothing_ok (j_decompress_ptr cinfo)
+{
+  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
+  boolean smoothing_useful = FALSE;
+  int ci, coefi;
+  jpeg_component_info *compptr;
+  JQUANT_TBL * qtable;
+  int * coef_bits;
+  int * coef_bits_latch;
+
+  if (! cinfo->progressive_mode || cinfo->coef_bits == NULL)
+    return FALSE;
+
+  /* Allocate latch area if not already done */
+  if (coef->coef_bits_latch == NULL)
+    coef->coef_bits_latch = (int *)
+      (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				  cinfo->num_components *
+				  (SAVED_COEFS * SIZEOF(int)));
+  coef_bits_latch = coef->coef_bits_latch;
+
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    /* All components' quantization values must already be latched. */
+    if ((qtable = compptr->quant_table) == NULL)
+      return FALSE;
+    /* Verify DC & first 5 AC quantizers are nonzero to avoid zero-divide. */
+    if (qtable->quantval[0] == 0 ||
+	qtable->quantval[Q01_POS] == 0 ||
+	qtable->quantval[Q10_POS] == 0 ||
+	qtable->quantval[Q20_POS] == 0 ||
+	qtable->quantval[Q11_POS] == 0 ||
+	qtable->quantval[Q02_POS] == 0)
+      return FALSE;
+    /* DC values must be at least partly known for all components. */
+    coef_bits = cinfo->coef_bits[ci];
+    if (coef_bits[0] < 0)
+      return FALSE;
+    /* Block smoothing is helpful if some AC coefficients remain inaccurate. */
+    for (coefi = 1; coefi <= 5; coefi++) {
+      coef_bits_latch[coefi] = coef_bits[coefi];
+      if (coef_bits[coefi] != 0)
+	smoothing_useful = TRUE;
+    }
+    coef_bits_latch += SAVED_COEFS;
+  }
+
+  return smoothing_useful;
+}
+
+
+/*
+ * Variant of decompress_data for use when doing block smoothing.
+ */
+
+METHODDEF(int)
+decompress_smooth_data (j_decompress_ptr cinfo, JSAMPIMAGE output_buf)
+{
+  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
+  JDIMENSION last_iMCU_row = cinfo->total_iMCU_rows - 1;
+  JDIMENSION block_num, last_block_column;
+  int ci, block_row, block_rows, access_rows;
+  JBLOCKARRAY buffer;
+  JBLOCKROW buffer_ptr, prev_block_row, next_block_row;
+  JSAMPARRAY output_ptr;
+  JDIMENSION output_col;
+  jpeg_component_info *compptr;
+  inverse_DCT_method_ptr inverse_DCT;
+  boolean first_row, last_row;
+  JBLOCK workspace;
+  int *coef_bits;
+  JQUANT_TBL *quanttbl;
+  INT32 Q00,Q01,Q02,Q10,Q11,Q20, num;
+  int DC1,DC2,DC3,DC4,DC5,DC6,DC7,DC8,DC9;
+  int Al, pred;
+
+  /* Force some input to be done if we are getting ahead of the input. */
+  while (cinfo->input_scan_number <= cinfo->output_scan_number &&
+	 ! cinfo->inputctl->eoi_reached) {
+    if (cinfo->input_scan_number == cinfo->output_scan_number) {
+      /* If input is working on current scan, we ordinarily want it to
+       * have completed the current row.  But if input scan is DC,
+       * we want it to keep one row ahead so that next block row's DC
+       * values are up to date.
+       */
+      JDIMENSION delta = (cinfo->Ss == 0) ? 1 : 0;
+      if (cinfo->input_iMCU_row > cinfo->output_iMCU_row+delta)
+	break;
+    }
+    if ((*cinfo->inputctl->consume_input)(cinfo) == JPEG_SUSPENDED)
+      return JPEG_SUSPENDED;
+  }
+
+  /* OK, output from the virtual arrays. */
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    /* Don't bother to IDCT an uninteresting component. */
+    if (! compptr->component_needed)
+      continue;
+    /* Count non-dummy DCT block rows in this iMCU row. */
+    if (cinfo->output_iMCU_row < last_iMCU_row) {
+      block_rows = compptr->v_samp_factor;
+      access_rows = block_rows * 2; /* this and next iMCU row */
+      last_row = FALSE;
+    } else {
+      /* NB: can't use last_row_height here; it is input-side-dependent! */
+      block_rows = (int) (compptr->height_in_blocks % compptr->v_samp_factor);
+      if (block_rows == 0) block_rows = compptr->v_samp_factor;
+      access_rows = block_rows; /* this iMCU row only */
+      last_row = TRUE;
+    }
+    /* Align the virtual buffer for this component. */
+    if (cinfo->output_iMCU_row > 0) {
+      access_rows += compptr->v_samp_factor; /* prior iMCU row too */
+      buffer = (*cinfo->mem->access_virt_barray)
+	((j_common_ptr) cinfo, coef->whole_image[ci],
+	 (cinfo->output_iMCU_row - 1) * compptr->v_samp_factor,
+	 (JDIMENSION) access_rows, FALSE);
+      buffer += compptr->v_samp_factor;	/* point to current iMCU row */
+      first_row = FALSE;
+    } else {
+      buffer = (*cinfo->mem->access_virt_barray)
+	((j_common_ptr) cinfo, coef->whole_image[ci],
+	 (JDIMENSION) 0, (JDIMENSION) access_rows, FALSE);
+      first_row = TRUE;
+    }
+    /* Fetch component-dependent info */
+    coef_bits = coef->coef_bits_latch + (ci * SAVED_COEFS);
+    quanttbl = compptr->quant_table;
+    Q00 = quanttbl->quantval[0];
+    Q01 = quanttbl->quantval[Q01_POS];
+    Q10 = quanttbl->quantval[Q10_POS];
+    Q20 = quanttbl->quantval[Q20_POS];
+    Q11 = quanttbl->quantval[Q11_POS];
+    Q02 = quanttbl->quantval[Q02_POS];
+    inverse_DCT = cinfo->idct->inverse_DCT[ci];
+    output_ptr = output_buf[ci];
+    /* Loop over all DCT blocks to be processed. */
+    for (block_row = 0; block_row < block_rows; block_row++) {
+      buffer_ptr = buffer[block_row];
+      if (first_row && block_row == 0)
+	prev_block_row = buffer_ptr;
+      else
+	prev_block_row = buffer[block_row-1];
+      if (last_row && block_row == block_rows-1)
+	next_block_row = buffer_ptr;
+      else
+	next_block_row = buffer[block_row+1];
+      /* We fetch the surrounding DC values using a sliding-register approach.
+       * Initialize all nine here so as to do the right thing on narrow pics.
+       */
+      DC1 = DC2 = DC3 = (int) prev_block_row[0][0];
+      DC4 = DC5 = DC6 = (int) buffer_ptr[0][0];
+      DC7 = DC8 = DC9 = (int) next_block_row[0][0];
+      output_col = 0;
+      last_block_column = compptr->width_in_blocks - 1;
+      for (block_num = 0; block_num <= last_block_column; block_num++) {
+	/* Fetch current DCT block into workspace so we can modify it. */
+	jcopy_block_row(buffer_ptr, (JBLOCKROW) workspace, (JDIMENSION) 1);
+	/* Update DC values */
+	if (block_num < last_block_column) {
+	  DC3 = (int) prev_block_row[1][0];
+	  DC6 = (int) buffer_ptr[1][0];
+	  DC9 = (int) next_block_row[1][0];
+	}
+	/* Compute coefficient estimates per K.8.
+	 * An estimate is applied only if coefficient is still zero,
+	 * and is not known to be fully accurate.
+	 */
+	/* AC01 */
+	if ((Al=coef_bits[1]) != 0 && workspace[1] == 0) {
+	  num = 36 * Q00 * (DC4 - DC6);
+	  if (num >= 0) {
+	    pred = (int) (((Q01<<7) + num) / (Q01<<8));
+	    if (Al > 0 && pred >= (1<<Al))
+	      pred = (1<<Al)-1;
+	  } else {
+	    pred = (int) (((Q01<<7) - num) / (Q01<<8));
+	    if (Al > 0 && pred >= (1<<Al))
+	      pred = (1<<Al)-1;
+	    pred = -pred;
+	  }
+	  workspace[1] = (JCOEF) pred;
+	}
+	/* AC10 */
+	if ((Al=coef_bits[2]) != 0 && workspace[8] == 0) {
+	  num = 36 * Q00 * (DC2 - DC8);
+	  if (num >= 0) {
+	    pred = (int) (((Q10<<7) + num) / (Q10<<8));
+	    if (Al > 0 && pred >= (1<<Al))
+	      pred = (1<<Al)-1;
+	  } else {
+	    pred = (int) (((Q10<<7) - num) / (Q10<<8));
+	    if (Al > 0 && pred >= (1<<Al))
+	      pred = (1<<Al)-1;
+	    pred = -pred;
+	  }
+	  workspace[8] = (JCOEF) pred;
+	}
+	/* AC20 */
+	if ((Al=coef_bits[3]) != 0 && workspace[16] == 0) {
+	  num = 9 * Q00 * (DC2 + DC8 - 2*DC5);
+	  if (num >= 0) {
+	    pred = (int) (((Q20<<7) + num) / (Q20<<8));
+	    if (Al > 0 && pred >= (1<<Al))
+	      pred = (1<<Al)-1;
+	  } else {
+	    pred = (int) (((Q20<<7) - num) / (Q20<<8));
+	    if (Al > 0 && pred >= (1<<Al))
+	      pred = (1<<Al)-1;
+	    pred = -pred;
+	  }
+	  workspace[16] = (JCOEF) pred;
+	}
+	/* AC11 */
+	if ((Al=coef_bits[4]) != 0 && workspace[9] == 0) {
+	  num = 5 * Q00 * (DC1 - DC3 - DC7 + DC9);
+	  if (num >= 0) {
+	    pred = (int) (((Q11<<7) + num) / (Q11<<8));
+	    if (Al > 0 && pred >= (1<<Al))
+	      pred = (1<<Al)-1;
+	  } else {
+	    pred = (int) (((Q11<<7) - num) / (Q11<<8));
+	    if (Al > 0 && pred >= (1<<Al))
+	      pred = (1<<Al)-1;
+	    pred = -pred;
+	  }
+	  workspace[9] = (JCOEF) pred;
+	}
+	/* AC02 */
+	if ((Al=coef_bits[5]) != 0 && workspace[2] == 0) {
+	  num = 9 * Q00 * (DC4 + DC6 - 2*DC5);
+	  if (num >= 0) {
+	    pred = (int) (((Q02<<7) + num) / (Q02<<8));
+	    if (Al > 0 && pred >= (1<<Al))
+	      pred = (1<<Al)-1;
+	  } else {
+	    pred = (int) (((Q02<<7) - num) / (Q02<<8));
+	    if (Al > 0 && pred >= (1<<Al))
+	      pred = (1<<Al)-1;
+	    pred = -pred;
+	  }
+	  workspace[2] = (JCOEF) pred;
+	}
+	/* OK, do the IDCT */
+	(*inverse_DCT) (cinfo, compptr, (JCOEFPTR) workspace,
+			output_ptr, output_col);
+	/* Advance for next column */
+	DC1 = DC2; DC2 = DC3;
+	DC4 = DC5; DC5 = DC6;
+	DC7 = DC8; DC8 = DC9;
+	buffer_ptr++, prev_block_row++, next_block_row++;
+	output_col += compptr->DCT_h_scaled_size;
+      }
+      output_ptr += compptr->DCT_v_scaled_size;
+    }
+  }
+
+  if (++(cinfo->output_iMCU_row) < cinfo->total_iMCU_rows)
+    return JPEG_ROW_COMPLETED;
+  return JPEG_SCAN_COMPLETED;
+}
+
+#endif /* BLOCK_SMOOTHING_SUPPORTED */
+
+
+/*
+ * Initialize coefficient buffer controller.
+ */
+
+GLOBAL(void)
+jinit_d_coef_controller (j_decompress_ptr cinfo, boolean need_full_buffer)
+{
+  my_coef_ptr coef;
+
+  coef = (my_coef_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				SIZEOF(my_coef_controller));
+  cinfo->coef = (struct jpeg_d_coef_controller *) coef;
+  coef->pub.start_input_pass = start_input_pass;
+  coef->pub.start_output_pass = start_output_pass;
+#ifdef BLOCK_SMOOTHING_SUPPORTED
+  coef->coef_bits_latch = NULL;
+#endif
+
+  /* Create the coefficient buffer. */
+  if (need_full_buffer) {
+#ifdef D_MULTISCAN_FILES_SUPPORTED
+    /* Allocate a full-image virtual array for each component, */
+    /* padded to a multiple of samp_factor DCT blocks in each direction. */
+    /* Note we ask for a pre-zeroed array. */
+    int ci, access_rows;
+    jpeg_component_info *compptr;
+
+    for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+	 ci++, compptr++) {
+      access_rows = compptr->v_samp_factor;
+#ifdef BLOCK_SMOOTHING_SUPPORTED
+      /* If block smoothing could be used, need a bigger window */
+      if (cinfo->progressive_mode)
+	access_rows *= 3;
+#endif
+      coef->whole_image[ci] = (*cinfo->mem->request_virt_barray)
+	((j_common_ptr) cinfo, JPOOL_IMAGE, TRUE,
+	 (JDIMENSION) jround_up((long) compptr->width_in_blocks,
+				(long) compptr->h_samp_factor),
+	 (JDIMENSION) jround_up((long) compptr->height_in_blocks,
+				(long) compptr->v_samp_factor),
+	 (JDIMENSION) access_rows);
+    }
+    coef->pub.consume_data = consume_data;
+    coef->pub.decompress_data = decompress_data;
+    coef->pub.coef_arrays = coef->whole_image; /* link to virtual arrays */
+#else
+    ERREXIT(cinfo, JERR_NOT_COMPILED);
+#endif
+  } else {
+    /* We only need a single-MCU buffer. */
+    JBLOCKROW buffer;
+    int i;
+
+    buffer = (JBLOCKROW)
+      (*cinfo->mem->alloc_large) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				  D_MAX_BLOCKS_IN_MCU * SIZEOF(JBLOCK));
+    for (i = 0; i < D_MAX_BLOCKS_IN_MCU; i++) {
+      coef->MCU_buffer[i] = buffer + i;
+    }
+    if (cinfo->lim_Se == 0)	/* DC only case: want to bypass later */
+      FMEMZERO((void FAR *) buffer,
+	       (size_t) (D_MAX_BLOCKS_IN_MCU * SIZEOF(JBLOCK)));
+    coef->pub.consume_data = dummy_consume_data;
+    coef->pub.decompress_data = decompress_onepass;
+    coef->pub.coef_arrays = NULL; /* flag for no virtual arrays */
+  }
+}
diff --git a/plugins/AdvaImg/src/LibJPEG/jdcolor.c b/plugins/AdvaImg/src/LibJPEG/jdcolor.c
index 83e4d069ab..fe3f1d8d1f 100644
--- a/plugins/AdvaImg/src/LibJPEG/jdcolor.c
+++ b/plugins/AdvaImg/src/LibJPEG/jdcolor.c
@@ -1,512 +1,618 @@
-/*
- * jdcolor.c
- *
- * Copyright (C) 1991-1997, Thomas G. Lane.
- * Modified 2011 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains output colorspace conversion routines.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-
-
-/* Private subobject */
-
-typedef struct {
-  struct jpeg_color_deconverter pub; /* public fields */
-
-  /* Private state for YCC->RGB conversion */
-  int * Cr_r_tab;		/* => table for Cr to R conversion */
-  int * Cb_b_tab;		/* => table for Cb to B conversion */
-  INT32 * Cr_g_tab;		/* => table for Cr to G conversion */
-  INT32 * Cb_g_tab;		/* => table for Cb to G conversion */
-
-  /* Private state for RGB->Y conversion */
-  INT32 * rgb_y_tab;		/* => table for RGB to Y conversion */
-} my_color_deconverter;
-
-typedef my_color_deconverter * my_cconvert_ptr;
-
-
-/**************** YCbCr -> RGB conversion: most common case **************/
-/****************   RGB -> Y   conversion: less common case **************/
-
-/*
- * YCbCr is defined per CCIR 601-1, except that Cb and Cr are
- * normalized to the range 0..MAXJSAMPLE rather than -0.5 .. 0.5.
- * The conversion equations to be implemented are therefore
- *
- *	R = Y                + 1.40200 * Cr
- *	G = Y - 0.34414 * Cb - 0.71414 * Cr
- *	B = Y + 1.77200 * Cb
- *
- *	Y = 0.29900 * R + 0.58700 * G + 0.11400 * B
- *
- * where Cb and Cr represent the incoming values less CENTERJSAMPLE.
- * (These numbers are derived from TIFF 6.0 section 21, dated 3-June-92.)
- *
- * To avoid floating-point arithmetic, we represent the fractional constants
- * as integers scaled up by 2^16 (about 4 digits precision); we have to divide
- * the products by 2^16, with appropriate rounding, to get the correct answer.
- * Notice that Y, being an integral input, does not contribute any fraction
- * so it need not participate in the rounding.
- *
- * For even more speed, we avoid doing any multiplications in the inner loop
- * by precalculating the constants times Cb and Cr for all possible values.
- * For 8-bit JSAMPLEs this is very reasonable (only 256 entries per table);
- * for 12-bit samples it is still acceptable.  It's not very reasonable for
- * 16-bit samples, but if you want lossless storage you shouldn't be changing
- * colorspace anyway.
- * The Cr=>R and Cb=>B values can be rounded to integers in advance; the
- * values for the G calculation are left scaled up, since we must add them
- * together before rounding.
- */
-
-#define SCALEBITS	16	/* speediest right-shift on some machines */
-#define ONE_HALF	((INT32) 1 << (SCALEBITS-1))
-#define FIX(x)		((INT32) ((x) * (1L<<SCALEBITS) + 0.5))
-
-/* We allocate one big table for RGB->Y conversion and divide it up into
- * three parts, instead of doing three alloc_small requests.  This lets us
- * use a single table base address, which can be held in a register in the
- * inner loops on many machines (more than can hold all three addresses,
- * anyway).
- */
-
-#define R_Y_OFF		0			/* offset to R => Y section */
-#define G_Y_OFF		(1*(MAXJSAMPLE+1))	/* offset to G => Y section */
-#define B_Y_OFF		(2*(MAXJSAMPLE+1))	/* etc. */
-#define TABLE_SIZE	(3*(MAXJSAMPLE+1))
-
-
-/*
- * Initialize tables for YCC->RGB colorspace conversion.
- */
-
-LOCAL(void)
-build_ycc_rgb_table (j_decompress_ptr cinfo)
-{
-  my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
-  int i;
-  INT32 x;
-  SHIFT_TEMPS
-
-  cconvert->Cr_r_tab = (int *)
-    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				(MAXJSAMPLE+1) * SIZEOF(int));
-  cconvert->Cb_b_tab = (int *)
-    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				(MAXJSAMPLE+1) * SIZEOF(int));
-  cconvert->Cr_g_tab = (INT32 *)
-    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				(MAXJSAMPLE+1) * SIZEOF(INT32));
-  cconvert->Cb_g_tab = (INT32 *)
-    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				(MAXJSAMPLE+1) * SIZEOF(INT32));
-
-  for (i = 0, x = -CENTERJSAMPLE; i <= MAXJSAMPLE; i++, x++) {
-    /* i is the actual input pixel value, in the range 0..MAXJSAMPLE */
-    /* The Cb or Cr value we are thinking of is x = i - CENTERJSAMPLE */
-    /* Cr=>R value is nearest int to 1.40200 * x */
-    cconvert->Cr_r_tab[i] = (int)
-		    RIGHT_SHIFT(FIX(1.40200) * x + ONE_HALF, SCALEBITS);
-    /* Cb=>B value is nearest int to 1.77200 * x */
-    cconvert->Cb_b_tab[i] = (int)
-		    RIGHT_SHIFT(FIX(1.77200) * x + ONE_HALF, SCALEBITS);
-    /* Cr=>G value is scaled-up -0.71414 * x */
-    cconvert->Cr_g_tab[i] = (- FIX(0.71414)) * x;
-    /* Cb=>G value is scaled-up -0.34414 * x */
-    /* We also add in ONE_HALF so that need not do it in inner loop */
-    cconvert->Cb_g_tab[i] = (- FIX(0.34414)) * x + ONE_HALF;
-  }
-}
-
-
-/*
- * Convert some rows of samples to the output colorspace.
- *
- * Note that we change from noninterleaved, one-plane-per-component format
- * to interleaved-pixel format.  The output buffer is therefore three times
- * as wide as the input buffer.
- * A starting row offset is provided only for the input buffer.  The caller
- * can easily adjust the passed output_buf value to accommodate any row
- * offset required on that side.
- */
-
-METHODDEF(void)
-ycc_rgb_convert (j_decompress_ptr cinfo,
-		 JSAMPIMAGE input_buf, JDIMENSION input_row,
-		 JSAMPARRAY output_buf, int num_rows)
-{
-  my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
-  register int y, cb, cr;
-  register JSAMPROW outptr;
-  register JSAMPROW inptr0, inptr1, inptr2;
-  register JDIMENSION col;
-  JDIMENSION num_cols = cinfo->output_width;
-  /* copy these pointers into registers if possible */
-  register JSAMPLE * range_limit = cinfo->sample_range_limit;
-  register int * Crrtab = cconvert->Cr_r_tab;
-  register int * Cbbtab = cconvert->Cb_b_tab;
-  register INT32 * Crgtab = cconvert->Cr_g_tab;
-  register INT32 * Cbgtab = cconvert->Cb_g_tab;
-  SHIFT_TEMPS
-
-  while (--num_rows >= 0) {
-    inptr0 = input_buf[0][input_row];
-    inptr1 = input_buf[1][input_row];
-    inptr2 = input_buf[2][input_row];
-    input_row++;
-    outptr = *output_buf++;
-    for (col = 0; col < num_cols; col++) {
-      y  = GETJSAMPLE(inptr0[col]);
-      cb = GETJSAMPLE(inptr1[col]);
-      cr = GETJSAMPLE(inptr2[col]);
-      /* Range-limiting is essential due to noise introduced by DCT losses. */
-      outptr[RGB_RED] =   range_limit[y + Crrtab[cr]];
-      outptr[RGB_GREEN] = range_limit[y +
-			      ((int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr],
-						 SCALEBITS))];
-      outptr[RGB_BLUE] =  range_limit[y + Cbbtab[cb]];
-      outptr += RGB_PIXELSIZE;
-    }
-  }
-}
-
-
-/**************** Cases other than YCbCr -> RGB **************/
-
-
-/*
- * Initialize for RGB->grayscale colorspace conversion.
- */
-
-LOCAL(void)
-build_rgb_y_table (j_decompress_ptr cinfo)
-{
-  my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
-  INT32 * rgb_y_tab;
-  INT32 i;
-
-  /* Allocate and fill in the conversion tables. */
-  cconvert->rgb_y_tab = rgb_y_tab = (INT32 *)
-    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				(TABLE_SIZE * SIZEOF(INT32)));
-
-  for (i = 0; i <= MAXJSAMPLE; i++) {
-    rgb_y_tab[i+R_Y_OFF] = FIX(0.29900) * i;
-    rgb_y_tab[i+G_Y_OFF] = FIX(0.58700) * i;
-    rgb_y_tab[i+B_Y_OFF] = FIX(0.11400) * i + ONE_HALF;
-  }
-}
-
-
-/*
- * Convert RGB to grayscale.
- */
-
-METHODDEF(void)
-rgb_gray_convert (j_decompress_ptr cinfo,
-		  JSAMPIMAGE input_buf, JDIMENSION input_row,
-		  JSAMPARRAY output_buf, int num_rows)
-{
-  my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
-  register int r, g, b;
-  register INT32 * ctab = cconvert->rgb_y_tab;
-  register JSAMPROW outptr;
-  register JSAMPROW inptr0, inptr1, inptr2;
-  register JDIMENSION col;
-  JDIMENSION num_cols = cinfo->output_width;
-
-  while (--num_rows >= 0) {
-    inptr0 = input_buf[0][input_row];
-    inptr1 = input_buf[1][input_row];
-    inptr2 = input_buf[2][input_row];
-    input_row++;
-    outptr = *output_buf++;
-    for (col = 0; col < num_cols; col++) {
-      r = GETJSAMPLE(inptr0[col]);
-      g = GETJSAMPLE(inptr1[col]);
-      b = GETJSAMPLE(inptr2[col]);
-      /* Y */
-      outptr[col] = (JSAMPLE)
-		((ctab[r+R_Y_OFF] + ctab[g+G_Y_OFF] + ctab[b+B_Y_OFF])
-		 >> SCALEBITS);
-    }
-  }
-}
-
-
-/*
- * No colorspace change, but conversion from separate-planes
- * to interleaved representation.
- */
-
-METHODDEF(void)
-rgb_convert (j_decompress_ptr cinfo,
-	     JSAMPIMAGE input_buf, JDIMENSION input_row,
-	     JSAMPARRAY output_buf, int num_rows)
-{
-  register JSAMPROW outptr;
-  register JSAMPROW inptr0, inptr1, inptr2;
-  register JDIMENSION col;
-  JDIMENSION num_cols = cinfo->output_width;
-
-  while (--num_rows >= 0) {
-    inptr0 = input_buf[0][input_row];
-    inptr1 = input_buf[1][input_row];
-    inptr2 = input_buf[2][input_row];
-    input_row++;
-    outptr = *output_buf++;
-    for (col = 0; col < num_cols; col++) {
-      /* We can dispense with GETJSAMPLE() here */
-      outptr[RGB_RED]   = inptr0[col];
-      outptr[RGB_GREEN] = inptr1[col];
-      outptr[RGB_BLUE]  = inptr2[col];
-      outptr += RGB_PIXELSIZE;
-    }
-  }
-}
-
-
-/*
- * Color conversion for no colorspace change: just copy the data,
- * converting from separate-planes to interleaved representation.
- */
-
-METHODDEF(void)
-null_convert (j_decompress_ptr cinfo,
-	      JSAMPIMAGE input_buf, JDIMENSION input_row,
-	      JSAMPARRAY output_buf, int num_rows)
-{
-  register JSAMPROW inptr, outptr;
-  register JDIMENSION count;
-  register int num_components = cinfo->num_components;
-  JDIMENSION num_cols = cinfo->output_width;
-  int ci;
-
-  while (--num_rows >= 0) {
-    for (ci = 0; ci < num_components; ci++) {
-      inptr = input_buf[ci][input_row];
-      outptr = output_buf[0] + ci;
-      for (count = num_cols; count > 0; count--) {
-	*outptr = *inptr++;	/* needn't bother with GETJSAMPLE() here */
-	outptr += num_components;
-      }
-    }
-    input_row++;
-    output_buf++;
-  }
-}
-
-
-/*
- * Color conversion for grayscale: just copy the data.
- * This also works for YCbCr -> grayscale conversion, in which
- * we just copy the Y (luminance) component and ignore chrominance.
- */
-
-METHODDEF(void)
-grayscale_convert (j_decompress_ptr cinfo,
-		   JSAMPIMAGE input_buf, JDIMENSION input_row,
-		   JSAMPARRAY output_buf, int num_rows)
-{
-  jcopy_sample_rows(input_buf[0], (int) input_row, output_buf, 0,
-		    num_rows, cinfo->output_width);
-}
-
-
-/*
- * Convert grayscale to RGB: just duplicate the graylevel three times.
- * This is provided to support applications that don't want to cope
- * with grayscale as a separate case.
- */
-
-METHODDEF(void)
-gray_rgb_convert (j_decompress_ptr cinfo,
-		  JSAMPIMAGE input_buf, JDIMENSION input_row,
-		  JSAMPARRAY output_buf, int num_rows)
-{
-  register JSAMPROW inptr, outptr;
-  register JDIMENSION col;
-  JDIMENSION num_cols = cinfo->output_width;
-
-  while (--num_rows >= 0) {
-    inptr = input_buf[0][input_row++];
-    outptr = *output_buf++;
-    for (col = 0; col < num_cols; col++) {
-      /* We can dispense with GETJSAMPLE() here */
-      outptr[RGB_RED] = outptr[RGB_GREEN] = outptr[RGB_BLUE] = inptr[col];
-      outptr += RGB_PIXELSIZE;
-    }
-  }
-}
-
-
-/*
- * Adobe-style YCCK->CMYK conversion.
- * We convert YCbCr to R=1-C, G=1-M, and B=1-Y using the same
- * conversion as above, while passing K (black) unchanged.
- * We assume build_ycc_rgb_table has been called.
- */
-
-METHODDEF(void)
-ycck_cmyk_convert (j_decompress_ptr cinfo,
-		   JSAMPIMAGE input_buf, JDIMENSION input_row,
-		   JSAMPARRAY output_buf, int num_rows)
-{
-  my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
-  register int y, cb, cr;
-  register JSAMPROW outptr;
-  register JSAMPROW inptr0, inptr1, inptr2, inptr3;
-  register JDIMENSION col;
-  JDIMENSION num_cols = cinfo->output_width;
-  /* copy these pointers into registers if possible */
-  register JSAMPLE * range_limit = cinfo->sample_range_limit;
-  register int * Crrtab = cconvert->Cr_r_tab;
-  register int * Cbbtab = cconvert->Cb_b_tab;
-  register INT32 * Crgtab = cconvert->Cr_g_tab;
-  register INT32 * Cbgtab = cconvert->Cb_g_tab;
-  SHIFT_TEMPS
-
-  while (--num_rows >= 0) {
-    inptr0 = input_buf[0][input_row];
-    inptr1 = input_buf[1][input_row];
-    inptr2 = input_buf[2][input_row];
-    inptr3 = input_buf[3][input_row];
-    input_row++;
-    outptr = *output_buf++;
-    for (col = 0; col < num_cols; col++) {
-      y  = GETJSAMPLE(inptr0[col]);
-      cb = GETJSAMPLE(inptr1[col]);
-      cr = GETJSAMPLE(inptr2[col]);
-      /* Range-limiting is essential due to noise introduced by DCT losses. */
-      outptr[0] = range_limit[MAXJSAMPLE - (y + Crrtab[cr])];	/* red */
-      outptr[1] = range_limit[MAXJSAMPLE - (y +			/* green */
-			      ((int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr],
-						 SCALEBITS)))];
-      outptr[2] = range_limit[MAXJSAMPLE - (y + Cbbtab[cb])];	/* blue */
-      /* K passes through unchanged */
-      outptr[3] = inptr3[col];	/* don't need GETJSAMPLE here */
-      outptr += 4;
-    }
-  }
-}
-
-
-/*
- * Empty method for start_pass.
- */
-
-METHODDEF(void)
-start_pass_dcolor (j_decompress_ptr cinfo)
-{
-  /* no work needed */
-}
-
-
-/*
- * Module initialization routine for output colorspace conversion.
- */
-
-GLOBAL(void)
-jinit_color_deconverter (j_decompress_ptr cinfo)
-{
-  my_cconvert_ptr cconvert;
-  int ci;
-
-  cconvert = (my_cconvert_ptr)
-    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				SIZEOF(my_color_deconverter));
-  cinfo->cconvert = (struct jpeg_color_deconverter *) cconvert;
-  cconvert->pub.start_pass = start_pass_dcolor;
-
-  /* Make sure num_components agrees with jpeg_color_space */
-  switch (cinfo->jpeg_color_space) {
-  case JCS_GRAYSCALE:
-    if (cinfo->num_components != 1)
-      ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
-    break;
-
-  case JCS_RGB:
-  case JCS_YCbCr:
-    if (cinfo->num_components != 3)
-      ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
-    break;
-
-  case JCS_CMYK:
-  case JCS_YCCK:
-    if (cinfo->num_components != 4)
-      ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
-    break;
-
-  default:			/* JCS_UNKNOWN can be anything */
-    if (cinfo->num_components < 1)
-      ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
-    break;
-  }
-
-  /* Set out_color_components and conversion method based on requested space.
-   * Also clear the component_needed flags for any unused components,
-   * so that earlier pipeline stages can avoid useless computation.
-   */
-
-  switch (cinfo->out_color_space) {
-  case JCS_GRAYSCALE:
-    cinfo->out_color_components = 1;
-    if (cinfo->jpeg_color_space == JCS_GRAYSCALE ||
-	cinfo->jpeg_color_space == JCS_YCbCr) {
-      cconvert->pub.color_convert = grayscale_convert;
-      /* For color->grayscale conversion, only the Y (0) component is needed */
-      for (ci = 1; ci < cinfo->num_components; ci++)
-	cinfo->comp_info[ci].component_needed = FALSE;
-    } else if (cinfo->jpeg_color_space == JCS_RGB) {
-      cconvert->pub.color_convert = rgb_gray_convert;
-      build_rgb_y_table(cinfo);
-    } else
-      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
-    break;
-
-  case JCS_RGB:
-    cinfo->out_color_components = RGB_PIXELSIZE;
-    if (cinfo->jpeg_color_space == JCS_YCbCr) {
-      cconvert->pub.color_convert = ycc_rgb_convert;
-      build_ycc_rgb_table(cinfo);
-    } else if (cinfo->jpeg_color_space == JCS_GRAYSCALE) {
-      cconvert->pub.color_convert = gray_rgb_convert;
-    } else if (cinfo->jpeg_color_space == JCS_RGB) {
-      cconvert->pub.color_convert = rgb_convert;
-    } else
-      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
-    break;
-
-  case JCS_CMYK:
-    cinfo->out_color_components = 4;
-    if (cinfo->jpeg_color_space == JCS_YCCK) {
-      cconvert->pub.color_convert = ycck_cmyk_convert;
-      build_ycc_rgb_table(cinfo);
-    } else if (cinfo->jpeg_color_space == JCS_CMYK) {
-      cconvert->pub.color_convert = null_convert;
-    } else
-      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
-    break;
-
-  default:
-    /* Permit null conversion to same output space */
-    if (cinfo->out_color_space == cinfo->jpeg_color_space) {
-      cinfo->out_color_components = cinfo->num_components;
-      cconvert->pub.color_convert = null_convert;
-    } else			/* unsupported non-null conversion */
-      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
-    break;
-  }
-
-  if (cinfo->quantize_colors)
-    cinfo->output_components = 1; /* single colormapped output component */
-  else
-    cinfo->output_components = cinfo->out_color_components;
-}
+/*
+ * jdcolor.c
+ *
+ * Copyright (C) 1991-1997, Thomas G. Lane.
+ * Modified 2011-2012 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains output colorspace conversion routines.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/* Private subobject */
+
+typedef struct {
+  struct jpeg_color_deconverter pub; /* public fields */
+
+  /* Private state for YCC->RGB conversion */
+  int * Cr_r_tab;		/* => table for Cr to R conversion */
+  int * Cb_b_tab;		/* => table for Cb to B conversion */
+  INT32 * Cr_g_tab;		/* => table for Cr to G conversion */
+  INT32 * Cb_g_tab;		/* => table for Cb to G conversion */
+
+  /* Private state for RGB->Y conversion */
+  INT32 * rgb_y_tab;		/* => table for RGB to Y conversion */
+} my_color_deconverter;
+
+typedef my_color_deconverter * my_cconvert_ptr;
+
+
+/**************** YCbCr -> RGB conversion: most common case **************/
+/****************   RGB -> Y   conversion: less common case **************/
+
+/*
+ * YCbCr is defined per CCIR 601-1, except that Cb and Cr are
+ * normalized to the range 0..MAXJSAMPLE rather than -0.5 .. 0.5.
+ * The conversion equations to be implemented are therefore
+ *
+ *	R = Y                + 1.40200 * Cr
+ *	G = Y - 0.34414 * Cb - 0.71414 * Cr
+ *	B = Y + 1.77200 * Cb
+ *
+ *	Y = 0.29900 * R + 0.58700 * G + 0.11400 * B
+ *
+ * where Cb and Cr represent the incoming values less CENTERJSAMPLE.
+ * (These numbers are derived from TIFF 6.0 section 21, dated 3-June-92.)
+ *
+ * To avoid floating-point arithmetic, we represent the fractional constants
+ * as integers scaled up by 2^16 (about 4 digits precision); we have to divide
+ * the products by 2^16, with appropriate rounding, to get the correct answer.
+ * Notice that Y, being an integral input, does not contribute any fraction
+ * so it need not participate in the rounding.
+ *
+ * For even more speed, we avoid doing any multiplications in the inner loop
+ * by precalculating the constants times Cb and Cr for all possible values.
+ * For 8-bit JSAMPLEs this is very reasonable (only 256 entries per table);
+ * for 12-bit samples it is still acceptable.  It's not very reasonable for
+ * 16-bit samples, but if you want lossless storage you shouldn't be changing
+ * colorspace anyway.
+ * The Cr=>R and Cb=>B values can be rounded to integers in advance; the
+ * values for the G calculation are left scaled up, since we must add them
+ * together before rounding.
+ */
+
+#define SCALEBITS	16	/* speediest right-shift on some machines */
+#define ONE_HALF	((INT32) 1 << (SCALEBITS-1))
+#define FIX(x)		((INT32) ((x) * (1L<<SCALEBITS) + 0.5))
+
+/* We allocate one big table for RGB->Y conversion and divide it up into
+ * three parts, instead of doing three alloc_small requests.  This lets us
+ * use a single table base address, which can be held in a register in the
+ * inner loops on many machines (more than can hold all three addresses,
+ * anyway).
+ */
+
+#define R_Y_OFF		0			/* offset to R => Y section */
+#define G_Y_OFF		(1*(MAXJSAMPLE+1))	/* offset to G => Y section */
+#define B_Y_OFF		(2*(MAXJSAMPLE+1))	/* etc. */
+#define TABLE_SIZE	(3*(MAXJSAMPLE+1))
+
+
+/*
+ * Initialize tables for YCC->RGB colorspace conversion.
+ */
+
+LOCAL(void)
+build_ycc_rgb_table (j_decompress_ptr cinfo)
+{
+  my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
+  int i;
+  INT32 x;
+  SHIFT_TEMPS
+
+  cconvert->Cr_r_tab = (int *)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				(MAXJSAMPLE+1) * SIZEOF(int));
+  cconvert->Cb_b_tab = (int *)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				(MAXJSAMPLE+1) * SIZEOF(int));
+  cconvert->Cr_g_tab = (INT32 *)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				(MAXJSAMPLE+1) * SIZEOF(INT32));
+  cconvert->Cb_g_tab = (INT32 *)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				(MAXJSAMPLE+1) * SIZEOF(INT32));
+
+  for (i = 0, x = -CENTERJSAMPLE; i <= MAXJSAMPLE; i++, x++) {
+    /* i is the actual input pixel value, in the range 0..MAXJSAMPLE */
+    /* The Cb or Cr value we are thinking of is x = i - CENTERJSAMPLE */
+    /* Cr=>R value is nearest int to 1.40200 * x */
+    cconvert->Cr_r_tab[i] = (int)
+		    RIGHT_SHIFT(FIX(1.40200) * x + ONE_HALF, SCALEBITS);
+    /* Cb=>B value is nearest int to 1.77200 * x */
+    cconvert->Cb_b_tab[i] = (int)
+		    RIGHT_SHIFT(FIX(1.77200) * x + ONE_HALF, SCALEBITS);
+    /* Cr=>G value is scaled-up -0.71414 * x */
+    cconvert->Cr_g_tab[i] = (- FIX(0.71414)) * x;
+    /* Cb=>G value is scaled-up -0.34414 * x */
+    /* We also add in ONE_HALF so that need not do it in inner loop */
+    cconvert->Cb_g_tab[i] = (- FIX(0.34414)) * x + ONE_HALF;
+  }
+}
+
+
+/*
+ * Convert some rows of samples to the output colorspace.
+ *
+ * Note that we change from noninterleaved, one-plane-per-component format
+ * to interleaved-pixel format.  The output buffer is therefore three times
+ * as wide as the input buffer.
+ * A starting row offset is provided only for the input buffer.  The caller
+ * can easily adjust the passed output_buf value to accommodate any row
+ * offset required on that side.
+ */
+
+METHODDEF(void)
+ycc_rgb_convert (j_decompress_ptr cinfo,
+		 JSAMPIMAGE input_buf, JDIMENSION input_row,
+		 JSAMPARRAY output_buf, int num_rows)
+{
+  my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
+  register int y, cb, cr;
+  register JSAMPROW outptr;
+  register JSAMPROW inptr0, inptr1, inptr2;
+  register JDIMENSION col;
+  JDIMENSION num_cols = cinfo->output_width;
+  /* copy these pointers into registers if possible */
+  register JSAMPLE * range_limit = cinfo->sample_range_limit;
+  register int * Crrtab = cconvert->Cr_r_tab;
+  register int * Cbbtab = cconvert->Cb_b_tab;
+  register INT32 * Crgtab = cconvert->Cr_g_tab;
+  register INT32 * Cbgtab = cconvert->Cb_g_tab;
+  SHIFT_TEMPS
+
+  while (--num_rows >= 0) {
+    inptr0 = input_buf[0][input_row];
+    inptr1 = input_buf[1][input_row];
+    inptr2 = input_buf[2][input_row];
+    input_row++;
+    outptr = *output_buf++;
+    for (col = 0; col < num_cols; col++) {
+      y  = GETJSAMPLE(inptr0[col]);
+      cb = GETJSAMPLE(inptr1[col]);
+      cr = GETJSAMPLE(inptr2[col]);
+      /* Range-limiting is essential due to noise introduced by DCT losses. */
+      outptr[RGB_RED] =   range_limit[y + Crrtab[cr]];
+      outptr[RGB_GREEN] = range_limit[y +
+			      ((int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr],
+						 SCALEBITS))];
+      outptr[RGB_BLUE] =  range_limit[y + Cbbtab[cb]];
+      outptr += RGB_PIXELSIZE;
+    }
+  }
+}
+
+
+/**************** Cases other than YCbCr -> RGB **************/
+
+
+/*
+ * Initialize for RGB->grayscale colorspace conversion.
+ */
+
+LOCAL(void)
+build_rgb_y_table (j_decompress_ptr cinfo)
+{
+  my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
+  INT32 * rgb_y_tab;
+  INT32 i;
+
+  /* Allocate and fill in the conversion tables. */
+  cconvert->rgb_y_tab = rgb_y_tab = (INT32 *)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				(TABLE_SIZE * SIZEOF(INT32)));
+
+  for (i = 0; i <= MAXJSAMPLE; i++) {
+    rgb_y_tab[i+R_Y_OFF] = FIX(0.29900) * i;
+    rgb_y_tab[i+G_Y_OFF] = FIX(0.58700) * i;
+    rgb_y_tab[i+B_Y_OFF] = FIX(0.11400) * i + ONE_HALF;
+  }
+}
+
+
+/*
+ * Convert RGB to grayscale.
+ */
+
+METHODDEF(void)
+rgb_gray_convert (j_decompress_ptr cinfo,
+		  JSAMPIMAGE input_buf, JDIMENSION input_row,
+		  JSAMPARRAY output_buf, int num_rows)
+{
+  my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
+  register INT32 * ctab = cconvert->rgb_y_tab;
+  register int r, g, b;
+  register JSAMPROW outptr;
+  register JSAMPROW inptr0, inptr1, inptr2;
+  register JDIMENSION col;
+  JDIMENSION num_cols = cinfo->output_width;
+
+  while (--num_rows >= 0) {
+    inptr0 = input_buf[0][input_row];
+    inptr1 = input_buf[1][input_row];
+    inptr2 = input_buf[2][input_row];
+    input_row++;
+    outptr = *output_buf++;
+    for (col = 0; col < num_cols; col++) {
+      r = GETJSAMPLE(inptr0[col]);
+      g = GETJSAMPLE(inptr1[col]);
+      b = GETJSAMPLE(inptr2[col]);
+      /* Y */
+      outptr[col] = (JSAMPLE)
+		((ctab[r+R_Y_OFF] + ctab[g+G_Y_OFF] + ctab[b+B_Y_OFF])
+		 >> SCALEBITS);
+    }
+  }
+}
+
+
+/*
+ * [R-G,G,B-G] to [R,G,B] conversion with modulo calculation
+ * (inverse color transform).
+ */
+
+METHODDEF(void)
+rgb1_rgb_convert (j_decompress_ptr cinfo,
+		  JSAMPIMAGE input_buf, JDIMENSION input_row,
+		  JSAMPARRAY output_buf, int num_rows)
+{
+  register int r, g, b;
+  register JSAMPROW outptr;
+  register JSAMPROW inptr0, inptr1, inptr2;
+  register JDIMENSION col;
+  JDIMENSION num_cols = cinfo->output_width;
+
+  while (--num_rows >= 0) {
+    inptr0 = input_buf[0][input_row];
+    inptr1 = input_buf[1][input_row];
+    inptr2 = input_buf[2][input_row];
+    input_row++;
+    outptr = *output_buf++;
+    for (col = 0; col < num_cols; col++) {
+      r = GETJSAMPLE(inptr0[col]);
+      g = GETJSAMPLE(inptr1[col]);
+      b = GETJSAMPLE(inptr2[col]);
+      /* Assume that MAXJSAMPLE+1 is a power of 2, so that the MOD
+       * (modulo) operator is equivalent to the bitmask operator AND.
+       */
+      outptr[RGB_RED]   = (JSAMPLE) ((r + g - CENTERJSAMPLE) & MAXJSAMPLE);
+      outptr[RGB_GREEN] = (JSAMPLE) g;
+      outptr[RGB_BLUE]  = (JSAMPLE) ((b + g - CENTERJSAMPLE) & MAXJSAMPLE);
+      outptr += RGB_PIXELSIZE;
+    }
+  }
+}
+
+
+/*
+ * [R-G,G,B-G] to grayscale conversion with modulo calculation
+ * (inverse color transform).
+ */
+
+METHODDEF(void)
+rgb1_gray_convert (j_decompress_ptr cinfo,
+		   JSAMPIMAGE input_buf, JDIMENSION input_row,
+		   JSAMPARRAY output_buf, int num_rows)
+{
+  my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
+  register INT32 * ctab = cconvert->rgb_y_tab;
+  register int r, g, b;
+  register JSAMPROW outptr;
+  register JSAMPROW inptr0, inptr1, inptr2;
+  register JDIMENSION col;
+  JDIMENSION num_cols = cinfo->output_width;
+
+  while (--num_rows >= 0) {
+    inptr0 = input_buf[0][input_row];
+    inptr1 = input_buf[1][input_row];
+    inptr2 = input_buf[2][input_row];
+    input_row++;
+    outptr = *output_buf++;
+    for (col = 0; col < num_cols; col++) {
+      r = GETJSAMPLE(inptr0[col]);
+      g = GETJSAMPLE(inptr1[col]);
+      b = GETJSAMPLE(inptr2[col]);
+      /* Assume that MAXJSAMPLE+1 is a power of 2, so that the MOD
+       * (modulo) operator is equivalent to the bitmask operator AND.
+       */
+      r = (r + g - CENTERJSAMPLE) & MAXJSAMPLE;
+      b = (b + g - CENTERJSAMPLE) & MAXJSAMPLE;
+      /* Y */
+      outptr[col] = (JSAMPLE)
+		((ctab[r+R_Y_OFF] + ctab[g+G_Y_OFF] + ctab[b+B_Y_OFF])
+		 >> SCALEBITS);
+    }
+  }
+}
+
+
+/*
+ * No colorspace change, but conversion from separate-planes
+ * to interleaved representation.
+ */
+
+METHODDEF(void)
+rgb_convert (j_decompress_ptr cinfo,
+	     JSAMPIMAGE input_buf, JDIMENSION input_row,
+	     JSAMPARRAY output_buf, int num_rows)
+{
+  register JSAMPROW outptr;
+  register JSAMPROW inptr0, inptr1, inptr2;
+  register JDIMENSION col;
+  JDIMENSION num_cols = cinfo->output_width;
+
+  while (--num_rows >= 0) {
+    inptr0 = input_buf[0][input_row];
+    inptr1 = input_buf[1][input_row];
+    inptr2 = input_buf[2][input_row];
+    input_row++;
+    outptr = *output_buf++;
+    for (col = 0; col < num_cols; col++) {
+      /* We can dispense with GETJSAMPLE() here */
+      outptr[RGB_RED]   = inptr0[col];
+      outptr[RGB_GREEN] = inptr1[col];
+      outptr[RGB_BLUE]  = inptr2[col];
+      outptr += RGB_PIXELSIZE;
+    }
+  }
+}
+
+
+/*
+ * Color conversion for no colorspace change: just copy the data,
+ * converting from separate-planes to interleaved representation.
+ */
+
+METHODDEF(void)
+null_convert (j_decompress_ptr cinfo,
+	      JSAMPIMAGE input_buf, JDIMENSION input_row,
+	      JSAMPARRAY output_buf, int num_rows)
+{
+  int ci;
+  register int nc = cinfo->num_components;
+  register JSAMPROW outptr;
+  register JSAMPROW inptr;
+  register JDIMENSION col;
+  JDIMENSION num_cols = cinfo->output_width;
+
+  while (--num_rows >= 0) {
+    for (ci = 0; ci < nc; ci++) {
+      inptr = input_buf[ci][input_row];
+      outptr = output_buf[0] + ci;
+      for (col = 0; col < num_cols; col++) {
+	*outptr = *inptr++;	/* needn't bother with GETJSAMPLE() here */
+	outptr += nc;
+      }
+    }
+    input_row++;
+    output_buf++;
+  }
+}
+
+
+/*
+ * Color conversion for grayscale: just copy the data.
+ * This also works for YCbCr -> grayscale conversion, in which
+ * we just copy the Y (luminance) component and ignore chrominance.
+ */
+
+METHODDEF(void)
+grayscale_convert (j_decompress_ptr cinfo,
+		   JSAMPIMAGE input_buf, JDIMENSION input_row,
+		   JSAMPARRAY output_buf, int num_rows)
+{
+  jcopy_sample_rows(input_buf[0], (int) input_row, output_buf, 0,
+		    num_rows, cinfo->output_width);
+}
+
+
+/*
+ * Convert grayscale to RGB: just duplicate the graylevel three times.
+ * This is provided to support applications that don't want to cope
+ * with grayscale as a separate case.
+ */
+
+METHODDEF(void)
+gray_rgb_convert (j_decompress_ptr cinfo,
+		  JSAMPIMAGE input_buf, JDIMENSION input_row,
+		  JSAMPARRAY output_buf, int num_rows)
+{
+  register JSAMPROW outptr;
+  register JSAMPROW inptr;
+  register JDIMENSION col;
+  JDIMENSION num_cols = cinfo->output_width;
+
+  while (--num_rows >= 0) {
+    inptr = input_buf[0][input_row++];
+    outptr = *output_buf++;
+    for (col = 0; col < num_cols; col++) {
+      /* We can dispense with GETJSAMPLE() here */
+      outptr[RGB_RED] = outptr[RGB_GREEN] = outptr[RGB_BLUE] = inptr[col];
+      outptr += RGB_PIXELSIZE;
+    }
+  }
+}
+
+
+/*
+ * Adobe-style YCCK->CMYK conversion.
+ * We convert YCbCr to R=1-C, G=1-M, and B=1-Y using the same
+ * conversion as above, while passing K (black) unchanged.
+ * We assume build_ycc_rgb_table has been called.
+ */
+
+METHODDEF(void)
+ycck_cmyk_convert (j_decompress_ptr cinfo,
+		   JSAMPIMAGE input_buf, JDIMENSION input_row,
+		   JSAMPARRAY output_buf, int num_rows)
+{
+  my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
+  register int y, cb, cr;
+  register JSAMPROW outptr;
+  register JSAMPROW inptr0, inptr1, inptr2, inptr3;
+  register JDIMENSION col;
+  JDIMENSION num_cols = cinfo->output_width;
+  /* copy these pointers into registers if possible */
+  register JSAMPLE * range_limit = cinfo->sample_range_limit;
+  register int * Crrtab = cconvert->Cr_r_tab;
+  register int * Cbbtab = cconvert->Cb_b_tab;
+  register INT32 * Crgtab = cconvert->Cr_g_tab;
+  register INT32 * Cbgtab = cconvert->Cb_g_tab;
+  SHIFT_TEMPS
+
+  while (--num_rows >= 0) {
+    inptr0 = input_buf[0][input_row];
+    inptr1 = input_buf[1][input_row];
+    inptr2 = input_buf[2][input_row];
+    inptr3 = input_buf[3][input_row];
+    input_row++;
+    outptr = *output_buf++;
+    for (col = 0; col < num_cols; col++) {
+      y  = GETJSAMPLE(inptr0[col]);
+      cb = GETJSAMPLE(inptr1[col]);
+      cr = GETJSAMPLE(inptr2[col]);
+      /* Range-limiting is essential due to noise introduced by DCT losses. */
+      outptr[0] = range_limit[MAXJSAMPLE - (y + Crrtab[cr])];	/* red */
+      outptr[1] = range_limit[MAXJSAMPLE - (y +			/* green */
+			      ((int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr],
+						 SCALEBITS)))];
+      outptr[2] = range_limit[MAXJSAMPLE - (y + Cbbtab[cb])];	/* blue */
+      /* K passes through unchanged */
+      outptr[3] = inptr3[col];	/* don't need GETJSAMPLE here */
+      outptr += 4;
+    }
+  }
+}
+
+
+/*
+ * Empty method for start_pass.
+ */
+
+METHODDEF(void)
+start_pass_dcolor (j_decompress_ptr cinfo)
+{
+  /* no work needed */
+}
+
+
+/*
+ * Module initialization routine for output colorspace conversion.
+ */
+
+GLOBAL(void)
+jinit_color_deconverter (j_decompress_ptr cinfo)
+{
+  my_cconvert_ptr cconvert;
+  int ci;
+
+  cconvert = (my_cconvert_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				SIZEOF(my_color_deconverter));
+  cinfo->cconvert = &cconvert->pub;
+  cconvert->pub.start_pass = start_pass_dcolor;
+
+  /* Make sure num_components agrees with jpeg_color_space */
+  switch (cinfo->jpeg_color_space) {
+  case JCS_GRAYSCALE:
+    if (cinfo->num_components != 1)
+      ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
+    break;
+
+  case JCS_RGB:
+  case JCS_YCbCr:
+    if (cinfo->num_components != 3)
+      ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
+    break;
+
+  case JCS_CMYK:
+  case JCS_YCCK:
+    if (cinfo->num_components != 4)
+      ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
+    break;
+
+  default:			/* JCS_UNKNOWN can be anything */
+    if (cinfo->num_components < 1)
+      ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
+    break;
+  }
+
+  /* Support color transform only for RGB colorspace */
+  if (cinfo->color_transform && cinfo->jpeg_color_space != JCS_RGB)
+    ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+
+  /* Set out_color_components and conversion method based on requested space.
+   * Also clear the component_needed flags for any unused components,
+   * so that earlier pipeline stages can avoid useless computation.
+   */
+
+  switch (cinfo->out_color_space) {
+  case JCS_GRAYSCALE:
+    cinfo->out_color_components = 1;
+    if (cinfo->jpeg_color_space == JCS_GRAYSCALE ||
+	cinfo->jpeg_color_space == JCS_YCbCr) {
+      cconvert->pub.color_convert = grayscale_convert;
+      /* For color->grayscale conversion, only the Y (0) component is needed */
+      for (ci = 1; ci < cinfo->num_components; ci++)
+	cinfo->comp_info[ci].component_needed = FALSE;
+    } else if (cinfo->jpeg_color_space == JCS_RGB) {
+      switch (cinfo->color_transform) {
+      case JCT_NONE:
+	cconvert->pub.color_convert = rgb_gray_convert;
+	break;
+      case JCT_SUBTRACT_GREEN:
+	cconvert->pub.color_convert = rgb1_gray_convert;
+	break;
+      default:
+	ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+	break;
+      }
+      build_rgb_y_table(cinfo);
+    } else
+      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+    break;
+
+  case JCS_RGB:
+    cinfo->out_color_components = RGB_PIXELSIZE;
+    if (cinfo->jpeg_color_space == JCS_YCbCr) {
+      cconvert->pub.color_convert = ycc_rgb_convert;
+      build_ycc_rgb_table(cinfo);
+    } else if (cinfo->jpeg_color_space == JCS_GRAYSCALE) {
+      cconvert->pub.color_convert = gray_rgb_convert;
+    } else if (cinfo->jpeg_color_space == JCS_RGB) {
+      switch (cinfo->color_transform) {
+      case JCT_NONE:
+	cconvert->pub.color_convert = rgb_convert;
+	break;
+      case JCT_SUBTRACT_GREEN:
+	cconvert->pub.color_convert = rgb1_rgb_convert;
+	break;
+      default:
+	ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+	break;
+      }
+    } else
+      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+    break;
+
+  case JCS_CMYK:
+    cinfo->out_color_components = 4;
+    if (cinfo->jpeg_color_space == JCS_YCCK) {
+      cconvert->pub.color_convert = ycck_cmyk_convert;
+      build_ycc_rgb_table(cinfo);
+    } else if (cinfo->jpeg_color_space == JCS_CMYK) {
+      cconvert->pub.color_convert = null_convert;
+    } else
+      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+    break;
+
+  default:
+    /* Permit null conversion to same output space */
+    if (cinfo->out_color_space == cinfo->jpeg_color_space) {
+      cinfo->out_color_components = cinfo->num_components;
+      cconvert->pub.color_convert = null_convert;
+    } else			/* unsupported non-null conversion */
+      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+    break;
+  }
+
+  if (cinfo->quantize_colors)
+    cinfo->output_components = 1; /* single colormapped output component */
+  else
+    cinfo->output_components = cinfo->out_color_components;
+}
diff --git a/plugins/AdvaImg/src/LibJPEG/jdct.h b/plugins/AdvaImg/src/LibJPEG/jdct.h
index 360dec80c9..b1ff91250b 100644
--- a/plugins/AdvaImg/src/LibJPEG/jdct.h
+++ b/plugins/AdvaImg/src/LibJPEG/jdct.h
@@ -1,393 +1,393 @@
-/*
- * jdct.h
- *
- * Copyright (C) 1994-1996, Thomas G. Lane.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This include file contains common declarations for the forward and
- * inverse DCT modules.  These declarations are private to the DCT managers
- * (jcdctmgr.c, jddctmgr.c) and the individual DCT algorithms.
- * The individual DCT algorithms are kept in separate files to ease 
- * machine-dependent tuning (e.g., assembly coding).
- */
-
-
-/*
- * A forward DCT routine is given a pointer to an input sample array and
- * a pointer to a work area of type DCTELEM[]; the DCT is to be performed
- * in-place in that buffer.  Type DCTELEM is int for 8-bit samples, INT32
- * for 12-bit samples.  (NOTE: Floating-point DCT implementations use an
- * array of type FAST_FLOAT, instead.)
- * The input data is to be fetched from the sample array starting at a
- * specified column.  (Any row offset needed will be applied to the array
- * pointer before it is passed to the FDCT code.)
- * Note that the number of samples fetched by the FDCT routine is
- * DCT_h_scaled_size * DCT_v_scaled_size.
- * The DCT outputs are returned scaled up by a factor of 8; they therefore
- * have a range of +-8K for 8-bit data, +-128K for 12-bit data.  This
- * convention improves accuracy in integer implementations and saves some
- * work in floating-point ones.
- * Quantization of the output coefficients is done by jcdctmgr.c.
- */
-
-#if BITS_IN_JSAMPLE == 8
-typedef int DCTELEM;		/* 16 or 32 bits is fine */
-#else
-typedef INT32 DCTELEM;		/* must have 32 bits */
-#endif
-
-typedef JMETHOD(void, forward_DCT_method_ptr, (DCTELEM * data,
-					       JSAMPARRAY sample_data,
-					       JDIMENSION start_col));
-typedef JMETHOD(void, float_DCT_method_ptr, (FAST_FLOAT * data,
-					     JSAMPARRAY sample_data,
-					     JDIMENSION start_col));
-
-
-/*
- * An inverse DCT routine is given a pointer to the input JBLOCK and a pointer
- * to an output sample array.  The routine must dequantize the input data as
- * well as perform the IDCT; for dequantization, it uses the multiplier table
- * pointed to by compptr->dct_table.  The output data is to be placed into the
- * sample array starting at a specified column.  (Any row offset needed will
- * be applied to the array pointer before it is passed to the IDCT code.)
- * Note that the number of samples emitted by the IDCT routine is
- * DCT_h_scaled_size * DCT_v_scaled_size.
- */
-
-/* typedef inverse_DCT_method_ptr is declared in jpegint.h */
-
-/*
- * Each IDCT routine has its own ideas about the best dct_table element type.
- */
-
-typedef MULTIPLIER ISLOW_MULT_TYPE; /* short or int, whichever is faster */
-#if BITS_IN_JSAMPLE == 8
-typedef MULTIPLIER IFAST_MULT_TYPE; /* 16 bits is OK, use short if faster */
-#define IFAST_SCALE_BITS  2	/* fractional bits in scale factors */
-#else
-typedef INT32 IFAST_MULT_TYPE;	/* need 32 bits for scaled quantizers */
-#define IFAST_SCALE_BITS  13	/* fractional bits in scale factors */
-#endif
-typedef FAST_FLOAT FLOAT_MULT_TYPE; /* preferred floating type */
-
-
-/*
- * Each IDCT routine is responsible for range-limiting its results and
- * converting them to unsigned form (0..MAXJSAMPLE).  The raw outputs could
- * be quite far out of range if the input data is corrupt, so a bulletproof
- * range-limiting step is required.  We use a mask-and-table-lookup method
- * to do the combined operations quickly.  See the comments with
- * prepare_range_limit_table (in jdmaster.c) for more info.
- */
-
-#define IDCT_range_limit(cinfo)  ((cinfo)->sample_range_limit + CENTERJSAMPLE)
-
-#define RANGE_MASK  (MAXJSAMPLE * 4 + 3) /* 2 bits wider than legal samples */
-
-
-/* Short forms of external names for systems with brain-damaged linkers. */
-
-#ifdef NEED_SHORT_EXTERNAL_NAMES
-#define jpeg_fdct_islow		jFDislow
-#define jpeg_fdct_ifast		jFDifast
-#define jpeg_fdct_float		jFDfloat
-#define jpeg_fdct_7x7		jFD7x7
-#define jpeg_fdct_6x6		jFD6x6
-#define jpeg_fdct_5x5		jFD5x5
-#define jpeg_fdct_4x4		jFD4x4
-#define jpeg_fdct_3x3		jFD3x3
-#define jpeg_fdct_2x2		jFD2x2
-#define jpeg_fdct_1x1		jFD1x1
-#define jpeg_fdct_9x9		jFD9x9
-#define jpeg_fdct_10x10		jFD10x10
-#define jpeg_fdct_11x11		jFD11x11
-#define jpeg_fdct_12x12		jFD12x12
-#define jpeg_fdct_13x13		jFD13x13
-#define jpeg_fdct_14x14		jFD14x14
-#define jpeg_fdct_15x15		jFD15x15
-#define jpeg_fdct_16x16		jFD16x16
-#define jpeg_fdct_16x8		jFD16x8
-#define jpeg_fdct_14x7		jFD14x7
-#define jpeg_fdct_12x6		jFD12x6
-#define jpeg_fdct_10x5		jFD10x5
-#define jpeg_fdct_8x4		jFD8x4
-#define jpeg_fdct_6x3		jFD6x3
-#define jpeg_fdct_4x2		jFD4x2
-#define jpeg_fdct_2x1		jFD2x1
-#define jpeg_fdct_8x16		jFD8x16
-#define jpeg_fdct_7x14		jFD7x14
-#define jpeg_fdct_6x12		jFD6x12
-#define jpeg_fdct_5x10		jFD5x10
-#define jpeg_fdct_4x8		jFD4x8
-#define jpeg_fdct_3x6		jFD3x6
-#define jpeg_fdct_2x4		jFD2x4
-#define jpeg_fdct_1x2		jFD1x2
-#define jpeg_idct_islow		jRDislow
-#define jpeg_idct_ifast		jRDifast
-#define jpeg_idct_float		jRDfloat
-#define jpeg_idct_7x7		jRD7x7
-#define jpeg_idct_6x6		jRD6x6
-#define jpeg_idct_5x5		jRD5x5
-#define jpeg_idct_4x4		jRD4x4
-#define jpeg_idct_3x3		jRD3x3
-#define jpeg_idct_2x2		jRD2x2
-#define jpeg_idct_1x1		jRD1x1
-#define jpeg_idct_9x9		jRD9x9
-#define jpeg_idct_10x10		jRD10x10
-#define jpeg_idct_11x11		jRD11x11
-#define jpeg_idct_12x12		jRD12x12
-#define jpeg_idct_13x13		jRD13x13
-#define jpeg_idct_14x14		jRD14x14
-#define jpeg_idct_15x15		jRD15x15
-#define jpeg_idct_16x16		jRD16x16
-#define jpeg_idct_16x8		jRD16x8
-#define jpeg_idct_14x7		jRD14x7
-#define jpeg_idct_12x6		jRD12x6
-#define jpeg_idct_10x5		jRD10x5
-#define jpeg_idct_8x4		jRD8x4
-#define jpeg_idct_6x3		jRD6x3
-#define jpeg_idct_4x2		jRD4x2
-#define jpeg_idct_2x1		jRD2x1
-#define jpeg_idct_8x16		jRD8x16
-#define jpeg_idct_7x14		jRD7x14
-#define jpeg_idct_6x12		jRD6x12
-#define jpeg_idct_5x10		jRD5x10
-#define jpeg_idct_4x8		jRD4x8
-#define jpeg_idct_3x6		jRD3x8
-#define jpeg_idct_2x4		jRD2x4
-#define jpeg_idct_1x2		jRD1x2
-#endif /* NEED_SHORT_EXTERNAL_NAMES */
-
-/* Extern declarations for the forward and inverse DCT routines. */
-
-EXTERN(void) jpeg_fdct_islow
-    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
-EXTERN(void) jpeg_fdct_ifast
-    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
-EXTERN(void) jpeg_fdct_float
-    JPP((FAST_FLOAT * data, JSAMPARRAY sample_data, JDIMENSION start_col));
-EXTERN(void) jpeg_fdct_7x7
-    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
-EXTERN(void) jpeg_fdct_6x6
-    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
-EXTERN(void) jpeg_fdct_5x5
-    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
-EXTERN(void) jpeg_fdct_4x4
-    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
-EXTERN(void) jpeg_fdct_3x3
-    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
-EXTERN(void) jpeg_fdct_2x2
-    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
-EXTERN(void) jpeg_fdct_1x1
-    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
-EXTERN(void) jpeg_fdct_9x9
-    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
-EXTERN(void) jpeg_fdct_10x10
-    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
-EXTERN(void) jpeg_fdct_11x11
-    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
-EXTERN(void) jpeg_fdct_12x12
-    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
-EXTERN(void) jpeg_fdct_13x13
-    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
-EXTERN(void) jpeg_fdct_14x14
-    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
-EXTERN(void) jpeg_fdct_15x15
-    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
-EXTERN(void) jpeg_fdct_16x16
-    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
-EXTERN(void) jpeg_fdct_16x8
-    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
-EXTERN(void) jpeg_fdct_14x7
-    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
-EXTERN(void) jpeg_fdct_12x6
-    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
-EXTERN(void) jpeg_fdct_10x5
-    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
-EXTERN(void) jpeg_fdct_8x4
-    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
-EXTERN(void) jpeg_fdct_6x3
-    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
-EXTERN(void) jpeg_fdct_4x2
-    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
-EXTERN(void) jpeg_fdct_2x1
-    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
-EXTERN(void) jpeg_fdct_8x16
-    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
-EXTERN(void) jpeg_fdct_7x14
-    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
-EXTERN(void) jpeg_fdct_6x12
-    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
-EXTERN(void) jpeg_fdct_5x10
-    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
-EXTERN(void) jpeg_fdct_4x8
-    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
-EXTERN(void) jpeg_fdct_3x6
-    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
-EXTERN(void) jpeg_fdct_2x4
-    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
-EXTERN(void) jpeg_fdct_1x2
-    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
-
-EXTERN(void) jpeg_idct_islow
-    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
-EXTERN(void) jpeg_idct_ifast
-    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
-EXTERN(void) jpeg_idct_float
-    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
-EXTERN(void) jpeg_idct_7x7
-    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
-EXTERN(void) jpeg_idct_6x6
-    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
-EXTERN(void) jpeg_idct_5x5
-    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
-EXTERN(void) jpeg_idct_4x4
-    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
-EXTERN(void) jpeg_idct_3x3
-    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
-EXTERN(void) jpeg_idct_2x2
-    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
-EXTERN(void) jpeg_idct_1x1
-    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
-EXTERN(void) jpeg_idct_9x9
-    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
-EXTERN(void) jpeg_idct_10x10
-    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
-EXTERN(void) jpeg_idct_11x11
-    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
-EXTERN(void) jpeg_idct_12x12
-    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
-EXTERN(void) jpeg_idct_13x13
-    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
-EXTERN(void) jpeg_idct_14x14
-    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
-EXTERN(void) jpeg_idct_15x15
-    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
-EXTERN(void) jpeg_idct_16x16
-    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
-EXTERN(void) jpeg_idct_16x8
-    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
-EXTERN(void) jpeg_idct_14x7
-    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
-EXTERN(void) jpeg_idct_12x6
-    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
-EXTERN(void) jpeg_idct_10x5
-    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
-EXTERN(void) jpeg_idct_8x4
-    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
-EXTERN(void) jpeg_idct_6x3
-    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
-EXTERN(void) jpeg_idct_4x2
-    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
-EXTERN(void) jpeg_idct_2x1
-    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
-EXTERN(void) jpeg_idct_8x16
-    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
-EXTERN(void) jpeg_idct_7x14
-    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
-EXTERN(void) jpeg_idct_6x12
-    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
-EXTERN(void) jpeg_idct_5x10
-    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
-EXTERN(void) jpeg_idct_4x8
-    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
-EXTERN(void) jpeg_idct_3x6
-    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
-EXTERN(void) jpeg_idct_2x4
-    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
-EXTERN(void) jpeg_idct_1x2
-    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
-
-
-/*
- * Macros for handling fixed-point arithmetic; these are used by many
- * but not all of the DCT/IDCT modules.
- *
- * All values are expected to be of type INT32.
- * Fractional constants are scaled left by CONST_BITS bits.
- * CONST_BITS is defined within each module using these macros,
- * and may differ from one module to the next.
- */
-
-#define ONE	((INT32) 1)
-#define CONST_SCALE (ONE << CONST_BITS)
-
-/* Convert a positive real constant to an integer scaled by CONST_SCALE.
- * Caution: some C compilers fail to reduce "FIX(constant)" at compile time,
- * thus causing a lot of useless floating-point operations at run time.
- */
-
-#define FIX(x)	((INT32) ((x) * CONST_SCALE + 0.5))
-
-/* Descale and correctly round an INT32 value that's scaled by N bits.
- * We assume RIGHT_SHIFT rounds towards minus infinity, so adding
- * the fudge factor is correct for either sign of X.
- */
-
-#define DESCALE(x,n)  RIGHT_SHIFT((x) + (ONE << ((n)-1)), n)
-
-/* Multiply an INT32 variable by an INT32 constant to yield an INT32 result.
- * This macro is used only when the two inputs will actually be no more than
- * 16 bits wide, so that a 16x16->32 bit multiply can be used instead of a
- * full 32x32 multiply.  This provides a useful speedup on many machines.
- * Unfortunately there is no way to specify a 16x16->32 multiply portably
- * in C, but some C compilers will do the right thing if you provide the
- * correct combination of casts.
- */
-
-#ifdef SHORTxSHORT_32		/* may work if 'int' is 32 bits */
-#define MULTIPLY16C16(var,const)  (((INT16) (var)) * ((INT16) (const)))
-#endif
-#ifdef SHORTxLCONST_32		/* known to work with Microsoft C 6.0 */
-#define MULTIPLY16C16(var,const)  (((INT16) (var)) * ((INT32) (const)))
-#endif
-
-#ifndef MULTIPLY16C16		/* default definition */
-#define MULTIPLY16C16(var,const)  ((var) * (const))
-#endif
-
-/* Same except both inputs are variables. */
-
-#ifdef SHORTxSHORT_32		/* may work if 'int' is 32 bits */
-#define MULTIPLY16V16(var1,var2)  (((INT16) (var1)) * ((INT16) (var2)))
-#endif
-
-#ifndef MULTIPLY16V16		/* default definition */
-#define MULTIPLY16V16(var1,var2)  ((var1) * (var2))
-#endif
+/*
+ * jdct.h
+ *
+ * Copyright (C) 1994-1996, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This include file contains common declarations for the forward and
+ * inverse DCT modules.  These declarations are private to the DCT managers
+ * (jcdctmgr.c, jddctmgr.c) and the individual DCT algorithms.
+ * The individual DCT algorithms are kept in separate files to ease 
+ * machine-dependent tuning (e.g., assembly coding).
+ */
+
+
+/*
+ * A forward DCT routine is given a pointer to an input sample array and
+ * a pointer to a work area of type DCTELEM[]; the DCT is to be performed
+ * in-place in that buffer.  Type DCTELEM is int for 8-bit samples, INT32
+ * for 12-bit samples.  (NOTE: Floating-point DCT implementations use an
+ * array of type FAST_FLOAT, instead.)
+ * The input data is to be fetched from the sample array starting at a
+ * specified column.  (Any row offset needed will be applied to the array
+ * pointer before it is passed to the FDCT code.)
+ * Note that the number of samples fetched by the FDCT routine is
+ * DCT_h_scaled_size * DCT_v_scaled_size.
+ * The DCT outputs are returned scaled up by a factor of 8; they therefore
+ * have a range of +-8K for 8-bit data, +-128K for 12-bit data.  This
+ * convention improves accuracy in integer implementations and saves some
+ * work in floating-point ones.
+ * Quantization of the output coefficients is done by jcdctmgr.c.
+ */
+
+#if BITS_IN_JSAMPLE == 8
+typedef int DCTELEM;		/* 16 or 32 bits is fine */
+#else
+typedef INT32 DCTELEM;		/* must have 32 bits */
+#endif
+
+typedef JMETHOD(void, forward_DCT_method_ptr, (DCTELEM * data,
+					       JSAMPARRAY sample_data,
+					       JDIMENSION start_col));
+typedef JMETHOD(void, float_DCT_method_ptr, (FAST_FLOAT * data,
+					     JSAMPARRAY sample_data,
+					     JDIMENSION start_col));
+
+
+/*
+ * An inverse DCT routine is given a pointer to the input JBLOCK and a pointer
+ * to an output sample array.  The routine must dequantize the input data as
+ * well as perform the IDCT; for dequantization, it uses the multiplier table
+ * pointed to by compptr->dct_table.  The output data is to be placed into the
+ * sample array starting at a specified column.  (Any row offset needed will
+ * be applied to the array pointer before it is passed to the IDCT code.)
+ * Note that the number of samples emitted by the IDCT routine is
+ * DCT_h_scaled_size * DCT_v_scaled_size.
+ */
+
+/* typedef inverse_DCT_method_ptr is declared in jpegint.h */
+
+/*
+ * Each IDCT routine has its own ideas about the best dct_table element type.
+ */
+
+typedef MULTIPLIER ISLOW_MULT_TYPE; /* short or int, whichever is faster */
+#if BITS_IN_JSAMPLE == 8
+typedef MULTIPLIER IFAST_MULT_TYPE; /* 16 bits is OK, use short if faster */
+#define IFAST_SCALE_BITS  2	/* fractional bits in scale factors */
+#else
+typedef INT32 IFAST_MULT_TYPE;	/* need 32 bits for scaled quantizers */
+#define IFAST_SCALE_BITS  13	/* fractional bits in scale factors */
+#endif
+typedef FAST_FLOAT FLOAT_MULT_TYPE; /* preferred floating type */
+
+
+/*
+ * Each IDCT routine is responsible for range-limiting its results and
+ * converting them to unsigned form (0..MAXJSAMPLE).  The raw outputs could
+ * be quite far out of range if the input data is corrupt, so a bulletproof
+ * range-limiting step is required.  We use a mask-and-table-lookup method
+ * to do the combined operations quickly.  See the comments with
+ * prepare_range_limit_table (in jdmaster.c) for more info.
+ */
+
+#define IDCT_range_limit(cinfo)  ((cinfo)->sample_range_limit + CENTERJSAMPLE)
+
+#define RANGE_MASK  (MAXJSAMPLE * 4 + 3) /* 2 bits wider than legal samples */
+
+
+/* Short forms of external names for systems with brain-damaged linkers. */
+
+#ifdef NEED_SHORT_EXTERNAL_NAMES
+#define jpeg_fdct_islow		jFDislow
+#define jpeg_fdct_ifast		jFDifast
+#define jpeg_fdct_float		jFDfloat
+#define jpeg_fdct_7x7		jFD7x7
+#define jpeg_fdct_6x6		jFD6x6
+#define jpeg_fdct_5x5		jFD5x5
+#define jpeg_fdct_4x4		jFD4x4
+#define jpeg_fdct_3x3		jFD3x3
+#define jpeg_fdct_2x2		jFD2x2
+#define jpeg_fdct_1x1		jFD1x1
+#define jpeg_fdct_9x9		jFD9x9
+#define jpeg_fdct_10x10		jFD10x10
+#define jpeg_fdct_11x11		jFD11x11
+#define jpeg_fdct_12x12		jFD12x12
+#define jpeg_fdct_13x13		jFD13x13
+#define jpeg_fdct_14x14		jFD14x14
+#define jpeg_fdct_15x15		jFD15x15
+#define jpeg_fdct_16x16		jFD16x16
+#define jpeg_fdct_16x8		jFD16x8
+#define jpeg_fdct_14x7		jFD14x7
+#define jpeg_fdct_12x6		jFD12x6
+#define jpeg_fdct_10x5		jFD10x5
+#define jpeg_fdct_8x4		jFD8x4
+#define jpeg_fdct_6x3		jFD6x3
+#define jpeg_fdct_4x2		jFD4x2
+#define jpeg_fdct_2x1		jFD2x1
+#define jpeg_fdct_8x16		jFD8x16
+#define jpeg_fdct_7x14		jFD7x14
+#define jpeg_fdct_6x12		jFD6x12
+#define jpeg_fdct_5x10		jFD5x10
+#define jpeg_fdct_4x8		jFD4x8
+#define jpeg_fdct_3x6		jFD3x6
+#define jpeg_fdct_2x4		jFD2x4
+#define jpeg_fdct_1x2		jFD1x2
+#define jpeg_idct_islow		jRDislow
+#define jpeg_idct_ifast		jRDifast
+#define jpeg_idct_float		jRDfloat
+#define jpeg_idct_7x7		jRD7x7
+#define jpeg_idct_6x6		jRD6x6
+#define jpeg_idct_5x5		jRD5x5
+#define jpeg_idct_4x4		jRD4x4
+#define jpeg_idct_3x3		jRD3x3
+#define jpeg_idct_2x2		jRD2x2
+#define jpeg_idct_1x1		jRD1x1
+#define jpeg_idct_9x9		jRD9x9
+#define jpeg_idct_10x10		jRD10x10
+#define jpeg_idct_11x11		jRD11x11
+#define jpeg_idct_12x12		jRD12x12
+#define jpeg_idct_13x13		jRD13x13
+#define jpeg_idct_14x14		jRD14x14
+#define jpeg_idct_15x15		jRD15x15
+#define jpeg_idct_16x16		jRD16x16
+#define jpeg_idct_16x8		jRD16x8
+#define jpeg_idct_14x7		jRD14x7
+#define jpeg_idct_12x6		jRD12x6
+#define jpeg_idct_10x5		jRD10x5
+#define jpeg_idct_8x4		jRD8x4
+#define jpeg_idct_6x3		jRD6x3
+#define jpeg_idct_4x2		jRD4x2
+#define jpeg_idct_2x1		jRD2x1
+#define jpeg_idct_8x16		jRD8x16
+#define jpeg_idct_7x14		jRD7x14
+#define jpeg_idct_6x12		jRD6x12
+#define jpeg_idct_5x10		jRD5x10
+#define jpeg_idct_4x8		jRD4x8
+#define jpeg_idct_3x6		jRD3x8
+#define jpeg_idct_2x4		jRD2x4
+#define jpeg_idct_1x2		jRD1x2
+#endif /* NEED_SHORT_EXTERNAL_NAMES */
+
+/* Extern declarations for the forward and inverse DCT routines. */
+
+EXTERN(void) jpeg_fdct_islow
+    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_ifast
+    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_float
+    JPP((FAST_FLOAT * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_7x7
+    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_6x6
+    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_5x5
+    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_4x4
+    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_3x3
+    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_2x2
+    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_1x1
+    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_9x9
+    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_10x10
+    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_11x11
+    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_12x12
+    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_13x13
+    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_14x14
+    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_15x15
+    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_16x16
+    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_16x8
+    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_14x7
+    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_12x6
+    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_10x5
+    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_8x4
+    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_6x3
+    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_4x2
+    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_2x1
+    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_8x16
+    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_7x14
+    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_6x12
+    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_5x10
+    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_4x8
+    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_3x6
+    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_2x4
+    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_1x2
+    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+
+EXTERN(void) jpeg_idct_islow
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_ifast
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_float
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_7x7
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_6x6
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_5x5
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_4x4
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_3x3
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_2x2
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_1x1
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_9x9
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_10x10
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_11x11
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_12x12
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_13x13
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_14x14
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_15x15
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_16x16
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_16x8
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_14x7
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_12x6
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_10x5
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_8x4
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_6x3
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_4x2
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_2x1
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_8x16
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_7x14
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_6x12
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_5x10
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_4x8
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_3x6
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_2x4
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_1x2
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+
+
+/*
+ * Macros for handling fixed-point arithmetic; these are used by many
+ * but not all of the DCT/IDCT modules.
+ *
+ * All values are expected to be of type INT32.
+ * Fractional constants are scaled left by CONST_BITS bits.
+ * CONST_BITS is defined within each module using these macros,
+ * and may differ from one module to the next.
+ */
+
+#define ONE	((INT32) 1)
+#define CONST_SCALE (ONE << CONST_BITS)
+
+/* Convert a positive real constant to an integer scaled by CONST_SCALE.
+ * Caution: some C compilers fail to reduce "FIX(constant)" at compile time,
+ * thus causing a lot of useless floating-point operations at run time.
+ */
+
+#define FIX(x)	((INT32) ((x) * CONST_SCALE + 0.5))
+
+/* Descale and correctly round an INT32 value that's scaled by N bits.
+ * We assume RIGHT_SHIFT rounds towards minus infinity, so adding
+ * the fudge factor is correct for either sign of X.
+ */
+
+#define DESCALE(x,n)  RIGHT_SHIFT((x) + (ONE << ((n)-1)), n)
+
+/* Multiply an INT32 variable by an INT32 constant to yield an INT32 result.
+ * This macro is used only when the two inputs will actually be no more than
+ * 16 bits wide, so that a 16x16->32 bit multiply can be used instead of a
+ * full 32x32 multiply.  This provides a useful speedup on many machines.
+ * Unfortunately there is no way to specify a 16x16->32 multiply portably
+ * in C, but some C compilers will do the right thing if you provide the
+ * correct combination of casts.
+ */
+
+#ifdef SHORTxSHORT_32		/* may work if 'int' is 32 bits */
+#define MULTIPLY16C16(var,const)  (((INT16) (var)) * ((INT16) (const)))
+#endif
+#ifdef SHORTxLCONST_32		/* known to work with Microsoft C 6.0 */
+#define MULTIPLY16C16(var,const)  (((INT16) (var)) * ((INT32) (const)))
+#endif
+
+#ifndef MULTIPLY16C16		/* default definition */
+#define MULTIPLY16C16(var,const)  ((var) * (const))
+#endif
+
+/* Same except both inputs are variables. */
+
+#ifdef SHORTxSHORT_32		/* may work if 'int' is 32 bits */
+#define MULTIPLY16V16(var1,var2)  (((INT16) (var1)) * ((INT16) (var2)))
+#endif
+
+#ifndef MULTIPLY16V16		/* default definition */
+#define MULTIPLY16V16(var1,var2)  ((var1) * (var2))
+#endif
diff --git a/plugins/AdvaImg/src/LibJPEG/jddctmgr.c b/plugins/AdvaImg/src/LibJPEG/jddctmgr.c
index 0ded9d5741..5e4f1dc440 100644
--- a/plugins/AdvaImg/src/LibJPEG/jddctmgr.c
+++ b/plugins/AdvaImg/src/LibJPEG/jddctmgr.c
@@ -1,384 +1,384 @@
-/*
- * jddctmgr.c
- *
- * Copyright (C) 1994-1996, Thomas G. Lane.
- * Modified 2002-2010 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains the inverse-DCT management logic.
- * This code selects a particular IDCT implementation to be used,
- * and it performs related housekeeping chores.  No code in this file
- * is executed per IDCT step, only during output pass setup.
- *
- * Note that the IDCT routines are responsible for performing coefficient
- * dequantization as well as the IDCT proper.  This module sets up the
- * dequantization multiplier table needed by the IDCT routine.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-#include "jdct.h"		/* Private declarations for DCT subsystem */
-
-
-/*
- * The decompressor input side (jdinput.c) saves away the appropriate
- * quantization table for each component at the start of the first scan
- * involving that component.  (This is necessary in order to correctly
- * decode files that reuse Q-table slots.)
- * When we are ready to make an output pass, the saved Q-table is converted
- * to a multiplier table that will actually be used by the IDCT routine.
- * The multiplier table contents are IDCT-method-dependent.  To support
- * application changes in IDCT method between scans, we can remake the
- * multiplier tables if necessary.
- * In buffered-image mode, the first output pass may occur before any data
- * has been seen for some components, and thus before their Q-tables have
- * been saved away.  To handle this case, multiplier tables are preset
- * to zeroes; the result of the IDCT will be a neutral gray level.
- */
-
-
-/* Private subobject for this module */
-
-typedef struct {
-  struct jpeg_inverse_dct pub;	/* public fields */
-
-  /* This array contains the IDCT method code that each multiplier table
-   * is currently set up for, or -1 if it's not yet set up.
-   * The actual multiplier tables are pointed to by dct_table in the
-   * per-component comp_info structures.
-   */
-  int cur_method[MAX_COMPONENTS];
-} my_idct_controller;
-
-typedef my_idct_controller * my_idct_ptr;
-
-
-/* Allocated multiplier tables: big enough for any supported variant */
-
-typedef union {
-  ISLOW_MULT_TYPE islow_array[DCTSIZE2];
-#ifdef DCT_IFAST_SUPPORTED
-  IFAST_MULT_TYPE ifast_array[DCTSIZE2];
-#endif
-#ifdef DCT_FLOAT_SUPPORTED
-  FLOAT_MULT_TYPE float_array[DCTSIZE2];
-#endif
-} multiplier_table;
-
-
-/* The current scaled-IDCT routines require ISLOW-style multiplier tables,
- * so be sure to compile that code if either ISLOW or SCALING is requested.
- */
-#ifdef DCT_ISLOW_SUPPORTED
-#define PROVIDE_ISLOW_TABLES
-#else
-#ifdef IDCT_SCALING_SUPPORTED
-#define PROVIDE_ISLOW_TABLES
-#endif
-#endif
-
-
-/*
- * Prepare for an output pass.
- * Here we select the proper IDCT routine for each component and build
- * a matching multiplier table.
- */
-
-METHODDEF(void)
-start_pass (j_decompress_ptr cinfo)
-{
-  my_idct_ptr idct = (my_idct_ptr) cinfo->idct;
-  int ci, i;
-  jpeg_component_info *compptr;
-  int method = 0;
-  inverse_DCT_method_ptr method_ptr = NULL;
-  JQUANT_TBL * qtbl;
-
-  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-       ci++, compptr++) {
-    /* Select the proper IDCT routine for this component's scaling */
-    switch ((compptr->DCT_h_scaled_size << 8) + compptr->DCT_v_scaled_size) {
-#ifdef IDCT_SCALING_SUPPORTED
-    case ((1 << 8) + 1):
-      method_ptr = jpeg_idct_1x1;
-      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
-      break;
-    case ((2 << 8) + 2):
-      method_ptr = jpeg_idct_2x2;
-      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
-      break;
-    case ((3 << 8) + 3):
-      method_ptr = jpeg_idct_3x3;
-      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
-      break;
-    case ((4 << 8) + 4):
-      method_ptr = jpeg_idct_4x4;
-      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
-      break;
-    case ((5 << 8) + 5):
-      method_ptr = jpeg_idct_5x5;
-      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
-      break;
-    case ((6 << 8) + 6):
-      method_ptr = jpeg_idct_6x6;
-      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
-      break;
-    case ((7 << 8) + 7):
-      method_ptr = jpeg_idct_7x7;
-      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
-      break;
-    case ((9 << 8) + 9):
-      method_ptr = jpeg_idct_9x9;
-      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
-      break;
-    case ((10 << 8) + 10):
-      method_ptr = jpeg_idct_10x10;
-      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
-      break;
-    case ((11 << 8) + 11):
-      method_ptr = jpeg_idct_11x11;
-      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
-      break;
-    case ((12 << 8) + 12):
-      method_ptr = jpeg_idct_12x12;
-      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
-      break;
-    case ((13 << 8) + 13):
-      method_ptr = jpeg_idct_13x13;
-      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
-      break;
-    case ((14 << 8) + 14):
-      method_ptr = jpeg_idct_14x14;
-      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
-      break;
-    case ((15 << 8) + 15):
-      method_ptr = jpeg_idct_15x15;
-      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
-      break;
-    case ((16 << 8) + 16):
-      method_ptr = jpeg_idct_16x16;
-      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
-      break;
-    case ((16 << 8) + 8):
-      method_ptr = jpeg_idct_16x8;
-      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
-      break;
-    case ((14 << 8) + 7):
-      method_ptr = jpeg_idct_14x7;
-      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
-      break;
-    case ((12 << 8) + 6):
-      method_ptr = jpeg_idct_12x6;
-      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
-      break;
-    case ((10 << 8) + 5):
-      method_ptr = jpeg_idct_10x5;
-      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
-      break;
-    case ((8 << 8) + 4):
-      method_ptr = jpeg_idct_8x4;
-      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
-      break;
-    case ((6 << 8) + 3):
-      method_ptr = jpeg_idct_6x3;
-      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
-      break;
-    case ((4 << 8) + 2):
-      method_ptr = jpeg_idct_4x2;
-      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
-      break;
-    case ((2 << 8) + 1):
-      method_ptr = jpeg_idct_2x1;
-      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
-      break;
-    case ((8 << 8) + 16):
-      method_ptr = jpeg_idct_8x16;
-      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
-      break;
-    case ((7 << 8) + 14):
-      method_ptr = jpeg_idct_7x14;
-      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
-      break;
-    case ((6 << 8) + 12):
-      method_ptr = jpeg_idct_6x12;
-      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
-      break;
-    case ((5 << 8) + 10):
-      method_ptr = jpeg_idct_5x10;
-      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
-      break;
-    case ((4 << 8) + 8):
-      method_ptr = jpeg_idct_4x8;
-      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
-      break;
-    case ((3 << 8) + 6):
-      method_ptr = jpeg_idct_3x6;
-      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
-      break;
-    case ((2 << 8) + 4):
-      method_ptr = jpeg_idct_2x4;
-      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
-      break;
-    case ((1 << 8) + 2):
-      method_ptr = jpeg_idct_1x2;
-      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
-      break;
-#endif
-    case ((DCTSIZE << 8) + DCTSIZE):
-      switch (cinfo->dct_method) {
-#ifdef DCT_ISLOW_SUPPORTED
-      case JDCT_ISLOW:
-	method_ptr = jpeg_idct_islow;
-	method = JDCT_ISLOW;
-	break;
-#endif
-#ifdef DCT_IFAST_SUPPORTED
-      case JDCT_IFAST:
-	method_ptr = jpeg_idct_ifast;
-	method = JDCT_IFAST;
-	break;
-#endif
-#ifdef DCT_FLOAT_SUPPORTED
-      case JDCT_FLOAT:
-	method_ptr = jpeg_idct_float;
-	method = JDCT_FLOAT;
-	break;
-#endif
-      default:
-	ERREXIT(cinfo, JERR_NOT_COMPILED);
-	break;
-      }
-      break;
-    default:
-      ERREXIT2(cinfo, JERR_BAD_DCTSIZE,
-	       compptr->DCT_h_scaled_size, compptr->DCT_v_scaled_size);
-      break;
-    }
-    idct->pub.inverse_DCT[ci] = method_ptr;
-    /* Create multiplier table from quant table.
-     * However, we can skip this if the component is uninteresting
-     * or if we already built the table.  Also, if no quant table
-     * has yet been saved for the component, we leave the
-     * multiplier table all-zero; we'll be reading zeroes from the
-     * coefficient controller's buffer anyway.
-     */
-    if (! compptr->component_needed || idct->cur_method[ci] == method)
-      continue;
-    qtbl = compptr->quant_table;
-    if (qtbl == NULL)		/* happens if no data yet for component */
-      continue;
-    idct->cur_method[ci] = method;
-    switch (method) {
-#ifdef PROVIDE_ISLOW_TABLES
-    case JDCT_ISLOW:
-      {
-	/* For LL&M IDCT method, multipliers are equal to raw quantization
-	 * coefficients, but are stored as ints to ensure access efficiency.
-	 */
-	ISLOW_MULT_TYPE * ismtbl = (ISLOW_MULT_TYPE *) compptr->dct_table;
-	for (i = 0; i < DCTSIZE2; i++) {
-	  ismtbl[i] = (ISLOW_MULT_TYPE) qtbl->quantval[i];
-	}
-      }
-      break;
-#endif
-#ifdef DCT_IFAST_SUPPORTED
-    case JDCT_IFAST:
-      {
-	/* For AA&N IDCT method, multipliers are equal to quantization
-	 * coefficients scaled by scalefactor[row]*scalefactor[col], where
-	 *   scalefactor[0] = 1
-	 *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7
-	 * For integer operation, the multiplier table is to be scaled by
-	 * IFAST_SCALE_BITS.
-	 */
-	IFAST_MULT_TYPE * ifmtbl = (IFAST_MULT_TYPE *) compptr->dct_table;
-#define CONST_BITS 14
-	static const INT16 aanscales[DCTSIZE2] = {
-	  /* precomputed values scaled up by 14 bits */
-	  16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
-	  22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
-	  21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
-	  19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
-	  16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
-	  12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
-	   8867, 12299, 11585, 10426,  8867,  6967,  4799,  2446,
-	   4520,  6270,  5906,  5315,  4520,  3552,  2446,  1247
-	};
-	SHIFT_TEMPS
-
-	for (i = 0; i < DCTSIZE2; i++) {
-	  ifmtbl[i] = (IFAST_MULT_TYPE)
-	    DESCALE(MULTIPLY16V16((INT32) qtbl->quantval[i],
-				  (INT32) aanscales[i]),
-		    CONST_BITS-IFAST_SCALE_BITS);
-	}
-      }
-      break;
-#endif
-#ifdef DCT_FLOAT_SUPPORTED
-    case JDCT_FLOAT:
-      {
-	/* For float AA&N IDCT method, multipliers are equal to quantization
-	 * coefficients scaled by scalefactor[row]*scalefactor[col], where
-	 *   scalefactor[0] = 1
-	 *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7
-	 * We apply a further scale factor of 1/8.
-	 */
-	FLOAT_MULT_TYPE * fmtbl = (FLOAT_MULT_TYPE *) compptr->dct_table;
-	int row, col;
-	static const double aanscalefactor[DCTSIZE] = {
-	  1.0, 1.387039845, 1.306562965, 1.175875602,
-	  1.0, 0.785694958, 0.541196100, 0.275899379
-	};
-
-	i = 0;
-	for (row = 0; row < DCTSIZE; row++) {
-	  for (col = 0; col < DCTSIZE; col++) {
-	    fmtbl[i] = (FLOAT_MULT_TYPE)
-	      ((double) qtbl->quantval[i] *
-	       aanscalefactor[row] * aanscalefactor[col] * 0.125);
-	    i++;
-	  }
-	}
-      }
-      break;
-#endif
-    default:
-      ERREXIT(cinfo, JERR_NOT_COMPILED);
-      break;
-    }
-  }
-}
-
-
-/*
- * Initialize IDCT manager.
- */
-
-GLOBAL(void)
-jinit_inverse_dct (j_decompress_ptr cinfo)
-{
-  my_idct_ptr idct;
-  int ci;
-  jpeg_component_info *compptr;
-
-  idct = (my_idct_ptr)
-    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				SIZEOF(my_idct_controller));
-  cinfo->idct = (struct jpeg_inverse_dct *) idct;
-  idct->pub.start_pass = start_pass;
-
-  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-       ci++, compptr++) {
-    /* Allocate and pre-zero a multiplier table for each component */
-    compptr->dct_table =
-      (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				  SIZEOF(multiplier_table));
-    MEMZERO(compptr->dct_table, SIZEOF(multiplier_table));
-    /* Mark multiplier table not yet set up for any method */
-    idct->cur_method[ci] = -1;
-  }
-}
+/*
+ * jddctmgr.c
+ *
+ * Copyright (C) 1994-1996, Thomas G. Lane.
+ * Modified 2002-2010 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains the inverse-DCT management logic.
+ * This code selects a particular IDCT implementation to be used,
+ * and it performs related housekeeping chores.  No code in this file
+ * is executed per IDCT step, only during output pass setup.
+ *
+ * Note that the IDCT routines are responsible for performing coefficient
+ * dequantization as well as the IDCT proper.  This module sets up the
+ * dequantization multiplier table needed by the IDCT routine.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jdct.h"		/* Private declarations for DCT subsystem */
+
+
+/*
+ * The decompressor input side (jdinput.c) saves away the appropriate
+ * quantization table for each component at the start of the first scan
+ * involving that component.  (This is necessary in order to correctly
+ * decode files that reuse Q-table slots.)
+ * When we are ready to make an output pass, the saved Q-table is converted
+ * to a multiplier table that will actually be used by the IDCT routine.
+ * The multiplier table contents are IDCT-method-dependent.  To support
+ * application changes in IDCT method between scans, we can remake the
+ * multiplier tables if necessary.
+ * In buffered-image mode, the first output pass may occur before any data
+ * has been seen for some components, and thus before their Q-tables have
+ * been saved away.  To handle this case, multiplier tables are preset
+ * to zeroes; the result of the IDCT will be a neutral gray level.
+ */
+
+
+/* Private subobject for this module */
+
+typedef struct {
+  struct jpeg_inverse_dct pub;	/* public fields */
+
+  /* This array contains the IDCT method code that each multiplier table
+   * is currently set up for, or -1 if it's not yet set up.
+   * The actual multiplier tables are pointed to by dct_table in the
+   * per-component comp_info structures.
+   */
+  int cur_method[MAX_COMPONENTS];
+} my_idct_controller;
+
+typedef my_idct_controller * my_idct_ptr;
+
+
+/* Allocated multiplier tables: big enough for any supported variant */
+
+typedef union {
+  ISLOW_MULT_TYPE islow_array[DCTSIZE2];
+#ifdef DCT_IFAST_SUPPORTED
+  IFAST_MULT_TYPE ifast_array[DCTSIZE2];
+#endif
+#ifdef DCT_FLOAT_SUPPORTED
+  FLOAT_MULT_TYPE float_array[DCTSIZE2];
+#endif
+} multiplier_table;
+
+
+/* The current scaled-IDCT routines require ISLOW-style multiplier tables,
+ * so be sure to compile that code if either ISLOW or SCALING is requested.
+ */
+#ifdef DCT_ISLOW_SUPPORTED
+#define PROVIDE_ISLOW_TABLES
+#else
+#ifdef IDCT_SCALING_SUPPORTED
+#define PROVIDE_ISLOW_TABLES
+#endif
+#endif
+
+
+/*
+ * Prepare for an output pass.
+ * Here we select the proper IDCT routine for each component and build
+ * a matching multiplier table.
+ */
+
+METHODDEF(void)
+start_pass (j_decompress_ptr cinfo)
+{
+  my_idct_ptr idct = (my_idct_ptr) cinfo->idct;
+  int ci, i;
+  jpeg_component_info *compptr;
+  int method = 0;
+  inverse_DCT_method_ptr method_ptr = NULL;
+  JQUANT_TBL * qtbl;
+
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    /* Select the proper IDCT routine for this component's scaling */
+    switch ((compptr->DCT_h_scaled_size << 8) + compptr->DCT_v_scaled_size) {
+#ifdef IDCT_SCALING_SUPPORTED
+    case ((1 << 8) + 1):
+      method_ptr = jpeg_idct_1x1;
+      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
+      break;
+    case ((2 << 8) + 2):
+      method_ptr = jpeg_idct_2x2;
+      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
+      break;
+    case ((3 << 8) + 3):
+      method_ptr = jpeg_idct_3x3;
+      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
+      break;
+    case ((4 << 8) + 4):
+      method_ptr = jpeg_idct_4x4;
+      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
+      break;
+    case ((5 << 8) + 5):
+      method_ptr = jpeg_idct_5x5;
+      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
+      break;
+    case ((6 << 8) + 6):
+      method_ptr = jpeg_idct_6x6;
+      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
+      break;
+    case ((7 << 8) + 7):
+      method_ptr = jpeg_idct_7x7;
+      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
+      break;
+    case ((9 << 8) + 9):
+      method_ptr = jpeg_idct_9x9;
+      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
+      break;
+    case ((10 << 8) + 10):
+      method_ptr = jpeg_idct_10x10;
+      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
+      break;
+    case ((11 << 8) + 11):
+      method_ptr = jpeg_idct_11x11;
+      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
+      break;
+    case ((12 << 8) + 12):
+      method_ptr = jpeg_idct_12x12;
+      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
+      break;
+    case ((13 << 8) + 13):
+      method_ptr = jpeg_idct_13x13;
+      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
+      break;
+    case ((14 << 8) + 14):
+      method_ptr = jpeg_idct_14x14;
+      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
+      break;
+    case ((15 << 8) + 15):
+      method_ptr = jpeg_idct_15x15;
+      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
+      break;
+    case ((16 << 8) + 16):
+      method_ptr = jpeg_idct_16x16;
+      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
+      break;
+    case ((16 << 8) + 8):
+      method_ptr = jpeg_idct_16x8;
+      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
+      break;
+    case ((14 << 8) + 7):
+      method_ptr = jpeg_idct_14x7;
+      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
+      break;
+    case ((12 << 8) + 6):
+      method_ptr = jpeg_idct_12x6;
+      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
+      break;
+    case ((10 << 8) + 5):
+      method_ptr = jpeg_idct_10x5;
+      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
+      break;
+    case ((8 << 8) + 4):
+      method_ptr = jpeg_idct_8x4;
+      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
+      break;
+    case ((6 << 8) + 3):
+      method_ptr = jpeg_idct_6x3;
+      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
+      break;
+    case ((4 << 8) + 2):
+      method_ptr = jpeg_idct_4x2;
+      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
+      break;
+    case ((2 << 8) + 1):
+      method_ptr = jpeg_idct_2x1;
+      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
+      break;
+    case ((8 << 8) + 16):
+      method_ptr = jpeg_idct_8x16;
+      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
+      break;
+    case ((7 << 8) + 14):
+      method_ptr = jpeg_idct_7x14;
+      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
+      break;
+    case ((6 << 8) + 12):
+      method_ptr = jpeg_idct_6x12;
+      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
+      break;
+    case ((5 << 8) + 10):
+      method_ptr = jpeg_idct_5x10;
+      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
+      break;
+    case ((4 << 8) + 8):
+      method_ptr = jpeg_idct_4x8;
+      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
+      break;
+    case ((3 << 8) + 6):
+      method_ptr = jpeg_idct_3x6;
+      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
+      break;
+    case ((2 << 8) + 4):
+      method_ptr = jpeg_idct_2x4;
+      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
+      break;
+    case ((1 << 8) + 2):
+      method_ptr = jpeg_idct_1x2;
+      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
+      break;
+#endif
+    case ((DCTSIZE << 8) + DCTSIZE):
+      switch (cinfo->dct_method) {
+#ifdef DCT_ISLOW_SUPPORTED
+      case JDCT_ISLOW:
+	method_ptr = jpeg_idct_islow;
+	method = JDCT_ISLOW;
+	break;
+#endif
+#ifdef DCT_IFAST_SUPPORTED
+      case JDCT_IFAST:
+	method_ptr = jpeg_idct_ifast;
+	method = JDCT_IFAST;
+	break;
+#endif
+#ifdef DCT_FLOAT_SUPPORTED
+      case JDCT_FLOAT:
+	method_ptr = jpeg_idct_float;
+	method = JDCT_FLOAT;
+	break;
+#endif
+      default:
+	ERREXIT(cinfo, JERR_NOT_COMPILED);
+	break;
+      }
+      break;
+    default:
+      ERREXIT2(cinfo, JERR_BAD_DCTSIZE,
+	       compptr->DCT_h_scaled_size, compptr->DCT_v_scaled_size);
+      break;
+    }
+    idct->pub.inverse_DCT[ci] = method_ptr;
+    /* Create multiplier table from quant table.
+     * However, we can skip this if the component is uninteresting
+     * or if we already built the table.  Also, if no quant table
+     * has yet been saved for the component, we leave the
+     * multiplier table all-zero; we'll be reading zeroes from the
+     * coefficient controller's buffer anyway.
+     */
+    if (! compptr->component_needed || idct->cur_method[ci] == method)
+      continue;
+    qtbl = compptr->quant_table;
+    if (qtbl == NULL)		/* happens if no data yet for component */
+      continue;
+    idct->cur_method[ci] = method;
+    switch (method) {
+#ifdef PROVIDE_ISLOW_TABLES
+    case JDCT_ISLOW:
+      {
+	/* For LL&M IDCT method, multipliers are equal to raw quantization
+	 * coefficients, but are stored as ints to ensure access efficiency.
+	 */
+	ISLOW_MULT_TYPE * ismtbl = (ISLOW_MULT_TYPE *) compptr->dct_table;
+	for (i = 0; i < DCTSIZE2; i++) {
+	  ismtbl[i] = (ISLOW_MULT_TYPE) qtbl->quantval[i];
+	}
+      }
+      break;
+#endif
+#ifdef DCT_IFAST_SUPPORTED
+    case JDCT_IFAST:
+      {
+	/* For AA&N IDCT method, multipliers are equal to quantization
+	 * coefficients scaled by scalefactor[row]*scalefactor[col], where
+	 *   scalefactor[0] = 1
+	 *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7
+	 * For integer operation, the multiplier table is to be scaled by
+	 * IFAST_SCALE_BITS.
+	 */
+	IFAST_MULT_TYPE * ifmtbl = (IFAST_MULT_TYPE *) compptr->dct_table;
+#define CONST_BITS 14
+	static const INT16 aanscales[DCTSIZE2] = {
+	  /* precomputed values scaled up by 14 bits */
+	  16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
+	  22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
+	  21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
+	  19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
+	  16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
+	  12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
+	   8867, 12299, 11585, 10426,  8867,  6967,  4799,  2446,
+	   4520,  6270,  5906,  5315,  4520,  3552,  2446,  1247
+	};
+	SHIFT_TEMPS
+
+	for (i = 0; i < DCTSIZE2; i++) {
+	  ifmtbl[i] = (IFAST_MULT_TYPE)
+	    DESCALE(MULTIPLY16V16((INT32) qtbl->quantval[i],
+				  (INT32) aanscales[i]),
+		    CONST_BITS-IFAST_SCALE_BITS);
+	}
+      }
+      break;
+#endif
+#ifdef DCT_FLOAT_SUPPORTED
+    case JDCT_FLOAT:
+      {
+	/* For float AA&N IDCT method, multipliers are equal to quantization
+	 * coefficients scaled by scalefactor[row]*scalefactor[col], where
+	 *   scalefactor[0] = 1
+	 *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7
+	 * We apply a further scale factor of 1/8.
+	 */
+	FLOAT_MULT_TYPE * fmtbl = (FLOAT_MULT_TYPE *) compptr->dct_table;
+	int row, col;
+	static const double aanscalefactor[DCTSIZE] = {
+	  1.0, 1.387039845, 1.306562965, 1.175875602,
+	  1.0, 0.785694958, 0.541196100, 0.275899379
+	};
+
+	i = 0;
+	for (row = 0; row < DCTSIZE; row++) {
+	  for (col = 0; col < DCTSIZE; col++) {
+	    fmtbl[i] = (FLOAT_MULT_TYPE)
+	      ((double) qtbl->quantval[i] *
+	       aanscalefactor[row] * aanscalefactor[col] * 0.125);
+	    i++;
+	  }
+	}
+      }
+      break;
+#endif
+    default:
+      ERREXIT(cinfo, JERR_NOT_COMPILED);
+      break;
+    }
+  }
+}
+
+
+/*
+ * Initialize IDCT manager.
+ */
+
+GLOBAL(void)
+jinit_inverse_dct (j_decompress_ptr cinfo)
+{
+  my_idct_ptr idct;
+  int ci;
+  jpeg_component_info *compptr;
+
+  idct = (my_idct_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				SIZEOF(my_idct_controller));
+  cinfo->idct = (struct jpeg_inverse_dct *) idct;
+  idct->pub.start_pass = start_pass;
+
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    /* Allocate and pre-zero a multiplier table for each component */
+    compptr->dct_table =
+      (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				  SIZEOF(multiplier_table));
+    MEMZERO(compptr->dct_table, SIZEOF(multiplier_table));
+    /* Mark multiplier table not yet set up for any method */
+    idct->cur_method[ci] = -1;
+  }
+}
diff --git a/plugins/AdvaImg/src/LibJPEG/jdhuff.c b/plugins/AdvaImg/src/LibJPEG/jdhuff.c
index 06f92fe47f..b18ff7a20a 100644
--- a/plugins/AdvaImg/src/LibJPEG/jdhuff.c
+++ b/plugins/AdvaImg/src/LibJPEG/jdhuff.c
@@ -1,1541 +1,1542 @@
-/*
- * jdhuff.c
- *
- * Copyright (C) 1991-1997, Thomas G. Lane.
- * Modified 2006-2009 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains Huffman entropy decoding routines.
- * Both sequential and progressive modes are supported in this single module.
- *
- * Much of the complexity here has to do with supporting input suspension.
- * If the data source module demands suspension, we want to be able to back
- * up to the start of the current MCU.  To do this, we copy state variables
- * into local working storage, and update them back to the permanent
- * storage only upon successful completion of an MCU.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-
-
-/* Derived data constructed for each Huffman table */
-
-#define HUFF_LOOKAHEAD	8	/* # of bits of lookahead */
-
-typedef struct {
-  /* Basic tables: (element [0] of each array is unused) */
-  INT32 maxcode[18];		/* largest code of length k (-1 if none) */
-  /* (maxcode[17] is a sentinel to ensure jpeg_huff_decode terminates) */
-  INT32 valoffset[17];		/* huffval[] offset for codes of length k */
-  /* valoffset[k] = huffval[] index of 1st symbol of code length k, less
-   * the smallest code of length k; so given a code of length k, the
-   * corresponding symbol is huffval[code + valoffset[k]]
-   */
-
-  /* Link to public Huffman table (needed only in jpeg_huff_decode) */
-  JHUFF_TBL *pub;
-
-  /* Lookahead tables: indexed by the next HUFF_LOOKAHEAD bits of
-   * the input data stream.  If the next Huffman code is no more
-   * than HUFF_LOOKAHEAD bits long, we can obtain its length and
-   * the corresponding symbol directly from these tables.
-   */
-  int look_nbits[1<<HUFF_LOOKAHEAD]; /* # bits, or 0 if too long */
-  UINT8 look_sym[1<<HUFF_LOOKAHEAD]; /* symbol, or unused */
-} d_derived_tbl;
-
-
-/*
- * Fetching the next N bits from the input stream is a time-critical operation
- * for the Huffman decoders.  We implement it with a combination of inline
- * macros and out-of-line subroutines.  Note that N (the number of bits
- * demanded at one time) never exceeds 15 for JPEG use.
- *
- * We read source bytes into get_buffer and dole out bits as needed.
- * If get_buffer already contains enough bits, they are fetched in-line
- * by the macros CHECK_BIT_BUFFER and GET_BITS.  When there aren't enough
- * bits, jpeg_fill_bit_buffer is called; it will attempt to fill get_buffer
- * as full as possible (not just to the number of bits needed; this
- * prefetching reduces the overhead cost of calling jpeg_fill_bit_buffer).
- * Note that jpeg_fill_bit_buffer may return FALSE to indicate suspension.
- * On TRUE return, jpeg_fill_bit_buffer guarantees that get_buffer contains
- * at least the requested number of bits --- dummy zeroes are inserted if
- * necessary.
- */
-
-typedef INT32 bit_buf_type;	/* type of bit-extraction buffer */
-#define BIT_BUF_SIZE  32	/* size of buffer in bits */
-
-/* If long is > 32 bits on your machine, and shifting/masking longs is
- * reasonably fast, making bit_buf_type be long and setting BIT_BUF_SIZE
- * appropriately should be a win.  Unfortunately we can't define the size
- * with something like  #define BIT_BUF_SIZE (sizeof(bit_buf_type)*8)
- * because not all machines measure sizeof in 8-bit bytes.
- */
-
-typedef struct {		/* Bitreading state saved across MCUs */
-  bit_buf_type get_buffer;	/* current bit-extraction buffer */
-  int bits_left;		/* # of unused bits in it */
-} bitread_perm_state;
-
-typedef struct {		/* Bitreading working state within an MCU */
-  /* Current data source location */
-  /* We need a copy, rather than munging the original, in case of suspension */
-  const JOCTET * next_input_byte; /* => next byte to read from source */
-  size_t bytes_in_buffer;	/* # of bytes remaining in source buffer */
-  /* Bit input buffer --- note these values are kept in register variables,
-   * not in this struct, inside the inner loops.
-   */
-  bit_buf_type get_buffer;	/* current bit-extraction buffer */
-  int bits_left;		/* # of unused bits in it */
-  /* Pointer needed by jpeg_fill_bit_buffer. */
-  j_decompress_ptr cinfo;	/* back link to decompress master record */
-} bitread_working_state;
-
-/* Macros to declare and load/save bitread local variables. */
-#define BITREAD_STATE_VARS  \
-	register bit_buf_type get_buffer;  \
-	register int bits_left;  \
-	bitread_working_state br_state
-
-#define BITREAD_LOAD_STATE(cinfop,permstate)  \
-	br_state.cinfo = cinfop; \
-	br_state.next_input_byte = cinfop->src->next_input_byte; \
-	br_state.bytes_in_buffer = cinfop->src->bytes_in_buffer; \
-	get_buffer = permstate.get_buffer; \
-	bits_left = permstate.bits_left;
-
-#define BITREAD_SAVE_STATE(cinfop,permstate)  \
-	cinfop->src->next_input_byte = br_state.next_input_byte; \
-	cinfop->src->bytes_in_buffer = br_state.bytes_in_buffer; \
-	permstate.get_buffer = get_buffer; \
-	permstate.bits_left = bits_left
-
-/*
- * These macros provide the in-line portion of bit fetching.
- * Use CHECK_BIT_BUFFER to ensure there are N bits in get_buffer
- * before using GET_BITS, PEEK_BITS, or DROP_BITS.
- * The variables get_buffer and bits_left are assumed to be locals,
- * but the state struct might not be (jpeg_huff_decode needs this).
- *	CHECK_BIT_BUFFER(state,n,action);
- *		Ensure there are N bits in get_buffer; if suspend, take action.
- *      val = GET_BITS(n);
- *		Fetch next N bits.
- *      val = PEEK_BITS(n);
- *		Fetch next N bits without removing them from the buffer.
- *	DROP_BITS(n);
- *		Discard next N bits.
- * The value N should be a simple variable, not an expression, because it
- * is evaluated multiple times.
- */
-
-#define CHECK_BIT_BUFFER(state,nbits,action) \
-	{ if (bits_left < (nbits)) {  \
-	    if (! jpeg_fill_bit_buffer(&(state),get_buffer,bits_left,nbits))  \
-	      { action; }  \
-	    get_buffer = (state).get_buffer; bits_left = (state).bits_left; } }
-
-#define GET_BITS(nbits) \
-	(((int) (get_buffer >> (bits_left -= (nbits)))) & BIT_MASK(nbits))
-
-#define PEEK_BITS(nbits) \
-	(((int) (get_buffer >> (bits_left -  (nbits)))) & BIT_MASK(nbits))
-
-#define DROP_BITS(nbits) \
-	(bits_left -= (nbits))
-
-
-/*
- * Code for extracting next Huffman-coded symbol from input bit stream.
- * Again, this is time-critical and we make the main paths be macros.
- *
- * We use a lookahead table to process codes of up to HUFF_LOOKAHEAD bits
- * without looping.  Usually, more than 95% of the Huffman codes will be 8
- * or fewer bits long.  The few overlength codes are handled with a loop,
- * which need not be inline code.
- *
- * Notes about the HUFF_DECODE macro:
- * 1. Near the end of the data segment, we may fail to get enough bits
- *    for a lookahead.  In that case, we do it the hard way.
- * 2. If the lookahead table contains no entry, the next code must be
- *    more than HUFF_LOOKAHEAD bits long.
- * 3. jpeg_huff_decode returns -1 if forced to suspend.
- */
-
-#define HUFF_DECODE(result,state,htbl,failaction,slowlabel) \
-{ register int nb, look; \
-  if (bits_left < HUFF_LOOKAHEAD) { \
-    if (! jpeg_fill_bit_buffer(&state,get_buffer,bits_left, 0)) {failaction;} \
-    get_buffer = state.get_buffer; bits_left = state.bits_left; \
-    if (bits_left < HUFF_LOOKAHEAD) { \
-      nb = 1; goto slowlabel; \
-    } \
-  } \
-  look = PEEK_BITS(HUFF_LOOKAHEAD); \
-  if ((nb = htbl->look_nbits[look]) != 0) { \
-    DROP_BITS(nb); \
-    result = htbl->look_sym[look]; \
-  } else { \
-    nb = HUFF_LOOKAHEAD+1; \
-slowlabel: \
-    if ((result=jpeg_huff_decode(&state,get_buffer,bits_left,htbl,nb)) < 0) \
-	{ failaction; } \
-    get_buffer = state.get_buffer; bits_left = state.bits_left; \
-  } \
-}
-
-
-/*
- * Expanded entropy decoder object for Huffman decoding.
- *
- * The savable_state subrecord contains fields that change within an MCU,
- * but must not be updated permanently until we complete the MCU.
- */
-
-typedef struct {
-  unsigned int EOBRUN;			/* remaining EOBs in EOBRUN */
-  int last_dc_val[MAX_COMPS_IN_SCAN];	/* last DC coef for each component */
-} savable_state;
-
-/* This macro is to work around compilers with missing or broken
- * structure assignment.  You'll need to fix this code if you have
- * such a compiler and you change MAX_COMPS_IN_SCAN.
- */
-
-#ifndef NO_STRUCT_ASSIGN
-#define ASSIGN_STATE(dest,src)  ((dest) = (src))
-#else
-#if MAX_COMPS_IN_SCAN == 4
-#define ASSIGN_STATE(dest,src)  \
-	((dest).EOBRUN = (src).EOBRUN, \
-	 (dest).last_dc_val[0] = (src).last_dc_val[0], \
-	 (dest).last_dc_val[1] = (src).last_dc_val[1], \
-	 (dest).last_dc_val[2] = (src).last_dc_val[2], \
-	 (dest).last_dc_val[3] = (src).last_dc_val[3])
-#endif
-#endif
-
-
-typedef struct {
-  struct jpeg_entropy_decoder pub; /* public fields */
-
-  /* These fields are loaded into local variables at start of each MCU.
-   * In case of suspension, we exit WITHOUT updating them.
-   */
-  bitread_perm_state bitstate;	/* Bit buffer at start of MCU */
-  savable_state saved;		/* Other state at start of MCU */
-
-  /* These fields are NOT loaded into local working state. */
-  boolean insufficient_data;	/* set TRUE after emitting warning */
-  unsigned int restarts_to_go;	/* MCUs left in this restart interval */
-
-  /* Following two fields used only in progressive mode */
-
-  /* Pointers to derived tables (these workspaces have image lifespan) */
-  d_derived_tbl * derived_tbls[NUM_HUFF_TBLS];
-
-  d_derived_tbl * ac_derived_tbl; /* active table during an AC scan */
-
-  /* Following fields used only in sequential mode */
-
-  /* Pointers to derived tables (these workspaces have image lifespan) */
-  d_derived_tbl * dc_derived_tbls[NUM_HUFF_TBLS];
-  d_derived_tbl * ac_derived_tbls[NUM_HUFF_TBLS];
-
-  /* Precalculated info set up by start_pass for use in decode_mcu: */
-
-  /* Pointers to derived tables to be used for each block within an MCU */
-  d_derived_tbl * dc_cur_tbls[D_MAX_BLOCKS_IN_MCU];
-  d_derived_tbl * ac_cur_tbls[D_MAX_BLOCKS_IN_MCU];
-  /* Whether we care about the DC and AC coefficient values for each block */
-  int coef_limit[D_MAX_BLOCKS_IN_MCU];
-} huff_entropy_decoder;
-
-typedef huff_entropy_decoder * huff_entropy_ptr;
-
-
-static const int jpeg_zigzag_order[8][8] = {
-  {  0,  1,  5,  6, 14, 15, 27, 28 },
-  {  2,  4,  7, 13, 16, 26, 29, 42 },
-  {  3,  8, 12, 17, 25, 30, 41, 43 },
-  {  9, 11, 18, 24, 31, 40, 44, 53 },
-  { 10, 19, 23, 32, 39, 45, 52, 54 },
-  { 20, 22, 33, 38, 46, 51, 55, 60 },
-  { 21, 34, 37, 47, 50, 56, 59, 61 },
-  { 35, 36, 48, 49, 57, 58, 62, 63 }
-};
-
-static const int jpeg_zigzag_order7[7][7] = {
-  {  0,  1,  5,  6, 14, 15, 27 },
-  {  2,  4,  7, 13, 16, 26, 28 },
-  {  3,  8, 12, 17, 25, 29, 38 },
-  {  9, 11, 18, 24, 30, 37, 39 },
-  { 10, 19, 23, 31, 36, 40, 45 },
-  { 20, 22, 32, 35, 41, 44, 46 },
-  { 21, 33, 34, 42, 43, 47, 48 }
-};
-
-static const int jpeg_zigzag_order6[6][6] = {
-  {  0,  1,  5,  6, 14, 15 },
-  {  2,  4,  7, 13, 16, 25 },
-  {  3,  8, 12, 17, 24, 26 },
-  {  9, 11, 18, 23, 27, 32 },
-  { 10, 19, 22, 28, 31, 33 },
-  { 20, 21, 29, 30, 34, 35 }
-};
-
-static const int jpeg_zigzag_order5[5][5] = {
-  {  0,  1,  5,  6, 14 },
-  {  2,  4,  7, 13, 15 },
-  {  3,  8, 12, 16, 21 },
-  {  9, 11, 17, 20, 22 },
-  { 10, 18, 19, 23, 24 }
-};
-
-static const int jpeg_zigzag_order4[4][4] = {
-  { 0,  1,  5,  6 },
-  { 2,  4,  7, 12 },
-  { 3,  8, 11, 13 },
-  { 9, 10, 14, 15 }
-};
-
-static const int jpeg_zigzag_order3[3][3] = {
-  { 0, 1, 5 },
-  { 2, 4, 6 },
-  { 3, 7, 8 }
-};
-
-static const int jpeg_zigzag_order2[2][2] = {
-  { 0, 1 },
-  { 2, 3 }
-};
-
-
-/*
- * Compute the derived values for a Huffman table.
- * This routine also performs some validation checks on the table.
- */
-
-LOCAL(void)
-jpeg_make_d_derived_tbl (j_decompress_ptr cinfo, boolean isDC, int tblno,
-			 d_derived_tbl ** pdtbl)
-{
-  JHUFF_TBL *htbl;
-  d_derived_tbl *dtbl;
-  int p, i, l, si, numsymbols;
-  int lookbits, ctr;
-  char huffsize[257];
-  unsigned int huffcode[257];
-  unsigned int code;
-
-  /* Note that huffsize[] and huffcode[] are filled in code-length order,
-   * paralleling the order of the symbols themselves in htbl->huffval[].
-   */
-
-  /* Find the input Huffman table */
-  if (tblno < 0 || tblno >= NUM_HUFF_TBLS)
-    ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, tblno);
-  htbl =
-    isDC ? cinfo->dc_huff_tbl_ptrs[tblno] : cinfo->ac_huff_tbl_ptrs[tblno];
-  if (htbl == NULL)
-    ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, tblno);
-
-  /* Allocate a workspace if we haven't already done so. */
-  if (*pdtbl == NULL)
-    *pdtbl = (d_derived_tbl *)
-      (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				  SIZEOF(d_derived_tbl));
-  dtbl = *pdtbl;
-  dtbl->pub = htbl;		/* fill in back link */
-  
-  /* Figure C.1: make table of Huffman code length for each symbol */
-
-  p = 0;
-  for (l = 1; l <= 16; l++) {
-    i = (int) htbl->bits[l];
-    if (i < 0 || p + i > 256)	/* protect against table overrun */
-      ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
-    while (i--)
-      huffsize[p++] = (char) l;
-  }
-  huffsize[p] = 0;
-  numsymbols = p;
-  
-  /* Figure C.2: generate the codes themselves */
-  /* We also validate that the counts represent a legal Huffman code tree. */
-  
-  code = 0;
-  si = huffsize[0];
-  p = 0;
-  while (huffsize[p]) {
-    while (((int) huffsize[p]) == si) {
-      huffcode[p++] = code;
-      code++;
-    }
-    /* code is now 1 more than the last code used for codelength si; but
-     * it must still fit in si bits, since no code is allowed to be all ones.
-     */
-    if (((INT32) code) >= (((INT32) 1) << si))
-      ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
-    code <<= 1;
-    si++;
-  }
-
-  /* Figure F.15: generate decoding tables for bit-sequential decoding */
-
-  p = 0;
-  for (l = 1; l <= 16; l++) {
-    if (htbl->bits[l]) {
-      /* valoffset[l] = huffval[] index of 1st symbol of code length l,
-       * minus the minimum code of length l
-       */
-      dtbl->valoffset[l] = (INT32) p - (INT32) huffcode[p];
-      p += htbl->bits[l];
-      dtbl->maxcode[l] = huffcode[p-1]; /* maximum code of length l */
-    } else {
-      dtbl->maxcode[l] = -1;	/* -1 if no codes of this length */
-    }
-  }
-  dtbl->maxcode[17] = 0xFFFFFL; /* ensures jpeg_huff_decode terminates */
-
-  /* Compute lookahead tables to speed up decoding.
-   * First we set all the table entries to 0, indicating "too long";
-   * then we iterate through the Huffman codes that are short enough and
-   * fill in all the entries that correspond to bit sequences starting
-   * with that code.
-   */
-
-  MEMZERO(dtbl->look_nbits, SIZEOF(dtbl->look_nbits));
-
-  p = 0;
-  for (l = 1; l <= HUFF_LOOKAHEAD; l++) {
-    for (i = 1; i <= (int) htbl->bits[l]; i++, p++) {
-      /* l = current code's length, p = its index in huffcode[] & huffval[]. */
-      /* Generate left-justified code followed by all possible bit sequences */
-      lookbits = huffcode[p] << (HUFF_LOOKAHEAD-l);
-      for (ctr = 1 << (HUFF_LOOKAHEAD-l); ctr > 0; ctr--) {
-	dtbl->look_nbits[lookbits] = l;
-	dtbl->look_sym[lookbits] = htbl->huffval[p];
-	lookbits++;
-      }
-    }
-  }
-
-  /* Validate symbols as being reasonable.
-   * For AC tables, we make no check, but accept all byte values 0..255.
-   * For DC tables, we require the symbols to be in range 0..15.
-   * (Tighter bounds could be applied depending on the data depth and mode,
-   * but this is sufficient to ensure safe decoding.)
-   */
-  if (isDC) {
-    for (i = 0; i < numsymbols; i++) {
-      int sym = htbl->huffval[i];
-      if (sym < 0 || sym > 15)
-	ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
-    }
-  }
-}
-
-
-/*
- * Out-of-line code for bit fetching.
- * Note: current values of get_buffer and bits_left are passed as parameters,
- * but are returned in the corresponding fields of the state struct.
- *
- * On most machines MIN_GET_BITS should be 25 to allow the full 32-bit width
- * of get_buffer to be used.  (On machines with wider words, an even larger
- * buffer could be used.)  However, on some machines 32-bit shifts are
- * quite slow and take time proportional to the number of places shifted.
- * (This is true with most PC compilers, for instance.)  In this case it may
- * be a win to set MIN_GET_BITS to the minimum value of 15.  This reduces the
- * average shift distance at the cost of more calls to jpeg_fill_bit_buffer.
- */
-
-#ifdef SLOW_SHIFT_32
-#define MIN_GET_BITS  15	/* minimum allowable value */
-#else
-#define MIN_GET_BITS  (BIT_BUF_SIZE-7)
-#endif
-
-
-LOCAL(boolean)
-jpeg_fill_bit_buffer (bitread_working_state * state,
-		      register bit_buf_type get_buffer, register int bits_left,
-		      int nbits)
-/* Load up the bit buffer to a depth of at least nbits */
-{
-  /* Copy heavily used state fields into locals (hopefully registers) */
-  register const JOCTET * next_input_byte = state->next_input_byte;
-  register size_t bytes_in_buffer = state->bytes_in_buffer;
-  j_decompress_ptr cinfo = state->cinfo;
-
-  /* Attempt to load at least MIN_GET_BITS bits into get_buffer. */
-  /* (It is assumed that no request will be for more than that many bits.) */
-  /* We fail to do so only if we hit a marker or are forced to suspend. */
-
-  if (cinfo->unread_marker == 0) {	/* cannot advance past a marker */
-    while (bits_left < MIN_GET_BITS) {
-      register int c;
-
-      /* Attempt to read a byte */
-      if (bytes_in_buffer == 0) {
-	if (! (*cinfo->src->fill_input_buffer) (cinfo))
-	  return FALSE;
-	next_input_byte = cinfo->src->next_input_byte;
-	bytes_in_buffer = cinfo->src->bytes_in_buffer;
-      }
-      bytes_in_buffer--;
-      c = GETJOCTET(*next_input_byte++);
-
-      /* If it's 0xFF, check and discard stuffed zero byte */
-      if (c == 0xFF) {
-	/* Loop here to discard any padding FF's on terminating marker,
-	 * so that we can save a valid unread_marker value.  NOTE: we will
-	 * accept multiple FF's followed by a 0 as meaning a single FF data
-	 * byte.  This data pattern is not valid according to the standard.
-	 */
-	do {
-	  if (bytes_in_buffer == 0) {
-	    if (! (*cinfo->src->fill_input_buffer) (cinfo))
-	      return FALSE;
-	    next_input_byte = cinfo->src->next_input_byte;
-	    bytes_in_buffer = cinfo->src->bytes_in_buffer;
-	  }
-	  bytes_in_buffer--;
-	  c = GETJOCTET(*next_input_byte++);
-	} while (c == 0xFF);
-
-	if (c == 0) {
-	  /* Found FF/00, which represents an FF data byte */
-	  c = 0xFF;
-	} else {
-	  /* Oops, it's actually a marker indicating end of compressed data.
-	   * Save the marker code for later use.
-	   * Fine point: it might appear that we should save the marker into
-	   * bitread working state, not straight into permanent state.  But
-	   * once we have hit a marker, we cannot need to suspend within the
-	   * current MCU, because we will read no more bytes from the data
-	   * source.  So it is OK to update permanent state right away.
-	   */
-	  cinfo->unread_marker = c;
-	  /* See if we need to insert some fake zero bits. */
-	  goto no_more_bytes;
-	}
-      }
-
-      /* OK, load c into get_buffer */
-      get_buffer = (get_buffer << 8) | c;
-      bits_left += 8;
-    } /* end while */
-  } else {
-  no_more_bytes:
-    /* We get here if we've read the marker that terminates the compressed
-     * data segment.  There should be enough bits in the buffer register
-     * to satisfy the request; if so, no problem.
-     */
-    if (nbits > bits_left) {
-      /* Uh-oh.  Report corrupted data to user and stuff zeroes into
-       * the data stream, so that we can produce some kind of image.
-       * We use a nonvolatile flag to ensure that only one warning message
-       * appears per data segment.
-       */
-      if (! ((huff_entropy_ptr) cinfo->entropy)->insufficient_data) {
-	WARNMS(cinfo, JWRN_HIT_MARKER);
-	((huff_entropy_ptr) cinfo->entropy)->insufficient_data = TRUE;
-      }
-      /* Fill the buffer with zero bits */
-      get_buffer <<= MIN_GET_BITS - bits_left;
-      bits_left = MIN_GET_BITS;
-    }
-  }
-
-  /* Unload the local registers */
-  state->next_input_byte = next_input_byte;
-  state->bytes_in_buffer = bytes_in_buffer;
-  state->get_buffer = get_buffer;
-  state->bits_left = bits_left;
-
-  return TRUE;
-}
-
-
-/*
- * Figure F.12: extend sign bit.
- * On some machines, a shift and sub will be faster than a table lookup.
- */
-
-#ifdef AVOID_TABLES
-
-#define BIT_MASK(nbits)   ((1<<(nbits))-1)
-#define HUFF_EXTEND(x,s)  ((x) < (1<<((s)-1)) ? (x) - ((1<<(s))-1) : (x))
-
-#else
-
-#define BIT_MASK(nbits)   bmask[nbits]
-#define HUFF_EXTEND(x,s)  ((x) <= bmask[(s) - 1] ? (x) - bmask[s] : (x))
-
-static const int bmask[16] =	/* bmask[n] is mask for n rightmost bits */
-  { 0, 0x0001, 0x0003, 0x0007, 0x000F, 0x001F, 0x003F, 0x007F, 0x00FF,
-    0x01FF, 0x03FF, 0x07FF, 0x0FFF, 0x1FFF, 0x3FFF, 0x7FFF };
-
-#endif /* AVOID_TABLES */
-
-
-/*
- * Out-of-line code for Huffman code decoding.
- */
-
-LOCAL(int)
-jpeg_huff_decode (bitread_working_state * state,
-		  register bit_buf_type get_buffer, register int bits_left,
-		  d_derived_tbl * htbl, int min_bits)
-{
-  register int l = min_bits;
-  register INT32 code;
-
-  /* HUFF_DECODE has determined that the code is at least min_bits */
-  /* bits long, so fetch that many bits in one swoop. */
-
-  CHECK_BIT_BUFFER(*state, l, return -1);
-  code = GET_BITS(l);
-
-  /* Collect the rest of the Huffman code one bit at a time. */
-  /* This is per Figure F.16 in the JPEG spec. */
-
-  while (code > htbl->maxcode[l]) {
-    code <<= 1;
-    CHECK_BIT_BUFFER(*state, 1, return -1);
-    code |= GET_BITS(1);
-    l++;
-  }
-
-  /* Unload the local registers */
-  state->get_buffer = get_buffer;
-  state->bits_left = bits_left;
-
-  /* With garbage input we may reach the sentinel value l = 17. */
-
-  if (l > 16) {
-    WARNMS(state->cinfo, JWRN_HUFF_BAD_CODE);
-    return 0;			/* fake a zero as the safest result */
-  }
-
-  return htbl->pub->huffval[ (int) (code + htbl->valoffset[l]) ];
-}
-
-
-/*
- * Check for a restart marker & resynchronize decoder.
- * Returns FALSE if must suspend.
- */
-
-LOCAL(boolean)
-process_restart (j_decompress_ptr cinfo)
-{
-  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
-  int ci;
-
-  /* Throw away any unused bits remaining in bit buffer; */
-  /* include any full bytes in next_marker's count of discarded bytes */
-  cinfo->marker->discarded_bytes += entropy->bitstate.bits_left / 8;
-  entropy->bitstate.bits_left = 0;
-
-  /* Advance past the RSTn marker */
-  if (! (*cinfo->marker->read_restart_marker) (cinfo))
-    return FALSE;
-
-  /* Re-initialize DC predictions to 0 */
-  for (ci = 0; ci < cinfo->comps_in_scan; ci++)
-    entropy->saved.last_dc_val[ci] = 0;
-  /* Re-init EOB run count, too */
-  entropy->saved.EOBRUN = 0;
-
-  /* Reset restart counter */
-  entropy->restarts_to_go = cinfo->restart_interval;
-
-  /* Reset out-of-data flag, unless read_restart_marker left us smack up
-   * against a marker.  In that case we will end up treating the next data
-   * segment as empty, and we can avoid producing bogus output pixels by
-   * leaving the flag set.
-   */
-  if (cinfo->unread_marker == 0)
-    entropy->insufficient_data = FALSE;
-
-  return TRUE;
-}
-
-
-/*
- * Huffman MCU decoding.
- * Each of these routines decodes and returns one MCU's worth of
- * Huffman-compressed coefficients. 
- * The coefficients are reordered from zigzag order into natural array order,
- * but are not dequantized.
- *
- * The i'th block of the MCU is stored into the block pointed to by
- * MCU_data[i].  WE ASSUME THIS AREA IS INITIALLY ZEROED BY THE CALLER.
- * (Wholesale zeroing is usually a little faster than retail...)
- *
- * We return FALSE if data source requested suspension.  In that case no
- * changes have been made to permanent state.  (Exception: some output
- * coefficients may already have been assigned.  This is harmless for
- * spectral selection, since we'll just re-assign them on the next call.
- * Successive approximation AC refinement has to be more careful, however.)
- */
-
-/*
- * MCU decoding for DC initial scan (either spectral selection,
- * or first pass of successive approximation).
- */
-
-METHODDEF(boolean)
-decode_mcu_DC_first (j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
-{   
-  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
-  int Al = cinfo->Al;
-  register int s, r;
-  int blkn, ci;
-  JBLOCKROW block;
-  BITREAD_STATE_VARS;
-  savable_state state;
-  d_derived_tbl * tbl;
-  jpeg_component_info * compptr;
-
-  /* Process restart marker if needed; may have to suspend */
-  if (cinfo->restart_interval) {
-    if (entropy->restarts_to_go == 0)
-      if (! process_restart(cinfo))
-	return FALSE;
-  }
-
-  /* If we've run out of data, just leave the MCU set to zeroes.
-   * This way, we return uniform gray for the remainder of the segment.
-   */
-  if (! entropy->insufficient_data) {
-
-    /* Load up working state */
-    BITREAD_LOAD_STATE(cinfo,entropy->bitstate);
-    ASSIGN_STATE(state, entropy->saved);
-
-    /* Outer loop handles each block in the MCU */
-
-    for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
-      block = MCU_data[blkn];
-      ci = cinfo->MCU_membership[blkn];
-      compptr = cinfo->cur_comp_info[ci];
-      tbl = entropy->derived_tbls[compptr->dc_tbl_no];
-
-      /* Decode a single block's worth of coefficients */
-
-      /* Section F.2.2.1: decode the DC coefficient difference */
-      HUFF_DECODE(s, br_state, tbl, return FALSE, label1);
-      if (s) {
-	CHECK_BIT_BUFFER(br_state, s, return FALSE);
-	r = GET_BITS(s);
-	s = HUFF_EXTEND(r, s);
-      }
-
-      /* Convert DC difference to actual value, update last_dc_val */
-      s += state.last_dc_val[ci];
-      state.last_dc_val[ci] = s;
-      /* Scale and output the coefficient (assumes jpeg_natural_order[0]=0) */
-      (*block)[0] = (JCOEF) (s << Al);
-    }
-
-    /* Completed MCU, so update state */
-    BITREAD_SAVE_STATE(cinfo,entropy->bitstate);
-    ASSIGN_STATE(entropy->saved, state);
-  }
-
-  /* Account for restart interval (no-op if not using restarts) */
-  entropy->restarts_to_go--;
-
-  return TRUE;
-}
-
-
-/*
- * MCU decoding for AC initial scan (either spectral selection,
- * or first pass of successive approximation).
- */
-
-METHODDEF(boolean)
-decode_mcu_AC_first (j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
-{   
-  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
-  register int s, k, r;
-  unsigned int EOBRUN;
-  int Se, Al;
-  const int * natural_order;
-  JBLOCKROW block;
-  BITREAD_STATE_VARS;
-  d_derived_tbl * tbl;
-
-  /* Process restart marker if needed; may have to suspend */
-  if (cinfo->restart_interval) {
-    if (entropy->restarts_to_go == 0)
-      if (! process_restart(cinfo))
-	return FALSE;
-  }
-
-  /* If we've run out of data, just leave the MCU set to zeroes.
-   * This way, we return uniform gray for the remainder of the segment.
-   */
-  if (! entropy->insufficient_data) {
-
-    Se = cinfo->Se;
-    Al = cinfo->Al;
-    natural_order = cinfo->natural_order;
-
-    /* Load up working state.
-     * We can avoid loading/saving bitread state if in an EOB run.
-     */
-    EOBRUN = entropy->saved.EOBRUN;	/* only part of saved state we need */
-
-    /* There is always only one block per MCU */
-
-    if (EOBRUN > 0)		/* if it's a band of zeroes... */
-      EOBRUN--;			/* ...process it now (we do nothing) */
-    else {
-      BITREAD_LOAD_STATE(cinfo,entropy->bitstate);
-      block = MCU_data[0];
-      tbl = entropy->ac_derived_tbl;
-
-      for (k = cinfo->Ss; k <= Se; k++) {
-	HUFF_DECODE(s, br_state, tbl, return FALSE, label2);
-	r = s >> 4;
-	s &= 15;
-	if (s) {
-	  k += r;
-	  CHECK_BIT_BUFFER(br_state, s, return FALSE);
-	  r = GET_BITS(s);
-	  s = HUFF_EXTEND(r, s);
-	  /* Scale and output coefficient in natural (dezigzagged) order */
-	  (*block)[natural_order[k]] = (JCOEF) (s << Al);
-	} else {
-	  if (r == 15) {	/* ZRL */
-	    k += 15;		/* skip 15 zeroes in band */
-	  } else {		/* EOBr, run length is 2^r + appended bits */
-	    EOBRUN = 1 << r;
-	    if (r) {		/* EOBr, r > 0 */
-	      CHECK_BIT_BUFFER(br_state, r, return FALSE);
-	      r = GET_BITS(r);
-	      EOBRUN += r;
-	    }
-	    EOBRUN--;		/* this band is processed at this moment */
-	    break;		/* force end-of-band */
-	  }
-	}
-      }
-
-      BITREAD_SAVE_STATE(cinfo,entropy->bitstate);
-    }
-
-    /* Completed MCU, so update state */
-    entropy->saved.EOBRUN = EOBRUN;	/* only part of saved state we need */
-  }
-
-  /* Account for restart interval (no-op if not using restarts) */
-  entropy->restarts_to_go--;
-
-  return TRUE;
-}
-
-
-/*
- * MCU decoding for DC successive approximation refinement scan.
- * Note: we assume such scans can be multi-component, although the spec
- * is not very clear on the point.
- */
-
-METHODDEF(boolean)
-decode_mcu_DC_refine (j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
-{   
-  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
-  int p1 = 1 << cinfo->Al;	/* 1 in the bit position being coded */
-  int blkn;
-  JBLOCKROW block;
-  BITREAD_STATE_VARS;
-
-  /* Process restart marker if needed; may have to suspend */
-  if (cinfo->restart_interval) {
-    if (entropy->restarts_to_go == 0)
-      if (! process_restart(cinfo))
-	return FALSE;
-  }
-
-  /* Not worth the cycles to check insufficient_data here,
-   * since we will not change the data anyway if we read zeroes.
-   */
-
-  /* Load up working state */
-  BITREAD_LOAD_STATE(cinfo,entropy->bitstate);
-
-  /* Outer loop handles each block in the MCU */
-
-  for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
-    block = MCU_data[blkn];
-
-    /* Encoded data is simply the next bit of the two's-complement DC value */
-    CHECK_BIT_BUFFER(br_state, 1, return FALSE);
-    if (GET_BITS(1))
-      (*block)[0] |= p1;
-    /* Note: since we use |=, repeating the assignment later is safe */
-  }
-
-  /* Completed MCU, so update state */
-  BITREAD_SAVE_STATE(cinfo,entropy->bitstate);
-
-  /* Account for restart interval (no-op if not using restarts) */
-  entropy->restarts_to_go--;
-
-  return TRUE;
-}
-
-
-/*
- * MCU decoding for AC successive approximation refinement scan.
- */
-
-METHODDEF(boolean)
-decode_mcu_AC_refine (j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
-{   
-  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
-  register int s, k, r;
-  unsigned int EOBRUN;
-  int Se, p1, m1;
-  const int * natural_order;
-  JBLOCKROW block;
-  JCOEFPTR thiscoef;
-  BITREAD_STATE_VARS;
-  d_derived_tbl * tbl;
-  int num_newnz;
-  int newnz_pos[DCTSIZE2];
-
-  /* Process restart marker if needed; may have to suspend */
-  if (cinfo->restart_interval) {
-    if (entropy->restarts_to_go == 0)
-      if (! process_restart(cinfo))
-	return FALSE;
-  }
-
-  /* If we've run out of data, don't modify the MCU.
-   */
-  if (! entropy->insufficient_data) {
-
-    Se = cinfo->Se;
-    p1 = 1 << cinfo->Al;	/* 1 in the bit position being coded */
-    m1 = (-1) << cinfo->Al;	/* -1 in the bit position being coded */
-    natural_order = cinfo->natural_order;
-
-    /* Load up working state */
-    BITREAD_LOAD_STATE(cinfo,entropy->bitstate);
-    EOBRUN = entropy->saved.EOBRUN; /* only part of saved state we need */
-
-    /* There is always only one block per MCU */
-    block = MCU_data[0];
-    tbl = entropy->ac_derived_tbl;
-
-    /* If we are forced to suspend, we must undo the assignments to any newly
-     * nonzero coefficients in the block, because otherwise we'd get confused
-     * next time about which coefficients were already nonzero.
-     * But we need not undo addition of bits to already-nonzero coefficients;
-     * instead, we can test the current bit to see if we already did it.
-     */
-    num_newnz = 0;
-
-    /* initialize coefficient loop counter to start of band */
-    k = cinfo->Ss;
-
-    if (EOBRUN == 0) {
-      for (; k <= Se; k++) {
-	HUFF_DECODE(s, br_state, tbl, goto undoit, label3);
-	r = s >> 4;
-	s &= 15;
-	if (s) {
-	  if (s != 1)		/* size of new coef should always be 1 */
-	    WARNMS(cinfo, JWRN_HUFF_BAD_CODE);
-	  CHECK_BIT_BUFFER(br_state, 1, goto undoit);
-	  if (GET_BITS(1))
-	    s = p1;		/* newly nonzero coef is positive */
-	  else
-	    s = m1;		/* newly nonzero coef is negative */
-	} else {
-	  if (r != 15) {
-	    EOBRUN = 1 << r;	/* EOBr, run length is 2^r + appended bits */
-	    if (r) {
-	      CHECK_BIT_BUFFER(br_state, r, goto undoit);
-	      r = GET_BITS(r);
-	      EOBRUN += r;
-	    }
-	    break;		/* rest of block is handled by EOB logic */
-	  }
-	  /* note s = 0 for processing ZRL */
-	}
-	/* Advance over already-nonzero coefs and r still-zero coefs,
-	 * appending correction bits to the nonzeroes.  A correction bit is 1
-	 * if the absolute value of the coefficient must be increased.
-	 */
-	do {
-	  thiscoef = *block + natural_order[k];
-	  if (*thiscoef != 0) {
-	    CHECK_BIT_BUFFER(br_state, 1, goto undoit);
-	    if (GET_BITS(1)) {
-	      if ((*thiscoef & p1) == 0) { /* do nothing if already set it */
-		if (*thiscoef >= 0)
-		  *thiscoef += p1;
-		else
-		  *thiscoef += m1;
-	      }
-	    }
-	  } else {
-	    if (--r < 0)
-	      break;		/* reached target zero coefficient */
-	  }
-	  k++;
-	} while (k <= Se);
-	if (s) {
-	  int pos = natural_order[k];
-	  /* Output newly nonzero coefficient */
-	  (*block)[pos] = (JCOEF) s;
-	  /* Remember its position in case we have to suspend */
-	  newnz_pos[num_newnz++] = pos;
-	}
-      }
-    }
-
-    if (EOBRUN > 0) {
-      /* Scan any remaining coefficient positions after the end-of-band
-       * (the last newly nonzero coefficient, if any).  Append a correction
-       * bit to each already-nonzero coefficient.  A correction bit is 1
-       * if the absolute value of the coefficient must be increased.
-       */
-      for (; k <= Se; k++) {
-	thiscoef = *block + natural_order[k];
-	if (*thiscoef != 0) {
-	  CHECK_BIT_BUFFER(br_state, 1, goto undoit);
-	  if (GET_BITS(1)) {
-	    if ((*thiscoef & p1) == 0) { /* do nothing if already changed it */
-	      if (*thiscoef >= 0)
-		*thiscoef += p1;
-	      else
-		*thiscoef += m1;
-	    }
-	  }
-	}
-      }
-      /* Count one block completed in EOB run */
-      EOBRUN--;
-    }
-
-    /* Completed MCU, so update state */
-    BITREAD_SAVE_STATE(cinfo,entropy->bitstate);
-    entropy->saved.EOBRUN = EOBRUN; /* only part of saved state we need */
-  }
-
-  /* Account for restart interval (no-op if not using restarts) */
-  entropy->restarts_to_go--;
-
-  return TRUE;
-
-undoit:
-  /* Re-zero any output coefficients that we made newly nonzero */
-  while (num_newnz > 0)
-    (*block)[newnz_pos[--num_newnz]] = 0;
-
-  return FALSE;
-}
-
-
-/*
- * Decode one MCU's worth of Huffman-compressed coefficients,
- * partial blocks.
- */
-
-METHODDEF(boolean)
-decode_mcu_sub (j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
-{
-  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
-  const int * natural_order;
-  int Se, blkn;
-  BITREAD_STATE_VARS;
-  savable_state state;
-
-  /* Process restart marker if needed; may have to suspend */
-  if (cinfo->restart_interval) {
-    if (entropy->restarts_to_go == 0)
-      if (! process_restart(cinfo))
-	return FALSE;
-  }
-
-  /* If we've run out of data, just leave the MCU set to zeroes.
-   * This way, we return uniform gray for the remainder of the segment.
-   */
-  if (! entropy->insufficient_data) {
-
-    natural_order = cinfo->natural_order;
-    Se = cinfo->lim_Se;
-
-    /* Load up working state */
-    BITREAD_LOAD_STATE(cinfo,entropy->bitstate);
-    ASSIGN_STATE(state, entropy->saved);
-
-    /* Outer loop handles each block in the MCU */
-
-    for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
-      JBLOCKROW block = MCU_data[blkn];
-      d_derived_tbl * htbl;
-      register int s, k, r;
-      int coef_limit, ci;
-
-      /* Decode a single block's worth of coefficients */
-
-      /* Section F.2.2.1: decode the DC coefficient difference */
-      htbl = entropy->dc_cur_tbls[blkn];
-      HUFF_DECODE(s, br_state, htbl, return FALSE, label1);
-
-      htbl = entropy->ac_cur_tbls[blkn];
-      k = 1;
-      coef_limit = entropy->coef_limit[blkn];
-      if (coef_limit) {
-	/* Convert DC difference to actual value, update last_dc_val */
-	if (s) {
-	  CHECK_BIT_BUFFER(br_state, s, return FALSE);
-	  r = GET_BITS(s);
-	  s = HUFF_EXTEND(r, s);
-	}
-	ci = cinfo->MCU_membership[blkn];
-	s += state.last_dc_val[ci];
-	state.last_dc_val[ci] = s;
-	/* Output the DC coefficient */
-	(*block)[0] = (JCOEF) s;
-
-	/* Section F.2.2.2: decode the AC coefficients */
-	/* Since zeroes are skipped, output area must be cleared beforehand */
-	for (; k < coef_limit; k++) {
-	  HUFF_DECODE(s, br_state, htbl, return FALSE, label2);
-
-	  r = s >> 4;
-	  s &= 15;
-
-	  if (s) {
-	    k += r;
-	    CHECK_BIT_BUFFER(br_state, s, return FALSE);
-	    r = GET_BITS(s);
-	    s = HUFF_EXTEND(r, s);
-	    /* Output coefficient in natural (dezigzagged) order.
-	     * Note: the extra entries in natural_order[] will save us
-	     * if k > Se, which could happen if the data is corrupted.
-	     */
-	    (*block)[natural_order[k]] = (JCOEF) s;
-	  } else {
-	    if (r != 15)
-	      goto EndOfBlock;
-	    k += 15;
-	  }
-	}
-      } else {
-	if (s) {
-	  CHECK_BIT_BUFFER(br_state, s, return FALSE);
-	  DROP_BITS(s);
-	}
-      }
-
-      /* Section F.2.2.2: decode the AC coefficients */
-      /* In this path we just discard the values */
-      for (; k <= Se; k++) {
-	HUFF_DECODE(s, br_state, htbl, return FALSE, label3);
-
-	r = s >> 4;
-	s &= 15;
-
-	if (s) {
-	  k += r;
-	  CHECK_BIT_BUFFER(br_state, s, return FALSE);
-	  DROP_BITS(s);
-	} else {
-	  if (r != 15)
-	    break;
-	  k += 15;
-	}
-      }
-
-      EndOfBlock: ;
-    }
-
-    /* Completed MCU, so update state */
-    BITREAD_SAVE_STATE(cinfo,entropy->bitstate);
-    ASSIGN_STATE(entropy->saved, state);
-  }
-
-  /* Account for restart interval (no-op if not using restarts) */
-  entropy->restarts_to_go--;
-
-  return TRUE;
-}
-
-
-/*
- * Decode one MCU's worth of Huffman-compressed coefficients,
- * full-size blocks.
- */
-
-METHODDEF(boolean)
-decode_mcu (j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
-{
-  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
-  int blkn;
-  BITREAD_STATE_VARS;
-  savable_state state;
-
-  /* Process restart marker if needed; may have to suspend */
-  if (cinfo->restart_interval) {
-    if (entropy->restarts_to_go == 0)
-      if (! process_restart(cinfo))
-	return FALSE;
-  }
-
-  /* If we've run out of data, just leave the MCU set to zeroes.
-   * This way, we return uniform gray for the remainder of the segment.
-   */
-  if (! entropy->insufficient_data) {
-
-    /* Load up working state */
-    BITREAD_LOAD_STATE(cinfo,entropy->bitstate);
-    ASSIGN_STATE(state, entropy->saved);
-
-    /* Outer loop handles each block in the MCU */
-
-    for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
-      JBLOCKROW block = MCU_data[blkn];
-      d_derived_tbl * htbl;
-      register int s, k, r;
-      int coef_limit, ci;
-
-      /* Decode a single block's worth of coefficients */
-
-      /* Section F.2.2.1: decode the DC coefficient difference */
-      htbl = entropy->dc_cur_tbls[blkn];
-      HUFF_DECODE(s, br_state, htbl, return FALSE, label1);
-
-      htbl = entropy->ac_cur_tbls[blkn];
-      k = 1;
-      coef_limit = entropy->coef_limit[blkn];
-      if (coef_limit) {
-	/* Convert DC difference to actual value, update last_dc_val */
-	if (s) {
-	  CHECK_BIT_BUFFER(br_state, s, return FALSE);
-	  r = GET_BITS(s);
-	  s = HUFF_EXTEND(r, s);
-	}
-	ci = cinfo->MCU_membership[blkn];
-	s += state.last_dc_val[ci];
-	state.last_dc_val[ci] = s;
-	/* Output the DC coefficient */
-	(*block)[0] = (JCOEF) s;
-
-	/* Section F.2.2.2: decode the AC coefficients */
-	/* Since zeroes are skipped, output area must be cleared beforehand */
-	for (; k < coef_limit; k++) {
-	  HUFF_DECODE(s, br_state, htbl, return FALSE, label2);
-
-	  r = s >> 4;
-	  s &= 15;
-
-	  if (s) {
-	    k += r;
-	    CHECK_BIT_BUFFER(br_state, s, return FALSE);
-	    r = GET_BITS(s);
-	    s = HUFF_EXTEND(r, s);
-	    /* Output coefficient in natural (dezigzagged) order.
-	     * Note: the extra entries in jpeg_natural_order[] will save us
-	     * if k >= DCTSIZE2, which could happen if the data is corrupted.
-	     */
-	    (*block)[jpeg_natural_order[k]] = (JCOEF) s;
-	  } else {
-	    if (r != 15)
-	      goto EndOfBlock;
-	    k += 15;
-	  }
-	}
-      } else {
-	if (s) {
-	  CHECK_BIT_BUFFER(br_state, s, return FALSE);
-	  DROP_BITS(s);
-	}
-      }
-
-      /* Section F.2.2.2: decode the AC coefficients */
-      /* In this path we just discard the values */
-      for (; k < DCTSIZE2; k++) {
-	HUFF_DECODE(s, br_state, htbl, return FALSE, label3);
-
-	r = s >> 4;
-	s &= 15;
-
-	if (s) {
-	  k += r;
-	  CHECK_BIT_BUFFER(br_state, s, return FALSE);
-	  DROP_BITS(s);
-	} else {
-	  if (r != 15)
-	    break;
-	  k += 15;
-	}
-      }
-
-      EndOfBlock: ;
-    }
-
-    /* Completed MCU, so update state */
-    BITREAD_SAVE_STATE(cinfo,entropy->bitstate);
-    ASSIGN_STATE(entropy->saved, state);
-  }
-
-  /* Account for restart interval (no-op if not using restarts) */
-  entropy->restarts_to_go--;
-
-  return TRUE;
-}
-
-
-/*
- * Initialize for a Huffman-compressed scan.
- */
-
-METHODDEF(void)
-start_pass_huff_decoder (j_decompress_ptr cinfo)
-{
-  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
-  int ci, blkn, tbl, i;
-  jpeg_component_info * compptr;
-
-  if (cinfo->progressive_mode) {
-    /* Validate progressive scan parameters */
-    if (cinfo->Ss == 0) {
-      if (cinfo->Se != 0)
-	goto bad;
-    } else {
-      /* need not check Ss/Se < 0 since they came from unsigned bytes */
-      if (cinfo->Se < cinfo->Ss || cinfo->Se > cinfo->lim_Se)
-	goto bad;
-      /* AC scans may have only one component */
-      if (cinfo->comps_in_scan != 1)
-	goto bad;
-    }
-    if (cinfo->Ah != 0) {
-      /* Successive approximation refinement scan: must have Al = Ah-1. */
-      if (cinfo->Ah-1 != cinfo->Al)
-	goto bad;
-    }
-    if (cinfo->Al > 13) {	/* need not check for < 0 */
-      /* Arguably the maximum Al value should be less than 13 for 8-bit precision,
-       * but the spec doesn't say so, and we try to be liberal about what we
-       * accept.  Note: large Al values could result in out-of-range DC
-       * coefficients during early scans, leading to bizarre displays due to
-       * overflows in the IDCT math.  But we won't crash.
-       */
-      bad:
-      ERREXIT4(cinfo, JERR_BAD_PROGRESSION,
-	       cinfo->Ss, cinfo->Se, cinfo->Ah, cinfo->Al);
-    }
-    /* Update progression status, and verify that scan order is legal.
-     * Note that inter-scan inconsistencies are treated as warnings
-     * not fatal errors ... not clear if this is right way to behave.
-     */
-    for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
-      int coefi, cindex = cinfo->cur_comp_info[ci]->component_index;
-      int *coef_bit_ptr = & cinfo->coef_bits[cindex][0];
-      if (cinfo->Ss && coef_bit_ptr[0] < 0) /* AC without prior DC scan */
-	WARNMS2(cinfo, JWRN_BOGUS_PROGRESSION, cindex, 0);
-      for (coefi = cinfo->Ss; coefi <= cinfo->Se; coefi++) {
-	int expected = (coef_bit_ptr[coefi] < 0) ? 0 : coef_bit_ptr[coefi];
-	if (cinfo->Ah != expected)
-	  WARNMS2(cinfo, JWRN_BOGUS_PROGRESSION, cindex, coefi);
-	coef_bit_ptr[coefi] = cinfo->Al;
-      }
-    }
-
-    /* Select MCU decoding routine */
-    if (cinfo->Ah == 0) {
-      if (cinfo->Ss == 0)
-	entropy->pub.decode_mcu = decode_mcu_DC_first;
-      else
-	entropy->pub.decode_mcu = decode_mcu_AC_first;
-    } else {
-      if (cinfo->Ss == 0)
-	entropy->pub.decode_mcu = decode_mcu_DC_refine;
-      else
-	entropy->pub.decode_mcu = decode_mcu_AC_refine;
-    }
-
-    for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
-      compptr = cinfo->cur_comp_info[ci];
-      /* Make sure requested tables are present, and compute derived tables.
-       * We may build same derived table more than once, but it's not expensive.
-       */
-      if (cinfo->Ss == 0) {
-	if (cinfo->Ah == 0) {	/* DC refinement needs no table */
-	  tbl = compptr->dc_tbl_no;
-	  jpeg_make_d_derived_tbl(cinfo, TRUE, tbl,
-				  & entropy->derived_tbls[tbl]);
-	}
-      } else {
-	tbl = compptr->ac_tbl_no;
-	jpeg_make_d_derived_tbl(cinfo, FALSE, tbl,
-				& entropy->derived_tbls[tbl]);
-	/* remember the single active table */
-	entropy->ac_derived_tbl = entropy->derived_tbls[tbl];
-      }
-      /* Initialize DC predictions to 0 */
-      entropy->saved.last_dc_val[ci] = 0;
-    }
-
-    /* Initialize private state variables */
-    entropy->saved.EOBRUN = 0;
-  } else {
-    /* Check that the scan parameters Ss, Se, Ah/Al are OK for sequential JPEG.
-     * This ought to be an error condition, but we make it a warning because
-     * there are some baseline files out there with all zeroes in these bytes.
-     */
-    if (cinfo->Ss != 0 || cinfo->Ah != 0 || cinfo->Al != 0 ||
-	((cinfo->is_baseline || cinfo->Se < DCTSIZE2) &&
-	cinfo->Se != cinfo->lim_Se))
-      WARNMS(cinfo, JWRN_NOT_SEQUENTIAL);
-
-    /* Select MCU decoding routine */
-    /* We retain the hard-coded case for full-size blocks.
-     * This is not necessary, but it appears that this version is slightly
-     * more performant in the given implementation.
-     * With an improved implementation we would prefer a single optimized
-     * function.
-     */
-    if (cinfo->lim_Se != DCTSIZE2-1)
-      entropy->pub.decode_mcu = decode_mcu_sub;
-    else
-      entropy->pub.decode_mcu = decode_mcu;
-
-    for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
-      compptr = cinfo->cur_comp_info[ci];
-      /* Compute derived values for Huffman tables */
-      /* We may do this more than once for a table, but it's not expensive */
-      tbl = compptr->dc_tbl_no;
-      jpeg_make_d_derived_tbl(cinfo, TRUE, tbl,
-			      & entropy->dc_derived_tbls[tbl]);
-      if (cinfo->lim_Se) {	/* AC needs no table when not present */
-	tbl = compptr->ac_tbl_no;
-	jpeg_make_d_derived_tbl(cinfo, FALSE, tbl,
-				& entropy->ac_derived_tbls[tbl]);
-      }
-      /* Initialize DC predictions to 0 */
-      entropy->saved.last_dc_val[ci] = 0;
-    }
-
-    /* Precalculate decoding info for each block in an MCU of this scan */
-    for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
-      ci = cinfo->MCU_membership[blkn];
-      compptr = cinfo->cur_comp_info[ci];
-      /* Precalculate which table to use for each block */
-      entropy->dc_cur_tbls[blkn] = entropy->dc_derived_tbls[compptr->dc_tbl_no];
-      entropy->ac_cur_tbls[blkn] = entropy->ac_derived_tbls[compptr->ac_tbl_no];
-      /* Decide whether we really care about the coefficient values */
-      if (compptr->component_needed) {
-	ci = compptr->DCT_v_scaled_size;
-	i = compptr->DCT_h_scaled_size;
-	switch (cinfo->lim_Se) {
-	case (1*1-1):
-	  entropy->coef_limit[blkn] = 1;
-	  break;
-	case (2*2-1):
-	  if (ci <= 0 || ci > 2) ci = 2;
-	  if (i <= 0 || i > 2) i = 2;
-	  entropy->coef_limit[blkn] = 1 + jpeg_zigzag_order2[ci - 1][i - 1];
-	  break;
-	case (3*3-1):
-	  if (ci <= 0 || ci > 3) ci = 3;
-	  if (i <= 0 || i > 3) i = 3;
-	  entropy->coef_limit[blkn] = 1 + jpeg_zigzag_order3[ci - 1][i - 1];
-	  break;
-	case (4*4-1):
-	  if (ci <= 0 || ci > 4) ci = 4;
-	  if (i <= 0 || i > 4) i = 4;
-	  entropy->coef_limit[blkn] = 1 + jpeg_zigzag_order4[ci - 1][i - 1];
-	  break;
-	case (5*5-1):
-	  if (ci <= 0 || ci > 5) ci = 5;
-	  if (i <= 0 || i > 5) i = 5;
-	  entropy->coef_limit[blkn] = 1 + jpeg_zigzag_order5[ci - 1][i - 1];
-	  break;
-	case (6*6-1):
-	  if (ci <= 0 || ci > 6) ci = 6;
-	  if (i <= 0 || i > 6) i = 6;
-	  entropy->coef_limit[blkn] = 1 + jpeg_zigzag_order6[ci - 1][i - 1];
-	  break;
-	case (7*7-1):
-	  if (ci <= 0 || ci > 7) ci = 7;
-	  if (i <= 0 || i > 7) i = 7;
-	  entropy->coef_limit[blkn] = 1 + jpeg_zigzag_order7[ci - 1][i - 1];
-	  break;
-	default:
-	  if (ci <= 0 || ci > 8) ci = 8;
-	  if (i <= 0 || i > 8) i = 8;
-	  entropy->coef_limit[blkn] = 1 + jpeg_zigzag_order[ci - 1][i - 1];
-	  break;
-	}
-      } else {
-	entropy->coef_limit[blkn] = 0;
-      }
-    }
-  }
-
-  /* Initialize bitread state variables */
-  entropy->bitstate.bits_left = 0;
-  entropy->bitstate.get_buffer = 0; /* unnecessary, but keeps Purify quiet */
-  entropy->insufficient_data = FALSE;
-
-  /* Initialize restart counter */
-  entropy->restarts_to_go = cinfo->restart_interval;
-}
-
-
-/*
- * Module initialization routine for Huffman entropy decoding.
- */
-
-GLOBAL(void)
-jinit_huff_decoder (j_decompress_ptr cinfo)
-{
-  huff_entropy_ptr entropy;
-  int i;
-
-  entropy = (huff_entropy_ptr)
-    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				SIZEOF(huff_entropy_decoder));
-  cinfo->entropy = (struct jpeg_entropy_decoder *) entropy;
-  entropy->pub.start_pass = start_pass_huff_decoder;
-
-  if (cinfo->progressive_mode) {
-    /* Create progression status table */
-    int *coef_bit_ptr, ci;
-    cinfo->coef_bits = (int (*)[DCTSIZE2])
-      (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				  cinfo->num_components*DCTSIZE2*SIZEOF(int));
-    coef_bit_ptr = & cinfo->coef_bits[0][0];
-    for (ci = 0; ci < cinfo->num_components; ci++)
-      for (i = 0; i < DCTSIZE2; i++)
-	*coef_bit_ptr++ = -1;
-
-    /* Mark derived tables unallocated */
-    for (i = 0; i < NUM_HUFF_TBLS; i++) {
-      entropy->derived_tbls[i] = NULL;
-    }
-  } else {
-    /* Mark tables unallocated */
-    for (i = 0; i < NUM_HUFF_TBLS; i++) {
-      entropy->dc_derived_tbls[i] = entropy->ac_derived_tbls[i] = NULL;
-    }
-  }
-}
+/*
+ * jdhuff.c
+ *
+ * Copyright (C) 1991-1997, Thomas G. Lane.
+ * Modified 2006-2012 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains Huffman entropy decoding routines.
+ * Both sequential and progressive modes are supported in this single module.
+ *
+ * Much of the complexity here has to do with supporting input suspension.
+ * If the data source module demands suspension, we want to be able to back
+ * up to the start of the current MCU.  To do this, we copy state variables
+ * into local working storage, and update them back to the permanent
+ * storage only upon successful completion of an MCU.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/* Derived data constructed for each Huffman table */
+
+#define HUFF_LOOKAHEAD	8	/* # of bits of lookahead */
+
+typedef struct {
+  /* Basic tables: (element [0] of each array is unused) */
+  INT32 maxcode[18];		/* largest code of length k (-1 if none) */
+  /* (maxcode[17] is a sentinel to ensure jpeg_huff_decode terminates) */
+  INT32 valoffset[17];		/* huffval[] offset for codes of length k */
+  /* valoffset[k] = huffval[] index of 1st symbol of code length k, less
+   * the smallest code of length k; so given a code of length k, the
+   * corresponding symbol is huffval[code + valoffset[k]]
+   */
+
+  /* Link to public Huffman table (needed only in jpeg_huff_decode) */
+  JHUFF_TBL *pub;
+
+  /* Lookahead tables: indexed by the next HUFF_LOOKAHEAD bits of
+   * the input data stream.  If the next Huffman code is no more
+   * than HUFF_LOOKAHEAD bits long, we can obtain its length and
+   * the corresponding symbol directly from these tables.
+   */
+  int look_nbits[1<<HUFF_LOOKAHEAD]; /* # bits, or 0 if too long */
+  UINT8 look_sym[1<<HUFF_LOOKAHEAD]; /* symbol, or unused */
+} d_derived_tbl;
+
+
+/*
+ * Fetching the next N bits from the input stream is a time-critical operation
+ * for the Huffman decoders.  We implement it with a combination of inline
+ * macros and out-of-line subroutines.  Note that N (the number of bits
+ * demanded at one time) never exceeds 15 for JPEG use.
+ *
+ * We read source bytes into get_buffer and dole out bits as needed.
+ * If get_buffer already contains enough bits, they are fetched in-line
+ * by the macros CHECK_BIT_BUFFER and GET_BITS.  When there aren't enough
+ * bits, jpeg_fill_bit_buffer is called; it will attempt to fill get_buffer
+ * as full as possible (not just to the number of bits needed; this
+ * prefetching reduces the overhead cost of calling jpeg_fill_bit_buffer).
+ * Note that jpeg_fill_bit_buffer may return FALSE to indicate suspension.
+ * On TRUE return, jpeg_fill_bit_buffer guarantees that get_buffer contains
+ * at least the requested number of bits --- dummy zeroes are inserted if
+ * necessary.
+ */
+
+typedef INT32 bit_buf_type;	/* type of bit-extraction buffer */
+#define BIT_BUF_SIZE  32	/* size of buffer in bits */
+
+/* If long is > 32 bits on your machine, and shifting/masking longs is
+ * reasonably fast, making bit_buf_type be long and setting BIT_BUF_SIZE
+ * appropriately should be a win.  Unfortunately we can't define the size
+ * with something like  #define BIT_BUF_SIZE (sizeof(bit_buf_type)*8)
+ * because not all machines measure sizeof in 8-bit bytes.
+ */
+
+typedef struct {		/* Bitreading state saved across MCUs */
+  bit_buf_type get_buffer;	/* current bit-extraction buffer */
+  int bits_left;		/* # of unused bits in it */
+} bitread_perm_state;
+
+typedef struct {		/* Bitreading working state within an MCU */
+  /* Current data source location */
+  /* We need a copy, rather than munging the original, in case of suspension */
+  const JOCTET * next_input_byte; /* => next byte to read from source */
+  size_t bytes_in_buffer;	/* # of bytes remaining in source buffer */
+  /* Bit input buffer --- note these values are kept in register variables,
+   * not in this struct, inside the inner loops.
+   */
+  bit_buf_type get_buffer;	/* current bit-extraction buffer */
+  int bits_left;		/* # of unused bits in it */
+  /* Pointer needed by jpeg_fill_bit_buffer. */
+  j_decompress_ptr cinfo;	/* back link to decompress master record */
+} bitread_working_state;
+
+/* Macros to declare and load/save bitread local variables. */
+#define BITREAD_STATE_VARS  \
+	register bit_buf_type get_buffer;  \
+	register int bits_left;  \
+	bitread_working_state br_state
+
+#define BITREAD_LOAD_STATE(cinfop,permstate)  \
+	br_state.cinfo = cinfop; \
+	br_state.next_input_byte = cinfop->src->next_input_byte; \
+	br_state.bytes_in_buffer = cinfop->src->bytes_in_buffer; \
+	get_buffer = permstate.get_buffer; \
+	bits_left = permstate.bits_left;
+
+#define BITREAD_SAVE_STATE(cinfop,permstate)  \
+	cinfop->src->next_input_byte = br_state.next_input_byte; \
+	cinfop->src->bytes_in_buffer = br_state.bytes_in_buffer; \
+	permstate.get_buffer = get_buffer; \
+	permstate.bits_left = bits_left
+
+/*
+ * These macros provide the in-line portion of bit fetching.
+ * Use CHECK_BIT_BUFFER to ensure there are N bits in get_buffer
+ * before using GET_BITS, PEEK_BITS, or DROP_BITS.
+ * The variables get_buffer and bits_left are assumed to be locals,
+ * but the state struct might not be (jpeg_huff_decode needs this).
+ *	CHECK_BIT_BUFFER(state,n,action);
+ *		Ensure there are N bits in get_buffer; if suspend, take action.
+ *      val = GET_BITS(n);
+ *		Fetch next N bits.
+ *      val = PEEK_BITS(n);
+ *		Fetch next N bits without removing them from the buffer.
+ *	DROP_BITS(n);
+ *		Discard next N bits.
+ * The value N should be a simple variable, not an expression, because it
+ * is evaluated multiple times.
+ */
+
+#define CHECK_BIT_BUFFER(state,nbits,action) \
+	{ if (bits_left < (nbits)) {  \
+	    if (! jpeg_fill_bit_buffer(&(state),get_buffer,bits_left,nbits))  \
+	      { action; }  \
+	    get_buffer = (state).get_buffer; bits_left = (state).bits_left; } }
+
+#define GET_BITS(nbits) \
+	(((int) (get_buffer >> (bits_left -= (nbits)))) & BIT_MASK(nbits))
+
+#define PEEK_BITS(nbits) \
+	(((int) (get_buffer >> (bits_left -  (nbits)))) & BIT_MASK(nbits))
+
+#define DROP_BITS(nbits) \
+	(bits_left -= (nbits))
+
+
+/*
+ * Code for extracting next Huffman-coded symbol from input bit stream.
+ * Again, this is time-critical and we make the main paths be macros.
+ *
+ * We use a lookahead table to process codes of up to HUFF_LOOKAHEAD bits
+ * without looping.  Usually, more than 95% of the Huffman codes will be 8
+ * or fewer bits long.  The few overlength codes are handled with a loop,
+ * which need not be inline code.
+ *
+ * Notes about the HUFF_DECODE macro:
+ * 1. Near the end of the data segment, we may fail to get enough bits
+ *    for a lookahead.  In that case, we do it the hard way.
+ * 2. If the lookahead table contains no entry, the next code must be
+ *    more than HUFF_LOOKAHEAD bits long.
+ * 3. jpeg_huff_decode returns -1 if forced to suspend.
+ */
+
+#define HUFF_DECODE(result,state,htbl,failaction,slowlabel) \
+{ register int nb, look; \
+  if (bits_left < HUFF_LOOKAHEAD) { \
+    if (! jpeg_fill_bit_buffer(&state,get_buffer,bits_left, 0)) {failaction;} \
+    get_buffer = state.get_buffer; bits_left = state.bits_left; \
+    if (bits_left < HUFF_LOOKAHEAD) { \
+      nb = 1; goto slowlabel; \
+    } \
+  } \
+  look = PEEK_BITS(HUFF_LOOKAHEAD); \
+  if ((nb = htbl->look_nbits[look]) != 0) { \
+    DROP_BITS(nb); \
+    result = htbl->look_sym[look]; \
+  } else { \
+    nb = HUFF_LOOKAHEAD+1; \
+slowlabel: \
+    if ((result=jpeg_huff_decode(&state,get_buffer,bits_left,htbl,nb)) < 0) \
+	{ failaction; } \
+    get_buffer = state.get_buffer; bits_left = state.bits_left; \
+  } \
+}
+
+
+/*
+ * Expanded entropy decoder object for Huffman decoding.
+ *
+ * The savable_state subrecord contains fields that change within an MCU,
+ * but must not be updated permanently until we complete the MCU.
+ */
+
+typedef struct {
+  unsigned int EOBRUN;			/* remaining EOBs in EOBRUN */
+  int last_dc_val[MAX_COMPS_IN_SCAN];	/* last DC coef for each component */
+} savable_state;
+
+/* This macro is to work around compilers with missing or broken
+ * structure assignment.  You'll need to fix this code if you have
+ * such a compiler and you change MAX_COMPS_IN_SCAN.
+ */
+
+#ifndef NO_STRUCT_ASSIGN
+#define ASSIGN_STATE(dest,src)  ((dest) = (src))
+#else
+#if MAX_COMPS_IN_SCAN == 4
+#define ASSIGN_STATE(dest,src)  \
+	((dest).EOBRUN = (src).EOBRUN, \
+	 (dest).last_dc_val[0] = (src).last_dc_val[0], \
+	 (dest).last_dc_val[1] = (src).last_dc_val[1], \
+	 (dest).last_dc_val[2] = (src).last_dc_val[2], \
+	 (dest).last_dc_val[3] = (src).last_dc_val[3])
+#endif
+#endif
+
+
+typedef struct {
+  struct jpeg_entropy_decoder pub; /* public fields */
+
+  /* These fields are loaded into local variables at start of each MCU.
+   * In case of suspension, we exit WITHOUT updating them.
+   */
+  bitread_perm_state bitstate;	/* Bit buffer at start of MCU */
+  savable_state saved;		/* Other state at start of MCU */
+
+  /* These fields are NOT loaded into local working state. */
+  boolean insufficient_data;	/* set TRUE after emitting warning */
+  unsigned int restarts_to_go;	/* MCUs left in this restart interval */
+
+  /* Following two fields used only in progressive mode */
+
+  /* Pointers to derived tables (these workspaces have image lifespan) */
+  d_derived_tbl * derived_tbls[NUM_HUFF_TBLS];
+
+  d_derived_tbl * ac_derived_tbl; /* active table during an AC scan */
+
+  /* Following fields used only in sequential mode */
+
+  /* Pointers to derived tables (these workspaces have image lifespan) */
+  d_derived_tbl * dc_derived_tbls[NUM_HUFF_TBLS];
+  d_derived_tbl * ac_derived_tbls[NUM_HUFF_TBLS];
+
+  /* Precalculated info set up by start_pass for use in decode_mcu: */
+
+  /* Pointers to derived tables to be used for each block within an MCU */
+  d_derived_tbl * dc_cur_tbls[D_MAX_BLOCKS_IN_MCU];
+  d_derived_tbl * ac_cur_tbls[D_MAX_BLOCKS_IN_MCU];
+  /* Whether we care about the DC and AC coefficient values for each block */
+  int coef_limit[D_MAX_BLOCKS_IN_MCU];
+} huff_entropy_decoder;
+
+typedef huff_entropy_decoder * huff_entropy_ptr;
+
+
+static const int jpeg_zigzag_order[8][8] = {
+  {  0,  1,  5,  6, 14, 15, 27, 28 },
+  {  2,  4,  7, 13, 16, 26, 29, 42 },
+  {  3,  8, 12, 17, 25, 30, 41, 43 },
+  {  9, 11, 18, 24, 31, 40, 44, 53 },
+  { 10, 19, 23, 32, 39, 45, 52, 54 },
+  { 20, 22, 33, 38, 46, 51, 55, 60 },
+  { 21, 34, 37, 47, 50, 56, 59, 61 },
+  { 35, 36, 48, 49, 57, 58, 62, 63 }
+};
+
+static const int jpeg_zigzag_order7[7][7] = {
+  {  0,  1,  5,  6, 14, 15, 27 },
+  {  2,  4,  7, 13, 16, 26, 28 },
+  {  3,  8, 12, 17, 25, 29, 38 },
+  {  9, 11, 18, 24, 30, 37, 39 },
+  { 10, 19, 23, 31, 36, 40, 45 },
+  { 20, 22, 32, 35, 41, 44, 46 },
+  { 21, 33, 34, 42, 43, 47, 48 }
+};
+
+static const int jpeg_zigzag_order6[6][6] = {
+  {  0,  1,  5,  6, 14, 15 },
+  {  2,  4,  7, 13, 16, 25 },
+  {  3,  8, 12, 17, 24, 26 },
+  {  9, 11, 18, 23, 27, 32 },
+  { 10, 19, 22, 28, 31, 33 },
+  { 20, 21, 29, 30, 34, 35 }
+};
+
+static const int jpeg_zigzag_order5[5][5] = {
+  {  0,  1,  5,  6, 14 },
+  {  2,  4,  7, 13, 15 },
+  {  3,  8, 12, 16, 21 },
+  {  9, 11, 17, 20, 22 },
+  { 10, 18, 19, 23, 24 }
+};
+
+static const int jpeg_zigzag_order4[4][4] = {
+  { 0,  1,  5,  6 },
+  { 2,  4,  7, 12 },
+  { 3,  8, 11, 13 },
+  { 9, 10, 14, 15 }
+};
+
+static const int jpeg_zigzag_order3[3][3] = {
+  { 0, 1, 5 },
+  { 2, 4, 6 },
+  { 3, 7, 8 }
+};
+
+static const int jpeg_zigzag_order2[2][2] = {
+  { 0, 1 },
+  { 2, 3 }
+};
+
+
+/*
+ * Compute the derived values for a Huffman table.
+ * This routine also performs some validation checks on the table.
+ */
+
+LOCAL(void)
+jpeg_make_d_derived_tbl (j_decompress_ptr cinfo, boolean isDC, int tblno,
+			 d_derived_tbl ** pdtbl)
+{
+  JHUFF_TBL *htbl;
+  d_derived_tbl *dtbl;
+  int p, i, l, si, numsymbols;
+  int lookbits, ctr;
+  char huffsize[257];
+  unsigned int huffcode[257];
+  unsigned int code;
+
+  /* Note that huffsize[] and huffcode[] are filled in code-length order,
+   * paralleling the order of the symbols themselves in htbl->huffval[].
+   */
+
+  /* Find the input Huffman table */
+  if (tblno < 0 || tblno >= NUM_HUFF_TBLS)
+    ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, tblno);
+  htbl =
+    isDC ? cinfo->dc_huff_tbl_ptrs[tblno] : cinfo->ac_huff_tbl_ptrs[tblno];
+  if (htbl == NULL)
+    ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, tblno);
+
+  /* Allocate a workspace if we haven't already done so. */
+  if (*pdtbl == NULL)
+    *pdtbl = (d_derived_tbl *)
+      (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				  SIZEOF(d_derived_tbl));
+  dtbl = *pdtbl;
+  dtbl->pub = htbl;		/* fill in back link */
+  
+  /* Figure C.1: make table of Huffman code length for each symbol */
+
+  p = 0;
+  for (l = 1; l <= 16; l++) {
+    i = (int) htbl->bits[l];
+    if (i < 0 || p + i > 256)	/* protect against table overrun */
+      ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
+    while (i--)
+      huffsize[p++] = (char) l;
+  }
+  huffsize[p] = 0;
+  numsymbols = p;
+  
+  /* Figure C.2: generate the codes themselves */
+  /* We also validate that the counts represent a legal Huffman code tree. */
+  
+  code = 0;
+  si = huffsize[0];
+  p = 0;
+  while (huffsize[p]) {
+    while (((int) huffsize[p]) == si) {
+      huffcode[p++] = code;
+      code++;
+    }
+    /* code is now 1 more than the last code used for codelength si; but
+     * it must still fit in si bits, since no code is allowed to be all ones.
+     */
+    if (((INT32) code) >= (((INT32) 1) << si))
+      ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
+    code <<= 1;
+    si++;
+  }
+
+  /* Figure F.15: generate decoding tables for bit-sequential decoding */
+
+  p = 0;
+  for (l = 1; l <= 16; l++) {
+    if (htbl->bits[l]) {
+      /* valoffset[l] = huffval[] index of 1st symbol of code length l,
+       * minus the minimum code of length l
+       */
+      dtbl->valoffset[l] = (INT32) p - (INT32) huffcode[p];
+      p += htbl->bits[l];
+      dtbl->maxcode[l] = huffcode[p-1]; /* maximum code of length l */
+    } else {
+      dtbl->maxcode[l] = -1;	/* -1 if no codes of this length */
+    }
+  }
+  dtbl->maxcode[17] = 0xFFFFFL; /* ensures jpeg_huff_decode terminates */
+
+  /* Compute lookahead tables to speed up decoding.
+   * First we set all the table entries to 0, indicating "too long";
+   * then we iterate through the Huffman codes that are short enough and
+   * fill in all the entries that correspond to bit sequences starting
+   * with that code.
+   */
+
+  MEMZERO(dtbl->look_nbits, SIZEOF(dtbl->look_nbits));
+
+  p = 0;
+  for (l = 1; l <= HUFF_LOOKAHEAD; l++) {
+    for (i = 1; i <= (int) htbl->bits[l]; i++, p++) {
+      /* l = current code's length, p = its index in huffcode[] & huffval[]. */
+      /* Generate left-justified code followed by all possible bit sequences */
+      lookbits = huffcode[p] << (HUFF_LOOKAHEAD-l);
+      for (ctr = 1 << (HUFF_LOOKAHEAD-l); ctr > 0; ctr--) {
+	dtbl->look_nbits[lookbits] = l;
+	dtbl->look_sym[lookbits] = htbl->huffval[p];
+	lookbits++;
+      }
+    }
+  }
+
+  /* Validate symbols as being reasonable.
+   * For AC tables, we make no check, but accept all byte values 0..255.
+   * For DC tables, we require the symbols to be in range 0..15.
+   * (Tighter bounds could be applied depending on the data depth and mode,
+   * but this is sufficient to ensure safe decoding.)
+   */
+  if (isDC) {
+    for (i = 0; i < numsymbols; i++) {
+      int sym = htbl->huffval[i];
+      if (sym < 0 || sym > 15)
+	ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
+    }
+  }
+}
+
+
+/*
+ * Out-of-line code for bit fetching.
+ * Note: current values of get_buffer and bits_left are passed as parameters,
+ * but are returned in the corresponding fields of the state struct.
+ *
+ * On most machines MIN_GET_BITS should be 25 to allow the full 32-bit width
+ * of get_buffer to be used.  (On machines with wider words, an even larger
+ * buffer could be used.)  However, on some machines 32-bit shifts are
+ * quite slow and take time proportional to the number of places shifted.
+ * (This is true with most PC compilers, for instance.)  In this case it may
+ * be a win to set MIN_GET_BITS to the minimum value of 15.  This reduces the
+ * average shift distance at the cost of more calls to jpeg_fill_bit_buffer.
+ */
+
+#ifdef SLOW_SHIFT_32
+#define MIN_GET_BITS  15	/* minimum allowable value */
+#else
+#define MIN_GET_BITS  (BIT_BUF_SIZE-7)
+#endif
+
+
+LOCAL(boolean)
+jpeg_fill_bit_buffer (bitread_working_state * state,
+		      register bit_buf_type get_buffer, register int bits_left,
+		      int nbits)
+/* Load up the bit buffer to a depth of at least nbits */
+{
+  /* Copy heavily used state fields into locals (hopefully registers) */
+  register const JOCTET * next_input_byte = state->next_input_byte;
+  register size_t bytes_in_buffer = state->bytes_in_buffer;
+  j_decompress_ptr cinfo = state->cinfo;
+
+  /* Attempt to load at least MIN_GET_BITS bits into get_buffer. */
+  /* (It is assumed that no request will be for more than that many bits.) */
+  /* We fail to do so only if we hit a marker or are forced to suspend. */
+
+  if (cinfo->unread_marker == 0) {	/* cannot advance past a marker */
+    while (bits_left < MIN_GET_BITS) {
+      register int c;
+
+      /* Attempt to read a byte */
+      if (bytes_in_buffer == 0) {
+	if (! (*cinfo->src->fill_input_buffer) (cinfo))
+	  return FALSE;
+	next_input_byte = cinfo->src->next_input_byte;
+	bytes_in_buffer = cinfo->src->bytes_in_buffer;
+      }
+      bytes_in_buffer--;
+      c = GETJOCTET(*next_input_byte++);
+
+      /* If it's 0xFF, check and discard stuffed zero byte */
+      if (c == 0xFF) {
+	/* Loop here to discard any padding FF's on terminating marker,
+	 * so that we can save a valid unread_marker value.  NOTE: we will
+	 * accept multiple FF's followed by a 0 as meaning a single FF data
+	 * byte.  This data pattern is not valid according to the standard.
+	 */
+	do {
+	  if (bytes_in_buffer == 0) {
+	    if (! (*cinfo->src->fill_input_buffer) (cinfo))
+	      return FALSE;
+	    next_input_byte = cinfo->src->next_input_byte;
+	    bytes_in_buffer = cinfo->src->bytes_in_buffer;
+	  }
+	  bytes_in_buffer--;
+	  c = GETJOCTET(*next_input_byte++);
+	} while (c == 0xFF);
+
+	if (c == 0) {
+	  /* Found FF/00, which represents an FF data byte */
+	  c = 0xFF;
+	} else {
+	  /* Oops, it's actually a marker indicating end of compressed data.
+	   * Save the marker code for later use.
+	   * Fine point: it might appear that we should save the marker into
+	   * bitread working state, not straight into permanent state.  But
+	   * once we have hit a marker, we cannot need to suspend within the
+	   * current MCU, because we will read no more bytes from the data
+	   * source.  So it is OK to update permanent state right away.
+	   */
+	  cinfo->unread_marker = c;
+	  /* See if we need to insert some fake zero bits. */
+	  goto no_more_bytes;
+	}
+      }
+
+      /* OK, load c into get_buffer */
+      get_buffer = (get_buffer << 8) | c;
+      bits_left += 8;
+    } /* end while */
+  } else {
+  no_more_bytes:
+    /* We get here if we've read the marker that terminates the compressed
+     * data segment.  There should be enough bits in the buffer register
+     * to satisfy the request; if so, no problem.
+     */
+    if (nbits > bits_left) {
+      /* Uh-oh.  Report corrupted data to user and stuff zeroes into
+       * the data stream, so that we can produce some kind of image.
+       * We use a nonvolatile flag to ensure that only one warning message
+       * appears per data segment.
+       */
+      if (! ((huff_entropy_ptr) cinfo->entropy)->insufficient_data) {
+	WARNMS(cinfo, JWRN_HIT_MARKER);
+	((huff_entropy_ptr) cinfo->entropy)->insufficient_data = TRUE;
+      }
+      /* Fill the buffer with zero bits */
+      get_buffer <<= MIN_GET_BITS - bits_left;
+      bits_left = MIN_GET_BITS;
+    }
+  }
+
+  /* Unload the local registers */
+  state->next_input_byte = next_input_byte;
+  state->bytes_in_buffer = bytes_in_buffer;
+  state->get_buffer = get_buffer;
+  state->bits_left = bits_left;
+
+  return TRUE;
+}
+
+
+/*
+ * Figure F.12: extend sign bit.
+ * On some machines, a shift and sub will be faster than a table lookup.
+ */
+
+#ifdef AVOID_TABLES
+
+#define BIT_MASK(nbits)   ((1<<(nbits))-1)
+#define HUFF_EXTEND(x,s)  ((x) < (1<<((s)-1)) ? (x) - ((1<<(s))-1) : (x))
+
+#else
+
+#define BIT_MASK(nbits)   bmask[nbits]
+#define HUFF_EXTEND(x,s)  ((x) <= bmask[(s) - 1] ? (x) - bmask[s] : (x))
+
+static const int bmask[16] =	/* bmask[n] is mask for n rightmost bits */
+  { 0, 0x0001, 0x0003, 0x0007, 0x000F, 0x001F, 0x003F, 0x007F, 0x00FF,
+    0x01FF, 0x03FF, 0x07FF, 0x0FFF, 0x1FFF, 0x3FFF, 0x7FFF };
+
+#endif /* AVOID_TABLES */
+
+
+/*
+ * Out-of-line code for Huffman code decoding.
+ */
+
+LOCAL(int)
+jpeg_huff_decode (bitread_working_state * state,
+		  register bit_buf_type get_buffer, register int bits_left,
+		  d_derived_tbl * htbl, int min_bits)
+{
+  register int l = min_bits;
+  register INT32 code;
+
+  /* HUFF_DECODE has determined that the code is at least min_bits */
+  /* bits long, so fetch that many bits in one swoop. */
+
+  CHECK_BIT_BUFFER(*state, l, return -1);
+  code = GET_BITS(l);
+
+  /* Collect the rest of the Huffman code one bit at a time. */
+  /* This is per Figure F.16 in the JPEG spec. */
+
+  while (code > htbl->maxcode[l]) {
+    code <<= 1;
+    CHECK_BIT_BUFFER(*state, 1, return -1);
+    code |= GET_BITS(1);
+    l++;
+  }
+
+  /* Unload the local registers */
+  state->get_buffer = get_buffer;
+  state->bits_left = bits_left;
+
+  /* With garbage input we may reach the sentinel value l = 17. */
+
+  if (l > 16) {
+    WARNMS(state->cinfo, JWRN_HUFF_BAD_CODE);
+    return 0;			/* fake a zero as the safest result */
+  }
+
+  return htbl->pub->huffval[ (int) (code + htbl->valoffset[l]) ];
+}
+
+
+/*
+ * Check for a restart marker & resynchronize decoder.
+ * Returns FALSE if must suspend.
+ */
+
+LOCAL(boolean)
+process_restart (j_decompress_ptr cinfo)
+{
+  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
+  int ci;
+
+  /* Throw away any unused bits remaining in bit buffer; */
+  /* include any full bytes in next_marker's count of discarded bytes */
+  cinfo->marker->discarded_bytes += entropy->bitstate.bits_left / 8;
+  entropy->bitstate.bits_left = 0;
+
+  /* Advance past the RSTn marker */
+  if (! (*cinfo->marker->read_restart_marker) (cinfo))
+    return FALSE;
+
+  /* Re-initialize DC predictions to 0 */
+  for (ci = 0; ci < cinfo->comps_in_scan; ci++)
+    entropy->saved.last_dc_val[ci] = 0;
+  /* Re-init EOB run count, too */
+  entropy->saved.EOBRUN = 0;
+
+  /* Reset restart counter */
+  entropy->restarts_to_go = cinfo->restart_interval;
+
+  /* Reset out-of-data flag, unless read_restart_marker left us smack up
+   * against a marker.  In that case we will end up treating the next data
+   * segment as empty, and we can avoid producing bogus output pixels by
+   * leaving the flag set.
+   */
+  if (cinfo->unread_marker == 0)
+    entropy->insufficient_data = FALSE;
+
+  return TRUE;
+}
+
+
+/*
+ * Huffman MCU decoding.
+ * Each of these routines decodes and returns one MCU's worth of
+ * Huffman-compressed coefficients. 
+ * The coefficients are reordered from zigzag order into natural array order,
+ * but are not dequantized.
+ *
+ * The i'th block of the MCU is stored into the block pointed to by
+ * MCU_data[i].  WE ASSUME THIS AREA IS INITIALLY ZEROED BY THE CALLER.
+ * (Wholesale zeroing is usually a little faster than retail...)
+ *
+ * We return FALSE if data source requested suspension.  In that case no
+ * changes have been made to permanent state.  (Exception: some output
+ * coefficients may already have been assigned.  This is harmless for
+ * spectral selection, since we'll just re-assign them on the next call.
+ * Successive approximation AC refinement has to be more careful, however.)
+ */
+
+/*
+ * MCU decoding for DC initial scan (either spectral selection,
+ * or first pass of successive approximation).
+ */
+
+METHODDEF(boolean)
+decode_mcu_DC_first (j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
+{   
+  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
+  int Al = cinfo->Al;
+  register int s, r;
+  int blkn, ci;
+  JBLOCKROW block;
+  BITREAD_STATE_VARS;
+  savable_state state;
+  d_derived_tbl * tbl;
+  jpeg_component_info * compptr;
+
+  /* Process restart marker if needed; may have to suspend */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0)
+      if (! process_restart(cinfo))
+	return FALSE;
+  }
+
+  /* If we've run out of data, just leave the MCU set to zeroes.
+   * This way, we return uniform gray for the remainder of the segment.
+   */
+  if (! entropy->insufficient_data) {
+
+    /* Load up working state */
+    BITREAD_LOAD_STATE(cinfo,entropy->bitstate);
+    ASSIGN_STATE(state, entropy->saved);
+
+    /* Outer loop handles each block in the MCU */
+
+    for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
+      block = MCU_data[blkn];
+      ci = cinfo->MCU_membership[blkn];
+      compptr = cinfo->cur_comp_info[ci];
+      tbl = entropy->derived_tbls[compptr->dc_tbl_no];
+
+      /* Decode a single block's worth of coefficients */
+
+      /* Section F.2.2.1: decode the DC coefficient difference */
+      HUFF_DECODE(s, br_state, tbl, return FALSE, label1);
+      if (s) {
+	CHECK_BIT_BUFFER(br_state, s, return FALSE);
+	r = GET_BITS(s);
+	s = HUFF_EXTEND(r, s);
+      }
+
+      /* Convert DC difference to actual value, update last_dc_val */
+      s += state.last_dc_val[ci];
+      state.last_dc_val[ci] = s;
+      /* Scale and output the coefficient (assumes jpeg_natural_order[0]=0) */
+      (*block)[0] = (JCOEF) (s << Al);
+    }
+
+    /* Completed MCU, so update state */
+    BITREAD_SAVE_STATE(cinfo,entropy->bitstate);
+    ASSIGN_STATE(entropy->saved, state);
+  }
+
+  /* Account for restart interval (no-op if not using restarts) */
+  entropy->restarts_to_go--;
+
+  return TRUE;
+}
+
+
+/*
+ * MCU decoding for AC initial scan (either spectral selection,
+ * or first pass of successive approximation).
+ */
+
+METHODDEF(boolean)
+decode_mcu_AC_first (j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
+{   
+  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
+  register int s, k, r;
+  unsigned int EOBRUN;
+  int Se, Al;
+  const int * natural_order;
+  JBLOCKROW block;
+  BITREAD_STATE_VARS;
+  d_derived_tbl * tbl;
+
+  /* Process restart marker if needed; may have to suspend */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0)
+      if (! process_restart(cinfo))
+	return FALSE;
+  }
+
+  /* If we've run out of data, just leave the MCU set to zeroes.
+   * This way, we return uniform gray for the remainder of the segment.
+   */
+  if (! entropy->insufficient_data) {
+
+    Se = cinfo->Se;
+    Al = cinfo->Al;
+    natural_order = cinfo->natural_order;
+
+    /* Load up working state.
+     * We can avoid loading/saving bitread state if in an EOB run.
+     */
+    EOBRUN = entropy->saved.EOBRUN;	/* only part of saved state we need */
+
+    /* There is always only one block per MCU */
+
+    if (EOBRUN)			/* if it's a band of zeroes... */
+      EOBRUN--;			/* ...process it now (we do nothing) */
+    else {
+      BITREAD_LOAD_STATE(cinfo,entropy->bitstate);
+      block = MCU_data[0];
+      tbl = entropy->ac_derived_tbl;
+
+      for (k = cinfo->Ss; k <= Se; k++) {
+	HUFF_DECODE(s, br_state, tbl, return FALSE, label2);
+	r = s >> 4;
+	s &= 15;
+	if (s) {
+	  k += r;
+	  CHECK_BIT_BUFFER(br_state, s, return FALSE);
+	  r = GET_BITS(s);
+	  s = HUFF_EXTEND(r, s);
+	  /* Scale and output coefficient in natural (dezigzagged) order */
+	  (*block)[natural_order[k]] = (JCOEF) (s << Al);
+	} else {
+	  if (r != 15) {	/* EOBr, run length is 2^r + appended bits */
+	    if (r) {		/* EOBr, r > 0 */
+	      EOBRUN = 1 << r;
+	      CHECK_BIT_BUFFER(br_state, r, return FALSE);
+	      r = GET_BITS(r);
+	      EOBRUN += r;
+	      EOBRUN--;		/* this band is processed at this moment */
+	    }
+	    break;		/* force end-of-band */
+	  }
+	  k += 15;		/* ZRL: skip 15 zeroes in band */
+	}
+      }
+
+      BITREAD_SAVE_STATE(cinfo,entropy->bitstate);
+    }
+
+    /* Completed MCU, so update state */
+    entropy->saved.EOBRUN = EOBRUN;	/* only part of saved state we need */
+  }
+
+  /* Account for restart interval (no-op if not using restarts) */
+  entropy->restarts_to_go--;
+
+  return TRUE;
+}
+
+
+/*
+ * MCU decoding for DC successive approximation refinement scan.
+ * Note: we assume such scans can be multi-component, although the spec
+ * is not very clear on the point.
+ */
+
+METHODDEF(boolean)
+decode_mcu_DC_refine (j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
+{   
+  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
+  int p1 = 1 << cinfo->Al;	/* 1 in the bit position being coded */
+  int blkn;
+  JBLOCKROW block;
+  BITREAD_STATE_VARS;
+
+  /* Process restart marker if needed; may have to suspend */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0)
+      if (! process_restart(cinfo))
+	return FALSE;
+  }
+
+  /* Not worth the cycles to check insufficient_data here,
+   * since we will not change the data anyway if we read zeroes.
+   */
+
+  /* Load up working state */
+  BITREAD_LOAD_STATE(cinfo,entropy->bitstate);
+
+  /* Outer loop handles each block in the MCU */
+
+  for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
+    block = MCU_data[blkn];
+
+    /* Encoded data is simply the next bit of the two's-complement DC value */
+    CHECK_BIT_BUFFER(br_state, 1, return FALSE);
+    if (GET_BITS(1))
+      (*block)[0] |= p1;
+    /* Note: since we use |=, repeating the assignment later is safe */
+  }
+
+  /* Completed MCU, so update state */
+  BITREAD_SAVE_STATE(cinfo,entropy->bitstate);
+
+  /* Account for restart interval (no-op if not using restarts) */
+  entropy->restarts_to_go--;
+
+  return TRUE;
+}
+
+
+/*
+ * MCU decoding for AC successive approximation refinement scan.
+ */
+
+METHODDEF(boolean)
+decode_mcu_AC_refine (j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
+{   
+  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
+  register int s, k, r;
+  unsigned int EOBRUN;
+  int Se, p1, m1;
+  const int * natural_order;
+  JBLOCKROW block;
+  JCOEFPTR thiscoef;
+  BITREAD_STATE_VARS;
+  d_derived_tbl * tbl;
+  int num_newnz;
+  int newnz_pos[DCTSIZE2];
+
+  /* Process restart marker if needed; may have to suspend */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0)
+      if (! process_restart(cinfo))
+	return FALSE;
+  }
+
+  /* If we've run out of data, don't modify the MCU.
+   */
+  if (! entropy->insufficient_data) {
+
+    Se = cinfo->Se;
+    p1 = 1 << cinfo->Al;	/* 1 in the bit position being coded */
+    m1 = (-1) << cinfo->Al;	/* -1 in the bit position being coded */
+    natural_order = cinfo->natural_order;
+
+    /* Load up working state */
+    BITREAD_LOAD_STATE(cinfo,entropy->bitstate);
+    EOBRUN = entropy->saved.EOBRUN; /* only part of saved state we need */
+
+    /* There is always only one block per MCU */
+    block = MCU_data[0];
+    tbl = entropy->ac_derived_tbl;
+
+    /* If we are forced to suspend, we must undo the assignments to any newly
+     * nonzero coefficients in the block, because otherwise we'd get confused
+     * next time about which coefficients were already nonzero.
+     * But we need not undo addition of bits to already-nonzero coefficients;
+     * instead, we can test the current bit to see if we already did it.
+     */
+    num_newnz = 0;
+
+    /* initialize coefficient loop counter to start of band */
+    k = cinfo->Ss;
+
+    if (EOBRUN == 0) {
+      do {
+	HUFF_DECODE(s, br_state, tbl, goto undoit, label3);
+	r = s >> 4;
+	s &= 15;
+	if (s) {
+	  if (s != 1)		/* size of new coef should always be 1 */
+	    WARNMS(cinfo, JWRN_HUFF_BAD_CODE);
+	  CHECK_BIT_BUFFER(br_state, 1, goto undoit);
+	  if (GET_BITS(1))
+	    s = p1;		/* newly nonzero coef is positive */
+	  else
+	    s = m1;		/* newly nonzero coef is negative */
+	} else {
+	  if (r != 15) {
+	    EOBRUN = 1 << r;	/* EOBr, run length is 2^r + appended bits */
+	    if (r) {
+	      CHECK_BIT_BUFFER(br_state, r, goto undoit);
+	      r = GET_BITS(r);
+	      EOBRUN += r;
+	    }
+	    break;		/* rest of block is handled by EOB logic */
+	  }
+	  /* note s = 0 for processing ZRL */
+	}
+	/* Advance over already-nonzero coefs and r still-zero coefs,
+	 * appending correction bits to the nonzeroes.  A correction bit is 1
+	 * if the absolute value of the coefficient must be increased.
+	 */
+	do {
+	  thiscoef = *block + natural_order[k];
+	  if (*thiscoef) {
+	    CHECK_BIT_BUFFER(br_state, 1, goto undoit);
+	    if (GET_BITS(1)) {
+	      if ((*thiscoef & p1) == 0) { /* do nothing if already set it */
+		if (*thiscoef >= 0)
+		  *thiscoef += p1;
+		else
+		  *thiscoef += m1;
+	      }
+	    }
+	  } else {
+	    if (--r < 0)
+	      break;		/* reached target zero coefficient */
+	  }
+	  k++;
+	} while (k <= Se);
+	if (s) {
+	  int pos = natural_order[k];
+	  /* Output newly nonzero coefficient */
+	  (*block)[pos] = (JCOEF) s;
+	  /* Remember its position in case we have to suspend */
+	  newnz_pos[num_newnz++] = pos;
+	}
+	k++;
+      } while (k <= Se);
+    }
+
+    if (EOBRUN) {
+      /* Scan any remaining coefficient positions after the end-of-band
+       * (the last newly nonzero coefficient, if any).  Append a correction
+       * bit to each already-nonzero coefficient.  A correction bit is 1
+       * if the absolute value of the coefficient must be increased.
+       */
+      do {
+	thiscoef = *block + natural_order[k];
+	if (*thiscoef) {
+	  CHECK_BIT_BUFFER(br_state, 1, goto undoit);
+	  if (GET_BITS(1)) {
+	    if ((*thiscoef & p1) == 0) { /* do nothing if already changed it */
+	      if (*thiscoef >= 0)
+		*thiscoef += p1;
+	      else
+		*thiscoef += m1;
+	    }
+	  }
+	}
+	k++;
+      } while (k <= Se);
+      /* Count one block completed in EOB run */
+      EOBRUN--;
+    }
+
+    /* Completed MCU, so update state */
+    BITREAD_SAVE_STATE(cinfo,entropy->bitstate);
+    entropy->saved.EOBRUN = EOBRUN; /* only part of saved state we need */
+  }
+
+  /* Account for restart interval (no-op if not using restarts) */
+  entropy->restarts_to_go--;
+
+  return TRUE;
+
+undoit:
+  /* Re-zero any output coefficients that we made newly nonzero */
+  while (num_newnz)
+    (*block)[newnz_pos[--num_newnz]] = 0;
+
+  return FALSE;
+}
+
+
+/*
+ * Decode one MCU's worth of Huffman-compressed coefficients,
+ * partial blocks.
+ */
+
+METHODDEF(boolean)
+decode_mcu_sub (j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
+{
+  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
+  const int * natural_order;
+  int Se, blkn;
+  BITREAD_STATE_VARS;
+  savable_state state;
+
+  /* Process restart marker if needed; may have to suspend */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0)
+      if (! process_restart(cinfo))
+	return FALSE;
+  }
+
+  /* If we've run out of data, just leave the MCU set to zeroes.
+   * This way, we return uniform gray for the remainder of the segment.
+   */
+  if (! entropy->insufficient_data) {
+
+    natural_order = cinfo->natural_order;
+    Se = cinfo->lim_Se;
+
+    /* Load up working state */
+    BITREAD_LOAD_STATE(cinfo,entropy->bitstate);
+    ASSIGN_STATE(state, entropy->saved);
+
+    /* Outer loop handles each block in the MCU */
+
+    for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
+      JBLOCKROW block = MCU_data[blkn];
+      d_derived_tbl * htbl;
+      register int s, k, r;
+      int coef_limit, ci;
+
+      /* Decode a single block's worth of coefficients */
+
+      /* Section F.2.2.1: decode the DC coefficient difference */
+      htbl = entropy->dc_cur_tbls[blkn];
+      HUFF_DECODE(s, br_state, htbl, return FALSE, label1);
+
+      htbl = entropy->ac_cur_tbls[blkn];
+      k = 1;
+      coef_limit = entropy->coef_limit[blkn];
+      if (coef_limit) {
+	/* Convert DC difference to actual value, update last_dc_val */
+	if (s) {
+	  CHECK_BIT_BUFFER(br_state, s, return FALSE);
+	  r = GET_BITS(s);
+	  s = HUFF_EXTEND(r, s);
+	}
+	ci = cinfo->MCU_membership[blkn];
+	s += state.last_dc_val[ci];
+	state.last_dc_val[ci] = s;
+	/* Output the DC coefficient */
+	(*block)[0] = (JCOEF) s;
+
+	/* Section F.2.2.2: decode the AC coefficients */
+	/* Since zeroes are skipped, output area must be cleared beforehand */
+	for (; k < coef_limit; k++) {
+	  HUFF_DECODE(s, br_state, htbl, return FALSE, label2);
+
+	  r = s >> 4;
+	  s &= 15;
+
+	  if (s) {
+	    k += r;
+	    CHECK_BIT_BUFFER(br_state, s, return FALSE);
+	    r = GET_BITS(s);
+	    s = HUFF_EXTEND(r, s);
+	    /* Output coefficient in natural (dezigzagged) order.
+	     * Note: the extra entries in natural_order[] will save us
+	     * if k > Se, which could happen if the data is corrupted.
+	     */
+	    (*block)[natural_order[k]] = (JCOEF) s;
+	  } else {
+	    if (r != 15)
+	      goto EndOfBlock;
+	    k += 15;
+	  }
+	}
+      } else {
+	if (s) {
+	  CHECK_BIT_BUFFER(br_state, s, return FALSE);
+	  DROP_BITS(s);
+	}
+      }
+
+      /* Section F.2.2.2: decode the AC coefficients */
+      /* In this path we just discard the values */
+      for (; k <= Se; k++) {
+	HUFF_DECODE(s, br_state, htbl, return FALSE, label3);
+
+	r = s >> 4;
+	s &= 15;
+
+	if (s) {
+	  k += r;
+	  CHECK_BIT_BUFFER(br_state, s, return FALSE);
+	  DROP_BITS(s);
+	} else {
+	  if (r != 15)
+	    break;
+	  k += 15;
+	}
+      }
+
+      EndOfBlock: ;
+    }
+
+    /* Completed MCU, so update state */
+    BITREAD_SAVE_STATE(cinfo,entropy->bitstate);
+    ASSIGN_STATE(entropy->saved, state);
+  }
+
+  /* Account for restart interval (no-op if not using restarts) */
+  entropy->restarts_to_go--;
+
+  return TRUE;
+}
+
+
+/*
+ * Decode one MCU's worth of Huffman-compressed coefficients,
+ * full-size blocks.
+ */
+
+METHODDEF(boolean)
+decode_mcu (j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
+{
+  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
+  int blkn;
+  BITREAD_STATE_VARS;
+  savable_state state;
+
+  /* Process restart marker if needed; may have to suspend */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0)
+      if (! process_restart(cinfo))
+	return FALSE;
+  }
+
+  /* If we've run out of data, just leave the MCU set to zeroes.
+   * This way, we return uniform gray for the remainder of the segment.
+   */
+  if (! entropy->insufficient_data) {
+
+    /* Load up working state */
+    BITREAD_LOAD_STATE(cinfo,entropy->bitstate);
+    ASSIGN_STATE(state, entropy->saved);
+
+    /* Outer loop handles each block in the MCU */
+
+    for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
+      JBLOCKROW block = MCU_data[blkn];
+      d_derived_tbl * htbl;
+      register int s, k, r;
+      int coef_limit, ci;
+
+      /* Decode a single block's worth of coefficients */
+
+      /* Section F.2.2.1: decode the DC coefficient difference */
+      htbl = entropy->dc_cur_tbls[blkn];
+      HUFF_DECODE(s, br_state, htbl, return FALSE, label1);
+
+      htbl = entropy->ac_cur_tbls[blkn];
+      k = 1;
+      coef_limit = entropy->coef_limit[blkn];
+      if (coef_limit) {
+	/* Convert DC difference to actual value, update last_dc_val */
+	if (s) {
+	  CHECK_BIT_BUFFER(br_state, s, return FALSE);
+	  r = GET_BITS(s);
+	  s = HUFF_EXTEND(r, s);
+	}
+	ci = cinfo->MCU_membership[blkn];
+	s += state.last_dc_val[ci];
+	state.last_dc_val[ci] = s;
+	/* Output the DC coefficient */
+	(*block)[0] = (JCOEF) s;
+
+	/* Section F.2.2.2: decode the AC coefficients */
+	/* Since zeroes are skipped, output area must be cleared beforehand */
+	for (; k < coef_limit; k++) {
+	  HUFF_DECODE(s, br_state, htbl, return FALSE, label2);
+
+	  r = s >> 4;
+	  s &= 15;
+
+	  if (s) {
+	    k += r;
+	    CHECK_BIT_BUFFER(br_state, s, return FALSE);
+	    r = GET_BITS(s);
+	    s = HUFF_EXTEND(r, s);
+	    /* Output coefficient in natural (dezigzagged) order.
+	     * Note: the extra entries in jpeg_natural_order[] will save us
+	     * if k >= DCTSIZE2, which could happen if the data is corrupted.
+	     */
+	    (*block)[jpeg_natural_order[k]] = (JCOEF) s;
+	  } else {
+	    if (r != 15)
+	      goto EndOfBlock;
+	    k += 15;
+	  }
+	}
+      } else {
+	if (s) {
+	  CHECK_BIT_BUFFER(br_state, s, return FALSE);
+	  DROP_BITS(s);
+	}
+      }
+
+      /* Section F.2.2.2: decode the AC coefficients */
+      /* In this path we just discard the values */
+      for (; k < DCTSIZE2; k++) {
+	HUFF_DECODE(s, br_state, htbl, return FALSE, label3);
+
+	r = s >> 4;
+	s &= 15;
+
+	if (s) {
+	  k += r;
+	  CHECK_BIT_BUFFER(br_state, s, return FALSE);
+	  DROP_BITS(s);
+	} else {
+	  if (r != 15)
+	    break;
+	  k += 15;
+	}
+      }
+
+      EndOfBlock: ;
+    }
+
+    /* Completed MCU, so update state */
+    BITREAD_SAVE_STATE(cinfo,entropy->bitstate);
+    ASSIGN_STATE(entropy->saved, state);
+  }
+
+  /* Account for restart interval (no-op if not using restarts) */
+  entropy->restarts_to_go--;
+
+  return TRUE;
+}
+
+
+/*
+ * Initialize for a Huffman-compressed scan.
+ */
+
+METHODDEF(void)
+start_pass_huff_decoder (j_decompress_ptr cinfo)
+{
+  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
+  int ci, blkn, tbl, i;
+  jpeg_component_info * compptr;
+
+  if (cinfo->progressive_mode) {
+    /* Validate progressive scan parameters */
+    if (cinfo->Ss == 0) {
+      if (cinfo->Se != 0)
+	goto bad;
+    } else {
+      /* need not check Ss/Se < 0 since they came from unsigned bytes */
+      if (cinfo->Se < cinfo->Ss || cinfo->Se > cinfo->lim_Se)
+	goto bad;
+      /* AC scans may have only one component */
+      if (cinfo->comps_in_scan != 1)
+	goto bad;
+    }
+    if (cinfo->Ah != 0) {
+      /* Successive approximation refinement scan: must have Al = Ah-1. */
+      if (cinfo->Ah-1 != cinfo->Al)
+	goto bad;
+    }
+    if (cinfo->Al > 13) {	/* need not check for < 0 */
+      /* Arguably the maximum Al value should be less than 13 for 8-bit precision,
+       * but the spec doesn't say so, and we try to be liberal about what we
+       * accept.  Note: large Al values could result in out-of-range DC
+       * coefficients during early scans, leading to bizarre displays due to
+       * overflows in the IDCT math.  But we won't crash.
+       */
+      bad:
+      ERREXIT4(cinfo, JERR_BAD_PROGRESSION,
+	       cinfo->Ss, cinfo->Se, cinfo->Ah, cinfo->Al);
+    }
+    /* Update progression status, and verify that scan order is legal.
+     * Note that inter-scan inconsistencies are treated as warnings
+     * not fatal errors ... not clear if this is right way to behave.
+     */
+    for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+      int coefi, cindex = cinfo->cur_comp_info[ci]->component_index;
+      int *coef_bit_ptr = & cinfo->coef_bits[cindex][0];
+      if (cinfo->Ss && coef_bit_ptr[0] < 0) /* AC without prior DC scan */
+	WARNMS2(cinfo, JWRN_BOGUS_PROGRESSION, cindex, 0);
+      for (coefi = cinfo->Ss; coefi <= cinfo->Se; coefi++) {
+	int expected = (coef_bit_ptr[coefi] < 0) ? 0 : coef_bit_ptr[coefi];
+	if (cinfo->Ah != expected)
+	  WARNMS2(cinfo, JWRN_BOGUS_PROGRESSION, cindex, coefi);
+	coef_bit_ptr[coefi] = cinfo->Al;
+      }
+    }
+
+    /* Select MCU decoding routine */
+    if (cinfo->Ah == 0) {
+      if (cinfo->Ss == 0)
+	entropy->pub.decode_mcu = decode_mcu_DC_first;
+      else
+	entropy->pub.decode_mcu = decode_mcu_AC_first;
+    } else {
+      if (cinfo->Ss == 0)
+	entropy->pub.decode_mcu = decode_mcu_DC_refine;
+      else
+	entropy->pub.decode_mcu = decode_mcu_AC_refine;
+    }
+
+    for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+      compptr = cinfo->cur_comp_info[ci];
+      /* Make sure requested tables are present, and compute derived tables.
+       * We may build same derived table more than once, but it's not expensive.
+       */
+      if (cinfo->Ss == 0) {
+	if (cinfo->Ah == 0) {	/* DC refinement needs no table */
+	  tbl = compptr->dc_tbl_no;
+	  jpeg_make_d_derived_tbl(cinfo, TRUE, tbl,
+				  & entropy->derived_tbls[tbl]);
+	}
+      } else {
+	tbl = compptr->ac_tbl_no;
+	jpeg_make_d_derived_tbl(cinfo, FALSE, tbl,
+				& entropy->derived_tbls[tbl]);
+	/* remember the single active table */
+	entropy->ac_derived_tbl = entropy->derived_tbls[tbl];
+      }
+      /* Initialize DC predictions to 0 */
+      entropy->saved.last_dc_val[ci] = 0;
+    }
+
+    /* Initialize private state variables */
+    entropy->saved.EOBRUN = 0;
+  } else {
+    /* Check that the scan parameters Ss, Se, Ah/Al are OK for sequential JPEG.
+     * This ought to be an error condition, but we make it a warning because
+     * there are some baseline files out there with all zeroes in these bytes.
+     */
+    if (cinfo->Ss != 0 || cinfo->Ah != 0 || cinfo->Al != 0 ||
+	((cinfo->is_baseline || cinfo->Se < DCTSIZE2) &&
+	cinfo->Se != cinfo->lim_Se))
+      WARNMS(cinfo, JWRN_NOT_SEQUENTIAL);
+
+    /* Select MCU decoding routine */
+    /* We retain the hard-coded case for full-size blocks.
+     * This is not necessary, but it appears that this version is slightly
+     * more performant in the given implementation.
+     * With an improved implementation we would prefer a single optimized
+     * function.
+     */
+    if (cinfo->lim_Se != DCTSIZE2-1)
+      entropy->pub.decode_mcu = decode_mcu_sub;
+    else
+      entropy->pub.decode_mcu = decode_mcu;
+
+    for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+      compptr = cinfo->cur_comp_info[ci];
+      /* Compute derived values for Huffman tables */
+      /* We may do this more than once for a table, but it's not expensive */
+      tbl = compptr->dc_tbl_no;
+      jpeg_make_d_derived_tbl(cinfo, TRUE, tbl,
+			      & entropy->dc_derived_tbls[tbl]);
+      if (cinfo->lim_Se) {	/* AC needs no table when not present */
+	tbl = compptr->ac_tbl_no;
+	jpeg_make_d_derived_tbl(cinfo, FALSE, tbl,
+				& entropy->ac_derived_tbls[tbl]);
+      }
+      /* Initialize DC predictions to 0 */
+      entropy->saved.last_dc_val[ci] = 0;
+    }
+
+    /* Precalculate decoding info for each block in an MCU of this scan */
+    for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
+      ci = cinfo->MCU_membership[blkn];
+      compptr = cinfo->cur_comp_info[ci];
+      /* Precalculate which table to use for each block */
+      entropy->dc_cur_tbls[blkn] = entropy->dc_derived_tbls[compptr->dc_tbl_no];
+      entropy->ac_cur_tbls[blkn] = entropy->ac_derived_tbls[compptr->ac_tbl_no];
+      /* Decide whether we really care about the coefficient values */
+      if (compptr->component_needed) {
+	ci = compptr->DCT_v_scaled_size;
+	i = compptr->DCT_h_scaled_size;
+	switch (cinfo->lim_Se) {
+	case (1*1-1):
+	  entropy->coef_limit[blkn] = 1;
+	  break;
+	case (2*2-1):
+	  if (ci <= 0 || ci > 2) ci = 2;
+	  if (i <= 0 || i > 2) i = 2;
+	  entropy->coef_limit[blkn] = 1 + jpeg_zigzag_order2[ci - 1][i - 1];
+	  break;
+	case (3*3-1):
+	  if (ci <= 0 || ci > 3) ci = 3;
+	  if (i <= 0 || i > 3) i = 3;
+	  entropy->coef_limit[blkn] = 1 + jpeg_zigzag_order3[ci - 1][i - 1];
+	  break;
+	case (4*4-1):
+	  if (ci <= 0 || ci > 4) ci = 4;
+	  if (i <= 0 || i > 4) i = 4;
+	  entropy->coef_limit[blkn] = 1 + jpeg_zigzag_order4[ci - 1][i - 1];
+	  break;
+	case (5*5-1):
+	  if (ci <= 0 || ci > 5) ci = 5;
+	  if (i <= 0 || i > 5) i = 5;
+	  entropy->coef_limit[blkn] = 1 + jpeg_zigzag_order5[ci - 1][i - 1];
+	  break;
+	case (6*6-1):
+	  if (ci <= 0 || ci > 6) ci = 6;
+	  if (i <= 0 || i > 6) i = 6;
+	  entropy->coef_limit[blkn] = 1 + jpeg_zigzag_order6[ci - 1][i - 1];
+	  break;
+	case (7*7-1):
+	  if (ci <= 0 || ci > 7) ci = 7;
+	  if (i <= 0 || i > 7) i = 7;
+	  entropy->coef_limit[blkn] = 1 + jpeg_zigzag_order7[ci - 1][i - 1];
+	  break;
+	default:
+	  if (ci <= 0 || ci > 8) ci = 8;
+	  if (i <= 0 || i > 8) i = 8;
+	  entropy->coef_limit[blkn] = 1 + jpeg_zigzag_order[ci - 1][i - 1];
+	  break;
+	}
+      } else {
+	entropy->coef_limit[blkn] = 0;
+      }
+    }
+  }
+
+  /* Initialize bitread state variables */
+  entropy->bitstate.bits_left = 0;
+  entropy->bitstate.get_buffer = 0; /* unnecessary, but keeps Purify quiet */
+  entropy->insufficient_data = FALSE;
+
+  /* Initialize restart counter */
+  entropy->restarts_to_go = cinfo->restart_interval;
+}
+
+
+/*
+ * Module initialization routine for Huffman entropy decoding.
+ */
+
+GLOBAL(void)
+jinit_huff_decoder (j_decompress_ptr cinfo)
+{
+  huff_entropy_ptr entropy;
+  int i;
+
+  entropy = (huff_entropy_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				SIZEOF(huff_entropy_decoder));
+  cinfo->entropy = &entropy->pub;
+  entropy->pub.start_pass = start_pass_huff_decoder;
+
+  if (cinfo->progressive_mode) {
+    /* Create progression status table */
+    int *coef_bit_ptr, ci;
+    cinfo->coef_bits = (int (*)[DCTSIZE2])
+      (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				  cinfo->num_components*DCTSIZE2*SIZEOF(int));
+    coef_bit_ptr = & cinfo->coef_bits[0][0];
+    for (ci = 0; ci < cinfo->num_components; ci++)
+      for (i = 0; i < DCTSIZE2; i++)
+	*coef_bit_ptr++ = -1;
+
+    /* Mark derived tables unallocated */
+    for (i = 0; i < NUM_HUFF_TBLS; i++) {
+      entropy->derived_tbls[i] = NULL;
+    }
+  } else {
+    /* Mark tables unallocated */
+    for (i = 0; i < NUM_HUFF_TBLS; i++) {
+      entropy->dc_derived_tbls[i] = entropy->ac_derived_tbls[i] = NULL;
+    }
+  }
+}
diff --git a/plugins/AdvaImg/src/LibJPEG/jdinput.c b/plugins/AdvaImg/src/LibJPEG/jdinput.c
index 2c5c717b9c..de6f7ed8e9 100644
--- a/plugins/AdvaImg/src/LibJPEG/jdinput.c
+++ b/plugins/AdvaImg/src/LibJPEG/jdinput.c
@@ -1,661 +1,661 @@
-/*
- * jdinput.c
- *
- * Copyright (C) 1991-1997, Thomas G. Lane.
- * Modified 2002-2009 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains input control logic for the JPEG decompressor.
- * These routines are concerned with controlling the decompressor's input
- * processing (marker reading and coefficient decoding).  The actual input
- * reading is done in jdmarker.c, jdhuff.c, and jdarith.c.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-
-
-/* Private state */
-
-typedef struct {
-  struct jpeg_input_controller pub; /* public fields */
-
-  int inheaders;		/* Nonzero until first SOS is reached */
-} my_input_controller;
-
-typedef my_input_controller * my_inputctl_ptr;
-
-
-/* Forward declarations */
-METHODDEF(int) consume_markers JPP((j_decompress_ptr cinfo));
-
-
-/*
- * Routines to calculate various quantities related to the size of the image.
- */
-
-
-/*
- * Compute output image dimensions and related values.
- * NOTE: this is exported for possible use by application.
- * Hence it mustn't do anything that can't be done twice.
- */
-
-GLOBAL(void)
-jpeg_core_output_dimensions (j_decompress_ptr cinfo)
-/* Do computations that are needed before master selection phase.
- * This function is used for transcoding and full decompression.
- */
-{
-#ifdef IDCT_SCALING_SUPPORTED
-  int ci;
-  jpeg_component_info *compptr;
-
-  /* Compute actual output image dimensions and DCT scaling choices. */
-  if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom) {
-    /* Provide 1/block_size scaling */
-    cinfo->output_width = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_width, (long) cinfo->block_size);
-    cinfo->output_height = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_height, (long) cinfo->block_size);
-    cinfo->min_DCT_h_scaled_size = 1;
-    cinfo->min_DCT_v_scaled_size = 1;
-  } else if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom * 2) {
-    /* Provide 2/block_size scaling */
-    cinfo->output_width = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_width * 2L, (long) cinfo->block_size);
-    cinfo->output_height = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_height * 2L, (long) cinfo->block_size);
-    cinfo->min_DCT_h_scaled_size = 2;
-    cinfo->min_DCT_v_scaled_size = 2;
-  } else if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom * 3) {
-    /* Provide 3/block_size scaling */
-    cinfo->output_width = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_width * 3L, (long) cinfo->block_size);
-    cinfo->output_height = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_height * 3L, (long) cinfo->block_size);
-    cinfo->min_DCT_h_scaled_size = 3;
-    cinfo->min_DCT_v_scaled_size = 3;
-  } else if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom * 4) {
-    /* Provide 4/block_size scaling */
-    cinfo->output_width = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_width * 4L, (long) cinfo->block_size);
-    cinfo->output_height = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_height * 4L, (long) cinfo->block_size);
-    cinfo->min_DCT_h_scaled_size = 4;
-    cinfo->min_DCT_v_scaled_size = 4;
-  } else if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom * 5) {
-    /* Provide 5/block_size scaling */
-    cinfo->output_width = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_width * 5L, (long) cinfo->block_size);
-    cinfo->output_height = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_height * 5L, (long) cinfo->block_size);
-    cinfo->min_DCT_h_scaled_size = 5;
-    cinfo->min_DCT_v_scaled_size = 5;
-  } else if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom * 6) {
-    /* Provide 6/block_size scaling */
-    cinfo->output_width = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_width * 6L, (long) cinfo->block_size);
-    cinfo->output_height = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_height * 6L, (long) cinfo->block_size);
-    cinfo->min_DCT_h_scaled_size = 6;
-    cinfo->min_DCT_v_scaled_size = 6;
-  } else if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom * 7) {
-    /* Provide 7/block_size scaling */
-    cinfo->output_width = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_width * 7L, (long) cinfo->block_size);
-    cinfo->output_height = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_height * 7L, (long) cinfo->block_size);
-    cinfo->min_DCT_h_scaled_size = 7;
-    cinfo->min_DCT_v_scaled_size = 7;
-  } else if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom * 8) {
-    /* Provide 8/block_size scaling */
-    cinfo->output_width = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_width * 8L, (long) cinfo->block_size);
-    cinfo->output_height = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_height * 8L, (long) cinfo->block_size);
-    cinfo->min_DCT_h_scaled_size = 8;
-    cinfo->min_DCT_v_scaled_size = 8;
-  } else if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom * 9) {
-    /* Provide 9/block_size scaling */
-    cinfo->output_width = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_width * 9L, (long) cinfo->block_size);
-    cinfo->output_height = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_height * 9L, (long) cinfo->block_size);
-    cinfo->min_DCT_h_scaled_size = 9;
-    cinfo->min_DCT_v_scaled_size = 9;
-  } else if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom * 10) {
-    /* Provide 10/block_size scaling */
-    cinfo->output_width = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_width * 10L, (long) cinfo->block_size);
-    cinfo->output_height = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_height * 10L, (long) cinfo->block_size);
-    cinfo->min_DCT_h_scaled_size = 10;
-    cinfo->min_DCT_v_scaled_size = 10;
-  } else if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom * 11) {
-    /* Provide 11/block_size scaling */
-    cinfo->output_width = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_width * 11L, (long) cinfo->block_size);
-    cinfo->output_height = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_height * 11L, (long) cinfo->block_size);
-    cinfo->min_DCT_h_scaled_size = 11;
-    cinfo->min_DCT_v_scaled_size = 11;
-  } else if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom * 12) {
-    /* Provide 12/block_size scaling */
-    cinfo->output_width = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_width * 12L, (long) cinfo->block_size);
-    cinfo->output_height = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_height * 12L, (long) cinfo->block_size);
-    cinfo->min_DCT_h_scaled_size = 12;
-    cinfo->min_DCT_v_scaled_size = 12;
-  } else if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom * 13) {
-    /* Provide 13/block_size scaling */
-    cinfo->output_width = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_width * 13L, (long) cinfo->block_size);
-    cinfo->output_height = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_height * 13L, (long) cinfo->block_size);
-    cinfo->min_DCT_h_scaled_size = 13;
-    cinfo->min_DCT_v_scaled_size = 13;
-  } else if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom * 14) {
-    /* Provide 14/block_size scaling */
-    cinfo->output_width = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_width * 14L, (long) cinfo->block_size);
-    cinfo->output_height = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_height * 14L, (long) cinfo->block_size);
-    cinfo->min_DCT_h_scaled_size = 14;
-    cinfo->min_DCT_v_scaled_size = 14;
-  } else if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom * 15) {
-    /* Provide 15/block_size scaling */
-    cinfo->output_width = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_width * 15L, (long) cinfo->block_size);
-    cinfo->output_height = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_height * 15L, (long) cinfo->block_size);
-    cinfo->min_DCT_h_scaled_size = 15;
-    cinfo->min_DCT_v_scaled_size = 15;
-  } else {
-    /* Provide 16/block_size scaling */
-    cinfo->output_width = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_width * 16L, (long) cinfo->block_size);
-    cinfo->output_height = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_height * 16L, (long) cinfo->block_size);
-    cinfo->min_DCT_h_scaled_size = 16;
-    cinfo->min_DCT_v_scaled_size = 16;
-  }
-
-  /* Recompute dimensions of components */
-  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-       ci++, compptr++) {
-    compptr->DCT_h_scaled_size = cinfo->min_DCT_h_scaled_size;
-    compptr->DCT_v_scaled_size = cinfo->min_DCT_v_scaled_size;
-  }
-
-#else /* !IDCT_SCALING_SUPPORTED */
-
-  /* Hardwire it to "no scaling" */
-  cinfo->output_width = cinfo->image_width;
-  cinfo->output_height = cinfo->image_height;
-  /* jdinput.c has already initialized DCT_scaled_size,
-   * and has computed unscaled downsampled_width and downsampled_height.
-   */
-
-#endif /* IDCT_SCALING_SUPPORTED */
-}
-
-
-LOCAL(void)
-initial_setup (j_decompress_ptr cinfo)
-/* Called once, when first SOS marker is reached */
-{
-  int ci;
-  jpeg_component_info *compptr;
-
-  /* Make sure image isn't bigger than I can handle */
-  if ((long) cinfo->image_height > (long) JPEG_MAX_DIMENSION ||
-      (long) cinfo->image_width > (long) JPEG_MAX_DIMENSION)
-    ERREXIT1(cinfo, JERR_IMAGE_TOO_BIG, (unsigned int) JPEG_MAX_DIMENSION);
-
-  /* For now, precision must match compiled-in value... */
-  if (cinfo->data_precision != BITS_IN_JSAMPLE)
-    ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
-
-  /* Check that number of components won't exceed internal array sizes */
-  if (cinfo->num_components > MAX_COMPONENTS)
-    ERREXIT2(cinfo, JERR_COMPONENT_COUNT, cinfo->num_components,
-	     MAX_COMPONENTS);
-
-  /* Compute maximum sampling factors; check factor validity */
-  cinfo->max_h_samp_factor = 1;
-  cinfo->max_v_samp_factor = 1;
-  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-       ci++, compptr++) {
-    if (compptr->h_samp_factor<=0 || compptr->h_samp_factor>MAX_SAMP_FACTOR ||
-	compptr->v_samp_factor<=0 || compptr->v_samp_factor>MAX_SAMP_FACTOR)
-      ERREXIT(cinfo, JERR_BAD_SAMPLING);
-    cinfo->max_h_samp_factor = MAX(cinfo->max_h_samp_factor,
-				   compptr->h_samp_factor);
-    cinfo->max_v_samp_factor = MAX(cinfo->max_v_samp_factor,
-				   compptr->v_samp_factor);
-  }
-
-  /* Derive block_size, natural_order, and lim_Se */
-  if (cinfo->is_baseline || (cinfo->progressive_mode &&
-      cinfo->comps_in_scan)) { /* no pseudo SOS marker */
-    cinfo->block_size = DCTSIZE;
-    cinfo->natural_order = jpeg_natural_order;
-    cinfo->lim_Se = DCTSIZE2-1;
-  } else
-    switch (cinfo->Se) {
-    case (1*1-1):
-      cinfo->block_size = 1;
-      cinfo->natural_order = jpeg_natural_order; /* not needed */
-      cinfo->lim_Se = cinfo->Se;
-      break;
-    case (2*2-1):
-      cinfo->block_size = 2;
-      cinfo->natural_order = jpeg_natural_order2;
-      cinfo->lim_Se = cinfo->Se;
-      break;
-    case (3*3-1):
-      cinfo->block_size = 3;
-      cinfo->natural_order = jpeg_natural_order3;
-      cinfo->lim_Se = cinfo->Se;
-      break;
-    case (4*4-1):
-      cinfo->block_size = 4;
-      cinfo->natural_order = jpeg_natural_order4;
-      cinfo->lim_Se = cinfo->Se;
-      break;
-    case (5*5-1):
-      cinfo->block_size = 5;
-      cinfo->natural_order = jpeg_natural_order5;
-      cinfo->lim_Se = cinfo->Se;
-      break;
-    case (6*6-1):
-      cinfo->block_size = 6;
-      cinfo->natural_order = jpeg_natural_order6;
-      cinfo->lim_Se = cinfo->Se;
-      break;
-    case (7*7-1):
-      cinfo->block_size = 7;
-      cinfo->natural_order = jpeg_natural_order7;
-      cinfo->lim_Se = cinfo->Se;
-      break;
-    case (8*8-1):
-      cinfo->block_size = 8;
-      cinfo->natural_order = jpeg_natural_order;
-      cinfo->lim_Se = DCTSIZE2-1;
-      break;
-    case (9*9-1):
-      cinfo->block_size = 9;
-      cinfo->natural_order = jpeg_natural_order;
-      cinfo->lim_Se = DCTSIZE2-1;
-      break;
-    case (10*10-1):
-      cinfo->block_size = 10;
-      cinfo->natural_order = jpeg_natural_order;
-      cinfo->lim_Se = DCTSIZE2-1;
-      break;
-    case (11*11-1):
-      cinfo->block_size = 11;
-      cinfo->natural_order = jpeg_natural_order;
-      cinfo->lim_Se = DCTSIZE2-1;
-      break;
-    case (12*12-1):
-      cinfo->block_size = 12;
-      cinfo->natural_order = jpeg_natural_order;
-      cinfo->lim_Se = DCTSIZE2-1;
-      break;
-    case (13*13-1):
-      cinfo->block_size = 13;
-      cinfo->natural_order = jpeg_natural_order;
-      cinfo->lim_Se = DCTSIZE2-1;
-      break;
-    case (14*14-1):
-      cinfo->block_size = 14;
-      cinfo->natural_order = jpeg_natural_order;
-      cinfo->lim_Se = DCTSIZE2-1;
-      break;
-    case (15*15-1):
-      cinfo->block_size = 15;
-      cinfo->natural_order = jpeg_natural_order;
-      cinfo->lim_Se = DCTSIZE2-1;
-      break;
-    case (16*16-1):
-      cinfo->block_size = 16;
-      cinfo->natural_order = jpeg_natural_order;
-      cinfo->lim_Se = DCTSIZE2-1;
-      break;
-    default:
-      ERREXIT4(cinfo, JERR_BAD_PROGRESSION,
-	       cinfo->Ss, cinfo->Se, cinfo->Ah, cinfo->Al);
-      break;
-    }
-
-  /* We initialize DCT_scaled_size and min_DCT_scaled_size to block_size.
-   * In the full decompressor,
-   * this will be overridden by jpeg_calc_output_dimensions in jdmaster.c;
-   * but in the transcoder,
-   * jpeg_calc_output_dimensions is not used, so we must do it here.
-   */
-  cinfo->min_DCT_h_scaled_size = cinfo->block_size;
-  cinfo->min_DCT_v_scaled_size = cinfo->block_size;
-
-  /* Compute dimensions of components */
-  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-       ci++, compptr++) {
-    compptr->DCT_h_scaled_size = cinfo->block_size;
-    compptr->DCT_v_scaled_size = cinfo->block_size;
-    /* Size in DCT blocks */
-    compptr->width_in_blocks = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_width * (long) compptr->h_samp_factor,
-		    (long) (cinfo->max_h_samp_factor * cinfo->block_size));
-    compptr->height_in_blocks = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_height * (long) compptr->v_samp_factor,
-		    (long) (cinfo->max_v_samp_factor * cinfo->block_size));
-    /* downsampled_width and downsampled_height will also be overridden by
-     * jdmaster.c if we are doing full decompression.  The transcoder library
-     * doesn't use these values, but the calling application might.
-     */
-    /* Size in samples */
-    compptr->downsampled_width = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_width * (long) compptr->h_samp_factor,
-		    (long) cinfo->max_h_samp_factor);
-    compptr->downsampled_height = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_height * (long) compptr->v_samp_factor,
-		    (long) cinfo->max_v_samp_factor);
-    /* Mark component needed, until color conversion says otherwise */
-    compptr->component_needed = TRUE;
-    /* Mark no quantization table yet saved for component */
-    compptr->quant_table = NULL;
-  }
-
-  /* Compute number of fully interleaved MCU rows. */
-  cinfo->total_iMCU_rows = (JDIMENSION)
-    jdiv_round_up((long) cinfo->image_height,
-	          (long) (cinfo->max_v_samp_factor * cinfo->block_size));
-
-  /* Decide whether file contains multiple scans */
-  if (cinfo->comps_in_scan < cinfo->num_components || cinfo->progressive_mode)
-    cinfo->inputctl->has_multiple_scans = TRUE;
-  else
-    cinfo->inputctl->has_multiple_scans = FALSE;
-}
-
-
-LOCAL(void)
-per_scan_setup (j_decompress_ptr cinfo)
-/* Do computations that are needed before processing a JPEG scan */
-/* cinfo->comps_in_scan and cinfo->cur_comp_info[] were set from SOS marker */
-{
-  int ci, mcublks, tmp;
-  jpeg_component_info *compptr;
-  
-  if (cinfo->comps_in_scan == 1) {
-    
-    /* Noninterleaved (single-component) scan */
-    compptr = cinfo->cur_comp_info[0];
-    
-    /* Overall image size in MCUs */
-    cinfo->MCUs_per_row = compptr->width_in_blocks;
-    cinfo->MCU_rows_in_scan = compptr->height_in_blocks;
-    
-    /* For noninterleaved scan, always one block per MCU */
-    compptr->MCU_width = 1;
-    compptr->MCU_height = 1;
-    compptr->MCU_blocks = 1;
-    compptr->MCU_sample_width = compptr->DCT_h_scaled_size;
-    compptr->last_col_width = 1;
-    /* For noninterleaved scans, it is convenient to define last_row_height
-     * as the number of block rows present in the last iMCU row.
-     */
-    tmp = (int) (compptr->height_in_blocks % compptr->v_samp_factor);
-    if (tmp == 0) tmp = compptr->v_samp_factor;
-    compptr->last_row_height = tmp;
-    
-    /* Prepare array describing MCU composition */
-    cinfo->blocks_in_MCU = 1;
-    cinfo->MCU_membership[0] = 0;
-    
-  } else {
-    
-    /* Interleaved (multi-component) scan */
-    if (cinfo->comps_in_scan <= 0 || cinfo->comps_in_scan > MAX_COMPS_IN_SCAN)
-      ERREXIT2(cinfo, JERR_COMPONENT_COUNT, cinfo->comps_in_scan,
-	       MAX_COMPS_IN_SCAN);
-    
-    /* Overall image size in MCUs */
-    cinfo->MCUs_per_row = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_width,
-		    (long) (cinfo->max_h_samp_factor * cinfo->block_size));
-    cinfo->MCU_rows_in_scan = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_height,
-		    (long) (cinfo->max_v_samp_factor * cinfo->block_size));
-    
-    cinfo->blocks_in_MCU = 0;
-    
-    for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
-      compptr = cinfo->cur_comp_info[ci];
-      /* Sampling factors give # of blocks of component in each MCU */
-      compptr->MCU_width = compptr->h_samp_factor;
-      compptr->MCU_height = compptr->v_samp_factor;
-      compptr->MCU_blocks = compptr->MCU_width * compptr->MCU_height;
-      compptr->MCU_sample_width = compptr->MCU_width * compptr->DCT_h_scaled_size;
-      /* Figure number of non-dummy blocks in last MCU column & row */
-      tmp = (int) (compptr->width_in_blocks % compptr->MCU_width);
-      if (tmp == 0) tmp = compptr->MCU_width;
-      compptr->last_col_width = tmp;
-      tmp = (int) (compptr->height_in_blocks % compptr->MCU_height);
-      if (tmp == 0) tmp = compptr->MCU_height;
-      compptr->last_row_height = tmp;
-      /* Prepare array describing MCU composition */
-      mcublks = compptr->MCU_blocks;
-      if (cinfo->blocks_in_MCU + mcublks > D_MAX_BLOCKS_IN_MCU)
-	ERREXIT(cinfo, JERR_BAD_MCU_SIZE);
-      while (mcublks-- > 0) {
-	cinfo->MCU_membership[cinfo->blocks_in_MCU++] = ci;
-      }
-    }
-    
-  }
-}
-
-
-/*
- * Save away a copy of the Q-table referenced by each component present
- * in the current scan, unless already saved during a prior scan.
- *
- * In a multiple-scan JPEG file, the encoder could assign different components
- * the same Q-table slot number, but change table definitions between scans
- * so that each component uses a different Q-table.  (The IJG encoder is not
- * currently capable of doing this, but other encoders might.)  Since we want
- * to be able to dequantize all the components at the end of the file, this
- * means that we have to save away the table actually used for each component.
- * We do this by copying the table at the start of the first scan containing
- * the component.
- * The JPEG spec prohibits the encoder from changing the contents of a Q-table
- * slot between scans of a component using that slot.  If the encoder does so
- * anyway, this decoder will simply use the Q-table values that were current
- * at the start of the first scan for the component.
- *
- * The decompressor output side looks only at the saved quant tables,
- * not at the current Q-table slots.
- */
-
-LOCAL(void)
-latch_quant_tables (j_decompress_ptr cinfo)
-{
-  int ci, qtblno;
-  jpeg_component_info *compptr;
-  JQUANT_TBL * qtbl;
-
-  for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
-    compptr = cinfo->cur_comp_info[ci];
-    /* No work if we already saved Q-table for this component */
-    if (compptr->quant_table != NULL)
-      continue;
-    /* Make sure specified quantization table is present */
-    qtblno = compptr->quant_tbl_no;
-    if (qtblno < 0 || qtblno >= NUM_QUANT_TBLS ||
-	cinfo->quant_tbl_ptrs[qtblno] == NULL)
-      ERREXIT1(cinfo, JERR_NO_QUANT_TABLE, qtblno);
-    /* OK, save away the quantization table */
-    qtbl = (JQUANT_TBL *)
-      (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				  SIZEOF(JQUANT_TBL));
-    MEMCOPY(qtbl, cinfo->quant_tbl_ptrs[qtblno], SIZEOF(JQUANT_TBL));
-    compptr->quant_table = qtbl;
-  }
-}
-
-
-/*
- * Initialize the input modules to read a scan of compressed data.
- * The first call to this is done by jdmaster.c after initializing
- * the entire decompressor (during jpeg_start_decompress).
- * Subsequent calls come from consume_markers, below.
- */
-
-METHODDEF(void)
-start_input_pass (j_decompress_ptr cinfo)
-{
-  per_scan_setup(cinfo);
-  latch_quant_tables(cinfo);
-  (*cinfo->entropy->start_pass) (cinfo);
-  (*cinfo->coef->start_input_pass) (cinfo);
-  cinfo->inputctl->consume_input = cinfo->coef->consume_data;
-}
-
-
-/*
- * Finish up after inputting a compressed-data scan.
- * This is called by the coefficient controller after it's read all
- * the expected data of the scan.
- */
-
-METHODDEF(void)
-finish_input_pass (j_decompress_ptr cinfo)
-{
-  cinfo->inputctl->consume_input = consume_markers;
-}
-
-
-/*
- * Read JPEG markers before, between, or after compressed-data scans.
- * Change state as necessary when a new scan is reached.
- * Return value is JPEG_SUSPENDED, JPEG_REACHED_SOS, or JPEG_REACHED_EOI.
- *
- * The consume_input method pointer points either here or to the
- * coefficient controller's consume_data routine, depending on whether
- * we are reading a compressed data segment or inter-segment markers.
- *
- * Note: This function should NOT return a pseudo SOS marker (with zero
- * component number) to the caller.  A pseudo marker received by
- * read_markers is processed and then skipped for other markers.
- */
-
-METHODDEF(int)
-consume_markers (j_decompress_ptr cinfo)
-{
-  my_inputctl_ptr inputctl = (my_inputctl_ptr) cinfo->inputctl;
-  int val;
-
-  if (inputctl->pub.eoi_reached) /* After hitting EOI, read no further */
-    return JPEG_REACHED_EOI;
-
-  for (;;) {			/* Loop to pass pseudo SOS marker */
-    val = (*cinfo->marker->read_markers) (cinfo);
-
-    switch (val) {
-    case JPEG_REACHED_SOS:	/* Found SOS */
-      if (inputctl->inheaders) { /* 1st SOS */
-	if (inputctl->inheaders == 1)
-	  initial_setup(cinfo);
-	if (cinfo->comps_in_scan == 0) { /* pseudo SOS marker */
-	  inputctl->inheaders = 2;
-	  break;
-	}
-	inputctl->inheaders = 0;
-	/* Note: start_input_pass must be called by jdmaster.c
-	 * before any more input can be consumed.  jdapimin.c is
-	 * responsible for enforcing this sequencing.
-	 */
-      } else {			/* 2nd or later SOS marker */
-	if (! inputctl->pub.has_multiple_scans)
-	  ERREXIT(cinfo, JERR_EOI_EXPECTED); /* Oops, I wasn't expecting this! */
-	if (cinfo->comps_in_scan == 0) /* unexpected pseudo SOS marker */
-	  break;
-	start_input_pass(cinfo);
-      }
-      return val;
-    case JPEG_REACHED_EOI:	/* Found EOI */
-      inputctl->pub.eoi_reached = TRUE;
-      if (inputctl->inheaders) { /* Tables-only datastream, apparently */
-	if (cinfo->marker->saw_SOF)
-	  ERREXIT(cinfo, JERR_SOF_NO_SOS);
-      } else {
-	/* Prevent infinite loop in coef ctlr's decompress_data routine
-	 * if user set output_scan_number larger than number of scans.
-	 */
-	if (cinfo->output_scan_number > cinfo->input_scan_number)
-	  cinfo->output_scan_number = cinfo->input_scan_number;
-      }
-      return val;
-    case JPEG_SUSPENDED:
-      return val;
-    default:
-      return val;
-    }
-  }
-}
-
-
-/*
- * Reset state to begin a fresh datastream.
- */
-
-METHODDEF(void)
-reset_input_controller (j_decompress_ptr cinfo)
-{
-  my_inputctl_ptr inputctl = (my_inputctl_ptr) cinfo->inputctl;
-
-  inputctl->pub.consume_input = consume_markers;
-  inputctl->pub.has_multiple_scans = FALSE; /* "unknown" would be better */
-  inputctl->pub.eoi_reached = FALSE;
-  inputctl->inheaders = 1;
-  /* Reset other modules */
-  (*cinfo->err->reset_error_mgr) ((j_common_ptr) cinfo);
-  (*cinfo->marker->reset_marker_reader) (cinfo);
-  /* Reset progression state -- would be cleaner if entropy decoder did this */
-  cinfo->coef_bits = NULL;
-}
-
-
-/*
- * Initialize the input controller module.
- * This is called only once, when the decompression object is created.
- */
-
-GLOBAL(void)
-jinit_input_controller (j_decompress_ptr cinfo)
-{
-  my_inputctl_ptr inputctl;
-
-  /* Create subobject in permanent pool */
-  inputctl = (my_inputctl_ptr)
-    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_PERMANENT,
-				SIZEOF(my_input_controller));
-  cinfo->inputctl = (struct jpeg_input_controller *) inputctl;
-  /* Initialize method pointers */
-  inputctl->pub.consume_input = consume_markers;
-  inputctl->pub.reset_input_controller = reset_input_controller;
-  inputctl->pub.start_input_pass = start_input_pass;
-  inputctl->pub.finish_input_pass = finish_input_pass;
-  /* Initialize state: can't use reset_input_controller since we don't
-   * want to try to reset other modules yet.
-   */
-  inputctl->pub.has_multiple_scans = FALSE; /* "unknown" would be better */
-  inputctl->pub.eoi_reached = FALSE;
-  inputctl->inheaders = 1;
-}
+/*
+ * jdinput.c
+ *
+ * Copyright (C) 1991-1997, Thomas G. Lane.
+ * Modified 2002-2009 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains input control logic for the JPEG decompressor.
+ * These routines are concerned with controlling the decompressor's input
+ * processing (marker reading and coefficient decoding).  The actual input
+ * reading is done in jdmarker.c, jdhuff.c, and jdarith.c.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/* Private state */
+
+typedef struct {
+  struct jpeg_input_controller pub; /* public fields */
+
+  int inheaders;		/* Nonzero until first SOS is reached */
+} my_input_controller;
+
+typedef my_input_controller * my_inputctl_ptr;
+
+
+/* Forward declarations */
+METHODDEF(int) consume_markers JPP((j_decompress_ptr cinfo));
+
+
+/*
+ * Routines to calculate various quantities related to the size of the image.
+ */
+
+
+/*
+ * Compute output image dimensions and related values.
+ * NOTE: this is exported for possible use by application.
+ * Hence it mustn't do anything that can't be done twice.
+ */
+
+GLOBAL(void)
+jpeg_core_output_dimensions (j_decompress_ptr cinfo)
+/* Do computations that are needed before master selection phase.
+ * This function is used for transcoding and full decompression.
+ */
+{
+#ifdef IDCT_SCALING_SUPPORTED
+  int ci;
+  jpeg_component_info *compptr;
+
+  /* Compute actual output image dimensions and DCT scaling choices. */
+  if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom) {
+    /* Provide 1/block_size scaling */
+    cinfo->output_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width, (long) cinfo->block_size);
+    cinfo->output_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height, (long) cinfo->block_size);
+    cinfo->min_DCT_h_scaled_size = 1;
+    cinfo->min_DCT_v_scaled_size = 1;
+  } else if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom * 2) {
+    /* Provide 2/block_size scaling */
+    cinfo->output_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * 2L, (long) cinfo->block_size);
+    cinfo->output_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * 2L, (long) cinfo->block_size);
+    cinfo->min_DCT_h_scaled_size = 2;
+    cinfo->min_DCT_v_scaled_size = 2;
+  } else if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom * 3) {
+    /* Provide 3/block_size scaling */
+    cinfo->output_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * 3L, (long) cinfo->block_size);
+    cinfo->output_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * 3L, (long) cinfo->block_size);
+    cinfo->min_DCT_h_scaled_size = 3;
+    cinfo->min_DCT_v_scaled_size = 3;
+  } else if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom * 4) {
+    /* Provide 4/block_size scaling */
+    cinfo->output_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * 4L, (long) cinfo->block_size);
+    cinfo->output_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * 4L, (long) cinfo->block_size);
+    cinfo->min_DCT_h_scaled_size = 4;
+    cinfo->min_DCT_v_scaled_size = 4;
+  } else if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom * 5) {
+    /* Provide 5/block_size scaling */
+    cinfo->output_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * 5L, (long) cinfo->block_size);
+    cinfo->output_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * 5L, (long) cinfo->block_size);
+    cinfo->min_DCT_h_scaled_size = 5;
+    cinfo->min_DCT_v_scaled_size = 5;
+  } else if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom * 6) {
+    /* Provide 6/block_size scaling */
+    cinfo->output_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * 6L, (long) cinfo->block_size);
+    cinfo->output_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * 6L, (long) cinfo->block_size);
+    cinfo->min_DCT_h_scaled_size = 6;
+    cinfo->min_DCT_v_scaled_size = 6;
+  } else if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom * 7) {
+    /* Provide 7/block_size scaling */
+    cinfo->output_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * 7L, (long) cinfo->block_size);
+    cinfo->output_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * 7L, (long) cinfo->block_size);
+    cinfo->min_DCT_h_scaled_size = 7;
+    cinfo->min_DCT_v_scaled_size = 7;
+  } else if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom * 8) {
+    /* Provide 8/block_size scaling */
+    cinfo->output_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * 8L, (long) cinfo->block_size);
+    cinfo->output_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * 8L, (long) cinfo->block_size);
+    cinfo->min_DCT_h_scaled_size = 8;
+    cinfo->min_DCT_v_scaled_size = 8;
+  } else if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom * 9) {
+    /* Provide 9/block_size scaling */
+    cinfo->output_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * 9L, (long) cinfo->block_size);
+    cinfo->output_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * 9L, (long) cinfo->block_size);
+    cinfo->min_DCT_h_scaled_size = 9;
+    cinfo->min_DCT_v_scaled_size = 9;
+  } else if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom * 10) {
+    /* Provide 10/block_size scaling */
+    cinfo->output_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * 10L, (long) cinfo->block_size);
+    cinfo->output_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * 10L, (long) cinfo->block_size);
+    cinfo->min_DCT_h_scaled_size = 10;
+    cinfo->min_DCT_v_scaled_size = 10;
+  } else if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom * 11) {
+    /* Provide 11/block_size scaling */
+    cinfo->output_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * 11L, (long) cinfo->block_size);
+    cinfo->output_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * 11L, (long) cinfo->block_size);
+    cinfo->min_DCT_h_scaled_size = 11;
+    cinfo->min_DCT_v_scaled_size = 11;
+  } else if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom * 12) {
+    /* Provide 12/block_size scaling */
+    cinfo->output_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * 12L, (long) cinfo->block_size);
+    cinfo->output_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * 12L, (long) cinfo->block_size);
+    cinfo->min_DCT_h_scaled_size = 12;
+    cinfo->min_DCT_v_scaled_size = 12;
+  } else if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom * 13) {
+    /* Provide 13/block_size scaling */
+    cinfo->output_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * 13L, (long) cinfo->block_size);
+    cinfo->output_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * 13L, (long) cinfo->block_size);
+    cinfo->min_DCT_h_scaled_size = 13;
+    cinfo->min_DCT_v_scaled_size = 13;
+  } else if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom * 14) {
+    /* Provide 14/block_size scaling */
+    cinfo->output_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * 14L, (long) cinfo->block_size);
+    cinfo->output_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * 14L, (long) cinfo->block_size);
+    cinfo->min_DCT_h_scaled_size = 14;
+    cinfo->min_DCT_v_scaled_size = 14;
+  } else if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom * 15) {
+    /* Provide 15/block_size scaling */
+    cinfo->output_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * 15L, (long) cinfo->block_size);
+    cinfo->output_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * 15L, (long) cinfo->block_size);
+    cinfo->min_DCT_h_scaled_size = 15;
+    cinfo->min_DCT_v_scaled_size = 15;
+  } else {
+    /* Provide 16/block_size scaling */
+    cinfo->output_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * 16L, (long) cinfo->block_size);
+    cinfo->output_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * 16L, (long) cinfo->block_size);
+    cinfo->min_DCT_h_scaled_size = 16;
+    cinfo->min_DCT_v_scaled_size = 16;
+  }
+
+  /* Recompute dimensions of components */
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    compptr->DCT_h_scaled_size = cinfo->min_DCT_h_scaled_size;
+    compptr->DCT_v_scaled_size = cinfo->min_DCT_v_scaled_size;
+  }
+
+#else /* !IDCT_SCALING_SUPPORTED */
+
+  /* Hardwire it to "no scaling" */
+  cinfo->output_width = cinfo->image_width;
+  cinfo->output_height = cinfo->image_height;
+  /* jdinput.c has already initialized DCT_scaled_size,
+   * and has computed unscaled downsampled_width and downsampled_height.
+   */
+
+#endif /* IDCT_SCALING_SUPPORTED */
+}
+
+
+LOCAL(void)
+initial_setup (j_decompress_ptr cinfo)
+/* Called once, when first SOS marker is reached */
+{
+  int ci;
+  jpeg_component_info *compptr;
+
+  /* Make sure image isn't bigger than I can handle */
+  if ((long) cinfo->image_height > (long) JPEG_MAX_DIMENSION ||
+      (long) cinfo->image_width > (long) JPEG_MAX_DIMENSION)
+    ERREXIT1(cinfo, JERR_IMAGE_TOO_BIG, (unsigned int) JPEG_MAX_DIMENSION);
+
+  /* For now, precision must match compiled-in value... */
+  if (cinfo->data_precision != BITS_IN_JSAMPLE)
+    ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
+
+  /* Check that number of components won't exceed internal array sizes */
+  if (cinfo->num_components > MAX_COMPONENTS)
+    ERREXIT2(cinfo, JERR_COMPONENT_COUNT, cinfo->num_components,
+	     MAX_COMPONENTS);
+
+  /* Compute maximum sampling factors; check factor validity */
+  cinfo->max_h_samp_factor = 1;
+  cinfo->max_v_samp_factor = 1;
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    if (compptr->h_samp_factor<=0 || compptr->h_samp_factor>MAX_SAMP_FACTOR ||
+	compptr->v_samp_factor<=0 || compptr->v_samp_factor>MAX_SAMP_FACTOR)
+      ERREXIT(cinfo, JERR_BAD_SAMPLING);
+    cinfo->max_h_samp_factor = MAX(cinfo->max_h_samp_factor,
+				   compptr->h_samp_factor);
+    cinfo->max_v_samp_factor = MAX(cinfo->max_v_samp_factor,
+				   compptr->v_samp_factor);
+  }
+
+  /* Derive block_size, natural_order, and lim_Se */
+  if (cinfo->is_baseline || (cinfo->progressive_mode &&
+      cinfo->comps_in_scan)) { /* no pseudo SOS marker */
+    cinfo->block_size = DCTSIZE;
+    cinfo->natural_order = jpeg_natural_order;
+    cinfo->lim_Se = DCTSIZE2-1;
+  } else
+    switch (cinfo->Se) {
+    case (1*1-1):
+      cinfo->block_size = 1;
+      cinfo->natural_order = jpeg_natural_order; /* not needed */
+      cinfo->lim_Se = cinfo->Se;
+      break;
+    case (2*2-1):
+      cinfo->block_size = 2;
+      cinfo->natural_order = jpeg_natural_order2;
+      cinfo->lim_Se = cinfo->Se;
+      break;
+    case (3*3-1):
+      cinfo->block_size = 3;
+      cinfo->natural_order = jpeg_natural_order3;
+      cinfo->lim_Se = cinfo->Se;
+      break;
+    case (4*4-1):
+      cinfo->block_size = 4;
+      cinfo->natural_order = jpeg_natural_order4;
+      cinfo->lim_Se = cinfo->Se;
+      break;
+    case (5*5-1):
+      cinfo->block_size = 5;
+      cinfo->natural_order = jpeg_natural_order5;
+      cinfo->lim_Se = cinfo->Se;
+      break;
+    case (6*6-1):
+      cinfo->block_size = 6;
+      cinfo->natural_order = jpeg_natural_order6;
+      cinfo->lim_Se = cinfo->Se;
+      break;
+    case (7*7-1):
+      cinfo->block_size = 7;
+      cinfo->natural_order = jpeg_natural_order7;
+      cinfo->lim_Se = cinfo->Se;
+      break;
+    case (8*8-1):
+      cinfo->block_size = 8;
+      cinfo->natural_order = jpeg_natural_order;
+      cinfo->lim_Se = DCTSIZE2-1;
+      break;
+    case (9*9-1):
+      cinfo->block_size = 9;
+      cinfo->natural_order = jpeg_natural_order;
+      cinfo->lim_Se = DCTSIZE2-1;
+      break;
+    case (10*10-1):
+      cinfo->block_size = 10;
+      cinfo->natural_order = jpeg_natural_order;
+      cinfo->lim_Se = DCTSIZE2-1;
+      break;
+    case (11*11-1):
+      cinfo->block_size = 11;
+      cinfo->natural_order = jpeg_natural_order;
+      cinfo->lim_Se = DCTSIZE2-1;
+      break;
+    case (12*12-1):
+      cinfo->block_size = 12;
+      cinfo->natural_order = jpeg_natural_order;
+      cinfo->lim_Se = DCTSIZE2-1;
+      break;
+    case (13*13-1):
+      cinfo->block_size = 13;
+      cinfo->natural_order = jpeg_natural_order;
+      cinfo->lim_Se = DCTSIZE2-1;
+      break;
+    case (14*14-1):
+      cinfo->block_size = 14;
+      cinfo->natural_order = jpeg_natural_order;
+      cinfo->lim_Se = DCTSIZE2-1;
+      break;
+    case (15*15-1):
+      cinfo->block_size = 15;
+      cinfo->natural_order = jpeg_natural_order;
+      cinfo->lim_Se = DCTSIZE2-1;
+      break;
+    case (16*16-1):
+      cinfo->block_size = 16;
+      cinfo->natural_order = jpeg_natural_order;
+      cinfo->lim_Se = DCTSIZE2-1;
+      break;
+    default:
+      ERREXIT4(cinfo, JERR_BAD_PROGRESSION,
+	       cinfo->Ss, cinfo->Se, cinfo->Ah, cinfo->Al);
+      break;
+    }
+
+  /* We initialize DCT_scaled_size and min_DCT_scaled_size to block_size.
+   * In the full decompressor,
+   * this will be overridden by jpeg_calc_output_dimensions in jdmaster.c;
+   * but in the transcoder,
+   * jpeg_calc_output_dimensions is not used, so we must do it here.
+   */
+  cinfo->min_DCT_h_scaled_size = cinfo->block_size;
+  cinfo->min_DCT_v_scaled_size = cinfo->block_size;
+
+  /* Compute dimensions of components */
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    compptr->DCT_h_scaled_size = cinfo->block_size;
+    compptr->DCT_v_scaled_size = cinfo->block_size;
+    /* Size in DCT blocks */
+    compptr->width_in_blocks = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * (long) compptr->h_samp_factor,
+		    (long) (cinfo->max_h_samp_factor * cinfo->block_size));
+    compptr->height_in_blocks = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * (long) compptr->v_samp_factor,
+		    (long) (cinfo->max_v_samp_factor * cinfo->block_size));
+    /* downsampled_width and downsampled_height will also be overridden by
+     * jdmaster.c if we are doing full decompression.  The transcoder library
+     * doesn't use these values, but the calling application might.
+     */
+    /* Size in samples */
+    compptr->downsampled_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * (long) compptr->h_samp_factor,
+		    (long) cinfo->max_h_samp_factor);
+    compptr->downsampled_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * (long) compptr->v_samp_factor,
+		    (long) cinfo->max_v_samp_factor);
+    /* Mark component needed, until color conversion says otherwise */
+    compptr->component_needed = TRUE;
+    /* Mark no quantization table yet saved for component */
+    compptr->quant_table = NULL;
+  }
+
+  /* Compute number of fully interleaved MCU rows. */
+  cinfo->total_iMCU_rows = (JDIMENSION)
+    jdiv_round_up((long) cinfo->image_height,
+	          (long) (cinfo->max_v_samp_factor * cinfo->block_size));
+
+  /* Decide whether file contains multiple scans */
+  if (cinfo->comps_in_scan < cinfo->num_components || cinfo->progressive_mode)
+    cinfo->inputctl->has_multiple_scans = TRUE;
+  else
+    cinfo->inputctl->has_multiple_scans = FALSE;
+}
+
+
+LOCAL(void)
+per_scan_setup (j_decompress_ptr cinfo)
+/* Do computations that are needed before processing a JPEG scan */
+/* cinfo->comps_in_scan and cinfo->cur_comp_info[] were set from SOS marker */
+{
+  int ci, mcublks, tmp;
+  jpeg_component_info *compptr;
+  
+  if (cinfo->comps_in_scan == 1) {
+    
+    /* Noninterleaved (single-component) scan */
+    compptr = cinfo->cur_comp_info[0];
+    
+    /* Overall image size in MCUs */
+    cinfo->MCUs_per_row = compptr->width_in_blocks;
+    cinfo->MCU_rows_in_scan = compptr->height_in_blocks;
+    
+    /* For noninterleaved scan, always one block per MCU */
+    compptr->MCU_width = 1;
+    compptr->MCU_height = 1;
+    compptr->MCU_blocks = 1;
+    compptr->MCU_sample_width = compptr->DCT_h_scaled_size;
+    compptr->last_col_width = 1;
+    /* For noninterleaved scans, it is convenient to define last_row_height
+     * as the number of block rows present in the last iMCU row.
+     */
+    tmp = (int) (compptr->height_in_blocks % compptr->v_samp_factor);
+    if (tmp == 0) tmp = compptr->v_samp_factor;
+    compptr->last_row_height = tmp;
+    
+    /* Prepare array describing MCU composition */
+    cinfo->blocks_in_MCU = 1;
+    cinfo->MCU_membership[0] = 0;
+    
+  } else {
+    
+    /* Interleaved (multi-component) scan */
+    if (cinfo->comps_in_scan <= 0 || cinfo->comps_in_scan > MAX_COMPS_IN_SCAN)
+      ERREXIT2(cinfo, JERR_COMPONENT_COUNT, cinfo->comps_in_scan,
+	       MAX_COMPS_IN_SCAN);
+    
+    /* Overall image size in MCUs */
+    cinfo->MCUs_per_row = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width,
+		    (long) (cinfo->max_h_samp_factor * cinfo->block_size));
+    cinfo->MCU_rows_in_scan = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height,
+		    (long) (cinfo->max_v_samp_factor * cinfo->block_size));
+    
+    cinfo->blocks_in_MCU = 0;
+    
+    for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+      compptr = cinfo->cur_comp_info[ci];
+      /* Sampling factors give # of blocks of component in each MCU */
+      compptr->MCU_width = compptr->h_samp_factor;
+      compptr->MCU_height = compptr->v_samp_factor;
+      compptr->MCU_blocks = compptr->MCU_width * compptr->MCU_height;
+      compptr->MCU_sample_width = compptr->MCU_width * compptr->DCT_h_scaled_size;
+      /* Figure number of non-dummy blocks in last MCU column & row */
+      tmp = (int) (compptr->width_in_blocks % compptr->MCU_width);
+      if (tmp == 0) tmp = compptr->MCU_width;
+      compptr->last_col_width = tmp;
+      tmp = (int) (compptr->height_in_blocks % compptr->MCU_height);
+      if (tmp == 0) tmp = compptr->MCU_height;
+      compptr->last_row_height = tmp;
+      /* Prepare array describing MCU composition */
+      mcublks = compptr->MCU_blocks;
+      if (cinfo->blocks_in_MCU + mcublks > D_MAX_BLOCKS_IN_MCU)
+	ERREXIT(cinfo, JERR_BAD_MCU_SIZE);
+      while (mcublks-- > 0) {
+	cinfo->MCU_membership[cinfo->blocks_in_MCU++] = ci;
+      }
+    }
+    
+  }
+}
+
+
+/*
+ * Save away a copy of the Q-table referenced by each component present
+ * in the current scan, unless already saved during a prior scan.
+ *
+ * In a multiple-scan JPEG file, the encoder could assign different components
+ * the same Q-table slot number, but change table definitions between scans
+ * so that each component uses a different Q-table.  (The IJG encoder is not
+ * currently capable of doing this, but other encoders might.)  Since we want
+ * to be able to dequantize all the components at the end of the file, this
+ * means that we have to save away the table actually used for each component.
+ * We do this by copying the table at the start of the first scan containing
+ * the component.
+ * The JPEG spec prohibits the encoder from changing the contents of a Q-table
+ * slot between scans of a component using that slot.  If the encoder does so
+ * anyway, this decoder will simply use the Q-table values that were current
+ * at the start of the first scan for the component.
+ *
+ * The decompressor output side looks only at the saved quant tables,
+ * not at the current Q-table slots.
+ */
+
+LOCAL(void)
+latch_quant_tables (j_decompress_ptr cinfo)
+{
+  int ci, qtblno;
+  jpeg_component_info *compptr;
+  JQUANT_TBL * qtbl;
+
+  for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+    compptr = cinfo->cur_comp_info[ci];
+    /* No work if we already saved Q-table for this component */
+    if (compptr->quant_table != NULL)
+      continue;
+    /* Make sure specified quantization table is present */
+    qtblno = compptr->quant_tbl_no;
+    if (qtblno < 0 || qtblno >= NUM_QUANT_TBLS ||
+	cinfo->quant_tbl_ptrs[qtblno] == NULL)
+      ERREXIT1(cinfo, JERR_NO_QUANT_TABLE, qtblno);
+    /* OK, save away the quantization table */
+    qtbl = (JQUANT_TBL *)
+      (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				  SIZEOF(JQUANT_TBL));
+    MEMCOPY(qtbl, cinfo->quant_tbl_ptrs[qtblno], SIZEOF(JQUANT_TBL));
+    compptr->quant_table = qtbl;
+  }
+}
+
+
+/*
+ * Initialize the input modules to read a scan of compressed data.
+ * The first call to this is done by jdmaster.c after initializing
+ * the entire decompressor (during jpeg_start_decompress).
+ * Subsequent calls come from consume_markers, below.
+ */
+
+METHODDEF(void)
+start_input_pass (j_decompress_ptr cinfo)
+{
+  per_scan_setup(cinfo);
+  latch_quant_tables(cinfo);
+  (*cinfo->entropy->start_pass) (cinfo);
+  (*cinfo->coef->start_input_pass) (cinfo);
+  cinfo->inputctl->consume_input = cinfo->coef->consume_data;
+}
+
+
+/*
+ * Finish up after inputting a compressed-data scan.
+ * This is called by the coefficient controller after it's read all
+ * the expected data of the scan.
+ */
+
+METHODDEF(void)
+finish_input_pass (j_decompress_ptr cinfo)
+{
+  cinfo->inputctl->consume_input = consume_markers;
+}
+
+
+/*
+ * Read JPEG markers before, between, or after compressed-data scans.
+ * Change state as necessary when a new scan is reached.
+ * Return value is JPEG_SUSPENDED, JPEG_REACHED_SOS, or JPEG_REACHED_EOI.
+ *
+ * The consume_input method pointer points either here or to the
+ * coefficient controller's consume_data routine, depending on whether
+ * we are reading a compressed data segment or inter-segment markers.
+ *
+ * Note: This function should NOT return a pseudo SOS marker (with zero
+ * component number) to the caller.  A pseudo marker received by
+ * read_markers is processed and then skipped for other markers.
+ */
+
+METHODDEF(int)
+consume_markers (j_decompress_ptr cinfo)
+{
+  my_inputctl_ptr inputctl = (my_inputctl_ptr) cinfo->inputctl;
+  int val;
+
+  if (inputctl->pub.eoi_reached) /* After hitting EOI, read no further */
+    return JPEG_REACHED_EOI;
+
+  for (;;) {			/* Loop to pass pseudo SOS marker */
+    val = (*cinfo->marker->read_markers) (cinfo);
+
+    switch (val) {
+    case JPEG_REACHED_SOS:	/* Found SOS */
+      if (inputctl->inheaders) { /* 1st SOS */
+	if (inputctl->inheaders == 1)
+	  initial_setup(cinfo);
+	if (cinfo->comps_in_scan == 0) { /* pseudo SOS marker */
+	  inputctl->inheaders = 2;
+	  break;
+	}
+	inputctl->inheaders = 0;
+	/* Note: start_input_pass must be called by jdmaster.c
+	 * before any more input can be consumed.  jdapimin.c is
+	 * responsible for enforcing this sequencing.
+	 */
+      } else {			/* 2nd or later SOS marker */
+	if (! inputctl->pub.has_multiple_scans)
+	  ERREXIT(cinfo, JERR_EOI_EXPECTED); /* Oops, I wasn't expecting this! */
+	if (cinfo->comps_in_scan == 0) /* unexpected pseudo SOS marker */
+	  break;
+	start_input_pass(cinfo);
+      }
+      return val;
+    case JPEG_REACHED_EOI:	/* Found EOI */
+      inputctl->pub.eoi_reached = TRUE;
+      if (inputctl->inheaders) { /* Tables-only datastream, apparently */
+	if (cinfo->marker->saw_SOF)
+	  ERREXIT(cinfo, JERR_SOF_NO_SOS);
+      } else {
+	/* Prevent infinite loop in coef ctlr's decompress_data routine
+	 * if user set output_scan_number larger than number of scans.
+	 */
+	if (cinfo->output_scan_number > cinfo->input_scan_number)
+	  cinfo->output_scan_number = cinfo->input_scan_number;
+      }
+      return val;
+    case JPEG_SUSPENDED:
+      return val;
+    default:
+      return val;
+    }
+  }
+}
+
+
+/*
+ * Reset state to begin a fresh datastream.
+ */
+
+METHODDEF(void)
+reset_input_controller (j_decompress_ptr cinfo)
+{
+  my_inputctl_ptr inputctl = (my_inputctl_ptr) cinfo->inputctl;
+
+  inputctl->pub.consume_input = consume_markers;
+  inputctl->pub.has_multiple_scans = FALSE; /* "unknown" would be better */
+  inputctl->pub.eoi_reached = FALSE;
+  inputctl->inheaders = 1;
+  /* Reset other modules */
+  (*cinfo->err->reset_error_mgr) ((j_common_ptr) cinfo);
+  (*cinfo->marker->reset_marker_reader) (cinfo);
+  /* Reset progression state -- would be cleaner if entropy decoder did this */
+  cinfo->coef_bits = NULL;
+}
+
+
+/*
+ * Initialize the input controller module.
+ * This is called only once, when the decompression object is created.
+ */
+
+GLOBAL(void)
+jinit_input_controller (j_decompress_ptr cinfo)
+{
+  my_inputctl_ptr inputctl;
+
+  /* Create subobject in permanent pool */
+  inputctl = (my_inputctl_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_PERMANENT,
+				SIZEOF(my_input_controller));
+  cinfo->inputctl = (struct jpeg_input_controller *) inputctl;
+  /* Initialize method pointers */
+  inputctl->pub.consume_input = consume_markers;
+  inputctl->pub.reset_input_controller = reset_input_controller;
+  inputctl->pub.start_input_pass = start_input_pass;
+  inputctl->pub.finish_input_pass = finish_input_pass;
+  /* Initialize state: can't use reset_input_controller since we don't
+   * want to try to reset other modules yet.
+   */
+  inputctl->pub.has_multiple_scans = FALSE; /* "unknown" would be better */
+  inputctl->pub.eoi_reached = FALSE;
+  inputctl->inheaders = 1;
+}
diff --git a/plugins/AdvaImg/src/LibJPEG/jdmainct.c b/plugins/AdvaImg/src/LibJPEG/jdmainct.c
index 02723ca732..7ced4386b2 100644
--- a/plugins/AdvaImg/src/LibJPEG/jdmainct.c
+++ b/plugins/AdvaImg/src/LibJPEG/jdmainct.c
@@ -1,512 +1,513 @@
-/*
- * jdmainct.c
- *
- * Copyright (C) 1994-1996, Thomas G. Lane.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains the main buffer controller for decompression.
- * The main buffer lies between the JPEG decompressor proper and the
- * post-processor; it holds downsampled data in the JPEG colorspace.
- *
- * Note that this code is bypassed in raw-data mode, since the application
- * supplies the equivalent of the main buffer in that case.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-
-
-/*
- * In the current system design, the main buffer need never be a full-image
- * buffer; any full-height buffers will be found inside the coefficient or
- * postprocessing controllers.  Nonetheless, the main controller is not
- * trivial.  Its responsibility is to provide context rows for upsampling/
- * rescaling, and doing this in an efficient fashion is a bit tricky.
- *
- * Postprocessor input data is counted in "row groups".  A row group
- * is defined to be (v_samp_factor * DCT_scaled_size / min_DCT_scaled_size)
- * sample rows of each component.  (We require DCT_scaled_size values to be
- * chosen such that these numbers are integers.  In practice DCT_scaled_size
- * values will likely be powers of two, so we actually have the stronger
- * condition that DCT_scaled_size / min_DCT_scaled_size is an integer.)
- * Upsampling will typically produce max_v_samp_factor pixel rows from each
- * row group (times any additional scale factor that the upsampler is
- * applying).
- *
- * The coefficient controller will deliver data to us one iMCU row at a time;
- * each iMCU row contains v_samp_factor * DCT_scaled_size sample rows, or
- * exactly min_DCT_scaled_size row groups.  (This amount of data corresponds
- * to one row of MCUs when the image is fully interleaved.)  Note that the
- * number of sample rows varies across components, but the number of row
- * groups does not.  Some garbage sample rows may be included in the last iMCU
- * row at the bottom of the image.
- *
- * Depending on the vertical scaling algorithm used, the upsampler may need
- * access to the sample row(s) above and below its current input row group.
- * The upsampler is required to set need_context_rows TRUE at global selection
- * time if so.  When need_context_rows is FALSE, this controller can simply
- * obtain one iMCU row at a time from the coefficient controller and dole it
- * out as row groups to the postprocessor.
- *
- * When need_context_rows is TRUE, this controller guarantees that the buffer
- * passed to postprocessing contains at least one row group's worth of samples
- * above and below the row group(s) being processed.  Note that the context
- * rows "above" the first passed row group appear at negative row offsets in
- * the passed buffer.  At the top and bottom of the image, the required
- * context rows are manufactured by duplicating the first or last real sample
- * row; this avoids having special cases in the upsampling inner loops.
- *
- * The amount of context is fixed at one row group just because that's a
- * convenient number for this controller to work with.  The existing
- * upsamplers really only need one sample row of context.  An upsampler
- * supporting arbitrary output rescaling might wish for more than one row
- * group of context when shrinking the image; tough, we don't handle that.
- * (This is justified by the assumption that downsizing will be handled mostly
- * by adjusting the DCT_scaled_size values, so that the actual scale factor at
- * the upsample step needn't be much less than one.)
- *
- * To provide the desired context, we have to retain the last two row groups
- * of one iMCU row while reading in the next iMCU row.  (The last row group
- * can't be processed until we have another row group for its below-context,
- * and so we have to save the next-to-last group too for its above-context.)
- * We could do this most simply by copying data around in our buffer, but
- * that'd be very slow.  We can avoid copying any data by creating a rather
- * strange pointer structure.  Here's how it works.  We allocate a workspace
- * consisting of M+2 row groups (where M = min_DCT_scaled_size is the number
- * of row groups per iMCU row).  We create two sets of redundant pointers to
- * the workspace.  Labeling the physical row groups 0 to M+1, the synthesized
- * pointer lists look like this:
- *                   M+1                          M-1
- * master pointer --> 0         master pointer --> 0
- *                    1                            1
- *                   ...                          ...
- *                   M-3                          M-3
- *                   M-2                           M
- *                   M-1                          M+1
- *                    M                           M-2
- *                   M+1                          M-1
- *                    0                            0
- * We read alternate iMCU rows using each master pointer; thus the last two
- * row groups of the previous iMCU row remain un-overwritten in the workspace.
- * The pointer lists are set up so that the required context rows appear to
- * be adjacent to the proper places when we pass the pointer lists to the
- * upsampler.
- *
- * The above pictures describe the normal state of the pointer lists.
- * At top and bottom of the image, we diddle the pointer lists to duplicate
- * the first or last sample row as necessary (this is cheaper than copying
- * sample rows around).
- *
- * This scheme breaks down if M < 2, ie, min_DCT_scaled_size is 1.  In that
- * situation each iMCU row provides only one row group so the buffering logic
- * must be different (eg, we must read two iMCU rows before we can emit the
- * first row group).  For now, we simply do not support providing context
- * rows when min_DCT_scaled_size is 1.  That combination seems unlikely to
- * be worth providing --- if someone wants a 1/8th-size preview, they probably
- * want it quick and dirty, so a context-free upsampler is sufficient.
- */
-
-
-/* Private buffer controller object */
-
-typedef struct {
-  struct jpeg_d_main_controller pub; /* public fields */
-
-  /* Pointer to allocated workspace (M or M+2 row groups). */
-  JSAMPARRAY buffer[MAX_COMPONENTS];
-
-  boolean buffer_full;		/* Have we gotten an iMCU row from decoder? */
-  JDIMENSION rowgroup_ctr;	/* counts row groups output to postprocessor */
-
-  /* Remaining fields are only used in the context case. */
-
-  /* These are the master pointers to the funny-order pointer lists. */
-  JSAMPIMAGE xbuffer[2];	/* pointers to weird pointer lists */
-
-  int whichptr;			/* indicates which pointer set is now in use */
-  int context_state;		/* process_data state machine status */
-  JDIMENSION rowgroups_avail;	/* row groups available to postprocessor */
-  JDIMENSION iMCU_row_ctr;	/* counts iMCU rows to detect image top/bot */
-} my_main_controller;
-
-typedef my_main_controller * my_main_ptr;
-
-/* context_state values: */
-#define CTX_PREPARE_FOR_IMCU	0	/* need to prepare for MCU row */
-#define CTX_PROCESS_IMCU	1	/* feeding iMCU to postprocessor */
-#define CTX_POSTPONED_ROW	2	/* feeding postponed row group */
-
-
-/* Forward declarations */
-METHODDEF(void) process_data_simple_main
-	JPP((j_decompress_ptr cinfo, JSAMPARRAY output_buf,
-	     JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail));
-METHODDEF(void) process_data_context_main
-	JPP((j_decompress_ptr cinfo, JSAMPARRAY output_buf,
-	     JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail));
-#ifdef QUANT_2PASS_SUPPORTED
-METHODDEF(void) process_data_crank_post
-	JPP((j_decompress_ptr cinfo, JSAMPARRAY output_buf,
-	     JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail));
-#endif
-
-
-LOCAL(void)
-alloc_funny_pointers (j_decompress_ptr cinfo)
-/* Allocate space for the funny pointer lists.
- * This is done only once, not once per pass.
- */
-{
-  my_main_ptr main = (my_main_ptr) cinfo->main;
-  int ci, rgroup;
-  int M = cinfo->min_DCT_v_scaled_size;
-  jpeg_component_info *compptr;
-  JSAMPARRAY xbuf;
-
-  /* Get top-level space for component array pointers.
-   * We alloc both arrays with one call to save a few cycles.
-   */
-  main->xbuffer[0] = (JSAMPIMAGE)
-    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				cinfo->num_components * 2 * SIZEOF(JSAMPARRAY));
-  main->xbuffer[1] = main->xbuffer[0] + cinfo->num_components;
-
-  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-       ci++, compptr++) {
-    rgroup = (compptr->v_samp_factor * compptr->DCT_v_scaled_size) /
-      cinfo->min_DCT_v_scaled_size; /* height of a row group of component */
-    /* Get space for pointer lists --- M+4 row groups in each list.
-     * We alloc both pointer lists with one call to save a few cycles.
-     */
-    xbuf = (JSAMPARRAY)
-      (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				  2 * (rgroup * (M + 4)) * SIZEOF(JSAMPROW));
-    xbuf += rgroup;		/* want one row group at negative offsets */
-    main->xbuffer[0][ci] = xbuf;
-    xbuf += rgroup * (M + 4);
-    main->xbuffer[1][ci] = xbuf;
-  }
-}
-
-
-LOCAL(void)
-make_funny_pointers (j_decompress_ptr cinfo)
-/* Create the funny pointer lists discussed in the comments above.
- * The actual workspace is already allocated (in main->buffer),
- * and the space for the pointer lists is allocated too.
- * This routine just fills in the curiously ordered lists.
- * This will be repeated at the beginning of each pass.
- */
-{
-  my_main_ptr main = (my_main_ptr) cinfo->main;
-  int ci, i, rgroup;
-  int M = cinfo->min_DCT_v_scaled_size;
-  jpeg_component_info *compptr;
-  JSAMPARRAY buf, xbuf0, xbuf1;
-
-  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-       ci++, compptr++) {
-    rgroup = (compptr->v_samp_factor * compptr->DCT_v_scaled_size) /
-      cinfo->min_DCT_v_scaled_size; /* height of a row group of component */
-    xbuf0 = main->xbuffer[0][ci];
-    xbuf1 = main->xbuffer[1][ci];
-    /* First copy the workspace pointers as-is */
-    buf = main->buffer[ci];
-    for (i = 0; i < rgroup * (M + 2); i++) {
-      xbuf0[i] = xbuf1[i] = buf[i];
-    }
-    /* In the second list, put the last four row groups in swapped order */
-    for (i = 0; i < rgroup * 2; i++) {
-      xbuf1[rgroup*(M-2) + i] = buf[rgroup*M + i];
-      xbuf1[rgroup*M + i] = buf[rgroup*(M-2) + i];
-    }
-    /* The wraparound pointers at top and bottom will be filled later
-     * (see set_wraparound_pointers, below).  Initially we want the "above"
-     * pointers to duplicate the first actual data line.  This only needs
-     * to happen in xbuffer[0].
-     */
-    for (i = 0; i < rgroup; i++) {
-      xbuf0[i - rgroup] = xbuf0[0];
-    }
-  }
-}
-
-
-LOCAL(void)
-set_wraparound_pointers (j_decompress_ptr cinfo)
-/* Set up the "wraparound" pointers at top and bottom of the pointer lists.
- * This changes the pointer list state from top-of-image to the normal state.
- */
-{
-  my_main_ptr main = (my_main_ptr) cinfo->main;
-  int ci, i, rgroup;
-  int M = cinfo->min_DCT_v_scaled_size;
-  jpeg_component_info *compptr;
-  JSAMPARRAY xbuf0, xbuf1;
-
-  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-       ci++, compptr++) {
-    rgroup = (compptr->v_samp_factor * compptr->DCT_v_scaled_size) /
-      cinfo->min_DCT_v_scaled_size; /* height of a row group of component */
-    xbuf0 = main->xbuffer[0][ci];
-    xbuf1 = main->xbuffer[1][ci];
-    for (i = 0; i < rgroup; i++) {
-      xbuf0[i - rgroup] = xbuf0[rgroup*(M+1) + i];
-      xbuf1[i - rgroup] = xbuf1[rgroup*(M+1) + i];
-      xbuf0[rgroup*(M+2) + i] = xbuf0[i];
-      xbuf1[rgroup*(M+2) + i] = xbuf1[i];
-    }
-  }
-}
-
-
-LOCAL(void)
-set_bottom_pointers (j_decompress_ptr cinfo)
-/* Change the pointer lists to duplicate the last sample row at the bottom
- * of the image.  whichptr indicates which xbuffer holds the final iMCU row.
- * Also sets rowgroups_avail to indicate number of nondummy row groups in row.
- */
-{
-  my_main_ptr main = (my_main_ptr) cinfo->main;
-  int ci, i, rgroup, iMCUheight, rows_left;
-  jpeg_component_info *compptr;
-  JSAMPARRAY xbuf;
-
-  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-       ci++, compptr++) {
-    /* Count sample rows in one iMCU row and in one row group */
-    iMCUheight = compptr->v_samp_factor * compptr->DCT_v_scaled_size;
-    rgroup = iMCUheight / cinfo->min_DCT_v_scaled_size;
-    /* Count nondummy sample rows remaining for this component */
-    rows_left = (int) (compptr->downsampled_height % (JDIMENSION) iMCUheight);
-    if (rows_left == 0) rows_left = iMCUheight;
-    /* Count nondummy row groups.  Should get same answer for each component,
-     * so we need only do it once.
-     */
-    if (ci == 0) {
-      main->rowgroups_avail = (JDIMENSION) ((rows_left-1) / rgroup + 1);
-    }
-    /* Duplicate the last real sample row rgroup*2 times; this pads out the
-     * last partial rowgroup and ensures at least one full rowgroup of context.
-     */
-    xbuf = main->xbuffer[main->whichptr][ci];
-    for (i = 0; i < rgroup * 2; i++) {
-      xbuf[rows_left + i] = xbuf[rows_left-1];
-    }
-  }
-}
-
-
-/*
- * Initialize for a processing pass.
- */
-
-METHODDEF(void)
-start_pass_main (j_decompress_ptr cinfo, J_BUF_MODE pass_mode)
-{
-  my_main_ptr main = (my_main_ptr) cinfo->main;
-
-  switch (pass_mode) {
-  case JBUF_PASS_THRU:
-    if (cinfo->upsample->need_context_rows) {
-      main->pub.process_data = process_data_context_main;
-      make_funny_pointers(cinfo); /* Create the xbuffer[] lists */
-      main->whichptr = 0;	/* Read first iMCU row into xbuffer[0] */
-      main->context_state = CTX_PREPARE_FOR_IMCU;
-      main->iMCU_row_ctr = 0;
-    } else {
-      /* Simple case with no context needed */
-      main->pub.process_data = process_data_simple_main;
-    }
-    main->buffer_full = FALSE;	/* Mark buffer empty */
-    main->rowgroup_ctr = 0;
-    break;
-#ifdef QUANT_2PASS_SUPPORTED
-  case JBUF_CRANK_DEST:
-    /* For last pass of 2-pass quantization, just crank the postprocessor */
-    main->pub.process_data = process_data_crank_post;
-    break;
-#endif
-  default:
-    ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
-    break;
-  }
-}
-
-
-/*
- * Process some data.
- * This handles the simple case where no context is required.
- */
-
-METHODDEF(void)
-process_data_simple_main (j_decompress_ptr cinfo,
-			  JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
-			  JDIMENSION out_rows_avail)
-{
-  my_main_ptr main = (my_main_ptr) cinfo->main;
-  JDIMENSION rowgroups_avail;
-
-  /* Read input data if we haven't filled the main buffer yet */
-  if (! main->buffer_full) {
-    if (! (*cinfo->coef->decompress_data) (cinfo, main->buffer))
-      return;			/* suspension forced, can do nothing more */
-    main->buffer_full = TRUE;	/* OK, we have an iMCU row to work with */
-  }
-
-  /* There are always min_DCT_scaled_size row groups in an iMCU row. */
-  rowgroups_avail = (JDIMENSION) cinfo->min_DCT_v_scaled_size;
-  /* Note: at the bottom of the image, we may pass extra garbage row groups
-   * to the postprocessor.  The postprocessor has to check for bottom
-   * of image anyway (at row resolution), so no point in us doing it too.
-   */
-
-  /* Feed the postprocessor */
-  (*cinfo->post->post_process_data) (cinfo, main->buffer,
-				     &main->rowgroup_ctr, rowgroups_avail,
-				     output_buf, out_row_ctr, out_rows_avail);
-
-  /* Has postprocessor consumed all the data yet? If so, mark buffer empty */
-  if (main->rowgroup_ctr >= rowgroups_avail) {
-    main->buffer_full = FALSE;
-    main->rowgroup_ctr = 0;
-  }
-}
-
-
-/*
- * Process some data.
- * This handles the case where context rows must be provided.
- */
-
-METHODDEF(void)
-process_data_context_main (j_decompress_ptr cinfo,
-			   JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
-			   JDIMENSION out_rows_avail)
-{
-  my_main_ptr main = (my_main_ptr) cinfo->main;
-
-  /* Read input data if we haven't filled the main buffer yet */
-  if (! main->buffer_full) {
-    if (! (*cinfo->coef->decompress_data) (cinfo,
-					   main->xbuffer[main->whichptr]))
-      return;			/* suspension forced, can do nothing more */
-    main->buffer_full = TRUE;	/* OK, we have an iMCU row to work with */
-    main->iMCU_row_ctr++;	/* count rows received */
-  }
-
-  /* Postprocessor typically will not swallow all the input data it is handed
-   * in one call (due to filling the output buffer first).  Must be prepared
-   * to exit and restart.  This switch lets us keep track of how far we got.
-   * Note that each case falls through to the next on successful completion.
-   */
-  switch (main->context_state) {
-  case CTX_POSTPONED_ROW:
-    /* Call postprocessor using previously set pointers for postponed row */
-    (*cinfo->post->post_process_data) (cinfo, main->xbuffer[main->whichptr],
-			&main->rowgroup_ctr, main->rowgroups_avail,
-			output_buf, out_row_ctr, out_rows_avail);
-    if (main->rowgroup_ctr < main->rowgroups_avail)
-      return;			/* Need to suspend */
-    main->context_state = CTX_PREPARE_FOR_IMCU;
-    if (*out_row_ctr >= out_rows_avail)
-      return;			/* Postprocessor exactly filled output buf */
-    /*FALLTHROUGH*/
-  case CTX_PREPARE_FOR_IMCU:
-    /* Prepare to process first M-1 row groups of this iMCU row */
-    main->rowgroup_ctr = 0;
-    main->rowgroups_avail = (JDIMENSION) (cinfo->min_DCT_v_scaled_size - 1);
-    /* Check for bottom of image: if so, tweak pointers to "duplicate"
-     * the last sample row, and adjust rowgroups_avail to ignore padding rows.
-     */
-    if (main->iMCU_row_ctr == cinfo->total_iMCU_rows)
-      set_bottom_pointers(cinfo);
-    main->context_state = CTX_PROCESS_IMCU;
-    /*FALLTHROUGH*/
-  case CTX_PROCESS_IMCU:
-    /* Call postprocessor using previously set pointers */
-    (*cinfo->post->post_process_data) (cinfo, main->xbuffer[main->whichptr],
-			&main->rowgroup_ctr, main->rowgroups_avail,
-			output_buf, out_row_ctr, out_rows_avail);
-    if (main->rowgroup_ctr < main->rowgroups_avail)
-      return;			/* Need to suspend */
-    /* After the first iMCU, change wraparound pointers to normal state */
-    if (main->iMCU_row_ctr == 1)
-      set_wraparound_pointers(cinfo);
-    /* Prepare to load new iMCU row using other xbuffer list */
-    main->whichptr ^= 1;	/* 0=>1 or 1=>0 */
-    main->buffer_full = FALSE;
-    /* Still need to process last row group of this iMCU row, */
-    /* which is saved at index M+1 of the other xbuffer */
-    main->rowgroup_ctr = (JDIMENSION) (cinfo->min_DCT_v_scaled_size + 1);
-    main->rowgroups_avail = (JDIMENSION) (cinfo->min_DCT_v_scaled_size + 2);
-    main->context_state = CTX_POSTPONED_ROW;
-  }
-}
-
-
-/*
- * Process some data.
- * Final pass of two-pass quantization: just call the postprocessor.
- * Source data will be the postprocessor controller's internal buffer.
- */
-
-#ifdef QUANT_2PASS_SUPPORTED
-
-METHODDEF(void)
-process_data_crank_post (j_decompress_ptr cinfo,
-			 JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
-			 JDIMENSION out_rows_avail)
-{
-  (*cinfo->post->post_process_data) (cinfo, (JSAMPIMAGE) NULL,
-				     (JDIMENSION *) NULL, (JDIMENSION) 0,
-				     output_buf, out_row_ctr, out_rows_avail);
-}
-
-#endif /* QUANT_2PASS_SUPPORTED */
-
-
-/*
- * Initialize main buffer controller.
- */
-
-GLOBAL(void)
-jinit_d_main_controller (j_decompress_ptr cinfo, boolean need_full_buffer)
-{
-  my_main_ptr main;
-  int ci, rgroup, ngroups;
-  jpeg_component_info *compptr;
-
-  main = (my_main_ptr)
-    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				SIZEOF(my_main_controller));
-  cinfo->main = (struct jpeg_d_main_controller *) main;
-  main->pub.start_pass = start_pass_main;
-
-  if (need_full_buffer)		/* shouldn't happen */
-    ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
-
-  /* Allocate the workspace.
-   * ngroups is the number of row groups we need.
-   */
-  if (cinfo->upsample->need_context_rows) {
-    if (cinfo->min_DCT_v_scaled_size < 2) /* unsupported, see comments above */
-      ERREXIT(cinfo, JERR_NOTIMPL);
-    alloc_funny_pointers(cinfo); /* Alloc space for xbuffer[] lists */
-    ngroups = cinfo->min_DCT_v_scaled_size + 2;
-  } else {
-    ngroups = cinfo->min_DCT_v_scaled_size;
-  }
-
-  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-       ci++, compptr++) {
-    rgroup = (compptr->v_samp_factor * compptr->DCT_v_scaled_size) /
-      cinfo->min_DCT_v_scaled_size; /* height of a row group of component */
-    main->buffer[ci] = (*cinfo->mem->alloc_sarray)
-			((j_common_ptr) cinfo, JPOOL_IMAGE,
-			 compptr->width_in_blocks * compptr->DCT_h_scaled_size,
-			 (JDIMENSION) (rgroup * ngroups));
-  }
-}
+/*
+ * jdmainct.c
+ *
+ * Copyright (C) 1994-1996, Thomas G. Lane.
+ * Modified 2002-2012 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains the main buffer controller for decompression.
+ * The main buffer lies between the JPEG decompressor proper and the
+ * post-processor; it holds downsampled data in the JPEG colorspace.
+ *
+ * Note that this code is bypassed in raw-data mode, since the application
+ * supplies the equivalent of the main buffer in that case.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/*
+ * In the current system design, the main buffer need never be a full-image
+ * buffer; any full-height buffers will be found inside the coefficient or
+ * postprocessing controllers.  Nonetheless, the main controller is not
+ * trivial.  Its responsibility is to provide context rows for upsampling/
+ * rescaling, and doing this in an efficient fashion is a bit tricky.
+ *
+ * Postprocessor input data is counted in "row groups".  A row group
+ * is defined to be (v_samp_factor * DCT_scaled_size / min_DCT_scaled_size)
+ * sample rows of each component.  (We require DCT_scaled_size values to be
+ * chosen such that these numbers are integers.  In practice DCT_scaled_size
+ * values will likely be powers of two, so we actually have the stronger
+ * condition that DCT_scaled_size / min_DCT_scaled_size is an integer.)
+ * Upsampling will typically produce max_v_samp_factor pixel rows from each
+ * row group (times any additional scale factor that the upsampler is
+ * applying).
+ *
+ * The coefficient controller will deliver data to us one iMCU row at a time;
+ * each iMCU row contains v_samp_factor * DCT_scaled_size sample rows, or
+ * exactly min_DCT_scaled_size row groups.  (This amount of data corresponds
+ * to one row of MCUs when the image is fully interleaved.)  Note that the
+ * number of sample rows varies across components, but the number of row
+ * groups does not.  Some garbage sample rows may be included in the last iMCU
+ * row at the bottom of the image.
+ *
+ * Depending on the vertical scaling algorithm used, the upsampler may need
+ * access to the sample row(s) above and below its current input row group.
+ * The upsampler is required to set need_context_rows TRUE at global selection
+ * time if so.  When need_context_rows is FALSE, this controller can simply
+ * obtain one iMCU row at a time from the coefficient controller and dole it
+ * out as row groups to the postprocessor.
+ *
+ * When need_context_rows is TRUE, this controller guarantees that the buffer
+ * passed to postprocessing contains at least one row group's worth of samples
+ * above and below the row group(s) being processed.  Note that the context
+ * rows "above" the first passed row group appear at negative row offsets in
+ * the passed buffer.  At the top and bottom of the image, the required
+ * context rows are manufactured by duplicating the first or last real sample
+ * row; this avoids having special cases in the upsampling inner loops.
+ *
+ * The amount of context is fixed at one row group just because that's a
+ * convenient number for this controller to work with.  The existing
+ * upsamplers really only need one sample row of context.  An upsampler
+ * supporting arbitrary output rescaling might wish for more than one row
+ * group of context when shrinking the image; tough, we don't handle that.
+ * (This is justified by the assumption that downsizing will be handled mostly
+ * by adjusting the DCT_scaled_size values, so that the actual scale factor at
+ * the upsample step needn't be much less than one.)
+ *
+ * To provide the desired context, we have to retain the last two row groups
+ * of one iMCU row while reading in the next iMCU row.  (The last row group
+ * can't be processed until we have another row group for its below-context,
+ * and so we have to save the next-to-last group too for its above-context.)
+ * We could do this most simply by copying data around in our buffer, but
+ * that'd be very slow.  We can avoid copying any data by creating a rather
+ * strange pointer structure.  Here's how it works.  We allocate a workspace
+ * consisting of M+2 row groups (where M = min_DCT_scaled_size is the number
+ * of row groups per iMCU row).  We create two sets of redundant pointers to
+ * the workspace.  Labeling the physical row groups 0 to M+1, the synthesized
+ * pointer lists look like this:
+ *                   M+1                          M-1
+ * master pointer --> 0         master pointer --> 0
+ *                    1                            1
+ *                   ...                          ...
+ *                   M-3                          M-3
+ *                   M-2                           M
+ *                   M-1                          M+1
+ *                    M                           M-2
+ *                   M+1                          M-1
+ *                    0                            0
+ * We read alternate iMCU rows using each master pointer; thus the last two
+ * row groups of the previous iMCU row remain un-overwritten in the workspace.
+ * The pointer lists are set up so that the required context rows appear to
+ * be adjacent to the proper places when we pass the pointer lists to the
+ * upsampler.
+ *
+ * The above pictures describe the normal state of the pointer lists.
+ * At top and bottom of the image, we diddle the pointer lists to duplicate
+ * the first or last sample row as necessary (this is cheaper than copying
+ * sample rows around).
+ *
+ * This scheme breaks down if M < 2, ie, min_DCT_scaled_size is 1.  In that
+ * situation each iMCU row provides only one row group so the buffering logic
+ * must be different (eg, we must read two iMCU rows before we can emit the
+ * first row group).  For now, we simply do not support providing context
+ * rows when min_DCT_scaled_size is 1.  That combination seems unlikely to
+ * be worth providing --- if someone wants a 1/8th-size preview, they probably
+ * want it quick and dirty, so a context-free upsampler is sufficient.
+ */
+
+
+/* Private buffer controller object */
+
+typedef struct {
+  struct jpeg_d_main_controller pub; /* public fields */
+
+  /* Pointer to allocated workspace (M or M+2 row groups). */
+  JSAMPARRAY buffer[MAX_COMPONENTS];
+
+  boolean buffer_full;		/* Have we gotten an iMCU row from decoder? */
+  JDIMENSION rowgroup_ctr;	/* counts row groups output to postprocessor */
+
+  /* Remaining fields are only used in the context case. */
+
+  /* These are the master pointers to the funny-order pointer lists. */
+  JSAMPIMAGE xbuffer[2];	/* pointers to weird pointer lists */
+
+  int whichptr;			/* indicates which pointer set is now in use */
+  int context_state;		/* process_data state machine status */
+  JDIMENSION rowgroups_avail;	/* row groups available to postprocessor */
+  JDIMENSION iMCU_row_ctr;	/* counts iMCU rows to detect image top/bot */
+} my_main_controller;
+
+typedef my_main_controller * my_main_ptr;
+
+/* context_state values: */
+#define CTX_PREPARE_FOR_IMCU	0	/* need to prepare for MCU row */
+#define CTX_PROCESS_IMCU	1	/* feeding iMCU to postprocessor */
+#define CTX_POSTPONED_ROW	2	/* feeding postponed row group */
+
+
+/* Forward declarations */
+METHODDEF(void) process_data_simple_main
+	JPP((j_decompress_ptr cinfo, JSAMPARRAY output_buf,
+	     JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail));
+METHODDEF(void) process_data_context_main
+	JPP((j_decompress_ptr cinfo, JSAMPARRAY output_buf,
+	     JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail));
+#ifdef QUANT_2PASS_SUPPORTED
+METHODDEF(void) process_data_crank_post
+	JPP((j_decompress_ptr cinfo, JSAMPARRAY output_buf,
+	     JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail));
+#endif
+
+
+LOCAL(void)
+alloc_funny_pointers (j_decompress_ptr cinfo)
+/* Allocate space for the funny pointer lists.
+ * This is done only once, not once per pass.
+ */
+{
+  my_main_ptr mainp = (my_main_ptr) cinfo->main;
+  int ci, rgroup;
+  int M = cinfo->min_DCT_v_scaled_size;
+  jpeg_component_info *compptr;
+  JSAMPARRAY xbuf;
+
+  /* Get top-level space for component array pointers.
+   * We alloc both arrays with one call to save a few cycles.
+   */
+  mainp->xbuffer[0] = (JSAMPIMAGE)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				cinfo->num_components * 2 * SIZEOF(JSAMPARRAY));
+  mainp->xbuffer[1] = mainp->xbuffer[0] + cinfo->num_components;
+
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    rgroup = (compptr->v_samp_factor * compptr->DCT_v_scaled_size) /
+      cinfo->min_DCT_v_scaled_size; /* height of a row group of component */
+    /* Get space for pointer lists --- M+4 row groups in each list.
+     * We alloc both pointer lists with one call to save a few cycles.
+     */
+    xbuf = (JSAMPARRAY)
+      (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				  2 * (rgroup * (M + 4)) * SIZEOF(JSAMPROW));
+    xbuf += rgroup;		/* want one row group at negative offsets */
+    mainp->xbuffer[0][ci] = xbuf;
+    xbuf += rgroup * (M + 4);
+    mainp->xbuffer[1][ci] = xbuf;
+  }
+}
+
+
+LOCAL(void)
+make_funny_pointers (j_decompress_ptr cinfo)
+/* Create the funny pointer lists discussed in the comments above.
+ * The actual workspace is already allocated (in main->buffer),
+ * and the space for the pointer lists is allocated too.
+ * This routine just fills in the curiously ordered lists.
+ * This will be repeated at the beginning of each pass.
+ */
+{
+  my_main_ptr mainp = (my_main_ptr) cinfo->main;
+  int ci, i, rgroup;
+  int M = cinfo->min_DCT_v_scaled_size;
+  jpeg_component_info *compptr;
+  JSAMPARRAY buf, xbuf0, xbuf1;
+
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    rgroup = (compptr->v_samp_factor * compptr->DCT_v_scaled_size) /
+      cinfo->min_DCT_v_scaled_size; /* height of a row group of component */
+    xbuf0 = mainp->xbuffer[0][ci];
+    xbuf1 = mainp->xbuffer[1][ci];
+    /* First copy the workspace pointers as-is */
+    buf = mainp->buffer[ci];
+    for (i = 0; i < rgroup * (M + 2); i++) {
+      xbuf0[i] = xbuf1[i] = buf[i];
+    }
+    /* In the second list, put the last four row groups in swapped order */
+    for (i = 0; i < rgroup * 2; i++) {
+      xbuf1[rgroup*(M-2) + i] = buf[rgroup*M + i];
+      xbuf1[rgroup*M + i] = buf[rgroup*(M-2) + i];
+    }
+    /* The wraparound pointers at top and bottom will be filled later
+     * (see set_wraparound_pointers, below).  Initially we want the "above"
+     * pointers to duplicate the first actual data line.  This only needs
+     * to happen in xbuffer[0].
+     */
+    for (i = 0; i < rgroup; i++) {
+      xbuf0[i - rgroup] = xbuf0[0];
+    }
+  }
+}
+
+
+LOCAL(void)
+set_wraparound_pointers (j_decompress_ptr cinfo)
+/* Set up the "wraparound" pointers at top and bottom of the pointer lists.
+ * This changes the pointer list state from top-of-image to the normal state.
+ */
+{
+  my_main_ptr mainp = (my_main_ptr) cinfo->main;
+  int ci, i, rgroup;
+  int M = cinfo->min_DCT_v_scaled_size;
+  jpeg_component_info *compptr;
+  JSAMPARRAY xbuf0, xbuf1;
+
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    rgroup = (compptr->v_samp_factor * compptr->DCT_v_scaled_size) /
+      cinfo->min_DCT_v_scaled_size; /* height of a row group of component */
+    xbuf0 = mainp->xbuffer[0][ci];
+    xbuf1 = mainp->xbuffer[1][ci];
+    for (i = 0; i < rgroup; i++) {
+      xbuf0[i - rgroup] = xbuf0[rgroup*(M+1) + i];
+      xbuf1[i - rgroup] = xbuf1[rgroup*(M+1) + i];
+      xbuf0[rgroup*(M+2) + i] = xbuf0[i];
+      xbuf1[rgroup*(M+2) + i] = xbuf1[i];
+    }
+  }
+}
+
+
+LOCAL(void)
+set_bottom_pointers (j_decompress_ptr cinfo)
+/* Change the pointer lists to duplicate the last sample row at the bottom
+ * of the image.  whichptr indicates which xbuffer holds the final iMCU row.
+ * Also sets rowgroups_avail to indicate number of nondummy row groups in row.
+ */
+{
+  my_main_ptr mainp = (my_main_ptr) cinfo->main;
+  int ci, i, rgroup, iMCUheight, rows_left;
+  jpeg_component_info *compptr;
+  JSAMPARRAY xbuf;
+
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    /* Count sample rows in one iMCU row and in one row group */
+    iMCUheight = compptr->v_samp_factor * compptr->DCT_v_scaled_size;
+    rgroup = iMCUheight / cinfo->min_DCT_v_scaled_size;
+    /* Count nondummy sample rows remaining for this component */
+    rows_left = (int) (compptr->downsampled_height % (JDIMENSION) iMCUheight);
+    if (rows_left == 0) rows_left = iMCUheight;
+    /* Count nondummy row groups.  Should get same answer for each component,
+     * so we need only do it once.
+     */
+    if (ci == 0) {
+      mainp->rowgroups_avail = (JDIMENSION) ((rows_left-1) / rgroup + 1);
+    }
+    /* Duplicate the last real sample row rgroup*2 times; this pads out the
+     * last partial rowgroup and ensures at least one full rowgroup of context.
+     */
+    xbuf = mainp->xbuffer[mainp->whichptr][ci];
+    for (i = 0; i < rgroup * 2; i++) {
+      xbuf[rows_left + i] = xbuf[rows_left-1];
+    }
+  }
+}
+
+
+/*
+ * Initialize for a processing pass.
+ */
+
+METHODDEF(void)
+start_pass_main (j_decompress_ptr cinfo, J_BUF_MODE pass_mode)
+{
+  my_main_ptr mainp = (my_main_ptr) cinfo->main;
+
+  switch (pass_mode) {
+  case JBUF_PASS_THRU:
+    if (cinfo->upsample->need_context_rows) {
+      mainp->pub.process_data = process_data_context_main;
+      make_funny_pointers(cinfo); /* Create the xbuffer[] lists */
+      mainp->whichptr = 0;	/* Read first iMCU row into xbuffer[0] */
+      mainp->context_state = CTX_PREPARE_FOR_IMCU;
+      mainp->iMCU_row_ctr = 0;
+    } else {
+      /* Simple case with no context needed */
+      mainp->pub.process_data = process_data_simple_main;
+    }
+    mainp->buffer_full = FALSE;	/* Mark buffer empty */
+    mainp->rowgroup_ctr = 0;
+    break;
+#ifdef QUANT_2PASS_SUPPORTED
+  case JBUF_CRANK_DEST:
+    /* For last pass of 2-pass quantization, just crank the postprocessor */
+    mainp->pub.process_data = process_data_crank_post;
+    break;
+#endif
+  default:
+    ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
+    break;
+  }
+}
+
+
+/*
+ * Process some data.
+ * This handles the simple case where no context is required.
+ */
+
+METHODDEF(void)
+process_data_simple_main (j_decompress_ptr cinfo,
+			  JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
+			  JDIMENSION out_rows_avail)
+{
+  my_main_ptr mainp = (my_main_ptr) cinfo->main;
+  JDIMENSION rowgroups_avail;
+
+  /* Read input data if we haven't filled the main buffer yet */
+  if (! mainp->buffer_full) {
+    if (! (*cinfo->coef->decompress_data) (cinfo, mainp->buffer))
+      return;			/* suspension forced, can do nothing more */
+    mainp->buffer_full = TRUE;	/* OK, we have an iMCU row to work with */
+  }
+
+  /* There are always min_DCT_scaled_size row groups in an iMCU row. */
+  rowgroups_avail = (JDIMENSION) cinfo->min_DCT_v_scaled_size;
+  /* Note: at the bottom of the image, we may pass extra garbage row groups
+   * to the postprocessor.  The postprocessor has to check for bottom
+   * of image anyway (at row resolution), so no point in us doing it too.
+   */
+
+  /* Feed the postprocessor */
+  (*cinfo->post->post_process_data) (cinfo, mainp->buffer,
+				     &mainp->rowgroup_ctr, rowgroups_avail,
+				     output_buf, out_row_ctr, out_rows_avail);
+
+  /* Has postprocessor consumed all the data yet? If so, mark buffer empty */
+  if (mainp->rowgroup_ctr >= rowgroups_avail) {
+    mainp->buffer_full = FALSE;
+    mainp->rowgroup_ctr = 0;
+  }
+}
+
+
+/*
+ * Process some data.
+ * This handles the case where context rows must be provided.
+ */
+
+METHODDEF(void)
+process_data_context_main (j_decompress_ptr cinfo,
+			   JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
+			   JDIMENSION out_rows_avail)
+{
+  my_main_ptr mainp = (my_main_ptr) cinfo->main;
+
+  /* Read input data if we haven't filled the main buffer yet */
+  if (! mainp->buffer_full) {
+    if (! (*cinfo->coef->decompress_data) (cinfo,
+					   mainp->xbuffer[mainp->whichptr]))
+      return;			/* suspension forced, can do nothing more */
+    mainp->buffer_full = TRUE;	/* OK, we have an iMCU row to work with */
+    mainp->iMCU_row_ctr++;	/* count rows received */
+  }
+
+  /* Postprocessor typically will not swallow all the input data it is handed
+   * in one call (due to filling the output buffer first).  Must be prepared
+   * to exit and restart.  This switch lets us keep track of how far we got.
+   * Note that each case falls through to the next on successful completion.
+   */
+  switch (mainp->context_state) {
+  case CTX_POSTPONED_ROW:
+    /* Call postprocessor using previously set pointers for postponed row */
+    (*cinfo->post->post_process_data) (cinfo, mainp->xbuffer[mainp->whichptr],
+			&mainp->rowgroup_ctr, mainp->rowgroups_avail,
+			output_buf, out_row_ctr, out_rows_avail);
+    if (mainp->rowgroup_ctr < mainp->rowgroups_avail)
+      return;			/* Need to suspend */
+    mainp->context_state = CTX_PREPARE_FOR_IMCU;
+    if (*out_row_ctr >= out_rows_avail)
+      return;			/* Postprocessor exactly filled output buf */
+    /*FALLTHROUGH*/
+  case CTX_PREPARE_FOR_IMCU:
+    /* Prepare to process first M-1 row groups of this iMCU row */
+    mainp->rowgroup_ctr = 0;
+    mainp->rowgroups_avail = (JDIMENSION) (cinfo->min_DCT_v_scaled_size - 1);
+    /* Check for bottom of image: if so, tweak pointers to "duplicate"
+     * the last sample row, and adjust rowgroups_avail to ignore padding rows.
+     */
+    if (mainp->iMCU_row_ctr == cinfo->total_iMCU_rows)
+      set_bottom_pointers(cinfo);
+    mainp->context_state = CTX_PROCESS_IMCU;
+    /*FALLTHROUGH*/
+  case CTX_PROCESS_IMCU:
+    /* Call postprocessor using previously set pointers */
+    (*cinfo->post->post_process_data) (cinfo, mainp->xbuffer[mainp->whichptr],
+			&mainp->rowgroup_ctr, mainp->rowgroups_avail,
+			output_buf, out_row_ctr, out_rows_avail);
+    if (mainp->rowgroup_ctr < mainp->rowgroups_avail)
+      return;			/* Need to suspend */
+    /* After the first iMCU, change wraparound pointers to normal state */
+    if (mainp->iMCU_row_ctr == 1)
+      set_wraparound_pointers(cinfo);
+    /* Prepare to load new iMCU row using other xbuffer list */
+    mainp->whichptr ^= 1;	/* 0=>1 or 1=>0 */
+    mainp->buffer_full = FALSE;
+    /* Still need to process last row group of this iMCU row, */
+    /* which is saved at index M+1 of the other xbuffer */
+    mainp->rowgroup_ctr = (JDIMENSION) (cinfo->min_DCT_v_scaled_size + 1);
+    mainp->rowgroups_avail = (JDIMENSION) (cinfo->min_DCT_v_scaled_size + 2);
+    mainp->context_state = CTX_POSTPONED_ROW;
+  }
+}
+
+
+/*
+ * Process some data.
+ * Final pass of two-pass quantization: just call the postprocessor.
+ * Source data will be the postprocessor controller's internal buffer.
+ */
+
+#ifdef QUANT_2PASS_SUPPORTED
+
+METHODDEF(void)
+process_data_crank_post (j_decompress_ptr cinfo,
+			 JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
+			 JDIMENSION out_rows_avail)
+{
+  (*cinfo->post->post_process_data) (cinfo, (JSAMPIMAGE) NULL,
+				     (JDIMENSION *) NULL, (JDIMENSION) 0,
+				     output_buf, out_row_ctr, out_rows_avail);
+}
+
+#endif /* QUANT_2PASS_SUPPORTED */
+
+
+/*
+ * Initialize main buffer controller.
+ */
+
+GLOBAL(void)
+jinit_d_main_controller (j_decompress_ptr cinfo, boolean need_full_buffer)
+{
+  my_main_ptr mainp;
+  int ci, rgroup, ngroups;
+  jpeg_component_info *compptr;
+
+  mainp = (my_main_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				SIZEOF(my_main_controller));
+  cinfo->main = &mainp->pub;
+  mainp->pub.start_pass = start_pass_main;
+
+  if (need_full_buffer)		/* shouldn't happen */
+    ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
+
+  /* Allocate the workspace.
+   * ngroups is the number of row groups we need.
+   */
+  if (cinfo->upsample->need_context_rows) {
+    if (cinfo->min_DCT_v_scaled_size < 2) /* unsupported, see comments above */
+      ERREXIT(cinfo, JERR_NOTIMPL);
+    alloc_funny_pointers(cinfo); /* Alloc space for xbuffer[] lists */
+    ngroups = cinfo->min_DCT_v_scaled_size + 2;
+  } else {
+    ngroups = cinfo->min_DCT_v_scaled_size;
+  }
+
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    rgroup = (compptr->v_samp_factor * compptr->DCT_v_scaled_size) /
+      cinfo->min_DCT_v_scaled_size; /* height of a row group of component */
+    mainp->buffer[ci] = (*cinfo->mem->alloc_sarray)
+      ((j_common_ptr) cinfo, JPOOL_IMAGE,
+       compptr->width_in_blocks * ((JDIMENSION) compptr->DCT_h_scaled_size),
+       (JDIMENSION) (rgroup * ngroups));
+  }
+}
diff --git a/plugins/AdvaImg/src/LibJPEG/jdmarker.c b/plugins/AdvaImg/src/LibJPEG/jdmarker.c
index f2a9cc4295..47ebee9576 100644
--- a/plugins/AdvaImg/src/LibJPEG/jdmarker.c
+++ b/plugins/AdvaImg/src/LibJPEG/jdmarker.c
@@ -1,1406 +1,1505 @@
-/*
- * jdmarker.c
- *
- * Copyright (C) 1991-1998, Thomas G. Lane.
- * Modified 2009 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains routines to decode JPEG datastream markers.
- * Most of the complexity arises from our desire to support input
- * suspension: if not all of the data for a marker is available,
- * we must exit back to the application.  On resumption, we reprocess
- * the marker.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-
-
-typedef enum {			/* JPEG marker codes */
-  M_SOF0  = 0xc0,
-  M_SOF1  = 0xc1,
-  M_SOF2  = 0xc2,
-  M_SOF3  = 0xc3,
-  
-  M_SOF5  = 0xc5,
-  M_SOF6  = 0xc6,
-  M_SOF7  = 0xc7,
-  
-  M_JPG   = 0xc8,
-  M_SOF9  = 0xc9,
-  M_SOF10 = 0xca,
-  M_SOF11 = 0xcb,
-  
-  M_SOF13 = 0xcd,
-  M_SOF14 = 0xce,
-  M_SOF15 = 0xcf,
-  
-  M_DHT   = 0xc4,
-  
-  M_DAC   = 0xcc,
-  
-  M_RST0  = 0xd0,
-  M_RST1  = 0xd1,
-  M_RST2  = 0xd2,
-  M_RST3  = 0xd3,
-  M_RST4  = 0xd4,
-  M_RST5  = 0xd5,
-  M_RST6  = 0xd6,
-  M_RST7  = 0xd7,
-  
-  M_SOI   = 0xd8,
-  M_EOI   = 0xd9,
-  M_SOS   = 0xda,
-  M_DQT   = 0xdb,
-  M_DNL   = 0xdc,
-  M_DRI   = 0xdd,
-  M_DHP   = 0xde,
-  M_EXP   = 0xdf,
-  
-  M_APP0  = 0xe0,
-  M_APP1  = 0xe1,
-  M_APP2  = 0xe2,
-  M_APP3  = 0xe3,
-  M_APP4  = 0xe4,
-  M_APP5  = 0xe5,
-  M_APP6  = 0xe6,
-  M_APP7  = 0xe7,
-  M_APP8  = 0xe8,
-  M_APP9  = 0xe9,
-  M_APP10 = 0xea,
-  M_APP11 = 0xeb,
-  M_APP12 = 0xec,
-  M_APP13 = 0xed,
-  M_APP14 = 0xee,
-  M_APP15 = 0xef,
-  
-  M_JPG0  = 0xf0,
-  M_JPG13 = 0xfd,
-  M_COM   = 0xfe,
-  
-  M_TEM   = 0x01,
-  
-  M_ERROR = 0x100
-} JPEG_MARKER;
-
-
-/* Private state */
-
-typedef struct {
-  struct jpeg_marker_reader pub; /* public fields */
-
-  /* Application-overridable marker processing methods */
-  jpeg_marker_parser_method process_COM;
-  jpeg_marker_parser_method process_APPn[16];
-
-  /* Limit on marker data length to save for each marker type */
-  unsigned int length_limit_COM;
-  unsigned int length_limit_APPn[16];
-
-  /* Status of COM/APPn marker saving */
-  jpeg_saved_marker_ptr cur_marker;	/* NULL if not processing a marker */
-  unsigned int bytes_read;		/* data bytes read so far in marker */
-  /* Note: cur_marker is not linked into marker_list until it's all read. */
-} my_marker_reader;
-
-typedef my_marker_reader * my_marker_ptr;
-
-
-/*
- * Macros for fetching data from the data source module.
- *
- * At all times, cinfo->src->next_input_byte and ->bytes_in_buffer reflect
- * the current restart point; we update them only when we have reached a
- * suitable place to restart if a suspension occurs.
- */
-
-/* Declare and initialize local copies of input pointer/count */
-#define INPUT_VARS(cinfo)  \
-	struct jpeg_source_mgr * datasrc = (cinfo)->src;  \
-	const JOCTET * next_input_byte = datasrc->next_input_byte;  \
-	size_t bytes_in_buffer = datasrc->bytes_in_buffer
-
-/* Unload the local copies --- do this only at a restart boundary */
-#define INPUT_SYNC(cinfo)  \
-	( datasrc->next_input_byte = next_input_byte,  \
-	  datasrc->bytes_in_buffer = bytes_in_buffer )
-
-/* Reload the local copies --- used only in MAKE_BYTE_AVAIL */
-#define INPUT_RELOAD(cinfo)  \
-	( next_input_byte = datasrc->next_input_byte,  \
-	  bytes_in_buffer = datasrc->bytes_in_buffer )
-
-/* Internal macro for INPUT_BYTE and INPUT_2BYTES: make a byte available.
- * Note we do *not* do INPUT_SYNC before calling fill_input_buffer,
- * but we must reload the local copies after a successful fill.
- */
-#define MAKE_BYTE_AVAIL(cinfo,action)  \
-	if (bytes_in_buffer == 0) {  \
-	  if (! (*datasrc->fill_input_buffer) (cinfo))  \
-	    { action; }  \
-	  INPUT_RELOAD(cinfo);  \
-	}
-
-/* Read a byte into variable V.
- * If must suspend, take the specified action (typically "return FALSE").
- */
-#define INPUT_BYTE(cinfo,V,action)  \
-	MAKESTMT( MAKE_BYTE_AVAIL(cinfo,action); \
-		  bytes_in_buffer--; \
-		  V = GETJOCTET(*next_input_byte++); )
-
-/* As above, but read two bytes interpreted as an unsigned 16-bit integer.
- * V should be declared unsigned int or perhaps INT32.
- */
-#define INPUT_2BYTES(cinfo,V,action)  \
-	MAKESTMT( MAKE_BYTE_AVAIL(cinfo,action); \
-		  bytes_in_buffer--; \
-		  V = ((unsigned int) GETJOCTET(*next_input_byte++)) << 8; \
-		  MAKE_BYTE_AVAIL(cinfo,action); \
-		  bytes_in_buffer--; \
-		  V += GETJOCTET(*next_input_byte++); )
-
-
-/*
- * Routines to process JPEG markers.
- *
- * Entry condition: JPEG marker itself has been read and its code saved
- *   in cinfo->unread_marker; input restart point is just after the marker.
- *
- * Exit: if return TRUE, have read and processed any parameters, and have
- *   updated the restart point to point after the parameters.
- *   If return FALSE, was forced to suspend before reaching end of
- *   marker parameters; restart point has not been moved.  Same routine
- *   will be called again after application supplies more input data.
- *
- * This approach to suspension assumes that all of a marker's parameters
- * can fit into a single input bufferload.  This should hold for "normal"
- * markers.  Some COM/APPn markers might have large parameter segments
- * that might not fit.  If we are simply dropping such a marker, we use
- * skip_input_data to get past it, and thereby put the problem on the
- * source manager's shoulders.  If we are saving the marker's contents
- * into memory, we use a slightly different convention: when forced to
- * suspend, the marker processor updates the restart point to the end of
- * what it's consumed (ie, the end of the buffer) before returning FALSE.
- * On resumption, cinfo->unread_marker still contains the marker code,
- * but the data source will point to the next chunk of marker data.
- * The marker processor must retain internal state to deal with this.
- *
- * Note that we don't bother to avoid duplicate trace messages if a
- * suspension occurs within marker parameters.  Other side effects
- * require more care.
- */
-
-
-LOCAL(boolean)
-get_soi (j_decompress_ptr cinfo)
-/* Process an SOI marker */
-{
-  int i;
-  
-  TRACEMS(cinfo, 1, JTRC_SOI);
-
-  if (cinfo->marker->saw_SOI)
-    ERREXIT(cinfo, JERR_SOI_DUPLICATE);
-
-  /* Reset all parameters that are defined to be reset by SOI */
-
-  for (i = 0; i < NUM_ARITH_TBLS; i++) {
-    cinfo->arith_dc_L[i] = 0;
-    cinfo->arith_dc_U[i] = 1;
-    cinfo->arith_ac_K[i] = 5;
-  }
-  cinfo->restart_interval = 0;
-
-  /* Set initial assumptions for colorspace etc */
-
-  cinfo->jpeg_color_space = JCS_UNKNOWN;
-  cinfo->CCIR601_sampling = FALSE; /* Assume non-CCIR sampling??? */
-
-  cinfo->saw_JFIF_marker = FALSE;
-  cinfo->JFIF_major_version = 1; /* set default JFIF APP0 values */
-  cinfo->JFIF_minor_version = 1;
-  cinfo->density_unit = 0;
-  cinfo->X_density = 1;
-  cinfo->Y_density = 1;
-  cinfo->saw_Adobe_marker = FALSE;
-  cinfo->Adobe_transform = 0;
-
-  cinfo->marker->saw_SOI = TRUE;
-
-  return TRUE;
-}
-
-
-LOCAL(boolean)
-get_sof (j_decompress_ptr cinfo, boolean is_baseline, boolean is_prog,
-	 boolean is_arith)
-/* Process a SOFn marker */
-{
-  INT32 length;
-  int c, ci;
-  jpeg_component_info * compptr;
-  INPUT_VARS(cinfo);
-
-  cinfo->is_baseline = is_baseline;
-  cinfo->progressive_mode = is_prog;
-  cinfo->arith_code = is_arith;
-
-  INPUT_2BYTES(cinfo, length, return FALSE);
-
-  INPUT_BYTE(cinfo, cinfo->data_precision, return FALSE);
-  INPUT_2BYTES(cinfo, cinfo->image_height, return FALSE);
-  INPUT_2BYTES(cinfo, cinfo->image_width, return FALSE);
-  INPUT_BYTE(cinfo, cinfo->num_components, return FALSE);
-
-  length -= 8;
-
-  TRACEMS4(cinfo, 1, JTRC_SOF, cinfo->unread_marker,
-	   (int) cinfo->image_width, (int) cinfo->image_height,
-	   cinfo->num_components);
-
-  if (cinfo->marker->saw_SOF)
-    ERREXIT(cinfo, JERR_SOF_DUPLICATE);
-
-  /* We don't support files in which the image height is initially specified */
-  /* as 0 and is later redefined by DNL.  As long as we have to check that,  */
-  /* might as well have a general sanity check. */
-  if (cinfo->image_height <= 0 || cinfo->image_width <= 0
-      || cinfo->num_components <= 0)
-    ERREXIT(cinfo, JERR_EMPTY_IMAGE);
-
-  if (length != (cinfo->num_components * 3))
-    ERREXIT(cinfo, JERR_BAD_LENGTH);
-
-  if (cinfo->comp_info == NULL)	/* do only once, even if suspend */
-    cinfo->comp_info = (jpeg_component_info *) (*cinfo->mem->alloc_small)
-			((j_common_ptr) cinfo, JPOOL_IMAGE,
-			 cinfo->num_components * SIZEOF(jpeg_component_info));
-  
-  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-       ci++, compptr++) {
-    compptr->component_index = ci;
-    INPUT_BYTE(cinfo, compptr->component_id, return FALSE);
-    INPUT_BYTE(cinfo, c, return FALSE);
-    compptr->h_samp_factor = (c >> 4) & 15;
-    compptr->v_samp_factor = (c     ) & 15;
-    INPUT_BYTE(cinfo, compptr->quant_tbl_no, return FALSE);
-
-    TRACEMS4(cinfo, 1, JTRC_SOF_COMPONENT,
-	     compptr->component_id, compptr->h_samp_factor,
-	     compptr->v_samp_factor, compptr->quant_tbl_no);
-  }
-
-  cinfo->marker->saw_SOF = TRUE;
-
-  INPUT_SYNC(cinfo);
-  return TRUE;
-}
-
-
-LOCAL(boolean)
-get_sos (j_decompress_ptr cinfo)
-/* Process a SOS marker */
-{
-  INT32 length;
-  int i, ci, n, c, cc;
-  jpeg_component_info * compptr;
-  INPUT_VARS(cinfo);
-
-  if (! cinfo->marker->saw_SOF)
-    ERREXIT(cinfo, JERR_SOS_NO_SOF);
-
-  INPUT_2BYTES(cinfo, length, return FALSE);
-
-  INPUT_BYTE(cinfo, n, return FALSE); /* Number of components */
-
-  TRACEMS1(cinfo, 1, JTRC_SOS, n);
-
-  if (length != (n * 2 + 6) || n > MAX_COMPS_IN_SCAN ||
-      (n == 0 && !cinfo->progressive_mode))
-      /* pseudo SOS marker only allowed in progressive mode */
-    ERREXIT(cinfo, JERR_BAD_LENGTH);
-
-  cinfo->comps_in_scan = n;
-
-  /* Collect the component-spec parameters */
-
-  for (i = 0; i < n; i++) {
-    INPUT_BYTE(cinfo, cc, return FALSE);
-    INPUT_BYTE(cinfo, c, return FALSE);
-    
-    for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-	 ci++, compptr++) {
-      if (cc == compptr->component_id)
-	goto id_found;
-    }
-
-    ERREXIT1(cinfo, JERR_BAD_COMPONENT_ID, cc);
-
-  id_found:
-
-    cinfo->cur_comp_info[i] = compptr;
-    compptr->dc_tbl_no = (c >> 4) & 15;
-    compptr->ac_tbl_no = (c     ) & 15;
-    
-    TRACEMS3(cinfo, 1, JTRC_SOS_COMPONENT, cc,
-	     compptr->dc_tbl_no, compptr->ac_tbl_no);
-  }
-
-  /* Collect the additional scan parameters Ss, Se, Ah/Al. */
-  INPUT_BYTE(cinfo, c, return FALSE);
-  cinfo->Ss = c;
-  INPUT_BYTE(cinfo, c, return FALSE);
-  cinfo->Se = c;
-  INPUT_BYTE(cinfo, c, return FALSE);
-  cinfo->Ah = (c >> 4) & 15;
-  cinfo->Al = (c     ) & 15;
-
-  TRACEMS4(cinfo, 1, JTRC_SOS_PARAMS, cinfo->Ss, cinfo->Se,
-	   cinfo->Ah, cinfo->Al);
-
-  /* Prepare to scan data & restart markers */
-  cinfo->marker->next_restart_num = 0;
-
-  /* Count another (non-pseudo) SOS marker */
-  if (n) cinfo->input_scan_number++;
-
-  INPUT_SYNC(cinfo);
-  return TRUE;
-}
-
-
-#ifdef D_ARITH_CODING_SUPPORTED
-
-LOCAL(boolean)
-get_dac (j_decompress_ptr cinfo)
-/* Process a DAC marker */
-{
-  INT32 length;
-  int index, val;
-  INPUT_VARS(cinfo);
-
-  INPUT_2BYTES(cinfo, length, return FALSE);
-  length -= 2;
-  
-  while (length > 0) {
-    INPUT_BYTE(cinfo, index, return FALSE);
-    INPUT_BYTE(cinfo, val, return FALSE);
-
-    length -= 2;
-
-    TRACEMS2(cinfo, 1, JTRC_DAC, index, val);
-
-    if (index < 0 || index >= (2*NUM_ARITH_TBLS))
-      ERREXIT1(cinfo, JERR_DAC_INDEX, index);
-
-    if (index >= NUM_ARITH_TBLS) { /* define AC table */
-      cinfo->arith_ac_K[index-NUM_ARITH_TBLS] = (UINT8) val;
-    } else {			/* define DC table */
-      cinfo->arith_dc_L[index] = (UINT8) (val & 0x0F);
-      cinfo->arith_dc_U[index] = (UINT8) (val >> 4);
-      if (cinfo->arith_dc_L[index] > cinfo->arith_dc_U[index])
-	ERREXIT1(cinfo, JERR_DAC_VALUE, val);
-    }
-  }
-
-  if (length != 0)
-    ERREXIT(cinfo, JERR_BAD_LENGTH);
-
-  INPUT_SYNC(cinfo);
-  return TRUE;
-}
-
-#else /* ! D_ARITH_CODING_SUPPORTED */
-
-#define get_dac(cinfo)  skip_variable(cinfo)
-
-#endif /* D_ARITH_CODING_SUPPORTED */
-
-
-LOCAL(boolean)
-get_dht (j_decompress_ptr cinfo)
-/* Process a DHT marker */
-{
-  INT32 length;
-  UINT8 bits[17];
-  UINT8 huffval[256];
-  int i, index, count;
-  JHUFF_TBL **htblptr;
-  INPUT_VARS(cinfo);
-
-  INPUT_2BYTES(cinfo, length, return FALSE);
-  length -= 2;
-  
-  while (length > 16) {
-    INPUT_BYTE(cinfo, index, return FALSE);
-
-    TRACEMS1(cinfo, 1, JTRC_DHT, index);
-      
-    bits[0] = 0;
-    count = 0;
-    for (i = 1; i <= 16; i++) {
-      INPUT_BYTE(cinfo, bits[i], return FALSE);
-      count += bits[i];
-    }
-
-    length -= 1 + 16;
-
-    TRACEMS8(cinfo, 2, JTRC_HUFFBITS,
-	     bits[1], bits[2], bits[3], bits[4],
-	     bits[5], bits[6], bits[7], bits[8]);
-    TRACEMS8(cinfo, 2, JTRC_HUFFBITS,
-	     bits[9], bits[10], bits[11], bits[12],
-	     bits[13], bits[14], bits[15], bits[16]);
-
-    /* Here we just do minimal validation of the counts to avoid walking
-     * off the end of our table space.  jdhuff.c will check more carefully.
-     */
-    if (count > 256 || ((INT32) count) > length)
-      ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
-
-    for (i = 0; i < count; i++)
-      INPUT_BYTE(cinfo, huffval[i], return FALSE);
-
-    length -= count;
-
-    if (index & 0x10) {		/* AC table definition */
-      index -= 0x10;
-      htblptr = &cinfo->ac_huff_tbl_ptrs[index];
-    } else {			/* DC table definition */
-      htblptr = &cinfo->dc_huff_tbl_ptrs[index];
-    }
-
-    if (index < 0 || index >= NUM_HUFF_TBLS)
-      ERREXIT1(cinfo, JERR_DHT_INDEX, index);
-
-    if (*htblptr == NULL)
-      *htblptr = jpeg_alloc_huff_table((j_common_ptr) cinfo);
-  
-    MEMCOPY((*htblptr)->bits, bits, SIZEOF((*htblptr)->bits));
-    MEMCOPY((*htblptr)->huffval, huffval, SIZEOF((*htblptr)->huffval));
-  }
-
-  if (length != 0)
-    ERREXIT(cinfo, JERR_BAD_LENGTH);
-
-  INPUT_SYNC(cinfo);
-  return TRUE;
-}
-
-
-LOCAL(boolean)
-get_dqt (j_decompress_ptr cinfo)
-/* Process a DQT marker */
-{
-  INT32 length, count, i;
-  int n, prec;
-  unsigned int tmp;
-  JQUANT_TBL *quant_ptr;
-  const int *natural_order;
-  INPUT_VARS(cinfo);
-
-  INPUT_2BYTES(cinfo, length, return FALSE);
-  length -= 2;
-
-  while (length > 0) {
-    length--;
-    INPUT_BYTE(cinfo, n, return FALSE);
-    prec = n >> 4;
-    n &= 0x0F;
-
-    TRACEMS2(cinfo, 1, JTRC_DQT, n, prec);
-
-    if (n >= NUM_QUANT_TBLS)
-      ERREXIT1(cinfo, JERR_DQT_INDEX, n);
-      
-    if (cinfo->quant_tbl_ptrs[n] == NULL)
-      cinfo->quant_tbl_ptrs[n] = jpeg_alloc_quant_table((j_common_ptr) cinfo);
-    quant_ptr = cinfo->quant_tbl_ptrs[n];
-
-    if (prec) {
-      if (length < DCTSIZE2 * 2) {
-	/* Initialize full table for safety. */
-	for (i = 0; i < DCTSIZE2; i++) {
-	  quant_ptr->quantval[i] = 1;
-	}
-	count = length >> 1;
-      } else
-	count = DCTSIZE2;
-    } else {
-      if (length < DCTSIZE2) {
-	/* Initialize full table for safety. */
-	for (i = 0; i < DCTSIZE2; i++) {
-	  quant_ptr->quantval[i] = 1;
-	}
-	count = length;
-      } else
-	count = DCTSIZE2;
-    }
-
-    switch (count) {
-    case (2*2): natural_order = jpeg_natural_order2; break;
-    case (3*3): natural_order = jpeg_natural_order3; break;
-    case (4*4): natural_order = jpeg_natural_order4; break;
-    case (5*5): natural_order = jpeg_natural_order5; break;
-    case (6*6): natural_order = jpeg_natural_order6; break;
-    case (7*7): natural_order = jpeg_natural_order7; break;
-    default:    natural_order = jpeg_natural_order;  break;
-    }
-
-    for (i = 0; i < count; i++) {
-      if (prec)
-	INPUT_2BYTES(cinfo, tmp, return FALSE);
-      else
-	INPUT_BYTE(cinfo, tmp, return FALSE);
-      /* We convert the zigzag-order table to natural array order. */
-      quant_ptr->quantval[natural_order[i]] = (UINT16) tmp;
-    }
-
-    if (cinfo->err->trace_level >= 2) {
-      for (i = 0; i < DCTSIZE2; i += 8) {
-	TRACEMS8(cinfo, 2, JTRC_QUANTVALS,
-		 quant_ptr->quantval[i],   quant_ptr->quantval[i+1],
-		 quant_ptr->quantval[i+2], quant_ptr->quantval[i+3],
-		 quant_ptr->quantval[i+4], quant_ptr->quantval[i+5],
-		 quant_ptr->quantval[i+6], quant_ptr->quantval[i+7]);
-      }
-    }
-
-    length -= count;
-    if (prec) length -= count;
-  }
-
-  if (length != 0)
-    ERREXIT(cinfo, JERR_BAD_LENGTH);
-
-  INPUT_SYNC(cinfo);
-  return TRUE;
-}
-
-
-LOCAL(boolean)
-get_dri (j_decompress_ptr cinfo)
-/* Process a DRI marker */
-{
-  INT32 length;
-  unsigned int tmp;
-  INPUT_VARS(cinfo);
-
-  INPUT_2BYTES(cinfo, length, return FALSE);
-  
-  if (length != 4)
-    ERREXIT(cinfo, JERR_BAD_LENGTH);
-
-  INPUT_2BYTES(cinfo, tmp, return FALSE);
-
-  TRACEMS1(cinfo, 1, JTRC_DRI, tmp);
-
-  cinfo->restart_interval = tmp;
-
-  INPUT_SYNC(cinfo);
-  return TRUE;
-}
-
-
-/*
- * Routines for processing APPn and COM markers.
- * These are either saved in memory or discarded, per application request.
- * APP0 and APP14 are specially checked to see if they are
- * JFIF and Adobe markers, respectively.
- */
-
-#define APP0_DATA_LEN	14	/* Length of interesting data in APP0 */
-#define APP14_DATA_LEN	12	/* Length of interesting data in APP14 */
-#define APPN_DATA_LEN	14	/* Must be the largest of the above!! */
-
-
-LOCAL(void)
-examine_app0 (j_decompress_ptr cinfo, JOCTET FAR * data,
-	      unsigned int datalen, INT32 remaining)
-/* Examine first few bytes from an APP0.
- * Take appropriate action if it is a JFIF marker.
- * datalen is # of bytes at data[], remaining is length of rest of marker data.
- */
-{
-  INT32 totallen = (INT32) datalen + remaining;
-
-  if (datalen >= APP0_DATA_LEN &&
-      GETJOCTET(data[0]) == 0x4A &&
-      GETJOCTET(data[1]) == 0x46 &&
-      GETJOCTET(data[2]) == 0x49 &&
-      GETJOCTET(data[3]) == 0x46 &&
-      GETJOCTET(data[4]) == 0) {
-    /* Found JFIF APP0 marker: save info */
-    cinfo->saw_JFIF_marker = TRUE;
-    cinfo->JFIF_major_version = GETJOCTET(data[5]);
-    cinfo->JFIF_minor_version = GETJOCTET(data[6]);
-    cinfo->density_unit = GETJOCTET(data[7]);
-    cinfo->X_density = (GETJOCTET(data[8]) << 8) + GETJOCTET(data[9]);
-    cinfo->Y_density = (GETJOCTET(data[10]) << 8) + GETJOCTET(data[11]);
-    /* Check version.
-     * Major version must be 1, anything else signals an incompatible change.
-     * (We used to treat this as an error, but now it's a nonfatal warning,
-     * because some bozo at Hijaak couldn't read the spec.)
-     * Minor version should be 0..2, but process anyway if newer.
-     */
-    if (cinfo->JFIF_major_version != 1)
-      WARNMS2(cinfo, JWRN_JFIF_MAJOR,
-	      cinfo->JFIF_major_version, cinfo->JFIF_minor_version);
-    /* Generate trace messages */
-    TRACEMS5(cinfo, 1, JTRC_JFIF,
-	     cinfo->JFIF_major_version, cinfo->JFIF_minor_version,
-	     cinfo->X_density, cinfo->Y_density, cinfo->density_unit);
-    /* Validate thumbnail dimensions and issue appropriate messages */
-    if (GETJOCTET(data[12]) | GETJOCTET(data[13]))
-      TRACEMS2(cinfo, 1, JTRC_JFIF_THUMBNAIL,
-	       GETJOCTET(data[12]), GETJOCTET(data[13]));
-    totallen -= APP0_DATA_LEN;
-    if (totallen !=
-	((INT32)GETJOCTET(data[12]) * (INT32)GETJOCTET(data[13]) * (INT32) 3))
-      TRACEMS1(cinfo, 1, JTRC_JFIF_BADTHUMBNAILSIZE, (int) totallen);
-  } else if (datalen >= 6 &&
-      GETJOCTET(data[0]) == 0x4A &&
-      GETJOCTET(data[1]) == 0x46 &&
-      GETJOCTET(data[2]) == 0x58 &&
-      GETJOCTET(data[3]) == 0x58 &&
-      GETJOCTET(data[4]) == 0) {
-    /* Found JFIF "JFXX" extension APP0 marker */
-    /* The library doesn't actually do anything with these,
-     * but we try to produce a helpful trace message.
-     */
-    switch (GETJOCTET(data[5])) {
-    case 0x10:
-      TRACEMS1(cinfo, 1, JTRC_THUMB_JPEG, (int) totallen);
-      break;
-    case 0x11:
-      TRACEMS1(cinfo, 1, JTRC_THUMB_PALETTE, (int) totallen);
-      break;
-    case 0x13:
-      TRACEMS1(cinfo, 1, JTRC_THUMB_RGB, (int) totallen);
-      break;
-    default:
-      TRACEMS2(cinfo, 1, JTRC_JFIF_EXTENSION,
-	       GETJOCTET(data[5]), (int) totallen);
-      break;
-    }
-  } else {
-    /* Start of APP0 does not match "JFIF" or "JFXX", or too short */
-    TRACEMS1(cinfo, 1, JTRC_APP0, (int) totallen);
-  }
-}
-
-
-LOCAL(void)
-examine_app14 (j_decompress_ptr cinfo, JOCTET FAR * data,
-	       unsigned int datalen, INT32 remaining)
-/* Examine first few bytes from an APP14.
- * Take appropriate action if it is an Adobe marker.
- * datalen is # of bytes at data[], remaining is length of rest of marker data.
- */
-{
-  unsigned int version, flags0, flags1, transform;
-
-  if (datalen >= APP14_DATA_LEN &&
-      GETJOCTET(data[0]) == 0x41 &&
-      GETJOCTET(data[1]) == 0x64 &&
-      GETJOCTET(data[2]) == 0x6F &&
-      GETJOCTET(data[3]) == 0x62 &&
-      GETJOCTET(data[4]) == 0x65) {
-    /* Found Adobe APP14 marker */
-    version = (GETJOCTET(data[5]) << 8) + GETJOCTET(data[6]);
-    flags0 = (GETJOCTET(data[7]) << 8) + GETJOCTET(data[8]);
-    flags1 = (GETJOCTET(data[9]) << 8) + GETJOCTET(data[10]);
-    transform = GETJOCTET(data[11]);
-    TRACEMS4(cinfo, 1, JTRC_ADOBE, version, flags0, flags1, transform);
-    cinfo->saw_Adobe_marker = TRUE;
-    cinfo->Adobe_transform = (UINT8) transform;
-  } else {
-    /* Start of APP14 does not match "Adobe", or too short */
-    TRACEMS1(cinfo, 1, JTRC_APP14, (int) (datalen + remaining));
-  }
-}
-
-
-METHODDEF(boolean)
-get_interesting_appn (j_decompress_ptr cinfo)
-/* Process an APP0 or APP14 marker without saving it */
-{
-  INT32 length;
-  JOCTET b[APPN_DATA_LEN];
-  unsigned int i, numtoread;
-  INPUT_VARS(cinfo);
-
-  INPUT_2BYTES(cinfo, length, return FALSE);
-  length -= 2;
-
-  /* get the interesting part of the marker data */
-  if (length >= APPN_DATA_LEN)
-    numtoread = APPN_DATA_LEN;
-  else if (length > 0)
-    numtoread = (unsigned int) length;
-  else
-    numtoread = 0;
-  for (i = 0; i < numtoread; i++)
-    INPUT_BYTE(cinfo, b[i], return FALSE);
-  length -= numtoread;
-
-  /* process it */
-  switch (cinfo->unread_marker) {
-  case M_APP0:
-    examine_app0(cinfo, (JOCTET FAR *) b, numtoread, length);
-    break;
-  case M_APP14:
-    examine_app14(cinfo, (JOCTET FAR *) b, numtoread, length);
-    break;
-  default:
-    /* can't get here unless jpeg_save_markers chooses wrong processor */
-    ERREXIT1(cinfo, JERR_UNKNOWN_MARKER, cinfo->unread_marker);
-    break;
-  }
-
-  /* skip any remaining data -- could be lots */
-  INPUT_SYNC(cinfo);
-  if (length > 0)
-    (*cinfo->src->skip_input_data) (cinfo, (long) length);
-
-  return TRUE;
-}
-
-
-#ifdef SAVE_MARKERS_SUPPORTED
-
-METHODDEF(boolean)
-save_marker (j_decompress_ptr cinfo)
-/* Save an APPn or COM marker into the marker list */
-{
-  my_marker_ptr marker = (my_marker_ptr) cinfo->marker;
-  jpeg_saved_marker_ptr cur_marker = marker->cur_marker;
-  unsigned int bytes_read, data_length;
-  JOCTET FAR * data;
-  INT32 length = 0;
-  INPUT_VARS(cinfo);
-
-  if (cur_marker == NULL) {
-    /* begin reading a marker */
-    INPUT_2BYTES(cinfo, length, return FALSE);
-    length -= 2;
-    if (length >= 0) {		/* watch out for bogus length word */
-      /* figure out how much we want to save */
-      unsigned int limit;
-      if (cinfo->unread_marker == (int) M_COM)
-	limit = marker->length_limit_COM;
-      else
-	limit = marker->length_limit_APPn[cinfo->unread_marker - (int) M_APP0];
-      if ((unsigned int) length < limit)
-	limit = (unsigned int) length;
-      /* allocate and initialize the marker item */
-      cur_marker = (jpeg_saved_marker_ptr)
-	(*cinfo->mem->alloc_large) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				    SIZEOF(struct jpeg_marker_struct) + limit);
-      cur_marker->next = NULL;
-      cur_marker->marker = (UINT8) cinfo->unread_marker;
-      cur_marker->original_length = (unsigned int) length;
-      cur_marker->data_length = limit;
-      /* data area is just beyond the jpeg_marker_struct */
-      data = cur_marker->data = (JOCTET FAR *) (cur_marker + 1);
-      marker->cur_marker = cur_marker;
-      marker->bytes_read = 0;
-      bytes_read = 0;
-      data_length = limit;
-    } else {
-      /* deal with bogus length word */
-      bytes_read = data_length = 0;
-      data = NULL;
-    }
-  } else {
-    /* resume reading a marker */
-    bytes_read = marker->bytes_read;
-    data_length = cur_marker->data_length;
-    data = cur_marker->data + bytes_read;
-  }
-
-  while (bytes_read < data_length) {
-    INPUT_SYNC(cinfo);		/* move the restart point to here */
-    marker->bytes_read = bytes_read;
-    /* If there's not at least one byte in buffer, suspend */
-    MAKE_BYTE_AVAIL(cinfo, return FALSE);
-    /* Copy bytes with reasonable rapidity */
-    while (bytes_read < data_length && bytes_in_buffer > 0) {
-      *data++ = *next_input_byte++;
-      bytes_in_buffer--;
-      bytes_read++;
-    }
-  }
-
-  /* Done reading what we want to read */
-  if (cur_marker != NULL) {	/* will be NULL if bogus length word */
-    /* Add new marker to end of list */
-    if (cinfo->marker_list == NULL) {
-      cinfo->marker_list = cur_marker;
-    } else {
-      jpeg_saved_marker_ptr prev = cinfo->marker_list;
-      while (prev->next != NULL)
-	prev = prev->next;
-      prev->next = cur_marker;
-    }
-    /* Reset pointer & calc remaining data length */
-    data = cur_marker->data;
-    length = cur_marker->original_length - data_length;
-  }
-  /* Reset to initial state for next marker */
-  marker->cur_marker = NULL;
-
-  /* Process the marker if interesting; else just make a generic trace msg */
-  switch (cinfo->unread_marker) {
-  case M_APP0:
-    examine_app0(cinfo, data, data_length, length);
-    break;
-  case M_APP14:
-    examine_app14(cinfo, data, data_length, length);
-    break;
-  default:
-    TRACEMS2(cinfo, 1, JTRC_MISC_MARKER, cinfo->unread_marker,
-	     (int) (data_length + length));
-    break;
-  }
-
-  /* skip any remaining data -- could be lots */
-  INPUT_SYNC(cinfo);		/* do before skip_input_data */
-  if (length > 0)
-    (*cinfo->src->skip_input_data) (cinfo, (long) length);
-
-  return TRUE;
-}
-
-#endif /* SAVE_MARKERS_SUPPORTED */
-
-
-METHODDEF(boolean)
-skip_variable (j_decompress_ptr cinfo)
-/* Skip over an unknown or uninteresting variable-length marker */
-{
-  INT32 length;
-  INPUT_VARS(cinfo);
-
-  INPUT_2BYTES(cinfo, length, return FALSE);
-  length -= 2;
-  
-  TRACEMS2(cinfo, 1, JTRC_MISC_MARKER, cinfo->unread_marker, (int) length);
-
-  INPUT_SYNC(cinfo);		/* do before skip_input_data */
-  if (length > 0)
-    (*cinfo->src->skip_input_data) (cinfo, (long) length);
-
-  return TRUE;
-}
-
-
-/*
- * Find the next JPEG marker, save it in cinfo->unread_marker.
- * Returns FALSE if had to suspend before reaching a marker;
- * in that case cinfo->unread_marker is unchanged.
- *
- * Note that the result might not be a valid marker code,
- * but it will never be 0 or FF.
- */
-
-LOCAL(boolean)
-next_marker (j_decompress_ptr cinfo)
-{
-  int c;
-  INPUT_VARS(cinfo);
-
-  for (;;) {
-    INPUT_BYTE(cinfo, c, return FALSE);
-    /* Skip any non-FF bytes.
-     * This may look a bit inefficient, but it will not occur in a valid file.
-     * We sync after each discarded byte so that a suspending data source
-     * can discard the byte from its buffer.
-     */
-    while (c != 0xFF) {
-      cinfo->marker->discarded_bytes++;
-      INPUT_SYNC(cinfo);
-      INPUT_BYTE(cinfo, c, return FALSE);
-    }
-    /* This loop swallows any duplicate FF bytes.  Extra FFs are legal as
-     * pad bytes, so don't count them in discarded_bytes.  We assume there
-     * will not be so many consecutive FF bytes as to overflow a suspending
-     * data source's input buffer.
-     */
-    do {
-      INPUT_BYTE(cinfo, c, return FALSE);
-    } while (c == 0xFF);
-    if (c != 0)
-      break;			/* found a valid marker, exit loop */
-    /* Reach here if we found a stuffed-zero data sequence (FF/00).
-     * Discard it and loop back to try again.
-     */
-    cinfo->marker->discarded_bytes += 2;
-    INPUT_SYNC(cinfo);
-  }
-
-  if (cinfo->marker->discarded_bytes != 0) {
-    WARNMS2(cinfo, JWRN_EXTRANEOUS_DATA, cinfo->marker->discarded_bytes, c);
-    cinfo->marker->discarded_bytes = 0;
-  }
-
-  cinfo->unread_marker = c;
-
-  INPUT_SYNC(cinfo);
-  return TRUE;
-}
-
-
-LOCAL(boolean)
-first_marker (j_decompress_ptr cinfo)
-/* Like next_marker, but used to obtain the initial SOI marker. */
-/* For this marker, we do not allow preceding garbage or fill; otherwise,
- * we might well scan an entire input file before realizing it ain't JPEG.
- * If an application wants to process non-JFIF files, it must seek to the
- * SOI before calling the JPEG library.
- */
-{
-  int c, c2;
-  INPUT_VARS(cinfo);
-
-  INPUT_BYTE(cinfo, c, return FALSE);
-  INPUT_BYTE(cinfo, c2, return FALSE);
-  if (c != 0xFF || c2 != (int) M_SOI)
-    ERREXIT2(cinfo, JERR_NO_SOI, c, c2);
-
-  cinfo->unread_marker = c2;
-
-  INPUT_SYNC(cinfo);
-  return TRUE;
-}
-
-
-/*
- * Read markers until SOS or EOI.
- *
- * Returns same codes as are defined for jpeg_consume_input:
- * JPEG_SUSPENDED, JPEG_REACHED_SOS, or JPEG_REACHED_EOI.
- *
- * Note: This function may return a pseudo SOS marker (with zero
- * component number) for treat by input controller's consume_input.
- * consume_input itself should filter out (skip) the pseudo marker
- * after processing for the caller.
- */
-
-METHODDEF(int)
-read_markers (j_decompress_ptr cinfo)
-{
-  /* Outer loop repeats once for each marker. */
-  for (;;) {
-    /* Collect the marker proper, unless we already did. */
-    /* NB: first_marker() enforces the requirement that SOI appear first. */
-    if (cinfo->unread_marker == 0) {
-      if (! cinfo->marker->saw_SOI) {
-	if (! first_marker(cinfo))
-	  return JPEG_SUSPENDED;
-      } else {
-	if (! next_marker(cinfo))
-	  return JPEG_SUSPENDED;
-      }
-    }
-    /* At this point cinfo->unread_marker contains the marker code and the
-     * input point is just past the marker proper, but before any parameters.
-     * A suspension will cause us to return with this state still true.
-     */
-    switch (cinfo->unread_marker) {
-    case M_SOI:
-      if (! get_soi(cinfo))
-	return JPEG_SUSPENDED;
-      break;
-
-    case M_SOF0:		/* Baseline */
-      if (! get_sof(cinfo, TRUE, FALSE, FALSE))
-	return JPEG_SUSPENDED;
-      break;
-
-    case M_SOF1:		/* Extended sequential, Huffman */
-      if (! get_sof(cinfo, FALSE, FALSE, FALSE))
-	return JPEG_SUSPENDED;
-      break;
-
-    case M_SOF2:		/* Progressive, Huffman */
-      if (! get_sof(cinfo, FALSE, TRUE, FALSE))
-	return JPEG_SUSPENDED;
-      break;
-
-    case M_SOF9:		/* Extended sequential, arithmetic */
-      if (! get_sof(cinfo, FALSE, FALSE, TRUE))
-	return JPEG_SUSPENDED;
-      break;
-
-    case M_SOF10:		/* Progressive, arithmetic */
-      if (! get_sof(cinfo, FALSE, TRUE, TRUE))
-	return JPEG_SUSPENDED;
-      break;
-
-    /* Currently unsupported SOFn types */
-    case M_SOF3:		/* Lossless, Huffman */
-    case M_SOF5:		/* Differential sequential, Huffman */
-    case M_SOF6:		/* Differential progressive, Huffman */
-    case M_SOF7:		/* Differential lossless, Huffman */
-    case M_JPG:			/* Reserved for JPEG extensions */
-    case M_SOF11:		/* Lossless, arithmetic */
-    case M_SOF13:		/* Differential sequential, arithmetic */
-    case M_SOF14:		/* Differential progressive, arithmetic */
-    case M_SOF15:		/* Differential lossless, arithmetic */
-      ERREXIT1(cinfo, JERR_SOF_UNSUPPORTED, cinfo->unread_marker);
-      break;
-
-    case M_SOS:
-      if (! get_sos(cinfo))
-	return JPEG_SUSPENDED;
-      cinfo->unread_marker = 0;	/* processed the marker */
-      return JPEG_REACHED_SOS;
-    
-    case M_EOI:
-      TRACEMS(cinfo, 1, JTRC_EOI);
-      cinfo->unread_marker = 0;	/* processed the marker */
-      return JPEG_REACHED_EOI;
-      
-    case M_DAC:
-      if (! get_dac(cinfo))
-	return JPEG_SUSPENDED;
-      break;
-      
-    case M_DHT:
-      if (! get_dht(cinfo))
-	return JPEG_SUSPENDED;
-      break;
-      
-    case M_DQT:
-      if (! get_dqt(cinfo))
-	return JPEG_SUSPENDED;
-      break;
-      
-    case M_DRI:
-      if (! get_dri(cinfo))
-	return JPEG_SUSPENDED;
-      break;
-      
-    case M_APP0:
-    case M_APP1:
-    case M_APP2:
-    case M_APP3:
-    case M_APP4:
-    case M_APP5:
-    case M_APP6:
-    case M_APP7:
-    case M_APP8:
-    case M_APP9:
-    case M_APP10:
-    case M_APP11:
-    case M_APP12:
-    case M_APP13:
-    case M_APP14:
-    case M_APP15:
-      if (! (*((my_marker_ptr) cinfo->marker)->process_APPn[
-		cinfo->unread_marker - (int) M_APP0]) (cinfo))
-	return JPEG_SUSPENDED;
-      break;
-      
-    case M_COM:
-      if (! (*((my_marker_ptr) cinfo->marker)->process_COM) (cinfo))
-	return JPEG_SUSPENDED;
-      break;
-
-    case M_RST0:		/* these are all parameterless */
-    case M_RST1:
-    case M_RST2:
-    case M_RST3:
-    case M_RST4:
-    case M_RST5:
-    case M_RST6:
-    case M_RST7:
-    case M_TEM:
-      TRACEMS1(cinfo, 1, JTRC_PARMLESS_MARKER, cinfo->unread_marker);
-      break;
-
-    case M_DNL:			/* Ignore DNL ... perhaps the wrong thing */
-      if (! skip_variable(cinfo))
-	return JPEG_SUSPENDED;
-      break;
-
-    default:			/* must be DHP, EXP, JPGn, or RESn */
-      /* For now, we treat the reserved markers as fatal errors since they are
-       * likely to be used to signal incompatible JPEG Part 3 extensions.
-       * Once the JPEG 3 version-number marker is well defined, this code
-       * ought to change!
-       */
-      ERREXIT1(cinfo, JERR_UNKNOWN_MARKER, cinfo->unread_marker);
-      break;
-    }
-    /* Successfully processed marker, so reset state variable */
-    cinfo->unread_marker = 0;
-  } /* end loop */
-}
-
-
-/*
- * Read a restart marker, which is expected to appear next in the datastream;
- * if the marker is not there, take appropriate recovery action.
- * Returns FALSE if suspension is required.
- *
- * This is called by the entropy decoder after it has read an appropriate
- * number of MCUs.  cinfo->unread_marker may be nonzero if the entropy decoder
- * has already read a marker from the data source.  Under normal conditions
- * cinfo->unread_marker will be reset to 0 before returning; if not reset,
- * it holds a marker which the decoder will be unable to read past.
- */
-
-METHODDEF(boolean)
-read_restart_marker (j_decompress_ptr cinfo)
-{
-  /* Obtain a marker unless we already did. */
-  /* Note that next_marker will complain if it skips any data. */
-  if (cinfo->unread_marker == 0) {
-    if (! next_marker(cinfo))
-      return FALSE;
-  }
-
-  if (cinfo->unread_marker ==
-      ((int) M_RST0 + cinfo->marker->next_restart_num)) {
-    /* Normal case --- swallow the marker and let entropy decoder continue */
-    TRACEMS1(cinfo, 3, JTRC_RST, cinfo->marker->next_restart_num);
-    cinfo->unread_marker = 0;
-  } else {
-    /* Uh-oh, the restart markers have been messed up. */
-    /* Let the data source manager determine how to resync. */
-    if (! (*cinfo->src->resync_to_restart) (cinfo,
-					    cinfo->marker->next_restart_num))
-      return FALSE;
-  }
-
-  /* Update next-restart state */
-  cinfo->marker->next_restart_num = (cinfo->marker->next_restart_num + 1) & 7;
-
-  return TRUE;
-}
-
-
-/*
- * This is the default resync_to_restart method for data source managers
- * to use if they don't have any better approach.  Some data source managers
- * may be able to back up, or may have additional knowledge about the data
- * which permits a more intelligent recovery strategy; such managers would
- * presumably supply their own resync method.
- *
- * read_restart_marker calls resync_to_restart if it finds a marker other than
- * the restart marker it was expecting.  (This code is *not* used unless
- * a nonzero restart interval has been declared.)  cinfo->unread_marker is
- * the marker code actually found (might be anything, except 0 or FF).
- * The desired restart marker number (0..7) is passed as a parameter.
- * This routine is supposed to apply whatever error recovery strategy seems
- * appropriate in order to position the input stream to the next data segment.
- * Note that cinfo->unread_marker is treated as a marker appearing before
- * the current data-source input point; usually it should be reset to zero
- * before returning.
- * Returns FALSE if suspension is required.
- *
- * This implementation is substantially constrained by wanting to treat the
- * input as a data stream; this means we can't back up.  Therefore, we have
- * only the following actions to work with:
- *   1. Simply discard the marker and let the entropy decoder resume at next
- *      byte of file.
- *   2. Read forward until we find another marker, discarding intervening
- *      data.  (In theory we could look ahead within the current bufferload,
- *      without having to discard data if we don't find the desired marker.
- *      This idea is not implemented here, in part because it makes behavior
- *      dependent on buffer size and chance buffer-boundary positions.)
- *   3. Leave the marker unread (by failing to zero cinfo->unread_marker).
- *      This will cause the entropy decoder to process an empty data segment,
- *      inserting dummy zeroes, and then we will reprocess the marker.
- *
- * #2 is appropriate if we think the desired marker lies ahead, while #3 is
- * appropriate if the found marker is a future restart marker (indicating
- * that we have missed the desired restart marker, probably because it got
- * corrupted).
- * We apply #2 or #3 if the found marker is a restart marker no more than
- * two counts behind or ahead of the expected one.  We also apply #2 if the
- * found marker is not a legal JPEG marker code (it's certainly bogus data).
- * If the found marker is a restart marker more than 2 counts away, we do #1
- * (too much risk that the marker is erroneous; with luck we will be able to
- * resync at some future point).
- * For any valid non-restart JPEG marker, we apply #3.  This keeps us from
- * overrunning the end of a scan.  An implementation limited to single-scan
- * files might find it better to apply #2 for markers other than EOI, since
- * any other marker would have to be bogus data in that case.
- */
-
-GLOBAL(boolean)
-jpeg_resync_to_restart (j_decompress_ptr cinfo, int desired)
-{
-  int marker = cinfo->unread_marker;
-  int action = 1;
-  
-  /* Always put up a warning. */
-  WARNMS2(cinfo, JWRN_MUST_RESYNC, marker, desired);
-  
-  /* Outer loop handles repeated decision after scanning forward. */
-  for (;;) {
-    if (marker < (int) M_SOF0)
-      action = 2;		/* invalid marker */
-    else if (marker < (int) M_RST0 || marker > (int) M_RST7)
-      action = 3;		/* valid non-restart marker */
-    else {
-      if (marker == ((int) M_RST0 + ((desired+1) & 7)) ||
-	  marker == ((int) M_RST0 + ((desired+2) & 7)))
-	action = 3;		/* one of the next two expected restarts */
-      else if (marker == ((int) M_RST0 + ((desired-1) & 7)) ||
-	       marker == ((int) M_RST0 + ((desired-2) & 7)))
-	action = 2;		/* a prior restart, so advance */
-      else
-	action = 1;		/* desired restart or too far away */
-    }
-    TRACEMS2(cinfo, 4, JTRC_RECOVERY_ACTION, marker, action);
-    switch (action) {
-    case 1:
-      /* Discard marker and let entropy decoder resume processing. */
-      cinfo->unread_marker = 0;
-      return TRUE;
-    case 2:
-      /* Scan to the next marker, and repeat the decision loop. */
-      if (! next_marker(cinfo))
-	return FALSE;
-      marker = cinfo->unread_marker;
-      break;
-    case 3:
-      /* Return without advancing past this marker. */
-      /* Entropy decoder will be forced to process an empty segment. */
-      return TRUE;
-    }
-  } /* end loop */
-}
-
-
-/*
- * Reset marker processing state to begin a fresh datastream.
- */
-
-METHODDEF(void)
-reset_marker_reader (j_decompress_ptr cinfo)
-{
-  my_marker_ptr marker = (my_marker_ptr) cinfo->marker;
-
-  cinfo->comp_info = NULL;		/* until allocated by get_sof */
-  cinfo->input_scan_number = 0;		/* no SOS seen yet */
-  cinfo->unread_marker = 0;		/* no pending marker */
-  marker->pub.saw_SOI = FALSE;		/* set internal state too */
-  marker->pub.saw_SOF = FALSE;
-  marker->pub.discarded_bytes = 0;
-  marker->cur_marker = NULL;
-}
-
-
-/*
- * Initialize the marker reader module.
- * This is called only once, when the decompression object is created.
- */
-
-GLOBAL(void)
-jinit_marker_reader (j_decompress_ptr cinfo)
-{
-  my_marker_ptr marker;
-  int i;
-
-  /* Create subobject in permanent pool */
-  marker = (my_marker_ptr)
-    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_PERMANENT,
-				SIZEOF(my_marker_reader));
-  cinfo->marker = (struct jpeg_marker_reader *) marker;
-  /* Initialize public method pointers */
-  marker->pub.reset_marker_reader = reset_marker_reader;
-  marker->pub.read_markers = read_markers;
-  marker->pub.read_restart_marker = read_restart_marker;
-  /* Initialize COM/APPn processing.
-   * By default, we examine and then discard APP0 and APP14,
-   * but simply discard COM and all other APPn.
-   */
-  marker->process_COM = skip_variable;
-  marker->length_limit_COM = 0;
-  for (i = 0; i < 16; i++) {
-    marker->process_APPn[i] = skip_variable;
-    marker->length_limit_APPn[i] = 0;
-  }
-  marker->process_APPn[0] = get_interesting_appn;
-  marker->process_APPn[14] = get_interesting_appn;
-  /* Reset marker processing state */
-  reset_marker_reader(cinfo);
-}
-
-
-/*
- * Control saving of COM and APPn markers into marker_list.
- */
-
-#ifdef SAVE_MARKERS_SUPPORTED
-
-GLOBAL(void)
-jpeg_save_markers (j_decompress_ptr cinfo, int marker_code,
-		   unsigned int length_limit)
-{
-  my_marker_ptr marker = (my_marker_ptr) cinfo->marker;
-  long maxlength;
-  jpeg_marker_parser_method processor;
-
-  /* Length limit mustn't be larger than what we can allocate
-   * (should only be a concern in a 16-bit environment).
-   */
-  maxlength = cinfo->mem->max_alloc_chunk - SIZEOF(struct jpeg_marker_struct);
-  if (((long) length_limit) > maxlength)
-    length_limit = (unsigned int) maxlength;
-
-  /* Choose processor routine to use.
-   * APP0/APP14 have special requirements.
-   */
-  if (length_limit) {
-    processor = save_marker;
-    /* If saving APP0/APP14, save at least enough for our internal use. */
-    if (marker_code == (int) M_APP0 && length_limit < APP0_DATA_LEN)
-      length_limit = APP0_DATA_LEN;
-    else if (marker_code == (int) M_APP14 && length_limit < APP14_DATA_LEN)
-      length_limit = APP14_DATA_LEN;
-  } else {
-    processor = skip_variable;
-    /* If discarding APP0/APP14, use our regular on-the-fly processor. */
-    if (marker_code == (int) M_APP0 || marker_code == (int) M_APP14)
-      processor = get_interesting_appn;
-  }
-
-  if (marker_code == (int) M_COM) {
-    marker->process_COM = processor;
-    marker->length_limit_COM = length_limit;
-  } else if (marker_code >= (int) M_APP0 && marker_code <= (int) M_APP15) {
-    marker->process_APPn[marker_code - (int) M_APP0] = processor;
-    marker->length_limit_APPn[marker_code - (int) M_APP0] = length_limit;
-  } else
-    ERREXIT1(cinfo, JERR_UNKNOWN_MARKER, marker_code);
-}
-
-#endif /* SAVE_MARKERS_SUPPORTED */
-
-
-/*
- * Install a special processing method for COM or APPn markers.
- */
-
-GLOBAL(void)
-jpeg_set_marker_processor (j_decompress_ptr cinfo, int marker_code,
-			   jpeg_marker_parser_method routine)
-{
-  my_marker_ptr marker = (my_marker_ptr) cinfo->marker;
-
-  if (marker_code == (int) M_COM)
-    marker->process_COM = routine;
-  else if (marker_code >= (int) M_APP0 && marker_code <= (int) M_APP15)
-    marker->process_APPn[marker_code - (int) M_APP0] = routine;
-  else
-    ERREXIT1(cinfo, JERR_UNKNOWN_MARKER, marker_code);
-}
+/*
+ * jdmarker.c
+ *
+ * Copyright (C) 1991-1998, Thomas G. Lane.
+ * Modified 2009-2012 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains routines to decode JPEG datastream markers.
+ * Most of the complexity arises from our desire to support input
+ * suspension: if not all of the data for a marker is available,
+ * we must exit back to the application.  On resumption, we reprocess
+ * the marker.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+typedef enum {			/* JPEG marker codes */
+  M_SOF0  = 0xc0,
+  M_SOF1  = 0xc1,
+  M_SOF2  = 0xc2,
+  M_SOF3  = 0xc3,
+
+  M_SOF5  = 0xc5,
+  M_SOF6  = 0xc6,
+  M_SOF7  = 0xc7,
+
+  M_JPG   = 0xc8,
+  M_SOF9  = 0xc9,
+  M_SOF10 = 0xca,
+  M_SOF11 = 0xcb,
+
+  M_SOF13 = 0xcd,
+  M_SOF14 = 0xce,
+  M_SOF15 = 0xcf,
+
+  M_DHT   = 0xc4,
+
+  M_DAC   = 0xcc,
+
+  M_RST0  = 0xd0,
+  M_RST1  = 0xd1,
+  M_RST2  = 0xd2,
+  M_RST3  = 0xd3,
+  M_RST4  = 0xd4,
+  M_RST5  = 0xd5,
+  M_RST6  = 0xd6,
+  M_RST7  = 0xd7,
+
+  M_SOI   = 0xd8,
+  M_EOI   = 0xd9,
+  M_SOS   = 0xda,
+  M_DQT   = 0xdb,
+  M_DNL   = 0xdc,
+  M_DRI   = 0xdd,
+  M_DHP   = 0xde,
+  M_EXP   = 0xdf,
+
+  M_APP0  = 0xe0,
+  M_APP1  = 0xe1,
+  M_APP2  = 0xe2,
+  M_APP3  = 0xe3,
+  M_APP4  = 0xe4,
+  M_APP5  = 0xe5,
+  M_APP6  = 0xe6,
+  M_APP7  = 0xe7,
+  M_APP8  = 0xe8,
+  M_APP9  = 0xe9,
+  M_APP10 = 0xea,
+  M_APP11 = 0xeb,
+  M_APP12 = 0xec,
+  M_APP13 = 0xed,
+  M_APP14 = 0xee,
+  M_APP15 = 0xef,
+
+  M_JPG0  = 0xf0,
+  M_JPG8  = 0xf8,
+  M_JPG13 = 0xfd,
+  M_COM   = 0xfe,
+
+  M_TEM   = 0x01,
+
+  M_ERROR = 0x100
+} JPEG_MARKER;
+
+
+/* Private state */
+
+typedef struct {
+  struct jpeg_marker_reader pub; /* public fields */
+
+  /* Application-overridable marker processing methods */
+  jpeg_marker_parser_method process_COM;
+  jpeg_marker_parser_method process_APPn[16];
+
+  /* Limit on marker data length to save for each marker type */
+  unsigned int length_limit_COM;
+  unsigned int length_limit_APPn[16];
+
+  /* Status of COM/APPn marker saving */
+  jpeg_saved_marker_ptr cur_marker;	/* NULL if not processing a marker */
+  unsigned int bytes_read;		/* data bytes read so far in marker */
+  /* Note: cur_marker is not linked into marker_list until it's all read. */
+} my_marker_reader;
+
+typedef my_marker_reader * my_marker_ptr;
+
+
+/*
+ * Macros for fetching data from the data source module.
+ *
+ * At all times, cinfo->src->next_input_byte and ->bytes_in_buffer reflect
+ * the current restart point; we update them only when we have reached a
+ * suitable place to restart if a suspension occurs.
+ */
+
+/* Declare and initialize local copies of input pointer/count */
+#define INPUT_VARS(cinfo)  \
+	struct jpeg_source_mgr * datasrc = (cinfo)->src;  \
+	const JOCTET * next_input_byte = datasrc->next_input_byte;  \
+	size_t bytes_in_buffer = datasrc->bytes_in_buffer
+
+/* Unload the local copies --- do this only at a restart boundary */
+#define INPUT_SYNC(cinfo)  \
+	( datasrc->next_input_byte = next_input_byte,  \
+	  datasrc->bytes_in_buffer = bytes_in_buffer )
+
+/* Reload the local copies --- used only in MAKE_BYTE_AVAIL */
+#define INPUT_RELOAD(cinfo)  \
+	( next_input_byte = datasrc->next_input_byte,  \
+	  bytes_in_buffer = datasrc->bytes_in_buffer )
+
+/* Internal macro for INPUT_BYTE and INPUT_2BYTES: make a byte available.
+ * Note we do *not* do INPUT_SYNC before calling fill_input_buffer,
+ * but we must reload the local copies after a successful fill.
+ */
+#define MAKE_BYTE_AVAIL(cinfo,action)  \
+	if (bytes_in_buffer == 0) {  \
+	  if (! (*datasrc->fill_input_buffer) (cinfo))  \
+	    { action; }  \
+	  INPUT_RELOAD(cinfo);  \
+	}
+
+/* Read a byte into variable V.
+ * If must suspend, take the specified action (typically "return FALSE").
+ */
+#define INPUT_BYTE(cinfo,V,action)  \
+	MAKESTMT( MAKE_BYTE_AVAIL(cinfo,action); \
+		  bytes_in_buffer--; \
+		  V = GETJOCTET(*next_input_byte++); )
+
+/* As above, but read two bytes interpreted as an unsigned 16-bit integer.
+ * V should be declared unsigned int or perhaps INT32.
+ */
+#define INPUT_2BYTES(cinfo,V,action)  \
+	MAKESTMT( MAKE_BYTE_AVAIL(cinfo,action); \
+		  bytes_in_buffer--; \
+		  V = ((unsigned int) GETJOCTET(*next_input_byte++)) << 8; \
+		  MAKE_BYTE_AVAIL(cinfo,action); \
+		  bytes_in_buffer--; \
+		  V += GETJOCTET(*next_input_byte++); )
+
+
+/*
+ * Routines to process JPEG markers.
+ *
+ * Entry condition: JPEG marker itself has been read and its code saved
+ *   in cinfo->unread_marker; input restart point is just after the marker.
+ *
+ * Exit: if return TRUE, have read and processed any parameters, and have
+ *   updated the restart point to point after the parameters.
+ *   If return FALSE, was forced to suspend before reaching end of
+ *   marker parameters; restart point has not been moved.  Same routine
+ *   will be called again after application supplies more input data.
+ *
+ * This approach to suspension assumes that all of a marker's parameters
+ * can fit into a single input bufferload.  This should hold for "normal"
+ * markers.  Some COM/APPn markers might have large parameter segments
+ * that might not fit.  If we are simply dropping such a marker, we use
+ * skip_input_data to get past it, and thereby put the problem on the
+ * source manager's shoulders.  If we are saving the marker's contents
+ * into memory, we use a slightly different convention: when forced to
+ * suspend, the marker processor updates the restart point to the end of
+ * what it's consumed (ie, the end of the buffer) before returning FALSE.
+ * On resumption, cinfo->unread_marker still contains the marker code,
+ * but the data source will point to the next chunk of marker data.
+ * The marker processor must retain internal state to deal with this.
+ *
+ * Note that we don't bother to avoid duplicate trace messages if a
+ * suspension occurs within marker parameters.  Other side effects
+ * require more care.
+ */
+
+
+LOCAL(boolean)
+get_soi (j_decompress_ptr cinfo)
+/* Process an SOI marker */
+{
+  int i;
+  
+  TRACEMS(cinfo, 1, JTRC_SOI);
+
+  if (cinfo->marker->saw_SOI)
+    ERREXIT(cinfo, JERR_SOI_DUPLICATE);
+
+  /* Reset all parameters that are defined to be reset by SOI */
+
+  for (i = 0; i < NUM_ARITH_TBLS; i++) {
+    cinfo->arith_dc_L[i] = 0;
+    cinfo->arith_dc_U[i] = 1;
+    cinfo->arith_ac_K[i] = 5;
+  }
+  cinfo->restart_interval = 0;
+
+  /* Set initial assumptions for colorspace etc */
+
+  cinfo->jpeg_color_space = JCS_UNKNOWN;
+  cinfo->color_transform = JCT_NONE;
+  cinfo->CCIR601_sampling = FALSE; /* Assume non-CCIR sampling??? */
+
+  cinfo->saw_JFIF_marker = FALSE;
+  cinfo->JFIF_major_version = 1; /* set default JFIF APP0 values */
+  cinfo->JFIF_minor_version = 1;
+  cinfo->density_unit = 0;
+  cinfo->X_density = 1;
+  cinfo->Y_density = 1;
+  cinfo->saw_Adobe_marker = FALSE;
+  cinfo->Adobe_transform = 0;
+
+  cinfo->marker->saw_SOI = TRUE;
+
+  return TRUE;
+}
+
+
+LOCAL(boolean)
+get_sof (j_decompress_ptr cinfo, boolean is_baseline, boolean is_prog,
+	 boolean is_arith)
+/* Process a SOFn marker */
+{
+  INT32 length;
+  int c, ci, i;
+  jpeg_component_info * compptr;
+  INPUT_VARS(cinfo);
+
+  cinfo->is_baseline = is_baseline;
+  cinfo->progressive_mode = is_prog;
+  cinfo->arith_code = is_arith;
+
+  INPUT_2BYTES(cinfo, length, return FALSE);
+
+  INPUT_BYTE(cinfo, cinfo->data_precision, return FALSE);
+  INPUT_2BYTES(cinfo, cinfo->image_height, return FALSE);
+  INPUT_2BYTES(cinfo, cinfo->image_width, return FALSE);
+  INPUT_BYTE(cinfo, cinfo->num_components, return FALSE);
+
+  length -= 8;
+
+  TRACEMS4(cinfo, 1, JTRC_SOF, cinfo->unread_marker,
+	   (int) cinfo->image_width, (int) cinfo->image_height,
+	   cinfo->num_components);
+
+  if (cinfo->marker->saw_SOF)
+    ERREXIT(cinfo, JERR_SOF_DUPLICATE);
+
+  /* We don't support files in which the image height is initially specified */
+  /* as 0 and is later redefined by DNL.  As long as we have to check that,  */
+  /* might as well have a general sanity check. */
+  if (cinfo->image_height <= 0 || cinfo->image_width <= 0
+      || cinfo->num_components <= 0)
+    ERREXIT(cinfo, JERR_EMPTY_IMAGE);
+
+  if (length != (cinfo->num_components * 3))
+    ERREXIT(cinfo, JERR_BAD_LENGTH);
+
+  if (cinfo->comp_info == NULL)	/* do only once, even if suspend */
+    cinfo->comp_info = (jpeg_component_info *) (*cinfo->mem->alloc_small)
+			((j_common_ptr) cinfo, JPOOL_IMAGE,
+			 cinfo->num_components * SIZEOF(jpeg_component_info));
+
+  for (ci = 0; ci < cinfo->num_components; ci++) {
+    INPUT_BYTE(cinfo, c, return FALSE);
+    /* Check to see whether component id has already been seen   */
+    /* (in violation of the spec, but unfortunately seen in some */
+    /* files).  If so, create "fake" component id equal to the   */
+    /* max id seen so far + 1. */
+    for (i = 0, compptr = cinfo->comp_info; i < ci; i++, compptr++) {
+      if (c == compptr->component_id) {
+	compptr = cinfo->comp_info;
+	c = compptr->component_id;
+	compptr++;
+	for (i = 1; i < ci; i++, compptr++) {
+	  if (compptr->component_id > c) c = compptr->component_id;
+	}
+	c++;
+	break;
+      }
+    }
+    compptr->component_id = c;
+    compptr->component_index = ci;
+    INPUT_BYTE(cinfo, c, return FALSE);
+    compptr->h_samp_factor = (c >> 4) & 15;
+    compptr->v_samp_factor = (c     ) & 15;
+    INPUT_BYTE(cinfo, compptr->quant_tbl_no, return FALSE);
+
+    TRACEMS4(cinfo, 1, JTRC_SOF_COMPONENT,
+	     compptr->component_id, compptr->h_samp_factor,
+	     compptr->v_samp_factor, compptr->quant_tbl_no);
+  }
+
+  cinfo->marker->saw_SOF = TRUE;
+
+  INPUT_SYNC(cinfo);
+  return TRUE;
+}
+
+
+LOCAL(boolean)
+get_sos (j_decompress_ptr cinfo)
+/* Process a SOS marker */
+{
+  INT32 length;
+  int c, ci, i, n;
+  jpeg_component_info * compptr;
+  INPUT_VARS(cinfo);
+
+  if (! cinfo->marker->saw_SOF)
+    ERREXITS(cinfo, JERR_SOF_BEFORE, "SOS");
+
+  INPUT_2BYTES(cinfo, length, return FALSE);
+
+  INPUT_BYTE(cinfo, n, return FALSE); /* Number of components */
+
+  TRACEMS1(cinfo, 1, JTRC_SOS, n);
+
+  if (length != (n * 2 + 6) || n > MAX_COMPS_IN_SCAN ||
+      (n == 0 && !cinfo->progressive_mode))
+      /* pseudo SOS marker only allowed in progressive mode */
+    ERREXIT(cinfo, JERR_BAD_LENGTH);
+
+  cinfo->comps_in_scan = n;
+
+  /* Collect the component-spec parameters */
+
+  for (i = 0; i < n; i++) {
+    INPUT_BYTE(cinfo, c, return FALSE);
+
+    /* Detect the case where component id's are not unique, and, if so, */
+    /* create a fake component id using the same logic as in get_sof.   */
+    for (ci = 0; ci < i; ci++) {
+      if (c == cinfo->cur_comp_info[ci]->component_id) {
+	c = cinfo->cur_comp_info[0]->component_id;
+	for (ci = 1; ci < i; ci++) {
+	  compptr = cinfo->cur_comp_info[ci];
+	  if (compptr->component_id > c) c = compptr->component_id;
+	}
+	c++;
+	break;
+      }
+    }
+
+    for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+	 ci++, compptr++) {
+      if (c == compptr->component_id)
+	goto id_found;
+    }
+
+    ERREXIT1(cinfo, JERR_BAD_COMPONENT_ID, c);
+
+  id_found:
+
+    cinfo->cur_comp_info[i] = compptr;
+    INPUT_BYTE(cinfo, c, return FALSE);
+    compptr->dc_tbl_no = (c >> 4) & 15;
+    compptr->ac_tbl_no = (c     ) & 15;
+
+    TRACEMS3(cinfo, 1, JTRC_SOS_COMPONENT, compptr->component_id,
+	     compptr->dc_tbl_no, compptr->ac_tbl_no);
+  }
+
+  /* Collect the additional scan parameters Ss, Se, Ah/Al. */
+  INPUT_BYTE(cinfo, c, return FALSE);
+  cinfo->Ss = c;
+  INPUT_BYTE(cinfo, c, return FALSE);
+  cinfo->Se = c;
+  INPUT_BYTE(cinfo, c, return FALSE);
+  cinfo->Ah = (c >> 4) & 15;
+  cinfo->Al = (c     ) & 15;
+
+  TRACEMS4(cinfo, 1, JTRC_SOS_PARAMS, cinfo->Ss, cinfo->Se,
+	   cinfo->Ah, cinfo->Al);
+
+  /* Prepare to scan data & restart markers */
+  cinfo->marker->next_restart_num = 0;
+
+  /* Count another (non-pseudo) SOS marker */
+  if (n) cinfo->input_scan_number++;
+
+  INPUT_SYNC(cinfo);
+  return TRUE;
+}
+
+
+#ifdef D_ARITH_CODING_SUPPORTED
+
+LOCAL(boolean)
+get_dac (j_decompress_ptr cinfo)
+/* Process a DAC marker */
+{
+  INT32 length;
+  int index, val;
+  INPUT_VARS(cinfo);
+
+  INPUT_2BYTES(cinfo, length, return FALSE);
+  length -= 2;
+  
+  while (length > 0) {
+    INPUT_BYTE(cinfo, index, return FALSE);
+    INPUT_BYTE(cinfo, val, return FALSE);
+
+    length -= 2;
+
+    TRACEMS2(cinfo, 1, JTRC_DAC, index, val);
+
+    if (index < 0 || index >= (2*NUM_ARITH_TBLS))
+      ERREXIT1(cinfo, JERR_DAC_INDEX, index);
+
+    if (index >= NUM_ARITH_TBLS) { /* define AC table */
+      cinfo->arith_ac_K[index-NUM_ARITH_TBLS] = (UINT8) val;
+    } else {			/* define DC table */
+      cinfo->arith_dc_L[index] = (UINT8) (val & 0x0F);
+      cinfo->arith_dc_U[index] = (UINT8) (val >> 4);
+      if (cinfo->arith_dc_L[index] > cinfo->arith_dc_U[index])
+	ERREXIT1(cinfo, JERR_DAC_VALUE, val);
+    }
+  }
+
+  if (length != 0)
+    ERREXIT(cinfo, JERR_BAD_LENGTH);
+
+  INPUT_SYNC(cinfo);
+  return TRUE;
+}
+
+#else /* ! D_ARITH_CODING_SUPPORTED */
+
+#define get_dac(cinfo)  skip_variable(cinfo)
+
+#endif /* D_ARITH_CODING_SUPPORTED */
+
+
+LOCAL(boolean)
+get_dht (j_decompress_ptr cinfo)
+/* Process a DHT marker */
+{
+  INT32 length;
+  UINT8 bits[17];
+  UINT8 huffval[256];
+  int i, index, count;
+  JHUFF_TBL **htblptr;
+  INPUT_VARS(cinfo);
+
+  INPUT_2BYTES(cinfo, length, return FALSE);
+  length -= 2;
+  
+  while (length > 16) {
+    INPUT_BYTE(cinfo, index, return FALSE);
+
+    TRACEMS1(cinfo, 1, JTRC_DHT, index);
+      
+    bits[0] = 0;
+    count = 0;
+    for (i = 1; i <= 16; i++) {
+      INPUT_BYTE(cinfo, bits[i], return FALSE);
+      count += bits[i];
+    }
+
+    length -= 1 + 16;
+
+    TRACEMS8(cinfo, 2, JTRC_HUFFBITS,
+	     bits[1], bits[2], bits[3], bits[4],
+	     bits[5], bits[6], bits[7], bits[8]);
+    TRACEMS8(cinfo, 2, JTRC_HUFFBITS,
+	     bits[9], bits[10], bits[11], bits[12],
+	     bits[13], bits[14], bits[15], bits[16]);
+
+    /* Here we just do minimal validation of the counts to avoid walking
+     * off the end of our table space.  jdhuff.c will check more carefully.
+     */
+    if (count > 256 || ((INT32) count) > length)
+      ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
+
+    for (i = 0; i < count; i++)
+      INPUT_BYTE(cinfo, huffval[i], return FALSE);
+
+    length -= count;
+
+    if (index & 0x10) {		/* AC table definition */
+      index -= 0x10;
+      htblptr = &cinfo->ac_huff_tbl_ptrs[index];
+    } else {			/* DC table definition */
+      htblptr = &cinfo->dc_huff_tbl_ptrs[index];
+    }
+
+    if (index < 0 || index >= NUM_HUFF_TBLS)
+      ERREXIT1(cinfo, JERR_DHT_INDEX, index);
+
+    if (*htblptr == NULL)
+      *htblptr = jpeg_alloc_huff_table((j_common_ptr) cinfo);
+  
+    MEMCOPY((*htblptr)->bits, bits, SIZEOF((*htblptr)->bits));
+    MEMCOPY((*htblptr)->huffval, huffval, SIZEOF((*htblptr)->huffval));
+  }
+
+  if (length != 0)
+    ERREXIT(cinfo, JERR_BAD_LENGTH);
+
+  INPUT_SYNC(cinfo);
+  return TRUE;
+}
+
+
+LOCAL(boolean)
+get_dqt (j_decompress_ptr cinfo)
+/* Process a DQT marker */
+{
+  INT32 length, count, i;
+  int n, prec;
+  unsigned int tmp;
+  JQUANT_TBL *quant_ptr;
+  const int *natural_order;
+  INPUT_VARS(cinfo);
+
+  INPUT_2BYTES(cinfo, length, return FALSE);
+  length -= 2;
+
+  while (length > 0) {
+    length--;
+    INPUT_BYTE(cinfo, n, return FALSE);
+    prec = n >> 4;
+    n &= 0x0F;
+
+    TRACEMS2(cinfo, 1, JTRC_DQT, n, prec);
+
+    if (n >= NUM_QUANT_TBLS)
+      ERREXIT1(cinfo, JERR_DQT_INDEX, n);
+      
+    if (cinfo->quant_tbl_ptrs[n] == NULL)
+      cinfo->quant_tbl_ptrs[n] = jpeg_alloc_quant_table((j_common_ptr) cinfo);
+    quant_ptr = cinfo->quant_tbl_ptrs[n];
+
+    if (prec) {
+      if (length < DCTSIZE2 * 2) {
+	/* Initialize full table for safety. */
+	for (i = 0; i < DCTSIZE2; i++) {
+	  quant_ptr->quantval[i] = 1;
+	}
+	count = length >> 1;
+      } else
+	count = DCTSIZE2;
+    } else {
+      if (length < DCTSIZE2) {
+	/* Initialize full table for safety. */
+	for (i = 0; i < DCTSIZE2; i++) {
+	  quant_ptr->quantval[i] = 1;
+	}
+	count = length;
+      } else
+	count = DCTSIZE2;
+    }
+
+    switch (count) {
+    case (2*2): natural_order = jpeg_natural_order2; break;
+    case (3*3): natural_order = jpeg_natural_order3; break;
+    case (4*4): natural_order = jpeg_natural_order4; break;
+    case (5*5): natural_order = jpeg_natural_order5; break;
+    case (6*6): natural_order = jpeg_natural_order6; break;
+    case (7*7): natural_order = jpeg_natural_order7; break;
+    default:    natural_order = jpeg_natural_order;  break;
+    }
+
+    for (i = 0; i < count; i++) {
+      if (prec)
+	INPUT_2BYTES(cinfo, tmp, return FALSE);
+      else
+	INPUT_BYTE(cinfo, tmp, return FALSE);
+      /* We convert the zigzag-order table to natural array order. */
+      quant_ptr->quantval[natural_order[i]] = (UINT16) tmp;
+    }
+
+    if (cinfo->err->trace_level >= 2) {
+      for (i = 0; i < DCTSIZE2; i += 8) {
+	TRACEMS8(cinfo, 2, JTRC_QUANTVALS,
+		 quant_ptr->quantval[i],   quant_ptr->quantval[i+1],
+		 quant_ptr->quantval[i+2], quant_ptr->quantval[i+3],
+		 quant_ptr->quantval[i+4], quant_ptr->quantval[i+5],
+		 quant_ptr->quantval[i+6], quant_ptr->quantval[i+7]);
+      }
+    }
+
+    length -= count;
+    if (prec) length -= count;
+  }
+
+  if (length != 0)
+    ERREXIT(cinfo, JERR_BAD_LENGTH);
+
+  INPUT_SYNC(cinfo);
+  return TRUE;
+}
+
+
+LOCAL(boolean)
+get_dri (j_decompress_ptr cinfo)
+/* Process a DRI marker */
+{
+  INT32 length;
+  unsigned int tmp;
+  INPUT_VARS(cinfo);
+
+  INPUT_2BYTES(cinfo, length, return FALSE);
+  
+  if (length != 4)
+    ERREXIT(cinfo, JERR_BAD_LENGTH);
+
+  INPUT_2BYTES(cinfo, tmp, return FALSE);
+
+  TRACEMS1(cinfo, 1, JTRC_DRI, tmp);
+
+  cinfo->restart_interval = tmp;
+
+  INPUT_SYNC(cinfo);
+  return TRUE;
+}
+
+
+LOCAL(boolean)
+get_lse (j_decompress_ptr cinfo)
+/* Process an LSE marker */
+{
+  INT32 length;
+  unsigned int tmp;
+  int cid;
+  INPUT_VARS(cinfo);
+
+  if (! cinfo->marker->saw_SOF)
+    ERREXITS(cinfo, JERR_SOF_BEFORE, "LSE");
+
+  if (cinfo->num_components < 3) goto bad;
+
+  INPUT_2BYTES(cinfo, length, return FALSE);
+
+  if (length != 24)
+    ERREXIT(cinfo, JERR_BAD_LENGTH);
+
+  INPUT_BYTE(cinfo, tmp, return FALSE);
+  if (tmp != 0x0D)	/* ID inverse transform specification */
+    ERREXIT1(cinfo, JERR_UNKNOWN_MARKER, cinfo->unread_marker);
+  INPUT_2BYTES(cinfo, tmp, return FALSE);
+  if (tmp != MAXJSAMPLE) goto bad;		/* MAXTRANS */
+  INPUT_BYTE(cinfo, tmp, return FALSE);
+  if (tmp != 3) goto bad;			/* Nt=3 */
+  INPUT_BYTE(cinfo, cid, return FALSE);
+  if (cid != cinfo->comp_info[1].component_id) goto bad;
+  INPUT_BYTE(cinfo, cid, return FALSE);
+  if (cid != cinfo->comp_info[0].component_id) goto bad;
+  INPUT_BYTE(cinfo, cid, return FALSE);
+  if (cid != cinfo->comp_info[2].component_id) goto bad;
+  INPUT_BYTE(cinfo, tmp, return FALSE);
+  if (tmp != 0x80) goto bad;		/* F1: CENTER1=1, NORM1=0 */
+  INPUT_2BYTES(cinfo, tmp, return FALSE);
+  if (tmp != 0) goto bad;			/* A(1,1)=0 */
+  INPUT_2BYTES(cinfo, tmp, return FALSE);
+  if (tmp != 0) goto bad;			/* A(1,2)=0 */
+  INPUT_BYTE(cinfo, tmp, return FALSE);
+  if (tmp != 0) goto bad;		/* F2: CENTER2=0, NORM2=0 */
+  INPUT_2BYTES(cinfo, tmp, return FALSE);
+  if (tmp != 1) goto bad;			/* A(2,1)=1 */
+  INPUT_2BYTES(cinfo, tmp, return FALSE);
+  if (tmp != 0) goto bad;			/* A(2,2)=0 */
+  INPUT_BYTE(cinfo, tmp, return FALSE);
+  if (tmp != 0) goto bad;		/* F3: CENTER3=0, NORM3=0 */
+  INPUT_2BYTES(cinfo, tmp, return FALSE);
+  if (tmp != 1) goto bad;			/* A(3,1)=1 */
+  INPUT_2BYTES(cinfo, tmp, return FALSE);
+  if (tmp != 0) {				/* A(3,2)=0 */
+    bad:
+    ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+  }
+
+  /* OK, valid transform that we can handle. */
+  cinfo->color_transform = JCT_SUBTRACT_GREEN;
+
+  INPUT_SYNC(cinfo);
+  return TRUE;
+}
+
+
+/*
+ * Routines for processing APPn and COM markers.
+ * These are either saved in memory or discarded, per application request.
+ * APP0 and APP14 are specially checked to see if they are
+ * JFIF and Adobe markers, respectively.
+ */
+
+#define APP0_DATA_LEN	14	/* Length of interesting data in APP0 */
+#define APP14_DATA_LEN	12	/* Length of interesting data in APP14 */
+#define APPN_DATA_LEN	14	/* Must be the largest of the above!! */
+
+
+LOCAL(void)
+examine_app0 (j_decompress_ptr cinfo, JOCTET FAR * data,
+	      unsigned int datalen, INT32 remaining)
+/* Examine first few bytes from an APP0.
+ * Take appropriate action if it is a JFIF marker.
+ * datalen is # of bytes at data[], remaining is length of rest of marker data.
+ */
+{
+  INT32 totallen = (INT32) datalen + remaining;
+
+  if (datalen >= APP0_DATA_LEN &&
+      GETJOCTET(data[0]) == 0x4A &&
+      GETJOCTET(data[1]) == 0x46 &&
+      GETJOCTET(data[2]) == 0x49 &&
+      GETJOCTET(data[3]) == 0x46 &&
+      GETJOCTET(data[4]) == 0) {
+    /* Found JFIF APP0 marker: save info */
+    cinfo->saw_JFIF_marker = TRUE;
+    cinfo->JFIF_major_version = GETJOCTET(data[5]);
+    cinfo->JFIF_minor_version = GETJOCTET(data[6]);
+    cinfo->density_unit = GETJOCTET(data[7]);
+    cinfo->X_density = (GETJOCTET(data[8]) << 8) + GETJOCTET(data[9]);
+    cinfo->Y_density = (GETJOCTET(data[10]) << 8) + GETJOCTET(data[11]);
+    /* Check version.
+     * Major version must be 1, anything else signals an incompatible change.
+     * (We used to treat this as an error, but now it's a nonfatal warning,
+     * because some bozo at Hijaak couldn't read the spec.)
+     * Minor version should be 0..2, but process anyway if newer.
+     */
+    if (cinfo->JFIF_major_version != 1)
+      WARNMS2(cinfo, JWRN_JFIF_MAJOR,
+	      cinfo->JFIF_major_version, cinfo->JFIF_minor_version);
+    /* Generate trace messages */
+    TRACEMS5(cinfo, 1, JTRC_JFIF,
+	     cinfo->JFIF_major_version, cinfo->JFIF_minor_version,
+	     cinfo->X_density, cinfo->Y_density, cinfo->density_unit);
+    /* Validate thumbnail dimensions and issue appropriate messages */
+    if (GETJOCTET(data[12]) | GETJOCTET(data[13]))
+      TRACEMS2(cinfo, 1, JTRC_JFIF_THUMBNAIL,
+	       GETJOCTET(data[12]), GETJOCTET(data[13]));
+    totallen -= APP0_DATA_LEN;
+    if (totallen !=
+	((INT32)GETJOCTET(data[12]) * (INT32)GETJOCTET(data[13]) * (INT32) 3))
+      TRACEMS1(cinfo, 1, JTRC_JFIF_BADTHUMBNAILSIZE, (int) totallen);
+  } else if (datalen >= 6 &&
+      GETJOCTET(data[0]) == 0x4A &&
+      GETJOCTET(data[1]) == 0x46 &&
+      GETJOCTET(data[2]) == 0x58 &&
+      GETJOCTET(data[3]) == 0x58 &&
+      GETJOCTET(data[4]) == 0) {
+    /* Found JFIF "JFXX" extension APP0 marker */
+    /* The library doesn't actually do anything with these,
+     * but we try to produce a helpful trace message.
+     */
+    switch (GETJOCTET(data[5])) {
+    case 0x10:
+      TRACEMS1(cinfo, 1, JTRC_THUMB_JPEG, (int) totallen);
+      break;
+    case 0x11:
+      TRACEMS1(cinfo, 1, JTRC_THUMB_PALETTE, (int) totallen);
+      break;
+    case 0x13:
+      TRACEMS1(cinfo, 1, JTRC_THUMB_RGB, (int) totallen);
+      break;
+    default:
+      TRACEMS2(cinfo, 1, JTRC_JFIF_EXTENSION,
+	       GETJOCTET(data[5]), (int) totallen);
+      break;
+    }
+  } else {
+    /* Start of APP0 does not match "JFIF" or "JFXX", or too short */
+    TRACEMS1(cinfo, 1, JTRC_APP0, (int) totallen);
+  }
+}
+
+
+LOCAL(void)
+examine_app14 (j_decompress_ptr cinfo, JOCTET FAR * data,
+	       unsigned int datalen, INT32 remaining)
+/* Examine first few bytes from an APP14.
+ * Take appropriate action if it is an Adobe marker.
+ * datalen is # of bytes at data[], remaining is length of rest of marker data.
+ */
+{
+  unsigned int version, flags0, flags1, transform;
+
+  if (datalen >= APP14_DATA_LEN &&
+      GETJOCTET(data[0]) == 0x41 &&
+      GETJOCTET(data[1]) == 0x64 &&
+      GETJOCTET(data[2]) == 0x6F &&
+      GETJOCTET(data[3]) == 0x62 &&
+      GETJOCTET(data[4]) == 0x65) {
+    /* Found Adobe APP14 marker */
+    version = (GETJOCTET(data[5]) << 8) + GETJOCTET(data[6]);
+    flags0 = (GETJOCTET(data[7]) << 8) + GETJOCTET(data[8]);
+    flags1 = (GETJOCTET(data[9]) << 8) + GETJOCTET(data[10]);
+    transform = GETJOCTET(data[11]);
+    TRACEMS4(cinfo, 1, JTRC_ADOBE, version, flags0, flags1, transform);
+    cinfo->saw_Adobe_marker = TRUE;
+    cinfo->Adobe_transform = (UINT8) transform;
+  } else {
+    /* Start of APP14 does not match "Adobe", or too short */
+    TRACEMS1(cinfo, 1, JTRC_APP14, (int) (datalen + remaining));
+  }
+}
+
+
+METHODDEF(boolean)
+get_interesting_appn (j_decompress_ptr cinfo)
+/* Process an APP0 or APP14 marker without saving it */
+{
+  INT32 length;
+  JOCTET b[APPN_DATA_LEN];
+  unsigned int i, numtoread;
+  INPUT_VARS(cinfo);
+
+  INPUT_2BYTES(cinfo, length, return FALSE);
+  length -= 2;
+
+  /* get the interesting part of the marker data */
+  if (length >= APPN_DATA_LEN)
+    numtoread = APPN_DATA_LEN;
+  else if (length > 0)
+    numtoread = (unsigned int) length;
+  else
+    numtoread = 0;
+  for (i = 0; i < numtoread; i++)
+    INPUT_BYTE(cinfo, b[i], return FALSE);
+  length -= numtoread;
+
+  /* process it */
+  switch (cinfo->unread_marker) {
+  case M_APP0:
+    examine_app0(cinfo, (JOCTET FAR *) b, numtoread, length);
+    break;
+  case M_APP14:
+    examine_app14(cinfo, (JOCTET FAR *) b, numtoread, length);
+    break;
+  default:
+    /* can't get here unless jpeg_save_markers chooses wrong processor */
+    ERREXIT1(cinfo, JERR_UNKNOWN_MARKER, cinfo->unread_marker);
+    break;
+  }
+
+  /* skip any remaining data -- could be lots */
+  INPUT_SYNC(cinfo);
+  if (length > 0)
+    (*cinfo->src->skip_input_data) (cinfo, (long) length);
+
+  return TRUE;
+}
+
+
+#ifdef SAVE_MARKERS_SUPPORTED
+
+METHODDEF(boolean)
+save_marker (j_decompress_ptr cinfo)
+/* Save an APPn or COM marker into the marker list */
+{
+  my_marker_ptr marker = (my_marker_ptr) cinfo->marker;
+  jpeg_saved_marker_ptr cur_marker = marker->cur_marker;
+  unsigned int bytes_read, data_length;
+  JOCTET FAR * data;
+  INT32 length = 0;
+  INPUT_VARS(cinfo);
+
+  if (cur_marker == NULL) {
+    /* begin reading a marker */
+    INPUT_2BYTES(cinfo, length, return FALSE);
+    length -= 2;
+    if (length >= 0) {		/* watch out for bogus length word */
+      /* figure out how much we want to save */
+      unsigned int limit;
+      if (cinfo->unread_marker == (int) M_COM)
+	limit = marker->length_limit_COM;
+      else
+	limit = marker->length_limit_APPn[cinfo->unread_marker - (int) M_APP0];
+      if ((unsigned int) length < limit)
+	limit = (unsigned int) length;
+      /* allocate and initialize the marker item */
+      cur_marker = (jpeg_saved_marker_ptr)
+	(*cinfo->mem->alloc_large) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				    SIZEOF(struct jpeg_marker_struct) + limit);
+      cur_marker->next = NULL;
+      cur_marker->marker = (UINT8) cinfo->unread_marker;
+      cur_marker->original_length = (unsigned int) length;
+      cur_marker->data_length = limit;
+      /* data area is just beyond the jpeg_marker_struct */
+      data = cur_marker->data = (JOCTET FAR *) (cur_marker + 1);
+      marker->cur_marker = cur_marker;
+      marker->bytes_read = 0;
+      bytes_read = 0;
+      data_length = limit;
+    } else {
+      /* deal with bogus length word */
+      bytes_read = data_length = 0;
+      data = NULL;
+    }
+  } else {
+    /* resume reading a marker */
+    bytes_read = marker->bytes_read;
+    data_length = cur_marker->data_length;
+    data = cur_marker->data + bytes_read;
+  }
+
+  while (bytes_read < data_length) {
+    INPUT_SYNC(cinfo);		/* move the restart point to here */
+    marker->bytes_read = bytes_read;
+    /* If there's not at least one byte in buffer, suspend */
+    MAKE_BYTE_AVAIL(cinfo, return FALSE);
+    /* Copy bytes with reasonable rapidity */
+    while (bytes_read < data_length && bytes_in_buffer > 0) {
+      *data++ = *next_input_byte++;
+      bytes_in_buffer--;
+      bytes_read++;
+    }
+  }
+
+  /* Done reading what we want to read */
+  if (cur_marker != NULL) {	/* will be NULL if bogus length word */
+    /* Add new marker to end of list */
+    if (cinfo->marker_list == NULL) {
+      cinfo->marker_list = cur_marker;
+    } else {
+      jpeg_saved_marker_ptr prev = cinfo->marker_list;
+      while (prev->next != NULL)
+	prev = prev->next;
+      prev->next = cur_marker;
+    }
+    /* Reset pointer & calc remaining data length */
+    data = cur_marker->data;
+    length = cur_marker->original_length - data_length;
+  }
+  /* Reset to initial state for next marker */
+  marker->cur_marker = NULL;
+
+  /* Process the marker if interesting; else just make a generic trace msg */
+  switch (cinfo->unread_marker) {
+  case M_APP0:
+    examine_app0(cinfo, data, data_length, length);
+    break;
+  case M_APP14:
+    examine_app14(cinfo, data, data_length, length);
+    break;
+  default:
+    TRACEMS2(cinfo, 1, JTRC_MISC_MARKER, cinfo->unread_marker,
+	     (int) (data_length + length));
+    break;
+  }
+
+  /* skip any remaining data -- could be lots */
+  INPUT_SYNC(cinfo);		/* do before skip_input_data */
+  if (length > 0)
+    (*cinfo->src->skip_input_data) (cinfo, (long) length);
+
+  return TRUE;
+}
+
+#endif /* SAVE_MARKERS_SUPPORTED */
+
+
+METHODDEF(boolean)
+skip_variable (j_decompress_ptr cinfo)
+/* Skip over an unknown or uninteresting variable-length marker */
+{
+  INT32 length;
+  INPUT_VARS(cinfo);
+
+  INPUT_2BYTES(cinfo, length, return FALSE);
+  length -= 2;
+  
+  TRACEMS2(cinfo, 1, JTRC_MISC_MARKER, cinfo->unread_marker, (int) length);
+
+  INPUT_SYNC(cinfo);		/* do before skip_input_data */
+  if (length > 0)
+    (*cinfo->src->skip_input_data) (cinfo, (long) length);
+
+  return TRUE;
+}
+
+
+/*
+ * Find the next JPEG marker, save it in cinfo->unread_marker.
+ * Returns FALSE if had to suspend before reaching a marker;
+ * in that case cinfo->unread_marker is unchanged.
+ *
+ * Note that the result might not be a valid marker code,
+ * but it will never be 0 or FF.
+ */
+
+LOCAL(boolean)
+next_marker (j_decompress_ptr cinfo)
+{
+  int c;
+  INPUT_VARS(cinfo);
+
+  for (;;) {
+    INPUT_BYTE(cinfo, c, return FALSE);
+    /* Skip any non-FF bytes.
+     * This may look a bit inefficient, but it will not occur in a valid file.
+     * We sync after each discarded byte so that a suspending data source
+     * can discard the byte from its buffer.
+     */
+    while (c != 0xFF) {
+      cinfo->marker->discarded_bytes++;
+      INPUT_SYNC(cinfo);
+      INPUT_BYTE(cinfo, c, return FALSE);
+    }
+    /* This loop swallows any duplicate FF bytes.  Extra FFs are legal as
+     * pad bytes, so don't count them in discarded_bytes.  We assume there
+     * will not be so many consecutive FF bytes as to overflow a suspending
+     * data source's input buffer.
+     */
+    do {
+      INPUT_BYTE(cinfo, c, return FALSE);
+    } while (c == 0xFF);
+    if (c != 0)
+      break;			/* found a valid marker, exit loop */
+    /* Reach here if we found a stuffed-zero data sequence (FF/00).
+     * Discard it and loop back to try again.
+     */
+    cinfo->marker->discarded_bytes += 2;
+    INPUT_SYNC(cinfo);
+  }
+
+  if (cinfo->marker->discarded_bytes != 0) {
+    WARNMS2(cinfo, JWRN_EXTRANEOUS_DATA, cinfo->marker->discarded_bytes, c);
+    cinfo->marker->discarded_bytes = 0;
+  }
+
+  cinfo->unread_marker = c;
+
+  INPUT_SYNC(cinfo);
+  return TRUE;
+}
+
+
+LOCAL(boolean)
+first_marker (j_decompress_ptr cinfo)
+/* Like next_marker, but used to obtain the initial SOI marker. */
+/* For this marker, we do not allow preceding garbage or fill; otherwise,
+ * we might well scan an entire input file before realizing it ain't JPEG.
+ * If an application wants to process non-JFIF files, it must seek to the
+ * SOI before calling the JPEG library.
+ */
+{
+  int c, c2;
+  INPUT_VARS(cinfo);
+
+  INPUT_BYTE(cinfo, c, return FALSE);
+  INPUT_BYTE(cinfo, c2, return FALSE);
+  if (c != 0xFF || c2 != (int) M_SOI)
+    ERREXIT2(cinfo, JERR_NO_SOI, c, c2);
+
+  cinfo->unread_marker = c2;
+
+  INPUT_SYNC(cinfo);
+  return TRUE;
+}
+
+
+/*
+ * Read markers until SOS or EOI.
+ *
+ * Returns same codes as are defined for jpeg_consume_input:
+ * JPEG_SUSPENDED, JPEG_REACHED_SOS, or JPEG_REACHED_EOI.
+ *
+ * Note: This function may return a pseudo SOS marker (with zero
+ * component number) for treat by input controller's consume_input.
+ * consume_input itself should filter out (skip) the pseudo marker
+ * after processing for the caller.
+ */
+
+METHODDEF(int)
+read_markers (j_decompress_ptr cinfo)
+{
+  /* Outer loop repeats once for each marker. */
+  for (;;) {
+    /* Collect the marker proper, unless we already did. */
+    /* NB: first_marker() enforces the requirement that SOI appear first. */
+    if (cinfo->unread_marker == 0) {
+      if (! cinfo->marker->saw_SOI) {
+	if (! first_marker(cinfo))
+	  return JPEG_SUSPENDED;
+      } else {
+	if (! next_marker(cinfo))
+	  return JPEG_SUSPENDED;
+      }
+    }
+    /* At this point cinfo->unread_marker contains the marker code and the
+     * input point is just past the marker proper, but before any parameters.
+     * A suspension will cause us to return with this state still true.
+     */
+    switch (cinfo->unread_marker) {
+    case M_SOI:
+      if (! get_soi(cinfo))
+	return JPEG_SUSPENDED;
+      break;
+
+    case M_SOF0:		/* Baseline */
+      if (! get_sof(cinfo, TRUE, FALSE, FALSE))
+	return JPEG_SUSPENDED;
+      break;
+
+    case M_SOF1:		/* Extended sequential, Huffman */
+      if (! get_sof(cinfo, FALSE, FALSE, FALSE))
+	return JPEG_SUSPENDED;
+      break;
+
+    case M_SOF2:		/* Progressive, Huffman */
+      if (! get_sof(cinfo, FALSE, TRUE, FALSE))
+	return JPEG_SUSPENDED;
+      break;
+
+    case M_SOF9:		/* Extended sequential, arithmetic */
+      if (! get_sof(cinfo, FALSE, FALSE, TRUE))
+	return JPEG_SUSPENDED;
+      break;
+
+    case M_SOF10:		/* Progressive, arithmetic */
+      if (! get_sof(cinfo, FALSE, TRUE, TRUE))
+	return JPEG_SUSPENDED;
+      break;
+
+    /* Currently unsupported SOFn types */
+    case M_SOF3:		/* Lossless, Huffman */
+    case M_SOF5:		/* Differential sequential, Huffman */
+    case M_SOF6:		/* Differential progressive, Huffman */
+    case M_SOF7:		/* Differential lossless, Huffman */
+    case M_JPG:			/* Reserved for JPEG extensions */
+    case M_SOF11:		/* Lossless, arithmetic */
+    case M_SOF13:		/* Differential sequential, arithmetic */
+    case M_SOF14:		/* Differential progressive, arithmetic */
+    case M_SOF15:		/* Differential lossless, arithmetic */
+      ERREXIT1(cinfo, JERR_SOF_UNSUPPORTED, cinfo->unread_marker);
+      break;
+
+    case M_SOS:
+      if (! get_sos(cinfo))
+	return JPEG_SUSPENDED;
+      cinfo->unread_marker = 0;	/* processed the marker */
+      return JPEG_REACHED_SOS;
+
+    case M_EOI:
+      TRACEMS(cinfo, 1, JTRC_EOI);
+      cinfo->unread_marker = 0;	/* processed the marker */
+      return JPEG_REACHED_EOI;
+
+    case M_DAC:
+      if (! get_dac(cinfo))
+	return JPEG_SUSPENDED;
+      break;
+
+    case M_DHT:
+      if (! get_dht(cinfo))
+	return JPEG_SUSPENDED;
+      break;
+
+    case M_DQT:
+      if (! get_dqt(cinfo))
+	return JPEG_SUSPENDED;
+      break;
+
+    case M_DRI:
+      if (! get_dri(cinfo))
+	return JPEG_SUSPENDED;
+      break;
+
+    case M_JPG8:
+      if (! get_lse(cinfo))
+	return JPEG_SUSPENDED;
+      break;
+
+    case M_APP0:
+    case M_APP1:
+    case M_APP2:
+    case M_APP3:
+    case M_APP4:
+    case M_APP5:
+    case M_APP6:
+    case M_APP7:
+    case M_APP8:
+    case M_APP9:
+    case M_APP10:
+    case M_APP11:
+    case M_APP12:
+    case M_APP13:
+    case M_APP14:
+    case M_APP15:
+      if (! (*((my_marker_ptr) cinfo->marker)->process_APPn[
+		cinfo->unread_marker - (int) M_APP0]) (cinfo))
+	return JPEG_SUSPENDED;
+      break;
+
+    case M_COM:
+      if (! (*((my_marker_ptr) cinfo->marker)->process_COM) (cinfo))
+	return JPEG_SUSPENDED;
+      break;
+
+    case M_RST0:		/* these are all parameterless */
+    case M_RST1:
+    case M_RST2:
+    case M_RST3:
+    case M_RST4:
+    case M_RST5:
+    case M_RST6:
+    case M_RST7:
+    case M_TEM:
+      TRACEMS1(cinfo, 1, JTRC_PARMLESS_MARKER, cinfo->unread_marker);
+      break;
+
+    case M_DNL:			/* Ignore DNL ... perhaps the wrong thing */
+      if (! skip_variable(cinfo))
+	return JPEG_SUSPENDED;
+      break;
+
+    default:			/* must be DHP, EXP, JPGn, or RESn */
+      /* For now, we treat the reserved markers as fatal errors since they are
+       * likely to be used to signal incompatible JPEG Part 3 extensions.
+       * Once the JPEG 3 version-number marker is well defined, this code
+       * ought to change!
+       */
+      ERREXIT1(cinfo, JERR_UNKNOWN_MARKER, cinfo->unread_marker);
+      break;
+    }
+    /* Successfully processed marker, so reset state variable */
+    cinfo->unread_marker = 0;
+  } /* end loop */
+}
+
+
+/*
+ * Read a restart marker, which is expected to appear next in the datastream;
+ * if the marker is not there, take appropriate recovery action.
+ * Returns FALSE if suspension is required.
+ *
+ * This is called by the entropy decoder after it has read an appropriate
+ * number of MCUs.  cinfo->unread_marker may be nonzero if the entropy decoder
+ * has already read a marker from the data source.  Under normal conditions
+ * cinfo->unread_marker will be reset to 0 before returning; if not reset,
+ * it holds a marker which the decoder will be unable to read past.
+ */
+
+METHODDEF(boolean)
+read_restart_marker (j_decompress_ptr cinfo)
+{
+  /* Obtain a marker unless we already did. */
+  /* Note that next_marker will complain if it skips any data. */
+  if (cinfo->unread_marker == 0) {
+    if (! next_marker(cinfo))
+      return FALSE;
+  }
+
+  if (cinfo->unread_marker ==
+      ((int) M_RST0 + cinfo->marker->next_restart_num)) {
+    /* Normal case --- swallow the marker and let entropy decoder continue */
+    TRACEMS1(cinfo, 3, JTRC_RST, cinfo->marker->next_restart_num);
+    cinfo->unread_marker = 0;
+  } else {
+    /* Uh-oh, the restart markers have been messed up. */
+    /* Let the data source manager determine how to resync. */
+    if (! (*cinfo->src->resync_to_restart) (cinfo,
+					    cinfo->marker->next_restart_num))
+      return FALSE;
+  }
+
+  /* Update next-restart state */
+  cinfo->marker->next_restart_num = (cinfo->marker->next_restart_num + 1) & 7;
+
+  return TRUE;
+}
+
+
+/*
+ * This is the default resync_to_restart method for data source managers
+ * to use if they don't have any better approach.  Some data source managers
+ * may be able to back up, or may have additional knowledge about the data
+ * which permits a more intelligent recovery strategy; such managers would
+ * presumably supply their own resync method.
+ *
+ * read_restart_marker calls resync_to_restart if it finds a marker other than
+ * the restart marker it was expecting.  (This code is *not* used unless
+ * a nonzero restart interval has been declared.)  cinfo->unread_marker is
+ * the marker code actually found (might be anything, except 0 or FF).
+ * The desired restart marker number (0..7) is passed as a parameter.
+ * This routine is supposed to apply whatever error recovery strategy seems
+ * appropriate in order to position the input stream to the next data segment.
+ * Note that cinfo->unread_marker is treated as a marker appearing before
+ * the current data-source input point; usually it should be reset to zero
+ * before returning.
+ * Returns FALSE if suspension is required.
+ *
+ * This implementation is substantially constrained by wanting to treat the
+ * input as a data stream; this means we can't back up.  Therefore, we have
+ * only the following actions to work with:
+ *   1. Simply discard the marker and let the entropy decoder resume at next
+ *      byte of file.
+ *   2. Read forward until we find another marker, discarding intervening
+ *      data.  (In theory we could look ahead within the current bufferload,
+ *      without having to discard data if we don't find the desired marker.
+ *      This idea is not implemented here, in part because it makes behavior
+ *      dependent on buffer size and chance buffer-boundary positions.)
+ *   3. Leave the marker unread (by failing to zero cinfo->unread_marker).
+ *      This will cause the entropy decoder to process an empty data segment,
+ *      inserting dummy zeroes, and then we will reprocess the marker.
+ *
+ * #2 is appropriate if we think the desired marker lies ahead, while #3 is
+ * appropriate if the found marker is a future restart marker (indicating
+ * that we have missed the desired restart marker, probably because it got
+ * corrupted).
+ * We apply #2 or #3 if the found marker is a restart marker no more than
+ * two counts behind or ahead of the expected one.  We also apply #2 if the
+ * found marker is not a legal JPEG marker code (it's certainly bogus data).
+ * If the found marker is a restart marker more than 2 counts away, we do #1
+ * (too much risk that the marker is erroneous; with luck we will be able to
+ * resync at some future point).
+ * For any valid non-restart JPEG marker, we apply #3.  This keeps us from
+ * overrunning the end of a scan.  An implementation limited to single-scan
+ * files might find it better to apply #2 for markers other than EOI, since
+ * any other marker would have to be bogus data in that case.
+ */
+
+GLOBAL(boolean)
+jpeg_resync_to_restart (j_decompress_ptr cinfo, int desired)
+{
+  int marker = cinfo->unread_marker;
+  int action = 1;
+  
+  /* Always put up a warning. */
+  WARNMS2(cinfo, JWRN_MUST_RESYNC, marker, desired);
+  
+  /* Outer loop handles repeated decision after scanning forward. */
+  for (;;) {
+    if (marker < (int) M_SOF0)
+      action = 2;		/* invalid marker */
+    else if (marker < (int) M_RST0 || marker > (int) M_RST7)
+      action = 3;		/* valid non-restart marker */
+    else {
+      if (marker == ((int) M_RST0 + ((desired+1) & 7)) ||
+	  marker == ((int) M_RST0 + ((desired+2) & 7)))
+	action = 3;		/* one of the next two expected restarts */
+      else if (marker == ((int) M_RST0 + ((desired-1) & 7)) ||
+	       marker == ((int) M_RST0 + ((desired-2) & 7)))
+	action = 2;		/* a prior restart, so advance */
+      else
+	action = 1;		/* desired restart or too far away */
+    }
+    TRACEMS2(cinfo, 4, JTRC_RECOVERY_ACTION, marker, action);
+    switch (action) {
+    case 1:
+      /* Discard marker and let entropy decoder resume processing. */
+      cinfo->unread_marker = 0;
+      return TRUE;
+    case 2:
+      /* Scan to the next marker, and repeat the decision loop. */
+      if (! next_marker(cinfo))
+	return FALSE;
+      marker = cinfo->unread_marker;
+      break;
+    case 3:
+      /* Return without advancing past this marker. */
+      /* Entropy decoder will be forced to process an empty segment. */
+      return TRUE;
+    }
+  } /* end loop */
+}
+
+
+/*
+ * Reset marker processing state to begin a fresh datastream.
+ */
+
+METHODDEF(void)
+reset_marker_reader (j_decompress_ptr cinfo)
+{
+  my_marker_ptr marker = (my_marker_ptr) cinfo->marker;
+
+  cinfo->comp_info = NULL;		/* until allocated by get_sof */
+  cinfo->input_scan_number = 0;		/* no SOS seen yet */
+  cinfo->unread_marker = 0;		/* no pending marker */
+  marker->pub.saw_SOI = FALSE;		/* set internal state too */
+  marker->pub.saw_SOF = FALSE;
+  marker->pub.discarded_bytes = 0;
+  marker->cur_marker = NULL;
+}
+
+
+/*
+ * Initialize the marker reader module.
+ * This is called only once, when the decompression object is created.
+ */
+
+GLOBAL(void)
+jinit_marker_reader (j_decompress_ptr cinfo)
+{
+  my_marker_ptr marker;
+  int i;
+
+  /* Create subobject in permanent pool */
+  marker = (my_marker_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_PERMANENT,
+				SIZEOF(my_marker_reader));
+  cinfo->marker = &marker->pub;
+  /* Initialize public method pointers */
+  marker->pub.reset_marker_reader = reset_marker_reader;
+  marker->pub.read_markers = read_markers;
+  marker->pub.read_restart_marker = read_restart_marker;
+  /* Initialize COM/APPn processing.
+   * By default, we examine and then discard APP0 and APP14,
+   * but simply discard COM and all other APPn.
+   */
+  marker->process_COM = skip_variable;
+  marker->length_limit_COM = 0;
+  for (i = 0; i < 16; i++) {
+    marker->process_APPn[i] = skip_variable;
+    marker->length_limit_APPn[i] = 0;
+  }
+  marker->process_APPn[0] = get_interesting_appn;
+  marker->process_APPn[14] = get_interesting_appn;
+  /* Reset marker processing state */
+  reset_marker_reader(cinfo);
+}
+
+
+/*
+ * Control saving of COM and APPn markers into marker_list.
+ */
+
+#ifdef SAVE_MARKERS_SUPPORTED
+
+GLOBAL(void)
+jpeg_save_markers (j_decompress_ptr cinfo, int marker_code,
+		   unsigned int length_limit)
+{
+  my_marker_ptr marker = (my_marker_ptr) cinfo->marker;
+  long maxlength;
+  jpeg_marker_parser_method processor;
+
+  /* Length limit mustn't be larger than what we can allocate
+   * (should only be a concern in a 16-bit environment).
+   */
+  maxlength = cinfo->mem->max_alloc_chunk - SIZEOF(struct jpeg_marker_struct);
+  if (((long) length_limit) > maxlength)
+    length_limit = (unsigned int) maxlength;
+
+  /* Choose processor routine to use.
+   * APP0/APP14 have special requirements.
+   */
+  if (length_limit) {
+    processor = save_marker;
+    /* If saving APP0/APP14, save at least enough for our internal use. */
+    if (marker_code == (int) M_APP0 && length_limit < APP0_DATA_LEN)
+      length_limit = APP0_DATA_LEN;
+    else if (marker_code == (int) M_APP14 && length_limit < APP14_DATA_LEN)
+      length_limit = APP14_DATA_LEN;
+  } else {
+    processor = skip_variable;
+    /* If discarding APP0/APP14, use our regular on-the-fly processor. */
+    if (marker_code == (int) M_APP0 || marker_code == (int) M_APP14)
+      processor = get_interesting_appn;
+  }
+
+  if (marker_code == (int) M_COM) {
+    marker->process_COM = processor;
+    marker->length_limit_COM = length_limit;
+  } else if (marker_code >= (int) M_APP0 && marker_code <= (int) M_APP15) {
+    marker->process_APPn[marker_code - (int) M_APP0] = processor;
+    marker->length_limit_APPn[marker_code - (int) M_APP0] = length_limit;
+  } else
+    ERREXIT1(cinfo, JERR_UNKNOWN_MARKER, marker_code);
+}
+
+#endif /* SAVE_MARKERS_SUPPORTED */
+
+
+/*
+ * Install a special processing method for COM or APPn markers.
+ */
+
+GLOBAL(void)
+jpeg_set_marker_processor (j_decompress_ptr cinfo, int marker_code,
+			   jpeg_marker_parser_method routine)
+{
+  my_marker_ptr marker = (my_marker_ptr) cinfo->marker;
+
+  if (marker_code == (int) M_COM)
+    marker->process_COM = routine;
+  else if (marker_code >= (int) M_APP0 && marker_code <= (int) M_APP15)
+    marker->process_APPn[marker_code - (int) M_APP0] = routine;
+  else
+    ERREXIT1(cinfo, JERR_UNKNOWN_MARKER, marker_code);
+}
diff --git a/plugins/AdvaImg/src/LibJPEG/jdmaster.c b/plugins/AdvaImg/src/LibJPEG/jdmaster.c
index fef72a21b4..03d4dd08b8 100644
--- a/plugins/AdvaImg/src/LibJPEG/jdmaster.c
+++ b/plugins/AdvaImg/src/LibJPEG/jdmaster.c
@@ -1,531 +1,531 @@
-/*
- * jdmaster.c
- *
- * Copyright (C) 1991-1997, Thomas G. Lane.
- * Modified 2002-2011 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains master control logic for the JPEG decompressor.
- * These routines are concerned with selecting the modules to be executed
- * and with determining the number of passes and the work to be done in each
- * pass.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-
-
-/* Private state */
-
-typedef struct {
-  struct jpeg_decomp_master pub; /* public fields */
-
-  int pass_number;		/* # of passes completed */
-
-  boolean using_merged_upsample; /* TRUE if using merged upsample/cconvert */
-
-  /* Saved references to initialized quantizer modules,
-   * in case we need to switch modes.
-   */
-  struct jpeg_color_quantizer * quantizer_1pass;
-  struct jpeg_color_quantizer * quantizer_2pass;
-} my_decomp_master;
-
-typedef my_decomp_master * my_master_ptr;
-
-
-/*
- * Determine whether merged upsample/color conversion should be used.
- * CRUCIAL: this must match the actual capabilities of jdmerge.c!
- */
-
-LOCAL(boolean)
-use_merged_upsample (j_decompress_ptr cinfo)
-{
-#ifdef UPSAMPLE_MERGING_SUPPORTED
-  /* Merging is the equivalent of plain box-filter upsampling */
-  if (cinfo->do_fancy_upsampling || cinfo->CCIR601_sampling)
-    return FALSE;
-  /* jdmerge.c only supports YCC=>RGB color conversion */
-  if (cinfo->jpeg_color_space != JCS_YCbCr || cinfo->num_components != 3 ||
-      cinfo->out_color_space != JCS_RGB ||
-      cinfo->out_color_components != RGB_PIXELSIZE)
-    return FALSE;
-  /* and it only handles 2h1v or 2h2v sampling ratios */
-  if (cinfo->comp_info[0].h_samp_factor != 2 ||
-      cinfo->comp_info[1].h_samp_factor != 1 ||
-      cinfo->comp_info[2].h_samp_factor != 1 ||
-      cinfo->comp_info[0].v_samp_factor >  2 ||
-      cinfo->comp_info[1].v_samp_factor != 1 ||
-      cinfo->comp_info[2].v_samp_factor != 1)
-    return FALSE;
-  /* furthermore, it doesn't work if we've scaled the IDCTs differently */
-  if (cinfo->comp_info[0].DCT_h_scaled_size != cinfo->min_DCT_h_scaled_size ||
-      cinfo->comp_info[1].DCT_h_scaled_size != cinfo->min_DCT_h_scaled_size ||
-      cinfo->comp_info[2].DCT_h_scaled_size != cinfo->min_DCT_h_scaled_size ||
-      cinfo->comp_info[0].DCT_v_scaled_size != cinfo->min_DCT_v_scaled_size ||
-      cinfo->comp_info[1].DCT_v_scaled_size != cinfo->min_DCT_v_scaled_size ||
-      cinfo->comp_info[2].DCT_v_scaled_size != cinfo->min_DCT_v_scaled_size)
-    return FALSE;
-  /* ??? also need to test for upsample-time rescaling, when & if supported */
-  return TRUE;			/* by golly, it'll work... */
-#else
-  return FALSE;
-#endif
-}
-
-
-/*
- * Compute output image dimensions and related values.
- * NOTE: this is exported for possible use by application.
- * Hence it mustn't do anything that can't be done twice.
- * Also note that it may be called before the master module is initialized!
- */
-
-GLOBAL(void)
-jpeg_calc_output_dimensions (j_decompress_ptr cinfo)
-/* Do computations that are needed before master selection phase.
- * This function is used for full decompression.
- */
-{
-#ifdef IDCT_SCALING_SUPPORTED
-  int ci;
-  jpeg_component_info *compptr;
-#endif
-
-  /* Prevent application from calling me at wrong times */
-  if (cinfo->global_state != DSTATE_READY)
-    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
-
-  /* Compute core output image dimensions and DCT scaling choices. */
-  jpeg_core_output_dimensions(cinfo);
-
-#ifdef IDCT_SCALING_SUPPORTED
-
-  /* In selecting the actual DCT scaling for each component, we try to
-   * scale up the chroma components via IDCT scaling rather than upsampling.
-   * This saves time if the upsampler gets to use 1:1 scaling.
-   * Note this code adapts subsampling ratios which are powers of 2.
-   */
-  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-       ci++, compptr++) {
-    int ssize = 1;
-    while (cinfo->min_DCT_h_scaled_size * ssize <=
-	   (cinfo->do_fancy_upsampling ? DCTSIZE : DCTSIZE / 2) &&
-	   (cinfo->max_h_samp_factor % (compptr->h_samp_factor * ssize * 2)) == 0) {
-      ssize = ssize * 2;
-    }
-    compptr->DCT_h_scaled_size = cinfo->min_DCT_h_scaled_size * ssize;
-    ssize = 1;
-    while (cinfo->min_DCT_v_scaled_size * ssize <=
-	   (cinfo->do_fancy_upsampling ? DCTSIZE : DCTSIZE / 2) &&
-	   (cinfo->max_v_samp_factor % (compptr->v_samp_factor * ssize * 2)) == 0) {
-      ssize = ssize * 2;
-    }
-    compptr->DCT_v_scaled_size = cinfo->min_DCT_v_scaled_size * ssize;
-
-    /* We don't support IDCT ratios larger than 2. */
-    if (compptr->DCT_h_scaled_size > compptr->DCT_v_scaled_size * 2)
-	compptr->DCT_h_scaled_size = compptr->DCT_v_scaled_size * 2;
-    else if (compptr->DCT_v_scaled_size > compptr->DCT_h_scaled_size * 2)
-	compptr->DCT_v_scaled_size = compptr->DCT_h_scaled_size * 2;
-  }
-
-  /* Recompute downsampled dimensions of components;
-   * application needs to know these if using raw downsampled data.
-   */
-  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-       ci++, compptr++) {
-    /* Size in samples, after IDCT scaling */
-    compptr->downsampled_width = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_width *
-		    (long) (compptr->h_samp_factor * compptr->DCT_h_scaled_size),
-		    (long) (cinfo->max_h_samp_factor * cinfo->block_size));
-    compptr->downsampled_height = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_height *
-		    (long) (compptr->v_samp_factor * compptr->DCT_v_scaled_size),
-		    (long) (cinfo->max_v_samp_factor * cinfo->block_size));
-  }
-
-#endif /* IDCT_SCALING_SUPPORTED */
-
-  /* Report number of components in selected colorspace. */
-  /* Probably this should be in the color conversion module... */
-  switch (cinfo->out_color_space) {
-  case JCS_GRAYSCALE:
-    cinfo->out_color_components = 1;
-    break;
-  case JCS_RGB:
-    cinfo->out_color_components = RGB_PIXELSIZE;
-    break;
-  case JCS_YCbCr:
-    cinfo->out_color_components = 3;
-    break;
-  case JCS_CMYK:
-  case JCS_YCCK:
-    cinfo->out_color_components = 4;
-    break;
-  default:			/* else must be same colorspace as in file */
-    cinfo->out_color_components = cinfo->num_components;
-    break;
-  }
-  cinfo->output_components = (cinfo->quantize_colors ? 1 :
-			      cinfo->out_color_components);
-
-  /* See if upsampler will want to emit more than one row at a time */
-  if (use_merged_upsample(cinfo))
-    cinfo->rec_outbuf_height = cinfo->max_v_samp_factor;
-  else
-    cinfo->rec_outbuf_height = 1;
-}
-
-
-/*
- * Several decompression processes need to range-limit values to the range
- * 0..MAXJSAMPLE; the input value may fall somewhat outside this range
- * due to noise introduced by quantization, roundoff error, etc.  These
- * processes are inner loops and need to be as fast as possible.  On most
- * machines, particularly CPUs with pipelines or instruction prefetch,
- * a (subscript-check-less) C table lookup
- *		x = sample_range_limit[x];
- * is faster than explicit tests
- *		if (x < 0)  x = 0;
- *		else if (x > MAXJSAMPLE)  x = MAXJSAMPLE;
- * These processes all use a common table prepared by the routine below.
- *
- * For most steps we can mathematically guarantee that the initial value
- * of x is within MAXJSAMPLE+1 of the legal range, so a table running from
- * -(MAXJSAMPLE+1) to 2*MAXJSAMPLE+1 is sufficient.  But for the initial
- * limiting step (just after the IDCT), a wildly out-of-range value is 
- * possible if the input data is corrupt.  To avoid any chance of indexing
- * off the end of memory and getting a bad-pointer trap, we perform the
- * post-IDCT limiting thus:
- *		x = range_limit[x & MASK];
- * where MASK is 2 bits wider than legal sample data, ie 10 bits for 8-bit
- * samples.  Under normal circumstances this is more than enough range and
- * a correct output will be generated; with bogus input data the mask will
- * cause wraparound, and we will safely generate a bogus-but-in-range output.
- * For the post-IDCT step, we want to convert the data from signed to unsigned
- * representation by adding CENTERJSAMPLE at the same time that we limit it.
- * So the post-IDCT limiting table ends up looking like this:
- *   CENTERJSAMPLE,CENTERJSAMPLE+1,...,MAXJSAMPLE,
- *   MAXJSAMPLE (repeat 2*(MAXJSAMPLE+1)-CENTERJSAMPLE times),
- *   0          (repeat 2*(MAXJSAMPLE+1)-CENTERJSAMPLE times),
- *   0,1,...,CENTERJSAMPLE-1
- * Negative inputs select values from the upper half of the table after
- * masking.
- *
- * We can save some space by overlapping the start of the post-IDCT table
- * with the simpler range limiting table.  The post-IDCT table begins at
- * sample_range_limit + CENTERJSAMPLE.
- *
- * Note that the table is allocated in near data space on PCs; it's small
- * enough and used often enough to justify this.
- */
-
-LOCAL(void)
-prepare_range_limit_table (j_decompress_ptr cinfo)
-/* Allocate and fill in the sample_range_limit table */
-{
-  JSAMPLE * table;
-  int i;
-
-  table = (JSAMPLE *)
-    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-		(5 * (MAXJSAMPLE+1) + CENTERJSAMPLE) * SIZEOF(JSAMPLE));
-  table += (MAXJSAMPLE+1);	/* allow negative subscripts of simple table */
-  cinfo->sample_range_limit = table;
-  /* First segment of "simple" table: limit[x] = 0 for x < 0 */
-  MEMZERO(table - (MAXJSAMPLE+1), (MAXJSAMPLE+1) * SIZEOF(JSAMPLE));
-  /* Main part of "simple" table: limit[x] = x */
-  for (i = 0; i <= MAXJSAMPLE; i++)
-    table[i] = (JSAMPLE) i;
-  table += CENTERJSAMPLE;	/* Point to where post-IDCT table starts */
-  /* End of simple table, rest of first half of post-IDCT table */
-  for (i = CENTERJSAMPLE; i < 2*(MAXJSAMPLE+1); i++)
-    table[i] = MAXJSAMPLE;
-  /* Second half of post-IDCT table */
-  MEMZERO(table + (2 * (MAXJSAMPLE+1)),
-	  (2 * (MAXJSAMPLE+1) - CENTERJSAMPLE) * SIZEOF(JSAMPLE));
-  MEMCOPY(table + (4 * (MAXJSAMPLE+1) - CENTERJSAMPLE),
-	  cinfo->sample_range_limit, CENTERJSAMPLE * SIZEOF(JSAMPLE));
-}
-
-
-/*
- * Master selection of decompression modules.
- * This is done once at jpeg_start_decompress time.  We determine
- * which modules will be used and give them appropriate initialization calls.
- * We also initialize the decompressor input side to begin consuming data.
- *
- * Since jpeg_read_header has finished, we know what is in the SOF
- * and (first) SOS markers.  We also have all the application parameter
- * settings.
- */
-
-LOCAL(void)
-master_selection (j_decompress_ptr cinfo)
-{
-  my_master_ptr master = (my_master_ptr) cinfo->master;
-  boolean use_c_buffer;
-  long samplesperrow;
-  JDIMENSION jd_samplesperrow;
-
-  /* Initialize dimensions and other stuff */
-  jpeg_calc_output_dimensions(cinfo);
-  prepare_range_limit_table(cinfo);
-
-  /* Width of an output scanline must be representable as JDIMENSION. */
-  samplesperrow = (long) cinfo->output_width * (long) cinfo->out_color_components;
-  jd_samplesperrow = (JDIMENSION) samplesperrow;
-  if ((long) jd_samplesperrow != samplesperrow)
-    ERREXIT(cinfo, JERR_WIDTH_OVERFLOW);
-
-  /* Initialize my private state */
-  master->pass_number = 0;
-  master->using_merged_upsample = use_merged_upsample(cinfo);
-
-  /* Color quantizer selection */
-  master->quantizer_1pass = NULL;
-  master->quantizer_2pass = NULL;
-  /* No mode changes if not using buffered-image mode. */
-  if (! cinfo->quantize_colors || ! cinfo->buffered_image) {
-    cinfo->enable_1pass_quant = FALSE;
-    cinfo->enable_external_quant = FALSE;
-    cinfo->enable_2pass_quant = FALSE;
-  }
-  if (cinfo->quantize_colors) {
-    if (cinfo->raw_data_out)
-      ERREXIT(cinfo, JERR_NOTIMPL);
-    /* 2-pass quantizer only works in 3-component color space. */
-    if (cinfo->out_color_components != 3) {
-      cinfo->enable_1pass_quant = TRUE;
-      cinfo->enable_external_quant = FALSE;
-      cinfo->enable_2pass_quant = FALSE;
-      cinfo->colormap = NULL;
-    } else if (cinfo->colormap != NULL) {
-      cinfo->enable_external_quant = TRUE;
-    } else if (cinfo->two_pass_quantize) {
-      cinfo->enable_2pass_quant = TRUE;
-    } else {
-      cinfo->enable_1pass_quant = TRUE;
-    }
-
-    if (cinfo->enable_1pass_quant) {
-#ifdef QUANT_1PASS_SUPPORTED
-      jinit_1pass_quantizer(cinfo);
-      master->quantizer_1pass = cinfo->cquantize;
-#else
-      ERREXIT(cinfo, JERR_NOT_COMPILED);
-#endif
-    }
-
-    /* We use the 2-pass code to map to external colormaps. */
-    if (cinfo->enable_2pass_quant || cinfo->enable_external_quant) {
-#ifdef QUANT_2PASS_SUPPORTED
-      jinit_2pass_quantizer(cinfo);
-      master->quantizer_2pass = cinfo->cquantize;
-#else
-      ERREXIT(cinfo, JERR_NOT_COMPILED);
-#endif
-    }
-    /* If both quantizers are initialized, the 2-pass one is left active;
-     * this is necessary for starting with quantization to an external map.
-     */
-  }
-
-  /* Post-processing: in particular, color conversion first */
-  if (! cinfo->raw_data_out) {
-    if (master->using_merged_upsample) {
-#ifdef UPSAMPLE_MERGING_SUPPORTED
-      jinit_merged_upsampler(cinfo); /* does color conversion too */
-#else
-      ERREXIT(cinfo, JERR_NOT_COMPILED);
-#endif
-    } else {
-      jinit_color_deconverter(cinfo);
-      jinit_upsampler(cinfo);
-    }
-    jinit_d_post_controller(cinfo, cinfo->enable_2pass_quant);
-  }
-  /* Inverse DCT */
-  jinit_inverse_dct(cinfo);
-  /* Entropy decoding: either Huffman or arithmetic coding. */
-  if (cinfo->arith_code)
-    jinit_arith_decoder(cinfo);
-  else {
-    jinit_huff_decoder(cinfo);
-  }
-
-  /* Initialize principal buffer controllers. */
-  use_c_buffer = cinfo->inputctl->has_multiple_scans || cinfo->buffered_image;
-  jinit_d_coef_controller(cinfo, use_c_buffer);
-
-  if (! cinfo->raw_data_out)
-    jinit_d_main_controller(cinfo, FALSE /* never need full buffer here */);
-
-  /* We can now tell the memory manager to allocate virtual arrays. */
-  (*cinfo->mem->realize_virt_arrays) ((j_common_ptr) cinfo);
-
-  /* Initialize input side of decompressor to consume first scan. */
-  (*cinfo->inputctl->start_input_pass) (cinfo);
-
-#ifdef D_MULTISCAN_FILES_SUPPORTED
-  /* If jpeg_start_decompress will read the whole file, initialize
-   * progress monitoring appropriately.  The input step is counted
-   * as one pass.
-   */
-  if (cinfo->progress != NULL && ! cinfo->buffered_image &&
-      cinfo->inputctl->has_multiple_scans) {
-    int nscans;
-    /* Estimate number of scans to set pass_limit. */
-    if (cinfo->progressive_mode) {
-      /* Arbitrarily estimate 2 interleaved DC scans + 3 AC scans/component. */
-      nscans = 2 + 3 * cinfo->num_components;
-    } else {
-      /* For a nonprogressive multiscan file, estimate 1 scan per component. */
-      nscans = cinfo->num_components;
-    }
-    cinfo->progress->pass_counter = 0L;
-    cinfo->progress->pass_limit = (long) cinfo->total_iMCU_rows * nscans;
-    cinfo->progress->completed_passes = 0;
-    cinfo->progress->total_passes = (cinfo->enable_2pass_quant ? 3 : 2);
-    /* Count the input pass as done */
-    master->pass_number++;
-  }
-#endif /* D_MULTISCAN_FILES_SUPPORTED */
-}
-
-
-/*
- * Per-pass setup.
- * This is called at the beginning of each output pass.  We determine which
- * modules will be active during this pass and give them appropriate
- * start_pass calls.  We also set is_dummy_pass to indicate whether this
- * is a "real" output pass or a dummy pass for color quantization.
- * (In the latter case, jdapistd.c will crank the pass to completion.)
- */
-
-METHODDEF(void)
-prepare_for_output_pass (j_decompress_ptr cinfo)
-{
-  my_master_ptr master = (my_master_ptr) cinfo->master;
-
-  if (master->pub.is_dummy_pass) {
-#ifdef QUANT_2PASS_SUPPORTED
-    /* Final pass of 2-pass quantization */
-    master->pub.is_dummy_pass = FALSE;
-    (*cinfo->cquantize->start_pass) (cinfo, FALSE);
-    (*cinfo->post->start_pass) (cinfo, JBUF_CRANK_DEST);
-    (*cinfo->main->start_pass) (cinfo, JBUF_CRANK_DEST);
-#else
-    ERREXIT(cinfo, JERR_NOT_COMPILED);
-#endif /* QUANT_2PASS_SUPPORTED */
-  } else {
-    if (cinfo->quantize_colors && cinfo->colormap == NULL) {
-      /* Select new quantization method */
-      if (cinfo->two_pass_quantize && cinfo->enable_2pass_quant) {
-	cinfo->cquantize = master->quantizer_2pass;
-	master->pub.is_dummy_pass = TRUE;
-      } else if (cinfo->enable_1pass_quant) {
-	cinfo->cquantize = master->quantizer_1pass;
-      } else {
-	ERREXIT(cinfo, JERR_MODE_CHANGE);
-      }
-    }
-    (*cinfo->idct->start_pass) (cinfo);
-    (*cinfo->coef->start_output_pass) (cinfo);
-    if (! cinfo->raw_data_out) {
-      if (! master->using_merged_upsample)
-	(*cinfo->cconvert->start_pass) (cinfo);
-      (*cinfo->upsample->start_pass) (cinfo);
-      if (cinfo->quantize_colors)
-	(*cinfo->cquantize->start_pass) (cinfo, master->pub.is_dummy_pass);
-      (*cinfo->post->start_pass) (cinfo,
-	    (master->pub.is_dummy_pass ? JBUF_SAVE_AND_PASS : JBUF_PASS_THRU));
-      (*cinfo->main->start_pass) (cinfo, JBUF_PASS_THRU);
-    }
-  }
-
-  /* Set up progress monitor's pass info if present */
-  if (cinfo->progress != NULL) {
-    cinfo->progress->completed_passes = master->pass_number;
-    cinfo->progress->total_passes = master->pass_number +
-				    (master->pub.is_dummy_pass ? 2 : 1);
-    /* In buffered-image mode, we assume one more output pass if EOI not
-     * yet reached, but no more passes if EOI has been reached.
-     */
-    if (cinfo->buffered_image && ! cinfo->inputctl->eoi_reached) {
-      cinfo->progress->total_passes += (cinfo->enable_2pass_quant ? 2 : 1);
-    }
-  }
-}
-
-
-/*
- * Finish up at end of an output pass.
- */
-
-METHODDEF(void)
-finish_output_pass (j_decompress_ptr cinfo)
-{
-  my_master_ptr master = (my_master_ptr) cinfo->master;
-
-  if (cinfo->quantize_colors)
-    (*cinfo->cquantize->finish_pass) (cinfo);
-  master->pass_number++;
-}
-
-
-#ifdef D_MULTISCAN_FILES_SUPPORTED
-
-/*
- * Switch to a new external colormap between output passes.
- */
-
-GLOBAL(void)
-jpeg_new_colormap (j_decompress_ptr cinfo)
-{
-  my_master_ptr master = (my_master_ptr) cinfo->master;
-
-  /* Prevent application from calling me at wrong times */
-  if (cinfo->global_state != DSTATE_BUFIMAGE)
-    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
-
-  if (cinfo->quantize_colors && cinfo->enable_external_quant &&
-      cinfo->colormap != NULL) {
-    /* Select 2-pass quantizer for external colormap use */
-    cinfo->cquantize = master->quantizer_2pass;
-    /* Notify quantizer of colormap change */
-    (*cinfo->cquantize->new_color_map) (cinfo);
-    master->pub.is_dummy_pass = FALSE; /* just in case */
-  } else
-    ERREXIT(cinfo, JERR_MODE_CHANGE);
-}
-
-#endif /* D_MULTISCAN_FILES_SUPPORTED */
-
-
-/*
- * Initialize master decompression control and select active modules.
- * This is performed at the start of jpeg_start_decompress.
- */
-
-GLOBAL(void)
-jinit_master_decompress (j_decompress_ptr cinfo)
-{
-  my_master_ptr master;
-
-  master = (my_master_ptr)
-      (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				  SIZEOF(my_decomp_master));
-  cinfo->master = (struct jpeg_decomp_master *) master;
-  master->pub.prepare_for_output_pass = prepare_for_output_pass;
-  master->pub.finish_output_pass = finish_output_pass;
-
-  master->pub.is_dummy_pass = FALSE;
-
-  master_selection(cinfo);
-}
+/*
+ * jdmaster.c
+ *
+ * Copyright (C) 1991-1997, Thomas G. Lane.
+ * Modified 2002-2011 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains master control logic for the JPEG decompressor.
+ * These routines are concerned with selecting the modules to be executed
+ * and with determining the number of passes and the work to be done in each
+ * pass.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/* Private state */
+
+typedef struct {
+  struct jpeg_decomp_master pub; /* public fields */
+
+  int pass_number;		/* # of passes completed */
+
+  boolean using_merged_upsample; /* TRUE if using merged upsample/cconvert */
+
+  /* Saved references to initialized quantizer modules,
+   * in case we need to switch modes.
+   */
+  struct jpeg_color_quantizer * quantizer_1pass;
+  struct jpeg_color_quantizer * quantizer_2pass;
+} my_decomp_master;
+
+typedef my_decomp_master * my_master_ptr;
+
+
+/*
+ * Determine whether merged upsample/color conversion should be used.
+ * CRUCIAL: this must match the actual capabilities of jdmerge.c!
+ */
+
+LOCAL(boolean)
+use_merged_upsample (j_decompress_ptr cinfo)
+{
+#ifdef UPSAMPLE_MERGING_SUPPORTED
+  /* Merging is the equivalent of plain box-filter upsampling */
+  if (cinfo->do_fancy_upsampling || cinfo->CCIR601_sampling)
+    return FALSE;
+  /* jdmerge.c only supports YCC=>RGB color conversion */
+  if (cinfo->jpeg_color_space != JCS_YCbCr || cinfo->num_components != 3 ||
+      cinfo->out_color_space != JCS_RGB ||
+      cinfo->out_color_components != RGB_PIXELSIZE)
+    return FALSE;
+  /* and it only handles 2h1v or 2h2v sampling ratios */
+  if (cinfo->comp_info[0].h_samp_factor != 2 ||
+      cinfo->comp_info[1].h_samp_factor != 1 ||
+      cinfo->comp_info[2].h_samp_factor != 1 ||
+      cinfo->comp_info[0].v_samp_factor >  2 ||
+      cinfo->comp_info[1].v_samp_factor != 1 ||
+      cinfo->comp_info[2].v_samp_factor != 1)
+    return FALSE;
+  /* furthermore, it doesn't work if we've scaled the IDCTs differently */
+  if (cinfo->comp_info[0].DCT_h_scaled_size != cinfo->min_DCT_h_scaled_size ||
+      cinfo->comp_info[1].DCT_h_scaled_size != cinfo->min_DCT_h_scaled_size ||
+      cinfo->comp_info[2].DCT_h_scaled_size != cinfo->min_DCT_h_scaled_size ||
+      cinfo->comp_info[0].DCT_v_scaled_size != cinfo->min_DCT_v_scaled_size ||
+      cinfo->comp_info[1].DCT_v_scaled_size != cinfo->min_DCT_v_scaled_size ||
+      cinfo->comp_info[2].DCT_v_scaled_size != cinfo->min_DCT_v_scaled_size)
+    return FALSE;
+  /* ??? also need to test for upsample-time rescaling, when & if supported */
+  return TRUE;			/* by golly, it'll work... */
+#else
+  return FALSE;
+#endif
+}
+
+
+/*
+ * Compute output image dimensions and related values.
+ * NOTE: this is exported for possible use by application.
+ * Hence it mustn't do anything that can't be done twice.
+ * Also note that it may be called before the master module is initialized!
+ */
+
+GLOBAL(void)
+jpeg_calc_output_dimensions (j_decompress_ptr cinfo)
+/* Do computations that are needed before master selection phase.
+ * This function is used for full decompression.
+ */
+{
+#ifdef IDCT_SCALING_SUPPORTED
+  int ci;
+  jpeg_component_info *compptr;
+#endif
+
+  /* Prevent application from calling me at wrong times */
+  if (cinfo->global_state != DSTATE_READY)
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+
+  /* Compute core output image dimensions and DCT scaling choices. */
+  jpeg_core_output_dimensions(cinfo);
+
+#ifdef IDCT_SCALING_SUPPORTED
+
+  /* In selecting the actual DCT scaling for each component, we try to
+   * scale up the chroma components via IDCT scaling rather than upsampling.
+   * This saves time if the upsampler gets to use 1:1 scaling.
+   * Note this code adapts subsampling ratios which are powers of 2.
+   */
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    int ssize = 1;
+    while (cinfo->min_DCT_h_scaled_size * ssize <=
+	   (cinfo->do_fancy_upsampling ? DCTSIZE : DCTSIZE / 2) &&
+	   (cinfo->max_h_samp_factor % (compptr->h_samp_factor * ssize * 2)) == 0) {
+      ssize = ssize * 2;
+    }
+    compptr->DCT_h_scaled_size = cinfo->min_DCT_h_scaled_size * ssize;
+    ssize = 1;
+    while (cinfo->min_DCT_v_scaled_size * ssize <=
+	   (cinfo->do_fancy_upsampling ? DCTSIZE : DCTSIZE / 2) &&
+	   (cinfo->max_v_samp_factor % (compptr->v_samp_factor * ssize * 2)) == 0) {
+      ssize = ssize * 2;
+    }
+    compptr->DCT_v_scaled_size = cinfo->min_DCT_v_scaled_size * ssize;
+
+    /* We don't support IDCT ratios larger than 2. */
+    if (compptr->DCT_h_scaled_size > compptr->DCT_v_scaled_size * 2)
+	compptr->DCT_h_scaled_size = compptr->DCT_v_scaled_size * 2;
+    else if (compptr->DCT_v_scaled_size > compptr->DCT_h_scaled_size * 2)
+	compptr->DCT_v_scaled_size = compptr->DCT_h_scaled_size * 2;
+  }
+
+  /* Recompute downsampled dimensions of components;
+   * application needs to know these if using raw downsampled data.
+   */
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    /* Size in samples, after IDCT scaling */
+    compptr->downsampled_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width *
+		    (long) (compptr->h_samp_factor * compptr->DCT_h_scaled_size),
+		    (long) (cinfo->max_h_samp_factor * cinfo->block_size));
+    compptr->downsampled_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height *
+		    (long) (compptr->v_samp_factor * compptr->DCT_v_scaled_size),
+		    (long) (cinfo->max_v_samp_factor * cinfo->block_size));
+  }
+
+#endif /* IDCT_SCALING_SUPPORTED */
+
+  /* Report number of components in selected colorspace. */
+  /* Probably this should be in the color conversion module... */
+  switch (cinfo->out_color_space) {
+  case JCS_GRAYSCALE:
+    cinfo->out_color_components = 1;
+    break;
+  case JCS_RGB:
+    cinfo->out_color_components = RGB_PIXELSIZE;
+    break;
+  case JCS_YCbCr:
+    cinfo->out_color_components = 3;
+    break;
+  case JCS_CMYK:
+  case JCS_YCCK:
+    cinfo->out_color_components = 4;
+    break;
+  default:			/* else must be same colorspace as in file */
+    cinfo->out_color_components = cinfo->num_components;
+    break;
+  }
+  cinfo->output_components = (cinfo->quantize_colors ? 1 :
+			      cinfo->out_color_components);
+
+  /* See if upsampler will want to emit more than one row at a time */
+  if (use_merged_upsample(cinfo))
+    cinfo->rec_outbuf_height = cinfo->max_v_samp_factor;
+  else
+    cinfo->rec_outbuf_height = 1;
+}
+
+
+/*
+ * Several decompression processes need to range-limit values to the range
+ * 0..MAXJSAMPLE; the input value may fall somewhat outside this range
+ * due to noise introduced by quantization, roundoff error, etc.  These
+ * processes are inner loops and need to be as fast as possible.  On most
+ * machines, particularly CPUs with pipelines or instruction prefetch,
+ * a (subscript-check-less) C table lookup
+ *		x = sample_range_limit[x];
+ * is faster than explicit tests
+ *		if (x < 0)  x = 0;
+ *		else if (x > MAXJSAMPLE)  x = MAXJSAMPLE;
+ * These processes all use a common table prepared by the routine below.
+ *
+ * For most steps we can mathematically guarantee that the initial value
+ * of x is within MAXJSAMPLE+1 of the legal range, so a table running from
+ * -(MAXJSAMPLE+1) to 2*MAXJSAMPLE+1 is sufficient.  But for the initial
+ * limiting step (just after the IDCT), a wildly out-of-range value is 
+ * possible if the input data is corrupt.  To avoid any chance of indexing
+ * off the end of memory and getting a bad-pointer trap, we perform the
+ * post-IDCT limiting thus:
+ *		x = range_limit[x & MASK];
+ * where MASK is 2 bits wider than legal sample data, ie 10 bits for 8-bit
+ * samples.  Under normal circumstances this is more than enough range and
+ * a correct output will be generated; with bogus input data the mask will
+ * cause wraparound, and we will safely generate a bogus-but-in-range output.
+ * For the post-IDCT step, we want to convert the data from signed to unsigned
+ * representation by adding CENTERJSAMPLE at the same time that we limit it.
+ * So the post-IDCT limiting table ends up looking like this:
+ *   CENTERJSAMPLE,CENTERJSAMPLE+1,...,MAXJSAMPLE,
+ *   MAXJSAMPLE (repeat 2*(MAXJSAMPLE+1)-CENTERJSAMPLE times),
+ *   0          (repeat 2*(MAXJSAMPLE+1)-CENTERJSAMPLE times),
+ *   0,1,...,CENTERJSAMPLE-1
+ * Negative inputs select values from the upper half of the table after
+ * masking.
+ *
+ * We can save some space by overlapping the start of the post-IDCT table
+ * with the simpler range limiting table.  The post-IDCT table begins at
+ * sample_range_limit + CENTERJSAMPLE.
+ *
+ * Note that the table is allocated in near data space on PCs; it's small
+ * enough and used often enough to justify this.
+ */
+
+LOCAL(void)
+prepare_range_limit_table (j_decompress_ptr cinfo)
+/* Allocate and fill in the sample_range_limit table */
+{
+  JSAMPLE * table;
+  int i;
+
+  table = (JSAMPLE *)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+		(5 * (MAXJSAMPLE+1) + CENTERJSAMPLE) * SIZEOF(JSAMPLE));
+  table += (MAXJSAMPLE+1);	/* allow negative subscripts of simple table */
+  cinfo->sample_range_limit = table;
+  /* First segment of "simple" table: limit[x] = 0 for x < 0 */
+  MEMZERO(table - (MAXJSAMPLE+1), (MAXJSAMPLE+1) * SIZEOF(JSAMPLE));
+  /* Main part of "simple" table: limit[x] = x */
+  for (i = 0; i <= MAXJSAMPLE; i++)
+    table[i] = (JSAMPLE) i;
+  table += CENTERJSAMPLE;	/* Point to where post-IDCT table starts */
+  /* End of simple table, rest of first half of post-IDCT table */
+  for (i = CENTERJSAMPLE; i < 2*(MAXJSAMPLE+1); i++)
+    table[i] = MAXJSAMPLE;
+  /* Second half of post-IDCT table */
+  MEMZERO(table + (2 * (MAXJSAMPLE+1)),
+	  (2 * (MAXJSAMPLE+1) - CENTERJSAMPLE) * SIZEOF(JSAMPLE));
+  MEMCOPY(table + (4 * (MAXJSAMPLE+1) - CENTERJSAMPLE),
+	  cinfo->sample_range_limit, CENTERJSAMPLE * SIZEOF(JSAMPLE));
+}
+
+
+/*
+ * Master selection of decompression modules.
+ * This is done once at jpeg_start_decompress time.  We determine
+ * which modules will be used and give them appropriate initialization calls.
+ * We also initialize the decompressor input side to begin consuming data.
+ *
+ * Since jpeg_read_header has finished, we know what is in the SOF
+ * and (first) SOS markers.  We also have all the application parameter
+ * settings.
+ */
+
+LOCAL(void)
+master_selection (j_decompress_ptr cinfo)
+{
+  my_master_ptr master = (my_master_ptr) cinfo->master;
+  boolean use_c_buffer;
+  long samplesperrow;
+  JDIMENSION jd_samplesperrow;
+
+  /* Initialize dimensions and other stuff */
+  jpeg_calc_output_dimensions(cinfo);
+  prepare_range_limit_table(cinfo);
+
+  /* Width of an output scanline must be representable as JDIMENSION. */
+  samplesperrow = (long) cinfo->output_width * (long) cinfo->out_color_components;
+  jd_samplesperrow = (JDIMENSION) samplesperrow;
+  if ((long) jd_samplesperrow != samplesperrow)
+    ERREXIT(cinfo, JERR_WIDTH_OVERFLOW);
+
+  /* Initialize my private state */
+  master->pass_number = 0;
+  master->using_merged_upsample = use_merged_upsample(cinfo);
+
+  /* Color quantizer selection */
+  master->quantizer_1pass = NULL;
+  master->quantizer_2pass = NULL;
+  /* No mode changes if not using buffered-image mode. */
+  if (! cinfo->quantize_colors || ! cinfo->buffered_image) {
+    cinfo->enable_1pass_quant = FALSE;
+    cinfo->enable_external_quant = FALSE;
+    cinfo->enable_2pass_quant = FALSE;
+  }
+  if (cinfo->quantize_colors) {
+    if (cinfo->raw_data_out)
+      ERREXIT(cinfo, JERR_NOTIMPL);
+    /* 2-pass quantizer only works in 3-component color space. */
+    if (cinfo->out_color_components != 3) {
+      cinfo->enable_1pass_quant = TRUE;
+      cinfo->enable_external_quant = FALSE;
+      cinfo->enable_2pass_quant = FALSE;
+      cinfo->colormap = NULL;
+    } else if (cinfo->colormap != NULL) {
+      cinfo->enable_external_quant = TRUE;
+    } else if (cinfo->two_pass_quantize) {
+      cinfo->enable_2pass_quant = TRUE;
+    } else {
+      cinfo->enable_1pass_quant = TRUE;
+    }
+
+    if (cinfo->enable_1pass_quant) {
+#ifdef QUANT_1PASS_SUPPORTED
+      jinit_1pass_quantizer(cinfo);
+      master->quantizer_1pass = cinfo->cquantize;
+#else
+      ERREXIT(cinfo, JERR_NOT_COMPILED);
+#endif
+    }
+
+    /* We use the 2-pass code to map to external colormaps. */
+    if (cinfo->enable_2pass_quant || cinfo->enable_external_quant) {
+#ifdef QUANT_2PASS_SUPPORTED
+      jinit_2pass_quantizer(cinfo);
+      master->quantizer_2pass = cinfo->cquantize;
+#else
+      ERREXIT(cinfo, JERR_NOT_COMPILED);
+#endif
+    }
+    /* If both quantizers are initialized, the 2-pass one is left active;
+     * this is necessary for starting with quantization to an external map.
+     */
+  }
+
+  /* Post-processing: in particular, color conversion first */
+  if (! cinfo->raw_data_out) {
+    if (master->using_merged_upsample) {
+#ifdef UPSAMPLE_MERGING_SUPPORTED
+      jinit_merged_upsampler(cinfo); /* does color conversion too */
+#else
+      ERREXIT(cinfo, JERR_NOT_COMPILED);
+#endif
+    } else {
+      jinit_color_deconverter(cinfo);
+      jinit_upsampler(cinfo);
+    }
+    jinit_d_post_controller(cinfo, cinfo->enable_2pass_quant);
+  }
+  /* Inverse DCT */
+  jinit_inverse_dct(cinfo);
+  /* Entropy decoding: either Huffman or arithmetic coding. */
+  if (cinfo->arith_code)
+    jinit_arith_decoder(cinfo);
+  else {
+    jinit_huff_decoder(cinfo);
+  }
+
+  /* Initialize principal buffer controllers. */
+  use_c_buffer = cinfo->inputctl->has_multiple_scans || cinfo->buffered_image;
+  jinit_d_coef_controller(cinfo, use_c_buffer);
+
+  if (! cinfo->raw_data_out)
+    jinit_d_main_controller(cinfo, FALSE /* never need full buffer here */);
+
+  /* We can now tell the memory manager to allocate virtual arrays. */
+  (*cinfo->mem->realize_virt_arrays) ((j_common_ptr) cinfo);
+
+  /* Initialize input side of decompressor to consume first scan. */
+  (*cinfo->inputctl->start_input_pass) (cinfo);
+
+#ifdef D_MULTISCAN_FILES_SUPPORTED
+  /* If jpeg_start_decompress will read the whole file, initialize
+   * progress monitoring appropriately.  The input step is counted
+   * as one pass.
+   */
+  if (cinfo->progress != NULL && ! cinfo->buffered_image &&
+      cinfo->inputctl->has_multiple_scans) {
+    int nscans;
+    /* Estimate number of scans to set pass_limit. */
+    if (cinfo->progressive_mode) {
+      /* Arbitrarily estimate 2 interleaved DC scans + 3 AC scans/component. */
+      nscans = 2 + 3 * cinfo->num_components;
+    } else {
+      /* For a nonprogressive multiscan file, estimate 1 scan per component. */
+      nscans = cinfo->num_components;
+    }
+    cinfo->progress->pass_counter = 0L;
+    cinfo->progress->pass_limit = (long) cinfo->total_iMCU_rows * nscans;
+    cinfo->progress->completed_passes = 0;
+    cinfo->progress->total_passes = (cinfo->enable_2pass_quant ? 3 : 2);
+    /* Count the input pass as done */
+    master->pass_number++;
+  }
+#endif /* D_MULTISCAN_FILES_SUPPORTED */
+}
+
+
+/*
+ * Per-pass setup.
+ * This is called at the beginning of each output pass.  We determine which
+ * modules will be active during this pass and give them appropriate
+ * start_pass calls.  We also set is_dummy_pass to indicate whether this
+ * is a "real" output pass or a dummy pass for color quantization.
+ * (In the latter case, jdapistd.c will crank the pass to completion.)
+ */
+
+METHODDEF(void)
+prepare_for_output_pass (j_decompress_ptr cinfo)
+{
+  my_master_ptr master = (my_master_ptr) cinfo->master;
+
+  if (master->pub.is_dummy_pass) {
+#ifdef QUANT_2PASS_SUPPORTED
+    /* Final pass of 2-pass quantization */
+    master->pub.is_dummy_pass = FALSE;
+    (*cinfo->cquantize->start_pass) (cinfo, FALSE);
+    (*cinfo->post->start_pass) (cinfo, JBUF_CRANK_DEST);
+    (*cinfo->main->start_pass) (cinfo, JBUF_CRANK_DEST);
+#else
+    ERREXIT(cinfo, JERR_NOT_COMPILED);
+#endif /* QUANT_2PASS_SUPPORTED */
+  } else {
+    if (cinfo->quantize_colors && cinfo->colormap == NULL) {
+      /* Select new quantization method */
+      if (cinfo->two_pass_quantize && cinfo->enable_2pass_quant) {
+	cinfo->cquantize = master->quantizer_2pass;
+	master->pub.is_dummy_pass = TRUE;
+      } else if (cinfo->enable_1pass_quant) {
+	cinfo->cquantize = master->quantizer_1pass;
+      } else {
+	ERREXIT(cinfo, JERR_MODE_CHANGE);
+      }
+    }
+    (*cinfo->idct->start_pass) (cinfo);
+    (*cinfo->coef->start_output_pass) (cinfo);
+    if (! cinfo->raw_data_out) {
+      if (! master->using_merged_upsample)
+	(*cinfo->cconvert->start_pass) (cinfo);
+      (*cinfo->upsample->start_pass) (cinfo);
+      if (cinfo->quantize_colors)
+	(*cinfo->cquantize->start_pass) (cinfo, master->pub.is_dummy_pass);
+      (*cinfo->post->start_pass) (cinfo,
+	    (master->pub.is_dummy_pass ? JBUF_SAVE_AND_PASS : JBUF_PASS_THRU));
+      (*cinfo->main->start_pass) (cinfo, JBUF_PASS_THRU);
+    }
+  }
+
+  /* Set up progress monitor's pass info if present */
+  if (cinfo->progress != NULL) {
+    cinfo->progress->completed_passes = master->pass_number;
+    cinfo->progress->total_passes = master->pass_number +
+				    (master->pub.is_dummy_pass ? 2 : 1);
+    /* In buffered-image mode, we assume one more output pass if EOI not
+     * yet reached, but no more passes if EOI has been reached.
+     */
+    if (cinfo->buffered_image && ! cinfo->inputctl->eoi_reached) {
+      cinfo->progress->total_passes += (cinfo->enable_2pass_quant ? 2 : 1);
+    }
+  }
+}
+
+
+/*
+ * Finish up at end of an output pass.
+ */
+
+METHODDEF(void)
+finish_output_pass (j_decompress_ptr cinfo)
+{
+  my_master_ptr master = (my_master_ptr) cinfo->master;
+
+  if (cinfo->quantize_colors)
+    (*cinfo->cquantize->finish_pass) (cinfo);
+  master->pass_number++;
+}
+
+
+#ifdef D_MULTISCAN_FILES_SUPPORTED
+
+/*
+ * Switch to a new external colormap between output passes.
+ */
+
+GLOBAL(void)
+jpeg_new_colormap (j_decompress_ptr cinfo)
+{
+  my_master_ptr master = (my_master_ptr) cinfo->master;
+
+  /* Prevent application from calling me at wrong times */
+  if (cinfo->global_state != DSTATE_BUFIMAGE)
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+
+  if (cinfo->quantize_colors && cinfo->enable_external_quant &&
+      cinfo->colormap != NULL) {
+    /* Select 2-pass quantizer for external colormap use */
+    cinfo->cquantize = master->quantizer_2pass;
+    /* Notify quantizer of colormap change */
+    (*cinfo->cquantize->new_color_map) (cinfo);
+    master->pub.is_dummy_pass = FALSE; /* just in case */
+  } else
+    ERREXIT(cinfo, JERR_MODE_CHANGE);
+}
+
+#endif /* D_MULTISCAN_FILES_SUPPORTED */
+
+
+/*
+ * Initialize master decompression control and select active modules.
+ * This is performed at the start of jpeg_start_decompress.
+ */
+
+GLOBAL(void)
+jinit_master_decompress (j_decompress_ptr cinfo)
+{
+  my_master_ptr master;
+
+  master = (my_master_ptr)
+      (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				  SIZEOF(my_decomp_master));
+  cinfo->master = (struct jpeg_decomp_master *) master;
+  master->pub.prepare_for_output_pass = prepare_for_output_pass;
+  master->pub.finish_output_pass = finish_output_pass;
+
+  master->pub.is_dummy_pass = FALSE;
+
+  master_selection(cinfo);
+}
diff --git a/plugins/AdvaImg/src/LibJPEG/jdmerge.c b/plugins/AdvaImg/src/LibJPEG/jdmerge.c
index 37444468c2..9e3a595de0 100644
--- a/plugins/AdvaImg/src/LibJPEG/jdmerge.c
+++ b/plugins/AdvaImg/src/LibJPEG/jdmerge.c
@@ -1,400 +1,400 @@
-/*
- * jdmerge.c
- *
- * Copyright (C) 1994-1996, Thomas G. Lane.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains code for merged upsampling/color conversion.
- *
- * This file combines functions from jdsample.c and jdcolor.c;
- * read those files first to understand what's going on.
- *
- * When the chroma components are to be upsampled by simple replication
- * (ie, box filtering), we can save some work in color conversion by
- * calculating all the output pixels corresponding to a pair of chroma
- * samples at one time.  In the conversion equations
- *	R = Y           + K1 * Cr
- *	G = Y + K2 * Cb + K3 * Cr
- *	B = Y + K4 * Cb
- * only the Y term varies among the group of pixels corresponding to a pair
- * of chroma samples, so the rest of the terms can be calculated just once.
- * At typical sampling ratios, this eliminates half or three-quarters of the
- * multiplications needed for color conversion.
- *
- * This file currently provides implementations for the following cases:
- *	YCbCr => RGB color conversion only.
- *	Sampling ratios of 2h1v or 2h2v.
- *	No scaling needed at upsample time.
- *	Corner-aligned (non-CCIR601) sampling alignment.
- * Other special cases could be added, but in most applications these are
- * the only common cases.  (For uncommon cases we fall back on the more
- * general code in jdsample.c and jdcolor.c.)
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-
-#ifdef UPSAMPLE_MERGING_SUPPORTED
-
-
-/* Private subobject */
-
-typedef struct {
-  struct jpeg_upsampler pub;	/* public fields */
-
-  /* Pointer to routine to do actual upsampling/conversion of one row group */
-  JMETHOD(void, upmethod, (j_decompress_ptr cinfo,
-			   JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr,
-			   JSAMPARRAY output_buf));
-
-  /* Private state for YCC->RGB conversion */
-  int * Cr_r_tab;		/* => table for Cr to R conversion */
-  int * Cb_b_tab;		/* => table for Cb to B conversion */
-  INT32 * Cr_g_tab;		/* => table for Cr to G conversion */
-  INT32 * Cb_g_tab;		/* => table for Cb to G conversion */
-
-  /* For 2:1 vertical sampling, we produce two output rows at a time.
-   * We need a "spare" row buffer to hold the second output row if the
-   * application provides just a one-row buffer; we also use the spare
-   * to discard the dummy last row if the image height is odd.
-   */
-  JSAMPROW spare_row;
-  boolean spare_full;		/* T if spare buffer is occupied */
-
-  JDIMENSION out_row_width;	/* samples per output row */
-  JDIMENSION rows_to_go;	/* counts rows remaining in image */
-} my_upsampler;
-
-typedef my_upsampler * my_upsample_ptr;
-
-#define SCALEBITS	16	/* speediest right-shift on some machines */
-#define ONE_HALF	((INT32) 1 << (SCALEBITS-1))
-#define FIX(x)		((INT32) ((x) * (1L<<SCALEBITS) + 0.5))
-
-
-/*
- * Initialize tables for YCC->RGB colorspace conversion.
- * This is taken directly from jdcolor.c; see that file for more info.
- */
-
-LOCAL(void)
-build_ycc_rgb_table (j_decompress_ptr cinfo)
-{
-  my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample;
-  int i;
-  INT32 x;
-  SHIFT_TEMPS
-
-  upsample->Cr_r_tab = (int *)
-    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				(MAXJSAMPLE+1) * SIZEOF(int));
-  upsample->Cb_b_tab = (int *)
-    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				(MAXJSAMPLE+1) * SIZEOF(int));
-  upsample->Cr_g_tab = (INT32 *)
-    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				(MAXJSAMPLE+1) * SIZEOF(INT32));
-  upsample->Cb_g_tab = (INT32 *)
-    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				(MAXJSAMPLE+1) * SIZEOF(INT32));
-
-  for (i = 0, x = -CENTERJSAMPLE; i <= MAXJSAMPLE; i++, x++) {
-    /* i is the actual input pixel value, in the range 0..MAXJSAMPLE */
-    /* The Cb or Cr value we are thinking of is x = i - CENTERJSAMPLE */
-    /* Cr=>R value is nearest int to 1.40200 * x */
-    upsample->Cr_r_tab[i] = (int)
-		    RIGHT_SHIFT(FIX(1.40200) * x + ONE_HALF, SCALEBITS);
-    /* Cb=>B value is nearest int to 1.77200 * x */
-    upsample->Cb_b_tab[i] = (int)
-		    RIGHT_SHIFT(FIX(1.77200) * x + ONE_HALF, SCALEBITS);
-    /* Cr=>G value is scaled-up -0.71414 * x */
-    upsample->Cr_g_tab[i] = (- FIX(0.71414)) * x;
-    /* Cb=>G value is scaled-up -0.34414 * x */
-    /* We also add in ONE_HALF so that need not do it in inner loop */
-    upsample->Cb_g_tab[i] = (- FIX(0.34414)) * x + ONE_HALF;
-  }
-}
-
-
-/*
- * Initialize for an upsampling pass.
- */
-
-METHODDEF(void)
-start_pass_merged_upsample (j_decompress_ptr cinfo)
-{
-  my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample;
-
-  /* Mark the spare buffer empty */
-  upsample->spare_full = FALSE;
-  /* Initialize total-height counter for detecting bottom of image */
-  upsample->rows_to_go = cinfo->output_height;
-}
-
-
-/*
- * Control routine to do upsampling (and color conversion).
- *
- * The control routine just handles the row buffering considerations.
- */
-
-METHODDEF(void)
-merged_2v_upsample (j_decompress_ptr cinfo,
-		    JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr,
-		    JDIMENSION in_row_groups_avail,
-		    JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
-		    JDIMENSION out_rows_avail)
-/* 2:1 vertical sampling case: may need a spare row. */
-{
-  my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample;
-  JSAMPROW work_ptrs[2];
-  JDIMENSION num_rows;		/* number of rows returned to caller */
-
-  if (upsample->spare_full) {
-    /* If we have a spare row saved from a previous cycle, just return it. */
-    jcopy_sample_rows(& upsample->spare_row, 0, output_buf + *out_row_ctr, 0,
-		      1, upsample->out_row_width);
-    num_rows = 1;
-    upsample->spare_full = FALSE;
-  } else {
-    /* Figure number of rows to return to caller. */
-    num_rows = 2;
-    /* Not more than the distance to the end of the image. */
-    if (num_rows > upsample->rows_to_go)
-      num_rows = upsample->rows_to_go;
-    /* And not more than what the client can accept: */
-    out_rows_avail -= *out_row_ctr;
-    if (num_rows > out_rows_avail)
-      num_rows = out_rows_avail;
-    /* Create output pointer array for upsampler. */
-    work_ptrs[0] = output_buf[*out_row_ctr];
-    if (num_rows > 1) {
-      work_ptrs[1] = output_buf[*out_row_ctr + 1];
-    } else {
-      work_ptrs[1] = upsample->spare_row;
-      upsample->spare_full = TRUE;
-    }
-    /* Now do the upsampling. */
-    (*upsample->upmethod) (cinfo, input_buf, *in_row_group_ctr, work_ptrs);
-  }
-
-  /* Adjust counts */
-  *out_row_ctr += num_rows;
-  upsample->rows_to_go -= num_rows;
-  /* When the buffer is emptied, declare this input row group consumed */
-  if (! upsample->spare_full)
-    (*in_row_group_ctr)++;
-}
-
-
-METHODDEF(void)
-merged_1v_upsample (j_decompress_ptr cinfo,
-		    JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr,
-		    JDIMENSION in_row_groups_avail,
-		    JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
-		    JDIMENSION out_rows_avail)
-/* 1:1 vertical sampling case: much easier, never need a spare row. */
-{
-  my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample;
-
-  /* Just do the upsampling. */
-  (*upsample->upmethod) (cinfo, input_buf, *in_row_group_ctr,
-			 output_buf + *out_row_ctr);
-  /* Adjust counts */
-  (*out_row_ctr)++;
-  (*in_row_group_ctr)++;
-}
-
-
-/*
- * These are the routines invoked by the control routines to do
- * the actual upsampling/conversion.  One row group is processed per call.
- *
- * Note: since we may be writing directly into application-supplied buffers,
- * we have to be honest about the output width; we can't assume the buffer
- * has been rounded up to an even width.
- */
-
-
-/*
- * Upsample and color convert for the case of 2:1 horizontal and 1:1 vertical.
- */
-
-METHODDEF(void)
-h2v1_merged_upsample (j_decompress_ptr cinfo,
-		      JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr,
-		      JSAMPARRAY output_buf)
-{
-  my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample;
-  register int y, cred, cgreen, cblue;
-  int cb, cr;
-  register JSAMPROW outptr;
-  JSAMPROW inptr0, inptr1, inptr2;
-  JDIMENSION col;
-  /* copy these pointers into registers if possible */
-  register JSAMPLE * range_limit = cinfo->sample_range_limit;
-  int * Crrtab = upsample->Cr_r_tab;
-  int * Cbbtab = upsample->Cb_b_tab;
-  INT32 * Crgtab = upsample->Cr_g_tab;
-  INT32 * Cbgtab = upsample->Cb_g_tab;
-  SHIFT_TEMPS
-
-  inptr0 = input_buf[0][in_row_group_ctr];
-  inptr1 = input_buf[1][in_row_group_ctr];
-  inptr2 = input_buf[2][in_row_group_ctr];
-  outptr = output_buf[0];
-  /* Loop for each pair of output pixels */
-  for (col = cinfo->output_width >> 1; col > 0; col--) {
-    /* Do the chroma part of the calculation */
-    cb = GETJSAMPLE(*inptr1++);
-    cr = GETJSAMPLE(*inptr2++);
-    cred = Crrtab[cr];
-    cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
-    cblue = Cbbtab[cb];
-    /* Fetch 2 Y values and emit 2 pixels */
-    y  = GETJSAMPLE(*inptr0++);
-    outptr[RGB_RED] =   range_limit[y + cred];
-    outptr[RGB_GREEN] = range_limit[y + cgreen];
-    outptr[RGB_BLUE] =  range_limit[y + cblue];
-    outptr += RGB_PIXELSIZE;
-    y  = GETJSAMPLE(*inptr0++);
-    outptr[RGB_RED] =   range_limit[y + cred];
-    outptr[RGB_GREEN] = range_limit[y + cgreen];
-    outptr[RGB_BLUE] =  range_limit[y + cblue];
-    outptr += RGB_PIXELSIZE;
-  }
-  /* If image width is odd, do the last output column separately */
-  if (cinfo->output_width & 1) {
-    cb = GETJSAMPLE(*inptr1);
-    cr = GETJSAMPLE(*inptr2);
-    cred = Crrtab[cr];
-    cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
-    cblue = Cbbtab[cb];
-    y  = GETJSAMPLE(*inptr0);
-    outptr[RGB_RED] =   range_limit[y + cred];
-    outptr[RGB_GREEN] = range_limit[y + cgreen];
-    outptr[RGB_BLUE] =  range_limit[y + cblue];
-  }
-}
-
-
-/*
- * Upsample and color convert for the case of 2:1 horizontal and 2:1 vertical.
- */
-
-METHODDEF(void)
-h2v2_merged_upsample (j_decompress_ptr cinfo,
-		      JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr,
-		      JSAMPARRAY output_buf)
-{
-  my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample;
-  register int y, cred, cgreen, cblue;
-  int cb, cr;
-  register JSAMPROW outptr0, outptr1;
-  JSAMPROW inptr00, inptr01, inptr1, inptr2;
-  JDIMENSION col;
-  /* copy these pointers into registers if possible */
-  register JSAMPLE * range_limit = cinfo->sample_range_limit;
-  int * Crrtab = upsample->Cr_r_tab;
-  int * Cbbtab = upsample->Cb_b_tab;
-  INT32 * Crgtab = upsample->Cr_g_tab;
-  INT32 * Cbgtab = upsample->Cb_g_tab;
-  SHIFT_TEMPS
-
-  inptr00 = input_buf[0][in_row_group_ctr*2];
-  inptr01 = input_buf[0][in_row_group_ctr*2 + 1];
-  inptr1 = input_buf[1][in_row_group_ctr];
-  inptr2 = input_buf[2][in_row_group_ctr];
-  outptr0 = output_buf[0];
-  outptr1 = output_buf[1];
-  /* Loop for each group of output pixels */
-  for (col = cinfo->output_width >> 1; col > 0; col--) {
-    /* Do the chroma part of the calculation */
-    cb = GETJSAMPLE(*inptr1++);
-    cr = GETJSAMPLE(*inptr2++);
-    cred = Crrtab[cr];
-    cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
-    cblue = Cbbtab[cb];
-    /* Fetch 4 Y values and emit 4 pixels */
-    y  = GETJSAMPLE(*inptr00++);
-    outptr0[RGB_RED] =   range_limit[y + cred];
-    outptr0[RGB_GREEN] = range_limit[y + cgreen];
-    outptr0[RGB_BLUE] =  range_limit[y + cblue];
-    outptr0 += RGB_PIXELSIZE;
-    y  = GETJSAMPLE(*inptr00++);
-    outptr0[RGB_RED] =   range_limit[y + cred];
-    outptr0[RGB_GREEN] = range_limit[y + cgreen];
-    outptr0[RGB_BLUE] =  range_limit[y + cblue];
-    outptr0 += RGB_PIXELSIZE;
-    y  = GETJSAMPLE(*inptr01++);
-    outptr1[RGB_RED] =   range_limit[y + cred];
-    outptr1[RGB_GREEN] = range_limit[y + cgreen];
-    outptr1[RGB_BLUE] =  range_limit[y + cblue];
-    outptr1 += RGB_PIXELSIZE;
-    y  = GETJSAMPLE(*inptr01++);
-    outptr1[RGB_RED] =   range_limit[y + cred];
-    outptr1[RGB_GREEN] = range_limit[y + cgreen];
-    outptr1[RGB_BLUE] =  range_limit[y + cblue];
-    outptr1 += RGB_PIXELSIZE;
-  }
-  /* If image width is odd, do the last output column separately */
-  if (cinfo->output_width & 1) {
-    cb = GETJSAMPLE(*inptr1);
-    cr = GETJSAMPLE(*inptr2);
-    cred = Crrtab[cr];
-    cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
-    cblue = Cbbtab[cb];
-    y  = GETJSAMPLE(*inptr00);
-    outptr0[RGB_RED] =   range_limit[y + cred];
-    outptr0[RGB_GREEN] = range_limit[y + cgreen];
-    outptr0[RGB_BLUE] =  range_limit[y + cblue];
-    y  = GETJSAMPLE(*inptr01);
-    outptr1[RGB_RED] =   range_limit[y + cred];
-    outptr1[RGB_GREEN] = range_limit[y + cgreen];
-    outptr1[RGB_BLUE] =  range_limit[y + cblue];
-  }
-}
-
-
-/*
- * Module initialization routine for merged upsampling/color conversion.
- *
- * NB: this is called under the conditions determined by use_merged_upsample()
- * in jdmaster.c.  That routine MUST correspond to the actual capabilities
- * of this module; no safety checks are made here.
- */
-
-GLOBAL(void)
-jinit_merged_upsampler (j_decompress_ptr cinfo)
-{
-  my_upsample_ptr upsample;
-
-  upsample = (my_upsample_ptr)
-    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				SIZEOF(my_upsampler));
-  cinfo->upsample = (struct jpeg_upsampler *) upsample;
-  upsample->pub.start_pass = start_pass_merged_upsample;
-  upsample->pub.need_context_rows = FALSE;
-
-  upsample->out_row_width = cinfo->output_width * cinfo->out_color_components;
-
-  if (cinfo->max_v_samp_factor == 2) {
-    upsample->pub.upsample = merged_2v_upsample;
-    upsample->upmethod = h2v2_merged_upsample;
-    /* Allocate a spare row buffer */
-    upsample->spare_row = (JSAMPROW)
-      (*cinfo->mem->alloc_large) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-		(size_t) (upsample->out_row_width * SIZEOF(JSAMPLE)));
-  } else {
-    upsample->pub.upsample = merged_1v_upsample;
-    upsample->upmethod = h2v1_merged_upsample;
-    /* No spare row needed */
-    upsample->spare_row = NULL;
-  }
-
-  build_ycc_rgb_table(cinfo);
-}
-
-#endif /* UPSAMPLE_MERGING_SUPPORTED */
+/*
+ * jdmerge.c
+ *
+ * Copyright (C) 1994-1996, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains code for merged upsampling/color conversion.
+ *
+ * This file combines functions from jdsample.c and jdcolor.c;
+ * read those files first to understand what's going on.
+ *
+ * When the chroma components are to be upsampled by simple replication
+ * (ie, box filtering), we can save some work in color conversion by
+ * calculating all the output pixels corresponding to a pair of chroma
+ * samples at one time.  In the conversion equations
+ *	R = Y           + K1 * Cr
+ *	G = Y + K2 * Cb + K3 * Cr
+ *	B = Y + K4 * Cb
+ * only the Y term varies among the group of pixels corresponding to a pair
+ * of chroma samples, so the rest of the terms can be calculated just once.
+ * At typical sampling ratios, this eliminates half or three-quarters of the
+ * multiplications needed for color conversion.
+ *
+ * This file currently provides implementations for the following cases:
+ *	YCbCr => RGB color conversion only.
+ *	Sampling ratios of 2h1v or 2h2v.
+ *	No scaling needed at upsample time.
+ *	Corner-aligned (non-CCIR601) sampling alignment.
+ * Other special cases could be added, but in most applications these are
+ * the only common cases.  (For uncommon cases we fall back on the more
+ * general code in jdsample.c and jdcolor.c.)
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+#ifdef UPSAMPLE_MERGING_SUPPORTED
+
+
+/* Private subobject */
+
+typedef struct {
+  struct jpeg_upsampler pub;	/* public fields */
+
+  /* Pointer to routine to do actual upsampling/conversion of one row group */
+  JMETHOD(void, upmethod, (j_decompress_ptr cinfo,
+			   JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr,
+			   JSAMPARRAY output_buf));
+
+  /* Private state for YCC->RGB conversion */
+  int * Cr_r_tab;		/* => table for Cr to R conversion */
+  int * Cb_b_tab;		/* => table for Cb to B conversion */
+  INT32 * Cr_g_tab;		/* => table for Cr to G conversion */
+  INT32 * Cb_g_tab;		/* => table for Cb to G conversion */
+
+  /* For 2:1 vertical sampling, we produce two output rows at a time.
+   * We need a "spare" row buffer to hold the second output row if the
+   * application provides just a one-row buffer; we also use the spare
+   * to discard the dummy last row if the image height is odd.
+   */
+  JSAMPROW spare_row;
+  boolean spare_full;		/* T if spare buffer is occupied */
+
+  JDIMENSION out_row_width;	/* samples per output row */
+  JDIMENSION rows_to_go;	/* counts rows remaining in image */
+} my_upsampler;
+
+typedef my_upsampler * my_upsample_ptr;
+
+#define SCALEBITS	16	/* speediest right-shift on some machines */
+#define ONE_HALF	((INT32) 1 << (SCALEBITS-1))
+#define FIX(x)		((INT32) ((x) * (1L<<SCALEBITS) + 0.5))
+
+
+/*
+ * Initialize tables for YCC->RGB colorspace conversion.
+ * This is taken directly from jdcolor.c; see that file for more info.
+ */
+
+LOCAL(void)
+build_ycc_rgb_table (j_decompress_ptr cinfo)
+{
+  my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample;
+  int i;
+  INT32 x;
+  SHIFT_TEMPS
+
+  upsample->Cr_r_tab = (int *)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				(MAXJSAMPLE+1) * SIZEOF(int));
+  upsample->Cb_b_tab = (int *)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				(MAXJSAMPLE+1) * SIZEOF(int));
+  upsample->Cr_g_tab = (INT32 *)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				(MAXJSAMPLE+1) * SIZEOF(INT32));
+  upsample->Cb_g_tab = (INT32 *)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				(MAXJSAMPLE+1) * SIZEOF(INT32));
+
+  for (i = 0, x = -CENTERJSAMPLE; i <= MAXJSAMPLE; i++, x++) {
+    /* i is the actual input pixel value, in the range 0..MAXJSAMPLE */
+    /* The Cb or Cr value we are thinking of is x = i - CENTERJSAMPLE */
+    /* Cr=>R value is nearest int to 1.40200 * x */
+    upsample->Cr_r_tab[i] = (int)
+		    RIGHT_SHIFT(FIX(1.40200) * x + ONE_HALF, SCALEBITS);
+    /* Cb=>B value is nearest int to 1.77200 * x */
+    upsample->Cb_b_tab[i] = (int)
+		    RIGHT_SHIFT(FIX(1.77200) * x + ONE_HALF, SCALEBITS);
+    /* Cr=>G value is scaled-up -0.71414 * x */
+    upsample->Cr_g_tab[i] = (- FIX(0.71414)) * x;
+    /* Cb=>G value is scaled-up -0.34414 * x */
+    /* We also add in ONE_HALF so that need not do it in inner loop */
+    upsample->Cb_g_tab[i] = (- FIX(0.34414)) * x + ONE_HALF;
+  }
+}
+
+
+/*
+ * Initialize for an upsampling pass.
+ */
+
+METHODDEF(void)
+start_pass_merged_upsample (j_decompress_ptr cinfo)
+{
+  my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample;
+
+  /* Mark the spare buffer empty */
+  upsample->spare_full = FALSE;
+  /* Initialize total-height counter for detecting bottom of image */
+  upsample->rows_to_go = cinfo->output_height;
+}
+
+
+/*
+ * Control routine to do upsampling (and color conversion).
+ *
+ * The control routine just handles the row buffering considerations.
+ */
+
+METHODDEF(void)
+merged_2v_upsample (j_decompress_ptr cinfo,
+		    JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr,
+		    JDIMENSION in_row_groups_avail,
+		    JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
+		    JDIMENSION out_rows_avail)
+/* 2:1 vertical sampling case: may need a spare row. */
+{
+  my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample;
+  JSAMPROW work_ptrs[2];
+  JDIMENSION num_rows;		/* number of rows returned to caller */
+
+  if (upsample->spare_full) {
+    /* If we have a spare row saved from a previous cycle, just return it. */
+    jcopy_sample_rows(& upsample->spare_row, 0, output_buf + *out_row_ctr, 0,
+		      1, upsample->out_row_width);
+    num_rows = 1;
+    upsample->spare_full = FALSE;
+  } else {
+    /* Figure number of rows to return to caller. */
+    num_rows = 2;
+    /* Not more than the distance to the end of the image. */
+    if (num_rows > upsample->rows_to_go)
+      num_rows = upsample->rows_to_go;
+    /* And not more than what the client can accept: */
+    out_rows_avail -= *out_row_ctr;
+    if (num_rows > out_rows_avail)
+      num_rows = out_rows_avail;
+    /* Create output pointer array for upsampler. */
+    work_ptrs[0] = output_buf[*out_row_ctr];
+    if (num_rows > 1) {
+      work_ptrs[1] = output_buf[*out_row_ctr + 1];
+    } else {
+      work_ptrs[1] = upsample->spare_row;
+      upsample->spare_full = TRUE;
+    }
+    /* Now do the upsampling. */
+    (*upsample->upmethod) (cinfo, input_buf, *in_row_group_ctr, work_ptrs);
+  }
+
+  /* Adjust counts */
+  *out_row_ctr += num_rows;
+  upsample->rows_to_go -= num_rows;
+  /* When the buffer is emptied, declare this input row group consumed */
+  if (! upsample->spare_full)
+    (*in_row_group_ctr)++;
+}
+
+
+METHODDEF(void)
+merged_1v_upsample (j_decompress_ptr cinfo,
+		    JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr,
+		    JDIMENSION in_row_groups_avail,
+		    JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
+		    JDIMENSION out_rows_avail)
+/* 1:1 vertical sampling case: much easier, never need a spare row. */
+{
+  my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample;
+
+  /* Just do the upsampling. */
+  (*upsample->upmethod) (cinfo, input_buf, *in_row_group_ctr,
+			 output_buf + *out_row_ctr);
+  /* Adjust counts */
+  (*out_row_ctr)++;
+  (*in_row_group_ctr)++;
+}
+
+
+/*
+ * These are the routines invoked by the control routines to do
+ * the actual upsampling/conversion.  One row group is processed per call.
+ *
+ * Note: since we may be writing directly into application-supplied buffers,
+ * we have to be honest about the output width; we can't assume the buffer
+ * has been rounded up to an even width.
+ */
+
+
+/*
+ * Upsample and color convert for the case of 2:1 horizontal and 1:1 vertical.
+ */
+
+METHODDEF(void)
+h2v1_merged_upsample (j_decompress_ptr cinfo,
+		      JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr,
+		      JSAMPARRAY output_buf)
+{
+  my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample;
+  register int y, cred, cgreen, cblue;
+  int cb, cr;
+  register JSAMPROW outptr;
+  JSAMPROW inptr0, inptr1, inptr2;
+  JDIMENSION col;
+  /* copy these pointers into registers if possible */
+  register JSAMPLE * range_limit = cinfo->sample_range_limit;
+  int * Crrtab = upsample->Cr_r_tab;
+  int * Cbbtab = upsample->Cb_b_tab;
+  INT32 * Crgtab = upsample->Cr_g_tab;
+  INT32 * Cbgtab = upsample->Cb_g_tab;
+  SHIFT_TEMPS
+
+  inptr0 = input_buf[0][in_row_group_ctr];
+  inptr1 = input_buf[1][in_row_group_ctr];
+  inptr2 = input_buf[2][in_row_group_ctr];
+  outptr = output_buf[0];
+  /* Loop for each pair of output pixels */
+  for (col = cinfo->output_width >> 1; col > 0; col--) {
+    /* Do the chroma part of the calculation */
+    cb = GETJSAMPLE(*inptr1++);
+    cr = GETJSAMPLE(*inptr2++);
+    cred = Crrtab[cr];
+    cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
+    cblue = Cbbtab[cb];
+    /* Fetch 2 Y values and emit 2 pixels */
+    y  = GETJSAMPLE(*inptr0++);
+    outptr[RGB_RED] =   range_limit[y + cred];
+    outptr[RGB_GREEN] = range_limit[y + cgreen];
+    outptr[RGB_BLUE] =  range_limit[y + cblue];
+    outptr += RGB_PIXELSIZE;
+    y  = GETJSAMPLE(*inptr0++);
+    outptr[RGB_RED] =   range_limit[y + cred];
+    outptr[RGB_GREEN] = range_limit[y + cgreen];
+    outptr[RGB_BLUE] =  range_limit[y + cblue];
+    outptr += RGB_PIXELSIZE;
+  }
+  /* If image width is odd, do the last output column separately */
+  if (cinfo->output_width & 1) {
+    cb = GETJSAMPLE(*inptr1);
+    cr = GETJSAMPLE(*inptr2);
+    cred = Crrtab[cr];
+    cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
+    cblue = Cbbtab[cb];
+    y  = GETJSAMPLE(*inptr0);
+    outptr[RGB_RED] =   range_limit[y + cred];
+    outptr[RGB_GREEN] = range_limit[y + cgreen];
+    outptr[RGB_BLUE] =  range_limit[y + cblue];
+  }
+}
+
+
+/*
+ * Upsample and color convert for the case of 2:1 horizontal and 2:1 vertical.
+ */
+
+METHODDEF(void)
+h2v2_merged_upsample (j_decompress_ptr cinfo,
+		      JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr,
+		      JSAMPARRAY output_buf)
+{
+  my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample;
+  register int y, cred, cgreen, cblue;
+  int cb, cr;
+  register JSAMPROW outptr0, outptr1;
+  JSAMPROW inptr00, inptr01, inptr1, inptr2;
+  JDIMENSION col;
+  /* copy these pointers into registers if possible */
+  register JSAMPLE * range_limit = cinfo->sample_range_limit;
+  int * Crrtab = upsample->Cr_r_tab;
+  int * Cbbtab = upsample->Cb_b_tab;
+  INT32 * Crgtab = upsample->Cr_g_tab;
+  INT32 * Cbgtab = upsample->Cb_g_tab;
+  SHIFT_TEMPS
+
+  inptr00 = input_buf[0][in_row_group_ctr*2];
+  inptr01 = input_buf[0][in_row_group_ctr*2 + 1];
+  inptr1 = input_buf[1][in_row_group_ctr];
+  inptr2 = input_buf[2][in_row_group_ctr];
+  outptr0 = output_buf[0];
+  outptr1 = output_buf[1];
+  /* Loop for each group of output pixels */
+  for (col = cinfo->output_width >> 1; col > 0; col--) {
+    /* Do the chroma part of the calculation */
+    cb = GETJSAMPLE(*inptr1++);
+    cr = GETJSAMPLE(*inptr2++);
+    cred = Crrtab[cr];
+    cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
+    cblue = Cbbtab[cb];
+    /* Fetch 4 Y values and emit 4 pixels */
+    y  = GETJSAMPLE(*inptr00++);
+    outptr0[RGB_RED] =   range_limit[y + cred];
+    outptr0[RGB_GREEN] = range_limit[y + cgreen];
+    outptr0[RGB_BLUE] =  range_limit[y + cblue];
+    outptr0 += RGB_PIXELSIZE;
+    y  = GETJSAMPLE(*inptr00++);
+    outptr0[RGB_RED] =   range_limit[y + cred];
+    outptr0[RGB_GREEN] = range_limit[y + cgreen];
+    outptr0[RGB_BLUE] =  range_limit[y + cblue];
+    outptr0 += RGB_PIXELSIZE;
+    y  = GETJSAMPLE(*inptr01++);
+    outptr1[RGB_RED] =   range_limit[y + cred];
+    outptr1[RGB_GREEN] = range_limit[y + cgreen];
+    outptr1[RGB_BLUE] =  range_limit[y + cblue];
+    outptr1 += RGB_PIXELSIZE;
+    y  = GETJSAMPLE(*inptr01++);
+    outptr1[RGB_RED] =   range_limit[y + cred];
+    outptr1[RGB_GREEN] = range_limit[y + cgreen];
+    outptr1[RGB_BLUE] =  range_limit[y + cblue];
+    outptr1 += RGB_PIXELSIZE;
+  }
+  /* If image width is odd, do the last output column separately */
+  if (cinfo->output_width & 1) {
+    cb = GETJSAMPLE(*inptr1);
+    cr = GETJSAMPLE(*inptr2);
+    cred = Crrtab[cr];
+    cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
+    cblue = Cbbtab[cb];
+    y  = GETJSAMPLE(*inptr00);
+    outptr0[RGB_RED] =   range_limit[y + cred];
+    outptr0[RGB_GREEN] = range_limit[y + cgreen];
+    outptr0[RGB_BLUE] =  range_limit[y + cblue];
+    y  = GETJSAMPLE(*inptr01);
+    outptr1[RGB_RED] =   range_limit[y + cred];
+    outptr1[RGB_GREEN] = range_limit[y + cgreen];
+    outptr1[RGB_BLUE] =  range_limit[y + cblue];
+  }
+}
+
+
+/*
+ * Module initialization routine for merged upsampling/color conversion.
+ *
+ * NB: this is called under the conditions determined by use_merged_upsample()
+ * in jdmaster.c.  That routine MUST correspond to the actual capabilities
+ * of this module; no safety checks are made here.
+ */
+
+GLOBAL(void)
+jinit_merged_upsampler (j_decompress_ptr cinfo)
+{
+  my_upsample_ptr upsample;
+
+  upsample = (my_upsample_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				SIZEOF(my_upsampler));
+  cinfo->upsample = (struct jpeg_upsampler *) upsample;
+  upsample->pub.start_pass = start_pass_merged_upsample;
+  upsample->pub.need_context_rows = FALSE;
+
+  upsample->out_row_width = cinfo->output_width * cinfo->out_color_components;
+
+  if (cinfo->max_v_samp_factor == 2) {
+    upsample->pub.upsample = merged_2v_upsample;
+    upsample->upmethod = h2v2_merged_upsample;
+    /* Allocate a spare row buffer */
+    upsample->spare_row = (JSAMPROW)
+      (*cinfo->mem->alloc_large) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+		(size_t) (upsample->out_row_width * SIZEOF(JSAMPLE)));
+  } else {
+    upsample->pub.upsample = merged_1v_upsample;
+    upsample->upmethod = h2v1_merged_upsample;
+    /* No spare row needed */
+    upsample->spare_row = NULL;
+  }
+
+  build_ycc_rgb_table(cinfo);
+}
+
+#endif /* UPSAMPLE_MERGING_SUPPORTED */
diff --git a/plugins/AdvaImg/src/LibJPEG/jdpostct.c b/plugins/AdvaImg/src/LibJPEG/jdpostct.c
index 571563d728..7ba9eed52e 100644
--- a/plugins/AdvaImg/src/LibJPEG/jdpostct.c
+++ b/plugins/AdvaImg/src/LibJPEG/jdpostct.c
@@ -1,290 +1,290 @@
-/*
- * jdpostct.c
- *
- * Copyright (C) 1994-1996, Thomas G. Lane.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains the decompression postprocessing controller.
- * This controller manages the upsampling, color conversion, and color
- * quantization/reduction steps; specifically, it controls the buffering
- * between upsample/color conversion and color quantization/reduction.
- *
- * If no color quantization/reduction is required, then this module has no
- * work to do, and it just hands off to the upsample/color conversion code.
- * An integrated upsample/convert/quantize process would replace this module
- * entirely.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-
-
-/* Private buffer controller object */
-
-typedef struct {
-  struct jpeg_d_post_controller pub; /* public fields */
-
-  /* Color quantization source buffer: this holds output data from
-   * the upsample/color conversion step to be passed to the quantizer.
-   * For two-pass color quantization, we need a full-image buffer;
-   * for one-pass operation, a strip buffer is sufficient.
-   */
-  jvirt_sarray_ptr whole_image;	/* virtual array, or NULL if one-pass */
-  JSAMPARRAY buffer;		/* strip buffer, or current strip of virtual */
-  JDIMENSION strip_height;	/* buffer size in rows */
-  /* for two-pass mode only: */
-  JDIMENSION starting_row;	/* row # of first row in current strip */
-  JDIMENSION next_row;		/* index of next row to fill/empty in strip */
-} my_post_controller;
-
-typedef my_post_controller * my_post_ptr;
-
-
-/* Forward declarations */
-METHODDEF(void) post_process_1pass
-	JPP((j_decompress_ptr cinfo,
-	     JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr,
-	     JDIMENSION in_row_groups_avail,
-	     JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
-	     JDIMENSION out_rows_avail));
-#ifdef QUANT_2PASS_SUPPORTED
-METHODDEF(void) post_process_prepass
-	JPP((j_decompress_ptr cinfo,
-	     JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr,
-	     JDIMENSION in_row_groups_avail,
-	     JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
-	     JDIMENSION out_rows_avail));
-METHODDEF(void) post_process_2pass
-	JPP((j_decompress_ptr cinfo,
-	     JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr,
-	     JDIMENSION in_row_groups_avail,
-	     JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
-	     JDIMENSION out_rows_avail));
-#endif
-
-
-/*
- * Initialize for a processing pass.
- */
-
-METHODDEF(void)
-start_pass_dpost (j_decompress_ptr cinfo, J_BUF_MODE pass_mode)
-{
-  my_post_ptr post = (my_post_ptr) cinfo->post;
-
-  switch (pass_mode) {
-  case JBUF_PASS_THRU:
-    if (cinfo->quantize_colors) {
-      /* Single-pass processing with color quantization. */
-      post->pub.post_process_data = post_process_1pass;
-      /* We could be doing buffered-image output before starting a 2-pass
-       * color quantization; in that case, jinit_d_post_controller did not
-       * allocate a strip buffer.  Use the virtual-array buffer as workspace.
-       */
-      if (post->buffer == NULL) {
-	post->buffer = (*cinfo->mem->access_virt_sarray)
-	  ((j_common_ptr) cinfo, post->whole_image,
-	   (JDIMENSION) 0, post->strip_height, TRUE);
-      }
-    } else {
-      /* For single-pass processing without color quantization,
-       * I have no work to do; just call the upsampler directly.
-       */
-      post->pub.post_process_data = cinfo->upsample->upsample;
-    }
-    break;
-#ifdef QUANT_2PASS_SUPPORTED
-  case JBUF_SAVE_AND_PASS:
-    /* First pass of 2-pass quantization */
-    if (post->whole_image == NULL)
-      ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
-    post->pub.post_process_data = post_process_prepass;
-    break;
-  case JBUF_CRANK_DEST:
-    /* Second pass of 2-pass quantization */
-    if (post->whole_image == NULL)
-      ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
-    post->pub.post_process_data = post_process_2pass;
-    break;
-#endif /* QUANT_2PASS_SUPPORTED */
-  default:
-    ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
-    break;
-  }
-  post->starting_row = post->next_row = 0;
-}
-
-
-/*
- * Process some data in the one-pass (strip buffer) case.
- * This is used for color precision reduction as well as one-pass quantization.
- */
-
-METHODDEF(void)
-post_process_1pass (j_decompress_ptr cinfo,
-		    JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr,
-		    JDIMENSION in_row_groups_avail,
-		    JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
-		    JDIMENSION out_rows_avail)
-{
-  my_post_ptr post = (my_post_ptr) cinfo->post;
-  JDIMENSION num_rows, max_rows;
-
-  /* Fill the buffer, but not more than what we can dump out in one go. */
-  /* Note we rely on the upsampler to detect bottom of image. */
-  max_rows = out_rows_avail - *out_row_ctr;
-  if (max_rows > post->strip_height)
-    max_rows = post->strip_height;
-  num_rows = 0;
-  (*cinfo->upsample->upsample) (cinfo,
-		input_buf, in_row_group_ctr, in_row_groups_avail,
-		post->buffer, &num_rows, max_rows);
-  /* Quantize and emit data. */
-  (*cinfo->cquantize->color_quantize) (cinfo,
-		post->buffer, output_buf + *out_row_ctr, (int) num_rows);
-  *out_row_ctr += num_rows;
-}
-
-
-#ifdef QUANT_2PASS_SUPPORTED
-
-/*
- * Process some data in the first pass of 2-pass quantization.
- */
-
-METHODDEF(void)
-post_process_prepass (j_decompress_ptr cinfo,
-		      JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr,
-		      JDIMENSION in_row_groups_avail,
-		      JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
-		      JDIMENSION out_rows_avail)
-{
-  my_post_ptr post = (my_post_ptr) cinfo->post;
-  JDIMENSION old_next_row, num_rows;
-
-  /* Reposition virtual buffer if at start of strip. */
-  if (post->next_row == 0) {
-    post->buffer = (*cinfo->mem->access_virt_sarray)
-	((j_common_ptr) cinfo, post->whole_image,
-	 post->starting_row, post->strip_height, TRUE);
-  }
-
-  /* Upsample some data (up to a strip height's worth). */
-  old_next_row = post->next_row;
-  (*cinfo->upsample->upsample) (cinfo,
-		input_buf, in_row_group_ctr, in_row_groups_avail,
-		post->buffer, &post->next_row, post->strip_height);
-
-  /* Allow quantizer to scan new data.  No data is emitted, */
-  /* but we advance out_row_ctr so outer loop can tell when we're done. */
-  if (post->next_row > old_next_row) {
-    num_rows = post->next_row - old_next_row;
-    (*cinfo->cquantize->color_quantize) (cinfo, post->buffer + old_next_row,
-					 (JSAMPARRAY) NULL, (int) num_rows);
-    *out_row_ctr += num_rows;
-  }
-
-  /* Advance if we filled the strip. */
-  if (post->next_row >= post->strip_height) {
-    post->starting_row += post->strip_height;
-    post->next_row = 0;
-  }
-}
-
-
-/*
- * Process some data in the second pass of 2-pass quantization.
- */
-
-METHODDEF(void)
-post_process_2pass (j_decompress_ptr cinfo,
-		    JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr,
-		    JDIMENSION in_row_groups_avail,
-		    JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
-		    JDIMENSION out_rows_avail)
-{
-  my_post_ptr post = (my_post_ptr) cinfo->post;
-  JDIMENSION num_rows, max_rows;
-
-  /* Reposition virtual buffer if at start of strip. */
-  if (post->next_row == 0) {
-    post->buffer = (*cinfo->mem->access_virt_sarray)
-	((j_common_ptr) cinfo, post->whole_image,
-	 post->starting_row, post->strip_height, FALSE);
-  }
-
-  /* Determine number of rows to emit. */
-  num_rows = post->strip_height - post->next_row; /* available in strip */
-  max_rows = out_rows_avail - *out_row_ctr; /* available in output area */
-  if (num_rows > max_rows)
-    num_rows = max_rows;
-  /* We have to check bottom of image here, can't depend on upsampler. */
-  max_rows = cinfo->output_height - post->starting_row;
-  if (num_rows > max_rows)
-    num_rows = max_rows;
-
-  /* Quantize and emit data. */
-  (*cinfo->cquantize->color_quantize) (cinfo,
-		post->buffer + post->next_row, output_buf + *out_row_ctr,
-		(int) num_rows);
-  *out_row_ctr += num_rows;
-
-  /* Advance if we filled the strip. */
-  post->next_row += num_rows;
-  if (post->next_row >= post->strip_height) {
-    post->starting_row += post->strip_height;
-    post->next_row = 0;
-  }
-}
-
-#endif /* QUANT_2PASS_SUPPORTED */
-
-
-/*
- * Initialize postprocessing controller.
- */
-
-GLOBAL(void)
-jinit_d_post_controller (j_decompress_ptr cinfo, boolean need_full_buffer)
-{
-  my_post_ptr post;
-
-  post = (my_post_ptr)
-    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				SIZEOF(my_post_controller));
-  cinfo->post = (struct jpeg_d_post_controller *) post;
-  post->pub.start_pass = start_pass_dpost;
-  post->whole_image = NULL;	/* flag for no virtual arrays */
-  post->buffer = NULL;		/* flag for no strip buffer */
-
-  /* Create the quantization buffer, if needed */
-  if (cinfo->quantize_colors) {
-    /* The buffer strip height is max_v_samp_factor, which is typically
-     * an efficient number of rows for upsampling to return.
-     * (In the presence of output rescaling, we might want to be smarter?)
-     */
-    post->strip_height = (JDIMENSION) cinfo->max_v_samp_factor;
-    if (need_full_buffer) {
-      /* Two-pass color quantization: need full-image storage. */
-      /* We round up the number of rows to a multiple of the strip height. */
-#ifdef QUANT_2PASS_SUPPORTED
-      post->whole_image = (*cinfo->mem->request_virt_sarray)
-	((j_common_ptr) cinfo, JPOOL_IMAGE, FALSE,
-	 cinfo->output_width * cinfo->out_color_components,
-	 (JDIMENSION) jround_up((long) cinfo->output_height,
-				(long) post->strip_height),
-	 post->strip_height);
-#else
-      ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
-#endif /* QUANT_2PASS_SUPPORTED */
-    } else {
-      /* One-pass color quantization: just make a strip buffer. */
-      post->buffer = (*cinfo->mem->alloc_sarray)
-	((j_common_ptr) cinfo, JPOOL_IMAGE,
-	 cinfo->output_width * cinfo->out_color_components,
-	 post->strip_height);
-    }
-  }
-}
+/*
+ * jdpostct.c
+ *
+ * Copyright (C) 1994-1996, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains the decompression postprocessing controller.
+ * This controller manages the upsampling, color conversion, and color
+ * quantization/reduction steps; specifically, it controls the buffering
+ * between upsample/color conversion and color quantization/reduction.
+ *
+ * If no color quantization/reduction is required, then this module has no
+ * work to do, and it just hands off to the upsample/color conversion code.
+ * An integrated upsample/convert/quantize process would replace this module
+ * entirely.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/* Private buffer controller object */
+
+typedef struct {
+  struct jpeg_d_post_controller pub; /* public fields */
+
+  /* Color quantization source buffer: this holds output data from
+   * the upsample/color conversion step to be passed to the quantizer.
+   * For two-pass color quantization, we need a full-image buffer;
+   * for one-pass operation, a strip buffer is sufficient.
+   */
+  jvirt_sarray_ptr whole_image;	/* virtual array, or NULL if one-pass */
+  JSAMPARRAY buffer;		/* strip buffer, or current strip of virtual */
+  JDIMENSION strip_height;	/* buffer size in rows */
+  /* for two-pass mode only: */
+  JDIMENSION starting_row;	/* row # of first row in current strip */
+  JDIMENSION next_row;		/* index of next row to fill/empty in strip */
+} my_post_controller;
+
+typedef my_post_controller * my_post_ptr;
+
+
+/* Forward declarations */
+METHODDEF(void) post_process_1pass
+	JPP((j_decompress_ptr cinfo,
+	     JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr,
+	     JDIMENSION in_row_groups_avail,
+	     JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
+	     JDIMENSION out_rows_avail));
+#ifdef QUANT_2PASS_SUPPORTED
+METHODDEF(void) post_process_prepass
+	JPP((j_decompress_ptr cinfo,
+	     JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr,
+	     JDIMENSION in_row_groups_avail,
+	     JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
+	     JDIMENSION out_rows_avail));
+METHODDEF(void) post_process_2pass
+	JPP((j_decompress_ptr cinfo,
+	     JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr,
+	     JDIMENSION in_row_groups_avail,
+	     JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
+	     JDIMENSION out_rows_avail));
+#endif
+
+
+/*
+ * Initialize for a processing pass.
+ */
+
+METHODDEF(void)
+start_pass_dpost (j_decompress_ptr cinfo, J_BUF_MODE pass_mode)
+{
+  my_post_ptr post = (my_post_ptr) cinfo->post;
+
+  switch (pass_mode) {
+  case JBUF_PASS_THRU:
+    if (cinfo->quantize_colors) {
+      /* Single-pass processing with color quantization. */
+      post->pub.post_process_data = post_process_1pass;
+      /* We could be doing buffered-image output before starting a 2-pass
+       * color quantization; in that case, jinit_d_post_controller did not
+       * allocate a strip buffer.  Use the virtual-array buffer as workspace.
+       */
+      if (post->buffer == NULL) {
+	post->buffer = (*cinfo->mem->access_virt_sarray)
+	  ((j_common_ptr) cinfo, post->whole_image,
+	   (JDIMENSION) 0, post->strip_height, TRUE);
+      }
+    } else {
+      /* For single-pass processing without color quantization,
+       * I have no work to do; just call the upsampler directly.
+       */
+      post->pub.post_process_data = cinfo->upsample->upsample;
+    }
+    break;
+#ifdef QUANT_2PASS_SUPPORTED
+  case JBUF_SAVE_AND_PASS:
+    /* First pass of 2-pass quantization */
+    if (post->whole_image == NULL)
+      ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
+    post->pub.post_process_data = post_process_prepass;
+    break;
+  case JBUF_CRANK_DEST:
+    /* Second pass of 2-pass quantization */
+    if (post->whole_image == NULL)
+      ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
+    post->pub.post_process_data = post_process_2pass;
+    break;
+#endif /* QUANT_2PASS_SUPPORTED */
+  default:
+    ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
+    break;
+  }
+  post->starting_row = post->next_row = 0;
+}
+
+
+/*
+ * Process some data in the one-pass (strip buffer) case.
+ * This is used for color precision reduction as well as one-pass quantization.
+ */
+
+METHODDEF(void)
+post_process_1pass (j_decompress_ptr cinfo,
+		    JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr,
+		    JDIMENSION in_row_groups_avail,
+		    JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
+		    JDIMENSION out_rows_avail)
+{
+  my_post_ptr post = (my_post_ptr) cinfo->post;
+  JDIMENSION num_rows, max_rows;
+
+  /* Fill the buffer, but not more than what we can dump out in one go. */
+  /* Note we rely on the upsampler to detect bottom of image. */
+  max_rows = out_rows_avail - *out_row_ctr;
+  if (max_rows > post->strip_height)
+    max_rows = post->strip_height;
+  num_rows = 0;
+  (*cinfo->upsample->upsample) (cinfo,
+		input_buf, in_row_group_ctr, in_row_groups_avail,
+		post->buffer, &num_rows, max_rows);
+  /* Quantize and emit data. */
+  (*cinfo->cquantize->color_quantize) (cinfo,
+		post->buffer, output_buf + *out_row_ctr, (int) num_rows);
+  *out_row_ctr += num_rows;
+}
+
+
+#ifdef QUANT_2PASS_SUPPORTED
+
+/*
+ * Process some data in the first pass of 2-pass quantization.
+ */
+
+METHODDEF(void)
+post_process_prepass (j_decompress_ptr cinfo,
+		      JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr,
+		      JDIMENSION in_row_groups_avail,
+		      JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
+		      JDIMENSION out_rows_avail)
+{
+  my_post_ptr post = (my_post_ptr) cinfo->post;
+  JDIMENSION old_next_row, num_rows;
+
+  /* Reposition virtual buffer if at start of strip. */
+  if (post->next_row == 0) {
+    post->buffer = (*cinfo->mem->access_virt_sarray)
+	((j_common_ptr) cinfo, post->whole_image,
+	 post->starting_row, post->strip_height, TRUE);
+  }
+
+  /* Upsample some data (up to a strip height's worth). */
+  old_next_row = post->next_row;
+  (*cinfo->upsample->upsample) (cinfo,
+		input_buf, in_row_group_ctr, in_row_groups_avail,
+		post->buffer, &post->next_row, post->strip_height);
+
+  /* Allow quantizer to scan new data.  No data is emitted, */
+  /* but we advance out_row_ctr so outer loop can tell when we're done. */
+  if (post->next_row > old_next_row) {
+    num_rows = post->next_row - old_next_row;
+    (*cinfo->cquantize->color_quantize) (cinfo, post->buffer + old_next_row,
+					 (JSAMPARRAY) NULL, (int) num_rows);
+    *out_row_ctr += num_rows;
+  }
+
+  /* Advance if we filled the strip. */
+  if (post->next_row >= post->strip_height) {
+    post->starting_row += post->strip_height;
+    post->next_row = 0;
+  }
+}
+
+
+/*
+ * Process some data in the second pass of 2-pass quantization.
+ */
+
+METHODDEF(void)
+post_process_2pass (j_decompress_ptr cinfo,
+		    JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr,
+		    JDIMENSION in_row_groups_avail,
+		    JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
+		    JDIMENSION out_rows_avail)
+{
+  my_post_ptr post = (my_post_ptr) cinfo->post;
+  JDIMENSION num_rows, max_rows;
+
+  /* Reposition virtual buffer if at start of strip. */
+  if (post->next_row == 0) {
+    post->buffer = (*cinfo->mem->access_virt_sarray)
+	((j_common_ptr) cinfo, post->whole_image,
+	 post->starting_row, post->strip_height, FALSE);
+  }
+
+  /* Determine number of rows to emit. */
+  num_rows = post->strip_height - post->next_row; /* available in strip */
+  max_rows = out_rows_avail - *out_row_ctr; /* available in output area */
+  if (num_rows > max_rows)
+    num_rows = max_rows;
+  /* We have to check bottom of image here, can't depend on upsampler. */
+  max_rows = cinfo->output_height - post->starting_row;
+  if (num_rows > max_rows)
+    num_rows = max_rows;
+
+  /* Quantize and emit data. */
+  (*cinfo->cquantize->color_quantize) (cinfo,
+		post->buffer + post->next_row, output_buf + *out_row_ctr,
+		(int) num_rows);
+  *out_row_ctr += num_rows;
+
+  /* Advance if we filled the strip. */
+  post->next_row += num_rows;
+  if (post->next_row >= post->strip_height) {
+    post->starting_row += post->strip_height;
+    post->next_row = 0;
+  }
+}
+
+#endif /* QUANT_2PASS_SUPPORTED */
+
+
+/*
+ * Initialize postprocessing controller.
+ */
+
+GLOBAL(void)
+jinit_d_post_controller (j_decompress_ptr cinfo, boolean need_full_buffer)
+{
+  my_post_ptr post;
+
+  post = (my_post_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				SIZEOF(my_post_controller));
+  cinfo->post = (struct jpeg_d_post_controller *) post;
+  post->pub.start_pass = start_pass_dpost;
+  post->whole_image = NULL;	/* flag for no virtual arrays */
+  post->buffer = NULL;		/* flag for no strip buffer */
+
+  /* Create the quantization buffer, if needed */
+  if (cinfo->quantize_colors) {
+    /* The buffer strip height is max_v_samp_factor, which is typically
+     * an efficient number of rows for upsampling to return.
+     * (In the presence of output rescaling, we might want to be smarter?)
+     */
+    post->strip_height = (JDIMENSION) cinfo->max_v_samp_factor;
+    if (need_full_buffer) {
+      /* Two-pass color quantization: need full-image storage. */
+      /* We round up the number of rows to a multiple of the strip height. */
+#ifdef QUANT_2PASS_SUPPORTED
+      post->whole_image = (*cinfo->mem->request_virt_sarray)
+	((j_common_ptr) cinfo, JPOOL_IMAGE, FALSE,
+	 cinfo->output_width * cinfo->out_color_components,
+	 (JDIMENSION) jround_up((long) cinfo->output_height,
+				(long) post->strip_height),
+	 post->strip_height);
+#else
+      ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
+#endif /* QUANT_2PASS_SUPPORTED */
+    } else {
+      /* One-pass color quantization: just make a strip buffer. */
+      post->buffer = (*cinfo->mem->alloc_sarray)
+	((j_common_ptr) cinfo, JPOOL_IMAGE,
+	 cinfo->output_width * cinfo->out_color_components,
+	 post->strip_height);
+    }
+  }
+}
diff --git a/plugins/AdvaImg/src/LibJPEG/jdsample.c b/plugins/AdvaImg/src/LibJPEG/jdsample.c
index 7bc8885b02..94f9599f04 100644
--- a/plugins/AdvaImg/src/LibJPEG/jdsample.c
+++ b/plugins/AdvaImg/src/LibJPEG/jdsample.c
@@ -1,361 +1,361 @@
-/*
- * jdsample.c
- *
- * Copyright (C) 1991-1996, Thomas G. Lane.
- * Modified 2002-2008 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains upsampling routines.
- *
- * Upsampling input data is counted in "row groups".  A row group
- * is defined to be (v_samp_factor * DCT_v_scaled_size / min_DCT_v_scaled_size)
- * sample rows of each component.  Upsampling will normally produce
- * max_v_samp_factor pixel rows from each row group (but this could vary
- * if the upsampler is applying a scale factor of its own).
- *
- * An excellent reference for image resampling is
- *   Digital Image Warping, George Wolberg, 1990.
- *   Pub. by IEEE Computer Society Press, Los Alamitos, CA. ISBN 0-8186-8944-7.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-
-
-/* Pointer to routine to upsample a single component */
-typedef JMETHOD(void, upsample1_ptr,
-		(j_decompress_ptr cinfo, jpeg_component_info * compptr,
-		 JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr));
-
-/* Private subobject */
-
-typedef struct {
-  struct jpeg_upsampler pub;	/* public fields */
-
-  /* Color conversion buffer.  When using separate upsampling and color
-   * conversion steps, this buffer holds one upsampled row group until it
-   * has been color converted and output.
-   * Note: we do not allocate any storage for component(s) which are full-size,
-   * ie do not need rescaling.  The corresponding entry of color_buf[] is
-   * simply set to point to the input data array, thereby avoiding copying.
-   */
-  JSAMPARRAY color_buf[MAX_COMPONENTS];
-
-  /* Per-component upsampling method pointers */
-  upsample1_ptr methods[MAX_COMPONENTS];
-
-  int next_row_out;		/* counts rows emitted from color_buf */
-  JDIMENSION rows_to_go;	/* counts rows remaining in image */
-
-  /* Height of an input row group for each component. */
-  int rowgroup_height[MAX_COMPONENTS];
-
-  /* These arrays save pixel expansion factors so that int_expand need not
-   * recompute them each time.  They are unused for other upsampling methods.
-   */
-  UINT8 h_expand[MAX_COMPONENTS];
-  UINT8 v_expand[MAX_COMPONENTS];
-} my_upsampler;
-
-typedef my_upsampler * my_upsample_ptr;
-
-
-/*
- * Initialize for an upsampling pass.
- */
-
-METHODDEF(void)
-start_pass_upsample (j_decompress_ptr cinfo)
-{
-  my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample;
-
-  /* Mark the conversion buffer empty */
-  upsample->next_row_out = cinfo->max_v_samp_factor;
-  /* Initialize total-height counter for detecting bottom of image */
-  upsample->rows_to_go = cinfo->output_height;
-}
-
-
-/*
- * Control routine to do upsampling (and color conversion).
- *
- * In this version we upsample each component independently.
- * We upsample one row group into the conversion buffer, then apply
- * color conversion a row at a time.
- */
-
-METHODDEF(void)
-sep_upsample (j_decompress_ptr cinfo,
-	      JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr,
-	      JDIMENSION in_row_groups_avail,
-	      JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
-	      JDIMENSION out_rows_avail)
-{
-  my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample;
-  int ci;
-  jpeg_component_info * compptr;
-  JDIMENSION num_rows;
-
-  /* Fill the conversion buffer, if it's empty */
-  if (upsample->next_row_out >= cinfo->max_v_samp_factor) {
-    for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-	 ci++, compptr++) {
-      /* Invoke per-component upsample method.  Notice we pass a POINTER
-       * to color_buf[ci], so that fullsize_upsample can change it.
-       */
-      (*upsample->methods[ci]) (cinfo, compptr,
-	input_buf[ci] + (*in_row_group_ctr * upsample->rowgroup_height[ci]),
-	upsample->color_buf + ci);
-    }
-    upsample->next_row_out = 0;
-  }
-
-  /* Color-convert and emit rows */
-
-  /* How many we have in the buffer: */
-  num_rows = (JDIMENSION) (cinfo->max_v_samp_factor - upsample->next_row_out);
-  /* Not more than the distance to the end of the image.  Need this test
-   * in case the image height is not a multiple of max_v_samp_factor:
-   */
-  if (num_rows > upsample->rows_to_go) 
-    num_rows = upsample->rows_to_go;
-  /* And not more than what the client can accept: */
-  out_rows_avail -= *out_row_ctr;
-  if (num_rows > out_rows_avail)
-    num_rows = out_rows_avail;
-
-  (*cinfo->cconvert->color_convert) (cinfo, upsample->color_buf,
-				     (JDIMENSION) upsample->next_row_out,
-				     output_buf + *out_row_ctr,
-				     (int) num_rows);
-
-  /* Adjust counts */
-  *out_row_ctr += num_rows;
-  upsample->rows_to_go -= num_rows;
-  upsample->next_row_out += num_rows;
-  /* When the buffer is emptied, declare this input row group consumed */
-  if (upsample->next_row_out >= cinfo->max_v_samp_factor)
-    (*in_row_group_ctr)++;
-}
-
-
-/*
- * These are the routines invoked by sep_upsample to upsample pixel values
- * of a single component.  One row group is processed per call.
- */
-
-
-/*
- * For full-size components, we just make color_buf[ci] point at the
- * input buffer, and thus avoid copying any data.  Note that this is
- * safe only because sep_upsample doesn't declare the input row group
- * "consumed" until we are done color converting and emitting it.
- */
-
-METHODDEF(void)
-fullsize_upsample (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-		   JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr)
-{
-  *output_data_ptr = input_data;
-}
-
-
-/*
- * This is a no-op version used for "uninteresting" components.
- * These components will not be referenced by color conversion.
- */
-
-METHODDEF(void)
-noop_upsample (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	       JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr)
-{
-  *output_data_ptr = NULL;	/* safety check */
-}
-
-
-/*
- * This version handles any integral sampling ratios.
- * This is not used for typical JPEG files, so it need not be fast.
- * Nor, for that matter, is it particularly accurate: the algorithm is
- * simple replication of the input pixel onto the corresponding output
- * pixels.  The hi-falutin sampling literature refers to this as a
- * "box filter".  A box filter tends to introduce visible artifacts,
- * so if you are actually going to use 3:1 or 4:1 sampling ratios
- * you would be well advised to improve this code.
- */
-
-METHODDEF(void)
-int_upsample (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	      JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr)
-{
-  my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample;
-  JSAMPARRAY output_data = *output_data_ptr;
-  register JSAMPROW inptr, outptr;
-  register JSAMPLE invalue;
-  register int h;
-  JSAMPROW outend;
-  int h_expand, v_expand;
-  int inrow, outrow;
-
-  h_expand = upsample->h_expand[compptr->component_index];
-  v_expand = upsample->v_expand[compptr->component_index];
-
-  inrow = outrow = 0;
-  while (outrow < cinfo->max_v_samp_factor) {
-    /* Generate one output row with proper horizontal expansion */
-    inptr = input_data[inrow];
-    outptr = output_data[outrow];
-    outend = outptr + cinfo->output_width;
-    while (outptr < outend) {
-      invalue = *inptr++;	/* don't need GETJSAMPLE() here */
-      for (h = h_expand; h > 0; h--) {
-	*outptr++ = invalue;
-      }
-    }
-    /* Generate any additional output rows by duplicating the first one */
-    if (v_expand > 1) {
-      jcopy_sample_rows(output_data, outrow, output_data, outrow+1,
-			v_expand-1, cinfo->output_width);
-    }
-    inrow++;
-    outrow += v_expand;
-  }
-}
-
-
-/*
- * Fast processing for the common case of 2:1 horizontal and 1:1 vertical.
- * It's still a box filter.
- */
-
-METHODDEF(void)
-h2v1_upsample (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	       JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr)
-{
-  JSAMPARRAY output_data = *output_data_ptr;
-  register JSAMPROW inptr, outptr;
-  register JSAMPLE invalue;
-  JSAMPROW outend;
-  int outrow;
-
-  for (outrow = 0; outrow < cinfo->max_v_samp_factor; outrow++) {
-    inptr = input_data[outrow];
-    outptr = output_data[outrow];
-    outend = outptr + cinfo->output_width;
-    while (outptr < outend) {
-      invalue = *inptr++;	/* don't need GETJSAMPLE() here */
-      *outptr++ = invalue;
-      *outptr++ = invalue;
-    }
-  }
-}
-
-
-/*
- * Fast processing for the common case of 2:1 horizontal and 2:1 vertical.
- * It's still a box filter.
- */
-
-METHODDEF(void)
-h2v2_upsample (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	       JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr)
-{
-  JSAMPARRAY output_data = *output_data_ptr;
-  register JSAMPROW inptr, outptr;
-  register JSAMPLE invalue;
-  JSAMPROW outend;
-  int inrow, outrow;
-
-  inrow = outrow = 0;
-  while (outrow < cinfo->max_v_samp_factor) {
-    inptr = input_data[inrow];
-    outptr = output_data[outrow];
-    outend = outptr + cinfo->output_width;
-    while (outptr < outend) {
-      invalue = *inptr++;	/* don't need GETJSAMPLE() here */
-      *outptr++ = invalue;
-      *outptr++ = invalue;
-    }
-    jcopy_sample_rows(output_data, outrow, output_data, outrow+1,
-		      1, cinfo->output_width);
-    inrow++;
-    outrow += 2;
-  }
-}
-
-
-/*
- * Module initialization routine for upsampling.
- */
-
-GLOBAL(void)
-jinit_upsampler (j_decompress_ptr cinfo)
-{
-  my_upsample_ptr upsample;
-  int ci;
-  jpeg_component_info * compptr;
-  boolean need_buffer;
-  int h_in_group, v_in_group, h_out_group, v_out_group;
-
-  upsample = (my_upsample_ptr)
-    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				SIZEOF(my_upsampler));
-  cinfo->upsample = (struct jpeg_upsampler *) upsample;
-  upsample->pub.start_pass = start_pass_upsample;
-  upsample->pub.upsample = sep_upsample;
-  upsample->pub.need_context_rows = FALSE; /* until we find out differently */
-
-  if (cinfo->CCIR601_sampling)	/* this isn't supported */
-    ERREXIT(cinfo, JERR_CCIR601_NOTIMPL);
-
-  /* Verify we can handle the sampling factors, select per-component methods,
-   * and create storage as needed.
-   */
-  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-       ci++, compptr++) {
-    /* Compute size of an "input group" after IDCT scaling.  This many samples
-     * are to be converted to max_h_samp_factor * max_v_samp_factor pixels.
-     */
-    h_in_group = (compptr->h_samp_factor * compptr->DCT_h_scaled_size) /
-		 cinfo->min_DCT_h_scaled_size;
-    v_in_group = (compptr->v_samp_factor * compptr->DCT_v_scaled_size) /
-		 cinfo->min_DCT_v_scaled_size;
-    h_out_group = cinfo->max_h_samp_factor;
-    v_out_group = cinfo->max_v_samp_factor;
-    upsample->rowgroup_height[ci] = v_in_group; /* save for use later */
-    need_buffer = TRUE;
-    if (! compptr->component_needed) {
-      /* Don't bother to upsample an uninteresting component. */
-      upsample->methods[ci] = noop_upsample;
-      need_buffer = FALSE;
-    } else if (h_in_group == h_out_group && v_in_group == v_out_group) {
-      /* Fullsize components can be processed without any work. */
-      upsample->methods[ci] = fullsize_upsample;
-      need_buffer = FALSE;
-    } else if (h_in_group * 2 == h_out_group &&
-	       v_in_group == v_out_group) {
-      /* Special case for 2h1v upsampling */
-      upsample->methods[ci] = h2v1_upsample;
-    } else if (h_in_group * 2 == h_out_group &&
-	       v_in_group * 2 == v_out_group) {
-      /* Special case for 2h2v upsampling */
-      upsample->methods[ci] = h2v2_upsample;
-    } else if ((h_out_group % h_in_group) == 0 &&
-	       (v_out_group % v_in_group) == 0) {
-      /* Generic integral-factors upsampling method */
-      upsample->methods[ci] = int_upsample;
-      upsample->h_expand[ci] = (UINT8) (h_out_group / h_in_group);
-      upsample->v_expand[ci] = (UINT8) (v_out_group / v_in_group);
-    } else
-      ERREXIT(cinfo, JERR_FRACT_SAMPLE_NOTIMPL);
-    if (need_buffer) {
-      upsample->color_buf[ci] = (*cinfo->mem->alloc_sarray)
-	((j_common_ptr) cinfo, JPOOL_IMAGE,
-	 (JDIMENSION) jround_up((long) cinfo->output_width,
-				(long) cinfo->max_h_samp_factor),
-	 (JDIMENSION) cinfo->max_v_samp_factor);
-    }
-  }
-}
+/*
+ * jdsample.c
+ *
+ * Copyright (C) 1991-1996, Thomas G. Lane.
+ * Modified 2002-2008 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains upsampling routines.
+ *
+ * Upsampling input data is counted in "row groups".  A row group
+ * is defined to be (v_samp_factor * DCT_v_scaled_size / min_DCT_v_scaled_size)
+ * sample rows of each component.  Upsampling will normally produce
+ * max_v_samp_factor pixel rows from each row group (but this could vary
+ * if the upsampler is applying a scale factor of its own).
+ *
+ * An excellent reference for image resampling is
+ *   Digital Image Warping, George Wolberg, 1990.
+ *   Pub. by IEEE Computer Society Press, Los Alamitos, CA. ISBN 0-8186-8944-7.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/* Pointer to routine to upsample a single component */
+typedef JMETHOD(void, upsample1_ptr,
+		(j_decompress_ptr cinfo, jpeg_component_info * compptr,
+		 JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr));
+
+/* Private subobject */
+
+typedef struct {
+  struct jpeg_upsampler pub;	/* public fields */
+
+  /* Color conversion buffer.  When using separate upsampling and color
+   * conversion steps, this buffer holds one upsampled row group until it
+   * has been color converted and output.
+   * Note: we do not allocate any storage for component(s) which are full-size,
+   * ie do not need rescaling.  The corresponding entry of color_buf[] is
+   * simply set to point to the input data array, thereby avoiding copying.
+   */
+  JSAMPARRAY color_buf[MAX_COMPONENTS];
+
+  /* Per-component upsampling method pointers */
+  upsample1_ptr methods[MAX_COMPONENTS];
+
+  int next_row_out;		/* counts rows emitted from color_buf */
+  JDIMENSION rows_to_go;	/* counts rows remaining in image */
+
+  /* Height of an input row group for each component. */
+  int rowgroup_height[MAX_COMPONENTS];
+
+  /* These arrays save pixel expansion factors so that int_expand need not
+   * recompute them each time.  They are unused for other upsampling methods.
+   */
+  UINT8 h_expand[MAX_COMPONENTS];
+  UINT8 v_expand[MAX_COMPONENTS];
+} my_upsampler;
+
+typedef my_upsampler * my_upsample_ptr;
+
+
+/*
+ * Initialize for an upsampling pass.
+ */
+
+METHODDEF(void)
+start_pass_upsample (j_decompress_ptr cinfo)
+{
+  my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample;
+
+  /* Mark the conversion buffer empty */
+  upsample->next_row_out = cinfo->max_v_samp_factor;
+  /* Initialize total-height counter for detecting bottom of image */
+  upsample->rows_to_go = cinfo->output_height;
+}
+
+
+/*
+ * Control routine to do upsampling (and color conversion).
+ *
+ * In this version we upsample each component independently.
+ * We upsample one row group into the conversion buffer, then apply
+ * color conversion a row at a time.
+ */
+
+METHODDEF(void)
+sep_upsample (j_decompress_ptr cinfo,
+	      JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr,
+	      JDIMENSION in_row_groups_avail,
+	      JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
+	      JDIMENSION out_rows_avail)
+{
+  my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample;
+  int ci;
+  jpeg_component_info * compptr;
+  JDIMENSION num_rows;
+
+  /* Fill the conversion buffer, if it's empty */
+  if (upsample->next_row_out >= cinfo->max_v_samp_factor) {
+    for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+	 ci++, compptr++) {
+      /* Invoke per-component upsample method.  Notice we pass a POINTER
+       * to color_buf[ci], so that fullsize_upsample can change it.
+       */
+      (*upsample->methods[ci]) (cinfo, compptr,
+	input_buf[ci] + (*in_row_group_ctr * upsample->rowgroup_height[ci]),
+	upsample->color_buf + ci);
+    }
+    upsample->next_row_out = 0;
+  }
+
+  /* Color-convert and emit rows */
+
+  /* How many we have in the buffer: */
+  num_rows = (JDIMENSION) (cinfo->max_v_samp_factor - upsample->next_row_out);
+  /* Not more than the distance to the end of the image.  Need this test
+   * in case the image height is not a multiple of max_v_samp_factor:
+   */
+  if (num_rows > upsample->rows_to_go) 
+    num_rows = upsample->rows_to_go;
+  /* And not more than what the client can accept: */
+  out_rows_avail -= *out_row_ctr;
+  if (num_rows > out_rows_avail)
+    num_rows = out_rows_avail;
+
+  (*cinfo->cconvert->color_convert) (cinfo, upsample->color_buf,
+				     (JDIMENSION) upsample->next_row_out,
+				     output_buf + *out_row_ctr,
+				     (int) num_rows);
+
+  /* Adjust counts */
+  *out_row_ctr += num_rows;
+  upsample->rows_to_go -= num_rows;
+  upsample->next_row_out += num_rows;
+  /* When the buffer is emptied, declare this input row group consumed */
+  if (upsample->next_row_out >= cinfo->max_v_samp_factor)
+    (*in_row_group_ctr)++;
+}
+
+
+/*
+ * These are the routines invoked by sep_upsample to upsample pixel values
+ * of a single component.  One row group is processed per call.
+ */
+
+
+/*
+ * For full-size components, we just make color_buf[ci] point at the
+ * input buffer, and thus avoid copying any data.  Note that this is
+ * safe only because sep_upsample doesn't declare the input row group
+ * "consumed" until we are done color converting and emitting it.
+ */
+
+METHODDEF(void)
+fullsize_upsample (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+		   JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr)
+{
+  *output_data_ptr = input_data;
+}
+
+
+/*
+ * This is a no-op version used for "uninteresting" components.
+ * These components will not be referenced by color conversion.
+ */
+
+METHODDEF(void)
+noop_upsample (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	       JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr)
+{
+  *output_data_ptr = NULL;	/* safety check */
+}
+
+
+/*
+ * This version handles any integral sampling ratios.
+ * This is not used for typical JPEG files, so it need not be fast.
+ * Nor, for that matter, is it particularly accurate: the algorithm is
+ * simple replication of the input pixel onto the corresponding output
+ * pixels.  The hi-falutin sampling literature refers to this as a
+ * "box filter".  A box filter tends to introduce visible artifacts,
+ * so if you are actually going to use 3:1 or 4:1 sampling ratios
+ * you would be well advised to improve this code.
+ */
+
+METHODDEF(void)
+int_upsample (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	      JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr)
+{
+  my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample;
+  JSAMPARRAY output_data = *output_data_ptr;
+  register JSAMPROW inptr, outptr;
+  register JSAMPLE invalue;
+  register int h;
+  JSAMPROW outend;
+  int h_expand, v_expand;
+  int inrow, outrow;
+
+  h_expand = upsample->h_expand[compptr->component_index];
+  v_expand = upsample->v_expand[compptr->component_index];
+
+  inrow = outrow = 0;
+  while (outrow < cinfo->max_v_samp_factor) {
+    /* Generate one output row with proper horizontal expansion */
+    inptr = input_data[inrow];
+    outptr = output_data[outrow];
+    outend = outptr + cinfo->output_width;
+    while (outptr < outend) {
+      invalue = *inptr++;	/* don't need GETJSAMPLE() here */
+      for (h = h_expand; h > 0; h--) {
+	*outptr++ = invalue;
+      }
+    }
+    /* Generate any additional output rows by duplicating the first one */
+    if (v_expand > 1) {
+      jcopy_sample_rows(output_data, outrow, output_data, outrow+1,
+			v_expand-1, cinfo->output_width);
+    }
+    inrow++;
+    outrow += v_expand;
+  }
+}
+
+
+/*
+ * Fast processing for the common case of 2:1 horizontal and 1:1 vertical.
+ * It's still a box filter.
+ */
+
+METHODDEF(void)
+h2v1_upsample (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	       JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr)
+{
+  JSAMPARRAY output_data = *output_data_ptr;
+  register JSAMPROW inptr, outptr;
+  register JSAMPLE invalue;
+  JSAMPROW outend;
+  int outrow;
+
+  for (outrow = 0; outrow < cinfo->max_v_samp_factor; outrow++) {
+    inptr = input_data[outrow];
+    outptr = output_data[outrow];
+    outend = outptr + cinfo->output_width;
+    while (outptr < outend) {
+      invalue = *inptr++;	/* don't need GETJSAMPLE() here */
+      *outptr++ = invalue;
+      *outptr++ = invalue;
+    }
+  }
+}
+
+
+/*
+ * Fast processing for the common case of 2:1 horizontal and 2:1 vertical.
+ * It's still a box filter.
+ */
+
+METHODDEF(void)
+h2v2_upsample (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	       JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr)
+{
+  JSAMPARRAY output_data = *output_data_ptr;
+  register JSAMPROW inptr, outptr;
+  register JSAMPLE invalue;
+  JSAMPROW outend;
+  int inrow, outrow;
+
+  inrow = outrow = 0;
+  while (outrow < cinfo->max_v_samp_factor) {
+    inptr = input_data[inrow];
+    outptr = output_data[outrow];
+    outend = outptr + cinfo->output_width;
+    while (outptr < outend) {
+      invalue = *inptr++;	/* don't need GETJSAMPLE() here */
+      *outptr++ = invalue;
+      *outptr++ = invalue;
+    }
+    jcopy_sample_rows(output_data, outrow, output_data, outrow+1,
+		      1, cinfo->output_width);
+    inrow++;
+    outrow += 2;
+  }
+}
+
+
+/*
+ * Module initialization routine for upsampling.
+ */
+
+GLOBAL(void)
+jinit_upsampler (j_decompress_ptr cinfo)
+{
+  my_upsample_ptr upsample;
+  int ci;
+  jpeg_component_info * compptr;
+  boolean need_buffer;
+  int h_in_group, v_in_group, h_out_group, v_out_group;
+
+  upsample = (my_upsample_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				SIZEOF(my_upsampler));
+  cinfo->upsample = (struct jpeg_upsampler *) upsample;
+  upsample->pub.start_pass = start_pass_upsample;
+  upsample->pub.upsample = sep_upsample;
+  upsample->pub.need_context_rows = FALSE; /* until we find out differently */
+
+  if (cinfo->CCIR601_sampling)	/* this isn't supported */
+    ERREXIT(cinfo, JERR_CCIR601_NOTIMPL);
+
+  /* Verify we can handle the sampling factors, select per-component methods,
+   * and create storage as needed.
+   */
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    /* Compute size of an "input group" after IDCT scaling.  This many samples
+     * are to be converted to max_h_samp_factor * max_v_samp_factor pixels.
+     */
+    h_in_group = (compptr->h_samp_factor * compptr->DCT_h_scaled_size) /
+		 cinfo->min_DCT_h_scaled_size;
+    v_in_group = (compptr->v_samp_factor * compptr->DCT_v_scaled_size) /
+		 cinfo->min_DCT_v_scaled_size;
+    h_out_group = cinfo->max_h_samp_factor;
+    v_out_group = cinfo->max_v_samp_factor;
+    upsample->rowgroup_height[ci] = v_in_group; /* save for use later */
+    need_buffer = TRUE;
+    if (! compptr->component_needed) {
+      /* Don't bother to upsample an uninteresting component. */
+      upsample->methods[ci] = noop_upsample;
+      need_buffer = FALSE;
+    } else if (h_in_group == h_out_group && v_in_group == v_out_group) {
+      /* Fullsize components can be processed without any work. */
+      upsample->methods[ci] = fullsize_upsample;
+      need_buffer = FALSE;
+    } else if (h_in_group * 2 == h_out_group &&
+	       v_in_group == v_out_group) {
+      /* Special case for 2h1v upsampling */
+      upsample->methods[ci] = h2v1_upsample;
+    } else if (h_in_group * 2 == h_out_group &&
+	       v_in_group * 2 == v_out_group) {
+      /* Special case for 2h2v upsampling */
+      upsample->methods[ci] = h2v2_upsample;
+    } else if ((h_out_group % h_in_group) == 0 &&
+	       (v_out_group % v_in_group) == 0) {
+      /* Generic integral-factors upsampling method */
+      upsample->methods[ci] = int_upsample;
+      upsample->h_expand[ci] = (UINT8) (h_out_group / h_in_group);
+      upsample->v_expand[ci] = (UINT8) (v_out_group / v_in_group);
+    } else
+      ERREXIT(cinfo, JERR_FRACT_SAMPLE_NOTIMPL);
+    if (need_buffer) {
+      upsample->color_buf[ci] = (*cinfo->mem->alloc_sarray)
+	((j_common_ptr) cinfo, JPOOL_IMAGE,
+	 (JDIMENSION) jround_up((long) cinfo->output_width,
+				(long) cinfo->max_h_samp_factor),
+	 (JDIMENSION) cinfo->max_v_samp_factor);
+    }
+  }
+}
diff --git a/plugins/AdvaImg/src/LibJPEG/jdtrans.c b/plugins/AdvaImg/src/LibJPEG/jdtrans.c
index 22dd47fb5c..a51d69de44 100644
--- a/plugins/AdvaImg/src/LibJPEG/jdtrans.c
+++ b/plugins/AdvaImg/src/LibJPEG/jdtrans.c
@@ -1,140 +1,140 @@
-/*
- * jdtrans.c
- *
- * Copyright (C) 1995-1997, Thomas G. Lane.
- * Modified 2000-2009 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains library routines for transcoding decompression,
- * that is, reading raw DCT coefficient arrays from an input JPEG file.
- * The routines in jdapimin.c will also be needed by a transcoder.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-
-
-/* Forward declarations */
-LOCAL(void) transdecode_master_selection JPP((j_decompress_ptr cinfo));
-
-
-/*
- * Read the coefficient arrays from a JPEG file.
- * jpeg_read_header must be completed before calling this.
- *
- * The entire image is read into a set of virtual coefficient-block arrays,
- * one per component.  The return value is a pointer to the array of
- * virtual-array descriptors.  These can be manipulated directly via the
- * JPEG memory manager, or handed off to jpeg_write_coefficients().
- * To release the memory occupied by the virtual arrays, call
- * jpeg_finish_decompress() when done with the data.
- *
- * An alternative usage is to simply obtain access to the coefficient arrays
- * during a buffered-image-mode decompression operation.  This is allowed
- * after any jpeg_finish_output() call.  The arrays can be accessed until
- * jpeg_finish_decompress() is called.  (Note that any call to the library
- * may reposition the arrays, so don't rely on access_virt_barray() results
- * to stay valid across library calls.)
- *
- * Returns NULL if suspended.  This case need be checked only if
- * a suspending data source is used.
- */
-
-GLOBAL(jvirt_barray_ptr *)
-jpeg_read_coefficients (j_decompress_ptr cinfo)
-{
-  if (cinfo->global_state == DSTATE_READY) {
-    /* First call: initialize active modules */
-    transdecode_master_selection(cinfo);
-    cinfo->global_state = DSTATE_RDCOEFS;
-  }
-  if (cinfo->global_state == DSTATE_RDCOEFS) {
-    /* Absorb whole file into the coef buffer */
-    for (;;) {
-      int retcode;
-      /* Call progress monitor hook if present */
-      if (cinfo->progress != NULL)
-	(*cinfo->progress->progress_monitor) ((j_common_ptr) cinfo);
-      /* Absorb some more input */
-      retcode = (*cinfo->inputctl->consume_input) (cinfo);
-      if (retcode == JPEG_SUSPENDED)
-	return NULL;
-      if (retcode == JPEG_REACHED_EOI)
-	break;
-      /* Advance progress counter if appropriate */
-      if (cinfo->progress != NULL &&
-	  (retcode == JPEG_ROW_COMPLETED || retcode == JPEG_REACHED_SOS)) {
-	if (++cinfo->progress->pass_counter >= cinfo->progress->pass_limit) {
-	  /* startup underestimated number of scans; ratchet up one scan */
-	  cinfo->progress->pass_limit += (long) cinfo->total_iMCU_rows;
-	}
-      }
-    }
-    /* Set state so that jpeg_finish_decompress does the right thing */
-    cinfo->global_state = DSTATE_STOPPING;
-  }
-  /* At this point we should be in state DSTATE_STOPPING if being used
-   * standalone, or in state DSTATE_BUFIMAGE if being invoked to get access
-   * to the coefficients during a full buffered-image-mode decompression.
-   */
-  if ((cinfo->global_state == DSTATE_STOPPING ||
-       cinfo->global_state == DSTATE_BUFIMAGE) && cinfo->buffered_image) {
-    return cinfo->coef->coef_arrays;
-  }
-  /* Oops, improper usage */
-  ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
-  return NULL;			/* keep compiler happy */
-}
-
-
-/*
- * Master selection of decompression modules for transcoding.
- * This substitutes for jdmaster.c's initialization of the full decompressor.
- */
-
-LOCAL(void)
-transdecode_master_selection (j_decompress_ptr cinfo)
-{
-  /* This is effectively a buffered-image operation. */
-  cinfo->buffered_image = TRUE;
-
-  /* Compute output image dimensions and related values. */
-  jpeg_core_output_dimensions(cinfo);
-
-  /* Entropy decoding: either Huffman or arithmetic coding. */
-  if (cinfo->arith_code)
-    jinit_arith_decoder(cinfo);
-  else {
-    jinit_huff_decoder(cinfo);
-  }
-
-  /* Always get a full-image coefficient buffer. */
-  jinit_d_coef_controller(cinfo, TRUE);
-
-  /* We can now tell the memory manager to allocate virtual arrays. */
-  (*cinfo->mem->realize_virt_arrays) ((j_common_ptr) cinfo);
-
-  /* Initialize input side of decompressor to consume first scan. */
-  (*cinfo->inputctl->start_input_pass) (cinfo);
-
-  /* Initialize progress monitoring. */
-  if (cinfo->progress != NULL) {
-    int nscans;
-    /* Estimate number of scans to set pass_limit. */
-    if (cinfo->progressive_mode) {
-      /* Arbitrarily estimate 2 interleaved DC scans + 3 AC scans/component. */
-      nscans = 2 + 3 * cinfo->num_components;
-    } else if (cinfo->inputctl->has_multiple_scans) {
-      /* For a nonprogressive multiscan file, estimate 1 scan per component. */
-      nscans = cinfo->num_components;
-    } else {
-      nscans = 1;
-    }
-    cinfo->progress->pass_counter = 0L;
-    cinfo->progress->pass_limit = (long) cinfo->total_iMCU_rows * nscans;
-    cinfo->progress->completed_passes = 0;
-    cinfo->progress->total_passes = 1;
-  }
-}
+/*
+ * jdtrans.c
+ *
+ * Copyright (C) 1995-1997, Thomas G. Lane.
+ * Modified 2000-2009 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains library routines for transcoding decompression,
+ * that is, reading raw DCT coefficient arrays from an input JPEG file.
+ * The routines in jdapimin.c will also be needed by a transcoder.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/* Forward declarations */
+LOCAL(void) transdecode_master_selection JPP((j_decompress_ptr cinfo));
+
+
+/*
+ * Read the coefficient arrays from a JPEG file.
+ * jpeg_read_header must be completed before calling this.
+ *
+ * The entire image is read into a set of virtual coefficient-block arrays,
+ * one per component.  The return value is a pointer to the array of
+ * virtual-array descriptors.  These can be manipulated directly via the
+ * JPEG memory manager, or handed off to jpeg_write_coefficients().
+ * To release the memory occupied by the virtual arrays, call
+ * jpeg_finish_decompress() when done with the data.
+ *
+ * An alternative usage is to simply obtain access to the coefficient arrays
+ * during a buffered-image-mode decompression operation.  This is allowed
+ * after any jpeg_finish_output() call.  The arrays can be accessed until
+ * jpeg_finish_decompress() is called.  (Note that any call to the library
+ * may reposition the arrays, so don't rely on access_virt_barray() results
+ * to stay valid across library calls.)
+ *
+ * Returns NULL if suspended.  This case need be checked only if
+ * a suspending data source is used.
+ */
+
+GLOBAL(jvirt_barray_ptr *)
+jpeg_read_coefficients (j_decompress_ptr cinfo)
+{
+  if (cinfo->global_state == DSTATE_READY) {
+    /* First call: initialize active modules */
+    transdecode_master_selection(cinfo);
+    cinfo->global_state = DSTATE_RDCOEFS;
+  }
+  if (cinfo->global_state == DSTATE_RDCOEFS) {
+    /* Absorb whole file into the coef buffer */
+    for (;;) {
+      int retcode;
+      /* Call progress monitor hook if present */
+      if (cinfo->progress != NULL)
+	(*cinfo->progress->progress_monitor) ((j_common_ptr) cinfo);
+      /* Absorb some more input */
+      retcode = (*cinfo->inputctl->consume_input) (cinfo);
+      if (retcode == JPEG_SUSPENDED)
+	return NULL;
+      if (retcode == JPEG_REACHED_EOI)
+	break;
+      /* Advance progress counter if appropriate */
+      if (cinfo->progress != NULL &&
+	  (retcode == JPEG_ROW_COMPLETED || retcode == JPEG_REACHED_SOS)) {
+	if (++cinfo->progress->pass_counter >= cinfo->progress->pass_limit) {
+	  /* startup underestimated number of scans; ratchet up one scan */
+	  cinfo->progress->pass_limit += (long) cinfo->total_iMCU_rows;
+	}
+      }
+    }
+    /* Set state so that jpeg_finish_decompress does the right thing */
+    cinfo->global_state = DSTATE_STOPPING;
+  }
+  /* At this point we should be in state DSTATE_STOPPING if being used
+   * standalone, or in state DSTATE_BUFIMAGE if being invoked to get access
+   * to the coefficients during a full buffered-image-mode decompression.
+   */
+  if ((cinfo->global_state == DSTATE_STOPPING ||
+       cinfo->global_state == DSTATE_BUFIMAGE) && cinfo->buffered_image) {
+    return cinfo->coef->coef_arrays;
+  }
+  /* Oops, improper usage */
+  ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+  return NULL;			/* keep compiler happy */
+}
+
+
+/*
+ * Master selection of decompression modules for transcoding.
+ * This substitutes for jdmaster.c's initialization of the full decompressor.
+ */
+
+LOCAL(void)
+transdecode_master_selection (j_decompress_ptr cinfo)
+{
+  /* This is effectively a buffered-image operation. */
+  cinfo->buffered_image = TRUE;
+
+  /* Compute output image dimensions and related values. */
+  jpeg_core_output_dimensions(cinfo);
+
+  /* Entropy decoding: either Huffman or arithmetic coding. */
+  if (cinfo->arith_code)
+    jinit_arith_decoder(cinfo);
+  else {
+    jinit_huff_decoder(cinfo);
+  }
+
+  /* Always get a full-image coefficient buffer. */
+  jinit_d_coef_controller(cinfo, TRUE);
+
+  /* We can now tell the memory manager to allocate virtual arrays. */
+  (*cinfo->mem->realize_virt_arrays) ((j_common_ptr) cinfo);
+
+  /* Initialize input side of decompressor to consume first scan. */
+  (*cinfo->inputctl->start_input_pass) (cinfo);
+
+  /* Initialize progress monitoring. */
+  if (cinfo->progress != NULL) {
+    int nscans;
+    /* Estimate number of scans to set pass_limit. */
+    if (cinfo->progressive_mode) {
+      /* Arbitrarily estimate 2 interleaved DC scans + 3 AC scans/component. */
+      nscans = 2 + 3 * cinfo->num_components;
+    } else if (cinfo->inputctl->has_multiple_scans) {
+      /* For a nonprogressive multiscan file, estimate 1 scan per component. */
+      nscans = cinfo->num_components;
+    } else {
+      nscans = 1;
+    }
+    cinfo->progress->pass_counter = 0L;
+    cinfo->progress->pass_limit = (long) cinfo->total_iMCU_rows * nscans;
+    cinfo->progress->completed_passes = 0;
+    cinfo->progress->total_passes = 1;
+  }
+}
diff --git a/plugins/AdvaImg/src/LibJPEG/jerror.c b/plugins/AdvaImg/src/LibJPEG/jerror.c
index 3da7be86a0..dcba3980e3 100644
--- a/plugins/AdvaImg/src/LibJPEG/jerror.c
+++ b/plugins/AdvaImg/src/LibJPEG/jerror.c
@@ -1,252 +1,253 @@
-/*
- * jerror.c
- *
- * Copyright (C) 1991-1998, Thomas G. Lane.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains simple error-reporting and trace-message routines.
- * These are suitable for Unix-like systems and others where writing to
- * stderr is the right thing to do.  Many applications will want to replace
- * some or all of these routines.
- *
- * If you define USE_WINDOWS_MESSAGEBOX in jconfig.h or in the makefile,
- * you get a Windows-specific hack to display error messages in a dialog box.
- * It ain't much, but it beats dropping error messages into the bit bucket,
- * which is what happens to output to stderr under most Windows C compilers.
- *
- * These routines are used by both the compression and decompression code.
- */
-
-/* this is not a core library module, so it doesn't define JPEG_INTERNALS */
-#include "jinclude.h"
-#include "jpeglib.h"
-#include "jversion.h"
-#include "jerror.h"
-
-#ifdef USE_WINDOWS_MESSAGEBOX
-#include <windows.h>
-#endif
-
-#ifndef EXIT_FAILURE		/* define exit() codes if not provided */
-#define EXIT_FAILURE  1
-#endif
-
-
-/*
- * Create the message string table.
- * We do this from the master message list in jerror.h by re-reading
- * jerror.h with a suitable definition for macro JMESSAGE.
- * The message table is made an external symbol just in case any applications
- * want to refer to it directly.
- */
-
-#ifdef NEED_SHORT_EXTERNAL_NAMES
-#define jpeg_std_message_table	jMsgTable
-#endif
-
-#define JMESSAGE(code,string)	string ,
-
-const char * const jpeg_std_message_table[] = {
-#include "jerror.h"
-  NULL
-};
-
-
-/*
- * Error exit handler: must not return to caller.
- *
- * Applications may override this if they want to get control back after
- * an error.  Typically one would longjmp somewhere instead of exiting.
- * The setjmp buffer can be made a private field within an expanded error
- * handler object.  Note that the info needed to generate an error message
- * is stored in the error object, so you can generate the message now or
- * later, at your convenience.
- * You should make sure that the JPEG object is cleaned up (with jpeg_abort
- * or jpeg_destroy) at some point.
- */
-
-METHODDEF(void)
-error_exit (j_common_ptr cinfo)
-{
-  /* Always display the message */
-  (*cinfo->err->output_message) (cinfo);
-
-  /* Let the memory manager delete any temp files before we die */
-  jpeg_destroy(cinfo);
-
-  exit(EXIT_FAILURE);
-}
-
-
-/*
- * Actual output of an error or trace message.
- * Applications may override this method to send JPEG messages somewhere
- * other than stderr.
- *
- * On Windows, printing to stderr is generally completely useless,
- * so we provide optional code to produce an error-dialog popup.
- * Most Windows applications will still prefer to override this routine,
- * but if they don't, it'll do something at least marginally useful.
- *
- * NOTE: to use the library in an environment that doesn't support the
- * C stdio library, you may have to delete the call to fprintf() entirely,
- * not just not use this routine.
- */
-
-METHODDEF(void)
-output_message (j_common_ptr cinfo)
-{
-  char buffer[JMSG_LENGTH_MAX];
-
-  /* Create the message */
-  (*cinfo->err->format_message) (cinfo, buffer);
-
-#ifdef USE_WINDOWS_MESSAGEBOX
-  /* Display it in a message dialog box */
-  MessageBox(GetActiveWindow(), buffer, "JPEG Library Error",
-	     MB_OK | MB_ICONERROR);
-#else
-  /* Send it to stderr, adding a newline */
-  fprintf(stderr, "%s\n", buffer);
-#endif
-}
-
-
-/*
- * Decide whether to emit a trace or warning message.
- * msg_level is one of:
- *   -1: recoverable corrupt-data warning, may want to abort.
- *    0: important advisory messages (always display to user).
- *    1: first level of tracing detail.
- *    2,3,...: successively more detailed tracing messages.
- * An application might override this method if it wanted to abort on warnings
- * or change the policy about which messages to display.
- */
-
-METHODDEF(void)
-emit_message (j_common_ptr cinfo, int msg_level)
-{
-  struct jpeg_error_mgr * err = cinfo->err;
-
-  if (msg_level < 0) {
-    /* It's a warning message.  Since corrupt files may generate many warnings,
-     * the policy implemented here is to show only the first warning,
-     * unless trace_level >= 3.
-     */
-    if (err->num_warnings == 0 || err->trace_level >= 3)
-      (*err->output_message) (cinfo);
-    /* Always count warnings in num_warnings. */
-    err->num_warnings++;
-  } else {
-    /* It's a trace message.  Show it if trace_level >= msg_level. */
-    if (err->trace_level >= msg_level)
-      (*err->output_message) (cinfo);
-  }
-}
-
-
-/*
- * Format a message string for the most recent JPEG error or message.
- * The message is stored into buffer, which should be at least JMSG_LENGTH_MAX
- * characters.  Note that no '\n' character is added to the string.
- * Few applications should need to override this method.
- */
-
-METHODDEF(void)
-format_message (j_common_ptr cinfo, char * buffer)
-{
-  struct jpeg_error_mgr * err = cinfo->err;
-  int msg_code = err->msg_code;
-  const char * msgtext = NULL;
-  const char * msgptr;
-  char ch;
-  boolean isstring;
-
-  /* Look up message string in proper table */
-  if (msg_code > 0 && msg_code <= err->last_jpeg_message) {
-    msgtext = err->jpeg_message_table[msg_code];
-  } else if (err->addon_message_table != NULL &&
-	     msg_code >= err->first_addon_message &&
-	     msg_code <= err->last_addon_message) {
-    msgtext = err->addon_message_table[msg_code - err->first_addon_message];
-  }
-
-  /* Defend against bogus message number */
-  if (msgtext == NULL) {
-    err->msg_parm.i[0] = msg_code;
-    msgtext = err->jpeg_message_table[0];
-  }
-
-  /* Check for string parameter, as indicated by %s in the message text */
-  isstring = FALSE;
-  msgptr = msgtext;
-  while ((ch = *msgptr++) != '\0') {
-    if (ch == '%') {
-      if (*msgptr == 's') isstring = TRUE;
-      break;
-    }
-  }
-
-  /* Format the message into the passed buffer */
-  if (isstring)
-    sprintf(buffer, msgtext, err->msg_parm.s);
-  else
-    sprintf(buffer, msgtext,
-	    err->msg_parm.i[0], err->msg_parm.i[1],
-	    err->msg_parm.i[2], err->msg_parm.i[3],
-	    err->msg_parm.i[4], err->msg_parm.i[5],
-	    err->msg_parm.i[6], err->msg_parm.i[7]);
-}
-
-
-/*
- * Reset error state variables at start of a new image.
- * This is called during compression startup to reset trace/error
- * processing to default state, without losing any application-specific
- * method pointers.  An application might possibly want to override
- * this method if it has additional error processing state.
- */
-
-METHODDEF(void)
-reset_error_mgr (j_common_ptr cinfo)
-{
-  cinfo->err->num_warnings = 0;
-  /* trace_level is not reset since it is an application-supplied parameter */
-  cinfo->err->msg_code = 0;	/* may be useful as a flag for "no error" */
-}
-
-
-/*
- * Fill in the standard error-handling methods in a jpeg_error_mgr object.
- * Typical call is:
- *	struct jpeg_compress_struct cinfo;
- *	struct jpeg_error_mgr err;
- *
- *	cinfo.err = jpeg_std_error(&err);
- * after which the application may override some of the methods.
- */
-
-GLOBAL(struct jpeg_error_mgr *)
-jpeg_std_error (struct jpeg_error_mgr * err)
-{
-  err->error_exit = error_exit;
-  err->emit_message = emit_message;
-  err->output_message = output_message;
-  err->format_message = format_message;
-  err->reset_error_mgr = reset_error_mgr;
-
-  err->trace_level = 0;		/* default = no tracing */
-  err->num_warnings = 0;	/* no warnings emitted yet */
-  err->msg_code = 0;		/* may be useful as a flag for "no error" */
-
-  /* Initialize message table pointers */
-  err->jpeg_message_table = jpeg_std_message_table;
-  err->last_jpeg_message = (int) JMSG_LASTMSGCODE - 1;
-
-  err->addon_message_table = NULL;
-  err->first_addon_message = 0;	/* for safety */
-  err->last_addon_message = 0;
-
-  return err;
-}
+/*
+ * jerror.c
+ *
+ * Copyright (C) 1991-1998, Thomas G. Lane.
+ * Modified 2012 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains simple error-reporting and trace-message routines.
+ * These are suitable for Unix-like systems and others where writing to
+ * stderr is the right thing to do.  Many applications will want to replace
+ * some or all of these routines.
+ *
+ * If you define USE_WINDOWS_MESSAGEBOX in jconfig.h or in the makefile,
+ * you get a Windows-specific hack to display error messages in a dialog box.
+ * It ain't much, but it beats dropping error messages into the bit bucket,
+ * which is what happens to output to stderr under most Windows C compilers.
+ *
+ * These routines are used by both the compression and decompression code.
+ */
+
+/* this is not a core library module, so it doesn't define JPEG_INTERNALS */
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jversion.h"
+#include "jerror.h"
+
+#ifdef USE_WINDOWS_MESSAGEBOX
+#include <windows.h>
+#endif
+
+#ifndef EXIT_FAILURE		/* define exit() codes if not provided */
+#define EXIT_FAILURE  1
+#endif
+
+
+/*
+ * Create the message string table.
+ * We do this from the master message list in jerror.h by re-reading
+ * jerror.h with a suitable definition for macro JMESSAGE.
+ * The message table is made an external symbol just in case any applications
+ * want to refer to it directly.
+ */
+
+#ifdef NEED_SHORT_EXTERNAL_NAMES
+#define jpeg_std_message_table	jMsgTable
+#endif
+
+#define JMESSAGE(code,string)	string ,
+
+const char * const jpeg_std_message_table[] = {
+#include "jerror.h"
+  NULL
+};
+
+
+/*
+ * Error exit handler: must not return to caller.
+ *
+ * Applications may override this if they want to get control back after
+ * an error.  Typically one would longjmp somewhere instead of exiting.
+ * The setjmp buffer can be made a private field within an expanded error
+ * handler object.  Note that the info needed to generate an error message
+ * is stored in the error object, so you can generate the message now or
+ * later, at your convenience.
+ * You should make sure that the JPEG object is cleaned up (with jpeg_abort
+ * or jpeg_destroy) at some point.
+ */
+
+METHODDEF(noreturn_t)
+error_exit (j_common_ptr cinfo)
+{
+  /* Always display the message */
+  (*cinfo->err->output_message) (cinfo);
+
+  /* Let the memory manager delete any temp files before we die */
+  jpeg_destroy(cinfo);
+
+  exit(EXIT_FAILURE);
+}
+
+
+/*
+ * Actual output of an error or trace message.
+ * Applications may override this method to send JPEG messages somewhere
+ * other than stderr.
+ *
+ * On Windows, printing to stderr is generally completely useless,
+ * so we provide optional code to produce an error-dialog popup.
+ * Most Windows applications will still prefer to override this routine,
+ * but if they don't, it'll do something at least marginally useful.
+ *
+ * NOTE: to use the library in an environment that doesn't support the
+ * C stdio library, you may have to delete the call to fprintf() entirely,
+ * not just not use this routine.
+ */
+
+METHODDEF(void)
+output_message (j_common_ptr cinfo)
+{
+  char buffer[JMSG_LENGTH_MAX];
+
+  /* Create the message */
+  (*cinfo->err->format_message) (cinfo, buffer);
+
+#ifdef USE_WINDOWS_MESSAGEBOX
+  /* Display it in a message dialog box */
+  MessageBox(GetActiveWindow(), buffer, "JPEG Library Error",
+	     MB_OK | MB_ICONERROR);
+#else
+  /* Send it to stderr, adding a newline */
+  fprintf(stderr, "%s\n", buffer);
+#endif
+}
+
+
+/*
+ * Decide whether to emit a trace or warning message.
+ * msg_level is one of:
+ *   -1: recoverable corrupt-data warning, may want to abort.
+ *    0: important advisory messages (always display to user).
+ *    1: first level of tracing detail.
+ *    2,3,...: successively more detailed tracing messages.
+ * An application might override this method if it wanted to abort on warnings
+ * or change the policy about which messages to display.
+ */
+
+METHODDEF(void)
+emit_message (j_common_ptr cinfo, int msg_level)
+{
+  struct jpeg_error_mgr * err = cinfo->err;
+
+  if (msg_level < 0) {
+    /* It's a warning message.  Since corrupt files may generate many warnings,
+     * the policy implemented here is to show only the first warning,
+     * unless trace_level >= 3.
+     */
+    if (err->num_warnings == 0 || err->trace_level >= 3)
+      (*err->output_message) (cinfo);
+    /* Always count warnings in num_warnings. */
+    err->num_warnings++;
+  } else {
+    /* It's a trace message.  Show it if trace_level >= msg_level. */
+    if (err->trace_level >= msg_level)
+      (*err->output_message) (cinfo);
+  }
+}
+
+
+/*
+ * Format a message string for the most recent JPEG error or message.
+ * The message is stored into buffer, which should be at least JMSG_LENGTH_MAX
+ * characters.  Note that no '\n' character is added to the string.
+ * Few applications should need to override this method.
+ */
+
+METHODDEF(void)
+format_message (j_common_ptr cinfo, char * buffer)
+{
+  struct jpeg_error_mgr * err = cinfo->err;
+  int msg_code = err->msg_code;
+  const char * msgtext = NULL;
+  const char * msgptr;
+  char ch;
+  boolean isstring;
+
+  /* Look up message string in proper table */
+  if (msg_code > 0 && msg_code <= err->last_jpeg_message) {
+    msgtext = err->jpeg_message_table[msg_code];
+  } else if (err->addon_message_table != NULL &&
+	     msg_code >= err->first_addon_message &&
+	     msg_code <= err->last_addon_message) {
+    msgtext = err->addon_message_table[msg_code - err->first_addon_message];
+  }
+
+  /* Defend against bogus message number */
+  if (msgtext == NULL) {
+    err->msg_parm.i[0] = msg_code;
+    msgtext = err->jpeg_message_table[0];
+  }
+
+  /* Check for string parameter, as indicated by %s in the message text */
+  isstring = FALSE;
+  msgptr = msgtext;
+  while ((ch = *msgptr++) != '\0') {
+    if (ch == '%') {
+      if (*msgptr == 's') isstring = TRUE;
+      break;
+    }
+  }
+
+  /* Format the message into the passed buffer */
+  if (isstring)
+    sprintf(buffer, msgtext, err->msg_parm.s);
+  else
+    sprintf(buffer, msgtext,
+	    err->msg_parm.i[0], err->msg_parm.i[1],
+	    err->msg_parm.i[2], err->msg_parm.i[3],
+	    err->msg_parm.i[4], err->msg_parm.i[5],
+	    err->msg_parm.i[6], err->msg_parm.i[7]);
+}
+
+
+/*
+ * Reset error state variables at start of a new image.
+ * This is called during compression startup to reset trace/error
+ * processing to default state, without losing any application-specific
+ * method pointers.  An application might possibly want to override
+ * this method if it has additional error processing state.
+ */
+
+METHODDEF(void)
+reset_error_mgr (j_common_ptr cinfo)
+{
+  cinfo->err->num_warnings = 0;
+  /* trace_level is not reset since it is an application-supplied parameter */
+  cinfo->err->msg_code = 0;	/* may be useful as a flag for "no error" */
+}
+
+
+/*
+ * Fill in the standard error-handling methods in a jpeg_error_mgr object.
+ * Typical call is:
+ *	struct jpeg_compress_struct cinfo;
+ *	struct jpeg_error_mgr err;
+ *
+ *	cinfo.err = jpeg_std_error(&err);
+ * after which the application may override some of the methods.
+ */
+
+GLOBAL(struct jpeg_error_mgr *)
+jpeg_std_error (struct jpeg_error_mgr * err)
+{
+  err->error_exit = error_exit;
+  err->emit_message = emit_message;
+  err->output_message = output_message;
+  err->format_message = format_message;
+  err->reset_error_mgr = reset_error_mgr;
+
+  err->trace_level = 0;		/* default = no tracing */
+  err->num_warnings = 0;	/* no warnings emitted yet */
+  err->msg_code = 0;		/* may be useful as a flag for "no error" */
+
+  /* Initialize message table pointers */
+  err->jpeg_message_table = jpeg_std_message_table;
+  err->last_jpeg_message = (int) JMSG_LASTMSGCODE - 1;
+
+  err->addon_message_table = NULL;
+  err->first_addon_message = 0;	/* for safety */
+  err->last_addon_message = 0;
+
+  return err;
+}
diff --git a/plugins/AdvaImg/src/LibJPEG/jerror.h b/plugins/AdvaImg/src/LibJPEG/jerror.h
index 1cfb2b19d8..b32da9a5ad 100644
--- a/plugins/AdvaImg/src/LibJPEG/jerror.h
+++ b/plugins/AdvaImg/src/LibJPEG/jerror.h
@@ -1,304 +1,304 @@
-/*
- * jerror.h
- *
- * Copyright (C) 1994-1997, Thomas G. Lane.
- * Modified 1997-2009 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file defines the error and message codes for the JPEG library.
- * Edit this file to add new codes, or to translate the message strings to
- * some other language.
- * A set of error-reporting macros are defined too.  Some applications using
- * the JPEG library may wish to include this file to get the error codes
- * and/or the macros.
- */
-
-/*
- * To define the enum list of message codes, include this file without
- * defining macro JMESSAGE.  To create a message string table, include it
- * again with a suitable JMESSAGE definition (see jerror.c for an example).
- */
-#ifndef JMESSAGE
-#ifndef JERROR_H
-/* First time through, define the enum list */
-#define JMAKE_ENUM_LIST
-#else
-/* Repeated inclusions of this file are no-ops unless JMESSAGE is defined */
-#define JMESSAGE(code,string)
-#endif /* JERROR_H */
-#endif /* JMESSAGE */
-
-#ifdef JMAKE_ENUM_LIST
-
-typedef enum {
-
-#define JMESSAGE(code,string)	code ,
-
-#endif /* JMAKE_ENUM_LIST */
-
-JMESSAGE(JMSG_NOMESSAGE, "Bogus message code %d") /* Must be first entry! */
-
-/* For maintenance convenience, list is alphabetical by message code name */
-JMESSAGE(JERR_BAD_ALIGN_TYPE, "ALIGN_TYPE is wrong, please fix")
-JMESSAGE(JERR_BAD_ALLOC_CHUNK, "MAX_ALLOC_CHUNK is wrong, please fix")
-JMESSAGE(JERR_BAD_BUFFER_MODE, "Bogus buffer control mode")
-JMESSAGE(JERR_BAD_COMPONENT_ID, "Invalid component ID %d in SOS")
-JMESSAGE(JERR_BAD_CROP_SPEC, "Invalid crop request")
-JMESSAGE(JERR_BAD_DCT_COEF, "DCT coefficient out of range")
-JMESSAGE(JERR_BAD_DCTSIZE, "DCT scaled block size %dx%d not supported")
-JMESSAGE(JERR_BAD_DROP_SAMPLING,
-	 "Component index %d: mismatching sampling ratio %d:%d, %d:%d, %c")
-JMESSAGE(JERR_BAD_HUFF_TABLE, "Bogus Huffman table definition")
-JMESSAGE(JERR_BAD_IN_COLORSPACE, "Bogus input colorspace")
-JMESSAGE(JERR_BAD_J_COLORSPACE, "Bogus JPEG colorspace")
-JMESSAGE(JERR_BAD_LENGTH, "Bogus marker length")
-JMESSAGE(JERR_BAD_LIB_VERSION,
-	 "Wrong JPEG library version: library is %d, caller expects %d")
-JMESSAGE(JERR_BAD_MCU_SIZE, "Sampling factors too large for interleaved scan")
-JMESSAGE(JERR_BAD_POOL_ID, "Invalid memory pool code %d")
-JMESSAGE(JERR_BAD_PRECISION, "Unsupported JPEG data precision %d")
-JMESSAGE(JERR_BAD_PROGRESSION,
-	 "Invalid progressive parameters Ss=%d Se=%d Ah=%d Al=%d")
-JMESSAGE(JERR_BAD_PROG_SCRIPT,
-	 "Invalid progressive parameters at scan script entry %d")
-JMESSAGE(JERR_BAD_SAMPLING, "Bogus sampling factors")
-JMESSAGE(JERR_BAD_SCAN_SCRIPT, "Invalid scan script at entry %d")
-JMESSAGE(JERR_BAD_STATE, "Improper call to JPEG library in state %d")
-JMESSAGE(JERR_BAD_STRUCT_SIZE,
-	 "JPEG parameter struct mismatch: library thinks size is %u, caller expects %u")
-JMESSAGE(JERR_BAD_VIRTUAL_ACCESS, "Bogus virtual array access")
-JMESSAGE(JERR_BUFFER_SIZE, "Buffer passed to JPEG library is too small")
-JMESSAGE(JERR_CANT_SUSPEND, "Suspension not allowed here")
-JMESSAGE(JERR_CCIR601_NOTIMPL, "CCIR601 sampling not implemented yet")
-JMESSAGE(JERR_COMPONENT_COUNT, "Too many color components: %d, max %d")
-JMESSAGE(JERR_CONVERSION_NOTIMPL, "Unsupported color conversion request")
-JMESSAGE(JERR_DAC_INDEX, "Bogus DAC index %d")
-JMESSAGE(JERR_DAC_VALUE, "Bogus DAC value 0x%x")
-JMESSAGE(JERR_DHT_INDEX, "Bogus DHT index %d")
-JMESSAGE(JERR_DQT_INDEX, "Bogus DQT index %d")
-JMESSAGE(JERR_EMPTY_IMAGE, "Empty JPEG image (DNL not supported)")
-JMESSAGE(JERR_EMS_READ, "Read from EMS failed")
-JMESSAGE(JERR_EMS_WRITE, "Write to EMS failed")
-JMESSAGE(JERR_EOI_EXPECTED, "Didn't expect more than one scan")
-JMESSAGE(JERR_FILE_READ, "Input file read error")
-JMESSAGE(JERR_FILE_WRITE, "Output file write error --- out of disk space?")
-JMESSAGE(JERR_FRACT_SAMPLE_NOTIMPL, "Fractional sampling not implemented yet")
-JMESSAGE(JERR_HUFF_CLEN_OVERFLOW, "Huffman code size table overflow")
-JMESSAGE(JERR_HUFF_MISSING_CODE, "Missing Huffman code table entry")
-JMESSAGE(JERR_IMAGE_TOO_BIG, "Maximum supported image dimension is %u pixels")
-JMESSAGE(JERR_INPUT_EMPTY, "Empty input file")
-JMESSAGE(JERR_INPUT_EOF, "Premature end of input file")
-JMESSAGE(JERR_MISMATCHED_QUANT_TABLE,
-	 "Cannot transcode due to multiple use of quantization table %d")
-JMESSAGE(JERR_MISSING_DATA, "Scan script does not transmit all data")
-JMESSAGE(JERR_MODE_CHANGE, "Invalid color quantization mode change")
-JMESSAGE(JERR_NOTIMPL, "Not implemented yet")
-JMESSAGE(JERR_NOT_COMPILED, "Requested feature was omitted at compile time")
-JMESSAGE(JERR_NO_ARITH_TABLE, "Arithmetic table 0x%02x was not defined")
-JMESSAGE(JERR_NO_BACKING_STORE, "Backing store not supported")
-JMESSAGE(JERR_NO_HUFF_TABLE, "Huffman table 0x%02x was not defined")
-JMESSAGE(JERR_NO_IMAGE, "JPEG datastream contains no image")
-JMESSAGE(JERR_NO_QUANT_TABLE, "Quantization table 0x%02x was not defined")
-JMESSAGE(JERR_NO_SOI, "Not a JPEG file: starts with 0x%02x 0x%02x")
-JMESSAGE(JERR_OUT_OF_MEMORY, "Insufficient memory (case %d)")
-JMESSAGE(JERR_QUANT_COMPONENTS,
-	 "Cannot quantize more than %d color components")
-JMESSAGE(JERR_QUANT_FEW_COLORS, "Cannot quantize to fewer than %d colors")
-JMESSAGE(JERR_QUANT_MANY_COLORS, "Cannot quantize to more than %d colors")
-JMESSAGE(JERR_SOF_DUPLICATE, "Invalid JPEG file structure: two SOF markers")
-JMESSAGE(JERR_SOF_NO_SOS, "Invalid JPEG file structure: missing SOS marker")
-JMESSAGE(JERR_SOF_UNSUPPORTED, "Unsupported JPEG process: SOF type 0x%02x")
-JMESSAGE(JERR_SOI_DUPLICATE, "Invalid JPEG file structure: two SOI markers")
-JMESSAGE(JERR_SOS_NO_SOF, "Invalid JPEG file structure: SOS before SOF")
-JMESSAGE(JERR_TFILE_CREATE, "Failed to create temporary file %s")
-JMESSAGE(JERR_TFILE_READ, "Read failed on temporary file")
-JMESSAGE(JERR_TFILE_SEEK, "Seek failed on temporary file")
-JMESSAGE(JERR_TFILE_WRITE,
-	 "Write failed on temporary file --- out of disk space?")
-JMESSAGE(JERR_TOO_LITTLE_DATA, "Application transferred too few scanlines")
-JMESSAGE(JERR_UNKNOWN_MARKER, "Unsupported marker type 0x%02x")
-JMESSAGE(JERR_VIRTUAL_BUG, "Virtual array controller messed up")
-JMESSAGE(JERR_WIDTH_OVERFLOW, "Image too wide for this implementation")
-JMESSAGE(JERR_XMS_READ, "Read from XMS failed")
-JMESSAGE(JERR_XMS_WRITE, "Write to XMS failed")
-JMESSAGE(JMSG_COPYRIGHT, JCOPYRIGHT)
-JMESSAGE(JMSG_VERSION, JVERSION)
-JMESSAGE(JTRC_16BIT_TABLES,
-	 "Caution: quantization tables are too coarse for baseline JPEG")
-JMESSAGE(JTRC_ADOBE,
-	 "Adobe APP14 marker: version %d, flags 0x%04x 0x%04x, transform %d")
-JMESSAGE(JTRC_APP0, "Unknown APP0 marker (not JFIF), length %u")
-JMESSAGE(JTRC_APP14, "Unknown APP14 marker (not Adobe), length %u")
-JMESSAGE(JTRC_DAC, "Define Arithmetic Table 0x%02x: 0x%02x")
-JMESSAGE(JTRC_DHT, "Define Huffman Table 0x%02x")
-JMESSAGE(JTRC_DQT, "Define Quantization Table %d  precision %d")
-JMESSAGE(JTRC_DRI, "Define Restart Interval %u")
-JMESSAGE(JTRC_EMS_CLOSE, "Freed EMS handle %u")
-JMESSAGE(JTRC_EMS_OPEN, "Obtained EMS handle %u")
-JMESSAGE(JTRC_EOI, "End Of Image")
-JMESSAGE(JTRC_HUFFBITS, "        %3d %3d %3d %3d %3d %3d %3d %3d")
-JMESSAGE(JTRC_JFIF, "JFIF APP0 marker: version %d.%02d, density %dx%d  %d")
-JMESSAGE(JTRC_JFIF_BADTHUMBNAILSIZE,
-	 "Warning: thumbnail image size does not match data length %u")
-JMESSAGE(JTRC_JFIF_EXTENSION,
-	 "JFIF extension marker: type 0x%02x, length %u")
-JMESSAGE(JTRC_JFIF_THUMBNAIL, "    with %d x %d thumbnail image")
-JMESSAGE(JTRC_MISC_MARKER, "Miscellaneous marker 0x%02x, length %u")
-JMESSAGE(JTRC_PARMLESS_MARKER, "Unexpected marker 0x%02x")
-JMESSAGE(JTRC_QUANTVALS, "        %4u %4u %4u %4u %4u %4u %4u %4u")
-JMESSAGE(JTRC_QUANT_3_NCOLORS, "Quantizing to %d = %d*%d*%d colors")
-JMESSAGE(JTRC_QUANT_NCOLORS, "Quantizing to %d colors")
-JMESSAGE(JTRC_QUANT_SELECTED, "Selected %d colors for quantization")
-JMESSAGE(JTRC_RECOVERY_ACTION, "At marker 0x%02x, recovery action %d")
-JMESSAGE(JTRC_RST, "RST%d")
-JMESSAGE(JTRC_SMOOTH_NOTIMPL,
-	 "Smoothing not supported with nonstandard sampling ratios")
-JMESSAGE(JTRC_SOF, "Start Of Frame 0x%02x: width=%u, height=%u, components=%d")
-JMESSAGE(JTRC_SOF_COMPONENT, "    Component %d: %dhx%dv q=%d")
-JMESSAGE(JTRC_SOI, "Start of Image")
-JMESSAGE(JTRC_SOS, "Start Of Scan: %d components")
-JMESSAGE(JTRC_SOS_COMPONENT, "    Component %d: dc=%d ac=%d")
-JMESSAGE(JTRC_SOS_PARAMS, "  Ss=%d, Se=%d, Ah=%d, Al=%d")
-JMESSAGE(JTRC_TFILE_CLOSE, "Closed temporary file %s")
-JMESSAGE(JTRC_TFILE_OPEN, "Opened temporary file %s")
-JMESSAGE(JTRC_THUMB_JPEG,
-	 "JFIF extension marker: JPEG-compressed thumbnail image, length %u")
-JMESSAGE(JTRC_THUMB_PALETTE,
-	 "JFIF extension marker: palette thumbnail image, length %u")
-JMESSAGE(JTRC_THUMB_RGB,
-	 "JFIF extension marker: RGB thumbnail image, length %u")
-JMESSAGE(JTRC_UNKNOWN_IDS,
-	 "Unrecognized component IDs %d %d %d, assuming YCbCr")
-JMESSAGE(JTRC_XMS_CLOSE, "Freed XMS handle %u")
-JMESSAGE(JTRC_XMS_OPEN, "Obtained XMS handle %u")
-JMESSAGE(JWRN_ADOBE_XFORM, "Unknown Adobe color transform code %d")
-JMESSAGE(JWRN_ARITH_BAD_CODE, "Corrupt JPEG data: bad arithmetic code")
-JMESSAGE(JWRN_BOGUS_PROGRESSION,
-	 "Inconsistent progression sequence for component %d coefficient %d")
-JMESSAGE(JWRN_EXTRANEOUS_DATA,
-	 "Corrupt JPEG data: %u extraneous bytes before marker 0x%02x")
-JMESSAGE(JWRN_HIT_MARKER, "Corrupt JPEG data: premature end of data segment")
-JMESSAGE(JWRN_HUFF_BAD_CODE, "Corrupt JPEG data: bad Huffman code")
-JMESSAGE(JWRN_JFIF_MAJOR, "Warning: unknown JFIF revision number %d.%02d")
-JMESSAGE(JWRN_JPEG_EOF, "Premature end of JPEG file")
-JMESSAGE(JWRN_MUST_RESYNC,
-	 "Corrupt JPEG data: found marker 0x%02x instead of RST%d")
-JMESSAGE(JWRN_NOT_SEQUENTIAL, "Invalid SOS parameters for sequential JPEG")
-JMESSAGE(JWRN_TOO_MUCH_DATA, "Application transferred too many scanlines")
-
-#ifdef JMAKE_ENUM_LIST
-
-  JMSG_LASTMSGCODE
-} J_MESSAGE_CODE;
-
-#undef JMAKE_ENUM_LIST
-#endif /* JMAKE_ENUM_LIST */
-
-/* Zap JMESSAGE macro so that future re-inclusions do nothing by default */
-#undef JMESSAGE
-
-
-#ifndef JERROR_H
-#define JERROR_H
-
-/* Macros to simplify using the error and trace message stuff */
-/* The first parameter is either type of cinfo pointer */
-
-/* Fatal errors (print message and exit) */
-#define ERREXIT(cinfo,code)  \
-  ((cinfo)->err->msg_code = (code), \
-   (*(cinfo)->err->error_exit) ((j_common_ptr) (cinfo)))
-#define ERREXIT1(cinfo,code,p1)  \
-  ((cinfo)->err->msg_code = (code), \
-   (cinfo)->err->msg_parm.i[0] = (p1), \
-   (*(cinfo)->err->error_exit) ((j_common_ptr) (cinfo)))
-#define ERREXIT2(cinfo,code,p1,p2)  \
-  ((cinfo)->err->msg_code = (code), \
-   (cinfo)->err->msg_parm.i[0] = (p1), \
-   (cinfo)->err->msg_parm.i[1] = (p2), \
-   (*(cinfo)->err->error_exit) ((j_common_ptr) (cinfo)))
-#define ERREXIT3(cinfo,code,p1,p2,p3)  \
-  ((cinfo)->err->msg_code = (code), \
-   (cinfo)->err->msg_parm.i[0] = (p1), \
-   (cinfo)->err->msg_parm.i[1] = (p2), \
-   (cinfo)->err->msg_parm.i[2] = (p3), \
-   (*(cinfo)->err->error_exit) ((j_common_ptr) (cinfo)))
-#define ERREXIT4(cinfo,code,p1,p2,p3,p4)  \
-  ((cinfo)->err->msg_code = (code), \
-   (cinfo)->err->msg_parm.i[0] = (p1), \
-   (cinfo)->err->msg_parm.i[1] = (p2), \
-   (cinfo)->err->msg_parm.i[2] = (p3), \
-   (cinfo)->err->msg_parm.i[3] = (p4), \
-   (*(cinfo)->err->error_exit) ((j_common_ptr) (cinfo)))
-#define ERREXIT6(cinfo,code,p1,p2,p3,p4,p5,p6)  \
-  ((cinfo)->err->msg_code = (code), \
-   (cinfo)->err->msg_parm.i[0] = (p1), \
-   (cinfo)->err->msg_parm.i[1] = (p2), \
-   (cinfo)->err->msg_parm.i[2] = (p3), \
-   (cinfo)->err->msg_parm.i[3] = (p4), \
-   (cinfo)->err->msg_parm.i[4] = (p5), \
-   (cinfo)->err->msg_parm.i[5] = (p6), \
-   (*(cinfo)->err->error_exit) ((j_common_ptr) (cinfo)))
-#define ERREXITS(cinfo,code,str)  \
-  ((cinfo)->err->msg_code = (code), \
-   strncpy((cinfo)->err->msg_parm.s, (str), JMSG_STR_PARM_MAX), \
-   (*(cinfo)->err->error_exit) ((j_common_ptr) (cinfo)))
-
-#define MAKESTMT(stuff)		do { stuff } while (0)
-
-/* Nonfatal errors (we can keep going, but the data is probably corrupt) */
-#define WARNMS(cinfo,code)  \
-  ((cinfo)->err->msg_code = (code), \
-   (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), -1))
-#define WARNMS1(cinfo,code,p1)  \
-  ((cinfo)->err->msg_code = (code), \
-   (cinfo)->err->msg_parm.i[0] = (p1), \
-   (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), -1))
-#define WARNMS2(cinfo,code,p1,p2)  \
-  ((cinfo)->err->msg_code = (code), \
-   (cinfo)->err->msg_parm.i[0] = (p1), \
-   (cinfo)->err->msg_parm.i[1] = (p2), \
-   (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), -1))
-
-/* Informational/debugging messages */
-#define TRACEMS(cinfo,lvl,code)  \
-  ((cinfo)->err->msg_code = (code), \
-   (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), (lvl)))
-#define TRACEMS1(cinfo,lvl,code,p1)  \
-  ((cinfo)->err->msg_code = (code), \
-   (cinfo)->err->msg_parm.i[0] = (p1), \
-   (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), (lvl)))
-#define TRACEMS2(cinfo,lvl,code,p1,p2)  \
-  ((cinfo)->err->msg_code = (code), \
-   (cinfo)->err->msg_parm.i[0] = (p1), \
-   (cinfo)->err->msg_parm.i[1] = (p2), \
-   (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), (lvl)))
-#define TRACEMS3(cinfo,lvl,code,p1,p2,p3)  \
-  MAKESTMT(int * _mp = (cinfo)->err->msg_parm.i; \
-	   _mp[0] = (p1); _mp[1] = (p2); _mp[2] = (p3); \
-	   (cinfo)->err->msg_code = (code); \
-	   (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), (lvl)); )
-#define TRACEMS4(cinfo,lvl,code,p1,p2,p3,p4)  \
-  MAKESTMT(int * _mp = (cinfo)->err->msg_parm.i; \
-	   _mp[0] = (p1); _mp[1] = (p2); _mp[2] = (p3); _mp[3] = (p4); \
-	   (cinfo)->err->msg_code = (code); \
-	   (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), (lvl)); )
-#define TRACEMS5(cinfo,lvl,code,p1,p2,p3,p4,p5)  \
-  MAKESTMT(int * _mp = (cinfo)->err->msg_parm.i; \
-	   _mp[0] = (p1); _mp[1] = (p2); _mp[2] = (p3); _mp[3] = (p4); \
-	   _mp[4] = (p5); \
-	   (cinfo)->err->msg_code = (code); \
-	   (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), (lvl)); )
-#define TRACEMS8(cinfo,lvl,code,p1,p2,p3,p4,p5,p6,p7,p8)  \
-  MAKESTMT(int * _mp = (cinfo)->err->msg_parm.i; \
-	   _mp[0] = (p1); _mp[1] = (p2); _mp[2] = (p3); _mp[3] = (p4); \
-	   _mp[4] = (p5); _mp[5] = (p6); _mp[6] = (p7); _mp[7] = (p8); \
-	   (cinfo)->err->msg_code = (code); \
-	   (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), (lvl)); )
-#define TRACEMSS(cinfo,lvl,code,str)  \
-  ((cinfo)->err->msg_code = (code), \
-   strncpy((cinfo)->err->msg_parm.s, (str), JMSG_STR_PARM_MAX), \
-   (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), (lvl)))
-
-#endif /* JERROR_H */
+/*
+ * jerror.h
+ *
+ * Copyright (C) 1994-1997, Thomas G. Lane.
+ * Modified 1997-2012 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file defines the error and message codes for the JPEG library.
+ * Edit this file to add new codes, or to translate the message strings to
+ * some other language.
+ * A set of error-reporting macros are defined too.  Some applications using
+ * the JPEG library may wish to include this file to get the error codes
+ * and/or the macros.
+ */
+
+/*
+ * To define the enum list of message codes, include this file without
+ * defining macro JMESSAGE.  To create a message string table, include it
+ * again with a suitable JMESSAGE definition (see jerror.c for an example).
+ */
+#ifndef JMESSAGE
+#ifndef JERROR_H
+/* First time through, define the enum list */
+#define JMAKE_ENUM_LIST
+#else
+/* Repeated inclusions of this file are no-ops unless JMESSAGE is defined */
+#define JMESSAGE(code,string)
+#endif /* JERROR_H */
+#endif /* JMESSAGE */
+
+#ifdef JMAKE_ENUM_LIST
+
+typedef enum {
+
+#define JMESSAGE(code,string)	code ,
+
+#endif /* JMAKE_ENUM_LIST */
+
+JMESSAGE(JMSG_NOMESSAGE, "Bogus message code %d") /* Must be first entry! */
+
+/* For maintenance convenience, list is alphabetical by message code name */
+JMESSAGE(JERR_BAD_ALIGN_TYPE, "ALIGN_TYPE is wrong, please fix")
+JMESSAGE(JERR_BAD_ALLOC_CHUNK, "MAX_ALLOC_CHUNK is wrong, please fix")
+JMESSAGE(JERR_BAD_BUFFER_MODE, "Bogus buffer control mode")
+JMESSAGE(JERR_BAD_COMPONENT_ID, "Invalid component ID %d in SOS")
+JMESSAGE(JERR_BAD_CROP_SPEC, "Invalid crop request")
+JMESSAGE(JERR_BAD_DCT_COEF, "DCT coefficient out of range")
+JMESSAGE(JERR_BAD_DCTSIZE, "DCT scaled block size %dx%d not supported")
+JMESSAGE(JERR_BAD_DROP_SAMPLING,
+	 "Component index %d: mismatching sampling ratio %d:%d, %d:%d, %c")
+JMESSAGE(JERR_BAD_HUFF_TABLE, "Bogus Huffman table definition")
+JMESSAGE(JERR_BAD_IN_COLORSPACE, "Bogus input colorspace")
+JMESSAGE(JERR_BAD_J_COLORSPACE, "Bogus JPEG colorspace")
+JMESSAGE(JERR_BAD_LENGTH, "Bogus marker length")
+JMESSAGE(JERR_BAD_LIB_VERSION,
+	 "Wrong JPEG library version: library is %d, caller expects %d")
+JMESSAGE(JERR_BAD_MCU_SIZE, "Sampling factors too large for interleaved scan")
+JMESSAGE(JERR_BAD_POOL_ID, "Invalid memory pool code %d")
+JMESSAGE(JERR_BAD_PRECISION, "Unsupported JPEG data precision %d")
+JMESSAGE(JERR_BAD_PROGRESSION,
+	 "Invalid progressive parameters Ss=%d Se=%d Ah=%d Al=%d")
+JMESSAGE(JERR_BAD_PROG_SCRIPT,
+	 "Invalid progressive parameters at scan script entry %d")
+JMESSAGE(JERR_BAD_SAMPLING, "Bogus sampling factors")
+JMESSAGE(JERR_BAD_SCAN_SCRIPT, "Invalid scan script at entry %d")
+JMESSAGE(JERR_BAD_STATE, "Improper call to JPEG library in state %d")
+JMESSAGE(JERR_BAD_STRUCT_SIZE,
+	 "JPEG parameter struct mismatch: library thinks size is %u, caller expects %u")
+JMESSAGE(JERR_BAD_VIRTUAL_ACCESS, "Bogus virtual array access")
+JMESSAGE(JERR_BUFFER_SIZE, "Buffer passed to JPEG library is too small")
+JMESSAGE(JERR_CANT_SUSPEND, "Suspension not allowed here")
+JMESSAGE(JERR_CCIR601_NOTIMPL, "CCIR601 sampling not implemented yet")
+JMESSAGE(JERR_COMPONENT_COUNT, "Too many color components: %d, max %d")
+JMESSAGE(JERR_CONVERSION_NOTIMPL, "Unsupported color conversion request")
+JMESSAGE(JERR_DAC_INDEX, "Bogus DAC index %d")
+JMESSAGE(JERR_DAC_VALUE, "Bogus DAC value 0x%x")
+JMESSAGE(JERR_DHT_INDEX, "Bogus DHT index %d")
+JMESSAGE(JERR_DQT_INDEX, "Bogus DQT index %d")
+JMESSAGE(JERR_EMPTY_IMAGE, "Empty JPEG image (DNL not supported)")
+JMESSAGE(JERR_EMS_READ, "Read from EMS failed")
+JMESSAGE(JERR_EMS_WRITE, "Write to EMS failed")
+JMESSAGE(JERR_EOI_EXPECTED, "Didn't expect more than one scan")
+JMESSAGE(JERR_FILE_READ, "Input file read error")
+JMESSAGE(JERR_FILE_WRITE, "Output file write error --- out of disk space?")
+JMESSAGE(JERR_FRACT_SAMPLE_NOTIMPL, "Fractional sampling not implemented yet")
+JMESSAGE(JERR_HUFF_CLEN_OVERFLOW, "Huffman code size table overflow")
+JMESSAGE(JERR_HUFF_MISSING_CODE, "Missing Huffman code table entry")
+JMESSAGE(JERR_IMAGE_TOO_BIG, "Maximum supported image dimension is %u pixels")
+JMESSAGE(JERR_INPUT_EMPTY, "Empty input file")
+JMESSAGE(JERR_INPUT_EOF, "Premature end of input file")
+JMESSAGE(JERR_MISMATCHED_QUANT_TABLE,
+	 "Cannot transcode due to multiple use of quantization table %d")
+JMESSAGE(JERR_MISSING_DATA, "Scan script does not transmit all data")
+JMESSAGE(JERR_MODE_CHANGE, "Invalid color quantization mode change")
+JMESSAGE(JERR_NOTIMPL, "Not implemented yet")
+JMESSAGE(JERR_NOT_COMPILED, "Requested feature was omitted at compile time")
+JMESSAGE(JERR_NO_ARITH_TABLE, "Arithmetic table 0x%02x was not defined")
+JMESSAGE(JERR_NO_BACKING_STORE, "Backing store not supported")
+JMESSAGE(JERR_NO_HUFF_TABLE, "Huffman table 0x%02x was not defined")
+JMESSAGE(JERR_NO_IMAGE, "JPEG datastream contains no image")
+JMESSAGE(JERR_NO_QUANT_TABLE, "Quantization table 0x%02x was not defined")
+JMESSAGE(JERR_NO_SOI, "Not a JPEG file: starts with 0x%02x 0x%02x")
+JMESSAGE(JERR_OUT_OF_MEMORY, "Insufficient memory (case %d)")
+JMESSAGE(JERR_QUANT_COMPONENTS,
+	 "Cannot quantize more than %d color components")
+JMESSAGE(JERR_QUANT_FEW_COLORS, "Cannot quantize to fewer than %d colors")
+JMESSAGE(JERR_QUANT_MANY_COLORS, "Cannot quantize to more than %d colors")
+JMESSAGE(JERR_SOF_BEFORE, "Invalid JPEG file structure: %s before SOF")
+JMESSAGE(JERR_SOF_DUPLICATE, "Invalid JPEG file structure: two SOF markers")
+JMESSAGE(JERR_SOF_NO_SOS, "Invalid JPEG file structure: missing SOS marker")
+JMESSAGE(JERR_SOF_UNSUPPORTED, "Unsupported JPEG process: SOF type 0x%02x")
+JMESSAGE(JERR_SOI_DUPLICATE, "Invalid JPEG file structure: two SOI markers")
+JMESSAGE(JERR_TFILE_CREATE, "Failed to create temporary file %s")
+JMESSAGE(JERR_TFILE_READ, "Read failed on temporary file")
+JMESSAGE(JERR_TFILE_SEEK, "Seek failed on temporary file")
+JMESSAGE(JERR_TFILE_WRITE,
+	 "Write failed on temporary file --- out of disk space?")
+JMESSAGE(JERR_TOO_LITTLE_DATA, "Application transferred too few scanlines")
+JMESSAGE(JERR_UNKNOWN_MARKER, "Unsupported marker type 0x%02x")
+JMESSAGE(JERR_VIRTUAL_BUG, "Virtual array controller messed up")
+JMESSAGE(JERR_WIDTH_OVERFLOW, "Image too wide for this implementation")
+JMESSAGE(JERR_XMS_READ, "Read from XMS failed")
+JMESSAGE(JERR_XMS_WRITE, "Write to XMS failed")
+JMESSAGE(JMSG_COPYRIGHT, JCOPYRIGHT)
+JMESSAGE(JMSG_VERSION, JVERSION)
+JMESSAGE(JTRC_16BIT_TABLES,
+	 "Caution: quantization tables are too coarse for baseline JPEG")
+JMESSAGE(JTRC_ADOBE,
+	 "Adobe APP14 marker: version %d, flags 0x%04x 0x%04x, transform %d")
+JMESSAGE(JTRC_APP0, "Unknown APP0 marker (not JFIF), length %u")
+JMESSAGE(JTRC_APP14, "Unknown APP14 marker (not Adobe), length %u")
+JMESSAGE(JTRC_DAC, "Define Arithmetic Table 0x%02x: 0x%02x")
+JMESSAGE(JTRC_DHT, "Define Huffman Table 0x%02x")
+JMESSAGE(JTRC_DQT, "Define Quantization Table %d  precision %d")
+JMESSAGE(JTRC_DRI, "Define Restart Interval %u")
+JMESSAGE(JTRC_EMS_CLOSE, "Freed EMS handle %u")
+JMESSAGE(JTRC_EMS_OPEN, "Obtained EMS handle %u")
+JMESSAGE(JTRC_EOI, "End Of Image")
+JMESSAGE(JTRC_HUFFBITS, "        %3d %3d %3d %3d %3d %3d %3d %3d")
+JMESSAGE(JTRC_JFIF, "JFIF APP0 marker: version %d.%02d, density %dx%d  %d")
+JMESSAGE(JTRC_JFIF_BADTHUMBNAILSIZE,
+	 "Warning: thumbnail image size does not match data length %u")
+JMESSAGE(JTRC_JFIF_EXTENSION,
+	 "JFIF extension marker: type 0x%02x, length %u")
+JMESSAGE(JTRC_JFIF_THUMBNAIL, "    with %d x %d thumbnail image")
+JMESSAGE(JTRC_MISC_MARKER, "Miscellaneous marker 0x%02x, length %u")
+JMESSAGE(JTRC_PARMLESS_MARKER, "Unexpected marker 0x%02x")
+JMESSAGE(JTRC_QUANTVALS, "        %4u %4u %4u %4u %4u %4u %4u %4u")
+JMESSAGE(JTRC_QUANT_3_NCOLORS, "Quantizing to %d = %d*%d*%d colors")
+JMESSAGE(JTRC_QUANT_NCOLORS, "Quantizing to %d colors")
+JMESSAGE(JTRC_QUANT_SELECTED, "Selected %d colors for quantization")
+JMESSAGE(JTRC_RECOVERY_ACTION, "At marker 0x%02x, recovery action %d")
+JMESSAGE(JTRC_RST, "RST%d")
+JMESSAGE(JTRC_SMOOTH_NOTIMPL,
+	 "Smoothing not supported with nonstandard sampling ratios")
+JMESSAGE(JTRC_SOF, "Start Of Frame 0x%02x: width=%u, height=%u, components=%d")
+JMESSAGE(JTRC_SOF_COMPONENT, "    Component %d: %dhx%dv q=%d")
+JMESSAGE(JTRC_SOI, "Start of Image")
+JMESSAGE(JTRC_SOS, "Start Of Scan: %d components")
+JMESSAGE(JTRC_SOS_COMPONENT, "    Component %d: dc=%d ac=%d")
+JMESSAGE(JTRC_SOS_PARAMS, "  Ss=%d, Se=%d, Ah=%d, Al=%d")
+JMESSAGE(JTRC_TFILE_CLOSE, "Closed temporary file %s")
+JMESSAGE(JTRC_TFILE_OPEN, "Opened temporary file %s")
+JMESSAGE(JTRC_THUMB_JPEG,
+	 "JFIF extension marker: JPEG-compressed thumbnail image, length %u")
+JMESSAGE(JTRC_THUMB_PALETTE,
+	 "JFIF extension marker: palette thumbnail image, length %u")
+JMESSAGE(JTRC_THUMB_RGB,
+	 "JFIF extension marker: RGB thumbnail image, length %u")
+JMESSAGE(JTRC_UNKNOWN_IDS,
+	 "Unrecognized component IDs %d %d %d, assuming YCbCr")
+JMESSAGE(JTRC_XMS_CLOSE, "Freed XMS handle %u")
+JMESSAGE(JTRC_XMS_OPEN, "Obtained XMS handle %u")
+JMESSAGE(JWRN_ADOBE_XFORM, "Unknown Adobe color transform code %d")
+JMESSAGE(JWRN_ARITH_BAD_CODE, "Corrupt JPEG data: bad arithmetic code")
+JMESSAGE(JWRN_BOGUS_PROGRESSION,
+	 "Inconsistent progression sequence for component %d coefficient %d")
+JMESSAGE(JWRN_EXTRANEOUS_DATA,
+	 "Corrupt JPEG data: %u extraneous bytes before marker 0x%02x")
+JMESSAGE(JWRN_HIT_MARKER, "Corrupt JPEG data: premature end of data segment")
+JMESSAGE(JWRN_HUFF_BAD_CODE, "Corrupt JPEG data: bad Huffman code")
+JMESSAGE(JWRN_JFIF_MAJOR, "Warning: unknown JFIF revision number %d.%02d")
+JMESSAGE(JWRN_JPEG_EOF, "Premature end of JPEG file")
+JMESSAGE(JWRN_MUST_RESYNC,
+	 "Corrupt JPEG data: found marker 0x%02x instead of RST%d")
+JMESSAGE(JWRN_NOT_SEQUENTIAL, "Invalid SOS parameters for sequential JPEG")
+JMESSAGE(JWRN_TOO_MUCH_DATA, "Application transferred too many scanlines")
+
+#ifdef JMAKE_ENUM_LIST
+
+  JMSG_LASTMSGCODE
+} J_MESSAGE_CODE;
+
+#undef JMAKE_ENUM_LIST
+#endif /* JMAKE_ENUM_LIST */
+
+/* Zap JMESSAGE macro so that future re-inclusions do nothing by default */
+#undef JMESSAGE
+
+
+#ifndef JERROR_H
+#define JERROR_H
+
+/* Macros to simplify using the error and trace message stuff */
+/* The first parameter is either type of cinfo pointer */
+
+/* Fatal errors (print message and exit) */
+#define ERREXIT(cinfo,code)  \
+  ((cinfo)->err->msg_code = (code), \
+   (*(cinfo)->err->error_exit) ((j_common_ptr) (cinfo)))
+#define ERREXIT1(cinfo,code,p1)  \
+  ((cinfo)->err->msg_code = (code), \
+   (cinfo)->err->msg_parm.i[0] = (p1), \
+   (*(cinfo)->err->error_exit) ((j_common_ptr) (cinfo)))
+#define ERREXIT2(cinfo,code,p1,p2)  \
+  ((cinfo)->err->msg_code = (code), \
+   (cinfo)->err->msg_parm.i[0] = (p1), \
+   (cinfo)->err->msg_parm.i[1] = (p2), \
+   (*(cinfo)->err->error_exit) ((j_common_ptr) (cinfo)))
+#define ERREXIT3(cinfo,code,p1,p2,p3)  \
+  ((cinfo)->err->msg_code = (code), \
+   (cinfo)->err->msg_parm.i[0] = (p1), \
+   (cinfo)->err->msg_parm.i[1] = (p2), \
+   (cinfo)->err->msg_parm.i[2] = (p3), \
+   (*(cinfo)->err->error_exit) ((j_common_ptr) (cinfo)))
+#define ERREXIT4(cinfo,code,p1,p2,p3,p4)  \
+  ((cinfo)->err->msg_code = (code), \
+   (cinfo)->err->msg_parm.i[0] = (p1), \
+   (cinfo)->err->msg_parm.i[1] = (p2), \
+   (cinfo)->err->msg_parm.i[2] = (p3), \
+   (cinfo)->err->msg_parm.i[3] = (p4), \
+   (*(cinfo)->err->error_exit) ((j_common_ptr) (cinfo)))
+#define ERREXIT6(cinfo,code,p1,p2,p3,p4,p5,p6)  \
+  ((cinfo)->err->msg_code = (code), \
+   (cinfo)->err->msg_parm.i[0] = (p1), \
+   (cinfo)->err->msg_parm.i[1] = (p2), \
+   (cinfo)->err->msg_parm.i[2] = (p3), \
+   (cinfo)->err->msg_parm.i[3] = (p4), \
+   (cinfo)->err->msg_parm.i[4] = (p5), \
+   (cinfo)->err->msg_parm.i[5] = (p6), \
+   (*(cinfo)->err->error_exit) ((j_common_ptr) (cinfo)))
+#define ERREXITS(cinfo,code,str)  \
+  ((cinfo)->err->msg_code = (code), \
+   strncpy((cinfo)->err->msg_parm.s, (str), JMSG_STR_PARM_MAX), \
+   (*(cinfo)->err->error_exit) ((j_common_ptr) (cinfo)))
+
+#define MAKESTMT(stuff)		do { stuff } while (0)
+
+/* Nonfatal errors (we can keep going, but the data is probably corrupt) */
+#define WARNMS(cinfo,code)  \
+  ((cinfo)->err->msg_code = (code), \
+   (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), -1))
+#define WARNMS1(cinfo,code,p1)  \
+  ((cinfo)->err->msg_code = (code), \
+   (cinfo)->err->msg_parm.i[0] = (p1), \
+   (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), -1))
+#define WARNMS2(cinfo,code,p1,p2)  \
+  ((cinfo)->err->msg_code = (code), \
+   (cinfo)->err->msg_parm.i[0] = (p1), \
+   (cinfo)->err->msg_parm.i[1] = (p2), \
+   (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), -1))
+
+/* Informational/debugging messages */
+#define TRACEMS(cinfo,lvl,code)  \
+  ((cinfo)->err->msg_code = (code), \
+   (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), (lvl)))
+#define TRACEMS1(cinfo,lvl,code,p1)  \
+  ((cinfo)->err->msg_code = (code), \
+   (cinfo)->err->msg_parm.i[0] = (p1), \
+   (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), (lvl)))
+#define TRACEMS2(cinfo,lvl,code,p1,p2)  \
+  ((cinfo)->err->msg_code = (code), \
+   (cinfo)->err->msg_parm.i[0] = (p1), \
+   (cinfo)->err->msg_parm.i[1] = (p2), \
+   (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), (lvl)))
+#define TRACEMS3(cinfo,lvl,code,p1,p2,p3)  \
+  MAKESTMT(int * _mp = (cinfo)->err->msg_parm.i; \
+	   _mp[0] = (p1); _mp[1] = (p2); _mp[2] = (p3); \
+	   (cinfo)->err->msg_code = (code); \
+	   (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), (lvl)); )
+#define TRACEMS4(cinfo,lvl,code,p1,p2,p3,p4)  \
+  MAKESTMT(int * _mp = (cinfo)->err->msg_parm.i; \
+	   _mp[0] = (p1); _mp[1] = (p2); _mp[2] = (p3); _mp[3] = (p4); \
+	   (cinfo)->err->msg_code = (code); \
+	   (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), (lvl)); )
+#define TRACEMS5(cinfo,lvl,code,p1,p2,p3,p4,p5)  \
+  MAKESTMT(int * _mp = (cinfo)->err->msg_parm.i; \
+	   _mp[0] = (p1); _mp[1] = (p2); _mp[2] = (p3); _mp[3] = (p4); \
+	   _mp[4] = (p5); \
+	   (cinfo)->err->msg_code = (code); \
+	   (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), (lvl)); )
+#define TRACEMS8(cinfo,lvl,code,p1,p2,p3,p4,p5,p6,p7,p8)  \
+  MAKESTMT(int * _mp = (cinfo)->err->msg_parm.i; \
+	   _mp[0] = (p1); _mp[1] = (p2); _mp[2] = (p3); _mp[3] = (p4); \
+	   _mp[4] = (p5); _mp[5] = (p6); _mp[6] = (p7); _mp[7] = (p8); \
+	   (cinfo)->err->msg_code = (code); \
+	   (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), (lvl)); )
+#define TRACEMSS(cinfo,lvl,code,str)  \
+  ((cinfo)->err->msg_code = (code), \
+   strncpy((cinfo)->err->msg_parm.s, (str), JMSG_STR_PARM_MAX), \
+   (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), (lvl)))
+
+#endif /* JERROR_H */
diff --git a/plugins/AdvaImg/src/LibJPEG/jfdctflt.c b/plugins/AdvaImg/src/LibJPEG/jfdctflt.c
index 74d0d862dc..3c1b174801 100644
--- a/plugins/AdvaImg/src/LibJPEG/jfdctflt.c
+++ b/plugins/AdvaImg/src/LibJPEG/jfdctflt.c
@@ -1,174 +1,174 @@
-/*
- * jfdctflt.c
- *
- * Copyright (C) 1994-1996, Thomas G. Lane.
- * Modified 2003-2009 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains a floating-point implementation of the
- * forward DCT (Discrete Cosine Transform).
- *
- * This implementation should be more accurate than either of the integer
- * DCT implementations.  However, it may not give the same results on all
- * machines because of differences in roundoff behavior.  Speed will depend
- * on the hardware's floating point capacity.
- *
- * A 2-D DCT can be done by 1-D DCT on each row followed by 1-D DCT
- * on each column.  Direct algorithms are also available, but they are
- * much more complex and seem not to be any faster when reduced to code.
- *
- * This implementation is based on Arai, Agui, and Nakajima's algorithm for
- * scaled DCT.  Their original paper (Trans. IEICE E-71(11):1095) is in
- * Japanese, but the algorithm is described in the Pennebaker & Mitchell
- * JPEG textbook (see REFERENCES section in file README).  The following code
- * is based directly on figure 4-8 in P&M.
- * While an 8-point DCT cannot be done in less than 11 multiplies, it is
- * possible to arrange the computation so that many of the multiplies are
- * simple scalings of the final outputs.  These multiplies can then be
- * folded into the multiplications or divisions by the JPEG quantization
- * table entries.  The AA&N method leaves only 5 multiplies and 29 adds
- * to be done in the DCT itself.
- * The primary disadvantage of this method is that with a fixed-point
- * implementation, accuracy is lost due to imprecise representation of the
- * scaled quantization values.  However, that problem does not arise if
- * we use floating point arithmetic.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-#include "jdct.h"		/* Private declarations for DCT subsystem */
-
-#ifdef DCT_FLOAT_SUPPORTED
-
-
-/*
- * This module is specialized to the case DCTSIZE = 8.
- */
-
-#if DCTSIZE != 8
-  Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */
-#endif
-
-
-/*
- * Perform the forward DCT on one block of samples.
- */
-
-GLOBAL(void)
-jpeg_fdct_float (FAST_FLOAT * data, JSAMPARRAY sample_data, JDIMENSION start_col)
-{
-  FAST_FLOAT tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
-  FAST_FLOAT tmp10, tmp11, tmp12, tmp13;
-  FAST_FLOAT z1, z2, z3, z4, z5, z11, z13;
-  FAST_FLOAT *dataptr;
-  JSAMPROW elemptr;
-  int ctr;
-
-  /* Pass 1: process rows. */
-
-  dataptr = data;
-  for (ctr = 0; ctr < DCTSIZE; ctr++) {
-    elemptr = sample_data[ctr] + start_col;
-
-    /* Load data into workspace */
-    tmp0 = (FAST_FLOAT) (GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[7]));
-    tmp7 = (FAST_FLOAT) (GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[7]));
-    tmp1 = (FAST_FLOAT) (GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[6]));
-    tmp6 = (FAST_FLOAT) (GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[6]));
-    tmp2 = (FAST_FLOAT) (GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[5]));
-    tmp5 = (FAST_FLOAT) (GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[5]));
-    tmp3 = (FAST_FLOAT) (GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[4]));
-    tmp4 = (FAST_FLOAT) (GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[4]));
-
-    /* Even part */
-
-    tmp10 = tmp0 + tmp3;	/* phase 2 */
-    tmp13 = tmp0 - tmp3;
-    tmp11 = tmp1 + tmp2;
-    tmp12 = tmp1 - tmp2;
-
-    /* Apply unsigned->signed conversion */
-    dataptr[0] = tmp10 + tmp11 - 8 * CENTERJSAMPLE; /* phase 3 */
-    dataptr[4] = tmp10 - tmp11;
-
-    z1 = (tmp12 + tmp13) * ((FAST_FLOAT) 0.707106781); /* c4 */
-    dataptr[2] = tmp13 + z1;	/* phase 5 */
-    dataptr[6] = tmp13 - z1;
-
-    /* Odd part */
-
-    tmp10 = tmp4 + tmp5;	/* phase 2 */
-    tmp11 = tmp5 + tmp6;
-    tmp12 = tmp6 + tmp7;
-
-    /* The rotator is modified from fig 4-8 to avoid extra negations. */
-    z5 = (tmp10 - tmp12) * ((FAST_FLOAT) 0.382683433); /* c6 */
-    z2 = ((FAST_FLOAT) 0.541196100) * tmp10 + z5; /* c2-c6 */
-    z4 = ((FAST_FLOAT) 1.306562965) * tmp12 + z5; /* c2+c6 */
-    z3 = tmp11 * ((FAST_FLOAT) 0.707106781); /* c4 */
-
-    z11 = tmp7 + z3;		/* phase 5 */
-    z13 = tmp7 - z3;
-
-    dataptr[5] = z13 + z2;	/* phase 6 */
-    dataptr[3] = z13 - z2;
-    dataptr[1] = z11 + z4;
-    dataptr[7] = z11 - z4;
-
-    dataptr += DCTSIZE;		/* advance pointer to next row */
-  }
-
-  /* Pass 2: process columns. */
-
-  dataptr = data;
-  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
-    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7];
-    tmp7 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7];
-    tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6];
-    tmp6 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6];
-    tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5];
-    tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5];
-    tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4];
-    tmp4 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4];
-
-    /* Even part */
-
-    tmp10 = tmp0 + tmp3;	/* phase 2 */
-    tmp13 = tmp0 - tmp3;
-    tmp11 = tmp1 + tmp2;
-    tmp12 = tmp1 - tmp2;
-
-    dataptr[DCTSIZE*0] = tmp10 + tmp11; /* phase 3 */
-    dataptr[DCTSIZE*4] = tmp10 - tmp11;
-
-    z1 = (tmp12 + tmp13) * ((FAST_FLOAT) 0.707106781); /* c4 */
-    dataptr[DCTSIZE*2] = tmp13 + z1; /* phase 5 */
-    dataptr[DCTSIZE*6] = tmp13 - z1;
-
-    /* Odd part */
-
-    tmp10 = tmp4 + tmp5;	/* phase 2 */
-    tmp11 = tmp5 + tmp6;
-    tmp12 = tmp6 + tmp7;
-
-    /* The rotator is modified from fig 4-8 to avoid extra negations. */
-    z5 = (tmp10 - tmp12) * ((FAST_FLOAT) 0.382683433); /* c6 */
-    z2 = ((FAST_FLOAT) 0.541196100) * tmp10 + z5; /* c2-c6 */
-    z4 = ((FAST_FLOAT) 1.306562965) * tmp12 + z5; /* c2+c6 */
-    z3 = tmp11 * ((FAST_FLOAT) 0.707106781); /* c4 */
-
-    z11 = tmp7 + z3;		/* phase 5 */
-    z13 = tmp7 - z3;
-
-    dataptr[DCTSIZE*5] = z13 + z2; /* phase 6 */
-    dataptr[DCTSIZE*3] = z13 - z2;
-    dataptr[DCTSIZE*1] = z11 + z4;
-    dataptr[DCTSIZE*7] = z11 - z4;
-
-    dataptr++;			/* advance pointer to next column */
-  }
-}
-
-#endif /* DCT_FLOAT_SUPPORTED */
+/*
+ * jfdctflt.c
+ *
+ * Copyright (C) 1994-1996, Thomas G. Lane.
+ * Modified 2003-2009 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains a floating-point implementation of the
+ * forward DCT (Discrete Cosine Transform).
+ *
+ * This implementation should be more accurate than either of the integer
+ * DCT implementations.  However, it may not give the same results on all
+ * machines because of differences in roundoff behavior.  Speed will depend
+ * on the hardware's floating point capacity.
+ *
+ * A 2-D DCT can be done by 1-D DCT on each row followed by 1-D DCT
+ * on each column.  Direct algorithms are also available, but they are
+ * much more complex and seem not to be any faster when reduced to code.
+ *
+ * This implementation is based on Arai, Agui, and Nakajima's algorithm for
+ * scaled DCT.  Their original paper (Trans. IEICE E-71(11):1095) is in
+ * Japanese, but the algorithm is described in the Pennebaker & Mitchell
+ * JPEG textbook (see REFERENCES section in file README).  The following code
+ * is based directly on figure 4-8 in P&M.
+ * While an 8-point DCT cannot be done in less than 11 multiplies, it is
+ * possible to arrange the computation so that many of the multiplies are
+ * simple scalings of the final outputs.  These multiplies can then be
+ * folded into the multiplications or divisions by the JPEG quantization
+ * table entries.  The AA&N method leaves only 5 multiplies and 29 adds
+ * to be done in the DCT itself.
+ * The primary disadvantage of this method is that with a fixed-point
+ * implementation, accuracy is lost due to imprecise representation of the
+ * scaled quantization values.  However, that problem does not arise if
+ * we use floating point arithmetic.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jdct.h"		/* Private declarations for DCT subsystem */
+
+#ifdef DCT_FLOAT_SUPPORTED
+
+
+/*
+ * This module is specialized to the case DCTSIZE = 8.
+ */
+
+#if DCTSIZE != 8
+  Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */
+#endif
+
+
+/*
+ * Perform the forward DCT on one block of samples.
+ */
+
+GLOBAL(void)
+jpeg_fdct_float (FAST_FLOAT * data, JSAMPARRAY sample_data, JDIMENSION start_col)
+{
+  FAST_FLOAT tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+  FAST_FLOAT tmp10, tmp11, tmp12, tmp13;
+  FAST_FLOAT z1, z2, z3, z4, z5, z11, z13;
+  FAST_FLOAT *dataptr;
+  JSAMPROW elemptr;
+  int ctr;
+
+  /* Pass 1: process rows. */
+
+  dataptr = data;
+  for (ctr = 0; ctr < DCTSIZE; ctr++) {
+    elemptr = sample_data[ctr] + start_col;
+
+    /* Load data into workspace */
+    tmp0 = (FAST_FLOAT) (GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[7]));
+    tmp7 = (FAST_FLOAT) (GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[7]));
+    tmp1 = (FAST_FLOAT) (GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[6]));
+    tmp6 = (FAST_FLOAT) (GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[6]));
+    tmp2 = (FAST_FLOAT) (GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[5]));
+    tmp5 = (FAST_FLOAT) (GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[5]));
+    tmp3 = (FAST_FLOAT) (GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[4]));
+    tmp4 = (FAST_FLOAT) (GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[4]));
+
+    /* Even part */
+
+    tmp10 = tmp0 + tmp3;	/* phase 2 */
+    tmp13 = tmp0 - tmp3;
+    tmp11 = tmp1 + tmp2;
+    tmp12 = tmp1 - tmp2;
+
+    /* Apply unsigned->signed conversion */
+    dataptr[0] = tmp10 + tmp11 - 8 * CENTERJSAMPLE; /* phase 3 */
+    dataptr[4] = tmp10 - tmp11;
+
+    z1 = (tmp12 + tmp13) * ((FAST_FLOAT) 0.707106781); /* c4 */
+    dataptr[2] = tmp13 + z1;	/* phase 5 */
+    dataptr[6] = tmp13 - z1;
+
+    /* Odd part */
+
+    tmp10 = tmp4 + tmp5;	/* phase 2 */
+    tmp11 = tmp5 + tmp6;
+    tmp12 = tmp6 + tmp7;
+
+    /* The rotator is modified from fig 4-8 to avoid extra negations. */
+    z5 = (tmp10 - tmp12) * ((FAST_FLOAT) 0.382683433); /* c6 */
+    z2 = ((FAST_FLOAT) 0.541196100) * tmp10 + z5; /* c2-c6 */
+    z4 = ((FAST_FLOAT) 1.306562965) * tmp12 + z5; /* c2+c6 */
+    z3 = tmp11 * ((FAST_FLOAT) 0.707106781); /* c4 */
+
+    z11 = tmp7 + z3;		/* phase 5 */
+    z13 = tmp7 - z3;
+
+    dataptr[5] = z13 + z2;	/* phase 6 */
+    dataptr[3] = z13 - z2;
+    dataptr[1] = z11 + z4;
+    dataptr[7] = z11 - z4;
+
+    dataptr += DCTSIZE;		/* advance pointer to next row */
+  }
+
+  /* Pass 2: process columns. */
+
+  dataptr = data;
+  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
+    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7];
+    tmp7 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7];
+    tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6];
+    tmp6 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6];
+    tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5];
+    tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5];
+    tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4];
+    tmp4 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4];
+
+    /* Even part */
+
+    tmp10 = tmp0 + tmp3;	/* phase 2 */
+    tmp13 = tmp0 - tmp3;
+    tmp11 = tmp1 + tmp2;
+    tmp12 = tmp1 - tmp2;
+
+    dataptr[DCTSIZE*0] = tmp10 + tmp11; /* phase 3 */
+    dataptr[DCTSIZE*4] = tmp10 - tmp11;
+
+    z1 = (tmp12 + tmp13) * ((FAST_FLOAT) 0.707106781); /* c4 */
+    dataptr[DCTSIZE*2] = tmp13 + z1; /* phase 5 */
+    dataptr[DCTSIZE*6] = tmp13 - z1;
+
+    /* Odd part */
+
+    tmp10 = tmp4 + tmp5;	/* phase 2 */
+    tmp11 = tmp5 + tmp6;
+    tmp12 = tmp6 + tmp7;
+
+    /* The rotator is modified from fig 4-8 to avoid extra negations. */
+    z5 = (tmp10 - tmp12) * ((FAST_FLOAT) 0.382683433); /* c6 */
+    z2 = ((FAST_FLOAT) 0.541196100) * tmp10 + z5; /* c2-c6 */
+    z4 = ((FAST_FLOAT) 1.306562965) * tmp12 + z5; /* c2+c6 */
+    z3 = tmp11 * ((FAST_FLOAT) 0.707106781); /* c4 */
+
+    z11 = tmp7 + z3;		/* phase 5 */
+    z13 = tmp7 - z3;
+
+    dataptr[DCTSIZE*5] = z13 + z2; /* phase 6 */
+    dataptr[DCTSIZE*3] = z13 - z2;
+    dataptr[DCTSIZE*1] = z11 + z4;
+    dataptr[DCTSIZE*7] = z11 - z4;
+
+    dataptr++;			/* advance pointer to next column */
+  }
+}
+
+#endif /* DCT_FLOAT_SUPPORTED */
diff --git a/plugins/AdvaImg/src/LibJPEG/jfdctfst.c b/plugins/AdvaImg/src/LibJPEG/jfdctfst.c
index 8cad5f2293..82b92317b0 100644
--- a/plugins/AdvaImg/src/LibJPEG/jfdctfst.c
+++ b/plugins/AdvaImg/src/LibJPEG/jfdctfst.c
@@ -1,230 +1,230 @@
-/*
- * jfdctfst.c
- *
- * Copyright (C) 1994-1996, Thomas G. Lane.
- * Modified 2003-2009 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains a fast, not so accurate integer implementation of the
- * forward DCT (Discrete Cosine Transform).
- *
- * A 2-D DCT can be done by 1-D DCT on each row followed by 1-D DCT
- * on each column.  Direct algorithms are also available, but they are
- * much more complex and seem not to be any faster when reduced to code.
- *
- * This implementation is based on Arai, Agui, and Nakajima's algorithm for
- * scaled DCT.  Their original paper (Trans. IEICE E-71(11):1095) is in
- * Japanese, but the algorithm is described in the Pennebaker & Mitchell
- * JPEG textbook (see REFERENCES section in file README).  The following code
- * is based directly on figure 4-8 in P&M.
- * While an 8-point DCT cannot be done in less than 11 multiplies, it is
- * possible to arrange the computation so that many of the multiplies are
- * simple scalings of the final outputs.  These multiplies can then be
- * folded into the multiplications or divisions by the JPEG quantization
- * table entries.  The AA&N method leaves only 5 multiplies and 29 adds
- * to be done in the DCT itself.
- * The primary disadvantage of this method is that with fixed-point math,
- * accuracy is lost due to imprecise representation of the scaled
- * quantization values.  The smaller the quantization table entry, the less
- * precise the scaled value, so this implementation does worse with high-
- * quality-setting files than with low-quality ones.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-#include "jdct.h"		/* Private declarations for DCT subsystem */
-
-#ifdef DCT_IFAST_SUPPORTED
-
-
-/*
- * This module is specialized to the case DCTSIZE = 8.
- */
-
-#if DCTSIZE != 8
-  Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */
-#endif
-
-
-/* Scaling decisions are generally the same as in the LL&M algorithm;
- * see jfdctint.c for more details.  However, we choose to descale
- * (right shift) multiplication products as soon as they are formed,
- * rather than carrying additional fractional bits into subsequent additions.
- * This compromises accuracy slightly, but it lets us save a few shifts.
- * More importantly, 16-bit arithmetic is then adequate (for 8-bit samples)
- * everywhere except in the multiplications proper; this saves a good deal
- * of work on 16-bit-int machines.
- *
- * Again to save a few shifts, the intermediate results between pass 1 and
- * pass 2 are not upscaled, but are represented only to integral precision.
- *
- * A final compromise is to represent the multiplicative constants to only
- * 8 fractional bits, rather than 13.  This saves some shifting work on some
- * machines, and may also reduce the cost of multiplication (since there
- * are fewer one-bits in the constants).
- */
-
-#define CONST_BITS  8
-
-
-/* Some C compilers fail to reduce "FIX(constant)" at compile time, thus
- * causing a lot of useless floating-point operations at run time.
- * To get around this we use the following pre-calculated constants.
- * If you change CONST_BITS you may want to add appropriate values.
- * (With a reasonable C compiler, you can just rely on the FIX() macro...)
- */
-
-#if CONST_BITS == 8
-#define FIX_0_382683433  ((INT32)   98)		/* FIX(0.382683433) */
-#define FIX_0_541196100  ((INT32)  139)		/* FIX(0.541196100) */
-#define FIX_0_707106781  ((INT32)  181)		/* FIX(0.707106781) */
-#define FIX_1_306562965  ((INT32)  334)		/* FIX(1.306562965) */
-#else
-#define FIX_0_382683433  FIX(0.382683433)
-#define FIX_0_541196100  FIX(0.541196100)
-#define FIX_0_707106781  FIX(0.707106781)
-#define FIX_1_306562965  FIX(1.306562965)
-#endif
-
-
-/* We can gain a little more speed, with a further compromise in accuracy,
- * by omitting the addition in a descaling shift.  This yields an incorrectly
- * rounded result half the time...
- */
-
-#ifndef USE_ACCURATE_ROUNDING
-#undef DESCALE
-#define DESCALE(x,n)  RIGHT_SHIFT(x, n)
-#endif
-
-
-/* Multiply a DCTELEM variable by an INT32 constant, and immediately
- * descale to yield a DCTELEM result.
- */
-
-#define MULTIPLY(var,const)  ((DCTELEM) DESCALE((var) * (const), CONST_BITS))
-
-
-/*
- * Perform the forward DCT on one block of samples.
- */
-
-GLOBAL(void)
-jpeg_fdct_ifast (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
-{
-  DCTELEM tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
-  DCTELEM tmp10, tmp11, tmp12, tmp13;
-  DCTELEM z1, z2, z3, z4, z5, z11, z13;
-  DCTELEM *dataptr;
-  JSAMPROW elemptr;
-  int ctr;
-  SHIFT_TEMPS
-
-  /* Pass 1: process rows. */
-
-  dataptr = data;
-  for (ctr = 0; ctr < DCTSIZE; ctr++) {
-    elemptr = sample_data[ctr] + start_col;
-
-    /* Load data into workspace */
-    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[7]);
-    tmp7 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[7]);
-    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[6]);
-    tmp6 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[6]);
-    tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[5]);
-    tmp5 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[5]);
-    tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[4]);
-    tmp4 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[4]);
-
-    /* Even part */
-
-    tmp10 = tmp0 + tmp3;	/* phase 2 */
-    tmp13 = tmp0 - tmp3;
-    tmp11 = tmp1 + tmp2;
-    tmp12 = tmp1 - tmp2;
-
-    /* Apply unsigned->signed conversion */
-    dataptr[0] = tmp10 + tmp11 - 8 * CENTERJSAMPLE; /* phase 3 */
-    dataptr[4] = tmp10 - tmp11;
-
-    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781); /* c4 */
-    dataptr[2] = tmp13 + z1;	/* phase 5 */
-    dataptr[6] = tmp13 - z1;
-
-    /* Odd part */
-
-    tmp10 = tmp4 + tmp5;	/* phase 2 */
-    tmp11 = tmp5 + tmp6;
-    tmp12 = tmp6 + tmp7;
-
-    /* The rotator is modified from fig 4-8 to avoid extra negations. */
-    z5 = MULTIPLY(tmp10 - tmp12, FIX_0_382683433); /* c6 */
-    z2 = MULTIPLY(tmp10, FIX_0_541196100) + z5; /* c2-c6 */
-    z4 = MULTIPLY(tmp12, FIX_1_306562965) + z5; /* c2+c6 */
-    z3 = MULTIPLY(tmp11, FIX_0_707106781); /* c4 */
-
-    z11 = tmp7 + z3;		/* phase 5 */
-    z13 = tmp7 - z3;
-
-    dataptr[5] = z13 + z2;	/* phase 6 */
-    dataptr[3] = z13 - z2;
-    dataptr[1] = z11 + z4;
-    dataptr[7] = z11 - z4;
-
-    dataptr += DCTSIZE;		/* advance pointer to next row */
-  }
-
-  /* Pass 2: process columns. */
-
-  dataptr = data;
-  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
-    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7];
-    tmp7 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7];
-    tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6];
-    tmp6 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6];
-    tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5];
-    tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5];
-    tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4];
-    tmp4 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4];
-
-    /* Even part */
-
-    tmp10 = tmp0 + tmp3;	/* phase 2 */
-    tmp13 = tmp0 - tmp3;
-    tmp11 = tmp1 + tmp2;
-    tmp12 = tmp1 - tmp2;
-
-    dataptr[DCTSIZE*0] = tmp10 + tmp11; /* phase 3 */
-    dataptr[DCTSIZE*4] = tmp10 - tmp11;
-
-    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781); /* c4 */
-    dataptr[DCTSIZE*2] = tmp13 + z1; /* phase 5 */
-    dataptr[DCTSIZE*6] = tmp13 - z1;
-
-    /* Odd part */
-
-    tmp10 = tmp4 + tmp5;	/* phase 2 */
-    tmp11 = tmp5 + tmp6;
-    tmp12 = tmp6 + tmp7;
-
-    /* The rotator is modified from fig 4-8 to avoid extra negations. */
-    z5 = MULTIPLY(tmp10 - tmp12, FIX_0_382683433); /* c6 */
-    z2 = MULTIPLY(tmp10, FIX_0_541196100) + z5; /* c2-c6 */
-    z4 = MULTIPLY(tmp12, FIX_1_306562965) + z5; /* c2+c6 */
-    z3 = MULTIPLY(tmp11, FIX_0_707106781); /* c4 */
-
-    z11 = tmp7 + z3;		/* phase 5 */
-    z13 = tmp7 - z3;
-
-    dataptr[DCTSIZE*5] = z13 + z2; /* phase 6 */
-    dataptr[DCTSIZE*3] = z13 - z2;
-    dataptr[DCTSIZE*1] = z11 + z4;
-    dataptr[DCTSIZE*7] = z11 - z4;
-
-    dataptr++;			/* advance pointer to next column */
-  }
-}
-
-#endif /* DCT_IFAST_SUPPORTED */
+/*
+ * jfdctfst.c
+ *
+ * Copyright (C) 1994-1996, Thomas G. Lane.
+ * Modified 2003-2009 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains a fast, not so accurate integer implementation of the
+ * forward DCT (Discrete Cosine Transform).
+ *
+ * A 2-D DCT can be done by 1-D DCT on each row followed by 1-D DCT
+ * on each column.  Direct algorithms are also available, but they are
+ * much more complex and seem not to be any faster when reduced to code.
+ *
+ * This implementation is based on Arai, Agui, and Nakajima's algorithm for
+ * scaled DCT.  Their original paper (Trans. IEICE E-71(11):1095) is in
+ * Japanese, but the algorithm is described in the Pennebaker & Mitchell
+ * JPEG textbook (see REFERENCES section in file README).  The following code
+ * is based directly on figure 4-8 in P&M.
+ * While an 8-point DCT cannot be done in less than 11 multiplies, it is
+ * possible to arrange the computation so that many of the multiplies are
+ * simple scalings of the final outputs.  These multiplies can then be
+ * folded into the multiplications or divisions by the JPEG quantization
+ * table entries.  The AA&N method leaves only 5 multiplies and 29 adds
+ * to be done in the DCT itself.
+ * The primary disadvantage of this method is that with fixed-point math,
+ * accuracy is lost due to imprecise representation of the scaled
+ * quantization values.  The smaller the quantization table entry, the less
+ * precise the scaled value, so this implementation does worse with high-
+ * quality-setting files than with low-quality ones.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jdct.h"		/* Private declarations for DCT subsystem */
+
+#ifdef DCT_IFAST_SUPPORTED
+
+
+/*
+ * This module is specialized to the case DCTSIZE = 8.
+ */
+
+#if DCTSIZE != 8
+  Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */
+#endif
+
+
+/* Scaling decisions are generally the same as in the LL&M algorithm;
+ * see jfdctint.c for more details.  However, we choose to descale
+ * (right shift) multiplication products as soon as they are formed,
+ * rather than carrying additional fractional bits into subsequent additions.
+ * This compromises accuracy slightly, but it lets us save a few shifts.
+ * More importantly, 16-bit arithmetic is then adequate (for 8-bit samples)
+ * everywhere except in the multiplications proper; this saves a good deal
+ * of work on 16-bit-int machines.
+ *
+ * Again to save a few shifts, the intermediate results between pass 1 and
+ * pass 2 are not upscaled, but are represented only to integral precision.
+ *
+ * A final compromise is to represent the multiplicative constants to only
+ * 8 fractional bits, rather than 13.  This saves some shifting work on some
+ * machines, and may also reduce the cost of multiplication (since there
+ * are fewer one-bits in the constants).
+ */
+
+#define CONST_BITS  8
+
+
+/* Some C compilers fail to reduce "FIX(constant)" at compile time, thus
+ * causing a lot of useless floating-point operations at run time.
+ * To get around this we use the following pre-calculated constants.
+ * If you change CONST_BITS you may want to add appropriate values.
+ * (With a reasonable C compiler, you can just rely on the FIX() macro...)
+ */
+
+#if CONST_BITS == 8
+#define FIX_0_382683433  ((INT32)   98)		/* FIX(0.382683433) */
+#define FIX_0_541196100  ((INT32)  139)		/* FIX(0.541196100) */
+#define FIX_0_707106781  ((INT32)  181)		/* FIX(0.707106781) */
+#define FIX_1_306562965  ((INT32)  334)		/* FIX(1.306562965) */
+#else
+#define FIX_0_382683433  FIX(0.382683433)
+#define FIX_0_541196100  FIX(0.541196100)
+#define FIX_0_707106781  FIX(0.707106781)
+#define FIX_1_306562965  FIX(1.306562965)
+#endif
+
+
+/* We can gain a little more speed, with a further compromise in accuracy,
+ * by omitting the addition in a descaling shift.  This yields an incorrectly
+ * rounded result half the time...
+ */
+
+#ifndef USE_ACCURATE_ROUNDING
+#undef DESCALE
+#define DESCALE(x,n)  RIGHT_SHIFT(x, n)
+#endif
+
+
+/* Multiply a DCTELEM variable by an INT32 constant, and immediately
+ * descale to yield a DCTELEM result.
+ */
+
+#define MULTIPLY(var,const)  ((DCTELEM) DESCALE((var) * (const), CONST_BITS))
+
+
+/*
+ * Perform the forward DCT on one block of samples.
+ */
+
+GLOBAL(void)
+jpeg_fdct_ifast (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
+{
+  DCTELEM tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+  DCTELEM tmp10, tmp11, tmp12, tmp13;
+  DCTELEM z1, z2, z3, z4, z5, z11, z13;
+  DCTELEM *dataptr;
+  JSAMPROW elemptr;
+  int ctr;
+  SHIFT_TEMPS
+
+  /* Pass 1: process rows. */
+
+  dataptr = data;
+  for (ctr = 0; ctr < DCTSIZE; ctr++) {
+    elemptr = sample_data[ctr] + start_col;
+
+    /* Load data into workspace */
+    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[7]);
+    tmp7 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[7]);
+    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[6]);
+    tmp6 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[6]);
+    tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[5]);
+    tmp5 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[5]);
+    tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[4]);
+    tmp4 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[4]);
+
+    /* Even part */
+
+    tmp10 = tmp0 + tmp3;	/* phase 2 */
+    tmp13 = tmp0 - tmp3;
+    tmp11 = tmp1 + tmp2;
+    tmp12 = tmp1 - tmp2;
+
+    /* Apply unsigned->signed conversion */
+    dataptr[0] = tmp10 + tmp11 - 8 * CENTERJSAMPLE; /* phase 3 */
+    dataptr[4] = tmp10 - tmp11;
+
+    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781); /* c4 */
+    dataptr[2] = tmp13 + z1;	/* phase 5 */
+    dataptr[6] = tmp13 - z1;
+
+    /* Odd part */
+
+    tmp10 = tmp4 + tmp5;	/* phase 2 */
+    tmp11 = tmp5 + tmp6;
+    tmp12 = tmp6 + tmp7;
+
+    /* The rotator is modified from fig 4-8 to avoid extra negations. */
+    z5 = MULTIPLY(tmp10 - tmp12, FIX_0_382683433); /* c6 */
+    z2 = MULTIPLY(tmp10, FIX_0_541196100) + z5; /* c2-c6 */
+    z4 = MULTIPLY(tmp12, FIX_1_306562965) + z5; /* c2+c6 */
+    z3 = MULTIPLY(tmp11, FIX_0_707106781); /* c4 */
+
+    z11 = tmp7 + z3;		/* phase 5 */
+    z13 = tmp7 - z3;
+
+    dataptr[5] = z13 + z2;	/* phase 6 */
+    dataptr[3] = z13 - z2;
+    dataptr[1] = z11 + z4;
+    dataptr[7] = z11 - z4;
+
+    dataptr += DCTSIZE;		/* advance pointer to next row */
+  }
+
+  /* Pass 2: process columns. */
+
+  dataptr = data;
+  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
+    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7];
+    tmp7 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7];
+    tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6];
+    tmp6 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6];
+    tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5];
+    tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5];
+    tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4];
+    tmp4 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4];
+
+    /* Even part */
+
+    tmp10 = tmp0 + tmp3;	/* phase 2 */
+    tmp13 = tmp0 - tmp3;
+    tmp11 = tmp1 + tmp2;
+    tmp12 = tmp1 - tmp2;
+
+    dataptr[DCTSIZE*0] = tmp10 + tmp11; /* phase 3 */
+    dataptr[DCTSIZE*4] = tmp10 - tmp11;
+
+    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781); /* c4 */
+    dataptr[DCTSIZE*2] = tmp13 + z1; /* phase 5 */
+    dataptr[DCTSIZE*6] = tmp13 - z1;
+
+    /* Odd part */
+
+    tmp10 = tmp4 + tmp5;	/* phase 2 */
+    tmp11 = tmp5 + tmp6;
+    tmp12 = tmp6 + tmp7;
+
+    /* The rotator is modified from fig 4-8 to avoid extra negations. */
+    z5 = MULTIPLY(tmp10 - tmp12, FIX_0_382683433); /* c6 */
+    z2 = MULTIPLY(tmp10, FIX_0_541196100) + z5; /* c2-c6 */
+    z4 = MULTIPLY(tmp12, FIX_1_306562965) + z5; /* c2+c6 */
+    z3 = MULTIPLY(tmp11, FIX_0_707106781); /* c4 */
+
+    z11 = tmp7 + z3;		/* phase 5 */
+    z13 = tmp7 - z3;
+
+    dataptr[DCTSIZE*5] = z13 + z2; /* phase 6 */
+    dataptr[DCTSIZE*3] = z13 - z2;
+    dataptr[DCTSIZE*1] = z11 + z4;
+    dataptr[DCTSIZE*7] = z11 - z4;
+
+    dataptr++;			/* advance pointer to next column */
+  }
+}
+
+#endif /* DCT_IFAST_SUPPORTED */
diff --git a/plugins/AdvaImg/src/LibJPEG/jfdctint.c b/plugins/AdvaImg/src/LibJPEG/jfdctint.c
index 1dde58c499..529eaf8670 100644
--- a/plugins/AdvaImg/src/LibJPEG/jfdctint.c
+++ b/plugins/AdvaImg/src/LibJPEG/jfdctint.c
@@ -1,4348 +1,4348 @@
-/*
- * jfdctint.c
- *
- * Copyright (C) 1991-1996, Thomas G. Lane.
- * Modification developed 2003-2009 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains a slow-but-accurate integer implementation of the
- * forward DCT (Discrete Cosine Transform).
- *
- * A 2-D DCT can be done by 1-D DCT on each row followed by 1-D DCT
- * on each column.  Direct algorithms are also available, but they are
- * much more complex and seem not to be any faster when reduced to code.
- *
- * This implementation is based on an algorithm described in
- *   C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT
- *   Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics,
- *   Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991.
- * The primary algorithm described there uses 11 multiplies and 29 adds.
- * We use their alternate method with 12 multiplies and 32 adds.
- * The advantage of this method is that no data path contains more than one
- * multiplication; this allows a very simple and accurate implementation in
- * scaled fixed-point arithmetic, with a minimal number of shifts.
- *
- * We also provide FDCT routines with various input sample block sizes for
- * direct resolution reduction or enlargement and for direct resolving the
- * common 2x1 and 1x2 subsampling cases without additional resampling: NxN
- * (N=1...16), 2NxN, and Nx2N (N=1...8) pixels for one 8x8 output DCT block.
- *
- * For N<8 we fill the remaining block coefficients with zero.
- * For N>8 we apply a partial N-point FDCT on the input samples, computing
- * just the lower 8 frequency coefficients and discarding the rest.
- *
- * We must scale the output coefficients of the N-point FDCT appropriately
- * to the standard 8-point FDCT level by 8/N per 1-D pass.  This scaling
- * is folded into the constant multipliers (pass 2) and/or final/initial
- * shifting.
- *
- * CAUTION: We rely on the FIX() macro except for the N=1,2,4,8 cases
- * since there would be too many additional constants to pre-calculate.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-#include "jdct.h"		/* Private declarations for DCT subsystem */
-
-#ifdef DCT_ISLOW_SUPPORTED
-
-
-/*
- * This module is specialized to the case DCTSIZE = 8.
- */
-
-#if DCTSIZE != 8
-  Sorry, this code only copes with 8x8 DCT blocks. /* deliberate syntax err */
-#endif
-
-
-/*
- * The poop on this scaling stuff is as follows:
- *
- * Each 1-D DCT step produces outputs which are a factor of sqrt(N)
- * larger than the true DCT outputs.  The final outputs are therefore
- * a factor of N larger than desired; since N=8 this can be cured by
- * a simple right shift at the end of the algorithm.  The advantage of
- * this arrangement is that we save two multiplications per 1-D DCT,
- * because the y0 and y4 outputs need not be divided by sqrt(N).
- * In the IJG code, this factor of 8 is removed by the quantization step
- * (in jcdctmgr.c), NOT in this module.
- *
- * We have to do addition and subtraction of the integer inputs, which
- * is no problem, and multiplication by fractional constants, which is
- * a problem to do in integer arithmetic.  We multiply all the constants
- * by CONST_SCALE and convert them to integer constants (thus retaining
- * CONST_BITS bits of precision in the constants).  After doing a
- * multiplication we have to divide the product by CONST_SCALE, with proper
- * rounding, to produce the correct output.  This division can be done
- * cheaply as a right shift of CONST_BITS bits.  We postpone shifting
- * as long as possible so that partial sums can be added together with
- * full fractional precision.
- *
- * The outputs of the first pass are scaled up by PASS1_BITS bits so that
- * they are represented to better-than-integral precision.  These outputs
- * require BITS_IN_JSAMPLE + PASS1_BITS + 3 bits; this fits in a 16-bit word
- * with the recommended scaling.  (For 12-bit sample data, the intermediate
- * array is INT32 anyway.)
- *
- * To avoid overflow of the 32-bit intermediate results in pass 2, we must
- * have BITS_IN_JSAMPLE + CONST_BITS + PASS1_BITS <= 26.  Error analysis
- * shows that the values given below are the most effective.
- */
-
-#if BITS_IN_JSAMPLE == 8
-#define CONST_BITS  13
-#define PASS1_BITS  2
-#else
-#define CONST_BITS  13
-#define PASS1_BITS  1		/* lose a little precision to avoid overflow */
-#endif
-
-/* Some C compilers fail to reduce "FIX(constant)" at compile time, thus
- * causing a lot of useless floating-point operations at run time.
- * To get around this we use the following pre-calculated constants.
- * If you change CONST_BITS you may want to add appropriate values.
- * (With a reasonable C compiler, you can just rely on the FIX() macro...)
- */
-
-#if CONST_BITS == 13
-#define FIX_0_298631336  ((INT32)  2446)	/* FIX(0.298631336) */
-#define FIX_0_390180644  ((INT32)  3196)	/* FIX(0.390180644) */
-#define FIX_0_541196100  ((INT32)  4433)	/* FIX(0.541196100) */
-#define FIX_0_765366865  ((INT32)  6270)	/* FIX(0.765366865) */
-#define FIX_0_899976223  ((INT32)  7373)	/* FIX(0.899976223) */
-#define FIX_1_175875602  ((INT32)  9633)	/* FIX(1.175875602) */
-#define FIX_1_501321110  ((INT32)  12299)	/* FIX(1.501321110) */
-#define FIX_1_847759065  ((INT32)  15137)	/* FIX(1.847759065) */
-#define FIX_1_961570560  ((INT32)  16069)	/* FIX(1.961570560) */
-#define FIX_2_053119869  ((INT32)  16819)	/* FIX(2.053119869) */
-#define FIX_2_562915447  ((INT32)  20995)	/* FIX(2.562915447) */
-#define FIX_3_072711026  ((INT32)  25172)	/* FIX(3.072711026) */
-#else
-#define FIX_0_298631336  FIX(0.298631336)
-#define FIX_0_390180644  FIX(0.390180644)
-#define FIX_0_541196100  FIX(0.541196100)
-#define FIX_0_765366865  FIX(0.765366865)
-#define FIX_0_899976223  FIX(0.899976223)
-#define FIX_1_175875602  FIX(1.175875602)
-#define FIX_1_501321110  FIX(1.501321110)
-#define FIX_1_847759065  FIX(1.847759065)
-#define FIX_1_961570560  FIX(1.961570560)
-#define FIX_2_053119869  FIX(2.053119869)
-#define FIX_2_562915447  FIX(2.562915447)
-#define FIX_3_072711026  FIX(3.072711026)
-#endif
-
-
-/* Multiply an INT32 variable by an INT32 constant to yield an INT32 result.
- * For 8-bit samples with the recommended scaling, all the variable
- * and constant values involved are no more than 16 bits wide, so a
- * 16x16->32 bit multiply can be used instead of a full 32x32 multiply.
- * For 12-bit samples, a full 32-bit multiplication will be needed.
- */
-
-#if BITS_IN_JSAMPLE == 8
-#define MULTIPLY(var,const)  MULTIPLY16C16(var,const)
-#else
-#define MULTIPLY(var,const)  ((var) * (const))
-#endif
-
-
-/*
- * Perform the forward DCT on one block of samples.
- */
-
-GLOBAL(void)
-jpeg_fdct_islow (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
-{
-  INT32 tmp0, tmp1, tmp2, tmp3;
-  INT32 tmp10, tmp11, tmp12, tmp13;
-  INT32 z1;
-  DCTELEM *dataptr;
-  JSAMPROW elemptr;
-  int ctr;
-  SHIFT_TEMPS
-
-  /* Pass 1: process rows. */
-  /* Note results are scaled up by sqrt(8) compared to a true DCT; */
-  /* furthermore, we scale the results by 2**PASS1_BITS. */
-
-  dataptr = data;
-  for (ctr = 0; ctr < DCTSIZE; ctr++) {
-    elemptr = sample_data[ctr] + start_col;
-
-    /* Even part per LL&M figure 1 --- note that published figure is faulty;
-     * rotator "sqrt(2)*c1" should be "sqrt(2)*c6".
-     */
-
-    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[7]);
-    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[6]);
-    tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[5]);
-    tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[4]);
-
-    tmp10 = tmp0 + tmp3;
-    tmp12 = tmp0 - tmp3;
-    tmp11 = tmp1 + tmp2;
-    tmp13 = tmp1 - tmp2;
-
-    tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[7]);
-    tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[6]);
-    tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[5]);
-    tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[4]);
-
-    /* Apply unsigned->signed conversion */
-    dataptr[0] = (DCTELEM) ((tmp10 + tmp11 - 8 * CENTERJSAMPLE) << PASS1_BITS);
-    dataptr[4] = (DCTELEM) ((tmp10 - tmp11) << PASS1_BITS);
-
-    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
-    /* Add fudge factor here for final descale. */
-    z1 += ONE << (CONST_BITS-PASS1_BITS-1);
-    dataptr[2] = (DCTELEM) RIGHT_SHIFT(z1 + MULTIPLY(tmp12, FIX_0_765366865),
-				       CONST_BITS-PASS1_BITS);
-    dataptr[6] = (DCTELEM) RIGHT_SHIFT(z1 - MULTIPLY(tmp13, FIX_1_847759065),
-				       CONST_BITS-PASS1_BITS);
-
-    /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
-     * cK represents sqrt(2) * cos(K*pi/16).
-     * i0..i3 in the paper are tmp0..tmp3 here.
-     */
-
-    tmp10 = tmp0 + tmp3;
-    tmp11 = tmp1 + tmp2;
-    tmp12 = tmp0 + tmp2;
-    tmp13 = tmp1 + tmp3;
-    z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602); /*  c3 */
-    /* Add fudge factor here for final descale. */
-    z1 += ONE << (CONST_BITS-PASS1_BITS-1);
-
-    tmp0  = MULTIPLY(tmp0,    FIX_1_501321110);    /*  c1+c3-c5-c7 */
-    tmp1  = MULTIPLY(tmp1,    FIX_3_072711026);    /*  c1+c3+c5-c7 */
-    tmp2  = MULTIPLY(tmp2,    FIX_2_053119869);    /*  c1+c3-c5+c7 */
-    tmp3  = MULTIPLY(tmp3,    FIX_0_298631336);    /* -c1+c3+c5-c7 */
-    tmp10 = MULTIPLY(tmp10, - FIX_0_899976223);    /*  c7-c3 */
-    tmp11 = MULTIPLY(tmp11, - FIX_2_562915447);    /* -c1-c3 */
-    tmp12 = MULTIPLY(tmp12, - FIX_0_390180644);    /*  c5-c3 */
-    tmp13 = MULTIPLY(tmp13, - FIX_1_961570560);    /* -c3-c5 */
-
-    tmp12 += z1;
-    tmp13 += z1;
-
-    dataptr[1] = (DCTELEM)
-      RIGHT_SHIFT(tmp0 + tmp10 + tmp12, CONST_BITS-PASS1_BITS);
-    dataptr[3] = (DCTELEM)
-      RIGHT_SHIFT(tmp1 + tmp11 + tmp13, CONST_BITS-PASS1_BITS);
-    dataptr[5] = (DCTELEM)
-      RIGHT_SHIFT(tmp2 + tmp11 + tmp12, CONST_BITS-PASS1_BITS);
-    dataptr[7] = (DCTELEM)
-      RIGHT_SHIFT(tmp3 + tmp10 + tmp13, CONST_BITS-PASS1_BITS);
-
-    dataptr += DCTSIZE;		/* advance pointer to next row */
-  }
-
-  /* Pass 2: process columns.
-   * We remove the PASS1_BITS scaling, but leave the results scaled up
-   * by an overall factor of 8.
-   */
-
-  dataptr = data;
-  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
-    /* Even part per LL&M figure 1 --- note that published figure is faulty;
-     * rotator "sqrt(2)*c1" should be "sqrt(2)*c6".
-     */
-
-    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7];
-    tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6];
-    tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5];
-    tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4];
-
-    /* Add fudge factor here for final descale. */
-    tmp10 = tmp0 + tmp3 + (ONE << (PASS1_BITS-1));
-    tmp12 = tmp0 - tmp3;
-    tmp11 = tmp1 + tmp2;
-    tmp13 = tmp1 - tmp2;
-
-    tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7];
-    tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6];
-    tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5];
-    tmp3 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4];
-
-    dataptr[DCTSIZE*0] = (DCTELEM) RIGHT_SHIFT(tmp10 + tmp11, PASS1_BITS);
-    dataptr[DCTSIZE*4] = (DCTELEM) RIGHT_SHIFT(tmp10 - tmp11, PASS1_BITS);
-
-    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
-    /* Add fudge factor here for final descale. */
-    z1 += ONE << (CONST_BITS+PASS1_BITS-1);
-    dataptr[DCTSIZE*2] = (DCTELEM)
-      RIGHT_SHIFT(z1 + MULTIPLY(tmp12, FIX_0_765366865), CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*6] = (DCTELEM)
-      RIGHT_SHIFT(z1 - MULTIPLY(tmp13, FIX_1_847759065), CONST_BITS+PASS1_BITS);
-
-    /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
-     * cK represents sqrt(2) * cos(K*pi/16).
-     * i0..i3 in the paper are tmp0..tmp3 here.
-     */
-
-    tmp10 = tmp0 + tmp3;
-    tmp11 = tmp1 + tmp2;
-    tmp12 = tmp0 + tmp2;
-    tmp13 = tmp1 + tmp3;
-    z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602); /*  c3 */
-    /* Add fudge factor here for final descale. */
-    z1 += ONE << (CONST_BITS+PASS1_BITS-1);
-
-    tmp0  = MULTIPLY(tmp0,    FIX_1_501321110);    /*  c1+c3-c5-c7 */
-    tmp1  = MULTIPLY(tmp1,    FIX_3_072711026);    /*  c1+c3+c5-c7 */
-    tmp2  = MULTIPLY(tmp2,    FIX_2_053119869);    /*  c1+c3-c5+c7 */
-    tmp3  = MULTIPLY(tmp3,    FIX_0_298631336);    /* -c1+c3+c5-c7 */
-    tmp10 = MULTIPLY(tmp10, - FIX_0_899976223);    /*  c7-c3 */
-    tmp11 = MULTIPLY(tmp11, - FIX_2_562915447);    /* -c1-c3 */
-    tmp12 = MULTIPLY(tmp12, - FIX_0_390180644);    /*  c5-c3 */
-    tmp13 = MULTIPLY(tmp13, - FIX_1_961570560);    /* -c3-c5 */
-
-    tmp12 += z1;
-    tmp13 += z1;
-
-    dataptr[DCTSIZE*1] = (DCTELEM)
-      RIGHT_SHIFT(tmp0 + tmp10 + tmp12, CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*3] = (DCTELEM)
-      RIGHT_SHIFT(tmp1 + tmp11 + tmp13, CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*5] = (DCTELEM)
-      RIGHT_SHIFT(tmp2 + tmp11 + tmp12, CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*7] = (DCTELEM)
-      RIGHT_SHIFT(tmp3 + tmp10 + tmp13, CONST_BITS+PASS1_BITS);
-
-    dataptr++;			/* advance pointer to next column */
-  }
-}
-
-#ifdef DCT_SCALING_SUPPORTED
-
-
-/*
- * Perform the forward DCT on a 7x7 sample block.
- */
-
-GLOBAL(void)
-jpeg_fdct_7x7 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
-{
-  INT32 tmp0, tmp1, tmp2, tmp3;
-  INT32 tmp10, tmp11, tmp12;
-  INT32 z1, z2, z3;
-  DCTELEM *dataptr;
-  JSAMPROW elemptr;
-  int ctr;
-  SHIFT_TEMPS
-
-  /* Pre-zero output coefficient block. */
-  MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
-
-  /* Pass 1: process rows. */
-  /* Note results are scaled up by sqrt(8) compared to a true DCT; */
-  /* furthermore, we scale the results by 2**PASS1_BITS. */
-  /* cK represents sqrt(2) * cos(K*pi/14). */
-
-  dataptr = data;
-  for (ctr = 0; ctr < 7; ctr++) {
-    elemptr = sample_data[ctr] + start_col;
-
-    /* Even part */
-
-    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[6]);
-    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[5]);
-    tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[4]);
-    tmp3 = GETJSAMPLE(elemptr[3]);
-
-    tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[6]);
-    tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[5]);
-    tmp12 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[4]);
-
-    z1 = tmp0 + tmp2;
-    /* Apply unsigned->signed conversion */
-    dataptr[0] = (DCTELEM)
-      ((z1 + tmp1 + tmp3 - 7 * CENTERJSAMPLE) << PASS1_BITS);
-    tmp3 += tmp3;
-    z1 -= tmp3;
-    z1 -= tmp3;
-    z1 = MULTIPLY(z1, FIX(0.353553391));                /* (c2+c6-c4)/2 */
-    z2 = MULTIPLY(tmp0 - tmp2, FIX(0.920609002));       /* (c2+c4-c6)/2 */
-    z3 = MULTIPLY(tmp1 - tmp2, FIX(0.314692123));       /* c6 */
-    dataptr[2] = (DCTELEM) DESCALE(z1 + z2 + z3, CONST_BITS-PASS1_BITS);
-    z1 -= z2;
-    z2 = MULTIPLY(tmp0 - tmp1, FIX(0.881747734));       /* c4 */
-    dataptr[4] = (DCTELEM)
-      DESCALE(z2 + z3 - MULTIPLY(tmp1 - tmp3, FIX(0.707106781)), /* c2+c6-c4 */
-	      CONST_BITS-PASS1_BITS);
-    dataptr[6] = (DCTELEM) DESCALE(z1 + z2, CONST_BITS-PASS1_BITS);
-
-    /* Odd part */
-
-    tmp1 = MULTIPLY(tmp10 + tmp11, FIX(0.935414347));   /* (c3+c1-c5)/2 */
-    tmp2 = MULTIPLY(tmp10 - tmp11, FIX(0.170262339));   /* (c3+c5-c1)/2 */
-    tmp0 = tmp1 - tmp2;
-    tmp1 += tmp2;
-    tmp2 = MULTIPLY(tmp11 + tmp12, - FIX(1.378756276)); /* -c1 */
-    tmp1 += tmp2;
-    tmp3 = MULTIPLY(tmp10 + tmp12, FIX(0.613604268));   /* c5 */
-    tmp0 += tmp3;
-    tmp2 += tmp3 + MULTIPLY(tmp12, FIX(1.870828693));   /* c3+c1-c5 */
-
-    dataptr[1] = (DCTELEM) DESCALE(tmp0, CONST_BITS-PASS1_BITS);
-    dataptr[3] = (DCTELEM) DESCALE(tmp1, CONST_BITS-PASS1_BITS);
-    dataptr[5] = (DCTELEM) DESCALE(tmp2, CONST_BITS-PASS1_BITS);
-
-    dataptr += DCTSIZE;		/* advance pointer to next row */
-  }
-
-  /* Pass 2: process columns.
-   * We remove the PASS1_BITS scaling, but leave the results scaled up
-   * by an overall factor of 8.
-   * We must also scale the output by (8/7)**2 = 64/49, which we fold
-   * into the constant multipliers:
-   * cK now represents sqrt(2) * cos(K*pi/14) * 64/49.
-   */
-
-  dataptr = data;
-  for (ctr = 0; ctr < 7; ctr++) {
-    /* Even part */
-
-    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*6];
-    tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*5];
-    tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*4];
-    tmp3 = dataptr[DCTSIZE*3];
-
-    tmp10 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*6];
-    tmp11 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*5];
-    tmp12 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*4];
-
-    z1 = tmp0 + tmp2;
-    dataptr[DCTSIZE*0] = (DCTELEM)
-      DESCALE(MULTIPLY(z1 + tmp1 + tmp3, FIX(1.306122449)), /* 64/49 */
-	      CONST_BITS+PASS1_BITS);
-    tmp3 += tmp3;
-    z1 -= tmp3;
-    z1 -= tmp3;
-    z1 = MULTIPLY(z1, FIX(0.461784020));                /* (c2+c6-c4)/2 */
-    z2 = MULTIPLY(tmp0 - tmp2, FIX(1.202428084));       /* (c2+c4-c6)/2 */
-    z3 = MULTIPLY(tmp1 - tmp2, FIX(0.411026446));       /* c6 */
-    dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(z1 + z2 + z3, CONST_BITS+PASS1_BITS);
-    z1 -= z2;
-    z2 = MULTIPLY(tmp0 - tmp1, FIX(1.151670509));       /* c4 */
-    dataptr[DCTSIZE*4] = (DCTELEM)
-      DESCALE(z2 + z3 - MULTIPLY(tmp1 - tmp3, FIX(0.923568041)), /* c2+c6-c4 */
-	      CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(z1 + z2, CONST_BITS+PASS1_BITS);
-
-    /* Odd part */
-
-    tmp1 = MULTIPLY(tmp10 + tmp11, FIX(1.221765677));   /* (c3+c1-c5)/2 */
-    tmp2 = MULTIPLY(tmp10 - tmp11, FIX(0.222383464));   /* (c3+c5-c1)/2 */
-    tmp0 = tmp1 - tmp2;
-    tmp1 += tmp2;
-    tmp2 = MULTIPLY(tmp11 + tmp12, - FIX(1.800824523)); /* -c1 */
-    tmp1 += tmp2;
-    tmp3 = MULTIPLY(tmp10 + tmp12, FIX(0.801442310));   /* c5 */
-    tmp0 += tmp3;
-    tmp2 += tmp3 + MULTIPLY(tmp12, FIX(2.443531355));   /* c3+c1-c5 */
-
-    dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp0, CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp1, CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp2, CONST_BITS+PASS1_BITS);
-
-    dataptr++;			/* advance pointer to next column */
-  }
-}
-
-
-/*
- * Perform the forward DCT on a 6x6 sample block.
- */
-
-GLOBAL(void)
-jpeg_fdct_6x6 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
-{
-  INT32 tmp0, tmp1, tmp2;
-  INT32 tmp10, tmp11, tmp12;
-  DCTELEM *dataptr;
-  JSAMPROW elemptr;
-  int ctr;
-  SHIFT_TEMPS
-
-  /* Pre-zero output coefficient block. */
-  MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
-
-  /* Pass 1: process rows. */
-  /* Note results are scaled up by sqrt(8) compared to a true DCT; */
-  /* furthermore, we scale the results by 2**PASS1_BITS. */
-  /* cK represents sqrt(2) * cos(K*pi/12). */
-
-  dataptr = data;
-  for (ctr = 0; ctr < 6; ctr++) {
-    elemptr = sample_data[ctr] + start_col;
-
-    /* Even part */
-
-    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[5]);
-    tmp11 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[4]);
-    tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[3]);
-
-    tmp10 = tmp0 + tmp2;
-    tmp12 = tmp0 - tmp2;
-
-    tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[5]);
-    tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[4]);
-    tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[3]);
-
-    /* Apply unsigned->signed conversion */
-    dataptr[0] = (DCTELEM)
-      ((tmp10 + tmp11 - 6 * CENTERJSAMPLE) << PASS1_BITS);
-    dataptr[2] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp12, FIX(1.224744871)),                 /* c2 */
-	      CONST_BITS-PASS1_BITS);
-    dataptr[4] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp10 - tmp11 - tmp11, FIX(0.707106781)), /* c4 */
-	      CONST_BITS-PASS1_BITS);
-
-    /* Odd part */
-
-    tmp10 = DESCALE(MULTIPLY(tmp0 + tmp2, FIX(0.366025404)),     /* c5 */
-		    CONST_BITS-PASS1_BITS);
-
-    dataptr[1] = (DCTELEM) (tmp10 + ((tmp0 + tmp1) << PASS1_BITS));
-    dataptr[3] = (DCTELEM) ((tmp0 - tmp1 - tmp2) << PASS1_BITS);
-    dataptr[5] = (DCTELEM) (tmp10 + ((tmp2 - tmp1) << PASS1_BITS));
-
-    dataptr += DCTSIZE;		/* advance pointer to next row */
-  }
-
-  /* Pass 2: process columns.
-   * We remove the PASS1_BITS scaling, but leave the results scaled up
-   * by an overall factor of 8.
-   * We must also scale the output by (8/6)**2 = 16/9, which we fold
-   * into the constant multipliers:
-   * cK now represents sqrt(2) * cos(K*pi/12) * 16/9.
-   */
-
-  dataptr = data;
-  for (ctr = 0; ctr < 6; ctr++) {
-    /* Even part */
-
-    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*5];
-    tmp11 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*4];
-    tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*3];
-
-    tmp10 = tmp0 + tmp2;
-    tmp12 = tmp0 - tmp2;
-
-    tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*5];
-    tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*4];
-    tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*3];
-
-    dataptr[DCTSIZE*0] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp10 + tmp11, FIX(1.777777778)),         /* 16/9 */
-	      CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*2] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp12, FIX(2.177324216)),                 /* c2 */
-	      CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*4] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp10 - tmp11 - tmp11, FIX(1.257078722)), /* c4 */
-	      CONST_BITS+PASS1_BITS);
-
-    /* Odd part */
-
-    tmp10 = MULTIPLY(tmp0 + tmp2, FIX(0.650711829));             /* c5 */
-
-    dataptr[DCTSIZE*1] = (DCTELEM)
-      DESCALE(tmp10 + MULTIPLY(tmp0 + tmp1, FIX(1.777777778)),   /* 16/9 */
-	      CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*3] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp0 - tmp1 - tmp2, FIX(1.777777778)),    /* 16/9 */
-	      CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*5] = (DCTELEM)
-      DESCALE(tmp10 + MULTIPLY(tmp2 - tmp1, FIX(1.777777778)),   /* 16/9 */
-	      CONST_BITS+PASS1_BITS);
-
-    dataptr++;			/* advance pointer to next column */
-  }
-}
-
-
-/*
- * Perform the forward DCT on a 5x5 sample block.
- */
-
-GLOBAL(void)
-jpeg_fdct_5x5 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
-{
-  INT32 tmp0, tmp1, tmp2;
-  INT32 tmp10, tmp11;
-  DCTELEM *dataptr;
-  JSAMPROW elemptr;
-  int ctr;
-  SHIFT_TEMPS
-
-  /* Pre-zero output coefficient block. */
-  MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
-
-  /* Pass 1: process rows. */
-  /* Note results are scaled up by sqrt(8) compared to a true DCT; */
-  /* furthermore, we scale the results by 2**PASS1_BITS. */
-  /* We scale the results further by 2 as part of output adaption */
-  /* scaling for different DCT size. */
-  /* cK represents sqrt(2) * cos(K*pi/10). */
-
-  dataptr = data;
-  for (ctr = 0; ctr < 5; ctr++) {
-    elemptr = sample_data[ctr] + start_col;
-
-    /* Even part */
-
-    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[4]);
-    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[3]);
-    tmp2 = GETJSAMPLE(elemptr[2]);
-
-    tmp10 = tmp0 + tmp1;
-    tmp11 = tmp0 - tmp1;
-
-    tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[4]);
-    tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[3]);
-
-    /* Apply unsigned->signed conversion */
-    dataptr[0] = (DCTELEM)
-      ((tmp10 + tmp2 - 5 * CENTERJSAMPLE) << (PASS1_BITS+1));
-    tmp11 = MULTIPLY(tmp11, FIX(0.790569415));          /* (c2+c4)/2 */
-    tmp10 -= tmp2 << 2;
-    tmp10 = MULTIPLY(tmp10, FIX(0.353553391));          /* (c2-c4)/2 */
-    dataptr[2] = (DCTELEM) DESCALE(tmp11 + tmp10, CONST_BITS-PASS1_BITS-1);
-    dataptr[4] = (DCTELEM) DESCALE(tmp11 - tmp10, CONST_BITS-PASS1_BITS-1);
-
-    /* Odd part */
-
-    tmp10 = MULTIPLY(tmp0 + tmp1, FIX(0.831253876));    /* c3 */
-
-    dataptr[1] = (DCTELEM)
-      DESCALE(tmp10 + MULTIPLY(tmp0, FIX(0.513743148)), /* c1-c3 */
-	      CONST_BITS-PASS1_BITS-1);
-    dataptr[3] = (DCTELEM)
-      DESCALE(tmp10 - MULTIPLY(tmp1, FIX(2.176250899)), /* c1+c3 */
-	      CONST_BITS-PASS1_BITS-1);
-
-    dataptr += DCTSIZE;		/* advance pointer to next row */
-  }
-
-  /* Pass 2: process columns.
-   * We remove the PASS1_BITS scaling, but leave the results scaled up
-   * by an overall factor of 8.
-   * We must also scale the output by (8/5)**2 = 64/25, which we partially
-   * fold into the constant multipliers (other part was done in pass 1):
-   * cK now represents sqrt(2) * cos(K*pi/10) * 32/25.
-   */
-
-  dataptr = data;
-  for (ctr = 0; ctr < 5; ctr++) {
-    /* Even part */
-
-    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*4];
-    tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*3];
-    tmp2 = dataptr[DCTSIZE*2];
-
-    tmp10 = tmp0 + tmp1;
-    tmp11 = tmp0 - tmp1;
-
-    tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*4];
-    tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*3];
-
-    dataptr[DCTSIZE*0] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp10 + tmp2, FIX(1.28)),        /* 32/25 */
-	      CONST_BITS+PASS1_BITS);
-    tmp11 = MULTIPLY(tmp11, FIX(1.011928851));          /* (c2+c4)/2 */
-    tmp10 -= tmp2 << 2;
-    tmp10 = MULTIPLY(tmp10, FIX(0.452548340));          /* (c2-c4)/2 */
-    dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(tmp11 + tmp10, CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp11 - tmp10, CONST_BITS+PASS1_BITS);
-
-    /* Odd part */
-
-    tmp10 = MULTIPLY(tmp0 + tmp1, FIX(1.064004961));    /* c3 */
-
-    dataptr[DCTSIZE*1] = (DCTELEM)
-      DESCALE(tmp10 + MULTIPLY(tmp0, FIX(0.657591230)), /* c1-c3 */
-	      CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*3] = (DCTELEM)
-      DESCALE(tmp10 - MULTIPLY(tmp1, FIX(2.785601151)), /* c1+c3 */
-	      CONST_BITS+PASS1_BITS);
-
-    dataptr++;			/* advance pointer to next column */
-  }
-}
-
-
-/*
- * Perform the forward DCT on a 4x4 sample block.
- */
-
-GLOBAL(void)
-jpeg_fdct_4x4 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
-{
-  INT32 tmp0, tmp1;
-  INT32 tmp10, tmp11;
-  DCTELEM *dataptr;
-  JSAMPROW elemptr;
-  int ctr;
-  SHIFT_TEMPS
-
-  /* Pre-zero output coefficient block. */
-  MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
-
-  /* Pass 1: process rows. */
-  /* Note results are scaled up by sqrt(8) compared to a true DCT; */
-  /* furthermore, we scale the results by 2**PASS1_BITS. */
-  /* We must also scale the output by (8/4)**2 = 2**2, which we add here. */
-  /* cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point FDCT]. */
-
-  dataptr = data;
-  for (ctr = 0; ctr < 4; ctr++) {
-    elemptr = sample_data[ctr] + start_col;
-
-    /* Even part */
-
-    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[3]);
-    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[2]);
-
-    tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[3]);
-    tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[2]);
-
-    /* Apply unsigned->signed conversion */
-    dataptr[0] = (DCTELEM)
-      ((tmp0 + tmp1 - 4 * CENTERJSAMPLE) << (PASS1_BITS+2));
-    dataptr[2] = (DCTELEM) ((tmp0 - tmp1) << (PASS1_BITS+2));
-
-    /* Odd part */
-
-    tmp0 = MULTIPLY(tmp10 + tmp11, FIX_0_541196100);       /* c6 */
-    /* Add fudge factor here for final descale. */
-    tmp0 += ONE << (CONST_BITS-PASS1_BITS-3);
-
-    dataptr[1] = (DCTELEM)
-      RIGHT_SHIFT(tmp0 + MULTIPLY(tmp10, FIX_0_765366865), /* c2-c6 */
-		  CONST_BITS-PASS1_BITS-2);
-    dataptr[3] = (DCTELEM)
-      RIGHT_SHIFT(tmp0 - MULTIPLY(tmp11, FIX_1_847759065), /* c2+c6 */
-		  CONST_BITS-PASS1_BITS-2);
-
-    dataptr += DCTSIZE;		/* advance pointer to next row */
-  }
-
-  /* Pass 2: process columns.
-   * We remove the PASS1_BITS scaling, but leave the results scaled up
-   * by an overall factor of 8.
-   */
-
-  dataptr = data;
-  for (ctr = 0; ctr < 4; ctr++) {
-    /* Even part */
-
-    /* Add fudge factor here for final descale. */
-    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*3] + (ONE << (PASS1_BITS-1));
-    tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*2];
-
-    tmp10 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*3];
-    tmp11 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*2];
-
-    dataptr[DCTSIZE*0] = (DCTELEM) RIGHT_SHIFT(tmp0 + tmp1, PASS1_BITS);
-    dataptr[DCTSIZE*2] = (DCTELEM) RIGHT_SHIFT(tmp0 - tmp1, PASS1_BITS);
-
-    /* Odd part */
-
-    tmp0 = MULTIPLY(tmp10 + tmp11, FIX_0_541196100);       /* c6 */
-    /* Add fudge factor here for final descale. */
-    tmp0 += ONE << (CONST_BITS+PASS1_BITS-1);
-
-    dataptr[DCTSIZE*1] = (DCTELEM)
-      RIGHT_SHIFT(tmp0 + MULTIPLY(tmp10, FIX_0_765366865), /* c2-c6 */
-		  CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*3] = (DCTELEM)
-      RIGHT_SHIFT(tmp0 - MULTIPLY(tmp11, FIX_1_847759065), /* c2+c6 */
-		  CONST_BITS+PASS1_BITS);
-
-    dataptr++;			/* advance pointer to next column */
-  }
-}
-
-
-/*
- * Perform the forward DCT on a 3x3 sample block.
- */
-
-GLOBAL(void)
-jpeg_fdct_3x3 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
-{
-  INT32 tmp0, tmp1, tmp2;
-  DCTELEM *dataptr;
-  JSAMPROW elemptr;
-  int ctr;
-  SHIFT_TEMPS
-
-  /* Pre-zero output coefficient block. */
-  MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
-
-  /* Pass 1: process rows. */
-  /* Note results are scaled up by sqrt(8) compared to a true DCT; */
-  /* furthermore, we scale the results by 2**PASS1_BITS. */
-  /* We scale the results further by 2**2 as part of output adaption */
-  /* scaling for different DCT size. */
-  /* cK represents sqrt(2) * cos(K*pi/6). */
-
-  dataptr = data;
-  for (ctr = 0; ctr < 3; ctr++) {
-    elemptr = sample_data[ctr] + start_col;
-
-    /* Even part */
-
-    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[2]);
-    tmp1 = GETJSAMPLE(elemptr[1]);
-
-    tmp2 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[2]);
-
-    /* Apply unsigned->signed conversion */
-    dataptr[0] = (DCTELEM)
-      ((tmp0 + tmp1 - 3 * CENTERJSAMPLE) << (PASS1_BITS+2));
-    dataptr[2] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp0 - tmp1 - tmp1, FIX(0.707106781)), /* c2 */
-	      CONST_BITS-PASS1_BITS-2);
-
-    /* Odd part */
-
-    dataptr[1] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp2, FIX(1.224744871)),               /* c1 */
-	      CONST_BITS-PASS1_BITS-2);
-
-    dataptr += DCTSIZE;		/* advance pointer to next row */
-  }
-
-  /* Pass 2: process columns.
-   * We remove the PASS1_BITS scaling, but leave the results scaled up
-   * by an overall factor of 8.
-   * We must also scale the output by (8/3)**2 = 64/9, which we partially
-   * fold into the constant multipliers (other part was done in pass 1):
-   * cK now represents sqrt(2) * cos(K*pi/6) * 16/9.
-   */
-
-  dataptr = data;
-  for (ctr = 0; ctr < 3; ctr++) {
-    /* Even part */
-
-    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*2];
-    tmp1 = dataptr[DCTSIZE*1];
-
-    tmp2 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*2];
-
-    dataptr[DCTSIZE*0] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp0 + tmp1, FIX(1.777777778)),        /* 16/9 */
-	      CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*2] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp0 - tmp1 - tmp1, FIX(1.257078722)), /* c2 */
-	      CONST_BITS+PASS1_BITS);
-
-    /* Odd part */
-
-    dataptr[DCTSIZE*1] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp2, FIX(2.177324216)),               /* c1 */
-	      CONST_BITS+PASS1_BITS);
-
-    dataptr++;			/* advance pointer to next column */
-  }
-}
-
-
-/*
- * Perform the forward DCT on a 2x2 sample block.
- */
-
-GLOBAL(void)
-jpeg_fdct_2x2 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
-{
-  INT32 tmp0, tmp1, tmp2, tmp3;
-  JSAMPROW elemptr;
-
-  /* Pre-zero output coefficient block. */
-  MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
-
-  /* Pass 1: process rows. */
-  /* Note results are scaled up by sqrt(8) compared to a true DCT. */
-
-  /* Row 0 */
-  elemptr = sample_data[0] + start_col;
-
-  tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[1]);
-  tmp1 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[1]);
-
-  /* Row 1 */
-  elemptr = sample_data[1] + start_col;
-
-  tmp2 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[1]);
-  tmp3 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[1]);
-
-  /* Pass 2: process columns.
-   * We leave the results scaled up by an overall factor of 8.
-   * We must also scale the output by (8/2)**2 = 2**4.
-   */
-
-  /* Column 0 */
-  /* Apply unsigned->signed conversion */
-  data[DCTSIZE*0] = (DCTELEM) ((tmp0 + tmp2 - 4 * CENTERJSAMPLE) << 4);
-  data[DCTSIZE*1] = (DCTELEM) ((tmp0 - tmp2) << 4);
-
-  /* Column 1 */
-  data[DCTSIZE*0+1] = (DCTELEM) ((tmp1 + tmp3) << 4);
-  data[DCTSIZE*1+1] = (DCTELEM) ((tmp1 - tmp3) << 4);
-}
-
-
-/*
- * Perform the forward DCT on a 1x1 sample block.
- */
-
-GLOBAL(void)
-jpeg_fdct_1x1 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
-{
-  /* Pre-zero output coefficient block. */
-  MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
-
-  /* We leave the result scaled up by an overall factor of 8. */
-  /* We must also scale the output by (8/1)**2 = 2**6. */
-  /* Apply unsigned->signed conversion */
-  data[0] = (DCTELEM)
-    ((GETJSAMPLE(sample_data[0][start_col]) - CENTERJSAMPLE) << 6);
-}
-
-
-/*
- * Perform the forward DCT on a 9x9 sample block.
- */
-
-GLOBAL(void)
-jpeg_fdct_9x9 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
-{
-  INT32 tmp0, tmp1, tmp2, tmp3, tmp4;
-  INT32 tmp10, tmp11, tmp12, tmp13;
-  INT32 z1, z2;
-  DCTELEM workspace[8];
-  DCTELEM *dataptr;
-  DCTELEM *wsptr;
-  JSAMPROW elemptr;
-  int ctr;
-  SHIFT_TEMPS
-
-  /* Pass 1: process rows. */
-  /* Note results are scaled up by sqrt(8) compared to a true DCT; */
-  /* we scale the results further by 2 as part of output adaption */
-  /* scaling for different DCT size. */
-  /* cK represents sqrt(2) * cos(K*pi/18). */
-
-  dataptr = data;
-  ctr = 0;
-  for (;;) {
-    elemptr = sample_data[ctr] + start_col;
-
-    /* Even part */
-
-    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[8]);
-    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[7]);
-    tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[6]);
-    tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[5]);
-    tmp4 = GETJSAMPLE(elemptr[4]);
-
-    tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[8]);
-    tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[7]);
-    tmp12 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[6]);
-    tmp13 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[5]);
-
-    z1 = tmp0 + tmp2 + tmp3;
-    z2 = tmp1 + tmp4;
-    /* Apply unsigned->signed conversion */
-    dataptr[0] = (DCTELEM) ((z1 + z2 - 9 * CENTERJSAMPLE) << 1);
-    dataptr[6] = (DCTELEM)
-      DESCALE(MULTIPLY(z1 - z2 - z2, FIX(0.707106781)),  /* c6 */
-	      CONST_BITS-1);
-    z1 = MULTIPLY(tmp0 - tmp2, FIX(1.328926049));        /* c2 */
-    z2 = MULTIPLY(tmp1 - tmp4 - tmp4, FIX(0.707106781)); /* c6 */
-    dataptr[2] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp2 - tmp3, FIX(1.083350441))    /* c4 */
-	      + z1 + z2, CONST_BITS-1);
-    dataptr[4] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp3 - tmp0, FIX(0.245575608))    /* c8 */
-	      + z1 - z2, CONST_BITS-1);
-
-    /* Odd part */
-
-    dataptr[3] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp10 - tmp12 - tmp13, FIX(1.224744871)), /* c3 */
-	      CONST_BITS-1);
-
-    tmp11 = MULTIPLY(tmp11, FIX(1.224744871));        /* c3 */
-    tmp0 = MULTIPLY(tmp10 + tmp12, FIX(0.909038955)); /* c5 */
-    tmp1 = MULTIPLY(tmp10 + tmp13, FIX(0.483689525)); /* c7 */
-
-    dataptr[1] = (DCTELEM) DESCALE(tmp11 + tmp0 + tmp1, CONST_BITS-1);
-
-    tmp2 = MULTIPLY(tmp12 - tmp13, FIX(1.392728481)); /* c1 */
-
-    dataptr[5] = (DCTELEM) DESCALE(tmp0 - tmp11 - tmp2, CONST_BITS-1);
-    dataptr[7] = (DCTELEM) DESCALE(tmp1 - tmp11 + tmp2, CONST_BITS-1);
-
-    ctr++;
-
-    if (ctr != DCTSIZE) {
-      if (ctr == 9)
-	break;			/* Done. */
-      dataptr += DCTSIZE;	/* advance pointer to next row */
-    } else
-      dataptr = workspace;	/* switch pointer to extended workspace */
-  }
-
-  /* Pass 2: process columns.
-   * We leave the results scaled up by an overall factor of 8.
-   * We must also scale the output by (8/9)**2 = 64/81, which we partially
-   * fold into the constant multipliers and final/initial shifting:
-   * cK now represents sqrt(2) * cos(K*pi/18) * 128/81.
-   */
-
-  dataptr = data;
-  wsptr = workspace;
-  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
-    /* Even part */
-
-    tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*0];
-    tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*7];
-    tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*6];
-    tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*5];
-    tmp4 = dataptr[DCTSIZE*4];
-
-    tmp10 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*0];
-    tmp11 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*7];
-    tmp12 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*6];
-    tmp13 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*5];
-
-    z1 = tmp0 + tmp2 + tmp3;
-    z2 = tmp1 + tmp4;
-    dataptr[DCTSIZE*0] = (DCTELEM)
-      DESCALE(MULTIPLY(z1 + z2, FIX(1.580246914)),       /* 128/81 */
-	      CONST_BITS+2);
-    dataptr[DCTSIZE*6] = (DCTELEM)
-      DESCALE(MULTIPLY(z1 - z2 - z2, FIX(1.117403309)),  /* c6 */
-	      CONST_BITS+2);
-    z1 = MULTIPLY(tmp0 - tmp2, FIX(2.100031287));        /* c2 */
-    z2 = MULTIPLY(tmp1 - tmp4 - tmp4, FIX(1.117403309)); /* c6 */
-    dataptr[DCTSIZE*2] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp2 - tmp3, FIX(1.711961190))    /* c4 */
-	      + z1 + z2, CONST_BITS+2);
-    dataptr[DCTSIZE*4] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp3 - tmp0, FIX(0.388070096))    /* c8 */
-	      + z1 - z2, CONST_BITS+2);
-
-    /* Odd part */
-
-    dataptr[DCTSIZE*3] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp10 - tmp12 - tmp13, FIX(1.935399303)), /* c3 */
-	      CONST_BITS+2);
-
-    tmp11 = MULTIPLY(tmp11, FIX(1.935399303));        /* c3 */
-    tmp0 = MULTIPLY(tmp10 + tmp12, FIX(1.436506004)); /* c5 */
-    tmp1 = MULTIPLY(tmp10 + tmp13, FIX(0.764348879)); /* c7 */
-
-    dataptr[DCTSIZE*1] = (DCTELEM)
-      DESCALE(tmp11 + tmp0 + tmp1, CONST_BITS+2);
-
-    tmp2 = MULTIPLY(tmp12 - tmp13, FIX(2.200854883)); /* c1 */
-
-    dataptr[DCTSIZE*5] = (DCTELEM)
-      DESCALE(tmp0 - tmp11 - tmp2, CONST_BITS+2);
-    dataptr[DCTSIZE*7] = (DCTELEM)
-      DESCALE(tmp1 - tmp11 + tmp2, CONST_BITS+2);
-
-    dataptr++;			/* advance pointer to next column */
-    wsptr++;			/* advance pointer to next column */
-  }
-}
-
-
-/*
- * Perform the forward DCT on a 10x10 sample block.
- */
-
-GLOBAL(void)
-jpeg_fdct_10x10 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
-{
-  INT32 tmp0, tmp1, tmp2, tmp3, tmp4;
-  INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
-  DCTELEM workspace[8*2];
-  DCTELEM *dataptr;
-  DCTELEM *wsptr;
-  JSAMPROW elemptr;
-  int ctr;
-  SHIFT_TEMPS
-
-  /* Pass 1: process rows. */
-  /* Note results are scaled up by sqrt(8) compared to a true DCT; */
-  /* we scale the results further by 2 as part of output adaption */
-  /* scaling for different DCT size. */
-  /* cK represents sqrt(2) * cos(K*pi/20). */
-
-  dataptr = data;
-  ctr = 0;
-  for (;;) {
-    elemptr = sample_data[ctr] + start_col;
-
-    /* Even part */
-
-    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[9]);
-    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[8]);
-    tmp12 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[7]);
-    tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[6]);
-    tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[5]);
-
-    tmp10 = tmp0 + tmp4;
-    tmp13 = tmp0 - tmp4;
-    tmp11 = tmp1 + tmp3;
-    tmp14 = tmp1 - tmp3;
-
-    tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[9]);
-    tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[8]);
-    tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[7]);
-    tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[6]);
-    tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[5]);
-
-    /* Apply unsigned->signed conversion */
-    dataptr[0] = (DCTELEM)
-      ((tmp10 + tmp11 + tmp12 - 10 * CENTERJSAMPLE) << 1);
-    tmp12 += tmp12;
-    dataptr[4] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.144122806)) - /* c4 */
-	      MULTIPLY(tmp11 - tmp12, FIX(0.437016024)),  /* c8 */
-	      CONST_BITS-1);
-    tmp10 = MULTIPLY(tmp13 + tmp14, FIX(0.831253876));    /* c6 */
-    dataptr[2] = (DCTELEM)
-      DESCALE(tmp10 + MULTIPLY(tmp13, FIX(0.513743148)),  /* c2-c6 */
-	      CONST_BITS-1);
-    dataptr[6] = (DCTELEM)
-      DESCALE(tmp10 - MULTIPLY(tmp14, FIX(2.176250899)),  /* c2+c6 */
-	      CONST_BITS-1);
-
-    /* Odd part */
-
-    tmp10 = tmp0 + tmp4;
-    tmp11 = tmp1 - tmp3;
-    dataptr[5] = (DCTELEM) ((tmp10 - tmp11 - tmp2) << 1);
-    tmp2 <<= CONST_BITS;
-    dataptr[1] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp0, FIX(1.396802247)) +          /* c1 */
-	      MULTIPLY(tmp1, FIX(1.260073511)) + tmp2 +   /* c3 */
-	      MULTIPLY(tmp3, FIX(0.642039522)) +          /* c7 */
-	      MULTIPLY(tmp4, FIX(0.221231742)),           /* c9 */
-	      CONST_BITS-1);
-    tmp12 = MULTIPLY(tmp0 - tmp4, FIX(0.951056516)) -     /* (c3+c7)/2 */
-	    MULTIPLY(tmp1 + tmp3, FIX(0.587785252));      /* (c1-c9)/2 */
-    tmp13 = MULTIPLY(tmp10 + tmp11, FIX(0.309016994)) +   /* (c3-c7)/2 */
-	    (tmp11 << (CONST_BITS - 1)) - tmp2;
-    dataptr[3] = (DCTELEM) DESCALE(tmp12 + tmp13, CONST_BITS-1);
-    dataptr[7] = (DCTELEM) DESCALE(tmp12 - tmp13, CONST_BITS-1);
-
-    ctr++;
-
-    if (ctr != DCTSIZE) {
-      if (ctr == 10)
-	break;			/* Done. */
-      dataptr += DCTSIZE;	/* advance pointer to next row */
-    } else
-      dataptr = workspace;	/* switch pointer to extended workspace */
-  }
-
-  /* Pass 2: process columns.
-   * We leave the results scaled up by an overall factor of 8.
-   * We must also scale the output by (8/10)**2 = 16/25, which we partially
-   * fold into the constant multipliers and final/initial shifting:
-   * cK now represents sqrt(2) * cos(K*pi/20) * 32/25.
-   */
-
-  dataptr = data;
-  wsptr = workspace;
-  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
-    /* Even part */
-
-    tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*1];
-    tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*0];
-    tmp12 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*7];
-    tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*6];
-    tmp4 = dataptr[DCTSIZE*4] + dataptr[DCTSIZE*5];
-
-    tmp10 = tmp0 + tmp4;
-    tmp13 = tmp0 - tmp4;
-    tmp11 = tmp1 + tmp3;
-    tmp14 = tmp1 - tmp3;
-
-    tmp0 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*1];
-    tmp1 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*0];
-    tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*7];
-    tmp3 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*6];
-    tmp4 = dataptr[DCTSIZE*4] - dataptr[DCTSIZE*5];
-
-    dataptr[DCTSIZE*0] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp10 + tmp11 + tmp12, FIX(1.28)), /* 32/25 */
-	      CONST_BITS+2);
-    tmp12 += tmp12;
-    dataptr[DCTSIZE*4] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.464477191)) - /* c4 */
-	      MULTIPLY(tmp11 - tmp12, FIX(0.559380511)),  /* c8 */
-	      CONST_BITS+2);
-    tmp10 = MULTIPLY(tmp13 + tmp14, FIX(1.064004961));    /* c6 */
-    dataptr[DCTSIZE*2] = (DCTELEM)
-      DESCALE(tmp10 + MULTIPLY(tmp13, FIX(0.657591230)),  /* c2-c6 */
-	      CONST_BITS+2);
-    dataptr[DCTSIZE*6] = (DCTELEM)
-      DESCALE(tmp10 - MULTIPLY(tmp14, FIX(2.785601151)),  /* c2+c6 */
-	      CONST_BITS+2);
-
-    /* Odd part */
-
-    tmp10 = tmp0 + tmp4;
-    tmp11 = tmp1 - tmp3;
-    dataptr[DCTSIZE*5] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp10 - tmp11 - tmp2, FIX(1.28)),  /* 32/25 */
-	      CONST_BITS+2);
-    tmp2 = MULTIPLY(tmp2, FIX(1.28));                     /* 32/25 */
-    dataptr[DCTSIZE*1] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp0, FIX(1.787906876)) +          /* c1 */
-	      MULTIPLY(tmp1, FIX(1.612894094)) + tmp2 +   /* c3 */
-	      MULTIPLY(tmp3, FIX(0.821810588)) +          /* c7 */
-	      MULTIPLY(tmp4, FIX(0.283176630)),           /* c9 */
-	      CONST_BITS+2);
-    tmp12 = MULTIPLY(tmp0 - tmp4, FIX(1.217352341)) -     /* (c3+c7)/2 */
-	    MULTIPLY(tmp1 + tmp3, FIX(0.752365123));      /* (c1-c9)/2 */
-    tmp13 = MULTIPLY(tmp10 + tmp11, FIX(0.395541753)) +   /* (c3-c7)/2 */
-	    MULTIPLY(tmp11, FIX(0.64)) - tmp2;            /* 16/25 */
-    dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp12 + tmp13, CONST_BITS+2);
-    dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp12 - tmp13, CONST_BITS+2);
-
-    dataptr++;			/* advance pointer to next column */
-    wsptr++;			/* advance pointer to next column */
-  }
-}
-
-
-/*
- * Perform the forward DCT on an 11x11 sample block.
- */
-
-GLOBAL(void)
-jpeg_fdct_11x11 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
-{
-  INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
-  INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
-  INT32 z1, z2, z3;
-  DCTELEM workspace[8*3];
-  DCTELEM *dataptr;
-  DCTELEM *wsptr;
-  JSAMPROW elemptr;
-  int ctr;
-  SHIFT_TEMPS
-
-  /* Pass 1: process rows. */
-  /* Note results are scaled up by sqrt(8) compared to a true DCT; */
-  /* we scale the results further by 2 as part of output adaption */
-  /* scaling for different DCT size. */
-  /* cK represents sqrt(2) * cos(K*pi/22). */
-
-  dataptr = data;
-  ctr = 0;
-  for (;;) {
-    elemptr = sample_data[ctr] + start_col;
-
-    /* Even part */
-
-    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[10]);
-    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[9]);
-    tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[8]);
-    tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[7]);
-    tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[6]);
-    tmp5 = GETJSAMPLE(elemptr[5]);
-
-    tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[10]);
-    tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[9]);
-    tmp12 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[8]);
-    tmp13 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[7]);
-    tmp14 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[6]);
-
-    /* Apply unsigned->signed conversion */
-    dataptr[0] = (DCTELEM)
-      ((tmp0 + tmp1 + tmp2 + tmp3 + tmp4 + tmp5 - 11 * CENTERJSAMPLE) << 1);
-    tmp5 += tmp5;
-    tmp0 -= tmp5;
-    tmp1 -= tmp5;
-    tmp2 -= tmp5;
-    tmp3 -= tmp5;
-    tmp4 -= tmp5;
-    z1 = MULTIPLY(tmp0 + tmp3, FIX(1.356927976)) +       /* c2 */
-	 MULTIPLY(tmp2 + tmp4, FIX(0.201263574));        /* c10 */
-    z2 = MULTIPLY(tmp1 - tmp3, FIX(0.926112931));        /* c6 */
-    z3 = MULTIPLY(tmp0 - tmp1, FIX(1.189712156));        /* c4 */
-    dataptr[2] = (DCTELEM)
-      DESCALE(z1 + z2 - MULTIPLY(tmp3, FIX(1.018300590)) /* c2+c8-c6 */
-	      - MULTIPLY(tmp4, FIX(1.390975730)),        /* c4+c10 */
-	      CONST_BITS-1);
-    dataptr[4] = (DCTELEM)
-      DESCALE(z2 + z3 + MULTIPLY(tmp1, FIX(0.062335650)) /* c4-c6-c10 */
-	      - MULTIPLY(tmp2, FIX(1.356927976))         /* c2 */
-	      + MULTIPLY(tmp4, FIX(0.587485545)),        /* c8 */
-	      CONST_BITS-1);
-    dataptr[6] = (DCTELEM)
-      DESCALE(z1 + z3 - MULTIPLY(tmp0, FIX(1.620527200)) /* c2+c4-c6 */
-	      - MULTIPLY(tmp2, FIX(0.788749120)),        /* c8+c10 */
-	      CONST_BITS-1);
-
-    /* Odd part */
-
-    tmp1 = MULTIPLY(tmp10 + tmp11, FIX(1.286413905));    /* c3 */
-    tmp2 = MULTIPLY(tmp10 + tmp12, FIX(1.068791298));    /* c5 */
-    tmp3 = MULTIPLY(tmp10 + tmp13, FIX(0.764581576));    /* c7 */
-    tmp0 = tmp1 + tmp2 + tmp3 - MULTIPLY(tmp10, FIX(1.719967871)) /* c7+c5+c3-c1 */
-	   + MULTIPLY(tmp14, FIX(0.398430003));          /* c9 */
-    tmp4 = MULTIPLY(tmp11 + tmp12, - FIX(0.764581576));  /* -c7 */
-    tmp5 = MULTIPLY(tmp11 + tmp13, - FIX(1.399818907));  /* -c1 */
-    tmp1 += tmp4 + tmp5 + MULTIPLY(tmp11, FIX(1.276416582)) /* c9+c7+c1-c3 */
-	    - MULTIPLY(tmp14, FIX(1.068791298));         /* c5 */
-    tmp10 = MULTIPLY(tmp12 + tmp13, FIX(0.398430003));   /* c9 */
-    tmp2 += tmp4 + tmp10 - MULTIPLY(tmp12, FIX(1.989053629)) /* c9+c5+c3-c7 */
-	    + MULTIPLY(tmp14, FIX(1.399818907));         /* c1 */
-    tmp3 += tmp5 + tmp10 + MULTIPLY(tmp13, FIX(1.305598626)) /* c1+c5-c9-c7 */
-	    - MULTIPLY(tmp14, FIX(1.286413905));         /* c3 */
-
-    dataptr[1] = (DCTELEM) DESCALE(tmp0, CONST_BITS-1);
-    dataptr[3] = (DCTELEM) DESCALE(tmp1, CONST_BITS-1);
-    dataptr[5] = (DCTELEM) DESCALE(tmp2, CONST_BITS-1);
-    dataptr[7] = (DCTELEM) DESCALE(tmp3, CONST_BITS-1);
-
-    ctr++;
-
-    if (ctr != DCTSIZE) {
-      if (ctr == 11)
-	break;			/* Done. */
-      dataptr += DCTSIZE;	/* advance pointer to next row */
-    } else
-      dataptr = workspace;	/* switch pointer to extended workspace */
-  }
-
-  /* Pass 2: process columns.
-   * We leave the results scaled up by an overall factor of 8.
-   * We must also scale the output by (8/11)**2 = 64/121, which we partially
-   * fold into the constant multipliers and final/initial shifting:
-   * cK now represents sqrt(2) * cos(K*pi/22) * 128/121.
-   */
-
-  dataptr = data;
-  wsptr = workspace;
-  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
-    /* Even part */
-
-    tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*2];
-    tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*1];
-    tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*0];
-    tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*7];
-    tmp4 = dataptr[DCTSIZE*4] + dataptr[DCTSIZE*6];
-    tmp5 = dataptr[DCTSIZE*5];
-
-    tmp10 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*2];
-    tmp11 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*1];
-    tmp12 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*0];
-    tmp13 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*7];
-    tmp14 = dataptr[DCTSIZE*4] - dataptr[DCTSIZE*6];
-
-    dataptr[DCTSIZE*0] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp0 + tmp1 + tmp2 + tmp3 + tmp4 + tmp5,
-		       FIX(1.057851240)),                /* 128/121 */
-	      CONST_BITS+2);
-    tmp5 += tmp5;
-    tmp0 -= tmp5;
-    tmp1 -= tmp5;
-    tmp2 -= tmp5;
-    tmp3 -= tmp5;
-    tmp4 -= tmp5;
-    z1 = MULTIPLY(tmp0 + tmp3, FIX(1.435427942)) +       /* c2 */
-	 MULTIPLY(tmp2 + tmp4, FIX(0.212906922));        /* c10 */
-    z2 = MULTIPLY(tmp1 - tmp3, FIX(0.979689713));        /* c6 */
-    z3 = MULTIPLY(tmp0 - tmp1, FIX(1.258538479));        /* c4 */
-    dataptr[DCTSIZE*2] = (DCTELEM)
-      DESCALE(z1 + z2 - MULTIPLY(tmp3, FIX(1.077210542)) /* c2+c8-c6 */
-	      - MULTIPLY(tmp4, FIX(1.471445400)),        /* c4+c10 */
-	      CONST_BITS+2);
-    dataptr[DCTSIZE*4] = (DCTELEM)
-      DESCALE(z2 + z3 + MULTIPLY(tmp1, FIX(0.065941844)) /* c4-c6-c10 */
-	      - MULTIPLY(tmp2, FIX(1.435427942))         /* c2 */
-	      + MULTIPLY(tmp4, FIX(0.621472312)),        /* c8 */
-	      CONST_BITS+2);
-    dataptr[DCTSIZE*6] = (DCTELEM)
-      DESCALE(z1 + z3 - MULTIPLY(tmp0, FIX(1.714276708)) /* c2+c4-c6 */
-	      - MULTIPLY(tmp2, FIX(0.834379234)),        /* c8+c10 */
-	      CONST_BITS+2);
-
-    /* Odd part */
-
-    tmp1 = MULTIPLY(tmp10 + tmp11, FIX(1.360834544));    /* c3 */
-    tmp2 = MULTIPLY(tmp10 + tmp12, FIX(1.130622199));    /* c5 */
-    tmp3 = MULTIPLY(tmp10 + tmp13, FIX(0.808813568));    /* c7 */
-    tmp0 = tmp1 + tmp2 + tmp3 - MULTIPLY(tmp10, FIX(1.819470145)) /* c7+c5+c3-c1 */
-	   + MULTIPLY(tmp14, FIX(0.421479672));          /* c9 */
-    tmp4 = MULTIPLY(tmp11 + tmp12, - FIX(0.808813568));  /* -c7 */
-    tmp5 = MULTIPLY(tmp11 + tmp13, - FIX(1.480800167));  /* -c1 */
-    tmp1 += tmp4 + tmp5 + MULTIPLY(tmp11, FIX(1.350258864)) /* c9+c7+c1-c3 */
-	    - MULTIPLY(tmp14, FIX(1.130622199));         /* c5 */
-    tmp10 = MULTIPLY(tmp12 + tmp13, FIX(0.421479672));   /* c9 */
-    tmp2 += tmp4 + tmp10 - MULTIPLY(tmp12, FIX(2.104122847)) /* c9+c5+c3-c7 */
-	    + MULTIPLY(tmp14, FIX(1.480800167));         /* c1 */
-    tmp3 += tmp5 + tmp10 + MULTIPLY(tmp13, FIX(1.381129125)) /* c1+c5-c9-c7 */
-	    - MULTIPLY(tmp14, FIX(1.360834544));         /* c3 */
-
-    dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp0, CONST_BITS+2);
-    dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp1, CONST_BITS+2);
-    dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp2, CONST_BITS+2);
-    dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp3, CONST_BITS+2);
-
-    dataptr++;			/* advance pointer to next column */
-    wsptr++;			/* advance pointer to next column */
-  }
-}
-
-
-/*
- * Perform the forward DCT on a 12x12 sample block.
- */
-
-GLOBAL(void)
-jpeg_fdct_12x12 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
-{
-  INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
-  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
-  DCTELEM workspace[8*4];
-  DCTELEM *dataptr;
-  DCTELEM *wsptr;
-  JSAMPROW elemptr;
-  int ctr;
-  SHIFT_TEMPS
-
-  /* Pass 1: process rows. */
-  /* Note results are scaled up by sqrt(8) compared to a true DCT. */
-  /* cK represents sqrt(2) * cos(K*pi/24). */
-
-  dataptr = data;
-  ctr = 0;
-  for (;;) {
-    elemptr = sample_data[ctr] + start_col;
-
-    /* Even part */
-
-    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[11]);
-    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[10]);
-    tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[9]);
-    tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[8]);
-    tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[7]);
-    tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[6]);
-
-    tmp10 = tmp0 + tmp5;
-    tmp13 = tmp0 - tmp5;
-    tmp11 = tmp1 + tmp4;
-    tmp14 = tmp1 - tmp4;
-    tmp12 = tmp2 + tmp3;
-    tmp15 = tmp2 - tmp3;
-
-    tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[11]);
-    tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[10]);
-    tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[9]);
-    tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[8]);
-    tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[7]);
-    tmp5 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[6]);
-
-    /* Apply unsigned->signed conversion */
-    dataptr[0] = (DCTELEM) (tmp10 + tmp11 + tmp12 - 12 * CENTERJSAMPLE);
-    dataptr[6] = (DCTELEM) (tmp13 - tmp14 - tmp15);
-    dataptr[4] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.224744871)), /* c4 */
-	      CONST_BITS);
-    dataptr[2] = (DCTELEM)
-      DESCALE(tmp14 - tmp15 + MULTIPLY(tmp13 + tmp15, FIX(1.366025404)), /* c2 */
-	      CONST_BITS);
-
-    /* Odd part */
-
-    tmp10 = MULTIPLY(tmp1 + tmp4, FIX_0_541196100);    /* c9 */
-    tmp14 = tmp10 + MULTIPLY(tmp1, FIX_0_765366865);   /* c3-c9 */
-    tmp15 = tmp10 - MULTIPLY(tmp4, FIX_1_847759065);   /* c3+c9 */
-    tmp12 = MULTIPLY(tmp0 + tmp2, FIX(1.121971054));   /* c5 */
-    tmp13 = MULTIPLY(tmp0 + tmp3, FIX(0.860918669));   /* c7 */
-    tmp10 = tmp12 + tmp13 + tmp14 - MULTIPLY(tmp0, FIX(0.580774953)) /* c5+c7-c1 */
-	    + MULTIPLY(tmp5, FIX(0.184591911));        /* c11 */
-    tmp11 = MULTIPLY(tmp2 + tmp3, - FIX(0.184591911)); /* -c11 */
-    tmp12 += tmp11 - tmp15 - MULTIPLY(tmp2, FIX(2.339493912)) /* c1+c5-c11 */
-	    + MULTIPLY(tmp5, FIX(0.860918669));        /* c7 */
-    tmp13 += tmp11 - tmp14 + MULTIPLY(tmp3, FIX(0.725788011)) /* c1+c11-c7 */
-	    - MULTIPLY(tmp5, FIX(1.121971054));        /* c5 */
-    tmp11 = tmp15 + MULTIPLY(tmp0 - tmp3, FIX(1.306562965)) /* c3 */
-	    - MULTIPLY(tmp2 + tmp5, FIX_0_541196100);  /* c9 */
-
-    dataptr[1] = (DCTELEM) DESCALE(tmp10, CONST_BITS);
-    dataptr[3] = (DCTELEM) DESCALE(tmp11, CONST_BITS);
-    dataptr[5] = (DCTELEM) DESCALE(tmp12, CONST_BITS);
-    dataptr[7] = (DCTELEM) DESCALE(tmp13, CONST_BITS);
-
-    ctr++;
-
-    if (ctr != DCTSIZE) {
-      if (ctr == 12)
-	break;			/* Done. */
-      dataptr += DCTSIZE;	/* advance pointer to next row */
-    } else
-      dataptr = workspace;	/* switch pointer to extended workspace */
-  }
-
-  /* Pass 2: process columns.
-   * We leave the results scaled up by an overall factor of 8.
-   * We must also scale the output by (8/12)**2 = 4/9, which we partially
-   * fold into the constant multipliers and final shifting:
-   * cK now represents sqrt(2) * cos(K*pi/24) * 8/9.
-   */
-
-  dataptr = data;
-  wsptr = workspace;
-  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
-    /* Even part */
-
-    tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*3];
-    tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*2];
-    tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*1];
-    tmp3 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*0];
-    tmp4 = dataptr[DCTSIZE*4] + dataptr[DCTSIZE*7];
-    tmp5 = dataptr[DCTSIZE*5] + dataptr[DCTSIZE*6];
-
-    tmp10 = tmp0 + tmp5;
-    tmp13 = tmp0 - tmp5;
-    tmp11 = tmp1 + tmp4;
-    tmp14 = tmp1 - tmp4;
-    tmp12 = tmp2 + tmp3;
-    tmp15 = tmp2 - tmp3;
-
-    tmp0 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*3];
-    tmp1 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*2];
-    tmp2 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*1];
-    tmp3 = dataptr[DCTSIZE*3] - wsptr[DCTSIZE*0];
-    tmp4 = dataptr[DCTSIZE*4] - dataptr[DCTSIZE*7];
-    tmp5 = dataptr[DCTSIZE*5] - dataptr[DCTSIZE*6];
-
-    dataptr[DCTSIZE*0] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp10 + tmp11 + tmp12, FIX(0.888888889)), /* 8/9 */
-	      CONST_BITS+1);
-    dataptr[DCTSIZE*6] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp13 - tmp14 - tmp15, FIX(0.888888889)), /* 8/9 */
-	      CONST_BITS+1);
-    dataptr[DCTSIZE*4] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.088662108)),         /* c4 */
-	      CONST_BITS+1);
-    dataptr[DCTSIZE*2] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp14 - tmp15, FIX(0.888888889)) +        /* 8/9 */
-	      MULTIPLY(tmp13 + tmp15, FIX(1.214244803)),         /* c2 */
-	      CONST_BITS+1);
-
-    /* Odd part */
-
-    tmp10 = MULTIPLY(tmp1 + tmp4, FIX(0.481063200));   /* c9 */
-    tmp14 = tmp10 + MULTIPLY(tmp1, FIX(0.680326102));  /* c3-c9 */
-    tmp15 = tmp10 - MULTIPLY(tmp4, FIX(1.642452502));  /* c3+c9 */
-    tmp12 = MULTIPLY(tmp0 + tmp2, FIX(0.997307603));   /* c5 */
-    tmp13 = MULTIPLY(tmp0 + tmp3, FIX(0.765261039));   /* c7 */
-    tmp10 = tmp12 + tmp13 + tmp14 - MULTIPLY(tmp0, FIX(0.516244403)) /* c5+c7-c1 */
-	    + MULTIPLY(tmp5, FIX(0.164081699));        /* c11 */
-    tmp11 = MULTIPLY(tmp2 + tmp3, - FIX(0.164081699)); /* -c11 */
-    tmp12 += tmp11 - tmp15 - MULTIPLY(tmp2, FIX(2.079550144)) /* c1+c5-c11 */
-	    + MULTIPLY(tmp5, FIX(0.765261039));        /* c7 */
-    tmp13 += tmp11 - tmp14 + MULTIPLY(tmp3, FIX(0.645144899)) /* c1+c11-c7 */
-	    - MULTIPLY(tmp5, FIX(0.997307603));        /* c5 */
-    tmp11 = tmp15 + MULTIPLY(tmp0 - tmp3, FIX(1.161389302)) /* c3 */
-	    - MULTIPLY(tmp2 + tmp5, FIX(0.481063200)); /* c9 */
-
-    dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp10, CONST_BITS+1);
-    dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp11, CONST_BITS+1);
-    dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp12, CONST_BITS+1);
-    dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp13, CONST_BITS+1);
-
-    dataptr++;			/* advance pointer to next column */
-    wsptr++;			/* advance pointer to next column */
-  }
-}
-
-
-/*
- * Perform the forward DCT on a 13x13 sample block.
- */
-
-GLOBAL(void)
-jpeg_fdct_13x13 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
-{
-  INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
-  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
-  INT32 z1, z2;
-  DCTELEM workspace[8*5];
-  DCTELEM *dataptr;
-  DCTELEM *wsptr;
-  JSAMPROW elemptr;
-  int ctr;
-  SHIFT_TEMPS
-
-  /* Pass 1: process rows. */
-  /* Note results are scaled up by sqrt(8) compared to a true DCT. */
-  /* cK represents sqrt(2) * cos(K*pi/26). */
-
-  dataptr = data;
-  ctr = 0;
-  for (;;) {
-    elemptr = sample_data[ctr] + start_col;
-
-    /* Even part */
-
-    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[12]);
-    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[11]);
-    tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[10]);
-    tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[9]);
-    tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[8]);
-    tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[7]);
-    tmp6 = GETJSAMPLE(elemptr[6]);
-
-    tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[12]);
-    tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[11]);
-    tmp12 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[10]);
-    tmp13 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[9]);
-    tmp14 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[8]);
-    tmp15 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[7]);
-
-    /* Apply unsigned->signed conversion */
-    dataptr[0] = (DCTELEM)
-      (tmp0 + tmp1 + tmp2 + tmp3 + tmp4 + tmp5 + tmp6 - 13 * CENTERJSAMPLE);
-    tmp6 += tmp6;
-    tmp0 -= tmp6;
-    tmp1 -= tmp6;
-    tmp2 -= tmp6;
-    tmp3 -= tmp6;
-    tmp4 -= tmp6;
-    tmp5 -= tmp6;
-    dataptr[2] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp0, FIX(1.373119086)) +   /* c2 */
-	      MULTIPLY(tmp1, FIX(1.058554052)) +   /* c6 */
-	      MULTIPLY(tmp2, FIX(0.501487041)) -   /* c10 */
-	      MULTIPLY(tmp3, FIX(0.170464608)) -   /* c12 */
-	      MULTIPLY(tmp4, FIX(0.803364869)) -   /* c8 */
-	      MULTIPLY(tmp5, FIX(1.252223920)),    /* c4 */
-	      CONST_BITS);
-    z1 = MULTIPLY(tmp0 - tmp2, FIX(1.155388986)) - /* (c4+c6)/2 */
-	 MULTIPLY(tmp3 - tmp4, FIX(0.435816023)) - /* (c2-c10)/2 */
-	 MULTIPLY(tmp1 - tmp5, FIX(0.316450131));  /* (c8-c12)/2 */
-    z2 = MULTIPLY(tmp0 + tmp2, FIX(0.096834934)) - /* (c4-c6)/2 */
-	 MULTIPLY(tmp3 + tmp4, FIX(0.937303064)) + /* (c2+c10)/2 */
-	 MULTIPLY(tmp1 + tmp5, FIX(0.486914739));  /* (c8+c12)/2 */
-
-    dataptr[4] = (DCTELEM) DESCALE(z1 + z2, CONST_BITS);
-    dataptr[6] = (DCTELEM) DESCALE(z1 - z2, CONST_BITS);
-
-    /* Odd part */
-
-    tmp1 = MULTIPLY(tmp10 + tmp11, FIX(1.322312651));   /* c3 */
-    tmp2 = MULTIPLY(tmp10 + tmp12, FIX(1.163874945));   /* c5 */
-    tmp3 = MULTIPLY(tmp10 + tmp13, FIX(0.937797057)) +  /* c7 */
-	   MULTIPLY(tmp14 + tmp15, FIX(0.338443458));   /* c11 */
-    tmp0 = tmp1 + tmp2 + tmp3 -
-	   MULTIPLY(tmp10, FIX(2.020082300)) +          /* c3+c5+c7-c1 */
-	   MULTIPLY(tmp14, FIX(0.318774355));           /* c9-c11 */
-    tmp4 = MULTIPLY(tmp14 - tmp15, FIX(0.937797057)) -  /* c7 */
-	   MULTIPLY(tmp11 + tmp12, FIX(0.338443458));   /* c11 */
-    tmp5 = MULTIPLY(tmp11 + tmp13, - FIX(1.163874945)); /* -c5 */
-    tmp1 += tmp4 + tmp5 +
-	    MULTIPLY(tmp11, FIX(0.837223564)) -         /* c5+c9+c11-c3 */
-	    MULTIPLY(tmp14, FIX(2.341699410));          /* c1+c7 */
-    tmp6 = MULTIPLY(tmp12 + tmp13, - FIX(0.657217813)); /* -c9 */
-    tmp2 += tmp4 + tmp6 -
-	    MULTIPLY(tmp12, FIX(1.572116027)) +         /* c1+c5-c9-c11 */
-	    MULTIPLY(tmp15, FIX(2.260109708));          /* c3+c7 */
-    tmp3 += tmp5 + tmp6 +
-	    MULTIPLY(tmp13, FIX(2.205608352)) -         /* c3+c5+c9-c7 */
-	    MULTIPLY(tmp15, FIX(1.742345811));          /* c1+c11 */
-
-    dataptr[1] = (DCTELEM) DESCALE(tmp0, CONST_BITS);
-    dataptr[3] = (DCTELEM) DESCALE(tmp1, CONST_BITS);
-    dataptr[5] = (DCTELEM) DESCALE(tmp2, CONST_BITS);
-    dataptr[7] = (DCTELEM) DESCALE(tmp3, CONST_BITS);
-
-    ctr++;
-
-    if (ctr != DCTSIZE) {
-      if (ctr == 13)
-	break;			/* Done. */
-      dataptr += DCTSIZE;	/* advance pointer to next row */
-    } else
-      dataptr = workspace;	/* switch pointer to extended workspace */
-  }
-
-  /* Pass 2: process columns.
-   * We leave the results scaled up by an overall factor of 8.
-   * We must also scale the output by (8/13)**2 = 64/169, which we partially
-   * fold into the constant multipliers and final shifting:
-   * cK now represents sqrt(2) * cos(K*pi/26) * 128/169.
-   */
-
-  dataptr = data;
-  wsptr = workspace;
-  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
-    /* Even part */
-
-    tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*4];
-    tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*3];
-    tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*2];
-    tmp3 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*1];
-    tmp4 = dataptr[DCTSIZE*4] + wsptr[DCTSIZE*0];
-    tmp5 = dataptr[DCTSIZE*5] + dataptr[DCTSIZE*7];
-    tmp6 = dataptr[DCTSIZE*6];
-
-    tmp10 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*4];
-    tmp11 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*3];
-    tmp12 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*2];
-    tmp13 = dataptr[DCTSIZE*3] - wsptr[DCTSIZE*1];
-    tmp14 = dataptr[DCTSIZE*4] - wsptr[DCTSIZE*0];
-    tmp15 = dataptr[DCTSIZE*5] - dataptr[DCTSIZE*7];
-
-    dataptr[DCTSIZE*0] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp0 + tmp1 + tmp2 + tmp3 + tmp4 + tmp5 + tmp6,
-		       FIX(0.757396450)),          /* 128/169 */
-	      CONST_BITS+1);
-    tmp6 += tmp6;
-    tmp0 -= tmp6;
-    tmp1 -= tmp6;
-    tmp2 -= tmp6;
-    tmp3 -= tmp6;
-    tmp4 -= tmp6;
-    tmp5 -= tmp6;
-    dataptr[DCTSIZE*2] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp0, FIX(1.039995521)) +   /* c2 */
-	      MULTIPLY(tmp1, FIX(0.801745081)) +   /* c6 */
-	      MULTIPLY(tmp2, FIX(0.379824504)) -   /* c10 */
-	      MULTIPLY(tmp3, FIX(0.129109289)) -   /* c12 */
-	      MULTIPLY(tmp4, FIX(0.608465700)) -   /* c8 */
-	      MULTIPLY(tmp5, FIX(0.948429952)),    /* c4 */
-	      CONST_BITS+1);
-    z1 = MULTIPLY(tmp0 - tmp2, FIX(0.875087516)) - /* (c4+c6)/2 */
-	 MULTIPLY(tmp3 - tmp4, FIX(0.330085509)) - /* (c2-c10)/2 */
-	 MULTIPLY(tmp1 - tmp5, FIX(0.239678205));  /* (c8-c12)/2 */
-    z2 = MULTIPLY(tmp0 + tmp2, FIX(0.073342435)) - /* (c4-c6)/2 */
-	 MULTIPLY(tmp3 + tmp4, FIX(0.709910013)) + /* (c2+c10)/2 */
-	 MULTIPLY(tmp1 + tmp5, FIX(0.368787494));  /* (c8+c12)/2 */
-
-    dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(z1 + z2, CONST_BITS+1);
-    dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(z1 - z2, CONST_BITS+1);
-
-    /* Odd part */
-
-    tmp1 = MULTIPLY(tmp10 + tmp11, FIX(1.001514908));   /* c3 */
-    tmp2 = MULTIPLY(tmp10 + tmp12, FIX(0.881514751));   /* c5 */
-    tmp3 = MULTIPLY(tmp10 + tmp13, FIX(0.710284161)) +  /* c7 */
-	   MULTIPLY(tmp14 + tmp15, FIX(0.256335874));   /* c11 */
-    tmp0 = tmp1 + tmp2 + tmp3 -
-	   MULTIPLY(tmp10, FIX(1.530003162)) +          /* c3+c5+c7-c1 */
-	   MULTIPLY(tmp14, FIX(0.241438564));           /* c9-c11 */
-    tmp4 = MULTIPLY(tmp14 - tmp15, FIX(0.710284161)) -  /* c7 */
-	   MULTIPLY(tmp11 + tmp12, FIX(0.256335874));   /* c11 */
-    tmp5 = MULTIPLY(tmp11 + tmp13, - FIX(0.881514751)); /* -c5 */
-    tmp1 += tmp4 + tmp5 +
-	    MULTIPLY(tmp11, FIX(0.634110155)) -         /* c5+c9+c11-c3 */
-	    MULTIPLY(tmp14, FIX(1.773594819));          /* c1+c7 */
-    tmp6 = MULTIPLY(tmp12 + tmp13, - FIX(0.497774438)); /* -c9 */
-    tmp2 += tmp4 + tmp6 -
-	    MULTIPLY(tmp12, FIX(1.190715098)) +         /* c1+c5-c9-c11 */
-	    MULTIPLY(tmp15, FIX(1.711799069));          /* c3+c7 */
-    tmp3 += tmp5 + tmp6 +
-	    MULTIPLY(tmp13, FIX(1.670519935)) -         /* c3+c5+c9-c7 */
-	    MULTIPLY(tmp15, FIX(1.319646532));          /* c1+c11 */
-
-    dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp0, CONST_BITS+1);
-    dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp1, CONST_BITS+1);
-    dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp2, CONST_BITS+1);
-    dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp3, CONST_BITS+1);
-
-    dataptr++;			/* advance pointer to next column */
-    wsptr++;			/* advance pointer to next column */
-  }
-}
-
-
-/*
- * Perform the forward DCT on a 14x14 sample block.
- */
-
-GLOBAL(void)
-jpeg_fdct_14x14 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
-{
-  INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
-  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
-  DCTELEM workspace[8*6];
-  DCTELEM *dataptr;
-  DCTELEM *wsptr;
-  JSAMPROW elemptr;
-  int ctr;
-  SHIFT_TEMPS
-
-  /* Pass 1: process rows. */
-  /* Note results are scaled up by sqrt(8) compared to a true DCT. */
-  /* cK represents sqrt(2) * cos(K*pi/28). */
-
-  dataptr = data;
-  ctr = 0;
-  for (;;) {
-    elemptr = sample_data[ctr] + start_col;
-
-    /* Even part */
-
-    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[13]);
-    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[12]);
-    tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[11]);
-    tmp13 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[10]);
-    tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[9]);
-    tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[8]);
-    tmp6 = GETJSAMPLE(elemptr[6]) + GETJSAMPLE(elemptr[7]);
-
-    tmp10 = tmp0 + tmp6;
-    tmp14 = tmp0 - tmp6;
-    tmp11 = tmp1 + tmp5;
-    tmp15 = tmp1 - tmp5;
-    tmp12 = tmp2 + tmp4;
-    tmp16 = tmp2 - tmp4;
-
-    tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[13]);
-    tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[12]);
-    tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[11]);
-    tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[10]);
-    tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[9]);
-    tmp5 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[8]);
-    tmp6 = GETJSAMPLE(elemptr[6]) - GETJSAMPLE(elemptr[7]);
-
-    /* Apply unsigned->signed conversion */
-    dataptr[0] = (DCTELEM)
-      (tmp10 + tmp11 + tmp12 + tmp13 - 14 * CENTERJSAMPLE);
-    tmp13 += tmp13;
-    dataptr[4] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp10 - tmp13, FIX(1.274162392)) + /* c4 */
-	      MULTIPLY(tmp11 - tmp13, FIX(0.314692123)) - /* c12 */
-	      MULTIPLY(tmp12 - tmp13, FIX(0.881747734)),  /* c8 */
-	      CONST_BITS);
-
-    tmp10 = MULTIPLY(tmp14 + tmp15, FIX(1.105676686));    /* c6 */
-
-    dataptr[2] = (DCTELEM)
-      DESCALE(tmp10 + MULTIPLY(tmp14, FIX(0.273079590))   /* c2-c6 */
-	      + MULTIPLY(tmp16, FIX(0.613604268)),        /* c10 */
-	      CONST_BITS);
-    dataptr[6] = (DCTELEM)
-      DESCALE(tmp10 - MULTIPLY(tmp15, FIX(1.719280954))   /* c6+c10 */
-	      - MULTIPLY(tmp16, FIX(1.378756276)),        /* c2 */
-	      CONST_BITS);
-
-    /* Odd part */
-
-    tmp10 = tmp1 + tmp2;
-    tmp11 = tmp5 - tmp4;
-    dataptr[7] = (DCTELEM) (tmp0 - tmp10 + tmp3 - tmp11 - tmp6);
-    tmp3 <<= CONST_BITS;
-    tmp10 = MULTIPLY(tmp10, - FIX(0.158341681));          /* -c13 */
-    tmp11 = MULTIPLY(tmp11, FIX(1.405321284));            /* c1 */
-    tmp10 += tmp11 - tmp3;
-    tmp11 = MULTIPLY(tmp0 + tmp2, FIX(1.197448846)) +     /* c5 */
-	    MULTIPLY(tmp4 + tmp6, FIX(0.752406978));      /* c9 */
-    dataptr[5] = (DCTELEM)
-      DESCALE(tmp10 + tmp11 - MULTIPLY(tmp2, FIX(2.373959773)) /* c3+c5-c13 */
-	      + MULTIPLY(tmp4, FIX(1.119999435)),         /* c1+c11-c9 */
-	      CONST_BITS);
-    tmp12 = MULTIPLY(tmp0 + tmp1, FIX(1.334852607)) +     /* c3 */
-	    MULTIPLY(tmp5 - tmp6, FIX(0.467085129));      /* c11 */
-    dataptr[3] = (DCTELEM)
-      DESCALE(tmp10 + tmp12 - MULTIPLY(tmp1, FIX(0.424103948)) /* c3-c9-c13 */
-	      - MULTIPLY(tmp5, FIX(3.069855259)),         /* c1+c5+c11 */
-	      CONST_BITS);
-    dataptr[1] = (DCTELEM)
-      DESCALE(tmp11 + tmp12 + tmp3 + tmp6 -
-	      MULTIPLY(tmp0 + tmp6, FIX(1.126980169)),    /* c3+c5-c1 */
-	      CONST_BITS);
-
-    ctr++;
-
-    if (ctr != DCTSIZE) {
-      if (ctr == 14)
-	break;			/* Done. */
-      dataptr += DCTSIZE;	/* advance pointer to next row */
-    } else
-      dataptr = workspace;	/* switch pointer to extended workspace */
-  }
-
-  /* Pass 2: process columns.
-   * We leave the results scaled up by an overall factor of 8.
-   * We must also scale the output by (8/14)**2 = 16/49, which we partially
-   * fold into the constant multipliers and final shifting:
-   * cK now represents sqrt(2) * cos(K*pi/28) * 32/49.
-   */
-
-  dataptr = data;
-  wsptr = workspace;
-  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
-    /* Even part */
-
-    tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*5];
-    tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*4];
-    tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*3];
-    tmp13 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*2];
-    tmp4 = dataptr[DCTSIZE*4] + wsptr[DCTSIZE*1];
-    tmp5 = dataptr[DCTSIZE*5] + wsptr[DCTSIZE*0];
-    tmp6 = dataptr[DCTSIZE*6] + dataptr[DCTSIZE*7];
-
-    tmp10 = tmp0 + tmp6;
-    tmp14 = tmp0 - tmp6;
-    tmp11 = tmp1 + tmp5;
-    tmp15 = tmp1 - tmp5;
-    tmp12 = tmp2 + tmp4;
-    tmp16 = tmp2 - tmp4;
-
-    tmp0 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*5];
-    tmp1 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*4];
-    tmp2 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*3];
-    tmp3 = dataptr[DCTSIZE*3] - wsptr[DCTSIZE*2];
-    tmp4 = dataptr[DCTSIZE*4] - wsptr[DCTSIZE*1];
-    tmp5 = dataptr[DCTSIZE*5] - wsptr[DCTSIZE*0];
-    tmp6 = dataptr[DCTSIZE*6] - dataptr[DCTSIZE*7];
-
-    dataptr[DCTSIZE*0] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp10 + tmp11 + tmp12 + tmp13,
-		       FIX(0.653061224)),                 /* 32/49 */
-	      CONST_BITS+1);
-    tmp13 += tmp13;
-    dataptr[DCTSIZE*4] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp10 - tmp13, FIX(0.832106052)) + /* c4 */
-	      MULTIPLY(tmp11 - tmp13, FIX(0.205513223)) - /* c12 */
-	      MULTIPLY(tmp12 - tmp13, FIX(0.575835255)),  /* c8 */
-	      CONST_BITS+1);
-
-    tmp10 = MULTIPLY(tmp14 + tmp15, FIX(0.722074570));    /* c6 */
-
-    dataptr[DCTSIZE*2] = (DCTELEM)
-      DESCALE(tmp10 + MULTIPLY(tmp14, FIX(0.178337691))   /* c2-c6 */
-	      + MULTIPLY(tmp16, FIX(0.400721155)),        /* c10 */
-	      CONST_BITS+1);
-    dataptr[DCTSIZE*6] = (DCTELEM)
-      DESCALE(tmp10 - MULTIPLY(tmp15, FIX(1.122795725))   /* c6+c10 */
-	      - MULTIPLY(tmp16, FIX(0.900412262)),        /* c2 */
-	      CONST_BITS+1);
-
-    /* Odd part */
-
-    tmp10 = tmp1 + tmp2;
-    tmp11 = tmp5 - tmp4;
-    dataptr[DCTSIZE*7] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp0 - tmp10 + tmp3 - tmp11 - tmp6,
-		       FIX(0.653061224)),                 /* 32/49 */
-	      CONST_BITS+1);
-    tmp3  = MULTIPLY(tmp3 , FIX(0.653061224));            /* 32/49 */
-    tmp10 = MULTIPLY(tmp10, - FIX(0.103406812));          /* -c13 */
-    tmp11 = MULTIPLY(tmp11, FIX(0.917760839));            /* c1 */
-    tmp10 += tmp11 - tmp3;
-    tmp11 = MULTIPLY(tmp0 + tmp2, FIX(0.782007410)) +     /* c5 */
-	    MULTIPLY(tmp4 + tmp6, FIX(0.491367823));      /* c9 */
-    dataptr[DCTSIZE*5] = (DCTELEM)
-      DESCALE(tmp10 + tmp11 - MULTIPLY(tmp2, FIX(1.550341076)) /* c3+c5-c13 */
-	      + MULTIPLY(tmp4, FIX(0.731428202)),         /* c1+c11-c9 */
-	      CONST_BITS+1);
-    tmp12 = MULTIPLY(tmp0 + tmp1, FIX(0.871740478)) +     /* c3 */
-	    MULTIPLY(tmp5 - tmp6, FIX(0.305035186));      /* c11 */
-    dataptr[DCTSIZE*3] = (DCTELEM)
-      DESCALE(tmp10 + tmp12 - MULTIPLY(tmp1, FIX(0.276965844)) /* c3-c9-c13 */
-	      - MULTIPLY(tmp5, FIX(2.004803435)),         /* c1+c5+c11 */
-	      CONST_BITS+1);
-    dataptr[DCTSIZE*1] = (DCTELEM)
-      DESCALE(tmp11 + tmp12 + tmp3
-	      - MULTIPLY(tmp0, FIX(0.735987049))          /* c3+c5-c1 */
-	      - MULTIPLY(tmp6, FIX(0.082925825)),         /* c9-c11-c13 */
-	      CONST_BITS+1);
-
-    dataptr++;			/* advance pointer to next column */
-    wsptr++;			/* advance pointer to next column */
-  }
-}
-
-
-/*
- * Perform the forward DCT on a 15x15 sample block.
- */
-
-GLOBAL(void)
-jpeg_fdct_15x15 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
-{
-  INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
-  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
-  INT32 z1, z2, z3;
-  DCTELEM workspace[8*7];
-  DCTELEM *dataptr;
-  DCTELEM *wsptr;
-  JSAMPROW elemptr;
-  int ctr;
-  SHIFT_TEMPS
-
-  /* Pass 1: process rows. */
-  /* Note results are scaled up by sqrt(8) compared to a true DCT. */
-  /* cK represents sqrt(2) * cos(K*pi/30). */
-
-  dataptr = data;
-  ctr = 0;
-  for (;;) {
-    elemptr = sample_data[ctr] + start_col;
-
-    /* Even part */
-
-    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[14]);
-    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[13]);
-    tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[12]);
-    tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[11]);
-    tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[10]);
-    tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[9]);
-    tmp6 = GETJSAMPLE(elemptr[6]) + GETJSAMPLE(elemptr[8]);
-    tmp7 = GETJSAMPLE(elemptr[7]);
-
-    tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[14]);
-    tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[13]);
-    tmp12 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[12]);
-    tmp13 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[11]);
-    tmp14 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[10]);
-    tmp15 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[9]);
-    tmp16 = GETJSAMPLE(elemptr[6]) - GETJSAMPLE(elemptr[8]);
-
-    z1 = tmp0 + tmp4 + tmp5;
-    z2 = tmp1 + tmp3 + tmp6;
-    z3 = tmp2 + tmp7;
-    /* Apply unsigned->signed conversion */
-    dataptr[0] = (DCTELEM) (z1 + z2 + z3 - 15 * CENTERJSAMPLE);
-    z3 += z3;
-    dataptr[6] = (DCTELEM)
-      DESCALE(MULTIPLY(z1 - z3, FIX(1.144122806)) - /* c6 */
-	      MULTIPLY(z2 - z3, FIX(0.437016024)),  /* c12 */
-	      CONST_BITS);
-    tmp2 += ((tmp1 + tmp4) >> 1) - tmp7 - tmp7;
-    z1 = MULTIPLY(tmp3 - tmp2, FIX(1.531135173)) -  /* c2+c14 */
-         MULTIPLY(tmp6 - tmp2, FIX(2.238241955));   /* c4+c8 */
-    z2 = MULTIPLY(tmp5 - tmp2, FIX(0.798468008)) -  /* c8-c14 */
-	 MULTIPLY(tmp0 - tmp2, FIX(0.091361227));   /* c2-c4 */
-    z3 = MULTIPLY(tmp0 - tmp3, FIX(1.383309603)) +  /* c2 */
-	 MULTIPLY(tmp6 - tmp5, FIX(0.946293579)) +  /* c8 */
-	 MULTIPLY(tmp1 - tmp4, FIX(0.790569415));   /* (c6+c12)/2 */
-
-    dataptr[2] = (DCTELEM) DESCALE(z1 + z3, CONST_BITS);
-    dataptr[4] = (DCTELEM) DESCALE(z2 + z3, CONST_BITS);
-
-    /* Odd part */
-
-    tmp2 = MULTIPLY(tmp10 - tmp12 - tmp13 + tmp15 + tmp16,
-		    FIX(1.224744871));                         /* c5 */
-    tmp1 = MULTIPLY(tmp10 - tmp14 - tmp15, FIX(1.344997024)) + /* c3 */
-	   MULTIPLY(tmp11 - tmp13 - tmp16, FIX(0.831253876));  /* c9 */
-    tmp12 = MULTIPLY(tmp12, FIX(1.224744871));                 /* c5 */
-    tmp4 = MULTIPLY(tmp10 - tmp16, FIX(1.406466353)) +         /* c1 */
-	   MULTIPLY(tmp11 + tmp14, FIX(1.344997024)) +         /* c3 */
-	   MULTIPLY(tmp13 + tmp15, FIX(0.575212477));          /* c11 */
-    tmp0 = MULTIPLY(tmp13, FIX(0.475753014)) -                 /* c7-c11 */
-	   MULTIPLY(tmp14, FIX(0.513743148)) +                 /* c3-c9 */
-	   MULTIPLY(tmp16, FIX(1.700497885)) + tmp4 + tmp12;   /* c1+c13 */
-    tmp3 = MULTIPLY(tmp10, - FIX(0.355500862)) -               /* -(c1-c7) */
-	   MULTIPLY(tmp11, FIX(2.176250899)) -                 /* c3+c9 */
-	   MULTIPLY(tmp15, FIX(0.869244010)) + tmp4 - tmp12;   /* c11+c13 */
-
-    dataptr[1] = (DCTELEM) DESCALE(tmp0, CONST_BITS);
-    dataptr[3] = (DCTELEM) DESCALE(tmp1, CONST_BITS);
-    dataptr[5] = (DCTELEM) DESCALE(tmp2, CONST_BITS);
-    dataptr[7] = (DCTELEM) DESCALE(tmp3, CONST_BITS);
-
-    ctr++;
-
-    if (ctr != DCTSIZE) {
-      if (ctr == 15)
-	break;			/* Done. */
-      dataptr += DCTSIZE;	/* advance pointer to next row */
-    } else
-      dataptr = workspace;	/* switch pointer to extended workspace */
-  }
-
-  /* Pass 2: process columns.
-   * We leave the results scaled up by an overall factor of 8.
-   * We must also scale the output by (8/15)**2 = 64/225, which we partially
-   * fold into the constant multipliers and final shifting:
-   * cK now represents sqrt(2) * cos(K*pi/30) * 256/225.
-   */
-
-  dataptr = data;
-  wsptr = workspace;
-  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
-    /* Even part */
-
-    tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*6];
-    tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*5];
-    tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*4];
-    tmp3 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*3];
-    tmp4 = dataptr[DCTSIZE*4] + wsptr[DCTSIZE*2];
-    tmp5 = dataptr[DCTSIZE*5] + wsptr[DCTSIZE*1];
-    tmp6 = dataptr[DCTSIZE*6] + wsptr[DCTSIZE*0];
-    tmp7 = dataptr[DCTSIZE*7];
-
-    tmp10 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*6];
-    tmp11 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*5];
-    tmp12 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*4];
-    tmp13 = dataptr[DCTSIZE*3] - wsptr[DCTSIZE*3];
-    tmp14 = dataptr[DCTSIZE*4] - wsptr[DCTSIZE*2];
-    tmp15 = dataptr[DCTSIZE*5] - wsptr[DCTSIZE*1];
-    tmp16 = dataptr[DCTSIZE*6] - wsptr[DCTSIZE*0];
-
-    z1 = tmp0 + tmp4 + tmp5;
-    z2 = tmp1 + tmp3 + tmp6;
-    z3 = tmp2 + tmp7;
-    dataptr[DCTSIZE*0] = (DCTELEM)
-      DESCALE(MULTIPLY(z1 + z2 + z3, FIX(1.137777778)), /* 256/225 */
-	      CONST_BITS+2);
-    z3 += z3;
-    dataptr[DCTSIZE*6] = (DCTELEM)
-      DESCALE(MULTIPLY(z1 - z3, FIX(1.301757503)) - /* c6 */
-	      MULTIPLY(z2 - z3, FIX(0.497227121)),  /* c12 */
-	      CONST_BITS+2);
-    tmp2 += ((tmp1 + tmp4) >> 1) - tmp7 - tmp7;
-    z1 = MULTIPLY(tmp3 - tmp2, FIX(1.742091575)) -  /* c2+c14 */
-         MULTIPLY(tmp6 - tmp2, FIX(2.546621957));   /* c4+c8 */
-    z2 = MULTIPLY(tmp5 - tmp2, FIX(0.908479156)) -  /* c8-c14 */
-	 MULTIPLY(tmp0 - tmp2, FIX(0.103948774));   /* c2-c4 */
-    z3 = MULTIPLY(tmp0 - tmp3, FIX(1.573898926)) +  /* c2 */
-	 MULTIPLY(tmp6 - tmp5, FIX(1.076671805)) +  /* c8 */
-	 MULTIPLY(tmp1 - tmp4, FIX(0.899492312));   /* (c6+c12)/2 */
-
-    dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(z1 + z3, CONST_BITS+2);
-    dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(z2 + z3, CONST_BITS+2);
-
-    /* Odd part */
-
-    tmp2 = MULTIPLY(tmp10 - tmp12 - tmp13 + tmp15 + tmp16,
-		    FIX(1.393487498));                         /* c5 */
-    tmp1 = MULTIPLY(tmp10 - tmp14 - tmp15, FIX(1.530307725)) + /* c3 */
-	   MULTIPLY(tmp11 - tmp13 - tmp16, FIX(0.945782187));  /* c9 */
-    tmp12 = MULTIPLY(tmp12, FIX(1.393487498));                 /* c5 */
-    tmp4 = MULTIPLY(tmp10 - tmp16, FIX(1.600246161)) +         /* c1 */
-	   MULTIPLY(tmp11 + tmp14, FIX(1.530307725)) +         /* c3 */
-	   MULTIPLY(tmp13 + tmp15, FIX(0.654463974));          /* c11 */
-    tmp0 = MULTIPLY(tmp13, FIX(0.541301207)) -                 /* c7-c11 */
-	   MULTIPLY(tmp14, FIX(0.584525538)) +                 /* c3-c9 */
-	   MULTIPLY(tmp16, FIX(1.934788705)) + tmp4 + tmp12;   /* c1+c13 */
-    tmp3 = MULTIPLY(tmp10, - FIX(0.404480980)) -               /* -(c1-c7) */
-	   MULTIPLY(tmp11, FIX(2.476089912)) -                 /* c3+c9 */
-	   MULTIPLY(tmp15, FIX(0.989006518)) + tmp4 - tmp12;   /* c11+c13 */
-
-    dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp0, CONST_BITS+2);
-    dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp1, CONST_BITS+2);
-    dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp2, CONST_BITS+2);
-    dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp3, CONST_BITS+2);
-
-    dataptr++;			/* advance pointer to next column */
-    wsptr++;			/* advance pointer to next column */
-  }
-}
-
-
-/*
- * Perform the forward DCT on a 16x16 sample block.
- */
-
-GLOBAL(void)
-jpeg_fdct_16x16 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
-{
-  INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
-  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16, tmp17;
-  DCTELEM workspace[DCTSIZE2];
-  DCTELEM *dataptr;
-  DCTELEM *wsptr;
-  JSAMPROW elemptr;
-  int ctr;
-  SHIFT_TEMPS
-
-  /* Pass 1: process rows. */
-  /* Note results are scaled up by sqrt(8) compared to a true DCT; */
-  /* furthermore, we scale the results by 2**PASS1_BITS. */
-  /* cK represents sqrt(2) * cos(K*pi/32). */
-
-  dataptr = data;
-  ctr = 0;
-  for (;;) {
-    elemptr = sample_data[ctr] + start_col;
-
-    /* Even part */
-
-    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[15]);
-    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[14]);
-    tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[13]);
-    tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[12]);
-    tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[11]);
-    tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[10]);
-    tmp6 = GETJSAMPLE(elemptr[6]) + GETJSAMPLE(elemptr[9]);
-    tmp7 = GETJSAMPLE(elemptr[7]) + GETJSAMPLE(elemptr[8]);
-
-    tmp10 = tmp0 + tmp7;
-    tmp14 = tmp0 - tmp7;
-    tmp11 = tmp1 + tmp6;
-    tmp15 = tmp1 - tmp6;
-    tmp12 = tmp2 + tmp5;
-    tmp16 = tmp2 - tmp5;
-    tmp13 = tmp3 + tmp4;
-    tmp17 = tmp3 - tmp4;
-
-    tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[15]);
-    tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[14]);
-    tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[13]);
-    tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[12]);
-    tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[11]);
-    tmp5 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[10]);
-    tmp6 = GETJSAMPLE(elemptr[6]) - GETJSAMPLE(elemptr[9]);
-    tmp7 = GETJSAMPLE(elemptr[7]) - GETJSAMPLE(elemptr[8]);
-
-    /* Apply unsigned->signed conversion */
-    dataptr[0] = (DCTELEM)
-      ((tmp10 + tmp11 + tmp12 + tmp13 - 16 * CENTERJSAMPLE) << PASS1_BITS);
-    dataptr[4] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp10 - tmp13, FIX(1.306562965)) + /* c4[16] = c2[8] */
-	      MULTIPLY(tmp11 - tmp12, FIX_0_541196100),   /* c12[16] = c6[8] */
-	      CONST_BITS-PASS1_BITS);
-
-    tmp10 = MULTIPLY(tmp17 - tmp15, FIX(0.275899379)) +   /* c14[16] = c7[8] */
-	    MULTIPLY(tmp14 - tmp16, FIX(1.387039845));    /* c2[16] = c1[8] */
-
-    dataptr[2] = (DCTELEM)
-      DESCALE(tmp10 + MULTIPLY(tmp15, FIX(1.451774982))   /* c6+c14 */
-	      + MULTIPLY(tmp16, FIX(2.172734804)),        /* c2+c10 */
-	      CONST_BITS-PASS1_BITS);
-    dataptr[6] = (DCTELEM)
-      DESCALE(tmp10 - MULTIPLY(tmp14, FIX(0.211164243))   /* c2-c6 */
-	      - MULTIPLY(tmp17, FIX(1.061594338)),        /* c10+c14 */
-	      CONST_BITS-PASS1_BITS);
-
-    /* Odd part */
-
-    tmp11 = MULTIPLY(tmp0 + tmp1, FIX(1.353318001)) +         /* c3 */
-	    MULTIPLY(tmp6 - tmp7, FIX(0.410524528));          /* c13 */
-    tmp12 = MULTIPLY(tmp0 + tmp2, FIX(1.247225013)) +         /* c5 */
-	    MULTIPLY(tmp5 + tmp7, FIX(0.666655658));          /* c11 */
-    tmp13 = MULTIPLY(tmp0 + tmp3, FIX(1.093201867)) +         /* c7 */
-	    MULTIPLY(tmp4 - tmp7, FIX(0.897167586));          /* c9 */
-    tmp14 = MULTIPLY(tmp1 + tmp2, FIX(0.138617169)) +         /* c15 */
-	    MULTIPLY(tmp6 - tmp5, FIX(1.407403738));          /* c1 */
-    tmp15 = MULTIPLY(tmp1 + tmp3, - FIX(0.666655658)) +       /* -c11 */
-	    MULTIPLY(tmp4 + tmp6, - FIX(1.247225013));        /* -c5 */
-    tmp16 = MULTIPLY(tmp2 + tmp3, - FIX(1.353318001)) +       /* -c3 */
-	    MULTIPLY(tmp5 - tmp4, FIX(0.410524528));          /* c13 */
-    tmp10 = tmp11 + tmp12 + tmp13 -
-	    MULTIPLY(tmp0, FIX(2.286341144)) +                /* c7+c5+c3-c1 */
-	    MULTIPLY(tmp7, FIX(0.779653625));                 /* c15+c13-c11+c9 */
-    tmp11 += tmp14 + tmp15 + MULTIPLY(tmp1, FIX(0.071888074)) /* c9-c3-c15+c11 */
-	     - MULTIPLY(tmp6, FIX(1.663905119));              /* c7+c13+c1-c5 */
-    tmp12 += tmp14 + tmp16 - MULTIPLY(tmp2, FIX(1.125726048)) /* c7+c5+c15-c3 */
-	     + MULTIPLY(tmp5, FIX(1.227391138));              /* c9-c11+c1-c13 */
-    tmp13 += tmp15 + tmp16 + MULTIPLY(tmp3, FIX(1.065388962)) /* c15+c3+c11-c7 */
-	     + MULTIPLY(tmp4, FIX(2.167985692));              /* c1+c13+c5-c9 */
-
-    dataptr[1] = (DCTELEM) DESCALE(tmp10, CONST_BITS-PASS1_BITS);
-    dataptr[3] = (DCTELEM) DESCALE(tmp11, CONST_BITS-PASS1_BITS);
-    dataptr[5] = (DCTELEM) DESCALE(tmp12, CONST_BITS-PASS1_BITS);
-    dataptr[7] = (DCTELEM) DESCALE(tmp13, CONST_BITS-PASS1_BITS);
-
-    ctr++;
-
-    if (ctr != DCTSIZE) {
-      if (ctr == DCTSIZE * 2)
-	break;			/* Done. */
-      dataptr += DCTSIZE;	/* advance pointer to next row */
-    } else
-      dataptr = workspace;	/* switch pointer to extended workspace */
-  }
-
-  /* Pass 2: process columns.
-   * We remove the PASS1_BITS scaling, but leave the results scaled up
-   * by an overall factor of 8.
-   * We must also scale the output by (8/16)**2 = 1/2**2.
-   */
-
-  dataptr = data;
-  wsptr = workspace;
-  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
-    /* Even part */
-
-    tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*7];
-    tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*6];
-    tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*5];
-    tmp3 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*4];
-    tmp4 = dataptr[DCTSIZE*4] + wsptr[DCTSIZE*3];
-    tmp5 = dataptr[DCTSIZE*5] + wsptr[DCTSIZE*2];
-    tmp6 = dataptr[DCTSIZE*6] + wsptr[DCTSIZE*1];
-    tmp7 = dataptr[DCTSIZE*7] + wsptr[DCTSIZE*0];
-
-    tmp10 = tmp0 + tmp7;
-    tmp14 = tmp0 - tmp7;
-    tmp11 = tmp1 + tmp6;
-    tmp15 = tmp1 - tmp6;
-    tmp12 = tmp2 + tmp5;
-    tmp16 = tmp2 - tmp5;
-    tmp13 = tmp3 + tmp4;
-    tmp17 = tmp3 - tmp4;
-
-    tmp0 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*7];
-    tmp1 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*6];
-    tmp2 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*5];
-    tmp3 = dataptr[DCTSIZE*3] - wsptr[DCTSIZE*4];
-    tmp4 = dataptr[DCTSIZE*4] - wsptr[DCTSIZE*3];
-    tmp5 = dataptr[DCTSIZE*5] - wsptr[DCTSIZE*2];
-    tmp6 = dataptr[DCTSIZE*6] - wsptr[DCTSIZE*1];
-    tmp7 = dataptr[DCTSIZE*7] - wsptr[DCTSIZE*0];
-
-    dataptr[DCTSIZE*0] = (DCTELEM)
-      DESCALE(tmp10 + tmp11 + tmp12 + tmp13, PASS1_BITS+2);
-    dataptr[DCTSIZE*4] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp10 - tmp13, FIX(1.306562965)) + /* c4[16] = c2[8] */
-	      MULTIPLY(tmp11 - tmp12, FIX_0_541196100),   /* c12[16] = c6[8] */
-	      CONST_BITS+PASS1_BITS+2);
-
-    tmp10 = MULTIPLY(tmp17 - tmp15, FIX(0.275899379)) +   /* c14[16] = c7[8] */
-	    MULTIPLY(tmp14 - tmp16, FIX(1.387039845));    /* c2[16] = c1[8] */
-
-    dataptr[DCTSIZE*2] = (DCTELEM)
-      DESCALE(tmp10 + MULTIPLY(tmp15, FIX(1.451774982))   /* c6+c14 */
-	      + MULTIPLY(tmp16, FIX(2.172734804)),        /* c2+10 */
-	      CONST_BITS+PASS1_BITS+2);
-    dataptr[DCTSIZE*6] = (DCTELEM)
-      DESCALE(tmp10 - MULTIPLY(tmp14, FIX(0.211164243))   /* c2-c6 */
-	      - MULTIPLY(tmp17, FIX(1.061594338)),        /* c10+c14 */
-	      CONST_BITS+PASS1_BITS+2);
-
-    /* Odd part */
-
-    tmp11 = MULTIPLY(tmp0 + tmp1, FIX(1.353318001)) +         /* c3 */
-	    MULTIPLY(tmp6 - tmp7, FIX(0.410524528));          /* c13 */
-    tmp12 = MULTIPLY(tmp0 + tmp2, FIX(1.247225013)) +         /* c5 */
-	    MULTIPLY(tmp5 + tmp7, FIX(0.666655658));          /* c11 */
-    tmp13 = MULTIPLY(tmp0 + tmp3, FIX(1.093201867)) +         /* c7 */
-	    MULTIPLY(tmp4 - tmp7, FIX(0.897167586));          /* c9 */
-    tmp14 = MULTIPLY(tmp1 + tmp2, FIX(0.138617169)) +         /* c15 */
-	    MULTIPLY(tmp6 - tmp5, FIX(1.407403738));          /* c1 */
-    tmp15 = MULTIPLY(tmp1 + tmp3, - FIX(0.666655658)) +       /* -c11 */
-	    MULTIPLY(tmp4 + tmp6, - FIX(1.247225013));        /* -c5 */
-    tmp16 = MULTIPLY(tmp2 + tmp3, - FIX(1.353318001)) +       /* -c3 */
-	    MULTIPLY(tmp5 - tmp4, FIX(0.410524528));          /* c13 */
-    tmp10 = tmp11 + tmp12 + tmp13 -
-	    MULTIPLY(tmp0, FIX(2.286341144)) +                /* c7+c5+c3-c1 */
-	    MULTIPLY(tmp7, FIX(0.779653625));                 /* c15+c13-c11+c9 */
-    tmp11 += tmp14 + tmp15 + MULTIPLY(tmp1, FIX(0.071888074)) /* c9-c3-c15+c11 */
-	     - MULTIPLY(tmp6, FIX(1.663905119));              /* c7+c13+c1-c5 */
-    tmp12 += tmp14 + tmp16 - MULTIPLY(tmp2, FIX(1.125726048)) /* c7+c5+c15-c3 */
-	     + MULTIPLY(tmp5, FIX(1.227391138));              /* c9-c11+c1-c13 */
-    tmp13 += tmp15 + tmp16 + MULTIPLY(tmp3, FIX(1.065388962)) /* c15+c3+c11-c7 */
-	     + MULTIPLY(tmp4, FIX(2.167985692));              /* c1+c13+c5-c9 */
-
-    dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp10, CONST_BITS+PASS1_BITS+2);
-    dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp11, CONST_BITS+PASS1_BITS+2);
-    dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp12, CONST_BITS+PASS1_BITS+2);
-    dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp13, CONST_BITS+PASS1_BITS+2);
-
-    dataptr++;			/* advance pointer to next column */
-    wsptr++;			/* advance pointer to next column */
-  }
-}
-
-
-/*
- * Perform the forward DCT on a 16x8 sample block.
- *
- * 16-point FDCT in pass 1 (rows), 8-point in pass 2 (columns).
- */
-
-GLOBAL(void)
-jpeg_fdct_16x8 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
-{
-  INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
-  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16, tmp17;
-  INT32 z1;
-  DCTELEM *dataptr;
-  JSAMPROW elemptr;
-  int ctr;
-  SHIFT_TEMPS
-
-  /* Pass 1: process rows. */
-  /* Note results are scaled up by sqrt(8) compared to a true DCT; */
-  /* furthermore, we scale the results by 2**PASS1_BITS. */
-  /* 16-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/32). */
-
-  dataptr = data;
-  ctr = 0;
-  for (ctr = 0; ctr < DCTSIZE; ctr++) {
-    elemptr = sample_data[ctr] + start_col;
-
-    /* Even part */
-
-    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[15]);
-    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[14]);
-    tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[13]);
-    tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[12]);
-    tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[11]);
-    tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[10]);
-    tmp6 = GETJSAMPLE(elemptr[6]) + GETJSAMPLE(elemptr[9]);
-    tmp7 = GETJSAMPLE(elemptr[7]) + GETJSAMPLE(elemptr[8]);
-
-    tmp10 = tmp0 + tmp7;
-    tmp14 = tmp0 - tmp7;
-    tmp11 = tmp1 + tmp6;
-    tmp15 = tmp1 - tmp6;
-    tmp12 = tmp2 + tmp5;
-    tmp16 = tmp2 - tmp5;
-    tmp13 = tmp3 + tmp4;
-    tmp17 = tmp3 - tmp4;
-
-    tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[15]);
-    tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[14]);
-    tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[13]);
-    tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[12]);
-    tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[11]);
-    tmp5 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[10]);
-    tmp6 = GETJSAMPLE(elemptr[6]) - GETJSAMPLE(elemptr[9]);
-    tmp7 = GETJSAMPLE(elemptr[7]) - GETJSAMPLE(elemptr[8]);
-
-    /* Apply unsigned->signed conversion */
-    dataptr[0] = (DCTELEM)
-      ((tmp10 + tmp11 + tmp12 + tmp13 - 16 * CENTERJSAMPLE) << PASS1_BITS);
-    dataptr[4] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp10 - tmp13, FIX(1.306562965)) + /* c4[16] = c2[8] */
-	      MULTIPLY(tmp11 - tmp12, FIX_0_541196100),   /* c12[16] = c6[8] */
-	      CONST_BITS-PASS1_BITS);
-
-    tmp10 = MULTIPLY(tmp17 - tmp15, FIX(0.275899379)) +   /* c14[16] = c7[8] */
-	    MULTIPLY(tmp14 - tmp16, FIX(1.387039845));    /* c2[16] = c1[8] */
-
-    dataptr[2] = (DCTELEM)
-      DESCALE(tmp10 + MULTIPLY(tmp15, FIX(1.451774982))   /* c6+c14 */
-	      + MULTIPLY(tmp16, FIX(2.172734804)),        /* c2+c10 */
-	      CONST_BITS-PASS1_BITS);
-    dataptr[6] = (DCTELEM)
-      DESCALE(tmp10 - MULTIPLY(tmp14, FIX(0.211164243))   /* c2-c6 */
-	      - MULTIPLY(tmp17, FIX(1.061594338)),        /* c10+c14 */
-	      CONST_BITS-PASS1_BITS);
-
-    /* Odd part */
-
-    tmp11 = MULTIPLY(tmp0 + tmp1, FIX(1.353318001)) +         /* c3 */
-	    MULTIPLY(tmp6 - tmp7, FIX(0.410524528));          /* c13 */
-    tmp12 = MULTIPLY(tmp0 + tmp2, FIX(1.247225013)) +         /* c5 */
-	    MULTIPLY(tmp5 + tmp7, FIX(0.666655658));          /* c11 */
-    tmp13 = MULTIPLY(tmp0 + tmp3, FIX(1.093201867)) +         /* c7 */
-	    MULTIPLY(tmp4 - tmp7, FIX(0.897167586));          /* c9 */
-    tmp14 = MULTIPLY(tmp1 + tmp2, FIX(0.138617169)) +         /* c15 */
-	    MULTIPLY(tmp6 - tmp5, FIX(1.407403738));          /* c1 */
-    tmp15 = MULTIPLY(tmp1 + tmp3, - FIX(0.666655658)) +       /* -c11 */
-	    MULTIPLY(tmp4 + tmp6, - FIX(1.247225013));        /* -c5 */
-    tmp16 = MULTIPLY(tmp2 + tmp3, - FIX(1.353318001)) +       /* -c3 */
-	    MULTIPLY(tmp5 - tmp4, FIX(0.410524528));          /* c13 */
-    tmp10 = tmp11 + tmp12 + tmp13 -
-	    MULTIPLY(tmp0, FIX(2.286341144)) +                /* c7+c5+c3-c1 */
-	    MULTIPLY(tmp7, FIX(0.779653625));                 /* c15+c13-c11+c9 */
-    tmp11 += tmp14 + tmp15 + MULTIPLY(tmp1, FIX(0.071888074)) /* c9-c3-c15+c11 */
-	     - MULTIPLY(tmp6, FIX(1.663905119));              /* c7+c13+c1-c5 */
-    tmp12 += tmp14 + tmp16 - MULTIPLY(tmp2, FIX(1.125726048)) /* c7+c5+c15-c3 */
-	     + MULTIPLY(tmp5, FIX(1.227391138));              /* c9-c11+c1-c13 */
-    tmp13 += tmp15 + tmp16 + MULTIPLY(tmp3, FIX(1.065388962)) /* c15+c3+c11-c7 */
-	     + MULTIPLY(tmp4, FIX(2.167985692));              /* c1+c13+c5-c9 */
-
-    dataptr[1] = (DCTELEM) DESCALE(tmp10, CONST_BITS-PASS1_BITS);
-    dataptr[3] = (DCTELEM) DESCALE(tmp11, CONST_BITS-PASS1_BITS);
-    dataptr[5] = (DCTELEM) DESCALE(tmp12, CONST_BITS-PASS1_BITS);
-    dataptr[7] = (DCTELEM) DESCALE(tmp13, CONST_BITS-PASS1_BITS);
-
-    dataptr += DCTSIZE;		/* advance pointer to next row */
-  }
-
-  /* Pass 2: process columns.
-   * We remove the PASS1_BITS scaling, but leave the results scaled up
-   * by an overall factor of 8.
-   * We must also scale the output by 8/16 = 1/2.
-   */
-
-  dataptr = data;
-  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
-    /* Even part per LL&M figure 1 --- note that published figure is faulty;
-     * rotator "sqrt(2)*c1" should be "sqrt(2)*c6".
-     */
-
-    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7];
-    tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6];
-    tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5];
-    tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4];
-
-    tmp10 = tmp0 + tmp3;
-    tmp12 = tmp0 - tmp3;
-    tmp11 = tmp1 + tmp2;
-    tmp13 = tmp1 - tmp2;
-
-    tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7];
-    tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6];
-    tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5];
-    tmp3 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4];
-
-    dataptr[DCTSIZE*0] = (DCTELEM) DESCALE(tmp10 + tmp11, PASS1_BITS+1);
-    dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp10 - tmp11, PASS1_BITS+1);
-
-    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
-    dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, FIX_0_765366865),
-					   CONST_BITS+PASS1_BITS+1);
-    dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(z1 - MULTIPLY(tmp13, FIX_1_847759065),
-					   CONST_BITS+PASS1_BITS+1);
-
-    /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
-     * 8-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
-     * i0..i3 in the paper are tmp0..tmp3 here.
-     */
-
-    tmp10 = tmp0 + tmp3;
-    tmp11 = tmp1 + tmp2;
-    tmp12 = tmp0 + tmp2;
-    tmp13 = tmp1 + tmp3;
-    z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602); /*  c3 */
-
-    tmp0  = MULTIPLY(tmp0,    FIX_1_501321110);    /*  c1+c3-c5-c7 */
-    tmp1  = MULTIPLY(tmp1,    FIX_3_072711026);    /*  c1+c3+c5-c7 */
-    tmp2  = MULTIPLY(tmp2,    FIX_2_053119869);    /*  c1+c3-c5+c7 */
-    tmp3  = MULTIPLY(tmp3,    FIX_0_298631336);    /* -c1+c3+c5-c7 */
-    tmp10 = MULTIPLY(tmp10, - FIX_0_899976223);    /*  c7-c3 */
-    tmp11 = MULTIPLY(tmp11, - FIX_2_562915447);    /* -c1-c3 */
-    tmp12 = MULTIPLY(tmp12, - FIX_0_390180644);    /*  c5-c3 */
-    tmp13 = MULTIPLY(tmp13, - FIX_1_961570560);    /* -c3-c5 */
-
-    tmp12 += z1;
-    tmp13 += z1;
-
-    dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp0 + tmp10 + tmp12,
-					   CONST_BITS+PASS1_BITS+1);
-    dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp1 + tmp11 + tmp13,
-					   CONST_BITS+PASS1_BITS+1);
-    dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp2 + tmp11 + tmp12,
-					   CONST_BITS+PASS1_BITS+1);
-    dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp3 + tmp10 + tmp13,
-					   CONST_BITS+PASS1_BITS+1);
-
-    dataptr++;			/* advance pointer to next column */
-  }
-}
-
-
-/*
- * Perform the forward DCT on a 14x7 sample block.
- *
- * 14-point FDCT in pass 1 (rows), 7-point in pass 2 (columns).
- */
-
-GLOBAL(void)
-jpeg_fdct_14x7 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
-{
-  INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
-  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
-  INT32 z1, z2, z3;
-  DCTELEM *dataptr;
-  JSAMPROW elemptr;
-  int ctr;
-  SHIFT_TEMPS
-
-  /* Zero bottom row of output coefficient block. */
-  MEMZERO(&data[DCTSIZE*7], SIZEOF(DCTELEM) * DCTSIZE);
-
-  /* Pass 1: process rows. */
-  /* Note results are scaled up by sqrt(8) compared to a true DCT; */
-  /* furthermore, we scale the results by 2**PASS1_BITS. */
-  /* 14-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/28). */
-
-  dataptr = data;
-  for (ctr = 0; ctr < 7; ctr++) {
-    elemptr = sample_data[ctr] + start_col;
-
-    /* Even part */
-
-    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[13]);
-    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[12]);
-    tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[11]);
-    tmp13 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[10]);
-    tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[9]);
-    tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[8]);
-    tmp6 = GETJSAMPLE(elemptr[6]) + GETJSAMPLE(elemptr[7]);
-
-    tmp10 = tmp0 + tmp6;
-    tmp14 = tmp0 - tmp6;
-    tmp11 = tmp1 + tmp5;
-    tmp15 = tmp1 - tmp5;
-    tmp12 = tmp2 + tmp4;
-    tmp16 = tmp2 - tmp4;
-
-    tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[13]);
-    tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[12]);
-    tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[11]);
-    tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[10]);
-    tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[9]);
-    tmp5 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[8]);
-    tmp6 = GETJSAMPLE(elemptr[6]) - GETJSAMPLE(elemptr[7]);
-
-    /* Apply unsigned->signed conversion */
-    dataptr[0] = (DCTELEM)
-      ((tmp10 + tmp11 + tmp12 + tmp13 - 14 * CENTERJSAMPLE) << PASS1_BITS);
-    tmp13 += tmp13;
-    dataptr[4] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp10 - tmp13, FIX(1.274162392)) + /* c4 */
-	      MULTIPLY(tmp11 - tmp13, FIX(0.314692123)) - /* c12 */
-	      MULTIPLY(tmp12 - tmp13, FIX(0.881747734)),  /* c8 */
-	      CONST_BITS-PASS1_BITS);
-
-    tmp10 = MULTIPLY(tmp14 + tmp15, FIX(1.105676686));    /* c6 */
-
-    dataptr[2] = (DCTELEM)
-      DESCALE(tmp10 + MULTIPLY(tmp14, FIX(0.273079590))   /* c2-c6 */
-	      + MULTIPLY(tmp16, FIX(0.613604268)),        /* c10 */
-	      CONST_BITS-PASS1_BITS);
-    dataptr[6] = (DCTELEM)
-      DESCALE(tmp10 - MULTIPLY(tmp15, FIX(1.719280954))   /* c6+c10 */
-	      - MULTIPLY(tmp16, FIX(1.378756276)),        /* c2 */
-	      CONST_BITS-PASS1_BITS);
-
-    /* Odd part */
-
-    tmp10 = tmp1 + tmp2;
-    tmp11 = tmp5 - tmp4;
-    dataptr[7] = (DCTELEM) ((tmp0 - tmp10 + tmp3 - tmp11 - tmp6) << PASS1_BITS);
-    tmp3 <<= CONST_BITS;
-    tmp10 = MULTIPLY(tmp10, - FIX(0.158341681));          /* -c13 */
-    tmp11 = MULTIPLY(tmp11, FIX(1.405321284));            /* c1 */
-    tmp10 += tmp11 - tmp3;
-    tmp11 = MULTIPLY(tmp0 + tmp2, FIX(1.197448846)) +     /* c5 */
-	    MULTIPLY(tmp4 + tmp6, FIX(0.752406978));      /* c9 */
-    dataptr[5] = (DCTELEM)
-      DESCALE(tmp10 + tmp11 - MULTIPLY(tmp2, FIX(2.373959773)) /* c3+c5-c13 */
-	      + MULTIPLY(tmp4, FIX(1.119999435)),         /* c1+c11-c9 */
-	      CONST_BITS-PASS1_BITS);
-    tmp12 = MULTIPLY(tmp0 + tmp1, FIX(1.334852607)) +     /* c3 */
-	    MULTIPLY(tmp5 - tmp6, FIX(0.467085129));      /* c11 */
-    dataptr[3] = (DCTELEM)
-      DESCALE(tmp10 + tmp12 - MULTIPLY(tmp1, FIX(0.424103948)) /* c3-c9-c13 */
-	      - MULTIPLY(tmp5, FIX(3.069855259)),         /* c1+c5+c11 */
-	      CONST_BITS-PASS1_BITS);
-    dataptr[1] = (DCTELEM)
-      DESCALE(tmp11 + tmp12 + tmp3 + tmp6 -
-	      MULTIPLY(tmp0 + tmp6, FIX(1.126980169)),    /* c3+c5-c1 */
-	      CONST_BITS-PASS1_BITS);
-
-    dataptr += DCTSIZE;		/* advance pointer to next row */
-  }
-
-  /* Pass 2: process columns.
-   * We remove the PASS1_BITS scaling, but leave the results scaled up
-   * by an overall factor of 8.
-   * We must also scale the output by (8/14)*(8/7) = 32/49, which we
-   * partially fold into the constant multipliers and final shifting:
-   * 7-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/14) * 64/49.
-   */
-
-  dataptr = data;
-  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
-    /* Even part */
-
-    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*6];
-    tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*5];
-    tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*4];
-    tmp3 = dataptr[DCTSIZE*3];
-
-    tmp10 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*6];
-    tmp11 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*5];
-    tmp12 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*4];
-
-    z1 = tmp0 + tmp2;
-    dataptr[DCTSIZE*0] = (DCTELEM)
-      DESCALE(MULTIPLY(z1 + tmp1 + tmp3, FIX(1.306122449)), /* 64/49 */
-	      CONST_BITS+PASS1_BITS+1);
-    tmp3 += tmp3;
-    z1 -= tmp3;
-    z1 -= tmp3;
-    z1 = MULTIPLY(z1, FIX(0.461784020));                /* (c2+c6-c4)/2 */
-    z2 = MULTIPLY(tmp0 - tmp2, FIX(1.202428084));       /* (c2+c4-c6)/2 */
-    z3 = MULTIPLY(tmp1 - tmp2, FIX(0.411026446));       /* c6 */
-    dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(z1 + z2 + z3, CONST_BITS+PASS1_BITS+1);
-    z1 -= z2;
-    z2 = MULTIPLY(tmp0 - tmp1, FIX(1.151670509));       /* c4 */
-    dataptr[DCTSIZE*4] = (DCTELEM)
-      DESCALE(z2 + z3 - MULTIPLY(tmp1 - tmp3, FIX(0.923568041)), /* c2+c6-c4 */
-	      CONST_BITS+PASS1_BITS+1);
-    dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(z1 + z2, CONST_BITS+PASS1_BITS+1);
-
-    /* Odd part */
-
-    tmp1 = MULTIPLY(tmp10 + tmp11, FIX(1.221765677));   /* (c3+c1-c5)/2 */
-    tmp2 = MULTIPLY(tmp10 - tmp11, FIX(0.222383464));   /* (c3+c5-c1)/2 */
-    tmp0 = tmp1 - tmp2;
-    tmp1 += tmp2;
-    tmp2 = MULTIPLY(tmp11 + tmp12, - FIX(1.800824523)); /* -c1 */
-    tmp1 += tmp2;
-    tmp3 = MULTIPLY(tmp10 + tmp12, FIX(0.801442310));   /* c5 */
-    tmp0 += tmp3;
-    tmp2 += tmp3 + MULTIPLY(tmp12, FIX(2.443531355));   /* c3+c1-c5 */
-
-    dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp0, CONST_BITS+PASS1_BITS+1);
-    dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp1, CONST_BITS+PASS1_BITS+1);
-    dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp2, CONST_BITS+PASS1_BITS+1);
-
-    dataptr++;			/* advance pointer to next column */
-  }
-}
-
-
-/*
- * Perform the forward DCT on a 12x6 sample block.
- *
- * 12-point FDCT in pass 1 (rows), 6-point in pass 2 (columns).
- */
-
-GLOBAL(void)
-jpeg_fdct_12x6 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
-{
-  INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
-  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
-  DCTELEM *dataptr;
-  JSAMPROW elemptr;
-  int ctr;
-  SHIFT_TEMPS
-
-  /* Zero 2 bottom rows of output coefficient block. */
-  MEMZERO(&data[DCTSIZE*6], SIZEOF(DCTELEM) * DCTSIZE * 2);
-
-  /* Pass 1: process rows. */
-  /* Note results are scaled up by sqrt(8) compared to a true DCT; */
-  /* furthermore, we scale the results by 2**PASS1_BITS. */
-  /* 12-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/24). */
-
-  dataptr = data;
-  for (ctr = 0; ctr < 6; ctr++) {
-    elemptr = sample_data[ctr] + start_col;
-
-    /* Even part */
-
-    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[11]);
-    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[10]);
-    tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[9]);
-    tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[8]);
-    tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[7]);
-    tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[6]);
-
-    tmp10 = tmp0 + tmp5;
-    tmp13 = tmp0 - tmp5;
-    tmp11 = tmp1 + tmp4;
-    tmp14 = tmp1 - tmp4;
-    tmp12 = tmp2 + tmp3;
-    tmp15 = tmp2 - tmp3;
-
-    tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[11]);
-    tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[10]);
-    tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[9]);
-    tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[8]);
-    tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[7]);
-    tmp5 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[6]);
-
-    /* Apply unsigned->signed conversion */
-    dataptr[0] = (DCTELEM)
-      ((tmp10 + tmp11 + tmp12 - 12 * CENTERJSAMPLE) << PASS1_BITS);
-    dataptr[6] = (DCTELEM) ((tmp13 - tmp14 - tmp15) << PASS1_BITS);
-    dataptr[4] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.224744871)), /* c4 */
-	      CONST_BITS-PASS1_BITS);
-    dataptr[2] = (DCTELEM)
-      DESCALE(tmp14 - tmp15 + MULTIPLY(tmp13 + tmp15, FIX(1.366025404)), /* c2 */
-	      CONST_BITS-PASS1_BITS);
-
-    /* Odd part */
-
-    tmp10 = MULTIPLY(tmp1 + tmp4, FIX_0_541196100);    /* c9 */
-    tmp14 = tmp10 + MULTIPLY(tmp1, FIX_0_765366865);   /* c3-c9 */
-    tmp15 = tmp10 - MULTIPLY(tmp4, FIX_1_847759065);   /* c3+c9 */
-    tmp12 = MULTIPLY(tmp0 + tmp2, FIX(1.121971054));   /* c5 */
-    tmp13 = MULTIPLY(tmp0 + tmp3, FIX(0.860918669));   /* c7 */
-    tmp10 = tmp12 + tmp13 + tmp14 - MULTIPLY(tmp0, FIX(0.580774953)) /* c5+c7-c1 */
-	    + MULTIPLY(tmp5, FIX(0.184591911));        /* c11 */
-    tmp11 = MULTIPLY(tmp2 + tmp3, - FIX(0.184591911)); /* -c11 */
-    tmp12 += tmp11 - tmp15 - MULTIPLY(tmp2, FIX(2.339493912)) /* c1+c5-c11 */
-	    + MULTIPLY(tmp5, FIX(0.860918669));        /* c7 */
-    tmp13 += tmp11 - tmp14 + MULTIPLY(tmp3, FIX(0.725788011)) /* c1+c11-c7 */
-	    - MULTIPLY(tmp5, FIX(1.121971054));        /* c5 */
-    tmp11 = tmp15 + MULTIPLY(tmp0 - tmp3, FIX(1.306562965)) /* c3 */
-	    - MULTIPLY(tmp2 + tmp5, FIX_0_541196100);  /* c9 */
-
-    dataptr[1] = (DCTELEM) DESCALE(tmp10, CONST_BITS-PASS1_BITS);
-    dataptr[3] = (DCTELEM) DESCALE(tmp11, CONST_BITS-PASS1_BITS);
-    dataptr[5] = (DCTELEM) DESCALE(tmp12, CONST_BITS-PASS1_BITS);
-    dataptr[7] = (DCTELEM) DESCALE(tmp13, CONST_BITS-PASS1_BITS);
-
-    dataptr += DCTSIZE;		/* advance pointer to next row */
-  }
-
-  /* Pass 2: process columns.
-   * We remove the PASS1_BITS scaling, but leave the results scaled up
-   * by an overall factor of 8.
-   * We must also scale the output by (8/12)*(8/6) = 8/9, which we
-   * partially fold into the constant multipliers and final shifting:
-   * 6-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/12) * 16/9.
-   */
-
-  dataptr = data;
-  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
-    /* Even part */
-
-    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*5];
-    tmp11 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*4];
-    tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*3];
-
-    tmp10 = tmp0 + tmp2;
-    tmp12 = tmp0 - tmp2;
-
-    tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*5];
-    tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*4];
-    tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*3];
-
-    dataptr[DCTSIZE*0] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp10 + tmp11, FIX(1.777777778)),         /* 16/9 */
-	      CONST_BITS+PASS1_BITS+1);
-    dataptr[DCTSIZE*2] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp12, FIX(2.177324216)),                 /* c2 */
-	      CONST_BITS+PASS1_BITS+1);
-    dataptr[DCTSIZE*4] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp10 - tmp11 - tmp11, FIX(1.257078722)), /* c4 */
-	      CONST_BITS+PASS1_BITS+1);
-
-    /* Odd part */
-
-    tmp10 = MULTIPLY(tmp0 + tmp2, FIX(0.650711829));             /* c5 */
-
-    dataptr[DCTSIZE*1] = (DCTELEM)
-      DESCALE(tmp10 + MULTIPLY(tmp0 + tmp1, FIX(1.777777778)),   /* 16/9 */
-	      CONST_BITS+PASS1_BITS+1);
-    dataptr[DCTSIZE*3] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp0 - tmp1 - tmp2, FIX(1.777777778)),    /* 16/9 */
-	      CONST_BITS+PASS1_BITS+1);
-    dataptr[DCTSIZE*5] = (DCTELEM)
-      DESCALE(tmp10 + MULTIPLY(tmp2 - tmp1, FIX(1.777777778)),   /* 16/9 */
-	      CONST_BITS+PASS1_BITS+1);
-
-    dataptr++;			/* advance pointer to next column */
-  }
-}
-
-
-/*
- * Perform the forward DCT on a 10x5 sample block.
- *
- * 10-point FDCT in pass 1 (rows), 5-point in pass 2 (columns).
- */
-
-GLOBAL(void)
-jpeg_fdct_10x5 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
-{
-  INT32 tmp0, tmp1, tmp2, tmp3, tmp4;
-  INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
-  DCTELEM *dataptr;
-  JSAMPROW elemptr;
-  int ctr;
-  SHIFT_TEMPS
-
-  /* Zero 3 bottom rows of output coefficient block. */
-  MEMZERO(&data[DCTSIZE*5], SIZEOF(DCTELEM) * DCTSIZE * 3);
-
-  /* Pass 1: process rows. */
-  /* Note results are scaled up by sqrt(8) compared to a true DCT; */
-  /* furthermore, we scale the results by 2**PASS1_BITS. */
-  /* 10-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/20). */
-
-  dataptr = data;
-  for (ctr = 0; ctr < 5; ctr++) {
-    elemptr = sample_data[ctr] + start_col;
-
-    /* Even part */
-
-    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[9]);
-    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[8]);
-    tmp12 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[7]);
-    tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[6]);
-    tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[5]);
-
-    tmp10 = tmp0 + tmp4;
-    tmp13 = tmp0 - tmp4;
-    tmp11 = tmp1 + tmp3;
-    tmp14 = tmp1 - tmp3;
-
-    tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[9]);
-    tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[8]);
-    tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[7]);
-    tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[6]);
-    tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[5]);
-
-    /* Apply unsigned->signed conversion */
-    dataptr[0] = (DCTELEM)
-      ((tmp10 + tmp11 + tmp12 - 10 * CENTERJSAMPLE) << PASS1_BITS);
-    tmp12 += tmp12;
-    dataptr[4] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.144122806)) - /* c4 */
-	      MULTIPLY(tmp11 - tmp12, FIX(0.437016024)),  /* c8 */
-	      CONST_BITS-PASS1_BITS);
-    tmp10 = MULTIPLY(tmp13 + tmp14, FIX(0.831253876));    /* c6 */
-    dataptr[2] = (DCTELEM)
-      DESCALE(tmp10 + MULTIPLY(tmp13, FIX(0.513743148)),  /* c2-c6 */
-	      CONST_BITS-PASS1_BITS);
-    dataptr[6] = (DCTELEM)
-      DESCALE(tmp10 - MULTIPLY(tmp14, FIX(2.176250899)),  /* c2+c6 */
-	      CONST_BITS-PASS1_BITS);
-
-    /* Odd part */
-
-    tmp10 = tmp0 + tmp4;
-    tmp11 = tmp1 - tmp3;
-    dataptr[5] = (DCTELEM) ((tmp10 - tmp11 - tmp2) << PASS1_BITS);
-    tmp2 <<= CONST_BITS;
-    dataptr[1] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp0, FIX(1.396802247)) +          /* c1 */
-	      MULTIPLY(tmp1, FIX(1.260073511)) + tmp2 +   /* c3 */
-	      MULTIPLY(tmp3, FIX(0.642039522)) +          /* c7 */
-	      MULTIPLY(tmp4, FIX(0.221231742)),           /* c9 */
-	      CONST_BITS-PASS1_BITS);
-    tmp12 = MULTIPLY(tmp0 - tmp4, FIX(0.951056516)) -     /* (c3+c7)/2 */
-	    MULTIPLY(tmp1 + tmp3, FIX(0.587785252));      /* (c1-c9)/2 */
-    tmp13 = MULTIPLY(tmp10 + tmp11, FIX(0.309016994)) +   /* (c3-c7)/2 */
-	    (tmp11 << (CONST_BITS - 1)) - tmp2;
-    dataptr[3] = (DCTELEM) DESCALE(tmp12 + tmp13, CONST_BITS-PASS1_BITS);
-    dataptr[7] = (DCTELEM) DESCALE(tmp12 - tmp13, CONST_BITS-PASS1_BITS);
-
-    dataptr += DCTSIZE;		/* advance pointer to next row */
-  }
-
-  /* Pass 2: process columns.
-   * We remove the PASS1_BITS scaling, but leave the results scaled up
-   * by an overall factor of 8.
-   * We must also scale the output by (8/10)*(8/5) = 32/25, which we
-   * fold into the constant multipliers:
-   * 5-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/10) * 32/25.
-   */
-
-  dataptr = data;
-  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
-    /* Even part */
-
-    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*4];
-    tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*3];
-    tmp2 = dataptr[DCTSIZE*2];
-
-    tmp10 = tmp0 + tmp1;
-    tmp11 = tmp0 - tmp1;
-
-    tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*4];
-    tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*3];
-
-    dataptr[DCTSIZE*0] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp10 + tmp2, FIX(1.28)),        /* 32/25 */
-	      CONST_BITS+PASS1_BITS);
-    tmp11 = MULTIPLY(tmp11, FIX(1.011928851));          /* (c2+c4)/2 */
-    tmp10 -= tmp2 << 2;
-    tmp10 = MULTIPLY(tmp10, FIX(0.452548340));          /* (c2-c4)/2 */
-    dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(tmp11 + tmp10, CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp11 - tmp10, CONST_BITS+PASS1_BITS);
-
-    /* Odd part */
-
-    tmp10 = MULTIPLY(tmp0 + tmp1, FIX(1.064004961));    /* c3 */
-
-    dataptr[DCTSIZE*1] = (DCTELEM)
-      DESCALE(tmp10 + MULTIPLY(tmp0, FIX(0.657591230)), /* c1-c3 */
-	      CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*3] = (DCTELEM)
-      DESCALE(tmp10 - MULTIPLY(tmp1, FIX(2.785601151)), /* c1+c3 */
-	      CONST_BITS+PASS1_BITS);
-
-    dataptr++;			/* advance pointer to next column */
-  }
-}
-
-
-/*
- * Perform the forward DCT on an 8x4 sample block.
- *
- * 8-point FDCT in pass 1 (rows), 4-point in pass 2 (columns).
- */
-
-GLOBAL(void)
-jpeg_fdct_8x4 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
-{
-  INT32 tmp0, tmp1, tmp2, tmp3;
-  INT32 tmp10, tmp11, tmp12, tmp13;
-  INT32 z1;
-  DCTELEM *dataptr;
-  JSAMPROW elemptr;
-  int ctr;
-  SHIFT_TEMPS
-
-  /* Zero 4 bottom rows of output coefficient block. */
-  MEMZERO(&data[DCTSIZE*4], SIZEOF(DCTELEM) * DCTSIZE * 4);
-
-  /* Pass 1: process rows. */
-  /* Note results are scaled up by sqrt(8) compared to a true DCT; */
-  /* furthermore, we scale the results by 2**PASS1_BITS. */
-  /* We must also scale the output by 8/4 = 2, which we add here. */
-
-  dataptr = data;
-  for (ctr = 0; ctr < 4; ctr++) {
-    elemptr = sample_data[ctr] + start_col;
-
-    /* Even part per LL&M figure 1 --- note that published figure is faulty;
-     * rotator "sqrt(2)*c1" should be "sqrt(2)*c6".
-     */
-
-    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[7]);
-    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[6]);
-    tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[5]);
-    tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[4]);
-
-    tmp10 = tmp0 + tmp3;
-    tmp12 = tmp0 - tmp3;
-    tmp11 = tmp1 + tmp2;
-    tmp13 = tmp1 - tmp2;
-
-    tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[7]);
-    tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[6]);
-    tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[5]);
-    tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[4]);
-
-    /* Apply unsigned->signed conversion */
-    dataptr[0] = (DCTELEM)
-      ((tmp10 + tmp11 - 8 * CENTERJSAMPLE) << (PASS1_BITS+1));
-    dataptr[4] = (DCTELEM) ((tmp10 - tmp11) << (PASS1_BITS+1));
-
-    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
-    /* Add fudge factor here for final descale. */
-    z1 += ONE << (CONST_BITS-PASS1_BITS-2);
-    dataptr[2] = (DCTELEM) RIGHT_SHIFT(z1 + MULTIPLY(tmp12, FIX_0_765366865),
-				       CONST_BITS-PASS1_BITS-1);
-    dataptr[6] = (DCTELEM) RIGHT_SHIFT(z1 - MULTIPLY(tmp13, FIX_1_847759065),
-				       CONST_BITS-PASS1_BITS-1);
-
-    /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
-     * 8-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
-     * i0..i3 in the paper are tmp0..tmp3 here.
-     */
-
-    tmp10 = tmp0 + tmp3;
-    tmp11 = tmp1 + tmp2;
-    tmp12 = tmp0 + tmp2;
-    tmp13 = tmp1 + tmp3;
-    z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602); /*  c3 */
-    /* Add fudge factor here for final descale. */
-    z1 += ONE << (CONST_BITS-PASS1_BITS-2);
-
-    tmp0  = MULTIPLY(tmp0,    FIX_1_501321110);    /*  c1+c3-c5-c7 */
-    tmp1  = MULTIPLY(tmp1,    FIX_3_072711026);    /*  c1+c3+c5-c7 */
-    tmp2  = MULTIPLY(tmp2,    FIX_2_053119869);    /*  c1+c3-c5+c7 */
-    tmp3  = MULTIPLY(tmp3,    FIX_0_298631336);    /* -c1+c3+c5-c7 */
-    tmp10 = MULTIPLY(tmp10, - FIX_0_899976223);    /*  c7-c3 */
-    tmp11 = MULTIPLY(tmp11, - FIX_2_562915447);    /* -c1-c3 */
-    tmp12 = MULTIPLY(tmp12, - FIX_0_390180644);    /*  c5-c3 */
-    tmp13 = MULTIPLY(tmp13, - FIX_1_961570560);    /* -c3-c5 */
-
-    tmp12 += z1;
-    tmp13 += z1;
-
-    dataptr[1] = (DCTELEM)
-      RIGHT_SHIFT(tmp0 + tmp10 + tmp12, CONST_BITS-PASS1_BITS-1);
-    dataptr[3] = (DCTELEM)
-      RIGHT_SHIFT(tmp1 + tmp11 + tmp13, CONST_BITS-PASS1_BITS-1);
-    dataptr[5] = (DCTELEM)
-      RIGHT_SHIFT(tmp2 + tmp11 + tmp12, CONST_BITS-PASS1_BITS-1);
-    dataptr[7] = (DCTELEM)
-      RIGHT_SHIFT(tmp3 + tmp10 + tmp13, CONST_BITS-PASS1_BITS-1);
-
-    dataptr += DCTSIZE;		/* advance pointer to next row */
-  }
-
-  /* Pass 2: process columns.
-   * We remove the PASS1_BITS scaling, but leave the results scaled up
-   * by an overall factor of 8.
-   * 4-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
-   */
-
-  dataptr = data;
-  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
-    /* Even part */
-
-    /* Add fudge factor here for final descale. */
-    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*3] + (ONE << (PASS1_BITS-1));
-    tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*2];
-
-    tmp10 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*3];
-    tmp11 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*2];
-
-    dataptr[DCTSIZE*0] = (DCTELEM) RIGHT_SHIFT(tmp0 + tmp1, PASS1_BITS);
-    dataptr[DCTSIZE*2] = (DCTELEM) RIGHT_SHIFT(tmp0 - tmp1, PASS1_BITS);
-
-    /* Odd part */
-
-    tmp0 = MULTIPLY(tmp10 + tmp11, FIX_0_541196100);   /* c6 */
-    /* Add fudge factor here for final descale. */
-    tmp0 += ONE << (CONST_BITS+PASS1_BITS-1);
-
-    dataptr[DCTSIZE*1] = (DCTELEM)
-      RIGHT_SHIFT(tmp0 + MULTIPLY(tmp10, FIX_0_765366865), /* c2-c6 */
-		  CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*3] = (DCTELEM)
-      RIGHT_SHIFT(tmp0 - MULTIPLY(tmp11, FIX_1_847759065), /* c2+c6 */
-		  CONST_BITS+PASS1_BITS);
-
-    dataptr++;			/* advance pointer to next column */
-  }
-}
-
-
-/*
- * Perform the forward DCT on a 6x3 sample block.
- *
- * 6-point FDCT in pass 1 (rows), 3-point in pass 2 (columns).
- */
-
-GLOBAL(void)
-jpeg_fdct_6x3 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
-{
-  INT32 tmp0, tmp1, tmp2;
-  INT32 tmp10, tmp11, tmp12;
-  DCTELEM *dataptr;
-  JSAMPROW elemptr;
-  int ctr;
-  SHIFT_TEMPS
-
-  /* Pre-zero output coefficient block. */
-  MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
-
-  /* Pass 1: process rows. */
-  /* Note results are scaled up by sqrt(8) compared to a true DCT; */
-  /* furthermore, we scale the results by 2**PASS1_BITS. */
-  /* We scale the results further by 2 as part of output adaption */
-  /* scaling for different DCT size. */
-  /* 6-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/12). */
-
-  dataptr = data;
-  for (ctr = 0; ctr < 3; ctr++) {
-    elemptr = sample_data[ctr] + start_col;
-
-    /* Even part */
-
-    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[5]);
-    tmp11 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[4]);
-    tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[3]);
-
-    tmp10 = tmp0 + tmp2;
-    tmp12 = tmp0 - tmp2;
-
-    tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[5]);
-    tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[4]);
-    tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[3]);
-
-    /* Apply unsigned->signed conversion */
-    dataptr[0] = (DCTELEM)
-      ((tmp10 + tmp11 - 6 * CENTERJSAMPLE) << (PASS1_BITS+1));
-    dataptr[2] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp12, FIX(1.224744871)),                 /* c2 */
-	      CONST_BITS-PASS1_BITS-1);
-    dataptr[4] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp10 - tmp11 - tmp11, FIX(0.707106781)), /* c4 */
-	      CONST_BITS-PASS1_BITS-1);
-
-    /* Odd part */
-
-    tmp10 = DESCALE(MULTIPLY(tmp0 + tmp2, FIX(0.366025404)),     /* c5 */
-		    CONST_BITS-PASS1_BITS-1);
-
-    dataptr[1] = (DCTELEM) (tmp10 + ((tmp0 + tmp1) << (PASS1_BITS+1)));
-    dataptr[3] = (DCTELEM) ((tmp0 - tmp1 - tmp2) << (PASS1_BITS+1));
-    dataptr[5] = (DCTELEM) (tmp10 + ((tmp2 - tmp1) << (PASS1_BITS+1)));
-
-    dataptr += DCTSIZE;		/* advance pointer to next row */
-  }
-
-  /* Pass 2: process columns.
-   * We remove the PASS1_BITS scaling, but leave the results scaled up
-   * by an overall factor of 8.
-   * We must also scale the output by (8/6)*(8/3) = 32/9, which we partially
-   * fold into the constant multipliers (other part was done in pass 1):
-   * 3-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/6) * 16/9.
-   */
-
-  dataptr = data;
-  for (ctr = 0; ctr < 6; ctr++) {
-    /* Even part */
-
-    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*2];
-    tmp1 = dataptr[DCTSIZE*1];
-
-    tmp2 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*2];
-
-    dataptr[DCTSIZE*0] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp0 + tmp1, FIX(1.777777778)),        /* 16/9 */
-	      CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*2] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp0 - tmp1 - tmp1, FIX(1.257078722)), /* c2 */
-	      CONST_BITS+PASS1_BITS);
-
-    /* Odd part */
-
-    dataptr[DCTSIZE*1] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp2, FIX(2.177324216)),               /* c1 */
-	      CONST_BITS+PASS1_BITS);
-
-    dataptr++;			/* advance pointer to next column */
-  }
-}
-
-
-/*
- * Perform the forward DCT on a 4x2 sample block.
- *
- * 4-point FDCT in pass 1 (rows), 2-point in pass 2 (columns).
- */
-
-GLOBAL(void)
-jpeg_fdct_4x2 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
-{
-  INT32 tmp0, tmp1;
-  INT32 tmp10, tmp11;
-  DCTELEM *dataptr;
-  JSAMPROW elemptr;
-  int ctr;
-  SHIFT_TEMPS
-
-  /* Pre-zero output coefficient block. */
-  MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
-
-  /* Pass 1: process rows. */
-  /* Note results are scaled up by sqrt(8) compared to a true DCT; */
-  /* furthermore, we scale the results by 2**PASS1_BITS. */
-  /* We must also scale the output by (8/4)*(8/2) = 2**3, which we add here. */
-  /* 4-point FDCT kernel, */
-  /* cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point FDCT]. */
-
-  dataptr = data;
-  for (ctr = 0; ctr < 2; ctr++) {
-    elemptr = sample_data[ctr] + start_col;
-
-    /* Even part */
-
-    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[3]);
-    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[2]);
-
-    tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[3]);
-    tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[2]);
-
-    /* Apply unsigned->signed conversion */
-    dataptr[0] = (DCTELEM)
-      ((tmp0 + tmp1 - 4 * CENTERJSAMPLE) << (PASS1_BITS+3));
-    dataptr[2] = (DCTELEM) ((tmp0 - tmp1) << (PASS1_BITS+3));
-
-    /* Odd part */
-
-    tmp0 = MULTIPLY(tmp10 + tmp11, FIX_0_541196100);       /* c6 */
-    /* Add fudge factor here for final descale. */
-    tmp0 += ONE << (CONST_BITS-PASS1_BITS-4);
-
-    dataptr[1] = (DCTELEM)
-      RIGHT_SHIFT(tmp0 + MULTIPLY(tmp10, FIX_0_765366865), /* c2-c6 */
-		  CONST_BITS-PASS1_BITS-3);
-    dataptr[3] = (DCTELEM)
-      RIGHT_SHIFT(tmp0 - MULTIPLY(tmp11, FIX_1_847759065), /* c2+c6 */
-		  CONST_BITS-PASS1_BITS-3);
-
-    dataptr += DCTSIZE;		/* advance pointer to next row */
-  }
-
-  /* Pass 2: process columns.
-   * We remove the PASS1_BITS scaling, but leave the results scaled up
-   * by an overall factor of 8.
-   */
-
-  dataptr = data;
-  for (ctr = 0; ctr < 4; ctr++) {
-    /* Even part */
-
-    /* Add fudge factor here for final descale. */
-    tmp0 = dataptr[DCTSIZE*0] + (ONE << (PASS1_BITS-1));
-    tmp1 = dataptr[DCTSIZE*1];
-
-    dataptr[DCTSIZE*0] = (DCTELEM) RIGHT_SHIFT(tmp0 + tmp1, PASS1_BITS);
-
-    /* Odd part */
-
-    dataptr[DCTSIZE*1] = (DCTELEM) RIGHT_SHIFT(tmp0 - tmp1, PASS1_BITS);
-
-    dataptr++;			/* advance pointer to next column */
-  }
-}
-
-
-/*
- * Perform the forward DCT on a 2x1 sample block.
- *
- * 2-point FDCT in pass 1 (rows), 1-point in pass 2 (columns).
- */
-
-GLOBAL(void)
-jpeg_fdct_2x1 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
-{
-  INT32 tmp0, tmp1;
-  JSAMPROW elemptr;
-
-  /* Pre-zero output coefficient block. */
-  MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
-
-  elemptr = sample_data[0] + start_col;
-
-  tmp0 = GETJSAMPLE(elemptr[0]);
-  tmp1 = GETJSAMPLE(elemptr[1]);
-
-  /* We leave the results scaled up by an overall factor of 8.
-   * We must also scale the output by (8/2)*(8/1) = 2**5.
-   */
-
-  /* Even part */
-  /* Apply unsigned->signed conversion */
-  data[0] = (DCTELEM) ((tmp0 + tmp1 - 2 * CENTERJSAMPLE) << 5);
-
-  /* Odd part */
-  data[1] = (DCTELEM) ((tmp0 - tmp1) << 5);
-}
-
-
-/*
- * Perform the forward DCT on an 8x16 sample block.
- *
- * 8-point FDCT in pass 1 (rows), 16-point in pass 2 (columns).
- */
-
-GLOBAL(void)
-jpeg_fdct_8x16 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
-{
-  INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
-  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16, tmp17;
-  INT32 z1;
-  DCTELEM workspace[DCTSIZE2];
-  DCTELEM *dataptr;
-  DCTELEM *wsptr;
-  JSAMPROW elemptr;
-  int ctr;
-  SHIFT_TEMPS
-
-  /* Pass 1: process rows. */
-  /* Note results are scaled up by sqrt(8) compared to a true DCT; */
-  /* furthermore, we scale the results by 2**PASS1_BITS. */
-
-  dataptr = data;
-  ctr = 0;
-  for (;;) {
-    elemptr = sample_data[ctr] + start_col;
-
-    /* Even part per LL&M figure 1 --- note that published figure is faulty;
-     * rotator "sqrt(2)*c1" should be "sqrt(2)*c6".
-     */
-
-    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[7]);
-    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[6]);
-    tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[5]);
-    tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[4]);
-
-    tmp10 = tmp0 + tmp3;
-    tmp12 = tmp0 - tmp3;
-    tmp11 = tmp1 + tmp2;
-    tmp13 = tmp1 - tmp2;
-
-    tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[7]);
-    tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[6]);
-    tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[5]);
-    tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[4]);
-
-    /* Apply unsigned->signed conversion */
-    dataptr[0] = (DCTELEM) ((tmp10 + tmp11 - 8 * CENTERJSAMPLE) << PASS1_BITS);
-    dataptr[4] = (DCTELEM) ((tmp10 - tmp11) << PASS1_BITS);
-
-    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
-    dataptr[2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, FIX_0_765366865),
-				   CONST_BITS-PASS1_BITS);
-    dataptr[6] = (DCTELEM) DESCALE(z1 - MULTIPLY(tmp13, FIX_1_847759065),
-				   CONST_BITS-PASS1_BITS);
-
-    /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
-     * 8-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
-     * i0..i3 in the paper are tmp0..tmp3 here.
-     */
-
-    tmp10 = tmp0 + tmp3;
-    tmp11 = tmp1 + tmp2;
-    tmp12 = tmp0 + tmp2;
-    tmp13 = tmp1 + tmp3;
-    z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602); /*  c3 */
-
-    tmp0  = MULTIPLY(tmp0,    FIX_1_501321110);    /*  c1+c3-c5-c7 */
-    tmp1  = MULTIPLY(tmp1,    FIX_3_072711026);    /*  c1+c3+c5-c7 */
-    tmp2  = MULTIPLY(tmp2,    FIX_2_053119869);    /*  c1+c3-c5+c7 */
-    tmp3  = MULTIPLY(tmp3,    FIX_0_298631336);    /* -c1+c3+c5-c7 */
-    tmp10 = MULTIPLY(tmp10, - FIX_0_899976223);    /*  c7-c3 */
-    tmp11 = MULTIPLY(tmp11, - FIX_2_562915447);    /* -c1-c3 */
-    tmp12 = MULTIPLY(tmp12, - FIX_0_390180644);    /*  c5-c3 */
-    tmp13 = MULTIPLY(tmp13, - FIX_1_961570560);    /* -c3-c5 */
-
-    tmp12 += z1;
-    tmp13 += z1;
-
-    dataptr[1] = (DCTELEM) DESCALE(tmp0 + tmp10 + tmp12, CONST_BITS-PASS1_BITS);
-    dataptr[3] = (DCTELEM) DESCALE(tmp1 + tmp11 + tmp13, CONST_BITS-PASS1_BITS);
-    dataptr[5] = (DCTELEM) DESCALE(tmp2 + tmp11 + tmp12, CONST_BITS-PASS1_BITS);
-    dataptr[7] = (DCTELEM) DESCALE(tmp3 + tmp10 + tmp13, CONST_BITS-PASS1_BITS);
-
-    ctr++;
-
-    if (ctr != DCTSIZE) {
-      if (ctr == DCTSIZE * 2)
-	break;			/* Done. */
-      dataptr += DCTSIZE;	/* advance pointer to next row */
-    } else
-      dataptr = workspace;	/* switch pointer to extended workspace */
-  }
-
-  /* Pass 2: process columns.
-   * We remove the PASS1_BITS scaling, but leave the results scaled up
-   * by an overall factor of 8.
-   * We must also scale the output by 8/16 = 1/2.
-   * 16-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/32).
-   */
-
-  dataptr = data;
-  wsptr = workspace;
-  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
-    /* Even part */
-
-    tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*7];
-    tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*6];
-    tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*5];
-    tmp3 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*4];
-    tmp4 = dataptr[DCTSIZE*4] + wsptr[DCTSIZE*3];
-    tmp5 = dataptr[DCTSIZE*5] + wsptr[DCTSIZE*2];
-    tmp6 = dataptr[DCTSIZE*6] + wsptr[DCTSIZE*1];
-    tmp7 = dataptr[DCTSIZE*7] + wsptr[DCTSIZE*0];
-
-    tmp10 = tmp0 + tmp7;
-    tmp14 = tmp0 - tmp7;
-    tmp11 = tmp1 + tmp6;
-    tmp15 = tmp1 - tmp6;
-    tmp12 = tmp2 + tmp5;
-    tmp16 = tmp2 - tmp5;
-    tmp13 = tmp3 + tmp4;
-    tmp17 = tmp3 - tmp4;
-
-    tmp0 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*7];
-    tmp1 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*6];
-    tmp2 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*5];
-    tmp3 = dataptr[DCTSIZE*3] - wsptr[DCTSIZE*4];
-    tmp4 = dataptr[DCTSIZE*4] - wsptr[DCTSIZE*3];
-    tmp5 = dataptr[DCTSIZE*5] - wsptr[DCTSIZE*2];
-    tmp6 = dataptr[DCTSIZE*6] - wsptr[DCTSIZE*1];
-    tmp7 = dataptr[DCTSIZE*7] - wsptr[DCTSIZE*0];
-
-    dataptr[DCTSIZE*0] = (DCTELEM)
-      DESCALE(tmp10 + tmp11 + tmp12 + tmp13, PASS1_BITS+1);
-    dataptr[DCTSIZE*4] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp10 - tmp13, FIX(1.306562965)) + /* c4[16] = c2[8] */
-	      MULTIPLY(tmp11 - tmp12, FIX_0_541196100),   /* c12[16] = c6[8] */
-	      CONST_BITS+PASS1_BITS+1);
-
-    tmp10 = MULTIPLY(tmp17 - tmp15, FIX(0.275899379)) +   /* c14[16] = c7[8] */
-	    MULTIPLY(tmp14 - tmp16, FIX(1.387039845));    /* c2[16] = c1[8] */
-
-    dataptr[DCTSIZE*2] = (DCTELEM)
-      DESCALE(tmp10 + MULTIPLY(tmp15, FIX(1.451774982))   /* c6+c14 */
-	      + MULTIPLY(tmp16, FIX(2.172734804)),        /* c2+c10 */
-	      CONST_BITS+PASS1_BITS+1);
-    dataptr[DCTSIZE*6] = (DCTELEM)
-      DESCALE(tmp10 - MULTIPLY(tmp14, FIX(0.211164243))   /* c2-c6 */
-	      - MULTIPLY(tmp17, FIX(1.061594338)),        /* c10+c14 */
-	      CONST_BITS+PASS1_BITS+1);
-
-    /* Odd part */
-
-    tmp11 = MULTIPLY(tmp0 + tmp1, FIX(1.353318001)) +         /* c3 */
-	    MULTIPLY(tmp6 - tmp7, FIX(0.410524528));          /* c13 */
-    tmp12 = MULTIPLY(tmp0 + tmp2, FIX(1.247225013)) +         /* c5 */
-	    MULTIPLY(tmp5 + tmp7, FIX(0.666655658));          /* c11 */
-    tmp13 = MULTIPLY(tmp0 + tmp3, FIX(1.093201867)) +         /* c7 */
-	    MULTIPLY(tmp4 - tmp7, FIX(0.897167586));          /* c9 */
-    tmp14 = MULTIPLY(tmp1 + tmp2, FIX(0.138617169)) +         /* c15 */
-	    MULTIPLY(tmp6 - tmp5, FIX(1.407403738));          /* c1 */
-    tmp15 = MULTIPLY(tmp1 + tmp3, - FIX(0.666655658)) +       /* -c11 */
-	    MULTIPLY(tmp4 + tmp6, - FIX(1.247225013));        /* -c5 */
-    tmp16 = MULTIPLY(tmp2 + tmp3, - FIX(1.353318001)) +       /* -c3 */
-	    MULTIPLY(tmp5 - tmp4, FIX(0.410524528));          /* c13 */
-    tmp10 = tmp11 + tmp12 + tmp13 -
-	    MULTIPLY(tmp0, FIX(2.286341144)) +                /* c7+c5+c3-c1 */
-	    MULTIPLY(tmp7, FIX(0.779653625));                 /* c15+c13-c11+c9 */
-    tmp11 += tmp14 + tmp15 + MULTIPLY(tmp1, FIX(0.071888074)) /* c9-c3-c15+c11 */
-	     - MULTIPLY(tmp6, FIX(1.663905119));              /* c7+c13+c1-c5 */
-    tmp12 += tmp14 + tmp16 - MULTIPLY(tmp2, FIX(1.125726048)) /* c7+c5+c15-c3 */
-	     + MULTIPLY(tmp5, FIX(1.227391138));              /* c9-c11+c1-c13 */
-    tmp13 += tmp15 + tmp16 + MULTIPLY(tmp3, FIX(1.065388962)) /* c15+c3+c11-c7 */
-	     + MULTIPLY(tmp4, FIX(2.167985692));              /* c1+c13+c5-c9 */
-
-    dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp10, CONST_BITS+PASS1_BITS+1);
-    dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp11, CONST_BITS+PASS1_BITS+1);
-    dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp12, CONST_BITS+PASS1_BITS+1);
-    dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp13, CONST_BITS+PASS1_BITS+1);
-
-    dataptr++;			/* advance pointer to next column */
-    wsptr++;			/* advance pointer to next column */
-  }
-}
-
-
-/*
- * Perform the forward DCT on a 7x14 sample block.
- *
- * 7-point FDCT in pass 1 (rows), 14-point in pass 2 (columns).
- */
-
-GLOBAL(void)
-jpeg_fdct_7x14 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
-{
-  INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
-  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
-  INT32 z1, z2, z3;
-  DCTELEM workspace[8*6];
-  DCTELEM *dataptr;
-  DCTELEM *wsptr;
-  JSAMPROW elemptr;
-  int ctr;
-  SHIFT_TEMPS
-
-  /* Pre-zero output coefficient block. */
-  MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
-
-  /* Pass 1: process rows. */
-  /* Note results are scaled up by sqrt(8) compared to a true DCT; */
-  /* furthermore, we scale the results by 2**PASS1_BITS. */
-  /* 7-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/14). */
-
-  dataptr = data;
-  ctr = 0;
-  for (;;) {
-    elemptr = sample_data[ctr] + start_col;
-
-    /* Even part */
-
-    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[6]);
-    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[5]);
-    tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[4]);
-    tmp3 = GETJSAMPLE(elemptr[3]);
-
-    tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[6]);
-    tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[5]);
-    tmp12 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[4]);
-
-    z1 = tmp0 + tmp2;
-    /* Apply unsigned->signed conversion */
-    dataptr[0] = (DCTELEM)
-      ((z1 + tmp1 + tmp3 - 7 * CENTERJSAMPLE) << PASS1_BITS);
-    tmp3 += tmp3;
-    z1 -= tmp3;
-    z1 -= tmp3;
-    z1 = MULTIPLY(z1, FIX(0.353553391));                /* (c2+c6-c4)/2 */
-    z2 = MULTIPLY(tmp0 - tmp2, FIX(0.920609002));       /* (c2+c4-c6)/2 */
-    z3 = MULTIPLY(tmp1 - tmp2, FIX(0.314692123));       /* c6 */
-    dataptr[2] = (DCTELEM) DESCALE(z1 + z2 + z3, CONST_BITS-PASS1_BITS);
-    z1 -= z2;
-    z2 = MULTIPLY(tmp0 - tmp1, FIX(0.881747734));       /* c4 */
-    dataptr[4] = (DCTELEM)
-      DESCALE(z2 + z3 - MULTIPLY(tmp1 - tmp3, FIX(0.707106781)), /* c2+c6-c4 */
-	      CONST_BITS-PASS1_BITS);
-    dataptr[6] = (DCTELEM) DESCALE(z1 + z2, CONST_BITS-PASS1_BITS);
-
-    /* Odd part */
-
-    tmp1 = MULTIPLY(tmp10 + tmp11, FIX(0.935414347));   /* (c3+c1-c5)/2 */
-    tmp2 = MULTIPLY(tmp10 - tmp11, FIX(0.170262339));   /* (c3+c5-c1)/2 */
-    tmp0 = tmp1 - tmp2;
-    tmp1 += tmp2;
-    tmp2 = MULTIPLY(tmp11 + tmp12, - FIX(1.378756276)); /* -c1 */
-    tmp1 += tmp2;
-    tmp3 = MULTIPLY(tmp10 + tmp12, FIX(0.613604268));   /* c5 */
-    tmp0 += tmp3;
-    tmp2 += tmp3 + MULTIPLY(tmp12, FIX(1.870828693));   /* c3+c1-c5 */
-
-    dataptr[1] = (DCTELEM) DESCALE(tmp0, CONST_BITS-PASS1_BITS);
-    dataptr[3] = (DCTELEM) DESCALE(tmp1, CONST_BITS-PASS1_BITS);
-    dataptr[5] = (DCTELEM) DESCALE(tmp2, CONST_BITS-PASS1_BITS);
-
-    ctr++;
-
-    if (ctr != DCTSIZE) {
-      if (ctr == 14)
-	break;			/* Done. */
-      dataptr += DCTSIZE;	/* advance pointer to next row */
-    } else
-      dataptr = workspace;	/* switch pointer to extended workspace */
-  }
-
-  /* Pass 2: process columns.
-   * We remove the PASS1_BITS scaling, but leave the results scaled up
-   * by an overall factor of 8.
-   * We must also scale the output by (8/7)*(8/14) = 32/49, which we
-   * fold into the constant multipliers:
-   * 14-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/28) * 32/49.
-   */
-
-  dataptr = data;
-  wsptr = workspace;
-  for (ctr = 0; ctr < 7; ctr++) {
-    /* Even part */
-
-    tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*5];
-    tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*4];
-    tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*3];
-    tmp13 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*2];
-    tmp4 = dataptr[DCTSIZE*4] + wsptr[DCTSIZE*1];
-    tmp5 = dataptr[DCTSIZE*5] + wsptr[DCTSIZE*0];
-    tmp6 = dataptr[DCTSIZE*6] + dataptr[DCTSIZE*7];
-
-    tmp10 = tmp0 + tmp6;
-    tmp14 = tmp0 - tmp6;
-    tmp11 = tmp1 + tmp5;
-    tmp15 = tmp1 - tmp5;
-    tmp12 = tmp2 + tmp4;
-    tmp16 = tmp2 - tmp4;
-
-    tmp0 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*5];
-    tmp1 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*4];
-    tmp2 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*3];
-    tmp3 = dataptr[DCTSIZE*3] - wsptr[DCTSIZE*2];
-    tmp4 = dataptr[DCTSIZE*4] - wsptr[DCTSIZE*1];
-    tmp5 = dataptr[DCTSIZE*5] - wsptr[DCTSIZE*0];
-    tmp6 = dataptr[DCTSIZE*6] - dataptr[DCTSIZE*7];
-
-    dataptr[DCTSIZE*0] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp10 + tmp11 + tmp12 + tmp13,
-		       FIX(0.653061224)),                 /* 32/49 */
-	      CONST_BITS+PASS1_BITS);
-    tmp13 += tmp13;
-    dataptr[DCTSIZE*4] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp10 - tmp13, FIX(0.832106052)) + /* c4 */
-	      MULTIPLY(tmp11 - tmp13, FIX(0.205513223)) - /* c12 */
-	      MULTIPLY(tmp12 - tmp13, FIX(0.575835255)),  /* c8 */
-	      CONST_BITS+PASS1_BITS);
-
-    tmp10 = MULTIPLY(tmp14 + tmp15, FIX(0.722074570));    /* c6 */
-
-    dataptr[DCTSIZE*2] = (DCTELEM)
-      DESCALE(tmp10 + MULTIPLY(tmp14, FIX(0.178337691))   /* c2-c6 */
-	      + MULTIPLY(tmp16, FIX(0.400721155)),        /* c10 */
-	      CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*6] = (DCTELEM)
-      DESCALE(tmp10 - MULTIPLY(tmp15, FIX(1.122795725))   /* c6+c10 */
-	      - MULTIPLY(tmp16, FIX(0.900412262)),        /* c2 */
-	      CONST_BITS+PASS1_BITS);
-
-    /* Odd part */
-
-    tmp10 = tmp1 + tmp2;
-    tmp11 = tmp5 - tmp4;
-    dataptr[DCTSIZE*7] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp0 - tmp10 + tmp3 - tmp11 - tmp6,
-		       FIX(0.653061224)),                 /* 32/49 */
-	      CONST_BITS+PASS1_BITS);
-    tmp3  = MULTIPLY(tmp3 , FIX(0.653061224));            /* 32/49 */
-    tmp10 = MULTIPLY(tmp10, - FIX(0.103406812));          /* -c13 */
-    tmp11 = MULTIPLY(tmp11, FIX(0.917760839));            /* c1 */
-    tmp10 += tmp11 - tmp3;
-    tmp11 = MULTIPLY(tmp0 + tmp2, FIX(0.782007410)) +     /* c5 */
-	    MULTIPLY(tmp4 + tmp6, FIX(0.491367823));      /* c9 */
-    dataptr[DCTSIZE*5] = (DCTELEM)
-      DESCALE(tmp10 + tmp11 - MULTIPLY(tmp2, FIX(1.550341076)) /* c3+c5-c13 */
-	      + MULTIPLY(tmp4, FIX(0.731428202)),         /* c1+c11-c9 */
-	      CONST_BITS+PASS1_BITS);
-    tmp12 = MULTIPLY(tmp0 + tmp1, FIX(0.871740478)) +     /* c3 */
-	    MULTIPLY(tmp5 - tmp6, FIX(0.305035186));      /* c11 */
-    dataptr[DCTSIZE*3] = (DCTELEM)
-      DESCALE(tmp10 + tmp12 - MULTIPLY(tmp1, FIX(0.276965844)) /* c3-c9-c13 */
-	      - MULTIPLY(tmp5, FIX(2.004803435)),         /* c1+c5+c11 */
-	      CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*1] = (DCTELEM)
-      DESCALE(tmp11 + tmp12 + tmp3
-	      - MULTIPLY(tmp0, FIX(0.735987049))          /* c3+c5-c1 */
-	      - MULTIPLY(tmp6, FIX(0.082925825)),         /* c9-c11-c13 */
-	      CONST_BITS+PASS1_BITS);
-
-    dataptr++;			/* advance pointer to next column */
-    wsptr++;			/* advance pointer to next column */
-  }
-}
-
-
-/*
- * Perform the forward DCT on a 6x12 sample block.
- *
- * 6-point FDCT in pass 1 (rows), 12-point in pass 2 (columns).
- */
-
-GLOBAL(void)
-jpeg_fdct_6x12 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
-{
-  INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
-  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
-  DCTELEM workspace[8*4];
-  DCTELEM *dataptr;
-  DCTELEM *wsptr;
-  JSAMPROW elemptr;
-  int ctr;
-  SHIFT_TEMPS
-
-  /* Pre-zero output coefficient block. */
-  MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
-
-  /* Pass 1: process rows. */
-  /* Note results are scaled up by sqrt(8) compared to a true DCT; */
-  /* furthermore, we scale the results by 2**PASS1_BITS. */
-  /* 6-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/12). */
-
-  dataptr = data;
-  ctr = 0;
-  for (;;) {
-    elemptr = sample_data[ctr] + start_col;
-
-    /* Even part */
-
-    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[5]);
-    tmp11 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[4]);
-    tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[3]);
-
-    tmp10 = tmp0 + tmp2;
-    tmp12 = tmp0 - tmp2;
-
-    tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[5]);
-    tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[4]);
-    tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[3]);
-
-    /* Apply unsigned->signed conversion */
-    dataptr[0] = (DCTELEM)
-      ((tmp10 + tmp11 - 6 * CENTERJSAMPLE) << PASS1_BITS);
-    dataptr[2] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp12, FIX(1.224744871)),                 /* c2 */
-	      CONST_BITS-PASS1_BITS);
-    dataptr[4] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp10 - tmp11 - tmp11, FIX(0.707106781)), /* c4 */
-	      CONST_BITS-PASS1_BITS);
-
-    /* Odd part */
-
-    tmp10 = DESCALE(MULTIPLY(tmp0 + tmp2, FIX(0.366025404)),     /* c5 */
-		    CONST_BITS-PASS1_BITS);
-
-    dataptr[1] = (DCTELEM) (tmp10 + ((tmp0 + tmp1) << PASS1_BITS));
-    dataptr[3] = (DCTELEM) ((tmp0 - tmp1 - tmp2) << PASS1_BITS);
-    dataptr[5] = (DCTELEM) (tmp10 + ((tmp2 - tmp1) << PASS1_BITS));
-
-    ctr++;
-
-    if (ctr != DCTSIZE) {
-      if (ctr == 12)
-	break;			/* Done. */
-      dataptr += DCTSIZE;	/* advance pointer to next row */
-    } else
-      dataptr = workspace;	/* switch pointer to extended workspace */
-  }
-
-  /* Pass 2: process columns.
-   * We remove the PASS1_BITS scaling, but leave the results scaled up
-   * by an overall factor of 8.
-   * We must also scale the output by (8/6)*(8/12) = 8/9, which we
-   * fold into the constant multipliers:
-   * 12-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/24) * 8/9.
-   */
-
-  dataptr = data;
-  wsptr = workspace;
-  for (ctr = 0; ctr < 6; ctr++) {
-    /* Even part */
-
-    tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*3];
-    tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*2];
-    tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*1];
-    tmp3 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*0];
-    tmp4 = dataptr[DCTSIZE*4] + dataptr[DCTSIZE*7];
-    tmp5 = dataptr[DCTSIZE*5] + dataptr[DCTSIZE*6];
-
-    tmp10 = tmp0 + tmp5;
-    tmp13 = tmp0 - tmp5;
-    tmp11 = tmp1 + tmp4;
-    tmp14 = tmp1 - tmp4;
-    tmp12 = tmp2 + tmp3;
-    tmp15 = tmp2 - tmp3;
-
-    tmp0 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*3];
-    tmp1 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*2];
-    tmp2 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*1];
-    tmp3 = dataptr[DCTSIZE*3] - wsptr[DCTSIZE*0];
-    tmp4 = dataptr[DCTSIZE*4] - dataptr[DCTSIZE*7];
-    tmp5 = dataptr[DCTSIZE*5] - dataptr[DCTSIZE*6];
-
-    dataptr[DCTSIZE*0] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp10 + tmp11 + tmp12, FIX(0.888888889)), /* 8/9 */
-	      CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*6] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp13 - tmp14 - tmp15, FIX(0.888888889)), /* 8/9 */
-	      CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*4] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.088662108)),         /* c4 */
-	      CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*2] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp14 - tmp15, FIX(0.888888889)) +        /* 8/9 */
-	      MULTIPLY(tmp13 + tmp15, FIX(1.214244803)),         /* c2 */
-	      CONST_BITS+PASS1_BITS);
-
-    /* Odd part */
-
-    tmp10 = MULTIPLY(tmp1 + tmp4, FIX(0.481063200));   /* c9 */
-    tmp14 = tmp10 + MULTIPLY(tmp1, FIX(0.680326102));  /* c3-c9 */
-    tmp15 = tmp10 - MULTIPLY(tmp4, FIX(1.642452502));  /* c3+c9 */
-    tmp12 = MULTIPLY(tmp0 + tmp2, FIX(0.997307603));   /* c5 */
-    tmp13 = MULTIPLY(tmp0 + tmp3, FIX(0.765261039));   /* c7 */
-    tmp10 = tmp12 + tmp13 + tmp14 - MULTIPLY(tmp0, FIX(0.516244403)) /* c5+c7-c1 */
-	    + MULTIPLY(tmp5, FIX(0.164081699));        /* c11 */
-    tmp11 = MULTIPLY(tmp2 + tmp3, - FIX(0.164081699)); /* -c11 */
-    tmp12 += tmp11 - tmp15 - MULTIPLY(tmp2, FIX(2.079550144)) /* c1+c5-c11 */
-	    + MULTIPLY(tmp5, FIX(0.765261039));        /* c7 */
-    tmp13 += tmp11 - tmp14 + MULTIPLY(tmp3, FIX(0.645144899)) /* c1+c11-c7 */
-	    - MULTIPLY(tmp5, FIX(0.997307603));        /* c5 */
-    tmp11 = tmp15 + MULTIPLY(tmp0 - tmp3, FIX(1.161389302)) /* c3 */
-	    - MULTIPLY(tmp2 + tmp5, FIX(0.481063200)); /* c9 */
-
-    dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp10, CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp11, CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp12, CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp13, CONST_BITS+PASS1_BITS);
-
-    dataptr++;			/* advance pointer to next column */
-    wsptr++;			/* advance pointer to next column */
-  }
-}
-
-
-/*
- * Perform the forward DCT on a 5x10 sample block.
- *
- * 5-point FDCT in pass 1 (rows), 10-point in pass 2 (columns).
- */
-
-GLOBAL(void)
-jpeg_fdct_5x10 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
-{
-  INT32 tmp0, tmp1, tmp2, tmp3, tmp4;
-  INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
-  DCTELEM workspace[8*2];
-  DCTELEM *dataptr;
-  DCTELEM *wsptr;
-  JSAMPROW elemptr;
-  int ctr;
-  SHIFT_TEMPS
-
-  /* Pre-zero output coefficient block. */
-  MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
-
-  /* Pass 1: process rows. */
-  /* Note results are scaled up by sqrt(8) compared to a true DCT; */
-  /* furthermore, we scale the results by 2**PASS1_BITS. */
-  /* 5-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/10). */
-
-  dataptr = data;
-  ctr = 0;
-  for (;;) {
-    elemptr = sample_data[ctr] + start_col;
-
-    /* Even part */
-
-    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[4]);
-    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[3]);
-    tmp2 = GETJSAMPLE(elemptr[2]);
-
-    tmp10 = tmp0 + tmp1;
-    tmp11 = tmp0 - tmp1;
-
-    tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[4]);
-    tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[3]);
-
-    /* Apply unsigned->signed conversion */
-    dataptr[0] = (DCTELEM)
-      ((tmp10 + tmp2 - 5 * CENTERJSAMPLE) << PASS1_BITS);
-    tmp11 = MULTIPLY(tmp11, FIX(0.790569415));          /* (c2+c4)/2 */
-    tmp10 -= tmp2 << 2;
-    tmp10 = MULTIPLY(tmp10, FIX(0.353553391));          /* (c2-c4)/2 */
-    dataptr[2] = (DCTELEM) DESCALE(tmp11 + tmp10, CONST_BITS-PASS1_BITS);
-    dataptr[4] = (DCTELEM) DESCALE(tmp11 - tmp10, CONST_BITS-PASS1_BITS);
-
-    /* Odd part */
-
-    tmp10 = MULTIPLY(tmp0 + tmp1, FIX(0.831253876));    /* c3 */
-
-    dataptr[1] = (DCTELEM)
-      DESCALE(tmp10 + MULTIPLY(tmp0, FIX(0.513743148)), /* c1-c3 */
-	      CONST_BITS-PASS1_BITS);
-    dataptr[3] = (DCTELEM)
-      DESCALE(tmp10 - MULTIPLY(tmp1, FIX(2.176250899)), /* c1+c3 */
-	      CONST_BITS-PASS1_BITS);
-
-    ctr++;
-
-    if (ctr != DCTSIZE) {
-      if (ctr == 10)
-	break;			/* Done. */
-      dataptr += DCTSIZE;	/* advance pointer to next row */
-    } else
-      dataptr = workspace;	/* switch pointer to extended workspace */
-  }
-
-  /* Pass 2: process columns.
-   * We remove the PASS1_BITS scaling, but leave the results scaled up
-   * by an overall factor of 8.
-   * We must also scale the output by (8/5)*(8/10) = 32/25, which we
-   * fold into the constant multipliers:
-   * 10-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/20) * 32/25.
-   */
-
-  dataptr = data;
-  wsptr = workspace;
-  for (ctr = 0; ctr < 5; ctr++) {
-    /* Even part */
-
-    tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*1];
-    tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*0];
-    tmp12 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*7];
-    tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*6];
-    tmp4 = dataptr[DCTSIZE*4] + dataptr[DCTSIZE*5];
-
-    tmp10 = tmp0 + tmp4;
-    tmp13 = tmp0 - tmp4;
-    tmp11 = tmp1 + tmp3;
-    tmp14 = tmp1 - tmp3;
-
-    tmp0 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*1];
-    tmp1 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*0];
-    tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*7];
-    tmp3 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*6];
-    tmp4 = dataptr[DCTSIZE*4] - dataptr[DCTSIZE*5];
-
-    dataptr[DCTSIZE*0] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp10 + tmp11 + tmp12, FIX(1.28)), /* 32/25 */
-	      CONST_BITS+PASS1_BITS);
-    tmp12 += tmp12;
-    dataptr[DCTSIZE*4] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.464477191)) - /* c4 */
-	      MULTIPLY(tmp11 - tmp12, FIX(0.559380511)),  /* c8 */
-	      CONST_BITS+PASS1_BITS);
-    tmp10 = MULTIPLY(tmp13 + tmp14, FIX(1.064004961));    /* c6 */
-    dataptr[DCTSIZE*2] = (DCTELEM)
-      DESCALE(tmp10 + MULTIPLY(tmp13, FIX(0.657591230)),  /* c2-c6 */
-	      CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*6] = (DCTELEM)
-      DESCALE(tmp10 - MULTIPLY(tmp14, FIX(2.785601151)),  /* c2+c6 */
-	      CONST_BITS+PASS1_BITS);
-
-    /* Odd part */
-
-    tmp10 = tmp0 + tmp4;
-    tmp11 = tmp1 - tmp3;
-    dataptr[DCTSIZE*5] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp10 - tmp11 - tmp2, FIX(1.28)),  /* 32/25 */
-	      CONST_BITS+PASS1_BITS);
-    tmp2 = MULTIPLY(tmp2, FIX(1.28));                     /* 32/25 */
-    dataptr[DCTSIZE*1] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp0, FIX(1.787906876)) +          /* c1 */
-	      MULTIPLY(tmp1, FIX(1.612894094)) + tmp2 +   /* c3 */
-	      MULTIPLY(tmp3, FIX(0.821810588)) +          /* c7 */
-	      MULTIPLY(tmp4, FIX(0.283176630)),           /* c9 */
-	      CONST_BITS+PASS1_BITS);
-    tmp12 = MULTIPLY(tmp0 - tmp4, FIX(1.217352341)) -     /* (c3+c7)/2 */
-	    MULTIPLY(tmp1 + tmp3, FIX(0.752365123));      /* (c1-c9)/2 */
-    tmp13 = MULTIPLY(tmp10 + tmp11, FIX(0.395541753)) +   /* (c3-c7)/2 */
-	    MULTIPLY(tmp11, FIX(0.64)) - tmp2;            /* 16/25 */
-    dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp12 + tmp13, CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp12 - tmp13, CONST_BITS+PASS1_BITS);
-
-    dataptr++;			/* advance pointer to next column */
-    wsptr++;			/* advance pointer to next column */
-  }
-}
-
-
-/*
- * Perform the forward DCT on a 4x8 sample block.
- *
- * 4-point FDCT in pass 1 (rows), 8-point in pass 2 (columns).
- */
-
-GLOBAL(void)
-jpeg_fdct_4x8 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
-{
-  INT32 tmp0, tmp1, tmp2, tmp3;
-  INT32 tmp10, tmp11, tmp12, tmp13;
-  INT32 z1;
-  DCTELEM *dataptr;
-  JSAMPROW elemptr;
-  int ctr;
-  SHIFT_TEMPS
-
-  /* Pre-zero output coefficient block. */
-  MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
-
-  /* Pass 1: process rows. */
-  /* Note results are scaled up by sqrt(8) compared to a true DCT; */
-  /* furthermore, we scale the results by 2**PASS1_BITS. */
-  /* We must also scale the output by 8/4 = 2, which we add here. */
-  /* 4-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16). */
-
-  dataptr = data;
-  for (ctr = 0; ctr < DCTSIZE; ctr++) {
-    elemptr = sample_data[ctr] + start_col;
-
-    /* Even part */
-
-    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[3]);
-    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[2]);
-
-    tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[3]);
-    tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[2]);
-
-    /* Apply unsigned->signed conversion */
-    dataptr[0] = (DCTELEM)
-      ((tmp0 + tmp1 - 4 * CENTERJSAMPLE) << (PASS1_BITS+1));
-    dataptr[2] = (DCTELEM) ((tmp0 - tmp1) << (PASS1_BITS+1));
-
-    /* Odd part */
-
-    tmp0 = MULTIPLY(tmp10 + tmp11, FIX_0_541196100);       /* c6 */
-    /* Add fudge factor here for final descale. */
-    tmp0 += ONE << (CONST_BITS-PASS1_BITS-2);
-
-    dataptr[1] = (DCTELEM)
-      RIGHT_SHIFT(tmp0 + MULTIPLY(tmp10, FIX_0_765366865), /* c2-c6 */
-		  CONST_BITS-PASS1_BITS-1);
-    dataptr[3] = (DCTELEM)
-      RIGHT_SHIFT(tmp0 - MULTIPLY(tmp11, FIX_1_847759065), /* c2+c6 */
-		  CONST_BITS-PASS1_BITS-1);
-
-    dataptr += DCTSIZE;		/* advance pointer to next row */
-  }
-
-  /* Pass 2: process columns.
-   * We remove the PASS1_BITS scaling, but leave the results scaled up
-   * by an overall factor of 8.
-   */
-
-  dataptr = data;
-  for (ctr = 0; ctr < 4; ctr++) {
-    /* Even part per LL&M figure 1 --- note that published figure is faulty;
-     * rotator "sqrt(2)*c1" should be "sqrt(2)*c6".
-     */
-
-    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7];
-    tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6];
-    tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5];
-    tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4];
-
-    /* Add fudge factor here for final descale. */
-    tmp10 = tmp0 + tmp3 + (ONE << (PASS1_BITS-1));
-    tmp12 = tmp0 - tmp3;
-    tmp11 = tmp1 + tmp2;
-    tmp13 = tmp1 - tmp2;
-
-    tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7];
-    tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6];
-    tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5];
-    tmp3 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4];
-
-    dataptr[DCTSIZE*0] = (DCTELEM) RIGHT_SHIFT(tmp10 + tmp11, PASS1_BITS);
-    dataptr[DCTSIZE*4] = (DCTELEM) RIGHT_SHIFT(tmp10 - tmp11, PASS1_BITS);
-
-    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
-    /* Add fudge factor here for final descale. */
-    z1 += ONE << (CONST_BITS+PASS1_BITS-1);
-    dataptr[DCTSIZE*2] = (DCTELEM)
-      RIGHT_SHIFT(z1 + MULTIPLY(tmp12, FIX_0_765366865), CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*6] = (DCTELEM)
-      RIGHT_SHIFT(z1 - MULTIPLY(tmp13, FIX_1_847759065), CONST_BITS+PASS1_BITS);
-
-    /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
-     * 8-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
-     * i0..i3 in the paper are tmp0..tmp3 here.
-     */
-
-    tmp10 = tmp0 + tmp3;
-    tmp11 = tmp1 + tmp2;
-    tmp12 = tmp0 + tmp2;
-    tmp13 = tmp1 + tmp3;
-    z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602); /*  c3 */
-    /* Add fudge factor here for final descale. */
-    z1 += ONE << (CONST_BITS+PASS1_BITS-1);
-
-    tmp0  = MULTIPLY(tmp0,    FIX_1_501321110);    /*  c1+c3-c5-c7 */
-    tmp1  = MULTIPLY(tmp1,    FIX_3_072711026);    /*  c1+c3+c5-c7 */
-    tmp2  = MULTIPLY(tmp2,    FIX_2_053119869);    /*  c1+c3-c5+c7 */
-    tmp3  = MULTIPLY(tmp3,    FIX_0_298631336);    /* -c1+c3+c5-c7 */
-    tmp10 = MULTIPLY(tmp10, - FIX_0_899976223);    /*  c7-c3 */
-    tmp11 = MULTIPLY(tmp11, - FIX_2_562915447);    /* -c1-c3 */
-    tmp12 = MULTIPLY(tmp12, - FIX_0_390180644);    /*  c5-c3 */
-    tmp13 = MULTIPLY(tmp13, - FIX_1_961570560);    /* -c3-c5 */
-
-    tmp12 += z1;
-    tmp13 += z1;
-
-    dataptr[DCTSIZE*1] = (DCTELEM)
-      RIGHT_SHIFT(tmp0 + tmp10 + tmp12, CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*3] = (DCTELEM)
-      RIGHT_SHIFT(tmp1 + tmp11 + tmp13, CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*5] = (DCTELEM)
-      RIGHT_SHIFT(tmp2 + tmp11 + tmp12, CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*7] = (DCTELEM)
-      RIGHT_SHIFT(tmp3 + tmp10 + tmp13, CONST_BITS+PASS1_BITS);
-
-    dataptr++;			/* advance pointer to next column */
-  }
-}
-
-
-/*
- * Perform the forward DCT on a 3x6 sample block.
- *
- * 3-point FDCT in pass 1 (rows), 6-point in pass 2 (columns).
- */
-
-GLOBAL(void)
-jpeg_fdct_3x6 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
-{
-  INT32 tmp0, tmp1, tmp2;
-  INT32 tmp10, tmp11, tmp12;
-  DCTELEM *dataptr;
-  JSAMPROW elemptr;
-  int ctr;
-  SHIFT_TEMPS
-
-  /* Pre-zero output coefficient block. */
-  MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
-
-  /* Pass 1: process rows. */
-  /* Note results are scaled up by sqrt(8) compared to a true DCT; */
-  /* furthermore, we scale the results by 2**PASS1_BITS. */
-  /* We scale the results further by 2 as part of output adaption */
-  /* scaling for different DCT size. */
-  /* 3-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/6). */
-
-  dataptr = data;
-  for (ctr = 0; ctr < 6; ctr++) {
-    elemptr = sample_data[ctr] + start_col;
-
-    /* Even part */
-
-    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[2]);
-    tmp1 = GETJSAMPLE(elemptr[1]);
-
-    tmp2 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[2]);
-
-    /* Apply unsigned->signed conversion */
-    dataptr[0] = (DCTELEM)
-      ((tmp0 + tmp1 - 3 * CENTERJSAMPLE) << (PASS1_BITS+1));
-    dataptr[2] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp0 - tmp1 - tmp1, FIX(0.707106781)), /* c2 */
-	      CONST_BITS-PASS1_BITS-1);
-
-    /* Odd part */
-
-    dataptr[1] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp2, FIX(1.224744871)),               /* c1 */
-	      CONST_BITS-PASS1_BITS-1);
-
-    dataptr += DCTSIZE;		/* advance pointer to next row */
-  }
-
-  /* Pass 2: process columns.
-   * We remove the PASS1_BITS scaling, but leave the results scaled up
-   * by an overall factor of 8.
-   * We must also scale the output by (8/6)*(8/3) = 32/9, which we partially
-   * fold into the constant multipliers (other part was done in pass 1):
-   * 6-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/12) * 16/9.
-   */
-
-  dataptr = data;
-  for (ctr = 0; ctr < 3; ctr++) {
-    /* Even part */
-
-    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*5];
-    tmp11 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*4];
-    tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*3];
-
-    tmp10 = tmp0 + tmp2;
-    tmp12 = tmp0 - tmp2;
-
-    tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*5];
-    tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*4];
-    tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*3];
-
-    dataptr[DCTSIZE*0] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp10 + tmp11, FIX(1.777777778)),         /* 16/9 */
-	      CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*2] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp12, FIX(2.177324216)),                 /* c2 */
-	      CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*4] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp10 - tmp11 - tmp11, FIX(1.257078722)), /* c4 */
-	      CONST_BITS+PASS1_BITS);
-
-    /* Odd part */
-
-    tmp10 = MULTIPLY(tmp0 + tmp2, FIX(0.650711829));             /* c5 */
-
-    dataptr[DCTSIZE*1] = (DCTELEM)
-      DESCALE(tmp10 + MULTIPLY(tmp0 + tmp1, FIX(1.777777778)),   /* 16/9 */
-	      CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*3] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp0 - tmp1 - tmp2, FIX(1.777777778)),    /* 16/9 */
-	      CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*5] = (DCTELEM)
-      DESCALE(tmp10 + MULTIPLY(tmp2 - tmp1, FIX(1.777777778)),   /* 16/9 */
-	      CONST_BITS+PASS1_BITS);
-
-    dataptr++;			/* advance pointer to next column */
-  }
-}
-
-
-/*
- * Perform the forward DCT on a 2x4 sample block.
- *
- * 2-point FDCT in pass 1 (rows), 4-point in pass 2 (columns).
- */
-
-GLOBAL(void)
-jpeg_fdct_2x4 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
-{
-  INT32 tmp0, tmp1;
-  INT32 tmp10, tmp11;
-  DCTELEM *dataptr;
-  JSAMPROW elemptr;
-  int ctr;
-  SHIFT_TEMPS
-
-  /* Pre-zero output coefficient block. */
-  MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
-
-  /* Pass 1: process rows. */
-  /* Note results are scaled up by sqrt(8) compared to a true DCT. */
-  /* We must also scale the output by (8/2)*(8/4) = 2**3, which we add here. */
-
-  dataptr = data;
-  for (ctr = 0; ctr < 4; ctr++) {
-    elemptr = sample_data[ctr] + start_col;
-
-    /* Even part */
-
-    tmp0 = GETJSAMPLE(elemptr[0]);
-    tmp1 = GETJSAMPLE(elemptr[1]);
-
-    /* Apply unsigned->signed conversion */
-    dataptr[0] = (DCTELEM) ((tmp0 + tmp1 - 2 * CENTERJSAMPLE) << 3);
-
-    /* Odd part */
-
-    dataptr[1] = (DCTELEM) ((tmp0 - tmp1) << 3);
-
-    dataptr += DCTSIZE;		/* advance pointer to next row */
-  }
-
-  /* Pass 2: process columns.
-   * We leave the results scaled up by an overall factor of 8.
-   * 4-point FDCT kernel,
-   * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point FDCT].
-   */
-
-  dataptr = data;
-  for (ctr = 0; ctr < 2; ctr++) {
-    /* Even part */
-
-    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*3];
-    tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*2];
-
-    tmp10 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*3];
-    tmp11 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*2];
-
-    dataptr[DCTSIZE*0] = (DCTELEM) (tmp0 + tmp1);
-    dataptr[DCTSIZE*2] = (DCTELEM) (tmp0 - tmp1);
-
-    /* Odd part */
-
-    tmp0 = MULTIPLY(tmp10 + tmp11, FIX_0_541196100);       /* c6 */
-    /* Add fudge factor here for final descale. */
-    tmp0 += ONE << (CONST_BITS-1);
-
-    dataptr[DCTSIZE*1] = (DCTELEM)
-      RIGHT_SHIFT(tmp0 + MULTIPLY(tmp10, FIX_0_765366865), /* c2-c6 */
-		  CONST_BITS);
-    dataptr[DCTSIZE*3] = (DCTELEM)
-      RIGHT_SHIFT(tmp0 - MULTIPLY(tmp11, FIX_1_847759065), /* c2+c6 */
-		  CONST_BITS);
-
-    dataptr++;			/* advance pointer to next column */
-  }
-}
-
-
-/*
- * Perform the forward DCT on a 1x2 sample block.
- *
- * 1-point FDCT in pass 1 (rows), 2-point in pass 2 (columns).
- */
-
-GLOBAL(void)
-jpeg_fdct_1x2 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
-{
-  INT32 tmp0, tmp1;
-
-  /* Pre-zero output coefficient block. */
-  MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
-
-  tmp0 = GETJSAMPLE(sample_data[0][start_col]);
-  tmp1 = GETJSAMPLE(sample_data[1][start_col]);
-
-  /* We leave the results scaled up by an overall factor of 8.
-   * We must also scale the output by (8/1)*(8/2) = 2**5.
-   */
-
-  /* Even part */
-  /* Apply unsigned->signed conversion */
-  data[DCTSIZE*0] = (DCTELEM) ((tmp0 + tmp1 - 2 * CENTERJSAMPLE) << 5);
-
-  /* Odd part */
-  data[DCTSIZE*1] = (DCTELEM) ((tmp0 - tmp1) << 5);
-}
-
-#endif /* DCT_SCALING_SUPPORTED */
-#endif /* DCT_ISLOW_SUPPORTED */
+/*
+ * jfdctint.c
+ *
+ * Copyright (C) 1991-1996, Thomas G. Lane.
+ * Modification developed 2003-2009 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains a slow-but-accurate integer implementation of the
+ * forward DCT (Discrete Cosine Transform).
+ *
+ * A 2-D DCT can be done by 1-D DCT on each row followed by 1-D DCT
+ * on each column.  Direct algorithms are also available, but they are
+ * much more complex and seem not to be any faster when reduced to code.
+ *
+ * This implementation is based on an algorithm described in
+ *   C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT
+ *   Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics,
+ *   Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991.
+ * The primary algorithm described there uses 11 multiplies and 29 adds.
+ * We use their alternate method with 12 multiplies and 32 adds.
+ * The advantage of this method is that no data path contains more than one
+ * multiplication; this allows a very simple and accurate implementation in
+ * scaled fixed-point arithmetic, with a minimal number of shifts.
+ *
+ * We also provide FDCT routines with various input sample block sizes for
+ * direct resolution reduction or enlargement and for direct resolving the
+ * common 2x1 and 1x2 subsampling cases without additional resampling: NxN
+ * (N=1...16), 2NxN, and Nx2N (N=1...8) pixels for one 8x8 output DCT block.
+ *
+ * For N<8 we fill the remaining block coefficients with zero.
+ * For N>8 we apply a partial N-point FDCT on the input samples, computing
+ * just the lower 8 frequency coefficients and discarding the rest.
+ *
+ * We must scale the output coefficients of the N-point FDCT appropriately
+ * to the standard 8-point FDCT level by 8/N per 1-D pass.  This scaling
+ * is folded into the constant multipliers (pass 2) and/or final/initial
+ * shifting.
+ *
+ * CAUTION: We rely on the FIX() macro except for the N=1,2,4,8 cases
+ * since there would be too many additional constants to pre-calculate.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jdct.h"		/* Private declarations for DCT subsystem */
+
+#ifdef DCT_ISLOW_SUPPORTED
+
+
+/*
+ * This module is specialized to the case DCTSIZE = 8.
+ */
+
+#if DCTSIZE != 8
+  Sorry, this code only copes with 8x8 DCT blocks. /* deliberate syntax err */
+#endif
+
+
+/*
+ * The poop on this scaling stuff is as follows:
+ *
+ * Each 1-D DCT step produces outputs which are a factor of sqrt(N)
+ * larger than the true DCT outputs.  The final outputs are therefore
+ * a factor of N larger than desired; since N=8 this can be cured by
+ * a simple right shift at the end of the algorithm.  The advantage of
+ * this arrangement is that we save two multiplications per 1-D DCT,
+ * because the y0 and y4 outputs need not be divided by sqrt(N).
+ * In the IJG code, this factor of 8 is removed by the quantization step
+ * (in jcdctmgr.c), NOT in this module.
+ *
+ * We have to do addition and subtraction of the integer inputs, which
+ * is no problem, and multiplication by fractional constants, which is
+ * a problem to do in integer arithmetic.  We multiply all the constants
+ * by CONST_SCALE and convert them to integer constants (thus retaining
+ * CONST_BITS bits of precision in the constants).  After doing a
+ * multiplication we have to divide the product by CONST_SCALE, with proper
+ * rounding, to produce the correct output.  This division can be done
+ * cheaply as a right shift of CONST_BITS bits.  We postpone shifting
+ * as long as possible so that partial sums can be added together with
+ * full fractional precision.
+ *
+ * The outputs of the first pass are scaled up by PASS1_BITS bits so that
+ * they are represented to better-than-integral precision.  These outputs
+ * require BITS_IN_JSAMPLE + PASS1_BITS + 3 bits; this fits in a 16-bit word
+ * with the recommended scaling.  (For 12-bit sample data, the intermediate
+ * array is INT32 anyway.)
+ *
+ * To avoid overflow of the 32-bit intermediate results in pass 2, we must
+ * have BITS_IN_JSAMPLE + CONST_BITS + PASS1_BITS <= 26.  Error analysis
+ * shows that the values given below are the most effective.
+ */
+
+#if BITS_IN_JSAMPLE == 8
+#define CONST_BITS  13
+#define PASS1_BITS  2
+#else
+#define CONST_BITS  13
+#define PASS1_BITS  1		/* lose a little precision to avoid overflow */
+#endif
+
+/* Some C compilers fail to reduce "FIX(constant)" at compile time, thus
+ * causing a lot of useless floating-point operations at run time.
+ * To get around this we use the following pre-calculated constants.
+ * If you change CONST_BITS you may want to add appropriate values.
+ * (With a reasonable C compiler, you can just rely on the FIX() macro...)
+ */
+
+#if CONST_BITS == 13
+#define FIX_0_298631336  ((INT32)  2446)	/* FIX(0.298631336) */
+#define FIX_0_390180644  ((INT32)  3196)	/* FIX(0.390180644) */
+#define FIX_0_541196100  ((INT32)  4433)	/* FIX(0.541196100) */
+#define FIX_0_765366865  ((INT32)  6270)	/* FIX(0.765366865) */
+#define FIX_0_899976223  ((INT32)  7373)	/* FIX(0.899976223) */
+#define FIX_1_175875602  ((INT32)  9633)	/* FIX(1.175875602) */
+#define FIX_1_501321110  ((INT32)  12299)	/* FIX(1.501321110) */
+#define FIX_1_847759065  ((INT32)  15137)	/* FIX(1.847759065) */
+#define FIX_1_961570560  ((INT32)  16069)	/* FIX(1.961570560) */
+#define FIX_2_053119869  ((INT32)  16819)	/* FIX(2.053119869) */
+#define FIX_2_562915447  ((INT32)  20995)	/* FIX(2.562915447) */
+#define FIX_3_072711026  ((INT32)  25172)	/* FIX(3.072711026) */
+#else
+#define FIX_0_298631336  FIX(0.298631336)
+#define FIX_0_390180644  FIX(0.390180644)
+#define FIX_0_541196100  FIX(0.541196100)
+#define FIX_0_765366865  FIX(0.765366865)
+#define FIX_0_899976223  FIX(0.899976223)
+#define FIX_1_175875602  FIX(1.175875602)
+#define FIX_1_501321110  FIX(1.501321110)
+#define FIX_1_847759065  FIX(1.847759065)
+#define FIX_1_961570560  FIX(1.961570560)
+#define FIX_2_053119869  FIX(2.053119869)
+#define FIX_2_562915447  FIX(2.562915447)
+#define FIX_3_072711026  FIX(3.072711026)
+#endif
+
+
+/* Multiply an INT32 variable by an INT32 constant to yield an INT32 result.
+ * For 8-bit samples with the recommended scaling, all the variable
+ * and constant values involved are no more than 16 bits wide, so a
+ * 16x16->32 bit multiply can be used instead of a full 32x32 multiply.
+ * For 12-bit samples, a full 32-bit multiplication will be needed.
+ */
+
+#if BITS_IN_JSAMPLE == 8
+#define MULTIPLY(var,const)  MULTIPLY16C16(var,const)
+#else
+#define MULTIPLY(var,const)  ((var) * (const))
+#endif
+
+
+/*
+ * Perform the forward DCT on one block of samples.
+ */
+
+GLOBAL(void)
+jpeg_fdct_islow (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
+{
+  INT32 tmp0, tmp1, tmp2, tmp3;
+  INT32 tmp10, tmp11, tmp12, tmp13;
+  INT32 z1;
+  DCTELEM *dataptr;
+  JSAMPROW elemptr;
+  int ctr;
+  SHIFT_TEMPS
+
+  /* Pass 1: process rows. */
+  /* Note results are scaled up by sqrt(8) compared to a true DCT; */
+  /* furthermore, we scale the results by 2**PASS1_BITS. */
+
+  dataptr = data;
+  for (ctr = 0; ctr < DCTSIZE; ctr++) {
+    elemptr = sample_data[ctr] + start_col;
+
+    /* Even part per LL&M figure 1 --- note that published figure is faulty;
+     * rotator "sqrt(2)*c1" should be "sqrt(2)*c6".
+     */
+
+    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[7]);
+    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[6]);
+    tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[5]);
+    tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[4]);
+
+    tmp10 = tmp0 + tmp3;
+    tmp12 = tmp0 - tmp3;
+    tmp11 = tmp1 + tmp2;
+    tmp13 = tmp1 - tmp2;
+
+    tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[7]);
+    tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[6]);
+    tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[5]);
+    tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[4]);
+
+    /* Apply unsigned->signed conversion */
+    dataptr[0] = (DCTELEM) ((tmp10 + tmp11 - 8 * CENTERJSAMPLE) << PASS1_BITS);
+    dataptr[4] = (DCTELEM) ((tmp10 - tmp11) << PASS1_BITS);
+
+    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
+    /* Add fudge factor here for final descale. */
+    z1 += ONE << (CONST_BITS-PASS1_BITS-1);
+    dataptr[2] = (DCTELEM) RIGHT_SHIFT(z1 + MULTIPLY(tmp12, FIX_0_765366865),
+				       CONST_BITS-PASS1_BITS);
+    dataptr[6] = (DCTELEM) RIGHT_SHIFT(z1 - MULTIPLY(tmp13, FIX_1_847759065),
+				       CONST_BITS-PASS1_BITS);
+
+    /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
+     * cK represents sqrt(2) * cos(K*pi/16).
+     * i0..i3 in the paper are tmp0..tmp3 here.
+     */
+
+    tmp10 = tmp0 + tmp3;
+    tmp11 = tmp1 + tmp2;
+    tmp12 = tmp0 + tmp2;
+    tmp13 = tmp1 + tmp3;
+    z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602); /*  c3 */
+    /* Add fudge factor here for final descale. */
+    z1 += ONE << (CONST_BITS-PASS1_BITS-1);
+
+    tmp0  = MULTIPLY(tmp0,    FIX_1_501321110);    /*  c1+c3-c5-c7 */
+    tmp1  = MULTIPLY(tmp1,    FIX_3_072711026);    /*  c1+c3+c5-c7 */
+    tmp2  = MULTIPLY(tmp2,    FIX_2_053119869);    /*  c1+c3-c5+c7 */
+    tmp3  = MULTIPLY(tmp3,    FIX_0_298631336);    /* -c1+c3+c5-c7 */
+    tmp10 = MULTIPLY(tmp10, - FIX_0_899976223);    /*  c7-c3 */
+    tmp11 = MULTIPLY(tmp11, - FIX_2_562915447);    /* -c1-c3 */
+    tmp12 = MULTIPLY(tmp12, - FIX_0_390180644);    /*  c5-c3 */
+    tmp13 = MULTIPLY(tmp13, - FIX_1_961570560);    /* -c3-c5 */
+
+    tmp12 += z1;
+    tmp13 += z1;
+
+    dataptr[1] = (DCTELEM)
+      RIGHT_SHIFT(tmp0 + tmp10 + tmp12, CONST_BITS-PASS1_BITS);
+    dataptr[3] = (DCTELEM)
+      RIGHT_SHIFT(tmp1 + tmp11 + tmp13, CONST_BITS-PASS1_BITS);
+    dataptr[5] = (DCTELEM)
+      RIGHT_SHIFT(tmp2 + tmp11 + tmp12, CONST_BITS-PASS1_BITS);
+    dataptr[7] = (DCTELEM)
+      RIGHT_SHIFT(tmp3 + tmp10 + tmp13, CONST_BITS-PASS1_BITS);
+
+    dataptr += DCTSIZE;		/* advance pointer to next row */
+  }
+
+  /* Pass 2: process columns.
+   * We remove the PASS1_BITS scaling, but leave the results scaled up
+   * by an overall factor of 8.
+   */
+
+  dataptr = data;
+  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
+    /* Even part per LL&M figure 1 --- note that published figure is faulty;
+     * rotator "sqrt(2)*c1" should be "sqrt(2)*c6".
+     */
+
+    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7];
+    tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6];
+    tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5];
+    tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4];
+
+    /* Add fudge factor here for final descale. */
+    tmp10 = tmp0 + tmp3 + (ONE << (PASS1_BITS-1));
+    tmp12 = tmp0 - tmp3;
+    tmp11 = tmp1 + tmp2;
+    tmp13 = tmp1 - tmp2;
+
+    tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7];
+    tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6];
+    tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5];
+    tmp3 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4];
+
+    dataptr[DCTSIZE*0] = (DCTELEM) RIGHT_SHIFT(tmp10 + tmp11, PASS1_BITS);
+    dataptr[DCTSIZE*4] = (DCTELEM) RIGHT_SHIFT(tmp10 - tmp11, PASS1_BITS);
+
+    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
+    /* Add fudge factor here for final descale. */
+    z1 += ONE << (CONST_BITS+PASS1_BITS-1);
+    dataptr[DCTSIZE*2] = (DCTELEM)
+      RIGHT_SHIFT(z1 + MULTIPLY(tmp12, FIX_0_765366865), CONST_BITS+PASS1_BITS);
+    dataptr[DCTSIZE*6] = (DCTELEM)
+      RIGHT_SHIFT(z1 - MULTIPLY(tmp13, FIX_1_847759065), CONST_BITS+PASS1_BITS);
+
+    /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
+     * cK represents sqrt(2) * cos(K*pi/16).
+     * i0..i3 in the paper are tmp0..tmp3 here.
+     */
+
+    tmp10 = tmp0 + tmp3;
+    tmp11 = tmp1 + tmp2;
+    tmp12 = tmp0 + tmp2;
+    tmp13 = tmp1 + tmp3;
+    z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602); /*  c3 */
+    /* Add fudge factor here for final descale. */
+    z1 += ONE << (CONST_BITS+PASS1_BITS-1);
+
+    tmp0  = MULTIPLY(tmp0,    FIX_1_501321110);    /*  c1+c3-c5-c7 */
+    tmp1  = MULTIPLY(tmp1,    FIX_3_072711026);    /*  c1+c3+c5-c7 */
+    tmp2  = MULTIPLY(tmp2,    FIX_2_053119869);    /*  c1+c3-c5+c7 */
+    tmp3  = MULTIPLY(tmp3,    FIX_0_298631336);    /* -c1+c3+c5-c7 */
+    tmp10 = MULTIPLY(tmp10, - FIX_0_899976223);    /*  c7-c3 */
+    tmp11 = MULTIPLY(tmp11, - FIX_2_562915447);    /* -c1-c3 */
+    tmp12 = MULTIPLY(tmp12, - FIX_0_390180644);    /*  c5-c3 */
+    tmp13 = MULTIPLY(tmp13, - FIX_1_961570560);    /* -c3-c5 */
+
+    tmp12 += z1;
+    tmp13 += z1;
+
+    dataptr[DCTSIZE*1] = (DCTELEM)
+      RIGHT_SHIFT(tmp0 + tmp10 + tmp12, CONST_BITS+PASS1_BITS);
+    dataptr[DCTSIZE*3] = (DCTELEM)
+      RIGHT_SHIFT(tmp1 + tmp11 + tmp13, CONST_BITS+PASS1_BITS);
+    dataptr[DCTSIZE*5] = (DCTELEM)
+      RIGHT_SHIFT(tmp2 + tmp11 + tmp12, CONST_BITS+PASS1_BITS);
+    dataptr[DCTSIZE*7] = (DCTELEM)
+      RIGHT_SHIFT(tmp3 + tmp10 + tmp13, CONST_BITS+PASS1_BITS);
+
+    dataptr++;			/* advance pointer to next column */
+  }
+}
+
+#ifdef DCT_SCALING_SUPPORTED
+
+
+/*
+ * Perform the forward DCT on a 7x7 sample block.
+ */
+
+GLOBAL(void)
+jpeg_fdct_7x7 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
+{
+  INT32 tmp0, tmp1, tmp2, tmp3;
+  INT32 tmp10, tmp11, tmp12;
+  INT32 z1, z2, z3;
+  DCTELEM *dataptr;
+  JSAMPROW elemptr;
+  int ctr;
+  SHIFT_TEMPS
+
+  /* Pre-zero output coefficient block. */
+  MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
+
+  /* Pass 1: process rows. */
+  /* Note results are scaled up by sqrt(8) compared to a true DCT; */
+  /* furthermore, we scale the results by 2**PASS1_BITS. */
+  /* cK represents sqrt(2) * cos(K*pi/14). */
+
+  dataptr = data;
+  for (ctr = 0; ctr < 7; ctr++) {
+    elemptr = sample_data[ctr] + start_col;
+
+    /* Even part */
+
+    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[6]);
+    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[5]);
+    tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[4]);
+    tmp3 = GETJSAMPLE(elemptr[3]);
+
+    tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[6]);
+    tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[5]);
+    tmp12 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[4]);
+
+    z1 = tmp0 + tmp2;
+    /* Apply unsigned->signed conversion */
+    dataptr[0] = (DCTELEM)
+      ((z1 + tmp1 + tmp3 - 7 * CENTERJSAMPLE) << PASS1_BITS);
+    tmp3 += tmp3;
+    z1 -= tmp3;
+    z1 -= tmp3;
+    z1 = MULTIPLY(z1, FIX(0.353553391));                /* (c2+c6-c4)/2 */
+    z2 = MULTIPLY(tmp0 - tmp2, FIX(0.920609002));       /* (c2+c4-c6)/2 */
+    z3 = MULTIPLY(tmp1 - tmp2, FIX(0.314692123));       /* c6 */
+    dataptr[2] = (DCTELEM) DESCALE(z1 + z2 + z3, CONST_BITS-PASS1_BITS);
+    z1 -= z2;
+    z2 = MULTIPLY(tmp0 - tmp1, FIX(0.881747734));       /* c4 */
+    dataptr[4] = (DCTELEM)
+      DESCALE(z2 + z3 - MULTIPLY(tmp1 - tmp3, FIX(0.707106781)), /* c2+c6-c4 */
+	      CONST_BITS-PASS1_BITS);
+    dataptr[6] = (DCTELEM) DESCALE(z1 + z2, CONST_BITS-PASS1_BITS);
+
+    /* Odd part */
+
+    tmp1 = MULTIPLY(tmp10 + tmp11, FIX(0.935414347));   /* (c3+c1-c5)/2 */
+    tmp2 = MULTIPLY(tmp10 - tmp11, FIX(0.170262339));   /* (c3+c5-c1)/2 */
+    tmp0 = tmp1 - tmp2;
+    tmp1 += tmp2;
+    tmp2 = MULTIPLY(tmp11 + tmp12, - FIX(1.378756276)); /* -c1 */
+    tmp1 += tmp2;
+    tmp3 = MULTIPLY(tmp10 + tmp12, FIX(0.613604268));   /* c5 */
+    tmp0 += tmp3;
+    tmp2 += tmp3 + MULTIPLY(tmp12, FIX(1.870828693));   /* c3+c1-c5 */
+
+    dataptr[1] = (DCTELEM) DESCALE(tmp0, CONST_BITS-PASS1_BITS);
+    dataptr[3] = (DCTELEM) DESCALE(tmp1, CONST_BITS-PASS1_BITS);
+    dataptr[5] = (DCTELEM) DESCALE(tmp2, CONST_BITS-PASS1_BITS);
+
+    dataptr += DCTSIZE;		/* advance pointer to next row */
+  }
+
+  /* Pass 2: process columns.
+   * We remove the PASS1_BITS scaling, but leave the results scaled up
+   * by an overall factor of 8.
+   * We must also scale the output by (8/7)**2 = 64/49, which we fold
+   * into the constant multipliers:
+   * cK now represents sqrt(2) * cos(K*pi/14) * 64/49.
+   */
+
+  dataptr = data;
+  for (ctr = 0; ctr < 7; ctr++) {
+    /* Even part */
+
+    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*6];
+    tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*5];
+    tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*4];
+    tmp3 = dataptr[DCTSIZE*3];
+
+    tmp10 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*6];
+    tmp11 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*5];
+    tmp12 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*4];
+
+    z1 = tmp0 + tmp2;
+    dataptr[DCTSIZE*0] = (DCTELEM)
+      DESCALE(MULTIPLY(z1 + tmp1 + tmp3, FIX(1.306122449)), /* 64/49 */
+	      CONST_BITS+PASS1_BITS);
+    tmp3 += tmp3;
+    z1 -= tmp3;
+    z1 -= tmp3;
+    z1 = MULTIPLY(z1, FIX(0.461784020));                /* (c2+c6-c4)/2 */
+    z2 = MULTIPLY(tmp0 - tmp2, FIX(1.202428084));       /* (c2+c4-c6)/2 */
+    z3 = MULTIPLY(tmp1 - tmp2, FIX(0.411026446));       /* c6 */
+    dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(z1 + z2 + z3, CONST_BITS+PASS1_BITS);
+    z1 -= z2;
+    z2 = MULTIPLY(tmp0 - tmp1, FIX(1.151670509));       /* c4 */
+    dataptr[DCTSIZE*4] = (DCTELEM)
+      DESCALE(z2 + z3 - MULTIPLY(tmp1 - tmp3, FIX(0.923568041)), /* c2+c6-c4 */
+	      CONST_BITS+PASS1_BITS);
+    dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(z1 + z2, CONST_BITS+PASS1_BITS);
+
+    /* Odd part */
+
+    tmp1 = MULTIPLY(tmp10 + tmp11, FIX(1.221765677));   /* (c3+c1-c5)/2 */
+    tmp2 = MULTIPLY(tmp10 - tmp11, FIX(0.222383464));   /* (c3+c5-c1)/2 */
+    tmp0 = tmp1 - tmp2;
+    tmp1 += tmp2;
+    tmp2 = MULTIPLY(tmp11 + tmp12, - FIX(1.800824523)); /* -c1 */
+    tmp1 += tmp2;
+    tmp3 = MULTIPLY(tmp10 + tmp12, FIX(0.801442310));   /* c5 */
+    tmp0 += tmp3;
+    tmp2 += tmp3 + MULTIPLY(tmp12, FIX(2.443531355));   /* c3+c1-c5 */
+
+    dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp0, CONST_BITS+PASS1_BITS);
+    dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp1, CONST_BITS+PASS1_BITS);
+    dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp2, CONST_BITS+PASS1_BITS);
+
+    dataptr++;			/* advance pointer to next column */
+  }
+}
+
+
+/*
+ * Perform the forward DCT on a 6x6 sample block.
+ */
+
+GLOBAL(void)
+jpeg_fdct_6x6 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
+{
+  INT32 tmp0, tmp1, tmp2;
+  INT32 tmp10, tmp11, tmp12;
+  DCTELEM *dataptr;
+  JSAMPROW elemptr;
+  int ctr;
+  SHIFT_TEMPS
+
+  /* Pre-zero output coefficient block. */
+  MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
+
+  /* Pass 1: process rows. */
+  /* Note results are scaled up by sqrt(8) compared to a true DCT; */
+  /* furthermore, we scale the results by 2**PASS1_BITS. */
+  /* cK represents sqrt(2) * cos(K*pi/12). */
+
+  dataptr = data;
+  for (ctr = 0; ctr < 6; ctr++) {
+    elemptr = sample_data[ctr] + start_col;
+
+    /* Even part */
+
+    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[5]);
+    tmp11 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[4]);
+    tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[3]);
+
+    tmp10 = tmp0 + tmp2;
+    tmp12 = tmp0 - tmp2;
+
+    tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[5]);
+    tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[4]);
+    tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[3]);
+
+    /* Apply unsigned->signed conversion */
+    dataptr[0] = (DCTELEM)
+      ((tmp10 + tmp11 - 6 * CENTERJSAMPLE) << PASS1_BITS);
+    dataptr[2] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp12, FIX(1.224744871)),                 /* c2 */
+	      CONST_BITS-PASS1_BITS);
+    dataptr[4] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp10 - tmp11 - tmp11, FIX(0.707106781)), /* c4 */
+	      CONST_BITS-PASS1_BITS);
+
+    /* Odd part */
+
+    tmp10 = DESCALE(MULTIPLY(tmp0 + tmp2, FIX(0.366025404)),     /* c5 */
+		    CONST_BITS-PASS1_BITS);
+
+    dataptr[1] = (DCTELEM) (tmp10 + ((tmp0 + tmp1) << PASS1_BITS));
+    dataptr[3] = (DCTELEM) ((tmp0 - tmp1 - tmp2) << PASS1_BITS);
+    dataptr[5] = (DCTELEM) (tmp10 + ((tmp2 - tmp1) << PASS1_BITS));
+
+    dataptr += DCTSIZE;		/* advance pointer to next row */
+  }
+
+  /* Pass 2: process columns.
+   * We remove the PASS1_BITS scaling, but leave the results scaled up
+   * by an overall factor of 8.
+   * We must also scale the output by (8/6)**2 = 16/9, which we fold
+   * into the constant multipliers:
+   * cK now represents sqrt(2) * cos(K*pi/12) * 16/9.
+   */
+
+  dataptr = data;
+  for (ctr = 0; ctr < 6; ctr++) {
+    /* Even part */
+
+    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*5];
+    tmp11 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*4];
+    tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*3];
+
+    tmp10 = tmp0 + tmp2;
+    tmp12 = tmp0 - tmp2;
+
+    tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*5];
+    tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*4];
+    tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*3];
+
+    dataptr[DCTSIZE*0] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp10 + tmp11, FIX(1.777777778)),         /* 16/9 */
+	      CONST_BITS+PASS1_BITS);
+    dataptr[DCTSIZE*2] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp12, FIX(2.177324216)),                 /* c2 */
+	      CONST_BITS+PASS1_BITS);
+    dataptr[DCTSIZE*4] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp10 - tmp11 - tmp11, FIX(1.257078722)), /* c4 */
+	      CONST_BITS+PASS1_BITS);
+
+    /* Odd part */
+
+    tmp10 = MULTIPLY(tmp0 + tmp2, FIX(0.650711829));             /* c5 */
+
+    dataptr[DCTSIZE*1] = (DCTELEM)
+      DESCALE(tmp10 + MULTIPLY(tmp0 + tmp1, FIX(1.777777778)),   /* 16/9 */
+	      CONST_BITS+PASS1_BITS);
+    dataptr[DCTSIZE*3] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp0 - tmp1 - tmp2, FIX(1.777777778)),    /* 16/9 */
+	      CONST_BITS+PASS1_BITS);
+    dataptr[DCTSIZE*5] = (DCTELEM)
+      DESCALE(tmp10 + MULTIPLY(tmp2 - tmp1, FIX(1.777777778)),   /* 16/9 */
+	      CONST_BITS+PASS1_BITS);
+
+    dataptr++;			/* advance pointer to next column */
+  }
+}
+
+
+/*
+ * Perform the forward DCT on a 5x5 sample block.
+ */
+
+GLOBAL(void)
+jpeg_fdct_5x5 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
+{
+  INT32 tmp0, tmp1, tmp2;
+  INT32 tmp10, tmp11;
+  DCTELEM *dataptr;
+  JSAMPROW elemptr;
+  int ctr;
+  SHIFT_TEMPS
+
+  /* Pre-zero output coefficient block. */
+  MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
+
+  /* Pass 1: process rows. */
+  /* Note results are scaled up by sqrt(8) compared to a true DCT; */
+  /* furthermore, we scale the results by 2**PASS1_BITS. */
+  /* We scale the results further by 2 as part of output adaption */
+  /* scaling for different DCT size. */
+  /* cK represents sqrt(2) * cos(K*pi/10). */
+
+  dataptr = data;
+  for (ctr = 0; ctr < 5; ctr++) {
+    elemptr = sample_data[ctr] + start_col;
+
+    /* Even part */
+
+    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[4]);
+    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[3]);
+    tmp2 = GETJSAMPLE(elemptr[2]);
+
+    tmp10 = tmp0 + tmp1;
+    tmp11 = tmp0 - tmp1;
+
+    tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[4]);
+    tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[3]);
+
+    /* Apply unsigned->signed conversion */
+    dataptr[0] = (DCTELEM)
+      ((tmp10 + tmp2 - 5 * CENTERJSAMPLE) << (PASS1_BITS+1));
+    tmp11 = MULTIPLY(tmp11, FIX(0.790569415));          /* (c2+c4)/2 */
+    tmp10 -= tmp2 << 2;
+    tmp10 = MULTIPLY(tmp10, FIX(0.353553391));          /* (c2-c4)/2 */
+    dataptr[2] = (DCTELEM) DESCALE(tmp11 + tmp10, CONST_BITS-PASS1_BITS-1);
+    dataptr[4] = (DCTELEM) DESCALE(tmp11 - tmp10, CONST_BITS-PASS1_BITS-1);
+
+    /* Odd part */
+
+    tmp10 = MULTIPLY(tmp0 + tmp1, FIX(0.831253876));    /* c3 */
+
+    dataptr[1] = (DCTELEM)
+      DESCALE(tmp10 + MULTIPLY(tmp0, FIX(0.513743148)), /* c1-c3 */
+	      CONST_BITS-PASS1_BITS-1);
+    dataptr[3] = (DCTELEM)
+      DESCALE(tmp10 - MULTIPLY(tmp1, FIX(2.176250899)), /* c1+c3 */
+	      CONST_BITS-PASS1_BITS-1);
+
+    dataptr += DCTSIZE;		/* advance pointer to next row */
+  }
+
+  /* Pass 2: process columns.
+   * We remove the PASS1_BITS scaling, but leave the results scaled up
+   * by an overall factor of 8.
+   * We must also scale the output by (8/5)**2 = 64/25, which we partially
+   * fold into the constant multipliers (other part was done in pass 1):
+   * cK now represents sqrt(2) * cos(K*pi/10) * 32/25.
+   */
+
+  dataptr = data;
+  for (ctr = 0; ctr < 5; ctr++) {
+    /* Even part */
+
+    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*4];
+    tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*3];
+    tmp2 = dataptr[DCTSIZE*2];
+
+    tmp10 = tmp0 + tmp1;
+    tmp11 = tmp0 - tmp1;
+
+    tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*4];
+    tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*3];
+
+    dataptr[DCTSIZE*0] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp10 + tmp2, FIX(1.28)),        /* 32/25 */
+	      CONST_BITS+PASS1_BITS);
+    tmp11 = MULTIPLY(tmp11, FIX(1.011928851));          /* (c2+c4)/2 */
+    tmp10 -= tmp2 << 2;
+    tmp10 = MULTIPLY(tmp10, FIX(0.452548340));          /* (c2-c4)/2 */
+    dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(tmp11 + tmp10, CONST_BITS+PASS1_BITS);
+    dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp11 - tmp10, CONST_BITS+PASS1_BITS);
+
+    /* Odd part */
+
+    tmp10 = MULTIPLY(tmp0 + tmp1, FIX(1.064004961));    /* c3 */
+
+    dataptr[DCTSIZE*1] = (DCTELEM)
+      DESCALE(tmp10 + MULTIPLY(tmp0, FIX(0.657591230)), /* c1-c3 */
+	      CONST_BITS+PASS1_BITS);
+    dataptr[DCTSIZE*3] = (DCTELEM)
+      DESCALE(tmp10 - MULTIPLY(tmp1, FIX(2.785601151)), /* c1+c3 */
+	      CONST_BITS+PASS1_BITS);
+
+    dataptr++;			/* advance pointer to next column */
+  }
+}
+
+
+/*
+ * Perform the forward DCT on a 4x4 sample block.
+ */
+
+GLOBAL(void)
+jpeg_fdct_4x4 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
+{
+  INT32 tmp0, tmp1;
+  INT32 tmp10, tmp11;
+  DCTELEM *dataptr;
+  JSAMPROW elemptr;
+  int ctr;
+  SHIFT_TEMPS
+
+  /* Pre-zero output coefficient block. */
+  MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
+
+  /* Pass 1: process rows. */
+  /* Note results are scaled up by sqrt(8) compared to a true DCT; */
+  /* furthermore, we scale the results by 2**PASS1_BITS. */
+  /* We must also scale the output by (8/4)**2 = 2**2, which we add here. */
+  /* cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point FDCT]. */
+
+  dataptr = data;
+  for (ctr = 0; ctr < 4; ctr++) {
+    elemptr = sample_data[ctr] + start_col;
+
+    /* Even part */
+
+    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[3]);
+    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[2]);
+
+    tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[3]);
+    tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[2]);
+
+    /* Apply unsigned->signed conversion */
+    dataptr[0] = (DCTELEM)
+      ((tmp0 + tmp1 - 4 * CENTERJSAMPLE) << (PASS1_BITS+2));
+    dataptr[2] = (DCTELEM) ((tmp0 - tmp1) << (PASS1_BITS+2));
+
+    /* Odd part */
+
+    tmp0 = MULTIPLY(tmp10 + tmp11, FIX_0_541196100);       /* c6 */
+    /* Add fudge factor here for final descale. */
+    tmp0 += ONE << (CONST_BITS-PASS1_BITS-3);
+
+    dataptr[1] = (DCTELEM)
+      RIGHT_SHIFT(tmp0 + MULTIPLY(tmp10, FIX_0_765366865), /* c2-c6 */
+		  CONST_BITS-PASS1_BITS-2);
+    dataptr[3] = (DCTELEM)
+      RIGHT_SHIFT(tmp0 - MULTIPLY(tmp11, FIX_1_847759065), /* c2+c6 */
+		  CONST_BITS-PASS1_BITS-2);
+
+    dataptr += DCTSIZE;		/* advance pointer to next row */
+  }
+
+  /* Pass 2: process columns.
+   * We remove the PASS1_BITS scaling, but leave the results scaled up
+   * by an overall factor of 8.
+   */
+
+  dataptr = data;
+  for (ctr = 0; ctr < 4; ctr++) {
+    /* Even part */
+
+    /* Add fudge factor here for final descale. */
+    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*3] + (ONE << (PASS1_BITS-1));
+    tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*2];
+
+    tmp10 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*3];
+    tmp11 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*2];
+
+    dataptr[DCTSIZE*0] = (DCTELEM) RIGHT_SHIFT(tmp0 + tmp1, PASS1_BITS);
+    dataptr[DCTSIZE*2] = (DCTELEM) RIGHT_SHIFT(tmp0 - tmp1, PASS1_BITS);
+
+    /* Odd part */
+
+    tmp0 = MULTIPLY(tmp10 + tmp11, FIX_0_541196100);       /* c6 */
+    /* Add fudge factor here for final descale. */
+    tmp0 += ONE << (CONST_BITS+PASS1_BITS-1);
+
+    dataptr[DCTSIZE*1] = (DCTELEM)
+      RIGHT_SHIFT(tmp0 + MULTIPLY(tmp10, FIX_0_765366865), /* c2-c6 */
+		  CONST_BITS+PASS1_BITS);
+    dataptr[DCTSIZE*3] = (DCTELEM)
+      RIGHT_SHIFT(tmp0 - MULTIPLY(tmp11, FIX_1_847759065), /* c2+c6 */
+		  CONST_BITS+PASS1_BITS);
+
+    dataptr++;			/* advance pointer to next column */
+  }
+}
+
+
+/*
+ * Perform the forward DCT on a 3x3 sample block.
+ */
+
+GLOBAL(void)
+jpeg_fdct_3x3 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
+{
+  INT32 tmp0, tmp1, tmp2;
+  DCTELEM *dataptr;
+  JSAMPROW elemptr;
+  int ctr;
+  SHIFT_TEMPS
+
+  /* Pre-zero output coefficient block. */
+  MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
+
+  /* Pass 1: process rows. */
+  /* Note results are scaled up by sqrt(8) compared to a true DCT; */
+  /* furthermore, we scale the results by 2**PASS1_BITS. */
+  /* We scale the results further by 2**2 as part of output adaption */
+  /* scaling for different DCT size. */
+  /* cK represents sqrt(2) * cos(K*pi/6). */
+
+  dataptr = data;
+  for (ctr = 0; ctr < 3; ctr++) {
+    elemptr = sample_data[ctr] + start_col;
+
+    /* Even part */
+
+    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[2]);
+    tmp1 = GETJSAMPLE(elemptr[1]);
+
+    tmp2 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[2]);
+
+    /* Apply unsigned->signed conversion */
+    dataptr[0] = (DCTELEM)
+      ((tmp0 + tmp1 - 3 * CENTERJSAMPLE) << (PASS1_BITS+2));
+    dataptr[2] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp0 - tmp1 - tmp1, FIX(0.707106781)), /* c2 */
+	      CONST_BITS-PASS1_BITS-2);
+
+    /* Odd part */
+
+    dataptr[1] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp2, FIX(1.224744871)),               /* c1 */
+	      CONST_BITS-PASS1_BITS-2);
+
+    dataptr += DCTSIZE;		/* advance pointer to next row */
+  }
+
+  /* Pass 2: process columns.
+   * We remove the PASS1_BITS scaling, but leave the results scaled up
+   * by an overall factor of 8.
+   * We must also scale the output by (8/3)**2 = 64/9, which we partially
+   * fold into the constant multipliers (other part was done in pass 1):
+   * cK now represents sqrt(2) * cos(K*pi/6) * 16/9.
+   */
+
+  dataptr = data;
+  for (ctr = 0; ctr < 3; ctr++) {
+    /* Even part */
+
+    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*2];
+    tmp1 = dataptr[DCTSIZE*1];
+
+    tmp2 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*2];
+
+    dataptr[DCTSIZE*0] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp0 + tmp1, FIX(1.777777778)),        /* 16/9 */
+	      CONST_BITS+PASS1_BITS);
+    dataptr[DCTSIZE*2] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp0 - tmp1 - tmp1, FIX(1.257078722)), /* c2 */
+	      CONST_BITS+PASS1_BITS);
+
+    /* Odd part */
+
+    dataptr[DCTSIZE*1] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp2, FIX(2.177324216)),               /* c1 */
+	      CONST_BITS+PASS1_BITS);
+
+    dataptr++;			/* advance pointer to next column */
+  }
+}
+
+
+/*
+ * Perform the forward DCT on a 2x2 sample block.
+ */
+
+GLOBAL(void)
+jpeg_fdct_2x2 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
+{
+  INT32 tmp0, tmp1, tmp2, tmp3;
+  JSAMPROW elemptr;
+
+  /* Pre-zero output coefficient block. */
+  MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
+
+  /* Pass 1: process rows. */
+  /* Note results are scaled up by sqrt(8) compared to a true DCT. */
+
+  /* Row 0 */
+  elemptr = sample_data[0] + start_col;
+
+  tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[1]);
+  tmp1 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[1]);
+
+  /* Row 1 */
+  elemptr = sample_data[1] + start_col;
+
+  tmp2 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[1]);
+  tmp3 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[1]);
+
+  /* Pass 2: process columns.
+   * We leave the results scaled up by an overall factor of 8.
+   * We must also scale the output by (8/2)**2 = 2**4.
+   */
+
+  /* Column 0 */
+  /* Apply unsigned->signed conversion */
+  data[DCTSIZE*0] = (DCTELEM) ((tmp0 + tmp2 - 4 * CENTERJSAMPLE) << 4);
+  data[DCTSIZE*1] = (DCTELEM) ((tmp0 - tmp2) << 4);
+
+  /* Column 1 */
+  data[DCTSIZE*0+1] = (DCTELEM) ((tmp1 + tmp3) << 4);
+  data[DCTSIZE*1+1] = (DCTELEM) ((tmp1 - tmp3) << 4);
+}
+
+
+/*
+ * Perform the forward DCT on a 1x1 sample block.
+ */
+
+GLOBAL(void)
+jpeg_fdct_1x1 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
+{
+  /* Pre-zero output coefficient block. */
+  MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
+
+  /* We leave the result scaled up by an overall factor of 8. */
+  /* We must also scale the output by (8/1)**2 = 2**6. */
+  /* Apply unsigned->signed conversion */
+  data[0] = (DCTELEM)
+    ((GETJSAMPLE(sample_data[0][start_col]) - CENTERJSAMPLE) << 6);
+}
+
+
+/*
+ * Perform the forward DCT on a 9x9 sample block.
+ */
+
+GLOBAL(void)
+jpeg_fdct_9x9 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
+{
+  INT32 tmp0, tmp1, tmp2, tmp3, tmp4;
+  INT32 tmp10, tmp11, tmp12, tmp13;
+  INT32 z1, z2;
+  DCTELEM workspace[8];
+  DCTELEM *dataptr;
+  DCTELEM *wsptr;
+  JSAMPROW elemptr;
+  int ctr;
+  SHIFT_TEMPS
+
+  /* Pass 1: process rows. */
+  /* Note results are scaled up by sqrt(8) compared to a true DCT; */
+  /* we scale the results further by 2 as part of output adaption */
+  /* scaling for different DCT size. */
+  /* cK represents sqrt(2) * cos(K*pi/18). */
+
+  dataptr = data;
+  ctr = 0;
+  for (;;) {
+    elemptr = sample_data[ctr] + start_col;
+
+    /* Even part */
+
+    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[8]);
+    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[7]);
+    tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[6]);
+    tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[5]);
+    tmp4 = GETJSAMPLE(elemptr[4]);
+
+    tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[8]);
+    tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[7]);
+    tmp12 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[6]);
+    tmp13 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[5]);
+
+    z1 = tmp0 + tmp2 + tmp3;
+    z2 = tmp1 + tmp4;
+    /* Apply unsigned->signed conversion */
+    dataptr[0] = (DCTELEM) ((z1 + z2 - 9 * CENTERJSAMPLE) << 1);
+    dataptr[6] = (DCTELEM)
+      DESCALE(MULTIPLY(z1 - z2 - z2, FIX(0.707106781)),  /* c6 */
+	      CONST_BITS-1);
+    z1 = MULTIPLY(tmp0 - tmp2, FIX(1.328926049));        /* c2 */
+    z2 = MULTIPLY(tmp1 - tmp4 - tmp4, FIX(0.707106781)); /* c6 */
+    dataptr[2] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp2 - tmp3, FIX(1.083350441))    /* c4 */
+	      + z1 + z2, CONST_BITS-1);
+    dataptr[4] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp3 - tmp0, FIX(0.245575608))    /* c8 */
+	      + z1 - z2, CONST_BITS-1);
+
+    /* Odd part */
+
+    dataptr[3] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp10 - tmp12 - tmp13, FIX(1.224744871)), /* c3 */
+	      CONST_BITS-1);
+
+    tmp11 = MULTIPLY(tmp11, FIX(1.224744871));        /* c3 */
+    tmp0 = MULTIPLY(tmp10 + tmp12, FIX(0.909038955)); /* c5 */
+    tmp1 = MULTIPLY(tmp10 + tmp13, FIX(0.483689525)); /* c7 */
+
+    dataptr[1] = (DCTELEM) DESCALE(tmp11 + tmp0 + tmp1, CONST_BITS-1);
+
+    tmp2 = MULTIPLY(tmp12 - tmp13, FIX(1.392728481)); /* c1 */
+
+    dataptr[5] = (DCTELEM) DESCALE(tmp0 - tmp11 - tmp2, CONST_BITS-1);
+    dataptr[7] = (DCTELEM) DESCALE(tmp1 - tmp11 + tmp2, CONST_BITS-1);
+
+    ctr++;
+
+    if (ctr != DCTSIZE) {
+      if (ctr == 9)
+	break;			/* Done. */
+      dataptr += DCTSIZE;	/* advance pointer to next row */
+    } else
+      dataptr = workspace;	/* switch pointer to extended workspace */
+  }
+
+  /* Pass 2: process columns.
+   * We leave the results scaled up by an overall factor of 8.
+   * We must also scale the output by (8/9)**2 = 64/81, which we partially
+   * fold into the constant multipliers and final/initial shifting:
+   * cK now represents sqrt(2) * cos(K*pi/18) * 128/81.
+   */
+
+  dataptr = data;
+  wsptr = workspace;
+  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
+    /* Even part */
+
+    tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*0];
+    tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*7];
+    tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*6];
+    tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*5];
+    tmp4 = dataptr[DCTSIZE*4];
+
+    tmp10 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*0];
+    tmp11 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*7];
+    tmp12 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*6];
+    tmp13 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*5];
+
+    z1 = tmp0 + tmp2 + tmp3;
+    z2 = tmp1 + tmp4;
+    dataptr[DCTSIZE*0] = (DCTELEM)
+      DESCALE(MULTIPLY(z1 + z2, FIX(1.580246914)),       /* 128/81 */
+	      CONST_BITS+2);
+    dataptr[DCTSIZE*6] = (DCTELEM)
+      DESCALE(MULTIPLY(z1 - z2 - z2, FIX(1.117403309)),  /* c6 */
+	      CONST_BITS+2);
+    z1 = MULTIPLY(tmp0 - tmp2, FIX(2.100031287));        /* c2 */
+    z2 = MULTIPLY(tmp1 - tmp4 - tmp4, FIX(1.117403309)); /* c6 */
+    dataptr[DCTSIZE*2] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp2 - tmp3, FIX(1.711961190))    /* c4 */
+	      + z1 + z2, CONST_BITS+2);
+    dataptr[DCTSIZE*4] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp3 - tmp0, FIX(0.388070096))    /* c8 */
+	      + z1 - z2, CONST_BITS+2);
+
+    /* Odd part */
+
+    dataptr[DCTSIZE*3] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp10 - tmp12 - tmp13, FIX(1.935399303)), /* c3 */
+	      CONST_BITS+2);
+
+    tmp11 = MULTIPLY(tmp11, FIX(1.935399303));        /* c3 */
+    tmp0 = MULTIPLY(tmp10 + tmp12, FIX(1.436506004)); /* c5 */
+    tmp1 = MULTIPLY(tmp10 + tmp13, FIX(0.764348879)); /* c7 */
+
+    dataptr[DCTSIZE*1] = (DCTELEM)
+      DESCALE(tmp11 + tmp0 + tmp1, CONST_BITS+2);
+
+    tmp2 = MULTIPLY(tmp12 - tmp13, FIX(2.200854883)); /* c1 */
+
+    dataptr[DCTSIZE*5] = (DCTELEM)
+      DESCALE(tmp0 - tmp11 - tmp2, CONST_BITS+2);
+    dataptr[DCTSIZE*7] = (DCTELEM)
+      DESCALE(tmp1 - tmp11 + tmp2, CONST_BITS+2);
+
+    dataptr++;			/* advance pointer to next column */
+    wsptr++;			/* advance pointer to next column */
+  }
+}
+
+
+/*
+ * Perform the forward DCT on a 10x10 sample block.
+ */
+
+GLOBAL(void)
+jpeg_fdct_10x10 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
+{
+  INT32 tmp0, tmp1, tmp2, tmp3, tmp4;
+  INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
+  DCTELEM workspace[8*2];
+  DCTELEM *dataptr;
+  DCTELEM *wsptr;
+  JSAMPROW elemptr;
+  int ctr;
+  SHIFT_TEMPS
+
+  /* Pass 1: process rows. */
+  /* Note results are scaled up by sqrt(8) compared to a true DCT; */
+  /* we scale the results further by 2 as part of output adaption */
+  /* scaling for different DCT size. */
+  /* cK represents sqrt(2) * cos(K*pi/20). */
+
+  dataptr = data;
+  ctr = 0;
+  for (;;) {
+    elemptr = sample_data[ctr] + start_col;
+
+    /* Even part */
+
+    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[9]);
+    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[8]);
+    tmp12 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[7]);
+    tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[6]);
+    tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[5]);
+
+    tmp10 = tmp0 + tmp4;
+    tmp13 = tmp0 - tmp4;
+    tmp11 = tmp1 + tmp3;
+    tmp14 = tmp1 - tmp3;
+
+    tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[9]);
+    tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[8]);
+    tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[7]);
+    tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[6]);
+    tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[5]);
+
+    /* Apply unsigned->signed conversion */
+    dataptr[0] = (DCTELEM)
+      ((tmp10 + tmp11 + tmp12 - 10 * CENTERJSAMPLE) << 1);
+    tmp12 += tmp12;
+    dataptr[4] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.144122806)) - /* c4 */
+	      MULTIPLY(tmp11 - tmp12, FIX(0.437016024)),  /* c8 */
+	      CONST_BITS-1);
+    tmp10 = MULTIPLY(tmp13 + tmp14, FIX(0.831253876));    /* c6 */
+    dataptr[2] = (DCTELEM)
+      DESCALE(tmp10 + MULTIPLY(tmp13, FIX(0.513743148)),  /* c2-c6 */
+	      CONST_BITS-1);
+    dataptr[6] = (DCTELEM)
+      DESCALE(tmp10 - MULTIPLY(tmp14, FIX(2.176250899)),  /* c2+c6 */
+	      CONST_BITS-1);
+
+    /* Odd part */
+
+    tmp10 = tmp0 + tmp4;
+    tmp11 = tmp1 - tmp3;
+    dataptr[5] = (DCTELEM) ((tmp10 - tmp11 - tmp2) << 1);
+    tmp2 <<= CONST_BITS;
+    dataptr[1] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp0, FIX(1.396802247)) +          /* c1 */
+	      MULTIPLY(tmp1, FIX(1.260073511)) + tmp2 +   /* c3 */
+	      MULTIPLY(tmp3, FIX(0.642039522)) +          /* c7 */
+	      MULTIPLY(tmp4, FIX(0.221231742)),           /* c9 */
+	      CONST_BITS-1);
+    tmp12 = MULTIPLY(tmp0 - tmp4, FIX(0.951056516)) -     /* (c3+c7)/2 */
+	    MULTIPLY(tmp1 + tmp3, FIX(0.587785252));      /* (c1-c9)/2 */
+    tmp13 = MULTIPLY(tmp10 + tmp11, FIX(0.309016994)) +   /* (c3-c7)/2 */
+	    (tmp11 << (CONST_BITS - 1)) - tmp2;
+    dataptr[3] = (DCTELEM) DESCALE(tmp12 + tmp13, CONST_BITS-1);
+    dataptr[7] = (DCTELEM) DESCALE(tmp12 - tmp13, CONST_BITS-1);
+
+    ctr++;
+
+    if (ctr != DCTSIZE) {
+      if (ctr == 10)
+	break;			/* Done. */
+      dataptr += DCTSIZE;	/* advance pointer to next row */
+    } else
+      dataptr = workspace;	/* switch pointer to extended workspace */
+  }
+
+  /* Pass 2: process columns.
+   * We leave the results scaled up by an overall factor of 8.
+   * We must also scale the output by (8/10)**2 = 16/25, which we partially
+   * fold into the constant multipliers and final/initial shifting:
+   * cK now represents sqrt(2) * cos(K*pi/20) * 32/25.
+   */
+
+  dataptr = data;
+  wsptr = workspace;
+  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
+    /* Even part */
+
+    tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*1];
+    tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*0];
+    tmp12 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*7];
+    tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*6];
+    tmp4 = dataptr[DCTSIZE*4] + dataptr[DCTSIZE*5];
+
+    tmp10 = tmp0 + tmp4;
+    tmp13 = tmp0 - tmp4;
+    tmp11 = tmp1 + tmp3;
+    tmp14 = tmp1 - tmp3;
+
+    tmp0 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*1];
+    tmp1 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*0];
+    tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*7];
+    tmp3 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*6];
+    tmp4 = dataptr[DCTSIZE*4] - dataptr[DCTSIZE*5];
+
+    dataptr[DCTSIZE*0] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp10 + tmp11 + tmp12, FIX(1.28)), /* 32/25 */
+	      CONST_BITS+2);
+    tmp12 += tmp12;
+    dataptr[DCTSIZE*4] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.464477191)) - /* c4 */
+	      MULTIPLY(tmp11 - tmp12, FIX(0.559380511)),  /* c8 */
+	      CONST_BITS+2);
+    tmp10 = MULTIPLY(tmp13 + tmp14, FIX(1.064004961));    /* c6 */
+    dataptr[DCTSIZE*2] = (DCTELEM)
+      DESCALE(tmp10 + MULTIPLY(tmp13, FIX(0.657591230)),  /* c2-c6 */
+	      CONST_BITS+2);
+    dataptr[DCTSIZE*6] = (DCTELEM)
+      DESCALE(tmp10 - MULTIPLY(tmp14, FIX(2.785601151)),  /* c2+c6 */
+	      CONST_BITS+2);
+
+    /* Odd part */
+
+    tmp10 = tmp0 + tmp4;
+    tmp11 = tmp1 - tmp3;
+    dataptr[DCTSIZE*5] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp10 - tmp11 - tmp2, FIX(1.28)),  /* 32/25 */
+	      CONST_BITS+2);
+    tmp2 = MULTIPLY(tmp2, FIX(1.28));                     /* 32/25 */
+    dataptr[DCTSIZE*1] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp0, FIX(1.787906876)) +          /* c1 */
+	      MULTIPLY(tmp1, FIX(1.612894094)) + tmp2 +   /* c3 */
+	      MULTIPLY(tmp3, FIX(0.821810588)) +          /* c7 */
+	      MULTIPLY(tmp4, FIX(0.283176630)),           /* c9 */
+	      CONST_BITS+2);
+    tmp12 = MULTIPLY(tmp0 - tmp4, FIX(1.217352341)) -     /* (c3+c7)/2 */
+	    MULTIPLY(tmp1 + tmp3, FIX(0.752365123));      /* (c1-c9)/2 */
+    tmp13 = MULTIPLY(tmp10 + tmp11, FIX(0.395541753)) +   /* (c3-c7)/2 */
+	    MULTIPLY(tmp11, FIX(0.64)) - tmp2;            /* 16/25 */
+    dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp12 + tmp13, CONST_BITS+2);
+    dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp12 - tmp13, CONST_BITS+2);
+
+    dataptr++;			/* advance pointer to next column */
+    wsptr++;			/* advance pointer to next column */
+  }
+}
+
+
+/*
+ * Perform the forward DCT on an 11x11 sample block.
+ */
+
+GLOBAL(void)
+jpeg_fdct_11x11 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
+{
+  INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
+  INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
+  INT32 z1, z2, z3;
+  DCTELEM workspace[8*3];
+  DCTELEM *dataptr;
+  DCTELEM *wsptr;
+  JSAMPROW elemptr;
+  int ctr;
+  SHIFT_TEMPS
+
+  /* Pass 1: process rows. */
+  /* Note results are scaled up by sqrt(8) compared to a true DCT; */
+  /* we scale the results further by 2 as part of output adaption */
+  /* scaling for different DCT size. */
+  /* cK represents sqrt(2) * cos(K*pi/22). */
+
+  dataptr = data;
+  ctr = 0;
+  for (;;) {
+    elemptr = sample_data[ctr] + start_col;
+
+    /* Even part */
+
+    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[10]);
+    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[9]);
+    tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[8]);
+    tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[7]);
+    tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[6]);
+    tmp5 = GETJSAMPLE(elemptr[5]);
+
+    tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[10]);
+    tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[9]);
+    tmp12 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[8]);
+    tmp13 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[7]);
+    tmp14 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[6]);
+
+    /* Apply unsigned->signed conversion */
+    dataptr[0] = (DCTELEM)
+      ((tmp0 + tmp1 + tmp2 + tmp3 + tmp4 + tmp5 - 11 * CENTERJSAMPLE) << 1);
+    tmp5 += tmp5;
+    tmp0 -= tmp5;
+    tmp1 -= tmp5;
+    tmp2 -= tmp5;
+    tmp3 -= tmp5;
+    tmp4 -= tmp5;
+    z1 = MULTIPLY(tmp0 + tmp3, FIX(1.356927976)) +       /* c2 */
+	 MULTIPLY(tmp2 + tmp4, FIX(0.201263574));        /* c10 */
+    z2 = MULTIPLY(tmp1 - tmp3, FIX(0.926112931));        /* c6 */
+    z3 = MULTIPLY(tmp0 - tmp1, FIX(1.189712156));        /* c4 */
+    dataptr[2] = (DCTELEM)
+      DESCALE(z1 + z2 - MULTIPLY(tmp3, FIX(1.018300590)) /* c2+c8-c6 */
+	      - MULTIPLY(tmp4, FIX(1.390975730)),        /* c4+c10 */
+	      CONST_BITS-1);
+    dataptr[4] = (DCTELEM)
+      DESCALE(z2 + z3 + MULTIPLY(tmp1, FIX(0.062335650)) /* c4-c6-c10 */
+	      - MULTIPLY(tmp2, FIX(1.356927976))         /* c2 */
+	      + MULTIPLY(tmp4, FIX(0.587485545)),        /* c8 */
+	      CONST_BITS-1);
+    dataptr[6] = (DCTELEM)
+      DESCALE(z1 + z3 - MULTIPLY(tmp0, FIX(1.620527200)) /* c2+c4-c6 */
+	      - MULTIPLY(tmp2, FIX(0.788749120)),        /* c8+c10 */
+	      CONST_BITS-1);
+
+    /* Odd part */
+
+    tmp1 = MULTIPLY(tmp10 + tmp11, FIX(1.286413905));    /* c3 */
+    tmp2 = MULTIPLY(tmp10 + tmp12, FIX(1.068791298));    /* c5 */
+    tmp3 = MULTIPLY(tmp10 + tmp13, FIX(0.764581576));    /* c7 */
+    tmp0 = tmp1 + tmp2 + tmp3 - MULTIPLY(tmp10, FIX(1.719967871)) /* c7+c5+c3-c1 */
+	   + MULTIPLY(tmp14, FIX(0.398430003));          /* c9 */
+    tmp4 = MULTIPLY(tmp11 + tmp12, - FIX(0.764581576));  /* -c7 */
+    tmp5 = MULTIPLY(tmp11 + tmp13, - FIX(1.399818907));  /* -c1 */
+    tmp1 += tmp4 + tmp5 + MULTIPLY(tmp11, FIX(1.276416582)) /* c9+c7+c1-c3 */
+	    - MULTIPLY(tmp14, FIX(1.068791298));         /* c5 */
+    tmp10 = MULTIPLY(tmp12 + tmp13, FIX(0.398430003));   /* c9 */
+    tmp2 += tmp4 + tmp10 - MULTIPLY(tmp12, FIX(1.989053629)) /* c9+c5+c3-c7 */
+	    + MULTIPLY(tmp14, FIX(1.399818907));         /* c1 */
+    tmp3 += tmp5 + tmp10 + MULTIPLY(tmp13, FIX(1.305598626)) /* c1+c5-c9-c7 */
+	    - MULTIPLY(tmp14, FIX(1.286413905));         /* c3 */
+
+    dataptr[1] = (DCTELEM) DESCALE(tmp0, CONST_BITS-1);
+    dataptr[3] = (DCTELEM) DESCALE(tmp1, CONST_BITS-1);
+    dataptr[5] = (DCTELEM) DESCALE(tmp2, CONST_BITS-1);
+    dataptr[7] = (DCTELEM) DESCALE(tmp3, CONST_BITS-1);
+
+    ctr++;
+
+    if (ctr != DCTSIZE) {
+      if (ctr == 11)
+	break;			/* Done. */
+      dataptr += DCTSIZE;	/* advance pointer to next row */
+    } else
+      dataptr = workspace;	/* switch pointer to extended workspace */
+  }
+
+  /* Pass 2: process columns.
+   * We leave the results scaled up by an overall factor of 8.
+   * We must also scale the output by (8/11)**2 = 64/121, which we partially
+   * fold into the constant multipliers and final/initial shifting:
+   * cK now represents sqrt(2) * cos(K*pi/22) * 128/121.
+   */
+
+  dataptr = data;
+  wsptr = workspace;
+  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
+    /* Even part */
+
+    tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*2];
+    tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*1];
+    tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*0];
+    tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*7];
+    tmp4 = dataptr[DCTSIZE*4] + dataptr[DCTSIZE*6];
+    tmp5 = dataptr[DCTSIZE*5];
+
+    tmp10 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*2];
+    tmp11 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*1];
+    tmp12 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*0];
+    tmp13 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*7];
+    tmp14 = dataptr[DCTSIZE*4] - dataptr[DCTSIZE*6];
+
+    dataptr[DCTSIZE*0] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp0 + tmp1 + tmp2 + tmp3 + tmp4 + tmp5,
+		       FIX(1.057851240)),                /* 128/121 */
+	      CONST_BITS+2);
+    tmp5 += tmp5;
+    tmp0 -= tmp5;
+    tmp1 -= tmp5;
+    tmp2 -= tmp5;
+    tmp3 -= tmp5;
+    tmp4 -= tmp5;
+    z1 = MULTIPLY(tmp0 + tmp3, FIX(1.435427942)) +       /* c2 */
+	 MULTIPLY(tmp2 + tmp4, FIX(0.212906922));        /* c10 */
+    z2 = MULTIPLY(tmp1 - tmp3, FIX(0.979689713));        /* c6 */
+    z3 = MULTIPLY(tmp0 - tmp1, FIX(1.258538479));        /* c4 */
+    dataptr[DCTSIZE*2] = (DCTELEM)
+      DESCALE(z1 + z2 - MULTIPLY(tmp3, FIX(1.077210542)) /* c2+c8-c6 */
+	      - MULTIPLY(tmp4, FIX(1.471445400)),        /* c4+c10 */
+	      CONST_BITS+2);
+    dataptr[DCTSIZE*4] = (DCTELEM)
+      DESCALE(z2 + z3 + MULTIPLY(tmp1, FIX(0.065941844)) /* c4-c6-c10 */
+	      - MULTIPLY(tmp2, FIX(1.435427942))         /* c2 */
+	      + MULTIPLY(tmp4, FIX(0.621472312)),        /* c8 */
+	      CONST_BITS+2);
+    dataptr[DCTSIZE*6] = (DCTELEM)
+      DESCALE(z1 + z3 - MULTIPLY(tmp0, FIX(1.714276708)) /* c2+c4-c6 */
+	      - MULTIPLY(tmp2, FIX(0.834379234)),        /* c8+c10 */
+	      CONST_BITS+2);
+
+    /* Odd part */
+
+    tmp1 = MULTIPLY(tmp10 + tmp11, FIX(1.360834544));    /* c3 */
+    tmp2 = MULTIPLY(tmp10 + tmp12, FIX(1.130622199));    /* c5 */
+    tmp3 = MULTIPLY(tmp10 + tmp13, FIX(0.808813568));    /* c7 */
+    tmp0 = tmp1 + tmp2 + tmp3 - MULTIPLY(tmp10, FIX(1.819470145)) /* c7+c5+c3-c1 */
+	   + MULTIPLY(tmp14, FIX(0.421479672));          /* c9 */
+    tmp4 = MULTIPLY(tmp11 + tmp12, - FIX(0.808813568));  /* -c7 */
+    tmp5 = MULTIPLY(tmp11 + tmp13, - FIX(1.480800167));  /* -c1 */
+    tmp1 += tmp4 + tmp5 + MULTIPLY(tmp11, FIX(1.350258864)) /* c9+c7+c1-c3 */
+	    - MULTIPLY(tmp14, FIX(1.130622199));         /* c5 */
+    tmp10 = MULTIPLY(tmp12 + tmp13, FIX(0.421479672));   /* c9 */
+    tmp2 += tmp4 + tmp10 - MULTIPLY(tmp12, FIX(2.104122847)) /* c9+c5+c3-c7 */
+	    + MULTIPLY(tmp14, FIX(1.480800167));         /* c1 */
+    tmp3 += tmp5 + tmp10 + MULTIPLY(tmp13, FIX(1.381129125)) /* c1+c5-c9-c7 */
+	    - MULTIPLY(tmp14, FIX(1.360834544));         /* c3 */
+
+    dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp0, CONST_BITS+2);
+    dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp1, CONST_BITS+2);
+    dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp2, CONST_BITS+2);
+    dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp3, CONST_BITS+2);
+
+    dataptr++;			/* advance pointer to next column */
+    wsptr++;			/* advance pointer to next column */
+  }
+}
+
+
+/*
+ * Perform the forward DCT on a 12x12 sample block.
+ */
+
+GLOBAL(void)
+jpeg_fdct_12x12 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
+{
+  INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
+  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
+  DCTELEM workspace[8*4];
+  DCTELEM *dataptr;
+  DCTELEM *wsptr;
+  JSAMPROW elemptr;
+  int ctr;
+  SHIFT_TEMPS
+
+  /* Pass 1: process rows. */
+  /* Note results are scaled up by sqrt(8) compared to a true DCT. */
+  /* cK represents sqrt(2) * cos(K*pi/24). */
+
+  dataptr = data;
+  ctr = 0;
+  for (;;) {
+    elemptr = sample_data[ctr] + start_col;
+
+    /* Even part */
+
+    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[11]);
+    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[10]);
+    tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[9]);
+    tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[8]);
+    tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[7]);
+    tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[6]);
+
+    tmp10 = tmp0 + tmp5;
+    tmp13 = tmp0 - tmp5;
+    tmp11 = tmp1 + tmp4;
+    tmp14 = tmp1 - tmp4;
+    tmp12 = tmp2 + tmp3;
+    tmp15 = tmp2 - tmp3;
+
+    tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[11]);
+    tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[10]);
+    tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[9]);
+    tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[8]);
+    tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[7]);
+    tmp5 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[6]);
+
+    /* Apply unsigned->signed conversion */
+    dataptr[0] = (DCTELEM) (tmp10 + tmp11 + tmp12 - 12 * CENTERJSAMPLE);
+    dataptr[6] = (DCTELEM) (tmp13 - tmp14 - tmp15);
+    dataptr[4] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.224744871)), /* c4 */
+	      CONST_BITS);
+    dataptr[2] = (DCTELEM)
+      DESCALE(tmp14 - tmp15 + MULTIPLY(tmp13 + tmp15, FIX(1.366025404)), /* c2 */
+	      CONST_BITS);
+
+    /* Odd part */
+
+    tmp10 = MULTIPLY(tmp1 + tmp4, FIX_0_541196100);    /* c9 */
+    tmp14 = tmp10 + MULTIPLY(tmp1, FIX_0_765366865);   /* c3-c9 */
+    tmp15 = tmp10 - MULTIPLY(tmp4, FIX_1_847759065);   /* c3+c9 */
+    tmp12 = MULTIPLY(tmp0 + tmp2, FIX(1.121971054));   /* c5 */
+    tmp13 = MULTIPLY(tmp0 + tmp3, FIX(0.860918669));   /* c7 */
+    tmp10 = tmp12 + tmp13 + tmp14 - MULTIPLY(tmp0, FIX(0.580774953)) /* c5+c7-c1 */
+	    + MULTIPLY(tmp5, FIX(0.184591911));        /* c11 */
+    tmp11 = MULTIPLY(tmp2 + tmp3, - FIX(0.184591911)); /* -c11 */
+    tmp12 += tmp11 - tmp15 - MULTIPLY(tmp2, FIX(2.339493912)) /* c1+c5-c11 */
+	    + MULTIPLY(tmp5, FIX(0.860918669));        /* c7 */
+    tmp13 += tmp11 - tmp14 + MULTIPLY(tmp3, FIX(0.725788011)) /* c1+c11-c7 */
+	    - MULTIPLY(tmp5, FIX(1.121971054));        /* c5 */
+    tmp11 = tmp15 + MULTIPLY(tmp0 - tmp3, FIX(1.306562965)) /* c3 */
+	    - MULTIPLY(tmp2 + tmp5, FIX_0_541196100);  /* c9 */
+
+    dataptr[1] = (DCTELEM) DESCALE(tmp10, CONST_BITS);
+    dataptr[3] = (DCTELEM) DESCALE(tmp11, CONST_BITS);
+    dataptr[5] = (DCTELEM) DESCALE(tmp12, CONST_BITS);
+    dataptr[7] = (DCTELEM) DESCALE(tmp13, CONST_BITS);
+
+    ctr++;
+
+    if (ctr != DCTSIZE) {
+      if (ctr == 12)
+	break;			/* Done. */
+      dataptr += DCTSIZE;	/* advance pointer to next row */
+    } else
+      dataptr = workspace;	/* switch pointer to extended workspace */
+  }
+
+  /* Pass 2: process columns.
+   * We leave the results scaled up by an overall factor of 8.
+   * We must also scale the output by (8/12)**2 = 4/9, which we partially
+   * fold into the constant multipliers and final shifting:
+   * cK now represents sqrt(2) * cos(K*pi/24) * 8/9.
+   */
+
+  dataptr = data;
+  wsptr = workspace;
+  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
+    /* Even part */
+
+    tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*3];
+    tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*2];
+    tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*1];
+    tmp3 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*0];
+    tmp4 = dataptr[DCTSIZE*4] + dataptr[DCTSIZE*7];
+    tmp5 = dataptr[DCTSIZE*5] + dataptr[DCTSIZE*6];
+
+    tmp10 = tmp0 + tmp5;
+    tmp13 = tmp0 - tmp5;
+    tmp11 = tmp1 + tmp4;
+    tmp14 = tmp1 - tmp4;
+    tmp12 = tmp2 + tmp3;
+    tmp15 = tmp2 - tmp3;
+
+    tmp0 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*3];
+    tmp1 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*2];
+    tmp2 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*1];
+    tmp3 = dataptr[DCTSIZE*3] - wsptr[DCTSIZE*0];
+    tmp4 = dataptr[DCTSIZE*4] - dataptr[DCTSIZE*7];
+    tmp5 = dataptr[DCTSIZE*5] - dataptr[DCTSIZE*6];
+
+    dataptr[DCTSIZE*0] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp10 + tmp11 + tmp12, FIX(0.888888889)), /* 8/9 */
+	      CONST_BITS+1);
+    dataptr[DCTSIZE*6] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp13 - tmp14 - tmp15, FIX(0.888888889)), /* 8/9 */
+	      CONST_BITS+1);
+    dataptr[DCTSIZE*4] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.088662108)),         /* c4 */
+	      CONST_BITS+1);
+    dataptr[DCTSIZE*2] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp14 - tmp15, FIX(0.888888889)) +        /* 8/9 */
+	      MULTIPLY(tmp13 + tmp15, FIX(1.214244803)),         /* c2 */
+	      CONST_BITS+1);
+
+    /* Odd part */
+
+    tmp10 = MULTIPLY(tmp1 + tmp4, FIX(0.481063200));   /* c9 */
+    tmp14 = tmp10 + MULTIPLY(tmp1, FIX(0.680326102));  /* c3-c9 */
+    tmp15 = tmp10 - MULTIPLY(tmp4, FIX(1.642452502));  /* c3+c9 */
+    tmp12 = MULTIPLY(tmp0 + tmp2, FIX(0.997307603));   /* c5 */
+    tmp13 = MULTIPLY(tmp0 + tmp3, FIX(0.765261039));   /* c7 */
+    tmp10 = tmp12 + tmp13 + tmp14 - MULTIPLY(tmp0, FIX(0.516244403)) /* c5+c7-c1 */
+	    + MULTIPLY(tmp5, FIX(0.164081699));        /* c11 */
+    tmp11 = MULTIPLY(tmp2 + tmp3, - FIX(0.164081699)); /* -c11 */
+    tmp12 += tmp11 - tmp15 - MULTIPLY(tmp2, FIX(2.079550144)) /* c1+c5-c11 */
+	    + MULTIPLY(tmp5, FIX(0.765261039));        /* c7 */
+    tmp13 += tmp11 - tmp14 + MULTIPLY(tmp3, FIX(0.645144899)) /* c1+c11-c7 */
+	    - MULTIPLY(tmp5, FIX(0.997307603));        /* c5 */
+    tmp11 = tmp15 + MULTIPLY(tmp0 - tmp3, FIX(1.161389302)) /* c3 */
+	    - MULTIPLY(tmp2 + tmp5, FIX(0.481063200)); /* c9 */
+
+    dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp10, CONST_BITS+1);
+    dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp11, CONST_BITS+1);
+    dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp12, CONST_BITS+1);
+    dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp13, CONST_BITS+1);
+
+    dataptr++;			/* advance pointer to next column */
+    wsptr++;			/* advance pointer to next column */
+  }
+}
+
+
+/*
+ * Perform the forward DCT on a 13x13 sample block.
+ */
+
+GLOBAL(void)
+jpeg_fdct_13x13 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
+{
+  INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
+  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
+  INT32 z1, z2;
+  DCTELEM workspace[8*5];
+  DCTELEM *dataptr;
+  DCTELEM *wsptr;
+  JSAMPROW elemptr;
+  int ctr;
+  SHIFT_TEMPS
+
+  /* Pass 1: process rows. */
+  /* Note results are scaled up by sqrt(8) compared to a true DCT. */
+  /* cK represents sqrt(2) * cos(K*pi/26). */
+
+  dataptr = data;
+  ctr = 0;
+  for (;;) {
+    elemptr = sample_data[ctr] + start_col;
+
+    /* Even part */
+
+    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[12]);
+    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[11]);
+    tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[10]);
+    tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[9]);
+    tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[8]);
+    tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[7]);
+    tmp6 = GETJSAMPLE(elemptr[6]);
+
+    tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[12]);
+    tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[11]);
+    tmp12 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[10]);
+    tmp13 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[9]);
+    tmp14 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[8]);
+    tmp15 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[7]);
+
+    /* Apply unsigned->signed conversion */
+    dataptr[0] = (DCTELEM)
+      (tmp0 + tmp1 + tmp2 + tmp3 + tmp4 + tmp5 + tmp6 - 13 * CENTERJSAMPLE);
+    tmp6 += tmp6;
+    tmp0 -= tmp6;
+    tmp1 -= tmp6;
+    tmp2 -= tmp6;
+    tmp3 -= tmp6;
+    tmp4 -= tmp6;
+    tmp5 -= tmp6;
+    dataptr[2] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp0, FIX(1.373119086)) +   /* c2 */
+	      MULTIPLY(tmp1, FIX(1.058554052)) +   /* c6 */
+	      MULTIPLY(tmp2, FIX(0.501487041)) -   /* c10 */
+	      MULTIPLY(tmp3, FIX(0.170464608)) -   /* c12 */
+	      MULTIPLY(tmp4, FIX(0.803364869)) -   /* c8 */
+	      MULTIPLY(tmp5, FIX(1.252223920)),    /* c4 */
+	      CONST_BITS);
+    z1 = MULTIPLY(tmp0 - tmp2, FIX(1.155388986)) - /* (c4+c6)/2 */
+	 MULTIPLY(tmp3 - tmp4, FIX(0.435816023)) - /* (c2-c10)/2 */
+	 MULTIPLY(tmp1 - tmp5, FIX(0.316450131));  /* (c8-c12)/2 */
+    z2 = MULTIPLY(tmp0 + tmp2, FIX(0.096834934)) - /* (c4-c6)/2 */
+	 MULTIPLY(tmp3 + tmp4, FIX(0.937303064)) + /* (c2+c10)/2 */
+	 MULTIPLY(tmp1 + tmp5, FIX(0.486914739));  /* (c8+c12)/2 */
+
+    dataptr[4] = (DCTELEM) DESCALE(z1 + z2, CONST_BITS);
+    dataptr[6] = (DCTELEM) DESCALE(z1 - z2, CONST_BITS);
+
+    /* Odd part */
+
+    tmp1 = MULTIPLY(tmp10 + tmp11, FIX(1.322312651));   /* c3 */
+    tmp2 = MULTIPLY(tmp10 + tmp12, FIX(1.163874945));   /* c5 */
+    tmp3 = MULTIPLY(tmp10 + tmp13, FIX(0.937797057)) +  /* c7 */
+	   MULTIPLY(tmp14 + tmp15, FIX(0.338443458));   /* c11 */
+    tmp0 = tmp1 + tmp2 + tmp3 -
+	   MULTIPLY(tmp10, FIX(2.020082300)) +          /* c3+c5+c7-c1 */
+	   MULTIPLY(tmp14, FIX(0.318774355));           /* c9-c11 */
+    tmp4 = MULTIPLY(tmp14 - tmp15, FIX(0.937797057)) -  /* c7 */
+	   MULTIPLY(tmp11 + tmp12, FIX(0.338443458));   /* c11 */
+    tmp5 = MULTIPLY(tmp11 + tmp13, - FIX(1.163874945)); /* -c5 */
+    tmp1 += tmp4 + tmp5 +
+	    MULTIPLY(tmp11, FIX(0.837223564)) -         /* c5+c9+c11-c3 */
+	    MULTIPLY(tmp14, FIX(2.341699410));          /* c1+c7 */
+    tmp6 = MULTIPLY(tmp12 + tmp13, - FIX(0.657217813)); /* -c9 */
+    tmp2 += tmp4 + tmp6 -
+	    MULTIPLY(tmp12, FIX(1.572116027)) +         /* c1+c5-c9-c11 */
+	    MULTIPLY(tmp15, FIX(2.260109708));          /* c3+c7 */
+    tmp3 += tmp5 + tmp6 +
+	    MULTIPLY(tmp13, FIX(2.205608352)) -         /* c3+c5+c9-c7 */
+	    MULTIPLY(tmp15, FIX(1.742345811));          /* c1+c11 */
+
+    dataptr[1] = (DCTELEM) DESCALE(tmp0, CONST_BITS);
+    dataptr[3] = (DCTELEM) DESCALE(tmp1, CONST_BITS);
+    dataptr[5] = (DCTELEM) DESCALE(tmp2, CONST_BITS);
+    dataptr[7] = (DCTELEM) DESCALE(tmp3, CONST_BITS);
+
+    ctr++;
+
+    if (ctr != DCTSIZE) {
+      if (ctr == 13)
+	break;			/* Done. */
+      dataptr += DCTSIZE;	/* advance pointer to next row */
+    } else
+      dataptr = workspace;	/* switch pointer to extended workspace */
+  }
+
+  /* Pass 2: process columns.
+   * We leave the results scaled up by an overall factor of 8.
+   * We must also scale the output by (8/13)**2 = 64/169, which we partially
+   * fold into the constant multipliers and final shifting:
+   * cK now represents sqrt(2) * cos(K*pi/26) * 128/169.
+   */
+
+  dataptr = data;
+  wsptr = workspace;
+  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
+    /* Even part */
+
+    tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*4];
+    tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*3];
+    tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*2];
+    tmp3 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*1];
+    tmp4 = dataptr[DCTSIZE*4] + wsptr[DCTSIZE*0];
+    tmp5 = dataptr[DCTSIZE*5] + dataptr[DCTSIZE*7];
+    tmp6 = dataptr[DCTSIZE*6];
+
+    tmp10 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*4];
+    tmp11 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*3];
+    tmp12 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*2];
+    tmp13 = dataptr[DCTSIZE*3] - wsptr[DCTSIZE*1];
+    tmp14 = dataptr[DCTSIZE*4] - wsptr[DCTSIZE*0];
+    tmp15 = dataptr[DCTSIZE*5] - dataptr[DCTSIZE*7];
+
+    dataptr[DCTSIZE*0] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp0 + tmp1 + tmp2 + tmp3 + tmp4 + tmp5 + tmp6,
+		       FIX(0.757396450)),          /* 128/169 */
+	      CONST_BITS+1);
+    tmp6 += tmp6;
+    tmp0 -= tmp6;
+    tmp1 -= tmp6;
+    tmp2 -= tmp6;
+    tmp3 -= tmp6;
+    tmp4 -= tmp6;
+    tmp5 -= tmp6;
+    dataptr[DCTSIZE*2] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp0, FIX(1.039995521)) +   /* c2 */
+	      MULTIPLY(tmp1, FIX(0.801745081)) +   /* c6 */
+	      MULTIPLY(tmp2, FIX(0.379824504)) -   /* c10 */
+	      MULTIPLY(tmp3, FIX(0.129109289)) -   /* c12 */
+	      MULTIPLY(tmp4, FIX(0.608465700)) -   /* c8 */
+	      MULTIPLY(tmp5, FIX(0.948429952)),    /* c4 */
+	      CONST_BITS+1);
+    z1 = MULTIPLY(tmp0 - tmp2, FIX(0.875087516)) - /* (c4+c6)/2 */
+	 MULTIPLY(tmp3 - tmp4, FIX(0.330085509)) - /* (c2-c10)/2 */
+	 MULTIPLY(tmp1 - tmp5, FIX(0.239678205));  /* (c8-c12)/2 */
+    z2 = MULTIPLY(tmp0 + tmp2, FIX(0.073342435)) - /* (c4-c6)/2 */
+	 MULTIPLY(tmp3 + tmp4, FIX(0.709910013)) + /* (c2+c10)/2 */
+	 MULTIPLY(tmp1 + tmp5, FIX(0.368787494));  /* (c8+c12)/2 */
+
+    dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(z1 + z2, CONST_BITS+1);
+    dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(z1 - z2, CONST_BITS+1);
+
+    /* Odd part */
+
+    tmp1 = MULTIPLY(tmp10 + tmp11, FIX(1.001514908));   /* c3 */
+    tmp2 = MULTIPLY(tmp10 + tmp12, FIX(0.881514751));   /* c5 */
+    tmp3 = MULTIPLY(tmp10 + tmp13, FIX(0.710284161)) +  /* c7 */
+	   MULTIPLY(tmp14 + tmp15, FIX(0.256335874));   /* c11 */
+    tmp0 = tmp1 + tmp2 + tmp3 -
+	   MULTIPLY(tmp10, FIX(1.530003162)) +          /* c3+c5+c7-c1 */
+	   MULTIPLY(tmp14, FIX(0.241438564));           /* c9-c11 */
+    tmp4 = MULTIPLY(tmp14 - tmp15, FIX(0.710284161)) -  /* c7 */
+	   MULTIPLY(tmp11 + tmp12, FIX(0.256335874));   /* c11 */
+    tmp5 = MULTIPLY(tmp11 + tmp13, - FIX(0.881514751)); /* -c5 */
+    tmp1 += tmp4 + tmp5 +
+	    MULTIPLY(tmp11, FIX(0.634110155)) -         /* c5+c9+c11-c3 */
+	    MULTIPLY(tmp14, FIX(1.773594819));          /* c1+c7 */
+    tmp6 = MULTIPLY(tmp12 + tmp13, - FIX(0.497774438)); /* -c9 */
+    tmp2 += tmp4 + tmp6 -
+	    MULTIPLY(tmp12, FIX(1.190715098)) +         /* c1+c5-c9-c11 */
+	    MULTIPLY(tmp15, FIX(1.711799069));          /* c3+c7 */
+    tmp3 += tmp5 + tmp6 +
+	    MULTIPLY(tmp13, FIX(1.670519935)) -         /* c3+c5+c9-c7 */
+	    MULTIPLY(tmp15, FIX(1.319646532));          /* c1+c11 */
+
+    dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp0, CONST_BITS+1);
+    dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp1, CONST_BITS+1);
+    dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp2, CONST_BITS+1);
+    dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp3, CONST_BITS+1);
+
+    dataptr++;			/* advance pointer to next column */
+    wsptr++;			/* advance pointer to next column */
+  }
+}
+
+
+/*
+ * Perform the forward DCT on a 14x14 sample block.
+ */
+
+GLOBAL(void)
+jpeg_fdct_14x14 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
+{
+  INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
+  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
+  DCTELEM workspace[8*6];
+  DCTELEM *dataptr;
+  DCTELEM *wsptr;
+  JSAMPROW elemptr;
+  int ctr;
+  SHIFT_TEMPS
+
+  /* Pass 1: process rows. */
+  /* Note results are scaled up by sqrt(8) compared to a true DCT. */
+  /* cK represents sqrt(2) * cos(K*pi/28). */
+
+  dataptr = data;
+  ctr = 0;
+  for (;;) {
+    elemptr = sample_data[ctr] + start_col;
+
+    /* Even part */
+
+    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[13]);
+    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[12]);
+    tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[11]);
+    tmp13 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[10]);
+    tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[9]);
+    tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[8]);
+    tmp6 = GETJSAMPLE(elemptr[6]) + GETJSAMPLE(elemptr[7]);
+
+    tmp10 = tmp0 + tmp6;
+    tmp14 = tmp0 - tmp6;
+    tmp11 = tmp1 + tmp5;
+    tmp15 = tmp1 - tmp5;
+    tmp12 = tmp2 + tmp4;
+    tmp16 = tmp2 - tmp4;
+
+    tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[13]);
+    tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[12]);
+    tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[11]);
+    tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[10]);
+    tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[9]);
+    tmp5 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[8]);
+    tmp6 = GETJSAMPLE(elemptr[6]) - GETJSAMPLE(elemptr[7]);
+
+    /* Apply unsigned->signed conversion */
+    dataptr[0] = (DCTELEM)
+      (tmp10 + tmp11 + tmp12 + tmp13 - 14 * CENTERJSAMPLE);
+    tmp13 += tmp13;
+    dataptr[4] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp10 - tmp13, FIX(1.274162392)) + /* c4 */
+	      MULTIPLY(tmp11 - tmp13, FIX(0.314692123)) - /* c12 */
+	      MULTIPLY(tmp12 - tmp13, FIX(0.881747734)),  /* c8 */
+	      CONST_BITS);
+
+    tmp10 = MULTIPLY(tmp14 + tmp15, FIX(1.105676686));    /* c6 */
+
+    dataptr[2] = (DCTELEM)
+      DESCALE(tmp10 + MULTIPLY(tmp14, FIX(0.273079590))   /* c2-c6 */
+	      + MULTIPLY(tmp16, FIX(0.613604268)),        /* c10 */
+	      CONST_BITS);
+    dataptr[6] = (DCTELEM)
+      DESCALE(tmp10 - MULTIPLY(tmp15, FIX(1.719280954))   /* c6+c10 */
+	      - MULTIPLY(tmp16, FIX(1.378756276)),        /* c2 */
+	      CONST_BITS);
+
+    /* Odd part */
+
+    tmp10 = tmp1 + tmp2;
+    tmp11 = tmp5 - tmp4;
+    dataptr[7] = (DCTELEM) (tmp0 - tmp10 + tmp3 - tmp11 - tmp6);
+    tmp3 <<= CONST_BITS;
+    tmp10 = MULTIPLY(tmp10, - FIX(0.158341681));          /* -c13 */
+    tmp11 = MULTIPLY(tmp11, FIX(1.405321284));            /* c1 */
+    tmp10 += tmp11 - tmp3;
+    tmp11 = MULTIPLY(tmp0 + tmp2, FIX(1.197448846)) +     /* c5 */
+	    MULTIPLY(tmp4 + tmp6, FIX(0.752406978));      /* c9 */
+    dataptr[5] = (DCTELEM)
+      DESCALE(tmp10 + tmp11 - MULTIPLY(tmp2, FIX(2.373959773)) /* c3+c5-c13 */
+	      + MULTIPLY(tmp4, FIX(1.119999435)),         /* c1+c11-c9 */
+	      CONST_BITS);
+    tmp12 = MULTIPLY(tmp0 + tmp1, FIX(1.334852607)) +     /* c3 */
+	    MULTIPLY(tmp5 - tmp6, FIX(0.467085129));      /* c11 */
+    dataptr[3] = (DCTELEM)
+      DESCALE(tmp10 + tmp12 - MULTIPLY(tmp1, FIX(0.424103948)) /* c3-c9-c13 */
+	      - MULTIPLY(tmp5, FIX(3.069855259)),         /* c1+c5+c11 */
+	      CONST_BITS);
+    dataptr[1] = (DCTELEM)
+      DESCALE(tmp11 + tmp12 + tmp3 + tmp6 -
+	      MULTIPLY(tmp0 + tmp6, FIX(1.126980169)),    /* c3+c5-c1 */
+	      CONST_BITS);
+
+    ctr++;
+
+    if (ctr != DCTSIZE) {
+      if (ctr == 14)
+	break;			/* Done. */
+      dataptr += DCTSIZE;	/* advance pointer to next row */
+    } else
+      dataptr = workspace;	/* switch pointer to extended workspace */
+  }
+
+  /* Pass 2: process columns.
+   * We leave the results scaled up by an overall factor of 8.
+   * We must also scale the output by (8/14)**2 = 16/49, which we partially
+   * fold into the constant multipliers and final shifting:
+   * cK now represents sqrt(2) * cos(K*pi/28) * 32/49.
+   */
+
+  dataptr = data;
+  wsptr = workspace;
+  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
+    /* Even part */
+
+    tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*5];
+    tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*4];
+    tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*3];
+    tmp13 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*2];
+    tmp4 = dataptr[DCTSIZE*4] + wsptr[DCTSIZE*1];
+    tmp5 = dataptr[DCTSIZE*5] + wsptr[DCTSIZE*0];
+    tmp6 = dataptr[DCTSIZE*6] + dataptr[DCTSIZE*7];
+
+    tmp10 = tmp0 + tmp6;
+    tmp14 = tmp0 - tmp6;
+    tmp11 = tmp1 + tmp5;
+    tmp15 = tmp1 - tmp5;
+    tmp12 = tmp2 + tmp4;
+    tmp16 = tmp2 - tmp4;
+
+    tmp0 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*5];
+    tmp1 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*4];
+    tmp2 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*3];
+    tmp3 = dataptr[DCTSIZE*3] - wsptr[DCTSIZE*2];
+    tmp4 = dataptr[DCTSIZE*4] - wsptr[DCTSIZE*1];
+    tmp5 = dataptr[DCTSIZE*5] - wsptr[DCTSIZE*0];
+    tmp6 = dataptr[DCTSIZE*6] - dataptr[DCTSIZE*7];
+
+    dataptr[DCTSIZE*0] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp10 + tmp11 + tmp12 + tmp13,
+		       FIX(0.653061224)),                 /* 32/49 */
+	      CONST_BITS+1);
+    tmp13 += tmp13;
+    dataptr[DCTSIZE*4] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp10 - tmp13, FIX(0.832106052)) + /* c4 */
+	      MULTIPLY(tmp11 - tmp13, FIX(0.205513223)) - /* c12 */
+	      MULTIPLY(tmp12 - tmp13, FIX(0.575835255)),  /* c8 */
+	      CONST_BITS+1);
+
+    tmp10 = MULTIPLY(tmp14 + tmp15, FIX(0.722074570));    /* c6 */
+
+    dataptr[DCTSIZE*2] = (DCTELEM)
+      DESCALE(tmp10 + MULTIPLY(tmp14, FIX(0.178337691))   /* c2-c6 */
+	      + MULTIPLY(tmp16, FIX(0.400721155)),        /* c10 */
+	      CONST_BITS+1);
+    dataptr[DCTSIZE*6] = (DCTELEM)
+      DESCALE(tmp10 - MULTIPLY(tmp15, FIX(1.122795725))   /* c6+c10 */
+	      - MULTIPLY(tmp16, FIX(0.900412262)),        /* c2 */
+	      CONST_BITS+1);
+
+    /* Odd part */
+
+    tmp10 = tmp1 + tmp2;
+    tmp11 = tmp5 - tmp4;
+    dataptr[DCTSIZE*7] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp0 - tmp10 + tmp3 - tmp11 - tmp6,
+		       FIX(0.653061224)),                 /* 32/49 */
+	      CONST_BITS+1);
+    tmp3  = MULTIPLY(tmp3 , FIX(0.653061224));            /* 32/49 */
+    tmp10 = MULTIPLY(tmp10, - FIX(0.103406812));          /* -c13 */
+    tmp11 = MULTIPLY(tmp11, FIX(0.917760839));            /* c1 */
+    tmp10 += tmp11 - tmp3;
+    tmp11 = MULTIPLY(tmp0 + tmp2, FIX(0.782007410)) +     /* c5 */
+	    MULTIPLY(tmp4 + tmp6, FIX(0.491367823));      /* c9 */
+    dataptr[DCTSIZE*5] = (DCTELEM)
+      DESCALE(tmp10 + tmp11 - MULTIPLY(tmp2, FIX(1.550341076)) /* c3+c5-c13 */
+	      + MULTIPLY(tmp4, FIX(0.731428202)),         /* c1+c11-c9 */
+	      CONST_BITS+1);
+    tmp12 = MULTIPLY(tmp0 + tmp1, FIX(0.871740478)) +     /* c3 */
+	    MULTIPLY(tmp5 - tmp6, FIX(0.305035186));      /* c11 */
+    dataptr[DCTSIZE*3] = (DCTELEM)
+      DESCALE(tmp10 + tmp12 - MULTIPLY(tmp1, FIX(0.276965844)) /* c3-c9-c13 */
+	      - MULTIPLY(tmp5, FIX(2.004803435)),         /* c1+c5+c11 */
+	      CONST_BITS+1);
+    dataptr[DCTSIZE*1] = (DCTELEM)
+      DESCALE(tmp11 + tmp12 + tmp3
+	      - MULTIPLY(tmp0, FIX(0.735987049))          /* c3+c5-c1 */
+	      - MULTIPLY(tmp6, FIX(0.082925825)),         /* c9-c11-c13 */
+	      CONST_BITS+1);
+
+    dataptr++;			/* advance pointer to next column */
+    wsptr++;			/* advance pointer to next column */
+  }
+}
+
+
+/*
+ * Perform the forward DCT on a 15x15 sample block.
+ */
+
+GLOBAL(void)
+jpeg_fdct_15x15 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
+{
+  INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
+  INT32 z1, z2, z3;
+  DCTELEM workspace[8*7];
+  DCTELEM *dataptr;
+  DCTELEM *wsptr;
+  JSAMPROW elemptr;
+  int ctr;
+  SHIFT_TEMPS
+
+  /* Pass 1: process rows. */
+  /* Note results are scaled up by sqrt(8) compared to a true DCT. */
+  /* cK represents sqrt(2) * cos(K*pi/30). */
+
+  dataptr = data;
+  ctr = 0;
+  for (;;) {
+    elemptr = sample_data[ctr] + start_col;
+
+    /* Even part */
+
+    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[14]);
+    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[13]);
+    tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[12]);
+    tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[11]);
+    tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[10]);
+    tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[9]);
+    tmp6 = GETJSAMPLE(elemptr[6]) + GETJSAMPLE(elemptr[8]);
+    tmp7 = GETJSAMPLE(elemptr[7]);
+
+    tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[14]);
+    tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[13]);
+    tmp12 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[12]);
+    tmp13 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[11]);
+    tmp14 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[10]);
+    tmp15 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[9]);
+    tmp16 = GETJSAMPLE(elemptr[6]) - GETJSAMPLE(elemptr[8]);
+
+    z1 = tmp0 + tmp4 + tmp5;
+    z2 = tmp1 + tmp3 + tmp6;
+    z3 = tmp2 + tmp7;
+    /* Apply unsigned->signed conversion */
+    dataptr[0] = (DCTELEM) (z1 + z2 + z3 - 15 * CENTERJSAMPLE);
+    z3 += z3;
+    dataptr[6] = (DCTELEM)
+      DESCALE(MULTIPLY(z1 - z3, FIX(1.144122806)) - /* c6 */
+	      MULTIPLY(z2 - z3, FIX(0.437016024)),  /* c12 */
+	      CONST_BITS);
+    tmp2 += ((tmp1 + tmp4) >> 1) - tmp7 - tmp7;
+    z1 = MULTIPLY(tmp3 - tmp2, FIX(1.531135173)) -  /* c2+c14 */
+         MULTIPLY(tmp6 - tmp2, FIX(2.238241955));   /* c4+c8 */
+    z2 = MULTIPLY(tmp5 - tmp2, FIX(0.798468008)) -  /* c8-c14 */
+	 MULTIPLY(tmp0 - tmp2, FIX(0.091361227));   /* c2-c4 */
+    z3 = MULTIPLY(tmp0 - tmp3, FIX(1.383309603)) +  /* c2 */
+	 MULTIPLY(tmp6 - tmp5, FIX(0.946293579)) +  /* c8 */
+	 MULTIPLY(tmp1 - tmp4, FIX(0.790569415));   /* (c6+c12)/2 */
+
+    dataptr[2] = (DCTELEM) DESCALE(z1 + z3, CONST_BITS);
+    dataptr[4] = (DCTELEM) DESCALE(z2 + z3, CONST_BITS);
+
+    /* Odd part */
+
+    tmp2 = MULTIPLY(tmp10 - tmp12 - tmp13 + tmp15 + tmp16,
+		    FIX(1.224744871));                         /* c5 */
+    tmp1 = MULTIPLY(tmp10 - tmp14 - tmp15, FIX(1.344997024)) + /* c3 */
+	   MULTIPLY(tmp11 - tmp13 - tmp16, FIX(0.831253876));  /* c9 */
+    tmp12 = MULTIPLY(tmp12, FIX(1.224744871));                 /* c5 */
+    tmp4 = MULTIPLY(tmp10 - tmp16, FIX(1.406466353)) +         /* c1 */
+	   MULTIPLY(tmp11 + tmp14, FIX(1.344997024)) +         /* c3 */
+	   MULTIPLY(tmp13 + tmp15, FIX(0.575212477));          /* c11 */
+    tmp0 = MULTIPLY(tmp13, FIX(0.475753014)) -                 /* c7-c11 */
+	   MULTIPLY(tmp14, FIX(0.513743148)) +                 /* c3-c9 */
+	   MULTIPLY(tmp16, FIX(1.700497885)) + tmp4 + tmp12;   /* c1+c13 */
+    tmp3 = MULTIPLY(tmp10, - FIX(0.355500862)) -               /* -(c1-c7) */
+	   MULTIPLY(tmp11, FIX(2.176250899)) -                 /* c3+c9 */
+	   MULTIPLY(tmp15, FIX(0.869244010)) + tmp4 - tmp12;   /* c11+c13 */
+
+    dataptr[1] = (DCTELEM) DESCALE(tmp0, CONST_BITS);
+    dataptr[3] = (DCTELEM) DESCALE(tmp1, CONST_BITS);
+    dataptr[5] = (DCTELEM) DESCALE(tmp2, CONST_BITS);
+    dataptr[7] = (DCTELEM) DESCALE(tmp3, CONST_BITS);
+
+    ctr++;
+
+    if (ctr != DCTSIZE) {
+      if (ctr == 15)
+	break;			/* Done. */
+      dataptr += DCTSIZE;	/* advance pointer to next row */
+    } else
+      dataptr = workspace;	/* switch pointer to extended workspace */
+  }
+
+  /* Pass 2: process columns.
+   * We leave the results scaled up by an overall factor of 8.
+   * We must also scale the output by (8/15)**2 = 64/225, which we partially
+   * fold into the constant multipliers and final shifting:
+   * cK now represents sqrt(2) * cos(K*pi/30) * 256/225.
+   */
+
+  dataptr = data;
+  wsptr = workspace;
+  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
+    /* Even part */
+
+    tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*6];
+    tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*5];
+    tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*4];
+    tmp3 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*3];
+    tmp4 = dataptr[DCTSIZE*4] + wsptr[DCTSIZE*2];
+    tmp5 = dataptr[DCTSIZE*5] + wsptr[DCTSIZE*1];
+    tmp6 = dataptr[DCTSIZE*6] + wsptr[DCTSIZE*0];
+    tmp7 = dataptr[DCTSIZE*7];
+
+    tmp10 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*6];
+    tmp11 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*5];
+    tmp12 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*4];
+    tmp13 = dataptr[DCTSIZE*3] - wsptr[DCTSIZE*3];
+    tmp14 = dataptr[DCTSIZE*4] - wsptr[DCTSIZE*2];
+    tmp15 = dataptr[DCTSIZE*5] - wsptr[DCTSIZE*1];
+    tmp16 = dataptr[DCTSIZE*6] - wsptr[DCTSIZE*0];
+
+    z1 = tmp0 + tmp4 + tmp5;
+    z2 = tmp1 + tmp3 + tmp6;
+    z3 = tmp2 + tmp7;
+    dataptr[DCTSIZE*0] = (DCTELEM)
+      DESCALE(MULTIPLY(z1 + z2 + z3, FIX(1.137777778)), /* 256/225 */
+	      CONST_BITS+2);
+    z3 += z3;
+    dataptr[DCTSIZE*6] = (DCTELEM)
+      DESCALE(MULTIPLY(z1 - z3, FIX(1.301757503)) - /* c6 */
+	      MULTIPLY(z2 - z3, FIX(0.497227121)),  /* c12 */
+	      CONST_BITS+2);
+    tmp2 += ((tmp1 + tmp4) >> 1) - tmp7 - tmp7;
+    z1 = MULTIPLY(tmp3 - tmp2, FIX(1.742091575)) -  /* c2+c14 */
+         MULTIPLY(tmp6 - tmp2, FIX(2.546621957));   /* c4+c8 */
+    z2 = MULTIPLY(tmp5 - tmp2, FIX(0.908479156)) -  /* c8-c14 */
+	 MULTIPLY(tmp0 - tmp2, FIX(0.103948774));   /* c2-c4 */
+    z3 = MULTIPLY(tmp0 - tmp3, FIX(1.573898926)) +  /* c2 */
+	 MULTIPLY(tmp6 - tmp5, FIX(1.076671805)) +  /* c8 */
+	 MULTIPLY(tmp1 - tmp4, FIX(0.899492312));   /* (c6+c12)/2 */
+
+    dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(z1 + z3, CONST_BITS+2);
+    dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(z2 + z3, CONST_BITS+2);
+
+    /* Odd part */
+
+    tmp2 = MULTIPLY(tmp10 - tmp12 - tmp13 + tmp15 + tmp16,
+		    FIX(1.393487498));                         /* c5 */
+    tmp1 = MULTIPLY(tmp10 - tmp14 - tmp15, FIX(1.530307725)) + /* c3 */
+	   MULTIPLY(tmp11 - tmp13 - tmp16, FIX(0.945782187));  /* c9 */
+    tmp12 = MULTIPLY(tmp12, FIX(1.393487498));                 /* c5 */
+    tmp4 = MULTIPLY(tmp10 - tmp16, FIX(1.600246161)) +         /* c1 */
+	   MULTIPLY(tmp11 + tmp14, FIX(1.530307725)) +         /* c3 */
+	   MULTIPLY(tmp13 + tmp15, FIX(0.654463974));          /* c11 */
+    tmp0 = MULTIPLY(tmp13, FIX(0.541301207)) -                 /* c7-c11 */
+	   MULTIPLY(tmp14, FIX(0.584525538)) +                 /* c3-c9 */
+	   MULTIPLY(tmp16, FIX(1.934788705)) + tmp4 + tmp12;   /* c1+c13 */
+    tmp3 = MULTIPLY(tmp10, - FIX(0.404480980)) -               /* -(c1-c7) */
+	   MULTIPLY(tmp11, FIX(2.476089912)) -                 /* c3+c9 */
+	   MULTIPLY(tmp15, FIX(0.989006518)) + tmp4 - tmp12;   /* c11+c13 */
+
+    dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp0, CONST_BITS+2);
+    dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp1, CONST_BITS+2);
+    dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp2, CONST_BITS+2);
+    dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp3, CONST_BITS+2);
+
+    dataptr++;			/* advance pointer to next column */
+    wsptr++;			/* advance pointer to next column */
+  }
+}
+
+
+/*
+ * Perform the forward DCT on a 16x16 sample block.
+ */
+
+GLOBAL(void)
+jpeg_fdct_16x16 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
+{
+  INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16, tmp17;
+  DCTELEM workspace[DCTSIZE2];
+  DCTELEM *dataptr;
+  DCTELEM *wsptr;
+  JSAMPROW elemptr;
+  int ctr;
+  SHIFT_TEMPS
+
+  /* Pass 1: process rows. */
+  /* Note results are scaled up by sqrt(8) compared to a true DCT; */
+  /* furthermore, we scale the results by 2**PASS1_BITS. */
+  /* cK represents sqrt(2) * cos(K*pi/32). */
+
+  dataptr = data;
+  ctr = 0;
+  for (;;) {
+    elemptr = sample_data[ctr] + start_col;
+
+    /* Even part */
+
+    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[15]);
+    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[14]);
+    tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[13]);
+    tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[12]);
+    tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[11]);
+    tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[10]);
+    tmp6 = GETJSAMPLE(elemptr[6]) + GETJSAMPLE(elemptr[9]);
+    tmp7 = GETJSAMPLE(elemptr[7]) + GETJSAMPLE(elemptr[8]);
+
+    tmp10 = tmp0 + tmp7;
+    tmp14 = tmp0 - tmp7;
+    tmp11 = tmp1 + tmp6;
+    tmp15 = tmp1 - tmp6;
+    tmp12 = tmp2 + tmp5;
+    tmp16 = tmp2 - tmp5;
+    tmp13 = tmp3 + tmp4;
+    tmp17 = tmp3 - tmp4;
+
+    tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[15]);
+    tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[14]);
+    tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[13]);
+    tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[12]);
+    tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[11]);
+    tmp5 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[10]);
+    tmp6 = GETJSAMPLE(elemptr[6]) - GETJSAMPLE(elemptr[9]);
+    tmp7 = GETJSAMPLE(elemptr[7]) - GETJSAMPLE(elemptr[8]);
+
+    /* Apply unsigned->signed conversion */
+    dataptr[0] = (DCTELEM)
+      ((tmp10 + tmp11 + tmp12 + tmp13 - 16 * CENTERJSAMPLE) << PASS1_BITS);
+    dataptr[4] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp10 - tmp13, FIX(1.306562965)) + /* c4[16] = c2[8] */
+	      MULTIPLY(tmp11 - tmp12, FIX_0_541196100),   /* c12[16] = c6[8] */
+	      CONST_BITS-PASS1_BITS);
+
+    tmp10 = MULTIPLY(tmp17 - tmp15, FIX(0.275899379)) +   /* c14[16] = c7[8] */
+	    MULTIPLY(tmp14 - tmp16, FIX(1.387039845));    /* c2[16] = c1[8] */
+
+    dataptr[2] = (DCTELEM)
+      DESCALE(tmp10 + MULTIPLY(tmp15, FIX(1.451774982))   /* c6+c14 */
+	      + MULTIPLY(tmp16, FIX(2.172734804)),        /* c2+c10 */
+	      CONST_BITS-PASS1_BITS);
+    dataptr[6] = (DCTELEM)
+      DESCALE(tmp10 - MULTIPLY(tmp14, FIX(0.211164243))   /* c2-c6 */
+	      - MULTIPLY(tmp17, FIX(1.061594338)),        /* c10+c14 */
+	      CONST_BITS-PASS1_BITS);
+
+    /* Odd part */
+
+    tmp11 = MULTIPLY(tmp0 + tmp1, FIX(1.353318001)) +         /* c3 */
+	    MULTIPLY(tmp6 - tmp7, FIX(0.410524528));          /* c13 */
+    tmp12 = MULTIPLY(tmp0 + tmp2, FIX(1.247225013)) +         /* c5 */
+	    MULTIPLY(tmp5 + tmp7, FIX(0.666655658));          /* c11 */
+    tmp13 = MULTIPLY(tmp0 + tmp3, FIX(1.093201867)) +         /* c7 */
+	    MULTIPLY(tmp4 - tmp7, FIX(0.897167586));          /* c9 */
+    tmp14 = MULTIPLY(tmp1 + tmp2, FIX(0.138617169)) +         /* c15 */
+	    MULTIPLY(tmp6 - tmp5, FIX(1.407403738));          /* c1 */
+    tmp15 = MULTIPLY(tmp1 + tmp3, - FIX(0.666655658)) +       /* -c11 */
+	    MULTIPLY(tmp4 + tmp6, - FIX(1.247225013));        /* -c5 */
+    tmp16 = MULTIPLY(tmp2 + tmp3, - FIX(1.353318001)) +       /* -c3 */
+	    MULTIPLY(tmp5 - tmp4, FIX(0.410524528));          /* c13 */
+    tmp10 = tmp11 + tmp12 + tmp13 -
+	    MULTIPLY(tmp0, FIX(2.286341144)) +                /* c7+c5+c3-c1 */
+	    MULTIPLY(tmp7, FIX(0.779653625));                 /* c15+c13-c11+c9 */
+    tmp11 += tmp14 + tmp15 + MULTIPLY(tmp1, FIX(0.071888074)) /* c9-c3-c15+c11 */
+	     - MULTIPLY(tmp6, FIX(1.663905119));              /* c7+c13+c1-c5 */
+    tmp12 += tmp14 + tmp16 - MULTIPLY(tmp2, FIX(1.125726048)) /* c7+c5+c15-c3 */
+	     + MULTIPLY(tmp5, FIX(1.227391138));              /* c9-c11+c1-c13 */
+    tmp13 += tmp15 + tmp16 + MULTIPLY(tmp3, FIX(1.065388962)) /* c15+c3+c11-c7 */
+	     + MULTIPLY(tmp4, FIX(2.167985692));              /* c1+c13+c5-c9 */
+
+    dataptr[1] = (DCTELEM) DESCALE(tmp10, CONST_BITS-PASS1_BITS);
+    dataptr[3] = (DCTELEM) DESCALE(tmp11, CONST_BITS-PASS1_BITS);
+    dataptr[5] = (DCTELEM) DESCALE(tmp12, CONST_BITS-PASS1_BITS);
+    dataptr[7] = (DCTELEM) DESCALE(tmp13, CONST_BITS-PASS1_BITS);
+
+    ctr++;
+
+    if (ctr != DCTSIZE) {
+      if (ctr == DCTSIZE * 2)
+	break;			/* Done. */
+      dataptr += DCTSIZE;	/* advance pointer to next row */
+    } else
+      dataptr = workspace;	/* switch pointer to extended workspace */
+  }
+
+  /* Pass 2: process columns.
+   * We remove the PASS1_BITS scaling, but leave the results scaled up
+   * by an overall factor of 8.
+   * We must also scale the output by (8/16)**2 = 1/2**2.
+   */
+
+  dataptr = data;
+  wsptr = workspace;
+  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
+    /* Even part */
+
+    tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*7];
+    tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*6];
+    tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*5];
+    tmp3 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*4];
+    tmp4 = dataptr[DCTSIZE*4] + wsptr[DCTSIZE*3];
+    tmp5 = dataptr[DCTSIZE*5] + wsptr[DCTSIZE*2];
+    tmp6 = dataptr[DCTSIZE*6] + wsptr[DCTSIZE*1];
+    tmp7 = dataptr[DCTSIZE*7] + wsptr[DCTSIZE*0];
+
+    tmp10 = tmp0 + tmp7;
+    tmp14 = tmp0 - tmp7;
+    tmp11 = tmp1 + tmp6;
+    tmp15 = tmp1 - tmp6;
+    tmp12 = tmp2 + tmp5;
+    tmp16 = tmp2 - tmp5;
+    tmp13 = tmp3 + tmp4;
+    tmp17 = tmp3 - tmp4;
+
+    tmp0 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*7];
+    tmp1 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*6];
+    tmp2 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*5];
+    tmp3 = dataptr[DCTSIZE*3] - wsptr[DCTSIZE*4];
+    tmp4 = dataptr[DCTSIZE*4] - wsptr[DCTSIZE*3];
+    tmp5 = dataptr[DCTSIZE*5] - wsptr[DCTSIZE*2];
+    tmp6 = dataptr[DCTSIZE*6] - wsptr[DCTSIZE*1];
+    tmp7 = dataptr[DCTSIZE*7] - wsptr[DCTSIZE*0];
+
+    dataptr[DCTSIZE*0] = (DCTELEM)
+      DESCALE(tmp10 + tmp11 + tmp12 + tmp13, PASS1_BITS+2);
+    dataptr[DCTSIZE*4] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp10 - tmp13, FIX(1.306562965)) + /* c4[16] = c2[8] */
+	      MULTIPLY(tmp11 - tmp12, FIX_0_541196100),   /* c12[16] = c6[8] */
+	      CONST_BITS+PASS1_BITS+2);
+
+    tmp10 = MULTIPLY(tmp17 - tmp15, FIX(0.275899379)) +   /* c14[16] = c7[8] */
+	    MULTIPLY(tmp14 - tmp16, FIX(1.387039845));    /* c2[16] = c1[8] */
+
+    dataptr[DCTSIZE*2] = (DCTELEM)
+      DESCALE(tmp10 + MULTIPLY(tmp15, FIX(1.451774982))   /* c6+c14 */
+	      + MULTIPLY(tmp16, FIX(2.172734804)),        /* c2+10 */
+	      CONST_BITS+PASS1_BITS+2);
+    dataptr[DCTSIZE*6] = (DCTELEM)
+      DESCALE(tmp10 - MULTIPLY(tmp14, FIX(0.211164243))   /* c2-c6 */
+	      - MULTIPLY(tmp17, FIX(1.061594338)),        /* c10+c14 */
+	      CONST_BITS+PASS1_BITS+2);
+
+    /* Odd part */
+
+    tmp11 = MULTIPLY(tmp0 + tmp1, FIX(1.353318001)) +         /* c3 */
+	    MULTIPLY(tmp6 - tmp7, FIX(0.410524528));          /* c13 */
+    tmp12 = MULTIPLY(tmp0 + tmp2, FIX(1.247225013)) +         /* c5 */
+	    MULTIPLY(tmp5 + tmp7, FIX(0.666655658));          /* c11 */
+    tmp13 = MULTIPLY(tmp0 + tmp3, FIX(1.093201867)) +         /* c7 */
+	    MULTIPLY(tmp4 - tmp7, FIX(0.897167586));          /* c9 */
+    tmp14 = MULTIPLY(tmp1 + tmp2, FIX(0.138617169)) +         /* c15 */
+	    MULTIPLY(tmp6 - tmp5, FIX(1.407403738));          /* c1 */
+    tmp15 = MULTIPLY(tmp1 + tmp3, - FIX(0.666655658)) +       /* -c11 */
+	    MULTIPLY(tmp4 + tmp6, - FIX(1.247225013));        /* -c5 */
+    tmp16 = MULTIPLY(tmp2 + tmp3, - FIX(1.353318001)) +       /* -c3 */
+	    MULTIPLY(tmp5 - tmp4, FIX(0.410524528));          /* c13 */
+    tmp10 = tmp11 + tmp12 + tmp13 -
+	    MULTIPLY(tmp0, FIX(2.286341144)) +                /* c7+c5+c3-c1 */
+	    MULTIPLY(tmp7, FIX(0.779653625));                 /* c15+c13-c11+c9 */
+    tmp11 += tmp14 + tmp15 + MULTIPLY(tmp1, FIX(0.071888074)) /* c9-c3-c15+c11 */
+	     - MULTIPLY(tmp6, FIX(1.663905119));              /* c7+c13+c1-c5 */
+    tmp12 += tmp14 + tmp16 - MULTIPLY(tmp2, FIX(1.125726048)) /* c7+c5+c15-c3 */
+	     + MULTIPLY(tmp5, FIX(1.227391138));              /* c9-c11+c1-c13 */
+    tmp13 += tmp15 + tmp16 + MULTIPLY(tmp3, FIX(1.065388962)) /* c15+c3+c11-c7 */
+	     + MULTIPLY(tmp4, FIX(2.167985692));              /* c1+c13+c5-c9 */
+
+    dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp10, CONST_BITS+PASS1_BITS+2);
+    dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp11, CONST_BITS+PASS1_BITS+2);
+    dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp12, CONST_BITS+PASS1_BITS+2);
+    dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp13, CONST_BITS+PASS1_BITS+2);
+
+    dataptr++;			/* advance pointer to next column */
+    wsptr++;			/* advance pointer to next column */
+  }
+}
+
+
+/*
+ * Perform the forward DCT on a 16x8 sample block.
+ *
+ * 16-point FDCT in pass 1 (rows), 8-point in pass 2 (columns).
+ */
+
+GLOBAL(void)
+jpeg_fdct_16x8 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
+{
+  INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16, tmp17;
+  INT32 z1;
+  DCTELEM *dataptr;
+  JSAMPROW elemptr;
+  int ctr;
+  SHIFT_TEMPS
+
+  /* Pass 1: process rows. */
+  /* Note results are scaled up by sqrt(8) compared to a true DCT; */
+  /* furthermore, we scale the results by 2**PASS1_BITS. */
+  /* 16-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/32). */
+
+  dataptr = data;
+  ctr = 0;
+  for (ctr = 0; ctr < DCTSIZE; ctr++) {
+    elemptr = sample_data[ctr] + start_col;
+
+    /* Even part */
+
+    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[15]);
+    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[14]);
+    tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[13]);
+    tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[12]);
+    tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[11]);
+    tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[10]);
+    tmp6 = GETJSAMPLE(elemptr[6]) + GETJSAMPLE(elemptr[9]);
+    tmp7 = GETJSAMPLE(elemptr[7]) + GETJSAMPLE(elemptr[8]);
+
+    tmp10 = tmp0 + tmp7;
+    tmp14 = tmp0 - tmp7;
+    tmp11 = tmp1 + tmp6;
+    tmp15 = tmp1 - tmp6;
+    tmp12 = tmp2 + tmp5;
+    tmp16 = tmp2 - tmp5;
+    tmp13 = tmp3 + tmp4;
+    tmp17 = tmp3 - tmp4;
+
+    tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[15]);
+    tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[14]);
+    tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[13]);
+    tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[12]);
+    tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[11]);
+    tmp5 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[10]);
+    tmp6 = GETJSAMPLE(elemptr[6]) - GETJSAMPLE(elemptr[9]);
+    tmp7 = GETJSAMPLE(elemptr[7]) - GETJSAMPLE(elemptr[8]);
+
+    /* Apply unsigned->signed conversion */
+    dataptr[0] = (DCTELEM)
+      ((tmp10 + tmp11 + tmp12 + tmp13 - 16 * CENTERJSAMPLE) << PASS1_BITS);
+    dataptr[4] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp10 - tmp13, FIX(1.306562965)) + /* c4[16] = c2[8] */
+	      MULTIPLY(tmp11 - tmp12, FIX_0_541196100),   /* c12[16] = c6[8] */
+	      CONST_BITS-PASS1_BITS);
+
+    tmp10 = MULTIPLY(tmp17 - tmp15, FIX(0.275899379)) +   /* c14[16] = c7[8] */
+	    MULTIPLY(tmp14 - tmp16, FIX(1.387039845));    /* c2[16] = c1[8] */
+
+    dataptr[2] = (DCTELEM)
+      DESCALE(tmp10 + MULTIPLY(tmp15, FIX(1.451774982))   /* c6+c14 */
+	      + MULTIPLY(tmp16, FIX(2.172734804)),        /* c2+c10 */
+	      CONST_BITS-PASS1_BITS);
+    dataptr[6] = (DCTELEM)
+      DESCALE(tmp10 - MULTIPLY(tmp14, FIX(0.211164243))   /* c2-c6 */
+	      - MULTIPLY(tmp17, FIX(1.061594338)),        /* c10+c14 */
+	      CONST_BITS-PASS1_BITS);
+
+    /* Odd part */
+
+    tmp11 = MULTIPLY(tmp0 + tmp1, FIX(1.353318001)) +         /* c3 */
+	    MULTIPLY(tmp6 - tmp7, FIX(0.410524528));          /* c13 */
+    tmp12 = MULTIPLY(tmp0 + tmp2, FIX(1.247225013)) +         /* c5 */
+	    MULTIPLY(tmp5 + tmp7, FIX(0.666655658));          /* c11 */
+    tmp13 = MULTIPLY(tmp0 + tmp3, FIX(1.093201867)) +         /* c7 */
+	    MULTIPLY(tmp4 - tmp7, FIX(0.897167586));          /* c9 */
+    tmp14 = MULTIPLY(tmp1 + tmp2, FIX(0.138617169)) +         /* c15 */
+	    MULTIPLY(tmp6 - tmp5, FIX(1.407403738));          /* c1 */
+    tmp15 = MULTIPLY(tmp1 + tmp3, - FIX(0.666655658)) +       /* -c11 */
+	    MULTIPLY(tmp4 + tmp6, - FIX(1.247225013));        /* -c5 */
+    tmp16 = MULTIPLY(tmp2 + tmp3, - FIX(1.353318001)) +       /* -c3 */
+	    MULTIPLY(tmp5 - tmp4, FIX(0.410524528));          /* c13 */
+    tmp10 = tmp11 + tmp12 + tmp13 -
+	    MULTIPLY(tmp0, FIX(2.286341144)) +                /* c7+c5+c3-c1 */
+	    MULTIPLY(tmp7, FIX(0.779653625));                 /* c15+c13-c11+c9 */
+    tmp11 += tmp14 + tmp15 + MULTIPLY(tmp1, FIX(0.071888074)) /* c9-c3-c15+c11 */
+	     - MULTIPLY(tmp6, FIX(1.663905119));              /* c7+c13+c1-c5 */
+    tmp12 += tmp14 + tmp16 - MULTIPLY(tmp2, FIX(1.125726048)) /* c7+c5+c15-c3 */
+	     + MULTIPLY(tmp5, FIX(1.227391138));              /* c9-c11+c1-c13 */
+    tmp13 += tmp15 + tmp16 + MULTIPLY(tmp3, FIX(1.065388962)) /* c15+c3+c11-c7 */
+	     + MULTIPLY(tmp4, FIX(2.167985692));              /* c1+c13+c5-c9 */
+
+    dataptr[1] = (DCTELEM) DESCALE(tmp10, CONST_BITS-PASS1_BITS);
+    dataptr[3] = (DCTELEM) DESCALE(tmp11, CONST_BITS-PASS1_BITS);
+    dataptr[5] = (DCTELEM) DESCALE(tmp12, CONST_BITS-PASS1_BITS);
+    dataptr[7] = (DCTELEM) DESCALE(tmp13, CONST_BITS-PASS1_BITS);
+
+    dataptr += DCTSIZE;		/* advance pointer to next row */
+  }
+
+  /* Pass 2: process columns.
+   * We remove the PASS1_BITS scaling, but leave the results scaled up
+   * by an overall factor of 8.
+   * We must also scale the output by 8/16 = 1/2.
+   */
+
+  dataptr = data;
+  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
+    /* Even part per LL&M figure 1 --- note that published figure is faulty;
+     * rotator "sqrt(2)*c1" should be "sqrt(2)*c6".
+     */
+
+    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7];
+    tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6];
+    tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5];
+    tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4];
+
+    tmp10 = tmp0 + tmp3;
+    tmp12 = tmp0 - tmp3;
+    tmp11 = tmp1 + tmp2;
+    tmp13 = tmp1 - tmp2;
+
+    tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7];
+    tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6];
+    tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5];
+    tmp3 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4];
+
+    dataptr[DCTSIZE*0] = (DCTELEM) DESCALE(tmp10 + tmp11, PASS1_BITS+1);
+    dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp10 - tmp11, PASS1_BITS+1);
+
+    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
+    dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, FIX_0_765366865),
+					   CONST_BITS+PASS1_BITS+1);
+    dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(z1 - MULTIPLY(tmp13, FIX_1_847759065),
+					   CONST_BITS+PASS1_BITS+1);
+
+    /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
+     * 8-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
+     * i0..i3 in the paper are tmp0..tmp3 here.
+     */
+
+    tmp10 = tmp0 + tmp3;
+    tmp11 = tmp1 + tmp2;
+    tmp12 = tmp0 + tmp2;
+    tmp13 = tmp1 + tmp3;
+    z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602); /*  c3 */
+
+    tmp0  = MULTIPLY(tmp0,    FIX_1_501321110);    /*  c1+c3-c5-c7 */
+    tmp1  = MULTIPLY(tmp1,    FIX_3_072711026);    /*  c1+c3+c5-c7 */
+    tmp2  = MULTIPLY(tmp2,    FIX_2_053119869);    /*  c1+c3-c5+c7 */
+    tmp3  = MULTIPLY(tmp3,    FIX_0_298631336);    /* -c1+c3+c5-c7 */
+    tmp10 = MULTIPLY(tmp10, - FIX_0_899976223);    /*  c7-c3 */
+    tmp11 = MULTIPLY(tmp11, - FIX_2_562915447);    /* -c1-c3 */
+    tmp12 = MULTIPLY(tmp12, - FIX_0_390180644);    /*  c5-c3 */
+    tmp13 = MULTIPLY(tmp13, - FIX_1_961570560);    /* -c3-c5 */
+
+    tmp12 += z1;
+    tmp13 += z1;
+
+    dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp0 + tmp10 + tmp12,
+					   CONST_BITS+PASS1_BITS+1);
+    dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp1 + tmp11 + tmp13,
+					   CONST_BITS+PASS1_BITS+1);
+    dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp2 + tmp11 + tmp12,
+					   CONST_BITS+PASS1_BITS+1);
+    dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp3 + tmp10 + tmp13,
+					   CONST_BITS+PASS1_BITS+1);
+
+    dataptr++;			/* advance pointer to next column */
+  }
+}
+
+
+/*
+ * Perform the forward DCT on a 14x7 sample block.
+ *
+ * 14-point FDCT in pass 1 (rows), 7-point in pass 2 (columns).
+ */
+
+GLOBAL(void)
+jpeg_fdct_14x7 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
+{
+  INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
+  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
+  INT32 z1, z2, z3;
+  DCTELEM *dataptr;
+  JSAMPROW elemptr;
+  int ctr;
+  SHIFT_TEMPS
+
+  /* Zero bottom row of output coefficient block. */
+  MEMZERO(&data[DCTSIZE*7], SIZEOF(DCTELEM) * DCTSIZE);
+
+  /* Pass 1: process rows. */
+  /* Note results are scaled up by sqrt(8) compared to a true DCT; */
+  /* furthermore, we scale the results by 2**PASS1_BITS. */
+  /* 14-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/28). */
+
+  dataptr = data;
+  for (ctr = 0; ctr < 7; ctr++) {
+    elemptr = sample_data[ctr] + start_col;
+
+    /* Even part */
+
+    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[13]);
+    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[12]);
+    tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[11]);
+    tmp13 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[10]);
+    tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[9]);
+    tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[8]);
+    tmp6 = GETJSAMPLE(elemptr[6]) + GETJSAMPLE(elemptr[7]);
+
+    tmp10 = tmp0 + tmp6;
+    tmp14 = tmp0 - tmp6;
+    tmp11 = tmp1 + tmp5;
+    tmp15 = tmp1 - tmp5;
+    tmp12 = tmp2 + tmp4;
+    tmp16 = tmp2 - tmp4;
+
+    tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[13]);
+    tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[12]);
+    tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[11]);
+    tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[10]);
+    tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[9]);
+    tmp5 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[8]);
+    tmp6 = GETJSAMPLE(elemptr[6]) - GETJSAMPLE(elemptr[7]);
+
+    /* Apply unsigned->signed conversion */
+    dataptr[0] = (DCTELEM)
+      ((tmp10 + tmp11 + tmp12 + tmp13 - 14 * CENTERJSAMPLE) << PASS1_BITS);
+    tmp13 += tmp13;
+    dataptr[4] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp10 - tmp13, FIX(1.274162392)) + /* c4 */
+	      MULTIPLY(tmp11 - tmp13, FIX(0.314692123)) - /* c12 */
+	      MULTIPLY(tmp12 - tmp13, FIX(0.881747734)),  /* c8 */
+	      CONST_BITS-PASS1_BITS);
+
+    tmp10 = MULTIPLY(tmp14 + tmp15, FIX(1.105676686));    /* c6 */
+
+    dataptr[2] = (DCTELEM)
+      DESCALE(tmp10 + MULTIPLY(tmp14, FIX(0.273079590))   /* c2-c6 */
+	      + MULTIPLY(tmp16, FIX(0.613604268)),        /* c10 */
+	      CONST_BITS-PASS1_BITS);
+    dataptr[6] = (DCTELEM)
+      DESCALE(tmp10 - MULTIPLY(tmp15, FIX(1.719280954))   /* c6+c10 */
+	      - MULTIPLY(tmp16, FIX(1.378756276)),        /* c2 */
+	      CONST_BITS-PASS1_BITS);
+
+    /* Odd part */
+
+    tmp10 = tmp1 + tmp2;
+    tmp11 = tmp5 - tmp4;
+    dataptr[7] = (DCTELEM) ((tmp0 - tmp10 + tmp3 - tmp11 - tmp6) << PASS1_BITS);
+    tmp3 <<= CONST_BITS;
+    tmp10 = MULTIPLY(tmp10, - FIX(0.158341681));          /* -c13 */
+    tmp11 = MULTIPLY(tmp11, FIX(1.405321284));            /* c1 */
+    tmp10 += tmp11 - tmp3;
+    tmp11 = MULTIPLY(tmp0 + tmp2, FIX(1.197448846)) +     /* c5 */
+	    MULTIPLY(tmp4 + tmp6, FIX(0.752406978));      /* c9 */
+    dataptr[5] = (DCTELEM)
+      DESCALE(tmp10 + tmp11 - MULTIPLY(tmp2, FIX(2.373959773)) /* c3+c5-c13 */
+	      + MULTIPLY(tmp4, FIX(1.119999435)),         /* c1+c11-c9 */
+	      CONST_BITS-PASS1_BITS);
+    tmp12 = MULTIPLY(tmp0 + tmp1, FIX(1.334852607)) +     /* c3 */
+	    MULTIPLY(tmp5 - tmp6, FIX(0.467085129));      /* c11 */
+    dataptr[3] = (DCTELEM)
+      DESCALE(tmp10 + tmp12 - MULTIPLY(tmp1, FIX(0.424103948)) /* c3-c9-c13 */
+	      - MULTIPLY(tmp5, FIX(3.069855259)),         /* c1+c5+c11 */
+	      CONST_BITS-PASS1_BITS);
+    dataptr[1] = (DCTELEM)
+      DESCALE(tmp11 + tmp12 + tmp3 + tmp6 -
+	      MULTIPLY(tmp0 + tmp6, FIX(1.126980169)),    /* c3+c5-c1 */
+	      CONST_BITS-PASS1_BITS);
+
+    dataptr += DCTSIZE;		/* advance pointer to next row */
+  }
+
+  /* Pass 2: process columns.
+   * We remove the PASS1_BITS scaling, but leave the results scaled up
+   * by an overall factor of 8.
+   * We must also scale the output by (8/14)*(8/7) = 32/49, which we
+   * partially fold into the constant multipliers and final shifting:
+   * 7-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/14) * 64/49.
+   */
+
+  dataptr = data;
+  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
+    /* Even part */
+
+    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*6];
+    tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*5];
+    tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*4];
+    tmp3 = dataptr[DCTSIZE*3];
+
+    tmp10 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*6];
+    tmp11 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*5];
+    tmp12 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*4];
+
+    z1 = tmp0 + tmp2;
+    dataptr[DCTSIZE*0] = (DCTELEM)
+      DESCALE(MULTIPLY(z1 + tmp1 + tmp3, FIX(1.306122449)), /* 64/49 */
+	      CONST_BITS+PASS1_BITS+1);
+    tmp3 += tmp3;
+    z1 -= tmp3;
+    z1 -= tmp3;
+    z1 = MULTIPLY(z1, FIX(0.461784020));                /* (c2+c6-c4)/2 */
+    z2 = MULTIPLY(tmp0 - tmp2, FIX(1.202428084));       /* (c2+c4-c6)/2 */
+    z3 = MULTIPLY(tmp1 - tmp2, FIX(0.411026446));       /* c6 */
+    dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(z1 + z2 + z3, CONST_BITS+PASS1_BITS+1);
+    z1 -= z2;
+    z2 = MULTIPLY(tmp0 - tmp1, FIX(1.151670509));       /* c4 */
+    dataptr[DCTSIZE*4] = (DCTELEM)
+      DESCALE(z2 + z3 - MULTIPLY(tmp1 - tmp3, FIX(0.923568041)), /* c2+c6-c4 */
+	      CONST_BITS+PASS1_BITS+1);
+    dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(z1 + z2, CONST_BITS+PASS1_BITS+1);
+
+    /* Odd part */
+
+    tmp1 = MULTIPLY(tmp10 + tmp11, FIX(1.221765677));   /* (c3+c1-c5)/2 */
+    tmp2 = MULTIPLY(tmp10 - tmp11, FIX(0.222383464));   /* (c3+c5-c1)/2 */
+    tmp0 = tmp1 - tmp2;
+    tmp1 += tmp2;
+    tmp2 = MULTIPLY(tmp11 + tmp12, - FIX(1.800824523)); /* -c1 */
+    tmp1 += tmp2;
+    tmp3 = MULTIPLY(tmp10 + tmp12, FIX(0.801442310));   /* c5 */
+    tmp0 += tmp3;
+    tmp2 += tmp3 + MULTIPLY(tmp12, FIX(2.443531355));   /* c3+c1-c5 */
+
+    dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp0, CONST_BITS+PASS1_BITS+1);
+    dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp1, CONST_BITS+PASS1_BITS+1);
+    dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp2, CONST_BITS+PASS1_BITS+1);
+
+    dataptr++;			/* advance pointer to next column */
+  }
+}
+
+
+/*
+ * Perform the forward DCT on a 12x6 sample block.
+ *
+ * 12-point FDCT in pass 1 (rows), 6-point in pass 2 (columns).
+ */
+
+GLOBAL(void)
+jpeg_fdct_12x6 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
+{
+  INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
+  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
+  DCTELEM *dataptr;
+  JSAMPROW elemptr;
+  int ctr;
+  SHIFT_TEMPS
+
+  /* Zero 2 bottom rows of output coefficient block. */
+  MEMZERO(&data[DCTSIZE*6], SIZEOF(DCTELEM) * DCTSIZE * 2);
+
+  /* Pass 1: process rows. */
+  /* Note results are scaled up by sqrt(8) compared to a true DCT; */
+  /* furthermore, we scale the results by 2**PASS1_BITS. */
+  /* 12-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/24). */
+
+  dataptr = data;
+  for (ctr = 0; ctr < 6; ctr++) {
+    elemptr = sample_data[ctr] + start_col;
+
+    /* Even part */
+
+    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[11]);
+    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[10]);
+    tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[9]);
+    tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[8]);
+    tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[7]);
+    tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[6]);
+
+    tmp10 = tmp0 + tmp5;
+    tmp13 = tmp0 - tmp5;
+    tmp11 = tmp1 + tmp4;
+    tmp14 = tmp1 - tmp4;
+    tmp12 = tmp2 + tmp3;
+    tmp15 = tmp2 - tmp3;
+
+    tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[11]);
+    tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[10]);
+    tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[9]);
+    tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[8]);
+    tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[7]);
+    tmp5 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[6]);
+
+    /* Apply unsigned->signed conversion */
+    dataptr[0] = (DCTELEM)
+      ((tmp10 + tmp11 + tmp12 - 12 * CENTERJSAMPLE) << PASS1_BITS);
+    dataptr[6] = (DCTELEM) ((tmp13 - tmp14 - tmp15) << PASS1_BITS);
+    dataptr[4] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.224744871)), /* c4 */
+	      CONST_BITS-PASS1_BITS);
+    dataptr[2] = (DCTELEM)
+      DESCALE(tmp14 - tmp15 + MULTIPLY(tmp13 + tmp15, FIX(1.366025404)), /* c2 */
+	      CONST_BITS-PASS1_BITS);
+
+    /* Odd part */
+
+    tmp10 = MULTIPLY(tmp1 + tmp4, FIX_0_541196100);    /* c9 */
+    tmp14 = tmp10 + MULTIPLY(tmp1, FIX_0_765366865);   /* c3-c9 */
+    tmp15 = tmp10 - MULTIPLY(tmp4, FIX_1_847759065);   /* c3+c9 */
+    tmp12 = MULTIPLY(tmp0 + tmp2, FIX(1.121971054));   /* c5 */
+    tmp13 = MULTIPLY(tmp0 + tmp3, FIX(0.860918669));   /* c7 */
+    tmp10 = tmp12 + tmp13 + tmp14 - MULTIPLY(tmp0, FIX(0.580774953)) /* c5+c7-c1 */
+	    + MULTIPLY(tmp5, FIX(0.184591911));        /* c11 */
+    tmp11 = MULTIPLY(tmp2 + tmp3, - FIX(0.184591911)); /* -c11 */
+    tmp12 += tmp11 - tmp15 - MULTIPLY(tmp2, FIX(2.339493912)) /* c1+c5-c11 */
+	    + MULTIPLY(tmp5, FIX(0.860918669));        /* c7 */
+    tmp13 += tmp11 - tmp14 + MULTIPLY(tmp3, FIX(0.725788011)) /* c1+c11-c7 */
+	    - MULTIPLY(tmp5, FIX(1.121971054));        /* c5 */
+    tmp11 = tmp15 + MULTIPLY(tmp0 - tmp3, FIX(1.306562965)) /* c3 */
+	    - MULTIPLY(tmp2 + tmp5, FIX_0_541196100);  /* c9 */
+
+    dataptr[1] = (DCTELEM) DESCALE(tmp10, CONST_BITS-PASS1_BITS);
+    dataptr[3] = (DCTELEM) DESCALE(tmp11, CONST_BITS-PASS1_BITS);
+    dataptr[5] = (DCTELEM) DESCALE(tmp12, CONST_BITS-PASS1_BITS);
+    dataptr[7] = (DCTELEM) DESCALE(tmp13, CONST_BITS-PASS1_BITS);
+
+    dataptr += DCTSIZE;		/* advance pointer to next row */
+  }
+
+  /* Pass 2: process columns.
+   * We remove the PASS1_BITS scaling, but leave the results scaled up
+   * by an overall factor of 8.
+   * We must also scale the output by (8/12)*(8/6) = 8/9, which we
+   * partially fold into the constant multipliers and final shifting:
+   * 6-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/12) * 16/9.
+   */
+
+  dataptr = data;
+  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
+    /* Even part */
+
+    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*5];
+    tmp11 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*4];
+    tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*3];
+
+    tmp10 = tmp0 + tmp2;
+    tmp12 = tmp0 - tmp2;
+
+    tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*5];
+    tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*4];
+    tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*3];
+
+    dataptr[DCTSIZE*0] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp10 + tmp11, FIX(1.777777778)),         /* 16/9 */
+	      CONST_BITS+PASS1_BITS+1);
+    dataptr[DCTSIZE*2] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp12, FIX(2.177324216)),                 /* c2 */
+	      CONST_BITS+PASS1_BITS+1);
+    dataptr[DCTSIZE*4] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp10 - tmp11 - tmp11, FIX(1.257078722)), /* c4 */
+	      CONST_BITS+PASS1_BITS+1);
+
+    /* Odd part */
+
+    tmp10 = MULTIPLY(tmp0 + tmp2, FIX(0.650711829));             /* c5 */
+
+    dataptr[DCTSIZE*1] = (DCTELEM)
+      DESCALE(tmp10 + MULTIPLY(tmp0 + tmp1, FIX(1.777777778)),   /* 16/9 */
+	      CONST_BITS+PASS1_BITS+1);
+    dataptr[DCTSIZE*3] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp0 - tmp1 - tmp2, FIX(1.777777778)),    /* 16/9 */
+	      CONST_BITS+PASS1_BITS+1);
+    dataptr[DCTSIZE*5] = (DCTELEM)
+      DESCALE(tmp10 + MULTIPLY(tmp2 - tmp1, FIX(1.777777778)),   /* 16/9 */
+	      CONST_BITS+PASS1_BITS+1);
+
+    dataptr++;			/* advance pointer to next column */
+  }
+}
+
+
+/*
+ * Perform the forward DCT on a 10x5 sample block.
+ *
+ * 10-point FDCT in pass 1 (rows), 5-point in pass 2 (columns).
+ */
+
+GLOBAL(void)
+jpeg_fdct_10x5 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
+{
+  INT32 tmp0, tmp1, tmp2, tmp3, tmp4;
+  INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
+  DCTELEM *dataptr;
+  JSAMPROW elemptr;
+  int ctr;
+  SHIFT_TEMPS
+
+  /* Zero 3 bottom rows of output coefficient block. */
+  MEMZERO(&data[DCTSIZE*5], SIZEOF(DCTELEM) * DCTSIZE * 3);
+
+  /* Pass 1: process rows. */
+  /* Note results are scaled up by sqrt(8) compared to a true DCT; */
+  /* furthermore, we scale the results by 2**PASS1_BITS. */
+  /* 10-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/20). */
+
+  dataptr = data;
+  for (ctr = 0; ctr < 5; ctr++) {
+    elemptr = sample_data[ctr] + start_col;
+
+    /* Even part */
+
+    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[9]);
+    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[8]);
+    tmp12 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[7]);
+    tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[6]);
+    tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[5]);
+
+    tmp10 = tmp0 + tmp4;
+    tmp13 = tmp0 - tmp4;
+    tmp11 = tmp1 + tmp3;
+    tmp14 = tmp1 - tmp3;
+
+    tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[9]);
+    tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[8]);
+    tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[7]);
+    tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[6]);
+    tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[5]);
+
+    /* Apply unsigned->signed conversion */
+    dataptr[0] = (DCTELEM)
+      ((tmp10 + tmp11 + tmp12 - 10 * CENTERJSAMPLE) << PASS1_BITS);
+    tmp12 += tmp12;
+    dataptr[4] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.144122806)) - /* c4 */
+	      MULTIPLY(tmp11 - tmp12, FIX(0.437016024)),  /* c8 */
+	      CONST_BITS-PASS1_BITS);
+    tmp10 = MULTIPLY(tmp13 + tmp14, FIX(0.831253876));    /* c6 */
+    dataptr[2] = (DCTELEM)
+      DESCALE(tmp10 + MULTIPLY(tmp13, FIX(0.513743148)),  /* c2-c6 */
+	      CONST_BITS-PASS1_BITS);
+    dataptr[6] = (DCTELEM)
+      DESCALE(tmp10 - MULTIPLY(tmp14, FIX(2.176250899)),  /* c2+c6 */
+	      CONST_BITS-PASS1_BITS);
+
+    /* Odd part */
+
+    tmp10 = tmp0 + tmp4;
+    tmp11 = tmp1 - tmp3;
+    dataptr[5] = (DCTELEM) ((tmp10 - tmp11 - tmp2) << PASS1_BITS);
+    tmp2 <<= CONST_BITS;
+    dataptr[1] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp0, FIX(1.396802247)) +          /* c1 */
+	      MULTIPLY(tmp1, FIX(1.260073511)) + tmp2 +   /* c3 */
+	      MULTIPLY(tmp3, FIX(0.642039522)) +          /* c7 */
+	      MULTIPLY(tmp4, FIX(0.221231742)),           /* c9 */
+	      CONST_BITS-PASS1_BITS);
+    tmp12 = MULTIPLY(tmp0 - tmp4, FIX(0.951056516)) -     /* (c3+c7)/2 */
+	    MULTIPLY(tmp1 + tmp3, FIX(0.587785252));      /* (c1-c9)/2 */
+    tmp13 = MULTIPLY(tmp10 + tmp11, FIX(0.309016994)) +   /* (c3-c7)/2 */
+	    (tmp11 << (CONST_BITS - 1)) - tmp2;
+    dataptr[3] = (DCTELEM) DESCALE(tmp12 + tmp13, CONST_BITS-PASS1_BITS);
+    dataptr[7] = (DCTELEM) DESCALE(tmp12 - tmp13, CONST_BITS-PASS1_BITS);
+
+    dataptr += DCTSIZE;		/* advance pointer to next row */
+  }
+
+  /* Pass 2: process columns.
+   * We remove the PASS1_BITS scaling, but leave the results scaled up
+   * by an overall factor of 8.
+   * We must also scale the output by (8/10)*(8/5) = 32/25, which we
+   * fold into the constant multipliers:
+   * 5-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/10) * 32/25.
+   */
+
+  dataptr = data;
+  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
+    /* Even part */
+
+    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*4];
+    tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*3];
+    tmp2 = dataptr[DCTSIZE*2];
+
+    tmp10 = tmp0 + tmp1;
+    tmp11 = tmp0 - tmp1;
+
+    tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*4];
+    tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*3];
+
+    dataptr[DCTSIZE*0] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp10 + tmp2, FIX(1.28)),        /* 32/25 */
+	      CONST_BITS+PASS1_BITS);
+    tmp11 = MULTIPLY(tmp11, FIX(1.011928851));          /* (c2+c4)/2 */
+    tmp10 -= tmp2 << 2;
+    tmp10 = MULTIPLY(tmp10, FIX(0.452548340));          /* (c2-c4)/2 */
+    dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(tmp11 + tmp10, CONST_BITS+PASS1_BITS);
+    dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp11 - tmp10, CONST_BITS+PASS1_BITS);
+
+    /* Odd part */
+
+    tmp10 = MULTIPLY(tmp0 + tmp1, FIX(1.064004961));    /* c3 */
+
+    dataptr[DCTSIZE*1] = (DCTELEM)
+      DESCALE(tmp10 + MULTIPLY(tmp0, FIX(0.657591230)), /* c1-c3 */
+	      CONST_BITS+PASS1_BITS);
+    dataptr[DCTSIZE*3] = (DCTELEM)
+      DESCALE(tmp10 - MULTIPLY(tmp1, FIX(2.785601151)), /* c1+c3 */
+	      CONST_BITS+PASS1_BITS);
+
+    dataptr++;			/* advance pointer to next column */
+  }
+}
+
+
+/*
+ * Perform the forward DCT on an 8x4 sample block.
+ *
+ * 8-point FDCT in pass 1 (rows), 4-point in pass 2 (columns).
+ */
+
+GLOBAL(void)
+jpeg_fdct_8x4 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
+{
+  INT32 tmp0, tmp1, tmp2, tmp3;
+  INT32 tmp10, tmp11, tmp12, tmp13;
+  INT32 z1;
+  DCTELEM *dataptr;
+  JSAMPROW elemptr;
+  int ctr;
+  SHIFT_TEMPS
+
+  /* Zero 4 bottom rows of output coefficient block. */
+  MEMZERO(&data[DCTSIZE*4], SIZEOF(DCTELEM) * DCTSIZE * 4);
+
+  /* Pass 1: process rows. */
+  /* Note results are scaled up by sqrt(8) compared to a true DCT; */
+  /* furthermore, we scale the results by 2**PASS1_BITS. */
+  /* We must also scale the output by 8/4 = 2, which we add here. */
+
+  dataptr = data;
+  for (ctr = 0; ctr < 4; ctr++) {
+    elemptr = sample_data[ctr] + start_col;
+
+    /* Even part per LL&M figure 1 --- note that published figure is faulty;
+     * rotator "sqrt(2)*c1" should be "sqrt(2)*c6".
+     */
+
+    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[7]);
+    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[6]);
+    tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[5]);
+    tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[4]);
+
+    tmp10 = tmp0 + tmp3;
+    tmp12 = tmp0 - tmp3;
+    tmp11 = tmp1 + tmp2;
+    tmp13 = tmp1 - tmp2;
+
+    tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[7]);
+    tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[6]);
+    tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[5]);
+    tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[4]);
+
+    /* Apply unsigned->signed conversion */
+    dataptr[0] = (DCTELEM)
+      ((tmp10 + tmp11 - 8 * CENTERJSAMPLE) << (PASS1_BITS+1));
+    dataptr[4] = (DCTELEM) ((tmp10 - tmp11) << (PASS1_BITS+1));
+
+    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
+    /* Add fudge factor here for final descale. */
+    z1 += ONE << (CONST_BITS-PASS1_BITS-2);
+    dataptr[2] = (DCTELEM) RIGHT_SHIFT(z1 + MULTIPLY(tmp12, FIX_0_765366865),
+				       CONST_BITS-PASS1_BITS-1);
+    dataptr[6] = (DCTELEM) RIGHT_SHIFT(z1 - MULTIPLY(tmp13, FIX_1_847759065),
+				       CONST_BITS-PASS1_BITS-1);
+
+    /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
+     * 8-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
+     * i0..i3 in the paper are tmp0..tmp3 here.
+     */
+
+    tmp10 = tmp0 + tmp3;
+    tmp11 = tmp1 + tmp2;
+    tmp12 = tmp0 + tmp2;
+    tmp13 = tmp1 + tmp3;
+    z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602); /*  c3 */
+    /* Add fudge factor here for final descale. */
+    z1 += ONE << (CONST_BITS-PASS1_BITS-2);
+
+    tmp0  = MULTIPLY(tmp0,    FIX_1_501321110);    /*  c1+c3-c5-c7 */
+    tmp1  = MULTIPLY(tmp1,    FIX_3_072711026);    /*  c1+c3+c5-c7 */
+    tmp2  = MULTIPLY(tmp2,    FIX_2_053119869);    /*  c1+c3-c5+c7 */
+    tmp3  = MULTIPLY(tmp3,    FIX_0_298631336);    /* -c1+c3+c5-c7 */
+    tmp10 = MULTIPLY(tmp10, - FIX_0_899976223);    /*  c7-c3 */
+    tmp11 = MULTIPLY(tmp11, - FIX_2_562915447);    /* -c1-c3 */
+    tmp12 = MULTIPLY(tmp12, - FIX_0_390180644);    /*  c5-c3 */
+    tmp13 = MULTIPLY(tmp13, - FIX_1_961570560);    /* -c3-c5 */
+
+    tmp12 += z1;
+    tmp13 += z1;
+
+    dataptr[1] = (DCTELEM)
+      RIGHT_SHIFT(tmp0 + tmp10 + tmp12, CONST_BITS-PASS1_BITS-1);
+    dataptr[3] = (DCTELEM)
+      RIGHT_SHIFT(tmp1 + tmp11 + tmp13, CONST_BITS-PASS1_BITS-1);
+    dataptr[5] = (DCTELEM)
+      RIGHT_SHIFT(tmp2 + tmp11 + tmp12, CONST_BITS-PASS1_BITS-1);
+    dataptr[7] = (DCTELEM)
+      RIGHT_SHIFT(tmp3 + tmp10 + tmp13, CONST_BITS-PASS1_BITS-1);
+
+    dataptr += DCTSIZE;		/* advance pointer to next row */
+  }
+
+  /* Pass 2: process columns.
+   * We remove the PASS1_BITS scaling, but leave the results scaled up
+   * by an overall factor of 8.
+   * 4-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
+   */
+
+  dataptr = data;
+  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
+    /* Even part */
+
+    /* Add fudge factor here for final descale. */
+    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*3] + (ONE << (PASS1_BITS-1));
+    tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*2];
+
+    tmp10 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*3];
+    tmp11 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*2];
+
+    dataptr[DCTSIZE*0] = (DCTELEM) RIGHT_SHIFT(tmp0 + tmp1, PASS1_BITS);
+    dataptr[DCTSIZE*2] = (DCTELEM) RIGHT_SHIFT(tmp0 - tmp1, PASS1_BITS);
+
+    /* Odd part */
+
+    tmp0 = MULTIPLY(tmp10 + tmp11, FIX_0_541196100);   /* c6 */
+    /* Add fudge factor here for final descale. */
+    tmp0 += ONE << (CONST_BITS+PASS1_BITS-1);
+
+    dataptr[DCTSIZE*1] = (DCTELEM)
+      RIGHT_SHIFT(tmp0 + MULTIPLY(tmp10, FIX_0_765366865), /* c2-c6 */
+		  CONST_BITS+PASS1_BITS);
+    dataptr[DCTSIZE*3] = (DCTELEM)
+      RIGHT_SHIFT(tmp0 - MULTIPLY(tmp11, FIX_1_847759065), /* c2+c6 */
+		  CONST_BITS+PASS1_BITS);
+
+    dataptr++;			/* advance pointer to next column */
+  }
+}
+
+
+/*
+ * Perform the forward DCT on a 6x3 sample block.
+ *
+ * 6-point FDCT in pass 1 (rows), 3-point in pass 2 (columns).
+ */
+
+GLOBAL(void)
+jpeg_fdct_6x3 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
+{
+  INT32 tmp0, tmp1, tmp2;
+  INT32 tmp10, tmp11, tmp12;
+  DCTELEM *dataptr;
+  JSAMPROW elemptr;
+  int ctr;
+  SHIFT_TEMPS
+
+  /* Pre-zero output coefficient block. */
+  MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
+
+  /* Pass 1: process rows. */
+  /* Note results are scaled up by sqrt(8) compared to a true DCT; */
+  /* furthermore, we scale the results by 2**PASS1_BITS. */
+  /* We scale the results further by 2 as part of output adaption */
+  /* scaling for different DCT size. */
+  /* 6-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/12). */
+
+  dataptr = data;
+  for (ctr = 0; ctr < 3; ctr++) {
+    elemptr = sample_data[ctr] + start_col;
+
+    /* Even part */
+
+    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[5]);
+    tmp11 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[4]);
+    tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[3]);
+
+    tmp10 = tmp0 + tmp2;
+    tmp12 = tmp0 - tmp2;
+
+    tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[5]);
+    tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[4]);
+    tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[3]);
+
+    /* Apply unsigned->signed conversion */
+    dataptr[0] = (DCTELEM)
+      ((tmp10 + tmp11 - 6 * CENTERJSAMPLE) << (PASS1_BITS+1));
+    dataptr[2] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp12, FIX(1.224744871)),                 /* c2 */
+	      CONST_BITS-PASS1_BITS-1);
+    dataptr[4] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp10 - tmp11 - tmp11, FIX(0.707106781)), /* c4 */
+	      CONST_BITS-PASS1_BITS-1);
+
+    /* Odd part */
+
+    tmp10 = DESCALE(MULTIPLY(tmp0 + tmp2, FIX(0.366025404)),     /* c5 */
+		    CONST_BITS-PASS1_BITS-1);
+
+    dataptr[1] = (DCTELEM) (tmp10 + ((tmp0 + tmp1) << (PASS1_BITS+1)));
+    dataptr[3] = (DCTELEM) ((tmp0 - tmp1 - tmp2) << (PASS1_BITS+1));
+    dataptr[5] = (DCTELEM) (tmp10 + ((tmp2 - tmp1) << (PASS1_BITS+1)));
+
+    dataptr += DCTSIZE;		/* advance pointer to next row */
+  }
+
+  /* Pass 2: process columns.
+   * We remove the PASS1_BITS scaling, but leave the results scaled up
+   * by an overall factor of 8.
+   * We must also scale the output by (8/6)*(8/3) = 32/9, which we partially
+   * fold into the constant multipliers (other part was done in pass 1):
+   * 3-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/6) * 16/9.
+   */
+
+  dataptr = data;
+  for (ctr = 0; ctr < 6; ctr++) {
+    /* Even part */
+
+    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*2];
+    tmp1 = dataptr[DCTSIZE*1];
+
+    tmp2 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*2];
+
+    dataptr[DCTSIZE*0] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp0 + tmp1, FIX(1.777777778)),        /* 16/9 */
+	      CONST_BITS+PASS1_BITS);
+    dataptr[DCTSIZE*2] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp0 - tmp1 - tmp1, FIX(1.257078722)), /* c2 */
+	      CONST_BITS+PASS1_BITS);
+
+    /* Odd part */
+
+    dataptr[DCTSIZE*1] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp2, FIX(2.177324216)),               /* c1 */
+	      CONST_BITS+PASS1_BITS);
+
+    dataptr++;			/* advance pointer to next column */
+  }
+}
+
+
+/*
+ * Perform the forward DCT on a 4x2 sample block.
+ *
+ * 4-point FDCT in pass 1 (rows), 2-point in pass 2 (columns).
+ */
+
+GLOBAL(void)
+jpeg_fdct_4x2 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
+{
+  INT32 tmp0, tmp1;
+  INT32 tmp10, tmp11;
+  DCTELEM *dataptr;
+  JSAMPROW elemptr;
+  int ctr;
+  SHIFT_TEMPS
+
+  /* Pre-zero output coefficient block. */
+  MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
+
+  /* Pass 1: process rows. */
+  /* Note results are scaled up by sqrt(8) compared to a true DCT; */
+  /* furthermore, we scale the results by 2**PASS1_BITS. */
+  /* We must also scale the output by (8/4)*(8/2) = 2**3, which we add here. */
+  /* 4-point FDCT kernel, */
+  /* cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point FDCT]. */
+
+  dataptr = data;
+  for (ctr = 0; ctr < 2; ctr++) {
+    elemptr = sample_data[ctr] + start_col;
+
+    /* Even part */
+
+    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[3]);
+    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[2]);
+
+    tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[3]);
+    tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[2]);
+
+    /* Apply unsigned->signed conversion */
+    dataptr[0] = (DCTELEM)
+      ((tmp0 + tmp1 - 4 * CENTERJSAMPLE) << (PASS1_BITS+3));
+    dataptr[2] = (DCTELEM) ((tmp0 - tmp1) << (PASS1_BITS+3));
+
+    /* Odd part */
+
+    tmp0 = MULTIPLY(tmp10 + tmp11, FIX_0_541196100);       /* c6 */
+    /* Add fudge factor here for final descale. */
+    tmp0 += ONE << (CONST_BITS-PASS1_BITS-4);
+
+    dataptr[1] = (DCTELEM)
+      RIGHT_SHIFT(tmp0 + MULTIPLY(tmp10, FIX_0_765366865), /* c2-c6 */
+		  CONST_BITS-PASS1_BITS-3);
+    dataptr[3] = (DCTELEM)
+      RIGHT_SHIFT(tmp0 - MULTIPLY(tmp11, FIX_1_847759065), /* c2+c6 */
+		  CONST_BITS-PASS1_BITS-3);
+
+    dataptr += DCTSIZE;		/* advance pointer to next row */
+  }
+
+  /* Pass 2: process columns.
+   * We remove the PASS1_BITS scaling, but leave the results scaled up
+   * by an overall factor of 8.
+   */
+
+  dataptr = data;
+  for (ctr = 0; ctr < 4; ctr++) {
+    /* Even part */
+
+    /* Add fudge factor here for final descale. */
+    tmp0 = dataptr[DCTSIZE*0] + (ONE << (PASS1_BITS-1));
+    tmp1 = dataptr[DCTSIZE*1];
+
+    dataptr[DCTSIZE*0] = (DCTELEM) RIGHT_SHIFT(tmp0 + tmp1, PASS1_BITS);
+
+    /* Odd part */
+
+    dataptr[DCTSIZE*1] = (DCTELEM) RIGHT_SHIFT(tmp0 - tmp1, PASS1_BITS);
+
+    dataptr++;			/* advance pointer to next column */
+  }
+}
+
+
+/*
+ * Perform the forward DCT on a 2x1 sample block.
+ *
+ * 2-point FDCT in pass 1 (rows), 1-point in pass 2 (columns).
+ */
+
+GLOBAL(void)
+jpeg_fdct_2x1 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
+{
+  INT32 tmp0, tmp1;
+  JSAMPROW elemptr;
+
+  /* Pre-zero output coefficient block. */
+  MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
+
+  elemptr = sample_data[0] + start_col;
+
+  tmp0 = GETJSAMPLE(elemptr[0]);
+  tmp1 = GETJSAMPLE(elemptr[1]);
+
+  /* We leave the results scaled up by an overall factor of 8.
+   * We must also scale the output by (8/2)*(8/1) = 2**5.
+   */
+
+  /* Even part */
+  /* Apply unsigned->signed conversion */
+  data[0] = (DCTELEM) ((tmp0 + tmp1 - 2 * CENTERJSAMPLE) << 5);
+
+  /* Odd part */
+  data[1] = (DCTELEM) ((tmp0 - tmp1) << 5);
+}
+
+
+/*
+ * Perform the forward DCT on an 8x16 sample block.
+ *
+ * 8-point FDCT in pass 1 (rows), 16-point in pass 2 (columns).
+ */
+
+GLOBAL(void)
+jpeg_fdct_8x16 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
+{
+  INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16, tmp17;
+  INT32 z1;
+  DCTELEM workspace[DCTSIZE2];
+  DCTELEM *dataptr;
+  DCTELEM *wsptr;
+  JSAMPROW elemptr;
+  int ctr;
+  SHIFT_TEMPS
+
+  /* Pass 1: process rows. */
+  /* Note results are scaled up by sqrt(8) compared to a true DCT; */
+  /* furthermore, we scale the results by 2**PASS1_BITS. */
+
+  dataptr = data;
+  ctr = 0;
+  for (;;) {
+    elemptr = sample_data[ctr] + start_col;
+
+    /* Even part per LL&M figure 1 --- note that published figure is faulty;
+     * rotator "sqrt(2)*c1" should be "sqrt(2)*c6".
+     */
+
+    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[7]);
+    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[6]);
+    tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[5]);
+    tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[4]);
+
+    tmp10 = tmp0 + tmp3;
+    tmp12 = tmp0 - tmp3;
+    tmp11 = tmp1 + tmp2;
+    tmp13 = tmp1 - tmp2;
+
+    tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[7]);
+    tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[6]);
+    tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[5]);
+    tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[4]);
+
+    /* Apply unsigned->signed conversion */
+    dataptr[0] = (DCTELEM) ((tmp10 + tmp11 - 8 * CENTERJSAMPLE) << PASS1_BITS);
+    dataptr[4] = (DCTELEM) ((tmp10 - tmp11) << PASS1_BITS);
+
+    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
+    dataptr[2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, FIX_0_765366865),
+				   CONST_BITS-PASS1_BITS);
+    dataptr[6] = (DCTELEM) DESCALE(z1 - MULTIPLY(tmp13, FIX_1_847759065),
+				   CONST_BITS-PASS1_BITS);
+
+    /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
+     * 8-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
+     * i0..i3 in the paper are tmp0..tmp3 here.
+     */
+
+    tmp10 = tmp0 + tmp3;
+    tmp11 = tmp1 + tmp2;
+    tmp12 = tmp0 + tmp2;
+    tmp13 = tmp1 + tmp3;
+    z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602); /*  c3 */
+
+    tmp0  = MULTIPLY(tmp0,    FIX_1_501321110);    /*  c1+c3-c5-c7 */
+    tmp1  = MULTIPLY(tmp1,    FIX_3_072711026);    /*  c1+c3+c5-c7 */
+    tmp2  = MULTIPLY(tmp2,    FIX_2_053119869);    /*  c1+c3-c5+c7 */
+    tmp3  = MULTIPLY(tmp3,    FIX_0_298631336);    /* -c1+c3+c5-c7 */
+    tmp10 = MULTIPLY(tmp10, - FIX_0_899976223);    /*  c7-c3 */
+    tmp11 = MULTIPLY(tmp11, - FIX_2_562915447);    /* -c1-c3 */
+    tmp12 = MULTIPLY(tmp12, - FIX_0_390180644);    /*  c5-c3 */
+    tmp13 = MULTIPLY(tmp13, - FIX_1_961570560);    /* -c3-c5 */
+
+    tmp12 += z1;
+    tmp13 += z1;
+
+    dataptr[1] = (DCTELEM) DESCALE(tmp0 + tmp10 + tmp12, CONST_BITS-PASS1_BITS);
+    dataptr[3] = (DCTELEM) DESCALE(tmp1 + tmp11 + tmp13, CONST_BITS-PASS1_BITS);
+    dataptr[5] = (DCTELEM) DESCALE(tmp2 + tmp11 + tmp12, CONST_BITS-PASS1_BITS);
+    dataptr[7] = (DCTELEM) DESCALE(tmp3 + tmp10 + tmp13, CONST_BITS-PASS1_BITS);
+
+    ctr++;
+
+    if (ctr != DCTSIZE) {
+      if (ctr == DCTSIZE * 2)
+	break;			/* Done. */
+      dataptr += DCTSIZE;	/* advance pointer to next row */
+    } else
+      dataptr = workspace;	/* switch pointer to extended workspace */
+  }
+
+  /* Pass 2: process columns.
+   * We remove the PASS1_BITS scaling, but leave the results scaled up
+   * by an overall factor of 8.
+   * We must also scale the output by 8/16 = 1/2.
+   * 16-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/32).
+   */
+
+  dataptr = data;
+  wsptr = workspace;
+  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
+    /* Even part */
+
+    tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*7];
+    tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*6];
+    tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*5];
+    tmp3 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*4];
+    tmp4 = dataptr[DCTSIZE*4] + wsptr[DCTSIZE*3];
+    tmp5 = dataptr[DCTSIZE*5] + wsptr[DCTSIZE*2];
+    tmp6 = dataptr[DCTSIZE*6] + wsptr[DCTSIZE*1];
+    tmp7 = dataptr[DCTSIZE*7] + wsptr[DCTSIZE*0];
+
+    tmp10 = tmp0 + tmp7;
+    tmp14 = tmp0 - tmp7;
+    tmp11 = tmp1 + tmp6;
+    tmp15 = tmp1 - tmp6;
+    tmp12 = tmp2 + tmp5;
+    tmp16 = tmp2 - tmp5;
+    tmp13 = tmp3 + tmp4;
+    tmp17 = tmp3 - tmp4;
+
+    tmp0 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*7];
+    tmp1 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*6];
+    tmp2 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*5];
+    tmp3 = dataptr[DCTSIZE*3] - wsptr[DCTSIZE*4];
+    tmp4 = dataptr[DCTSIZE*4] - wsptr[DCTSIZE*3];
+    tmp5 = dataptr[DCTSIZE*5] - wsptr[DCTSIZE*2];
+    tmp6 = dataptr[DCTSIZE*6] - wsptr[DCTSIZE*1];
+    tmp7 = dataptr[DCTSIZE*7] - wsptr[DCTSIZE*0];
+
+    dataptr[DCTSIZE*0] = (DCTELEM)
+      DESCALE(tmp10 + tmp11 + tmp12 + tmp13, PASS1_BITS+1);
+    dataptr[DCTSIZE*4] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp10 - tmp13, FIX(1.306562965)) + /* c4[16] = c2[8] */
+	      MULTIPLY(tmp11 - tmp12, FIX_0_541196100),   /* c12[16] = c6[8] */
+	      CONST_BITS+PASS1_BITS+1);
+
+    tmp10 = MULTIPLY(tmp17 - tmp15, FIX(0.275899379)) +   /* c14[16] = c7[8] */
+	    MULTIPLY(tmp14 - tmp16, FIX(1.387039845));    /* c2[16] = c1[8] */
+
+    dataptr[DCTSIZE*2] = (DCTELEM)
+      DESCALE(tmp10 + MULTIPLY(tmp15, FIX(1.451774982))   /* c6+c14 */
+	      + MULTIPLY(tmp16, FIX(2.172734804)),        /* c2+c10 */
+	      CONST_BITS+PASS1_BITS+1);
+    dataptr[DCTSIZE*6] = (DCTELEM)
+      DESCALE(tmp10 - MULTIPLY(tmp14, FIX(0.211164243))   /* c2-c6 */
+	      - MULTIPLY(tmp17, FIX(1.061594338)),        /* c10+c14 */
+	      CONST_BITS+PASS1_BITS+1);
+
+    /* Odd part */
+
+    tmp11 = MULTIPLY(tmp0 + tmp1, FIX(1.353318001)) +         /* c3 */
+	    MULTIPLY(tmp6 - tmp7, FIX(0.410524528));          /* c13 */
+    tmp12 = MULTIPLY(tmp0 + tmp2, FIX(1.247225013)) +         /* c5 */
+	    MULTIPLY(tmp5 + tmp7, FIX(0.666655658));          /* c11 */
+    tmp13 = MULTIPLY(tmp0 + tmp3, FIX(1.093201867)) +         /* c7 */
+	    MULTIPLY(tmp4 - tmp7, FIX(0.897167586));          /* c9 */
+    tmp14 = MULTIPLY(tmp1 + tmp2, FIX(0.138617169)) +         /* c15 */
+	    MULTIPLY(tmp6 - tmp5, FIX(1.407403738));          /* c1 */
+    tmp15 = MULTIPLY(tmp1 + tmp3, - FIX(0.666655658)) +       /* -c11 */
+	    MULTIPLY(tmp4 + tmp6, - FIX(1.247225013));        /* -c5 */
+    tmp16 = MULTIPLY(tmp2 + tmp3, - FIX(1.353318001)) +       /* -c3 */
+	    MULTIPLY(tmp5 - tmp4, FIX(0.410524528));          /* c13 */
+    tmp10 = tmp11 + tmp12 + tmp13 -
+	    MULTIPLY(tmp0, FIX(2.286341144)) +                /* c7+c5+c3-c1 */
+	    MULTIPLY(tmp7, FIX(0.779653625));                 /* c15+c13-c11+c9 */
+    tmp11 += tmp14 + tmp15 + MULTIPLY(tmp1, FIX(0.071888074)) /* c9-c3-c15+c11 */
+	     - MULTIPLY(tmp6, FIX(1.663905119));              /* c7+c13+c1-c5 */
+    tmp12 += tmp14 + tmp16 - MULTIPLY(tmp2, FIX(1.125726048)) /* c7+c5+c15-c3 */
+	     + MULTIPLY(tmp5, FIX(1.227391138));              /* c9-c11+c1-c13 */
+    tmp13 += tmp15 + tmp16 + MULTIPLY(tmp3, FIX(1.065388962)) /* c15+c3+c11-c7 */
+	     + MULTIPLY(tmp4, FIX(2.167985692));              /* c1+c13+c5-c9 */
+
+    dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp10, CONST_BITS+PASS1_BITS+1);
+    dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp11, CONST_BITS+PASS1_BITS+1);
+    dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp12, CONST_BITS+PASS1_BITS+1);
+    dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp13, CONST_BITS+PASS1_BITS+1);
+
+    dataptr++;			/* advance pointer to next column */
+    wsptr++;			/* advance pointer to next column */
+  }
+}
+
+
+/*
+ * Perform the forward DCT on a 7x14 sample block.
+ *
+ * 7-point FDCT in pass 1 (rows), 14-point in pass 2 (columns).
+ */
+
+GLOBAL(void)
+jpeg_fdct_7x14 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
+{
+  INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
+  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
+  INT32 z1, z2, z3;
+  DCTELEM workspace[8*6];
+  DCTELEM *dataptr;
+  DCTELEM *wsptr;
+  JSAMPROW elemptr;
+  int ctr;
+  SHIFT_TEMPS
+
+  /* Pre-zero output coefficient block. */
+  MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
+
+  /* Pass 1: process rows. */
+  /* Note results are scaled up by sqrt(8) compared to a true DCT; */
+  /* furthermore, we scale the results by 2**PASS1_BITS. */
+  /* 7-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/14). */
+
+  dataptr = data;
+  ctr = 0;
+  for (;;) {
+    elemptr = sample_data[ctr] + start_col;
+
+    /* Even part */
+
+    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[6]);
+    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[5]);
+    tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[4]);
+    tmp3 = GETJSAMPLE(elemptr[3]);
+
+    tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[6]);
+    tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[5]);
+    tmp12 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[4]);
+
+    z1 = tmp0 + tmp2;
+    /* Apply unsigned->signed conversion */
+    dataptr[0] = (DCTELEM)
+      ((z1 + tmp1 + tmp3 - 7 * CENTERJSAMPLE) << PASS1_BITS);
+    tmp3 += tmp3;
+    z1 -= tmp3;
+    z1 -= tmp3;
+    z1 = MULTIPLY(z1, FIX(0.353553391));                /* (c2+c6-c4)/2 */
+    z2 = MULTIPLY(tmp0 - tmp2, FIX(0.920609002));       /* (c2+c4-c6)/2 */
+    z3 = MULTIPLY(tmp1 - tmp2, FIX(0.314692123));       /* c6 */
+    dataptr[2] = (DCTELEM) DESCALE(z1 + z2 + z3, CONST_BITS-PASS1_BITS);
+    z1 -= z2;
+    z2 = MULTIPLY(tmp0 - tmp1, FIX(0.881747734));       /* c4 */
+    dataptr[4] = (DCTELEM)
+      DESCALE(z2 + z3 - MULTIPLY(tmp1 - tmp3, FIX(0.707106781)), /* c2+c6-c4 */
+	      CONST_BITS-PASS1_BITS);
+    dataptr[6] = (DCTELEM) DESCALE(z1 + z2, CONST_BITS-PASS1_BITS);
+
+    /* Odd part */
+
+    tmp1 = MULTIPLY(tmp10 + tmp11, FIX(0.935414347));   /* (c3+c1-c5)/2 */
+    tmp2 = MULTIPLY(tmp10 - tmp11, FIX(0.170262339));   /* (c3+c5-c1)/2 */
+    tmp0 = tmp1 - tmp2;
+    tmp1 += tmp2;
+    tmp2 = MULTIPLY(tmp11 + tmp12, - FIX(1.378756276)); /* -c1 */
+    tmp1 += tmp2;
+    tmp3 = MULTIPLY(tmp10 + tmp12, FIX(0.613604268));   /* c5 */
+    tmp0 += tmp3;
+    tmp2 += tmp3 + MULTIPLY(tmp12, FIX(1.870828693));   /* c3+c1-c5 */
+
+    dataptr[1] = (DCTELEM) DESCALE(tmp0, CONST_BITS-PASS1_BITS);
+    dataptr[3] = (DCTELEM) DESCALE(tmp1, CONST_BITS-PASS1_BITS);
+    dataptr[5] = (DCTELEM) DESCALE(tmp2, CONST_BITS-PASS1_BITS);
+
+    ctr++;
+
+    if (ctr != DCTSIZE) {
+      if (ctr == 14)
+	break;			/* Done. */
+      dataptr += DCTSIZE;	/* advance pointer to next row */
+    } else
+      dataptr = workspace;	/* switch pointer to extended workspace */
+  }
+
+  /* Pass 2: process columns.
+   * We remove the PASS1_BITS scaling, but leave the results scaled up
+   * by an overall factor of 8.
+   * We must also scale the output by (8/7)*(8/14) = 32/49, which we
+   * fold into the constant multipliers:
+   * 14-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/28) * 32/49.
+   */
+
+  dataptr = data;
+  wsptr = workspace;
+  for (ctr = 0; ctr < 7; ctr++) {
+    /* Even part */
+
+    tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*5];
+    tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*4];
+    tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*3];
+    tmp13 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*2];
+    tmp4 = dataptr[DCTSIZE*4] + wsptr[DCTSIZE*1];
+    tmp5 = dataptr[DCTSIZE*5] + wsptr[DCTSIZE*0];
+    tmp6 = dataptr[DCTSIZE*6] + dataptr[DCTSIZE*7];
+
+    tmp10 = tmp0 + tmp6;
+    tmp14 = tmp0 - tmp6;
+    tmp11 = tmp1 + tmp5;
+    tmp15 = tmp1 - tmp5;
+    tmp12 = tmp2 + tmp4;
+    tmp16 = tmp2 - tmp4;
+
+    tmp0 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*5];
+    tmp1 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*4];
+    tmp2 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*3];
+    tmp3 = dataptr[DCTSIZE*3] - wsptr[DCTSIZE*2];
+    tmp4 = dataptr[DCTSIZE*4] - wsptr[DCTSIZE*1];
+    tmp5 = dataptr[DCTSIZE*5] - wsptr[DCTSIZE*0];
+    tmp6 = dataptr[DCTSIZE*6] - dataptr[DCTSIZE*7];
+
+    dataptr[DCTSIZE*0] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp10 + tmp11 + tmp12 + tmp13,
+		       FIX(0.653061224)),                 /* 32/49 */
+	      CONST_BITS+PASS1_BITS);
+    tmp13 += tmp13;
+    dataptr[DCTSIZE*4] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp10 - tmp13, FIX(0.832106052)) + /* c4 */
+	      MULTIPLY(tmp11 - tmp13, FIX(0.205513223)) - /* c12 */
+	      MULTIPLY(tmp12 - tmp13, FIX(0.575835255)),  /* c8 */
+	      CONST_BITS+PASS1_BITS);
+
+    tmp10 = MULTIPLY(tmp14 + tmp15, FIX(0.722074570));    /* c6 */
+
+    dataptr[DCTSIZE*2] = (DCTELEM)
+      DESCALE(tmp10 + MULTIPLY(tmp14, FIX(0.178337691))   /* c2-c6 */
+	      + MULTIPLY(tmp16, FIX(0.400721155)),        /* c10 */
+	      CONST_BITS+PASS1_BITS);
+    dataptr[DCTSIZE*6] = (DCTELEM)
+      DESCALE(tmp10 - MULTIPLY(tmp15, FIX(1.122795725))   /* c6+c10 */
+	      - MULTIPLY(tmp16, FIX(0.900412262)),        /* c2 */
+	      CONST_BITS+PASS1_BITS);
+
+    /* Odd part */
+
+    tmp10 = tmp1 + tmp2;
+    tmp11 = tmp5 - tmp4;
+    dataptr[DCTSIZE*7] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp0 - tmp10 + tmp3 - tmp11 - tmp6,
+		       FIX(0.653061224)),                 /* 32/49 */
+	      CONST_BITS+PASS1_BITS);
+    tmp3  = MULTIPLY(tmp3 , FIX(0.653061224));            /* 32/49 */
+    tmp10 = MULTIPLY(tmp10, - FIX(0.103406812));          /* -c13 */
+    tmp11 = MULTIPLY(tmp11, FIX(0.917760839));            /* c1 */
+    tmp10 += tmp11 - tmp3;
+    tmp11 = MULTIPLY(tmp0 + tmp2, FIX(0.782007410)) +     /* c5 */
+	    MULTIPLY(tmp4 + tmp6, FIX(0.491367823));      /* c9 */
+    dataptr[DCTSIZE*5] = (DCTELEM)
+      DESCALE(tmp10 + tmp11 - MULTIPLY(tmp2, FIX(1.550341076)) /* c3+c5-c13 */
+	      + MULTIPLY(tmp4, FIX(0.731428202)),         /* c1+c11-c9 */
+	      CONST_BITS+PASS1_BITS);
+    tmp12 = MULTIPLY(tmp0 + tmp1, FIX(0.871740478)) +     /* c3 */
+	    MULTIPLY(tmp5 - tmp6, FIX(0.305035186));      /* c11 */
+    dataptr[DCTSIZE*3] = (DCTELEM)
+      DESCALE(tmp10 + tmp12 - MULTIPLY(tmp1, FIX(0.276965844)) /* c3-c9-c13 */
+	      - MULTIPLY(tmp5, FIX(2.004803435)),         /* c1+c5+c11 */
+	      CONST_BITS+PASS1_BITS);
+    dataptr[DCTSIZE*1] = (DCTELEM)
+      DESCALE(tmp11 + tmp12 + tmp3
+	      - MULTIPLY(tmp0, FIX(0.735987049))          /* c3+c5-c1 */
+	      - MULTIPLY(tmp6, FIX(0.082925825)),         /* c9-c11-c13 */
+	      CONST_BITS+PASS1_BITS);
+
+    dataptr++;			/* advance pointer to next column */
+    wsptr++;			/* advance pointer to next column */
+  }
+}
+
+
+/*
+ * Perform the forward DCT on a 6x12 sample block.
+ *
+ * 6-point FDCT in pass 1 (rows), 12-point in pass 2 (columns).
+ */
+
+GLOBAL(void)
+jpeg_fdct_6x12 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
+{
+  INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
+  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
+  DCTELEM workspace[8*4];
+  DCTELEM *dataptr;
+  DCTELEM *wsptr;
+  JSAMPROW elemptr;
+  int ctr;
+  SHIFT_TEMPS
+
+  /* Pre-zero output coefficient block. */
+  MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
+
+  /* Pass 1: process rows. */
+  /* Note results are scaled up by sqrt(8) compared to a true DCT; */
+  /* furthermore, we scale the results by 2**PASS1_BITS. */
+  /* 6-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/12). */
+
+  dataptr = data;
+  ctr = 0;
+  for (;;) {
+    elemptr = sample_data[ctr] + start_col;
+
+    /* Even part */
+
+    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[5]);
+    tmp11 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[4]);
+    tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[3]);
+
+    tmp10 = tmp0 + tmp2;
+    tmp12 = tmp0 - tmp2;
+
+    tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[5]);
+    tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[4]);
+    tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[3]);
+
+    /* Apply unsigned->signed conversion */
+    dataptr[0] = (DCTELEM)
+      ((tmp10 + tmp11 - 6 * CENTERJSAMPLE) << PASS1_BITS);
+    dataptr[2] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp12, FIX(1.224744871)),                 /* c2 */
+	      CONST_BITS-PASS1_BITS);
+    dataptr[4] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp10 - tmp11 - tmp11, FIX(0.707106781)), /* c4 */
+	      CONST_BITS-PASS1_BITS);
+
+    /* Odd part */
+
+    tmp10 = DESCALE(MULTIPLY(tmp0 + tmp2, FIX(0.366025404)),     /* c5 */
+		    CONST_BITS-PASS1_BITS);
+
+    dataptr[1] = (DCTELEM) (tmp10 + ((tmp0 + tmp1) << PASS1_BITS));
+    dataptr[3] = (DCTELEM) ((tmp0 - tmp1 - tmp2) << PASS1_BITS);
+    dataptr[5] = (DCTELEM) (tmp10 + ((tmp2 - tmp1) << PASS1_BITS));
+
+    ctr++;
+
+    if (ctr != DCTSIZE) {
+      if (ctr == 12)
+	break;			/* Done. */
+      dataptr += DCTSIZE;	/* advance pointer to next row */
+    } else
+      dataptr = workspace;	/* switch pointer to extended workspace */
+  }
+
+  /* Pass 2: process columns.
+   * We remove the PASS1_BITS scaling, but leave the results scaled up
+   * by an overall factor of 8.
+   * We must also scale the output by (8/6)*(8/12) = 8/9, which we
+   * fold into the constant multipliers:
+   * 12-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/24) * 8/9.
+   */
+
+  dataptr = data;
+  wsptr = workspace;
+  for (ctr = 0; ctr < 6; ctr++) {
+    /* Even part */
+
+    tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*3];
+    tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*2];
+    tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*1];
+    tmp3 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*0];
+    tmp4 = dataptr[DCTSIZE*4] + dataptr[DCTSIZE*7];
+    tmp5 = dataptr[DCTSIZE*5] + dataptr[DCTSIZE*6];
+
+    tmp10 = tmp0 + tmp5;
+    tmp13 = tmp0 - tmp5;
+    tmp11 = tmp1 + tmp4;
+    tmp14 = tmp1 - tmp4;
+    tmp12 = tmp2 + tmp3;
+    tmp15 = tmp2 - tmp3;
+
+    tmp0 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*3];
+    tmp1 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*2];
+    tmp2 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*1];
+    tmp3 = dataptr[DCTSIZE*3] - wsptr[DCTSIZE*0];
+    tmp4 = dataptr[DCTSIZE*4] - dataptr[DCTSIZE*7];
+    tmp5 = dataptr[DCTSIZE*5] - dataptr[DCTSIZE*6];
+
+    dataptr[DCTSIZE*0] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp10 + tmp11 + tmp12, FIX(0.888888889)), /* 8/9 */
+	      CONST_BITS+PASS1_BITS);
+    dataptr[DCTSIZE*6] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp13 - tmp14 - tmp15, FIX(0.888888889)), /* 8/9 */
+	      CONST_BITS+PASS1_BITS);
+    dataptr[DCTSIZE*4] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.088662108)),         /* c4 */
+	      CONST_BITS+PASS1_BITS);
+    dataptr[DCTSIZE*2] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp14 - tmp15, FIX(0.888888889)) +        /* 8/9 */
+	      MULTIPLY(tmp13 + tmp15, FIX(1.214244803)),         /* c2 */
+	      CONST_BITS+PASS1_BITS);
+
+    /* Odd part */
+
+    tmp10 = MULTIPLY(tmp1 + tmp4, FIX(0.481063200));   /* c9 */
+    tmp14 = tmp10 + MULTIPLY(tmp1, FIX(0.680326102));  /* c3-c9 */
+    tmp15 = tmp10 - MULTIPLY(tmp4, FIX(1.642452502));  /* c3+c9 */
+    tmp12 = MULTIPLY(tmp0 + tmp2, FIX(0.997307603));   /* c5 */
+    tmp13 = MULTIPLY(tmp0 + tmp3, FIX(0.765261039));   /* c7 */
+    tmp10 = tmp12 + tmp13 + tmp14 - MULTIPLY(tmp0, FIX(0.516244403)) /* c5+c7-c1 */
+	    + MULTIPLY(tmp5, FIX(0.164081699));        /* c11 */
+    tmp11 = MULTIPLY(tmp2 + tmp3, - FIX(0.164081699)); /* -c11 */
+    tmp12 += tmp11 - tmp15 - MULTIPLY(tmp2, FIX(2.079550144)) /* c1+c5-c11 */
+	    + MULTIPLY(tmp5, FIX(0.765261039));        /* c7 */
+    tmp13 += tmp11 - tmp14 + MULTIPLY(tmp3, FIX(0.645144899)) /* c1+c11-c7 */
+	    - MULTIPLY(tmp5, FIX(0.997307603));        /* c5 */
+    tmp11 = tmp15 + MULTIPLY(tmp0 - tmp3, FIX(1.161389302)) /* c3 */
+	    - MULTIPLY(tmp2 + tmp5, FIX(0.481063200)); /* c9 */
+
+    dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp10, CONST_BITS+PASS1_BITS);
+    dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp11, CONST_BITS+PASS1_BITS);
+    dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp12, CONST_BITS+PASS1_BITS);
+    dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp13, CONST_BITS+PASS1_BITS);
+
+    dataptr++;			/* advance pointer to next column */
+    wsptr++;			/* advance pointer to next column */
+  }
+}
+
+
+/*
+ * Perform the forward DCT on a 5x10 sample block.
+ *
+ * 5-point FDCT in pass 1 (rows), 10-point in pass 2 (columns).
+ */
+
+GLOBAL(void)
+jpeg_fdct_5x10 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
+{
+  INT32 tmp0, tmp1, tmp2, tmp3, tmp4;
+  INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
+  DCTELEM workspace[8*2];
+  DCTELEM *dataptr;
+  DCTELEM *wsptr;
+  JSAMPROW elemptr;
+  int ctr;
+  SHIFT_TEMPS
+
+  /* Pre-zero output coefficient block. */
+  MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
+
+  /* Pass 1: process rows. */
+  /* Note results are scaled up by sqrt(8) compared to a true DCT; */
+  /* furthermore, we scale the results by 2**PASS1_BITS. */
+  /* 5-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/10). */
+
+  dataptr = data;
+  ctr = 0;
+  for (;;) {
+    elemptr = sample_data[ctr] + start_col;
+
+    /* Even part */
+
+    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[4]);
+    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[3]);
+    tmp2 = GETJSAMPLE(elemptr[2]);
+
+    tmp10 = tmp0 + tmp1;
+    tmp11 = tmp0 - tmp1;
+
+    tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[4]);
+    tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[3]);
+
+    /* Apply unsigned->signed conversion */
+    dataptr[0] = (DCTELEM)
+      ((tmp10 + tmp2 - 5 * CENTERJSAMPLE) << PASS1_BITS);
+    tmp11 = MULTIPLY(tmp11, FIX(0.790569415));          /* (c2+c4)/2 */
+    tmp10 -= tmp2 << 2;
+    tmp10 = MULTIPLY(tmp10, FIX(0.353553391));          /* (c2-c4)/2 */
+    dataptr[2] = (DCTELEM) DESCALE(tmp11 + tmp10, CONST_BITS-PASS1_BITS);
+    dataptr[4] = (DCTELEM) DESCALE(tmp11 - tmp10, CONST_BITS-PASS1_BITS);
+
+    /* Odd part */
+
+    tmp10 = MULTIPLY(tmp0 + tmp1, FIX(0.831253876));    /* c3 */
+
+    dataptr[1] = (DCTELEM)
+      DESCALE(tmp10 + MULTIPLY(tmp0, FIX(0.513743148)), /* c1-c3 */
+	      CONST_BITS-PASS1_BITS);
+    dataptr[3] = (DCTELEM)
+      DESCALE(tmp10 - MULTIPLY(tmp1, FIX(2.176250899)), /* c1+c3 */
+	      CONST_BITS-PASS1_BITS);
+
+    ctr++;
+
+    if (ctr != DCTSIZE) {
+      if (ctr == 10)
+	break;			/* Done. */
+      dataptr += DCTSIZE;	/* advance pointer to next row */
+    } else
+      dataptr = workspace;	/* switch pointer to extended workspace */
+  }
+
+  /* Pass 2: process columns.
+   * We remove the PASS1_BITS scaling, but leave the results scaled up
+   * by an overall factor of 8.
+   * We must also scale the output by (8/5)*(8/10) = 32/25, which we
+   * fold into the constant multipliers:
+   * 10-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/20) * 32/25.
+   */
+
+  dataptr = data;
+  wsptr = workspace;
+  for (ctr = 0; ctr < 5; ctr++) {
+    /* Even part */
+
+    tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*1];
+    tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*0];
+    tmp12 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*7];
+    tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*6];
+    tmp4 = dataptr[DCTSIZE*4] + dataptr[DCTSIZE*5];
+
+    tmp10 = tmp0 + tmp4;
+    tmp13 = tmp0 - tmp4;
+    tmp11 = tmp1 + tmp3;
+    tmp14 = tmp1 - tmp3;
+
+    tmp0 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*1];
+    tmp1 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*0];
+    tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*7];
+    tmp3 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*6];
+    tmp4 = dataptr[DCTSIZE*4] - dataptr[DCTSIZE*5];
+
+    dataptr[DCTSIZE*0] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp10 + tmp11 + tmp12, FIX(1.28)), /* 32/25 */
+	      CONST_BITS+PASS1_BITS);
+    tmp12 += tmp12;
+    dataptr[DCTSIZE*4] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.464477191)) - /* c4 */
+	      MULTIPLY(tmp11 - tmp12, FIX(0.559380511)),  /* c8 */
+	      CONST_BITS+PASS1_BITS);
+    tmp10 = MULTIPLY(tmp13 + tmp14, FIX(1.064004961));    /* c6 */
+    dataptr[DCTSIZE*2] = (DCTELEM)
+      DESCALE(tmp10 + MULTIPLY(tmp13, FIX(0.657591230)),  /* c2-c6 */
+	      CONST_BITS+PASS1_BITS);
+    dataptr[DCTSIZE*6] = (DCTELEM)
+      DESCALE(tmp10 - MULTIPLY(tmp14, FIX(2.785601151)),  /* c2+c6 */
+	      CONST_BITS+PASS1_BITS);
+
+    /* Odd part */
+
+    tmp10 = tmp0 + tmp4;
+    tmp11 = tmp1 - tmp3;
+    dataptr[DCTSIZE*5] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp10 - tmp11 - tmp2, FIX(1.28)),  /* 32/25 */
+	      CONST_BITS+PASS1_BITS);
+    tmp2 = MULTIPLY(tmp2, FIX(1.28));                     /* 32/25 */
+    dataptr[DCTSIZE*1] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp0, FIX(1.787906876)) +          /* c1 */
+	      MULTIPLY(tmp1, FIX(1.612894094)) + tmp2 +   /* c3 */
+	      MULTIPLY(tmp3, FIX(0.821810588)) +          /* c7 */
+	      MULTIPLY(tmp4, FIX(0.283176630)),           /* c9 */
+	      CONST_BITS+PASS1_BITS);
+    tmp12 = MULTIPLY(tmp0 - tmp4, FIX(1.217352341)) -     /* (c3+c7)/2 */
+	    MULTIPLY(tmp1 + tmp3, FIX(0.752365123));      /* (c1-c9)/2 */
+    tmp13 = MULTIPLY(tmp10 + tmp11, FIX(0.395541753)) +   /* (c3-c7)/2 */
+	    MULTIPLY(tmp11, FIX(0.64)) - tmp2;            /* 16/25 */
+    dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp12 + tmp13, CONST_BITS+PASS1_BITS);
+    dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp12 - tmp13, CONST_BITS+PASS1_BITS);
+
+    dataptr++;			/* advance pointer to next column */
+    wsptr++;			/* advance pointer to next column */
+  }
+}
+
+
+/*
+ * Perform the forward DCT on a 4x8 sample block.
+ *
+ * 4-point FDCT in pass 1 (rows), 8-point in pass 2 (columns).
+ */
+
+GLOBAL(void)
+jpeg_fdct_4x8 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
+{
+  INT32 tmp0, tmp1, tmp2, tmp3;
+  INT32 tmp10, tmp11, tmp12, tmp13;
+  INT32 z1;
+  DCTELEM *dataptr;
+  JSAMPROW elemptr;
+  int ctr;
+  SHIFT_TEMPS
+
+  /* Pre-zero output coefficient block. */
+  MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
+
+  /* Pass 1: process rows. */
+  /* Note results are scaled up by sqrt(8) compared to a true DCT; */
+  /* furthermore, we scale the results by 2**PASS1_BITS. */
+  /* We must also scale the output by 8/4 = 2, which we add here. */
+  /* 4-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16). */
+
+  dataptr = data;
+  for (ctr = 0; ctr < DCTSIZE; ctr++) {
+    elemptr = sample_data[ctr] + start_col;
+
+    /* Even part */
+
+    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[3]);
+    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[2]);
+
+    tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[3]);
+    tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[2]);
+
+    /* Apply unsigned->signed conversion */
+    dataptr[0] = (DCTELEM)
+      ((tmp0 + tmp1 - 4 * CENTERJSAMPLE) << (PASS1_BITS+1));
+    dataptr[2] = (DCTELEM) ((tmp0 - tmp1) << (PASS1_BITS+1));
+
+    /* Odd part */
+
+    tmp0 = MULTIPLY(tmp10 + tmp11, FIX_0_541196100);       /* c6 */
+    /* Add fudge factor here for final descale. */
+    tmp0 += ONE << (CONST_BITS-PASS1_BITS-2);
+
+    dataptr[1] = (DCTELEM)
+      RIGHT_SHIFT(tmp0 + MULTIPLY(tmp10, FIX_0_765366865), /* c2-c6 */
+		  CONST_BITS-PASS1_BITS-1);
+    dataptr[3] = (DCTELEM)
+      RIGHT_SHIFT(tmp0 - MULTIPLY(tmp11, FIX_1_847759065), /* c2+c6 */
+		  CONST_BITS-PASS1_BITS-1);
+
+    dataptr += DCTSIZE;		/* advance pointer to next row */
+  }
+
+  /* Pass 2: process columns.
+   * We remove the PASS1_BITS scaling, but leave the results scaled up
+   * by an overall factor of 8.
+   */
+
+  dataptr = data;
+  for (ctr = 0; ctr < 4; ctr++) {
+    /* Even part per LL&M figure 1 --- note that published figure is faulty;
+     * rotator "sqrt(2)*c1" should be "sqrt(2)*c6".
+     */
+
+    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7];
+    tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6];
+    tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5];
+    tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4];
+
+    /* Add fudge factor here for final descale. */
+    tmp10 = tmp0 + tmp3 + (ONE << (PASS1_BITS-1));
+    tmp12 = tmp0 - tmp3;
+    tmp11 = tmp1 + tmp2;
+    tmp13 = tmp1 - tmp2;
+
+    tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7];
+    tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6];
+    tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5];
+    tmp3 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4];
+
+    dataptr[DCTSIZE*0] = (DCTELEM) RIGHT_SHIFT(tmp10 + tmp11, PASS1_BITS);
+    dataptr[DCTSIZE*4] = (DCTELEM) RIGHT_SHIFT(tmp10 - tmp11, PASS1_BITS);
+
+    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
+    /* Add fudge factor here for final descale. */
+    z1 += ONE << (CONST_BITS+PASS1_BITS-1);
+    dataptr[DCTSIZE*2] = (DCTELEM)
+      RIGHT_SHIFT(z1 + MULTIPLY(tmp12, FIX_0_765366865), CONST_BITS+PASS1_BITS);
+    dataptr[DCTSIZE*6] = (DCTELEM)
+      RIGHT_SHIFT(z1 - MULTIPLY(tmp13, FIX_1_847759065), CONST_BITS+PASS1_BITS);
+
+    /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
+     * 8-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
+     * i0..i3 in the paper are tmp0..tmp3 here.
+     */
+
+    tmp10 = tmp0 + tmp3;
+    tmp11 = tmp1 + tmp2;
+    tmp12 = tmp0 + tmp2;
+    tmp13 = tmp1 + tmp3;
+    z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602); /*  c3 */
+    /* Add fudge factor here for final descale. */
+    z1 += ONE << (CONST_BITS+PASS1_BITS-1);
+
+    tmp0  = MULTIPLY(tmp0,    FIX_1_501321110);    /*  c1+c3-c5-c7 */
+    tmp1  = MULTIPLY(tmp1,    FIX_3_072711026);    /*  c1+c3+c5-c7 */
+    tmp2  = MULTIPLY(tmp2,    FIX_2_053119869);    /*  c1+c3-c5+c7 */
+    tmp3  = MULTIPLY(tmp3,    FIX_0_298631336);    /* -c1+c3+c5-c7 */
+    tmp10 = MULTIPLY(tmp10, - FIX_0_899976223);    /*  c7-c3 */
+    tmp11 = MULTIPLY(tmp11, - FIX_2_562915447);    /* -c1-c3 */
+    tmp12 = MULTIPLY(tmp12, - FIX_0_390180644);    /*  c5-c3 */
+    tmp13 = MULTIPLY(tmp13, - FIX_1_961570560);    /* -c3-c5 */
+
+    tmp12 += z1;
+    tmp13 += z1;
+
+    dataptr[DCTSIZE*1] = (DCTELEM)
+      RIGHT_SHIFT(tmp0 + tmp10 + tmp12, CONST_BITS+PASS1_BITS);
+    dataptr[DCTSIZE*3] = (DCTELEM)
+      RIGHT_SHIFT(tmp1 + tmp11 + tmp13, CONST_BITS+PASS1_BITS);
+    dataptr[DCTSIZE*5] = (DCTELEM)
+      RIGHT_SHIFT(tmp2 + tmp11 + tmp12, CONST_BITS+PASS1_BITS);
+    dataptr[DCTSIZE*7] = (DCTELEM)
+      RIGHT_SHIFT(tmp3 + tmp10 + tmp13, CONST_BITS+PASS1_BITS);
+
+    dataptr++;			/* advance pointer to next column */
+  }
+}
+
+
+/*
+ * Perform the forward DCT on a 3x6 sample block.
+ *
+ * 3-point FDCT in pass 1 (rows), 6-point in pass 2 (columns).
+ */
+
+GLOBAL(void)
+jpeg_fdct_3x6 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
+{
+  INT32 tmp0, tmp1, tmp2;
+  INT32 tmp10, tmp11, tmp12;
+  DCTELEM *dataptr;
+  JSAMPROW elemptr;
+  int ctr;
+  SHIFT_TEMPS
+
+  /* Pre-zero output coefficient block. */
+  MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
+
+  /* Pass 1: process rows. */
+  /* Note results are scaled up by sqrt(8) compared to a true DCT; */
+  /* furthermore, we scale the results by 2**PASS1_BITS. */
+  /* We scale the results further by 2 as part of output adaption */
+  /* scaling for different DCT size. */
+  /* 3-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/6). */
+
+  dataptr = data;
+  for (ctr = 0; ctr < 6; ctr++) {
+    elemptr = sample_data[ctr] + start_col;
+
+    /* Even part */
+
+    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[2]);
+    tmp1 = GETJSAMPLE(elemptr[1]);
+
+    tmp2 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[2]);
+
+    /* Apply unsigned->signed conversion */
+    dataptr[0] = (DCTELEM)
+      ((tmp0 + tmp1 - 3 * CENTERJSAMPLE) << (PASS1_BITS+1));
+    dataptr[2] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp0 - tmp1 - tmp1, FIX(0.707106781)), /* c2 */
+	      CONST_BITS-PASS1_BITS-1);
+
+    /* Odd part */
+
+    dataptr[1] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp2, FIX(1.224744871)),               /* c1 */
+	      CONST_BITS-PASS1_BITS-1);
+
+    dataptr += DCTSIZE;		/* advance pointer to next row */
+  }
+
+  /* Pass 2: process columns.
+   * We remove the PASS1_BITS scaling, but leave the results scaled up
+   * by an overall factor of 8.
+   * We must also scale the output by (8/6)*(8/3) = 32/9, which we partially
+   * fold into the constant multipliers (other part was done in pass 1):
+   * 6-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/12) * 16/9.
+   */
+
+  dataptr = data;
+  for (ctr = 0; ctr < 3; ctr++) {
+    /* Even part */
+
+    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*5];
+    tmp11 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*4];
+    tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*3];
+
+    tmp10 = tmp0 + tmp2;
+    tmp12 = tmp0 - tmp2;
+
+    tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*5];
+    tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*4];
+    tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*3];
+
+    dataptr[DCTSIZE*0] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp10 + tmp11, FIX(1.777777778)),         /* 16/9 */
+	      CONST_BITS+PASS1_BITS);
+    dataptr[DCTSIZE*2] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp12, FIX(2.177324216)),                 /* c2 */
+	      CONST_BITS+PASS1_BITS);
+    dataptr[DCTSIZE*4] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp10 - tmp11 - tmp11, FIX(1.257078722)), /* c4 */
+	      CONST_BITS+PASS1_BITS);
+
+    /* Odd part */
+
+    tmp10 = MULTIPLY(tmp0 + tmp2, FIX(0.650711829));             /* c5 */
+
+    dataptr[DCTSIZE*1] = (DCTELEM)
+      DESCALE(tmp10 + MULTIPLY(tmp0 + tmp1, FIX(1.777777778)),   /* 16/9 */
+	      CONST_BITS+PASS1_BITS);
+    dataptr[DCTSIZE*3] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp0 - tmp1 - tmp2, FIX(1.777777778)),    /* 16/9 */
+	      CONST_BITS+PASS1_BITS);
+    dataptr[DCTSIZE*5] = (DCTELEM)
+      DESCALE(tmp10 + MULTIPLY(tmp2 - tmp1, FIX(1.777777778)),   /* 16/9 */
+	      CONST_BITS+PASS1_BITS);
+
+    dataptr++;			/* advance pointer to next column */
+  }
+}
+
+
+/*
+ * Perform the forward DCT on a 2x4 sample block.
+ *
+ * 2-point FDCT in pass 1 (rows), 4-point in pass 2 (columns).
+ */
+
+GLOBAL(void)
+jpeg_fdct_2x4 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
+{
+  INT32 tmp0, tmp1;
+  INT32 tmp10, tmp11;
+  DCTELEM *dataptr;
+  JSAMPROW elemptr;
+  int ctr;
+  SHIFT_TEMPS
+
+  /* Pre-zero output coefficient block. */
+  MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
+
+  /* Pass 1: process rows. */
+  /* Note results are scaled up by sqrt(8) compared to a true DCT. */
+  /* We must also scale the output by (8/2)*(8/4) = 2**3, which we add here. */
+
+  dataptr = data;
+  for (ctr = 0; ctr < 4; ctr++) {
+    elemptr = sample_data[ctr] + start_col;
+
+    /* Even part */
+
+    tmp0 = GETJSAMPLE(elemptr[0]);
+    tmp1 = GETJSAMPLE(elemptr[1]);
+
+    /* Apply unsigned->signed conversion */
+    dataptr[0] = (DCTELEM) ((tmp0 + tmp1 - 2 * CENTERJSAMPLE) << 3);
+
+    /* Odd part */
+
+    dataptr[1] = (DCTELEM) ((tmp0 - tmp1) << 3);
+
+    dataptr += DCTSIZE;		/* advance pointer to next row */
+  }
+
+  /* Pass 2: process columns.
+   * We leave the results scaled up by an overall factor of 8.
+   * 4-point FDCT kernel,
+   * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point FDCT].
+   */
+
+  dataptr = data;
+  for (ctr = 0; ctr < 2; ctr++) {
+    /* Even part */
+
+    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*3];
+    tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*2];
+
+    tmp10 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*3];
+    tmp11 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*2];
+
+    dataptr[DCTSIZE*0] = (DCTELEM) (tmp0 + tmp1);
+    dataptr[DCTSIZE*2] = (DCTELEM) (tmp0 - tmp1);
+
+    /* Odd part */
+
+    tmp0 = MULTIPLY(tmp10 + tmp11, FIX_0_541196100);       /* c6 */
+    /* Add fudge factor here for final descale. */
+    tmp0 += ONE << (CONST_BITS-1);
+
+    dataptr[DCTSIZE*1] = (DCTELEM)
+      RIGHT_SHIFT(tmp0 + MULTIPLY(tmp10, FIX_0_765366865), /* c2-c6 */
+		  CONST_BITS);
+    dataptr[DCTSIZE*3] = (DCTELEM)
+      RIGHT_SHIFT(tmp0 - MULTIPLY(tmp11, FIX_1_847759065), /* c2+c6 */
+		  CONST_BITS);
+
+    dataptr++;			/* advance pointer to next column */
+  }
+}
+
+
+/*
+ * Perform the forward DCT on a 1x2 sample block.
+ *
+ * 1-point FDCT in pass 1 (rows), 2-point in pass 2 (columns).
+ */
+
+GLOBAL(void)
+jpeg_fdct_1x2 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
+{
+  INT32 tmp0, tmp1;
+
+  /* Pre-zero output coefficient block. */
+  MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
+
+  tmp0 = GETJSAMPLE(sample_data[0][start_col]);
+  tmp1 = GETJSAMPLE(sample_data[1][start_col]);
+
+  /* We leave the results scaled up by an overall factor of 8.
+   * We must also scale the output by (8/1)*(8/2) = 2**5.
+   */
+
+  /* Even part */
+  /* Apply unsigned->signed conversion */
+  data[DCTSIZE*0] = (DCTELEM) ((tmp0 + tmp1 - 2 * CENTERJSAMPLE) << 5);
+
+  /* Odd part */
+  data[DCTSIZE*1] = (DCTELEM) ((tmp0 - tmp1) << 5);
+}
+
+#endif /* DCT_SCALING_SUPPORTED */
+#endif /* DCT_ISLOW_SUPPORTED */
diff --git a/plugins/AdvaImg/src/LibJPEG/jidctflt.c b/plugins/AdvaImg/src/LibJPEG/jidctflt.c
index 23ae9d333b..f399600c89 100644
--- a/plugins/AdvaImg/src/LibJPEG/jidctflt.c
+++ b/plugins/AdvaImg/src/LibJPEG/jidctflt.c
@@ -1,235 +1,235 @@
-/*
- * jidctflt.c
- *
- * Copyright (C) 1994-1998, Thomas G. Lane.
- * Modified 2010 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains a floating-point implementation of the
- * inverse DCT (Discrete Cosine Transform).  In the IJG code, this routine
- * must also perform dequantization of the input coefficients.
- *
- * This implementation should be more accurate than either of the integer
- * IDCT implementations.  However, it may not give the same results on all
- * machines because of differences in roundoff behavior.  Speed will depend
- * on the hardware's floating point capacity.
- *
- * A 2-D IDCT can be done by 1-D IDCT on each column followed by 1-D IDCT
- * on each row (or vice versa, but it's more convenient to emit a row at
- * a time).  Direct algorithms are also available, but they are much more
- * complex and seem not to be any faster when reduced to code.
- *
- * This implementation is based on Arai, Agui, and Nakajima's algorithm for
- * scaled DCT.  Their original paper (Trans. IEICE E-71(11):1095) is in
- * Japanese, but the algorithm is described in the Pennebaker & Mitchell
- * JPEG textbook (see REFERENCES section in file README).  The following code
- * is based directly on figure 4-8 in P&M.
- * While an 8-point DCT cannot be done in less than 11 multiplies, it is
- * possible to arrange the computation so that many of the multiplies are
- * simple scalings of the final outputs.  These multiplies can then be
- * folded into the multiplications or divisions by the JPEG quantization
- * table entries.  The AA&N method leaves only 5 multiplies and 29 adds
- * to be done in the DCT itself.
- * The primary disadvantage of this method is that with a fixed-point
- * implementation, accuracy is lost due to imprecise representation of the
- * scaled quantization values.  However, that problem does not arise if
- * we use floating point arithmetic.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-#include "jdct.h"		/* Private declarations for DCT subsystem */
-
-#ifdef DCT_FLOAT_SUPPORTED
-
-
-/*
- * This module is specialized to the case DCTSIZE = 8.
- */
-
-#if DCTSIZE != 8
-  Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */
-#endif
-
-
-/* Dequantize a coefficient by multiplying it by the multiplier-table
- * entry; produce a float result.
- */
-
-#define DEQUANTIZE(coef,quantval)  (((FAST_FLOAT) (coef)) * (quantval))
-
-
-/*
- * Perform dequantization and inverse DCT on one block of coefficients.
- */
-
-GLOBAL(void)
-jpeg_idct_float (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-		 JCOEFPTR coef_block,
-		 JSAMPARRAY output_buf, JDIMENSION output_col)
-{
-  FAST_FLOAT tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
-  FAST_FLOAT tmp10, tmp11, tmp12, tmp13;
-  FAST_FLOAT z5, z10, z11, z12, z13;
-  JCOEFPTR inptr;
-  FLOAT_MULT_TYPE * quantptr;
-  FAST_FLOAT * wsptr;
-  JSAMPROW outptr;
-  JSAMPLE *range_limit = cinfo->sample_range_limit;
-  int ctr;
-  FAST_FLOAT workspace[DCTSIZE2]; /* buffers data between passes */
-
-  /* Pass 1: process columns from input, store into work array. */
-
-  inptr = coef_block;
-  quantptr = (FLOAT_MULT_TYPE *) compptr->dct_table;
-  wsptr = workspace;
-  for (ctr = DCTSIZE; ctr > 0; ctr--) {
-    /* Due to quantization, we will usually find that many of the input
-     * coefficients are zero, especially the AC terms.  We can exploit this
-     * by short-circuiting the IDCT calculation for any column in which all
-     * the AC terms are zero.  In that case each output is equal to the
-     * DC coefficient (with scale factor as needed).
-     * With typical images and quantization tables, half or more of the
-     * column DCT calculations can be simplified this way.
-     */
-    
-    if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 &&
-	inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&
-	inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&
-	inptr[DCTSIZE*7] == 0) {
-      /* AC terms all zero */
-      FAST_FLOAT dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
-      
-      wsptr[DCTSIZE*0] = dcval;
-      wsptr[DCTSIZE*1] = dcval;
-      wsptr[DCTSIZE*2] = dcval;
-      wsptr[DCTSIZE*3] = dcval;
-      wsptr[DCTSIZE*4] = dcval;
-      wsptr[DCTSIZE*5] = dcval;
-      wsptr[DCTSIZE*6] = dcval;
-      wsptr[DCTSIZE*7] = dcval;
-      
-      inptr++;			/* advance pointers to next column */
-      quantptr++;
-      wsptr++;
-      continue;
-    }
-    
-    /* Even part */
-
-    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
-    tmp1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
-    tmp2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
-    tmp3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
-
-    tmp10 = tmp0 + tmp2;	/* phase 3 */
-    tmp11 = tmp0 - tmp2;
-
-    tmp13 = tmp1 + tmp3;	/* phases 5-3 */
-    tmp12 = (tmp1 - tmp3) * ((FAST_FLOAT) 1.414213562) - tmp13; /* 2*c4 */
-
-    tmp0 = tmp10 + tmp13;	/* phase 2 */
-    tmp3 = tmp10 - tmp13;
-    tmp1 = tmp11 + tmp12;
-    tmp2 = tmp11 - tmp12;
-    
-    /* Odd part */
-
-    tmp4 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
-    tmp5 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
-    tmp6 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
-    tmp7 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
-
-    z13 = tmp6 + tmp5;		/* phase 6 */
-    z10 = tmp6 - tmp5;
-    z11 = tmp4 + tmp7;
-    z12 = tmp4 - tmp7;
-
-    tmp7 = z11 + z13;		/* phase 5 */
-    tmp11 = (z11 - z13) * ((FAST_FLOAT) 1.414213562); /* 2*c4 */
-
-    z5 = (z10 + z12) * ((FAST_FLOAT) 1.847759065); /* 2*c2 */
-    tmp10 = z5 - z12 * ((FAST_FLOAT) 1.082392200); /* 2*(c2-c6) */
-    tmp12 = z5 - z10 * ((FAST_FLOAT) 2.613125930); /* 2*(c2+c6) */
-
-    tmp6 = tmp12 - tmp7;	/* phase 2 */
-    tmp5 = tmp11 - tmp6;
-    tmp4 = tmp10 - tmp5;
-
-    wsptr[DCTSIZE*0] = tmp0 + tmp7;
-    wsptr[DCTSIZE*7] = tmp0 - tmp7;
-    wsptr[DCTSIZE*1] = tmp1 + tmp6;
-    wsptr[DCTSIZE*6] = tmp1 - tmp6;
-    wsptr[DCTSIZE*2] = tmp2 + tmp5;
-    wsptr[DCTSIZE*5] = tmp2 - tmp5;
-    wsptr[DCTSIZE*3] = tmp3 + tmp4;
-    wsptr[DCTSIZE*4] = tmp3 - tmp4;
-
-    inptr++;			/* advance pointers to next column */
-    quantptr++;
-    wsptr++;
-  }
-  
-  /* Pass 2: process rows from work array, store into output array. */
-
-  wsptr = workspace;
-  for (ctr = 0; ctr < DCTSIZE; ctr++) {
-    outptr = output_buf[ctr] + output_col;
-    /* Rows of zeroes can be exploited in the same way as we did with columns.
-     * However, the column calculation has created many nonzero AC terms, so
-     * the simplification applies less often (typically 5% to 10% of the time).
-     * And testing floats for zero is relatively expensive, so we don't bother.
-     */
-    
-    /* Even part */
-
-    /* Apply signed->unsigned and prepare float->int conversion */
-    z5 = wsptr[0] + ((FAST_FLOAT) CENTERJSAMPLE + (FAST_FLOAT) 0.5);
-    tmp10 = z5 + wsptr[4];
-    tmp11 = z5 - wsptr[4];
-
-    tmp13 = wsptr[2] + wsptr[6];
-    tmp12 = (wsptr[2] - wsptr[6]) * ((FAST_FLOAT) 1.414213562) - tmp13;
-
-    tmp0 = tmp10 + tmp13;
-    tmp3 = tmp10 - tmp13;
-    tmp1 = tmp11 + tmp12;
-    tmp2 = tmp11 - tmp12;
-
-    /* Odd part */
-
-    z13 = wsptr[5] + wsptr[3];
-    z10 = wsptr[5] - wsptr[3];
-    z11 = wsptr[1] + wsptr[7];
-    z12 = wsptr[1] - wsptr[7];
-
-    tmp7 = z11 + z13;
-    tmp11 = (z11 - z13) * ((FAST_FLOAT) 1.414213562);
-
-    z5 = (z10 + z12) * ((FAST_FLOAT) 1.847759065); /* 2*c2 */
-    tmp10 = z5 - z12 * ((FAST_FLOAT) 1.082392200); /* 2*(c2-c6) */
-    tmp12 = z5 - z10 * ((FAST_FLOAT) 2.613125930); /* 2*(c2+c6) */
-
-    tmp6 = tmp12 - tmp7;
-    tmp5 = tmp11 - tmp6;
-    tmp4 = tmp10 - tmp5;
-
-    /* Final output stage: float->int conversion and range-limit */
-
-    outptr[0] = range_limit[((int) (tmp0 + tmp7)) & RANGE_MASK];
-    outptr[7] = range_limit[((int) (tmp0 - tmp7)) & RANGE_MASK];
-    outptr[1] = range_limit[((int) (tmp1 + tmp6)) & RANGE_MASK];
-    outptr[6] = range_limit[((int) (tmp1 - tmp6)) & RANGE_MASK];
-    outptr[2] = range_limit[((int) (tmp2 + tmp5)) & RANGE_MASK];
-    outptr[5] = range_limit[((int) (tmp2 - tmp5)) & RANGE_MASK];
-    outptr[3] = range_limit[((int) (tmp3 + tmp4)) & RANGE_MASK];
-    outptr[4] = range_limit[((int) (tmp3 - tmp4)) & RANGE_MASK];
-    
-    wsptr += DCTSIZE;		/* advance pointer to next row */
-  }
-}
-
-#endif /* DCT_FLOAT_SUPPORTED */
+/*
+ * jidctflt.c
+ *
+ * Copyright (C) 1994-1998, Thomas G. Lane.
+ * Modified 2010 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains a floating-point implementation of the
+ * inverse DCT (Discrete Cosine Transform).  In the IJG code, this routine
+ * must also perform dequantization of the input coefficients.
+ *
+ * This implementation should be more accurate than either of the integer
+ * IDCT implementations.  However, it may not give the same results on all
+ * machines because of differences in roundoff behavior.  Speed will depend
+ * on the hardware's floating point capacity.
+ *
+ * A 2-D IDCT can be done by 1-D IDCT on each column followed by 1-D IDCT
+ * on each row (or vice versa, but it's more convenient to emit a row at
+ * a time).  Direct algorithms are also available, but they are much more
+ * complex and seem not to be any faster when reduced to code.
+ *
+ * This implementation is based on Arai, Agui, and Nakajima's algorithm for
+ * scaled DCT.  Their original paper (Trans. IEICE E-71(11):1095) is in
+ * Japanese, but the algorithm is described in the Pennebaker & Mitchell
+ * JPEG textbook (see REFERENCES section in file README).  The following code
+ * is based directly on figure 4-8 in P&M.
+ * While an 8-point DCT cannot be done in less than 11 multiplies, it is
+ * possible to arrange the computation so that many of the multiplies are
+ * simple scalings of the final outputs.  These multiplies can then be
+ * folded into the multiplications or divisions by the JPEG quantization
+ * table entries.  The AA&N method leaves only 5 multiplies and 29 adds
+ * to be done in the DCT itself.
+ * The primary disadvantage of this method is that with a fixed-point
+ * implementation, accuracy is lost due to imprecise representation of the
+ * scaled quantization values.  However, that problem does not arise if
+ * we use floating point arithmetic.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jdct.h"		/* Private declarations for DCT subsystem */
+
+#ifdef DCT_FLOAT_SUPPORTED
+
+
+/*
+ * This module is specialized to the case DCTSIZE = 8.
+ */
+
+#if DCTSIZE != 8
+  Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */
+#endif
+
+
+/* Dequantize a coefficient by multiplying it by the multiplier-table
+ * entry; produce a float result.
+ */
+
+#define DEQUANTIZE(coef,quantval)  (((FAST_FLOAT) (coef)) * (quantval))
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients.
+ */
+
+GLOBAL(void)
+jpeg_idct_float (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+		 JCOEFPTR coef_block,
+		 JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  FAST_FLOAT tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+  FAST_FLOAT tmp10, tmp11, tmp12, tmp13;
+  FAST_FLOAT z5, z10, z11, z12, z13;
+  JCOEFPTR inptr;
+  FLOAT_MULT_TYPE * quantptr;
+  FAST_FLOAT * wsptr;
+  JSAMPROW outptr;
+  JSAMPLE *range_limit = cinfo->sample_range_limit;
+  int ctr;
+  FAST_FLOAT workspace[DCTSIZE2]; /* buffers data between passes */
+
+  /* Pass 1: process columns from input, store into work array. */
+
+  inptr = coef_block;
+  quantptr = (FLOAT_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = DCTSIZE; ctr > 0; ctr--) {
+    /* Due to quantization, we will usually find that many of the input
+     * coefficients are zero, especially the AC terms.  We can exploit this
+     * by short-circuiting the IDCT calculation for any column in which all
+     * the AC terms are zero.  In that case each output is equal to the
+     * DC coefficient (with scale factor as needed).
+     * With typical images and quantization tables, half or more of the
+     * column DCT calculations can be simplified this way.
+     */
+    
+    if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 &&
+	inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&
+	inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&
+	inptr[DCTSIZE*7] == 0) {
+      /* AC terms all zero */
+      FAST_FLOAT dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+      
+      wsptr[DCTSIZE*0] = dcval;
+      wsptr[DCTSIZE*1] = dcval;
+      wsptr[DCTSIZE*2] = dcval;
+      wsptr[DCTSIZE*3] = dcval;
+      wsptr[DCTSIZE*4] = dcval;
+      wsptr[DCTSIZE*5] = dcval;
+      wsptr[DCTSIZE*6] = dcval;
+      wsptr[DCTSIZE*7] = dcval;
+      
+      inptr++;			/* advance pointers to next column */
+      quantptr++;
+      wsptr++;
+      continue;
+    }
+    
+    /* Even part */
+
+    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    tmp1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+    tmp2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+    tmp3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
+
+    tmp10 = tmp0 + tmp2;	/* phase 3 */
+    tmp11 = tmp0 - tmp2;
+
+    tmp13 = tmp1 + tmp3;	/* phases 5-3 */
+    tmp12 = (tmp1 - tmp3) * ((FAST_FLOAT) 1.414213562) - tmp13; /* 2*c4 */
+
+    tmp0 = tmp10 + tmp13;	/* phase 2 */
+    tmp3 = tmp10 - tmp13;
+    tmp1 = tmp11 + tmp12;
+    tmp2 = tmp11 - tmp12;
+    
+    /* Odd part */
+
+    tmp4 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+    tmp5 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+    tmp6 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
+    tmp7 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
+
+    z13 = tmp6 + tmp5;		/* phase 6 */
+    z10 = tmp6 - tmp5;
+    z11 = tmp4 + tmp7;
+    z12 = tmp4 - tmp7;
+
+    tmp7 = z11 + z13;		/* phase 5 */
+    tmp11 = (z11 - z13) * ((FAST_FLOAT) 1.414213562); /* 2*c4 */
+
+    z5 = (z10 + z12) * ((FAST_FLOAT) 1.847759065); /* 2*c2 */
+    tmp10 = z5 - z12 * ((FAST_FLOAT) 1.082392200); /* 2*(c2-c6) */
+    tmp12 = z5 - z10 * ((FAST_FLOAT) 2.613125930); /* 2*(c2+c6) */
+
+    tmp6 = tmp12 - tmp7;	/* phase 2 */
+    tmp5 = tmp11 - tmp6;
+    tmp4 = tmp10 - tmp5;
+
+    wsptr[DCTSIZE*0] = tmp0 + tmp7;
+    wsptr[DCTSIZE*7] = tmp0 - tmp7;
+    wsptr[DCTSIZE*1] = tmp1 + tmp6;
+    wsptr[DCTSIZE*6] = tmp1 - tmp6;
+    wsptr[DCTSIZE*2] = tmp2 + tmp5;
+    wsptr[DCTSIZE*5] = tmp2 - tmp5;
+    wsptr[DCTSIZE*3] = tmp3 + tmp4;
+    wsptr[DCTSIZE*4] = tmp3 - tmp4;
+
+    inptr++;			/* advance pointers to next column */
+    quantptr++;
+    wsptr++;
+  }
+  
+  /* Pass 2: process rows from work array, store into output array. */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < DCTSIZE; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+    /* Rows of zeroes can be exploited in the same way as we did with columns.
+     * However, the column calculation has created many nonzero AC terms, so
+     * the simplification applies less often (typically 5% to 10% of the time).
+     * And testing floats for zero is relatively expensive, so we don't bother.
+     */
+    
+    /* Even part */
+
+    /* Apply signed->unsigned and prepare float->int conversion */
+    z5 = wsptr[0] + ((FAST_FLOAT) CENTERJSAMPLE + (FAST_FLOAT) 0.5);
+    tmp10 = z5 + wsptr[4];
+    tmp11 = z5 - wsptr[4];
+
+    tmp13 = wsptr[2] + wsptr[6];
+    tmp12 = (wsptr[2] - wsptr[6]) * ((FAST_FLOAT) 1.414213562) - tmp13;
+
+    tmp0 = tmp10 + tmp13;
+    tmp3 = tmp10 - tmp13;
+    tmp1 = tmp11 + tmp12;
+    tmp2 = tmp11 - tmp12;
+
+    /* Odd part */
+
+    z13 = wsptr[5] + wsptr[3];
+    z10 = wsptr[5] - wsptr[3];
+    z11 = wsptr[1] + wsptr[7];
+    z12 = wsptr[1] - wsptr[7];
+
+    tmp7 = z11 + z13;
+    tmp11 = (z11 - z13) * ((FAST_FLOAT) 1.414213562);
+
+    z5 = (z10 + z12) * ((FAST_FLOAT) 1.847759065); /* 2*c2 */
+    tmp10 = z5 - z12 * ((FAST_FLOAT) 1.082392200); /* 2*(c2-c6) */
+    tmp12 = z5 - z10 * ((FAST_FLOAT) 2.613125930); /* 2*(c2+c6) */
+
+    tmp6 = tmp12 - tmp7;
+    tmp5 = tmp11 - tmp6;
+    tmp4 = tmp10 - tmp5;
+
+    /* Final output stage: float->int conversion and range-limit */
+
+    outptr[0] = range_limit[((int) (tmp0 + tmp7)) & RANGE_MASK];
+    outptr[7] = range_limit[((int) (tmp0 - tmp7)) & RANGE_MASK];
+    outptr[1] = range_limit[((int) (tmp1 + tmp6)) & RANGE_MASK];
+    outptr[6] = range_limit[((int) (tmp1 - tmp6)) & RANGE_MASK];
+    outptr[2] = range_limit[((int) (tmp2 + tmp5)) & RANGE_MASK];
+    outptr[5] = range_limit[((int) (tmp2 - tmp5)) & RANGE_MASK];
+    outptr[3] = range_limit[((int) (tmp3 + tmp4)) & RANGE_MASK];
+    outptr[4] = range_limit[((int) (tmp3 - tmp4)) & RANGE_MASK];
+    
+    wsptr += DCTSIZE;		/* advance pointer to next row */
+  }
+}
+
+#endif /* DCT_FLOAT_SUPPORTED */
diff --git a/plugins/AdvaImg/src/LibJPEG/jidctfst.c b/plugins/AdvaImg/src/LibJPEG/jidctfst.c
index dba4216fb9..078b8c444e 100644
--- a/plugins/AdvaImg/src/LibJPEG/jidctfst.c
+++ b/plugins/AdvaImg/src/LibJPEG/jidctfst.c
@@ -1,368 +1,368 @@
-/*
- * jidctfst.c
- *
- * Copyright (C) 1994-1998, Thomas G. Lane.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains a fast, not so accurate integer implementation of the
- * inverse DCT (Discrete Cosine Transform).  In the IJG code, this routine
- * must also perform dequantization of the input coefficients.
- *
- * A 2-D IDCT can be done by 1-D IDCT on each column followed by 1-D IDCT
- * on each row (or vice versa, but it's more convenient to emit a row at
- * a time).  Direct algorithms are also available, but they are much more
- * complex and seem not to be any faster when reduced to code.
- *
- * This implementation is based on Arai, Agui, and Nakajima's algorithm for
- * scaled DCT.  Their original paper (Trans. IEICE E-71(11):1095) is in
- * Japanese, but the algorithm is described in the Pennebaker & Mitchell
- * JPEG textbook (see REFERENCES section in file README).  The following code
- * is based directly on figure 4-8 in P&M.
- * While an 8-point DCT cannot be done in less than 11 multiplies, it is
- * possible to arrange the computation so that many of the multiplies are
- * simple scalings of the final outputs.  These multiplies can then be
- * folded into the multiplications or divisions by the JPEG quantization
- * table entries.  The AA&N method leaves only 5 multiplies and 29 adds
- * to be done in the DCT itself.
- * The primary disadvantage of this method is that with fixed-point math,
- * accuracy is lost due to imprecise representation of the scaled
- * quantization values.  The smaller the quantization table entry, the less
- * precise the scaled value, so this implementation does worse with high-
- * quality-setting files than with low-quality ones.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-#include "jdct.h"		/* Private declarations for DCT subsystem */
-
-#ifdef DCT_IFAST_SUPPORTED
-
-
-/*
- * This module is specialized to the case DCTSIZE = 8.
- */
-
-#if DCTSIZE != 8
-  Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */
-#endif
-
-
-/* Scaling decisions are generally the same as in the LL&M algorithm;
- * see jidctint.c for more details.  However, we choose to descale
- * (right shift) multiplication products as soon as they are formed,
- * rather than carrying additional fractional bits into subsequent additions.
- * This compromises accuracy slightly, but it lets us save a few shifts.
- * More importantly, 16-bit arithmetic is then adequate (for 8-bit samples)
- * everywhere except in the multiplications proper; this saves a good deal
- * of work on 16-bit-int machines.
- *
- * The dequantized coefficients are not integers because the AA&N scaling
- * factors have been incorporated.  We represent them scaled up by PASS1_BITS,
- * so that the first and second IDCT rounds have the same input scaling.
- * For 8-bit JSAMPLEs, we choose IFAST_SCALE_BITS = PASS1_BITS so as to
- * avoid a descaling shift; this compromises accuracy rather drastically
- * for small quantization table entries, but it saves a lot of shifts.
- * For 12-bit JSAMPLEs, there's no hope of using 16x16 multiplies anyway,
- * so we use a much larger scaling factor to preserve accuracy.
- *
- * A final compromise is to represent the multiplicative constants to only
- * 8 fractional bits, rather than 13.  This saves some shifting work on some
- * machines, and may also reduce the cost of multiplication (since there
- * are fewer one-bits in the constants).
- */
-
-#if BITS_IN_JSAMPLE == 8
-#define CONST_BITS  8
-#define PASS1_BITS  2
-#else
-#define CONST_BITS  8
-#define PASS1_BITS  1		/* lose a little precision to avoid overflow */
-#endif
-
-/* Some C compilers fail to reduce "FIX(constant)" at compile time, thus
- * causing a lot of useless floating-point operations at run time.
- * To get around this we use the following pre-calculated constants.
- * If you change CONST_BITS you may want to add appropriate values.
- * (With a reasonable C compiler, you can just rely on the FIX() macro...)
- */
-
-#if CONST_BITS == 8
-#define FIX_1_082392200  ((INT32)  277)		/* FIX(1.082392200) */
-#define FIX_1_414213562  ((INT32)  362)		/* FIX(1.414213562) */
-#define FIX_1_847759065  ((INT32)  473)		/* FIX(1.847759065) */
-#define FIX_2_613125930  ((INT32)  669)		/* FIX(2.613125930) */
-#else
-#define FIX_1_082392200  FIX(1.082392200)
-#define FIX_1_414213562  FIX(1.414213562)
-#define FIX_1_847759065  FIX(1.847759065)
-#define FIX_2_613125930  FIX(2.613125930)
-#endif
-
-
-/* We can gain a little more speed, with a further compromise in accuracy,
- * by omitting the addition in a descaling shift.  This yields an incorrectly
- * rounded result half the time...
- */
-
-#ifndef USE_ACCURATE_ROUNDING
-#undef DESCALE
-#define DESCALE(x,n)  RIGHT_SHIFT(x, n)
-#endif
-
-
-/* Multiply a DCTELEM variable by an INT32 constant, and immediately
- * descale to yield a DCTELEM result.
- */
-
-#define MULTIPLY(var,const)  ((DCTELEM) DESCALE((var) * (const), CONST_BITS))
-
-
-/* Dequantize a coefficient by multiplying it by the multiplier-table
- * entry; produce a DCTELEM result.  For 8-bit data a 16x16->16
- * multiplication will do.  For 12-bit data, the multiplier table is
- * declared INT32, so a 32-bit multiply will be used.
- */
-
-#if BITS_IN_JSAMPLE == 8
-#define DEQUANTIZE(coef,quantval)  (((IFAST_MULT_TYPE) (coef)) * (quantval))
-#else
-#define DEQUANTIZE(coef,quantval)  \
-	DESCALE((coef)*(quantval), IFAST_SCALE_BITS-PASS1_BITS)
-#endif
-
-
-/* Like DESCALE, but applies to a DCTELEM and produces an int.
- * We assume that int right shift is unsigned if INT32 right shift is.
- */
-
-#ifdef RIGHT_SHIFT_IS_UNSIGNED
-#define ISHIFT_TEMPS	DCTELEM ishift_temp;
-#if BITS_IN_JSAMPLE == 8
-#define DCTELEMBITS  16		/* DCTELEM may be 16 or 32 bits */
-#else
-#define DCTELEMBITS  32		/* DCTELEM must be 32 bits */
-#endif
-#define IRIGHT_SHIFT(x,shft)  \
-    ((ishift_temp = (x)) < 0 ? \
-     (ishift_temp >> (shft)) | ((~((DCTELEM) 0)) << (DCTELEMBITS-(shft))) : \
-     (ishift_temp >> (shft)))
-#else
-#define ISHIFT_TEMPS
-#define IRIGHT_SHIFT(x,shft)	((x) >> (shft))
-#endif
-
-#ifdef USE_ACCURATE_ROUNDING
-#define IDESCALE(x,n)  ((int) IRIGHT_SHIFT((x) + (1 << ((n)-1)), n))
-#else
-#define IDESCALE(x,n)  ((int) IRIGHT_SHIFT(x, n))
-#endif
-
-
-/*
- * Perform dequantization and inverse DCT on one block of coefficients.
- */
-
-GLOBAL(void)
-jpeg_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-		 JCOEFPTR coef_block,
-		 JSAMPARRAY output_buf, JDIMENSION output_col)
-{
-  DCTELEM tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
-  DCTELEM tmp10, tmp11, tmp12, tmp13;
-  DCTELEM z5, z10, z11, z12, z13;
-  JCOEFPTR inptr;
-  IFAST_MULT_TYPE * quantptr;
-  int * wsptr;
-  JSAMPROW outptr;
-  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
-  int ctr;
-  int workspace[DCTSIZE2];	/* buffers data between passes */
-  SHIFT_TEMPS			/* for DESCALE */
-  ISHIFT_TEMPS			/* for IDESCALE */
-
-  /* Pass 1: process columns from input, store into work array. */
-
-  inptr = coef_block;
-  quantptr = (IFAST_MULT_TYPE *) compptr->dct_table;
-  wsptr = workspace;
-  for (ctr = DCTSIZE; ctr > 0; ctr--) {
-    /* Due to quantization, we will usually find that many of the input
-     * coefficients are zero, especially the AC terms.  We can exploit this
-     * by short-circuiting the IDCT calculation for any column in which all
-     * the AC terms are zero.  In that case each output is equal to the
-     * DC coefficient (with scale factor as needed).
-     * With typical images and quantization tables, half or more of the
-     * column DCT calculations can be simplified this way.
-     */
-    
-    if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 &&
-	inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&
-	inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&
-	inptr[DCTSIZE*7] == 0) {
-      /* AC terms all zero */
-      int dcval = (int) DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
-
-      wsptr[DCTSIZE*0] = dcval;
-      wsptr[DCTSIZE*1] = dcval;
-      wsptr[DCTSIZE*2] = dcval;
-      wsptr[DCTSIZE*3] = dcval;
-      wsptr[DCTSIZE*4] = dcval;
-      wsptr[DCTSIZE*5] = dcval;
-      wsptr[DCTSIZE*6] = dcval;
-      wsptr[DCTSIZE*7] = dcval;
-      
-      inptr++;			/* advance pointers to next column */
-      quantptr++;
-      wsptr++;
-      continue;
-    }
-    
-    /* Even part */
-
-    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
-    tmp1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
-    tmp2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
-    tmp3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
-
-    tmp10 = tmp0 + tmp2;	/* phase 3 */
-    tmp11 = tmp0 - tmp2;
-
-    tmp13 = tmp1 + tmp3;	/* phases 5-3 */
-    tmp12 = MULTIPLY(tmp1 - tmp3, FIX_1_414213562) - tmp13; /* 2*c4 */
-
-    tmp0 = tmp10 + tmp13;	/* phase 2 */
-    tmp3 = tmp10 - tmp13;
-    tmp1 = tmp11 + tmp12;
-    tmp2 = tmp11 - tmp12;
-    
-    /* Odd part */
-
-    tmp4 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
-    tmp5 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
-    tmp6 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
-    tmp7 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
-
-    z13 = tmp6 + tmp5;		/* phase 6 */
-    z10 = tmp6 - tmp5;
-    z11 = tmp4 + tmp7;
-    z12 = tmp4 - tmp7;
-
-    tmp7 = z11 + z13;		/* phase 5 */
-    tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562); /* 2*c4 */
-
-    z5 = MULTIPLY(z10 + z12, FIX_1_847759065); /* 2*c2 */
-    tmp10 = MULTIPLY(z12, FIX_1_082392200) - z5; /* 2*(c2-c6) */
-    tmp12 = MULTIPLY(z10, - FIX_2_613125930) + z5; /* -2*(c2+c6) */
-
-    tmp6 = tmp12 - tmp7;	/* phase 2 */
-    tmp5 = tmp11 - tmp6;
-    tmp4 = tmp10 + tmp5;
-
-    wsptr[DCTSIZE*0] = (int) (tmp0 + tmp7);
-    wsptr[DCTSIZE*7] = (int) (tmp0 - tmp7);
-    wsptr[DCTSIZE*1] = (int) (tmp1 + tmp6);
-    wsptr[DCTSIZE*6] = (int) (tmp1 - tmp6);
-    wsptr[DCTSIZE*2] = (int) (tmp2 + tmp5);
-    wsptr[DCTSIZE*5] = (int) (tmp2 - tmp5);
-    wsptr[DCTSIZE*4] = (int) (tmp3 + tmp4);
-    wsptr[DCTSIZE*3] = (int) (tmp3 - tmp4);
-
-    inptr++;			/* advance pointers to next column */
-    quantptr++;
-    wsptr++;
-  }
-  
-  /* Pass 2: process rows from work array, store into output array. */
-  /* Note that we must descale the results by a factor of 8 == 2**3, */
-  /* and also undo the PASS1_BITS scaling. */
-
-  wsptr = workspace;
-  for (ctr = 0; ctr < DCTSIZE; ctr++) {
-    outptr = output_buf[ctr] + output_col;
-    /* Rows of zeroes can be exploited in the same way as we did with columns.
-     * However, the column calculation has created many nonzero AC terms, so
-     * the simplification applies less often (typically 5% to 10% of the time).
-     * On machines with very fast multiplication, it's possible that the
-     * test takes more time than it's worth.  In that case this section
-     * may be commented out.
-     */
-    
-#ifndef NO_ZERO_ROW_TEST
-    if (wsptr[1] == 0 && wsptr[2] == 0 && wsptr[3] == 0 && wsptr[4] == 0 &&
-	wsptr[5] == 0 && wsptr[6] == 0 && wsptr[7] == 0) {
-      /* AC terms all zero */
-      JSAMPLE dcval = range_limit[IDESCALE(wsptr[0], PASS1_BITS+3)
-				  & RANGE_MASK];
-      
-      outptr[0] = dcval;
-      outptr[1] = dcval;
-      outptr[2] = dcval;
-      outptr[3] = dcval;
-      outptr[4] = dcval;
-      outptr[5] = dcval;
-      outptr[6] = dcval;
-      outptr[7] = dcval;
-
-      wsptr += DCTSIZE;		/* advance pointer to next row */
-      continue;
-    }
-#endif
-    
-    /* Even part */
-
-    tmp10 = ((DCTELEM) wsptr[0] + (DCTELEM) wsptr[4]);
-    tmp11 = ((DCTELEM) wsptr[0] - (DCTELEM) wsptr[4]);
-
-    tmp13 = ((DCTELEM) wsptr[2] + (DCTELEM) wsptr[6]);
-    tmp12 = MULTIPLY((DCTELEM) wsptr[2] - (DCTELEM) wsptr[6], FIX_1_414213562)
-	    - tmp13;
-
-    tmp0 = tmp10 + tmp13;
-    tmp3 = tmp10 - tmp13;
-    tmp1 = tmp11 + tmp12;
-    tmp2 = tmp11 - tmp12;
-
-    /* Odd part */
-
-    z13 = (DCTELEM) wsptr[5] + (DCTELEM) wsptr[3];
-    z10 = (DCTELEM) wsptr[5] - (DCTELEM) wsptr[3];
-    z11 = (DCTELEM) wsptr[1] + (DCTELEM) wsptr[7];
-    z12 = (DCTELEM) wsptr[1] - (DCTELEM) wsptr[7];
-
-    tmp7 = z11 + z13;		/* phase 5 */
-    tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562); /* 2*c4 */
-
-    z5 = MULTIPLY(z10 + z12, FIX_1_847759065); /* 2*c2 */
-    tmp10 = MULTIPLY(z12, FIX_1_082392200) - z5; /* 2*(c2-c6) */
-    tmp12 = MULTIPLY(z10, - FIX_2_613125930) + z5; /* -2*(c2+c6) */
-
-    tmp6 = tmp12 - tmp7;	/* phase 2 */
-    tmp5 = tmp11 - tmp6;
-    tmp4 = tmp10 + tmp5;
-
-    /* Final output stage: scale down by a factor of 8 and range-limit */
-
-    outptr[0] = range_limit[IDESCALE(tmp0 + tmp7, PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[7] = range_limit[IDESCALE(tmp0 - tmp7, PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[1] = range_limit[IDESCALE(tmp1 + tmp6, PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[6] = range_limit[IDESCALE(tmp1 - tmp6, PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[2] = range_limit[IDESCALE(tmp2 + tmp5, PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[5] = range_limit[IDESCALE(tmp2 - tmp5, PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[4] = range_limit[IDESCALE(tmp3 + tmp4, PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[3] = range_limit[IDESCALE(tmp3 - tmp4, PASS1_BITS+3)
-			    & RANGE_MASK];
-
-    wsptr += DCTSIZE;		/* advance pointer to next row */
-  }
-}
-
-#endif /* DCT_IFAST_SUPPORTED */
+/*
+ * jidctfst.c
+ *
+ * Copyright (C) 1994-1998, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains a fast, not so accurate integer implementation of the
+ * inverse DCT (Discrete Cosine Transform).  In the IJG code, this routine
+ * must also perform dequantization of the input coefficients.
+ *
+ * A 2-D IDCT can be done by 1-D IDCT on each column followed by 1-D IDCT
+ * on each row (or vice versa, but it's more convenient to emit a row at
+ * a time).  Direct algorithms are also available, but they are much more
+ * complex and seem not to be any faster when reduced to code.
+ *
+ * This implementation is based on Arai, Agui, and Nakajima's algorithm for
+ * scaled DCT.  Their original paper (Trans. IEICE E-71(11):1095) is in
+ * Japanese, but the algorithm is described in the Pennebaker & Mitchell
+ * JPEG textbook (see REFERENCES section in file README).  The following code
+ * is based directly on figure 4-8 in P&M.
+ * While an 8-point DCT cannot be done in less than 11 multiplies, it is
+ * possible to arrange the computation so that many of the multiplies are
+ * simple scalings of the final outputs.  These multiplies can then be
+ * folded into the multiplications or divisions by the JPEG quantization
+ * table entries.  The AA&N method leaves only 5 multiplies and 29 adds
+ * to be done in the DCT itself.
+ * The primary disadvantage of this method is that with fixed-point math,
+ * accuracy is lost due to imprecise representation of the scaled
+ * quantization values.  The smaller the quantization table entry, the less
+ * precise the scaled value, so this implementation does worse with high-
+ * quality-setting files than with low-quality ones.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jdct.h"		/* Private declarations for DCT subsystem */
+
+#ifdef DCT_IFAST_SUPPORTED
+
+
+/*
+ * This module is specialized to the case DCTSIZE = 8.
+ */
+
+#if DCTSIZE != 8
+  Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */
+#endif
+
+
+/* Scaling decisions are generally the same as in the LL&M algorithm;
+ * see jidctint.c for more details.  However, we choose to descale
+ * (right shift) multiplication products as soon as they are formed,
+ * rather than carrying additional fractional bits into subsequent additions.
+ * This compromises accuracy slightly, but it lets us save a few shifts.
+ * More importantly, 16-bit arithmetic is then adequate (for 8-bit samples)
+ * everywhere except in the multiplications proper; this saves a good deal
+ * of work on 16-bit-int machines.
+ *
+ * The dequantized coefficients are not integers because the AA&N scaling
+ * factors have been incorporated.  We represent them scaled up by PASS1_BITS,
+ * so that the first and second IDCT rounds have the same input scaling.
+ * For 8-bit JSAMPLEs, we choose IFAST_SCALE_BITS = PASS1_BITS so as to
+ * avoid a descaling shift; this compromises accuracy rather drastically
+ * for small quantization table entries, but it saves a lot of shifts.
+ * For 12-bit JSAMPLEs, there's no hope of using 16x16 multiplies anyway,
+ * so we use a much larger scaling factor to preserve accuracy.
+ *
+ * A final compromise is to represent the multiplicative constants to only
+ * 8 fractional bits, rather than 13.  This saves some shifting work on some
+ * machines, and may also reduce the cost of multiplication (since there
+ * are fewer one-bits in the constants).
+ */
+
+#if BITS_IN_JSAMPLE == 8
+#define CONST_BITS  8
+#define PASS1_BITS  2
+#else
+#define CONST_BITS  8
+#define PASS1_BITS  1		/* lose a little precision to avoid overflow */
+#endif
+
+/* Some C compilers fail to reduce "FIX(constant)" at compile time, thus
+ * causing a lot of useless floating-point operations at run time.
+ * To get around this we use the following pre-calculated constants.
+ * If you change CONST_BITS you may want to add appropriate values.
+ * (With a reasonable C compiler, you can just rely on the FIX() macro...)
+ */
+
+#if CONST_BITS == 8
+#define FIX_1_082392200  ((INT32)  277)		/* FIX(1.082392200) */
+#define FIX_1_414213562  ((INT32)  362)		/* FIX(1.414213562) */
+#define FIX_1_847759065  ((INT32)  473)		/* FIX(1.847759065) */
+#define FIX_2_613125930  ((INT32)  669)		/* FIX(2.613125930) */
+#else
+#define FIX_1_082392200  FIX(1.082392200)
+#define FIX_1_414213562  FIX(1.414213562)
+#define FIX_1_847759065  FIX(1.847759065)
+#define FIX_2_613125930  FIX(2.613125930)
+#endif
+
+
+/* We can gain a little more speed, with a further compromise in accuracy,
+ * by omitting the addition in a descaling shift.  This yields an incorrectly
+ * rounded result half the time...
+ */
+
+#ifndef USE_ACCURATE_ROUNDING
+#undef DESCALE
+#define DESCALE(x,n)  RIGHT_SHIFT(x, n)
+#endif
+
+
+/* Multiply a DCTELEM variable by an INT32 constant, and immediately
+ * descale to yield a DCTELEM result.
+ */
+
+#define MULTIPLY(var,const)  ((DCTELEM) DESCALE((var) * (const), CONST_BITS))
+
+
+/* Dequantize a coefficient by multiplying it by the multiplier-table
+ * entry; produce a DCTELEM result.  For 8-bit data a 16x16->16
+ * multiplication will do.  For 12-bit data, the multiplier table is
+ * declared INT32, so a 32-bit multiply will be used.
+ */
+
+#if BITS_IN_JSAMPLE == 8
+#define DEQUANTIZE(coef,quantval)  (((IFAST_MULT_TYPE) (coef)) * (quantval))
+#else
+#define DEQUANTIZE(coef,quantval)  \
+	DESCALE((coef)*(quantval), IFAST_SCALE_BITS-PASS1_BITS)
+#endif
+
+
+/* Like DESCALE, but applies to a DCTELEM and produces an int.
+ * We assume that int right shift is unsigned if INT32 right shift is.
+ */
+
+#ifdef RIGHT_SHIFT_IS_UNSIGNED
+#define ISHIFT_TEMPS	DCTELEM ishift_temp;
+#if BITS_IN_JSAMPLE == 8
+#define DCTELEMBITS  16		/* DCTELEM may be 16 or 32 bits */
+#else
+#define DCTELEMBITS  32		/* DCTELEM must be 32 bits */
+#endif
+#define IRIGHT_SHIFT(x,shft)  \
+    ((ishift_temp = (x)) < 0 ? \
+     (ishift_temp >> (shft)) | ((~((DCTELEM) 0)) << (DCTELEMBITS-(shft))) : \
+     (ishift_temp >> (shft)))
+#else
+#define ISHIFT_TEMPS
+#define IRIGHT_SHIFT(x,shft)	((x) >> (shft))
+#endif
+
+#ifdef USE_ACCURATE_ROUNDING
+#define IDESCALE(x,n)  ((int) IRIGHT_SHIFT((x) + (1 << ((n)-1)), n))
+#else
+#define IDESCALE(x,n)  ((int) IRIGHT_SHIFT(x, n))
+#endif
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients.
+ */
+
+GLOBAL(void)
+jpeg_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+		 JCOEFPTR coef_block,
+		 JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  DCTELEM tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+  DCTELEM tmp10, tmp11, tmp12, tmp13;
+  DCTELEM z5, z10, z11, z12, z13;
+  JCOEFPTR inptr;
+  IFAST_MULT_TYPE * quantptr;
+  int * wsptr;
+  JSAMPROW outptr;
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  int workspace[DCTSIZE2];	/* buffers data between passes */
+  SHIFT_TEMPS			/* for DESCALE */
+  ISHIFT_TEMPS			/* for IDESCALE */
+
+  /* Pass 1: process columns from input, store into work array. */
+
+  inptr = coef_block;
+  quantptr = (IFAST_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = DCTSIZE; ctr > 0; ctr--) {
+    /* Due to quantization, we will usually find that many of the input
+     * coefficients are zero, especially the AC terms.  We can exploit this
+     * by short-circuiting the IDCT calculation for any column in which all
+     * the AC terms are zero.  In that case each output is equal to the
+     * DC coefficient (with scale factor as needed).
+     * With typical images and quantization tables, half or more of the
+     * column DCT calculations can be simplified this way.
+     */
+    
+    if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 &&
+	inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&
+	inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&
+	inptr[DCTSIZE*7] == 0) {
+      /* AC terms all zero */
+      int dcval = (int) DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+
+      wsptr[DCTSIZE*0] = dcval;
+      wsptr[DCTSIZE*1] = dcval;
+      wsptr[DCTSIZE*2] = dcval;
+      wsptr[DCTSIZE*3] = dcval;
+      wsptr[DCTSIZE*4] = dcval;
+      wsptr[DCTSIZE*5] = dcval;
+      wsptr[DCTSIZE*6] = dcval;
+      wsptr[DCTSIZE*7] = dcval;
+      
+      inptr++;			/* advance pointers to next column */
+      quantptr++;
+      wsptr++;
+      continue;
+    }
+    
+    /* Even part */
+
+    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    tmp1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+    tmp2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+    tmp3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
+
+    tmp10 = tmp0 + tmp2;	/* phase 3 */
+    tmp11 = tmp0 - tmp2;
+
+    tmp13 = tmp1 + tmp3;	/* phases 5-3 */
+    tmp12 = MULTIPLY(tmp1 - tmp3, FIX_1_414213562) - tmp13; /* 2*c4 */
+
+    tmp0 = tmp10 + tmp13;	/* phase 2 */
+    tmp3 = tmp10 - tmp13;
+    tmp1 = tmp11 + tmp12;
+    tmp2 = tmp11 - tmp12;
+    
+    /* Odd part */
+
+    tmp4 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+    tmp5 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+    tmp6 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
+    tmp7 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
+
+    z13 = tmp6 + tmp5;		/* phase 6 */
+    z10 = tmp6 - tmp5;
+    z11 = tmp4 + tmp7;
+    z12 = tmp4 - tmp7;
+
+    tmp7 = z11 + z13;		/* phase 5 */
+    tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562); /* 2*c4 */
+
+    z5 = MULTIPLY(z10 + z12, FIX_1_847759065); /* 2*c2 */
+    tmp10 = MULTIPLY(z12, FIX_1_082392200) - z5; /* 2*(c2-c6) */
+    tmp12 = MULTIPLY(z10, - FIX_2_613125930) + z5; /* -2*(c2+c6) */
+
+    tmp6 = tmp12 - tmp7;	/* phase 2 */
+    tmp5 = tmp11 - tmp6;
+    tmp4 = tmp10 + tmp5;
+
+    wsptr[DCTSIZE*0] = (int) (tmp0 + tmp7);
+    wsptr[DCTSIZE*7] = (int) (tmp0 - tmp7);
+    wsptr[DCTSIZE*1] = (int) (tmp1 + tmp6);
+    wsptr[DCTSIZE*6] = (int) (tmp1 - tmp6);
+    wsptr[DCTSIZE*2] = (int) (tmp2 + tmp5);
+    wsptr[DCTSIZE*5] = (int) (tmp2 - tmp5);
+    wsptr[DCTSIZE*4] = (int) (tmp3 + tmp4);
+    wsptr[DCTSIZE*3] = (int) (tmp3 - tmp4);
+
+    inptr++;			/* advance pointers to next column */
+    quantptr++;
+    wsptr++;
+  }
+  
+  /* Pass 2: process rows from work array, store into output array. */
+  /* Note that we must descale the results by a factor of 8 == 2**3, */
+  /* and also undo the PASS1_BITS scaling. */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < DCTSIZE; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+    /* Rows of zeroes can be exploited in the same way as we did with columns.
+     * However, the column calculation has created many nonzero AC terms, so
+     * the simplification applies less often (typically 5% to 10% of the time).
+     * On machines with very fast multiplication, it's possible that the
+     * test takes more time than it's worth.  In that case this section
+     * may be commented out.
+     */
+    
+#ifndef NO_ZERO_ROW_TEST
+    if (wsptr[1] == 0 && wsptr[2] == 0 && wsptr[3] == 0 && wsptr[4] == 0 &&
+	wsptr[5] == 0 && wsptr[6] == 0 && wsptr[7] == 0) {
+      /* AC terms all zero */
+      JSAMPLE dcval = range_limit[IDESCALE(wsptr[0], PASS1_BITS+3)
+				  & RANGE_MASK];
+      
+      outptr[0] = dcval;
+      outptr[1] = dcval;
+      outptr[2] = dcval;
+      outptr[3] = dcval;
+      outptr[4] = dcval;
+      outptr[5] = dcval;
+      outptr[6] = dcval;
+      outptr[7] = dcval;
+
+      wsptr += DCTSIZE;		/* advance pointer to next row */
+      continue;
+    }
+#endif
+    
+    /* Even part */
+
+    tmp10 = ((DCTELEM) wsptr[0] + (DCTELEM) wsptr[4]);
+    tmp11 = ((DCTELEM) wsptr[0] - (DCTELEM) wsptr[4]);
+
+    tmp13 = ((DCTELEM) wsptr[2] + (DCTELEM) wsptr[6]);
+    tmp12 = MULTIPLY((DCTELEM) wsptr[2] - (DCTELEM) wsptr[6], FIX_1_414213562)
+	    - tmp13;
+
+    tmp0 = tmp10 + tmp13;
+    tmp3 = tmp10 - tmp13;
+    tmp1 = tmp11 + tmp12;
+    tmp2 = tmp11 - tmp12;
+
+    /* Odd part */
+
+    z13 = (DCTELEM) wsptr[5] + (DCTELEM) wsptr[3];
+    z10 = (DCTELEM) wsptr[5] - (DCTELEM) wsptr[3];
+    z11 = (DCTELEM) wsptr[1] + (DCTELEM) wsptr[7];
+    z12 = (DCTELEM) wsptr[1] - (DCTELEM) wsptr[7];
+
+    tmp7 = z11 + z13;		/* phase 5 */
+    tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562); /* 2*c4 */
+
+    z5 = MULTIPLY(z10 + z12, FIX_1_847759065); /* 2*c2 */
+    tmp10 = MULTIPLY(z12, FIX_1_082392200) - z5; /* 2*(c2-c6) */
+    tmp12 = MULTIPLY(z10, - FIX_2_613125930) + z5; /* -2*(c2+c6) */
+
+    tmp6 = tmp12 - tmp7;	/* phase 2 */
+    tmp5 = tmp11 - tmp6;
+    tmp4 = tmp10 + tmp5;
+
+    /* Final output stage: scale down by a factor of 8 and range-limit */
+
+    outptr[0] = range_limit[IDESCALE(tmp0 + tmp7, PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[7] = range_limit[IDESCALE(tmp0 - tmp7, PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[1] = range_limit[IDESCALE(tmp1 + tmp6, PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[6] = range_limit[IDESCALE(tmp1 - tmp6, PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[2] = range_limit[IDESCALE(tmp2 + tmp5, PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[5] = range_limit[IDESCALE(tmp2 - tmp5, PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[4] = range_limit[IDESCALE(tmp3 + tmp4, PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[3] = range_limit[IDESCALE(tmp3 - tmp4, PASS1_BITS+3)
+			    & RANGE_MASK];
+
+    wsptr += DCTSIZE;		/* advance pointer to next row */
+  }
+}
+
+#endif /* DCT_IFAST_SUPPORTED */
diff --git a/plugins/AdvaImg/src/LibJPEG/jidctint.c b/plugins/AdvaImg/src/LibJPEG/jidctint.c
index dcdf7ce454..49ef79f560 100644
--- a/plugins/AdvaImg/src/LibJPEG/jidctint.c
+++ b/plugins/AdvaImg/src/LibJPEG/jidctint.c
@@ -1,5137 +1,5137 @@
-/*
- * jidctint.c
- *
- * Copyright (C) 1991-1998, Thomas G. Lane.
- * Modification developed 2002-2009 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains a slow-but-accurate integer implementation of the
- * inverse DCT (Discrete Cosine Transform).  In the IJG code, this routine
- * must also perform dequantization of the input coefficients.
- *
- * A 2-D IDCT can be done by 1-D IDCT on each column followed by 1-D IDCT
- * on each row (or vice versa, but it's more convenient to emit a row at
- * a time).  Direct algorithms are also available, but they are much more
- * complex and seem not to be any faster when reduced to code.
- *
- * This implementation is based on an algorithm described in
- *   C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT
- *   Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics,
- *   Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991.
- * The primary algorithm described there uses 11 multiplies and 29 adds.
- * We use their alternate method with 12 multiplies and 32 adds.
- * The advantage of this method is that no data path contains more than one
- * multiplication; this allows a very simple and accurate implementation in
- * scaled fixed-point arithmetic, with a minimal number of shifts.
- *
- * We also provide IDCT routines with various output sample block sizes for
- * direct resolution reduction or enlargement and for direct resolving the
- * common 2x1 and 1x2 subsampling cases without additional resampling: NxN
- * (N=1...16), 2NxN, and Nx2N (N=1...8) pixels for one 8x8 input DCT block.
- *
- * For N<8 we simply take the corresponding low-frequency coefficients of
- * the 8x8 input DCT block and apply an NxN point IDCT on the sub-block
- * to yield the downscaled outputs.
- * This can be seen as direct low-pass downsampling from the DCT domain
- * point of view rather than the usual spatial domain point of view,
- * yielding significant computational savings and results at least
- * as good as common bilinear (averaging) spatial downsampling.
- *
- * For N>8 we apply a partial NxN IDCT on the 8 input coefficients as
- * lower frequencies and higher frequencies assumed to be zero.
- * It turns out that the computational effort is similar to the 8x8 IDCT
- * regarding the output size.
- * Furthermore, the scaling and descaling is the same for all IDCT sizes.
- *
- * CAUTION: We rely on the FIX() macro except for the N=1,2,4,8 cases
- * since there would be too many additional constants to pre-calculate.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-#include "jdct.h"		/* Private declarations for DCT subsystem */
-
-#ifdef DCT_ISLOW_SUPPORTED
-
-
-/*
- * This module is specialized to the case DCTSIZE = 8.
- */
-
-#if DCTSIZE != 8
-  Sorry, this code only copes with 8x8 DCT blocks. /* deliberate syntax err */
-#endif
-
-
-/*
- * The poop on this scaling stuff is as follows:
- *
- * Each 1-D IDCT step produces outputs which are a factor of sqrt(N)
- * larger than the true IDCT outputs.  The final outputs are therefore
- * a factor of N larger than desired; since N=8 this can be cured by
- * a simple right shift at the end of the algorithm.  The advantage of
- * this arrangement is that we save two multiplications per 1-D IDCT,
- * because the y0 and y4 inputs need not be divided by sqrt(N).
- *
- * We have to do addition and subtraction of the integer inputs, which
- * is no problem, and multiplication by fractional constants, which is
- * a problem to do in integer arithmetic.  We multiply all the constants
- * by CONST_SCALE and convert them to integer constants (thus retaining
- * CONST_BITS bits of precision in the constants).  After doing a
- * multiplication we have to divide the product by CONST_SCALE, with proper
- * rounding, to produce the correct output.  This division can be done
- * cheaply as a right shift of CONST_BITS bits.  We postpone shifting
- * as long as possible so that partial sums can be added together with
- * full fractional precision.
- *
- * The outputs of the first pass are scaled up by PASS1_BITS bits so that
- * they are represented to better-than-integral precision.  These outputs
- * require BITS_IN_JSAMPLE + PASS1_BITS + 3 bits; this fits in a 16-bit word
- * with the recommended scaling.  (To scale up 12-bit sample data further, an
- * intermediate INT32 array would be needed.)
- *
- * To avoid overflow of the 32-bit intermediate results in pass 2, we must
- * have BITS_IN_JSAMPLE + CONST_BITS + PASS1_BITS <= 26.  Error analysis
- * shows that the values given below are the most effective.
- */
-
-#if BITS_IN_JSAMPLE == 8
-#define CONST_BITS  13
-#define PASS1_BITS  2
-#else
-#define CONST_BITS  13
-#define PASS1_BITS  1		/* lose a little precision to avoid overflow */
-#endif
-
-/* Some C compilers fail to reduce "FIX(constant)" at compile time, thus
- * causing a lot of useless floating-point operations at run time.
- * To get around this we use the following pre-calculated constants.
- * If you change CONST_BITS you may want to add appropriate values.
- * (With a reasonable C compiler, you can just rely on the FIX() macro...)
- */
-
-#if CONST_BITS == 13
-#define FIX_0_298631336  ((INT32)  2446)	/* FIX(0.298631336) */
-#define FIX_0_390180644  ((INT32)  3196)	/* FIX(0.390180644) */
-#define FIX_0_541196100  ((INT32)  4433)	/* FIX(0.541196100) */
-#define FIX_0_765366865  ((INT32)  6270)	/* FIX(0.765366865) */
-#define FIX_0_899976223  ((INT32)  7373)	/* FIX(0.899976223) */
-#define FIX_1_175875602  ((INT32)  9633)	/* FIX(1.175875602) */
-#define FIX_1_501321110  ((INT32)  12299)	/* FIX(1.501321110) */
-#define FIX_1_847759065  ((INT32)  15137)	/* FIX(1.847759065) */
-#define FIX_1_961570560  ((INT32)  16069)	/* FIX(1.961570560) */
-#define FIX_2_053119869  ((INT32)  16819)	/* FIX(2.053119869) */
-#define FIX_2_562915447  ((INT32)  20995)	/* FIX(2.562915447) */
-#define FIX_3_072711026  ((INT32)  25172)	/* FIX(3.072711026) */
-#else
-#define FIX_0_298631336  FIX(0.298631336)
-#define FIX_0_390180644  FIX(0.390180644)
-#define FIX_0_541196100  FIX(0.541196100)
-#define FIX_0_765366865  FIX(0.765366865)
-#define FIX_0_899976223  FIX(0.899976223)
-#define FIX_1_175875602  FIX(1.175875602)
-#define FIX_1_501321110  FIX(1.501321110)
-#define FIX_1_847759065  FIX(1.847759065)
-#define FIX_1_961570560  FIX(1.961570560)
-#define FIX_2_053119869  FIX(2.053119869)
-#define FIX_2_562915447  FIX(2.562915447)
-#define FIX_3_072711026  FIX(3.072711026)
-#endif
-
-
-/* Multiply an INT32 variable by an INT32 constant to yield an INT32 result.
- * For 8-bit samples with the recommended scaling, all the variable
- * and constant values involved are no more than 16 bits wide, so a
- * 16x16->32 bit multiply can be used instead of a full 32x32 multiply.
- * For 12-bit samples, a full 32-bit multiplication will be needed.
- */
-
-#if BITS_IN_JSAMPLE == 8
-#define MULTIPLY(var,const)  MULTIPLY16C16(var,const)
-#else
-#define MULTIPLY(var,const)  ((var) * (const))
-#endif
-
-
-/* Dequantize a coefficient by multiplying it by the multiplier-table
- * entry; produce an int result.  In this module, both inputs and result
- * are 16 bits or less, so either int or short multiply will work.
- */
-
-#define DEQUANTIZE(coef,quantval)  (((ISLOW_MULT_TYPE) (coef)) * (quantval))
-
-
-/*
- * Perform dequantization and inverse DCT on one block of coefficients.
- */
-
-GLOBAL(void)
-jpeg_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-		 JCOEFPTR coef_block,
-		 JSAMPARRAY output_buf, JDIMENSION output_col)
-{
-  INT32 tmp0, tmp1, tmp2, tmp3;
-  INT32 tmp10, tmp11, tmp12, tmp13;
-  INT32 z1, z2, z3;
-  JCOEFPTR inptr;
-  ISLOW_MULT_TYPE * quantptr;
-  int * wsptr;
-  JSAMPROW outptr;
-  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
-  int ctr;
-  int workspace[DCTSIZE2];	/* buffers data between passes */
-  SHIFT_TEMPS
-
-  /* Pass 1: process columns from input, store into work array. */
-  /* Note results are scaled up by sqrt(8) compared to a true IDCT; */
-  /* furthermore, we scale the results by 2**PASS1_BITS. */
-
-  inptr = coef_block;
-  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
-  wsptr = workspace;
-  for (ctr = DCTSIZE; ctr > 0; ctr--) {
-    /* Due to quantization, we will usually find that many of the input
-     * coefficients are zero, especially the AC terms.  We can exploit this
-     * by short-circuiting the IDCT calculation for any column in which all
-     * the AC terms are zero.  In that case each output is equal to the
-     * DC coefficient (with scale factor as needed).
-     * With typical images and quantization tables, half or more of the
-     * column DCT calculations can be simplified this way.
-     */
-
-    if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 &&
-	inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&
-	inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&
-	inptr[DCTSIZE*7] == 0) {
-      /* AC terms all zero */
-      int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) << PASS1_BITS;
-
-      wsptr[DCTSIZE*0] = dcval;
-      wsptr[DCTSIZE*1] = dcval;
-      wsptr[DCTSIZE*2] = dcval;
-      wsptr[DCTSIZE*3] = dcval;
-      wsptr[DCTSIZE*4] = dcval;
-      wsptr[DCTSIZE*5] = dcval;
-      wsptr[DCTSIZE*6] = dcval;
-      wsptr[DCTSIZE*7] = dcval;
-
-      inptr++;			/* advance pointers to next column */
-      quantptr++;
-      wsptr++;
-      continue;
-    }
-
-    /* Even part: reverse the even part of the forward DCT. */
-    /* The rotator is sqrt(2)*c(-6). */
-    
-    z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
-    z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
-
-    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);
-    tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865);
-    tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065);
-
-    z2 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
-    z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
-    z2 <<= CONST_BITS;
-    z3 <<= CONST_BITS;
-    /* Add fudge factor here for final descale. */
-    z2 += ONE << (CONST_BITS-PASS1_BITS-1);
-
-    tmp0 = z2 + z3;
-    tmp1 = z2 - z3;
-
-    tmp10 = tmp0 + tmp2;
-    tmp13 = tmp0 - tmp2;
-    tmp11 = tmp1 + tmp3;
-    tmp12 = tmp1 - tmp3;
-
-    /* Odd part per figure 8; the matrix is unitary and hence its
-     * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
-     */
-
-    tmp0 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
-    tmp1 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
-    tmp2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
-    tmp3 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
-    
-    z2 = tmp0 + tmp2;
-    z3 = tmp1 + tmp3;
-
-    z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* sqrt(2) * c3 */
-    z2 = MULTIPLY(z2, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
-    z3 = MULTIPLY(z3, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
-    z2 += z1;
-    z3 += z1;
-
-    z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
-    tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
-    tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
-    tmp0 += z1 + z2;
-    tmp3 += z1 + z3;
-
-    z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
-    tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
-    tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
-    tmp1 += z1 + z3;
-    tmp2 += z1 + z2;
-
-    /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
-
-    wsptr[DCTSIZE*0] = (int) RIGHT_SHIFT(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
-    wsptr[DCTSIZE*7] = (int) RIGHT_SHIFT(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
-    wsptr[DCTSIZE*1] = (int) RIGHT_SHIFT(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
-    wsptr[DCTSIZE*6] = (int) RIGHT_SHIFT(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
-    wsptr[DCTSIZE*2] = (int) RIGHT_SHIFT(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
-    wsptr[DCTSIZE*5] = (int) RIGHT_SHIFT(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
-    wsptr[DCTSIZE*3] = (int) RIGHT_SHIFT(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
-    wsptr[DCTSIZE*4] = (int) RIGHT_SHIFT(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
-    
-    inptr++;			/* advance pointers to next column */
-    quantptr++;
-    wsptr++;
-  }
-
-  /* Pass 2: process rows from work array, store into output array. */
-  /* Note that we must descale the results by a factor of 8 == 2**3, */
-  /* and also undo the PASS1_BITS scaling. */
-
-  wsptr = workspace;
-  for (ctr = 0; ctr < DCTSIZE; ctr++) {
-    outptr = output_buf[ctr] + output_col;
-    /* Rows of zeroes can be exploited in the same way as we did with columns.
-     * However, the column calculation has created many nonzero AC terms, so
-     * the simplification applies less often (typically 5% to 10% of the time).
-     * On machines with very fast multiplication, it's possible that the
-     * test takes more time than it's worth.  In that case this section
-     * may be commented out.
-     */
-
-#ifndef NO_ZERO_ROW_TEST
-    if (wsptr[1] == 0 && wsptr[2] == 0 && wsptr[3] == 0 && wsptr[4] == 0 &&
-	wsptr[5] == 0 && wsptr[6] == 0 && wsptr[7] == 0) {
-      /* AC terms all zero */
-      JSAMPLE dcval = range_limit[(int) DESCALE((INT32) wsptr[0], PASS1_BITS+3)
-				  & RANGE_MASK];
-
-      outptr[0] = dcval;
-      outptr[1] = dcval;
-      outptr[2] = dcval;
-      outptr[3] = dcval;
-      outptr[4] = dcval;
-      outptr[5] = dcval;
-      outptr[6] = dcval;
-      outptr[7] = dcval;
-
-      wsptr += DCTSIZE;		/* advance pointer to next row */
-      continue;
-    }
-#endif
-
-    /* Even part: reverse the even part of the forward DCT. */
-    /* The rotator is sqrt(2)*c(-6). */
-    
-    z2 = (INT32) wsptr[2];
-    z3 = (INT32) wsptr[6];
-
-    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);
-    tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865);
-    tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065);
-
-    /* Add fudge factor here for final descale. */
-    z2 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
-    z3 = (INT32) wsptr[4];
-
-    tmp0 = (z2 + z3) << CONST_BITS;
-    tmp1 = (z2 - z3) << CONST_BITS;
-    
-    tmp10 = tmp0 + tmp2;
-    tmp13 = tmp0 - tmp2;
-    tmp11 = tmp1 + tmp3;
-    tmp12 = tmp1 - tmp3;
-
-    /* Odd part per figure 8; the matrix is unitary and hence its
-     * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
-     */
-
-    tmp0 = (INT32) wsptr[7];
-    tmp1 = (INT32) wsptr[5];
-    tmp2 = (INT32) wsptr[3];
-    tmp3 = (INT32) wsptr[1];
-
-    z2 = tmp0 + tmp2;
-    z3 = tmp1 + tmp3;
-
-    z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* sqrt(2) * c3 */
-    z2 = MULTIPLY(z2, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
-    z3 = MULTIPLY(z3, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
-    z2 += z1;
-    z3 += z1;
-
-    z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
-    tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
-    tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
-    tmp0 += z1 + z2;
-    tmp3 += z1 + z3;
-
-    z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
-    tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
-    tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
-    tmp1 += z1 + z3;
-    tmp2 += z1 + z2;
-
-    /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
-
-    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp3,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp3,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp2,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp2,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp1,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp1,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13 + tmp0,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp13 - tmp0,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-
-    wsptr += DCTSIZE;		/* advance pointer to next row */
-  }
-}
-
-#ifdef IDCT_SCALING_SUPPORTED
-
-
-/*
- * Perform dequantization and inverse DCT on one block of coefficients,
- * producing a 7x7 output block.
- *
- * Optimized algorithm with 12 multiplications in the 1-D kernel.
- * cK represents sqrt(2) * cos(K*pi/14).
- */
-
-GLOBAL(void)
-jpeg_idct_7x7 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	       JCOEFPTR coef_block,
-	       JSAMPARRAY output_buf, JDIMENSION output_col)
-{
-  INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12, tmp13;
-  INT32 z1, z2, z3;
-  JCOEFPTR inptr;
-  ISLOW_MULT_TYPE * quantptr;
-  int * wsptr;
-  JSAMPROW outptr;
-  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
-  int ctr;
-  int workspace[7*7];	/* buffers data between passes */
-  SHIFT_TEMPS
-
-  /* Pass 1: process columns from input, store into work array. */
-
-  inptr = coef_block;
-  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
-  wsptr = workspace;
-  for (ctr = 0; ctr < 7; ctr++, inptr++, quantptr++, wsptr++) {
-    /* Even part */
-
-    tmp13 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
-    tmp13 <<= CONST_BITS;
-    /* Add fudge factor here for final descale. */
-    tmp13 += ONE << (CONST_BITS-PASS1_BITS-1);
-
-    z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
-    z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
-    z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
-
-    tmp10 = MULTIPLY(z2 - z3, FIX(0.881747734));     /* c4 */
-    tmp12 = MULTIPLY(z1 - z2, FIX(0.314692123));     /* c6 */
-    tmp11 = tmp10 + tmp12 + tmp13 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */
-    tmp0 = z1 + z3;
-    z2 -= tmp0;
-    tmp0 = MULTIPLY(tmp0, FIX(1.274162392)) + tmp13; /* c2 */
-    tmp10 += tmp0 - MULTIPLY(z3, FIX(0.077722536));  /* c2-c4-c6 */
-    tmp12 += tmp0 - MULTIPLY(z1, FIX(2.470602249));  /* c2+c4+c6 */
-    tmp13 += MULTIPLY(z2, FIX(1.414213562));         /* c0 */
-
-    /* Odd part */
-
-    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
-    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
-    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
-
-    tmp1 = MULTIPLY(z1 + z2, FIX(0.935414347));      /* (c3+c1-c5)/2 */
-    tmp2 = MULTIPLY(z1 - z2, FIX(0.170262339));      /* (c3+c5-c1)/2 */
-    tmp0 = tmp1 - tmp2;
-    tmp1 += tmp2;
-    tmp2 = MULTIPLY(z2 + z3, - FIX(1.378756276));    /* -c1 */
-    tmp1 += tmp2;
-    z2 = MULTIPLY(z1 + z3, FIX(0.613604268));        /* c5 */
-    tmp0 += z2;
-    tmp2 += z2 + MULTIPLY(z3, FIX(1.870828693));     /* c3+c1-c5 */
-
-    /* Final output stage */
-
-    wsptr[7*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
-    wsptr[7*6] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
-    wsptr[7*1] = (int) RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS-PASS1_BITS);
-    wsptr[7*5] = (int) RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS-PASS1_BITS);
-    wsptr[7*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS);
-    wsptr[7*4] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS);
-    wsptr[7*3] = (int) RIGHT_SHIFT(tmp13, CONST_BITS-PASS1_BITS);
-  }
-
-  /* Pass 2: process 7 rows from work array, store into output array. */
-
-  wsptr = workspace;
-  for (ctr = 0; ctr < 7; ctr++) {
-    outptr = output_buf[ctr] + output_col;
-
-    /* Even part */
-
-    /* Add fudge factor here for final descale. */
-    tmp13 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
-    tmp13 <<= CONST_BITS;
-
-    z1 = (INT32) wsptr[2];
-    z2 = (INT32) wsptr[4];
-    z3 = (INT32) wsptr[6];
-
-    tmp10 = MULTIPLY(z2 - z3, FIX(0.881747734));     /* c4 */
-    tmp12 = MULTIPLY(z1 - z2, FIX(0.314692123));     /* c6 */
-    tmp11 = tmp10 + tmp12 + tmp13 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */
-    tmp0 = z1 + z3;
-    z2 -= tmp0;
-    tmp0 = MULTIPLY(tmp0, FIX(1.274162392)) + tmp13; /* c2 */
-    tmp10 += tmp0 - MULTIPLY(z3, FIX(0.077722536));  /* c2-c4-c6 */
-    tmp12 += tmp0 - MULTIPLY(z1, FIX(2.470602249));  /* c2+c4+c6 */
-    tmp13 += MULTIPLY(z2, FIX(1.414213562));         /* c0 */
-
-    /* Odd part */
-
-    z1 = (INT32) wsptr[1];
-    z2 = (INT32) wsptr[3];
-    z3 = (INT32) wsptr[5];
-
-    tmp1 = MULTIPLY(z1 + z2, FIX(0.935414347));      /* (c3+c1-c5)/2 */
-    tmp2 = MULTIPLY(z1 - z2, FIX(0.170262339));      /* (c3+c5-c1)/2 */
-    tmp0 = tmp1 - tmp2;
-    tmp1 += tmp2;
-    tmp2 = MULTIPLY(z2 + z3, - FIX(1.378756276));    /* -c1 */
-    tmp1 += tmp2;
-    z2 = MULTIPLY(z1 + z3, FIX(0.613604268));        /* c5 */
-    tmp0 += z2;
-    tmp2 += z2 + MULTIPLY(z3, FIX(1.870828693));     /* c3+c1-c5 */
-
-    /* Final output stage */
-
-    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-
-    wsptr += 7;		/* advance pointer to next row */
-  }
-}
-
-
-/*
- * Perform dequantization and inverse DCT on one block of coefficients,
- * producing a reduced-size 6x6 output block.
- *
- * Optimized algorithm with 3 multiplications in the 1-D kernel.
- * cK represents sqrt(2) * cos(K*pi/12).
- */
-
-GLOBAL(void)
-jpeg_idct_6x6 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	       JCOEFPTR coef_block,
-	       JSAMPARRAY output_buf, JDIMENSION output_col)
-{
-  INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12;
-  INT32 z1, z2, z3;
-  JCOEFPTR inptr;
-  ISLOW_MULT_TYPE * quantptr;
-  int * wsptr;
-  JSAMPROW outptr;
-  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
-  int ctr;
-  int workspace[6*6];	/* buffers data between passes */
-  SHIFT_TEMPS
-
-  /* Pass 1: process columns from input, store into work array. */
-
-  inptr = coef_block;
-  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
-  wsptr = workspace;
-  for (ctr = 0; ctr < 6; ctr++, inptr++, quantptr++, wsptr++) {
-    /* Even part */
-
-    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
-    tmp0 <<= CONST_BITS;
-    /* Add fudge factor here for final descale. */
-    tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
-    tmp2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
-    tmp10 = MULTIPLY(tmp2, FIX(0.707106781));   /* c4 */
-    tmp1 = tmp0 + tmp10;
-    tmp11 = RIGHT_SHIFT(tmp0 - tmp10 - tmp10, CONST_BITS-PASS1_BITS);
-    tmp10 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
-    tmp0 = MULTIPLY(tmp10, FIX(1.224744871));   /* c2 */
-    tmp10 = tmp1 + tmp0;
-    tmp12 = tmp1 - tmp0;
-
-    /* Odd part */
-
-    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
-    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
-    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
-    tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
-    tmp0 = tmp1 + ((z1 + z2) << CONST_BITS);
-    tmp2 = tmp1 + ((z3 - z2) << CONST_BITS);
-    tmp1 = (z1 - z2 - z3) << PASS1_BITS;
-
-    /* Final output stage */
-
-    wsptr[6*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
-    wsptr[6*5] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
-    wsptr[6*1] = (int) (tmp11 + tmp1);
-    wsptr[6*4] = (int) (tmp11 - tmp1);
-    wsptr[6*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS);
-    wsptr[6*3] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS);
-  }
-
-  /* Pass 2: process 6 rows from work array, store into output array. */
-
-  wsptr = workspace;
-  for (ctr = 0; ctr < 6; ctr++) {
-    outptr = output_buf[ctr] + output_col;
-
-    /* Even part */
-
-    /* Add fudge factor here for final descale. */
-    tmp0 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
-    tmp0 <<= CONST_BITS;
-    tmp2 = (INT32) wsptr[4];
-    tmp10 = MULTIPLY(tmp2, FIX(0.707106781));   /* c4 */
-    tmp1 = tmp0 + tmp10;
-    tmp11 = tmp0 - tmp10 - tmp10;
-    tmp10 = (INT32) wsptr[2];
-    tmp0 = MULTIPLY(tmp10, FIX(1.224744871));   /* c2 */
-    tmp10 = tmp1 + tmp0;
-    tmp12 = tmp1 - tmp0;
-
-    /* Odd part */
-
-    z1 = (INT32) wsptr[1];
-    z2 = (INT32) wsptr[3];
-    z3 = (INT32) wsptr[5];
-    tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
-    tmp0 = tmp1 + ((z1 + z2) << CONST_BITS);
-    tmp2 = tmp1 + ((z3 - z2) << CONST_BITS);
-    tmp1 = (z1 - z2 - z3) << CONST_BITS;
-
-    /* Final output stage */
-
-    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-
-    wsptr += 6;		/* advance pointer to next row */
-  }
-}
-
-
-/*
- * Perform dequantization and inverse DCT on one block of coefficients,
- * producing a reduced-size 5x5 output block.
- *
- * Optimized algorithm with 5 multiplications in the 1-D kernel.
- * cK represents sqrt(2) * cos(K*pi/10).
- */
-
-GLOBAL(void)
-jpeg_idct_5x5 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	       JCOEFPTR coef_block,
-	       JSAMPARRAY output_buf, JDIMENSION output_col)
-{
-  INT32 tmp0, tmp1, tmp10, tmp11, tmp12;
-  INT32 z1, z2, z3;
-  JCOEFPTR inptr;
-  ISLOW_MULT_TYPE * quantptr;
-  int * wsptr;
-  JSAMPROW outptr;
-  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
-  int ctr;
-  int workspace[5*5];	/* buffers data between passes */
-  SHIFT_TEMPS
-
-  /* Pass 1: process columns from input, store into work array. */
-
-  inptr = coef_block;
-  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
-  wsptr = workspace;
-  for (ctr = 0; ctr < 5; ctr++, inptr++, quantptr++, wsptr++) {
-    /* Even part */
-
-    tmp12 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
-    tmp12 <<= CONST_BITS;
-    /* Add fudge factor here for final descale. */
-    tmp12 += ONE << (CONST_BITS-PASS1_BITS-1);
-    tmp0 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
-    tmp1 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
-    z1 = MULTIPLY(tmp0 + tmp1, FIX(0.790569415)); /* (c2+c4)/2 */
-    z2 = MULTIPLY(tmp0 - tmp1, FIX(0.353553391)); /* (c2-c4)/2 */
-    z3 = tmp12 + z2;
-    tmp10 = z3 + z1;
-    tmp11 = z3 - z1;
-    tmp12 -= z2 << 2;
-
-    /* Odd part */
-
-    z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
-    z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
-
-    z1 = MULTIPLY(z2 + z3, FIX(0.831253876));     /* c3 */
-    tmp0 = z1 + MULTIPLY(z2, FIX(0.513743148));   /* c1-c3 */
-    tmp1 = z1 - MULTIPLY(z3, FIX(2.176250899));   /* c1+c3 */
-
-    /* Final output stage */
-
-    wsptr[5*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
-    wsptr[5*4] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
-    wsptr[5*1] = (int) RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS-PASS1_BITS);
-    wsptr[5*3] = (int) RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS-PASS1_BITS);
-    wsptr[5*2] = (int) RIGHT_SHIFT(tmp12, CONST_BITS-PASS1_BITS);
-  }
-
-  /* Pass 2: process 5 rows from work array, store into output array. */
-
-  wsptr = workspace;
-  for (ctr = 0; ctr < 5; ctr++) {
-    outptr = output_buf[ctr] + output_col;
-
-    /* Even part */
-
-    /* Add fudge factor here for final descale. */
-    tmp12 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
-    tmp12 <<= CONST_BITS;
-    tmp0 = (INT32) wsptr[2];
-    tmp1 = (INT32) wsptr[4];
-    z1 = MULTIPLY(tmp0 + tmp1, FIX(0.790569415)); /* (c2+c4)/2 */
-    z2 = MULTIPLY(tmp0 - tmp1, FIX(0.353553391)); /* (c2-c4)/2 */
-    z3 = tmp12 + z2;
-    tmp10 = z3 + z1;
-    tmp11 = z3 - z1;
-    tmp12 -= z2 << 2;
-
-    /* Odd part */
-
-    z2 = (INT32) wsptr[1];
-    z3 = (INT32) wsptr[3];
-
-    z1 = MULTIPLY(z2 + z3, FIX(0.831253876));     /* c3 */
-    tmp0 = z1 + MULTIPLY(z2, FIX(0.513743148));   /* c1-c3 */
-    tmp1 = z1 - MULTIPLY(z3, FIX(2.176250899));   /* c1+c3 */
-
-    /* Final output stage */
-
-    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-
-    wsptr += 5;		/* advance pointer to next row */
-  }
-}
-
-
-/*
- * Perform dequantization and inverse DCT on one block of coefficients,
- * producing a reduced-size 4x4 output block.
- *
- * Optimized algorithm with 3 multiplications in the 1-D kernel.
- * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT].
- */
-
-GLOBAL(void)
-jpeg_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	       JCOEFPTR coef_block,
-	       JSAMPARRAY output_buf, JDIMENSION output_col)
-{
-  INT32 tmp0, tmp2, tmp10, tmp12;
-  INT32 z1, z2, z3;
-  JCOEFPTR inptr;
-  ISLOW_MULT_TYPE * quantptr;
-  int * wsptr;
-  JSAMPROW outptr;
-  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
-  int ctr;
-  int workspace[4*4];	/* buffers data between passes */
-  SHIFT_TEMPS
-
-  /* Pass 1: process columns from input, store into work array. */
-
-  inptr = coef_block;
-  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
-  wsptr = workspace;
-  for (ctr = 0; ctr < 4; ctr++, inptr++, quantptr++, wsptr++) {
-    /* Even part */
-
-    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
-    tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
-    
-    tmp10 = (tmp0 + tmp2) << PASS1_BITS;
-    tmp12 = (tmp0 - tmp2) << PASS1_BITS;
-
-    /* Odd part */
-    /* Same rotation as in the even part of the 8x8 LL&M IDCT */
-
-    z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
-    z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
-
-    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);               /* c6 */
-    /* Add fudge factor here for final descale. */
-    z1 += ONE << (CONST_BITS-PASS1_BITS-1);
-    tmp0 = RIGHT_SHIFT(z1 + MULTIPLY(z2, FIX_0_765366865), /* c2-c6 */
-		       CONST_BITS-PASS1_BITS);
-    tmp2 = RIGHT_SHIFT(z1 - MULTIPLY(z3, FIX_1_847759065), /* c2+c6 */
-		       CONST_BITS-PASS1_BITS);
-
-    /* Final output stage */
-
-    wsptr[4*0] = (int) (tmp10 + tmp0);
-    wsptr[4*3] = (int) (tmp10 - tmp0);
-    wsptr[4*1] = (int) (tmp12 + tmp2);
-    wsptr[4*2] = (int) (tmp12 - tmp2);
-  }
-
-  /* Pass 2: process 4 rows from work array, store into output array. */
-
-  wsptr = workspace;
-  for (ctr = 0; ctr < 4; ctr++) {
-    outptr = output_buf[ctr] + output_col;
-
-    /* Even part */
-
-    /* Add fudge factor here for final descale. */
-    tmp0 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
-    tmp2 = (INT32) wsptr[2];
-
-    tmp10 = (tmp0 + tmp2) << CONST_BITS;
-    tmp12 = (tmp0 - tmp2) << CONST_BITS;
-
-    /* Odd part */
-    /* Same rotation as in the even part of the 8x8 LL&M IDCT */
-
-    z2 = (INT32) wsptr[1];
-    z3 = (INT32) wsptr[3];
-
-    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);   /* c6 */
-    tmp0 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
-    tmp2 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
-
-    /* Final output stage */
-
-    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-
-    wsptr += 4;		/* advance pointer to next row */
-  }
-}
-
-
-/*
- * Perform dequantization and inverse DCT on one block of coefficients,
- * producing a reduced-size 3x3 output block.
- *
- * Optimized algorithm with 2 multiplications in the 1-D kernel.
- * cK represents sqrt(2) * cos(K*pi/6).
- */
-
-GLOBAL(void)
-jpeg_idct_3x3 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	       JCOEFPTR coef_block,
-	       JSAMPARRAY output_buf, JDIMENSION output_col)
-{
-  INT32 tmp0, tmp2, tmp10, tmp12;
-  JCOEFPTR inptr;
-  ISLOW_MULT_TYPE * quantptr;
-  int * wsptr;
-  JSAMPROW outptr;
-  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
-  int ctr;
-  int workspace[3*3];	/* buffers data between passes */
-  SHIFT_TEMPS
-
-  /* Pass 1: process columns from input, store into work array. */
-
-  inptr = coef_block;
-  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
-  wsptr = workspace;
-  for (ctr = 0; ctr < 3; ctr++, inptr++, quantptr++, wsptr++) {
-    /* Even part */
-
-    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
-    tmp0 <<= CONST_BITS;
-    /* Add fudge factor here for final descale. */
-    tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
-    tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
-    tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */
-    tmp10 = tmp0 + tmp12;
-    tmp2 = tmp0 - tmp12 - tmp12;
-
-    /* Odd part */
-
-    tmp12 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
-    tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */
-
-    /* Final output stage */
-
-    wsptr[3*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
-    wsptr[3*2] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
-    wsptr[3*1] = (int) RIGHT_SHIFT(tmp2, CONST_BITS-PASS1_BITS);
-  }
-
-  /* Pass 2: process 3 rows from work array, store into output array. */
-
-  wsptr = workspace;
-  for (ctr = 0; ctr < 3; ctr++) {
-    outptr = output_buf[ctr] + output_col;
-
-    /* Even part */
-
-    /* Add fudge factor here for final descale. */
-    tmp0 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
-    tmp0 <<= CONST_BITS;
-    tmp2 = (INT32) wsptr[2];
-    tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */
-    tmp10 = tmp0 + tmp12;
-    tmp2 = tmp0 - tmp12 - tmp12;
-
-    /* Odd part */
-
-    tmp12 = (INT32) wsptr[1];
-    tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */
-
-    /* Final output stage */
-
-    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp2,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-
-    wsptr += 3;		/* advance pointer to next row */
-  }
-}
-
-
-/*
- * Perform dequantization and inverse DCT on one block of coefficients,
- * producing a reduced-size 2x2 output block.
- *
- * Multiplication-less algorithm.
- */
-
-GLOBAL(void)
-jpeg_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	       JCOEFPTR coef_block,
-	       JSAMPARRAY output_buf, JDIMENSION output_col)
-{
-  INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
-  ISLOW_MULT_TYPE * quantptr;
-  JSAMPROW outptr;
-  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
-  SHIFT_TEMPS
-
-  /* Pass 1: process columns from input. */
-
-  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
-
-  /* Column 0 */
-  tmp4 = DEQUANTIZE(coef_block[DCTSIZE*0], quantptr[DCTSIZE*0]);
-  tmp5 = DEQUANTIZE(coef_block[DCTSIZE*1], quantptr[DCTSIZE*1]);
-  /* Add fudge factor here for final descale. */
-  tmp4 += ONE << 2;
-
-  tmp0 = tmp4 + tmp5;
-  tmp2 = tmp4 - tmp5;
-
-  /* Column 1 */
-  tmp4 = DEQUANTIZE(coef_block[DCTSIZE*0+1], quantptr[DCTSIZE*0+1]);
-  tmp5 = DEQUANTIZE(coef_block[DCTSIZE*1+1], quantptr[DCTSIZE*1+1]);
-
-  tmp1 = tmp4 + tmp5;
-  tmp3 = tmp4 - tmp5;
-
-  /* Pass 2: process 2 rows, store into output array. */
-
-  /* Row 0 */
-  outptr = output_buf[0] + output_col;
-
-  outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp0 + tmp1, 3) & RANGE_MASK];
-  outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp0 - tmp1, 3) & RANGE_MASK];
-
-  /* Row 1 */
-  outptr = output_buf[1] + output_col;
-
-  outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp2 + tmp3, 3) & RANGE_MASK];
-  outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp2 - tmp3, 3) & RANGE_MASK];
-}
-
-
-/*
- * Perform dequantization and inverse DCT on one block of coefficients,
- * producing a reduced-size 1x1 output block.
- *
- * We hardly need an inverse DCT routine for this: just take the
- * average pixel value, which is one-eighth of the DC coefficient.
- */
-
-GLOBAL(void)
-jpeg_idct_1x1 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	       JCOEFPTR coef_block,
-	       JSAMPARRAY output_buf, JDIMENSION output_col)
-{
-  int dcval;
-  ISLOW_MULT_TYPE * quantptr;
-  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
-  SHIFT_TEMPS
-
-  /* 1x1 is trivial: just take the DC coefficient divided by 8. */
-  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
-  dcval = DEQUANTIZE(coef_block[0], quantptr[0]);
-  dcval = (int) DESCALE((INT32) dcval, 3);
-
-  output_buf[0][output_col] = range_limit[dcval & RANGE_MASK];
-}
-
-
-/*
- * Perform dequantization and inverse DCT on one block of coefficients,
- * producing a 9x9 output block.
- *
- * Optimized algorithm with 10 multiplications in the 1-D kernel.
- * cK represents sqrt(2) * cos(K*pi/18).
- */
-
-GLOBAL(void)
-jpeg_idct_9x9 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	       JCOEFPTR coef_block,
-	       JSAMPARRAY output_buf, JDIMENSION output_col)
-{
-  INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13, tmp14;
-  INT32 z1, z2, z3, z4;
-  JCOEFPTR inptr;
-  ISLOW_MULT_TYPE * quantptr;
-  int * wsptr;
-  JSAMPROW outptr;
-  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
-  int ctr;
-  int workspace[8*9];	/* buffers data between passes */
-  SHIFT_TEMPS
-
-  /* Pass 1: process columns from input, store into work array. */
-
-  inptr = coef_block;
-  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
-  wsptr = workspace;
-  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
-    /* Even part */
-
-    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
-    tmp0 <<= CONST_BITS;
-    /* Add fudge factor here for final descale. */
-    tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
-
-    z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
-    z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
-    z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
-
-    tmp3 = MULTIPLY(z3, FIX(0.707106781));      /* c6 */
-    tmp1 = tmp0 + tmp3;
-    tmp2 = tmp0 - tmp3 - tmp3;
-
-    tmp0 = MULTIPLY(z1 - z2, FIX(0.707106781)); /* c6 */
-    tmp11 = tmp2 + tmp0;
-    tmp14 = tmp2 - tmp0 - tmp0;
-
-    tmp0 = MULTIPLY(z1 + z2, FIX(1.328926049)); /* c2 */
-    tmp2 = MULTIPLY(z1, FIX(1.083350441));      /* c4 */
-    tmp3 = MULTIPLY(z2, FIX(0.245575608));      /* c8 */
-
-    tmp10 = tmp1 + tmp0 - tmp3;
-    tmp12 = tmp1 - tmp0 + tmp2;
-    tmp13 = tmp1 - tmp2 + tmp3;
-
-    /* Odd part */
-
-    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
-    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
-    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
-    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
-
-    z2 = MULTIPLY(z2, - FIX(1.224744871));           /* -c3 */
-
-    tmp2 = MULTIPLY(z1 + z3, FIX(0.909038955));      /* c5 */
-    tmp3 = MULTIPLY(z1 + z4, FIX(0.483689525));      /* c7 */
-    tmp0 = tmp2 + tmp3 - z2;
-    tmp1 = MULTIPLY(z3 - z4, FIX(1.392728481));      /* c1 */
-    tmp2 += z2 - tmp1;
-    tmp3 += z2 + tmp1;
-    tmp1 = MULTIPLY(z1 - z3 - z4, FIX(1.224744871)); /* c3 */
-
-    /* Final output stage */
-
-    wsptr[8*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
-    wsptr[8*8] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
-    wsptr[8*1] = (int) RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS-PASS1_BITS);
-    wsptr[8*7] = (int) RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS-PASS1_BITS);
-    wsptr[8*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS);
-    wsptr[8*6] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS);
-    wsptr[8*3] = (int) RIGHT_SHIFT(tmp13 + tmp3, CONST_BITS-PASS1_BITS);
-    wsptr[8*5] = (int) RIGHT_SHIFT(tmp13 - tmp3, CONST_BITS-PASS1_BITS);
-    wsptr[8*4] = (int) RIGHT_SHIFT(tmp14, CONST_BITS-PASS1_BITS);
-  }
-
-  /* Pass 2: process 9 rows from work array, store into output array. */
-
-  wsptr = workspace;
-  for (ctr = 0; ctr < 9; ctr++) {
-    outptr = output_buf[ctr] + output_col;
-
-    /* Even part */
-
-    /* Add fudge factor here for final descale. */
-    tmp0 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
-    tmp0 <<= CONST_BITS;
-
-    z1 = (INT32) wsptr[2];
-    z2 = (INT32) wsptr[4];
-    z3 = (INT32) wsptr[6];
-
-    tmp3 = MULTIPLY(z3, FIX(0.707106781));      /* c6 */
-    tmp1 = tmp0 + tmp3;
-    tmp2 = tmp0 - tmp3 - tmp3;
-
-    tmp0 = MULTIPLY(z1 - z2, FIX(0.707106781)); /* c6 */
-    tmp11 = tmp2 + tmp0;
-    tmp14 = tmp2 - tmp0 - tmp0;
-
-    tmp0 = MULTIPLY(z1 + z2, FIX(1.328926049)); /* c2 */
-    tmp2 = MULTIPLY(z1, FIX(1.083350441));      /* c4 */
-    tmp3 = MULTIPLY(z2, FIX(0.245575608));      /* c8 */
-
-    tmp10 = tmp1 + tmp0 - tmp3;
-    tmp12 = tmp1 - tmp0 + tmp2;
-    tmp13 = tmp1 - tmp2 + tmp3;
-
-    /* Odd part */
-
-    z1 = (INT32) wsptr[1];
-    z2 = (INT32) wsptr[3];
-    z3 = (INT32) wsptr[5];
-    z4 = (INT32) wsptr[7];
-
-    z2 = MULTIPLY(z2, - FIX(1.224744871));           /* -c3 */
-
-    tmp2 = MULTIPLY(z1 + z3, FIX(0.909038955));      /* c5 */
-    tmp3 = MULTIPLY(z1 + z4, FIX(0.483689525));      /* c7 */
-    tmp0 = tmp2 + tmp3 - z2;
-    tmp1 = MULTIPLY(z3 - z4, FIX(1.392728481));      /* c1 */
-    tmp2 += z2 - tmp1;
-    tmp3 += z2 + tmp1;
-    tmp1 = MULTIPLY(z1 - z3 - z4, FIX(1.224744871)); /* c3 */
-
-    /* Final output stage */
-
-    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13 + tmp3,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp13 - tmp3,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp14,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-
-    wsptr += 8;		/* advance pointer to next row */
-  }
-}
-
-
-/*
- * Perform dequantization and inverse DCT on one block of coefficients,
- * producing a 10x10 output block.
- *
- * Optimized algorithm with 12 multiplications in the 1-D kernel.
- * cK represents sqrt(2) * cos(K*pi/20).
- */
-
-GLOBAL(void)
-jpeg_idct_10x10 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-		 JCOEFPTR coef_block,
-		 JSAMPARRAY output_buf, JDIMENSION output_col)
-{
-  INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
-  INT32 tmp20, tmp21, tmp22, tmp23, tmp24;
-  INT32 z1, z2, z3, z4, z5;
-  JCOEFPTR inptr;
-  ISLOW_MULT_TYPE * quantptr;
-  int * wsptr;
-  JSAMPROW outptr;
-  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
-  int ctr;
-  int workspace[8*10];	/* buffers data between passes */
-  SHIFT_TEMPS
-
-  /* Pass 1: process columns from input, store into work array. */
-
-  inptr = coef_block;
-  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
-  wsptr = workspace;
-  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
-    /* Even part */
-
-    z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
-    z3 <<= CONST_BITS;
-    /* Add fudge factor here for final descale. */
-    z3 += ONE << (CONST_BITS-PASS1_BITS-1);
-    z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
-    z1 = MULTIPLY(z4, FIX(1.144122806));         /* c4 */
-    z2 = MULTIPLY(z4, FIX(0.437016024));         /* c8 */
-    tmp10 = z3 + z1;
-    tmp11 = z3 - z2;
-
-    tmp22 = RIGHT_SHIFT(z3 - ((z1 - z2) << 1),   /* c0 = (c4-c8)*2 */
-			CONST_BITS-PASS1_BITS);
-
-    z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
-    z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
-
-    z1 = MULTIPLY(z2 + z3, FIX(0.831253876));    /* c6 */
-    tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */
-    tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */
-
-    tmp20 = tmp10 + tmp12;
-    tmp24 = tmp10 - tmp12;
-    tmp21 = tmp11 + tmp13;
-    tmp23 = tmp11 - tmp13;
-
-    /* Odd part */
-
-    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
-    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
-    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
-    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
-
-    tmp11 = z2 + z4;
-    tmp13 = z2 - z4;
-
-    tmp12 = MULTIPLY(tmp13, FIX(0.309016994));        /* (c3-c7)/2 */
-    z5 = z3 << CONST_BITS;
-
-    z2 = MULTIPLY(tmp11, FIX(0.951056516));           /* (c3+c7)/2 */
-    z4 = z5 + tmp12;
-
-    tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */
-    tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */
-
-    z2 = MULTIPLY(tmp11, FIX(0.587785252));           /* (c1-c9)/2 */
-    z4 = z5 - tmp12 - (tmp13 << (CONST_BITS - 1));
-
-    tmp12 = (z1 - tmp13 - z3) << PASS1_BITS;
-
-    tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */
-    tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */
-
-    /* Final output stage */
-
-    wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
-    wsptr[8*9] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
-    wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
-    wsptr[8*8] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
-    wsptr[8*2] = (int) (tmp22 + tmp12);
-    wsptr[8*7] = (int) (tmp22 - tmp12);
-    wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
-    wsptr[8*6] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
-    wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
-    wsptr[8*5] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
-  }
-
-  /* Pass 2: process 10 rows from work array, store into output array. */
-
-  wsptr = workspace;
-  for (ctr = 0; ctr < 10; ctr++) {
-    outptr = output_buf[ctr] + output_col;
-
-    /* Even part */
-
-    /* Add fudge factor here for final descale. */
-    z3 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
-    z3 <<= CONST_BITS;
-    z4 = (INT32) wsptr[4];
-    z1 = MULTIPLY(z4, FIX(1.144122806));         /* c4 */
-    z2 = MULTIPLY(z4, FIX(0.437016024));         /* c8 */
-    tmp10 = z3 + z1;
-    tmp11 = z3 - z2;
-
-    tmp22 = z3 - ((z1 - z2) << 1);               /* c0 = (c4-c8)*2 */
-
-    z2 = (INT32) wsptr[2];
-    z3 = (INT32) wsptr[6];
-
-    z1 = MULTIPLY(z2 + z3, FIX(0.831253876));    /* c6 */
-    tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */
-    tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */
-
-    tmp20 = tmp10 + tmp12;
-    tmp24 = tmp10 - tmp12;
-    tmp21 = tmp11 + tmp13;
-    tmp23 = tmp11 - tmp13;
-
-    /* Odd part */
-
-    z1 = (INT32) wsptr[1];
-    z2 = (INT32) wsptr[3];
-    z3 = (INT32) wsptr[5];
-    z3 <<= CONST_BITS;
-    z4 = (INT32) wsptr[7];
-
-    tmp11 = z2 + z4;
-    tmp13 = z2 - z4;
-
-    tmp12 = MULTIPLY(tmp13, FIX(0.309016994));        /* (c3-c7)/2 */
-
-    z2 = MULTIPLY(tmp11, FIX(0.951056516));           /* (c3+c7)/2 */
-    z4 = z3 + tmp12;
-
-    tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */
-    tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */
-
-    z2 = MULTIPLY(tmp11, FIX(0.587785252));           /* (c1-c9)/2 */
-    z4 = z3 - tmp12 - (tmp13 << (CONST_BITS - 1));
-
-    tmp12 = ((z1 - tmp13) << CONST_BITS) - z3;
-
-    tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */
-    tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */
-
-    /* Final output stage */
-
-    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-
-    wsptr += 8;		/* advance pointer to next row */
-  }
-}
-
-
-/*
- * Perform dequantization and inverse DCT on one block of coefficients,
- * producing a 11x11 output block.
- *
- * Optimized algorithm with 24 multiplications in the 1-D kernel.
- * cK represents sqrt(2) * cos(K*pi/22).
- */
-
-GLOBAL(void)
-jpeg_idct_11x11 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-		 JCOEFPTR coef_block,
-		 JSAMPARRAY output_buf, JDIMENSION output_col)
-{
-  INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
-  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25;
-  INT32 z1, z2, z3, z4;
-  JCOEFPTR inptr;
-  ISLOW_MULT_TYPE * quantptr;
-  int * wsptr;
-  JSAMPROW outptr;
-  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
-  int ctr;
-  int workspace[8*11];	/* buffers data between passes */
-  SHIFT_TEMPS
-
-  /* Pass 1: process columns from input, store into work array. */
-
-  inptr = coef_block;
-  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
-  wsptr = workspace;
-  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
-    /* Even part */
-
-    tmp10 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
-    tmp10 <<= CONST_BITS;
-    /* Add fudge factor here for final descale. */
-    tmp10 += ONE << (CONST_BITS-PASS1_BITS-1);
-
-    z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
-    z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
-    z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
-
-    tmp20 = MULTIPLY(z2 - z3, FIX(2.546640132));     /* c2+c4 */
-    tmp23 = MULTIPLY(z2 - z1, FIX(0.430815045));     /* c2-c6 */
-    z4 = z1 + z3;
-    tmp24 = MULTIPLY(z4, - FIX(1.155664402));        /* -(c2-c10) */
-    z4 -= z2;
-    tmp25 = tmp10 + MULTIPLY(z4, FIX(1.356927976));  /* c2 */
-    tmp21 = tmp20 + tmp23 + tmp25 -
-	    MULTIPLY(z2, FIX(1.821790775));          /* c2+c4+c10-c6 */
-    tmp20 += tmp25 + MULTIPLY(z3, FIX(2.115825087)); /* c4+c6 */
-    tmp23 += tmp25 - MULTIPLY(z1, FIX(1.513598477)); /* c6+c8 */
-    tmp24 += tmp25;
-    tmp22 = tmp24 - MULTIPLY(z3, FIX(0.788749120));  /* c8+c10 */
-    tmp24 += MULTIPLY(z2, FIX(1.944413522)) -        /* c2+c8 */
-	     MULTIPLY(z1, FIX(1.390975730));         /* c4+c10 */
-    tmp25 = tmp10 - MULTIPLY(z4, FIX(1.414213562));  /* c0 */
-
-    /* Odd part */
-
-    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
-    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
-    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
-    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
-
-    tmp11 = z1 + z2;
-    tmp14 = MULTIPLY(tmp11 + z3 + z4, FIX(0.398430003)); /* c9 */
-    tmp11 = MULTIPLY(tmp11, FIX(0.887983902));           /* c3-c9 */
-    tmp12 = MULTIPLY(z1 + z3, FIX(0.670361295));         /* c5-c9 */
-    tmp13 = tmp14 + MULTIPLY(z1 + z4, FIX(0.366151574)); /* c7-c9 */
-    tmp10 = tmp11 + tmp12 + tmp13 -
-	    MULTIPLY(z1, FIX(0.923107866));              /* c7+c5+c3-c1-2*c9 */
-    z1    = tmp14 - MULTIPLY(z2 + z3, FIX(1.163011579)); /* c7+c9 */
-    tmp11 += z1 + MULTIPLY(z2, FIX(2.073276588));        /* c1+c7+3*c9-c3 */
-    tmp12 += z1 - MULTIPLY(z3, FIX(1.192193623));        /* c3+c5-c7-c9 */
-    z1    = MULTIPLY(z2 + z4, - FIX(1.798248910));       /* -(c1+c9) */
-    tmp11 += z1;
-    tmp13 += z1 + MULTIPLY(z4, FIX(2.102458632));        /* c1+c5+c9-c7 */
-    tmp14 += MULTIPLY(z2, - FIX(1.467221301)) +          /* -(c5+c9) */
-	     MULTIPLY(z3, FIX(1.001388905)) -            /* c1-c9 */
-	     MULTIPLY(z4, FIX(1.684843907));             /* c3+c9 */
-
-    /* Final output stage */
-
-    wsptr[8*0]  = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
-    wsptr[8*10] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
-    wsptr[8*1]  = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
-    wsptr[8*9]  = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
-    wsptr[8*2]  = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
-    wsptr[8*8]  = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
-    wsptr[8*3]  = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
-    wsptr[8*7]  = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
-    wsptr[8*4]  = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
-    wsptr[8*6]  = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
-    wsptr[8*5]  = (int) RIGHT_SHIFT(tmp25, CONST_BITS-PASS1_BITS);
-  }
-
-  /* Pass 2: process 11 rows from work array, store into output array. */
-
-  wsptr = workspace;
-  for (ctr = 0; ctr < 11; ctr++) {
-    outptr = output_buf[ctr] + output_col;
-
-    /* Even part */
-
-    /* Add fudge factor here for final descale. */
-    tmp10 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
-    tmp10 <<= CONST_BITS;
-
-    z1 = (INT32) wsptr[2];
-    z2 = (INT32) wsptr[4];
-    z3 = (INT32) wsptr[6];
-
-    tmp20 = MULTIPLY(z2 - z3, FIX(2.546640132));     /* c2+c4 */
-    tmp23 = MULTIPLY(z2 - z1, FIX(0.430815045));     /* c2-c6 */
-    z4 = z1 + z3;
-    tmp24 = MULTIPLY(z4, - FIX(1.155664402));        /* -(c2-c10) */
-    z4 -= z2;
-    tmp25 = tmp10 + MULTIPLY(z4, FIX(1.356927976));  /* c2 */
-    tmp21 = tmp20 + tmp23 + tmp25 -
-	    MULTIPLY(z2, FIX(1.821790775));          /* c2+c4+c10-c6 */
-    tmp20 += tmp25 + MULTIPLY(z3, FIX(2.115825087)); /* c4+c6 */
-    tmp23 += tmp25 - MULTIPLY(z1, FIX(1.513598477)); /* c6+c8 */
-    tmp24 += tmp25;
-    tmp22 = tmp24 - MULTIPLY(z3, FIX(0.788749120));  /* c8+c10 */
-    tmp24 += MULTIPLY(z2, FIX(1.944413522)) -        /* c2+c8 */
-	     MULTIPLY(z1, FIX(1.390975730));         /* c4+c10 */
-    tmp25 = tmp10 - MULTIPLY(z4, FIX(1.414213562));  /* c0 */
-
-    /* Odd part */
-
-    z1 = (INT32) wsptr[1];
-    z2 = (INT32) wsptr[3];
-    z3 = (INT32) wsptr[5];
-    z4 = (INT32) wsptr[7];
-
-    tmp11 = z1 + z2;
-    tmp14 = MULTIPLY(tmp11 + z3 + z4, FIX(0.398430003)); /* c9 */
-    tmp11 = MULTIPLY(tmp11, FIX(0.887983902));           /* c3-c9 */
-    tmp12 = MULTIPLY(z1 + z3, FIX(0.670361295));         /* c5-c9 */
-    tmp13 = tmp14 + MULTIPLY(z1 + z4, FIX(0.366151574)); /* c7-c9 */
-    tmp10 = tmp11 + tmp12 + tmp13 -
-	    MULTIPLY(z1, FIX(0.923107866));              /* c7+c5+c3-c1-2*c9 */
-    z1    = tmp14 - MULTIPLY(z2 + z3, FIX(1.163011579)); /* c7+c9 */
-    tmp11 += z1 + MULTIPLY(z2, FIX(2.073276588));        /* c1+c7+3*c9-c3 */
-    tmp12 += z1 - MULTIPLY(z3, FIX(1.192193623));        /* c3+c5-c7-c9 */
-    z1    = MULTIPLY(z2 + z4, - FIX(1.798248910));       /* -(c1+c9) */
-    tmp11 += z1;
-    tmp13 += z1 + MULTIPLY(z4, FIX(2.102458632));        /* c1+c5+c9-c7 */
-    tmp14 += MULTIPLY(z2, - FIX(1.467221301)) +          /* -(c5+c9) */
-	     MULTIPLY(z3, FIX(1.001388905)) -            /* c1-c9 */
-	     MULTIPLY(z4, FIX(1.684843907));             /* c3+c9 */
-
-    /* Final output stage */
-
-    outptr[0]  = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[1]  = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[9]  = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[2]  = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[8]  = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[3]  = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[7]  = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[4]  = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[6]  = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[5]  = range_limit[(int) RIGHT_SHIFT(tmp25,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-
-    wsptr += 8;		/* advance pointer to next row */
-  }
-}
-
-
-/*
- * Perform dequantization and inverse DCT on one block of coefficients,
- * producing a 12x12 output block.
- *
- * Optimized algorithm with 15 multiplications in the 1-D kernel.
- * cK represents sqrt(2) * cos(K*pi/24).
- */
-
-GLOBAL(void)
-jpeg_idct_12x12 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-		 JCOEFPTR coef_block,
-		 JSAMPARRAY output_buf, JDIMENSION output_col)
-{
-  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
-  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25;
-  INT32 z1, z2, z3, z4;
-  JCOEFPTR inptr;
-  ISLOW_MULT_TYPE * quantptr;
-  int * wsptr;
-  JSAMPROW outptr;
-  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
-  int ctr;
-  int workspace[8*12];	/* buffers data between passes */
-  SHIFT_TEMPS
-
-  /* Pass 1: process columns from input, store into work array. */
-
-  inptr = coef_block;
-  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
-  wsptr = workspace;
-  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
-    /* Even part */
-
-    z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
-    z3 <<= CONST_BITS;
-    /* Add fudge factor here for final descale. */
-    z3 += ONE << (CONST_BITS-PASS1_BITS-1);
-
-    z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
-    z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */
-
-    tmp10 = z3 + z4;
-    tmp11 = z3 - z4;
-
-    z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
-    z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */
-    z1 <<= CONST_BITS;
-    z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
-    z2 <<= CONST_BITS;
-
-    tmp12 = z1 - z2;
-
-    tmp21 = z3 + tmp12;
-    tmp24 = z3 - tmp12;
-
-    tmp12 = z4 + z2;
-
-    tmp20 = tmp10 + tmp12;
-    tmp25 = tmp10 - tmp12;
-
-    tmp12 = z4 - z1 - z2;
-
-    tmp22 = tmp11 + tmp12;
-    tmp23 = tmp11 - tmp12;
-
-    /* Odd part */
-
-    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
-    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
-    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
-    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
-
-    tmp11 = MULTIPLY(z2, FIX(1.306562965));                  /* c3 */
-    tmp14 = MULTIPLY(z2, - FIX_0_541196100);                 /* -c9 */
-
-    tmp10 = z1 + z3;
-    tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669));          /* c7 */
-    tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384));       /* c5-c7 */
-    tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716));  /* c1-c5 */
-    tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580));           /* -(c7+c11) */
-    tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */
-    tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */
-    tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) -        /* c7-c11 */
-	     MULTIPLY(z4, FIX(1.982889723));                 /* c5+c7 */
-
-    z1 -= z4;
-    z2 -= z3;
-    z3 = MULTIPLY(z1 + z2, FIX_0_541196100);                 /* c9 */
-    tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865);              /* c3-c9 */
-    tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065);              /* c3+c9 */
-
-    /* Final output stage */
-
-    wsptr[8*0]  = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
-    wsptr[8*11] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
-    wsptr[8*1]  = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
-    wsptr[8*10] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
-    wsptr[8*2]  = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
-    wsptr[8*9]  = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
-    wsptr[8*3]  = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
-    wsptr[8*8]  = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
-    wsptr[8*4]  = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
-    wsptr[8*7]  = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
-    wsptr[8*5]  = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
-    wsptr[8*6]  = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
-  }
-
-  /* Pass 2: process 12 rows from work array, store into output array. */
-
-  wsptr = workspace;
-  for (ctr = 0; ctr < 12; ctr++) {
-    outptr = output_buf[ctr] + output_col;
-
-    /* Even part */
-
-    /* Add fudge factor here for final descale. */
-    z3 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
-    z3 <<= CONST_BITS;
-
-    z4 = (INT32) wsptr[4];
-    z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */
-
-    tmp10 = z3 + z4;
-    tmp11 = z3 - z4;
-
-    z1 = (INT32) wsptr[2];
-    z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */
-    z1 <<= CONST_BITS;
-    z2 = (INT32) wsptr[6];
-    z2 <<= CONST_BITS;
-
-    tmp12 = z1 - z2;
-
-    tmp21 = z3 + tmp12;
-    tmp24 = z3 - tmp12;
-
-    tmp12 = z4 + z2;
-
-    tmp20 = tmp10 + tmp12;
-    tmp25 = tmp10 - tmp12;
-
-    tmp12 = z4 - z1 - z2;
-
-    tmp22 = tmp11 + tmp12;
-    tmp23 = tmp11 - tmp12;
-
-    /* Odd part */
-
-    z1 = (INT32) wsptr[1];
-    z2 = (INT32) wsptr[3];
-    z3 = (INT32) wsptr[5];
-    z4 = (INT32) wsptr[7];
-
-    tmp11 = MULTIPLY(z2, FIX(1.306562965));                  /* c3 */
-    tmp14 = MULTIPLY(z2, - FIX_0_541196100);                 /* -c9 */
-
-    tmp10 = z1 + z3;
-    tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669));          /* c7 */
-    tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384));       /* c5-c7 */
-    tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716));  /* c1-c5 */
-    tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580));           /* -(c7+c11) */
-    tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */
-    tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */
-    tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) -        /* c7-c11 */
-	     MULTIPLY(z4, FIX(1.982889723));                 /* c5+c7 */
-
-    z1 -= z4;
-    z2 -= z3;
-    z3 = MULTIPLY(z1 + z2, FIX_0_541196100);                 /* c9 */
-    tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865);              /* c3-c9 */
-    tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065);              /* c3+c9 */
-
-    /* Final output stage */
-
-    outptr[0]  = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[1]  = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[2]  = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[9]  = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[3]  = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[8]  = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[4]  = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[7]  = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[5]  = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[6]  = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-
-    wsptr += 8;		/* advance pointer to next row */
-  }
-}
-
-
-/*
- * Perform dequantization and inverse DCT on one block of coefficients,
- * producing a 13x13 output block.
- *
- * Optimized algorithm with 29 multiplications in the 1-D kernel.
- * cK represents sqrt(2) * cos(K*pi/26).
- */
-
-GLOBAL(void)
-jpeg_idct_13x13 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-		 JCOEFPTR coef_block,
-		 JSAMPARRAY output_buf, JDIMENSION output_col)
-{
-  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
-  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26;
-  INT32 z1, z2, z3, z4;
-  JCOEFPTR inptr;
-  ISLOW_MULT_TYPE * quantptr;
-  int * wsptr;
-  JSAMPROW outptr;
-  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
-  int ctr;
-  int workspace[8*13];	/* buffers data between passes */
-  SHIFT_TEMPS
-
-  /* Pass 1: process columns from input, store into work array. */
-
-  inptr = coef_block;
-  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
-  wsptr = workspace;
-  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
-    /* Even part */
-
-    z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
-    z1 <<= CONST_BITS;
-    /* Add fudge factor here for final descale. */
-    z1 += ONE << (CONST_BITS-PASS1_BITS-1);
-
-    z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
-    z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
-    z4 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
-
-    tmp10 = z3 + z4;
-    tmp11 = z3 - z4;
-
-    tmp12 = MULTIPLY(tmp10, FIX(1.155388986));                /* (c4+c6)/2 */
-    tmp13 = MULTIPLY(tmp11, FIX(0.096834934)) + z1;           /* (c4-c6)/2 */
-
-    tmp20 = MULTIPLY(z2, FIX(1.373119086)) + tmp12 + tmp13;   /* c2 */
-    tmp22 = MULTIPLY(z2, FIX(0.501487041)) - tmp12 + tmp13;   /* c10 */
-
-    tmp12 = MULTIPLY(tmp10, FIX(0.316450131));                /* (c8-c12)/2 */
-    tmp13 = MULTIPLY(tmp11, FIX(0.486914739)) + z1;           /* (c8+c12)/2 */
-
-    tmp21 = MULTIPLY(z2, FIX(1.058554052)) - tmp12 + tmp13;   /* c6 */
-    tmp25 = MULTIPLY(z2, - FIX(1.252223920)) + tmp12 + tmp13; /* c4 */
-
-    tmp12 = MULTIPLY(tmp10, FIX(0.435816023));                /* (c2-c10)/2 */
-    tmp13 = MULTIPLY(tmp11, FIX(0.937303064)) - z1;           /* (c2+c10)/2 */
-
-    tmp23 = MULTIPLY(z2, - FIX(0.170464608)) - tmp12 - tmp13; /* c12 */
-    tmp24 = MULTIPLY(z2, - FIX(0.803364869)) + tmp12 - tmp13; /* c8 */
-
-    tmp26 = MULTIPLY(tmp11 - z2, FIX(1.414213562)) + z1;      /* c0 */
-
-    /* Odd part */
-
-    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
-    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
-    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
-    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
-
-    tmp11 = MULTIPLY(z1 + z2, FIX(1.322312651));     /* c3 */
-    tmp12 = MULTIPLY(z1 + z3, FIX(1.163874945));     /* c5 */
-    tmp15 = z1 + z4;
-    tmp13 = MULTIPLY(tmp15, FIX(0.937797057));       /* c7 */
-    tmp10 = tmp11 + tmp12 + tmp13 -
-	    MULTIPLY(z1, FIX(2.020082300));          /* c7+c5+c3-c1 */
-    tmp14 = MULTIPLY(z2 + z3, - FIX(0.338443458));   /* -c11 */
-    tmp11 += tmp14 + MULTIPLY(z2, FIX(0.837223564)); /* c5+c9+c11-c3 */
-    tmp12 += tmp14 - MULTIPLY(z3, FIX(1.572116027)); /* c1+c5-c9-c11 */
-    tmp14 = MULTIPLY(z2 + z4, - FIX(1.163874945));   /* -c5 */
-    tmp11 += tmp14;
-    tmp13 += tmp14 + MULTIPLY(z4, FIX(2.205608352)); /* c3+c5+c9-c7 */
-    tmp14 = MULTIPLY(z3 + z4, - FIX(0.657217813));   /* -c9 */
-    tmp12 += tmp14;
-    tmp13 += tmp14;
-    tmp15 = MULTIPLY(tmp15, FIX(0.338443458));       /* c11 */
-    tmp14 = tmp15 + MULTIPLY(z1, FIX(0.318774355)) - /* c9-c11 */
-	    MULTIPLY(z2, FIX(0.466105296));          /* c1-c7 */
-    z1    = MULTIPLY(z3 - z2, FIX(0.937797057));     /* c7 */
-    tmp14 += z1;
-    tmp15 += z1 + MULTIPLY(z3, FIX(0.384515595)) -   /* c3-c7 */
-	     MULTIPLY(z4, FIX(1.742345811));         /* c1+c11 */
-
-    /* Final output stage */
-
-    wsptr[8*0]  = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
-    wsptr[8*12] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
-    wsptr[8*1]  = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
-    wsptr[8*11] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
-    wsptr[8*2]  = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
-    wsptr[8*10] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
-    wsptr[8*3]  = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
-    wsptr[8*9]  = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
-    wsptr[8*4]  = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
-    wsptr[8*8]  = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
-    wsptr[8*5]  = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
-    wsptr[8*7]  = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
-    wsptr[8*6]  = (int) RIGHT_SHIFT(tmp26, CONST_BITS-PASS1_BITS);
-  }
-
-  /* Pass 2: process 13 rows from work array, store into output array. */
-
-  wsptr = workspace;
-  for (ctr = 0; ctr < 13; ctr++) {
-    outptr = output_buf[ctr] + output_col;
-
-    /* Even part */
-
-    /* Add fudge factor here for final descale. */
-    z1 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
-    z1 <<= CONST_BITS;
-
-    z2 = (INT32) wsptr[2];
-    z3 = (INT32) wsptr[4];
-    z4 = (INT32) wsptr[6];
-
-    tmp10 = z3 + z4;
-    tmp11 = z3 - z4;
-
-    tmp12 = MULTIPLY(tmp10, FIX(1.155388986));                /* (c4+c6)/2 */
-    tmp13 = MULTIPLY(tmp11, FIX(0.096834934)) + z1;           /* (c4-c6)/2 */
-
-    tmp20 = MULTIPLY(z2, FIX(1.373119086)) + tmp12 + tmp13;   /* c2 */
-    tmp22 = MULTIPLY(z2, FIX(0.501487041)) - tmp12 + tmp13;   /* c10 */
-
-    tmp12 = MULTIPLY(tmp10, FIX(0.316450131));                /* (c8-c12)/2 */
-    tmp13 = MULTIPLY(tmp11, FIX(0.486914739)) + z1;           /* (c8+c12)/2 */
-
-    tmp21 = MULTIPLY(z2, FIX(1.058554052)) - tmp12 + tmp13;   /* c6 */
-    tmp25 = MULTIPLY(z2, - FIX(1.252223920)) + tmp12 + tmp13; /* c4 */
-
-    tmp12 = MULTIPLY(tmp10, FIX(0.435816023));                /* (c2-c10)/2 */
-    tmp13 = MULTIPLY(tmp11, FIX(0.937303064)) - z1;           /* (c2+c10)/2 */
-
-    tmp23 = MULTIPLY(z2, - FIX(0.170464608)) - tmp12 - tmp13; /* c12 */
-    tmp24 = MULTIPLY(z2, - FIX(0.803364869)) + tmp12 - tmp13; /* c8 */
-
-    tmp26 = MULTIPLY(tmp11 - z2, FIX(1.414213562)) + z1;      /* c0 */
-
-    /* Odd part */
-
-    z1 = (INT32) wsptr[1];
-    z2 = (INT32) wsptr[3];
-    z3 = (INT32) wsptr[5];
-    z4 = (INT32) wsptr[7];
-
-    tmp11 = MULTIPLY(z1 + z2, FIX(1.322312651));     /* c3 */
-    tmp12 = MULTIPLY(z1 + z3, FIX(1.163874945));     /* c5 */
-    tmp15 = z1 + z4;
-    tmp13 = MULTIPLY(tmp15, FIX(0.937797057));       /* c7 */
-    tmp10 = tmp11 + tmp12 + tmp13 -
-	    MULTIPLY(z1, FIX(2.020082300));          /* c7+c5+c3-c1 */
-    tmp14 = MULTIPLY(z2 + z3, - FIX(0.338443458));   /* -c11 */
-    tmp11 += tmp14 + MULTIPLY(z2, FIX(0.837223564)); /* c5+c9+c11-c3 */
-    tmp12 += tmp14 - MULTIPLY(z3, FIX(1.572116027)); /* c1+c5-c9-c11 */
-    tmp14 = MULTIPLY(z2 + z4, - FIX(1.163874945));   /* -c5 */
-    tmp11 += tmp14;
-    tmp13 += tmp14 + MULTIPLY(z4, FIX(2.205608352)); /* c3+c5+c9-c7 */
-    tmp14 = MULTIPLY(z3 + z4, - FIX(0.657217813));   /* -c9 */
-    tmp12 += tmp14;
-    tmp13 += tmp14;
-    tmp15 = MULTIPLY(tmp15, FIX(0.338443458));       /* c11 */
-    tmp14 = tmp15 + MULTIPLY(z1, FIX(0.318774355)) - /* c9-c11 */
-	    MULTIPLY(z2, FIX(0.466105296));          /* c1-c7 */
-    z1    = MULTIPLY(z3 - z2, FIX(0.937797057));     /* c7 */
-    tmp14 += z1;
-    tmp15 += z1 + MULTIPLY(z3, FIX(0.384515595)) -   /* c3-c7 */
-	     MULTIPLY(z4, FIX(1.742345811));         /* c1+c11 */
-
-    /* Final output stage */
-
-    outptr[0]  = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[1]  = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[2]  = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[3]  = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[9]  = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[4]  = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[8]  = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[5]  = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[7]  = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[6]  = range_limit[(int) RIGHT_SHIFT(tmp26,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-
-    wsptr += 8;		/* advance pointer to next row */
-  }
-}
-
-
-/*
- * Perform dequantization and inverse DCT on one block of coefficients,
- * producing a 14x14 output block.
- *
- * Optimized algorithm with 20 multiplications in the 1-D kernel.
- * cK represents sqrt(2) * cos(K*pi/28).
- */
-
-GLOBAL(void)
-jpeg_idct_14x14 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-		 JCOEFPTR coef_block,
-		 JSAMPARRAY output_buf, JDIMENSION output_col)
-{
-  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
-  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26;
-  INT32 z1, z2, z3, z4;
-  JCOEFPTR inptr;
-  ISLOW_MULT_TYPE * quantptr;
-  int * wsptr;
-  JSAMPROW outptr;
-  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
-  int ctr;
-  int workspace[8*14];	/* buffers data between passes */
-  SHIFT_TEMPS
-
-  /* Pass 1: process columns from input, store into work array. */
-
-  inptr = coef_block;
-  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
-  wsptr = workspace;
-  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
-    /* Even part */
-
-    z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
-    z1 <<= CONST_BITS;
-    /* Add fudge factor here for final descale. */
-    z1 += ONE << (CONST_BITS-PASS1_BITS-1);
-    z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
-    z2 = MULTIPLY(z4, FIX(1.274162392));         /* c4 */
-    z3 = MULTIPLY(z4, FIX(0.314692123));         /* c12 */
-    z4 = MULTIPLY(z4, FIX(0.881747734));         /* c8 */
-
-    tmp10 = z1 + z2;
-    tmp11 = z1 + z3;
-    tmp12 = z1 - z4;
-
-    tmp23 = RIGHT_SHIFT(z1 - ((z2 + z3 - z4) << 1), /* c0 = (c4+c12-c8)*2 */
-			CONST_BITS-PASS1_BITS);
-
-    z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
-    z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
-
-    z3 = MULTIPLY(z1 + z2, FIX(1.105676686));    /* c6 */
-
-    tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */
-    tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */
-    tmp15 = MULTIPLY(z1, FIX(0.613604268)) -     /* c10 */
-	    MULTIPLY(z2, FIX(1.378756276));      /* c2 */
-
-    tmp20 = tmp10 + tmp13;
-    tmp26 = tmp10 - tmp13;
-    tmp21 = tmp11 + tmp14;
-    tmp25 = tmp11 - tmp14;
-    tmp22 = tmp12 + tmp15;
-    tmp24 = tmp12 - tmp15;
-
-    /* Odd part */
-
-    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
-    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
-    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
-    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
-    tmp13 = z4 << CONST_BITS;
-
-    tmp14 = z1 + z3;
-    tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607));           /* c3 */
-    tmp12 = MULTIPLY(tmp14, FIX(1.197448846));             /* c5 */
-    tmp10 = tmp11 + tmp12 + tmp13 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */
-    tmp14 = MULTIPLY(tmp14, FIX(0.752406978));             /* c9 */
-    tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426));        /* c9+c11-c13 */
-    z1    -= z2;
-    tmp15 = MULTIPLY(z1, FIX(0.467085129)) - tmp13;        /* c11 */
-    tmp16 += tmp15;
-    z1    += z4;
-    z4    = MULTIPLY(z2 + z3, - FIX(0.158341681)) - tmp13; /* -c13 */
-    tmp11 += z4 - MULTIPLY(z2, FIX(0.424103948));          /* c3-c9-c13 */
-    tmp12 += z4 - MULTIPLY(z3, FIX(2.373959773));          /* c3+c5-c13 */
-    z4    = MULTIPLY(z3 - z2, FIX(1.405321284));           /* c1 */
-    tmp14 += z4 + tmp13 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */
-    tmp15 += z4 + MULTIPLY(z2, FIX(0.674957567));          /* c1+c11-c5 */
-
-    tmp13 = (z1 - z3) << PASS1_BITS;
-
-    /* Final output stage */
-
-    wsptr[8*0]  = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
-    wsptr[8*13] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
-    wsptr[8*1]  = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
-    wsptr[8*12] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
-    wsptr[8*2]  = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
-    wsptr[8*11] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
-    wsptr[8*3]  = (int) (tmp23 + tmp13);
-    wsptr[8*10] = (int) (tmp23 - tmp13);
-    wsptr[8*4]  = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
-    wsptr[8*9]  = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
-    wsptr[8*5]  = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
-    wsptr[8*8]  = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
-    wsptr[8*6]  = (int) RIGHT_SHIFT(tmp26 + tmp16, CONST_BITS-PASS1_BITS);
-    wsptr[8*7]  = (int) RIGHT_SHIFT(tmp26 - tmp16, CONST_BITS-PASS1_BITS);
-  }
-
-  /* Pass 2: process 14 rows from work array, store into output array. */
-
-  wsptr = workspace;
-  for (ctr = 0; ctr < 14; ctr++) {
-    outptr = output_buf[ctr] + output_col;
-
-    /* Even part */
-
-    /* Add fudge factor here for final descale. */
-    z1 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
-    z1 <<= CONST_BITS;
-    z4 = (INT32) wsptr[4];
-    z2 = MULTIPLY(z4, FIX(1.274162392));         /* c4 */
-    z3 = MULTIPLY(z4, FIX(0.314692123));         /* c12 */
-    z4 = MULTIPLY(z4, FIX(0.881747734));         /* c8 */
-
-    tmp10 = z1 + z2;
-    tmp11 = z1 + z3;
-    tmp12 = z1 - z4;
-
-    tmp23 = z1 - ((z2 + z3 - z4) << 1);          /* c0 = (c4+c12-c8)*2 */
-
-    z1 = (INT32) wsptr[2];
-    z2 = (INT32) wsptr[6];
-
-    z3 = MULTIPLY(z1 + z2, FIX(1.105676686));    /* c6 */
-
-    tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */
-    tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */
-    tmp15 = MULTIPLY(z1, FIX(0.613604268)) -     /* c10 */
-	    MULTIPLY(z2, FIX(1.378756276));      /* c2 */
-
-    tmp20 = tmp10 + tmp13;
-    tmp26 = tmp10 - tmp13;
-    tmp21 = tmp11 + tmp14;
-    tmp25 = tmp11 - tmp14;
-    tmp22 = tmp12 + tmp15;
-    tmp24 = tmp12 - tmp15;
-
-    /* Odd part */
-
-    z1 = (INT32) wsptr[1];
-    z2 = (INT32) wsptr[3];
-    z3 = (INT32) wsptr[5];
-    z4 = (INT32) wsptr[7];
-    z4 <<= CONST_BITS;
-
-    tmp14 = z1 + z3;
-    tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607));           /* c3 */
-    tmp12 = MULTIPLY(tmp14, FIX(1.197448846));             /* c5 */
-    tmp10 = tmp11 + tmp12 + z4 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */
-    tmp14 = MULTIPLY(tmp14, FIX(0.752406978));             /* c9 */
-    tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426));        /* c9+c11-c13 */
-    z1    -= z2;
-    tmp15 = MULTIPLY(z1, FIX(0.467085129)) - z4;           /* c11 */
-    tmp16 += tmp15;
-    tmp13 = MULTIPLY(z2 + z3, - FIX(0.158341681)) - z4;    /* -c13 */
-    tmp11 += tmp13 - MULTIPLY(z2, FIX(0.424103948));       /* c3-c9-c13 */
-    tmp12 += tmp13 - MULTIPLY(z3, FIX(2.373959773));       /* c3+c5-c13 */
-    tmp13 = MULTIPLY(z3 - z2, FIX(1.405321284));           /* c1 */
-    tmp14 += tmp13 + z4 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */
-    tmp15 += tmp13 + MULTIPLY(z2, FIX(0.674957567));       /* c1+c11-c5 */
-
-    tmp13 = ((z1 - z3) << CONST_BITS) + z4;
-
-    /* Final output stage */
-
-    outptr[0]  = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[1]  = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[2]  = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[3]  = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[4]  = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[9]  = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[5]  = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[8]  = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[6]  = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp16,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[7]  = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp16,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-
-    wsptr += 8;		/* advance pointer to next row */
-  }
-}
-
-
-/*
- * Perform dequantization and inverse DCT on one block of coefficients,
- * producing a 15x15 output block.
- *
- * Optimized algorithm with 22 multiplications in the 1-D kernel.
- * cK represents sqrt(2) * cos(K*pi/30).
- */
-
-GLOBAL(void)
-jpeg_idct_15x15 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-		 JCOEFPTR coef_block,
-		 JSAMPARRAY output_buf, JDIMENSION output_col)
-{
-  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
-  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27;
-  INT32 z1, z2, z3, z4;
-  JCOEFPTR inptr;
-  ISLOW_MULT_TYPE * quantptr;
-  int * wsptr;
-  JSAMPROW outptr;
-  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
-  int ctr;
-  int workspace[8*15];	/* buffers data between passes */
-  SHIFT_TEMPS
-
-  /* Pass 1: process columns from input, store into work array. */
-
-  inptr = coef_block;
-  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
-  wsptr = workspace;
-  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
-    /* Even part */
-
-    z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
-    z1 <<= CONST_BITS;
-    /* Add fudge factor here for final descale. */
-    z1 += ONE << (CONST_BITS-PASS1_BITS-1);
-
-    z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
-    z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
-    z4 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
-
-    tmp10 = MULTIPLY(z4, FIX(0.437016024)); /* c12 */
-    tmp11 = MULTIPLY(z4, FIX(1.144122806)); /* c6 */
-
-    tmp12 = z1 - tmp10;
-    tmp13 = z1 + tmp11;
-    z1 -= (tmp11 - tmp10) << 1;             /* c0 = (c6-c12)*2 */
-
-    z4 = z2 - z3;
-    z3 += z2;
-    tmp10 = MULTIPLY(z3, FIX(1.337628990)); /* (c2+c4)/2 */
-    tmp11 = MULTIPLY(z4, FIX(0.045680613)); /* (c2-c4)/2 */
-    z2 = MULTIPLY(z2, FIX(1.439773946));    /* c4+c14 */
-
-    tmp20 = tmp13 + tmp10 + tmp11;
-    tmp23 = tmp12 - tmp10 + tmp11 + z2;
-
-    tmp10 = MULTIPLY(z3, FIX(0.547059574)); /* (c8+c14)/2 */
-    tmp11 = MULTIPLY(z4, FIX(0.399234004)); /* (c8-c14)/2 */
-
-    tmp25 = tmp13 - tmp10 - tmp11;
-    tmp26 = tmp12 + tmp10 - tmp11 - z2;
-
-    tmp10 = MULTIPLY(z3, FIX(0.790569415)); /* (c6+c12)/2 */
-    tmp11 = MULTIPLY(z4, FIX(0.353553391)); /* (c6-c12)/2 */
-
-    tmp21 = tmp12 + tmp10 + tmp11;
-    tmp24 = tmp13 - tmp10 + tmp11;
-    tmp11 += tmp11;
-    tmp22 = z1 + tmp11;                     /* c10 = c6-c12 */
-    tmp27 = z1 - tmp11 - tmp11;             /* c0 = (c6-c12)*2 */
-
-    /* Odd part */
-
-    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
-    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
-    z4 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
-    z3 = MULTIPLY(z4, FIX(1.224744871));                    /* c5 */
-    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
-
-    tmp13 = z2 - z4;
-    tmp15 = MULTIPLY(z1 + tmp13, FIX(0.831253876));         /* c9 */
-    tmp11 = tmp15 + MULTIPLY(z1, FIX(0.513743148));         /* c3-c9 */
-    tmp14 = tmp15 - MULTIPLY(tmp13, FIX(2.176250899));      /* c3+c9 */
-
-    tmp13 = MULTIPLY(z2, - FIX(0.831253876));               /* -c9 */
-    tmp15 = MULTIPLY(z2, - FIX(1.344997024));               /* -c3 */
-    z2 = z1 - z4;
-    tmp12 = z3 + MULTIPLY(z2, FIX(1.406466353));            /* c1 */
-
-    tmp10 = tmp12 + MULTIPLY(z4, FIX(2.457431844)) - tmp15; /* c1+c7 */
-    tmp16 = tmp12 - MULTIPLY(z1, FIX(1.112434820)) + tmp13; /* c1-c13 */
-    tmp12 = MULTIPLY(z2, FIX(1.224744871)) - z3;            /* c5 */
-    z2 = MULTIPLY(z1 + z4, FIX(0.575212477));               /* c11 */
-    tmp13 += z2 + MULTIPLY(z1, FIX(0.475753014)) - z3;      /* c7-c11 */
-    tmp15 += z2 - MULTIPLY(z4, FIX(0.869244010)) + z3;      /* c11+c13 */
-
-    /* Final output stage */
-
-    wsptr[8*0]  = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
-    wsptr[8*14] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
-    wsptr[8*1]  = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
-    wsptr[8*13] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
-    wsptr[8*2]  = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
-    wsptr[8*12] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
-    wsptr[8*3]  = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
-    wsptr[8*11] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
-    wsptr[8*4]  = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
-    wsptr[8*10] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
-    wsptr[8*5]  = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
-    wsptr[8*9]  = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
-    wsptr[8*6]  = (int) RIGHT_SHIFT(tmp26 + tmp16, CONST_BITS-PASS1_BITS);
-    wsptr[8*8]  = (int) RIGHT_SHIFT(tmp26 - tmp16, CONST_BITS-PASS1_BITS);
-    wsptr[8*7]  = (int) RIGHT_SHIFT(tmp27, CONST_BITS-PASS1_BITS);
-  }
-
-  /* Pass 2: process 15 rows from work array, store into output array. */
-
-  wsptr = workspace;
-  for (ctr = 0; ctr < 15; ctr++) {
-    outptr = output_buf[ctr] + output_col;
-
-    /* Even part */
-
-    /* Add fudge factor here for final descale. */
-    z1 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
-    z1 <<= CONST_BITS;
-
-    z2 = (INT32) wsptr[2];
-    z3 = (INT32) wsptr[4];
-    z4 = (INT32) wsptr[6];
-
-    tmp10 = MULTIPLY(z4, FIX(0.437016024)); /* c12 */
-    tmp11 = MULTIPLY(z4, FIX(1.144122806)); /* c6 */
-
-    tmp12 = z1 - tmp10;
-    tmp13 = z1 + tmp11;
-    z1 -= (tmp11 - tmp10) << 1;             /* c0 = (c6-c12)*2 */
-
-    z4 = z2 - z3;
-    z3 += z2;
-    tmp10 = MULTIPLY(z3, FIX(1.337628990)); /* (c2+c4)/2 */
-    tmp11 = MULTIPLY(z4, FIX(0.045680613)); /* (c2-c4)/2 */
-    z2 = MULTIPLY(z2, FIX(1.439773946));    /* c4+c14 */
-
-    tmp20 = tmp13 + tmp10 + tmp11;
-    tmp23 = tmp12 - tmp10 + tmp11 + z2;
-
-    tmp10 = MULTIPLY(z3, FIX(0.547059574)); /* (c8+c14)/2 */
-    tmp11 = MULTIPLY(z4, FIX(0.399234004)); /* (c8-c14)/2 */
-
-    tmp25 = tmp13 - tmp10 - tmp11;
-    tmp26 = tmp12 + tmp10 - tmp11 - z2;
-
-    tmp10 = MULTIPLY(z3, FIX(0.790569415)); /* (c6+c12)/2 */
-    tmp11 = MULTIPLY(z4, FIX(0.353553391)); /* (c6-c12)/2 */
-
-    tmp21 = tmp12 + tmp10 + tmp11;
-    tmp24 = tmp13 - tmp10 + tmp11;
-    tmp11 += tmp11;
-    tmp22 = z1 + tmp11;                     /* c10 = c6-c12 */
-    tmp27 = z1 - tmp11 - tmp11;             /* c0 = (c6-c12)*2 */
-
-    /* Odd part */
-
-    z1 = (INT32) wsptr[1];
-    z2 = (INT32) wsptr[3];
-    z4 = (INT32) wsptr[5];
-    z3 = MULTIPLY(z4, FIX(1.224744871));                    /* c5 */
-    z4 = (INT32) wsptr[7];
-
-    tmp13 = z2 - z4;
-    tmp15 = MULTIPLY(z1 + tmp13, FIX(0.831253876));         /* c9 */
-    tmp11 = tmp15 + MULTIPLY(z1, FIX(0.513743148));         /* c3-c9 */
-    tmp14 = tmp15 - MULTIPLY(tmp13, FIX(2.176250899));      /* c3+c9 */
-
-    tmp13 = MULTIPLY(z2, - FIX(0.831253876));               /* -c9 */
-    tmp15 = MULTIPLY(z2, - FIX(1.344997024));               /* -c3 */
-    z2 = z1 - z4;
-    tmp12 = z3 + MULTIPLY(z2, FIX(1.406466353));            /* c1 */
-
-    tmp10 = tmp12 + MULTIPLY(z4, FIX(2.457431844)) - tmp15; /* c1+c7 */
-    tmp16 = tmp12 - MULTIPLY(z1, FIX(1.112434820)) + tmp13; /* c1-c13 */
-    tmp12 = MULTIPLY(z2, FIX(1.224744871)) - z3;            /* c5 */
-    z2 = MULTIPLY(z1 + z4, FIX(0.575212477));               /* c11 */
-    tmp13 += z2 + MULTIPLY(z1, FIX(0.475753014)) - z3;      /* c7-c11 */
-    tmp15 += z2 - MULTIPLY(z4, FIX(0.869244010)) + z3;      /* c11+c13 */
-
-    /* Final output stage */
-
-    outptr[0]  = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[14] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[1]  = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[2]  = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[3]  = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[4]  = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[5]  = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[9]  = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[6]  = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp16,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[8]  = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp16,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[7]  = range_limit[(int) RIGHT_SHIFT(tmp27,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-
-    wsptr += 8;		/* advance pointer to next row */
-  }
-}
-
-
-/*
- * Perform dequantization and inverse DCT on one block of coefficients,
- * producing a 16x16 output block.
- *
- * Optimized algorithm with 28 multiplications in the 1-D kernel.
- * cK represents sqrt(2) * cos(K*pi/32).
- */
-
-GLOBAL(void)
-jpeg_idct_16x16 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-		 JCOEFPTR coef_block,
-		 JSAMPARRAY output_buf, JDIMENSION output_col)
-{
-  INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13;
-  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27;
-  INT32 z1, z2, z3, z4;
-  JCOEFPTR inptr;
-  ISLOW_MULT_TYPE * quantptr;
-  int * wsptr;
-  JSAMPROW outptr;
-  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
-  int ctr;
-  int workspace[8*16];	/* buffers data between passes */
-  SHIFT_TEMPS
-
-  /* Pass 1: process columns from input, store into work array. */
-
-  inptr = coef_block;
-  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
-  wsptr = workspace;
-  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
-    /* Even part */
-
-    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
-    tmp0 <<= CONST_BITS;
-    /* Add fudge factor here for final descale. */
-    tmp0 += 1 << (CONST_BITS-PASS1_BITS-1);
-
-    z1 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
-    tmp1 = MULTIPLY(z1, FIX(1.306562965));      /* c4[16] = c2[8] */
-    tmp2 = MULTIPLY(z1, FIX_0_541196100);       /* c12[16] = c6[8] */
-
-    tmp10 = tmp0 + tmp1;
-    tmp11 = tmp0 - tmp1;
-    tmp12 = tmp0 + tmp2;
-    tmp13 = tmp0 - tmp2;
-
-    z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
-    z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
-    z3 = z1 - z2;
-    z4 = MULTIPLY(z3, FIX(0.275899379));        /* c14[16] = c7[8] */
-    z3 = MULTIPLY(z3, FIX(1.387039845));        /* c2[16] = c1[8] */
-
-    tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447);  /* (c6+c2)[16] = (c3+c1)[8] */
-    tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223);  /* (c6-c14)[16] = (c3-c7)[8] */
-    tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */
-    tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */
-
-    tmp20 = tmp10 + tmp0;
-    tmp27 = tmp10 - tmp0;
-    tmp21 = tmp12 + tmp1;
-    tmp26 = tmp12 - tmp1;
-    tmp22 = tmp13 + tmp2;
-    tmp25 = tmp13 - tmp2;
-    tmp23 = tmp11 + tmp3;
-    tmp24 = tmp11 - tmp3;
-
-    /* Odd part */
-
-    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
-    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
-    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
-    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
-
-    tmp11 = z1 + z3;
-
-    tmp1  = MULTIPLY(z1 + z2, FIX(1.353318001));   /* c3 */
-    tmp2  = MULTIPLY(tmp11,   FIX(1.247225013));   /* c5 */
-    tmp3  = MULTIPLY(z1 + z4, FIX(1.093201867));   /* c7 */
-    tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586));   /* c9 */
-    tmp11 = MULTIPLY(tmp11,   FIX(0.666655658));   /* c11 */
-    tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528));   /* c13 */
-    tmp0  = tmp1 + tmp2 + tmp3 -
-	    MULTIPLY(z1, FIX(2.286341144));        /* c7+c5+c3-c1 */
-    tmp13 = tmp10 + tmp11 + tmp12 -
-	    MULTIPLY(z1, FIX(1.835730603));        /* c9+c11+c13-c15 */
-    z1    = MULTIPLY(z2 + z3, FIX(0.138617169));   /* c15 */
-    tmp1  += z1 + MULTIPLY(z2, FIX(0.071888074));  /* c9+c11-c3-c15 */
-    tmp2  += z1 - MULTIPLY(z3, FIX(1.125726048));  /* c5+c7+c15-c3 */
-    z1    = MULTIPLY(z3 - z2, FIX(1.407403738));   /* c1 */
-    tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282));  /* c1+c11-c9-c13 */
-    tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411));  /* c1+c5+c13-c7 */
-    z2    += z4;
-    z1    = MULTIPLY(z2, - FIX(0.666655658));      /* -c11 */
-    tmp1  += z1;
-    tmp3  += z1 + MULTIPLY(z4, FIX(1.065388962));  /* c3+c11+c15-c7 */
-    z2    = MULTIPLY(z2, - FIX(1.247225013));      /* -c5 */
-    tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809));  /* c1+c5+c9-c13 */
-    tmp12 += z2;
-    z2    = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */
-    tmp2  += z2;
-    tmp3  += z2;
-    z2    = MULTIPLY(z4 - z3, FIX(0.410524528));   /* c13 */
-    tmp10 += z2;
-    tmp11 += z2;
-
-    /* Final output stage */
-
-    wsptr[8*0]  = (int) RIGHT_SHIFT(tmp20 + tmp0,  CONST_BITS-PASS1_BITS);
-    wsptr[8*15] = (int) RIGHT_SHIFT(tmp20 - tmp0,  CONST_BITS-PASS1_BITS);
-    wsptr[8*1]  = (int) RIGHT_SHIFT(tmp21 + tmp1,  CONST_BITS-PASS1_BITS);
-    wsptr[8*14] = (int) RIGHT_SHIFT(tmp21 - tmp1,  CONST_BITS-PASS1_BITS);
-    wsptr[8*2]  = (int) RIGHT_SHIFT(tmp22 + tmp2,  CONST_BITS-PASS1_BITS);
-    wsptr[8*13] = (int) RIGHT_SHIFT(tmp22 - tmp2,  CONST_BITS-PASS1_BITS);
-    wsptr[8*3]  = (int) RIGHT_SHIFT(tmp23 + tmp3,  CONST_BITS-PASS1_BITS);
-    wsptr[8*12] = (int) RIGHT_SHIFT(tmp23 - tmp3,  CONST_BITS-PASS1_BITS);
-    wsptr[8*4]  = (int) RIGHT_SHIFT(tmp24 + tmp10, CONST_BITS-PASS1_BITS);
-    wsptr[8*11] = (int) RIGHT_SHIFT(tmp24 - tmp10, CONST_BITS-PASS1_BITS);
-    wsptr[8*5]  = (int) RIGHT_SHIFT(tmp25 + tmp11, CONST_BITS-PASS1_BITS);
-    wsptr[8*10] = (int) RIGHT_SHIFT(tmp25 - tmp11, CONST_BITS-PASS1_BITS);
-    wsptr[8*6]  = (int) RIGHT_SHIFT(tmp26 + tmp12, CONST_BITS-PASS1_BITS);
-    wsptr[8*9]  = (int) RIGHT_SHIFT(tmp26 - tmp12, CONST_BITS-PASS1_BITS);
-    wsptr[8*7]  = (int) RIGHT_SHIFT(tmp27 + tmp13, CONST_BITS-PASS1_BITS);
-    wsptr[8*8]  = (int) RIGHT_SHIFT(tmp27 - tmp13, CONST_BITS-PASS1_BITS);
-  }
-
-  /* Pass 2: process 16 rows from work array, store into output array. */
-
-  wsptr = workspace;
-  for (ctr = 0; ctr < 16; ctr++) {
-    outptr = output_buf[ctr] + output_col;
-
-    /* Even part */
-
-    /* Add fudge factor here for final descale. */
-    tmp0 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
-    tmp0 <<= CONST_BITS;
-
-    z1 = (INT32) wsptr[4];
-    tmp1 = MULTIPLY(z1, FIX(1.306562965));      /* c4[16] = c2[8] */
-    tmp2 = MULTIPLY(z1, FIX_0_541196100);       /* c12[16] = c6[8] */
-
-    tmp10 = tmp0 + tmp1;
-    tmp11 = tmp0 - tmp1;
-    tmp12 = tmp0 + tmp2;
-    tmp13 = tmp0 - tmp2;
-
-    z1 = (INT32) wsptr[2];
-    z2 = (INT32) wsptr[6];
-    z3 = z1 - z2;
-    z4 = MULTIPLY(z3, FIX(0.275899379));        /* c14[16] = c7[8] */
-    z3 = MULTIPLY(z3, FIX(1.387039845));        /* c2[16] = c1[8] */
-
-    tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447);  /* (c6+c2)[16] = (c3+c1)[8] */
-    tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223);  /* (c6-c14)[16] = (c3-c7)[8] */
-    tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */
-    tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */
-
-    tmp20 = tmp10 + tmp0;
-    tmp27 = tmp10 - tmp0;
-    tmp21 = tmp12 + tmp1;
-    tmp26 = tmp12 - tmp1;
-    tmp22 = tmp13 + tmp2;
-    tmp25 = tmp13 - tmp2;
-    tmp23 = tmp11 + tmp3;
-    tmp24 = tmp11 - tmp3;
-
-    /* Odd part */
-
-    z1 = (INT32) wsptr[1];
-    z2 = (INT32) wsptr[3];
-    z3 = (INT32) wsptr[5];
-    z4 = (INT32) wsptr[7];
-
-    tmp11 = z1 + z3;
-
-    tmp1  = MULTIPLY(z1 + z2, FIX(1.353318001));   /* c3 */
-    tmp2  = MULTIPLY(tmp11,   FIX(1.247225013));   /* c5 */
-    tmp3  = MULTIPLY(z1 + z4, FIX(1.093201867));   /* c7 */
-    tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586));   /* c9 */
-    tmp11 = MULTIPLY(tmp11,   FIX(0.666655658));   /* c11 */
-    tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528));   /* c13 */
-    tmp0  = tmp1 + tmp2 + tmp3 -
-	    MULTIPLY(z1, FIX(2.286341144));        /* c7+c5+c3-c1 */
-    tmp13 = tmp10 + tmp11 + tmp12 -
-	    MULTIPLY(z1, FIX(1.835730603));        /* c9+c11+c13-c15 */
-    z1    = MULTIPLY(z2 + z3, FIX(0.138617169));   /* c15 */
-    tmp1  += z1 + MULTIPLY(z2, FIX(0.071888074));  /* c9+c11-c3-c15 */
-    tmp2  += z1 - MULTIPLY(z3, FIX(1.125726048));  /* c5+c7+c15-c3 */
-    z1    = MULTIPLY(z3 - z2, FIX(1.407403738));   /* c1 */
-    tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282));  /* c1+c11-c9-c13 */
-    tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411));  /* c1+c5+c13-c7 */
-    z2    += z4;
-    z1    = MULTIPLY(z2, - FIX(0.666655658));      /* -c11 */
-    tmp1  += z1;
-    tmp3  += z1 + MULTIPLY(z4, FIX(1.065388962));  /* c3+c11+c15-c7 */
-    z2    = MULTIPLY(z2, - FIX(1.247225013));      /* -c5 */
-    tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809));  /* c1+c5+c9-c13 */
-    tmp12 += z2;
-    z2    = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */
-    tmp2  += z2;
-    tmp3  += z2;
-    z2    = MULTIPLY(z4 - z3, FIX(0.410524528));   /* c13 */
-    tmp10 += z2;
-    tmp11 += z2;
-
-    /* Final output stage */
-
-    outptr[0]  = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp0,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[15] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp0,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[1]  = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp1,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[14] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp1,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[2]  = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp2,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp2,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[3]  = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp3,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp3,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[4]  = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp10,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp10,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[5]  = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp11,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp11,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[6]  = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp12,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[9]  = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp12,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[7]  = range_limit[(int) RIGHT_SHIFT(tmp27 + tmp13,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[8]  = range_limit[(int) RIGHT_SHIFT(tmp27 - tmp13,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-
-    wsptr += 8;		/* advance pointer to next row */
-  }
-}
-
-
-/*
- * Perform dequantization and inverse DCT on one block of coefficients,
- * producing a 16x8 output block.
- *
- * 8-point IDCT in pass 1 (columns), 16-point in pass 2 (rows).
- */
-
-GLOBAL(void)
-jpeg_idct_16x8 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-		JCOEFPTR coef_block,
-		JSAMPARRAY output_buf, JDIMENSION output_col)
-{
-  INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13;
-  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27;
-  INT32 z1, z2, z3, z4;
-  JCOEFPTR inptr;
-  ISLOW_MULT_TYPE * quantptr;
-  int * wsptr;
-  JSAMPROW outptr;
-  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
-  int ctr;
-  int workspace[8*8];	/* buffers data between passes */
-  SHIFT_TEMPS
-
-  /* Pass 1: process columns from input, store into work array. */
-  /* Note results are scaled up by sqrt(8) compared to a true IDCT; */
-  /* furthermore, we scale the results by 2**PASS1_BITS. */
-
-  inptr = coef_block;
-  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
-  wsptr = workspace;
-  for (ctr = DCTSIZE; ctr > 0; ctr--) {
-    /* Due to quantization, we will usually find that many of the input
-     * coefficients are zero, especially the AC terms.  We can exploit this
-     * by short-circuiting the IDCT calculation for any column in which all
-     * the AC terms are zero.  In that case each output is equal to the
-     * DC coefficient (with scale factor as needed).
-     * With typical images and quantization tables, half or more of the
-     * column DCT calculations can be simplified this way.
-     */
-    
-    if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 &&
-	inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&
-	inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&
-	inptr[DCTSIZE*7] == 0) {
-      /* AC terms all zero */
-      int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) << PASS1_BITS;
-      
-      wsptr[DCTSIZE*0] = dcval;
-      wsptr[DCTSIZE*1] = dcval;
-      wsptr[DCTSIZE*2] = dcval;
-      wsptr[DCTSIZE*3] = dcval;
-      wsptr[DCTSIZE*4] = dcval;
-      wsptr[DCTSIZE*5] = dcval;
-      wsptr[DCTSIZE*6] = dcval;
-      wsptr[DCTSIZE*7] = dcval;
-      
-      inptr++;			/* advance pointers to next column */
-      quantptr++;
-      wsptr++;
-      continue;
-    }
-    
-    /* Even part: reverse the even part of the forward DCT. */
-    /* The rotator is sqrt(2)*c(-6). */
-    
-    z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
-    z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
-    
-    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);
-    tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865);
-    tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065);
-    
-    z2 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
-    z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
-    z2 <<= CONST_BITS;
-    z3 <<= CONST_BITS;
-    /* Add fudge factor here for final descale. */
-    z2 += ONE << (CONST_BITS-PASS1_BITS-1);
-
-    tmp0 = z2 + z3;
-    tmp1 = z2 - z3;
-    
-    tmp10 = tmp0 + tmp2;
-    tmp13 = tmp0 - tmp2;
-    tmp11 = tmp1 + tmp3;
-    tmp12 = tmp1 - tmp3;
-    
-    /* Odd part per figure 8; the matrix is unitary and hence its
-     * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
-     */
-    
-    tmp0 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
-    tmp1 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
-    tmp2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
-    tmp3 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
-    
-    z2 = tmp0 + tmp2;
-    z3 = tmp1 + tmp3;
-
-    z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* sqrt(2) * c3 */
-    z2 = MULTIPLY(z2, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
-    z3 = MULTIPLY(z3, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
-    z2 += z1;
-    z3 += z1;
-
-    z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
-    tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
-    tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
-    tmp0 += z1 + z2;
-    tmp3 += z1 + z3;
-
-    z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
-    tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
-    tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
-    tmp1 += z1 + z3;
-    tmp2 += z1 + z2;
-    
-    /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
-    
-    wsptr[DCTSIZE*0] = (int) RIGHT_SHIFT(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
-    wsptr[DCTSIZE*7] = (int) RIGHT_SHIFT(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
-    wsptr[DCTSIZE*1] = (int) RIGHT_SHIFT(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
-    wsptr[DCTSIZE*6] = (int) RIGHT_SHIFT(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
-    wsptr[DCTSIZE*2] = (int) RIGHT_SHIFT(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
-    wsptr[DCTSIZE*5] = (int) RIGHT_SHIFT(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
-    wsptr[DCTSIZE*3] = (int) RIGHT_SHIFT(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
-    wsptr[DCTSIZE*4] = (int) RIGHT_SHIFT(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
-    
-    inptr++;			/* advance pointers to next column */
-    quantptr++;
-    wsptr++;
-  }
-
-  /* Pass 2: process 8 rows from work array, store into output array.
-   * 16-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/32).
-   */
-  wsptr = workspace;
-  for (ctr = 0; ctr < 8; ctr++) {
-    outptr = output_buf[ctr] + output_col;
-
-    /* Even part */
-
-    /* Add fudge factor here for final descale. */
-    tmp0 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
-    tmp0 <<= CONST_BITS;
-
-    z1 = (INT32) wsptr[4];
-    tmp1 = MULTIPLY(z1, FIX(1.306562965));      /* c4[16] = c2[8] */
-    tmp2 = MULTIPLY(z1, FIX_0_541196100);       /* c12[16] = c6[8] */
-
-    tmp10 = tmp0 + tmp1;
-    tmp11 = tmp0 - tmp1;
-    tmp12 = tmp0 + tmp2;
-    tmp13 = tmp0 - tmp2;
-
-    z1 = (INT32) wsptr[2];
-    z2 = (INT32) wsptr[6];
-    z3 = z1 - z2;
-    z4 = MULTIPLY(z3, FIX(0.275899379));        /* c14[16] = c7[8] */
-    z3 = MULTIPLY(z3, FIX(1.387039845));        /* c2[16] = c1[8] */
-
-    tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447);  /* (c6+c2)[16] = (c3+c1)[8] */
-    tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223);  /* (c6-c14)[16] = (c3-c7)[8] */
-    tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */
-    tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */
-
-    tmp20 = tmp10 + tmp0;
-    tmp27 = tmp10 - tmp0;
-    tmp21 = tmp12 + tmp1;
-    tmp26 = tmp12 - tmp1;
-    tmp22 = tmp13 + tmp2;
-    tmp25 = tmp13 - tmp2;
-    tmp23 = tmp11 + tmp3;
-    tmp24 = tmp11 - tmp3;
-
-    /* Odd part */
-
-    z1 = (INT32) wsptr[1];
-    z2 = (INT32) wsptr[3];
-    z3 = (INT32) wsptr[5];
-    z4 = (INT32) wsptr[7];
-
-    tmp11 = z1 + z3;
-
-    tmp1  = MULTIPLY(z1 + z2, FIX(1.353318001));   /* c3 */
-    tmp2  = MULTIPLY(tmp11,   FIX(1.247225013));   /* c5 */
-    tmp3  = MULTIPLY(z1 + z4, FIX(1.093201867));   /* c7 */
-    tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586));   /* c9 */
-    tmp11 = MULTIPLY(tmp11,   FIX(0.666655658));   /* c11 */
-    tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528));   /* c13 */
-    tmp0  = tmp1 + tmp2 + tmp3 -
-	    MULTIPLY(z1, FIX(2.286341144));        /* c7+c5+c3-c1 */
-    tmp13 = tmp10 + tmp11 + tmp12 -
-	    MULTIPLY(z1, FIX(1.835730603));        /* c9+c11+c13-c15 */
-    z1    = MULTIPLY(z2 + z3, FIX(0.138617169));   /* c15 */
-    tmp1  += z1 + MULTIPLY(z2, FIX(0.071888074));  /* c9+c11-c3-c15 */
-    tmp2  += z1 - MULTIPLY(z3, FIX(1.125726048));  /* c5+c7+c15-c3 */
-    z1    = MULTIPLY(z3 - z2, FIX(1.407403738));   /* c1 */
-    tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282));  /* c1+c11-c9-c13 */
-    tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411));  /* c1+c5+c13-c7 */
-    z2    += z4;
-    z1    = MULTIPLY(z2, - FIX(0.666655658));      /* -c11 */
-    tmp1  += z1;
-    tmp3  += z1 + MULTIPLY(z4, FIX(1.065388962));  /* c3+c11+c15-c7 */
-    z2    = MULTIPLY(z2, - FIX(1.247225013));      /* -c5 */
-    tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809));  /* c1+c5+c9-c13 */
-    tmp12 += z2;
-    z2    = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */
-    tmp2  += z2;
-    tmp3  += z2;
-    z2    = MULTIPLY(z4 - z3, FIX(0.410524528));   /* c13 */
-    tmp10 += z2;
-    tmp11 += z2;
-
-    /* Final output stage */
-
-    outptr[0]  = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp0,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[15] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp0,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[1]  = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp1,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[14] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp1,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[2]  = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp2,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp2,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[3]  = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp3,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp3,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[4]  = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp10,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp10,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[5]  = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp11,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp11,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[6]  = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp12,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[9]  = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp12,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[7]  = range_limit[(int) RIGHT_SHIFT(tmp27 + tmp13,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[8]  = range_limit[(int) RIGHT_SHIFT(tmp27 - tmp13,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-
-    wsptr += 8;		/* advance pointer to next row */
-  }
-}
-
-
-/*
- * Perform dequantization and inverse DCT on one block of coefficients,
- * producing a 14x7 output block.
- *
- * 7-point IDCT in pass 1 (columns), 14-point in pass 2 (rows).
- */
-
-GLOBAL(void)
-jpeg_idct_14x7 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-		JCOEFPTR coef_block,
-		JSAMPARRAY output_buf, JDIMENSION output_col)
-{
-  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
-  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26;
-  INT32 z1, z2, z3, z4;
-  JCOEFPTR inptr;
-  ISLOW_MULT_TYPE * quantptr;
-  int * wsptr;
-  JSAMPROW outptr;
-  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
-  int ctr;
-  int workspace[8*7];	/* buffers data between passes */
-  SHIFT_TEMPS
-
-  /* Pass 1: process columns from input, store into work array.
-   * 7-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/14).
-   */
-  inptr = coef_block;
-  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
-  wsptr = workspace;
-  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
-    /* Even part */
-
-    tmp23 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
-    tmp23 <<= CONST_BITS;
-    /* Add fudge factor here for final descale. */
-    tmp23 += ONE << (CONST_BITS-PASS1_BITS-1);
-
-    z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
-    z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
-    z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
-
-    tmp20 = MULTIPLY(z2 - z3, FIX(0.881747734));       /* c4 */
-    tmp22 = MULTIPLY(z1 - z2, FIX(0.314692123));       /* c6 */
-    tmp21 = tmp20 + tmp22 + tmp23 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */
-    tmp10 = z1 + z3;
-    z2 -= tmp10;
-    tmp10 = MULTIPLY(tmp10, FIX(1.274162392)) + tmp23; /* c2 */
-    tmp20 += tmp10 - MULTIPLY(z3, FIX(0.077722536));   /* c2-c4-c6 */
-    tmp22 += tmp10 - MULTIPLY(z1, FIX(2.470602249));   /* c2+c4+c6 */
-    tmp23 += MULTIPLY(z2, FIX(1.414213562));           /* c0 */
-
-    /* Odd part */
-
-    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
-    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
-    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
-
-    tmp11 = MULTIPLY(z1 + z2, FIX(0.935414347));       /* (c3+c1-c5)/2 */
-    tmp12 = MULTIPLY(z1 - z2, FIX(0.170262339));       /* (c3+c5-c1)/2 */
-    tmp10 = tmp11 - tmp12;
-    tmp11 += tmp12;
-    tmp12 = MULTIPLY(z2 + z3, - FIX(1.378756276));     /* -c1 */
-    tmp11 += tmp12;
-    z2 = MULTIPLY(z1 + z3, FIX(0.613604268));          /* c5 */
-    tmp10 += z2;
-    tmp12 += z2 + MULTIPLY(z3, FIX(1.870828693));      /* c3+c1-c5 */
-
-    /* Final output stage */
-
-    wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
-    wsptr[8*6] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
-    wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
-    wsptr[8*5] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
-    wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
-    wsptr[8*4] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
-    wsptr[8*3] = (int) RIGHT_SHIFT(tmp23, CONST_BITS-PASS1_BITS);
-  }
-
-  /* Pass 2: process 7 rows from work array, store into output array.
-   * 14-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/28).
-   */
-  wsptr = workspace;
-  for (ctr = 0; ctr < 7; ctr++) {
-    outptr = output_buf[ctr] + output_col;
-
-    /* Even part */
-
-    /* Add fudge factor here for final descale. */
-    z1 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
-    z1 <<= CONST_BITS;
-    z4 = (INT32) wsptr[4];
-    z2 = MULTIPLY(z4, FIX(1.274162392));         /* c4 */
-    z3 = MULTIPLY(z4, FIX(0.314692123));         /* c12 */
-    z4 = MULTIPLY(z4, FIX(0.881747734));         /* c8 */
-
-    tmp10 = z1 + z2;
-    tmp11 = z1 + z3;
-    tmp12 = z1 - z4;
-
-    tmp23 = z1 - ((z2 + z3 - z4) << 1);          /* c0 = (c4+c12-c8)*2 */
-
-    z1 = (INT32) wsptr[2];
-    z2 = (INT32) wsptr[6];
-
-    z3 = MULTIPLY(z1 + z2, FIX(1.105676686));    /* c6 */
-
-    tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */
-    tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */
-    tmp15 = MULTIPLY(z1, FIX(0.613604268)) -     /* c10 */
-	    MULTIPLY(z2, FIX(1.378756276));      /* c2 */
-
-    tmp20 = tmp10 + tmp13;
-    tmp26 = tmp10 - tmp13;
-    tmp21 = tmp11 + tmp14;
-    tmp25 = tmp11 - tmp14;
-    tmp22 = tmp12 + tmp15;
-    tmp24 = tmp12 - tmp15;
-
-    /* Odd part */
-
-    z1 = (INT32) wsptr[1];
-    z2 = (INT32) wsptr[3];
-    z3 = (INT32) wsptr[5];
-    z4 = (INT32) wsptr[7];
-    z4 <<= CONST_BITS;
-
-    tmp14 = z1 + z3;
-    tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607));           /* c3 */
-    tmp12 = MULTIPLY(tmp14, FIX(1.197448846));             /* c5 */
-    tmp10 = tmp11 + tmp12 + z4 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */
-    tmp14 = MULTIPLY(tmp14, FIX(0.752406978));             /* c9 */
-    tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426));        /* c9+c11-c13 */
-    z1    -= z2;
-    tmp15 = MULTIPLY(z1, FIX(0.467085129)) - z4;           /* c11 */
-    tmp16 += tmp15;
-    tmp13 = MULTIPLY(z2 + z3, - FIX(0.158341681)) - z4;    /* -c13 */
-    tmp11 += tmp13 - MULTIPLY(z2, FIX(0.424103948));       /* c3-c9-c13 */
-    tmp12 += tmp13 - MULTIPLY(z3, FIX(2.373959773));       /* c3+c5-c13 */
-    tmp13 = MULTIPLY(z3 - z2, FIX(1.405321284));           /* c1 */
-    tmp14 += tmp13 + z4 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */
-    tmp15 += tmp13 + MULTIPLY(z2, FIX(0.674957567));       /* c1+c11-c5 */
-
-    tmp13 = ((z1 - z3) << CONST_BITS) + z4;
-
-    /* Final output stage */
-
-    outptr[0]  = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[1]  = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[2]  = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[3]  = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[4]  = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[9]  = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[5]  = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[8]  = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[6]  = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp16,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[7]  = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp16,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-
-    wsptr += 8;		/* advance pointer to next row */
-  }
-}
-
-
-/*
- * Perform dequantization and inverse DCT on one block of coefficients,
- * producing a 12x6 output block.
- *
- * 6-point IDCT in pass 1 (columns), 12-point in pass 2 (rows).
- */
-
-GLOBAL(void)
-jpeg_idct_12x6 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-		JCOEFPTR coef_block,
-		JSAMPARRAY output_buf, JDIMENSION output_col)
-{
-  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
-  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25;
-  INT32 z1, z2, z3, z4;
-  JCOEFPTR inptr;
-  ISLOW_MULT_TYPE * quantptr;
-  int * wsptr;
-  JSAMPROW outptr;
-  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
-  int ctr;
-  int workspace[8*6];	/* buffers data between passes */
-  SHIFT_TEMPS
-
-  /* Pass 1: process columns from input, store into work array.
-   * 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
-   */
-  inptr = coef_block;
-  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
-  wsptr = workspace;
-  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
-    /* Even part */
-
-    tmp10 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
-    tmp10 <<= CONST_BITS;
-    /* Add fudge factor here for final descale. */
-    tmp10 += ONE << (CONST_BITS-PASS1_BITS-1);
-    tmp12 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
-    tmp20 = MULTIPLY(tmp12, FIX(0.707106781));   /* c4 */
-    tmp11 = tmp10 + tmp20;
-    tmp21 = RIGHT_SHIFT(tmp10 - tmp20 - tmp20, CONST_BITS-PASS1_BITS);
-    tmp20 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
-    tmp10 = MULTIPLY(tmp20, FIX(1.224744871));   /* c2 */
-    tmp20 = tmp11 + tmp10;
-    tmp22 = tmp11 - tmp10;
-
-    /* Odd part */
-
-    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
-    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
-    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
-    tmp11 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
-    tmp10 = tmp11 + ((z1 + z2) << CONST_BITS);
-    tmp12 = tmp11 + ((z3 - z2) << CONST_BITS);
-    tmp11 = (z1 - z2 - z3) << PASS1_BITS;
-
-    /* Final output stage */
-
-    wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
-    wsptr[8*5] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
-    wsptr[8*1] = (int) (tmp21 + tmp11);
-    wsptr[8*4] = (int) (tmp21 - tmp11);
-    wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
-    wsptr[8*3] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
-  }
-
-  /* Pass 2: process 6 rows from work array, store into output array.
-   * 12-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/24).
-   */
-  wsptr = workspace;
-  for (ctr = 0; ctr < 6; ctr++) {
-    outptr = output_buf[ctr] + output_col;
-
-    /* Even part */
-
-    /* Add fudge factor here for final descale. */
-    z3 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
-    z3 <<= CONST_BITS;
-
-    z4 = (INT32) wsptr[4];
-    z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */
-
-    tmp10 = z3 + z4;
-    tmp11 = z3 - z4;
-
-    z1 = (INT32) wsptr[2];
-    z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */
-    z1 <<= CONST_BITS;
-    z2 = (INT32) wsptr[6];
-    z2 <<= CONST_BITS;
-
-    tmp12 = z1 - z2;
-
-    tmp21 = z3 + tmp12;
-    tmp24 = z3 - tmp12;
-
-    tmp12 = z4 + z2;
-
-    tmp20 = tmp10 + tmp12;
-    tmp25 = tmp10 - tmp12;
-
-    tmp12 = z4 - z1 - z2;
-
-    tmp22 = tmp11 + tmp12;
-    tmp23 = tmp11 - tmp12;
-
-    /* Odd part */
-
-    z1 = (INT32) wsptr[1];
-    z2 = (INT32) wsptr[3];
-    z3 = (INT32) wsptr[5];
-    z4 = (INT32) wsptr[7];
-
-    tmp11 = MULTIPLY(z2, FIX(1.306562965));                  /* c3 */
-    tmp14 = MULTIPLY(z2, - FIX_0_541196100);                 /* -c9 */
-
-    tmp10 = z1 + z3;
-    tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669));          /* c7 */
-    tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384));       /* c5-c7 */
-    tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716));  /* c1-c5 */
-    tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580));           /* -(c7+c11) */
-    tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */
-    tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */
-    tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) -        /* c7-c11 */
-	     MULTIPLY(z4, FIX(1.982889723));                 /* c5+c7 */
-
-    z1 -= z4;
-    z2 -= z3;
-    z3 = MULTIPLY(z1 + z2, FIX_0_541196100);                 /* c9 */
-    tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865);              /* c3-c9 */
-    tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065);              /* c3+c9 */
-
-    /* Final output stage */
-
-    outptr[0]  = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[1]  = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[2]  = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[9]  = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[3]  = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[8]  = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[4]  = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[7]  = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[5]  = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[6]  = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-
-    wsptr += 8;		/* advance pointer to next row */
-  }
-}
-
-
-/*
- * Perform dequantization and inverse DCT on one block of coefficients,
- * producing a 10x5 output block.
- *
- * 5-point IDCT in pass 1 (columns), 10-point in pass 2 (rows).
- */
-
-GLOBAL(void)
-jpeg_idct_10x5 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-		JCOEFPTR coef_block,
-		JSAMPARRAY output_buf, JDIMENSION output_col)
-{
-  INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
-  INT32 tmp20, tmp21, tmp22, tmp23, tmp24;
-  INT32 z1, z2, z3, z4;
-  JCOEFPTR inptr;
-  ISLOW_MULT_TYPE * quantptr;
-  int * wsptr;
-  JSAMPROW outptr;
-  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
-  int ctr;
-  int workspace[8*5];	/* buffers data between passes */
-  SHIFT_TEMPS
-
-  /* Pass 1: process columns from input, store into work array.
-   * 5-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/10).
-   */
-  inptr = coef_block;
-  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
-  wsptr = workspace;
-  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
-    /* Even part */
-
-    tmp12 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
-    tmp12 <<= CONST_BITS;
-    /* Add fudge factor here for final descale. */
-    tmp12 += ONE << (CONST_BITS-PASS1_BITS-1);
-    tmp13 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
-    tmp14 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
-    z1 = MULTIPLY(tmp13 + tmp14, FIX(0.790569415)); /* (c2+c4)/2 */
-    z2 = MULTIPLY(tmp13 - tmp14, FIX(0.353553391)); /* (c2-c4)/2 */
-    z3 = tmp12 + z2;
-    tmp10 = z3 + z1;
-    tmp11 = z3 - z1;
-    tmp12 -= z2 << 2;
-
-    /* Odd part */
-
-    z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
-    z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
-
-    z1 = MULTIPLY(z2 + z3, FIX(0.831253876));       /* c3 */
-    tmp13 = z1 + MULTIPLY(z2, FIX(0.513743148));    /* c1-c3 */
-    tmp14 = z1 - MULTIPLY(z3, FIX(2.176250899));    /* c1+c3 */
-
-    /* Final output stage */
-
-    wsptr[8*0] = (int) RIGHT_SHIFT(tmp10 + tmp13, CONST_BITS-PASS1_BITS);
-    wsptr[8*4] = (int) RIGHT_SHIFT(tmp10 - tmp13, CONST_BITS-PASS1_BITS);
-    wsptr[8*1] = (int) RIGHT_SHIFT(tmp11 + tmp14, CONST_BITS-PASS1_BITS);
-    wsptr[8*3] = (int) RIGHT_SHIFT(tmp11 - tmp14, CONST_BITS-PASS1_BITS);
-    wsptr[8*2] = (int) RIGHT_SHIFT(tmp12, CONST_BITS-PASS1_BITS);
-  }
-
-  /* Pass 2: process 5 rows from work array, store into output array.
-   * 10-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/20).
-   */
-  wsptr = workspace;
-  for (ctr = 0; ctr < 5; ctr++) {
-    outptr = output_buf[ctr] + output_col;
-
-    /* Even part */
-
-    /* Add fudge factor here for final descale. */
-    z3 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
-    z3 <<= CONST_BITS;
-    z4 = (INT32) wsptr[4];
-    z1 = MULTIPLY(z4, FIX(1.144122806));         /* c4 */
-    z2 = MULTIPLY(z4, FIX(0.437016024));         /* c8 */
-    tmp10 = z3 + z1;
-    tmp11 = z3 - z2;
-
-    tmp22 = z3 - ((z1 - z2) << 1);               /* c0 = (c4-c8)*2 */
-
-    z2 = (INT32) wsptr[2];
-    z3 = (INT32) wsptr[6];
-
-    z1 = MULTIPLY(z2 + z3, FIX(0.831253876));    /* c6 */
-    tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */
-    tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */
-
-    tmp20 = tmp10 + tmp12;
-    tmp24 = tmp10 - tmp12;
-    tmp21 = tmp11 + tmp13;
-    tmp23 = tmp11 - tmp13;
-
-    /* Odd part */
-
-    z1 = (INT32) wsptr[1];
-    z2 = (INT32) wsptr[3];
-    z3 = (INT32) wsptr[5];
-    z3 <<= CONST_BITS;
-    z4 = (INT32) wsptr[7];
-
-    tmp11 = z2 + z4;
-    tmp13 = z2 - z4;
-
-    tmp12 = MULTIPLY(tmp13, FIX(0.309016994));        /* (c3-c7)/2 */
-
-    z2 = MULTIPLY(tmp11, FIX(0.951056516));           /* (c3+c7)/2 */
-    z4 = z3 + tmp12;
-
-    tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */
-    tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */
-
-    z2 = MULTIPLY(tmp11, FIX(0.587785252));           /* (c1-c9)/2 */
-    z4 = z3 - tmp12 - (tmp13 << (CONST_BITS - 1));
-
-    tmp12 = ((z1 - tmp13) << CONST_BITS) - z3;
-
-    tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */
-    tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */
-
-    /* Final output stage */
-
-    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-
-    wsptr += 8;		/* advance pointer to next row */
-  }
-}
-
-
-/*
- * Perform dequantization and inverse DCT on one block of coefficients,
- * producing a 8x4 output block.
- *
- * 4-point IDCT in pass 1 (columns), 8-point in pass 2 (rows).
- */
-
-GLOBAL(void)
-jpeg_idct_8x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	       JCOEFPTR coef_block,
-	       JSAMPARRAY output_buf, JDIMENSION output_col)
-{
-  INT32 tmp0, tmp1, tmp2, tmp3;
-  INT32 tmp10, tmp11, tmp12, tmp13;
-  INT32 z1, z2, z3;
-  JCOEFPTR inptr;
-  ISLOW_MULT_TYPE * quantptr;
-  int * wsptr;
-  JSAMPROW outptr;
-  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
-  int ctr;
-  int workspace[8*4];	/* buffers data between passes */
-  SHIFT_TEMPS
-
-  /* Pass 1: process columns from input, store into work array.
-   * 4-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
-   */
-  inptr = coef_block;
-  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
-  wsptr = workspace;
-  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
-    /* Even part */
-
-    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
-    tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
-
-    tmp10 = (tmp0 + tmp2) << PASS1_BITS;
-    tmp12 = (tmp0 - tmp2) << PASS1_BITS;
-
-    /* Odd part */
-    /* Same rotation as in the even part of the 8x8 LL&M IDCT */
-
-    z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
-    z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
-
-    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);               /* c6 */
-    /* Add fudge factor here for final descale. */
-    z1 += ONE << (CONST_BITS-PASS1_BITS-1);
-    tmp0 = RIGHT_SHIFT(z1 + MULTIPLY(z2, FIX_0_765366865), /* c2-c6 */
-		       CONST_BITS-PASS1_BITS);
-    tmp2 = RIGHT_SHIFT(z1 - MULTIPLY(z3, FIX_1_847759065), /* c2+c6 */
-		       CONST_BITS-PASS1_BITS);
-
-    /* Final output stage */
-
-    wsptr[8*0] = (int) (tmp10 + tmp0);
-    wsptr[8*3] = (int) (tmp10 - tmp0);
-    wsptr[8*1] = (int) (tmp12 + tmp2);
-    wsptr[8*2] = (int) (tmp12 - tmp2);
-  }
-
-  /* Pass 2: process rows from work array, store into output array. */
-  /* Note that we must descale the results by a factor of 8 == 2**3, */
-  /* and also undo the PASS1_BITS scaling. */
-
-  wsptr = workspace;
-  for (ctr = 0; ctr < 4; ctr++) {
-    outptr = output_buf[ctr] + output_col;
-
-    /* Even part: reverse the even part of the forward DCT. */
-    /* The rotator is sqrt(2)*c(-6). */
-
-    z2 = (INT32) wsptr[2];
-    z3 = (INT32) wsptr[6];
-    
-    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);
-    tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865);
-    tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065);
-    
-    /* Add fudge factor here for final descale. */
-    z2 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
-    z3 = (INT32) wsptr[4];
-    
-    tmp0 = (z2 + z3) << CONST_BITS;
-    tmp1 = (z2 - z3) << CONST_BITS;
-    
-    tmp10 = tmp0 + tmp2;
-    tmp13 = tmp0 - tmp2;
-    tmp11 = tmp1 + tmp3;
-    tmp12 = tmp1 - tmp3;
-
-    /* Odd part per figure 8; the matrix is unitary and hence its
-     * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
-     */
-
-    tmp0 = (INT32) wsptr[7];
-    tmp1 = (INT32) wsptr[5];
-    tmp2 = (INT32) wsptr[3];
-    tmp3 = (INT32) wsptr[1];
-
-    z2 = tmp0 + tmp2;
-    z3 = tmp1 + tmp3;
-
-    z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* sqrt(2) * c3 */
-    z2 = MULTIPLY(z2, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
-    z3 = MULTIPLY(z3, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
-    z2 += z1;
-    z3 += z1;
-
-    z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
-    tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
-    tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
-    tmp0 += z1 + z2;
-    tmp3 += z1 + z3;
-
-    z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
-    tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
-    tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
-    tmp1 += z1 + z3;
-    tmp2 += z1 + z2;
-
-    /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
-
-    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp3,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp3,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp2,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp2,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp1,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp1,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13 + tmp0,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp13 - tmp0,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-
-    wsptr += DCTSIZE;		/* advance pointer to next row */
-  }
-}
-
-
-/*
- * Perform dequantization and inverse DCT on one block of coefficients,
- * producing a reduced-size 6x3 output block.
- *
- * 3-point IDCT in pass 1 (columns), 6-point in pass 2 (rows).
- */
-
-GLOBAL(void)
-jpeg_idct_6x3 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	       JCOEFPTR coef_block,
-	       JSAMPARRAY output_buf, JDIMENSION output_col)
-{
-  INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12;
-  INT32 z1, z2, z3;
-  JCOEFPTR inptr;
-  ISLOW_MULT_TYPE * quantptr;
-  int * wsptr;
-  JSAMPROW outptr;
-  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
-  int ctr;
-  int workspace[6*3];	/* buffers data between passes */
-  SHIFT_TEMPS
-
-  /* Pass 1: process columns from input, store into work array.
-   * 3-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/6).
-   */
-  inptr = coef_block;
-  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
-  wsptr = workspace;
-  for (ctr = 0; ctr < 6; ctr++, inptr++, quantptr++, wsptr++) {
-    /* Even part */
-
-    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
-    tmp0 <<= CONST_BITS;
-    /* Add fudge factor here for final descale. */
-    tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
-    tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
-    tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */
-    tmp10 = tmp0 + tmp12;
-    tmp2 = tmp0 - tmp12 - tmp12;
-
-    /* Odd part */
-
-    tmp12 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
-    tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */
-
-    /* Final output stage */
-
-    wsptr[6*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
-    wsptr[6*2] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
-    wsptr[6*1] = (int) RIGHT_SHIFT(tmp2, CONST_BITS-PASS1_BITS);
-  }
-  
-  /* Pass 2: process 3 rows from work array, store into output array.
-   * 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
-   */
-  wsptr = workspace;
-  for (ctr = 0; ctr < 3; ctr++) {
-    outptr = output_buf[ctr] + output_col;
-
-    /* Even part */
-
-    /* Add fudge factor here for final descale. */
-    tmp0 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
-    tmp0 <<= CONST_BITS;
-    tmp2 = (INT32) wsptr[4];
-    tmp10 = MULTIPLY(tmp2, FIX(0.707106781));   /* c4 */
-    tmp1 = tmp0 + tmp10;
-    tmp11 = tmp0 - tmp10 - tmp10;
-    tmp10 = (INT32) wsptr[2];
-    tmp0 = MULTIPLY(tmp10, FIX(1.224744871));   /* c2 */
-    tmp10 = tmp1 + tmp0;
-    tmp12 = tmp1 - tmp0;
-
-    /* Odd part */
-
-    z1 = (INT32) wsptr[1];
-    z2 = (INT32) wsptr[3];
-    z3 = (INT32) wsptr[5];
-    tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
-    tmp0 = tmp1 + ((z1 + z2) << CONST_BITS);
-    tmp2 = tmp1 + ((z3 - z2) << CONST_BITS);
-    tmp1 = (z1 - z2 - z3) << CONST_BITS;
-
-    /* Final output stage */
-
-    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-
-    wsptr += 6;		/* advance pointer to next row */
-  }
-}
-
-
-/*
- * Perform dequantization and inverse DCT on one block of coefficients,
- * producing a 4x2 output block.
- *
- * 2-point IDCT in pass 1 (columns), 4-point in pass 2 (rows).
- */
-
-GLOBAL(void)
-jpeg_idct_4x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	       JCOEFPTR coef_block,
-	       JSAMPARRAY output_buf, JDIMENSION output_col)
-{
-  INT32 tmp0, tmp2, tmp10, tmp12;
-  INT32 z1, z2, z3;
-  JCOEFPTR inptr;
-  ISLOW_MULT_TYPE * quantptr;
-  INT32 * wsptr;
-  JSAMPROW outptr;
-  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
-  int ctr;
-  INT32 workspace[4*2];	/* buffers data between passes */
-  SHIFT_TEMPS
-
-  /* Pass 1: process columns from input, store into work array. */
-
-  inptr = coef_block;
-  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
-  wsptr = workspace;
-  for (ctr = 0; ctr < 4; ctr++, inptr++, quantptr++, wsptr++) {
-    /* Even part */
-
-    tmp10 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
-
-    /* Odd part */
-
-    tmp0 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
-
-    /* Final output stage */
-
-    wsptr[4*0] = tmp10 + tmp0;
-    wsptr[4*1] = tmp10 - tmp0;
-  }
-
-  /* Pass 2: process 2 rows from work array, store into output array.
-   * 4-point IDCT kernel,
-   * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT].
-   */
-  wsptr = workspace;
-  for (ctr = 0; ctr < 2; ctr++) {
-    outptr = output_buf[ctr] + output_col;
-
-    /* Even part */
-
-    /* Add fudge factor here for final descale. */
-    tmp0 = wsptr[0] + (ONE << 2);
-    tmp2 = wsptr[2];
-
-    tmp10 = (tmp0 + tmp2) << CONST_BITS;
-    tmp12 = (tmp0 - tmp2) << CONST_BITS;
-
-    /* Odd part */
-    /* Same rotation as in the even part of the 8x8 LL&M IDCT */
-
-    z2 = wsptr[1];
-    z3 = wsptr[3];
-
-    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);   /* c6 */
-    tmp0 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
-    tmp2 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
-
-    /* Final output stage */
-
-    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
-					      CONST_BITS+3)
-			    & RANGE_MASK];
-    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
-					      CONST_BITS+3)
-			    & RANGE_MASK];
-    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
-					      CONST_BITS+3)
-			    & RANGE_MASK];
-    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
-					      CONST_BITS+3)
-			    & RANGE_MASK];
-
-    wsptr += 4;		/* advance pointer to next row */
-  }
-}
-
-
-/*
- * Perform dequantization and inverse DCT on one block of coefficients,
- * producing a 2x1 output block.
- *
- * 1-point IDCT in pass 1 (columns), 2-point in pass 2 (rows).
- */
-
-GLOBAL(void)
-jpeg_idct_2x1 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	       JCOEFPTR coef_block,
-	       JSAMPARRAY output_buf, JDIMENSION output_col)
-{
-  INT32 tmp0, tmp10;
-  ISLOW_MULT_TYPE * quantptr;
-  JSAMPROW outptr;
-  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
-  SHIFT_TEMPS
-
-  /* Pass 1: empty. */
-
-  /* Pass 2: process 1 row from input, store into output array. */
-
-  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
-  outptr = output_buf[0] + output_col;
-
-  /* Even part */
-
-  tmp10 = DEQUANTIZE(coef_block[0], quantptr[0]);
-  /* Add fudge factor here for final descale. */
-  tmp10 += ONE << 2;
-
-  /* Odd part */
-
-  tmp0 = DEQUANTIZE(coef_block[1], quantptr[1]);
-
-  /* Final output stage */
-
-  outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0, 3) & RANGE_MASK];
-  outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0, 3) & RANGE_MASK];
-}
-
-
-/*
- * Perform dequantization and inverse DCT on one block of coefficients,
- * producing a 8x16 output block.
- *
- * 16-point IDCT in pass 1 (columns), 8-point in pass 2 (rows).
- */
-
-GLOBAL(void)
-jpeg_idct_8x16 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-		JCOEFPTR coef_block,
-		JSAMPARRAY output_buf, JDIMENSION output_col)
-{
-  INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13;
-  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27;
-  INT32 z1, z2, z3, z4;
-  JCOEFPTR inptr;
-  ISLOW_MULT_TYPE * quantptr;
-  int * wsptr;
-  JSAMPROW outptr;
-  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
-  int ctr;
-  int workspace[8*16];	/* buffers data between passes */
-  SHIFT_TEMPS
-
-  /* Pass 1: process columns from input, store into work array.
-   * 16-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/32).
-   */
-  inptr = coef_block;
-  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
-  wsptr = workspace;
-  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
-    /* Even part */
-
-    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
-    tmp0 <<= CONST_BITS;
-    /* Add fudge factor here for final descale. */
-    tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
-
-    z1 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
-    tmp1 = MULTIPLY(z1, FIX(1.306562965));      /* c4[16] = c2[8] */
-    tmp2 = MULTIPLY(z1, FIX_0_541196100);       /* c12[16] = c6[8] */
-
-    tmp10 = tmp0 + tmp1;
-    tmp11 = tmp0 - tmp1;
-    tmp12 = tmp0 + tmp2;
-    tmp13 = tmp0 - tmp2;
-
-    z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
-    z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
-    z3 = z1 - z2;
-    z4 = MULTIPLY(z3, FIX(0.275899379));        /* c14[16] = c7[8] */
-    z3 = MULTIPLY(z3, FIX(1.387039845));        /* c2[16] = c1[8] */
-
-    tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447);  /* (c6+c2)[16] = (c3+c1)[8] */
-    tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223);  /* (c6-c14)[16] = (c3-c7)[8] */
-    tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */
-    tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */
-
-    tmp20 = tmp10 + tmp0;
-    tmp27 = tmp10 - tmp0;
-    tmp21 = tmp12 + tmp1;
-    tmp26 = tmp12 - tmp1;
-    tmp22 = tmp13 + tmp2;
-    tmp25 = tmp13 - tmp2;
-    tmp23 = tmp11 + tmp3;
-    tmp24 = tmp11 - tmp3;
-
-    /* Odd part */
-
-    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
-    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
-    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
-    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
-
-    tmp11 = z1 + z3;
-
-    tmp1  = MULTIPLY(z1 + z2, FIX(1.353318001));   /* c3 */
-    tmp2  = MULTIPLY(tmp11,   FIX(1.247225013));   /* c5 */
-    tmp3  = MULTIPLY(z1 + z4, FIX(1.093201867));   /* c7 */
-    tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586));   /* c9 */
-    tmp11 = MULTIPLY(tmp11,   FIX(0.666655658));   /* c11 */
-    tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528));   /* c13 */
-    tmp0  = tmp1 + tmp2 + tmp3 -
-	    MULTIPLY(z1, FIX(2.286341144));        /* c7+c5+c3-c1 */
-    tmp13 = tmp10 + tmp11 + tmp12 -
-	    MULTIPLY(z1, FIX(1.835730603));        /* c9+c11+c13-c15 */
-    z1    = MULTIPLY(z2 + z3, FIX(0.138617169));   /* c15 */
-    tmp1  += z1 + MULTIPLY(z2, FIX(0.071888074));  /* c9+c11-c3-c15 */
-    tmp2  += z1 - MULTIPLY(z3, FIX(1.125726048));  /* c5+c7+c15-c3 */
-    z1    = MULTIPLY(z3 - z2, FIX(1.407403738));   /* c1 */
-    tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282));  /* c1+c11-c9-c13 */
-    tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411));  /* c1+c5+c13-c7 */
-    z2    += z4;
-    z1    = MULTIPLY(z2, - FIX(0.666655658));      /* -c11 */
-    tmp1  += z1;
-    tmp3  += z1 + MULTIPLY(z4, FIX(1.065388962));  /* c3+c11+c15-c7 */
-    z2    = MULTIPLY(z2, - FIX(1.247225013));      /* -c5 */
-    tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809));  /* c1+c5+c9-c13 */
-    tmp12 += z2;
-    z2    = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */
-    tmp2  += z2;
-    tmp3  += z2;
-    z2    = MULTIPLY(z4 - z3, FIX(0.410524528));   /* c13 */
-    tmp10 += z2;
-    tmp11 += z2;
-
-    /* Final output stage */
-
-    wsptr[8*0]  = (int) RIGHT_SHIFT(tmp20 + tmp0,  CONST_BITS-PASS1_BITS);
-    wsptr[8*15] = (int) RIGHT_SHIFT(tmp20 - tmp0,  CONST_BITS-PASS1_BITS);
-    wsptr[8*1]  = (int) RIGHT_SHIFT(tmp21 + tmp1,  CONST_BITS-PASS1_BITS);
-    wsptr[8*14] = (int) RIGHT_SHIFT(tmp21 - tmp1,  CONST_BITS-PASS1_BITS);
-    wsptr[8*2]  = (int) RIGHT_SHIFT(tmp22 + tmp2,  CONST_BITS-PASS1_BITS);
-    wsptr[8*13] = (int) RIGHT_SHIFT(tmp22 - tmp2,  CONST_BITS-PASS1_BITS);
-    wsptr[8*3]  = (int) RIGHT_SHIFT(tmp23 + tmp3,  CONST_BITS-PASS1_BITS);
-    wsptr[8*12] = (int) RIGHT_SHIFT(tmp23 - tmp3,  CONST_BITS-PASS1_BITS);
-    wsptr[8*4]  = (int) RIGHT_SHIFT(tmp24 + tmp10, CONST_BITS-PASS1_BITS);
-    wsptr[8*11] = (int) RIGHT_SHIFT(tmp24 - tmp10, CONST_BITS-PASS1_BITS);
-    wsptr[8*5]  = (int) RIGHT_SHIFT(tmp25 + tmp11, CONST_BITS-PASS1_BITS);
-    wsptr[8*10] = (int) RIGHT_SHIFT(tmp25 - tmp11, CONST_BITS-PASS1_BITS);
-    wsptr[8*6]  = (int) RIGHT_SHIFT(tmp26 + tmp12, CONST_BITS-PASS1_BITS);
-    wsptr[8*9]  = (int) RIGHT_SHIFT(tmp26 - tmp12, CONST_BITS-PASS1_BITS);
-    wsptr[8*7]  = (int) RIGHT_SHIFT(tmp27 + tmp13, CONST_BITS-PASS1_BITS);
-    wsptr[8*8]  = (int) RIGHT_SHIFT(tmp27 - tmp13, CONST_BITS-PASS1_BITS);
-  }
-  
-  /* Pass 2: process rows from work array, store into output array. */
-  /* Note that we must descale the results by a factor of 8 == 2**3, */
-  /* and also undo the PASS1_BITS scaling. */
-
-  wsptr = workspace;
-  for (ctr = 0; ctr < 16; ctr++) {
-    outptr = output_buf[ctr] + output_col;
-    
-    /* Even part: reverse the even part of the forward DCT. */
-    /* The rotator is sqrt(2)*c(-6). */
-    
-    z2 = (INT32) wsptr[2];
-    z3 = (INT32) wsptr[6];
-    
-    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);
-    tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865);
-    tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065);
-    
-    /* Add fudge factor here for final descale. */
-    z2 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
-    z3 = (INT32) wsptr[4];
-    
-    tmp0 = (z2 + z3) << CONST_BITS;
-    tmp1 = (z2 - z3) << CONST_BITS;
-    
-    tmp10 = tmp0 + tmp2;
-    tmp13 = tmp0 - tmp2;
-    tmp11 = tmp1 + tmp3;
-    tmp12 = tmp1 - tmp3;
-    
-    /* Odd part per figure 8; the matrix is unitary and hence its
-     * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
-     */
-    
-    tmp0 = (INT32) wsptr[7];
-    tmp1 = (INT32) wsptr[5];
-    tmp2 = (INT32) wsptr[3];
-    tmp3 = (INT32) wsptr[1];
-    
-    z2 = tmp0 + tmp2;
-    z3 = tmp1 + tmp3;
-
-    z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* sqrt(2) * c3 */
-    z2 = MULTIPLY(z2, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
-    z3 = MULTIPLY(z3, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
-    z2 += z1;
-    z3 += z1;
-
-    z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
-    tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
-    tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
-    tmp0 += z1 + z2;
-    tmp3 += z1 + z3;
-
-    z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
-    tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
-    tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
-    tmp1 += z1 + z3;
-    tmp2 += z1 + z2;
-    
-    /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
-    
-    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp3,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp3,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp2,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp2,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp1,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp1,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13 + tmp0,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp13 - tmp0,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    
-    wsptr += DCTSIZE;		/* advance pointer to next row */
-  }
-}
-
-
-/*
- * Perform dequantization and inverse DCT on one block of coefficients,
- * producing a 7x14 output block.
- *
- * 14-point IDCT in pass 1 (columns), 7-point in pass 2 (rows).
- */
-
-GLOBAL(void)
-jpeg_idct_7x14 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-		JCOEFPTR coef_block,
-		JSAMPARRAY output_buf, JDIMENSION output_col)
-{
-  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
-  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26;
-  INT32 z1, z2, z3, z4;
-  JCOEFPTR inptr;
-  ISLOW_MULT_TYPE * quantptr;
-  int * wsptr;
-  JSAMPROW outptr;
-  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
-  int ctr;
-  int workspace[7*14];	/* buffers data between passes */
-  SHIFT_TEMPS
-
-  /* Pass 1: process columns from input, store into work array.
-   * 14-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/28).
-   */
-  inptr = coef_block;
-  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
-  wsptr = workspace;
-  for (ctr = 0; ctr < 7; ctr++, inptr++, quantptr++, wsptr++) {
-    /* Even part */
-
-    z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
-    z1 <<= CONST_BITS;
-    /* Add fudge factor here for final descale. */
-    z1 += ONE << (CONST_BITS-PASS1_BITS-1);
-    z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
-    z2 = MULTIPLY(z4, FIX(1.274162392));         /* c4 */
-    z3 = MULTIPLY(z4, FIX(0.314692123));         /* c12 */
-    z4 = MULTIPLY(z4, FIX(0.881747734));         /* c8 */
-
-    tmp10 = z1 + z2;
-    tmp11 = z1 + z3;
-    tmp12 = z1 - z4;
-
-    tmp23 = RIGHT_SHIFT(z1 - ((z2 + z3 - z4) << 1), /* c0 = (c4+c12-c8)*2 */
-			CONST_BITS-PASS1_BITS);
-
-    z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
-    z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
-
-    z3 = MULTIPLY(z1 + z2, FIX(1.105676686));    /* c6 */
-
-    tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */
-    tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */
-    tmp15 = MULTIPLY(z1, FIX(0.613604268)) -     /* c10 */
-	    MULTIPLY(z2, FIX(1.378756276));      /* c2 */
-
-    tmp20 = tmp10 + tmp13;
-    tmp26 = tmp10 - tmp13;
-    tmp21 = tmp11 + tmp14;
-    tmp25 = tmp11 - tmp14;
-    tmp22 = tmp12 + tmp15;
-    tmp24 = tmp12 - tmp15;
-
-    /* Odd part */
-
-    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
-    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
-    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
-    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
-    tmp13 = z4 << CONST_BITS;
-
-    tmp14 = z1 + z3;
-    tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607));           /* c3 */
-    tmp12 = MULTIPLY(tmp14, FIX(1.197448846));             /* c5 */
-    tmp10 = tmp11 + tmp12 + tmp13 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */
-    tmp14 = MULTIPLY(tmp14, FIX(0.752406978));             /* c9 */
-    tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426));        /* c9+c11-c13 */
-    z1    -= z2;
-    tmp15 = MULTIPLY(z1, FIX(0.467085129)) - tmp13;        /* c11 */
-    tmp16 += tmp15;
-    z1    += z4;
-    z4    = MULTIPLY(z2 + z3, - FIX(0.158341681)) - tmp13; /* -c13 */
-    tmp11 += z4 - MULTIPLY(z2, FIX(0.424103948));          /* c3-c9-c13 */
-    tmp12 += z4 - MULTIPLY(z3, FIX(2.373959773));          /* c3+c5-c13 */
-    z4    = MULTIPLY(z3 - z2, FIX(1.405321284));           /* c1 */
-    tmp14 += z4 + tmp13 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */
-    tmp15 += z4 + MULTIPLY(z2, FIX(0.674957567));          /* c1+c11-c5 */
-
-    tmp13 = (z1 - z3) << PASS1_BITS;
-
-    /* Final output stage */
-
-    wsptr[7*0]  = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
-    wsptr[7*13] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
-    wsptr[7*1]  = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
-    wsptr[7*12] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
-    wsptr[7*2]  = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
-    wsptr[7*11] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
-    wsptr[7*3]  = (int) (tmp23 + tmp13);
-    wsptr[7*10] = (int) (tmp23 - tmp13);
-    wsptr[7*4]  = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
-    wsptr[7*9]  = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
-    wsptr[7*5]  = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
-    wsptr[7*8]  = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
-    wsptr[7*6]  = (int) RIGHT_SHIFT(tmp26 + tmp16, CONST_BITS-PASS1_BITS);
-    wsptr[7*7]  = (int) RIGHT_SHIFT(tmp26 - tmp16, CONST_BITS-PASS1_BITS);
-  }
-
-  /* Pass 2: process 14 rows from work array, store into output array.
-   * 7-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/14).
-   */
-  wsptr = workspace;
-  for (ctr = 0; ctr < 14; ctr++) {
-    outptr = output_buf[ctr] + output_col;
-
-    /* Even part */
-
-    /* Add fudge factor here for final descale. */
-    tmp23 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
-    tmp23 <<= CONST_BITS;
-
-    z1 = (INT32) wsptr[2];
-    z2 = (INT32) wsptr[4];
-    z3 = (INT32) wsptr[6];
-
-    tmp20 = MULTIPLY(z2 - z3, FIX(0.881747734));       /* c4 */
-    tmp22 = MULTIPLY(z1 - z2, FIX(0.314692123));       /* c6 */
-    tmp21 = tmp20 + tmp22 + tmp23 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */
-    tmp10 = z1 + z3;
-    z2 -= tmp10;
-    tmp10 = MULTIPLY(tmp10, FIX(1.274162392)) + tmp23; /* c2 */
-    tmp20 += tmp10 - MULTIPLY(z3, FIX(0.077722536));   /* c2-c4-c6 */
-    tmp22 += tmp10 - MULTIPLY(z1, FIX(2.470602249));   /* c2+c4+c6 */
-    tmp23 += MULTIPLY(z2, FIX(1.414213562));           /* c0 */
-
-    /* Odd part */
-
-    z1 = (INT32) wsptr[1];
-    z2 = (INT32) wsptr[3];
-    z3 = (INT32) wsptr[5];
-
-    tmp11 = MULTIPLY(z1 + z2, FIX(0.935414347));       /* (c3+c1-c5)/2 */
-    tmp12 = MULTIPLY(z1 - z2, FIX(0.170262339));       /* (c3+c5-c1)/2 */
-    tmp10 = tmp11 - tmp12;
-    tmp11 += tmp12;
-    tmp12 = MULTIPLY(z2 + z3, - FIX(1.378756276));     /* -c1 */
-    tmp11 += tmp12;
-    z2 = MULTIPLY(z1 + z3, FIX(0.613604268));          /* c5 */
-    tmp10 += z2;
-    tmp12 += z2 + MULTIPLY(z3, FIX(1.870828693));      /* c3+c1-c5 */
-
-    /* Final output stage */
-
-    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-
-    wsptr += 7;		/* advance pointer to next row */
-  }
-}
-
-
-/*
- * Perform dequantization and inverse DCT on one block of coefficients,
- * producing a 6x12 output block.
- *
- * 12-point IDCT in pass 1 (columns), 6-point in pass 2 (rows).
- */
-
-GLOBAL(void)
-jpeg_idct_6x12 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-		JCOEFPTR coef_block,
-		JSAMPARRAY output_buf, JDIMENSION output_col)
-{
-  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
-  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25;
-  INT32 z1, z2, z3, z4;
-  JCOEFPTR inptr;
-  ISLOW_MULT_TYPE * quantptr;
-  int * wsptr;
-  JSAMPROW outptr;
-  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
-  int ctr;
-  int workspace[6*12];	/* buffers data between passes */
-  SHIFT_TEMPS
-
-  /* Pass 1: process columns from input, store into work array.
-   * 12-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/24).
-   */
-  inptr = coef_block;
-  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
-  wsptr = workspace;
-  for (ctr = 0; ctr < 6; ctr++, inptr++, quantptr++, wsptr++) {
-    /* Even part */
-
-    z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
-    z3 <<= CONST_BITS;
-    /* Add fudge factor here for final descale. */
-    z3 += ONE << (CONST_BITS-PASS1_BITS-1);
-
-    z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
-    z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */
-
-    tmp10 = z3 + z4;
-    tmp11 = z3 - z4;
-
-    z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
-    z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */
-    z1 <<= CONST_BITS;
-    z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
-    z2 <<= CONST_BITS;
-
-    tmp12 = z1 - z2;
-
-    tmp21 = z3 + tmp12;
-    tmp24 = z3 - tmp12;
-
-    tmp12 = z4 + z2;
-
-    tmp20 = tmp10 + tmp12;
-    tmp25 = tmp10 - tmp12;
-
-    tmp12 = z4 - z1 - z2;
-
-    tmp22 = tmp11 + tmp12;
-    tmp23 = tmp11 - tmp12;
-
-    /* Odd part */
-
-    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
-    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
-    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
-    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
-
-    tmp11 = MULTIPLY(z2, FIX(1.306562965));                  /* c3 */
-    tmp14 = MULTIPLY(z2, - FIX_0_541196100);                 /* -c9 */
-
-    tmp10 = z1 + z3;
-    tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669));          /* c7 */
-    tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384));       /* c5-c7 */
-    tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716));  /* c1-c5 */
-    tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580));           /* -(c7+c11) */
-    tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */
-    tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */
-    tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) -        /* c7-c11 */
-	     MULTIPLY(z4, FIX(1.982889723));                 /* c5+c7 */
-
-    z1 -= z4;
-    z2 -= z3;
-    z3 = MULTIPLY(z1 + z2, FIX_0_541196100);                 /* c9 */
-    tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865);              /* c3-c9 */
-    tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065);              /* c3+c9 */
-
-    /* Final output stage */
-
-    wsptr[6*0]  = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
-    wsptr[6*11] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
-    wsptr[6*1]  = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
-    wsptr[6*10] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
-    wsptr[6*2]  = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
-    wsptr[6*9]  = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
-    wsptr[6*3]  = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
-    wsptr[6*8]  = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
-    wsptr[6*4]  = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
-    wsptr[6*7]  = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
-    wsptr[6*5]  = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
-    wsptr[6*6]  = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
-  }
-
-  /* Pass 2: process 12 rows from work array, store into output array.
-   * 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
-   */
-  wsptr = workspace;
-  for (ctr = 0; ctr < 12; ctr++) {
-    outptr = output_buf[ctr] + output_col;
-
-    /* Even part */
-
-    /* Add fudge factor here for final descale. */
-    tmp10 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
-    tmp10 <<= CONST_BITS;
-    tmp12 = (INT32) wsptr[4];
-    tmp20 = MULTIPLY(tmp12, FIX(0.707106781));   /* c4 */
-    tmp11 = tmp10 + tmp20;
-    tmp21 = tmp10 - tmp20 - tmp20;
-    tmp20 = (INT32) wsptr[2];
-    tmp10 = MULTIPLY(tmp20, FIX(1.224744871));   /* c2 */
-    tmp20 = tmp11 + tmp10;
-    tmp22 = tmp11 - tmp10;
-
-    /* Odd part */
-
-    z1 = (INT32) wsptr[1];
-    z2 = (INT32) wsptr[3];
-    z3 = (INT32) wsptr[5];
-    tmp11 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
-    tmp10 = tmp11 + ((z1 + z2) << CONST_BITS);
-    tmp12 = tmp11 + ((z3 - z2) << CONST_BITS);
-    tmp11 = (z1 - z2 - z3) << CONST_BITS;
-
-    /* Final output stage */
-
-    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-
-    wsptr += 6;		/* advance pointer to next row */
-  }
-}
-
-
-/*
- * Perform dequantization and inverse DCT on one block of coefficients,
- * producing a 5x10 output block.
- *
- * 10-point IDCT in pass 1 (columns), 5-point in pass 2 (rows).
- */
-
-GLOBAL(void)
-jpeg_idct_5x10 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-		JCOEFPTR coef_block,
-		JSAMPARRAY output_buf, JDIMENSION output_col)
-{
-  INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
-  INT32 tmp20, tmp21, tmp22, tmp23, tmp24;
-  INT32 z1, z2, z3, z4, z5;
-  JCOEFPTR inptr;
-  ISLOW_MULT_TYPE * quantptr;
-  int * wsptr;
-  JSAMPROW outptr;
-  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
-  int ctr;
-  int workspace[5*10];	/* buffers data between passes */
-  SHIFT_TEMPS
-
-  /* Pass 1: process columns from input, store into work array.
-   * 10-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/20).
-   */
-  inptr = coef_block;
-  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
-  wsptr = workspace;
-  for (ctr = 0; ctr < 5; ctr++, inptr++, quantptr++, wsptr++) {
-    /* Even part */
-
-    z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
-    z3 <<= CONST_BITS;
-    /* Add fudge factor here for final descale. */
-    z3 += ONE << (CONST_BITS-PASS1_BITS-1);
-    z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
-    z1 = MULTIPLY(z4, FIX(1.144122806));         /* c4 */
-    z2 = MULTIPLY(z4, FIX(0.437016024));         /* c8 */
-    tmp10 = z3 + z1;
-    tmp11 = z3 - z2;
-
-    tmp22 = RIGHT_SHIFT(z3 - ((z1 - z2) << 1),   /* c0 = (c4-c8)*2 */
-			CONST_BITS-PASS1_BITS);
-
-    z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
-    z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
-
-    z1 = MULTIPLY(z2 + z3, FIX(0.831253876));    /* c6 */
-    tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */
-    tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */
-
-    tmp20 = tmp10 + tmp12;
-    tmp24 = tmp10 - tmp12;
-    tmp21 = tmp11 + tmp13;
-    tmp23 = tmp11 - tmp13;
-
-    /* Odd part */
-
-    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
-    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
-    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
-    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
-
-    tmp11 = z2 + z4;
-    tmp13 = z2 - z4;
-
-    tmp12 = MULTIPLY(tmp13, FIX(0.309016994));        /* (c3-c7)/2 */
-    z5 = z3 << CONST_BITS;
-
-    z2 = MULTIPLY(tmp11, FIX(0.951056516));           /* (c3+c7)/2 */
-    z4 = z5 + tmp12;
-
-    tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */
-    tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */
-
-    z2 = MULTIPLY(tmp11, FIX(0.587785252));           /* (c1-c9)/2 */
-    z4 = z5 - tmp12 - (tmp13 << (CONST_BITS - 1));
-
-    tmp12 = (z1 - tmp13 - z3) << PASS1_BITS;
-
-    tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */
-    tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */
-
-    /* Final output stage */
-
-    wsptr[5*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
-    wsptr[5*9] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
-    wsptr[5*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
-    wsptr[5*8] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
-    wsptr[5*2] = (int) (tmp22 + tmp12);
-    wsptr[5*7] = (int) (tmp22 - tmp12);
-    wsptr[5*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
-    wsptr[5*6] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
-    wsptr[5*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
-    wsptr[5*5] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
-  }
-
-  /* Pass 2: process 10 rows from work array, store into output array.
-   * 5-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/10).
-   */
-  wsptr = workspace;
-  for (ctr = 0; ctr < 10; ctr++) {
-    outptr = output_buf[ctr] + output_col;
-
-    /* Even part */
-
-    /* Add fudge factor here for final descale. */
-    tmp12 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
-    tmp12 <<= CONST_BITS;
-    tmp13 = (INT32) wsptr[2];
-    tmp14 = (INT32) wsptr[4];
-    z1 = MULTIPLY(tmp13 + tmp14, FIX(0.790569415)); /* (c2+c4)/2 */
-    z2 = MULTIPLY(tmp13 - tmp14, FIX(0.353553391)); /* (c2-c4)/2 */
-    z3 = tmp12 + z2;
-    tmp10 = z3 + z1;
-    tmp11 = z3 - z1;
-    tmp12 -= z2 << 2;
-
-    /* Odd part */
-
-    z2 = (INT32) wsptr[1];
-    z3 = (INT32) wsptr[3];
-
-    z1 = MULTIPLY(z2 + z3, FIX(0.831253876));       /* c3 */
-    tmp13 = z1 + MULTIPLY(z2, FIX(0.513743148));    /* c1-c3 */
-    tmp14 = z1 - MULTIPLY(z3, FIX(2.176250899));    /* c1+c3 */
-
-    /* Final output stage */
-
-    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp13,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp13,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp14,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp14,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-
-    wsptr += 5;		/* advance pointer to next row */
-  }
-}
-
-
-/*
- * Perform dequantization and inverse DCT on one block of coefficients,
- * producing a 4x8 output block.
- *
- * 8-point IDCT in pass 1 (columns), 4-point in pass 2 (rows).
- */
-
-GLOBAL(void)
-jpeg_idct_4x8 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	       JCOEFPTR coef_block,
-	       JSAMPARRAY output_buf, JDIMENSION output_col)
-{
-  INT32 tmp0, tmp1, tmp2, tmp3;
-  INT32 tmp10, tmp11, tmp12, tmp13;
-  INT32 z1, z2, z3;
-  JCOEFPTR inptr;
-  ISLOW_MULT_TYPE * quantptr;
-  int * wsptr;
-  JSAMPROW outptr;
-  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
-  int ctr;
-  int workspace[4*8];	/* buffers data between passes */
-  SHIFT_TEMPS
-
-  /* Pass 1: process columns from input, store into work array. */
-  /* Note results are scaled up by sqrt(8) compared to a true IDCT; */
-  /* furthermore, we scale the results by 2**PASS1_BITS. */
-
-  inptr = coef_block;
-  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
-  wsptr = workspace;
-  for (ctr = 4; ctr > 0; ctr--) {
-    /* Due to quantization, we will usually find that many of the input
-     * coefficients are zero, especially the AC terms.  We can exploit this
-     * by short-circuiting the IDCT calculation for any column in which all
-     * the AC terms are zero.  In that case each output is equal to the
-     * DC coefficient (with scale factor as needed).
-     * With typical images and quantization tables, half or more of the
-     * column DCT calculations can be simplified this way.
-     */
-
-    if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 &&
-	inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&
-	inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&
-	inptr[DCTSIZE*7] == 0) {
-      /* AC terms all zero */
-      int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) << PASS1_BITS;
-
-      wsptr[4*0] = dcval;
-      wsptr[4*1] = dcval;
-      wsptr[4*2] = dcval;
-      wsptr[4*3] = dcval;
-      wsptr[4*4] = dcval;
-      wsptr[4*5] = dcval;
-      wsptr[4*6] = dcval;
-      wsptr[4*7] = dcval;
-
-      inptr++;			/* advance pointers to next column */
-      quantptr++;
-      wsptr++;
-      continue;
-    }
-
-    /* Even part: reverse the even part of the forward DCT. */
-    /* The rotator is sqrt(2)*c(-6). */
-
-    z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
-    z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
-    
-    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);
-    tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865);
-    tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065);
-    
-    z2 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
-    z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
-    z2 <<= CONST_BITS;
-    z3 <<= CONST_BITS;
-    /* Add fudge factor here for final descale. */
-    z2 += ONE << (CONST_BITS-PASS1_BITS-1);
-
-    tmp0 = z2 + z3;
-    tmp1 = z2 - z3;
-    
-    tmp10 = tmp0 + tmp2;
-    tmp13 = tmp0 - tmp2;
-    tmp11 = tmp1 + tmp3;
-    tmp12 = tmp1 - tmp3;
-
-    /* Odd part per figure 8; the matrix is unitary and hence its
-     * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
-     */
-
-    tmp0 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
-    tmp1 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
-    tmp2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
-    tmp3 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
-
-    z2 = tmp0 + tmp2;
-    z3 = tmp1 + tmp3;
-
-    z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* sqrt(2) * c3 */
-    z2 = MULTIPLY(z2, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
-    z3 = MULTIPLY(z3, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
-    z2 += z1;
-    z3 += z1;
-
-    z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
-    tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
-    tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
-    tmp0 += z1 + z2;
-    tmp3 += z1 + z3;
-
-    z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
-    tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
-    tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
-    tmp1 += z1 + z3;
-    tmp2 += z1 + z2;
-
-    /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
-
-    wsptr[4*0] = (int) RIGHT_SHIFT(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
-    wsptr[4*7] = (int) RIGHT_SHIFT(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
-    wsptr[4*1] = (int) RIGHT_SHIFT(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
-    wsptr[4*6] = (int) RIGHT_SHIFT(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
-    wsptr[4*2] = (int) RIGHT_SHIFT(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
-    wsptr[4*5] = (int) RIGHT_SHIFT(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
-    wsptr[4*3] = (int) RIGHT_SHIFT(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
-    wsptr[4*4] = (int) RIGHT_SHIFT(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
-
-    inptr++;			/* advance pointers to next column */
-    quantptr++;
-    wsptr++;
-  }
-
-  /* Pass 2: process 8 rows from work array, store into output array.
-   * 4-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
-   */
-  wsptr = workspace;
-  for (ctr = 0; ctr < 8; ctr++) {
-    outptr = output_buf[ctr] + output_col;
-
-    /* Even part */
-
-    /* Add fudge factor here for final descale. */
-    tmp0 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
-    tmp2 = (INT32) wsptr[2];
-
-    tmp10 = (tmp0 + tmp2) << CONST_BITS;
-    tmp12 = (tmp0 - tmp2) << CONST_BITS;
-
-    /* Odd part */
-    /* Same rotation as in the even part of the 8x8 LL&M IDCT */
-
-    z2 = (INT32) wsptr[1];
-    z3 = (INT32) wsptr[3];
-
-    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);   /* c6 */
-    tmp0 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
-    tmp2 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
-
-    /* Final output stage */
-
-    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    
-    wsptr += 4;		/* advance pointer to next row */
-  }
-}
-
-
-/*
- * Perform dequantization and inverse DCT on one block of coefficients,
- * producing a reduced-size 3x6 output block.
- *
- * 6-point IDCT in pass 1 (columns), 3-point in pass 2 (rows).
- */
-
-GLOBAL(void)
-jpeg_idct_3x6 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	       JCOEFPTR coef_block,
-	       JSAMPARRAY output_buf, JDIMENSION output_col)
-{
-  INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12;
-  INT32 z1, z2, z3;
-  JCOEFPTR inptr;
-  ISLOW_MULT_TYPE * quantptr;
-  int * wsptr;
-  JSAMPROW outptr;
-  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
-  int ctr;
-  int workspace[3*6];	/* buffers data between passes */
-  SHIFT_TEMPS
-
-  /* Pass 1: process columns from input, store into work array.
-   * 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
-   */
-  inptr = coef_block;
-  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
-  wsptr = workspace;
-  for (ctr = 0; ctr < 3; ctr++, inptr++, quantptr++, wsptr++) {
-    /* Even part */
-
-    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
-    tmp0 <<= CONST_BITS;
-    /* Add fudge factor here for final descale. */
-    tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
-    tmp2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
-    tmp10 = MULTIPLY(tmp2, FIX(0.707106781));   /* c4 */
-    tmp1 = tmp0 + tmp10;
-    tmp11 = RIGHT_SHIFT(tmp0 - tmp10 - tmp10, CONST_BITS-PASS1_BITS);
-    tmp10 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
-    tmp0 = MULTIPLY(tmp10, FIX(1.224744871));   /* c2 */
-    tmp10 = tmp1 + tmp0;
-    tmp12 = tmp1 - tmp0;
-
-    /* Odd part */
-
-    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
-    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
-    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
-    tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
-    tmp0 = tmp1 + ((z1 + z2) << CONST_BITS);
-    tmp2 = tmp1 + ((z3 - z2) << CONST_BITS);
-    tmp1 = (z1 - z2 - z3) << PASS1_BITS;
-
-    /* Final output stage */
-
-    wsptr[3*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
-    wsptr[3*5] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
-    wsptr[3*1] = (int) (tmp11 + tmp1);
-    wsptr[3*4] = (int) (tmp11 - tmp1);
-    wsptr[3*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS);
-    wsptr[3*3] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS);
-  }
-
-  /* Pass 2: process 6 rows from work array, store into output array.
-   * 3-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/6).
-   */
-  wsptr = workspace;
-  for (ctr = 0; ctr < 6; ctr++) {
-    outptr = output_buf[ctr] + output_col;
-
-    /* Even part */
-
-    /* Add fudge factor here for final descale. */
-    tmp0 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
-    tmp0 <<= CONST_BITS;
-    tmp2 = (INT32) wsptr[2];
-    tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */
-    tmp10 = tmp0 + tmp12;
-    tmp2 = tmp0 - tmp12 - tmp12;
-
-    /* Odd part */
-
-    tmp12 = (INT32) wsptr[1];
-    tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */
-
-    /* Final output stage */
-
-    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp2,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-
-    wsptr += 3;		/* advance pointer to next row */
-  }
-}
-
-
-/*
- * Perform dequantization and inverse DCT on one block of coefficients,
- * producing a 2x4 output block.
- *
- * 4-point IDCT in pass 1 (columns), 2-point in pass 2 (rows).
- */
-
-GLOBAL(void)
-jpeg_idct_2x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	       JCOEFPTR coef_block,
-	       JSAMPARRAY output_buf, JDIMENSION output_col)
-{
-  INT32 tmp0, tmp2, tmp10, tmp12;
-  INT32 z1, z2, z3;
-  JCOEFPTR inptr;
-  ISLOW_MULT_TYPE * quantptr;
-  INT32 * wsptr;
-  JSAMPROW outptr;
-  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
-  int ctr;
-  INT32 workspace[2*4];	/* buffers data between passes */
-  SHIFT_TEMPS
-
-  /* Pass 1: process columns from input, store into work array.
-   * 4-point IDCT kernel,
-   * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT].
-   */
-  inptr = coef_block;
-  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
-  wsptr = workspace;
-  for (ctr = 0; ctr < 2; ctr++, inptr++, quantptr++, wsptr++) {
-    /* Even part */
-
-    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
-    tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
-
-    tmp10 = (tmp0 + tmp2) << CONST_BITS;
-    tmp12 = (tmp0 - tmp2) << CONST_BITS;
-
-    /* Odd part */
-    /* Same rotation as in the even part of the 8x8 LL&M IDCT */
-
-    z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
-    z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
-
-    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);   /* c6 */
-    tmp0 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
-    tmp2 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
-
-    /* Final output stage */
-
-    wsptr[2*0] = tmp10 + tmp0;
-    wsptr[2*3] = tmp10 - tmp0;
-    wsptr[2*1] = tmp12 + tmp2;
-    wsptr[2*2] = tmp12 - tmp2;
-  }
-
-  /* Pass 2: process 4 rows from work array, store into output array. */
-
-  wsptr = workspace;
-  for (ctr = 0; ctr < 4; ctr++) {
-    outptr = output_buf[ctr] + output_col;
-
-    /* Even part */
-
-    /* Add fudge factor here for final descale. */
-    tmp10 = wsptr[0] + (ONE << (CONST_BITS+2));
-
-    /* Odd part */
-
-    tmp0 = wsptr[1];
-
-    /* Final output stage */
-
-    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS+3)
-			    & RANGE_MASK];
-    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS+3)
-			    & RANGE_MASK];
-
-    wsptr += 2;		/* advance pointer to next row */
-  }
-}
-
-
-/*
- * Perform dequantization and inverse DCT on one block of coefficients,
- * producing a 1x2 output block.
- *
- * 2-point IDCT in pass 1 (columns), 1-point in pass 2 (rows).
- */
-
-GLOBAL(void)
-jpeg_idct_1x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	       JCOEFPTR coef_block,
-	       JSAMPARRAY output_buf, JDIMENSION output_col)
-{
-  INT32 tmp0, tmp10;
-  ISLOW_MULT_TYPE * quantptr;
-  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
-  SHIFT_TEMPS
-
-  /* Process 1 column from input, store into output array. */
-
-  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
-
-  /* Even part */
-    
-  tmp10 = DEQUANTIZE(coef_block[DCTSIZE*0], quantptr[DCTSIZE*0]);
-  /* Add fudge factor here for final descale. */
-  tmp10 += ONE << 2;
-
-  /* Odd part */
-
-  tmp0 = DEQUANTIZE(coef_block[DCTSIZE*1], quantptr[DCTSIZE*1]);
-
-  /* Final output stage */
-
-  output_buf[0][output_col] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0, 3)
-					  & RANGE_MASK];
-  output_buf[1][output_col] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0, 3)
-					  & RANGE_MASK];
-}
-
-#endif /* IDCT_SCALING_SUPPORTED */
-#endif /* DCT_ISLOW_SUPPORTED */
+/*
+ * jidctint.c
+ *
+ * Copyright (C) 1991-1998, Thomas G. Lane.
+ * Modification developed 2002-2009 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains a slow-but-accurate integer implementation of the
+ * inverse DCT (Discrete Cosine Transform).  In the IJG code, this routine
+ * must also perform dequantization of the input coefficients.
+ *
+ * A 2-D IDCT can be done by 1-D IDCT on each column followed by 1-D IDCT
+ * on each row (or vice versa, but it's more convenient to emit a row at
+ * a time).  Direct algorithms are also available, but they are much more
+ * complex and seem not to be any faster when reduced to code.
+ *
+ * This implementation is based on an algorithm described in
+ *   C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT
+ *   Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics,
+ *   Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991.
+ * The primary algorithm described there uses 11 multiplies and 29 adds.
+ * We use their alternate method with 12 multiplies and 32 adds.
+ * The advantage of this method is that no data path contains more than one
+ * multiplication; this allows a very simple and accurate implementation in
+ * scaled fixed-point arithmetic, with a minimal number of shifts.
+ *
+ * We also provide IDCT routines with various output sample block sizes for
+ * direct resolution reduction or enlargement and for direct resolving the
+ * common 2x1 and 1x2 subsampling cases without additional resampling: NxN
+ * (N=1...16), 2NxN, and Nx2N (N=1...8) pixels for one 8x8 input DCT block.
+ *
+ * For N<8 we simply take the corresponding low-frequency coefficients of
+ * the 8x8 input DCT block and apply an NxN point IDCT on the sub-block
+ * to yield the downscaled outputs.
+ * This can be seen as direct low-pass downsampling from the DCT domain
+ * point of view rather than the usual spatial domain point of view,
+ * yielding significant computational savings and results at least
+ * as good as common bilinear (averaging) spatial downsampling.
+ *
+ * For N>8 we apply a partial NxN IDCT on the 8 input coefficients as
+ * lower frequencies and higher frequencies assumed to be zero.
+ * It turns out that the computational effort is similar to the 8x8 IDCT
+ * regarding the output size.
+ * Furthermore, the scaling and descaling is the same for all IDCT sizes.
+ *
+ * CAUTION: We rely on the FIX() macro except for the N=1,2,4,8 cases
+ * since there would be too many additional constants to pre-calculate.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jdct.h"		/* Private declarations for DCT subsystem */
+
+#ifdef DCT_ISLOW_SUPPORTED
+
+
+/*
+ * This module is specialized to the case DCTSIZE = 8.
+ */
+
+#if DCTSIZE != 8
+  Sorry, this code only copes with 8x8 DCT blocks. /* deliberate syntax err */
+#endif
+
+
+/*
+ * The poop on this scaling stuff is as follows:
+ *
+ * Each 1-D IDCT step produces outputs which are a factor of sqrt(N)
+ * larger than the true IDCT outputs.  The final outputs are therefore
+ * a factor of N larger than desired; since N=8 this can be cured by
+ * a simple right shift at the end of the algorithm.  The advantage of
+ * this arrangement is that we save two multiplications per 1-D IDCT,
+ * because the y0 and y4 inputs need not be divided by sqrt(N).
+ *
+ * We have to do addition and subtraction of the integer inputs, which
+ * is no problem, and multiplication by fractional constants, which is
+ * a problem to do in integer arithmetic.  We multiply all the constants
+ * by CONST_SCALE and convert them to integer constants (thus retaining
+ * CONST_BITS bits of precision in the constants).  After doing a
+ * multiplication we have to divide the product by CONST_SCALE, with proper
+ * rounding, to produce the correct output.  This division can be done
+ * cheaply as a right shift of CONST_BITS bits.  We postpone shifting
+ * as long as possible so that partial sums can be added together with
+ * full fractional precision.
+ *
+ * The outputs of the first pass are scaled up by PASS1_BITS bits so that
+ * they are represented to better-than-integral precision.  These outputs
+ * require BITS_IN_JSAMPLE + PASS1_BITS + 3 bits; this fits in a 16-bit word
+ * with the recommended scaling.  (To scale up 12-bit sample data further, an
+ * intermediate INT32 array would be needed.)
+ *
+ * To avoid overflow of the 32-bit intermediate results in pass 2, we must
+ * have BITS_IN_JSAMPLE + CONST_BITS + PASS1_BITS <= 26.  Error analysis
+ * shows that the values given below are the most effective.
+ */
+
+#if BITS_IN_JSAMPLE == 8
+#define CONST_BITS  13
+#define PASS1_BITS  2
+#else
+#define CONST_BITS  13
+#define PASS1_BITS  1		/* lose a little precision to avoid overflow */
+#endif
+
+/* Some C compilers fail to reduce "FIX(constant)" at compile time, thus
+ * causing a lot of useless floating-point operations at run time.
+ * To get around this we use the following pre-calculated constants.
+ * If you change CONST_BITS you may want to add appropriate values.
+ * (With a reasonable C compiler, you can just rely on the FIX() macro...)
+ */
+
+#if CONST_BITS == 13
+#define FIX_0_298631336  ((INT32)  2446)	/* FIX(0.298631336) */
+#define FIX_0_390180644  ((INT32)  3196)	/* FIX(0.390180644) */
+#define FIX_0_541196100  ((INT32)  4433)	/* FIX(0.541196100) */
+#define FIX_0_765366865  ((INT32)  6270)	/* FIX(0.765366865) */
+#define FIX_0_899976223  ((INT32)  7373)	/* FIX(0.899976223) */
+#define FIX_1_175875602  ((INT32)  9633)	/* FIX(1.175875602) */
+#define FIX_1_501321110  ((INT32)  12299)	/* FIX(1.501321110) */
+#define FIX_1_847759065  ((INT32)  15137)	/* FIX(1.847759065) */
+#define FIX_1_961570560  ((INT32)  16069)	/* FIX(1.961570560) */
+#define FIX_2_053119869  ((INT32)  16819)	/* FIX(2.053119869) */
+#define FIX_2_562915447  ((INT32)  20995)	/* FIX(2.562915447) */
+#define FIX_3_072711026  ((INT32)  25172)	/* FIX(3.072711026) */
+#else
+#define FIX_0_298631336  FIX(0.298631336)
+#define FIX_0_390180644  FIX(0.390180644)
+#define FIX_0_541196100  FIX(0.541196100)
+#define FIX_0_765366865  FIX(0.765366865)
+#define FIX_0_899976223  FIX(0.899976223)
+#define FIX_1_175875602  FIX(1.175875602)
+#define FIX_1_501321110  FIX(1.501321110)
+#define FIX_1_847759065  FIX(1.847759065)
+#define FIX_1_961570560  FIX(1.961570560)
+#define FIX_2_053119869  FIX(2.053119869)
+#define FIX_2_562915447  FIX(2.562915447)
+#define FIX_3_072711026  FIX(3.072711026)
+#endif
+
+
+/* Multiply an INT32 variable by an INT32 constant to yield an INT32 result.
+ * For 8-bit samples with the recommended scaling, all the variable
+ * and constant values involved are no more than 16 bits wide, so a
+ * 16x16->32 bit multiply can be used instead of a full 32x32 multiply.
+ * For 12-bit samples, a full 32-bit multiplication will be needed.
+ */
+
+#if BITS_IN_JSAMPLE == 8
+#define MULTIPLY(var,const)  MULTIPLY16C16(var,const)
+#else
+#define MULTIPLY(var,const)  ((var) * (const))
+#endif
+
+
+/* Dequantize a coefficient by multiplying it by the multiplier-table
+ * entry; produce an int result.  In this module, both inputs and result
+ * are 16 bits or less, so either int or short multiply will work.
+ */
+
+#define DEQUANTIZE(coef,quantval)  (((ISLOW_MULT_TYPE) (coef)) * (quantval))
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients.
+ */
+
+GLOBAL(void)
+jpeg_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+		 JCOEFPTR coef_block,
+		 JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp0, tmp1, tmp2, tmp3;
+  INT32 tmp10, tmp11, tmp12, tmp13;
+  INT32 z1, z2, z3;
+  JCOEFPTR inptr;
+  ISLOW_MULT_TYPE * quantptr;
+  int * wsptr;
+  JSAMPROW outptr;
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  int workspace[DCTSIZE2];	/* buffers data between passes */
+  SHIFT_TEMPS
+
+  /* Pass 1: process columns from input, store into work array. */
+  /* Note results are scaled up by sqrt(8) compared to a true IDCT; */
+  /* furthermore, we scale the results by 2**PASS1_BITS. */
+
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = DCTSIZE; ctr > 0; ctr--) {
+    /* Due to quantization, we will usually find that many of the input
+     * coefficients are zero, especially the AC terms.  We can exploit this
+     * by short-circuiting the IDCT calculation for any column in which all
+     * the AC terms are zero.  In that case each output is equal to the
+     * DC coefficient (with scale factor as needed).
+     * With typical images and quantization tables, half or more of the
+     * column DCT calculations can be simplified this way.
+     */
+
+    if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 &&
+	inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&
+	inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&
+	inptr[DCTSIZE*7] == 0) {
+      /* AC terms all zero */
+      int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) << PASS1_BITS;
+
+      wsptr[DCTSIZE*0] = dcval;
+      wsptr[DCTSIZE*1] = dcval;
+      wsptr[DCTSIZE*2] = dcval;
+      wsptr[DCTSIZE*3] = dcval;
+      wsptr[DCTSIZE*4] = dcval;
+      wsptr[DCTSIZE*5] = dcval;
+      wsptr[DCTSIZE*6] = dcval;
+      wsptr[DCTSIZE*7] = dcval;
+
+      inptr++;			/* advance pointers to next column */
+      quantptr++;
+      wsptr++;
+      continue;
+    }
+
+    /* Even part: reverse the even part of the forward DCT. */
+    /* The rotator is sqrt(2)*c(-6). */
+    
+    z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
+
+    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);
+    tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865);
+    tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065);
+
+    z2 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+    z2 <<= CONST_BITS;
+    z3 <<= CONST_BITS;
+    /* Add fudge factor here for final descale. */
+    z2 += ONE << (CONST_BITS-PASS1_BITS-1);
+
+    tmp0 = z2 + z3;
+    tmp1 = z2 - z3;
+
+    tmp10 = tmp0 + tmp2;
+    tmp13 = tmp0 - tmp2;
+    tmp11 = tmp1 + tmp3;
+    tmp12 = tmp1 - tmp3;
+
+    /* Odd part per figure 8; the matrix is unitary and hence its
+     * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
+     */
+
+    tmp0 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
+    tmp1 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
+    tmp2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+    tmp3 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+    
+    z2 = tmp0 + tmp2;
+    z3 = tmp1 + tmp3;
+
+    z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* sqrt(2) * c3 */
+    z2 = MULTIPLY(z2, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
+    z3 = MULTIPLY(z3, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
+    z2 += z1;
+    z3 += z1;
+
+    z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
+    tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
+    tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
+    tmp0 += z1 + z2;
+    tmp3 += z1 + z3;
+
+    z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
+    tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
+    tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
+    tmp1 += z1 + z3;
+    tmp2 += z1 + z2;
+
+    /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
+
+    wsptr[DCTSIZE*0] = (int) RIGHT_SHIFT(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
+    wsptr[DCTSIZE*7] = (int) RIGHT_SHIFT(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
+    wsptr[DCTSIZE*1] = (int) RIGHT_SHIFT(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
+    wsptr[DCTSIZE*6] = (int) RIGHT_SHIFT(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
+    wsptr[DCTSIZE*2] = (int) RIGHT_SHIFT(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
+    wsptr[DCTSIZE*5] = (int) RIGHT_SHIFT(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
+    wsptr[DCTSIZE*3] = (int) RIGHT_SHIFT(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
+    wsptr[DCTSIZE*4] = (int) RIGHT_SHIFT(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
+    
+    inptr++;			/* advance pointers to next column */
+    quantptr++;
+    wsptr++;
+  }
+
+  /* Pass 2: process rows from work array, store into output array. */
+  /* Note that we must descale the results by a factor of 8 == 2**3, */
+  /* and also undo the PASS1_BITS scaling. */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < DCTSIZE; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+    /* Rows of zeroes can be exploited in the same way as we did with columns.
+     * However, the column calculation has created many nonzero AC terms, so
+     * the simplification applies less often (typically 5% to 10% of the time).
+     * On machines with very fast multiplication, it's possible that the
+     * test takes more time than it's worth.  In that case this section
+     * may be commented out.
+     */
+
+#ifndef NO_ZERO_ROW_TEST
+    if (wsptr[1] == 0 && wsptr[2] == 0 && wsptr[3] == 0 && wsptr[4] == 0 &&
+	wsptr[5] == 0 && wsptr[6] == 0 && wsptr[7] == 0) {
+      /* AC terms all zero */
+      JSAMPLE dcval = range_limit[(int) DESCALE((INT32) wsptr[0], PASS1_BITS+3)
+				  & RANGE_MASK];
+
+      outptr[0] = dcval;
+      outptr[1] = dcval;
+      outptr[2] = dcval;
+      outptr[3] = dcval;
+      outptr[4] = dcval;
+      outptr[5] = dcval;
+      outptr[6] = dcval;
+      outptr[7] = dcval;
+
+      wsptr += DCTSIZE;		/* advance pointer to next row */
+      continue;
+    }
+#endif
+
+    /* Even part: reverse the even part of the forward DCT. */
+    /* The rotator is sqrt(2)*c(-6). */
+    
+    z2 = (INT32) wsptr[2];
+    z3 = (INT32) wsptr[6];
+
+    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);
+    tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865);
+    tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065);
+
+    /* Add fudge factor here for final descale. */
+    z2 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
+    z3 = (INT32) wsptr[4];
+
+    tmp0 = (z2 + z3) << CONST_BITS;
+    tmp1 = (z2 - z3) << CONST_BITS;
+    
+    tmp10 = tmp0 + tmp2;
+    tmp13 = tmp0 - tmp2;
+    tmp11 = tmp1 + tmp3;
+    tmp12 = tmp1 - tmp3;
+
+    /* Odd part per figure 8; the matrix is unitary and hence its
+     * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
+     */
+
+    tmp0 = (INT32) wsptr[7];
+    tmp1 = (INT32) wsptr[5];
+    tmp2 = (INT32) wsptr[3];
+    tmp3 = (INT32) wsptr[1];
+
+    z2 = tmp0 + tmp2;
+    z3 = tmp1 + tmp3;
+
+    z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* sqrt(2) * c3 */
+    z2 = MULTIPLY(z2, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
+    z3 = MULTIPLY(z3, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
+    z2 += z1;
+    z3 += z1;
+
+    z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
+    tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
+    tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
+    tmp0 += z1 + z2;
+    tmp3 += z1 + z3;
+
+    z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
+    tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
+    tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
+    tmp1 += z1 + z3;
+    tmp2 += z1 + z2;
+
+    /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
+
+    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp3,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp3,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp2,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp2,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp1,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp1,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13 + tmp0,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp13 - tmp0,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+
+    wsptr += DCTSIZE;		/* advance pointer to next row */
+  }
+}
+
+#ifdef IDCT_SCALING_SUPPORTED
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a 7x7 output block.
+ *
+ * Optimized algorithm with 12 multiplications in the 1-D kernel.
+ * cK represents sqrt(2) * cos(K*pi/14).
+ */
+
+GLOBAL(void)
+jpeg_idct_7x7 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	       JCOEFPTR coef_block,
+	       JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12, tmp13;
+  INT32 z1, z2, z3;
+  JCOEFPTR inptr;
+  ISLOW_MULT_TYPE * quantptr;
+  int * wsptr;
+  JSAMPROW outptr;
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  int workspace[7*7];	/* buffers data between passes */
+  SHIFT_TEMPS
+
+  /* Pass 1: process columns from input, store into work array. */
+
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = 0; ctr < 7; ctr++, inptr++, quantptr++, wsptr++) {
+    /* Even part */
+
+    tmp13 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    tmp13 <<= CONST_BITS;
+    /* Add fudge factor here for final descale. */
+    tmp13 += ONE << (CONST_BITS-PASS1_BITS-1);
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
+
+    tmp10 = MULTIPLY(z2 - z3, FIX(0.881747734));     /* c4 */
+    tmp12 = MULTIPLY(z1 - z2, FIX(0.314692123));     /* c6 */
+    tmp11 = tmp10 + tmp12 + tmp13 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */
+    tmp0 = z1 + z3;
+    z2 -= tmp0;
+    tmp0 = MULTIPLY(tmp0, FIX(1.274162392)) + tmp13; /* c2 */
+    tmp10 += tmp0 - MULTIPLY(z3, FIX(0.077722536));  /* c2-c4-c6 */
+    tmp12 += tmp0 - MULTIPLY(z1, FIX(2.470602249));  /* c2+c4+c6 */
+    tmp13 += MULTIPLY(z2, FIX(1.414213562));         /* c0 */
+
+    /* Odd part */
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
+
+    tmp1 = MULTIPLY(z1 + z2, FIX(0.935414347));      /* (c3+c1-c5)/2 */
+    tmp2 = MULTIPLY(z1 - z2, FIX(0.170262339));      /* (c3+c5-c1)/2 */
+    tmp0 = tmp1 - tmp2;
+    tmp1 += tmp2;
+    tmp2 = MULTIPLY(z2 + z3, - FIX(1.378756276));    /* -c1 */
+    tmp1 += tmp2;
+    z2 = MULTIPLY(z1 + z3, FIX(0.613604268));        /* c5 */
+    tmp0 += z2;
+    tmp2 += z2 + MULTIPLY(z3, FIX(1.870828693));     /* c3+c1-c5 */
+
+    /* Final output stage */
+
+    wsptr[7*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
+    wsptr[7*6] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
+    wsptr[7*1] = (int) RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS-PASS1_BITS);
+    wsptr[7*5] = (int) RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS-PASS1_BITS);
+    wsptr[7*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS);
+    wsptr[7*4] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS);
+    wsptr[7*3] = (int) RIGHT_SHIFT(tmp13, CONST_BITS-PASS1_BITS);
+  }
+
+  /* Pass 2: process 7 rows from work array, store into output array. */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < 7; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+
+    /* Even part */
+
+    /* Add fudge factor here for final descale. */
+    tmp13 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
+    tmp13 <<= CONST_BITS;
+
+    z1 = (INT32) wsptr[2];
+    z2 = (INT32) wsptr[4];
+    z3 = (INT32) wsptr[6];
+
+    tmp10 = MULTIPLY(z2 - z3, FIX(0.881747734));     /* c4 */
+    tmp12 = MULTIPLY(z1 - z2, FIX(0.314692123));     /* c6 */
+    tmp11 = tmp10 + tmp12 + tmp13 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */
+    tmp0 = z1 + z3;
+    z2 -= tmp0;
+    tmp0 = MULTIPLY(tmp0, FIX(1.274162392)) + tmp13; /* c2 */
+    tmp10 += tmp0 - MULTIPLY(z3, FIX(0.077722536));  /* c2-c4-c6 */
+    tmp12 += tmp0 - MULTIPLY(z1, FIX(2.470602249));  /* c2+c4+c6 */
+    tmp13 += MULTIPLY(z2, FIX(1.414213562));         /* c0 */
+
+    /* Odd part */
+
+    z1 = (INT32) wsptr[1];
+    z2 = (INT32) wsptr[3];
+    z3 = (INT32) wsptr[5];
+
+    tmp1 = MULTIPLY(z1 + z2, FIX(0.935414347));      /* (c3+c1-c5)/2 */
+    tmp2 = MULTIPLY(z1 - z2, FIX(0.170262339));      /* (c3+c5-c1)/2 */
+    tmp0 = tmp1 - tmp2;
+    tmp1 += tmp2;
+    tmp2 = MULTIPLY(z2 + z3, - FIX(1.378756276));    /* -c1 */
+    tmp1 += tmp2;
+    z2 = MULTIPLY(z1 + z3, FIX(0.613604268));        /* c5 */
+    tmp0 += z2;
+    tmp2 += z2 + MULTIPLY(z3, FIX(1.870828693));     /* c3+c1-c5 */
+
+    /* Final output stage */
+
+    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+
+    wsptr += 7;		/* advance pointer to next row */
+  }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a reduced-size 6x6 output block.
+ *
+ * Optimized algorithm with 3 multiplications in the 1-D kernel.
+ * cK represents sqrt(2) * cos(K*pi/12).
+ */
+
+GLOBAL(void)
+jpeg_idct_6x6 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	       JCOEFPTR coef_block,
+	       JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12;
+  INT32 z1, z2, z3;
+  JCOEFPTR inptr;
+  ISLOW_MULT_TYPE * quantptr;
+  int * wsptr;
+  JSAMPROW outptr;
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  int workspace[6*6];	/* buffers data between passes */
+  SHIFT_TEMPS
+
+  /* Pass 1: process columns from input, store into work array. */
+
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = 0; ctr < 6; ctr++, inptr++, quantptr++, wsptr++) {
+    /* Even part */
+
+    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    tmp0 <<= CONST_BITS;
+    /* Add fudge factor here for final descale. */
+    tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
+    tmp2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+    tmp10 = MULTIPLY(tmp2, FIX(0.707106781));   /* c4 */
+    tmp1 = tmp0 + tmp10;
+    tmp11 = RIGHT_SHIFT(tmp0 - tmp10 - tmp10, CONST_BITS-PASS1_BITS);
+    tmp10 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+    tmp0 = MULTIPLY(tmp10, FIX(1.224744871));   /* c2 */
+    tmp10 = tmp1 + tmp0;
+    tmp12 = tmp1 - tmp0;
+
+    /* Odd part */
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
+    tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
+    tmp0 = tmp1 + ((z1 + z2) << CONST_BITS);
+    tmp2 = tmp1 + ((z3 - z2) << CONST_BITS);
+    tmp1 = (z1 - z2 - z3) << PASS1_BITS;
+
+    /* Final output stage */
+
+    wsptr[6*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
+    wsptr[6*5] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
+    wsptr[6*1] = (int) (tmp11 + tmp1);
+    wsptr[6*4] = (int) (tmp11 - tmp1);
+    wsptr[6*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS);
+    wsptr[6*3] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS);
+  }
+
+  /* Pass 2: process 6 rows from work array, store into output array. */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < 6; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+
+    /* Even part */
+
+    /* Add fudge factor here for final descale. */
+    tmp0 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
+    tmp0 <<= CONST_BITS;
+    tmp2 = (INT32) wsptr[4];
+    tmp10 = MULTIPLY(tmp2, FIX(0.707106781));   /* c4 */
+    tmp1 = tmp0 + tmp10;
+    tmp11 = tmp0 - tmp10 - tmp10;
+    tmp10 = (INT32) wsptr[2];
+    tmp0 = MULTIPLY(tmp10, FIX(1.224744871));   /* c2 */
+    tmp10 = tmp1 + tmp0;
+    tmp12 = tmp1 - tmp0;
+
+    /* Odd part */
+
+    z1 = (INT32) wsptr[1];
+    z2 = (INT32) wsptr[3];
+    z3 = (INT32) wsptr[5];
+    tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
+    tmp0 = tmp1 + ((z1 + z2) << CONST_BITS);
+    tmp2 = tmp1 + ((z3 - z2) << CONST_BITS);
+    tmp1 = (z1 - z2 - z3) << CONST_BITS;
+
+    /* Final output stage */
+
+    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+
+    wsptr += 6;		/* advance pointer to next row */
+  }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a reduced-size 5x5 output block.
+ *
+ * Optimized algorithm with 5 multiplications in the 1-D kernel.
+ * cK represents sqrt(2) * cos(K*pi/10).
+ */
+
+GLOBAL(void)
+jpeg_idct_5x5 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	       JCOEFPTR coef_block,
+	       JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp0, tmp1, tmp10, tmp11, tmp12;
+  INT32 z1, z2, z3;
+  JCOEFPTR inptr;
+  ISLOW_MULT_TYPE * quantptr;
+  int * wsptr;
+  JSAMPROW outptr;
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  int workspace[5*5];	/* buffers data between passes */
+  SHIFT_TEMPS
+
+  /* Pass 1: process columns from input, store into work array. */
+
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = 0; ctr < 5; ctr++, inptr++, quantptr++, wsptr++) {
+    /* Even part */
+
+    tmp12 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    tmp12 <<= CONST_BITS;
+    /* Add fudge factor here for final descale. */
+    tmp12 += ONE << (CONST_BITS-PASS1_BITS-1);
+    tmp0 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+    tmp1 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+    z1 = MULTIPLY(tmp0 + tmp1, FIX(0.790569415)); /* (c2+c4)/2 */
+    z2 = MULTIPLY(tmp0 - tmp1, FIX(0.353553391)); /* (c2-c4)/2 */
+    z3 = tmp12 + z2;
+    tmp10 = z3 + z1;
+    tmp11 = z3 - z1;
+    tmp12 -= z2 << 2;
+
+    /* Odd part */
+
+    z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+
+    z1 = MULTIPLY(z2 + z3, FIX(0.831253876));     /* c3 */
+    tmp0 = z1 + MULTIPLY(z2, FIX(0.513743148));   /* c1-c3 */
+    tmp1 = z1 - MULTIPLY(z3, FIX(2.176250899));   /* c1+c3 */
+
+    /* Final output stage */
+
+    wsptr[5*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
+    wsptr[5*4] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
+    wsptr[5*1] = (int) RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS-PASS1_BITS);
+    wsptr[5*3] = (int) RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS-PASS1_BITS);
+    wsptr[5*2] = (int) RIGHT_SHIFT(tmp12, CONST_BITS-PASS1_BITS);
+  }
+
+  /* Pass 2: process 5 rows from work array, store into output array. */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < 5; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+
+    /* Even part */
+
+    /* Add fudge factor here for final descale. */
+    tmp12 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
+    tmp12 <<= CONST_BITS;
+    tmp0 = (INT32) wsptr[2];
+    tmp1 = (INT32) wsptr[4];
+    z1 = MULTIPLY(tmp0 + tmp1, FIX(0.790569415)); /* (c2+c4)/2 */
+    z2 = MULTIPLY(tmp0 - tmp1, FIX(0.353553391)); /* (c2-c4)/2 */
+    z3 = tmp12 + z2;
+    tmp10 = z3 + z1;
+    tmp11 = z3 - z1;
+    tmp12 -= z2 << 2;
+
+    /* Odd part */
+
+    z2 = (INT32) wsptr[1];
+    z3 = (INT32) wsptr[3];
+
+    z1 = MULTIPLY(z2 + z3, FIX(0.831253876));     /* c3 */
+    tmp0 = z1 + MULTIPLY(z2, FIX(0.513743148));   /* c1-c3 */
+    tmp1 = z1 - MULTIPLY(z3, FIX(2.176250899));   /* c1+c3 */
+
+    /* Final output stage */
+
+    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+
+    wsptr += 5;		/* advance pointer to next row */
+  }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a reduced-size 4x4 output block.
+ *
+ * Optimized algorithm with 3 multiplications in the 1-D kernel.
+ * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT].
+ */
+
+GLOBAL(void)
+jpeg_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	       JCOEFPTR coef_block,
+	       JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp0, tmp2, tmp10, tmp12;
+  INT32 z1, z2, z3;
+  JCOEFPTR inptr;
+  ISLOW_MULT_TYPE * quantptr;
+  int * wsptr;
+  JSAMPROW outptr;
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  int workspace[4*4];	/* buffers data between passes */
+  SHIFT_TEMPS
+
+  /* Pass 1: process columns from input, store into work array. */
+
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = 0; ctr < 4; ctr++, inptr++, quantptr++, wsptr++) {
+    /* Even part */
+
+    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+    
+    tmp10 = (tmp0 + tmp2) << PASS1_BITS;
+    tmp12 = (tmp0 - tmp2) << PASS1_BITS;
+
+    /* Odd part */
+    /* Same rotation as in the even part of the 8x8 LL&M IDCT */
+
+    z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+
+    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);               /* c6 */
+    /* Add fudge factor here for final descale. */
+    z1 += ONE << (CONST_BITS-PASS1_BITS-1);
+    tmp0 = RIGHT_SHIFT(z1 + MULTIPLY(z2, FIX_0_765366865), /* c2-c6 */
+		       CONST_BITS-PASS1_BITS);
+    tmp2 = RIGHT_SHIFT(z1 - MULTIPLY(z3, FIX_1_847759065), /* c2+c6 */
+		       CONST_BITS-PASS1_BITS);
+
+    /* Final output stage */
+
+    wsptr[4*0] = (int) (tmp10 + tmp0);
+    wsptr[4*3] = (int) (tmp10 - tmp0);
+    wsptr[4*1] = (int) (tmp12 + tmp2);
+    wsptr[4*2] = (int) (tmp12 - tmp2);
+  }
+
+  /* Pass 2: process 4 rows from work array, store into output array. */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < 4; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+
+    /* Even part */
+
+    /* Add fudge factor here for final descale. */
+    tmp0 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
+    tmp2 = (INT32) wsptr[2];
+
+    tmp10 = (tmp0 + tmp2) << CONST_BITS;
+    tmp12 = (tmp0 - tmp2) << CONST_BITS;
+
+    /* Odd part */
+    /* Same rotation as in the even part of the 8x8 LL&M IDCT */
+
+    z2 = (INT32) wsptr[1];
+    z3 = (INT32) wsptr[3];
+
+    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);   /* c6 */
+    tmp0 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
+    tmp2 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
+
+    /* Final output stage */
+
+    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+
+    wsptr += 4;		/* advance pointer to next row */
+  }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a reduced-size 3x3 output block.
+ *
+ * Optimized algorithm with 2 multiplications in the 1-D kernel.
+ * cK represents sqrt(2) * cos(K*pi/6).
+ */
+
+GLOBAL(void)
+jpeg_idct_3x3 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	       JCOEFPTR coef_block,
+	       JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp0, tmp2, tmp10, tmp12;
+  JCOEFPTR inptr;
+  ISLOW_MULT_TYPE * quantptr;
+  int * wsptr;
+  JSAMPROW outptr;
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  int workspace[3*3];	/* buffers data between passes */
+  SHIFT_TEMPS
+
+  /* Pass 1: process columns from input, store into work array. */
+
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = 0; ctr < 3; ctr++, inptr++, quantptr++, wsptr++) {
+    /* Even part */
+
+    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    tmp0 <<= CONST_BITS;
+    /* Add fudge factor here for final descale. */
+    tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
+    tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+    tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */
+    tmp10 = tmp0 + tmp12;
+    tmp2 = tmp0 - tmp12 - tmp12;
+
+    /* Odd part */
+
+    tmp12 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+    tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */
+
+    /* Final output stage */
+
+    wsptr[3*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
+    wsptr[3*2] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
+    wsptr[3*1] = (int) RIGHT_SHIFT(tmp2, CONST_BITS-PASS1_BITS);
+  }
+
+  /* Pass 2: process 3 rows from work array, store into output array. */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < 3; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+
+    /* Even part */
+
+    /* Add fudge factor here for final descale. */
+    tmp0 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
+    tmp0 <<= CONST_BITS;
+    tmp2 = (INT32) wsptr[2];
+    tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */
+    tmp10 = tmp0 + tmp12;
+    tmp2 = tmp0 - tmp12 - tmp12;
+
+    /* Odd part */
+
+    tmp12 = (INT32) wsptr[1];
+    tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */
+
+    /* Final output stage */
+
+    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp2,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+
+    wsptr += 3;		/* advance pointer to next row */
+  }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a reduced-size 2x2 output block.
+ *
+ * Multiplication-less algorithm.
+ */
+
+GLOBAL(void)
+jpeg_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	       JCOEFPTR coef_block,
+	       JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
+  ISLOW_MULT_TYPE * quantptr;
+  JSAMPROW outptr;
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  SHIFT_TEMPS
+
+  /* Pass 1: process columns from input. */
+
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+
+  /* Column 0 */
+  tmp4 = DEQUANTIZE(coef_block[DCTSIZE*0], quantptr[DCTSIZE*0]);
+  tmp5 = DEQUANTIZE(coef_block[DCTSIZE*1], quantptr[DCTSIZE*1]);
+  /* Add fudge factor here for final descale. */
+  tmp4 += ONE << 2;
+
+  tmp0 = tmp4 + tmp5;
+  tmp2 = tmp4 - tmp5;
+
+  /* Column 1 */
+  tmp4 = DEQUANTIZE(coef_block[DCTSIZE*0+1], quantptr[DCTSIZE*0+1]);
+  tmp5 = DEQUANTIZE(coef_block[DCTSIZE*1+1], quantptr[DCTSIZE*1+1]);
+
+  tmp1 = tmp4 + tmp5;
+  tmp3 = tmp4 - tmp5;
+
+  /* Pass 2: process 2 rows, store into output array. */
+
+  /* Row 0 */
+  outptr = output_buf[0] + output_col;
+
+  outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp0 + tmp1, 3) & RANGE_MASK];
+  outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp0 - tmp1, 3) & RANGE_MASK];
+
+  /* Row 1 */
+  outptr = output_buf[1] + output_col;
+
+  outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp2 + tmp3, 3) & RANGE_MASK];
+  outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp2 - tmp3, 3) & RANGE_MASK];
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a reduced-size 1x1 output block.
+ *
+ * We hardly need an inverse DCT routine for this: just take the
+ * average pixel value, which is one-eighth of the DC coefficient.
+ */
+
+GLOBAL(void)
+jpeg_idct_1x1 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	       JCOEFPTR coef_block,
+	       JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  int dcval;
+  ISLOW_MULT_TYPE * quantptr;
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  SHIFT_TEMPS
+
+  /* 1x1 is trivial: just take the DC coefficient divided by 8. */
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  dcval = DEQUANTIZE(coef_block[0], quantptr[0]);
+  dcval = (int) DESCALE((INT32) dcval, 3);
+
+  output_buf[0][output_col] = range_limit[dcval & RANGE_MASK];
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a 9x9 output block.
+ *
+ * Optimized algorithm with 10 multiplications in the 1-D kernel.
+ * cK represents sqrt(2) * cos(K*pi/18).
+ */
+
+GLOBAL(void)
+jpeg_idct_9x9 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	       JCOEFPTR coef_block,
+	       JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13, tmp14;
+  INT32 z1, z2, z3, z4;
+  JCOEFPTR inptr;
+  ISLOW_MULT_TYPE * quantptr;
+  int * wsptr;
+  JSAMPROW outptr;
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  int workspace[8*9];	/* buffers data between passes */
+  SHIFT_TEMPS
+
+  /* Pass 1: process columns from input, store into work array. */
+
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
+    /* Even part */
+
+    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    tmp0 <<= CONST_BITS;
+    /* Add fudge factor here for final descale. */
+    tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
+
+    tmp3 = MULTIPLY(z3, FIX(0.707106781));      /* c6 */
+    tmp1 = tmp0 + tmp3;
+    tmp2 = tmp0 - tmp3 - tmp3;
+
+    tmp0 = MULTIPLY(z1 - z2, FIX(0.707106781)); /* c6 */
+    tmp11 = tmp2 + tmp0;
+    tmp14 = tmp2 - tmp0 - tmp0;
+
+    tmp0 = MULTIPLY(z1 + z2, FIX(1.328926049)); /* c2 */
+    tmp2 = MULTIPLY(z1, FIX(1.083350441));      /* c4 */
+    tmp3 = MULTIPLY(z2, FIX(0.245575608));      /* c8 */
+
+    tmp10 = tmp1 + tmp0 - tmp3;
+    tmp12 = tmp1 - tmp0 + tmp2;
+    tmp13 = tmp1 - tmp2 + tmp3;
+
+    /* Odd part */
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
+    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
+
+    z2 = MULTIPLY(z2, - FIX(1.224744871));           /* -c3 */
+
+    tmp2 = MULTIPLY(z1 + z3, FIX(0.909038955));      /* c5 */
+    tmp3 = MULTIPLY(z1 + z4, FIX(0.483689525));      /* c7 */
+    tmp0 = tmp2 + tmp3 - z2;
+    tmp1 = MULTIPLY(z3 - z4, FIX(1.392728481));      /* c1 */
+    tmp2 += z2 - tmp1;
+    tmp3 += z2 + tmp1;
+    tmp1 = MULTIPLY(z1 - z3 - z4, FIX(1.224744871)); /* c3 */
+
+    /* Final output stage */
+
+    wsptr[8*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
+    wsptr[8*8] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
+    wsptr[8*1] = (int) RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS-PASS1_BITS);
+    wsptr[8*7] = (int) RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS-PASS1_BITS);
+    wsptr[8*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS);
+    wsptr[8*6] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS);
+    wsptr[8*3] = (int) RIGHT_SHIFT(tmp13 + tmp3, CONST_BITS-PASS1_BITS);
+    wsptr[8*5] = (int) RIGHT_SHIFT(tmp13 - tmp3, CONST_BITS-PASS1_BITS);
+    wsptr[8*4] = (int) RIGHT_SHIFT(tmp14, CONST_BITS-PASS1_BITS);
+  }
+
+  /* Pass 2: process 9 rows from work array, store into output array. */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < 9; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+
+    /* Even part */
+
+    /* Add fudge factor here for final descale. */
+    tmp0 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
+    tmp0 <<= CONST_BITS;
+
+    z1 = (INT32) wsptr[2];
+    z2 = (INT32) wsptr[4];
+    z3 = (INT32) wsptr[6];
+
+    tmp3 = MULTIPLY(z3, FIX(0.707106781));      /* c6 */
+    tmp1 = tmp0 + tmp3;
+    tmp2 = tmp0 - tmp3 - tmp3;
+
+    tmp0 = MULTIPLY(z1 - z2, FIX(0.707106781)); /* c6 */
+    tmp11 = tmp2 + tmp0;
+    tmp14 = tmp2 - tmp0 - tmp0;
+
+    tmp0 = MULTIPLY(z1 + z2, FIX(1.328926049)); /* c2 */
+    tmp2 = MULTIPLY(z1, FIX(1.083350441));      /* c4 */
+    tmp3 = MULTIPLY(z2, FIX(0.245575608));      /* c8 */
+
+    tmp10 = tmp1 + tmp0 - tmp3;
+    tmp12 = tmp1 - tmp0 + tmp2;
+    tmp13 = tmp1 - tmp2 + tmp3;
+
+    /* Odd part */
+
+    z1 = (INT32) wsptr[1];
+    z2 = (INT32) wsptr[3];
+    z3 = (INT32) wsptr[5];
+    z4 = (INT32) wsptr[7];
+
+    z2 = MULTIPLY(z2, - FIX(1.224744871));           /* -c3 */
+
+    tmp2 = MULTIPLY(z1 + z3, FIX(0.909038955));      /* c5 */
+    tmp3 = MULTIPLY(z1 + z4, FIX(0.483689525));      /* c7 */
+    tmp0 = tmp2 + tmp3 - z2;
+    tmp1 = MULTIPLY(z3 - z4, FIX(1.392728481));      /* c1 */
+    tmp2 += z2 - tmp1;
+    tmp3 += z2 + tmp1;
+    tmp1 = MULTIPLY(z1 - z3 - z4, FIX(1.224744871)); /* c3 */
+
+    /* Final output stage */
+
+    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13 + tmp3,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp13 - tmp3,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp14,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+
+    wsptr += 8;		/* advance pointer to next row */
+  }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a 10x10 output block.
+ *
+ * Optimized algorithm with 12 multiplications in the 1-D kernel.
+ * cK represents sqrt(2) * cos(K*pi/20).
+ */
+
+GLOBAL(void)
+jpeg_idct_10x10 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+		 JCOEFPTR coef_block,
+		 JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
+  INT32 tmp20, tmp21, tmp22, tmp23, tmp24;
+  INT32 z1, z2, z3, z4, z5;
+  JCOEFPTR inptr;
+  ISLOW_MULT_TYPE * quantptr;
+  int * wsptr;
+  JSAMPROW outptr;
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  int workspace[8*10];	/* buffers data between passes */
+  SHIFT_TEMPS
+
+  /* Pass 1: process columns from input, store into work array. */
+
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
+    /* Even part */
+
+    z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    z3 <<= CONST_BITS;
+    /* Add fudge factor here for final descale. */
+    z3 += ONE << (CONST_BITS-PASS1_BITS-1);
+    z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+    z1 = MULTIPLY(z4, FIX(1.144122806));         /* c4 */
+    z2 = MULTIPLY(z4, FIX(0.437016024));         /* c8 */
+    tmp10 = z3 + z1;
+    tmp11 = z3 - z2;
+
+    tmp22 = RIGHT_SHIFT(z3 - ((z1 - z2) << 1),   /* c0 = (c4-c8)*2 */
+			CONST_BITS-PASS1_BITS);
+
+    z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
+
+    z1 = MULTIPLY(z2 + z3, FIX(0.831253876));    /* c6 */
+    tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */
+    tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */
+
+    tmp20 = tmp10 + tmp12;
+    tmp24 = tmp10 - tmp12;
+    tmp21 = tmp11 + tmp13;
+    tmp23 = tmp11 - tmp13;
+
+    /* Odd part */
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
+    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
+
+    tmp11 = z2 + z4;
+    tmp13 = z2 - z4;
+
+    tmp12 = MULTIPLY(tmp13, FIX(0.309016994));        /* (c3-c7)/2 */
+    z5 = z3 << CONST_BITS;
+
+    z2 = MULTIPLY(tmp11, FIX(0.951056516));           /* (c3+c7)/2 */
+    z4 = z5 + tmp12;
+
+    tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */
+    tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */
+
+    z2 = MULTIPLY(tmp11, FIX(0.587785252));           /* (c1-c9)/2 */
+    z4 = z5 - tmp12 - (tmp13 << (CONST_BITS - 1));
+
+    tmp12 = (z1 - tmp13 - z3) << PASS1_BITS;
+
+    tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */
+    tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */
+
+    /* Final output stage */
+
+    wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
+    wsptr[8*9] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
+    wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
+    wsptr[8*8] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
+    wsptr[8*2] = (int) (tmp22 + tmp12);
+    wsptr[8*7] = (int) (tmp22 - tmp12);
+    wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
+    wsptr[8*6] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
+    wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
+    wsptr[8*5] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
+  }
+
+  /* Pass 2: process 10 rows from work array, store into output array. */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < 10; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+
+    /* Even part */
+
+    /* Add fudge factor here for final descale. */
+    z3 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
+    z3 <<= CONST_BITS;
+    z4 = (INT32) wsptr[4];
+    z1 = MULTIPLY(z4, FIX(1.144122806));         /* c4 */
+    z2 = MULTIPLY(z4, FIX(0.437016024));         /* c8 */
+    tmp10 = z3 + z1;
+    tmp11 = z3 - z2;
+
+    tmp22 = z3 - ((z1 - z2) << 1);               /* c0 = (c4-c8)*2 */
+
+    z2 = (INT32) wsptr[2];
+    z3 = (INT32) wsptr[6];
+
+    z1 = MULTIPLY(z2 + z3, FIX(0.831253876));    /* c6 */
+    tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */
+    tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */
+
+    tmp20 = tmp10 + tmp12;
+    tmp24 = tmp10 - tmp12;
+    tmp21 = tmp11 + tmp13;
+    tmp23 = tmp11 - tmp13;
+
+    /* Odd part */
+
+    z1 = (INT32) wsptr[1];
+    z2 = (INT32) wsptr[3];
+    z3 = (INT32) wsptr[5];
+    z3 <<= CONST_BITS;
+    z4 = (INT32) wsptr[7];
+
+    tmp11 = z2 + z4;
+    tmp13 = z2 - z4;
+
+    tmp12 = MULTIPLY(tmp13, FIX(0.309016994));        /* (c3-c7)/2 */
+
+    z2 = MULTIPLY(tmp11, FIX(0.951056516));           /* (c3+c7)/2 */
+    z4 = z3 + tmp12;
+
+    tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */
+    tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */
+
+    z2 = MULTIPLY(tmp11, FIX(0.587785252));           /* (c1-c9)/2 */
+    z4 = z3 - tmp12 - (tmp13 << (CONST_BITS - 1));
+
+    tmp12 = ((z1 - tmp13) << CONST_BITS) - z3;
+
+    tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */
+    tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */
+
+    /* Final output stage */
+
+    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+
+    wsptr += 8;		/* advance pointer to next row */
+  }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a 11x11 output block.
+ *
+ * Optimized algorithm with 24 multiplications in the 1-D kernel.
+ * cK represents sqrt(2) * cos(K*pi/22).
+ */
+
+GLOBAL(void)
+jpeg_idct_11x11 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+		 JCOEFPTR coef_block,
+		 JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
+  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25;
+  INT32 z1, z2, z3, z4;
+  JCOEFPTR inptr;
+  ISLOW_MULT_TYPE * quantptr;
+  int * wsptr;
+  JSAMPROW outptr;
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  int workspace[8*11];	/* buffers data between passes */
+  SHIFT_TEMPS
+
+  /* Pass 1: process columns from input, store into work array. */
+
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
+    /* Even part */
+
+    tmp10 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    tmp10 <<= CONST_BITS;
+    /* Add fudge factor here for final descale. */
+    tmp10 += ONE << (CONST_BITS-PASS1_BITS-1);
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
+
+    tmp20 = MULTIPLY(z2 - z3, FIX(2.546640132));     /* c2+c4 */
+    tmp23 = MULTIPLY(z2 - z1, FIX(0.430815045));     /* c2-c6 */
+    z4 = z1 + z3;
+    tmp24 = MULTIPLY(z4, - FIX(1.155664402));        /* -(c2-c10) */
+    z4 -= z2;
+    tmp25 = tmp10 + MULTIPLY(z4, FIX(1.356927976));  /* c2 */
+    tmp21 = tmp20 + tmp23 + tmp25 -
+	    MULTIPLY(z2, FIX(1.821790775));          /* c2+c4+c10-c6 */
+    tmp20 += tmp25 + MULTIPLY(z3, FIX(2.115825087)); /* c4+c6 */
+    tmp23 += tmp25 - MULTIPLY(z1, FIX(1.513598477)); /* c6+c8 */
+    tmp24 += tmp25;
+    tmp22 = tmp24 - MULTIPLY(z3, FIX(0.788749120));  /* c8+c10 */
+    tmp24 += MULTIPLY(z2, FIX(1.944413522)) -        /* c2+c8 */
+	     MULTIPLY(z1, FIX(1.390975730));         /* c4+c10 */
+    tmp25 = tmp10 - MULTIPLY(z4, FIX(1.414213562));  /* c0 */
+
+    /* Odd part */
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
+    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
+
+    tmp11 = z1 + z2;
+    tmp14 = MULTIPLY(tmp11 + z3 + z4, FIX(0.398430003)); /* c9 */
+    tmp11 = MULTIPLY(tmp11, FIX(0.887983902));           /* c3-c9 */
+    tmp12 = MULTIPLY(z1 + z3, FIX(0.670361295));         /* c5-c9 */
+    tmp13 = tmp14 + MULTIPLY(z1 + z4, FIX(0.366151574)); /* c7-c9 */
+    tmp10 = tmp11 + tmp12 + tmp13 -
+	    MULTIPLY(z1, FIX(0.923107866));              /* c7+c5+c3-c1-2*c9 */
+    z1    = tmp14 - MULTIPLY(z2 + z3, FIX(1.163011579)); /* c7+c9 */
+    tmp11 += z1 + MULTIPLY(z2, FIX(2.073276588));        /* c1+c7+3*c9-c3 */
+    tmp12 += z1 - MULTIPLY(z3, FIX(1.192193623));        /* c3+c5-c7-c9 */
+    z1    = MULTIPLY(z2 + z4, - FIX(1.798248910));       /* -(c1+c9) */
+    tmp11 += z1;
+    tmp13 += z1 + MULTIPLY(z4, FIX(2.102458632));        /* c1+c5+c9-c7 */
+    tmp14 += MULTIPLY(z2, - FIX(1.467221301)) +          /* -(c5+c9) */
+	     MULTIPLY(z3, FIX(1.001388905)) -            /* c1-c9 */
+	     MULTIPLY(z4, FIX(1.684843907));             /* c3+c9 */
+
+    /* Final output stage */
+
+    wsptr[8*0]  = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
+    wsptr[8*10] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
+    wsptr[8*1]  = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
+    wsptr[8*9]  = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
+    wsptr[8*2]  = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
+    wsptr[8*8]  = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
+    wsptr[8*3]  = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
+    wsptr[8*7]  = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
+    wsptr[8*4]  = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
+    wsptr[8*6]  = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
+    wsptr[8*5]  = (int) RIGHT_SHIFT(tmp25, CONST_BITS-PASS1_BITS);
+  }
+
+  /* Pass 2: process 11 rows from work array, store into output array. */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < 11; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+
+    /* Even part */
+
+    /* Add fudge factor here for final descale. */
+    tmp10 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
+    tmp10 <<= CONST_BITS;
+
+    z1 = (INT32) wsptr[2];
+    z2 = (INT32) wsptr[4];
+    z3 = (INT32) wsptr[6];
+
+    tmp20 = MULTIPLY(z2 - z3, FIX(2.546640132));     /* c2+c4 */
+    tmp23 = MULTIPLY(z2 - z1, FIX(0.430815045));     /* c2-c6 */
+    z4 = z1 + z3;
+    tmp24 = MULTIPLY(z4, - FIX(1.155664402));        /* -(c2-c10) */
+    z4 -= z2;
+    tmp25 = tmp10 + MULTIPLY(z4, FIX(1.356927976));  /* c2 */
+    tmp21 = tmp20 + tmp23 + tmp25 -
+	    MULTIPLY(z2, FIX(1.821790775));          /* c2+c4+c10-c6 */
+    tmp20 += tmp25 + MULTIPLY(z3, FIX(2.115825087)); /* c4+c6 */
+    tmp23 += tmp25 - MULTIPLY(z1, FIX(1.513598477)); /* c6+c8 */
+    tmp24 += tmp25;
+    tmp22 = tmp24 - MULTIPLY(z3, FIX(0.788749120));  /* c8+c10 */
+    tmp24 += MULTIPLY(z2, FIX(1.944413522)) -        /* c2+c8 */
+	     MULTIPLY(z1, FIX(1.390975730));         /* c4+c10 */
+    tmp25 = tmp10 - MULTIPLY(z4, FIX(1.414213562));  /* c0 */
+
+    /* Odd part */
+
+    z1 = (INT32) wsptr[1];
+    z2 = (INT32) wsptr[3];
+    z3 = (INT32) wsptr[5];
+    z4 = (INT32) wsptr[7];
+
+    tmp11 = z1 + z2;
+    tmp14 = MULTIPLY(tmp11 + z3 + z4, FIX(0.398430003)); /* c9 */
+    tmp11 = MULTIPLY(tmp11, FIX(0.887983902));           /* c3-c9 */
+    tmp12 = MULTIPLY(z1 + z3, FIX(0.670361295));         /* c5-c9 */
+    tmp13 = tmp14 + MULTIPLY(z1 + z4, FIX(0.366151574)); /* c7-c9 */
+    tmp10 = tmp11 + tmp12 + tmp13 -
+	    MULTIPLY(z1, FIX(0.923107866));              /* c7+c5+c3-c1-2*c9 */
+    z1    = tmp14 - MULTIPLY(z2 + z3, FIX(1.163011579)); /* c7+c9 */
+    tmp11 += z1 + MULTIPLY(z2, FIX(2.073276588));        /* c1+c7+3*c9-c3 */
+    tmp12 += z1 - MULTIPLY(z3, FIX(1.192193623));        /* c3+c5-c7-c9 */
+    z1    = MULTIPLY(z2 + z4, - FIX(1.798248910));       /* -(c1+c9) */
+    tmp11 += z1;
+    tmp13 += z1 + MULTIPLY(z4, FIX(2.102458632));        /* c1+c5+c9-c7 */
+    tmp14 += MULTIPLY(z2, - FIX(1.467221301)) +          /* -(c5+c9) */
+	     MULTIPLY(z3, FIX(1.001388905)) -            /* c1-c9 */
+	     MULTIPLY(z4, FIX(1.684843907));             /* c3+c9 */
+
+    /* Final output stage */
+
+    outptr[0]  = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[1]  = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[9]  = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[2]  = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[8]  = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[3]  = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[7]  = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[4]  = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[6]  = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[5]  = range_limit[(int) RIGHT_SHIFT(tmp25,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+
+    wsptr += 8;		/* advance pointer to next row */
+  }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a 12x12 output block.
+ *
+ * Optimized algorithm with 15 multiplications in the 1-D kernel.
+ * cK represents sqrt(2) * cos(K*pi/24).
+ */
+
+GLOBAL(void)
+jpeg_idct_12x12 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+		 JCOEFPTR coef_block,
+		 JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
+  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25;
+  INT32 z1, z2, z3, z4;
+  JCOEFPTR inptr;
+  ISLOW_MULT_TYPE * quantptr;
+  int * wsptr;
+  JSAMPROW outptr;
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  int workspace[8*12];	/* buffers data between passes */
+  SHIFT_TEMPS
+
+  /* Pass 1: process columns from input, store into work array. */
+
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
+    /* Even part */
+
+    z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    z3 <<= CONST_BITS;
+    /* Add fudge factor here for final descale. */
+    z3 += ONE << (CONST_BITS-PASS1_BITS-1);
+
+    z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+    z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */
+
+    tmp10 = z3 + z4;
+    tmp11 = z3 - z4;
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+    z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */
+    z1 <<= CONST_BITS;
+    z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
+    z2 <<= CONST_BITS;
+
+    tmp12 = z1 - z2;
+
+    tmp21 = z3 + tmp12;
+    tmp24 = z3 - tmp12;
+
+    tmp12 = z4 + z2;
+
+    tmp20 = tmp10 + tmp12;
+    tmp25 = tmp10 - tmp12;
+
+    tmp12 = z4 - z1 - z2;
+
+    tmp22 = tmp11 + tmp12;
+    tmp23 = tmp11 - tmp12;
+
+    /* Odd part */
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
+    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
+
+    tmp11 = MULTIPLY(z2, FIX(1.306562965));                  /* c3 */
+    tmp14 = MULTIPLY(z2, - FIX_0_541196100);                 /* -c9 */
+
+    tmp10 = z1 + z3;
+    tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669));          /* c7 */
+    tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384));       /* c5-c7 */
+    tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716));  /* c1-c5 */
+    tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580));           /* -(c7+c11) */
+    tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */
+    tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */
+    tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) -        /* c7-c11 */
+	     MULTIPLY(z4, FIX(1.982889723));                 /* c5+c7 */
+
+    z1 -= z4;
+    z2 -= z3;
+    z3 = MULTIPLY(z1 + z2, FIX_0_541196100);                 /* c9 */
+    tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865);              /* c3-c9 */
+    tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065);              /* c3+c9 */
+
+    /* Final output stage */
+
+    wsptr[8*0]  = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
+    wsptr[8*11] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
+    wsptr[8*1]  = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
+    wsptr[8*10] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
+    wsptr[8*2]  = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
+    wsptr[8*9]  = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
+    wsptr[8*3]  = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
+    wsptr[8*8]  = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
+    wsptr[8*4]  = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
+    wsptr[8*7]  = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
+    wsptr[8*5]  = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
+    wsptr[8*6]  = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
+  }
+
+  /* Pass 2: process 12 rows from work array, store into output array. */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < 12; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+
+    /* Even part */
+
+    /* Add fudge factor here for final descale. */
+    z3 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
+    z3 <<= CONST_BITS;
+
+    z4 = (INT32) wsptr[4];
+    z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */
+
+    tmp10 = z3 + z4;
+    tmp11 = z3 - z4;
+
+    z1 = (INT32) wsptr[2];
+    z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */
+    z1 <<= CONST_BITS;
+    z2 = (INT32) wsptr[6];
+    z2 <<= CONST_BITS;
+
+    tmp12 = z1 - z2;
+
+    tmp21 = z3 + tmp12;
+    tmp24 = z3 - tmp12;
+
+    tmp12 = z4 + z2;
+
+    tmp20 = tmp10 + tmp12;
+    tmp25 = tmp10 - tmp12;
+
+    tmp12 = z4 - z1 - z2;
+
+    tmp22 = tmp11 + tmp12;
+    tmp23 = tmp11 - tmp12;
+
+    /* Odd part */
+
+    z1 = (INT32) wsptr[1];
+    z2 = (INT32) wsptr[3];
+    z3 = (INT32) wsptr[5];
+    z4 = (INT32) wsptr[7];
+
+    tmp11 = MULTIPLY(z2, FIX(1.306562965));                  /* c3 */
+    tmp14 = MULTIPLY(z2, - FIX_0_541196100);                 /* -c9 */
+
+    tmp10 = z1 + z3;
+    tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669));          /* c7 */
+    tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384));       /* c5-c7 */
+    tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716));  /* c1-c5 */
+    tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580));           /* -(c7+c11) */
+    tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */
+    tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */
+    tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) -        /* c7-c11 */
+	     MULTIPLY(z4, FIX(1.982889723));                 /* c5+c7 */
+
+    z1 -= z4;
+    z2 -= z3;
+    z3 = MULTIPLY(z1 + z2, FIX_0_541196100);                 /* c9 */
+    tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865);              /* c3-c9 */
+    tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065);              /* c3+c9 */
+
+    /* Final output stage */
+
+    outptr[0]  = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[1]  = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[2]  = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[9]  = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[3]  = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[8]  = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[4]  = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[7]  = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[5]  = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[6]  = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+
+    wsptr += 8;		/* advance pointer to next row */
+  }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a 13x13 output block.
+ *
+ * Optimized algorithm with 29 multiplications in the 1-D kernel.
+ * cK represents sqrt(2) * cos(K*pi/26).
+ */
+
+GLOBAL(void)
+jpeg_idct_13x13 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+		 JCOEFPTR coef_block,
+		 JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
+  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26;
+  INT32 z1, z2, z3, z4;
+  JCOEFPTR inptr;
+  ISLOW_MULT_TYPE * quantptr;
+  int * wsptr;
+  JSAMPROW outptr;
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  int workspace[8*13];	/* buffers data between passes */
+  SHIFT_TEMPS
+
+  /* Pass 1: process columns from input, store into work array. */
+
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
+    /* Even part */
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    z1 <<= CONST_BITS;
+    /* Add fudge factor here for final descale. */
+    z1 += ONE << (CONST_BITS-PASS1_BITS-1);
+
+    z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+    z4 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
+
+    tmp10 = z3 + z4;
+    tmp11 = z3 - z4;
+
+    tmp12 = MULTIPLY(tmp10, FIX(1.155388986));                /* (c4+c6)/2 */
+    tmp13 = MULTIPLY(tmp11, FIX(0.096834934)) + z1;           /* (c4-c6)/2 */
+
+    tmp20 = MULTIPLY(z2, FIX(1.373119086)) + tmp12 + tmp13;   /* c2 */
+    tmp22 = MULTIPLY(z2, FIX(0.501487041)) - tmp12 + tmp13;   /* c10 */
+
+    tmp12 = MULTIPLY(tmp10, FIX(0.316450131));                /* (c8-c12)/2 */
+    tmp13 = MULTIPLY(tmp11, FIX(0.486914739)) + z1;           /* (c8+c12)/2 */
+
+    tmp21 = MULTIPLY(z2, FIX(1.058554052)) - tmp12 + tmp13;   /* c6 */
+    tmp25 = MULTIPLY(z2, - FIX(1.252223920)) + tmp12 + tmp13; /* c4 */
+
+    tmp12 = MULTIPLY(tmp10, FIX(0.435816023));                /* (c2-c10)/2 */
+    tmp13 = MULTIPLY(tmp11, FIX(0.937303064)) - z1;           /* (c2+c10)/2 */
+
+    tmp23 = MULTIPLY(z2, - FIX(0.170464608)) - tmp12 - tmp13; /* c12 */
+    tmp24 = MULTIPLY(z2, - FIX(0.803364869)) + tmp12 - tmp13; /* c8 */
+
+    tmp26 = MULTIPLY(tmp11 - z2, FIX(1.414213562)) + z1;      /* c0 */
+
+    /* Odd part */
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
+    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
+
+    tmp11 = MULTIPLY(z1 + z2, FIX(1.322312651));     /* c3 */
+    tmp12 = MULTIPLY(z1 + z3, FIX(1.163874945));     /* c5 */
+    tmp15 = z1 + z4;
+    tmp13 = MULTIPLY(tmp15, FIX(0.937797057));       /* c7 */
+    tmp10 = tmp11 + tmp12 + tmp13 -
+	    MULTIPLY(z1, FIX(2.020082300));          /* c7+c5+c3-c1 */
+    tmp14 = MULTIPLY(z2 + z3, - FIX(0.338443458));   /* -c11 */
+    tmp11 += tmp14 + MULTIPLY(z2, FIX(0.837223564)); /* c5+c9+c11-c3 */
+    tmp12 += tmp14 - MULTIPLY(z3, FIX(1.572116027)); /* c1+c5-c9-c11 */
+    tmp14 = MULTIPLY(z2 + z4, - FIX(1.163874945));   /* -c5 */
+    tmp11 += tmp14;
+    tmp13 += tmp14 + MULTIPLY(z4, FIX(2.205608352)); /* c3+c5+c9-c7 */
+    tmp14 = MULTIPLY(z3 + z4, - FIX(0.657217813));   /* -c9 */
+    tmp12 += tmp14;
+    tmp13 += tmp14;
+    tmp15 = MULTIPLY(tmp15, FIX(0.338443458));       /* c11 */
+    tmp14 = tmp15 + MULTIPLY(z1, FIX(0.318774355)) - /* c9-c11 */
+	    MULTIPLY(z2, FIX(0.466105296));          /* c1-c7 */
+    z1    = MULTIPLY(z3 - z2, FIX(0.937797057));     /* c7 */
+    tmp14 += z1;
+    tmp15 += z1 + MULTIPLY(z3, FIX(0.384515595)) -   /* c3-c7 */
+	     MULTIPLY(z4, FIX(1.742345811));         /* c1+c11 */
+
+    /* Final output stage */
+
+    wsptr[8*0]  = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
+    wsptr[8*12] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
+    wsptr[8*1]  = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
+    wsptr[8*11] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
+    wsptr[8*2]  = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
+    wsptr[8*10] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
+    wsptr[8*3]  = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
+    wsptr[8*9]  = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
+    wsptr[8*4]  = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
+    wsptr[8*8]  = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
+    wsptr[8*5]  = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
+    wsptr[8*7]  = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
+    wsptr[8*6]  = (int) RIGHT_SHIFT(tmp26, CONST_BITS-PASS1_BITS);
+  }
+
+  /* Pass 2: process 13 rows from work array, store into output array. */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < 13; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+
+    /* Even part */
+
+    /* Add fudge factor here for final descale. */
+    z1 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
+    z1 <<= CONST_BITS;
+
+    z2 = (INT32) wsptr[2];
+    z3 = (INT32) wsptr[4];
+    z4 = (INT32) wsptr[6];
+
+    tmp10 = z3 + z4;
+    tmp11 = z3 - z4;
+
+    tmp12 = MULTIPLY(tmp10, FIX(1.155388986));                /* (c4+c6)/2 */
+    tmp13 = MULTIPLY(tmp11, FIX(0.096834934)) + z1;           /* (c4-c6)/2 */
+
+    tmp20 = MULTIPLY(z2, FIX(1.373119086)) + tmp12 + tmp13;   /* c2 */
+    tmp22 = MULTIPLY(z2, FIX(0.501487041)) - tmp12 + tmp13;   /* c10 */
+
+    tmp12 = MULTIPLY(tmp10, FIX(0.316450131));                /* (c8-c12)/2 */
+    tmp13 = MULTIPLY(tmp11, FIX(0.486914739)) + z1;           /* (c8+c12)/2 */
+
+    tmp21 = MULTIPLY(z2, FIX(1.058554052)) - tmp12 + tmp13;   /* c6 */
+    tmp25 = MULTIPLY(z2, - FIX(1.252223920)) + tmp12 + tmp13; /* c4 */
+
+    tmp12 = MULTIPLY(tmp10, FIX(0.435816023));                /* (c2-c10)/2 */
+    tmp13 = MULTIPLY(tmp11, FIX(0.937303064)) - z1;           /* (c2+c10)/2 */
+
+    tmp23 = MULTIPLY(z2, - FIX(0.170464608)) - tmp12 - tmp13; /* c12 */
+    tmp24 = MULTIPLY(z2, - FIX(0.803364869)) + tmp12 - tmp13; /* c8 */
+
+    tmp26 = MULTIPLY(tmp11 - z2, FIX(1.414213562)) + z1;      /* c0 */
+
+    /* Odd part */
+
+    z1 = (INT32) wsptr[1];
+    z2 = (INT32) wsptr[3];
+    z3 = (INT32) wsptr[5];
+    z4 = (INT32) wsptr[7];
+
+    tmp11 = MULTIPLY(z1 + z2, FIX(1.322312651));     /* c3 */
+    tmp12 = MULTIPLY(z1 + z3, FIX(1.163874945));     /* c5 */
+    tmp15 = z1 + z4;
+    tmp13 = MULTIPLY(tmp15, FIX(0.937797057));       /* c7 */
+    tmp10 = tmp11 + tmp12 + tmp13 -
+	    MULTIPLY(z1, FIX(2.020082300));          /* c7+c5+c3-c1 */
+    tmp14 = MULTIPLY(z2 + z3, - FIX(0.338443458));   /* -c11 */
+    tmp11 += tmp14 + MULTIPLY(z2, FIX(0.837223564)); /* c5+c9+c11-c3 */
+    tmp12 += tmp14 - MULTIPLY(z3, FIX(1.572116027)); /* c1+c5-c9-c11 */
+    tmp14 = MULTIPLY(z2 + z4, - FIX(1.163874945));   /* -c5 */
+    tmp11 += tmp14;
+    tmp13 += tmp14 + MULTIPLY(z4, FIX(2.205608352)); /* c3+c5+c9-c7 */
+    tmp14 = MULTIPLY(z3 + z4, - FIX(0.657217813));   /* -c9 */
+    tmp12 += tmp14;
+    tmp13 += tmp14;
+    tmp15 = MULTIPLY(tmp15, FIX(0.338443458));       /* c11 */
+    tmp14 = tmp15 + MULTIPLY(z1, FIX(0.318774355)) - /* c9-c11 */
+	    MULTIPLY(z2, FIX(0.466105296));          /* c1-c7 */
+    z1    = MULTIPLY(z3 - z2, FIX(0.937797057));     /* c7 */
+    tmp14 += z1;
+    tmp15 += z1 + MULTIPLY(z3, FIX(0.384515595)) -   /* c3-c7 */
+	     MULTIPLY(z4, FIX(1.742345811));         /* c1+c11 */
+
+    /* Final output stage */
+
+    outptr[0]  = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[1]  = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[2]  = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[3]  = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[9]  = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[4]  = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[8]  = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[5]  = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[7]  = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[6]  = range_limit[(int) RIGHT_SHIFT(tmp26,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+
+    wsptr += 8;		/* advance pointer to next row */
+  }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a 14x14 output block.
+ *
+ * Optimized algorithm with 20 multiplications in the 1-D kernel.
+ * cK represents sqrt(2) * cos(K*pi/28).
+ */
+
+GLOBAL(void)
+jpeg_idct_14x14 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+		 JCOEFPTR coef_block,
+		 JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
+  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26;
+  INT32 z1, z2, z3, z4;
+  JCOEFPTR inptr;
+  ISLOW_MULT_TYPE * quantptr;
+  int * wsptr;
+  JSAMPROW outptr;
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  int workspace[8*14];	/* buffers data between passes */
+  SHIFT_TEMPS
+
+  /* Pass 1: process columns from input, store into work array. */
+
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
+    /* Even part */
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    z1 <<= CONST_BITS;
+    /* Add fudge factor here for final descale. */
+    z1 += ONE << (CONST_BITS-PASS1_BITS-1);
+    z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+    z2 = MULTIPLY(z4, FIX(1.274162392));         /* c4 */
+    z3 = MULTIPLY(z4, FIX(0.314692123));         /* c12 */
+    z4 = MULTIPLY(z4, FIX(0.881747734));         /* c8 */
+
+    tmp10 = z1 + z2;
+    tmp11 = z1 + z3;
+    tmp12 = z1 - z4;
+
+    tmp23 = RIGHT_SHIFT(z1 - ((z2 + z3 - z4) << 1), /* c0 = (c4+c12-c8)*2 */
+			CONST_BITS-PASS1_BITS);
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
+
+    z3 = MULTIPLY(z1 + z2, FIX(1.105676686));    /* c6 */
+
+    tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */
+    tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */
+    tmp15 = MULTIPLY(z1, FIX(0.613604268)) -     /* c10 */
+	    MULTIPLY(z2, FIX(1.378756276));      /* c2 */
+
+    tmp20 = tmp10 + tmp13;
+    tmp26 = tmp10 - tmp13;
+    tmp21 = tmp11 + tmp14;
+    tmp25 = tmp11 - tmp14;
+    tmp22 = tmp12 + tmp15;
+    tmp24 = tmp12 - tmp15;
+
+    /* Odd part */
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
+    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
+    tmp13 = z4 << CONST_BITS;
+
+    tmp14 = z1 + z3;
+    tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607));           /* c3 */
+    tmp12 = MULTIPLY(tmp14, FIX(1.197448846));             /* c5 */
+    tmp10 = tmp11 + tmp12 + tmp13 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */
+    tmp14 = MULTIPLY(tmp14, FIX(0.752406978));             /* c9 */
+    tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426));        /* c9+c11-c13 */
+    z1    -= z2;
+    tmp15 = MULTIPLY(z1, FIX(0.467085129)) - tmp13;        /* c11 */
+    tmp16 += tmp15;
+    z1    += z4;
+    z4    = MULTIPLY(z2 + z3, - FIX(0.158341681)) - tmp13; /* -c13 */
+    tmp11 += z4 - MULTIPLY(z2, FIX(0.424103948));          /* c3-c9-c13 */
+    tmp12 += z4 - MULTIPLY(z3, FIX(2.373959773));          /* c3+c5-c13 */
+    z4    = MULTIPLY(z3 - z2, FIX(1.405321284));           /* c1 */
+    tmp14 += z4 + tmp13 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */
+    tmp15 += z4 + MULTIPLY(z2, FIX(0.674957567));          /* c1+c11-c5 */
+
+    tmp13 = (z1 - z3) << PASS1_BITS;
+
+    /* Final output stage */
+
+    wsptr[8*0]  = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
+    wsptr[8*13] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
+    wsptr[8*1]  = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
+    wsptr[8*12] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
+    wsptr[8*2]  = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
+    wsptr[8*11] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
+    wsptr[8*3]  = (int) (tmp23 + tmp13);
+    wsptr[8*10] = (int) (tmp23 - tmp13);
+    wsptr[8*4]  = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
+    wsptr[8*9]  = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
+    wsptr[8*5]  = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
+    wsptr[8*8]  = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
+    wsptr[8*6]  = (int) RIGHT_SHIFT(tmp26 + tmp16, CONST_BITS-PASS1_BITS);
+    wsptr[8*7]  = (int) RIGHT_SHIFT(tmp26 - tmp16, CONST_BITS-PASS1_BITS);
+  }
+
+  /* Pass 2: process 14 rows from work array, store into output array. */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < 14; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+
+    /* Even part */
+
+    /* Add fudge factor here for final descale. */
+    z1 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
+    z1 <<= CONST_BITS;
+    z4 = (INT32) wsptr[4];
+    z2 = MULTIPLY(z4, FIX(1.274162392));         /* c4 */
+    z3 = MULTIPLY(z4, FIX(0.314692123));         /* c12 */
+    z4 = MULTIPLY(z4, FIX(0.881747734));         /* c8 */
+
+    tmp10 = z1 + z2;
+    tmp11 = z1 + z3;
+    tmp12 = z1 - z4;
+
+    tmp23 = z1 - ((z2 + z3 - z4) << 1);          /* c0 = (c4+c12-c8)*2 */
+
+    z1 = (INT32) wsptr[2];
+    z2 = (INT32) wsptr[6];
+
+    z3 = MULTIPLY(z1 + z2, FIX(1.105676686));    /* c6 */
+
+    tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */
+    tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */
+    tmp15 = MULTIPLY(z1, FIX(0.613604268)) -     /* c10 */
+	    MULTIPLY(z2, FIX(1.378756276));      /* c2 */
+
+    tmp20 = tmp10 + tmp13;
+    tmp26 = tmp10 - tmp13;
+    tmp21 = tmp11 + tmp14;
+    tmp25 = tmp11 - tmp14;
+    tmp22 = tmp12 + tmp15;
+    tmp24 = tmp12 - tmp15;
+
+    /* Odd part */
+
+    z1 = (INT32) wsptr[1];
+    z2 = (INT32) wsptr[3];
+    z3 = (INT32) wsptr[5];
+    z4 = (INT32) wsptr[7];
+    z4 <<= CONST_BITS;
+
+    tmp14 = z1 + z3;
+    tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607));           /* c3 */
+    tmp12 = MULTIPLY(tmp14, FIX(1.197448846));             /* c5 */
+    tmp10 = tmp11 + tmp12 + z4 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */
+    tmp14 = MULTIPLY(tmp14, FIX(0.752406978));             /* c9 */
+    tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426));        /* c9+c11-c13 */
+    z1    -= z2;
+    tmp15 = MULTIPLY(z1, FIX(0.467085129)) - z4;           /* c11 */
+    tmp16 += tmp15;
+    tmp13 = MULTIPLY(z2 + z3, - FIX(0.158341681)) - z4;    /* -c13 */
+    tmp11 += tmp13 - MULTIPLY(z2, FIX(0.424103948));       /* c3-c9-c13 */
+    tmp12 += tmp13 - MULTIPLY(z3, FIX(2.373959773));       /* c3+c5-c13 */
+    tmp13 = MULTIPLY(z3 - z2, FIX(1.405321284));           /* c1 */
+    tmp14 += tmp13 + z4 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */
+    tmp15 += tmp13 + MULTIPLY(z2, FIX(0.674957567));       /* c1+c11-c5 */
+
+    tmp13 = ((z1 - z3) << CONST_BITS) + z4;
+
+    /* Final output stage */
+
+    outptr[0]  = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[1]  = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[2]  = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[3]  = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[4]  = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[9]  = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[5]  = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[8]  = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[6]  = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp16,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[7]  = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp16,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+
+    wsptr += 8;		/* advance pointer to next row */
+  }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a 15x15 output block.
+ *
+ * Optimized algorithm with 22 multiplications in the 1-D kernel.
+ * cK represents sqrt(2) * cos(K*pi/30).
+ */
+
+GLOBAL(void)
+jpeg_idct_15x15 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+		 JCOEFPTR coef_block,
+		 JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
+  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27;
+  INT32 z1, z2, z3, z4;
+  JCOEFPTR inptr;
+  ISLOW_MULT_TYPE * quantptr;
+  int * wsptr;
+  JSAMPROW outptr;
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  int workspace[8*15];	/* buffers data between passes */
+  SHIFT_TEMPS
+
+  /* Pass 1: process columns from input, store into work array. */
+
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
+    /* Even part */
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    z1 <<= CONST_BITS;
+    /* Add fudge factor here for final descale. */
+    z1 += ONE << (CONST_BITS-PASS1_BITS-1);
+
+    z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+    z4 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
+
+    tmp10 = MULTIPLY(z4, FIX(0.437016024)); /* c12 */
+    tmp11 = MULTIPLY(z4, FIX(1.144122806)); /* c6 */
+
+    tmp12 = z1 - tmp10;
+    tmp13 = z1 + tmp11;
+    z1 -= (tmp11 - tmp10) << 1;             /* c0 = (c6-c12)*2 */
+
+    z4 = z2 - z3;
+    z3 += z2;
+    tmp10 = MULTIPLY(z3, FIX(1.337628990)); /* (c2+c4)/2 */
+    tmp11 = MULTIPLY(z4, FIX(0.045680613)); /* (c2-c4)/2 */
+    z2 = MULTIPLY(z2, FIX(1.439773946));    /* c4+c14 */
+
+    tmp20 = tmp13 + tmp10 + tmp11;
+    tmp23 = tmp12 - tmp10 + tmp11 + z2;
+
+    tmp10 = MULTIPLY(z3, FIX(0.547059574)); /* (c8+c14)/2 */
+    tmp11 = MULTIPLY(z4, FIX(0.399234004)); /* (c8-c14)/2 */
+
+    tmp25 = tmp13 - tmp10 - tmp11;
+    tmp26 = tmp12 + tmp10 - tmp11 - z2;
+
+    tmp10 = MULTIPLY(z3, FIX(0.790569415)); /* (c6+c12)/2 */
+    tmp11 = MULTIPLY(z4, FIX(0.353553391)); /* (c6-c12)/2 */
+
+    tmp21 = tmp12 + tmp10 + tmp11;
+    tmp24 = tmp13 - tmp10 + tmp11;
+    tmp11 += tmp11;
+    tmp22 = z1 + tmp11;                     /* c10 = c6-c12 */
+    tmp27 = z1 - tmp11 - tmp11;             /* c0 = (c6-c12)*2 */
+
+    /* Odd part */
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+    z4 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
+    z3 = MULTIPLY(z4, FIX(1.224744871));                    /* c5 */
+    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
+
+    tmp13 = z2 - z4;
+    tmp15 = MULTIPLY(z1 + tmp13, FIX(0.831253876));         /* c9 */
+    tmp11 = tmp15 + MULTIPLY(z1, FIX(0.513743148));         /* c3-c9 */
+    tmp14 = tmp15 - MULTIPLY(tmp13, FIX(2.176250899));      /* c3+c9 */
+
+    tmp13 = MULTIPLY(z2, - FIX(0.831253876));               /* -c9 */
+    tmp15 = MULTIPLY(z2, - FIX(1.344997024));               /* -c3 */
+    z2 = z1 - z4;
+    tmp12 = z3 + MULTIPLY(z2, FIX(1.406466353));            /* c1 */
+
+    tmp10 = tmp12 + MULTIPLY(z4, FIX(2.457431844)) - tmp15; /* c1+c7 */
+    tmp16 = tmp12 - MULTIPLY(z1, FIX(1.112434820)) + tmp13; /* c1-c13 */
+    tmp12 = MULTIPLY(z2, FIX(1.224744871)) - z3;            /* c5 */
+    z2 = MULTIPLY(z1 + z4, FIX(0.575212477));               /* c11 */
+    tmp13 += z2 + MULTIPLY(z1, FIX(0.475753014)) - z3;      /* c7-c11 */
+    tmp15 += z2 - MULTIPLY(z4, FIX(0.869244010)) + z3;      /* c11+c13 */
+
+    /* Final output stage */
+
+    wsptr[8*0]  = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
+    wsptr[8*14] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
+    wsptr[8*1]  = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
+    wsptr[8*13] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
+    wsptr[8*2]  = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
+    wsptr[8*12] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
+    wsptr[8*3]  = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
+    wsptr[8*11] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
+    wsptr[8*4]  = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
+    wsptr[8*10] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
+    wsptr[8*5]  = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
+    wsptr[8*9]  = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
+    wsptr[8*6]  = (int) RIGHT_SHIFT(tmp26 + tmp16, CONST_BITS-PASS1_BITS);
+    wsptr[8*8]  = (int) RIGHT_SHIFT(tmp26 - tmp16, CONST_BITS-PASS1_BITS);
+    wsptr[8*7]  = (int) RIGHT_SHIFT(tmp27, CONST_BITS-PASS1_BITS);
+  }
+
+  /* Pass 2: process 15 rows from work array, store into output array. */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < 15; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+
+    /* Even part */
+
+    /* Add fudge factor here for final descale. */
+    z1 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
+    z1 <<= CONST_BITS;
+
+    z2 = (INT32) wsptr[2];
+    z3 = (INT32) wsptr[4];
+    z4 = (INT32) wsptr[6];
+
+    tmp10 = MULTIPLY(z4, FIX(0.437016024)); /* c12 */
+    tmp11 = MULTIPLY(z4, FIX(1.144122806)); /* c6 */
+
+    tmp12 = z1 - tmp10;
+    tmp13 = z1 + tmp11;
+    z1 -= (tmp11 - tmp10) << 1;             /* c0 = (c6-c12)*2 */
+
+    z4 = z2 - z3;
+    z3 += z2;
+    tmp10 = MULTIPLY(z3, FIX(1.337628990)); /* (c2+c4)/2 */
+    tmp11 = MULTIPLY(z4, FIX(0.045680613)); /* (c2-c4)/2 */
+    z2 = MULTIPLY(z2, FIX(1.439773946));    /* c4+c14 */
+
+    tmp20 = tmp13 + tmp10 + tmp11;
+    tmp23 = tmp12 - tmp10 + tmp11 + z2;
+
+    tmp10 = MULTIPLY(z3, FIX(0.547059574)); /* (c8+c14)/2 */
+    tmp11 = MULTIPLY(z4, FIX(0.399234004)); /* (c8-c14)/2 */
+
+    tmp25 = tmp13 - tmp10 - tmp11;
+    tmp26 = tmp12 + tmp10 - tmp11 - z2;
+
+    tmp10 = MULTIPLY(z3, FIX(0.790569415)); /* (c6+c12)/2 */
+    tmp11 = MULTIPLY(z4, FIX(0.353553391)); /* (c6-c12)/2 */
+
+    tmp21 = tmp12 + tmp10 + tmp11;
+    tmp24 = tmp13 - tmp10 + tmp11;
+    tmp11 += tmp11;
+    tmp22 = z1 + tmp11;                     /* c10 = c6-c12 */
+    tmp27 = z1 - tmp11 - tmp11;             /* c0 = (c6-c12)*2 */
+
+    /* Odd part */
+
+    z1 = (INT32) wsptr[1];
+    z2 = (INT32) wsptr[3];
+    z4 = (INT32) wsptr[5];
+    z3 = MULTIPLY(z4, FIX(1.224744871));                    /* c5 */
+    z4 = (INT32) wsptr[7];
+
+    tmp13 = z2 - z4;
+    tmp15 = MULTIPLY(z1 + tmp13, FIX(0.831253876));         /* c9 */
+    tmp11 = tmp15 + MULTIPLY(z1, FIX(0.513743148));         /* c3-c9 */
+    tmp14 = tmp15 - MULTIPLY(tmp13, FIX(2.176250899));      /* c3+c9 */
+
+    tmp13 = MULTIPLY(z2, - FIX(0.831253876));               /* -c9 */
+    tmp15 = MULTIPLY(z2, - FIX(1.344997024));               /* -c3 */
+    z2 = z1 - z4;
+    tmp12 = z3 + MULTIPLY(z2, FIX(1.406466353));            /* c1 */
+
+    tmp10 = tmp12 + MULTIPLY(z4, FIX(2.457431844)) - tmp15; /* c1+c7 */
+    tmp16 = tmp12 - MULTIPLY(z1, FIX(1.112434820)) + tmp13; /* c1-c13 */
+    tmp12 = MULTIPLY(z2, FIX(1.224744871)) - z3;            /* c5 */
+    z2 = MULTIPLY(z1 + z4, FIX(0.575212477));               /* c11 */
+    tmp13 += z2 + MULTIPLY(z1, FIX(0.475753014)) - z3;      /* c7-c11 */
+    tmp15 += z2 - MULTIPLY(z4, FIX(0.869244010)) + z3;      /* c11+c13 */
+
+    /* Final output stage */
+
+    outptr[0]  = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[14] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[1]  = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[2]  = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[3]  = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[4]  = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[5]  = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[9]  = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[6]  = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp16,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[8]  = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp16,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[7]  = range_limit[(int) RIGHT_SHIFT(tmp27,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+
+    wsptr += 8;		/* advance pointer to next row */
+  }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a 16x16 output block.
+ *
+ * Optimized algorithm with 28 multiplications in the 1-D kernel.
+ * cK represents sqrt(2) * cos(K*pi/32).
+ */
+
+GLOBAL(void)
+jpeg_idct_16x16 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+		 JCOEFPTR coef_block,
+		 JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13;
+  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27;
+  INT32 z1, z2, z3, z4;
+  JCOEFPTR inptr;
+  ISLOW_MULT_TYPE * quantptr;
+  int * wsptr;
+  JSAMPROW outptr;
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  int workspace[8*16];	/* buffers data between passes */
+  SHIFT_TEMPS
+
+  /* Pass 1: process columns from input, store into work array. */
+
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
+    /* Even part */
+
+    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    tmp0 <<= CONST_BITS;
+    /* Add fudge factor here for final descale. */
+    tmp0 += 1 << (CONST_BITS-PASS1_BITS-1);
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+    tmp1 = MULTIPLY(z1, FIX(1.306562965));      /* c4[16] = c2[8] */
+    tmp2 = MULTIPLY(z1, FIX_0_541196100);       /* c12[16] = c6[8] */
+
+    tmp10 = tmp0 + tmp1;
+    tmp11 = tmp0 - tmp1;
+    tmp12 = tmp0 + tmp2;
+    tmp13 = tmp0 - tmp2;
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
+    z3 = z1 - z2;
+    z4 = MULTIPLY(z3, FIX(0.275899379));        /* c14[16] = c7[8] */
+    z3 = MULTIPLY(z3, FIX(1.387039845));        /* c2[16] = c1[8] */
+
+    tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447);  /* (c6+c2)[16] = (c3+c1)[8] */
+    tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223);  /* (c6-c14)[16] = (c3-c7)[8] */
+    tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */
+    tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */
+
+    tmp20 = tmp10 + tmp0;
+    tmp27 = tmp10 - tmp0;
+    tmp21 = tmp12 + tmp1;
+    tmp26 = tmp12 - tmp1;
+    tmp22 = tmp13 + tmp2;
+    tmp25 = tmp13 - tmp2;
+    tmp23 = tmp11 + tmp3;
+    tmp24 = tmp11 - tmp3;
+
+    /* Odd part */
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
+    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
+
+    tmp11 = z1 + z3;
+
+    tmp1  = MULTIPLY(z1 + z2, FIX(1.353318001));   /* c3 */
+    tmp2  = MULTIPLY(tmp11,   FIX(1.247225013));   /* c5 */
+    tmp3  = MULTIPLY(z1 + z4, FIX(1.093201867));   /* c7 */
+    tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586));   /* c9 */
+    tmp11 = MULTIPLY(tmp11,   FIX(0.666655658));   /* c11 */
+    tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528));   /* c13 */
+    tmp0  = tmp1 + tmp2 + tmp3 -
+	    MULTIPLY(z1, FIX(2.286341144));        /* c7+c5+c3-c1 */
+    tmp13 = tmp10 + tmp11 + tmp12 -
+	    MULTIPLY(z1, FIX(1.835730603));        /* c9+c11+c13-c15 */
+    z1    = MULTIPLY(z2 + z3, FIX(0.138617169));   /* c15 */
+    tmp1  += z1 + MULTIPLY(z2, FIX(0.071888074));  /* c9+c11-c3-c15 */
+    tmp2  += z1 - MULTIPLY(z3, FIX(1.125726048));  /* c5+c7+c15-c3 */
+    z1    = MULTIPLY(z3 - z2, FIX(1.407403738));   /* c1 */
+    tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282));  /* c1+c11-c9-c13 */
+    tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411));  /* c1+c5+c13-c7 */
+    z2    += z4;
+    z1    = MULTIPLY(z2, - FIX(0.666655658));      /* -c11 */
+    tmp1  += z1;
+    tmp3  += z1 + MULTIPLY(z4, FIX(1.065388962));  /* c3+c11+c15-c7 */
+    z2    = MULTIPLY(z2, - FIX(1.247225013));      /* -c5 */
+    tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809));  /* c1+c5+c9-c13 */
+    tmp12 += z2;
+    z2    = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */
+    tmp2  += z2;
+    tmp3  += z2;
+    z2    = MULTIPLY(z4 - z3, FIX(0.410524528));   /* c13 */
+    tmp10 += z2;
+    tmp11 += z2;
+
+    /* Final output stage */
+
+    wsptr[8*0]  = (int) RIGHT_SHIFT(tmp20 + tmp0,  CONST_BITS-PASS1_BITS);
+    wsptr[8*15] = (int) RIGHT_SHIFT(tmp20 - tmp0,  CONST_BITS-PASS1_BITS);
+    wsptr[8*1]  = (int) RIGHT_SHIFT(tmp21 + tmp1,  CONST_BITS-PASS1_BITS);
+    wsptr[8*14] = (int) RIGHT_SHIFT(tmp21 - tmp1,  CONST_BITS-PASS1_BITS);
+    wsptr[8*2]  = (int) RIGHT_SHIFT(tmp22 + tmp2,  CONST_BITS-PASS1_BITS);
+    wsptr[8*13] = (int) RIGHT_SHIFT(tmp22 - tmp2,  CONST_BITS-PASS1_BITS);
+    wsptr[8*3]  = (int) RIGHT_SHIFT(tmp23 + tmp3,  CONST_BITS-PASS1_BITS);
+    wsptr[8*12] = (int) RIGHT_SHIFT(tmp23 - tmp3,  CONST_BITS-PASS1_BITS);
+    wsptr[8*4]  = (int) RIGHT_SHIFT(tmp24 + tmp10, CONST_BITS-PASS1_BITS);
+    wsptr[8*11] = (int) RIGHT_SHIFT(tmp24 - tmp10, CONST_BITS-PASS1_BITS);
+    wsptr[8*5]  = (int) RIGHT_SHIFT(tmp25 + tmp11, CONST_BITS-PASS1_BITS);
+    wsptr[8*10] = (int) RIGHT_SHIFT(tmp25 - tmp11, CONST_BITS-PASS1_BITS);
+    wsptr[8*6]  = (int) RIGHT_SHIFT(tmp26 + tmp12, CONST_BITS-PASS1_BITS);
+    wsptr[8*9]  = (int) RIGHT_SHIFT(tmp26 - tmp12, CONST_BITS-PASS1_BITS);
+    wsptr[8*7]  = (int) RIGHT_SHIFT(tmp27 + tmp13, CONST_BITS-PASS1_BITS);
+    wsptr[8*8]  = (int) RIGHT_SHIFT(tmp27 - tmp13, CONST_BITS-PASS1_BITS);
+  }
+
+  /* Pass 2: process 16 rows from work array, store into output array. */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < 16; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+
+    /* Even part */
+
+    /* Add fudge factor here for final descale. */
+    tmp0 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
+    tmp0 <<= CONST_BITS;
+
+    z1 = (INT32) wsptr[4];
+    tmp1 = MULTIPLY(z1, FIX(1.306562965));      /* c4[16] = c2[8] */
+    tmp2 = MULTIPLY(z1, FIX_0_541196100);       /* c12[16] = c6[8] */
+
+    tmp10 = tmp0 + tmp1;
+    tmp11 = tmp0 - tmp1;
+    tmp12 = tmp0 + tmp2;
+    tmp13 = tmp0 - tmp2;
+
+    z1 = (INT32) wsptr[2];
+    z2 = (INT32) wsptr[6];
+    z3 = z1 - z2;
+    z4 = MULTIPLY(z3, FIX(0.275899379));        /* c14[16] = c7[8] */
+    z3 = MULTIPLY(z3, FIX(1.387039845));        /* c2[16] = c1[8] */
+
+    tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447);  /* (c6+c2)[16] = (c3+c1)[8] */
+    tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223);  /* (c6-c14)[16] = (c3-c7)[8] */
+    tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */
+    tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */
+
+    tmp20 = tmp10 + tmp0;
+    tmp27 = tmp10 - tmp0;
+    tmp21 = tmp12 + tmp1;
+    tmp26 = tmp12 - tmp1;
+    tmp22 = tmp13 + tmp2;
+    tmp25 = tmp13 - tmp2;
+    tmp23 = tmp11 + tmp3;
+    tmp24 = tmp11 - tmp3;
+
+    /* Odd part */
+
+    z1 = (INT32) wsptr[1];
+    z2 = (INT32) wsptr[3];
+    z3 = (INT32) wsptr[5];
+    z4 = (INT32) wsptr[7];
+
+    tmp11 = z1 + z3;
+
+    tmp1  = MULTIPLY(z1 + z2, FIX(1.353318001));   /* c3 */
+    tmp2  = MULTIPLY(tmp11,   FIX(1.247225013));   /* c5 */
+    tmp3  = MULTIPLY(z1 + z4, FIX(1.093201867));   /* c7 */
+    tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586));   /* c9 */
+    tmp11 = MULTIPLY(tmp11,   FIX(0.666655658));   /* c11 */
+    tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528));   /* c13 */
+    tmp0  = tmp1 + tmp2 + tmp3 -
+	    MULTIPLY(z1, FIX(2.286341144));        /* c7+c5+c3-c1 */
+    tmp13 = tmp10 + tmp11 + tmp12 -
+	    MULTIPLY(z1, FIX(1.835730603));        /* c9+c11+c13-c15 */
+    z1    = MULTIPLY(z2 + z3, FIX(0.138617169));   /* c15 */
+    tmp1  += z1 + MULTIPLY(z2, FIX(0.071888074));  /* c9+c11-c3-c15 */
+    tmp2  += z1 - MULTIPLY(z3, FIX(1.125726048));  /* c5+c7+c15-c3 */
+    z1    = MULTIPLY(z3 - z2, FIX(1.407403738));   /* c1 */
+    tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282));  /* c1+c11-c9-c13 */
+    tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411));  /* c1+c5+c13-c7 */
+    z2    += z4;
+    z1    = MULTIPLY(z2, - FIX(0.666655658));      /* -c11 */
+    tmp1  += z1;
+    tmp3  += z1 + MULTIPLY(z4, FIX(1.065388962));  /* c3+c11+c15-c7 */
+    z2    = MULTIPLY(z2, - FIX(1.247225013));      /* -c5 */
+    tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809));  /* c1+c5+c9-c13 */
+    tmp12 += z2;
+    z2    = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */
+    tmp2  += z2;
+    tmp3  += z2;
+    z2    = MULTIPLY(z4 - z3, FIX(0.410524528));   /* c13 */
+    tmp10 += z2;
+    tmp11 += z2;
+
+    /* Final output stage */
+
+    outptr[0]  = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp0,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[15] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp0,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[1]  = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp1,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[14] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp1,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[2]  = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp2,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp2,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[3]  = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp3,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp3,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[4]  = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp10,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp10,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[5]  = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp11,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp11,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[6]  = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp12,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[9]  = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp12,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[7]  = range_limit[(int) RIGHT_SHIFT(tmp27 + tmp13,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[8]  = range_limit[(int) RIGHT_SHIFT(tmp27 - tmp13,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+
+    wsptr += 8;		/* advance pointer to next row */
+  }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a 16x8 output block.
+ *
+ * 8-point IDCT in pass 1 (columns), 16-point in pass 2 (rows).
+ */
+
+GLOBAL(void)
+jpeg_idct_16x8 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+		JCOEFPTR coef_block,
+		JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13;
+  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27;
+  INT32 z1, z2, z3, z4;
+  JCOEFPTR inptr;
+  ISLOW_MULT_TYPE * quantptr;
+  int * wsptr;
+  JSAMPROW outptr;
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  int workspace[8*8];	/* buffers data between passes */
+  SHIFT_TEMPS
+
+  /* Pass 1: process columns from input, store into work array. */
+  /* Note results are scaled up by sqrt(8) compared to a true IDCT; */
+  /* furthermore, we scale the results by 2**PASS1_BITS. */
+
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = DCTSIZE; ctr > 0; ctr--) {
+    /* Due to quantization, we will usually find that many of the input
+     * coefficients are zero, especially the AC terms.  We can exploit this
+     * by short-circuiting the IDCT calculation for any column in which all
+     * the AC terms are zero.  In that case each output is equal to the
+     * DC coefficient (with scale factor as needed).
+     * With typical images and quantization tables, half or more of the
+     * column DCT calculations can be simplified this way.
+     */
+    
+    if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 &&
+	inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&
+	inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&
+	inptr[DCTSIZE*7] == 0) {
+      /* AC terms all zero */
+      int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) << PASS1_BITS;
+      
+      wsptr[DCTSIZE*0] = dcval;
+      wsptr[DCTSIZE*1] = dcval;
+      wsptr[DCTSIZE*2] = dcval;
+      wsptr[DCTSIZE*3] = dcval;
+      wsptr[DCTSIZE*4] = dcval;
+      wsptr[DCTSIZE*5] = dcval;
+      wsptr[DCTSIZE*6] = dcval;
+      wsptr[DCTSIZE*7] = dcval;
+      
+      inptr++;			/* advance pointers to next column */
+      quantptr++;
+      wsptr++;
+      continue;
+    }
+    
+    /* Even part: reverse the even part of the forward DCT. */
+    /* The rotator is sqrt(2)*c(-6). */
+    
+    z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
+    
+    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);
+    tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865);
+    tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065);
+    
+    z2 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+    z2 <<= CONST_BITS;
+    z3 <<= CONST_BITS;
+    /* Add fudge factor here for final descale. */
+    z2 += ONE << (CONST_BITS-PASS1_BITS-1);
+
+    tmp0 = z2 + z3;
+    tmp1 = z2 - z3;
+    
+    tmp10 = tmp0 + tmp2;
+    tmp13 = tmp0 - tmp2;
+    tmp11 = tmp1 + tmp3;
+    tmp12 = tmp1 - tmp3;
+    
+    /* Odd part per figure 8; the matrix is unitary and hence its
+     * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
+     */
+    
+    tmp0 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
+    tmp1 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
+    tmp2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+    tmp3 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+    
+    z2 = tmp0 + tmp2;
+    z3 = tmp1 + tmp3;
+
+    z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* sqrt(2) * c3 */
+    z2 = MULTIPLY(z2, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
+    z3 = MULTIPLY(z3, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
+    z2 += z1;
+    z3 += z1;
+
+    z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
+    tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
+    tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
+    tmp0 += z1 + z2;
+    tmp3 += z1 + z3;
+
+    z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
+    tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
+    tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
+    tmp1 += z1 + z3;
+    tmp2 += z1 + z2;
+    
+    /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
+    
+    wsptr[DCTSIZE*0] = (int) RIGHT_SHIFT(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
+    wsptr[DCTSIZE*7] = (int) RIGHT_SHIFT(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
+    wsptr[DCTSIZE*1] = (int) RIGHT_SHIFT(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
+    wsptr[DCTSIZE*6] = (int) RIGHT_SHIFT(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
+    wsptr[DCTSIZE*2] = (int) RIGHT_SHIFT(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
+    wsptr[DCTSIZE*5] = (int) RIGHT_SHIFT(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
+    wsptr[DCTSIZE*3] = (int) RIGHT_SHIFT(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
+    wsptr[DCTSIZE*4] = (int) RIGHT_SHIFT(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
+    
+    inptr++;			/* advance pointers to next column */
+    quantptr++;
+    wsptr++;
+  }
+
+  /* Pass 2: process 8 rows from work array, store into output array.
+   * 16-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/32).
+   */
+  wsptr = workspace;
+  for (ctr = 0; ctr < 8; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+
+    /* Even part */
+
+    /* Add fudge factor here for final descale. */
+    tmp0 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
+    tmp0 <<= CONST_BITS;
+
+    z1 = (INT32) wsptr[4];
+    tmp1 = MULTIPLY(z1, FIX(1.306562965));      /* c4[16] = c2[8] */
+    tmp2 = MULTIPLY(z1, FIX_0_541196100);       /* c12[16] = c6[8] */
+
+    tmp10 = tmp0 + tmp1;
+    tmp11 = tmp0 - tmp1;
+    tmp12 = tmp0 + tmp2;
+    tmp13 = tmp0 - tmp2;
+
+    z1 = (INT32) wsptr[2];
+    z2 = (INT32) wsptr[6];
+    z3 = z1 - z2;
+    z4 = MULTIPLY(z3, FIX(0.275899379));        /* c14[16] = c7[8] */
+    z3 = MULTIPLY(z3, FIX(1.387039845));        /* c2[16] = c1[8] */
+
+    tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447);  /* (c6+c2)[16] = (c3+c1)[8] */
+    tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223);  /* (c6-c14)[16] = (c3-c7)[8] */
+    tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */
+    tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */
+
+    tmp20 = tmp10 + tmp0;
+    tmp27 = tmp10 - tmp0;
+    tmp21 = tmp12 + tmp1;
+    tmp26 = tmp12 - tmp1;
+    tmp22 = tmp13 + tmp2;
+    tmp25 = tmp13 - tmp2;
+    tmp23 = tmp11 + tmp3;
+    tmp24 = tmp11 - tmp3;
+
+    /* Odd part */
+
+    z1 = (INT32) wsptr[1];
+    z2 = (INT32) wsptr[3];
+    z3 = (INT32) wsptr[5];
+    z4 = (INT32) wsptr[7];
+
+    tmp11 = z1 + z3;
+
+    tmp1  = MULTIPLY(z1 + z2, FIX(1.353318001));   /* c3 */
+    tmp2  = MULTIPLY(tmp11,   FIX(1.247225013));   /* c5 */
+    tmp3  = MULTIPLY(z1 + z4, FIX(1.093201867));   /* c7 */
+    tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586));   /* c9 */
+    tmp11 = MULTIPLY(tmp11,   FIX(0.666655658));   /* c11 */
+    tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528));   /* c13 */
+    tmp0  = tmp1 + tmp2 + tmp3 -
+	    MULTIPLY(z1, FIX(2.286341144));        /* c7+c5+c3-c1 */
+    tmp13 = tmp10 + tmp11 + tmp12 -
+	    MULTIPLY(z1, FIX(1.835730603));        /* c9+c11+c13-c15 */
+    z1    = MULTIPLY(z2 + z3, FIX(0.138617169));   /* c15 */
+    tmp1  += z1 + MULTIPLY(z2, FIX(0.071888074));  /* c9+c11-c3-c15 */
+    tmp2  += z1 - MULTIPLY(z3, FIX(1.125726048));  /* c5+c7+c15-c3 */
+    z1    = MULTIPLY(z3 - z2, FIX(1.407403738));   /* c1 */
+    tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282));  /* c1+c11-c9-c13 */
+    tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411));  /* c1+c5+c13-c7 */
+    z2    += z4;
+    z1    = MULTIPLY(z2, - FIX(0.666655658));      /* -c11 */
+    tmp1  += z1;
+    tmp3  += z1 + MULTIPLY(z4, FIX(1.065388962));  /* c3+c11+c15-c7 */
+    z2    = MULTIPLY(z2, - FIX(1.247225013));      /* -c5 */
+    tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809));  /* c1+c5+c9-c13 */
+    tmp12 += z2;
+    z2    = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */
+    tmp2  += z2;
+    tmp3  += z2;
+    z2    = MULTIPLY(z4 - z3, FIX(0.410524528));   /* c13 */
+    tmp10 += z2;
+    tmp11 += z2;
+
+    /* Final output stage */
+
+    outptr[0]  = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp0,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[15] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp0,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[1]  = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp1,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[14] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp1,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[2]  = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp2,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp2,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[3]  = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp3,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp3,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[4]  = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp10,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp10,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[5]  = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp11,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp11,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[6]  = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp12,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[9]  = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp12,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[7]  = range_limit[(int) RIGHT_SHIFT(tmp27 + tmp13,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[8]  = range_limit[(int) RIGHT_SHIFT(tmp27 - tmp13,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+
+    wsptr += 8;		/* advance pointer to next row */
+  }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a 14x7 output block.
+ *
+ * 7-point IDCT in pass 1 (columns), 14-point in pass 2 (rows).
+ */
+
+GLOBAL(void)
+jpeg_idct_14x7 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+		JCOEFPTR coef_block,
+		JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
+  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26;
+  INT32 z1, z2, z3, z4;
+  JCOEFPTR inptr;
+  ISLOW_MULT_TYPE * quantptr;
+  int * wsptr;
+  JSAMPROW outptr;
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  int workspace[8*7];	/* buffers data between passes */
+  SHIFT_TEMPS
+
+  /* Pass 1: process columns from input, store into work array.
+   * 7-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/14).
+   */
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
+    /* Even part */
+
+    tmp23 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    tmp23 <<= CONST_BITS;
+    /* Add fudge factor here for final descale. */
+    tmp23 += ONE << (CONST_BITS-PASS1_BITS-1);
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
+
+    tmp20 = MULTIPLY(z2 - z3, FIX(0.881747734));       /* c4 */
+    tmp22 = MULTIPLY(z1 - z2, FIX(0.314692123));       /* c6 */
+    tmp21 = tmp20 + tmp22 + tmp23 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */
+    tmp10 = z1 + z3;
+    z2 -= tmp10;
+    tmp10 = MULTIPLY(tmp10, FIX(1.274162392)) + tmp23; /* c2 */
+    tmp20 += tmp10 - MULTIPLY(z3, FIX(0.077722536));   /* c2-c4-c6 */
+    tmp22 += tmp10 - MULTIPLY(z1, FIX(2.470602249));   /* c2+c4+c6 */
+    tmp23 += MULTIPLY(z2, FIX(1.414213562));           /* c0 */
+
+    /* Odd part */
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
+
+    tmp11 = MULTIPLY(z1 + z2, FIX(0.935414347));       /* (c3+c1-c5)/2 */
+    tmp12 = MULTIPLY(z1 - z2, FIX(0.170262339));       /* (c3+c5-c1)/2 */
+    tmp10 = tmp11 - tmp12;
+    tmp11 += tmp12;
+    tmp12 = MULTIPLY(z2 + z3, - FIX(1.378756276));     /* -c1 */
+    tmp11 += tmp12;
+    z2 = MULTIPLY(z1 + z3, FIX(0.613604268));          /* c5 */
+    tmp10 += z2;
+    tmp12 += z2 + MULTIPLY(z3, FIX(1.870828693));      /* c3+c1-c5 */
+
+    /* Final output stage */
+
+    wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
+    wsptr[8*6] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
+    wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
+    wsptr[8*5] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
+    wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
+    wsptr[8*4] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
+    wsptr[8*3] = (int) RIGHT_SHIFT(tmp23, CONST_BITS-PASS1_BITS);
+  }
+
+  /* Pass 2: process 7 rows from work array, store into output array.
+   * 14-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/28).
+   */
+  wsptr = workspace;
+  for (ctr = 0; ctr < 7; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+
+    /* Even part */
+
+    /* Add fudge factor here for final descale. */
+    z1 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
+    z1 <<= CONST_BITS;
+    z4 = (INT32) wsptr[4];
+    z2 = MULTIPLY(z4, FIX(1.274162392));         /* c4 */
+    z3 = MULTIPLY(z4, FIX(0.314692123));         /* c12 */
+    z4 = MULTIPLY(z4, FIX(0.881747734));         /* c8 */
+
+    tmp10 = z1 + z2;
+    tmp11 = z1 + z3;
+    tmp12 = z1 - z4;
+
+    tmp23 = z1 - ((z2 + z3 - z4) << 1);          /* c0 = (c4+c12-c8)*2 */
+
+    z1 = (INT32) wsptr[2];
+    z2 = (INT32) wsptr[6];
+
+    z3 = MULTIPLY(z1 + z2, FIX(1.105676686));    /* c6 */
+
+    tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */
+    tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */
+    tmp15 = MULTIPLY(z1, FIX(0.613604268)) -     /* c10 */
+	    MULTIPLY(z2, FIX(1.378756276));      /* c2 */
+
+    tmp20 = tmp10 + tmp13;
+    tmp26 = tmp10 - tmp13;
+    tmp21 = tmp11 + tmp14;
+    tmp25 = tmp11 - tmp14;
+    tmp22 = tmp12 + tmp15;
+    tmp24 = tmp12 - tmp15;
+
+    /* Odd part */
+
+    z1 = (INT32) wsptr[1];
+    z2 = (INT32) wsptr[3];
+    z3 = (INT32) wsptr[5];
+    z4 = (INT32) wsptr[7];
+    z4 <<= CONST_BITS;
+
+    tmp14 = z1 + z3;
+    tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607));           /* c3 */
+    tmp12 = MULTIPLY(tmp14, FIX(1.197448846));             /* c5 */
+    tmp10 = tmp11 + tmp12 + z4 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */
+    tmp14 = MULTIPLY(tmp14, FIX(0.752406978));             /* c9 */
+    tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426));        /* c9+c11-c13 */
+    z1    -= z2;
+    tmp15 = MULTIPLY(z1, FIX(0.467085129)) - z4;           /* c11 */
+    tmp16 += tmp15;
+    tmp13 = MULTIPLY(z2 + z3, - FIX(0.158341681)) - z4;    /* -c13 */
+    tmp11 += tmp13 - MULTIPLY(z2, FIX(0.424103948));       /* c3-c9-c13 */
+    tmp12 += tmp13 - MULTIPLY(z3, FIX(2.373959773));       /* c3+c5-c13 */
+    tmp13 = MULTIPLY(z3 - z2, FIX(1.405321284));           /* c1 */
+    tmp14 += tmp13 + z4 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */
+    tmp15 += tmp13 + MULTIPLY(z2, FIX(0.674957567));       /* c1+c11-c5 */
+
+    tmp13 = ((z1 - z3) << CONST_BITS) + z4;
+
+    /* Final output stage */
+
+    outptr[0]  = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[1]  = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[2]  = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[3]  = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[4]  = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[9]  = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[5]  = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[8]  = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[6]  = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp16,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[7]  = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp16,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+
+    wsptr += 8;		/* advance pointer to next row */
+  }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a 12x6 output block.
+ *
+ * 6-point IDCT in pass 1 (columns), 12-point in pass 2 (rows).
+ */
+
+GLOBAL(void)
+jpeg_idct_12x6 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+		JCOEFPTR coef_block,
+		JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
+  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25;
+  INT32 z1, z2, z3, z4;
+  JCOEFPTR inptr;
+  ISLOW_MULT_TYPE * quantptr;
+  int * wsptr;
+  JSAMPROW outptr;
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  int workspace[8*6];	/* buffers data between passes */
+  SHIFT_TEMPS
+
+  /* Pass 1: process columns from input, store into work array.
+   * 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
+   */
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
+    /* Even part */
+
+    tmp10 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    tmp10 <<= CONST_BITS;
+    /* Add fudge factor here for final descale. */
+    tmp10 += ONE << (CONST_BITS-PASS1_BITS-1);
+    tmp12 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+    tmp20 = MULTIPLY(tmp12, FIX(0.707106781));   /* c4 */
+    tmp11 = tmp10 + tmp20;
+    tmp21 = RIGHT_SHIFT(tmp10 - tmp20 - tmp20, CONST_BITS-PASS1_BITS);
+    tmp20 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+    tmp10 = MULTIPLY(tmp20, FIX(1.224744871));   /* c2 */
+    tmp20 = tmp11 + tmp10;
+    tmp22 = tmp11 - tmp10;
+
+    /* Odd part */
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
+    tmp11 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
+    tmp10 = tmp11 + ((z1 + z2) << CONST_BITS);
+    tmp12 = tmp11 + ((z3 - z2) << CONST_BITS);
+    tmp11 = (z1 - z2 - z3) << PASS1_BITS;
+
+    /* Final output stage */
+
+    wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
+    wsptr[8*5] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
+    wsptr[8*1] = (int) (tmp21 + tmp11);
+    wsptr[8*4] = (int) (tmp21 - tmp11);
+    wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
+    wsptr[8*3] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
+  }
+
+  /* Pass 2: process 6 rows from work array, store into output array.
+   * 12-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/24).
+   */
+  wsptr = workspace;
+  for (ctr = 0; ctr < 6; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+
+    /* Even part */
+
+    /* Add fudge factor here for final descale. */
+    z3 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
+    z3 <<= CONST_BITS;
+
+    z4 = (INT32) wsptr[4];
+    z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */
+
+    tmp10 = z3 + z4;
+    tmp11 = z3 - z4;
+
+    z1 = (INT32) wsptr[2];
+    z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */
+    z1 <<= CONST_BITS;
+    z2 = (INT32) wsptr[6];
+    z2 <<= CONST_BITS;
+
+    tmp12 = z1 - z2;
+
+    tmp21 = z3 + tmp12;
+    tmp24 = z3 - tmp12;
+
+    tmp12 = z4 + z2;
+
+    tmp20 = tmp10 + tmp12;
+    tmp25 = tmp10 - tmp12;
+
+    tmp12 = z4 - z1 - z2;
+
+    tmp22 = tmp11 + tmp12;
+    tmp23 = tmp11 - tmp12;
+
+    /* Odd part */
+
+    z1 = (INT32) wsptr[1];
+    z2 = (INT32) wsptr[3];
+    z3 = (INT32) wsptr[5];
+    z4 = (INT32) wsptr[7];
+
+    tmp11 = MULTIPLY(z2, FIX(1.306562965));                  /* c3 */
+    tmp14 = MULTIPLY(z2, - FIX_0_541196100);                 /* -c9 */
+
+    tmp10 = z1 + z3;
+    tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669));          /* c7 */
+    tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384));       /* c5-c7 */
+    tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716));  /* c1-c5 */
+    tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580));           /* -(c7+c11) */
+    tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */
+    tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */
+    tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) -        /* c7-c11 */
+	     MULTIPLY(z4, FIX(1.982889723));                 /* c5+c7 */
+
+    z1 -= z4;
+    z2 -= z3;
+    z3 = MULTIPLY(z1 + z2, FIX_0_541196100);                 /* c9 */
+    tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865);              /* c3-c9 */
+    tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065);              /* c3+c9 */
+
+    /* Final output stage */
+
+    outptr[0]  = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[1]  = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[2]  = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[9]  = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[3]  = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[8]  = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[4]  = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[7]  = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[5]  = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[6]  = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+
+    wsptr += 8;		/* advance pointer to next row */
+  }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a 10x5 output block.
+ *
+ * 5-point IDCT in pass 1 (columns), 10-point in pass 2 (rows).
+ */
+
+GLOBAL(void)
+jpeg_idct_10x5 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+		JCOEFPTR coef_block,
+		JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
+  INT32 tmp20, tmp21, tmp22, tmp23, tmp24;
+  INT32 z1, z2, z3, z4;
+  JCOEFPTR inptr;
+  ISLOW_MULT_TYPE * quantptr;
+  int * wsptr;
+  JSAMPROW outptr;
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  int workspace[8*5];	/* buffers data between passes */
+  SHIFT_TEMPS
+
+  /* Pass 1: process columns from input, store into work array.
+   * 5-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/10).
+   */
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
+    /* Even part */
+
+    tmp12 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    tmp12 <<= CONST_BITS;
+    /* Add fudge factor here for final descale. */
+    tmp12 += ONE << (CONST_BITS-PASS1_BITS-1);
+    tmp13 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+    tmp14 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+    z1 = MULTIPLY(tmp13 + tmp14, FIX(0.790569415)); /* (c2+c4)/2 */
+    z2 = MULTIPLY(tmp13 - tmp14, FIX(0.353553391)); /* (c2-c4)/2 */
+    z3 = tmp12 + z2;
+    tmp10 = z3 + z1;
+    tmp11 = z3 - z1;
+    tmp12 -= z2 << 2;
+
+    /* Odd part */
+
+    z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+
+    z1 = MULTIPLY(z2 + z3, FIX(0.831253876));       /* c3 */
+    tmp13 = z1 + MULTIPLY(z2, FIX(0.513743148));    /* c1-c3 */
+    tmp14 = z1 - MULTIPLY(z3, FIX(2.176250899));    /* c1+c3 */
+
+    /* Final output stage */
+
+    wsptr[8*0] = (int) RIGHT_SHIFT(tmp10 + tmp13, CONST_BITS-PASS1_BITS);
+    wsptr[8*4] = (int) RIGHT_SHIFT(tmp10 - tmp13, CONST_BITS-PASS1_BITS);
+    wsptr[8*1] = (int) RIGHT_SHIFT(tmp11 + tmp14, CONST_BITS-PASS1_BITS);
+    wsptr[8*3] = (int) RIGHT_SHIFT(tmp11 - tmp14, CONST_BITS-PASS1_BITS);
+    wsptr[8*2] = (int) RIGHT_SHIFT(tmp12, CONST_BITS-PASS1_BITS);
+  }
+
+  /* Pass 2: process 5 rows from work array, store into output array.
+   * 10-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/20).
+   */
+  wsptr = workspace;
+  for (ctr = 0; ctr < 5; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+
+    /* Even part */
+
+    /* Add fudge factor here for final descale. */
+    z3 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
+    z3 <<= CONST_BITS;
+    z4 = (INT32) wsptr[4];
+    z1 = MULTIPLY(z4, FIX(1.144122806));         /* c4 */
+    z2 = MULTIPLY(z4, FIX(0.437016024));         /* c8 */
+    tmp10 = z3 + z1;
+    tmp11 = z3 - z2;
+
+    tmp22 = z3 - ((z1 - z2) << 1);               /* c0 = (c4-c8)*2 */
+
+    z2 = (INT32) wsptr[2];
+    z3 = (INT32) wsptr[6];
+
+    z1 = MULTIPLY(z2 + z3, FIX(0.831253876));    /* c6 */
+    tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */
+    tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */
+
+    tmp20 = tmp10 + tmp12;
+    tmp24 = tmp10 - tmp12;
+    tmp21 = tmp11 + tmp13;
+    tmp23 = tmp11 - tmp13;
+
+    /* Odd part */
+
+    z1 = (INT32) wsptr[1];
+    z2 = (INT32) wsptr[3];
+    z3 = (INT32) wsptr[5];
+    z3 <<= CONST_BITS;
+    z4 = (INT32) wsptr[7];
+
+    tmp11 = z2 + z4;
+    tmp13 = z2 - z4;
+
+    tmp12 = MULTIPLY(tmp13, FIX(0.309016994));        /* (c3-c7)/2 */
+
+    z2 = MULTIPLY(tmp11, FIX(0.951056516));           /* (c3+c7)/2 */
+    z4 = z3 + tmp12;
+
+    tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */
+    tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */
+
+    z2 = MULTIPLY(tmp11, FIX(0.587785252));           /* (c1-c9)/2 */
+    z4 = z3 - tmp12 - (tmp13 << (CONST_BITS - 1));
+
+    tmp12 = ((z1 - tmp13) << CONST_BITS) - z3;
+
+    tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */
+    tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */
+
+    /* Final output stage */
+
+    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+
+    wsptr += 8;		/* advance pointer to next row */
+  }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a 8x4 output block.
+ *
+ * 4-point IDCT in pass 1 (columns), 8-point in pass 2 (rows).
+ */
+
+GLOBAL(void)
+jpeg_idct_8x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	       JCOEFPTR coef_block,
+	       JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp0, tmp1, tmp2, tmp3;
+  INT32 tmp10, tmp11, tmp12, tmp13;
+  INT32 z1, z2, z3;
+  JCOEFPTR inptr;
+  ISLOW_MULT_TYPE * quantptr;
+  int * wsptr;
+  JSAMPROW outptr;
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  int workspace[8*4];	/* buffers data between passes */
+  SHIFT_TEMPS
+
+  /* Pass 1: process columns from input, store into work array.
+   * 4-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
+   */
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
+    /* Even part */
+
+    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+
+    tmp10 = (tmp0 + tmp2) << PASS1_BITS;
+    tmp12 = (tmp0 - tmp2) << PASS1_BITS;
+
+    /* Odd part */
+    /* Same rotation as in the even part of the 8x8 LL&M IDCT */
+
+    z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+
+    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);               /* c6 */
+    /* Add fudge factor here for final descale. */
+    z1 += ONE << (CONST_BITS-PASS1_BITS-1);
+    tmp0 = RIGHT_SHIFT(z1 + MULTIPLY(z2, FIX_0_765366865), /* c2-c6 */
+		       CONST_BITS-PASS1_BITS);
+    tmp2 = RIGHT_SHIFT(z1 - MULTIPLY(z3, FIX_1_847759065), /* c2+c6 */
+		       CONST_BITS-PASS1_BITS);
+
+    /* Final output stage */
+
+    wsptr[8*0] = (int) (tmp10 + tmp0);
+    wsptr[8*3] = (int) (tmp10 - tmp0);
+    wsptr[8*1] = (int) (tmp12 + tmp2);
+    wsptr[8*2] = (int) (tmp12 - tmp2);
+  }
+
+  /* Pass 2: process rows from work array, store into output array. */
+  /* Note that we must descale the results by a factor of 8 == 2**3, */
+  /* and also undo the PASS1_BITS scaling. */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < 4; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+
+    /* Even part: reverse the even part of the forward DCT. */
+    /* The rotator is sqrt(2)*c(-6). */
+
+    z2 = (INT32) wsptr[2];
+    z3 = (INT32) wsptr[6];
+    
+    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);
+    tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865);
+    tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065);
+    
+    /* Add fudge factor here for final descale. */
+    z2 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
+    z3 = (INT32) wsptr[4];
+    
+    tmp0 = (z2 + z3) << CONST_BITS;
+    tmp1 = (z2 - z3) << CONST_BITS;
+    
+    tmp10 = tmp0 + tmp2;
+    tmp13 = tmp0 - tmp2;
+    tmp11 = tmp1 + tmp3;
+    tmp12 = tmp1 - tmp3;
+
+    /* Odd part per figure 8; the matrix is unitary and hence its
+     * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
+     */
+
+    tmp0 = (INT32) wsptr[7];
+    tmp1 = (INT32) wsptr[5];
+    tmp2 = (INT32) wsptr[3];
+    tmp3 = (INT32) wsptr[1];
+
+    z2 = tmp0 + tmp2;
+    z3 = tmp1 + tmp3;
+
+    z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* sqrt(2) * c3 */
+    z2 = MULTIPLY(z2, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
+    z3 = MULTIPLY(z3, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
+    z2 += z1;
+    z3 += z1;
+
+    z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
+    tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
+    tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
+    tmp0 += z1 + z2;
+    tmp3 += z1 + z3;
+
+    z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
+    tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
+    tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
+    tmp1 += z1 + z3;
+    tmp2 += z1 + z2;
+
+    /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
+
+    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp3,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp3,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp2,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp2,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp1,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp1,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13 + tmp0,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp13 - tmp0,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+
+    wsptr += DCTSIZE;		/* advance pointer to next row */
+  }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a reduced-size 6x3 output block.
+ *
+ * 3-point IDCT in pass 1 (columns), 6-point in pass 2 (rows).
+ */
+
+GLOBAL(void)
+jpeg_idct_6x3 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	       JCOEFPTR coef_block,
+	       JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12;
+  INT32 z1, z2, z3;
+  JCOEFPTR inptr;
+  ISLOW_MULT_TYPE * quantptr;
+  int * wsptr;
+  JSAMPROW outptr;
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  int workspace[6*3];	/* buffers data between passes */
+  SHIFT_TEMPS
+
+  /* Pass 1: process columns from input, store into work array.
+   * 3-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/6).
+   */
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = 0; ctr < 6; ctr++, inptr++, quantptr++, wsptr++) {
+    /* Even part */
+
+    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    tmp0 <<= CONST_BITS;
+    /* Add fudge factor here for final descale. */
+    tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
+    tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+    tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */
+    tmp10 = tmp0 + tmp12;
+    tmp2 = tmp0 - tmp12 - tmp12;
+
+    /* Odd part */
+
+    tmp12 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+    tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */
+
+    /* Final output stage */
+
+    wsptr[6*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
+    wsptr[6*2] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
+    wsptr[6*1] = (int) RIGHT_SHIFT(tmp2, CONST_BITS-PASS1_BITS);
+  }
+  
+  /* Pass 2: process 3 rows from work array, store into output array.
+   * 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
+   */
+  wsptr = workspace;
+  for (ctr = 0; ctr < 3; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+
+    /* Even part */
+
+    /* Add fudge factor here for final descale. */
+    tmp0 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
+    tmp0 <<= CONST_BITS;
+    tmp2 = (INT32) wsptr[4];
+    tmp10 = MULTIPLY(tmp2, FIX(0.707106781));   /* c4 */
+    tmp1 = tmp0 + tmp10;
+    tmp11 = tmp0 - tmp10 - tmp10;
+    tmp10 = (INT32) wsptr[2];
+    tmp0 = MULTIPLY(tmp10, FIX(1.224744871));   /* c2 */
+    tmp10 = tmp1 + tmp0;
+    tmp12 = tmp1 - tmp0;
+
+    /* Odd part */
+
+    z1 = (INT32) wsptr[1];
+    z2 = (INT32) wsptr[3];
+    z3 = (INT32) wsptr[5];
+    tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
+    tmp0 = tmp1 + ((z1 + z2) << CONST_BITS);
+    tmp2 = tmp1 + ((z3 - z2) << CONST_BITS);
+    tmp1 = (z1 - z2 - z3) << CONST_BITS;
+
+    /* Final output stage */
+
+    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+
+    wsptr += 6;		/* advance pointer to next row */
+  }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a 4x2 output block.
+ *
+ * 2-point IDCT in pass 1 (columns), 4-point in pass 2 (rows).
+ */
+
+GLOBAL(void)
+jpeg_idct_4x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	       JCOEFPTR coef_block,
+	       JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp0, tmp2, tmp10, tmp12;
+  INT32 z1, z2, z3;
+  JCOEFPTR inptr;
+  ISLOW_MULT_TYPE * quantptr;
+  INT32 * wsptr;
+  JSAMPROW outptr;
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  INT32 workspace[4*2];	/* buffers data between passes */
+  SHIFT_TEMPS
+
+  /* Pass 1: process columns from input, store into work array. */
+
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = 0; ctr < 4; ctr++, inptr++, quantptr++, wsptr++) {
+    /* Even part */
+
+    tmp10 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+
+    /* Odd part */
+
+    tmp0 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+
+    /* Final output stage */
+
+    wsptr[4*0] = tmp10 + tmp0;
+    wsptr[4*1] = tmp10 - tmp0;
+  }
+
+  /* Pass 2: process 2 rows from work array, store into output array.
+   * 4-point IDCT kernel,
+   * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT].
+   */
+  wsptr = workspace;
+  for (ctr = 0; ctr < 2; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+
+    /* Even part */
+
+    /* Add fudge factor here for final descale. */
+    tmp0 = wsptr[0] + (ONE << 2);
+    tmp2 = wsptr[2];
+
+    tmp10 = (tmp0 + tmp2) << CONST_BITS;
+    tmp12 = (tmp0 - tmp2) << CONST_BITS;
+
+    /* Odd part */
+    /* Same rotation as in the even part of the 8x8 LL&M IDCT */
+
+    z2 = wsptr[1];
+    z3 = wsptr[3];
+
+    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);   /* c6 */
+    tmp0 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
+    tmp2 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
+
+    /* Final output stage */
+
+    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
+					      CONST_BITS+3)
+			    & RANGE_MASK];
+    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
+					      CONST_BITS+3)
+			    & RANGE_MASK];
+    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
+					      CONST_BITS+3)
+			    & RANGE_MASK];
+    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
+					      CONST_BITS+3)
+			    & RANGE_MASK];
+
+    wsptr += 4;		/* advance pointer to next row */
+  }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a 2x1 output block.
+ *
+ * 1-point IDCT in pass 1 (columns), 2-point in pass 2 (rows).
+ */
+
+GLOBAL(void)
+jpeg_idct_2x1 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	       JCOEFPTR coef_block,
+	       JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp0, tmp10;
+  ISLOW_MULT_TYPE * quantptr;
+  JSAMPROW outptr;
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  SHIFT_TEMPS
+
+  /* Pass 1: empty. */
+
+  /* Pass 2: process 1 row from input, store into output array. */
+
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  outptr = output_buf[0] + output_col;
+
+  /* Even part */
+
+  tmp10 = DEQUANTIZE(coef_block[0], quantptr[0]);
+  /* Add fudge factor here for final descale. */
+  tmp10 += ONE << 2;
+
+  /* Odd part */
+
+  tmp0 = DEQUANTIZE(coef_block[1], quantptr[1]);
+
+  /* Final output stage */
+
+  outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0, 3) & RANGE_MASK];
+  outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0, 3) & RANGE_MASK];
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a 8x16 output block.
+ *
+ * 16-point IDCT in pass 1 (columns), 8-point in pass 2 (rows).
+ */
+
+GLOBAL(void)
+jpeg_idct_8x16 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+		JCOEFPTR coef_block,
+		JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13;
+  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27;
+  INT32 z1, z2, z3, z4;
+  JCOEFPTR inptr;
+  ISLOW_MULT_TYPE * quantptr;
+  int * wsptr;
+  JSAMPROW outptr;
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  int workspace[8*16];	/* buffers data between passes */
+  SHIFT_TEMPS
+
+  /* Pass 1: process columns from input, store into work array.
+   * 16-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/32).
+   */
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
+    /* Even part */
+
+    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    tmp0 <<= CONST_BITS;
+    /* Add fudge factor here for final descale. */
+    tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+    tmp1 = MULTIPLY(z1, FIX(1.306562965));      /* c4[16] = c2[8] */
+    tmp2 = MULTIPLY(z1, FIX_0_541196100);       /* c12[16] = c6[8] */
+
+    tmp10 = tmp0 + tmp1;
+    tmp11 = tmp0 - tmp1;
+    tmp12 = tmp0 + tmp2;
+    tmp13 = tmp0 - tmp2;
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
+    z3 = z1 - z2;
+    z4 = MULTIPLY(z3, FIX(0.275899379));        /* c14[16] = c7[8] */
+    z3 = MULTIPLY(z3, FIX(1.387039845));        /* c2[16] = c1[8] */
+
+    tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447);  /* (c6+c2)[16] = (c3+c1)[8] */
+    tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223);  /* (c6-c14)[16] = (c3-c7)[8] */
+    tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */
+    tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */
+
+    tmp20 = tmp10 + tmp0;
+    tmp27 = tmp10 - tmp0;
+    tmp21 = tmp12 + tmp1;
+    tmp26 = tmp12 - tmp1;
+    tmp22 = tmp13 + tmp2;
+    tmp25 = tmp13 - tmp2;
+    tmp23 = tmp11 + tmp3;
+    tmp24 = tmp11 - tmp3;
+
+    /* Odd part */
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
+    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
+
+    tmp11 = z1 + z3;
+
+    tmp1  = MULTIPLY(z1 + z2, FIX(1.353318001));   /* c3 */
+    tmp2  = MULTIPLY(tmp11,   FIX(1.247225013));   /* c5 */
+    tmp3  = MULTIPLY(z1 + z4, FIX(1.093201867));   /* c7 */
+    tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586));   /* c9 */
+    tmp11 = MULTIPLY(tmp11,   FIX(0.666655658));   /* c11 */
+    tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528));   /* c13 */
+    tmp0  = tmp1 + tmp2 + tmp3 -
+	    MULTIPLY(z1, FIX(2.286341144));        /* c7+c5+c3-c1 */
+    tmp13 = tmp10 + tmp11 + tmp12 -
+	    MULTIPLY(z1, FIX(1.835730603));        /* c9+c11+c13-c15 */
+    z1    = MULTIPLY(z2 + z3, FIX(0.138617169));   /* c15 */
+    tmp1  += z1 + MULTIPLY(z2, FIX(0.071888074));  /* c9+c11-c3-c15 */
+    tmp2  += z1 - MULTIPLY(z3, FIX(1.125726048));  /* c5+c7+c15-c3 */
+    z1    = MULTIPLY(z3 - z2, FIX(1.407403738));   /* c1 */
+    tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282));  /* c1+c11-c9-c13 */
+    tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411));  /* c1+c5+c13-c7 */
+    z2    += z4;
+    z1    = MULTIPLY(z2, - FIX(0.666655658));      /* -c11 */
+    tmp1  += z1;
+    tmp3  += z1 + MULTIPLY(z4, FIX(1.065388962));  /* c3+c11+c15-c7 */
+    z2    = MULTIPLY(z2, - FIX(1.247225013));      /* -c5 */
+    tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809));  /* c1+c5+c9-c13 */
+    tmp12 += z2;
+    z2    = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */
+    tmp2  += z2;
+    tmp3  += z2;
+    z2    = MULTIPLY(z4 - z3, FIX(0.410524528));   /* c13 */
+    tmp10 += z2;
+    tmp11 += z2;
+
+    /* Final output stage */
+
+    wsptr[8*0]  = (int) RIGHT_SHIFT(tmp20 + tmp0,  CONST_BITS-PASS1_BITS);
+    wsptr[8*15] = (int) RIGHT_SHIFT(tmp20 - tmp0,  CONST_BITS-PASS1_BITS);
+    wsptr[8*1]  = (int) RIGHT_SHIFT(tmp21 + tmp1,  CONST_BITS-PASS1_BITS);
+    wsptr[8*14] = (int) RIGHT_SHIFT(tmp21 - tmp1,  CONST_BITS-PASS1_BITS);
+    wsptr[8*2]  = (int) RIGHT_SHIFT(tmp22 + tmp2,  CONST_BITS-PASS1_BITS);
+    wsptr[8*13] = (int) RIGHT_SHIFT(tmp22 - tmp2,  CONST_BITS-PASS1_BITS);
+    wsptr[8*3]  = (int) RIGHT_SHIFT(tmp23 + tmp3,  CONST_BITS-PASS1_BITS);
+    wsptr[8*12] = (int) RIGHT_SHIFT(tmp23 - tmp3,  CONST_BITS-PASS1_BITS);
+    wsptr[8*4]  = (int) RIGHT_SHIFT(tmp24 + tmp10, CONST_BITS-PASS1_BITS);
+    wsptr[8*11] = (int) RIGHT_SHIFT(tmp24 - tmp10, CONST_BITS-PASS1_BITS);
+    wsptr[8*5]  = (int) RIGHT_SHIFT(tmp25 + tmp11, CONST_BITS-PASS1_BITS);
+    wsptr[8*10] = (int) RIGHT_SHIFT(tmp25 - tmp11, CONST_BITS-PASS1_BITS);
+    wsptr[8*6]  = (int) RIGHT_SHIFT(tmp26 + tmp12, CONST_BITS-PASS1_BITS);
+    wsptr[8*9]  = (int) RIGHT_SHIFT(tmp26 - tmp12, CONST_BITS-PASS1_BITS);
+    wsptr[8*7]  = (int) RIGHT_SHIFT(tmp27 + tmp13, CONST_BITS-PASS1_BITS);
+    wsptr[8*8]  = (int) RIGHT_SHIFT(tmp27 - tmp13, CONST_BITS-PASS1_BITS);
+  }
+  
+  /* Pass 2: process rows from work array, store into output array. */
+  /* Note that we must descale the results by a factor of 8 == 2**3, */
+  /* and also undo the PASS1_BITS scaling. */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < 16; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+    
+    /* Even part: reverse the even part of the forward DCT. */
+    /* The rotator is sqrt(2)*c(-6). */
+    
+    z2 = (INT32) wsptr[2];
+    z3 = (INT32) wsptr[6];
+    
+    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);
+    tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865);
+    tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065);
+    
+    /* Add fudge factor here for final descale. */
+    z2 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
+    z3 = (INT32) wsptr[4];
+    
+    tmp0 = (z2 + z3) << CONST_BITS;
+    tmp1 = (z2 - z3) << CONST_BITS;
+    
+    tmp10 = tmp0 + tmp2;
+    tmp13 = tmp0 - tmp2;
+    tmp11 = tmp1 + tmp3;
+    tmp12 = tmp1 - tmp3;
+    
+    /* Odd part per figure 8; the matrix is unitary and hence its
+     * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
+     */
+    
+    tmp0 = (INT32) wsptr[7];
+    tmp1 = (INT32) wsptr[5];
+    tmp2 = (INT32) wsptr[3];
+    tmp3 = (INT32) wsptr[1];
+    
+    z2 = tmp0 + tmp2;
+    z3 = tmp1 + tmp3;
+
+    z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* sqrt(2) * c3 */
+    z2 = MULTIPLY(z2, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
+    z3 = MULTIPLY(z3, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
+    z2 += z1;
+    z3 += z1;
+
+    z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
+    tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
+    tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
+    tmp0 += z1 + z2;
+    tmp3 += z1 + z3;
+
+    z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
+    tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
+    tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
+    tmp1 += z1 + z3;
+    tmp2 += z1 + z2;
+    
+    /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
+    
+    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp3,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp3,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp2,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp2,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp1,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp1,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13 + tmp0,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp13 - tmp0,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    
+    wsptr += DCTSIZE;		/* advance pointer to next row */
+  }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a 7x14 output block.
+ *
+ * 14-point IDCT in pass 1 (columns), 7-point in pass 2 (rows).
+ */
+
+GLOBAL(void)
+jpeg_idct_7x14 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+		JCOEFPTR coef_block,
+		JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
+  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26;
+  INT32 z1, z2, z3, z4;
+  JCOEFPTR inptr;
+  ISLOW_MULT_TYPE * quantptr;
+  int * wsptr;
+  JSAMPROW outptr;
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  int workspace[7*14];	/* buffers data between passes */
+  SHIFT_TEMPS
+
+  /* Pass 1: process columns from input, store into work array.
+   * 14-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/28).
+   */
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = 0; ctr < 7; ctr++, inptr++, quantptr++, wsptr++) {
+    /* Even part */
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    z1 <<= CONST_BITS;
+    /* Add fudge factor here for final descale. */
+    z1 += ONE << (CONST_BITS-PASS1_BITS-1);
+    z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+    z2 = MULTIPLY(z4, FIX(1.274162392));         /* c4 */
+    z3 = MULTIPLY(z4, FIX(0.314692123));         /* c12 */
+    z4 = MULTIPLY(z4, FIX(0.881747734));         /* c8 */
+
+    tmp10 = z1 + z2;
+    tmp11 = z1 + z3;
+    tmp12 = z1 - z4;
+
+    tmp23 = RIGHT_SHIFT(z1 - ((z2 + z3 - z4) << 1), /* c0 = (c4+c12-c8)*2 */
+			CONST_BITS-PASS1_BITS);
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
+
+    z3 = MULTIPLY(z1 + z2, FIX(1.105676686));    /* c6 */
+
+    tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */
+    tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */
+    tmp15 = MULTIPLY(z1, FIX(0.613604268)) -     /* c10 */
+	    MULTIPLY(z2, FIX(1.378756276));      /* c2 */
+
+    tmp20 = tmp10 + tmp13;
+    tmp26 = tmp10 - tmp13;
+    tmp21 = tmp11 + tmp14;
+    tmp25 = tmp11 - tmp14;
+    tmp22 = tmp12 + tmp15;
+    tmp24 = tmp12 - tmp15;
+
+    /* Odd part */
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
+    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
+    tmp13 = z4 << CONST_BITS;
+
+    tmp14 = z1 + z3;
+    tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607));           /* c3 */
+    tmp12 = MULTIPLY(tmp14, FIX(1.197448846));             /* c5 */
+    tmp10 = tmp11 + tmp12 + tmp13 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */
+    tmp14 = MULTIPLY(tmp14, FIX(0.752406978));             /* c9 */
+    tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426));        /* c9+c11-c13 */
+    z1    -= z2;
+    tmp15 = MULTIPLY(z1, FIX(0.467085129)) - tmp13;        /* c11 */
+    tmp16 += tmp15;
+    z1    += z4;
+    z4    = MULTIPLY(z2 + z3, - FIX(0.158341681)) - tmp13; /* -c13 */
+    tmp11 += z4 - MULTIPLY(z2, FIX(0.424103948));          /* c3-c9-c13 */
+    tmp12 += z4 - MULTIPLY(z3, FIX(2.373959773));          /* c3+c5-c13 */
+    z4    = MULTIPLY(z3 - z2, FIX(1.405321284));           /* c1 */
+    tmp14 += z4 + tmp13 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */
+    tmp15 += z4 + MULTIPLY(z2, FIX(0.674957567));          /* c1+c11-c5 */
+
+    tmp13 = (z1 - z3) << PASS1_BITS;
+
+    /* Final output stage */
+
+    wsptr[7*0]  = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
+    wsptr[7*13] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
+    wsptr[7*1]  = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
+    wsptr[7*12] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
+    wsptr[7*2]  = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
+    wsptr[7*11] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
+    wsptr[7*3]  = (int) (tmp23 + tmp13);
+    wsptr[7*10] = (int) (tmp23 - tmp13);
+    wsptr[7*4]  = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
+    wsptr[7*9]  = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
+    wsptr[7*5]  = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
+    wsptr[7*8]  = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
+    wsptr[7*6]  = (int) RIGHT_SHIFT(tmp26 + tmp16, CONST_BITS-PASS1_BITS);
+    wsptr[7*7]  = (int) RIGHT_SHIFT(tmp26 - tmp16, CONST_BITS-PASS1_BITS);
+  }
+
+  /* Pass 2: process 14 rows from work array, store into output array.
+   * 7-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/14).
+   */
+  wsptr = workspace;
+  for (ctr = 0; ctr < 14; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+
+    /* Even part */
+
+    /* Add fudge factor here for final descale. */
+    tmp23 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
+    tmp23 <<= CONST_BITS;
+
+    z1 = (INT32) wsptr[2];
+    z2 = (INT32) wsptr[4];
+    z3 = (INT32) wsptr[6];
+
+    tmp20 = MULTIPLY(z2 - z3, FIX(0.881747734));       /* c4 */
+    tmp22 = MULTIPLY(z1 - z2, FIX(0.314692123));       /* c6 */
+    tmp21 = tmp20 + tmp22 + tmp23 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */
+    tmp10 = z1 + z3;
+    z2 -= tmp10;
+    tmp10 = MULTIPLY(tmp10, FIX(1.274162392)) + tmp23; /* c2 */
+    tmp20 += tmp10 - MULTIPLY(z3, FIX(0.077722536));   /* c2-c4-c6 */
+    tmp22 += tmp10 - MULTIPLY(z1, FIX(2.470602249));   /* c2+c4+c6 */
+    tmp23 += MULTIPLY(z2, FIX(1.414213562));           /* c0 */
+
+    /* Odd part */
+
+    z1 = (INT32) wsptr[1];
+    z2 = (INT32) wsptr[3];
+    z3 = (INT32) wsptr[5];
+
+    tmp11 = MULTIPLY(z1 + z2, FIX(0.935414347));       /* (c3+c1-c5)/2 */
+    tmp12 = MULTIPLY(z1 - z2, FIX(0.170262339));       /* (c3+c5-c1)/2 */
+    tmp10 = tmp11 - tmp12;
+    tmp11 += tmp12;
+    tmp12 = MULTIPLY(z2 + z3, - FIX(1.378756276));     /* -c1 */
+    tmp11 += tmp12;
+    z2 = MULTIPLY(z1 + z3, FIX(0.613604268));          /* c5 */
+    tmp10 += z2;
+    tmp12 += z2 + MULTIPLY(z3, FIX(1.870828693));      /* c3+c1-c5 */
+
+    /* Final output stage */
+
+    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+
+    wsptr += 7;		/* advance pointer to next row */
+  }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a 6x12 output block.
+ *
+ * 12-point IDCT in pass 1 (columns), 6-point in pass 2 (rows).
+ */
+
+GLOBAL(void)
+jpeg_idct_6x12 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+		JCOEFPTR coef_block,
+		JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
+  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25;
+  INT32 z1, z2, z3, z4;
+  JCOEFPTR inptr;
+  ISLOW_MULT_TYPE * quantptr;
+  int * wsptr;
+  JSAMPROW outptr;
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  int workspace[6*12];	/* buffers data between passes */
+  SHIFT_TEMPS
+
+  /* Pass 1: process columns from input, store into work array.
+   * 12-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/24).
+   */
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = 0; ctr < 6; ctr++, inptr++, quantptr++, wsptr++) {
+    /* Even part */
+
+    z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    z3 <<= CONST_BITS;
+    /* Add fudge factor here for final descale. */
+    z3 += ONE << (CONST_BITS-PASS1_BITS-1);
+
+    z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+    z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */
+
+    tmp10 = z3 + z4;
+    tmp11 = z3 - z4;
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+    z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */
+    z1 <<= CONST_BITS;
+    z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
+    z2 <<= CONST_BITS;
+
+    tmp12 = z1 - z2;
+
+    tmp21 = z3 + tmp12;
+    tmp24 = z3 - tmp12;
+
+    tmp12 = z4 + z2;
+
+    tmp20 = tmp10 + tmp12;
+    tmp25 = tmp10 - tmp12;
+
+    tmp12 = z4 - z1 - z2;
+
+    tmp22 = tmp11 + tmp12;
+    tmp23 = tmp11 - tmp12;
+
+    /* Odd part */
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
+    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
+
+    tmp11 = MULTIPLY(z2, FIX(1.306562965));                  /* c3 */
+    tmp14 = MULTIPLY(z2, - FIX_0_541196100);                 /* -c9 */
+
+    tmp10 = z1 + z3;
+    tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669));          /* c7 */
+    tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384));       /* c5-c7 */
+    tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716));  /* c1-c5 */
+    tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580));           /* -(c7+c11) */
+    tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */
+    tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */
+    tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) -        /* c7-c11 */
+	     MULTIPLY(z4, FIX(1.982889723));                 /* c5+c7 */
+
+    z1 -= z4;
+    z2 -= z3;
+    z3 = MULTIPLY(z1 + z2, FIX_0_541196100);                 /* c9 */
+    tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865);              /* c3-c9 */
+    tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065);              /* c3+c9 */
+
+    /* Final output stage */
+
+    wsptr[6*0]  = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
+    wsptr[6*11] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
+    wsptr[6*1]  = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
+    wsptr[6*10] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
+    wsptr[6*2]  = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
+    wsptr[6*9]  = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
+    wsptr[6*3]  = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
+    wsptr[6*8]  = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
+    wsptr[6*4]  = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
+    wsptr[6*7]  = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
+    wsptr[6*5]  = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
+    wsptr[6*6]  = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
+  }
+
+  /* Pass 2: process 12 rows from work array, store into output array.
+   * 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
+   */
+  wsptr = workspace;
+  for (ctr = 0; ctr < 12; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+
+    /* Even part */
+
+    /* Add fudge factor here for final descale. */
+    tmp10 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
+    tmp10 <<= CONST_BITS;
+    tmp12 = (INT32) wsptr[4];
+    tmp20 = MULTIPLY(tmp12, FIX(0.707106781));   /* c4 */
+    tmp11 = tmp10 + tmp20;
+    tmp21 = tmp10 - tmp20 - tmp20;
+    tmp20 = (INT32) wsptr[2];
+    tmp10 = MULTIPLY(tmp20, FIX(1.224744871));   /* c2 */
+    tmp20 = tmp11 + tmp10;
+    tmp22 = tmp11 - tmp10;
+
+    /* Odd part */
+
+    z1 = (INT32) wsptr[1];
+    z2 = (INT32) wsptr[3];
+    z3 = (INT32) wsptr[5];
+    tmp11 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
+    tmp10 = tmp11 + ((z1 + z2) << CONST_BITS);
+    tmp12 = tmp11 + ((z3 - z2) << CONST_BITS);
+    tmp11 = (z1 - z2 - z3) << CONST_BITS;
+
+    /* Final output stage */
+
+    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+
+    wsptr += 6;		/* advance pointer to next row */
+  }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a 5x10 output block.
+ *
+ * 10-point IDCT in pass 1 (columns), 5-point in pass 2 (rows).
+ */
+
+GLOBAL(void)
+jpeg_idct_5x10 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+		JCOEFPTR coef_block,
+		JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
+  INT32 tmp20, tmp21, tmp22, tmp23, tmp24;
+  INT32 z1, z2, z3, z4, z5;
+  JCOEFPTR inptr;
+  ISLOW_MULT_TYPE * quantptr;
+  int * wsptr;
+  JSAMPROW outptr;
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  int workspace[5*10];	/* buffers data between passes */
+  SHIFT_TEMPS
+
+  /* Pass 1: process columns from input, store into work array.
+   * 10-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/20).
+   */
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = 0; ctr < 5; ctr++, inptr++, quantptr++, wsptr++) {
+    /* Even part */
+
+    z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    z3 <<= CONST_BITS;
+    /* Add fudge factor here for final descale. */
+    z3 += ONE << (CONST_BITS-PASS1_BITS-1);
+    z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+    z1 = MULTIPLY(z4, FIX(1.144122806));         /* c4 */
+    z2 = MULTIPLY(z4, FIX(0.437016024));         /* c8 */
+    tmp10 = z3 + z1;
+    tmp11 = z3 - z2;
+
+    tmp22 = RIGHT_SHIFT(z3 - ((z1 - z2) << 1),   /* c0 = (c4-c8)*2 */
+			CONST_BITS-PASS1_BITS);
+
+    z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
+
+    z1 = MULTIPLY(z2 + z3, FIX(0.831253876));    /* c6 */
+    tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */
+    tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */
+
+    tmp20 = tmp10 + tmp12;
+    tmp24 = tmp10 - tmp12;
+    tmp21 = tmp11 + tmp13;
+    tmp23 = tmp11 - tmp13;
+
+    /* Odd part */
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
+    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
+
+    tmp11 = z2 + z4;
+    tmp13 = z2 - z4;
+
+    tmp12 = MULTIPLY(tmp13, FIX(0.309016994));        /* (c3-c7)/2 */
+    z5 = z3 << CONST_BITS;
+
+    z2 = MULTIPLY(tmp11, FIX(0.951056516));           /* (c3+c7)/2 */
+    z4 = z5 + tmp12;
+
+    tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */
+    tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */
+
+    z2 = MULTIPLY(tmp11, FIX(0.587785252));           /* (c1-c9)/2 */
+    z4 = z5 - tmp12 - (tmp13 << (CONST_BITS - 1));
+
+    tmp12 = (z1 - tmp13 - z3) << PASS1_BITS;
+
+    tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */
+    tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */
+
+    /* Final output stage */
+
+    wsptr[5*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
+    wsptr[5*9] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
+    wsptr[5*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
+    wsptr[5*8] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
+    wsptr[5*2] = (int) (tmp22 + tmp12);
+    wsptr[5*7] = (int) (tmp22 - tmp12);
+    wsptr[5*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
+    wsptr[5*6] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
+    wsptr[5*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
+    wsptr[5*5] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
+  }
+
+  /* Pass 2: process 10 rows from work array, store into output array.
+   * 5-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/10).
+   */
+  wsptr = workspace;
+  for (ctr = 0; ctr < 10; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+
+    /* Even part */
+
+    /* Add fudge factor here for final descale. */
+    tmp12 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
+    tmp12 <<= CONST_BITS;
+    tmp13 = (INT32) wsptr[2];
+    tmp14 = (INT32) wsptr[4];
+    z1 = MULTIPLY(tmp13 + tmp14, FIX(0.790569415)); /* (c2+c4)/2 */
+    z2 = MULTIPLY(tmp13 - tmp14, FIX(0.353553391)); /* (c2-c4)/2 */
+    z3 = tmp12 + z2;
+    tmp10 = z3 + z1;
+    tmp11 = z3 - z1;
+    tmp12 -= z2 << 2;
+
+    /* Odd part */
+
+    z2 = (INT32) wsptr[1];
+    z3 = (INT32) wsptr[3];
+
+    z1 = MULTIPLY(z2 + z3, FIX(0.831253876));       /* c3 */
+    tmp13 = z1 + MULTIPLY(z2, FIX(0.513743148));    /* c1-c3 */
+    tmp14 = z1 - MULTIPLY(z3, FIX(2.176250899));    /* c1+c3 */
+
+    /* Final output stage */
+
+    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp13,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp13,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp14,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp14,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+
+    wsptr += 5;		/* advance pointer to next row */
+  }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a 4x8 output block.
+ *
+ * 8-point IDCT in pass 1 (columns), 4-point in pass 2 (rows).
+ */
+
+GLOBAL(void)
+jpeg_idct_4x8 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	       JCOEFPTR coef_block,
+	       JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp0, tmp1, tmp2, tmp3;
+  INT32 tmp10, tmp11, tmp12, tmp13;
+  INT32 z1, z2, z3;
+  JCOEFPTR inptr;
+  ISLOW_MULT_TYPE * quantptr;
+  int * wsptr;
+  JSAMPROW outptr;
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  int workspace[4*8];	/* buffers data between passes */
+  SHIFT_TEMPS
+
+  /* Pass 1: process columns from input, store into work array. */
+  /* Note results are scaled up by sqrt(8) compared to a true IDCT; */
+  /* furthermore, we scale the results by 2**PASS1_BITS. */
+
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = 4; ctr > 0; ctr--) {
+    /* Due to quantization, we will usually find that many of the input
+     * coefficients are zero, especially the AC terms.  We can exploit this
+     * by short-circuiting the IDCT calculation for any column in which all
+     * the AC terms are zero.  In that case each output is equal to the
+     * DC coefficient (with scale factor as needed).
+     * With typical images and quantization tables, half or more of the
+     * column DCT calculations can be simplified this way.
+     */
+
+    if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 &&
+	inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&
+	inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&
+	inptr[DCTSIZE*7] == 0) {
+      /* AC terms all zero */
+      int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) << PASS1_BITS;
+
+      wsptr[4*0] = dcval;
+      wsptr[4*1] = dcval;
+      wsptr[4*2] = dcval;
+      wsptr[4*3] = dcval;
+      wsptr[4*4] = dcval;
+      wsptr[4*5] = dcval;
+      wsptr[4*6] = dcval;
+      wsptr[4*7] = dcval;
+
+      inptr++;			/* advance pointers to next column */
+      quantptr++;
+      wsptr++;
+      continue;
+    }
+
+    /* Even part: reverse the even part of the forward DCT. */
+    /* The rotator is sqrt(2)*c(-6). */
+
+    z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
+    
+    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);
+    tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865);
+    tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065);
+    
+    z2 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+    z2 <<= CONST_BITS;
+    z3 <<= CONST_BITS;
+    /* Add fudge factor here for final descale. */
+    z2 += ONE << (CONST_BITS-PASS1_BITS-1);
+
+    tmp0 = z2 + z3;
+    tmp1 = z2 - z3;
+    
+    tmp10 = tmp0 + tmp2;
+    tmp13 = tmp0 - tmp2;
+    tmp11 = tmp1 + tmp3;
+    tmp12 = tmp1 - tmp3;
+
+    /* Odd part per figure 8; the matrix is unitary and hence its
+     * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
+     */
+
+    tmp0 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
+    tmp1 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
+    tmp2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+    tmp3 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+
+    z2 = tmp0 + tmp2;
+    z3 = tmp1 + tmp3;
+
+    z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* sqrt(2) * c3 */
+    z2 = MULTIPLY(z2, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
+    z3 = MULTIPLY(z3, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
+    z2 += z1;
+    z3 += z1;
+
+    z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
+    tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
+    tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
+    tmp0 += z1 + z2;
+    tmp3 += z1 + z3;
+
+    z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
+    tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
+    tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
+    tmp1 += z1 + z3;
+    tmp2 += z1 + z2;
+
+    /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
+
+    wsptr[4*0] = (int) RIGHT_SHIFT(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
+    wsptr[4*7] = (int) RIGHT_SHIFT(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
+    wsptr[4*1] = (int) RIGHT_SHIFT(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
+    wsptr[4*6] = (int) RIGHT_SHIFT(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
+    wsptr[4*2] = (int) RIGHT_SHIFT(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
+    wsptr[4*5] = (int) RIGHT_SHIFT(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
+    wsptr[4*3] = (int) RIGHT_SHIFT(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
+    wsptr[4*4] = (int) RIGHT_SHIFT(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
+
+    inptr++;			/* advance pointers to next column */
+    quantptr++;
+    wsptr++;
+  }
+
+  /* Pass 2: process 8 rows from work array, store into output array.
+   * 4-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
+   */
+  wsptr = workspace;
+  for (ctr = 0; ctr < 8; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+
+    /* Even part */
+
+    /* Add fudge factor here for final descale. */
+    tmp0 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
+    tmp2 = (INT32) wsptr[2];
+
+    tmp10 = (tmp0 + tmp2) << CONST_BITS;
+    tmp12 = (tmp0 - tmp2) << CONST_BITS;
+
+    /* Odd part */
+    /* Same rotation as in the even part of the 8x8 LL&M IDCT */
+
+    z2 = (INT32) wsptr[1];
+    z3 = (INT32) wsptr[3];
+
+    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);   /* c6 */
+    tmp0 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
+    tmp2 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
+
+    /* Final output stage */
+
+    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    
+    wsptr += 4;		/* advance pointer to next row */
+  }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a reduced-size 3x6 output block.
+ *
+ * 6-point IDCT in pass 1 (columns), 3-point in pass 2 (rows).
+ */
+
+GLOBAL(void)
+jpeg_idct_3x6 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	       JCOEFPTR coef_block,
+	       JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12;
+  INT32 z1, z2, z3;
+  JCOEFPTR inptr;
+  ISLOW_MULT_TYPE * quantptr;
+  int * wsptr;
+  JSAMPROW outptr;
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  int workspace[3*6];	/* buffers data between passes */
+  SHIFT_TEMPS
+
+  /* Pass 1: process columns from input, store into work array.
+   * 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
+   */
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = 0; ctr < 3; ctr++, inptr++, quantptr++, wsptr++) {
+    /* Even part */
+
+    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    tmp0 <<= CONST_BITS;
+    /* Add fudge factor here for final descale. */
+    tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
+    tmp2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+    tmp10 = MULTIPLY(tmp2, FIX(0.707106781));   /* c4 */
+    tmp1 = tmp0 + tmp10;
+    tmp11 = RIGHT_SHIFT(tmp0 - tmp10 - tmp10, CONST_BITS-PASS1_BITS);
+    tmp10 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+    tmp0 = MULTIPLY(tmp10, FIX(1.224744871));   /* c2 */
+    tmp10 = tmp1 + tmp0;
+    tmp12 = tmp1 - tmp0;
+
+    /* Odd part */
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
+    tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
+    tmp0 = tmp1 + ((z1 + z2) << CONST_BITS);
+    tmp2 = tmp1 + ((z3 - z2) << CONST_BITS);
+    tmp1 = (z1 - z2 - z3) << PASS1_BITS;
+
+    /* Final output stage */
+
+    wsptr[3*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
+    wsptr[3*5] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
+    wsptr[3*1] = (int) (tmp11 + tmp1);
+    wsptr[3*4] = (int) (tmp11 - tmp1);
+    wsptr[3*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS);
+    wsptr[3*3] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS);
+  }
+
+  /* Pass 2: process 6 rows from work array, store into output array.
+   * 3-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/6).
+   */
+  wsptr = workspace;
+  for (ctr = 0; ctr < 6; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+
+    /* Even part */
+
+    /* Add fudge factor here for final descale. */
+    tmp0 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
+    tmp0 <<= CONST_BITS;
+    tmp2 = (INT32) wsptr[2];
+    tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */
+    tmp10 = tmp0 + tmp12;
+    tmp2 = tmp0 - tmp12 - tmp12;
+
+    /* Odd part */
+
+    tmp12 = (INT32) wsptr[1];
+    tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */
+
+    /* Final output stage */
+
+    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp2,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+
+    wsptr += 3;		/* advance pointer to next row */
+  }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a 2x4 output block.
+ *
+ * 4-point IDCT in pass 1 (columns), 2-point in pass 2 (rows).
+ */
+
+GLOBAL(void)
+jpeg_idct_2x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	       JCOEFPTR coef_block,
+	       JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp0, tmp2, tmp10, tmp12;
+  INT32 z1, z2, z3;
+  JCOEFPTR inptr;
+  ISLOW_MULT_TYPE * quantptr;
+  INT32 * wsptr;
+  JSAMPROW outptr;
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  INT32 workspace[2*4];	/* buffers data between passes */
+  SHIFT_TEMPS
+
+  /* Pass 1: process columns from input, store into work array.
+   * 4-point IDCT kernel,
+   * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT].
+   */
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = 0; ctr < 2; ctr++, inptr++, quantptr++, wsptr++) {
+    /* Even part */
+
+    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+
+    tmp10 = (tmp0 + tmp2) << CONST_BITS;
+    tmp12 = (tmp0 - tmp2) << CONST_BITS;
+
+    /* Odd part */
+    /* Same rotation as in the even part of the 8x8 LL&M IDCT */
+
+    z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+
+    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);   /* c6 */
+    tmp0 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
+    tmp2 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
+
+    /* Final output stage */
+
+    wsptr[2*0] = tmp10 + tmp0;
+    wsptr[2*3] = tmp10 - tmp0;
+    wsptr[2*1] = tmp12 + tmp2;
+    wsptr[2*2] = tmp12 - tmp2;
+  }
+
+  /* Pass 2: process 4 rows from work array, store into output array. */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < 4; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+
+    /* Even part */
+
+    /* Add fudge factor here for final descale. */
+    tmp10 = wsptr[0] + (ONE << (CONST_BITS+2));
+
+    /* Odd part */
+
+    tmp0 = wsptr[1];
+
+    /* Final output stage */
+
+    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS+3)
+			    & RANGE_MASK];
+    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS+3)
+			    & RANGE_MASK];
+
+    wsptr += 2;		/* advance pointer to next row */
+  }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a 1x2 output block.
+ *
+ * 2-point IDCT in pass 1 (columns), 1-point in pass 2 (rows).
+ */
+
+GLOBAL(void)
+jpeg_idct_1x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	       JCOEFPTR coef_block,
+	       JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp0, tmp10;
+  ISLOW_MULT_TYPE * quantptr;
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  SHIFT_TEMPS
+
+  /* Process 1 column from input, store into output array. */
+
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+
+  /* Even part */
+    
+  tmp10 = DEQUANTIZE(coef_block[DCTSIZE*0], quantptr[DCTSIZE*0]);
+  /* Add fudge factor here for final descale. */
+  tmp10 += ONE << 2;
+
+  /* Odd part */
+
+  tmp0 = DEQUANTIZE(coef_block[DCTSIZE*1], quantptr[DCTSIZE*1]);
+
+  /* Final output stage */
+
+  output_buf[0][output_col] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0, 3)
+					  & RANGE_MASK];
+  output_buf[1][output_col] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0, 3)
+					  & RANGE_MASK];
+}
+
+#endif /* IDCT_SCALING_SUPPORTED */
+#endif /* DCT_ISLOW_SUPPORTED */
diff --git a/plugins/AdvaImg/src/LibJPEG/jinclude.h b/plugins/AdvaImg/src/LibJPEG/jinclude.h
index 0a4f15146a..5ff60fedf4 100644
--- a/plugins/AdvaImg/src/LibJPEG/jinclude.h
+++ b/plugins/AdvaImg/src/LibJPEG/jinclude.h
@@ -1,91 +1,91 @@
-/*
- * jinclude.h
- *
- * Copyright (C) 1991-1994, Thomas G. Lane.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file exists to provide a single place to fix any problems with
- * including the wrong system include files.  (Common problems are taken
- * care of by the standard jconfig symbols, but on really weird systems
- * you may have to edit this file.)
- *
- * NOTE: this file is NOT intended to be included by applications using the
- * JPEG library.  Most applications need only include jpeglib.h.
- */
-
-
-/* Include auto-config file to find out which system include files we need. */
-
-#include "jconfig.h"		/* auto configuration options */
-#define JCONFIG_INCLUDED	/* so that jpeglib.h doesn't do it again */
-
-/*
- * We need the NULL macro and size_t typedef.
- * On an ANSI-conforming system it is sufficient to include <stddef.h>.
- * Otherwise, we get them from <stdlib.h> or <stdio.h>; we may have to
- * pull in <sys/types.h> as well.
- * Note that the core JPEG library does not require <stdio.h>;
- * only the default error handler and data source/destination modules do.
- * But we must pull it in because of the references to FILE in jpeglib.h.
- * You can remove those references if you want to compile without <stdio.h>.
- */
-
-#ifdef HAVE_STDDEF_H
-#include <stddef.h>
-#endif
-
-#ifdef HAVE_STDLIB_H
-#include <stdlib.h>
-#endif
-
-#ifdef NEED_SYS_TYPES_H
-#include <sys/types.h>
-#endif
-
-#include <stdio.h>
-
-/*
- * We need memory copying and zeroing functions, plus strncpy().
- * ANSI and System V implementations declare these in <string.h>.
- * BSD doesn't have the mem() functions, but it does have bcopy()/bzero().
- * Some systems may declare memset and memcpy in <memory.h>.
- *
- * NOTE: we assume the size parameters to these functions are of type size_t.
- * Change the casts in these macros if not!
- */
-
-#ifdef NEED_BSD_STRINGS
-
-#include <strings.h>
-#define MEMZERO(target,size)	bzero((void *)(target), (size_t)(size))
-#define MEMCOPY(dest,src,size)	bcopy((const void *)(src), (void *)(dest), (size_t)(size))
-
-#else /* not BSD, assume ANSI/SysV string lib */
-
-#include <string.h>
-#define MEMZERO(target,size)	memset((void *)(target), 0, (size_t)(size))
-#define MEMCOPY(dest,src,size)	memcpy((void *)(dest), (const void *)(src), (size_t)(size))
-
-#endif
-
-/*
- * In ANSI C, and indeed any rational implementation, size_t is also the
- * type returned by sizeof().  However, it seems there are some irrational
- * implementations out there, in which sizeof() returns an int even though
- * size_t is defined as long or unsigned long.  To ensure consistent results
- * we always use this SIZEOF() macro in place of using sizeof() directly.
- */
-
-#define SIZEOF(object)	((size_t) sizeof(object))
-
-/*
- * The modules that use fread() and fwrite() always invoke them through
- * these macros.  On some systems you may need to twiddle the argument casts.
- * CAUTION: argument order is different from underlying functions!
- */
-
-#define JFREAD(file,buf,sizeofbuf)  \
-  ((size_t) fread((void *) (buf), (size_t) 1, (size_t) (sizeofbuf), (file)))
-#define JFWRITE(file,buf,sizeofbuf)  \
-  ((size_t) fwrite((const void *) (buf), (size_t) 1, (size_t) (sizeofbuf), (file)))
+/*
+ * jinclude.h
+ *
+ * Copyright (C) 1991-1994, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file exists to provide a single place to fix any problems with
+ * including the wrong system include files.  (Common problems are taken
+ * care of by the standard jconfig symbols, but on really weird systems
+ * you may have to edit this file.)
+ *
+ * NOTE: this file is NOT intended to be included by applications using the
+ * JPEG library.  Most applications need only include jpeglib.h.
+ */
+
+
+/* Include auto-config file to find out which system include files we need. */
+
+#include "jconfig.h"		/* auto configuration options */
+#define JCONFIG_INCLUDED	/* so that jpeglib.h doesn't do it again */
+
+/*
+ * We need the NULL macro and size_t typedef.
+ * On an ANSI-conforming system it is sufficient to include <stddef.h>.
+ * Otherwise, we get them from <stdlib.h> or <stdio.h>; we may have to
+ * pull in <sys/types.h> as well.
+ * Note that the core JPEG library does not require <stdio.h>;
+ * only the default error handler and data source/destination modules do.
+ * But we must pull it in because of the references to FILE in jpeglib.h.
+ * You can remove those references if you want to compile without <stdio.h>.
+ */
+
+#ifdef HAVE_STDDEF_H
+#include <stddef.h>
+#endif
+
+#ifdef HAVE_STDLIB_H
+#include <stdlib.h>
+#endif
+
+#ifdef NEED_SYS_TYPES_H
+#include <sys/types.h>
+#endif
+
+#include <stdio.h>
+
+/*
+ * We need memory copying and zeroing functions, plus strncpy().
+ * ANSI and System V implementations declare these in <string.h>.
+ * BSD doesn't have the mem() functions, but it does have bcopy()/bzero().
+ * Some systems may declare memset and memcpy in <memory.h>.
+ *
+ * NOTE: we assume the size parameters to these functions are of type size_t.
+ * Change the casts in these macros if not!
+ */
+
+#ifdef NEED_BSD_STRINGS
+
+#include <strings.h>
+#define MEMZERO(target,size)	bzero((void *)(target), (size_t)(size))
+#define MEMCOPY(dest,src,size)	bcopy((const void *)(src), (void *)(dest), (size_t)(size))
+
+#else /* not BSD, assume ANSI/SysV string lib */
+
+#include <string.h>
+#define MEMZERO(target,size)	memset((void *)(target), 0, (size_t)(size))
+#define MEMCOPY(dest,src,size)	memcpy((void *)(dest), (const void *)(src), (size_t)(size))
+
+#endif
+
+/*
+ * In ANSI C, and indeed any rational implementation, size_t is also the
+ * type returned by sizeof().  However, it seems there are some irrational
+ * implementations out there, in which sizeof() returns an int even though
+ * size_t is defined as long or unsigned long.  To ensure consistent results
+ * we always use this SIZEOF() macro in place of using sizeof() directly.
+ */
+
+#define SIZEOF(object)	((size_t) sizeof(object))
+
+/*
+ * The modules that use fread() and fwrite() always invoke them through
+ * these macros.  On some systems you may need to twiddle the argument casts.
+ * CAUTION: argument order is different from underlying functions!
+ */
+
+#define JFREAD(file,buf,sizeofbuf)  \
+  ((size_t) fread((void *) (buf), (size_t) 1, (size_t) (sizeofbuf), (file)))
+#define JFWRITE(file,buf,sizeofbuf)  \
+  ((size_t) fwrite((const void *) (buf), (size_t) 1, (size_t) (sizeofbuf), (file)))
diff --git a/plugins/AdvaImg/src/LibJPEG/jmemmgr.c b/plugins/AdvaImg/src/LibJPEG/jmemmgr.c
index f0e83fb950..ea9342022c 100644
--- a/plugins/AdvaImg/src/LibJPEG/jmemmgr.c
+++ b/plugins/AdvaImg/src/LibJPEG/jmemmgr.c
@@ -1,1119 +1,1119 @@
-/*
- * jmemmgr.c
- *
- * Copyright (C) 1991-1997, Thomas G. Lane.
- * Modified 2011 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains the JPEG system-independent memory management
- * routines.  This code is usable across a wide variety of machines; most
- * of the system dependencies have been isolated in a separate file.
- * The major functions provided here are:
- *   * pool-based allocation and freeing of memory;
- *   * policy decisions about how to divide available memory among the
- *     virtual arrays;
- *   * control logic for swapping virtual arrays between main memory and
- *     backing storage.
- * The separate system-dependent file provides the actual backing-storage
- * access code, and it contains the policy decision about how much total
- * main memory to use.
- * This file is system-dependent in the sense that some of its functions
- * are unnecessary in some systems.  For example, if there is enough virtual
- * memory so that backing storage will never be used, much of the virtual
- * array control logic could be removed.  (Of course, if you have that much
- * memory then you shouldn't care about a little bit of unused code...)
- */
-
-#define JPEG_INTERNALS
-#define AM_MEMORY_MANAGER	/* we define jvirt_Xarray_control structs */
-#include "jinclude.h"
-#include "jpeglib.h"
-#include "jmemsys.h"		/* import the system-dependent declarations */
-
-#ifndef NO_GETENV
-#ifndef HAVE_STDLIB_H		/* <stdlib.h> should declare getenv() */
-extern char * getenv JPP((const char * name));
-#endif
-#endif
-
-
-/*
- * Some important notes:
- *   The allocation routines provided here must never return NULL.
- *   They should exit to error_exit if unsuccessful.
- *
- *   It's not a good idea to try to merge the sarray and barray routines,
- *   even though they are textually almost the same, because samples are
- *   usually stored as bytes while coefficients are shorts or ints.  Thus,
- *   in machines where byte pointers have a different representation from
- *   word pointers, the resulting machine code could not be the same.
- */
-
-
-/*
- * Many machines require storage alignment: longs must start on 4-byte
- * boundaries, doubles on 8-byte boundaries, etc.  On such machines, malloc()
- * always returns pointers that are multiples of the worst-case alignment
- * requirement, and we had better do so too.
- * There isn't any really portable way to determine the worst-case alignment
- * requirement.  This module assumes that the alignment requirement is
- * multiples of sizeof(ALIGN_TYPE).
- * By default, we define ALIGN_TYPE as double.  This is necessary on some
- * workstations (where doubles really do need 8-byte alignment) and will work
- * fine on nearly everything.  If your machine has lesser alignment needs,
- * you can save a few bytes by making ALIGN_TYPE smaller.
- * The only place I know of where this will NOT work is certain Macintosh
- * 680x0 compilers that define double as a 10-byte IEEE extended float.
- * Doing 10-byte alignment is counterproductive because longwords won't be
- * aligned well.  Put "#define ALIGN_TYPE long" in jconfig.h if you have
- * such a compiler.
- */
-
-#ifndef ALIGN_TYPE		/* so can override from jconfig.h */
-#define ALIGN_TYPE  double
-#endif
-
-
-/*
- * We allocate objects from "pools", where each pool is gotten with a single
- * request to jpeg_get_small() or jpeg_get_large().  There is no per-object
- * overhead within a pool, except for alignment padding.  Each pool has a
- * header with a link to the next pool of the same class.
- * Small and large pool headers are identical except that the latter's
- * link pointer must be FAR on 80x86 machines.
- * Notice that the "real" header fields are union'ed with a dummy ALIGN_TYPE
- * field.  This forces the compiler to make SIZEOF(small_pool_hdr) a multiple
- * of the alignment requirement of ALIGN_TYPE.
- */
-
-typedef union small_pool_struct * small_pool_ptr;
-
-typedef union small_pool_struct {
-  struct {
-    small_pool_ptr next;	/* next in list of pools */
-    size_t bytes_used;		/* how many bytes already used within pool */
-    size_t bytes_left;		/* bytes still available in this pool */
-  } hdr;
-  ALIGN_TYPE dummy;		/* included in union to ensure alignment */
-} small_pool_hdr;
-
-typedef union large_pool_struct FAR * large_pool_ptr;
-
-typedef union large_pool_struct {
-  struct {
-    large_pool_ptr next;	/* next in list of pools */
-    size_t bytes_used;		/* how many bytes already used within pool */
-    size_t bytes_left;		/* bytes still available in this pool */
-  } hdr;
-  ALIGN_TYPE dummy;		/* included in union to ensure alignment */
-} large_pool_hdr;
-
-
-/*
- * Here is the full definition of a memory manager object.
- */
-
-typedef struct {
-  struct jpeg_memory_mgr pub;	/* public fields */
-
-  /* Each pool identifier (lifetime class) names a linked list of pools. */
-  small_pool_ptr small_list[JPOOL_NUMPOOLS];
-  large_pool_ptr large_list[JPOOL_NUMPOOLS];
-
-  /* Since we only have one lifetime class of virtual arrays, only one
-   * linked list is necessary (for each datatype).  Note that the virtual
-   * array control blocks being linked together are actually stored somewhere
-   * in the small-pool list.
-   */
-  jvirt_sarray_ptr virt_sarray_list;
-  jvirt_barray_ptr virt_barray_list;
-
-  /* This counts total space obtained from jpeg_get_small/large */
-  long total_space_allocated;
-
-  /* alloc_sarray and alloc_barray set this value for use by virtual
-   * array routines.
-   */
-  JDIMENSION last_rowsperchunk;	/* from most recent alloc_sarray/barray */
-} my_memory_mgr;
-
-typedef my_memory_mgr * my_mem_ptr;
-
-
-/*
- * The control blocks for virtual arrays.
- * Note that these blocks are allocated in the "small" pool area.
- * System-dependent info for the associated backing store (if any) is hidden
- * inside the backing_store_info struct.
- */
-
-struct jvirt_sarray_control {
-  JSAMPARRAY mem_buffer;	/* => the in-memory buffer */
-  JDIMENSION rows_in_array;	/* total virtual array height */
-  JDIMENSION samplesperrow;	/* width of array (and of memory buffer) */
-  JDIMENSION maxaccess;		/* max rows accessed by access_virt_sarray */
-  JDIMENSION rows_in_mem;	/* height of memory buffer */
-  JDIMENSION rowsperchunk;	/* allocation chunk size in mem_buffer */
-  JDIMENSION cur_start_row;	/* first logical row # in the buffer */
-  JDIMENSION first_undef_row;	/* row # of first uninitialized row */
-  boolean pre_zero;		/* pre-zero mode requested? */
-  boolean dirty;		/* do current buffer contents need written? */
-  boolean b_s_open;		/* is backing-store data valid? */
-  jvirt_sarray_ptr next;	/* link to next virtual sarray control block */
-  backing_store_info b_s_info;	/* System-dependent control info */
-};
-
-struct jvirt_barray_control {
-  JBLOCKARRAY mem_buffer;	/* => the in-memory buffer */
-  JDIMENSION rows_in_array;	/* total virtual array height */
-  JDIMENSION blocksperrow;	/* width of array (and of memory buffer) */
-  JDIMENSION maxaccess;		/* max rows accessed by access_virt_barray */
-  JDIMENSION rows_in_mem;	/* height of memory buffer */
-  JDIMENSION rowsperchunk;	/* allocation chunk size in mem_buffer */
-  JDIMENSION cur_start_row;	/* first logical row # in the buffer */
-  JDIMENSION first_undef_row;	/* row # of first uninitialized row */
-  boolean pre_zero;		/* pre-zero mode requested? */
-  boolean dirty;		/* do current buffer contents need written? */
-  boolean b_s_open;		/* is backing-store data valid? */
-  jvirt_barray_ptr next;	/* link to next virtual barray control block */
-  backing_store_info b_s_info;	/* System-dependent control info */
-};
-
-
-#ifdef MEM_STATS		/* optional extra stuff for statistics */
-
-LOCAL(void)
-print_mem_stats (j_common_ptr cinfo, int pool_id)
-{
-  my_mem_ptr mem = (my_mem_ptr) cinfo->mem;
-  small_pool_ptr shdr_ptr;
-  large_pool_ptr lhdr_ptr;
-
-  /* Since this is only a debugging stub, we can cheat a little by using
-   * fprintf directly rather than going through the trace message code.
-   * This is helpful because message parm array can't handle longs.
-   */
-  fprintf(stderr, "Freeing pool %d, total space = %ld\n",
-	  pool_id, mem->total_space_allocated);
-
-  for (lhdr_ptr = mem->large_list[pool_id]; lhdr_ptr != NULL;
-       lhdr_ptr = lhdr_ptr->hdr.next) {
-    fprintf(stderr, "  Large chunk used %ld\n",
-	    (long) lhdr_ptr->hdr.bytes_used);
-  }
-
-  for (shdr_ptr = mem->small_list[pool_id]; shdr_ptr != NULL;
-       shdr_ptr = shdr_ptr->hdr.next) {
-    fprintf(stderr, "  Small chunk used %ld free %ld\n",
-	    (long) shdr_ptr->hdr.bytes_used,
-	    (long) shdr_ptr->hdr.bytes_left);
-  }
-}
-
-#endif /* MEM_STATS */
-
-
-LOCAL(void)
-out_of_memory (j_common_ptr cinfo, int which)
-/* Report an out-of-memory error and stop execution */
-/* If we compiled MEM_STATS support, report alloc requests before dying */
-{
-#ifdef MEM_STATS
-  cinfo->err->trace_level = 2;	/* force self_destruct to report stats */
-#endif
-  ERREXIT1(cinfo, JERR_OUT_OF_MEMORY, which);
-}
-
-
-/*
- * Allocation of "small" objects.
- *
- * For these, we use pooled storage.  When a new pool must be created,
- * we try to get enough space for the current request plus a "slop" factor,
- * where the slop will be the amount of leftover space in the new pool.
- * The speed vs. space tradeoff is largely determined by the slop values.
- * A different slop value is provided for each pool class (lifetime),
- * and we also distinguish the first pool of a class from later ones.
- * NOTE: the values given work fairly well on both 16- and 32-bit-int
- * machines, but may be too small if longs are 64 bits or more.
- */
-
-static const size_t first_pool_slop[JPOOL_NUMPOOLS] = 
-{
-	1600,			/* first PERMANENT pool */
-	16000			/* first IMAGE pool */
-};
-
-static const size_t extra_pool_slop[JPOOL_NUMPOOLS] = 
-{
-	0,			/* additional PERMANENT pools */
-	5000			/* additional IMAGE pools */
-};
-
-#define MIN_SLOP  50		/* greater than 0 to avoid futile looping */
-
-
-METHODDEF(void *)
-alloc_small (j_common_ptr cinfo, int pool_id, size_t sizeofobject)
-/* Allocate a "small" object */
-{
-  my_mem_ptr mem = (my_mem_ptr) cinfo->mem;
-  small_pool_ptr hdr_ptr, prev_hdr_ptr;
-  char * data_ptr;
-  size_t odd_bytes, min_request, slop;
-
-  /* Check for unsatisfiable request (do now to ensure no overflow below) */
-  if (sizeofobject > (size_t) (MAX_ALLOC_CHUNK-SIZEOF(small_pool_hdr)))
-    out_of_memory(cinfo, 1);	/* request exceeds malloc's ability */
-
-  /* Round up the requested size to a multiple of SIZEOF(ALIGN_TYPE) */
-  odd_bytes = sizeofobject % SIZEOF(ALIGN_TYPE);
-  if (odd_bytes > 0)
-    sizeofobject += SIZEOF(ALIGN_TYPE) - odd_bytes;
-
-  /* See if space is available in any existing pool */
-  if (pool_id < 0 || pool_id >= JPOOL_NUMPOOLS)
-    ERREXIT1(cinfo, JERR_BAD_POOL_ID, pool_id);	/* safety check */
-  prev_hdr_ptr = NULL;
-  hdr_ptr = mem->small_list[pool_id];
-  while (hdr_ptr != NULL) {
-    if (hdr_ptr->hdr.bytes_left >= sizeofobject)
-      break;			/* found pool with enough space */
-    prev_hdr_ptr = hdr_ptr;
-    hdr_ptr = hdr_ptr->hdr.next;
-  }
-
-  /* Time to make a new pool? */
-  if (hdr_ptr == NULL) {
-    /* min_request is what we need now, slop is what will be leftover */
-    min_request = sizeofobject + SIZEOF(small_pool_hdr);
-    if (prev_hdr_ptr == NULL)	/* first pool in class? */
-      slop = first_pool_slop[pool_id];
-    else
-      slop = extra_pool_slop[pool_id];
-    /* Don't ask for more than MAX_ALLOC_CHUNK */
-    if (slop > (size_t) (MAX_ALLOC_CHUNK-min_request))
-      slop = (size_t) (MAX_ALLOC_CHUNK-min_request);
-    /* Try to get space, if fail reduce slop and try again */
-    for (;;) {
-      hdr_ptr = (small_pool_ptr) jpeg_get_small(cinfo, min_request + slop);
-      if (hdr_ptr != NULL)
-	break;
-      slop /= 2;
-      if (slop < MIN_SLOP)	/* give up when it gets real small */
-	out_of_memory(cinfo, 2); /* jpeg_get_small failed */
-    }
-    mem->total_space_allocated += min_request + slop;
-    /* Success, initialize the new pool header and add to end of list */
-    hdr_ptr->hdr.next = NULL;
-    hdr_ptr->hdr.bytes_used = 0;
-    hdr_ptr->hdr.bytes_left = sizeofobject + slop;
-    if (prev_hdr_ptr == NULL)	/* first pool in class? */
-      mem->small_list[pool_id] = hdr_ptr;
-    else
-      prev_hdr_ptr->hdr.next = hdr_ptr;
-  }
-
-  /* OK, allocate the object from the current pool */
-  data_ptr = (char *) (hdr_ptr + 1); /* point to first data byte in pool */
-  data_ptr += hdr_ptr->hdr.bytes_used; /* point to place for object */
-  hdr_ptr->hdr.bytes_used += sizeofobject;
-  hdr_ptr->hdr.bytes_left -= sizeofobject;
-
-  return (void *) data_ptr;
-}
-
-
-/*
- * Allocation of "large" objects.
- *
- * The external semantics of these are the same as "small" objects,
- * except that FAR pointers are used on 80x86.  However the pool
- * management heuristics are quite different.  We assume that each
- * request is large enough that it may as well be passed directly to
- * jpeg_get_large; the pool management just links everything together
- * so that we can free it all on demand.
- * Note: the major use of "large" objects is in JSAMPARRAY and JBLOCKARRAY
- * structures.  The routines that create these structures (see below)
- * deliberately bunch rows together to ensure a large request size.
- */
-
-METHODDEF(void FAR *)
-alloc_large (j_common_ptr cinfo, int pool_id, size_t sizeofobject)
-/* Allocate a "large" object */
-{
-  my_mem_ptr mem = (my_mem_ptr) cinfo->mem;
-  large_pool_ptr hdr_ptr;
-  size_t odd_bytes;
-
-  /* Check for unsatisfiable request (do now to ensure no overflow below) */
-  if (sizeofobject > (size_t) (MAX_ALLOC_CHUNK-SIZEOF(large_pool_hdr)))
-    out_of_memory(cinfo, 3);	/* request exceeds malloc's ability */
-
-  /* Round up the requested size to a multiple of SIZEOF(ALIGN_TYPE) */
-  odd_bytes = sizeofobject % SIZEOF(ALIGN_TYPE);
-  if (odd_bytes > 0)
-    sizeofobject += SIZEOF(ALIGN_TYPE) - odd_bytes;
-
-  /* Always make a new pool */
-  if (pool_id < 0 || pool_id >= JPOOL_NUMPOOLS)
-    ERREXIT1(cinfo, JERR_BAD_POOL_ID, pool_id);	/* safety check */
-
-  hdr_ptr = (large_pool_ptr) jpeg_get_large(cinfo, sizeofobject +
-					    SIZEOF(large_pool_hdr));
-  if (hdr_ptr == NULL)
-    out_of_memory(cinfo, 4);	/* jpeg_get_large failed */
-  mem->total_space_allocated += sizeofobject + SIZEOF(large_pool_hdr);
-
-  /* Success, initialize the new pool header and add to list */
-  hdr_ptr->hdr.next = mem->large_list[pool_id];
-  /* We maintain space counts in each pool header for statistical purposes,
-   * even though they are not needed for allocation.
-   */
-  hdr_ptr->hdr.bytes_used = sizeofobject;
-  hdr_ptr->hdr.bytes_left = 0;
-  mem->large_list[pool_id] = hdr_ptr;
-
-  return (void FAR *) (hdr_ptr + 1); /* point to first data byte in pool */
-}
-
-
-/*
- * Creation of 2-D sample arrays.
- * The pointers are in near heap, the samples themselves in FAR heap.
- *
- * To minimize allocation overhead and to allow I/O of large contiguous
- * blocks, we allocate the sample rows in groups of as many rows as possible
- * without exceeding MAX_ALLOC_CHUNK total bytes per allocation request.
- * NB: the virtual array control routines, later in this file, know about
- * this chunking of rows.  The rowsperchunk value is left in the mem manager
- * object so that it can be saved away if this sarray is the workspace for
- * a virtual array.
- */
-
-METHODDEF(JSAMPARRAY)
-alloc_sarray (j_common_ptr cinfo, int pool_id,
-	      JDIMENSION samplesperrow, JDIMENSION numrows)
-/* Allocate a 2-D sample array */
-{
-  my_mem_ptr mem = (my_mem_ptr) cinfo->mem;
-  JSAMPARRAY result;
-  JSAMPROW workspace;
-  JDIMENSION rowsperchunk, currow, i;
-  long ltemp;
-
-  /* Calculate max # of rows allowed in one allocation chunk */
-  ltemp = (MAX_ALLOC_CHUNK-SIZEOF(large_pool_hdr)) /
-	  ((long) samplesperrow * SIZEOF(JSAMPLE));
-  if (ltemp <= 0)
-    ERREXIT(cinfo, JERR_WIDTH_OVERFLOW);
-  if (ltemp < (long) numrows)
-    rowsperchunk = (JDIMENSION) ltemp;
-  else
-    rowsperchunk = numrows;
-  mem->last_rowsperchunk = rowsperchunk;
-
-  /* Get space for row pointers (small object) */
-  result = (JSAMPARRAY) alloc_small(cinfo, pool_id,
-				    (size_t) (numrows * SIZEOF(JSAMPROW)));
-
-  /* Get the rows themselves (large objects) */
-  currow = 0;
-  while (currow < numrows) {
-    rowsperchunk = MIN(rowsperchunk, numrows - currow);
-    workspace = (JSAMPROW) alloc_large(cinfo, pool_id,
-	(size_t) ((size_t) rowsperchunk * (size_t) samplesperrow
-		  * SIZEOF(JSAMPLE)));
-    for (i = rowsperchunk; i > 0; i--) {
-      result[currow++] = workspace;
-      workspace += samplesperrow;
-    }
-  }
-
-  return result;
-}
-
-
-/*
- * Creation of 2-D coefficient-block arrays.
- * This is essentially the same as the code for sample arrays, above.
- */
-
-METHODDEF(JBLOCKARRAY)
-alloc_barray (j_common_ptr cinfo, int pool_id,
-	      JDIMENSION blocksperrow, JDIMENSION numrows)
-/* Allocate a 2-D coefficient-block array */
-{
-  my_mem_ptr mem = (my_mem_ptr) cinfo->mem;
-  JBLOCKARRAY result;
-  JBLOCKROW workspace;
-  JDIMENSION rowsperchunk, currow, i;
-  long ltemp;
-
-  /* Calculate max # of rows allowed in one allocation chunk */
-  ltemp = (MAX_ALLOC_CHUNK-SIZEOF(large_pool_hdr)) /
-	  ((long) blocksperrow * SIZEOF(JBLOCK));
-  if (ltemp <= 0)
-    ERREXIT(cinfo, JERR_WIDTH_OVERFLOW);
-  if (ltemp < (long) numrows)
-    rowsperchunk = (JDIMENSION) ltemp;
-  else
-    rowsperchunk = numrows;
-  mem->last_rowsperchunk = rowsperchunk;
-
-  /* Get space for row pointers (small object) */
-  result = (JBLOCKARRAY) alloc_small(cinfo, pool_id,
-				     (size_t) (numrows * SIZEOF(JBLOCKROW)));
-
-  /* Get the rows themselves (large objects) */
-  currow = 0;
-  while (currow < numrows) {
-    rowsperchunk = MIN(rowsperchunk, numrows - currow);
-    workspace = (JBLOCKROW) alloc_large(cinfo, pool_id,
-	(size_t) ((size_t) rowsperchunk * (size_t) blocksperrow
-		  * SIZEOF(JBLOCK)));
-    for (i = rowsperchunk; i > 0; i--) {
-      result[currow++] = workspace;
-      workspace += blocksperrow;
-    }
-  }
-
-  return result;
-}
-
-
-/*
- * About virtual array management:
- *
- * The above "normal" array routines are only used to allocate strip buffers
- * (as wide as the image, but just a few rows high).  Full-image-sized buffers
- * are handled as "virtual" arrays.  The array is still accessed a strip at a
- * time, but the memory manager must save the whole array for repeated
- * accesses.  The intended implementation is that there is a strip buffer in
- * memory (as high as is possible given the desired memory limit), plus a
- * backing file that holds the rest of the array.
- *
- * The request_virt_array routines are told the total size of the image and
- * the maximum number of rows that will be accessed at once.  The in-memory
- * buffer must be at least as large as the maxaccess value.
- *
- * The request routines create control blocks but not the in-memory buffers.
- * That is postponed until realize_virt_arrays is called.  At that time the
- * total amount of space needed is known (approximately, anyway), so free
- * memory can be divided up fairly.
- *
- * The access_virt_array routines are responsible for making a specific strip
- * area accessible (after reading or writing the backing file, if necessary).
- * Note that the access routines are told whether the caller intends to modify
- * the accessed strip; during a read-only pass this saves having to rewrite
- * data to disk.  The access routines are also responsible for pre-zeroing
- * any newly accessed rows, if pre-zeroing was requested.
- *
- * In current usage, the access requests are usually for nonoverlapping
- * strips; that is, successive access start_row numbers differ by exactly
- * num_rows = maxaccess.  This means we can get good performance with simple
- * buffer dump/reload logic, by making the in-memory buffer be a multiple
- * of the access height; then there will never be accesses across bufferload
- * boundaries.  The code will still work with overlapping access requests,
- * but it doesn't handle bufferload overlaps very efficiently.
- */
-
-
-METHODDEF(jvirt_sarray_ptr)
-request_virt_sarray (j_common_ptr cinfo, int pool_id, boolean pre_zero,
-		     JDIMENSION samplesperrow, JDIMENSION numrows,
-		     JDIMENSION maxaccess)
-/* Request a virtual 2-D sample array */
-{
-  my_mem_ptr mem = (my_mem_ptr) cinfo->mem;
-  jvirt_sarray_ptr result;
-
-  /* Only IMAGE-lifetime virtual arrays are currently supported */
-  if (pool_id != JPOOL_IMAGE)
-    ERREXIT1(cinfo, JERR_BAD_POOL_ID, pool_id);	/* safety check */
-
-  /* get control block */
-  result = (jvirt_sarray_ptr) alloc_small(cinfo, pool_id,
-					  SIZEOF(struct jvirt_sarray_control));
-
-  result->mem_buffer = NULL;	/* marks array not yet realized */
-  result->rows_in_array = numrows;
-  result->samplesperrow = samplesperrow;
-  result->maxaccess = maxaccess;
-  result->pre_zero = pre_zero;
-  result->b_s_open = FALSE;	/* no associated backing-store object */
-  result->next = mem->virt_sarray_list; /* add to list of virtual arrays */
-  mem->virt_sarray_list = result;
-
-  return result;
-}
-
-
-METHODDEF(jvirt_barray_ptr)
-request_virt_barray (j_common_ptr cinfo, int pool_id, boolean pre_zero,
-		     JDIMENSION blocksperrow, JDIMENSION numrows,
-		     JDIMENSION maxaccess)
-/* Request a virtual 2-D coefficient-block array */
-{
-  my_mem_ptr mem = (my_mem_ptr) cinfo->mem;
-  jvirt_barray_ptr result;
-
-  /* Only IMAGE-lifetime virtual arrays are currently supported */
-  if (pool_id != JPOOL_IMAGE)
-    ERREXIT1(cinfo, JERR_BAD_POOL_ID, pool_id);	/* safety check */
-
-  /* get control block */
-  result = (jvirt_barray_ptr) alloc_small(cinfo, pool_id,
-					  SIZEOF(struct jvirt_barray_control));
-
-  result->mem_buffer = NULL;	/* marks array not yet realized */
-  result->rows_in_array = numrows;
-  result->blocksperrow = blocksperrow;
-  result->maxaccess = maxaccess;
-  result->pre_zero = pre_zero;
-  result->b_s_open = FALSE;	/* no associated backing-store object */
-  result->next = mem->virt_barray_list; /* add to list of virtual arrays */
-  mem->virt_barray_list = result;
-
-  return result;
-}
-
-
-METHODDEF(void)
-realize_virt_arrays (j_common_ptr cinfo)
-/* Allocate the in-memory buffers for any unrealized virtual arrays */
-{
-  my_mem_ptr mem = (my_mem_ptr) cinfo->mem;
-  long space_per_minheight, maximum_space, avail_mem;
-  long minheights, max_minheights;
-  jvirt_sarray_ptr sptr;
-  jvirt_barray_ptr bptr;
-
-  /* Compute the minimum space needed (maxaccess rows in each buffer)
-   * and the maximum space needed (full image height in each buffer).
-   * These may be of use to the system-dependent jpeg_mem_available routine.
-   */
-  space_per_minheight = 0;
-  maximum_space = 0;
-  for (sptr = mem->virt_sarray_list; sptr != NULL; sptr = sptr->next) {
-    if (sptr->mem_buffer == NULL) { /* if not realized yet */
-      space_per_minheight += (long) sptr->maxaccess *
-			     (long) sptr->samplesperrow * SIZEOF(JSAMPLE);
-      maximum_space += (long) sptr->rows_in_array *
-		       (long) sptr->samplesperrow * SIZEOF(JSAMPLE);
-    }
-  }
-  for (bptr = mem->virt_barray_list; bptr != NULL; bptr = bptr->next) {
-    if (bptr->mem_buffer == NULL) { /* if not realized yet */
-      space_per_minheight += (long) bptr->maxaccess *
-			     (long) bptr->blocksperrow * SIZEOF(JBLOCK);
-      maximum_space += (long) bptr->rows_in_array *
-		       (long) bptr->blocksperrow * SIZEOF(JBLOCK);
-    }
-  }
-
-  if (space_per_minheight <= 0)
-    return;			/* no unrealized arrays, no work */
-
-  /* Determine amount of memory to actually use; this is system-dependent. */
-  avail_mem = jpeg_mem_available(cinfo, space_per_minheight, maximum_space,
-				 mem->total_space_allocated);
-
-  /* If the maximum space needed is available, make all the buffers full
-   * height; otherwise parcel it out with the same number of minheights
-   * in each buffer.
-   */
-  if (avail_mem >= maximum_space)
-    max_minheights = 1000000000L;
-  else {
-    max_minheights = avail_mem / space_per_minheight;
-    /* If there doesn't seem to be enough space, try to get the minimum
-     * anyway.  This allows a "stub" implementation of jpeg_mem_available().
-     */
-    if (max_minheights <= 0)
-      max_minheights = 1;
-  }
-
-  /* Allocate the in-memory buffers and initialize backing store as needed. */
-
-  for (sptr = mem->virt_sarray_list; sptr != NULL; sptr = sptr->next) {
-    if (sptr->mem_buffer == NULL) { /* if not realized yet */
-      minheights = ((long) sptr->rows_in_array - 1L) / sptr->maxaccess + 1L;
-      if (minheights <= max_minheights) {
-	/* This buffer fits in memory */
-	sptr->rows_in_mem = sptr->rows_in_array;
-      } else {
-	/* It doesn't fit in memory, create backing store. */
-	sptr->rows_in_mem = (JDIMENSION) (max_minheights * sptr->maxaccess);
-	jpeg_open_backing_store(cinfo, & sptr->b_s_info,
-				(long) sptr->rows_in_array *
-				(long) sptr->samplesperrow *
-				(long) SIZEOF(JSAMPLE));
-	sptr->b_s_open = TRUE;
-      }
-      sptr->mem_buffer = alloc_sarray(cinfo, JPOOL_IMAGE,
-				      sptr->samplesperrow, sptr->rows_in_mem);
-      sptr->rowsperchunk = mem->last_rowsperchunk;
-      sptr->cur_start_row = 0;
-      sptr->first_undef_row = 0;
-      sptr->dirty = FALSE;
-    }
-  }
-
-  for (bptr = mem->virt_barray_list; bptr != NULL; bptr = bptr->next) {
-    if (bptr->mem_buffer == NULL) { /* if not realized yet */
-      minheights = ((long) bptr->rows_in_array - 1L) / bptr->maxaccess + 1L;
-      if (minheights <= max_minheights) {
-	/* This buffer fits in memory */
-	bptr->rows_in_mem = bptr->rows_in_array;
-      } else {
-	/* It doesn't fit in memory, create backing store. */
-	bptr->rows_in_mem = (JDIMENSION) (max_minheights * bptr->maxaccess);
-	jpeg_open_backing_store(cinfo, & bptr->b_s_info,
-				(long) bptr->rows_in_array *
-				(long) bptr->blocksperrow *
-				(long) SIZEOF(JBLOCK));
-	bptr->b_s_open = TRUE;
-      }
-      bptr->mem_buffer = alloc_barray(cinfo, JPOOL_IMAGE,
-				      bptr->blocksperrow, bptr->rows_in_mem);
-      bptr->rowsperchunk = mem->last_rowsperchunk;
-      bptr->cur_start_row = 0;
-      bptr->first_undef_row = 0;
-      bptr->dirty = FALSE;
-    }
-  }
-}
-
-
-LOCAL(void)
-do_sarray_io (j_common_ptr cinfo, jvirt_sarray_ptr ptr, boolean writing)
-/* Do backing store read or write of a virtual sample array */
-{
-  long bytesperrow, file_offset, byte_count, rows, thisrow, i;
-
-  bytesperrow = (long) ptr->samplesperrow * SIZEOF(JSAMPLE);
-  file_offset = ptr->cur_start_row * bytesperrow;
-  /* Loop to read or write each allocation chunk in mem_buffer */
-  for (i = 0; i < (long) ptr->rows_in_mem; i += ptr->rowsperchunk) {
-    /* One chunk, but check for short chunk at end of buffer */
-    rows = MIN((long) ptr->rowsperchunk, (long) ptr->rows_in_mem - i);
-    /* Transfer no more than is currently defined */
-    thisrow = (long) ptr->cur_start_row + i;
-    rows = MIN(rows, (long) ptr->first_undef_row - thisrow);
-    /* Transfer no more than fits in file */
-    rows = MIN(rows, (long) ptr->rows_in_array - thisrow);
-    if (rows <= 0)		/* this chunk might be past end of file! */
-      break;
-    byte_count = rows * bytesperrow;
-    if (writing)
-      (*ptr->b_s_info.write_backing_store) (cinfo, & ptr->b_s_info,
-					    (void FAR *) ptr->mem_buffer[i],
-					    file_offset, byte_count);
-    else
-      (*ptr->b_s_info.read_backing_store) (cinfo, & ptr->b_s_info,
-					   (void FAR *) ptr->mem_buffer[i],
-					   file_offset, byte_count);
-    file_offset += byte_count;
-  }
-}
-
-
-LOCAL(void)
-do_barray_io (j_common_ptr cinfo, jvirt_barray_ptr ptr, boolean writing)
-/* Do backing store read or write of a virtual coefficient-block array */
-{
-  long bytesperrow, file_offset, byte_count, rows, thisrow, i;
-
-  bytesperrow = (long) ptr->blocksperrow * SIZEOF(JBLOCK);
-  file_offset = ptr->cur_start_row * bytesperrow;
-  /* Loop to read or write each allocation chunk in mem_buffer */
-  for (i = 0; i < (long) ptr->rows_in_mem; i += ptr->rowsperchunk) {
-    /* One chunk, but check for short chunk at end of buffer */
-    rows = MIN((long) ptr->rowsperchunk, (long) ptr->rows_in_mem - i);
-    /* Transfer no more than is currently defined */
-    thisrow = (long) ptr->cur_start_row + i;
-    rows = MIN(rows, (long) ptr->first_undef_row - thisrow);
-    /* Transfer no more than fits in file */
-    rows = MIN(rows, (long) ptr->rows_in_array - thisrow);
-    if (rows <= 0)		/* this chunk might be past end of file! */
-      break;
-    byte_count = rows * bytesperrow;
-    if (writing)
-      (*ptr->b_s_info.write_backing_store) (cinfo, & ptr->b_s_info,
-					    (void FAR *) ptr->mem_buffer[i],
-					    file_offset, byte_count);
-    else
-      (*ptr->b_s_info.read_backing_store) (cinfo, & ptr->b_s_info,
-					   (void FAR *) ptr->mem_buffer[i],
-					   file_offset, byte_count);
-    file_offset += byte_count;
-  }
-}
-
-
-METHODDEF(JSAMPARRAY)
-access_virt_sarray (j_common_ptr cinfo, jvirt_sarray_ptr ptr,
-		    JDIMENSION start_row, JDIMENSION num_rows,
-		    boolean writable)
-/* Access the part of a virtual sample array starting at start_row */
-/* and extending for num_rows rows.  writable is true if  */
-/* caller intends to modify the accessed area. */
-{
-  JDIMENSION end_row = start_row + num_rows;
-  JDIMENSION undef_row;
-
-  /* debugging check */
-  if (end_row > ptr->rows_in_array || num_rows > ptr->maxaccess ||
-      ptr->mem_buffer == NULL)
-    ERREXIT(cinfo, JERR_BAD_VIRTUAL_ACCESS);
-
-  /* Make the desired part of the virtual array accessible */
-  if (start_row < ptr->cur_start_row ||
-      end_row > ptr->cur_start_row+ptr->rows_in_mem) {
-    if (! ptr->b_s_open)
-      ERREXIT(cinfo, JERR_VIRTUAL_BUG);
-    /* Flush old buffer contents if necessary */
-    if (ptr->dirty) {
-      do_sarray_io(cinfo, ptr, TRUE);
-      ptr->dirty = FALSE;
-    }
-    /* Decide what part of virtual array to access.
-     * Algorithm: if target address > current window, assume forward scan,
-     * load starting at target address.  If target address < current window,
-     * assume backward scan, load so that target area is top of window.
-     * Note that when switching from forward write to forward read, will have
-     * start_row = 0, so the limiting case applies and we load from 0 anyway.
-     */
-    if (start_row > ptr->cur_start_row) {
-      ptr->cur_start_row = start_row;
-    } else {
-      /* use long arithmetic here to avoid overflow & unsigned problems */
-      long ltemp;
-
-      ltemp = (long) end_row - (long) ptr->rows_in_mem;
-      if (ltemp < 0)
-	ltemp = 0;		/* don't fall off front end of file */
-      ptr->cur_start_row = (JDIMENSION) ltemp;
-    }
-    /* Read in the selected part of the array.
-     * During the initial write pass, we will do no actual read
-     * because the selected part is all undefined.
-     */
-    do_sarray_io(cinfo, ptr, FALSE);
-  }
-  /* Ensure the accessed part of the array is defined; prezero if needed.
-   * To improve locality of access, we only prezero the part of the array
-   * that the caller is about to access, not the entire in-memory array.
-   */
-  if (ptr->first_undef_row < end_row) {
-    if (ptr->first_undef_row < start_row) {
-      if (writable)		/* writer skipped over a section of array */
-	ERREXIT(cinfo, JERR_BAD_VIRTUAL_ACCESS);
-      undef_row = start_row;	/* but reader is allowed to read ahead */
-    } else {
-      undef_row = ptr->first_undef_row;
-    }
-    if (writable)
-      ptr->first_undef_row = end_row;
-    if (ptr->pre_zero) {
-      size_t bytesperrow = (size_t) ptr->samplesperrow * SIZEOF(JSAMPLE);
-      undef_row -= ptr->cur_start_row; /* make indexes relative to buffer */
-      end_row -= ptr->cur_start_row;
-      while (undef_row < end_row) {
-	FMEMZERO((void FAR *) ptr->mem_buffer[undef_row], bytesperrow);
-	undef_row++;
-      }
-    } else {
-      if (! writable)		/* reader looking at undefined data */
-	ERREXIT(cinfo, JERR_BAD_VIRTUAL_ACCESS);
-    }
-  }
-  /* Flag the buffer dirty if caller will write in it */
-  if (writable)
-    ptr->dirty = TRUE;
-  /* Return address of proper part of the buffer */
-  return ptr->mem_buffer + (start_row - ptr->cur_start_row);
-}
-
-
-METHODDEF(JBLOCKARRAY)
-access_virt_barray (j_common_ptr cinfo, jvirt_barray_ptr ptr,
-		    JDIMENSION start_row, JDIMENSION num_rows,
-		    boolean writable)
-/* Access the part of a virtual block array starting at start_row */
-/* and extending for num_rows rows.  writable is true if  */
-/* caller intends to modify the accessed area. */
-{
-  JDIMENSION end_row = start_row + num_rows;
-  JDIMENSION undef_row;
-
-  /* debugging check */
-  if (end_row > ptr->rows_in_array || num_rows > ptr->maxaccess ||
-      ptr->mem_buffer == NULL)
-    ERREXIT(cinfo, JERR_BAD_VIRTUAL_ACCESS);
-
-  /* Make the desired part of the virtual array accessible */
-  if (start_row < ptr->cur_start_row ||
-      end_row > ptr->cur_start_row+ptr->rows_in_mem) {
-    if (! ptr->b_s_open)
-      ERREXIT(cinfo, JERR_VIRTUAL_BUG);
-    /* Flush old buffer contents if necessary */
-    if (ptr->dirty) {
-      do_barray_io(cinfo, ptr, TRUE);
-      ptr->dirty = FALSE;
-    }
-    /* Decide what part of virtual array to access.
-     * Algorithm: if target address > current window, assume forward scan,
-     * load starting at target address.  If target address < current window,
-     * assume backward scan, load so that target area is top of window.
-     * Note that when switching from forward write to forward read, will have
-     * start_row = 0, so the limiting case applies and we load from 0 anyway.
-     */
-    if (start_row > ptr->cur_start_row) {
-      ptr->cur_start_row = start_row;
-    } else {
-      /* use long arithmetic here to avoid overflow & unsigned problems */
-      long ltemp;
-
-      ltemp = (long) end_row - (long) ptr->rows_in_mem;
-      if (ltemp < 0)
-	ltemp = 0;		/* don't fall off front end of file */
-      ptr->cur_start_row = (JDIMENSION) ltemp;
-    }
-    /* Read in the selected part of the array.
-     * During the initial write pass, we will do no actual read
-     * because the selected part is all undefined.
-     */
-    do_barray_io(cinfo, ptr, FALSE);
-  }
-  /* Ensure the accessed part of the array is defined; prezero if needed.
-   * To improve locality of access, we only prezero the part of the array
-   * that the caller is about to access, not the entire in-memory array.
-   */
-  if (ptr->first_undef_row < end_row) {
-    if (ptr->first_undef_row < start_row) {
-      if (writable)		/* writer skipped over a section of array */
-	ERREXIT(cinfo, JERR_BAD_VIRTUAL_ACCESS);
-      undef_row = start_row;	/* but reader is allowed to read ahead */
-    } else {
-      undef_row = ptr->first_undef_row;
-    }
-    if (writable)
-      ptr->first_undef_row = end_row;
-    if (ptr->pre_zero) {
-      size_t bytesperrow = (size_t) ptr->blocksperrow * SIZEOF(JBLOCK);
-      undef_row -= ptr->cur_start_row; /* make indexes relative to buffer */
-      end_row -= ptr->cur_start_row;
-      while (undef_row < end_row) {
-	FMEMZERO((void FAR *) ptr->mem_buffer[undef_row], bytesperrow);
-	undef_row++;
-      }
-    } else {
-      if (! writable)		/* reader looking at undefined data */
-	ERREXIT(cinfo, JERR_BAD_VIRTUAL_ACCESS);
-    }
-  }
-  /* Flag the buffer dirty if caller will write in it */
-  if (writable)
-    ptr->dirty = TRUE;
-  /* Return address of proper part of the buffer */
-  return ptr->mem_buffer + (start_row - ptr->cur_start_row);
-}
-
-
-/*
- * Release all objects belonging to a specified pool.
- */
-
-METHODDEF(void)
-free_pool (j_common_ptr cinfo, int pool_id)
-{
-  my_mem_ptr mem = (my_mem_ptr) cinfo->mem;
-  small_pool_ptr shdr_ptr;
-  large_pool_ptr lhdr_ptr;
-  size_t space_freed;
-
-  if (pool_id < 0 || pool_id >= JPOOL_NUMPOOLS)
-    ERREXIT1(cinfo, JERR_BAD_POOL_ID, pool_id);	/* safety check */
-
-#ifdef MEM_STATS
-  if (cinfo->err->trace_level > 1)
-    print_mem_stats(cinfo, pool_id); /* print pool's memory usage statistics */
-#endif
-
-  /* If freeing IMAGE pool, close any virtual arrays first */
-  if (pool_id == JPOOL_IMAGE) {
-    jvirt_sarray_ptr sptr;
-    jvirt_barray_ptr bptr;
-
-    for (sptr = mem->virt_sarray_list; sptr != NULL; sptr = sptr->next) {
-      if (sptr->b_s_open) {	/* there may be no backing store */
-	sptr->b_s_open = FALSE;	/* prevent recursive close if error */
-	(*sptr->b_s_info.close_backing_store) (cinfo, & sptr->b_s_info);
-      }
-    }
-    mem->virt_sarray_list = NULL;
-    for (bptr = mem->virt_barray_list; bptr != NULL; bptr = bptr->next) {
-      if (bptr->b_s_open) {	/* there may be no backing store */
-	bptr->b_s_open = FALSE;	/* prevent recursive close if error */
-	(*bptr->b_s_info.close_backing_store) (cinfo, & bptr->b_s_info);
-      }
-    }
-    mem->virt_barray_list = NULL;
-  }
-
-  /* Release large objects */
-  lhdr_ptr = mem->large_list[pool_id];
-  mem->large_list[pool_id] = NULL;
-
-  while (lhdr_ptr != NULL) {
-    large_pool_ptr next_lhdr_ptr = lhdr_ptr->hdr.next;
-    space_freed = lhdr_ptr->hdr.bytes_used +
-		  lhdr_ptr->hdr.bytes_left +
-		  SIZEOF(large_pool_hdr);
-    jpeg_free_large(cinfo, (void FAR *) lhdr_ptr, space_freed);
-    mem->total_space_allocated -= space_freed;
-    lhdr_ptr = next_lhdr_ptr;
-  }
-
-  /* Release small objects */
-  shdr_ptr = mem->small_list[pool_id];
-  mem->small_list[pool_id] = NULL;
-
-  while (shdr_ptr != NULL) {
-    small_pool_ptr next_shdr_ptr = shdr_ptr->hdr.next;
-    space_freed = shdr_ptr->hdr.bytes_used +
-		  shdr_ptr->hdr.bytes_left +
-		  SIZEOF(small_pool_hdr);
-    jpeg_free_small(cinfo, (void *) shdr_ptr, space_freed);
-    mem->total_space_allocated -= space_freed;
-    shdr_ptr = next_shdr_ptr;
-  }
-}
-
-
-/*
- * Close up shop entirely.
- * Note that this cannot be called unless cinfo->mem is non-NULL.
- */
-
-METHODDEF(void)
-self_destruct (j_common_ptr cinfo)
-{
-  int pool;
-
-  /* Close all backing store, release all memory.
-   * Releasing pools in reverse order might help avoid fragmentation
-   * with some (brain-damaged) malloc libraries.
-   */
-  for (pool = JPOOL_NUMPOOLS-1; pool >= JPOOL_PERMANENT; pool--) {
-    free_pool(cinfo, pool);
-  }
-
-  /* Release the memory manager control block too. */
-  jpeg_free_small(cinfo, (void *) cinfo->mem, SIZEOF(my_memory_mgr));
-  cinfo->mem = NULL;		/* ensures I will be called only once */
-
-  jpeg_mem_term(cinfo);		/* system-dependent cleanup */
-}
-
-
-/*
- * Memory manager initialization.
- * When this is called, only the error manager pointer is valid in cinfo!
- */
-
-GLOBAL(void)
-jinit_memory_mgr (j_common_ptr cinfo)
-{
-  my_mem_ptr mem;
-  long max_to_use;
-  int pool;
-  size_t test_mac;
-
-  cinfo->mem = NULL;		/* for safety if init fails */
-
-  /* Check for configuration errors.
-   * SIZEOF(ALIGN_TYPE) should be a power of 2; otherwise, it probably
-   * doesn't reflect any real hardware alignment requirement.
-   * The test is a little tricky: for X>0, X and X-1 have no one-bits
-   * in common if and only if X is a power of 2, ie has only one one-bit.
-   * Some compilers may give an "unreachable code" warning here; ignore it.
-   */
-  if ((SIZEOF(ALIGN_TYPE) & (SIZEOF(ALIGN_TYPE)-1)) != 0)
-    ERREXIT(cinfo, JERR_BAD_ALIGN_TYPE);
-  /* MAX_ALLOC_CHUNK must be representable as type size_t, and must be
-   * a multiple of SIZEOF(ALIGN_TYPE).
-   * Again, an "unreachable code" warning may be ignored here.
-   * But a "constant too large" warning means you need to fix MAX_ALLOC_CHUNK.
-   */
-  test_mac = (size_t) MAX_ALLOC_CHUNK;
-  if ((long) test_mac != MAX_ALLOC_CHUNK ||
-      (MAX_ALLOC_CHUNK % SIZEOF(ALIGN_TYPE)) != 0)
-    ERREXIT(cinfo, JERR_BAD_ALLOC_CHUNK);
-
-  max_to_use = jpeg_mem_init(cinfo); /* system-dependent initialization */
-
-  /* Attempt to allocate memory manager's control block */
-  mem = (my_mem_ptr) jpeg_get_small(cinfo, SIZEOF(my_memory_mgr));
-
-  if (mem == NULL) {
-    jpeg_mem_term(cinfo);	/* system-dependent cleanup */
-    ERREXIT1(cinfo, JERR_OUT_OF_MEMORY, 0);
-  }
-
-  /* OK, fill in the method pointers */
-  mem->pub.alloc_small = alloc_small;
-  mem->pub.alloc_large = alloc_large;
-  mem->pub.alloc_sarray = alloc_sarray;
-  mem->pub.alloc_barray = alloc_barray;
-  mem->pub.request_virt_sarray = request_virt_sarray;
-  mem->pub.request_virt_barray = request_virt_barray;
-  mem->pub.realize_virt_arrays = realize_virt_arrays;
-  mem->pub.access_virt_sarray = access_virt_sarray;
-  mem->pub.access_virt_barray = access_virt_barray;
-  mem->pub.free_pool = free_pool;
-  mem->pub.self_destruct = self_destruct;
-
-  /* Make MAX_ALLOC_CHUNK accessible to other modules */
-  mem->pub.max_alloc_chunk = MAX_ALLOC_CHUNK;
-
-  /* Initialize working state */
-  mem->pub.max_memory_to_use = max_to_use;
-
-  for (pool = JPOOL_NUMPOOLS-1; pool >= JPOOL_PERMANENT; pool--) {
-    mem->small_list[pool] = NULL;
-    mem->large_list[pool] = NULL;
-  }
-  mem->virt_sarray_list = NULL;
-  mem->virt_barray_list = NULL;
-
-  mem->total_space_allocated = SIZEOF(my_memory_mgr);
-
-  /* Declare ourselves open for business */
-  cinfo->mem = & mem->pub;
-
-  /* Check for an environment variable JPEGMEM; if found, override the
-   * default max_memory setting from jpeg_mem_init.  Note that the
-   * surrounding application may again override this value.
-   * If your system doesn't support getenv(), define NO_GETENV to disable
-   * this feature.
-   */
-#ifndef NO_GETENV
-  { char * memenv;
-
-    if ((memenv = getenv("JPEGMEM")) != NULL) {
-      char ch = 'x';
-
-      if (sscanf(memenv, "%ld%c", &max_to_use, &ch) > 0) {
-	if (ch == 'm' || ch == 'M')
-	  max_to_use *= 1000L;
-	mem->pub.max_memory_to_use = max_to_use * 1000L;
-      }
-    }
-  }
-#endif
-
-}
+/*
+ * jmemmgr.c
+ *
+ * Copyright (C) 1991-1997, Thomas G. Lane.
+ * Modified 2011-2012 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains the JPEG system-independent memory management
+ * routines.  This code is usable across a wide variety of machines; most
+ * of the system dependencies have been isolated in a separate file.
+ * The major functions provided here are:
+ *   * pool-based allocation and freeing of memory;
+ *   * policy decisions about how to divide available memory among the
+ *     virtual arrays;
+ *   * control logic for swapping virtual arrays between main memory and
+ *     backing storage.
+ * The separate system-dependent file provides the actual backing-storage
+ * access code, and it contains the policy decision about how much total
+ * main memory to use.
+ * This file is system-dependent in the sense that some of its functions
+ * are unnecessary in some systems.  For example, if there is enough virtual
+ * memory so that backing storage will never be used, much of the virtual
+ * array control logic could be removed.  (Of course, if you have that much
+ * memory then you shouldn't care about a little bit of unused code...)
+ */
+
+#define JPEG_INTERNALS
+#define AM_MEMORY_MANAGER	/* we define jvirt_Xarray_control structs */
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jmemsys.h"		/* import the system-dependent declarations */
+
+#ifndef NO_GETENV
+#ifndef HAVE_STDLIB_H		/* <stdlib.h> should declare getenv() */
+extern char * getenv JPP((const char * name));
+#endif
+#endif
+
+
+/*
+ * Some important notes:
+ *   The allocation routines provided here must never return NULL.
+ *   They should exit to error_exit if unsuccessful.
+ *
+ *   It's not a good idea to try to merge the sarray and barray routines,
+ *   even though they are textually almost the same, because samples are
+ *   usually stored as bytes while coefficients are shorts or ints.  Thus,
+ *   in machines where byte pointers have a different representation from
+ *   word pointers, the resulting machine code could not be the same.
+ */
+
+
+/*
+ * Many machines require storage alignment: longs must start on 4-byte
+ * boundaries, doubles on 8-byte boundaries, etc.  On such machines, malloc()
+ * always returns pointers that are multiples of the worst-case alignment
+ * requirement, and we had better do so too.
+ * There isn't any really portable way to determine the worst-case alignment
+ * requirement.  This module assumes that the alignment requirement is
+ * multiples of sizeof(ALIGN_TYPE).
+ * By default, we define ALIGN_TYPE as double.  This is necessary on some
+ * workstations (where doubles really do need 8-byte alignment) and will work
+ * fine on nearly everything.  If your machine has lesser alignment needs,
+ * you can save a few bytes by making ALIGN_TYPE smaller.
+ * The only place I know of where this will NOT work is certain Macintosh
+ * 680x0 compilers that define double as a 10-byte IEEE extended float.
+ * Doing 10-byte alignment is counterproductive because longwords won't be
+ * aligned well.  Put "#define ALIGN_TYPE long" in jconfig.h if you have
+ * such a compiler.
+ */
+
+#ifndef ALIGN_TYPE		/* so can override from jconfig.h */
+#define ALIGN_TYPE  double
+#endif
+
+
+/*
+ * We allocate objects from "pools", where each pool is gotten with a single
+ * request to jpeg_get_small() or jpeg_get_large().  There is no per-object
+ * overhead within a pool, except for alignment padding.  Each pool has a
+ * header with a link to the next pool of the same class.
+ * Small and large pool headers are identical except that the latter's
+ * link pointer must be FAR on 80x86 machines.
+ * Notice that the "real" header fields are union'ed with a dummy ALIGN_TYPE
+ * field.  This forces the compiler to make SIZEOF(small_pool_hdr) a multiple
+ * of the alignment requirement of ALIGN_TYPE.
+ */
+
+typedef union small_pool_struct * small_pool_ptr;
+
+typedef union small_pool_struct {
+  struct {
+    small_pool_ptr next;	/* next in list of pools */
+    size_t bytes_used;		/* how many bytes already used within pool */
+    size_t bytes_left;		/* bytes still available in this pool */
+  } hdr;
+  ALIGN_TYPE dummy;		/* included in union to ensure alignment */
+} small_pool_hdr;
+
+typedef union large_pool_struct FAR * large_pool_ptr;
+
+typedef union large_pool_struct {
+  struct {
+    large_pool_ptr next;	/* next in list of pools */
+    size_t bytes_used;		/* how many bytes already used within pool */
+    size_t bytes_left;		/* bytes still available in this pool */
+  } hdr;
+  ALIGN_TYPE dummy;		/* included in union to ensure alignment */
+} large_pool_hdr;
+
+
+/*
+ * Here is the full definition of a memory manager object.
+ */
+
+typedef struct {
+  struct jpeg_memory_mgr pub;	/* public fields */
+
+  /* Each pool identifier (lifetime class) names a linked list of pools. */
+  small_pool_ptr small_list[JPOOL_NUMPOOLS];
+  large_pool_ptr large_list[JPOOL_NUMPOOLS];
+
+  /* Since we only have one lifetime class of virtual arrays, only one
+   * linked list is necessary (for each datatype).  Note that the virtual
+   * array control blocks being linked together are actually stored somewhere
+   * in the small-pool list.
+   */
+  jvirt_sarray_ptr virt_sarray_list;
+  jvirt_barray_ptr virt_barray_list;
+
+  /* This counts total space obtained from jpeg_get_small/large */
+  long total_space_allocated;
+
+  /* alloc_sarray and alloc_barray set this value for use by virtual
+   * array routines.
+   */
+  JDIMENSION last_rowsperchunk;	/* from most recent alloc_sarray/barray */
+} my_memory_mgr;
+
+typedef my_memory_mgr * my_mem_ptr;
+
+
+/*
+ * The control blocks for virtual arrays.
+ * Note that these blocks are allocated in the "small" pool area.
+ * System-dependent info for the associated backing store (if any) is hidden
+ * inside the backing_store_info struct.
+ */
+
+struct jvirt_sarray_control {
+  JSAMPARRAY mem_buffer;	/* => the in-memory buffer */
+  JDIMENSION rows_in_array;	/* total virtual array height */
+  JDIMENSION samplesperrow;	/* width of array (and of memory buffer) */
+  JDIMENSION maxaccess;		/* max rows accessed by access_virt_sarray */
+  JDIMENSION rows_in_mem;	/* height of memory buffer */
+  JDIMENSION rowsperchunk;	/* allocation chunk size in mem_buffer */
+  JDIMENSION cur_start_row;	/* first logical row # in the buffer */
+  JDIMENSION first_undef_row;	/* row # of first uninitialized row */
+  boolean pre_zero;		/* pre-zero mode requested? */
+  boolean dirty;		/* do current buffer contents need written? */
+  boolean b_s_open;		/* is backing-store data valid? */
+  jvirt_sarray_ptr next;	/* link to next virtual sarray control block */
+  backing_store_info b_s_info;	/* System-dependent control info */
+};
+
+struct jvirt_barray_control {
+  JBLOCKARRAY mem_buffer;	/* => the in-memory buffer */
+  JDIMENSION rows_in_array;	/* total virtual array height */
+  JDIMENSION blocksperrow;	/* width of array (and of memory buffer) */
+  JDIMENSION maxaccess;		/* max rows accessed by access_virt_barray */
+  JDIMENSION rows_in_mem;	/* height of memory buffer */
+  JDIMENSION rowsperchunk;	/* allocation chunk size in mem_buffer */
+  JDIMENSION cur_start_row;	/* first logical row # in the buffer */
+  JDIMENSION first_undef_row;	/* row # of first uninitialized row */
+  boolean pre_zero;		/* pre-zero mode requested? */
+  boolean dirty;		/* do current buffer contents need written? */
+  boolean b_s_open;		/* is backing-store data valid? */
+  jvirt_barray_ptr next;	/* link to next virtual barray control block */
+  backing_store_info b_s_info;	/* System-dependent control info */
+};
+
+
+#ifdef MEM_STATS		/* optional extra stuff for statistics */
+
+LOCAL(void)
+print_mem_stats (j_common_ptr cinfo, int pool_id)
+{
+  my_mem_ptr mem = (my_mem_ptr) cinfo->mem;
+  small_pool_ptr shdr_ptr;
+  large_pool_ptr lhdr_ptr;
+
+  /* Since this is only a debugging stub, we can cheat a little by using
+   * fprintf directly rather than going through the trace message code.
+   * This is helpful because message parm array can't handle longs.
+   */
+  fprintf(stderr, "Freeing pool %d, total space = %ld\n",
+	  pool_id, mem->total_space_allocated);
+
+  for (lhdr_ptr = mem->large_list[pool_id]; lhdr_ptr != NULL;
+       lhdr_ptr = lhdr_ptr->hdr.next) {
+    fprintf(stderr, "  Large chunk used %ld\n",
+	    (long) lhdr_ptr->hdr.bytes_used);
+  }
+
+  for (shdr_ptr = mem->small_list[pool_id]; shdr_ptr != NULL;
+       shdr_ptr = shdr_ptr->hdr.next) {
+    fprintf(stderr, "  Small chunk used %ld free %ld\n",
+	    (long) shdr_ptr->hdr.bytes_used,
+	    (long) shdr_ptr->hdr.bytes_left);
+  }
+}
+
+#endif /* MEM_STATS */
+
+
+LOCAL(noreturn_t)
+out_of_memory (j_common_ptr cinfo, int which)
+/* Report an out-of-memory error and stop execution */
+/* If we compiled MEM_STATS support, report alloc requests before dying */
+{
+#ifdef MEM_STATS
+  cinfo->err->trace_level = 2;	/* force self_destruct to report stats */
+#endif
+  ERREXIT1(cinfo, JERR_OUT_OF_MEMORY, which);
+}
+
+
+/*
+ * Allocation of "small" objects.
+ *
+ * For these, we use pooled storage.  When a new pool must be created,
+ * we try to get enough space for the current request plus a "slop" factor,
+ * where the slop will be the amount of leftover space in the new pool.
+ * The speed vs. space tradeoff is largely determined by the slop values.
+ * A different slop value is provided for each pool class (lifetime),
+ * and we also distinguish the first pool of a class from later ones.
+ * NOTE: the values given work fairly well on both 16- and 32-bit-int
+ * machines, but may be too small if longs are 64 bits or more.
+ */
+
+static const size_t first_pool_slop[JPOOL_NUMPOOLS] = 
+{
+	1600,			/* first PERMANENT pool */
+	16000			/* first IMAGE pool */
+};
+
+static const size_t extra_pool_slop[JPOOL_NUMPOOLS] = 
+{
+	0,			/* additional PERMANENT pools */
+	5000			/* additional IMAGE pools */
+};
+
+#define MIN_SLOP  50		/* greater than 0 to avoid futile looping */
+
+
+METHODDEF(void *)
+alloc_small (j_common_ptr cinfo, int pool_id, size_t sizeofobject)
+/* Allocate a "small" object */
+{
+  my_mem_ptr mem = (my_mem_ptr) cinfo->mem;
+  small_pool_ptr hdr_ptr, prev_hdr_ptr;
+  char * data_ptr;
+  size_t odd_bytes, min_request, slop;
+
+  /* Check for unsatisfiable request (do now to ensure no overflow below) */
+  if (sizeofobject > (size_t) (MAX_ALLOC_CHUNK-SIZEOF(small_pool_hdr)))
+    out_of_memory(cinfo, 1);	/* request exceeds malloc's ability */
+
+  /* Round up the requested size to a multiple of SIZEOF(ALIGN_TYPE) */
+  odd_bytes = sizeofobject % SIZEOF(ALIGN_TYPE);
+  if (odd_bytes > 0)
+    sizeofobject += SIZEOF(ALIGN_TYPE) - odd_bytes;
+
+  /* See if space is available in any existing pool */
+  if (pool_id < 0 || pool_id >= JPOOL_NUMPOOLS)
+    ERREXIT1(cinfo, JERR_BAD_POOL_ID, pool_id);	/* safety check */
+  prev_hdr_ptr = NULL;
+  hdr_ptr = mem->small_list[pool_id];
+  while (hdr_ptr != NULL) {
+    if (hdr_ptr->hdr.bytes_left >= sizeofobject)
+      break;			/* found pool with enough space */
+    prev_hdr_ptr = hdr_ptr;
+    hdr_ptr = hdr_ptr->hdr.next;
+  }
+
+  /* Time to make a new pool? */
+  if (hdr_ptr == NULL) {
+    /* min_request is what we need now, slop is what will be leftover */
+    min_request = sizeofobject + SIZEOF(small_pool_hdr);
+    if (prev_hdr_ptr == NULL)	/* first pool in class? */
+      slop = first_pool_slop[pool_id];
+    else
+      slop = extra_pool_slop[pool_id];
+    /* Don't ask for more than MAX_ALLOC_CHUNK */
+    if (slop > (size_t) (MAX_ALLOC_CHUNK-min_request))
+      slop = (size_t) (MAX_ALLOC_CHUNK-min_request);
+    /* Try to get space, if fail reduce slop and try again */
+    for (;;) {
+      hdr_ptr = (small_pool_ptr) jpeg_get_small(cinfo, min_request + slop);
+      if (hdr_ptr != NULL)
+	break;
+      slop /= 2;
+      if (slop < MIN_SLOP)	/* give up when it gets real small */
+	out_of_memory(cinfo, 2); /* jpeg_get_small failed */
+    }
+    mem->total_space_allocated += min_request + slop;
+    /* Success, initialize the new pool header and add to end of list */
+    hdr_ptr->hdr.next = NULL;
+    hdr_ptr->hdr.bytes_used = 0;
+    hdr_ptr->hdr.bytes_left = sizeofobject + slop;
+    if (prev_hdr_ptr == NULL)	/* first pool in class? */
+      mem->small_list[pool_id] = hdr_ptr;
+    else
+      prev_hdr_ptr->hdr.next = hdr_ptr;
+  }
+
+  /* OK, allocate the object from the current pool */
+  data_ptr = (char *) (hdr_ptr + 1); /* point to first data byte in pool */
+  data_ptr += hdr_ptr->hdr.bytes_used; /* point to place for object */
+  hdr_ptr->hdr.bytes_used += sizeofobject;
+  hdr_ptr->hdr.bytes_left -= sizeofobject;
+
+  return (void *) data_ptr;
+}
+
+
+/*
+ * Allocation of "large" objects.
+ *
+ * The external semantics of these are the same as "small" objects,
+ * except that FAR pointers are used on 80x86.  However the pool
+ * management heuristics are quite different.  We assume that each
+ * request is large enough that it may as well be passed directly to
+ * jpeg_get_large; the pool management just links everything together
+ * so that we can free it all on demand.
+ * Note: the major use of "large" objects is in JSAMPARRAY and JBLOCKARRAY
+ * structures.  The routines that create these structures (see below)
+ * deliberately bunch rows together to ensure a large request size.
+ */
+
+METHODDEF(void FAR *)
+alloc_large (j_common_ptr cinfo, int pool_id, size_t sizeofobject)
+/* Allocate a "large" object */
+{
+  my_mem_ptr mem = (my_mem_ptr) cinfo->mem;
+  large_pool_ptr hdr_ptr;
+  size_t odd_bytes;
+
+  /* Check for unsatisfiable request (do now to ensure no overflow below) */
+  if (sizeofobject > (size_t) (MAX_ALLOC_CHUNK-SIZEOF(large_pool_hdr)))
+    out_of_memory(cinfo, 3);	/* request exceeds malloc's ability */
+
+  /* Round up the requested size to a multiple of SIZEOF(ALIGN_TYPE) */
+  odd_bytes = sizeofobject % SIZEOF(ALIGN_TYPE);
+  if (odd_bytes > 0)
+    sizeofobject += SIZEOF(ALIGN_TYPE) - odd_bytes;
+
+  /* Always make a new pool */
+  if (pool_id < 0 || pool_id >= JPOOL_NUMPOOLS)
+    ERREXIT1(cinfo, JERR_BAD_POOL_ID, pool_id);	/* safety check */
+
+  hdr_ptr = (large_pool_ptr) jpeg_get_large(cinfo, sizeofobject +
+					    SIZEOF(large_pool_hdr));
+  if (hdr_ptr == NULL)
+    out_of_memory(cinfo, 4);	/* jpeg_get_large failed */
+  mem->total_space_allocated += sizeofobject + SIZEOF(large_pool_hdr);
+
+  /* Success, initialize the new pool header and add to list */
+  hdr_ptr->hdr.next = mem->large_list[pool_id];
+  /* We maintain space counts in each pool header for statistical purposes,
+   * even though they are not needed for allocation.
+   */
+  hdr_ptr->hdr.bytes_used = sizeofobject;
+  hdr_ptr->hdr.bytes_left = 0;
+  mem->large_list[pool_id] = hdr_ptr;
+
+  return (void FAR *) (hdr_ptr + 1); /* point to first data byte in pool */
+}
+
+
+/*
+ * Creation of 2-D sample arrays.
+ * The pointers are in near heap, the samples themselves in FAR heap.
+ *
+ * To minimize allocation overhead and to allow I/O of large contiguous
+ * blocks, we allocate the sample rows in groups of as many rows as possible
+ * without exceeding MAX_ALLOC_CHUNK total bytes per allocation request.
+ * NB: the virtual array control routines, later in this file, know about
+ * this chunking of rows.  The rowsperchunk value is left in the mem manager
+ * object so that it can be saved away if this sarray is the workspace for
+ * a virtual array.
+ */
+
+METHODDEF(JSAMPARRAY)
+alloc_sarray (j_common_ptr cinfo, int pool_id,
+	      JDIMENSION samplesperrow, JDIMENSION numrows)
+/* Allocate a 2-D sample array */
+{
+  my_mem_ptr mem = (my_mem_ptr) cinfo->mem;
+  JSAMPARRAY result;
+  JSAMPROW workspace;
+  JDIMENSION rowsperchunk, currow, i;
+  long ltemp;
+
+  /* Calculate max # of rows allowed in one allocation chunk */
+  ltemp = (MAX_ALLOC_CHUNK-SIZEOF(large_pool_hdr)) /
+	  ((long) samplesperrow * SIZEOF(JSAMPLE));
+  if (ltemp <= 0)
+    ERREXIT(cinfo, JERR_WIDTH_OVERFLOW);
+  if (ltemp < (long) numrows)
+    rowsperchunk = (JDIMENSION) ltemp;
+  else
+    rowsperchunk = numrows;
+  mem->last_rowsperchunk = rowsperchunk;
+
+  /* Get space for row pointers (small object) */
+  result = (JSAMPARRAY) alloc_small(cinfo, pool_id,
+				    (size_t) (numrows * SIZEOF(JSAMPROW)));
+
+  /* Get the rows themselves (large objects) */
+  currow = 0;
+  while (currow < numrows) {
+    rowsperchunk = MIN(rowsperchunk, numrows - currow);
+    workspace = (JSAMPROW) alloc_large(cinfo, pool_id,
+	(size_t) ((size_t) rowsperchunk * (size_t) samplesperrow
+		  * SIZEOF(JSAMPLE)));
+    for (i = rowsperchunk; i > 0; i--) {
+      result[currow++] = workspace;
+      workspace += samplesperrow;
+    }
+  }
+
+  return result;
+}
+
+
+/*
+ * Creation of 2-D coefficient-block arrays.
+ * This is essentially the same as the code for sample arrays, above.
+ */
+
+METHODDEF(JBLOCKARRAY)
+alloc_barray (j_common_ptr cinfo, int pool_id,
+	      JDIMENSION blocksperrow, JDIMENSION numrows)
+/* Allocate a 2-D coefficient-block array */
+{
+  my_mem_ptr mem = (my_mem_ptr) cinfo->mem;
+  JBLOCKARRAY result;
+  JBLOCKROW workspace;
+  JDIMENSION rowsperchunk, currow, i;
+  long ltemp;
+
+  /* Calculate max # of rows allowed in one allocation chunk */
+  ltemp = (MAX_ALLOC_CHUNK-SIZEOF(large_pool_hdr)) /
+	  ((long) blocksperrow * SIZEOF(JBLOCK));
+  if (ltemp <= 0)
+    ERREXIT(cinfo, JERR_WIDTH_OVERFLOW);
+  if (ltemp < (long) numrows)
+    rowsperchunk = (JDIMENSION) ltemp;
+  else
+    rowsperchunk = numrows;
+  mem->last_rowsperchunk = rowsperchunk;
+
+  /* Get space for row pointers (small object) */
+  result = (JBLOCKARRAY) alloc_small(cinfo, pool_id,
+				     (size_t) (numrows * SIZEOF(JBLOCKROW)));
+
+  /* Get the rows themselves (large objects) */
+  currow = 0;
+  while (currow < numrows) {
+    rowsperchunk = MIN(rowsperchunk, numrows - currow);
+    workspace = (JBLOCKROW) alloc_large(cinfo, pool_id,
+	(size_t) ((size_t) rowsperchunk * (size_t) blocksperrow
+		  * SIZEOF(JBLOCK)));
+    for (i = rowsperchunk; i > 0; i--) {
+      result[currow++] = workspace;
+      workspace += blocksperrow;
+    }
+  }
+
+  return result;
+}
+
+
+/*
+ * About virtual array management:
+ *
+ * The above "normal" array routines are only used to allocate strip buffers
+ * (as wide as the image, but just a few rows high).  Full-image-sized buffers
+ * are handled as "virtual" arrays.  The array is still accessed a strip at a
+ * time, but the memory manager must save the whole array for repeated
+ * accesses.  The intended implementation is that there is a strip buffer in
+ * memory (as high as is possible given the desired memory limit), plus a
+ * backing file that holds the rest of the array.
+ *
+ * The request_virt_array routines are told the total size of the image and
+ * the maximum number of rows that will be accessed at once.  The in-memory
+ * buffer must be at least as large as the maxaccess value.
+ *
+ * The request routines create control blocks but not the in-memory buffers.
+ * That is postponed until realize_virt_arrays is called.  At that time the
+ * total amount of space needed is known (approximately, anyway), so free
+ * memory can be divided up fairly.
+ *
+ * The access_virt_array routines are responsible for making a specific strip
+ * area accessible (after reading or writing the backing file, if necessary).
+ * Note that the access routines are told whether the caller intends to modify
+ * the accessed strip; during a read-only pass this saves having to rewrite
+ * data to disk.  The access routines are also responsible for pre-zeroing
+ * any newly accessed rows, if pre-zeroing was requested.
+ *
+ * In current usage, the access requests are usually for nonoverlapping
+ * strips; that is, successive access start_row numbers differ by exactly
+ * num_rows = maxaccess.  This means we can get good performance with simple
+ * buffer dump/reload logic, by making the in-memory buffer be a multiple
+ * of the access height; then there will never be accesses across bufferload
+ * boundaries.  The code will still work with overlapping access requests,
+ * but it doesn't handle bufferload overlaps very efficiently.
+ */
+
+
+METHODDEF(jvirt_sarray_ptr)
+request_virt_sarray (j_common_ptr cinfo, int pool_id, boolean pre_zero,
+		     JDIMENSION samplesperrow, JDIMENSION numrows,
+		     JDIMENSION maxaccess)
+/* Request a virtual 2-D sample array */
+{
+  my_mem_ptr mem = (my_mem_ptr) cinfo->mem;
+  jvirt_sarray_ptr result;
+
+  /* Only IMAGE-lifetime virtual arrays are currently supported */
+  if (pool_id != JPOOL_IMAGE)
+    ERREXIT1(cinfo, JERR_BAD_POOL_ID, pool_id);	/* safety check */
+
+  /* get control block */
+  result = (jvirt_sarray_ptr) alloc_small(cinfo, pool_id,
+					  SIZEOF(struct jvirt_sarray_control));
+
+  result->mem_buffer = NULL;	/* marks array not yet realized */
+  result->rows_in_array = numrows;
+  result->samplesperrow = samplesperrow;
+  result->maxaccess = maxaccess;
+  result->pre_zero = pre_zero;
+  result->b_s_open = FALSE;	/* no associated backing-store object */
+  result->next = mem->virt_sarray_list; /* add to list of virtual arrays */
+  mem->virt_sarray_list = result;
+
+  return result;
+}
+
+
+METHODDEF(jvirt_barray_ptr)
+request_virt_barray (j_common_ptr cinfo, int pool_id, boolean pre_zero,
+		     JDIMENSION blocksperrow, JDIMENSION numrows,
+		     JDIMENSION maxaccess)
+/* Request a virtual 2-D coefficient-block array */
+{
+  my_mem_ptr mem = (my_mem_ptr) cinfo->mem;
+  jvirt_barray_ptr result;
+
+  /* Only IMAGE-lifetime virtual arrays are currently supported */
+  if (pool_id != JPOOL_IMAGE)
+    ERREXIT1(cinfo, JERR_BAD_POOL_ID, pool_id);	/* safety check */
+
+  /* get control block */
+  result = (jvirt_barray_ptr) alloc_small(cinfo, pool_id,
+					  SIZEOF(struct jvirt_barray_control));
+
+  result->mem_buffer = NULL;	/* marks array not yet realized */
+  result->rows_in_array = numrows;
+  result->blocksperrow = blocksperrow;
+  result->maxaccess = maxaccess;
+  result->pre_zero = pre_zero;
+  result->b_s_open = FALSE;	/* no associated backing-store object */
+  result->next = mem->virt_barray_list; /* add to list of virtual arrays */
+  mem->virt_barray_list = result;
+
+  return result;
+}
+
+
+METHODDEF(void)
+realize_virt_arrays (j_common_ptr cinfo)
+/* Allocate the in-memory buffers for any unrealized virtual arrays */
+{
+  my_mem_ptr mem = (my_mem_ptr) cinfo->mem;
+  long space_per_minheight, maximum_space, avail_mem;
+  long minheights, max_minheights;
+  jvirt_sarray_ptr sptr;
+  jvirt_barray_ptr bptr;
+
+  /* Compute the minimum space needed (maxaccess rows in each buffer)
+   * and the maximum space needed (full image height in each buffer).
+   * These may be of use to the system-dependent jpeg_mem_available routine.
+   */
+  space_per_minheight = 0;
+  maximum_space = 0;
+  for (sptr = mem->virt_sarray_list; sptr != NULL; sptr = sptr->next) {
+    if (sptr->mem_buffer == NULL) { /* if not realized yet */
+      space_per_minheight += (long) sptr->maxaccess *
+			     (long) sptr->samplesperrow * SIZEOF(JSAMPLE);
+      maximum_space += (long) sptr->rows_in_array *
+		       (long) sptr->samplesperrow * SIZEOF(JSAMPLE);
+    }
+  }
+  for (bptr = mem->virt_barray_list; bptr != NULL; bptr = bptr->next) {
+    if (bptr->mem_buffer == NULL) { /* if not realized yet */
+      space_per_minheight += (long) bptr->maxaccess *
+			     (long) bptr->blocksperrow * SIZEOF(JBLOCK);
+      maximum_space += (long) bptr->rows_in_array *
+		       (long) bptr->blocksperrow * SIZEOF(JBLOCK);
+    }
+  }
+
+  if (space_per_minheight <= 0)
+    return;			/* no unrealized arrays, no work */
+
+  /* Determine amount of memory to actually use; this is system-dependent. */
+  avail_mem = jpeg_mem_available(cinfo, space_per_minheight, maximum_space,
+				 mem->total_space_allocated);
+
+  /* If the maximum space needed is available, make all the buffers full
+   * height; otherwise parcel it out with the same number of minheights
+   * in each buffer.
+   */
+  if (avail_mem >= maximum_space)
+    max_minheights = 1000000000L;
+  else {
+    max_minheights = avail_mem / space_per_minheight;
+    /* If there doesn't seem to be enough space, try to get the minimum
+     * anyway.  This allows a "stub" implementation of jpeg_mem_available().
+     */
+    if (max_minheights <= 0)
+      max_minheights = 1;
+  }
+
+  /* Allocate the in-memory buffers and initialize backing store as needed. */
+
+  for (sptr = mem->virt_sarray_list; sptr != NULL; sptr = sptr->next) {
+    if (sptr->mem_buffer == NULL) { /* if not realized yet */
+      minheights = ((long) sptr->rows_in_array - 1L) / sptr->maxaccess + 1L;
+      if (minheights <= max_minheights) {
+	/* This buffer fits in memory */
+	sptr->rows_in_mem = sptr->rows_in_array;
+      } else {
+	/* It doesn't fit in memory, create backing store. */
+	sptr->rows_in_mem = (JDIMENSION) (max_minheights * sptr->maxaccess);
+	jpeg_open_backing_store(cinfo, & sptr->b_s_info,
+				(long) sptr->rows_in_array *
+				(long) sptr->samplesperrow *
+				(long) SIZEOF(JSAMPLE));
+	sptr->b_s_open = TRUE;
+      }
+      sptr->mem_buffer = alloc_sarray(cinfo, JPOOL_IMAGE,
+				      sptr->samplesperrow, sptr->rows_in_mem);
+      sptr->rowsperchunk = mem->last_rowsperchunk;
+      sptr->cur_start_row = 0;
+      sptr->first_undef_row = 0;
+      sptr->dirty = FALSE;
+    }
+  }
+
+  for (bptr = mem->virt_barray_list; bptr != NULL; bptr = bptr->next) {
+    if (bptr->mem_buffer == NULL) { /* if not realized yet */
+      minheights = ((long) bptr->rows_in_array - 1L) / bptr->maxaccess + 1L;
+      if (minheights <= max_minheights) {
+	/* This buffer fits in memory */
+	bptr->rows_in_mem = bptr->rows_in_array;
+      } else {
+	/* It doesn't fit in memory, create backing store. */
+	bptr->rows_in_mem = (JDIMENSION) (max_minheights * bptr->maxaccess);
+	jpeg_open_backing_store(cinfo, & bptr->b_s_info,
+				(long) bptr->rows_in_array *
+				(long) bptr->blocksperrow *
+				(long) SIZEOF(JBLOCK));
+	bptr->b_s_open = TRUE;
+      }
+      bptr->mem_buffer = alloc_barray(cinfo, JPOOL_IMAGE,
+				      bptr->blocksperrow, bptr->rows_in_mem);
+      bptr->rowsperchunk = mem->last_rowsperchunk;
+      bptr->cur_start_row = 0;
+      bptr->first_undef_row = 0;
+      bptr->dirty = FALSE;
+    }
+  }
+}
+
+
+LOCAL(void)
+do_sarray_io (j_common_ptr cinfo, jvirt_sarray_ptr ptr, boolean writing)
+/* Do backing store read or write of a virtual sample array */
+{
+  long bytesperrow, file_offset, byte_count, rows, thisrow, i;
+
+  bytesperrow = (long) ptr->samplesperrow * SIZEOF(JSAMPLE);
+  file_offset = ptr->cur_start_row * bytesperrow;
+  /* Loop to read or write each allocation chunk in mem_buffer */
+  for (i = 0; i < (long) ptr->rows_in_mem; i += ptr->rowsperchunk) {
+    /* One chunk, but check for short chunk at end of buffer */
+    rows = MIN((long) ptr->rowsperchunk, (long) ptr->rows_in_mem - i);
+    /* Transfer no more than is currently defined */
+    thisrow = (long) ptr->cur_start_row + i;
+    rows = MIN(rows, (long) ptr->first_undef_row - thisrow);
+    /* Transfer no more than fits in file */
+    rows = MIN(rows, (long) ptr->rows_in_array - thisrow);
+    if (rows <= 0)		/* this chunk might be past end of file! */
+      break;
+    byte_count = rows * bytesperrow;
+    if (writing)
+      (*ptr->b_s_info.write_backing_store) (cinfo, & ptr->b_s_info,
+					    (void FAR *) ptr->mem_buffer[i],
+					    file_offset, byte_count);
+    else
+      (*ptr->b_s_info.read_backing_store) (cinfo, & ptr->b_s_info,
+					   (void FAR *) ptr->mem_buffer[i],
+					   file_offset, byte_count);
+    file_offset += byte_count;
+  }
+}
+
+
+LOCAL(void)
+do_barray_io (j_common_ptr cinfo, jvirt_barray_ptr ptr, boolean writing)
+/* Do backing store read or write of a virtual coefficient-block array */
+{
+  long bytesperrow, file_offset, byte_count, rows, thisrow, i;
+
+  bytesperrow = (long) ptr->blocksperrow * SIZEOF(JBLOCK);
+  file_offset = ptr->cur_start_row * bytesperrow;
+  /* Loop to read or write each allocation chunk in mem_buffer */
+  for (i = 0; i < (long) ptr->rows_in_mem; i += ptr->rowsperchunk) {
+    /* One chunk, but check for short chunk at end of buffer */
+    rows = MIN((long) ptr->rowsperchunk, (long) ptr->rows_in_mem - i);
+    /* Transfer no more than is currently defined */
+    thisrow = (long) ptr->cur_start_row + i;
+    rows = MIN(rows, (long) ptr->first_undef_row - thisrow);
+    /* Transfer no more than fits in file */
+    rows = MIN(rows, (long) ptr->rows_in_array - thisrow);
+    if (rows <= 0)		/* this chunk might be past end of file! */
+      break;
+    byte_count = rows * bytesperrow;
+    if (writing)
+      (*ptr->b_s_info.write_backing_store) (cinfo, & ptr->b_s_info,
+					    (void FAR *) ptr->mem_buffer[i],
+					    file_offset, byte_count);
+    else
+      (*ptr->b_s_info.read_backing_store) (cinfo, & ptr->b_s_info,
+					   (void FAR *) ptr->mem_buffer[i],
+					   file_offset, byte_count);
+    file_offset += byte_count;
+  }
+}
+
+
+METHODDEF(JSAMPARRAY)
+access_virt_sarray (j_common_ptr cinfo, jvirt_sarray_ptr ptr,
+		    JDIMENSION start_row, JDIMENSION num_rows,
+		    boolean writable)
+/* Access the part of a virtual sample array starting at start_row */
+/* and extending for num_rows rows.  writable is true if  */
+/* caller intends to modify the accessed area. */
+{
+  JDIMENSION end_row = start_row + num_rows;
+  JDIMENSION undef_row;
+
+  /* debugging check */
+  if (end_row > ptr->rows_in_array || num_rows > ptr->maxaccess ||
+      ptr->mem_buffer == NULL)
+    ERREXIT(cinfo, JERR_BAD_VIRTUAL_ACCESS);
+
+  /* Make the desired part of the virtual array accessible */
+  if (start_row < ptr->cur_start_row ||
+      end_row > ptr->cur_start_row+ptr->rows_in_mem) {
+    if (! ptr->b_s_open)
+      ERREXIT(cinfo, JERR_VIRTUAL_BUG);
+    /* Flush old buffer contents if necessary */
+    if (ptr->dirty) {
+      do_sarray_io(cinfo, ptr, TRUE);
+      ptr->dirty = FALSE;
+    }
+    /* Decide what part of virtual array to access.
+     * Algorithm: if target address > current window, assume forward scan,
+     * load starting at target address.  If target address < current window,
+     * assume backward scan, load so that target area is top of window.
+     * Note that when switching from forward write to forward read, will have
+     * start_row = 0, so the limiting case applies and we load from 0 anyway.
+     */
+    if (start_row > ptr->cur_start_row) {
+      ptr->cur_start_row = start_row;
+    } else {
+      /* use long arithmetic here to avoid overflow & unsigned problems */
+      long ltemp;
+
+      ltemp = (long) end_row - (long) ptr->rows_in_mem;
+      if (ltemp < 0)
+	ltemp = 0;		/* don't fall off front end of file */
+      ptr->cur_start_row = (JDIMENSION) ltemp;
+    }
+    /* Read in the selected part of the array.
+     * During the initial write pass, we will do no actual read
+     * because the selected part is all undefined.
+     */
+    do_sarray_io(cinfo, ptr, FALSE);
+  }
+  /* Ensure the accessed part of the array is defined; prezero if needed.
+   * To improve locality of access, we only prezero the part of the array
+   * that the caller is about to access, not the entire in-memory array.
+   */
+  if (ptr->first_undef_row < end_row) {
+    if (ptr->first_undef_row < start_row) {
+      if (writable)		/* writer skipped over a section of array */
+	ERREXIT(cinfo, JERR_BAD_VIRTUAL_ACCESS);
+      undef_row = start_row;	/* but reader is allowed to read ahead */
+    } else {
+      undef_row = ptr->first_undef_row;
+    }
+    if (writable)
+      ptr->first_undef_row = end_row;
+    if (ptr->pre_zero) {
+      size_t bytesperrow = (size_t) ptr->samplesperrow * SIZEOF(JSAMPLE);
+      undef_row -= ptr->cur_start_row; /* make indexes relative to buffer */
+      end_row -= ptr->cur_start_row;
+      while (undef_row < end_row) {
+	FMEMZERO((void FAR *) ptr->mem_buffer[undef_row], bytesperrow);
+	undef_row++;
+      }
+    } else {
+      if (! writable)		/* reader looking at undefined data */
+	ERREXIT(cinfo, JERR_BAD_VIRTUAL_ACCESS);
+    }
+  }
+  /* Flag the buffer dirty if caller will write in it */
+  if (writable)
+    ptr->dirty = TRUE;
+  /* Return address of proper part of the buffer */
+  return ptr->mem_buffer + (start_row - ptr->cur_start_row);
+}
+
+
+METHODDEF(JBLOCKARRAY)
+access_virt_barray (j_common_ptr cinfo, jvirt_barray_ptr ptr,
+		    JDIMENSION start_row, JDIMENSION num_rows,
+		    boolean writable)
+/* Access the part of a virtual block array starting at start_row */
+/* and extending for num_rows rows.  writable is true if  */
+/* caller intends to modify the accessed area. */
+{
+  JDIMENSION end_row = start_row + num_rows;
+  JDIMENSION undef_row;
+
+  /* debugging check */
+  if (end_row > ptr->rows_in_array || num_rows > ptr->maxaccess ||
+      ptr->mem_buffer == NULL)
+    ERREXIT(cinfo, JERR_BAD_VIRTUAL_ACCESS);
+
+  /* Make the desired part of the virtual array accessible */
+  if (start_row < ptr->cur_start_row ||
+      end_row > ptr->cur_start_row+ptr->rows_in_mem) {
+    if (! ptr->b_s_open)
+      ERREXIT(cinfo, JERR_VIRTUAL_BUG);
+    /* Flush old buffer contents if necessary */
+    if (ptr->dirty) {
+      do_barray_io(cinfo, ptr, TRUE);
+      ptr->dirty = FALSE;
+    }
+    /* Decide what part of virtual array to access.
+     * Algorithm: if target address > current window, assume forward scan,
+     * load starting at target address.  If target address < current window,
+     * assume backward scan, load so that target area is top of window.
+     * Note that when switching from forward write to forward read, will have
+     * start_row = 0, so the limiting case applies and we load from 0 anyway.
+     */
+    if (start_row > ptr->cur_start_row) {
+      ptr->cur_start_row = start_row;
+    } else {
+      /* use long arithmetic here to avoid overflow & unsigned problems */
+      long ltemp;
+
+      ltemp = (long) end_row - (long) ptr->rows_in_mem;
+      if (ltemp < 0)
+	ltemp = 0;		/* don't fall off front end of file */
+      ptr->cur_start_row = (JDIMENSION) ltemp;
+    }
+    /* Read in the selected part of the array.
+     * During the initial write pass, we will do no actual read
+     * because the selected part is all undefined.
+     */
+    do_barray_io(cinfo, ptr, FALSE);
+  }
+  /* Ensure the accessed part of the array is defined; prezero if needed.
+   * To improve locality of access, we only prezero the part of the array
+   * that the caller is about to access, not the entire in-memory array.
+   */
+  if (ptr->first_undef_row < end_row) {
+    if (ptr->first_undef_row < start_row) {
+      if (writable)		/* writer skipped over a section of array */
+	ERREXIT(cinfo, JERR_BAD_VIRTUAL_ACCESS);
+      undef_row = start_row;	/* but reader is allowed to read ahead */
+    } else {
+      undef_row = ptr->first_undef_row;
+    }
+    if (writable)
+      ptr->first_undef_row = end_row;
+    if (ptr->pre_zero) {
+      size_t bytesperrow = (size_t) ptr->blocksperrow * SIZEOF(JBLOCK);
+      undef_row -= ptr->cur_start_row; /* make indexes relative to buffer */
+      end_row -= ptr->cur_start_row;
+      while (undef_row < end_row) {
+	FMEMZERO((void FAR *) ptr->mem_buffer[undef_row], bytesperrow);
+	undef_row++;
+      }
+    } else {
+      if (! writable)		/* reader looking at undefined data */
+	ERREXIT(cinfo, JERR_BAD_VIRTUAL_ACCESS);
+    }
+  }
+  /* Flag the buffer dirty if caller will write in it */
+  if (writable)
+    ptr->dirty = TRUE;
+  /* Return address of proper part of the buffer */
+  return ptr->mem_buffer + (start_row - ptr->cur_start_row);
+}
+
+
+/*
+ * Release all objects belonging to a specified pool.
+ */
+
+METHODDEF(void)
+free_pool (j_common_ptr cinfo, int pool_id)
+{
+  my_mem_ptr mem = (my_mem_ptr) cinfo->mem;
+  small_pool_ptr shdr_ptr;
+  large_pool_ptr lhdr_ptr;
+  size_t space_freed;
+
+  if (pool_id < 0 || pool_id >= JPOOL_NUMPOOLS)
+    ERREXIT1(cinfo, JERR_BAD_POOL_ID, pool_id);	/* safety check */
+
+#ifdef MEM_STATS
+  if (cinfo->err->trace_level > 1)
+    print_mem_stats(cinfo, pool_id); /* print pool's memory usage statistics */
+#endif
+
+  /* If freeing IMAGE pool, close any virtual arrays first */
+  if (pool_id == JPOOL_IMAGE) {
+    jvirt_sarray_ptr sptr;
+    jvirt_barray_ptr bptr;
+
+    for (sptr = mem->virt_sarray_list; sptr != NULL; sptr = sptr->next) {
+      if (sptr->b_s_open) {	/* there may be no backing store */
+	sptr->b_s_open = FALSE;	/* prevent recursive close if error */
+	(*sptr->b_s_info.close_backing_store) (cinfo, & sptr->b_s_info);
+      }
+    }
+    mem->virt_sarray_list = NULL;
+    for (bptr = mem->virt_barray_list; bptr != NULL; bptr = bptr->next) {
+      if (bptr->b_s_open) {	/* there may be no backing store */
+	bptr->b_s_open = FALSE;	/* prevent recursive close if error */
+	(*bptr->b_s_info.close_backing_store) (cinfo, & bptr->b_s_info);
+      }
+    }
+    mem->virt_barray_list = NULL;
+  }
+
+  /* Release large objects */
+  lhdr_ptr = mem->large_list[pool_id];
+  mem->large_list[pool_id] = NULL;
+
+  while (lhdr_ptr != NULL) {
+    large_pool_ptr next_lhdr_ptr = lhdr_ptr->hdr.next;
+    space_freed = lhdr_ptr->hdr.bytes_used +
+		  lhdr_ptr->hdr.bytes_left +
+		  SIZEOF(large_pool_hdr);
+    jpeg_free_large(cinfo, (void FAR *) lhdr_ptr, space_freed);
+    mem->total_space_allocated -= space_freed;
+    lhdr_ptr = next_lhdr_ptr;
+  }
+
+  /* Release small objects */
+  shdr_ptr = mem->small_list[pool_id];
+  mem->small_list[pool_id] = NULL;
+
+  while (shdr_ptr != NULL) {
+    small_pool_ptr next_shdr_ptr = shdr_ptr->hdr.next;
+    space_freed = shdr_ptr->hdr.bytes_used +
+		  shdr_ptr->hdr.bytes_left +
+		  SIZEOF(small_pool_hdr);
+    jpeg_free_small(cinfo, (void *) shdr_ptr, space_freed);
+    mem->total_space_allocated -= space_freed;
+    shdr_ptr = next_shdr_ptr;
+  }
+}
+
+
+/*
+ * Close up shop entirely.
+ * Note that this cannot be called unless cinfo->mem is non-NULL.
+ */
+
+METHODDEF(void)
+self_destruct (j_common_ptr cinfo)
+{
+  int pool;
+
+  /* Close all backing store, release all memory.
+   * Releasing pools in reverse order might help avoid fragmentation
+   * with some (brain-damaged) malloc libraries.
+   */
+  for (pool = JPOOL_NUMPOOLS-1; pool >= JPOOL_PERMANENT; pool--) {
+    free_pool(cinfo, pool);
+  }
+
+  /* Release the memory manager control block too. */
+  jpeg_free_small(cinfo, (void *) cinfo->mem, SIZEOF(my_memory_mgr));
+  cinfo->mem = NULL;		/* ensures I will be called only once */
+
+  jpeg_mem_term(cinfo);		/* system-dependent cleanup */
+}
+
+
+/*
+ * Memory manager initialization.
+ * When this is called, only the error manager pointer is valid in cinfo!
+ */
+
+GLOBAL(void)
+jinit_memory_mgr (j_common_ptr cinfo)
+{
+  my_mem_ptr mem;
+  long max_to_use;
+  int pool;
+  size_t test_mac;
+
+  cinfo->mem = NULL;		/* for safety if init fails */
+
+  /* Check for configuration errors.
+   * SIZEOF(ALIGN_TYPE) should be a power of 2; otherwise, it probably
+   * doesn't reflect any real hardware alignment requirement.
+   * The test is a little tricky: for X>0, X and X-1 have no one-bits
+   * in common if and only if X is a power of 2, ie has only one one-bit.
+   * Some compilers may give an "unreachable code" warning here; ignore it.
+   */
+  if ((SIZEOF(ALIGN_TYPE) & (SIZEOF(ALIGN_TYPE)-1)) != 0)
+    ERREXIT(cinfo, JERR_BAD_ALIGN_TYPE);
+  /* MAX_ALLOC_CHUNK must be representable as type size_t, and must be
+   * a multiple of SIZEOF(ALIGN_TYPE).
+   * Again, an "unreachable code" warning may be ignored here.
+   * But a "constant too large" warning means you need to fix MAX_ALLOC_CHUNK.
+   */
+  test_mac = (size_t) MAX_ALLOC_CHUNK;
+  if ((long) test_mac != MAX_ALLOC_CHUNK ||
+      (MAX_ALLOC_CHUNK % SIZEOF(ALIGN_TYPE)) != 0)
+    ERREXIT(cinfo, JERR_BAD_ALLOC_CHUNK);
+
+  max_to_use = jpeg_mem_init(cinfo); /* system-dependent initialization */
+
+  /* Attempt to allocate memory manager's control block */
+  mem = (my_mem_ptr) jpeg_get_small(cinfo, SIZEOF(my_memory_mgr));
+
+  if (mem == NULL) {
+    jpeg_mem_term(cinfo);	/* system-dependent cleanup */
+    ERREXIT1(cinfo, JERR_OUT_OF_MEMORY, 0);
+  }
+
+  /* OK, fill in the method pointers */
+  mem->pub.alloc_small = alloc_small;
+  mem->pub.alloc_large = alloc_large;
+  mem->pub.alloc_sarray = alloc_sarray;
+  mem->pub.alloc_barray = alloc_barray;
+  mem->pub.request_virt_sarray = request_virt_sarray;
+  mem->pub.request_virt_barray = request_virt_barray;
+  mem->pub.realize_virt_arrays = realize_virt_arrays;
+  mem->pub.access_virt_sarray = access_virt_sarray;
+  mem->pub.access_virt_barray = access_virt_barray;
+  mem->pub.free_pool = free_pool;
+  mem->pub.self_destruct = self_destruct;
+
+  /* Make MAX_ALLOC_CHUNK accessible to other modules */
+  mem->pub.max_alloc_chunk = MAX_ALLOC_CHUNK;
+
+  /* Initialize working state */
+  mem->pub.max_memory_to_use = max_to_use;
+
+  for (pool = JPOOL_NUMPOOLS-1; pool >= JPOOL_PERMANENT; pool--) {
+    mem->small_list[pool] = NULL;
+    mem->large_list[pool] = NULL;
+  }
+  mem->virt_sarray_list = NULL;
+  mem->virt_barray_list = NULL;
+
+  mem->total_space_allocated = SIZEOF(my_memory_mgr);
+
+  /* Declare ourselves open for business */
+  cinfo->mem = & mem->pub;
+
+  /* Check for an environment variable JPEGMEM; if found, override the
+   * default max_memory setting from jpeg_mem_init.  Note that the
+   * surrounding application may again override this value.
+   * If your system doesn't support getenv(), define NO_GETENV to disable
+   * this feature.
+   */
+#ifndef NO_GETENV
+  { char * memenv;
+
+    if ((memenv = getenv("JPEGMEM")) != NULL) {
+      char ch = 'x';
+
+      if (sscanf(memenv, "%ld%c", &max_to_use, &ch) > 0) {
+	if (ch == 'm' || ch == 'M')
+	  max_to_use *= 1000L;
+	mem->pub.max_memory_to_use = max_to_use * 1000L;
+      }
+    }
+  }
+#endif
+
+}
diff --git a/plugins/AdvaImg/src/LibJPEG/jmemnobs.c b/plugins/AdvaImg/src/LibJPEG/jmemnobs.c
index eb8c337725..6aa1e92950 100644
--- a/plugins/AdvaImg/src/LibJPEG/jmemnobs.c
+++ b/plugins/AdvaImg/src/LibJPEG/jmemnobs.c
@@ -1,109 +1,109 @@
-/*
- * jmemnobs.c
- *
- * Copyright (C) 1992-1996, Thomas G. Lane.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file provides a really simple implementation of the system-
- * dependent portion of the JPEG memory manager.  This implementation
- * assumes that no backing-store files are needed: all required space
- * can be obtained from malloc().
- * This is very portable in the sense that it'll compile on almost anything,
- * but you'd better have lots of main memory (or virtual memory) if you want
- * to process big images.
- * Note that the max_memory_to_use option is ignored by this implementation.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-#include "jmemsys.h"		/* import the system-dependent declarations */
-
-#ifndef HAVE_STDLIB_H		/* <stdlib.h> should declare malloc(),free() */
-extern void * malloc JPP((size_t size));
-extern void free JPP((void *ptr));
-#endif
-
-
-/*
- * Memory allocation and freeing are controlled by the regular library
- * routines malloc() and free().
- */
-
-GLOBAL(void *)
-jpeg_get_small (j_common_ptr cinfo, size_t sizeofobject)
-{
-  return (void *) malloc(sizeofobject);
-}
-
-GLOBAL(void)
-jpeg_free_small (j_common_ptr cinfo, void * object, size_t sizeofobject)
-{
-  free(object);
-}
-
-
-/*
- * "Large" objects are treated the same as "small" ones.
- * NB: although we include FAR keywords in the routine declarations,
- * this file won't actually work in 80x86 small/medium model; at least,
- * you probably won't be able to process useful-size images in only 64KB.
- */
-
-GLOBAL(void FAR *)
-jpeg_get_large (j_common_ptr cinfo, size_t sizeofobject)
-{
-  return (void FAR *) malloc(sizeofobject);
-}
-
-GLOBAL(void)
-jpeg_free_large (j_common_ptr cinfo, void FAR * object, size_t sizeofobject)
-{
-  free(object);
-}
-
-
-/*
- * This routine computes the total memory space available for allocation.
- * Here we always say, "we got all you want bud!"
- */
-
-GLOBAL(long)
-jpeg_mem_available (j_common_ptr cinfo, long min_bytes_needed,
-		    long max_bytes_needed, long already_allocated)
-{
-  return max_bytes_needed;
-}
-
-
-/*
- * Backing store (temporary file) management.
- * Since jpeg_mem_available always promised the moon,
- * this should never be called and we can just error out.
- */
-
-GLOBAL(void)
-jpeg_open_backing_store (j_common_ptr cinfo, backing_store_ptr info,
-			 long total_bytes_needed)
-{
-  ERREXIT(cinfo, JERR_NO_BACKING_STORE);
-}
-
-
-/*
- * These routines take care of any system-dependent initialization and
- * cleanup required.  Here, there isn't any.
- */
-
-GLOBAL(long)
-jpeg_mem_init (j_common_ptr cinfo)
-{
-  return 0;			/* just set max_memory_to_use to 0 */
-}
-
-GLOBAL(void)
-jpeg_mem_term (j_common_ptr cinfo)
-{
-  /* no work */
-}
+/*
+ * jmemnobs.c
+ *
+ * Copyright (C) 1992-1996, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file provides a really simple implementation of the system-
+ * dependent portion of the JPEG memory manager.  This implementation
+ * assumes that no backing-store files are needed: all required space
+ * can be obtained from malloc().
+ * This is very portable in the sense that it'll compile on almost anything,
+ * but you'd better have lots of main memory (or virtual memory) if you want
+ * to process big images.
+ * Note that the max_memory_to_use option is ignored by this implementation.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jmemsys.h"		/* import the system-dependent declarations */
+
+#ifndef HAVE_STDLIB_H		/* <stdlib.h> should declare malloc(),free() */
+extern void * malloc JPP((size_t size));
+extern void free JPP((void *ptr));
+#endif
+
+
+/*
+ * Memory allocation and freeing are controlled by the regular library
+ * routines malloc() and free().
+ */
+
+GLOBAL(void *)
+jpeg_get_small (j_common_ptr cinfo, size_t sizeofobject)
+{
+  return (void *) malloc(sizeofobject);
+}
+
+GLOBAL(void)
+jpeg_free_small (j_common_ptr cinfo, void * object, size_t sizeofobject)
+{
+  free(object);
+}
+
+
+/*
+ * "Large" objects are treated the same as "small" ones.
+ * NB: although we include FAR keywords in the routine declarations,
+ * this file won't actually work in 80x86 small/medium model; at least,
+ * you probably won't be able to process useful-size images in only 64KB.
+ */
+
+GLOBAL(void FAR *)
+jpeg_get_large (j_common_ptr cinfo, size_t sizeofobject)
+{
+  return (void FAR *) malloc(sizeofobject);
+}
+
+GLOBAL(void)
+jpeg_free_large (j_common_ptr cinfo, void FAR * object, size_t sizeofobject)
+{
+  free(object);
+}
+
+
+/*
+ * This routine computes the total memory space available for allocation.
+ * Here we always say, "we got all you want bud!"
+ */
+
+GLOBAL(long)
+jpeg_mem_available (j_common_ptr cinfo, long min_bytes_needed,
+		    long max_bytes_needed, long already_allocated)
+{
+  return max_bytes_needed;
+}
+
+
+/*
+ * Backing store (temporary file) management.
+ * Since jpeg_mem_available always promised the moon,
+ * this should never be called and we can just error out.
+ */
+
+GLOBAL(void)
+jpeg_open_backing_store (j_common_ptr cinfo, backing_store_ptr info,
+			 long total_bytes_needed)
+{
+  ERREXIT(cinfo, JERR_NO_BACKING_STORE);
+}
+
+
+/*
+ * These routines take care of any system-dependent initialization and
+ * cleanup required.  Here, there isn't any.
+ */
+
+GLOBAL(long)
+jpeg_mem_init (j_common_ptr cinfo)
+{
+  return 0;			/* just set max_memory_to_use to 0 */
+}
+
+GLOBAL(void)
+jpeg_mem_term (j_common_ptr cinfo)
+{
+  /* no work */
+}
diff --git a/plugins/AdvaImg/src/LibJPEG/jmemsys.h b/plugins/AdvaImg/src/LibJPEG/jmemsys.h
index 6c3c6d348f..2a8796119c 100644
--- a/plugins/AdvaImg/src/LibJPEG/jmemsys.h
+++ b/plugins/AdvaImg/src/LibJPEG/jmemsys.h
@@ -1,198 +1,198 @@
-/*
- * jmemsys.h
- *
- * Copyright (C) 1992-1997, Thomas G. Lane.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This include file defines the interface between the system-independent
- * and system-dependent portions of the JPEG memory manager.  No other
- * modules need include it.  (The system-independent portion is jmemmgr.c;
- * there are several different versions of the system-dependent portion.)
- *
- * This file works as-is for the system-dependent memory managers supplied
- * in the IJG distribution.  You may need to modify it if you write a
- * custom memory manager.  If system-dependent changes are needed in
- * this file, the best method is to #ifdef them based on a configuration
- * symbol supplied in jconfig.h, as we have done with USE_MSDOS_MEMMGR
- * and USE_MAC_MEMMGR.
- */
-
-
-/* Short forms of external names for systems with brain-damaged linkers. */
-
-#ifdef NEED_SHORT_EXTERNAL_NAMES
-#define jpeg_get_small		jGetSmall
-#define jpeg_free_small		jFreeSmall
-#define jpeg_get_large		jGetLarge
-#define jpeg_free_large		jFreeLarge
-#define jpeg_mem_available	jMemAvail
-#define jpeg_open_backing_store	jOpenBackStore
-#define jpeg_mem_init		jMemInit
-#define jpeg_mem_term		jMemTerm
-#endif /* NEED_SHORT_EXTERNAL_NAMES */
-
-
-/*
- * These two functions are used to allocate and release small chunks of
- * memory.  (Typically the total amount requested through jpeg_get_small is
- * no more than 20K or so; this will be requested in chunks of a few K each.)
- * Behavior should be the same as for the standard library functions malloc
- * and free; in particular, jpeg_get_small must return NULL on failure.
- * On most systems, these ARE malloc and free.  jpeg_free_small is passed the
- * size of the object being freed, just in case it's needed.
- * On an 80x86 machine using small-data memory model, these manage near heap.
- */
-
-EXTERN(void *) jpeg_get_small JPP((j_common_ptr cinfo, size_t sizeofobject));
-EXTERN(void) jpeg_free_small JPP((j_common_ptr cinfo, void * object,
-				  size_t sizeofobject));
-
-/*
- * These two functions are used to allocate and release large chunks of
- * memory (up to the total free space designated by jpeg_mem_available).
- * The interface is the same as above, except that on an 80x86 machine,
- * far pointers are used.  On most other machines these are identical to
- * the jpeg_get/free_small routines; but we keep them separate anyway,
- * in case a different allocation strategy is desirable for large chunks.
- */
-
-EXTERN(void FAR *) jpeg_get_large JPP((j_common_ptr cinfo,
-				       size_t sizeofobject));
-EXTERN(void) jpeg_free_large JPP((j_common_ptr cinfo, void FAR * object,
-				  size_t sizeofobject));
-
-/*
- * The macro MAX_ALLOC_CHUNK designates the maximum number of bytes that may
- * be requested in a single call to jpeg_get_large (and jpeg_get_small for that
- * matter, but that case should never come into play).  This macro is needed
- * to model the 64Kb-segment-size limit of far addressing on 80x86 machines.
- * On those machines, we expect that jconfig.h will provide a proper value.
- * On machines with 32-bit flat address spaces, any large constant may be used.
- *
- * NB: jmemmgr.c expects that MAX_ALLOC_CHUNK will be representable as type
- * size_t and will be a multiple of sizeof(align_type).
- */
-
-#ifndef MAX_ALLOC_CHUNK		/* may be overridden in jconfig.h */
-#define MAX_ALLOC_CHUNK  1000000000L
-#endif
-
-/*
- * This routine computes the total space still available for allocation by
- * jpeg_get_large.  If more space than this is needed, backing store will be
- * used.  NOTE: any memory already allocated must not be counted.
- *
- * There is a minimum space requirement, corresponding to the minimum
- * feasible buffer sizes; jmemmgr.c will request that much space even if
- * jpeg_mem_available returns zero.  The maximum space needed, enough to hold
- * all working storage in memory, is also passed in case it is useful.
- * Finally, the total space already allocated is passed.  If no better
- * method is available, cinfo->mem->max_memory_to_use - already_allocated
- * is often a suitable calculation.
- *
- * It is OK for jpeg_mem_available to underestimate the space available
- * (that'll just lead to more backing-store access than is really necessary).
- * However, an overestimate will lead to failure.  Hence it's wise to subtract
- * a slop factor from the true available space.  5% should be enough.
- *
- * On machines with lots of virtual memory, any large constant may be returned.
- * Conversely, zero may be returned to always use the minimum amount of memory.
- */
-
-EXTERN(long) jpeg_mem_available JPP((j_common_ptr cinfo,
-				     long min_bytes_needed,
-				     long max_bytes_needed,
-				     long already_allocated));
-
-
-/*
- * This structure holds whatever state is needed to access a single
- * backing-store object.  The read/write/close method pointers are called
- * by jmemmgr.c to manipulate the backing-store object; all other fields
- * are private to the system-dependent backing store routines.
- */
-
-#define TEMP_NAME_LENGTH   64	/* max length of a temporary file's name */
-
-
-#ifdef USE_MSDOS_MEMMGR		/* DOS-specific junk */
-
-typedef unsigned short XMSH;	/* type of extended-memory handles */
-typedef unsigned short EMSH;	/* type of expanded-memory handles */
-
-typedef union {
-  short file_handle;		/* DOS file handle if it's a temp file */
-  XMSH xms_handle;		/* handle if it's a chunk of XMS */
-  EMSH ems_handle;		/* handle if it's a chunk of EMS */
-} handle_union;
-
-#endif /* USE_MSDOS_MEMMGR */
-
-#ifdef USE_MAC_MEMMGR		/* Mac-specific junk */
-#include <Files.h>
-#endif /* USE_MAC_MEMMGR */
-
-
-typedef struct backing_store_struct * backing_store_ptr;
-
-typedef struct backing_store_struct {
-  /* Methods for reading/writing/closing this backing-store object */
-  JMETHOD(void, read_backing_store, (j_common_ptr cinfo,
-				     backing_store_ptr info,
-				     void FAR * buffer_address,
-				     long file_offset, long byte_count));
-  JMETHOD(void, write_backing_store, (j_common_ptr cinfo,
-				      backing_store_ptr info,
-				      void FAR * buffer_address,
-				      long file_offset, long byte_count));
-  JMETHOD(void, close_backing_store, (j_common_ptr cinfo,
-				      backing_store_ptr info));
-
-  /* Private fields for system-dependent backing-store management */
-#ifdef USE_MSDOS_MEMMGR
-  /* For the MS-DOS manager (jmemdos.c), we need: */
-  handle_union handle;		/* reference to backing-store storage object */
-  char temp_name[TEMP_NAME_LENGTH]; /* name if it's a file */
-#else
-#ifdef USE_MAC_MEMMGR
-  /* For the Mac manager (jmemmac.c), we need: */
-  short temp_file;		/* file reference number to temp file */
-  FSSpec tempSpec;		/* the FSSpec for the temp file */
-  char temp_name[TEMP_NAME_LENGTH]; /* name if it's a file */
-#else
-  /* For a typical implementation with temp files, we need: */
-  FILE * temp_file;		/* stdio reference to temp file */
-  char temp_name[TEMP_NAME_LENGTH]; /* name of temp file */
-#endif
-#endif
-} backing_store_info;
-
-
-/*
- * Initial opening of a backing-store object.  This must fill in the
- * read/write/close pointers in the object.  The read/write routines
- * may take an error exit if the specified maximum file size is exceeded.
- * (If jpeg_mem_available always returns a large value, this routine can
- * just take an error exit.)
- */
-
-EXTERN(void) jpeg_open_backing_store JPP((j_common_ptr cinfo,
-					  backing_store_ptr info,
-					  long total_bytes_needed));
-
-
-/*
- * These routines take care of any system-dependent initialization and
- * cleanup required.  jpeg_mem_init will be called before anything is
- * allocated (and, therefore, nothing in cinfo is of use except the error
- * manager pointer).  It should return a suitable default value for
- * max_memory_to_use; this may subsequently be overridden by the surrounding
- * application.  (Note that max_memory_to_use is only important if
- * jpeg_mem_available chooses to consult it ... no one else will.)
- * jpeg_mem_term may assume that all requested memory has been freed and that
- * all opened backing-store objects have been closed.
- */
-
-EXTERN(long) jpeg_mem_init JPP((j_common_ptr cinfo));
-EXTERN(void) jpeg_mem_term JPP((j_common_ptr cinfo));
+/*
+ * jmemsys.h
+ *
+ * Copyright (C) 1992-1997, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This include file defines the interface between the system-independent
+ * and system-dependent portions of the JPEG memory manager.  No other
+ * modules need include it.  (The system-independent portion is jmemmgr.c;
+ * there are several different versions of the system-dependent portion.)
+ *
+ * This file works as-is for the system-dependent memory managers supplied
+ * in the IJG distribution.  You may need to modify it if you write a
+ * custom memory manager.  If system-dependent changes are needed in
+ * this file, the best method is to #ifdef them based on a configuration
+ * symbol supplied in jconfig.h, as we have done with USE_MSDOS_MEMMGR
+ * and USE_MAC_MEMMGR.
+ */
+
+
+/* Short forms of external names for systems with brain-damaged linkers. */
+
+#ifdef NEED_SHORT_EXTERNAL_NAMES
+#define jpeg_get_small		jGetSmall
+#define jpeg_free_small		jFreeSmall
+#define jpeg_get_large		jGetLarge
+#define jpeg_free_large		jFreeLarge
+#define jpeg_mem_available	jMemAvail
+#define jpeg_open_backing_store	jOpenBackStore
+#define jpeg_mem_init		jMemInit
+#define jpeg_mem_term		jMemTerm
+#endif /* NEED_SHORT_EXTERNAL_NAMES */
+
+
+/*
+ * These two functions are used to allocate and release small chunks of
+ * memory.  (Typically the total amount requested through jpeg_get_small is
+ * no more than 20K or so; this will be requested in chunks of a few K each.)
+ * Behavior should be the same as for the standard library functions malloc
+ * and free; in particular, jpeg_get_small must return NULL on failure.
+ * On most systems, these ARE malloc and free.  jpeg_free_small is passed the
+ * size of the object being freed, just in case it's needed.
+ * On an 80x86 machine using small-data memory model, these manage near heap.
+ */
+
+EXTERN(void *) jpeg_get_small JPP((j_common_ptr cinfo, size_t sizeofobject));
+EXTERN(void) jpeg_free_small JPP((j_common_ptr cinfo, void * object,
+				  size_t sizeofobject));
+
+/*
+ * These two functions are used to allocate and release large chunks of
+ * memory (up to the total free space designated by jpeg_mem_available).
+ * The interface is the same as above, except that on an 80x86 machine,
+ * far pointers are used.  On most other machines these are identical to
+ * the jpeg_get/free_small routines; but we keep them separate anyway,
+ * in case a different allocation strategy is desirable for large chunks.
+ */
+
+EXTERN(void FAR *) jpeg_get_large JPP((j_common_ptr cinfo,
+				       size_t sizeofobject));
+EXTERN(void) jpeg_free_large JPP((j_common_ptr cinfo, void FAR * object,
+				  size_t sizeofobject));
+
+/*
+ * The macro MAX_ALLOC_CHUNK designates the maximum number of bytes that may
+ * be requested in a single call to jpeg_get_large (and jpeg_get_small for that
+ * matter, but that case should never come into play).  This macro is needed
+ * to model the 64Kb-segment-size limit of far addressing on 80x86 machines.
+ * On those machines, we expect that jconfig.h will provide a proper value.
+ * On machines with 32-bit flat address spaces, any large constant may be used.
+ *
+ * NB: jmemmgr.c expects that MAX_ALLOC_CHUNK will be representable as type
+ * size_t and will be a multiple of sizeof(align_type).
+ */
+
+#ifndef MAX_ALLOC_CHUNK		/* may be overridden in jconfig.h */
+#define MAX_ALLOC_CHUNK  1000000000L
+#endif
+
+/*
+ * This routine computes the total space still available for allocation by
+ * jpeg_get_large.  If more space than this is needed, backing store will be
+ * used.  NOTE: any memory already allocated must not be counted.
+ *
+ * There is a minimum space requirement, corresponding to the minimum
+ * feasible buffer sizes; jmemmgr.c will request that much space even if
+ * jpeg_mem_available returns zero.  The maximum space needed, enough to hold
+ * all working storage in memory, is also passed in case it is useful.
+ * Finally, the total space already allocated is passed.  If no better
+ * method is available, cinfo->mem->max_memory_to_use - already_allocated
+ * is often a suitable calculation.
+ *
+ * It is OK for jpeg_mem_available to underestimate the space available
+ * (that'll just lead to more backing-store access than is really necessary).
+ * However, an overestimate will lead to failure.  Hence it's wise to subtract
+ * a slop factor from the true available space.  5% should be enough.
+ *
+ * On machines with lots of virtual memory, any large constant may be returned.
+ * Conversely, zero may be returned to always use the minimum amount of memory.
+ */
+
+EXTERN(long) jpeg_mem_available JPP((j_common_ptr cinfo,
+				     long min_bytes_needed,
+				     long max_bytes_needed,
+				     long already_allocated));
+
+
+/*
+ * This structure holds whatever state is needed to access a single
+ * backing-store object.  The read/write/close method pointers are called
+ * by jmemmgr.c to manipulate the backing-store object; all other fields
+ * are private to the system-dependent backing store routines.
+ */
+
+#define TEMP_NAME_LENGTH   64	/* max length of a temporary file's name */
+
+
+#ifdef USE_MSDOS_MEMMGR		/* DOS-specific junk */
+
+typedef unsigned short XMSH;	/* type of extended-memory handles */
+typedef unsigned short EMSH;	/* type of expanded-memory handles */
+
+typedef union {
+  short file_handle;		/* DOS file handle if it's a temp file */
+  XMSH xms_handle;		/* handle if it's a chunk of XMS */
+  EMSH ems_handle;		/* handle if it's a chunk of EMS */
+} handle_union;
+
+#endif /* USE_MSDOS_MEMMGR */
+
+#ifdef USE_MAC_MEMMGR		/* Mac-specific junk */
+#include <Files.h>
+#endif /* USE_MAC_MEMMGR */
+
+
+typedef struct backing_store_struct * backing_store_ptr;
+
+typedef struct backing_store_struct {
+  /* Methods for reading/writing/closing this backing-store object */
+  JMETHOD(void, read_backing_store, (j_common_ptr cinfo,
+				     backing_store_ptr info,
+				     void FAR * buffer_address,
+				     long file_offset, long byte_count));
+  JMETHOD(void, write_backing_store, (j_common_ptr cinfo,
+				      backing_store_ptr info,
+				      void FAR * buffer_address,
+				      long file_offset, long byte_count));
+  JMETHOD(void, close_backing_store, (j_common_ptr cinfo,
+				      backing_store_ptr info));
+
+  /* Private fields for system-dependent backing-store management */
+#ifdef USE_MSDOS_MEMMGR
+  /* For the MS-DOS manager (jmemdos.c), we need: */
+  handle_union handle;		/* reference to backing-store storage object */
+  char temp_name[TEMP_NAME_LENGTH]; /* name if it's a file */
+#else
+#ifdef USE_MAC_MEMMGR
+  /* For the Mac manager (jmemmac.c), we need: */
+  short temp_file;		/* file reference number to temp file */
+  FSSpec tempSpec;		/* the FSSpec for the temp file */
+  char temp_name[TEMP_NAME_LENGTH]; /* name if it's a file */
+#else
+  /* For a typical implementation with temp files, we need: */
+  FILE * temp_file;		/* stdio reference to temp file */
+  char temp_name[TEMP_NAME_LENGTH]; /* name of temp file */
+#endif
+#endif
+} backing_store_info;
+
+
+/*
+ * Initial opening of a backing-store object.  This must fill in the
+ * read/write/close pointers in the object.  The read/write routines
+ * may take an error exit if the specified maximum file size is exceeded.
+ * (If jpeg_mem_available always returns a large value, this routine can
+ * just take an error exit.)
+ */
+
+EXTERN(void) jpeg_open_backing_store JPP((j_common_ptr cinfo,
+					  backing_store_ptr info,
+					  long total_bytes_needed));
+
+
+/*
+ * These routines take care of any system-dependent initialization and
+ * cleanup required.  jpeg_mem_init will be called before anything is
+ * allocated (and, therefore, nothing in cinfo is of use except the error
+ * manager pointer).  It should return a suitable default value for
+ * max_memory_to_use; this may subsequently be overridden by the surrounding
+ * application.  (Note that max_memory_to_use is only important if
+ * jpeg_mem_available chooses to consult it ... no one else will.)
+ * jpeg_mem_term may assume that all requested memory has been freed and that
+ * all opened backing-store objects have been closed.
+ */
+
+EXTERN(long) jpeg_mem_init JPP((j_common_ptr cinfo));
+EXTERN(void) jpeg_mem_term JPP((j_common_ptr cinfo));
diff --git a/plugins/AdvaImg/src/LibJPEG/jmorecfg.h b/plugins/AdvaImg/src/LibJPEG/jmorecfg.h
index 6c085c36a6..f2600cc8ff 100644
--- a/plugins/AdvaImg/src/LibJPEG/jmorecfg.h
+++ b/plugins/AdvaImg/src/LibJPEG/jmorecfg.h
@@ -1,369 +1,390 @@
-/*
- * jmorecfg.h
- *
- * Copyright (C) 1991-1997, Thomas G. Lane.
- * Modified 1997-2011 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains additional configuration options that customize the
- * JPEG software for special applications or support machine-dependent
- * optimizations.  Most users will not need to touch this file.
- */
-
-
-/*
- * Define BITS_IN_JSAMPLE as either
- *   8   for 8-bit sample values (the usual setting)
- *   12  for 12-bit sample values
- * Only 8 and 12 are legal data precisions for lossy JPEG according to the
- * JPEG standard, and the IJG code does not support anything else!
- * We do not support run-time selection of data precision, sorry.
- */
-
-#define BITS_IN_JSAMPLE  8	/* use 8 or 12 */
-
-
-/*
- * Maximum number of components (color channels) allowed in JPEG image.
- * To meet the letter of the JPEG spec, set this to 255.  However, darn
- * few applications need more than 4 channels (maybe 5 for CMYK + alpha
- * mask).  We recommend 10 as a reasonable compromise; use 4 if you are
- * really short on memory.  (Each allowed component costs a hundred or so
- * bytes of storage, whether actually used in an image or not.)
- */
-
-#define MAX_COMPONENTS  10	/* maximum number of image components */
-
-
-/*
- * Basic data types.
- * You may need to change these if you have a machine with unusual data
- * type sizes; for example, "char" not 8 bits, "short" not 16 bits,
- * or "long" not 32 bits.  We don't care whether "int" is 16 or 32 bits,
- * but it had better be at least 16.
- */
-
-/* Representation of a single sample (pixel element value).
- * We frequently allocate large arrays of these, so it's important to keep
- * them small.  But if you have memory to burn and access to char or short
- * arrays is very slow on your hardware, you might want to change these.
- */
-
-#if BITS_IN_JSAMPLE == 8
-/* JSAMPLE should be the smallest type that will hold the values 0..255.
- * You can use a signed char by having GETJSAMPLE mask it with 0xFF.
- */
-
-#ifdef HAVE_UNSIGNED_CHAR
-
-typedef unsigned char JSAMPLE;
-#define GETJSAMPLE(value)  ((int) (value))
-
-#else /* not HAVE_UNSIGNED_CHAR */
-
-typedef char JSAMPLE;
-#ifdef CHAR_IS_UNSIGNED
-#define GETJSAMPLE(value)  ((int) (value))
-#else
-#define GETJSAMPLE(value)  ((int) (value) & 0xFF)
-#endif /* CHAR_IS_UNSIGNED */
-
-#endif /* HAVE_UNSIGNED_CHAR */
-
-#define MAXJSAMPLE	255
-#define CENTERJSAMPLE	128
-
-#endif /* BITS_IN_JSAMPLE == 8 */
-
-
-#if BITS_IN_JSAMPLE == 12
-/* JSAMPLE should be the smallest type that will hold the values 0..4095.
- * On nearly all machines "short" will do nicely.
- */
-
-typedef short JSAMPLE;
-#define GETJSAMPLE(value)  ((int) (value))
-
-#define MAXJSAMPLE	4095
-#define CENTERJSAMPLE	2048
-
-#endif /* BITS_IN_JSAMPLE == 12 */
-
-
-/* Representation of a DCT frequency coefficient.
- * This should be a signed value of at least 16 bits; "short" is usually OK.
- * Again, we allocate large arrays of these, but you can change to int
- * if you have memory to burn and "short" is really slow.
- */
-
-typedef short JCOEF;
-
-
-/* Compressed datastreams are represented as arrays of JOCTET.
- * These must be EXACTLY 8 bits wide, at least once they are written to
- * external storage.  Note that when using the stdio data source/destination
- * managers, this is also the data type passed to fread/fwrite.
- */
-
-#ifdef HAVE_UNSIGNED_CHAR
-
-typedef unsigned char JOCTET;
-#define GETJOCTET(value)  (value)
-
-#else /* not HAVE_UNSIGNED_CHAR */
-
-typedef char JOCTET;
-#ifdef CHAR_IS_UNSIGNED
-#define GETJOCTET(value)  (value)
-#else
-#define GETJOCTET(value)  ((value) & 0xFF)
-#endif /* CHAR_IS_UNSIGNED */
-
-#endif /* HAVE_UNSIGNED_CHAR */
-
-
-/* These typedefs are used for various table entries and so forth.
- * They must be at least as wide as specified; but making them too big
- * won't cost a huge amount of memory, so we don't provide special
- * extraction code like we did for JSAMPLE.  (In other words, these
- * typedefs live at a different point on the speed/space tradeoff curve.)
- */
-
-/* UINT8 must hold at least the values 0..255. */
-
-#ifdef HAVE_UNSIGNED_CHAR
-typedef unsigned char UINT8;
-#else /* not HAVE_UNSIGNED_CHAR */
-#ifdef CHAR_IS_UNSIGNED
-typedef char UINT8;
-#else /* not CHAR_IS_UNSIGNED */
-typedef short UINT8;
-#endif /* CHAR_IS_UNSIGNED */
-#endif /* HAVE_UNSIGNED_CHAR */
-
-/* UINT16 must hold at least the values 0..65535. */
-
-#ifdef HAVE_UNSIGNED_SHORT
-typedef unsigned short UINT16;
-#else /* not HAVE_UNSIGNED_SHORT */
-typedef unsigned int UINT16;
-#endif /* HAVE_UNSIGNED_SHORT */
-
-/* INT16 must hold at least the values -32768..32767. */
-
-#ifndef XMD_H			/* X11/xmd.h correctly defines INT16 */
-typedef short INT16;
-#endif
-
-/* INT32 must hold at least signed 32-bit values. */
-
-#ifndef XMD_H			/* X11/xmd.h correctly defines INT32 */
-#ifndef _BASETSD_H_		/* Microsoft defines it in basetsd.h */
-#ifndef _BASETSD_H		/* MinGW is slightly different */
-#ifndef QGLOBAL_H		/* Qt defines it in qglobal.h */
-typedef long INT32;
-#endif
-#endif
-#endif
-#endif
-
-/* Datatype used for image dimensions.  The JPEG standard only supports
- * images up to 64K*64K due to 16-bit fields in SOF markers.  Therefore
- * "unsigned int" is sufficient on all machines.  However, if you need to
- * handle larger images and you don't mind deviating from the spec, you
- * can change this datatype.
- */
-
-typedef unsigned int JDIMENSION;
-
-#define JPEG_MAX_DIMENSION  65500L  /* a tad under 64K to prevent overflows */
-
-
-/* These macros are used in all function definitions and extern declarations.
- * You could modify them if you need to change function linkage conventions;
- * in particular, you'll need to do that to make the library a Windows DLL.
- * Another application is to make all functions global for use with debuggers
- * or code profilers that require it.
- */
-
-/* a function called through method pointers: */
-#define METHODDEF(type)		static type
-/* a function used only in its module: */
-#define LOCAL(type)		static type
-/* a function referenced thru EXTERNs: */
-#define GLOBAL(type)		type
-/* a reference to a GLOBAL function: */
-#define EXTERN(type)		extern type
-
-
-/* This macro is used to declare a "method", that is, a function pointer.
- * We want to supply prototype parameters if the compiler can cope.
- * Note that the arglist parameter must be parenthesized!
- * Again, you can customize this if you need special linkage keywords.
- */
-
-#ifdef HAVE_PROTOTYPES
-#define JMETHOD(type,methodname,arglist)  type (*methodname) arglist
-#else
-#define JMETHOD(type,methodname,arglist)  type (*methodname) ()
-#endif
-
-
-/* Here is the pseudo-keyword for declaring pointers that must be "far"
- * on 80x86 machines.  Most of the specialized coding for 80x86 is handled
- * by just saying "FAR *" where such a pointer is needed.  In a few places
- * explicit coding is needed; see uses of the NEED_FAR_POINTERS symbol.
- */
-
-#ifndef FAR
-#ifdef NEED_FAR_POINTERS
-#define FAR  far
-#else
-#define FAR
-#endif
-#endif
-
-
-/*
- * On a few systems, type boolean and/or its values FALSE, TRUE may appear
- * in standard header files.  Or you may have conflicts with application-
- * specific header files that you want to include together with these files.
- * Defining HAVE_BOOLEAN before including jpeglib.h should make it work.
- */
-
-#ifndef HAVE_BOOLEAN
-typedef int boolean;
-#endif
-#ifndef FALSE			/* in case these macros already exist */
-#define FALSE	0		/* values of boolean */
-#endif
-#ifndef TRUE
-#define TRUE	1
-#endif
-
-
-/*
- * The remaining options affect code selection within the JPEG library,
- * but they don't need to be visible to most applications using the library.
- * To minimize application namespace pollution, the symbols won't be
- * defined unless JPEG_INTERNALS or JPEG_INTERNAL_OPTIONS has been defined.
- */
-
-#ifdef JPEG_INTERNALS
-#define JPEG_INTERNAL_OPTIONS
-#endif
-
-#ifdef JPEG_INTERNAL_OPTIONS
-
-
-/*
- * These defines indicate whether to include various optional functions.
- * Undefining some of these symbols will produce a smaller but less capable
- * library.  Note that you can leave certain source files out of the
- * compilation/linking process if you've #undef'd the corresponding symbols.
- * (You may HAVE to do that if your compiler doesn't like null source files.)
- */
-
-/* Capability options common to encoder and decoder: */
-
-#define DCT_ISLOW_SUPPORTED	/* slow but accurate integer algorithm */
-#define DCT_IFAST_SUPPORTED	/* faster, less accurate integer method */
-#define DCT_FLOAT_SUPPORTED	/* floating-point: accurate, fast on fast HW */
-
-/* Encoder capability options: */
-
-#define C_ARITH_CODING_SUPPORTED    /* Arithmetic coding back end? */
-#define C_MULTISCAN_FILES_SUPPORTED /* Multiple-scan JPEG files? */
-#define C_PROGRESSIVE_SUPPORTED	    /* Progressive JPEG? (Requires MULTISCAN)*/
-#define DCT_SCALING_SUPPORTED	    /* Input rescaling via DCT? (Requires DCT_ISLOW)*/
-#define ENTROPY_OPT_SUPPORTED	    /* Optimization of entropy coding parms? */
-/* Note: if you selected 12-bit data precision, it is dangerous to turn off
- * ENTROPY_OPT_SUPPORTED.  The standard Huffman tables are only good for 8-bit
- * precision, so jchuff.c normally uses entropy optimization to compute
- * usable tables for higher precision.  If you don't want to do optimization,
- * you'll have to supply different default Huffman tables.
- * The exact same statements apply for progressive JPEG: the default tables
- * don't work for progressive mode.  (This may get fixed, however.)
- */
-#define INPUT_SMOOTHING_SUPPORTED   /* Input image smoothing option? */
-
-/* Decoder capability options: */
-
-#define D_ARITH_CODING_SUPPORTED    /* Arithmetic coding back end? */
-#define D_MULTISCAN_FILES_SUPPORTED /* Multiple-scan JPEG files? */
-#define D_PROGRESSIVE_SUPPORTED	    /* Progressive JPEG? (Requires MULTISCAN)*/
-#define IDCT_SCALING_SUPPORTED	    /* Output rescaling via IDCT? */
-#define SAVE_MARKERS_SUPPORTED	    /* jpeg_save_markers() needed? */
-#define BLOCK_SMOOTHING_SUPPORTED   /* Block smoothing? (Progressive only) */
-#undef  UPSAMPLE_SCALING_SUPPORTED  /* Output rescaling at upsample stage? */
-#define UPSAMPLE_MERGING_SUPPORTED  /* Fast path for sloppy upsampling? */
-#define QUANT_1PASS_SUPPORTED	    /* 1-pass color quantization? */
-#define QUANT_2PASS_SUPPORTED	    /* 2-pass color quantization? */
-
-/* more capability options later, no doubt */
-
-
-/*
- * Ordering of RGB data in scanlines passed to or from the application.
- * If your application wants to deal with data in the order B,G,R, just
- * change these macros.  You can also deal with formats such as R,G,B,X
- * (one extra byte per pixel) by changing RGB_PIXELSIZE.  Note that changing
- * the offsets will also change the order in which colormap data is organized.
- * RESTRICTIONS:
- * 1. The sample applications cjpeg,djpeg do NOT support modified RGB formats.
- * 2. The color quantizer modules will not behave desirably if RGB_PIXELSIZE
- *    is not 3 (they don't understand about dummy color components!).  So you
- *    can't use color quantization if you change that value.
- */
-
-#define RGB_RED		0	/* Offset of Red in an RGB scanline element */
-#define RGB_GREEN	1	/* Offset of Green */
-#define RGB_BLUE	2	/* Offset of Blue */
-#define RGB_PIXELSIZE	3	/* JSAMPLEs per RGB scanline element */
-
-
-/* Definitions for speed-related optimizations. */
-
-
-/* If your compiler supports inline functions, define INLINE
- * as the inline keyword; otherwise define it as empty.
- */
-
-#ifndef INLINE
-#ifdef __GNUC__			/* for instance, GNU C knows about inline */
-#define INLINE __inline__
-#endif
-#ifndef INLINE
-#define INLINE			/* default is to define it as empty */
-#endif
-#endif
-
-
-/* On some machines (notably 68000 series) "int" is 32 bits, but multiplying
- * two 16-bit shorts is faster than multiplying two ints.  Define MULTIPLIER
- * as short on such a machine.  MULTIPLIER must be at least 16 bits wide.
- */
-
-#ifndef MULTIPLIER
-#define MULTIPLIER  int		/* type for fastest integer multiply */
-#endif
-
-
-/* FAST_FLOAT should be either float or double, whichever is done faster
- * by your compiler.  (Note that this type is only used in the floating point
- * DCT routines, so it only matters if you've defined DCT_FLOAT_SUPPORTED.)
- * Typically, float is faster in ANSI C compilers, while double is faster in
- * pre-ANSI compilers (because they insist on converting to double anyway).
- * The code below therefore chooses float if we have ANSI-style prototypes.
- */
-
-#ifndef FAST_FLOAT
-#ifdef HAVE_PROTOTYPES
-#define FAST_FLOAT  float
-#else
-#define FAST_FLOAT  double
-#endif
-#endif
-
-#endif /* JPEG_INTERNAL_OPTIONS */
+/*
+ * jmorecfg.h
+ *
+ * Copyright (C) 1991-1997, Thomas G. Lane.
+ * Modified 1997-2012 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains additional configuration options that customize the
+ * JPEG software for special applications or support machine-dependent
+ * optimizations.  Most users will not need to touch this file.
+ */
+
+
+/*
+ * Define BITS_IN_JSAMPLE as either
+ *   8   for 8-bit sample values (the usual setting)
+ *   12  for 12-bit sample values
+ * Only 8 and 12 are legal data precisions for lossy JPEG according to the
+ * JPEG standard, and the IJG code does not support anything else!
+ * We do not support run-time selection of data precision, sorry.
+ */
+
+#define BITS_IN_JSAMPLE  8	/* use 8 or 12 */
+
+
+/*
+ * Maximum number of components (color channels) allowed in JPEG image.
+ * To meet the letter of the JPEG spec, set this to 255.  However, darn
+ * few applications need more than 4 channels (maybe 5 for CMYK + alpha
+ * mask).  We recommend 10 as a reasonable compromise; use 4 if you are
+ * really short on memory.  (Each allowed component costs a hundred or so
+ * bytes of storage, whether actually used in an image or not.)
+ */
+
+#define MAX_COMPONENTS  10	/* maximum number of image components */
+
+
+/*
+ * Basic data types.
+ * You may need to change these if you have a machine with unusual data
+ * type sizes; for example, "char" not 8 bits, "short" not 16 bits,
+ * or "long" not 32 bits.  We don't care whether "int" is 16 or 32 bits,
+ * but it had better be at least 16.
+ */
+
+/* Representation of a single sample (pixel element value).
+ * We frequently allocate large arrays of these, so it's important to keep
+ * them small.  But if you have memory to burn and access to char or short
+ * arrays is very slow on your hardware, you might want to change these.
+ */
+
+#if BITS_IN_JSAMPLE == 8
+/* JSAMPLE should be the smallest type that will hold the values 0..255.
+ * You can use a signed char by having GETJSAMPLE mask it with 0xFF.
+ */
+
+#ifdef HAVE_UNSIGNED_CHAR
+
+typedef unsigned char JSAMPLE;
+#define GETJSAMPLE(value)  ((int) (value))
+
+#else /* not HAVE_UNSIGNED_CHAR */
+
+typedef char JSAMPLE;
+#ifdef CHAR_IS_UNSIGNED
+#define GETJSAMPLE(value)  ((int) (value))
+#else
+#define GETJSAMPLE(value)  ((int) (value) & 0xFF)
+#endif /* CHAR_IS_UNSIGNED */
+
+#endif /* HAVE_UNSIGNED_CHAR */
+
+#define MAXJSAMPLE	255
+#define CENTERJSAMPLE	128
+
+#endif /* BITS_IN_JSAMPLE == 8 */
+
+
+#if BITS_IN_JSAMPLE == 12
+/* JSAMPLE should be the smallest type that will hold the values 0..4095.
+ * On nearly all machines "short" will do nicely.
+ */
+
+typedef short JSAMPLE;
+#define GETJSAMPLE(value)  ((int) (value))
+
+#define MAXJSAMPLE	4095
+#define CENTERJSAMPLE	2048
+
+#endif /* BITS_IN_JSAMPLE == 12 */
+
+
+/* Representation of a DCT frequency coefficient.
+ * This should be a signed value of at least 16 bits; "short" is usually OK.
+ * Again, we allocate large arrays of these, but you can change to int
+ * if you have memory to burn and "short" is really slow.
+ */
+
+typedef short JCOEF;
+
+
+/* Compressed datastreams are represented as arrays of JOCTET.
+ * These must be EXACTLY 8 bits wide, at least once they are written to
+ * external storage.  Note that when using the stdio data source/destination
+ * managers, this is also the data type passed to fread/fwrite.
+ */
+
+#ifdef HAVE_UNSIGNED_CHAR
+
+typedef unsigned char JOCTET;
+#define GETJOCTET(value)  (value)
+
+#else /* not HAVE_UNSIGNED_CHAR */
+
+typedef char JOCTET;
+#ifdef CHAR_IS_UNSIGNED
+#define GETJOCTET(value)  (value)
+#else
+#define GETJOCTET(value)  ((value) & 0xFF)
+#endif /* CHAR_IS_UNSIGNED */
+
+#endif /* HAVE_UNSIGNED_CHAR */
+
+
+/* These typedefs are used for various table entries and so forth.
+ * They must be at least as wide as specified; but making them too big
+ * won't cost a huge amount of memory, so we don't provide special
+ * extraction code like we did for JSAMPLE.  (In other words, these
+ * typedefs live at a different point on the speed/space tradeoff curve.)
+ */
+
+/* UINT8 must hold at least the values 0..255. */
+
+#ifdef HAVE_UNSIGNED_CHAR
+typedef unsigned char UINT8;
+#else /* not HAVE_UNSIGNED_CHAR */
+#ifdef CHAR_IS_UNSIGNED
+typedef char UINT8;
+#else /* not CHAR_IS_UNSIGNED */
+typedef short UINT8;
+#endif /* CHAR_IS_UNSIGNED */
+#endif /* HAVE_UNSIGNED_CHAR */
+
+/* UINT16 must hold at least the values 0..65535. */
+
+#ifdef HAVE_UNSIGNED_SHORT
+typedef unsigned short UINT16;
+#else /* not HAVE_UNSIGNED_SHORT */
+typedef unsigned int UINT16;
+#endif /* HAVE_UNSIGNED_SHORT */
+
+/* INT16 must hold at least the values -32768..32767. */
+
+#ifndef XMD_H			/* X11/xmd.h correctly defines INT16 */
+typedef short INT16;
+#endif
+
+/* INT32 must hold at least signed 32-bit values. */
+
+#ifndef XMD_H			/* X11/xmd.h correctly defines INT32 */
+#ifndef _BASETSD_H_		/* Microsoft defines it in basetsd.h */
+#ifndef _BASETSD_H		/* MinGW is slightly different */
+#ifndef QGLOBAL_H		/* Qt defines it in qglobal.h */
+typedef long INT32;
+#endif
+#endif
+#endif
+#endif
+
+/* Datatype used for image dimensions.  The JPEG standard only supports
+ * images up to 64K*64K due to 16-bit fields in SOF markers.  Therefore
+ * "unsigned int" is sufficient on all machines.  However, if you need to
+ * handle larger images and you don't mind deviating from the spec, you
+ * can change this datatype.
+ */
+
+typedef unsigned int JDIMENSION;
+
+#define JPEG_MAX_DIMENSION  65500L  /* a tad under 64K to prevent overflows */
+
+
+/* These macros are used in all function definitions and extern declarations.
+ * You could modify them if you need to change function linkage conventions;
+ * in particular, you'll need to do that to make the library a Windows DLL.
+ * Another application is to make all functions global for use with debuggers
+ * or code profilers that require it.
+ */
+
+/* a function called through method pointers: */
+#define METHODDEF(type)		static type
+/* a function used only in its module: */
+#define LOCAL(type)		static type
+/* a function referenced thru EXTERNs: */
+#define GLOBAL(type)		type
+/* a reference to a GLOBAL function: */
+#define EXTERN(type)		extern type
+
+
+/* This macro is used to declare a "method", that is, a function pointer.
+ * We want to supply prototype parameters if the compiler can cope.
+ * Note that the arglist parameter must be parenthesized!
+ * Again, you can customize this if you need special linkage keywords.
+ */
+
+#ifdef HAVE_PROTOTYPES
+#define JMETHOD(type,methodname,arglist)  type (*methodname) arglist
+#else
+#define JMETHOD(type,methodname,arglist)  type (*methodname) ()
+#endif
+
+
+/* The noreturn type identifier is used to declare functions
+ * which cannot return.
+ * Compilers can thus create more optimized code and perform
+ * better checks for warnings and errors.
+ * Static analyzer tools can make improved inferences about
+ * execution paths and are prevented from giving false alerts.
+ *
+ * Unfortunately, the proposed specifications of corresponding
+ * extensions in the Dec 2011 ISO C standard revision (C11),
+ * GCC, MSVC, etc. are not viable.
+ * Thus we introduce a user defined type to declare noreturn
+ * functions at least for clarity.  A proper compiler would
+ * have a suitable noreturn type to match in place of void.
+ */
+
+#ifndef HAVE_NORETURN_T
+typedef void noreturn_t;
+#endif
+
+
+/* Here is the pseudo-keyword for declaring pointers that must be "far"
+ * on 80x86 machines.  Most of the specialized coding for 80x86 is handled
+ * by just saying "FAR *" where such a pointer is needed.  In a few places
+ * explicit coding is needed; see uses of the NEED_FAR_POINTERS symbol.
+ */
+
+#ifndef FAR
+#ifdef NEED_FAR_POINTERS
+#define FAR  far
+#else
+#define FAR
+#endif
+#endif
+
+
+/*
+ * On a few systems, type boolean and/or its values FALSE, TRUE may appear
+ * in standard header files.  Or you may have conflicts with application-
+ * specific header files that you want to include together with these files.
+ * Defining HAVE_BOOLEAN before including jpeglib.h should make it work.
+ */
+
+#ifdef HAVE_BOOLEAN
+#ifndef FALSE			/* in case these macros already exist */
+#define FALSE	0		/* values of boolean */
+#endif
+#ifndef TRUE
+#define TRUE	1
+#endif
+#else
+typedef enum { FALSE = 0, TRUE = 1 } boolean;
+#endif
+
+
+/*
+ * The remaining options affect code selection within the JPEG library,
+ * but they don't need to be visible to most applications using the library.
+ * To minimize application namespace pollution, the symbols won't be
+ * defined unless JPEG_INTERNALS or JPEG_INTERNAL_OPTIONS has been defined.
+ */
+
+#ifdef JPEG_INTERNALS
+#define JPEG_INTERNAL_OPTIONS
+#endif
+
+#ifdef JPEG_INTERNAL_OPTIONS
+
+
+/*
+ * These defines indicate whether to include various optional functions.
+ * Undefining some of these symbols will produce a smaller but less capable
+ * library.  Note that you can leave certain source files out of the
+ * compilation/linking process if you've #undef'd the corresponding symbols.
+ * (You may HAVE to do that if your compiler doesn't like null source files.)
+ */
+
+/* Capability options common to encoder and decoder: */
+
+#define DCT_ISLOW_SUPPORTED	/* slow but accurate integer algorithm */
+#define DCT_IFAST_SUPPORTED	/* faster, less accurate integer method */
+#define DCT_FLOAT_SUPPORTED	/* floating-point: accurate, fast on fast HW */
+
+/* Encoder capability options: */
+
+#define C_ARITH_CODING_SUPPORTED    /* Arithmetic coding back end? */
+#define C_MULTISCAN_FILES_SUPPORTED /* Multiple-scan JPEG files? */
+#define C_PROGRESSIVE_SUPPORTED	    /* Progressive JPEG? (Requires MULTISCAN)*/
+#define DCT_SCALING_SUPPORTED	    /* Input rescaling via DCT? (Requires DCT_ISLOW)*/
+#define ENTROPY_OPT_SUPPORTED	    /* Optimization of entropy coding parms? */
+/* Note: if you selected 12-bit data precision, it is dangerous to turn off
+ * ENTROPY_OPT_SUPPORTED.  The standard Huffman tables are only good for 8-bit
+ * precision, so jchuff.c normally uses entropy optimization to compute
+ * usable tables for higher precision.  If you don't want to do optimization,
+ * you'll have to supply different default Huffman tables.
+ * The exact same statements apply for progressive JPEG: the default tables
+ * don't work for progressive mode.  (This may get fixed, however.)
+ */
+#define INPUT_SMOOTHING_SUPPORTED   /* Input image smoothing option? */
+
+/* Decoder capability options: */
+
+#define D_ARITH_CODING_SUPPORTED    /* Arithmetic coding back end? */
+#define D_MULTISCAN_FILES_SUPPORTED /* Multiple-scan JPEG files? */
+#define D_PROGRESSIVE_SUPPORTED	    /* Progressive JPEG? (Requires MULTISCAN)*/
+#define IDCT_SCALING_SUPPORTED	    /* Output rescaling via IDCT? */
+#define SAVE_MARKERS_SUPPORTED	    /* jpeg_save_markers() needed? */
+#define BLOCK_SMOOTHING_SUPPORTED   /* Block smoothing? (Progressive only) */
+#undef  UPSAMPLE_SCALING_SUPPORTED  /* Output rescaling at upsample stage? */
+#define UPSAMPLE_MERGING_SUPPORTED  /* Fast path for sloppy upsampling? */
+#define QUANT_1PASS_SUPPORTED	    /* 1-pass color quantization? */
+#define QUANT_2PASS_SUPPORTED	    /* 2-pass color quantization? */
+
+/* more capability options later, no doubt */
+
+
+/*
+ * Ordering of RGB data in scanlines passed to or from the application.
+ * If your application wants to deal with data in the order B,G,R, just
+ * change these macros.  You can also deal with formats such as R,G,B,X
+ * (one extra byte per pixel) by changing RGB_PIXELSIZE.  Note that changing
+ * the offsets will also change the order in which colormap data is organized.
+ * RESTRICTIONS:
+ * 1. The sample applications cjpeg,djpeg do NOT support modified RGB formats.
+ * 2. The color quantizer modules will not behave desirably if RGB_PIXELSIZE
+ *    is not 3 (they don't understand about dummy color components!).  So you
+ *    can't use color quantization if you change that value.
+ */
+
+#define RGB_RED		0	/* Offset of Red in an RGB scanline element */
+#define RGB_GREEN	1	/* Offset of Green */
+#define RGB_BLUE	2	/* Offset of Blue */
+#define RGB_PIXELSIZE	3	/* JSAMPLEs per RGB scanline element */
+
+
+/* Definitions for speed-related optimizations. */
+
+
+/* If your compiler supports inline functions, define INLINE
+ * as the inline keyword; otherwise define it as empty.
+ */
+
+#ifndef INLINE
+#ifdef __GNUC__			/* for instance, GNU C knows about inline */
+#define INLINE __inline__
+#endif
+#ifndef INLINE
+#define INLINE			/* default is to define it as empty */
+#endif
+#endif
+
+
+/* On some machines (notably 68000 series) "int" is 32 bits, but multiplying
+ * two 16-bit shorts is faster than multiplying two ints.  Define MULTIPLIER
+ * as short on such a machine.  MULTIPLIER must be at least 16 bits wide.
+ */
+
+#ifndef MULTIPLIER
+#define MULTIPLIER  int		/* type for fastest integer multiply */
+#endif
+
+
+/* FAST_FLOAT should be either float or double, whichever is done faster
+ * by your compiler.  (Note that this type is only used in the floating point
+ * DCT routines, so it only matters if you've defined DCT_FLOAT_SUPPORTED.)
+ * Typically, float is faster in ANSI C compilers, while double is faster in
+ * pre-ANSI compilers (because they insist on converting to double anyway).
+ * The code below therefore chooses float if we have ANSI-style prototypes.
+ */
+
+#ifndef FAST_FLOAT
+#ifdef HAVE_PROTOTYPES
+#define FAST_FLOAT  float
+#else
+#define FAST_FLOAT  double
+#endif
+#endif
+
+#endif /* JPEG_INTERNAL_OPTIONS */
diff --git a/plugins/AdvaImg/src/LibJPEG/jpegint.h b/plugins/AdvaImg/src/LibJPEG/jpegint.h
index c0d5c14202..18404efd00 100644
--- a/plugins/AdvaImg/src/LibJPEG/jpegint.h
+++ b/plugins/AdvaImg/src/LibJPEG/jpegint.h
@@ -1,426 +1,426 @@
-/*
- * jpegint.h
- *
- * Copyright (C) 1991-1997, Thomas G. Lane.
- * Modified 1997-2011 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file provides common declarations for the various JPEG modules.
- * These declarations are considered internal to the JPEG library; most
- * applications using the library shouldn't need to include this file.
- */
-
-
-/* Declarations for both compression & decompression */
-
-typedef enum {			/* Operating modes for buffer controllers */
-	JBUF_PASS_THRU,		/* Plain stripwise operation */
-	/* Remaining modes require a full-image buffer to have been created */
-	JBUF_SAVE_SOURCE,	/* Run source subobject only, save output */
-	JBUF_CRANK_DEST,	/* Run dest subobject only, using saved data */
-	JBUF_SAVE_AND_PASS	/* Run both subobjects, save output */
-} J_BUF_MODE;
-
-/* Values of global_state field (jdapi.c has some dependencies on ordering!) */
-#define CSTATE_START	100	/* after create_compress */
-#define CSTATE_SCANNING	101	/* start_compress done, write_scanlines OK */
-#define CSTATE_RAW_OK	102	/* start_compress done, write_raw_data OK */
-#define CSTATE_WRCOEFS	103	/* jpeg_write_coefficients done */
-#define DSTATE_START	200	/* after create_decompress */
-#define DSTATE_INHEADER	201	/* reading header markers, no SOS yet */
-#define DSTATE_READY	202	/* found SOS, ready for start_decompress */
-#define DSTATE_PRELOAD	203	/* reading multiscan file in start_decompress*/
-#define DSTATE_PRESCAN	204	/* performing dummy pass for 2-pass quant */
-#define DSTATE_SCANNING	205	/* start_decompress done, read_scanlines OK */
-#define DSTATE_RAW_OK	206	/* start_decompress done, read_raw_data OK */
-#define DSTATE_BUFIMAGE	207	/* expecting jpeg_start_output */
-#define DSTATE_BUFPOST	208	/* looking for SOS/EOI in jpeg_finish_output */
-#define DSTATE_RDCOEFS	209	/* reading file in jpeg_read_coefficients */
-#define DSTATE_STOPPING	210	/* looking for EOI in jpeg_finish_decompress */
-
-
-/* Declarations for compression modules */
-
-/* Master control module */
-struct jpeg_comp_master {
-  JMETHOD(void, prepare_for_pass, (j_compress_ptr cinfo));
-  JMETHOD(void, pass_startup, (j_compress_ptr cinfo));
-  JMETHOD(void, finish_pass, (j_compress_ptr cinfo));
-
-  /* State variables made visible to other modules */
-  boolean call_pass_startup;	/* True if pass_startup must be called */
-  boolean is_last_pass;		/* True during last pass */
-};
-
-/* Main buffer control (downsampled-data buffer) */
-struct jpeg_c_main_controller {
-  JMETHOD(void, start_pass, (j_compress_ptr cinfo, J_BUF_MODE pass_mode));
-  JMETHOD(void, process_data, (j_compress_ptr cinfo,
-			       JSAMPARRAY input_buf, JDIMENSION *in_row_ctr,
-			       JDIMENSION in_rows_avail));
-};
-
-/* Compression preprocessing (downsampling input buffer control) */
-struct jpeg_c_prep_controller {
-  JMETHOD(void, start_pass, (j_compress_ptr cinfo, J_BUF_MODE pass_mode));
-  JMETHOD(void, pre_process_data, (j_compress_ptr cinfo,
-				   JSAMPARRAY input_buf,
-				   JDIMENSION *in_row_ctr,
-				   JDIMENSION in_rows_avail,
-				   JSAMPIMAGE output_buf,
-				   JDIMENSION *out_row_group_ctr,
-				   JDIMENSION out_row_groups_avail));
-};
-
-/* Coefficient buffer control */
-struct jpeg_c_coef_controller {
-  JMETHOD(void, start_pass, (j_compress_ptr cinfo, J_BUF_MODE pass_mode));
-  JMETHOD(boolean, compress_data, (j_compress_ptr cinfo,
-				   JSAMPIMAGE input_buf));
-};
-
-/* Colorspace conversion */
-struct jpeg_color_converter {
-  JMETHOD(void, start_pass, (j_compress_ptr cinfo));
-  JMETHOD(void, color_convert, (j_compress_ptr cinfo,
-				JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
-				JDIMENSION output_row, int num_rows));
-};
-
-/* Downsampling */
-struct jpeg_downsampler {
-  JMETHOD(void, start_pass, (j_compress_ptr cinfo));
-  JMETHOD(void, downsample, (j_compress_ptr cinfo,
-			     JSAMPIMAGE input_buf, JDIMENSION in_row_index,
-			     JSAMPIMAGE output_buf,
-			     JDIMENSION out_row_group_index));
-
-  boolean need_context_rows;	/* TRUE if need rows above & below */
-};
-
-/* Forward DCT (also controls coefficient quantization) */
-typedef JMETHOD(void, forward_DCT_ptr,
-		(j_compress_ptr cinfo, jpeg_component_info * compptr,
-		 JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
-		 JDIMENSION start_row, JDIMENSION start_col,
-		 JDIMENSION num_blocks));
-
-struct jpeg_forward_dct {
-  JMETHOD(void, start_pass, (j_compress_ptr cinfo));
-  /* It is useful to allow each component to have a separate FDCT method. */
-  forward_DCT_ptr forward_DCT[MAX_COMPONENTS];
-};
-
-/* Entropy encoding */
-struct jpeg_entropy_encoder {
-  JMETHOD(void, start_pass, (j_compress_ptr cinfo, boolean gather_statistics));
-  JMETHOD(boolean, encode_mcu, (j_compress_ptr cinfo, JBLOCKROW *MCU_data));
-  JMETHOD(void, finish_pass, (j_compress_ptr cinfo));
-};
-
-/* Marker writing */
-struct jpeg_marker_writer {
-  JMETHOD(void, write_file_header, (j_compress_ptr cinfo));
-  JMETHOD(void, write_frame_header, (j_compress_ptr cinfo));
-  JMETHOD(void, write_scan_header, (j_compress_ptr cinfo));
-  JMETHOD(void, write_file_trailer, (j_compress_ptr cinfo));
-  JMETHOD(void, write_tables_only, (j_compress_ptr cinfo));
-  /* These routines are exported to allow insertion of extra markers */
-  /* Probably only COM and APPn markers should be written this way */
-  JMETHOD(void, write_marker_header, (j_compress_ptr cinfo, int marker,
-				      unsigned int datalen));
-  JMETHOD(void, write_marker_byte, (j_compress_ptr cinfo, int val));
-};
-
-
-/* Declarations for decompression modules */
-
-/* Master control module */
-struct jpeg_decomp_master {
-  JMETHOD(void, prepare_for_output_pass, (j_decompress_ptr cinfo));
-  JMETHOD(void, finish_output_pass, (j_decompress_ptr cinfo));
-
-  /* State variables made visible to other modules */
-  boolean is_dummy_pass;	/* True during 1st pass for 2-pass quant */
-};
-
-/* Input control module */
-struct jpeg_input_controller {
-  JMETHOD(int, consume_input, (j_decompress_ptr cinfo));
-  JMETHOD(void, reset_input_controller, (j_decompress_ptr cinfo));
-  JMETHOD(void, start_input_pass, (j_decompress_ptr cinfo));
-  JMETHOD(void, finish_input_pass, (j_decompress_ptr cinfo));
-
-  /* State variables made visible to other modules */
-  boolean has_multiple_scans;	/* True if file has multiple scans */
-  boolean eoi_reached;		/* True when EOI has been consumed */
-};
-
-/* Main buffer control (downsampled-data buffer) */
-struct jpeg_d_main_controller {
-  JMETHOD(void, start_pass, (j_decompress_ptr cinfo, J_BUF_MODE pass_mode));
-  JMETHOD(void, process_data, (j_decompress_ptr cinfo,
-			       JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
-			       JDIMENSION out_rows_avail));
-};
-
-/* Coefficient buffer control */
-struct jpeg_d_coef_controller {
-  JMETHOD(void, start_input_pass, (j_decompress_ptr cinfo));
-  JMETHOD(int, consume_data, (j_decompress_ptr cinfo));
-  JMETHOD(void, start_output_pass, (j_decompress_ptr cinfo));
-  JMETHOD(int, decompress_data, (j_decompress_ptr cinfo,
-				 JSAMPIMAGE output_buf));
-  /* Pointer to array of coefficient virtual arrays, or NULL if none */
-  jvirt_barray_ptr *coef_arrays;
-};
-
-/* Decompression postprocessing (color quantization buffer control) */
-struct jpeg_d_post_controller {
-  JMETHOD(void, start_pass, (j_decompress_ptr cinfo, J_BUF_MODE pass_mode));
-  JMETHOD(void, post_process_data, (j_decompress_ptr cinfo,
-				    JSAMPIMAGE input_buf,
-				    JDIMENSION *in_row_group_ctr,
-				    JDIMENSION in_row_groups_avail,
-				    JSAMPARRAY output_buf,
-				    JDIMENSION *out_row_ctr,
-				    JDIMENSION out_rows_avail));
-};
-
-/* Marker reading & parsing */
-struct jpeg_marker_reader {
-  JMETHOD(void, reset_marker_reader, (j_decompress_ptr cinfo));
-  /* Read markers until SOS or EOI.
-   * Returns same codes as are defined for jpeg_consume_input:
-   * JPEG_SUSPENDED, JPEG_REACHED_SOS, or JPEG_REACHED_EOI.
-   */
-  JMETHOD(int, read_markers, (j_decompress_ptr cinfo));
-  /* Read a restart marker --- exported for use by entropy decoder only */
-  jpeg_marker_parser_method read_restart_marker;
-
-  /* State of marker reader --- nominally internal, but applications
-   * supplying COM or APPn handlers might like to know the state.
-   */
-  boolean saw_SOI;		/* found SOI? */
-  boolean saw_SOF;		/* found SOF? */
-  int next_restart_num;		/* next restart number expected (0-7) */
-  unsigned int discarded_bytes;	/* # of bytes skipped looking for a marker */
-};
-
-/* Entropy decoding */
-struct jpeg_entropy_decoder {
-  JMETHOD(void, start_pass, (j_decompress_ptr cinfo));
-  JMETHOD(boolean, decode_mcu, (j_decompress_ptr cinfo,
-				JBLOCKROW *MCU_data));
-};
-
-/* Inverse DCT (also performs dequantization) */
-typedef JMETHOD(void, inverse_DCT_method_ptr,
-		(j_decompress_ptr cinfo, jpeg_component_info * compptr,
-		 JCOEFPTR coef_block,
-		 JSAMPARRAY output_buf, JDIMENSION output_col));
-
-struct jpeg_inverse_dct {
-  JMETHOD(void, start_pass, (j_decompress_ptr cinfo));
-  /* It is useful to allow each component to have a separate IDCT method. */
-  inverse_DCT_method_ptr inverse_DCT[MAX_COMPONENTS];
-};
-
-/* Upsampling (note that upsampler must also call color converter) */
-struct jpeg_upsampler {
-  JMETHOD(void, start_pass, (j_decompress_ptr cinfo));
-  JMETHOD(void, upsample, (j_decompress_ptr cinfo,
-			   JSAMPIMAGE input_buf,
-			   JDIMENSION *in_row_group_ctr,
-			   JDIMENSION in_row_groups_avail,
-			   JSAMPARRAY output_buf,
-			   JDIMENSION *out_row_ctr,
-			   JDIMENSION out_rows_avail));
-
-  boolean need_context_rows;	/* TRUE if need rows above & below */
-};
-
-/* Colorspace conversion */
-struct jpeg_color_deconverter {
-  JMETHOD(void, start_pass, (j_decompress_ptr cinfo));
-  JMETHOD(void, color_convert, (j_decompress_ptr cinfo,
-				JSAMPIMAGE input_buf, JDIMENSION input_row,
-				JSAMPARRAY output_buf, int num_rows));
-};
-
-/* Color quantization or color precision reduction */
-struct jpeg_color_quantizer {
-  JMETHOD(void, start_pass, (j_decompress_ptr cinfo, boolean is_pre_scan));
-  JMETHOD(void, color_quantize, (j_decompress_ptr cinfo,
-				 JSAMPARRAY input_buf, JSAMPARRAY output_buf,
-				 int num_rows));
-  JMETHOD(void, finish_pass, (j_decompress_ptr cinfo));
-  JMETHOD(void, new_color_map, (j_decompress_ptr cinfo));
-};
-
-
-/* Miscellaneous useful macros */
-
-#undef MAX
-#define MAX(a,b)	((a) > (b) ? (a) : (b))
-#undef MIN
-#define MIN(a,b)	((a) < (b) ? (a) : (b))
-
-
-/* We assume that right shift corresponds to signed division by 2 with
- * rounding towards minus infinity.  This is correct for typical "arithmetic
- * shift" instructions that shift in copies of the sign bit.  But some
- * C compilers implement >> with an unsigned shift.  For these machines you
- * must define RIGHT_SHIFT_IS_UNSIGNED.
- * RIGHT_SHIFT provides a proper signed right shift of an INT32 quantity.
- * It is only applied with constant shift counts.  SHIFT_TEMPS must be
- * included in the variables of any routine using RIGHT_SHIFT.
- */
-
-#ifdef RIGHT_SHIFT_IS_UNSIGNED
-#define SHIFT_TEMPS	INT32 shift_temp;
-#define RIGHT_SHIFT(x,shft)  \
-	((shift_temp = (x)) < 0 ? \
-	 (shift_temp >> (shft)) | ((~((INT32) 0)) << (32-(shft))) : \
-	 (shift_temp >> (shft)))
-#else
-#define SHIFT_TEMPS
-#define RIGHT_SHIFT(x,shft)	((x) >> (shft))
-#endif
-
-
-/* Short forms of external names for systems with brain-damaged linkers. */
-
-#ifdef NEED_SHORT_EXTERNAL_NAMES
-#define jinit_compress_master	jICompress
-#define jinit_c_master_control	jICMaster
-#define jinit_c_main_controller	jICMainC
-#define jinit_c_prep_controller	jICPrepC
-#define jinit_c_coef_controller	jICCoefC
-#define jinit_color_converter	jICColor
-#define jinit_downsampler	jIDownsampler
-#define jinit_forward_dct	jIFDCT
-#define jinit_huff_encoder	jIHEncoder
-#define jinit_arith_encoder	jIAEncoder
-#define jinit_marker_writer	jIMWriter
-#define jinit_master_decompress	jIDMaster
-#define jinit_d_main_controller	jIDMainC
-#define jinit_d_coef_controller	jIDCoefC
-#define jinit_d_post_controller	jIDPostC
-#define jinit_input_controller	jIInCtlr
-#define jinit_marker_reader	jIMReader
-#define jinit_huff_decoder	jIHDecoder
-#define jinit_arith_decoder	jIADecoder
-#define jinit_inverse_dct	jIIDCT
-#define jinit_upsampler		jIUpsampler
-#define jinit_color_deconverter	jIDColor
-#define jinit_1pass_quantizer	jI1Quant
-#define jinit_2pass_quantizer	jI2Quant
-#define jinit_merged_upsampler	jIMUpsampler
-#define jinit_memory_mgr	jIMemMgr
-#define jdiv_round_up		jDivRound
-#define jround_up		jRound
-#define jzero_far		jZeroFar
-#define jcopy_sample_rows	jCopySamples
-#define jcopy_block_row		jCopyBlocks
-#define jpeg_zigzag_order	jZIGTable
-#define jpeg_natural_order	jZAGTable
-#define jpeg_natural_order7	jZAG7Table
-#define jpeg_natural_order6	jZAG6Table
-#define jpeg_natural_order5	jZAG5Table
-#define jpeg_natural_order4	jZAG4Table
-#define jpeg_natural_order3	jZAG3Table
-#define jpeg_natural_order2	jZAG2Table
-#define jpeg_aritab		jAriTab
-#endif /* NEED_SHORT_EXTERNAL_NAMES */
-
-
-/* On normal machines we can apply MEMCOPY() and MEMZERO() to sample arrays
- * and coefficient-block arrays.  This won't work on 80x86 because the arrays
- * are FAR and we're assuming a small-pointer memory model.  However, some
- * DOS compilers provide far-pointer versions of memcpy() and memset() even
- * in the small-model libraries.  These will be used if USE_FMEM is defined.
- * Otherwise, the routines in jutils.c do it the hard way.
- */
-
-#ifndef NEED_FAR_POINTERS	/* normal case, same as regular macro */
-#define FMEMZERO(target,size)	MEMZERO(target,size)
-#else				/* 80x86 case */
-#ifdef USE_FMEM
-#define FMEMZERO(target,size)	_fmemset((void FAR *)(target), 0, (size_t)(size))
-#else
-EXTERN(void) jzero_far JPP((void FAR * target, size_t bytestozero));
-#define FMEMZERO(target,size)	jzero_far(target, size)
-#endif
-#endif
-
-
-/* Compression module initialization routines */
-EXTERN(void) jinit_compress_master JPP((j_compress_ptr cinfo));
-EXTERN(void) jinit_c_master_control JPP((j_compress_ptr cinfo,
-					 boolean transcode_only));
-EXTERN(void) jinit_c_main_controller JPP((j_compress_ptr cinfo,
-					  boolean need_full_buffer));
-EXTERN(void) jinit_c_prep_controller JPP((j_compress_ptr cinfo,
-					  boolean need_full_buffer));
-EXTERN(void) jinit_c_coef_controller JPP((j_compress_ptr cinfo,
-					  boolean need_full_buffer));
-EXTERN(void) jinit_color_converter JPP((j_compress_ptr cinfo));
-EXTERN(void) jinit_downsampler JPP((j_compress_ptr cinfo));
-EXTERN(void) jinit_forward_dct JPP((j_compress_ptr cinfo));
-EXTERN(void) jinit_huff_encoder JPP((j_compress_ptr cinfo));
-EXTERN(void) jinit_arith_encoder JPP((j_compress_ptr cinfo));
-EXTERN(void) jinit_marker_writer JPP((j_compress_ptr cinfo));
-/* Decompression module initialization routines */
-EXTERN(void) jinit_master_decompress JPP((j_decompress_ptr cinfo));
-EXTERN(void) jinit_d_main_controller JPP((j_decompress_ptr cinfo,
-					  boolean need_full_buffer));
-EXTERN(void) jinit_d_coef_controller JPP((j_decompress_ptr cinfo,
-					  boolean need_full_buffer));
-EXTERN(void) jinit_d_post_controller JPP((j_decompress_ptr cinfo,
-					  boolean need_full_buffer));
-EXTERN(void) jinit_input_controller JPP((j_decompress_ptr cinfo));
-EXTERN(void) jinit_marker_reader JPP((j_decompress_ptr cinfo));
-EXTERN(void) jinit_huff_decoder JPP((j_decompress_ptr cinfo));
-EXTERN(void) jinit_arith_decoder JPP((j_decompress_ptr cinfo));
-EXTERN(void) jinit_inverse_dct JPP((j_decompress_ptr cinfo));
-EXTERN(void) jinit_upsampler JPP((j_decompress_ptr cinfo));
-EXTERN(void) jinit_color_deconverter JPP((j_decompress_ptr cinfo));
-EXTERN(void) jinit_1pass_quantizer JPP((j_decompress_ptr cinfo));
-EXTERN(void) jinit_2pass_quantizer JPP((j_decompress_ptr cinfo));
-EXTERN(void) jinit_merged_upsampler JPP((j_decompress_ptr cinfo));
-/* Memory manager initialization */
-EXTERN(void) jinit_memory_mgr JPP((j_common_ptr cinfo));
-
-/* Utility routines in jutils.c */
-EXTERN(long) jdiv_round_up JPP((long a, long b));
-EXTERN(long) jround_up JPP((long a, long b));
-EXTERN(void) jcopy_sample_rows JPP((JSAMPARRAY input_array, int source_row,
-				    JSAMPARRAY output_array, int dest_row,
-				    int num_rows, JDIMENSION num_cols));
-EXTERN(void) jcopy_block_row JPP((JBLOCKROW input_row, JBLOCKROW output_row,
-				  JDIMENSION num_blocks));
-/* Constant tables in jutils.c */
-#if 0				/* This table is not actually needed in v6a */
-extern const int jpeg_zigzag_order[]; /* natural coef order to zigzag order */
-#endif
-extern const int jpeg_natural_order[]; /* zigzag coef order to natural order */
-extern const int jpeg_natural_order7[]; /* zz to natural order for 7x7 block */
-extern const int jpeg_natural_order6[]; /* zz to natural order for 6x6 block */
-extern const int jpeg_natural_order5[]; /* zz to natural order for 5x5 block */
-extern const int jpeg_natural_order4[]; /* zz to natural order for 4x4 block */
-extern const int jpeg_natural_order3[]; /* zz to natural order for 3x3 block */
-extern const int jpeg_natural_order2[]; /* zz to natural order for 2x2 block */
-
-/* Arithmetic coding probability estimation tables in jaricom.c */
-extern const INT32 jpeg_aritab[];
-
-/* Suppress undefined-structure complaints if necessary. */
-
-#ifdef INCOMPLETE_TYPES_BROKEN
-#ifndef AM_MEMORY_MANAGER	/* only jmemmgr.c defines these */
-struct jvirt_sarray_control { long dummy; };
-struct jvirt_barray_control { long dummy; };
-#endif
-#endif /* INCOMPLETE_TYPES_BROKEN */
+/*
+ * jpegint.h
+ *
+ * Copyright (C) 1991-1997, Thomas G. Lane.
+ * Modified 1997-2011 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file provides common declarations for the various JPEG modules.
+ * These declarations are considered internal to the JPEG library; most
+ * applications using the library shouldn't need to include this file.
+ */
+
+
+/* Declarations for both compression & decompression */
+
+typedef enum {			/* Operating modes for buffer controllers */
+	JBUF_PASS_THRU,		/* Plain stripwise operation */
+	/* Remaining modes require a full-image buffer to have been created */
+	JBUF_SAVE_SOURCE,	/* Run source subobject only, save output */
+	JBUF_CRANK_DEST,	/* Run dest subobject only, using saved data */
+	JBUF_SAVE_AND_PASS	/* Run both subobjects, save output */
+} J_BUF_MODE;
+
+/* Values of global_state field (jdapi.c has some dependencies on ordering!) */
+#define CSTATE_START	100	/* after create_compress */
+#define CSTATE_SCANNING	101	/* start_compress done, write_scanlines OK */
+#define CSTATE_RAW_OK	102	/* start_compress done, write_raw_data OK */
+#define CSTATE_WRCOEFS	103	/* jpeg_write_coefficients done */
+#define DSTATE_START	200	/* after create_decompress */
+#define DSTATE_INHEADER	201	/* reading header markers, no SOS yet */
+#define DSTATE_READY	202	/* found SOS, ready for start_decompress */
+#define DSTATE_PRELOAD	203	/* reading multiscan file in start_decompress*/
+#define DSTATE_PRESCAN	204	/* performing dummy pass for 2-pass quant */
+#define DSTATE_SCANNING	205	/* start_decompress done, read_scanlines OK */
+#define DSTATE_RAW_OK	206	/* start_decompress done, read_raw_data OK */
+#define DSTATE_BUFIMAGE	207	/* expecting jpeg_start_output */
+#define DSTATE_BUFPOST	208	/* looking for SOS/EOI in jpeg_finish_output */
+#define DSTATE_RDCOEFS	209	/* reading file in jpeg_read_coefficients */
+#define DSTATE_STOPPING	210	/* looking for EOI in jpeg_finish_decompress */
+
+
+/* Declarations for compression modules */
+
+/* Master control module */
+struct jpeg_comp_master {
+  JMETHOD(void, prepare_for_pass, (j_compress_ptr cinfo));
+  JMETHOD(void, pass_startup, (j_compress_ptr cinfo));
+  JMETHOD(void, finish_pass, (j_compress_ptr cinfo));
+
+  /* State variables made visible to other modules */
+  boolean call_pass_startup;	/* True if pass_startup must be called */
+  boolean is_last_pass;		/* True during last pass */
+};
+
+/* Main buffer control (downsampled-data buffer) */
+struct jpeg_c_main_controller {
+  JMETHOD(void, start_pass, (j_compress_ptr cinfo, J_BUF_MODE pass_mode));
+  JMETHOD(void, process_data, (j_compress_ptr cinfo,
+			       JSAMPARRAY input_buf, JDIMENSION *in_row_ctr,
+			       JDIMENSION in_rows_avail));
+};
+
+/* Compression preprocessing (downsampling input buffer control) */
+struct jpeg_c_prep_controller {
+  JMETHOD(void, start_pass, (j_compress_ptr cinfo, J_BUF_MODE pass_mode));
+  JMETHOD(void, pre_process_data, (j_compress_ptr cinfo,
+				   JSAMPARRAY input_buf,
+				   JDIMENSION *in_row_ctr,
+				   JDIMENSION in_rows_avail,
+				   JSAMPIMAGE output_buf,
+				   JDIMENSION *out_row_group_ctr,
+				   JDIMENSION out_row_groups_avail));
+};
+
+/* Coefficient buffer control */
+struct jpeg_c_coef_controller {
+  JMETHOD(void, start_pass, (j_compress_ptr cinfo, J_BUF_MODE pass_mode));
+  JMETHOD(boolean, compress_data, (j_compress_ptr cinfo,
+				   JSAMPIMAGE input_buf));
+};
+
+/* Colorspace conversion */
+struct jpeg_color_converter {
+  JMETHOD(void, start_pass, (j_compress_ptr cinfo));
+  JMETHOD(void, color_convert, (j_compress_ptr cinfo,
+				JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+				JDIMENSION output_row, int num_rows));
+};
+
+/* Downsampling */
+struct jpeg_downsampler {
+  JMETHOD(void, start_pass, (j_compress_ptr cinfo));
+  JMETHOD(void, downsample, (j_compress_ptr cinfo,
+			     JSAMPIMAGE input_buf, JDIMENSION in_row_index,
+			     JSAMPIMAGE output_buf,
+			     JDIMENSION out_row_group_index));
+
+  boolean need_context_rows;	/* TRUE if need rows above & below */
+};
+
+/* Forward DCT (also controls coefficient quantization) */
+typedef JMETHOD(void, forward_DCT_ptr,
+		(j_compress_ptr cinfo, jpeg_component_info * compptr,
+		 JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
+		 JDIMENSION start_row, JDIMENSION start_col,
+		 JDIMENSION num_blocks));
+
+struct jpeg_forward_dct {
+  JMETHOD(void, start_pass, (j_compress_ptr cinfo));
+  /* It is useful to allow each component to have a separate FDCT method. */
+  forward_DCT_ptr forward_DCT[MAX_COMPONENTS];
+};
+
+/* Entropy encoding */
+struct jpeg_entropy_encoder {
+  JMETHOD(void, start_pass, (j_compress_ptr cinfo, boolean gather_statistics));
+  JMETHOD(boolean, encode_mcu, (j_compress_ptr cinfo, JBLOCKROW *MCU_data));
+  JMETHOD(void, finish_pass, (j_compress_ptr cinfo));
+};
+
+/* Marker writing */
+struct jpeg_marker_writer {
+  JMETHOD(void, write_file_header, (j_compress_ptr cinfo));
+  JMETHOD(void, write_frame_header, (j_compress_ptr cinfo));
+  JMETHOD(void, write_scan_header, (j_compress_ptr cinfo));
+  JMETHOD(void, write_file_trailer, (j_compress_ptr cinfo));
+  JMETHOD(void, write_tables_only, (j_compress_ptr cinfo));
+  /* These routines are exported to allow insertion of extra markers */
+  /* Probably only COM and APPn markers should be written this way */
+  JMETHOD(void, write_marker_header, (j_compress_ptr cinfo, int marker,
+				      unsigned int datalen));
+  JMETHOD(void, write_marker_byte, (j_compress_ptr cinfo, int val));
+};
+
+
+/* Declarations for decompression modules */
+
+/* Master control module */
+struct jpeg_decomp_master {
+  JMETHOD(void, prepare_for_output_pass, (j_decompress_ptr cinfo));
+  JMETHOD(void, finish_output_pass, (j_decompress_ptr cinfo));
+
+  /* State variables made visible to other modules */
+  boolean is_dummy_pass;	/* True during 1st pass for 2-pass quant */
+};
+
+/* Input control module */
+struct jpeg_input_controller {
+  JMETHOD(int, consume_input, (j_decompress_ptr cinfo));
+  JMETHOD(void, reset_input_controller, (j_decompress_ptr cinfo));
+  JMETHOD(void, start_input_pass, (j_decompress_ptr cinfo));
+  JMETHOD(void, finish_input_pass, (j_decompress_ptr cinfo));
+
+  /* State variables made visible to other modules */
+  boolean has_multiple_scans;	/* True if file has multiple scans */
+  boolean eoi_reached;		/* True when EOI has been consumed */
+};
+
+/* Main buffer control (downsampled-data buffer) */
+struct jpeg_d_main_controller {
+  JMETHOD(void, start_pass, (j_decompress_ptr cinfo, J_BUF_MODE pass_mode));
+  JMETHOD(void, process_data, (j_decompress_ptr cinfo,
+			       JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
+			       JDIMENSION out_rows_avail));
+};
+
+/* Coefficient buffer control */
+struct jpeg_d_coef_controller {
+  JMETHOD(void, start_input_pass, (j_decompress_ptr cinfo));
+  JMETHOD(int, consume_data, (j_decompress_ptr cinfo));
+  JMETHOD(void, start_output_pass, (j_decompress_ptr cinfo));
+  JMETHOD(int, decompress_data, (j_decompress_ptr cinfo,
+				 JSAMPIMAGE output_buf));
+  /* Pointer to array of coefficient virtual arrays, or NULL if none */
+  jvirt_barray_ptr *coef_arrays;
+};
+
+/* Decompression postprocessing (color quantization buffer control) */
+struct jpeg_d_post_controller {
+  JMETHOD(void, start_pass, (j_decompress_ptr cinfo, J_BUF_MODE pass_mode));
+  JMETHOD(void, post_process_data, (j_decompress_ptr cinfo,
+				    JSAMPIMAGE input_buf,
+				    JDIMENSION *in_row_group_ctr,
+				    JDIMENSION in_row_groups_avail,
+				    JSAMPARRAY output_buf,
+				    JDIMENSION *out_row_ctr,
+				    JDIMENSION out_rows_avail));
+};
+
+/* Marker reading & parsing */
+struct jpeg_marker_reader {
+  JMETHOD(void, reset_marker_reader, (j_decompress_ptr cinfo));
+  /* Read markers until SOS or EOI.
+   * Returns same codes as are defined for jpeg_consume_input:
+   * JPEG_SUSPENDED, JPEG_REACHED_SOS, or JPEG_REACHED_EOI.
+   */
+  JMETHOD(int, read_markers, (j_decompress_ptr cinfo));
+  /* Read a restart marker --- exported for use by entropy decoder only */
+  jpeg_marker_parser_method read_restart_marker;
+
+  /* State of marker reader --- nominally internal, but applications
+   * supplying COM or APPn handlers might like to know the state.
+   */
+  boolean saw_SOI;		/* found SOI? */
+  boolean saw_SOF;		/* found SOF? */
+  int next_restart_num;		/* next restart number expected (0-7) */
+  unsigned int discarded_bytes;	/* # of bytes skipped looking for a marker */
+};
+
+/* Entropy decoding */
+struct jpeg_entropy_decoder {
+  JMETHOD(void, start_pass, (j_decompress_ptr cinfo));
+  JMETHOD(boolean, decode_mcu, (j_decompress_ptr cinfo,
+				JBLOCKROW *MCU_data));
+};
+
+/* Inverse DCT (also performs dequantization) */
+typedef JMETHOD(void, inverse_DCT_method_ptr,
+		(j_decompress_ptr cinfo, jpeg_component_info * compptr,
+		 JCOEFPTR coef_block,
+		 JSAMPARRAY output_buf, JDIMENSION output_col));
+
+struct jpeg_inverse_dct {
+  JMETHOD(void, start_pass, (j_decompress_ptr cinfo));
+  /* It is useful to allow each component to have a separate IDCT method. */
+  inverse_DCT_method_ptr inverse_DCT[MAX_COMPONENTS];
+};
+
+/* Upsampling (note that upsampler must also call color converter) */
+struct jpeg_upsampler {
+  JMETHOD(void, start_pass, (j_decompress_ptr cinfo));
+  JMETHOD(void, upsample, (j_decompress_ptr cinfo,
+			   JSAMPIMAGE input_buf,
+			   JDIMENSION *in_row_group_ctr,
+			   JDIMENSION in_row_groups_avail,
+			   JSAMPARRAY output_buf,
+			   JDIMENSION *out_row_ctr,
+			   JDIMENSION out_rows_avail));
+
+  boolean need_context_rows;	/* TRUE if need rows above & below */
+};
+
+/* Colorspace conversion */
+struct jpeg_color_deconverter {
+  JMETHOD(void, start_pass, (j_decompress_ptr cinfo));
+  JMETHOD(void, color_convert, (j_decompress_ptr cinfo,
+				JSAMPIMAGE input_buf, JDIMENSION input_row,
+				JSAMPARRAY output_buf, int num_rows));
+};
+
+/* Color quantization or color precision reduction */
+struct jpeg_color_quantizer {
+  JMETHOD(void, start_pass, (j_decompress_ptr cinfo, boolean is_pre_scan));
+  JMETHOD(void, color_quantize, (j_decompress_ptr cinfo,
+				 JSAMPARRAY input_buf, JSAMPARRAY output_buf,
+				 int num_rows));
+  JMETHOD(void, finish_pass, (j_decompress_ptr cinfo));
+  JMETHOD(void, new_color_map, (j_decompress_ptr cinfo));
+};
+
+
+/* Miscellaneous useful macros */
+
+#undef MAX
+#define MAX(a,b)	((a) > (b) ? (a) : (b))
+#undef MIN
+#define MIN(a,b)	((a) < (b) ? (a) : (b))
+
+
+/* We assume that right shift corresponds to signed division by 2 with
+ * rounding towards minus infinity.  This is correct for typical "arithmetic
+ * shift" instructions that shift in copies of the sign bit.  But some
+ * C compilers implement >> with an unsigned shift.  For these machines you
+ * must define RIGHT_SHIFT_IS_UNSIGNED.
+ * RIGHT_SHIFT provides a proper signed right shift of an INT32 quantity.
+ * It is only applied with constant shift counts.  SHIFT_TEMPS must be
+ * included in the variables of any routine using RIGHT_SHIFT.
+ */
+
+#ifdef RIGHT_SHIFT_IS_UNSIGNED
+#define SHIFT_TEMPS	INT32 shift_temp;
+#define RIGHT_SHIFT(x,shft)  \
+	((shift_temp = (x)) < 0 ? \
+	 (shift_temp >> (shft)) | ((~((INT32) 0)) << (32-(shft))) : \
+	 (shift_temp >> (shft)))
+#else
+#define SHIFT_TEMPS
+#define RIGHT_SHIFT(x,shft)	((x) >> (shft))
+#endif
+
+
+/* Short forms of external names for systems with brain-damaged linkers. */
+
+#ifdef NEED_SHORT_EXTERNAL_NAMES
+#define jinit_compress_master	jICompress
+#define jinit_c_master_control	jICMaster
+#define jinit_c_main_controller	jICMainC
+#define jinit_c_prep_controller	jICPrepC
+#define jinit_c_coef_controller	jICCoefC
+#define jinit_color_converter	jICColor
+#define jinit_downsampler	jIDownsampler
+#define jinit_forward_dct	jIFDCT
+#define jinit_huff_encoder	jIHEncoder
+#define jinit_arith_encoder	jIAEncoder
+#define jinit_marker_writer	jIMWriter
+#define jinit_master_decompress	jIDMaster
+#define jinit_d_main_controller	jIDMainC
+#define jinit_d_coef_controller	jIDCoefC
+#define jinit_d_post_controller	jIDPostC
+#define jinit_input_controller	jIInCtlr
+#define jinit_marker_reader	jIMReader
+#define jinit_huff_decoder	jIHDecoder
+#define jinit_arith_decoder	jIADecoder
+#define jinit_inverse_dct	jIIDCT
+#define jinit_upsampler		jIUpsampler
+#define jinit_color_deconverter	jIDColor
+#define jinit_1pass_quantizer	jI1Quant
+#define jinit_2pass_quantizer	jI2Quant
+#define jinit_merged_upsampler	jIMUpsampler
+#define jinit_memory_mgr	jIMemMgr
+#define jdiv_round_up		jDivRound
+#define jround_up		jRound
+#define jzero_far		jZeroFar
+#define jcopy_sample_rows	jCopySamples
+#define jcopy_block_row		jCopyBlocks
+#define jpeg_zigzag_order	jZIGTable
+#define jpeg_natural_order	jZAGTable
+#define jpeg_natural_order7	jZAG7Table
+#define jpeg_natural_order6	jZAG6Table
+#define jpeg_natural_order5	jZAG5Table
+#define jpeg_natural_order4	jZAG4Table
+#define jpeg_natural_order3	jZAG3Table
+#define jpeg_natural_order2	jZAG2Table
+#define jpeg_aritab		jAriTab
+#endif /* NEED_SHORT_EXTERNAL_NAMES */
+
+
+/* On normal machines we can apply MEMCOPY() and MEMZERO() to sample arrays
+ * and coefficient-block arrays.  This won't work on 80x86 because the arrays
+ * are FAR and we're assuming a small-pointer memory model.  However, some
+ * DOS compilers provide far-pointer versions of memcpy() and memset() even
+ * in the small-model libraries.  These will be used if USE_FMEM is defined.
+ * Otherwise, the routines in jutils.c do it the hard way.
+ */
+
+#ifndef NEED_FAR_POINTERS	/* normal case, same as regular macro */
+#define FMEMZERO(target,size)	MEMZERO(target,size)
+#else				/* 80x86 case */
+#ifdef USE_FMEM
+#define FMEMZERO(target,size)	_fmemset((void FAR *)(target), 0, (size_t)(size))
+#else
+EXTERN(void) jzero_far JPP((void FAR * target, size_t bytestozero));
+#define FMEMZERO(target,size)	jzero_far(target, size)
+#endif
+#endif
+
+
+/* Compression module initialization routines */
+EXTERN(void) jinit_compress_master JPP((j_compress_ptr cinfo));
+EXTERN(void) jinit_c_master_control JPP((j_compress_ptr cinfo,
+					 boolean transcode_only));
+EXTERN(void) jinit_c_main_controller JPP((j_compress_ptr cinfo,
+					  boolean need_full_buffer));
+EXTERN(void) jinit_c_prep_controller JPP((j_compress_ptr cinfo,
+					  boolean need_full_buffer));
+EXTERN(void) jinit_c_coef_controller JPP((j_compress_ptr cinfo,
+					  boolean need_full_buffer));
+EXTERN(void) jinit_color_converter JPP((j_compress_ptr cinfo));
+EXTERN(void) jinit_downsampler JPP((j_compress_ptr cinfo));
+EXTERN(void) jinit_forward_dct JPP((j_compress_ptr cinfo));
+EXTERN(void) jinit_huff_encoder JPP((j_compress_ptr cinfo));
+EXTERN(void) jinit_arith_encoder JPP((j_compress_ptr cinfo));
+EXTERN(void) jinit_marker_writer JPP((j_compress_ptr cinfo));
+/* Decompression module initialization routines */
+EXTERN(void) jinit_master_decompress JPP((j_decompress_ptr cinfo));
+EXTERN(void) jinit_d_main_controller JPP((j_decompress_ptr cinfo,
+					  boolean need_full_buffer));
+EXTERN(void) jinit_d_coef_controller JPP((j_decompress_ptr cinfo,
+					  boolean need_full_buffer));
+EXTERN(void) jinit_d_post_controller JPP((j_decompress_ptr cinfo,
+					  boolean need_full_buffer));
+EXTERN(void) jinit_input_controller JPP((j_decompress_ptr cinfo));
+EXTERN(void) jinit_marker_reader JPP((j_decompress_ptr cinfo));
+EXTERN(void) jinit_huff_decoder JPP((j_decompress_ptr cinfo));
+EXTERN(void) jinit_arith_decoder JPP((j_decompress_ptr cinfo));
+EXTERN(void) jinit_inverse_dct JPP((j_decompress_ptr cinfo));
+EXTERN(void) jinit_upsampler JPP((j_decompress_ptr cinfo));
+EXTERN(void) jinit_color_deconverter JPP((j_decompress_ptr cinfo));
+EXTERN(void) jinit_1pass_quantizer JPP((j_decompress_ptr cinfo));
+EXTERN(void) jinit_2pass_quantizer JPP((j_decompress_ptr cinfo));
+EXTERN(void) jinit_merged_upsampler JPP((j_decompress_ptr cinfo));
+/* Memory manager initialization */
+EXTERN(void) jinit_memory_mgr JPP((j_common_ptr cinfo));
+
+/* Utility routines in jutils.c */
+EXTERN(long) jdiv_round_up JPP((long a, long b));
+EXTERN(long) jround_up JPP((long a, long b));
+EXTERN(void) jcopy_sample_rows JPP((JSAMPARRAY input_array, int source_row,
+				    JSAMPARRAY output_array, int dest_row,
+				    int num_rows, JDIMENSION num_cols));
+EXTERN(void) jcopy_block_row JPP((JBLOCKROW input_row, JBLOCKROW output_row,
+				  JDIMENSION num_blocks));
+/* Constant tables in jutils.c */
+#if 0				/* This table is not actually needed in v6a */
+extern const int jpeg_zigzag_order[]; /* natural coef order to zigzag order */
+#endif
+extern const int jpeg_natural_order[]; /* zigzag coef order to natural order */
+extern const int jpeg_natural_order7[]; /* zz to natural order for 7x7 block */
+extern const int jpeg_natural_order6[]; /* zz to natural order for 6x6 block */
+extern const int jpeg_natural_order5[]; /* zz to natural order for 5x5 block */
+extern const int jpeg_natural_order4[]; /* zz to natural order for 4x4 block */
+extern const int jpeg_natural_order3[]; /* zz to natural order for 3x3 block */
+extern const int jpeg_natural_order2[]; /* zz to natural order for 2x2 block */
+
+/* Arithmetic coding probability estimation tables in jaricom.c */
+extern const INT32 jpeg_aritab[];
+
+/* Suppress undefined-structure complaints if necessary. */
+
+#ifdef INCOMPLETE_TYPES_BROKEN
+#ifndef AM_MEMORY_MANAGER	/* only jmemmgr.c defines these */
+struct jvirt_sarray_control { long dummy; };
+struct jvirt_barray_control { long dummy; };
+#endif
+#endif /* INCOMPLETE_TYPES_BROKEN */
diff --git a/plugins/AdvaImg/src/LibJPEG/jpeglib.h b/plugins/AdvaImg/src/LibJPEG/jpeglib.h
index 1327cffa96..b5e85d2d73 100644
--- a/plugins/AdvaImg/src/LibJPEG/jpeglib.h
+++ b/plugins/AdvaImg/src/LibJPEG/jpeglib.h
@@ -1,1160 +1,1173 @@
-/*
- * jpeglib.h
- *
- * Copyright (C) 1991-1998, Thomas G. Lane.
- * Modified 2002-2011 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file defines the application interface for the JPEG library.
- * Most applications using the library need only include this file,
- * and perhaps jerror.h if they want to know the exact error codes.
- */
-
-#ifndef JPEGLIB_H
-#define JPEGLIB_H
-
-/*
- * First we include the configuration files that record how this
- * installation of the JPEG library is set up.  jconfig.h can be
- * generated automatically for many systems.  jmorecfg.h contains
- * manual configuration options that most people need not worry about.
- */
-
-#ifndef JCONFIG_INCLUDED	/* in case jinclude.h already did */
-#include "jconfig.h"		/* widely used configuration options */
-#endif
-#include "jmorecfg.h"		/* seldom changed options */
-
-
-#ifdef __cplusplus
-#ifndef DONT_USE_EXTERN_C
-extern "C" {
-#endif
-#endif
-
-/* Version IDs for the JPEG library.
- * Might be useful for tests like "#if JPEG_LIB_VERSION >= 80".
- */
-
-#define JPEG_LIB_VERSION        80	/* Compatibility version 8.0 */
-#define JPEG_LIB_VERSION_MAJOR  8
-#define JPEG_LIB_VERSION_MINOR  4
-
-
-/* Various constants determining the sizes of things.
- * All of these are specified by the JPEG standard, so don't change them
- * if you want to be compatible.
- */
-
-#define DCTSIZE		    8	/* The basic DCT block is 8x8 coefficients */
-#define DCTSIZE2	    64	/* DCTSIZE squared; # of elements in a block */
-#define NUM_QUANT_TBLS      4	/* Quantization tables are numbered 0..3 */
-#define NUM_HUFF_TBLS       4	/* Huffman tables are numbered 0..3 */
-#define NUM_ARITH_TBLS      16	/* Arith-coding tables are numbered 0..15 */
-#define MAX_COMPS_IN_SCAN   4	/* JPEG limit on # of components in one scan */
-#define MAX_SAMP_FACTOR     4	/* JPEG limit on sampling factors */
-/* Unfortunately, some bozo at Adobe saw no reason to be bound by the standard;
- * the PostScript DCT filter can emit files with many more than 10 blocks/MCU.
- * If you happen to run across such a file, you can up D_MAX_BLOCKS_IN_MCU
- * to handle it.  We even let you do this from the jconfig.h file.  However,
- * we strongly discourage changing C_MAX_BLOCKS_IN_MCU; just because Adobe
- * sometimes emits noncompliant files doesn't mean you should too.
- */
-#define C_MAX_BLOCKS_IN_MCU   10 /* compressor's limit on blocks per MCU */
-#ifndef D_MAX_BLOCKS_IN_MCU
-#define D_MAX_BLOCKS_IN_MCU   10 /* decompressor's limit on blocks per MCU */
-#endif
-
-
-/* Data structures for images (arrays of samples and of DCT coefficients).
- * On 80x86 machines, the image arrays are too big for near pointers,
- * but the pointer arrays can fit in near memory.
- */
-
-typedef JSAMPLE FAR *JSAMPROW;	/* ptr to one image row of pixel samples. */
-typedef JSAMPROW *JSAMPARRAY;	/* ptr to some rows (a 2-D sample array) */
-typedef JSAMPARRAY *JSAMPIMAGE;	/* a 3-D sample array: top index is color */
-
-typedef JCOEF JBLOCK[DCTSIZE2];	/* one block of coefficients */
-typedef JBLOCK FAR *JBLOCKROW;	/* pointer to one row of coefficient blocks */
-typedef JBLOCKROW *JBLOCKARRAY;		/* a 2-D array of coefficient blocks */
-typedef JBLOCKARRAY *JBLOCKIMAGE;	/* a 3-D array of coefficient blocks */
-
-typedef JCOEF FAR *JCOEFPTR;	/* useful in a couple of places */
-
-
-/* Types for JPEG compression parameters and working tables. */
-
-
-/* DCT coefficient quantization tables. */
-
-typedef struct {
-  /* This array gives the coefficient quantizers in natural array order
-   * (not the zigzag order in which they are stored in a JPEG DQT marker).
-   * CAUTION: IJG versions prior to v6a kept this array in zigzag order.
-   */
-  UINT16 quantval[DCTSIZE2];	/* quantization step for each coefficient */
-  /* This field is used only during compression.  It's initialized FALSE when
-   * the table is created, and set TRUE when it's been output to the file.
-   * You could suppress output of a table by setting this to TRUE.
-   * (See jpeg_suppress_tables for an example.)
-   */
-  boolean sent_table;		/* TRUE when table has been output */
-} JQUANT_TBL;
-
-
-/* Huffman coding tables. */
-
-typedef struct {
-  /* These two fields directly represent the contents of a JPEG DHT marker */
-  UINT8 bits[17];		/* bits[k] = # of symbols with codes of */
-				/* length k bits; bits[0] is unused */
-  UINT8 huffval[256];		/* The symbols, in order of incr code length */
-  /* This field is used only during compression.  It's initialized FALSE when
-   * the table is created, and set TRUE when it's been output to the file.
-   * You could suppress output of a table by setting this to TRUE.
-   * (See jpeg_suppress_tables for an example.)
-   */
-  boolean sent_table;		/* TRUE when table has been output */
-} JHUFF_TBL;
-
-
-/* Basic info about one component (color channel). */
-
-typedef struct {
-  /* These values are fixed over the whole image. */
-  /* For compression, they must be supplied by parameter setup; */
-  /* for decompression, they are read from the SOF marker. */
-  int component_id;		/* identifier for this component (0..255) */
-  int component_index;		/* its index in SOF or cinfo->comp_info[] */
-  int h_samp_factor;		/* horizontal sampling factor (1..4) */
-  int v_samp_factor;		/* vertical sampling factor (1..4) */
-  int quant_tbl_no;		/* quantization table selector (0..3) */
-  /* These values may vary between scans. */
-  /* For compression, they must be supplied by parameter setup; */
-  /* for decompression, they are read from the SOS marker. */
-  /* The decompressor output side may not use these variables. */
-  int dc_tbl_no;		/* DC entropy table selector (0..3) */
-  int ac_tbl_no;		/* AC entropy table selector (0..3) */
-  
-  /* Remaining fields should be treated as private by applications. */
-  
-  /* These values are computed during compression or decompression startup: */
-  /* Component's size in DCT blocks.
-   * Any dummy blocks added to complete an MCU are not counted; therefore
-   * these values do not depend on whether a scan is interleaved or not.
-   */
-  JDIMENSION width_in_blocks;
-  JDIMENSION height_in_blocks;
-  /* Size of a DCT block in samples,
-   * reflecting any scaling we choose to apply during the DCT step.
-   * Values from 1 to 16 are supported.
-   * Note that different components may receive different DCT scalings.
-   */
-  int DCT_h_scaled_size;
-  int DCT_v_scaled_size;
-  /* The downsampled dimensions are the component's actual, unpadded number
-   * of samples at the main buffer (preprocessing/compression interface);
-   * DCT scaling is included, so
-   * downsampled_width = ceil(image_width * Hi/Hmax * DCT_h_scaled_size/DCTSIZE)
-   * and similarly for height.
-   */
-  JDIMENSION downsampled_width;	 /* actual width in samples */
-  JDIMENSION downsampled_height; /* actual height in samples */
-  /* This flag is used only for decompression.  In cases where some of the
-   * components will be ignored (eg grayscale output from YCbCr image),
-   * we can skip most computations for the unused components.
-   */
-  boolean component_needed;	/* do we need the value of this component? */
-
-  /* These values are computed before starting a scan of the component. */
-  /* The decompressor output side may not use these variables. */
-  int MCU_width;		/* number of blocks per MCU, horizontally */
-  int MCU_height;		/* number of blocks per MCU, vertically */
-  int MCU_blocks;		/* MCU_width * MCU_height */
-  int MCU_sample_width;	/* MCU width in samples: MCU_width * DCT_h_scaled_size */
-  int last_col_width;		/* # of non-dummy blocks across in last MCU */
-  int last_row_height;		/* # of non-dummy blocks down in last MCU */
-
-  /* Saved quantization table for component; NULL if none yet saved.
-   * See jdinput.c comments about the need for this information.
-   * This field is currently used only for decompression.
-   */
-  JQUANT_TBL * quant_table;
-
-  /* Private per-component storage for DCT or IDCT subsystem. */
-  void * dct_table;
-} jpeg_component_info;
-
-
-/* The script for encoding a multiple-scan file is an array of these: */
-
-typedef struct {
-  int comps_in_scan;		/* number of components encoded in this scan */
-  int component_index[MAX_COMPS_IN_SCAN]; /* their SOF/comp_info[] indexes */
-  int Ss, Se;			/* progressive JPEG spectral selection parms */
-  int Ah, Al;			/* progressive JPEG successive approx. parms */
-} jpeg_scan_info;
-
-/* The decompressor can save APPn and COM markers in a list of these: */
-
-typedef struct jpeg_marker_struct FAR * jpeg_saved_marker_ptr;
-
-struct jpeg_marker_struct {
-  jpeg_saved_marker_ptr next;	/* next in list, or NULL */
-  UINT8 marker;			/* marker code: JPEG_COM, or JPEG_APP0+n */
-  unsigned int original_length;	/* # bytes of data in the file */
-  unsigned int data_length;	/* # bytes of data saved at data[] */
-  JOCTET FAR * data;		/* the data contained in the marker */
-  /* the marker length word is not counted in data_length or original_length */
-};
-
-/* Known color spaces. */
-
-typedef enum {
-	JCS_UNKNOWN,		/* error/unspecified */
-	JCS_GRAYSCALE,		/* monochrome */
-	JCS_RGB,		/* red/green/blue */
-	JCS_YCbCr,		/* Y/Cb/Cr (also known as YUV) */
-	JCS_CMYK,		/* C/M/Y/K */
-	JCS_YCCK		/* Y/Cb/Cr/K */
-} J_COLOR_SPACE;
-
-/* DCT/IDCT algorithm options. */
-
-typedef enum {
-	JDCT_ISLOW,		/* slow but accurate integer algorithm */
-	JDCT_IFAST,		/* faster, less accurate integer method */
-	JDCT_FLOAT		/* floating-point: accurate, fast on fast HW */
-} J_DCT_METHOD;
-
-#ifndef JDCT_DEFAULT		/* may be overridden in jconfig.h */
-#define JDCT_DEFAULT  JDCT_ISLOW
-#endif
-#ifndef JDCT_FASTEST		/* may be overridden in jconfig.h */
-#define JDCT_FASTEST  JDCT_IFAST
-#endif
-
-/* Dithering options for decompression. */
-
-typedef enum {
-	JDITHER_NONE,		/* no dithering */
-	JDITHER_ORDERED,	/* simple ordered dither */
-	JDITHER_FS		/* Floyd-Steinberg error diffusion dither */
-} J_DITHER_MODE;
-
-
-/* Common fields between JPEG compression and decompression master structs. */
-
-#define jpeg_common_fields \
-  struct jpeg_error_mgr * err;	/* Error handler module */\
-  struct jpeg_memory_mgr * mem;	/* Memory manager module */\
-  struct jpeg_progress_mgr * progress; /* Progress monitor, or NULL if none */\
-  void * client_data;		/* Available for use by application */\
-  boolean is_decompressor;	/* So common code can tell which is which */\
-  int global_state		/* For checking call sequence validity */
-
-/* Routines that are to be used by both halves of the library are declared
- * to receive a pointer to this structure.  There are no actual instances of
- * jpeg_common_struct, only of jpeg_compress_struct and jpeg_decompress_struct.
- */
-struct jpeg_common_struct {
-  jpeg_common_fields;		/* Fields common to both master struct types */
-  /* Additional fields follow in an actual jpeg_compress_struct or
-   * jpeg_decompress_struct.  All three structs must agree on these
-   * initial fields!  (This would be a lot cleaner in C++.)
-   */
-};
-
-typedef struct jpeg_common_struct * j_common_ptr;
-typedef struct jpeg_compress_struct * j_compress_ptr;
-typedef struct jpeg_decompress_struct * j_decompress_ptr;
-
-
-/* Master record for a compression instance */
-
-struct jpeg_compress_struct {
-  jpeg_common_fields;		/* Fields shared with jpeg_decompress_struct */
-
-  /* Destination for compressed data */
-  struct jpeg_destination_mgr * dest;
-
-  /* Description of source image --- these fields must be filled in by
-   * outer application before starting compression.  in_color_space must
-   * be correct before you can even call jpeg_set_defaults().
-   */
-
-  JDIMENSION image_width;	/* input image width */
-  JDIMENSION image_height;	/* input image height */
-  int input_components;		/* # of color components in input image */
-  J_COLOR_SPACE in_color_space;	/* colorspace of input image */
-
-  double input_gamma;		/* image gamma of input image */
-
-  /* Compression parameters --- these fields must be set before calling
-   * jpeg_start_compress().  We recommend calling jpeg_set_defaults() to
-   * initialize everything to reasonable defaults, then changing anything
-   * the application specifically wants to change.  That way you won't get
-   * burnt when new parameters are added.  Also note that there are several
-   * helper routines to simplify changing parameters.
-   */
-
-  unsigned int scale_num, scale_denom; /* fraction by which to scale image */
-
-  JDIMENSION jpeg_width;	/* scaled JPEG image width */
-  JDIMENSION jpeg_height;	/* scaled JPEG image height */
-  /* Dimensions of actual JPEG image that will be written to file,
-   * derived from input dimensions by scaling factors above.
-   * These fields are computed by jpeg_start_compress().
-   * You can also use jpeg_calc_jpeg_dimensions() to determine these values
-   * in advance of calling jpeg_start_compress().
-   */
-
-  int data_precision;		/* bits of precision in image data */
-
-  int num_components;		/* # of color components in JPEG image */
-  J_COLOR_SPACE jpeg_color_space; /* colorspace of JPEG image */
-
-  jpeg_component_info * comp_info;
-  /* comp_info[i] describes component that appears i'th in SOF */
-
-  JQUANT_TBL * quant_tbl_ptrs[NUM_QUANT_TBLS];
-  int q_scale_factor[NUM_QUANT_TBLS];
-  /* ptrs to coefficient quantization tables, or NULL if not defined,
-   * and corresponding scale factors (percentage, initialized 100).
-   */
-
-  JHUFF_TBL * dc_huff_tbl_ptrs[NUM_HUFF_TBLS];
-  JHUFF_TBL * ac_huff_tbl_ptrs[NUM_HUFF_TBLS];
-  /* ptrs to Huffman coding tables, or NULL if not defined */
-
-  UINT8 arith_dc_L[NUM_ARITH_TBLS]; /* L values for DC arith-coding tables */
-  UINT8 arith_dc_U[NUM_ARITH_TBLS]; /* U values for DC arith-coding tables */
-  UINT8 arith_ac_K[NUM_ARITH_TBLS]; /* Kx values for AC arith-coding tables */
-
-  int num_scans;		/* # of entries in scan_info array */
-  const jpeg_scan_info * scan_info; /* script for multi-scan file, or NULL */
-  /* The default value of scan_info is NULL, which causes a single-scan
-   * sequential JPEG file to be emitted.  To create a multi-scan file,
-   * set num_scans and scan_info to point to an array of scan definitions.
-   */
-
-  boolean raw_data_in;		/* TRUE=caller supplies downsampled data */
-  boolean arith_code;		/* TRUE=arithmetic coding, FALSE=Huffman */
-  boolean optimize_coding;	/* TRUE=optimize entropy encoding parms */
-  boolean CCIR601_sampling;	/* TRUE=first samples are cosited */
-  boolean do_fancy_downsampling; /* TRUE=apply fancy downsampling */
-  int smoothing_factor;		/* 1..100, or 0 for no input smoothing */
-  J_DCT_METHOD dct_method;	/* DCT algorithm selector */
-
-  /* The restart interval can be specified in absolute MCUs by setting
-   * restart_interval, or in MCU rows by setting restart_in_rows
-   * (in which case the correct restart_interval will be figured
-   * for each scan).
-   */
-  unsigned int restart_interval; /* MCUs per restart, or 0 for no restart */
-  int restart_in_rows;		/* if > 0, MCU rows per restart interval */
-
-  /* Parameters controlling emission of special markers. */
-
-  boolean write_JFIF_header;	/* should a JFIF marker be written? */
-  UINT8 JFIF_major_version;	/* What to write for the JFIF version number */
-  UINT8 JFIF_minor_version;
-  /* These three values are not used by the JPEG code, merely copied */
-  /* into the JFIF APP0 marker.  density_unit can be 0 for unknown, */
-  /* 1 for dots/inch, or 2 for dots/cm.  Note that the pixel aspect */
-  /* ratio is defined by X_density/Y_density even when density_unit=0. */
-  UINT8 density_unit;		/* JFIF code for pixel size units */
-  UINT16 X_density;		/* Horizontal pixel density */
-  UINT16 Y_density;		/* Vertical pixel density */
-  boolean write_Adobe_marker;	/* should an Adobe marker be written? */
-  
-  /* State variable: index of next scanline to be written to
-   * jpeg_write_scanlines().  Application may use this to control its
-   * processing loop, e.g., "while (next_scanline < image_height)".
-   */
-
-  JDIMENSION next_scanline;	/* 0 .. image_height-1  */
-
-  /* Remaining fields are known throughout compressor, but generally
-   * should not be touched by a surrounding application.
-   */
-
-  /*
-   * These fields are computed during compression startup
-   */
-  boolean progressive_mode;	/* TRUE if scan script uses progressive mode */
-  int max_h_samp_factor;	/* largest h_samp_factor */
-  int max_v_samp_factor;	/* largest v_samp_factor */
-
-  int min_DCT_h_scaled_size;	/* smallest DCT_h_scaled_size of any component */
-  int min_DCT_v_scaled_size;	/* smallest DCT_v_scaled_size of any component */
-
-  JDIMENSION total_iMCU_rows;	/* # of iMCU rows to be input to coef ctlr */
-  /* The coefficient controller receives data in units of MCU rows as defined
-   * for fully interleaved scans (whether the JPEG file is interleaved or not).
-   * There are v_samp_factor * DCTSIZE sample rows of each component in an
-   * "iMCU" (interleaved MCU) row.
-   */
-  
-  /*
-   * These fields are valid during any one scan.
-   * They describe the components and MCUs actually appearing in the scan.
-   */
-  int comps_in_scan;		/* # of JPEG components in this scan */
-  jpeg_component_info * cur_comp_info[MAX_COMPS_IN_SCAN];
-  /* *cur_comp_info[i] describes component that appears i'th in SOS */
-  
-  JDIMENSION MCUs_per_row;	/* # of MCUs across the image */
-  JDIMENSION MCU_rows_in_scan;	/* # of MCU rows in the image */
-  
-  int blocks_in_MCU;		/* # of DCT blocks per MCU */
-  int MCU_membership[C_MAX_BLOCKS_IN_MCU];
-  /* MCU_membership[i] is index in cur_comp_info of component owning */
-  /* i'th block in an MCU */
-
-  int Ss, Se, Ah, Al;		/* progressive JPEG parameters for scan */
-
-  int block_size;		/* the basic DCT block size: 1..16 */
-  const int * natural_order;	/* natural-order position array */
-  int lim_Se;			/* min( Se, DCTSIZE2-1 ) */
-
-  /*
-   * Links to compression subobjects (methods and private variables of modules)
-   */
-  struct jpeg_comp_master * master;
-  struct jpeg_c_main_controller * main;
-  struct jpeg_c_prep_controller * prep;
-  struct jpeg_c_coef_controller * coef;
-  struct jpeg_marker_writer * marker;
-  struct jpeg_color_converter * cconvert;
-  struct jpeg_downsampler * downsample;
-  struct jpeg_forward_dct * fdct;
-  struct jpeg_entropy_encoder * entropy;
-  jpeg_scan_info * script_space; /* workspace for jpeg_simple_progression */
-  int script_space_size;
-};
-
-
-/* Master record for a decompression instance */
-
-struct jpeg_decompress_struct {
-  jpeg_common_fields;		/* Fields shared with jpeg_compress_struct */
-
-  /* Source of compressed data */
-  struct jpeg_source_mgr * src;
-
-  /* Basic description of image --- filled in by jpeg_read_header(). */
-  /* Application may inspect these values to decide how to process image. */
-
-  JDIMENSION image_width;	/* nominal image width (from SOF marker) */
-  JDIMENSION image_height;	/* nominal image height */
-  int num_components;		/* # of color components in JPEG image */
-  J_COLOR_SPACE jpeg_color_space; /* colorspace of JPEG image */
-
-  /* Decompression processing parameters --- these fields must be set before
-   * calling jpeg_start_decompress().  Note that jpeg_read_header() initializes
-   * them to default values.
-   */
-
-  J_COLOR_SPACE out_color_space; /* colorspace for output */
-
-  unsigned int scale_num, scale_denom; /* fraction by which to scale image */
-
-  double output_gamma;		/* image gamma wanted in output */
-
-  boolean buffered_image;	/* TRUE=multiple output passes */
-  boolean raw_data_out;		/* TRUE=downsampled data wanted */
-
-  J_DCT_METHOD dct_method;	/* IDCT algorithm selector */
-  boolean do_fancy_upsampling;	/* TRUE=apply fancy upsampling */
-  boolean do_block_smoothing;	/* TRUE=apply interblock smoothing */
-
-  boolean quantize_colors;	/* TRUE=colormapped output wanted */
-  /* the following are ignored if not quantize_colors: */
-  J_DITHER_MODE dither_mode;	/* type of color dithering to use */
-  boolean two_pass_quantize;	/* TRUE=use two-pass color quantization */
-  int desired_number_of_colors;	/* max # colors to use in created colormap */
-  /* these are significant only in buffered-image mode: */
-  boolean enable_1pass_quant;	/* enable future use of 1-pass quantizer */
-  boolean enable_external_quant;/* enable future use of external colormap */
-  boolean enable_2pass_quant;	/* enable future use of 2-pass quantizer */
-
-  /* Description of actual output image that will be returned to application.
-   * These fields are computed by jpeg_start_decompress().
-   * You can also use jpeg_calc_output_dimensions() to determine these values
-   * in advance of calling jpeg_start_decompress().
-   */
-
-  JDIMENSION output_width;	/* scaled image width */
-  JDIMENSION output_height;	/* scaled image height */
-  int out_color_components;	/* # of color components in out_color_space */
-  int output_components;	/* # of color components returned */
-  /* output_components is 1 (a colormap index) when quantizing colors;
-   * otherwise it equals out_color_components.
-   */
-  int rec_outbuf_height;	/* min recommended height of scanline buffer */
-  /* If the buffer passed to jpeg_read_scanlines() is less than this many rows
-   * high, space and time will be wasted due to unnecessary data copying.
-   * Usually rec_outbuf_height will be 1 or 2, at most 4.
-   */
-
-  /* When quantizing colors, the output colormap is described by these fields.
-   * The application can supply a colormap by setting colormap non-NULL before
-   * calling jpeg_start_decompress; otherwise a colormap is created during
-   * jpeg_start_decompress or jpeg_start_output.
-   * The map has out_color_components rows and actual_number_of_colors columns.
-   */
-  int actual_number_of_colors;	/* number of entries in use */
-  JSAMPARRAY colormap;		/* The color map as a 2-D pixel array */
-
-  /* State variables: these variables indicate the progress of decompression.
-   * The application may examine these but must not modify them.
-   */
-
-  /* Row index of next scanline to be read from jpeg_read_scanlines().
-   * Application may use this to control its processing loop, e.g.,
-   * "while (output_scanline < output_height)".
-   */
-  JDIMENSION output_scanline;	/* 0 .. output_height-1  */
-
-  /* Current input scan number and number of iMCU rows completed in scan.
-   * These indicate the progress of the decompressor input side.
-   */
-  int input_scan_number;	/* Number of SOS markers seen so far */
-  JDIMENSION input_iMCU_row;	/* Number of iMCU rows completed */
-
-  /* The "output scan number" is the notional scan being displayed by the
-   * output side.  The decompressor will not allow output scan/row number
-   * to get ahead of input scan/row, but it can fall arbitrarily far behind.
-   */
-  int output_scan_number;	/* Nominal scan number being displayed */
-  JDIMENSION output_iMCU_row;	/* Number of iMCU rows read */
-
-  /* Current progression status.  coef_bits[c][i] indicates the precision
-   * with which component c's DCT coefficient i (in zigzag order) is known.
-   * It is -1 when no data has yet been received, otherwise it is the point
-   * transform (shift) value for the most recent scan of the coefficient
-   * (thus, 0 at completion of the progression).
-   * This pointer is NULL when reading a non-progressive file.
-   */
-  int (*coef_bits)[DCTSIZE2];	/* -1 or current Al value for each coef */
-
-  /* Internal JPEG parameters --- the application usually need not look at
-   * these fields.  Note that the decompressor output side may not use
-   * any parameters that can change between scans.
-   */
-
-  /* Quantization and Huffman tables are carried forward across input
-   * datastreams when processing abbreviated JPEG datastreams.
-   */
-
-  JQUANT_TBL * quant_tbl_ptrs[NUM_QUANT_TBLS];
-  /* ptrs to coefficient quantization tables, or NULL if not defined */
-
-  JHUFF_TBL * dc_huff_tbl_ptrs[NUM_HUFF_TBLS];
-  JHUFF_TBL * ac_huff_tbl_ptrs[NUM_HUFF_TBLS];
-  /* ptrs to Huffman coding tables, or NULL if not defined */
-
-  /* These parameters are never carried across datastreams, since they
-   * are given in SOF/SOS markers or defined to be reset by SOI.
-   */
-
-  int data_precision;		/* bits of precision in image data */
-
-  jpeg_component_info * comp_info;
-  /* comp_info[i] describes component that appears i'th in SOF */
-
-  boolean is_baseline;		/* TRUE if Baseline SOF0 encountered */
-  boolean progressive_mode;	/* TRUE if SOFn specifies progressive mode */
-  boolean arith_code;		/* TRUE=arithmetic coding, FALSE=Huffman */
-
-  UINT8 arith_dc_L[NUM_ARITH_TBLS]; /* L values for DC arith-coding tables */
-  UINT8 arith_dc_U[NUM_ARITH_TBLS]; /* U values for DC arith-coding tables */
-  UINT8 arith_ac_K[NUM_ARITH_TBLS]; /* Kx values for AC arith-coding tables */
-
-  unsigned int restart_interval; /* MCUs per restart interval, or 0 for no restart */
-
-  /* These fields record data obtained from optional markers recognized by
-   * the JPEG library.
-   */
-  boolean saw_JFIF_marker;	/* TRUE iff a JFIF APP0 marker was found */
-  /* Data copied from JFIF marker; only valid if saw_JFIF_marker is TRUE: */
-  UINT8 JFIF_major_version;	/* JFIF version number */
-  UINT8 JFIF_minor_version;
-  UINT8 density_unit;		/* JFIF code for pixel size units */
-  UINT16 X_density;		/* Horizontal pixel density */
-  UINT16 Y_density;		/* Vertical pixel density */
-  boolean saw_Adobe_marker;	/* TRUE iff an Adobe APP14 marker was found */
-  UINT8 Adobe_transform;	/* Color transform code from Adobe marker */
-
-  boolean CCIR601_sampling;	/* TRUE=first samples are cosited */
-
-  /* Aside from the specific data retained from APPn markers known to the
-   * library, the uninterpreted contents of any or all APPn and COM markers
-   * can be saved in a list for examination by the application.
-   */
-  jpeg_saved_marker_ptr marker_list; /* Head of list of saved markers */
-
-  /* Remaining fields are known throughout decompressor, but generally
-   * should not be touched by a surrounding application.
-   */
-
-  /*
-   * These fields are computed during decompression startup
-   */
-  int max_h_samp_factor;	/* largest h_samp_factor */
-  int max_v_samp_factor;	/* largest v_samp_factor */
-
-  int min_DCT_h_scaled_size;	/* smallest DCT_h_scaled_size of any component */
-  int min_DCT_v_scaled_size;	/* smallest DCT_v_scaled_size of any component */
-
-  JDIMENSION total_iMCU_rows;	/* # of iMCU rows in image */
-  /* The coefficient controller's input and output progress is measured in
-   * units of "iMCU" (interleaved MCU) rows.  These are the same as MCU rows
-   * in fully interleaved JPEG scans, but are used whether the scan is
-   * interleaved or not.  We define an iMCU row as v_samp_factor DCT block
-   * rows of each component.  Therefore, the IDCT output contains
-   * v_samp_factor*DCT_v_scaled_size sample rows of a component per iMCU row.
-   */
-
-  JSAMPLE * sample_range_limit; /* table for fast range-limiting */
-
-  /*
-   * These fields are valid during any one scan.
-   * They describe the components and MCUs actually appearing in the scan.
-   * Note that the decompressor output side must not use these fields.
-   */
-  int comps_in_scan;		/* # of JPEG components in this scan */
-  jpeg_component_info * cur_comp_info[MAX_COMPS_IN_SCAN];
-  /* *cur_comp_info[i] describes component that appears i'th in SOS */
-
-  JDIMENSION MCUs_per_row;	/* # of MCUs across the image */
-  JDIMENSION MCU_rows_in_scan;	/* # of MCU rows in the image */
-
-  int blocks_in_MCU;		/* # of DCT blocks per MCU */
-  int MCU_membership[D_MAX_BLOCKS_IN_MCU];
-  /* MCU_membership[i] is index in cur_comp_info of component owning */
-  /* i'th block in an MCU */
-
-  int Ss, Se, Ah, Al;		/* progressive JPEG parameters for scan */
-
-  /* These fields are derived from Se of first SOS marker.
-   */
-  int block_size;		/* the basic DCT block size: 1..16 */
-  const int * natural_order; /* natural-order position array for entropy decode */
-  int lim_Se;			/* min( Se, DCTSIZE2-1 ) for entropy decode */
-
-  /* This field is shared between entropy decoder and marker parser.
-   * It is either zero or the code of a JPEG marker that has been
-   * read from the data source, but has not yet been processed.
-   */
-  int unread_marker;
-
-  /*
-   * Links to decompression subobjects (methods, private variables of modules)
-   */
-  struct jpeg_decomp_master * master;
-  struct jpeg_d_main_controller * main;
-  struct jpeg_d_coef_controller * coef;
-  struct jpeg_d_post_controller * post;
-  struct jpeg_input_controller * inputctl;
-  struct jpeg_marker_reader * marker;
-  struct jpeg_entropy_decoder * entropy;
-  struct jpeg_inverse_dct * idct;
-  struct jpeg_upsampler * upsample;
-  struct jpeg_color_deconverter * cconvert;
-  struct jpeg_color_quantizer * cquantize;
-};
-
-
-/* "Object" declarations for JPEG modules that may be supplied or called
- * directly by the surrounding application.
- * As with all objects in the JPEG library, these structs only define the
- * publicly visible methods and state variables of a module.  Additional
- * private fields may exist after the public ones.
- */
-
-
-/* Error handler object */
-
-struct jpeg_error_mgr {
-  /* Error exit handler: does not return to caller */
-  JMETHOD(void, error_exit, (j_common_ptr cinfo));
-  /* Conditionally emit a trace or warning message */
-  JMETHOD(void, emit_message, (j_common_ptr cinfo, int msg_level));
-  /* Routine that actually outputs a trace or error message */
-  JMETHOD(void, output_message, (j_common_ptr cinfo));
-  /* Format a message string for the most recent JPEG error or message */
-  JMETHOD(void, format_message, (j_common_ptr cinfo, char * buffer));
-#define JMSG_LENGTH_MAX  200	/* recommended size of format_message buffer */
-  /* Reset error state variables at start of a new image */
-  JMETHOD(void, reset_error_mgr, (j_common_ptr cinfo));
-  
-  /* The message ID code and any parameters are saved here.
-   * A message can have one string parameter or up to 8 int parameters.
-   */
-  int msg_code;
-#define JMSG_STR_PARM_MAX  80
-  union {
-    int i[8];
-    char s[JMSG_STR_PARM_MAX];
-  } msg_parm;
-  
-  /* Standard state variables for error facility */
-  
-  int trace_level;		/* max msg_level that will be displayed */
-  
-  /* For recoverable corrupt-data errors, we emit a warning message,
-   * but keep going unless emit_message chooses to abort.  emit_message
-   * should count warnings in num_warnings.  The surrounding application
-   * can check for bad data by seeing if num_warnings is nonzero at the
-   * end of processing.
-   */
-  long num_warnings;		/* number of corrupt-data warnings */
-
-  /* These fields point to the table(s) of error message strings.
-   * An application can change the table pointer to switch to a different
-   * message list (typically, to change the language in which errors are
-   * reported).  Some applications may wish to add additional error codes
-   * that will be handled by the JPEG library error mechanism; the second
-   * table pointer is used for this purpose.
-   *
-   * First table includes all errors generated by JPEG library itself.
-   * Error code 0 is reserved for a "no such error string" message.
-   */
-  const char * const * jpeg_message_table; /* Library errors */
-  int last_jpeg_message;    /* Table contains strings 0..last_jpeg_message */
-  /* Second table can be added by application (see cjpeg/djpeg for example).
-   * It contains strings numbered first_addon_message..last_addon_message.
-   */
-  const char * const * addon_message_table; /* Non-library errors */
-  int first_addon_message;	/* code for first string in addon table */
-  int last_addon_message;	/* code for last string in addon table */
-};
-
-
-/* Progress monitor object */
-
-struct jpeg_progress_mgr {
-  JMETHOD(void, progress_monitor, (j_common_ptr cinfo));
-
-  long pass_counter;		/* work units completed in this pass */
-  long pass_limit;		/* total number of work units in this pass */
-  int completed_passes;		/* passes completed so far */
-  int total_passes;		/* total number of passes expected */
-};
-
-
-/* Data destination object for compression */
-
-struct jpeg_destination_mgr {
-  JOCTET * next_output_byte;	/* => next byte to write in buffer */
-  size_t free_in_buffer;	/* # of byte spaces remaining in buffer */
-
-  JMETHOD(void, init_destination, (j_compress_ptr cinfo));
-  JMETHOD(boolean, empty_output_buffer, (j_compress_ptr cinfo));
-  JMETHOD(void, term_destination, (j_compress_ptr cinfo));
-};
-
-
-/* Data source object for decompression */
-
-struct jpeg_source_mgr {
-  const JOCTET * next_input_byte; /* => next byte to read from buffer */
-  size_t bytes_in_buffer;	/* # of bytes remaining in buffer */
-
-  JMETHOD(void, init_source, (j_decompress_ptr cinfo));
-  JMETHOD(boolean, fill_input_buffer, (j_decompress_ptr cinfo));
-  JMETHOD(void, skip_input_data, (j_decompress_ptr cinfo, long num_bytes));
-  JMETHOD(boolean, resync_to_restart, (j_decompress_ptr cinfo, int desired));
-  JMETHOD(void, term_source, (j_decompress_ptr cinfo));
-};
-
-
-/* Memory manager object.
- * Allocates "small" objects (a few K total), "large" objects (tens of K),
- * and "really big" objects (virtual arrays with backing store if needed).
- * The memory manager does not allow individual objects to be freed; rather,
- * each created object is assigned to a pool, and whole pools can be freed
- * at once.  This is faster and more convenient than remembering exactly what
- * to free, especially where malloc()/free() are not too speedy.
- * NB: alloc routines never return NULL.  They exit to error_exit if not
- * successful.
- */
-
-#define JPOOL_PERMANENT	0	/* lasts until master record is destroyed */
-#define JPOOL_IMAGE	1	/* lasts until done with image/datastream */
-#define JPOOL_NUMPOOLS	2
-
-typedef struct jvirt_sarray_control * jvirt_sarray_ptr;
-typedef struct jvirt_barray_control * jvirt_barray_ptr;
-
-
-struct jpeg_memory_mgr {
-  /* Method pointers */
-  JMETHOD(void *, alloc_small, (j_common_ptr cinfo, int pool_id,
-				size_t sizeofobject));
-  JMETHOD(void FAR *, alloc_large, (j_common_ptr cinfo, int pool_id,
-				     size_t sizeofobject));
-  JMETHOD(JSAMPARRAY, alloc_sarray, (j_common_ptr cinfo, int pool_id,
-				     JDIMENSION samplesperrow,
-				     JDIMENSION numrows));
-  JMETHOD(JBLOCKARRAY, alloc_barray, (j_common_ptr cinfo, int pool_id,
-				      JDIMENSION blocksperrow,
-				      JDIMENSION numrows));
-  JMETHOD(jvirt_sarray_ptr, request_virt_sarray, (j_common_ptr cinfo,
-						  int pool_id,
-						  boolean pre_zero,
-						  JDIMENSION samplesperrow,
-						  JDIMENSION numrows,
-						  JDIMENSION maxaccess));
-  JMETHOD(jvirt_barray_ptr, request_virt_barray, (j_common_ptr cinfo,
-						  int pool_id,
-						  boolean pre_zero,
-						  JDIMENSION blocksperrow,
-						  JDIMENSION numrows,
-						  JDIMENSION maxaccess));
-  JMETHOD(void, realize_virt_arrays, (j_common_ptr cinfo));
-  JMETHOD(JSAMPARRAY, access_virt_sarray, (j_common_ptr cinfo,
-					   jvirt_sarray_ptr ptr,
-					   JDIMENSION start_row,
-					   JDIMENSION num_rows,
-					   boolean writable));
-  JMETHOD(JBLOCKARRAY, access_virt_barray, (j_common_ptr cinfo,
-					    jvirt_barray_ptr ptr,
-					    JDIMENSION start_row,
-					    JDIMENSION num_rows,
-					    boolean writable));
-  JMETHOD(void, free_pool, (j_common_ptr cinfo, int pool_id));
-  JMETHOD(void, self_destruct, (j_common_ptr cinfo));
-
-  /* Limit on memory allocation for this JPEG object.  (Note that this is
-   * merely advisory, not a guaranteed maximum; it only affects the space
-   * used for virtual-array buffers.)  May be changed by outer application
-   * after creating the JPEG object.
-   */
-  long max_memory_to_use;
-
-  /* Maximum allocation request accepted by alloc_large. */
-  long max_alloc_chunk;
-};
-
-
-/* Routine signature for application-supplied marker processing methods.
- * Need not pass marker code since it is stored in cinfo->unread_marker.
- */
-typedef JMETHOD(boolean, jpeg_marker_parser_method, (j_decompress_ptr cinfo));
-
-
-/* Declarations for routines called by application.
- * The JPP macro hides prototype parameters from compilers that can't cope.
- * Note JPP requires double parentheses.
- */
-
-#ifdef HAVE_PROTOTYPES
-#define JPP(arglist)	arglist
-#else
-#define JPP(arglist)	()
-#endif
-
-
-/* Short forms of external names for systems with brain-damaged linkers.
- * We shorten external names to be unique in the first six letters, which
- * is good enough for all known systems.
- * (If your compiler itself needs names to be unique in less than 15 
- * characters, you are out of luck.  Get a better compiler.)
- */
-
-#ifdef NEED_SHORT_EXTERNAL_NAMES
-#define jpeg_std_error		jStdError
-#define jpeg_CreateCompress	jCreaCompress
-#define jpeg_CreateDecompress	jCreaDecompress
-#define jpeg_destroy_compress	jDestCompress
-#define jpeg_destroy_decompress	jDestDecompress
-#define jpeg_stdio_dest		jStdDest
-#define jpeg_stdio_src		jStdSrc
-#define jpeg_mem_dest		jMemDest
-#define jpeg_mem_src		jMemSrc
-#define jpeg_set_defaults	jSetDefaults
-#define jpeg_set_colorspace	jSetColorspace
-#define jpeg_default_colorspace	jDefColorspace
-#define jpeg_set_quality	jSetQuality
-#define jpeg_set_linear_quality	jSetLQuality
-#define jpeg_default_qtables	jDefQTables
-#define jpeg_add_quant_table	jAddQuantTable
-#define jpeg_quality_scaling	jQualityScaling
-#define jpeg_simple_progression	jSimProgress
-#define jpeg_suppress_tables	jSuppressTables
-#define jpeg_alloc_quant_table	jAlcQTable
-#define jpeg_alloc_huff_table	jAlcHTable
-#define jpeg_start_compress	jStrtCompress
-#define jpeg_write_scanlines	jWrtScanlines
-#define jpeg_finish_compress	jFinCompress
-#define jpeg_calc_jpeg_dimensions	jCjpegDimensions
-#define jpeg_write_raw_data	jWrtRawData
-#define jpeg_write_marker	jWrtMarker
-#define jpeg_write_m_header	jWrtMHeader
-#define jpeg_write_m_byte	jWrtMByte
-#define jpeg_write_tables	jWrtTables
-#define jpeg_read_header	jReadHeader
-#define jpeg_start_decompress	jStrtDecompress
-#define jpeg_read_scanlines	jReadScanlines
-#define jpeg_finish_decompress	jFinDecompress
-#define jpeg_read_raw_data	jReadRawData
-#define jpeg_has_multiple_scans	jHasMultScn
-#define jpeg_start_output	jStrtOutput
-#define jpeg_finish_output	jFinOutput
-#define jpeg_input_complete	jInComplete
-#define jpeg_new_colormap	jNewCMap
-#define jpeg_consume_input	jConsumeInput
-#define jpeg_core_output_dimensions	jCoreDimensions
-#define jpeg_calc_output_dimensions	jCalcDimensions
-#define jpeg_save_markers	jSaveMarkers
-#define jpeg_set_marker_processor	jSetMarker
-#define jpeg_read_coefficients	jReadCoefs
-#define jpeg_write_coefficients	jWrtCoefs
-#define jpeg_copy_critical_parameters	jCopyCrit
-#define jpeg_abort_compress	jAbrtCompress
-#define jpeg_abort_decompress	jAbrtDecompress
-#define jpeg_abort		jAbort
-#define jpeg_destroy		jDestroy
-#define jpeg_resync_to_restart	jResyncRestart
-#endif /* NEED_SHORT_EXTERNAL_NAMES */
-
-
-/* Default error-management setup */
-EXTERN(struct jpeg_error_mgr *) jpeg_std_error
-	JPP((struct jpeg_error_mgr * err));
-
-/* Initialization of JPEG compression objects.
- * jpeg_create_compress() and jpeg_create_decompress() are the exported
- * names that applications should call.  These expand to calls on
- * jpeg_CreateCompress and jpeg_CreateDecompress with additional information
- * passed for version mismatch checking.
- * NB: you must set up the error-manager BEFORE calling jpeg_create_xxx.
- */
-#define jpeg_create_compress(cinfo) \
-    jpeg_CreateCompress((cinfo), JPEG_LIB_VERSION, \
-			(size_t) sizeof(struct jpeg_compress_struct))
-#define jpeg_create_decompress(cinfo) \
-    jpeg_CreateDecompress((cinfo), JPEG_LIB_VERSION, \
-			  (size_t) sizeof(struct jpeg_decompress_struct))
-EXTERN(void) jpeg_CreateCompress JPP((j_compress_ptr cinfo,
-				      int version, size_t structsize));
-EXTERN(void) jpeg_CreateDecompress JPP((j_decompress_ptr cinfo,
-					int version, size_t structsize));
-/* Destruction of JPEG compression objects */
-EXTERN(void) jpeg_destroy_compress JPP((j_compress_ptr cinfo));
-EXTERN(void) jpeg_destroy_decompress JPP((j_decompress_ptr cinfo));
-
-/* Standard data source and destination managers: stdio streams. */
-/* Caller is responsible for opening the file before and closing after. */
-EXTERN(void) jpeg_stdio_dest JPP((j_compress_ptr cinfo, FILE * outfile));
-EXTERN(void) jpeg_stdio_src JPP((j_decompress_ptr cinfo, FILE * infile));
-
-/* Data source and destination managers: memory buffers. */
-EXTERN(void) jpeg_mem_dest JPP((j_compress_ptr cinfo,
-			       unsigned char ** outbuffer,
-			       unsigned long * outsize));
-EXTERN(void) jpeg_mem_src JPP((j_decompress_ptr cinfo,
-			      unsigned char * inbuffer,
-			      unsigned long insize));
-
-/* Default parameter setup for compression */
-EXTERN(void) jpeg_set_defaults JPP((j_compress_ptr cinfo));
-/* Compression parameter setup aids */
-EXTERN(void) jpeg_set_colorspace JPP((j_compress_ptr cinfo,
-				      J_COLOR_SPACE colorspace));
-EXTERN(void) jpeg_default_colorspace JPP((j_compress_ptr cinfo));
-EXTERN(void) jpeg_set_quality JPP((j_compress_ptr cinfo, int quality,
-				   boolean force_baseline));
-EXTERN(void) jpeg_set_linear_quality JPP((j_compress_ptr cinfo,
-					  int scale_factor,
-					  boolean force_baseline));
-EXTERN(void) jpeg_default_qtables JPP((j_compress_ptr cinfo,
-				       boolean force_baseline));
-EXTERN(void) jpeg_add_quant_table JPP((j_compress_ptr cinfo, int which_tbl,
-				       const unsigned int *basic_table,
-				       int scale_factor,
-				       boolean force_baseline));
-EXTERN(int) jpeg_quality_scaling JPP((int quality));
-EXTERN(void) jpeg_simple_progression JPP((j_compress_ptr cinfo));
-EXTERN(void) jpeg_suppress_tables JPP((j_compress_ptr cinfo,
-				       boolean suppress));
-EXTERN(JQUANT_TBL *) jpeg_alloc_quant_table JPP((j_common_ptr cinfo));
-EXTERN(JHUFF_TBL *) jpeg_alloc_huff_table JPP((j_common_ptr cinfo));
-
-/* Main entry points for compression */
-EXTERN(void) jpeg_start_compress JPP((j_compress_ptr cinfo,
-				      boolean write_all_tables));
-EXTERN(JDIMENSION) jpeg_write_scanlines JPP((j_compress_ptr cinfo,
-					     JSAMPARRAY scanlines,
-					     JDIMENSION num_lines));
-EXTERN(void) jpeg_finish_compress JPP((j_compress_ptr cinfo));
-
-/* Precalculate JPEG dimensions for current compression parameters. */
-EXTERN(void) jpeg_calc_jpeg_dimensions JPP((j_compress_ptr cinfo));
-
-/* Replaces jpeg_write_scanlines when writing raw downsampled data. */
-EXTERN(JDIMENSION) jpeg_write_raw_data JPP((j_compress_ptr cinfo,
-					    JSAMPIMAGE data,
-					    JDIMENSION num_lines));
-
-/* Write a special marker.  See libjpeg.txt concerning safe usage. */
-EXTERN(void) jpeg_write_marker
-	JPP((j_compress_ptr cinfo, int marker,
-	     const JOCTET * dataptr, unsigned int datalen));
-/* Same, but piecemeal. */
-EXTERN(void) jpeg_write_m_header
-	JPP((j_compress_ptr cinfo, int marker, unsigned int datalen));
-EXTERN(void) jpeg_write_m_byte
-	JPP((j_compress_ptr cinfo, int val));
-
-/* Alternate compression function: just write an abbreviated table file */
-EXTERN(void) jpeg_write_tables JPP((j_compress_ptr cinfo));
-
-/* Decompression startup: read start of JPEG datastream to see what's there */
-EXTERN(int) jpeg_read_header JPP((j_decompress_ptr cinfo,
-				  boolean require_image));
-/* Return value is one of: */
-#define JPEG_SUSPENDED		0 /* Suspended due to lack of input data */
-#define JPEG_HEADER_OK		1 /* Found valid image datastream */
-#define JPEG_HEADER_TABLES_ONLY	2 /* Found valid table-specs-only datastream */
-/* If you pass require_image = TRUE (normal case), you need not check for
- * a TABLES_ONLY return code; an abbreviated file will cause an error exit.
- * JPEG_SUSPENDED is only possible if you use a data source module that can
- * give a suspension return (the stdio source module doesn't).
- */
-
-/* Main entry points for decompression */
-EXTERN(boolean) jpeg_start_decompress JPP((j_decompress_ptr cinfo));
-EXTERN(JDIMENSION) jpeg_read_scanlines JPP((j_decompress_ptr cinfo,
-					    JSAMPARRAY scanlines,
-					    JDIMENSION max_lines));
-EXTERN(boolean) jpeg_finish_decompress JPP((j_decompress_ptr cinfo));
-
-/* Replaces jpeg_read_scanlines when reading raw downsampled data. */
-EXTERN(JDIMENSION) jpeg_read_raw_data JPP((j_decompress_ptr cinfo,
-					   JSAMPIMAGE data,
-					   JDIMENSION max_lines));
-
-/* Additional entry points for buffered-image mode. */
-EXTERN(boolean) jpeg_has_multiple_scans JPP((j_decompress_ptr cinfo));
-EXTERN(boolean) jpeg_start_output JPP((j_decompress_ptr cinfo,
-				       int scan_number));
-EXTERN(boolean) jpeg_finish_output JPP((j_decompress_ptr cinfo));
-EXTERN(boolean) jpeg_input_complete JPP((j_decompress_ptr cinfo));
-EXTERN(void) jpeg_new_colormap JPP((j_decompress_ptr cinfo));
-EXTERN(int) jpeg_consume_input JPP((j_decompress_ptr cinfo));
-/* Return value is one of: */
-/* #define JPEG_SUSPENDED	0    Suspended due to lack of input data */
-#define JPEG_REACHED_SOS	1 /* Reached start of new scan */
-#define JPEG_REACHED_EOI	2 /* Reached end of image */
-#define JPEG_ROW_COMPLETED	3 /* Completed one iMCU row */
-#define JPEG_SCAN_COMPLETED	4 /* Completed last iMCU row of a scan */
-
-/* Precalculate output dimensions for current decompression parameters. */
-EXTERN(void) jpeg_core_output_dimensions JPP((j_decompress_ptr cinfo));
-EXTERN(void) jpeg_calc_output_dimensions JPP((j_decompress_ptr cinfo));
-
-/* Control saving of COM and APPn markers into marker_list. */
-EXTERN(void) jpeg_save_markers
-	JPP((j_decompress_ptr cinfo, int marker_code,
-	     unsigned int length_limit));
-
-/* Install a special processing method for COM or APPn markers. */
-EXTERN(void) jpeg_set_marker_processor
-	JPP((j_decompress_ptr cinfo, int marker_code,
-	     jpeg_marker_parser_method routine));
-
-/* Read or write raw DCT coefficients --- useful for lossless transcoding. */
-EXTERN(jvirt_barray_ptr *) jpeg_read_coefficients JPP((j_decompress_ptr cinfo));
-EXTERN(void) jpeg_write_coefficients JPP((j_compress_ptr cinfo,
-					  jvirt_barray_ptr * coef_arrays));
-EXTERN(void) jpeg_copy_critical_parameters JPP((j_decompress_ptr srcinfo,
-						j_compress_ptr dstinfo));
-
-/* If you choose to abort compression or decompression before completing
- * jpeg_finish_(de)compress, then you need to clean up to release memory,
- * temporary files, etc.  You can just call jpeg_destroy_(de)compress
- * if you're done with the JPEG object, but if you want to clean it up and
- * reuse it, call this:
- */
-EXTERN(void) jpeg_abort_compress JPP((j_compress_ptr cinfo));
-EXTERN(void) jpeg_abort_decompress JPP((j_decompress_ptr cinfo));
-
-/* Generic versions of jpeg_abort and jpeg_destroy that work on either
- * flavor of JPEG object.  These may be more convenient in some places.
- */
-EXTERN(void) jpeg_abort JPP((j_common_ptr cinfo));
-EXTERN(void) jpeg_destroy JPP((j_common_ptr cinfo));
-
-/* Default restart-marker-resync procedure for use by data source modules */
-EXTERN(boolean) jpeg_resync_to_restart JPP((j_decompress_ptr cinfo,
-					    int desired));
-
-
-/* These marker codes are exported since applications and data source modules
- * are likely to want to use them.
- */
-
-#define JPEG_RST0	0xD0	/* RST0 marker code */
-#define JPEG_EOI	0xD9	/* EOI marker code */
-#define JPEG_APP0	0xE0	/* APP0 marker code */
-#define JPEG_COM	0xFE	/* COM marker code */
-
-
-/* If we have a brain-damaged compiler that emits warnings (or worse, errors)
- * for structure definitions that are never filled in, keep it quiet by
- * supplying dummy definitions for the various substructures.
- */
-
-#ifdef INCOMPLETE_TYPES_BROKEN
-#ifndef JPEG_INTERNALS		/* will be defined in jpegint.h */
-struct jvirt_sarray_control { long dummy; };
-struct jvirt_barray_control { long dummy; };
-struct jpeg_comp_master { long dummy; };
-struct jpeg_c_main_controller { long dummy; };
-struct jpeg_c_prep_controller { long dummy; };
-struct jpeg_c_coef_controller { long dummy; };
-struct jpeg_marker_writer { long dummy; };
-struct jpeg_color_converter { long dummy; };
-struct jpeg_downsampler { long dummy; };
-struct jpeg_forward_dct { long dummy; };
-struct jpeg_entropy_encoder { long dummy; };
-struct jpeg_decomp_master { long dummy; };
-struct jpeg_d_main_controller { long dummy; };
-struct jpeg_d_coef_controller { long dummy; };
-struct jpeg_d_post_controller { long dummy; };
-struct jpeg_input_controller { long dummy; };
-struct jpeg_marker_reader { long dummy; };
-struct jpeg_entropy_decoder { long dummy; };
-struct jpeg_inverse_dct { long dummy; };
-struct jpeg_upsampler { long dummy; };
-struct jpeg_color_deconverter { long dummy; };
-struct jpeg_color_quantizer { long dummy; };
-#endif /* JPEG_INTERNALS */
-#endif /* INCOMPLETE_TYPES_BROKEN */
-
-
-/*
- * The JPEG library modules define JPEG_INTERNALS before including this file.
- * The internal structure declarations are read only when that is true.
- * Applications using the library should not include jpegint.h, but may wish
- * to include jerror.h.
- */
-
-#ifdef JPEG_INTERNALS
-#include "jpegint.h"		/* fetch private declarations */
-#include "jerror.h"		/* fetch error codes too */
-#endif
-
-#ifdef __cplusplus
-#ifndef DONT_USE_EXTERN_C
-}
-#endif
-#endif
-
-#endif /* JPEGLIB_H */
+/*
+ * jpeglib.h
+ *
+ * Copyright (C) 1991-1998, Thomas G. Lane.
+ * Modified 2002-2012 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file defines the application interface for the JPEG library.
+ * Most applications using the library need only include this file,
+ * and perhaps jerror.h if they want to know the exact error codes.
+ */
+
+#ifndef JPEGLIB_H
+#define JPEGLIB_H
+
+/*
+ * First we include the configuration files that record how this
+ * installation of the JPEG library is set up.  jconfig.h can be
+ * generated automatically for many systems.  jmorecfg.h contains
+ * manual configuration options that most people need not worry about.
+ */
+
+#ifndef JCONFIG_INCLUDED	/* in case jinclude.h already did */
+#include "jconfig.h"		/* widely used configuration options */
+#endif
+#include "jmorecfg.h"		/* seldom changed options */
+
+
+#ifdef __cplusplus
+#ifndef DONT_USE_EXTERN_C
+extern "C" {
+#endif
+#endif
+
+/* Version IDs for the JPEG library.
+ * Might be useful for tests like "#if JPEG_LIB_VERSION >= 90".
+ */
+
+#define JPEG_LIB_VERSION        90	/* Compatibility version 9.0 */
+#define JPEG_LIB_VERSION_MAJOR  9
+#define JPEG_LIB_VERSION_MINOR  0
+
+
+/* Various constants determining the sizes of things.
+ * All of these are specified by the JPEG standard, so don't change them
+ * if you want to be compatible.
+ */
+
+#define DCTSIZE		    8	/* The basic DCT block is 8x8 coefficients */
+#define DCTSIZE2	    64	/* DCTSIZE squared; # of elements in a block */
+#define NUM_QUANT_TBLS      4	/* Quantization tables are numbered 0..3 */
+#define NUM_HUFF_TBLS       4	/* Huffman tables are numbered 0..3 */
+#define NUM_ARITH_TBLS      16	/* Arith-coding tables are numbered 0..15 */
+#define MAX_COMPS_IN_SCAN   4	/* JPEG limit on # of components in one scan */
+#define MAX_SAMP_FACTOR     4	/* JPEG limit on sampling factors */
+/* Unfortunately, some bozo at Adobe saw no reason to be bound by the standard;
+ * the PostScript DCT filter can emit files with many more than 10 blocks/MCU.
+ * If you happen to run across such a file, you can up D_MAX_BLOCKS_IN_MCU
+ * to handle it.  We even let you do this from the jconfig.h file.  However,
+ * we strongly discourage changing C_MAX_BLOCKS_IN_MCU; just because Adobe
+ * sometimes emits noncompliant files doesn't mean you should too.
+ */
+#define C_MAX_BLOCKS_IN_MCU   10 /* compressor's limit on blocks per MCU */
+#ifndef D_MAX_BLOCKS_IN_MCU
+#define D_MAX_BLOCKS_IN_MCU   10 /* decompressor's limit on blocks per MCU */
+#endif
+
+
+/* Data structures for images (arrays of samples and of DCT coefficients).
+ * On 80x86 machines, the image arrays are too big for near pointers,
+ * but the pointer arrays can fit in near memory.
+ */
+
+typedef JSAMPLE FAR *JSAMPROW;	/* ptr to one image row of pixel samples. */
+typedef JSAMPROW *JSAMPARRAY;	/* ptr to some rows (a 2-D sample array) */
+typedef JSAMPARRAY *JSAMPIMAGE;	/* a 3-D sample array: top index is color */
+
+typedef JCOEF JBLOCK[DCTSIZE2];	/* one block of coefficients */
+typedef JBLOCK FAR *JBLOCKROW;	/* pointer to one row of coefficient blocks */
+typedef JBLOCKROW *JBLOCKARRAY;		/* a 2-D array of coefficient blocks */
+typedef JBLOCKARRAY *JBLOCKIMAGE;	/* a 3-D array of coefficient blocks */
+
+typedef JCOEF FAR *JCOEFPTR;	/* useful in a couple of places */
+
+
+/* Types for JPEG compression parameters and working tables. */
+
+
+/* DCT coefficient quantization tables. */
+
+typedef struct {
+  /* This array gives the coefficient quantizers in natural array order
+   * (not the zigzag order in which they are stored in a JPEG DQT marker).
+   * CAUTION: IJG versions prior to v6a kept this array in zigzag order.
+   */
+  UINT16 quantval[DCTSIZE2];	/* quantization step for each coefficient */
+  /* This field is used only during compression.  It's initialized FALSE when
+   * the table is created, and set TRUE when it's been output to the file.
+   * You could suppress output of a table by setting this to TRUE.
+   * (See jpeg_suppress_tables for an example.)
+   */
+  boolean sent_table;		/* TRUE when table has been output */
+} JQUANT_TBL;
+
+
+/* Huffman coding tables. */
+
+typedef struct {
+  /* These two fields directly represent the contents of a JPEG DHT marker */
+  UINT8 bits[17];		/* bits[k] = # of symbols with codes of */
+				/* length k bits; bits[0] is unused */
+  UINT8 huffval[256];		/* The symbols, in order of incr code length */
+  /* This field is used only during compression.  It's initialized FALSE when
+   * the table is created, and set TRUE when it's been output to the file.
+   * You could suppress output of a table by setting this to TRUE.
+   * (See jpeg_suppress_tables for an example.)
+   */
+  boolean sent_table;		/* TRUE when table has been output */
+} JHUFF_TBL;
+
+
+/* Basic info about one component (color channel). */
+
+typedef struct {
+  /* These values are fixed over the whole image. */
+  /* For compression, they must be supplied by parameter setup; */
+  /* for decompression, they are read from the SOF marker. */
+  int component_id;		/* identifier for this component (0..255) */
+  int component_index;		/* its index in SOF or cinfo->comp_info[] */
+  int h_samp_factor;		/* horizontal sampling factor (1..4) */
+  int v_samp_factor;		/* vertical sampling factor (1..4) */
+  int quant_tbl_no;		/* quantization table selector (0..3) */
+  /* These values may vary between scans. */
+  /* For compression, they must be supplied by parameter setup; */
+  /* for decompression, they are read from the SOS marker. */
+  /* The decompressor output side may not use these variables. */
+  int dc_tbl_no;		/* DC entropy table selector (0..3) */
+  int ac_tbl_no;		/* AC entropy table selector (0..3) */
+  
+  /* Remaining fields should be treated as private by applications. */
+  
+  /* These values are computed during compression or decompression startup: */
+  /* Component's size in DCT blocks.
+   * Any dummy blocks added to complete an MCU are not counted; therefore
+   * these values do not depend on whether a scan is interleaved or not.
+   */
+  JDIMENSION width_in_blocks;
+  JDIMENSION height_in_blocks;
+  /* Size of a DCT block in samples,
+   * reflecting any scaling we choose to apply during the DCT step.
+   * Values from 1 to 16 are supported.
+   * Note that different components may receive different DCT scalings.
+   */
+  int DCT_h_scaled_size;
+  int DCT_v_scaled_size;
+  /* The downsampled dimensions are the component's actual, unpadded number
+   * of samples at the main buffer (preprocessing/compression interface);
+   * DCT scaling is included, so
+   * downsampled_width = ceil(image_width * Hi/Hmax * DCT_h_scaled_size/DCTSIZE)
+   * and similarly for height.
+   */
+  JDIMENSION downsampled_width;	 /* actual width in samples */
+  JDIMENSION downsampled_height; /* actual height in samples */
+  /* This flag is used only for decompression.  In cases where some of the
+   * components will be ignored (eg grayscale output from YCbCr image),
+   * we can skip most computations for the unused components.
+   */
+  boolean component_needed;	/* do we need the value of this component? */
+
+  /* These values are computed before starting a scan of the component. */
+  /* The decompressor output side may not use these variables. */
+  int MCU_width;		/* number of blocks per MCU, horizontally */
+  int MCU_height;		/* number of blocks per MCU, vertically */
+  int MCU_blocks;		/* MCU_width * MCU_height */
+  int MCU_sample_width;	/* MCU width in samples: MCU_width * DCT_h_scaled_size */
+  int last_col_width;		/* # of non-dummy blocks across in last MCU */
+  int last_row_height;		/* # of non-dummy blocks down in last MCU */
+
+  /* Saved quantization table for component; NULL if none yet saved.
+   * See jdinput.c comments about the need for this information.
+   * This field is currently used only for decompression.
+   */
+  JQUANT_TBL * quant_table;
+
+  /* Private per-component storage for DCT or IDCT subsystem. */
+  void * dct_table;
+} jpeg_component_info;
+
+
+/* The script for encoding a multiple-scan file is an array of these: */
+
+typedef struct {
+  int comps_in_scan;		/* number of components encoded in this scan */
+  int component_index[MAX_COMPS_IN_SCAN]; /* their SOF/comp_info[] indexes */
+  int Ss, Se;			/* progressive JPEG spectral selection parms */
+  int Ah, Al;			/* progressive JPEG successive approx. parms */
+} jpeg_scan_info;
+
+/* The decompressor can save APPn and COM markers in a list of these: */
+
+typedef struct jpeg_marker_struct FAR * jpeg_saved_marker_ptr;
+
+struct jpeg_marker_struct {
+  jpeg_saved_marker_ptr next;	/* next in list, or NULL */
+  UINT8 marker;			/* marker code: JPEG_COM, or JPEG_APP0+n */
+  unsigned int original_length;	/* # bytes of data in the file */
+  unsigned int data_length;	/* # bytes of data saved at data[] */
+  JOCTET FAR * data;		/* the data contained in the marker */
+  /* the marker length word is not counted in data_length or original_length */
+};
+
+/* Known color spaces. */
+
+typedef enum {
+	JCS_UNKNOWN,		/* error/unspecified */
+	JCS_GRAYSCALE,		/* monochrome */
+	JCS_RGB,		/* red/green/blue */
+	JCS_YCbCr,		/* Y/Cb/Cr (also known as YUV) */
+	JCS_CMYK,		/* C/M/Y/K */
+	JCS_YCCK		/* Y/Cb/Cr/K */
+} J_COLOR_SPACE;
+
+/* Supported color transforms. */
+
+typedef enum {
+	JCT_NONE           = 0,
+	JCT_SUBTRACT_GREEN = 1
+} J_COLOR_TRANSFORM;
+
+/* DCT/IDCT algorithm options. */
+
+typedef enum {
+	JDCT_ISLOW,		/* slow but accurate integer algorithm */
+	JDCT_IFAST,		/* faster, less accurate integer method */
+	JDCT_FLOAT		/* floating-point: accurate, fast on fast HW */
+} J_DCT_METHOD;
+
+#ifndef JDCT_DEFAULT		/* may be overridden in jconfig.h */
+#define JDCT_DEFAULT  JDCT_ISLOW
+#endif
+#ifndef JDCT_FASTEST		/* may be overridden in jconfig.h */
+#define JDCT_FASTEST  JDCT_IFAST
+#endif
+
+/* Dithering options for decompression. */
+
+typedef enum {
+	JDITHER_NONE,		/* no dithering */
+	JDITHER_ORDERED,	/* simple ordered dither */
+	JDITHER_FS		/* Floyd-Steinberg error diffusion dither */
+} J_DITHER_MODE;
+
+
+/* Common fields between JPEG compression and decompression master structs. */
+
+#define jpeg_common_fields \
+  struct jpeg_error_mgr * err;	/* Error handler module */\
+  struct jpeg_memory_mgr * mem;	/* Memory manager module */\
+  struct jpeg_progress_mgr * progress; /* Progress monitor, or NULL if none */\
+  void * client_data;		/* Available for use by application */\
+  boolean is_decompressor;	/* So common code can tell which is which */\
+  int global_state		/* For checking call sequence validity */
+
+/* Routines that are to be used by both halves of the library are declared
+ * to receive a pointer to this structure.  There are no actual instances of
+ * jpeg_common_struct, only of jpeg_compress_struct and jpeg_decompress_struct.
+ */
+struct jpeg_common_struct {
+  jpeg_common_fields;		/* Fields common to both master struct types */
+  /* Additional fields follow in an actual jpeg_compress_struct or
+   * jpeg_decompress_struct.  All three structs must agree on these
+   * initial fields!  (This would be a lot cleaner in C++.)
+   */
+};
+
+typedef struct jpeg_common_struct * j_common_ptr;
+typedef struct jpeg_compress_struct * j_compress_ptr;
+typedef struct jpeg_decompress_struct * j_decompress_ptr;
+
+
+/* Master record for a compression instance */
+
+struct jpeg_compress_struct {
+  jpeg_common_fields;		/* Fields shared with jpeg_decompress_struct */
+
+  /* Destination for compressed data */
+  struct jpeg_destination_mgr * dest;
+
+  /* Description of source image --- these fields must be filled in by
+   * outer application before starting compression.  in_color_space must
+   * be correct before you can even call jpeg_set_defaults().
+   */
+
+  JDIMENSION image_width;	/* input image width */
+  JDIMENSION image_height;	/* input image height */
+  int input_components;		/* # of color components in input image */
+  J_COLOR_SPACE in_color_space;	/* colorspace of input image */
+
+  double input_gamma;		/* image gamma of input image */
+
+  /* Compression parameters --- these fields must be set before calling
+   * jpeg_start_compress().  We recommend calling jpeg_set_defaults() to
+   * initialize everything to reasonable defaults, then changing anything
+   * the application specifically wants to change.  That way you won't get
+   * burnt when new parameters are added.  Also note that there are several
+   * helper routines to simplify changing parameters.
+   */
+
+  unsigned int scale_num, scale_denom; /* fraction by which to scale image */
+
+  JDIMENSION jpeg_width;	/* scaled JPEG image width */
+  JDIMENSION jpeg_height;	/* scaled JPEG image height */
+  /* Dimensions of actual JPEG image that will be written to file,
+   * derived from input dimensions by scaling factors above.
+   * These fields are computed by jpeg_start_compress().
+   * You can also use jpeg_calc_jpeg_dimensions() to determine these values
+   * in advance of calling jpeg_start_compress().
+   */
+
+  int data_precision;		/* bits of precision in image data */
+
+  int num_components;		/* # of color components in JPEG image */
+  J_COLOR_SPACE jpeg_color_space; /* colorspace of JPEG image */
+
+  jpeg_component_info * comp_info;
+  /* comp_info[i] describes component that appears i'th in SOF */
+
+  JQUANT_TBL * quant_tbl_ptrs[NUM_QUANT_TBLS];
+  int q_scale_factor[NUM_QUANT_TBLS];
+  /* ptrs to coefficient quantization tables, or NULL if not defined,
+   * and corresponding scale factors (percentage, initialized 100).
+   */
+
+  JHUFF_TBL * dc_huff_tbl_ptrs[NUM_HUFF_TBLS];
+  JHUFF_TBL * ac_huff_tbl_ptrs[NUM_HUFF_TBLS];
+  /* ptrs to Huffman coding tables, or NULL if not defined */
+
+  UINT8 arith_dc_L[NUM_ARITH_TBLS]; /* L values for DC arith-coding tables */
+  UINT8 arith_dc_U[NUM_ARITH_TBLS]; /* U values for DC arith-coding tables */
+  UINT8 arith_ac_K[NUM_ARITH_TBLS]; /* Kx values for AC arith-coding tables */
+
+  int num_scans;		/* # of entries in scan_info array */
+  const jpeg_scan_info * scan_info; /* script for multi-scan file, or NULL */
+  /* The default value of scan_info is NULL, which causes a single-scan
+   * sequential JPEG file to be emitted.  To create a multi-scan file,
+   * set num_scans and scan_info to point to an array of scan definitions.
+   */
+
+  boolean raw_data_in;		/* TRUE=caller supplies downsampled data */
+  boolean arith_code;		/* TRUE=arithmetic coding, FALSE=Huffman */
+  boolean optimize_coding;	/* TRUE=optimize entropy encoding parms */
+  boolean CCIR601_sampling;	/* TRUE=first samples are cosited */
+  boolean do_fancy_downsampling; /* TRUE=apply fancy downsampling */
+  int smoothing_factor;		/* 1..100, or 0 for no input smoothing */
+  J_DCT_METHOD dct_method;	/* DCT algorithm selector */
+
+  /* The restart interval can be specified in absolute MCUs by setting
+   * restart_interval, or in MCU rows by setting restart_in_rows
+   * (in which case the correct restart_interval will be figured
+   * for each scan).
+   */
+  unsigned int restart_interval; /* MCUs per restart, or 0 for no restart */
+  int restart_in_rows;		/* if > 0, MCU rows per restart interval */
+
+  /* Parameters controlling emission of special markers. */
+
+  boolean write_JFIF_header;	/* should a JFIF marker be written? */
+  UINT8 JFIF_major_version;	/* What to write for the JFIF version number */
+  UINT8 JFIF_minor_version;
+  /* These three values are not used by the JPEG code, merely copied */
+  /* into the JFIF APP0 marker.  density_unit can be 0 for unknown, */
+  /* 1 for dots/inch, or 2 for dots/cm.  Note that the pixel aspect */
+  /* ratio is defined by X_density/Y_density even when density_unit=0. */
+  UINT8 density_unit;		/* JFIF code for pixel size units */
+  UINT16 X_density;		/* Horizontal pixel density */
+  UINT16 Y_density;		/* Vertical pixel density */
+  boolean write_Adobe_marker;	/* should an Adobe marker be written? */
+
+  J_COLOR_TRANSFORM color_transform;
+  /* Color transform identifier, writes LSE marker if nonzero */
+
+  /* State variable: index of next scanline to be written to
+   * jpeg_write_scanlines().  Application may use this to control its
+   * processing loop, e.g., "while (next_scanline < image_height)".
+   */
+
+  JDIMENSION next_scanline;	/* 0 .. image_height-1  */
+
+  /* Remaining fields are known throughout compressor, but generally
+   * should not be touched by a surrounding application.
+   */
+
+  /*
+   * These fields are computed during compression startup
+   */
+  boolean progressive_mode;	/* TRUE if scan script uses progressive mode */
+  int max_h_samp_factor;	/* largest h_samp_factor */
+  int max_v_samp_factor;	/* largest v_samp_factor */
+
+  int min_DCT_h_scaled_size;	/* smallest DCT_h_scaled_size of any component */
+  int min_DCT_v_scaled_size;	/* smallest DCT_v_scaled_size of any component */
+
+  JDIMENSION total_iMCU_rows;	/* # of iMCU rows to be input to coef ctlr */
+  /* The coefficient controller receives data in units of MCU rows as defined
+   * for fully interleaved scans (whether the JPEG file is interleaved or not).
+   * There are v_samp_factor * DCTSIZE sample rows of each component in an
+   * "iMCU" (interleaved MCU) row.
+   */
+  
+  /*
+   * These fields are valid during any one scan.
+   * They describe the components and MCUs actually appearing in the scan.
+   */
+  int comps_in_scan;		/* # of JPEG components in this scan */
+  jpeg_component_info * cur_comp_info[MAX_COMPS_IN_SCAN];
+  /* *cur_comp_info[i] describes component that appears i'th in SOS */
+  
+  JDIMENSION MCUs_per_row;	/* # of MCUs across the image */
+  JDIMENSION MCU_rows_in_scan;	/* # of MCU rows in the image */
+  
+  int blocks_in_MCU;		/* # of DCT blocks per MCU */
+  int MCU_membership[C_MAX_BLOCKS_IN_MCU];
+  /* MCU_membership[i] is index in cur_comp_info of component owning */
+  /* i'th block in an MCU */
+
+  int Ss, Se, Ah, Al;		/* progressive JPEG parameters for scan */
+
+  int block_size;		/* the basic DCT block size: 1..16 */
+  const int * natural_order;	/* natural-order position array */
+  int lim_Se;			/* min( Se, DCTSIZE2-1 ) */
+
+  /*
+   * Links to compression subobjects (methods and private variables of modules)
+   */
+  struct jpeg_comp_master * master;
+  struct jpeg_c_main_controller * main;
+  struct jpeg_c_prep_controller * prep;
+  struct jpeg_c_coef_controller * coef;
+  struct jpeg_marker_writer * marker;
+  struct jpeg_color_converter * cconvert;
+  struct jpeg_downsampler * downsample;
+  struct jpeg_forward_dct * fdct;
+  struct jpeg_entropy_encoder * entropy;
+  jpeg_scan_info * script_space; /* workspace for jpeg_simple_progression */
+  int script_space_size;
+};
+
+
+/* Master record for a decompression instance */
+
+struct jpeg_decompress_struct {
+  jpeg_common_fields;		/* Fields shared with jpeg_compress_struct */
+
+  /* Source of compressed data */
+  struct jpeg_source_mgr * src;
+
+  /* Basic description of image --- filled in by jpeg_read_header(). */
+  /* Application may inspect these values to decide how to process image. */
+
+  JDIMENSION image_width;	/* nominal image width (from SOF marker) */
+  JDIMENSION image_height;	/* nominal image height */
+  int num_components;		/* # of color components in JPEG image */
+  J_COLOR_SPACE jpeg_color_space; /* colorspace of JPEG image */
+
+  /* Decompression processing parameters --- these fields must be set before
+   * calling jpeg_start_decompress().  Note that jpeg_read_header() initializes
+   * them to default values.
+   */
+
+  J_COLOR_SPACE out_color_space; /* colorspace for output */
+
+  unsigned int scale_num, scale_denom; /* fraction by which to scale image */
+
+  double output_gamma;		/* image gamma wanted in output */
+
+  boolean buffered_image;	/* TRUE=multiple output passes */
+  boolean raw_data_out;		/* TRUE=downsampled data wanted */
+
+  J_DCT_METHOD dct_method;	/* IDCT algorithm selector */
+  boolean do_fancy_upsampling;	/* TRUE=apply fancy upsampling */
+  boolean do_block_smoothing;	/* TRUE=apply interblock smoothing */
+
+  boolean quantize_colors;	/* TRUE=colormapped output wanted */
+  /* the following are ignored if not quantize_colors: */
+  J_DITHER_MODE dither_mode;	/* type of color dithering to use */
+  boolean two_pass_quantize;	/* TRUE=use two-pass color quantization */
+  int desired_number_of_colors;	/* max # colors to use in created colormap */
+  /* these are significant only in buffered-image mode: */
+  boolean enable_1pass_quant;	/* enable future use of 1-pass quantizer */
+  boolean enable_external_quant;/* enable future use of external colormap */
+  boolean enable_2pass_quant;	/* enable future use of 2-pass quantizer */
+
+  /* Description of actual output image that will be returned to application.
+   * These fields are computed by jpeg_start_decompress().
+   * You can also use jpeg_calc_output_dimensions() to determine these values
+   * in advance of calling jpeg_start_decompress().
+   */
+
+  JDIMENSION output_width;	/* scaled image width */
+  JDIMENSION output_height;	/* scaled image height */
+  int out_color_components;	/* # of color components in out_color_space */
+  int output_components;	/* # of color components returned */
+  /* output_components is 1 (a colormap index) when quantizing colors;
+   * otherwise it equals out_color_components.
+   */
+  int rec_outbuf_height;	/* min recommended height of scanline buffer */
+  /* If the buffer passed to jpeg_read_scanlines() is less than this many rows
+   * high, space and time will be wasted due to unnecessary data copying.
+   * Usually rec_outbuf_height will be 1 or 2, at most 4.
+   */
+
+  /* When quantizing colors, the output colormap is described by these fields.
+   * The application can supply a colormap by setting colormap non-NULL before
+   * calling jpeg_start_decompress; otherwise a colormap is created during
+   * jpeg_start_decompress or jpeg_start_output.
+   * The map has out_color_components rows and actual_number_of_colors columns.
+   */
+  int actual_number_of_colors;	/* number of entries in use */
+  JSAMPARRAY colormap;		/* The color map as a 2-D pixel array */
+
+  /* State variables: these variables indicate the progress of decompression.
+   * The application may examine these but must not modify them.
+   */
+
+  /* Row index of next scanline to be read from jpeg_read_scanlines().
+   * Application may use this to control its processing loop, e.g.,
+   * "while (output_scanline < output_height)".
+   */
+  JDIMENSION output_scanline;	/* 0 .. output_height-1  */
+
+  /* Current input scan number and number of iMCU rows completed in scan.
+   * These indicate the progress of the decompressor input side.
+   */
+  int input_scan_number;	/* Number of SOS markers seen so far */
+  JDIMENSION input_iMCU_row;	/* Number of iMCU rows completed */
+
+  /* The "output scan number" is the notional scan being displayed by the
+   * output side.  The decompressor will not allow output scan/row number
+   * to get ahead of input scan/row, but it can fall arbitrarily far behind.
+   */
+  int output_scan_number;	/* Nominal scan number being displayed */
+  JDIMENSION output_iMCU_row;	/* Number of iMCU rows read */
+
+  /* Current progression status.  coef_bits[c][i] indicates the precision
+   * with which component c's DCT coefficient i (in zigzag order) is known.
+   * It is -1 when no data has yet been received, otherwise it is the point
+   * transform (shift) value for the most recent scan of the coefficient
+   * (thus, 0 at completion of the progression).
+   * This pointer is NULL when reading a non-progressive file.
+   */
+  int (*coef_bits)[DCTSIZE2];	/* -1 or current Al value for each coef */
+
+  /* Internal JPEG parameters --- the application usually need not look at
+   * these fields.  Note that the decompressor output side may not use
+   * any parameters that can change between scans.
+   */
+
+  /* Quantization and Huffman tables are carried forward across input
+   * datastreams when processing abbreviated JPEG datastreams.
+   */
+
+  JQUANT_TBL * quant_tbl_ptrs[NUM_QUANT_TBLS];
+  /* ptrs to coefficient quantization tables, or NULL if not defined */
+
+  JHUFF_TBL * dc_huff_tbl_ptrs[NUM_HUFF_TBLS];
+  JHUFF_TBL * ac_huff_tbl_ptrs[NUM_HUFF_TBLS];
+  /* ptrs to Huffman coding tables, or NULL if not defined */
+
+  /* These parameters are never carried across datastreams, since they
+   * are given in SOF/SOS markers or defined to be reset by SOI.
+   */
+
+  int data_precision;		/* bits of precision in image data */
+
+  jpeg_component_info * comp_info;
+  /* comp_info[i] describes component that appears i'th in SOF */
+
+  boolean is_baseline;		/* TRUE if Baseline SOF0 encountered */
+  boolean progressive_mode;	/* TRUE if SOFn specifies progressive mode */
+  boolean arith_code;		/* TRUE=arithmetic coding, FALSE=Huffman */
+
+  UINT8 arith_dc_L[NUM_ARITH_TBLS]; /* L values for DC arith-coding tables */
+  UINT8 arith_dc_U[NUM_ARITH_TBLS]; /* U values for DC arith-coding tables */
+  UINT8 arith_ac_K[NUM_ARITH_TBLS]; /* Kx values for AC arith-coding tables */
+
+  unsigned int restart_interval; /* MCUs per restart interval, or 0 for no restart */
+
+  /* These fields record data obtained from optional markers recognized by
+   * the JPEG library.
+   */
+  boolean saw_JFIF_marker;	/* TRUE iff a JFIF APP0 marker was found */
+  /* Data copied from JFIF marker; only valid if saw_JFIF_marker is TRUE: */
+  UINT8 JFIF_major_version;	/* JFIF version number */
+  UINT8 JFIF_minor_version;
+  UINT8 density_unit;		/* JFIF code for pixel size units */
+  UINT16 X_density;		/* Horizontal pixel density */
+  UINT16 Y_density;		/* Vertical pixel density */
+  boolean saw_Adobe_marker;	/* TRUE iff an Adobe APP14 marker was found */
+  UINT8 Adobe_transform;	/* Color transform code from Adobe marker */
+
+  J_COLOR_TRANSFORM color_transform;
+  /* Color transform identifier derived from LSE marker, otherwise zero */
+
+  boolean CCIR601_sampling;	/* TRUE=first samples are cosited */
+
+  /* Aside from the specific data retained from APPn markers known to the
+   * library, the uninterpreted contents of any or all APPn and COM markers
+   * can be saved in a list for examination by the application.
+   */
+  jpeg_saved_marker_ptr marker_list; /* Head of list of saved markers */
+
+  /* Remaining fields are known throughout decompressor, but generally
+   * should not be touched by a surrounding application.
+   */
+
+  /*
+   * These fields are computed during decompression startup
+   */
+  int max_h_samp_factor;	/* largest h_samp_factor */
+  int max_v_samp_factor;	/* largest v_samp_factor */
+
+  int min_DCT_h_scaled_size;	/* smallest DCT_h_scaled_size of any component */
+  int min_DCT_v_scaled_size;	/* smallest DCT_v_scaled_size of any component */
+
+  JDIMENSION total_iMCU_rows;	/* # of iMCU rows in image */
+  /* The coefficient controller's input and output progress is measured in
+   * units of "iMCU" (interleaved MCU) rows.  These are the same as MCU rows
+   * in fully interleaved JPEG scans, but are used whether the scan is
+   * interleaved or not.  We define an iMCU row as v_samp_factor DCT block
+   * rows of each component.  Therefore, the IDCT output contains
+   * v_samp_factor*DCT_v_scaled_size sample rows of a component per iMCU row.
+   */
+
+  JSAMPLE * sample_range_limit; /* table for fast range-limiting */
+
+  /*
+   * These fields are valid during any one scan.
+   * They describe the components and MCUs actually appearing in the scan.
+   * Note that the decompressor output side must not use these fields.
+   */
+  int comps_in_scan;		/* # of JPEG components in this scan */
+  jpeg_component_info * cur_comp_info[MAX_COMPS_IN_SCAN];
+  /* *cur_comp_info[i] describes component that appears i'th in SOS */
+
+  JDIMENSION MCUs_per_row;	/* # of MCUs across the image */
+  JDIMENSION MCU_rows_in_scan;	/* # of MCU rows in the image */
+
+  int blocks_in_MCU;		/* # of DCT blocks per MCU */
+  int MCU_membership[D_MAX_BLOCKS_IN_MCU];
+  /* MCU_membership[i] is index in cur_comp_info of component owning */
+  /* i'th block in an MCU */
+
+  int Ss, Se, Ah, Al;		/* progressive JPEG parameters for scan */
+
+  /* These fields are derived from Se of first SOS marker.
+   */
+  int block_size;		/* the basic DCT block size: 1..16 */
+  const int * natural_order; /* natural-order position array for entropy decode */
+  int lim_Se;			/* min( Se, DCTSIZE2-1 ) for entropy decode */
+
+  /* This field is shared between entropy decoder and marker parser.
+   * It is either zero or the code of a JPEG marker that has been
+   * read from the data source, but has not yet been processed.
+   */
+  int unread_marker;
+
+  /*
+   * Links to decompression subobjects (methods, private variables of modules)
+   */
+  struct jpeg_decomp_master * master;
+  struct jpeg_d_main_controller * main;
+  struct jpeg_d_coef_controller * coef;
+  struct jpeg_d_post_controller * post;
+  struct jpeg_input_controller * inputctl;
+  struct jpeg_marker_reader * marker;
+  struct jpeg_entropy_decoder * entropy;
+  struct jpeg_inverse_dct * idct;
+  struct jpeg_upsampler * upsample;
+  struct jpeg_color_deconverter * cconvert;
+  struct jpeg_color_quantizer * cquantize;
+};
+
+
+/* "Object" declarations for JPEG modules that may be supplied or called
+ * directly by the surrounding application.
+ * As with all objects in the JPEG library, these structs only define the
+ * publicly visible methods and state variables of a module.  Additional
+ * private fields may exist after the public ones.
+ */
+
+
+/* Error handler object */
+
+struct jpeg_error_mgr {
+  /* Error exit handler: does not return to caller */
+  JMETHOD(noreturn_t, error_exit, (j_common_ptr cinfo));
+  /* Conditionally emit a trace or warning message */
+  JMETHOD(void, emit_message, (j_common_ptr cinfo, int msg_level));
+  /* Routine that actually outputs a trace or error message */
+  JMETHOD(void, output_message, (j_common_ptr cinfo));
+  /* Format a message string for the most recent JPEG error or message */
+  JMETHOD(void, format_message, (j_common_ptr cinfo, char * buffer));
+#define JMSG_LENGTH_MAX  200	/* recommended size of format_message buffer */
+  /* Reset error state variables at start of a new image */
+  JMETHOD(void, reset_error_mgr, (j_common_ptr cinfo));
+  
+  /* The message ID code and any parameters are saved here.
+   * A message can have one string parameter or up to 8 int parameters.
+   */
+  int msg_code;
+#define JMSG_STR_PARM_MAX  80
+  union {
+    int i[8];
+    char s[JMSG_STR_PARM_MAX];
+  } msg_parm;
+  
+  /* Standard state variables for error facility */
+  
+  int trace_level;		/* max msg_level that will be displayed */
+  
+  /* For recoverable corrupt-data errors, we emit a warning message,
+   * but keep going unless emit_message chooses to abort.  emit_message
+   * should count warnings in num_warnings.  The surrounding application
+   * can check for bad data by seeing if num_warnings is nonzero at the
+   * end of processing.
+   */
+  long num_warnings;		/* number of corrupt-data warnings */
+
+  /* These fields point to the table(s) of error message strings.
+   * An application can change the table pointer to switch to a different
+   * message list (typically, to change the language in which errors are
+   * reported).  Some applications may wish to add additional error codes
+   * that will be handled by the JPEG library error mechanism; the second
+   * table pointer is used for this purpose.
+   *
+   * First table includes all errors generated by JPEG library itself.
+   * Error code 0 is reserved for a "no such error string" message.
+   */
+  const char * const * jpeg_message_table; /* Library errors */
+  int last_jpeg_message;    /* Table contains strings 0..last_jpeg_message */
+  /* Second table can be added by application (see cjpeg/djpeg for example).
+   * It contains strings numbered first_addon_message..last_addon_message.
+   */
+  const char * const * addon_message_table; /* Non-library errors */
+  int first_addon_message;	/* code for first string in addon table */
+  int last_addon_message;	/* code for last string in addon table */
+};
+
+
+/* Progress monitor object */
+
+struct jpeg_progress_mgr {
+  JMETHOD(void, progress_monitor, (j_common_ptr cinfo));
+
+  long pass_counter;		/* work units completed in this pass */
+  long pass_limit;		/* total number of work units in this pass */
+  int completed_passes;		/* passes completed so far */
+  int total_passes;		/* total number of passes expected */
+};
+
+
+/* Data destination object for compression */
+
+struct jpeg_destination_mgr {
+  JOCTET * next_output_byte;	/* => next byte to write in buffer */
+  size_t free_in_buffer;	/* # of byte spaces remaining in buffer */
+
+  JMETHOD(void, init_destination, (j_compress_ptr cinfo));
+  JMETHOD(boolean, empty_output_buffer, (j_compress_ptr cinfo));
+  JMETHOD(void, term_destination, (j_compress_ptr cinfo));
+};
+
+
+/* Data source object for decompression */
+
+struct jpeg_source_mgr {
+  const JOCTET * next_input_byte; /* => next byte to read from buffer */
+  size_t bytes_in_buffer;	/* # of bytes remaining in buffer */
+
+  JMETHOD(void, init_source, (j_decompress_ptr cinfo));
+  JMETHOD(boolean, fill_input_buffer, (j_decompress_ptr cinfo));
+  JMETHOD(void, skip_input_data, (j_decompress_ptr cinfo, long num_bytes));
+  JMETHOD(boolean, resync_to_restart, (j_decompress_ptr cinfo, int desired));
+  JMETHOD(void, term_source, (j_decompress_ptr cinfo));
+};
+
+
+/* Memory manager object.
+ * Allocates "small" objects (a few K total), "large" objects (tens of K),
+ * and "really big" objects (virtual arrays with backing store if needed).
+ * The memory manager does not allow individual objects to be freed; rather,
+ * each created object is assigned to a pool, and whole pools can be freed
+ * at once.  This is faster and more convenient than remembering exactly what
+ * to free, especially where malloc()/free() are not too speedy.
+ * NB: alloc routines never return NULL.  They exit to error_exit if not
+ * successful.
+ */
+
+#define JPOOL_PERMANENT	0	/* lasts until master record is destroyed */
+#define JPOOL_IMAGE	1	/* lasts until done with image/datastream */
+#define JPOOL_NUMPOOLS	2
+
+typedef struct jvirt_sarray_control * jvirt_sarray_ptr;
+typedef struct jvirt_barray_control * jvirt_barray_ptr;
+
+
+struct jpeg_memory_mgr {
+  /* Method pointers */
+  JMETHOD(void *, alloc_small, (j_common_ptr cinfo, int pool_id,
+				size_t sizeofobject));
+  JMETHOD(void FAR *, alloc_large, (j_common_ptr cinfo, int pool_id,
+				     size_t sizeofobject));
+  JMETHOD(JSAMPARRAY, alloc_sarray, (j_common_ptr cinfo, int pool_id,
+				     JDIMENSION samplesperrow,
+				     JDIMENSION numrows));
+  JMETHOD(JBLOCKARRAY, alloc_barray, (j_common_ptr cinfo, int pool_id,
+				      JDIMENSION blocksperrow,
+				      JDIMENSION numrows));
+  JMETHOD(jvirt_sarray_ptr, request_virt_sarray, (j_common_ptr cinfo,
+						  int pool_id,
+						  boolean pre_zero,
+						  JDIMENSION samplesperrow,
+						  JDIMENSION numrows,
+						  JDIMENSION maxaccess));
+  JMETHOD(jvirt_barray_ptr, request_virt_barray, (j_common_ptr cinfo,
+						  int pool_id,
+						  boolean pre_zero,
+						  JDIMENSION blocksperrow,
+						  JDIMENSION numrows,
+						  JDIMENSION maxaccess));
+  JMETHOD(void, realize_virt_arrays, (j_common_ptr cinfo));
+  JMETHOD(JSAMPARRAY, access_virt_sarray, (j_common_ptr cinfo,
+					   jvirt_sarray_ptr ptr,
+					   JDIMENSION start_row,
+					   JDIMENSION num_rows,
+					   boolean writable));
+  JMETHOD(JBLOCKARRAY, access_virt_barray, (j_common_ptr cinfo,
+					    jvirt_barray_ptr ptr,
+					    JDIMENSION start_row,
+					    JDIMENSION num_rows,
+					    boolean writable));
+  JMETHOD(void, free_pool, (j_common_ptr cinfo, int pool_id));
+  JMETHOD(void, self_destruct, (j_common_ptr cinfo));
+
+  /* Limit on memory allocation for this JPEG object.  (Note that this is
+   * merely advisory, not a guaranteed maximum; it only affects the space
+   * used for virtual-array buffers.)  May be changed by outer application
+   * after creating the JPEG object.
+   */
+  long max_memory_to_use;
+
+  /* Maximum allocation request accepted by alloc_large. */
+  long max_alloc_chunk;
+};
+
+
+/* Routine signature for application-supplied marker processing methods.
+ * Need not pass marker code since it is stored in cinfo->unread_marker.
+ */
+typedef JMETHOD(boolean, jpeg_marker_parser_method, (j_decompress_ptr cinfo));
+
+
+/* Declarations for routines called by application.
+ * The JPP macro hides prototype parameters from compilers that can't cope.
+ * Note JPP requires double parentheses.
+ */
+
+#ifdef HAVE_PROTOTYPES
+#define JPP(arglist)	arglist
+#else
+#define JPP(arglist)	()
+#endif
+
+
+/* Short forms of external names for systems with brain-damaged linkers.
+ * We shorten external names to be unique in the first six letters, which
+ * is good enough for all known systems.
+ * (If your compiler itself needs names to be unique in less than 15 
+ * characters, you are out of luck.  Get a better compiler.)
+ */
+
+#ifdef NEED_SHORT_EXTERNAL_NAMES
+#define jpeg_std_error		jStdError
+#define jpeg_CreateCompress	jCreaCompress
+#define jpeg_CreateDecompress	jCreaDecompress
+#define jpeg_destroy_compress	jDestCompress
+#define jpeg_destroy_decompress	jDestDecompress
+#define jpeg_stdio_dest		jStdDest
+#define jpeg_stdio_src		jStdSrc
+#define jpeg_mem_dest		jMemDest
+#define jpeg_mem_src		jMemSrc
+#define jpeg_set_defaults	jSetDefaults
+#define jpeg_set_colorspace	jSetColorspace
+#define jpeg_default_colorspace	jDefColorspace
+#define jpeg_set_quality	jSetQuality
+#define jpeg_set_linear_quality	jSetLQuality
+#define jpeg_default_qtables	jDefQTables
+#define jpeg_add_quant_table	jAddQuantTable
+#define jpeg_quality_scaling	jQualityScaling
+#define jpeg_simple_progression	jSimProgress
+#define jpeg_suppress_tables	jSuppressTables
+#define jpeg_alloc_quant_table	jAlcQTable
+#define jpeg_alloc_huff_table	jAlcHTable
+#define jpeg_start_compress	jStrtCompress
+#define jpeg_write_scanlines	jWrtScanlines
+#define jpeg_finish_compress	jFinCompress
+#define jpeg_calc_jpeg_dimensions	jCjpegDimensions
+#define jpeg_write_raw_data	jWrtRawData
+#define jpeg_write_marker	jWrtMarker
+#define jpeg_write_m_header	jWrtMHeader
+#define jpeg_write_m_byte	jWrtMByte
+#define jpeg_write_tables	jWrtTables
+#define jpeg_read_header	jReadHeader
+#define jpeg_start_decompress	jStrtDecompress
+#define jpeg_read_scanlines	jReadScanlines
+#define jpeg_finish_decompress	jFinDecompress
+#define jpeg_read_raw_data	jReadRawData
+#define jpeg_has_multiple_scans	jHasMultScn
+#define jpeg_start_output	jStrtOutput
+#define jpeg_finish_output	jFinOutput
+#define jpeg_input_complete	jInComplete
+#define jpeg_new_colormap	jNewCMap
+#define jpeg_consume_input	jConsumeInput
+#define jpeg_core_output_dimensions	jCoreDimensions
+#define jpeg_calc_output_dimensions	jCalcDimensions
+#define jpeg_save_markers	jSaveMarkers
+#define jpeg_set_marker_processor	jSetMarker
+#define jpeg_read_coefficients	jReadCoefs
+#define jpeg_write_coefficients	jWrtCoefs
+#define jpeg_copy_critical_parameters	jCopyCrit
+#define jpeg_abort_compress	jAbrtCompress
+#define jpeg_abort_decompress	jAbrtDecompress
+#define jpeg_abort		jAbort
+#define jpeg_destroy		jDestroy
+#define jpeg_resync_to_restart	jResyncRestart
+#endif /* NEED_SHORT_EXTERNAL_NAMES */
+
+
+/* Default error-management setup */
+EXTERN(struct jpeg_error_mgr *) jpeg_std_error
+	JPP((struct jpeg_error_mgr * err));
+
+/* Initialization of JPEG compression objects.
+ * jpeg_create_compress() and jpeg_create_decompress() are the exported
+ * names that applications should call.  These expand to calls on
+ * jpeg_CreateCompress and jpeg_CreateDecompress with additional information
+ * passed for version mismatch checking.
+ * NB: you must set up the error-manager BEFORE calling jpeg_create_xxx.
+ */
+#define jpeg_create_compress(cinfo) \
+    jpeg_CreateCompress((cinfo), JPEG_LIB_VERSION, \
+			(size_t) sizeof(struct jpeg_compress_struct))
+#define jpeg_create_decompress(cinfo) \
+    jpeg_CreateDecompress((cinfo), JPEG_LIB_VERSION, \
+			  (size_t) sizeof(struct jpeg_decompress_struct))
+EXTERN(void) jpeg_CreateCompress JPP((j_compress_ptr cinfo,
+				      int version, size_t structsize));
+EXTERN(void) jpeg_CreateDecompress JPP((j_decompress_ptr cinfo,
+					int version, size_t structsize));
+/* Destruction of JPEG compression objects */
+EXTERN(void) jpeg_destroy_compress JPP((j_compress_ptr cinfo));
+EXTERN(void) jpeg_destroy_decompress JPP((j_decompress_ptr cinfo));
+
+/* Standard data source and destination managers: stdio streams. */
+/* Caller is responsible for opening the file before and closing after. */
+EXTERN(void) jpeg_stdio_dest JPP((j_compress_ptr cinfo, FILE * outfile));
+EXTERN(void) jpeg_stdio_src JPP((j_decompress_ptr cinfo, FILE * infile));
+
+/* Data source and destination managers: memory buffers. */
+EXTERN(void) jpeg_mem_dest JPP((j_compress_ptr cinfo,
+			       unsigned char ** outbuffer,
+			       unsigned long * outsize));
+EXTERN(void) jpeg_mem_src JPP((j_decompress_ptr cinfo,
+			      unsigned char * inbuffer,
+			      unsigned long insize));
+
+/* Default parameter setup for compression */
+EXTERN(void) jpeg_set_defaults JPP((j_compress_ptr cinfo));
+/* Compression parameter setup aids */
+EXTERN(void) jpeg_set_colorspace JPP((j_compress_ptr cinfo,
+				      J_COLOR_SPACE colorspace));
+EXTERN(void) jpeg_default_colorspace JPP((j_compress_ptr cinfo));
+EXTERN(void) jpeg_set_quality JPP((j_compress_ptr cinfo, int quality,
+				   boolean force_baseline));
+EXTERN(void) jpeg_set_linear_quality JPP((j_compress_ptr cinfo,
+					  int scale_factor,
+					  boolean force_baseline));
+EXTERN(void) jpeg_default_qtables JPP((j_compress_ptr cinfo,
+				       boolean force_baseline));
+EXTERN(void) jpeg_add_quant_table JPP((j_compress_ptr cinfo, int which_tbl,
+				       const unsigned int *basic_table,
+				       int scale_factor,
+				       boolean force_baseline));
+EXTERN(int) jpeg_quality_scaling JPP((int quality));
+EXTERN(void) jpeg_simple_progression JPP((j_compress_ptr cinfo));
+EXTERN(void) jpeg_suppress_tables JPP((j_compress_ptr cinfo,
+				       boolean suppress));
+EXTERN(JQUANT_TBL *) jpeg_alloc_quant_table JPP((j_common_ptr cinfo));
+EXTERN(JHUFF_TBL *) jpeg_alloc_huff_table JPP((j_common_ptr cinfo));
+
+/* Main entry points for compression */
+EXTERN(void) jpeg_start_compress JPP((j_compress_ptr cinfo,
+				      boolean write_all_tables));
+EXTERN(JDIMENSION) jpeg_write_scanlines JPP((j_compress_ptr cinfo,
+					     JSAMPARRAY scanlines,
+					     JDIMENSION num_lines));
+EXTERN(void) jpeg_finish_compress JPP((j_compress_ptr cinfo));
+
+/* Precalculate JPEG dimensions for current compression parameters. */
+EXTERN(void) jpeg_calc_jpeg_dimensions JPP((j_compress_ptr cinfo));
+
+/* Replaces jpeg_write_scanlines when writing raw downsampled data. */
+EXTERN(JDIMENSION) jpeg_write_raw_data JPP((j_compress_ptr cinfo,
+					    JSAMPIMAGE data,
+					    JDIMENSION num_lines));
+
+/* Write a special marker.  See libjpeg.txt concerning safe usage. */
+EXTERN(void) jpeg_write_marker
+	JPP((j_compress_ptr cinfo, int marker,
+	     const JOCTET * dataptr, unsigned int datalen));
+/* Same, but piecemeal. */
+EXTERN(void) jpeg_write_m_header
+	JPP((j_compress_ptr cinfo, int marker, unsigned int datalen));
+EXTERN(void) jpeg_write_m_byte
+	JPP((j_compress_ptr cinfo, int val));
+
+/* Alternate compression function: just write an abbreviated table file */
+EXTERN(void) jpeg_write_tables JPP((j_compress_ptr cinfo));
+
+/* Decompression startup: read start of JPEG datastream to see what's there */
+EXTERN(int) jpeg_read_header JPP((j_decompress_ptr cinfo,
+				  boolean require_image));
+/* Return value is one of: */
+#define JPEG_SUSPENDED		0 /* Suspended due to lack of input data */
+#define JPEG_HEADER_OK		1 /* Found valid image datastream */
+#define JPEG_HEADER_TABLES_ONLY	2 /* Found valid table-specs-only datastream */
+/* If you pass require_image = TRUE (normal case), you need not check for
+ * a TABLES_ONLY return code; an abbreviated file will cause an error exit.
+ * JPEG_SUSPENDED is only possible if you use a data source module that can
+ * give a suspension return (the stdio source module doesn't).
+ */
+
+/* Main entry points for decompression */
+EXTERN(boolean) jpeg_start_decompress JPP((j_decompress_ptr cinfo));
+EXTERN(JDIMENSION) jpeg_read_scanlines JPP((j_decompress_ptr cinfo,
+					    JSAMPARRAY scanlines,
+					    JDIMENSION max_lines));
+EXTERN(boolean) jpeg_finish_decompress JPP((j_decompress_ptr cinfo));
+
+/* Replaces jpeg_read_scanlines when reading raw downsampled data. */
+EXTERN(JDIMENSION) jpeg_read_raw_data JPP((j_decompress_ptr cinfo,
+					   JSAMPIMAGE data,
+					   JDIMENSION max_lines));
+
+/* Additional entry points for buffered-image mode. */
+EXTERN(boolean) jpeg_has_multiple_scans JPP((j_decompress_ptr cinfo));
+EXTERN(boolean) jpeg_start_output JPP((j_decompress_ptr cinfo,
+				       int scan_number));
+EXTERN(boolean) jpeg_finish_output JPP((j_decompress_ptr cinfo));
+EXTERN(boolean) jpeg_input_complete JPP((j_decompress_ptr cinfo));
+EXTERN(void) jpeg_new_colormap JPP((j_decompress_ptr cinfo));
+EXTERN(int) jpeg_consume_input JPP((j_decompress_ptr cinfo));
+/* Return value is one of: */
+/* #define JPEG_SUSPENDED	0    Suspended due to lack of input data */
+#define JPEG_REACHED_SOS	1 /* Reached start of new scan */
+#define JPEG_REACHED_EOI	2 /* Reached end of image */
+#define JPEG_ROW_COMPLETED	3 /* Completed one iMCU row */
+#define JPEG_SCAN_COMPLETED	4 /* Completed last iMCU row of a scan */
+
+/* Precalculate output dimensions for current decompression parameters. */
+EXTERN(void) jpeg_core_output_dimensions JPP((j_decompress_ptr cinfo));
+EXTERN(void) jpeg_calc_output_dimensions JPP((j_decompress_ptr cinfo));
+
+/* Control saving of COM and APPn markers into marker_list. */
+EXTERN(void) jpeg_save_markers
+	JPP((j_decompress_ptr cinfo, int marker_code,
+	     unsigned int length_limit));
+
+/* Install a special processing method for COM or APPn markers. */
+EXTERN(void) jpeg_set_marker_processor
+	JPP((j_decompress_ptr cinfo, int marker_code,
+	     jpeg_marker_parser_method routine));
+
+/* Read or write raw DCT coefficients --- useful for lossless transcoding. */
+EXTERN(jvirt_barray_ptr *) jpeg_read_coefficients JPP((j_decompress_ptr cinfo));
+EXTERN(void) jpeg_write_coefficients JPP((j_compress_ptr cinfo,
+					  jvirt_barray_ptr * coef_arrays));
+EXTERN(void) jpeg_copy_critical_parameters JPP((j_decompress_ptr srcinfo,
+						j_compress_ptr dstinfo));
+
+/* If you choose to abort compression or decompression before completing
+ * jpeg_finish_(de)compress, then you need to clean up to release memory,
+ * temporary files, etc.  You can just call jpeg_destroy_(de)compress
+ * if you're done with the JPEG object, but if you want to clean it up and
+ * reuse it, call this:
+ */
+EXTERN(void) jpeg_abort_compress JPP((j_compress_ptr cinfo));
+EXTERN(void) jpeg_abort_decompress JPP((j_decompress_ptr cinfo));
+
+/* Generic versions of jpeg_abort and jpeg_destroy that work on either
+ * flavor of JPEG object.  These may be more convenient in some places.
+ */
+EXTERN(void) jpeg_abort JPP((j_common_ptr cinfo));
+EXTERN(void) jpeg_destroy JPP((j_common_ptr cinfo));
+
+/* Default restart-marker-resync procedure for use by data source modules */
+EXTERN(boolean) jpeg_resync_to_restart JPP((j_decompress_ptr cinfo,
+					    int desired));
+
+
+/* These marker codes are exported since applications and data source modules
+ * are likely to want to use them.
+ */
+
+#define JPEG_RST0	0xD0	/* RST0 marker code */
+#define JPEG_EOI	0xD9	/* EOI marker code */
+#define JPEG_APP0	0xE0	/* APP0 marker code */
+#define JPEG_COM	0xFE	/* COM marker code */
+
+
+/* If we have a brain-damaged compiler that emits warnings (or worse, errors)
+ * for structure definitions that are never filled in, keep it quiet by
+ * supplying dummy definitions for the various substructures.
+ */
+
+#ifdef INCOMPLETE_TYPES_BROKEN
+#ifndef JPEG_INTERNALS		/* will be defined in jpegint.h */
+struct jvirt_sarray_control { long dummy; };
+struct jvirt_barray_control { long dummy; };
+struct jpeg_comp_master { long dummy; };
+struct jpeg_c_main_controller { long dummy; };
+struct jpeg_c_prep_controller { long dummy; };
+struct jpeg_c_coef_controller { long dummy; };
+struct jpeg_marker_writer { long dummy; };
+struct jpeg_color_converter { long dummy; };
+struct jpeg_downsampler { long dummy; };
+struct jpeg_forward_dct { long dummy; };
+struct jpeg_entropy_encoder { long dummy; };
+struct jpeg_decomp_master { long dummy; };
+struct jpeg_d_main_controller { long dummy; };
+struct jpeg_d_coef_controller { long dummy; };
+struct jpeg_d_post_controller { long dummy; };
+struct jpeg_input_controller { long dummy; };
+struct jpeg_marker_reader { long dummy; };
+struct jpeg_entropy_decoder { long dummy; };
+struct jpeg_inverse_dct { long dummy; };
+struct jpeg_upsampler { long dummy; };
+struct jpeg_color_deconverter { long dummy; };
+struct jpeg_color_quantizer { long dummy; };
+#endif /* JPEG_INTERNALS */
+#endif /* INCOMPLETE_TYPES_BROKEN */
+
+
+/*
+ * The JPEG library modules define JPEG_INTERNALS before including this file.
+ * The internal structure declarations are read only when that is true.
+ * Applications using the library should not include jpegint.h, but may wish
+ * to include jerror.h.
+ */
+
+#ifdef JPEG_INTERNALS
+#include "jpegint.h"		/* fetch private declarations */
+#include "jerror.h"		/* fetch error codes too */
+#endif
+
+#ifdef __cplusplus
+#ifndef DONT_USE_EXTERN_C
+}
+#endif
+#endif
+
+#endif /* JPEGLIB_H */
diff --git a/plugins/AdvaImg/src/LibJPEG/jpegtran.c b/plugins/AdvaImg/src/LibJPEG/jpegtran.c
index 2193ffe377..c15664a4f4 100644
--- a/plugins/AdvaImg/src/LibJPEG/jpegtran.c
+++ b/plugins/AdvaImg/src/LibJPEG/jpegtran.c
@@ -1,7 +1,7 @@
 /*
  * jpegtran.c
  *
- * Copyright (C) 1995-2011, Thomas G. Lane, Guido Vollbeding.
+ * Copyright (C) 1995-2012, Thomas G. Lane, Guido Vollbeding.
  * This file is part of the Independent JPEG Group's software.
  * For conditions of distribution and use, see the accompanying README file.
  *
@@ -467,7 +467,7 @@ main (int argc, char **argv)
 
   /* Adjust default decompression parameters */
   if (scaleoption != NULL)
-    if (sscanf(scaleoption, "%d/%d",
+    if (sscanf(scaleoption, "%u/%u",
 	&srcinfo.scale_num, &srcinfo.scale_denom) < 1)
       usage();
 
diff --git a/plugins/AdvaImg/src/LibJPEG/jquant1.c b/plugins/AdvaImg/src/LibJPEG/jquant1.c
index 9d11f70669..1c482bc4b1 100644
--- a/plugins/AdvaImg/src/LibJPEG/jquant1.c
+++ b/plugins/AdvaImg/src/LibJPEG/jquant1.c
@@ -1,857 +1,857 @@
-/*
- * jquant1.c
- *
- * Copyright (C) 1991-1996, Thomas G. Lane.
- * Modified 2011 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains 1-pass color quantization (color mapping) routines.
- * These routines provide mapping to a fixed color map using equally spaced
- * color values.  Optional Floyd-Steinberg or ordered dithering is available.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-
-#ifdef QUANT_1PASS_SUPPORTED
-
-
-/*
- * The main purpose of 1-pass quantization is to provide a fast, if not very
- * high quality, colormapped output capability.  A 2-pass quantizer usually
- * gives better visual quality; however, for quantized grayscale output this
- * quantizer is perfectly adequate.  Dithering is highly recommended with this
- * quantizer, though you can turn it off if you really want to.
- *
- * In 1-pass quantization the colormap must be chosen in advance of seeing the
- * image.  We use a map consisting of all combinations of Ncolors[i] color
- * values for the i'th component.  The Ncolors[] values are chosen so that
- * their product, the total number of colors, is no more than that requested.
- * (In most cases, the product will be somewhat less.)
- *
- * Since the colormap is orthogonal, the representative value for each color
- * component can be determined without considering the other components;
- * then these indexes can be combined into a colormap index by a standard
- * N-dimensional-array-subscript calculation.  Most of the arithmetic involved
- * can be precalculated and stored in the lookup table colorindex[].
- * colorindex[i][j] maps pixel value j in component i to the nearest
- * representative value (grid plane) for that component; this index is
- * multiplied by the array stride for component i, so that the
- * index of the colormap entry closest to a given pixel value is just
- *    sum( colorindex[component-number][pixel-component-value] )
- * Aside from being fast, this scheme allows for variable spacing between
- * representative values with no additional lookup cost.
- *
- * If gamma correction has been applied in color conversion, it might be wise
- * to adjust the color grid spacing so that the representative colors are
- * equidistant in linear space.  At this writing, gamma correction is not
- * implemented by jdcolor, so nothing is done here.
- */
-
-
-/* Declarations for ordered dithering.
- *
- * We use a standard 16x16 ordered dither array.  The basic concept of ordered
- * dithering is described in many references, for instance Dale Schumacher's
- * chapter II.2 of Graphics Gems II (James Arvo, ed. Academic Press, 1991).
- * In place of Schumacher's comparisons against a "threshold" value, we add a
- * "dither" value to the input pixel and then round the result to the nearest
- * output value.  The dither value is equivalent to (0.5 - threshold) times
- * the distance between output values.  For ordered dithering, we assume that
- * the output colors are equally spaced; if not, results will probably be
- * worse, since the dither may be too much or too little at a given point.
- *
- * The normal calculation would be to form pixel value + dither, range-limit
- * this to 0..MAXJSAMPLE, and then index into the colorindex table as usual.
- * We can skip the separate range-limiting step by extending the colorindex
- * table in both directions.
- */
-
-#define ODITHER_SIZE  16	/* dimension of dither matrix */
-/* NB: if ODITHER_SIZE is not a power of 2, ODITHER_MASK uses will break */
-#define ODITHER_CELLS (ODITHER_SIZE*ODITHER_SIZE)	/* # cells in matrix */
-#define ODITHER_MASK  (ODITHER_SIZE-1) /* mask for wrapping around counters */
-
-typedef int ODITHER_MATRIX[ODITHER_SIZE][ODITHER_SIZE];
-typedef int (*ODITHER_MATRIX_PTR)[ODITHER_SIZE];
-
-static const UINT8 base_dither_matrix[ODITHER_SIZE][ODITHER_SIZE] = {
-  /* Bayer's order-4 dither array.  Generated by the code given in
-   * Stephen Hawley's article "Ordered Dithering" in Graphics Gems I.
-   * The values in this array must range from 0 to ODITHER_CELLS-1.
-   */
-  {   0,192, 48,240, 12,204, 60,252,  3,195, 51,243, 15,207, 63,255 },
-  { 128, 64,176,112,140, 76,188,124,131, 67,179,115,143, 79,191,127 },
-  {  32,224, 16,208, 44,236, 28,220, 35,227, 19,211, 47,239, 31,223 },
-  { 160, 96,144, 80,172,108,156, 92,163, 99,147, 83,175,111,159, 95 },
-  {   8,200, 56,248,  4,196, 52,244, 11,203, 59,251,  7,199, 55,247 },
-  { 136, 72,184,120,132, 68,180,116,139, 75,187,123,135, 71,183,119 },
-  {  40,232, 24,216, 36,228, 20,212, 43,235, 27,219, 39,231, 23,215 },
-  { 168,104,152, 88,164,100,148, 84,171,107,155, 91,167,103,151, 87 },
-  {   2,194, 50,242, 14,206, 62,254,  1,193, 49,241, 13,205, 61,253 },
-  { 130, 66,178,114,142, 78,190,126,129, 65,177,113,141, 77,189,125 },
-  {  34,226, 18,210, 46,238, 30,222, 33,225, 17,209, 45,237, 29,221 },
-  { 162, 98,146, 82,174,110,158, 94,161, 97,145, 81,173,109,157, 93 },
-  {  10,202, 58,250,  6,198, 54,246,  9,201, 57,249,  5,197, 53,245 },
-  { 138, 74,186,122,134, 70,182,118,137, 73,185,121,133, 69,181,117 },
-  {  42,234, 26,218, 38,230, 22,214, 41,233, 25,217, 37,229, 21,213 },
-  { 170,106,154, 90,166,102,150, 86,169,105,153, 89,165,101,149, 85 }
-};
-
-
-/* Declarations for Floyd-Steinberg dithering.
- *
- * Errors are accumulated into the array fserrors[], at a resolution of
- * 1/16th of a pixel count.  The error at a given pixel is propagated
- * to its not-yet-processed neighbors using the standard F-S fractions,
- *		...	(here)	7/16
- *		3/16	5/16	1/16
- * We work left-to-right on even rows, right-to-left on odd rows.
- *
- * We can get away with a single array (holding one row's worth of errors)
- * by using it to store the current row's errors at pixel columns not yet
- * processed, but the next row's errors at columns already processed.  We
- * need only a few extra variables to hold the errors immediately around the
- * current column.  (If we are lucky, those variables are in registers, but
- * even if not, they're probably cheaper to access than array elements are.)
- *
- * The fserrors[] array is indexed [component#][position].
- * We provide (#columns + 2) entries per component; the extra entry at each
- * end saves us from special-casing the first and last pixels.
- *
- * Note: on a wide image, we might not have enough room in a PC's near data
- * segment to hold the error array; so it is allocated with alloc_large.
- */
-
-#if BITS_IN_JSAMPLE == 8
-typedef INT16 FSERROR;		/* 16 bits should be enough */
-typedef int LOCFSERROR;		/* use 'int' for calculation temps */
-#else
-typedef INT32 FSERROR;		/* may need more than 16 bits */
-typedef INT32 LOCFSERROR;	/* be sure calculation temps are big enough */
-#endif
-
-typedef FSERROR FAR *FSERRPTR;	/* pointer to error array (in FAR storage!) */
-
-
-/* Private subobject */
-
-#define MAX_Q_COMPS 4		/* max components I can handle */
-
-typedef struct {
-  struct jpeg_color_quantizer pub; /* public fields */
-
-  /* Initially allocated colormap is saved here */
-  JSAMPARRAY sv_colormap;	/* The color map as a 2-D pixel array */
-  int sv_actual;		/* number of entries in use */
-
-  JSAMPARRAY colorindex;	/* Precomputed mapping for speed */
-  /* colorindex[i][j] = index of color closest to pixel value j in component i,
-   * premultiplied as described above.  Since colormap indexes must fit into
-   * JSAMPLEs, the entries of this array will too.
-   */
-  boolean is_padded;		/* is the colorindex padded for odither? */
-
-  int Ncolors[MAX_Q_COMPS];	/* # of values alloced to each component */
-
-  /* Variables for ordered dithering */
-  int row_index;		/* cur row's vertical index in dither matrix */
-  ODITHER_MATRIX_PTR odither[MAX_Q_COMPS]; /* one dither array per component */
-
-  /* Variables for Floyd-Steinberg dithering */
-  FSERRPTR fserrors[MAX_Q_COMPS]; /* accumulated errors */
-  boolean on_odd_row;		/* flag to remember which row we are on */
-} my_cquantizer;
-
-typedef my_cquantizer * my_cquantize_ptr;
-
-
-/*
- * Policy-making subroutines for create_colormap and create_colorindex.
- * These routines determine the colormap to be used.  The rest of the module
- * only assumes that the colormap is orthogonal.
- *
- *  * select_ncolors decides how to divvy up the available colors
- *    among the components.
- *  * output_value defines the set of representative values for a component.
- *  * largest_input_value defines the mapping from input values to
- *    representative values for a component.
- * Note that the latter two routines may impose different policies for
- * different components, though this is not currently done.
- */
-
-
-LOCAL(int)
-select_ncolors (j_decompress_ptr cinfo, int Ncolors[])
-/* Determine allocation of desired colors to components, */
-/* and fill in Ncolors[] array to indicate choice. */
-/* Return value is total number of colors (product of Ncolors[] values). */
-{
-  int nc = cinfo->out_color_components; /* number of color components */
-  int max_colors = cinfo->desired_number_of_colors;
-  int total_colors, iroot, i, j;
-  boolean changed;
-  long temp;
-  static const int RGB_order[3] = { RGB_GREEN, RGB_RED, RGB_BLUE };
-
-  /* We can allocate at least the nc'th root of max_colors per component. */
-  /* Compute floor(nc'th root of max_colors). */
-  iroot = 1;
-  do {
-    iroot++;
-    temp = iroot;		/* set temp = iroot ** nc */
-    for (i = 1; i < nc; i++)
-      temp *= iroot;
-  } while (temp <= (long) max_colors); /* repeat till iroot exceeds root */
-  iroot--;			/* now iroot = floor(root) */
-
-  /* Must have at least 2 color values per component */
-  if (iroot < 2)
-    ERREXIT1(cinfo, JERR_QUANT_FEW_COLORS, (int) temp);
-
-  /* Initialize to iroot color values for each component */
-  total_colors = 1;
-  for (i = 0; i < nc; i++) {
-    Ncolors[i] = iroot;
-    total_colors *= iroot;
-  }
-  /* We may be able to increment the count for one or more components without
-   * exceeding max_colors, though we know not all can be incremented.
-   * Sometimes, the first component can be incremented more than once!
-   * (Example: for 16 colors, we start at 2*2*2, go to 3*2*2, then 4*2*2.)
-   * In RGB colorspace, try to increment G first, then R, then B.
-   */
-  do {
-    changed = FALSE;
-    for (i = 0; i < nc; i++) {
-      j = (cinfo->out_color_space == JCS_RGB ? RGB_order[i] : i);
-      /* calculate new total_colors if Ncolors[j] is incremented */
-      temp = total_colors / Ncolors[j];
-      temp *= Ncolors[j]+1;	/* done in long arith to avoid oflo */
-      if (temp > (long) max_colors)
-	break;			/* won't fit, done with this pass */
-      Ncolors[j]++;		/* OK, apply the increment */
-      total_colors = (int) temp;
-      changed = TRUE;
-    }
-  } while (changed);
-
-  return total_colors;
-}
-
-
-LOCAL(int)
-output_value (j_decompress_ptr cinfo, int ci, int j, int maxj)
-/* Return j'th output value, where j will range from 0 to maxj */
-/* The output values must fall in 0..MAXJSAMPLE in increasing order */
-{
-  /* We always provide values 0 and MAXJSAMPLE for each component;
-   * any additional values are equally spaced between these limits.
-   * (Forcing the upper and lower values to the limits ensures that
-   * dithering can't produce a color outside the selected gamut.)
-   */
-  return (int) (((INT32) j * MAXJSAMPLE + maxj/2) / maxj);
-}
-
-
-LOCAL(int)
-largest_input_value (j_decompress_ptr cinfo, int ci, int j, int maxj)
-/* Return largest input value that should map to j'th output value */
-/* Must have largest(j=0) >= 0, and largest(j=maxj) >= MAXJSAMPLE */
-{
-  /* Breakpoints are halfway between values returned by output_value */
-  return (int) (((INT32) (2*j + 1) * MAXJSAMPLE + maxj) / (2*maxj));
-}
-
-
-/*
- * Create the colormap.
- */
-
-LOCAL(void)
-create_colormap (j_decompress_ptr cinfo)
-{
-  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
-  JSAMPARRAY colormap;		/* Created colormap */
-  int total_colors;		/* Number of distinct output colors */
-  int i,j,k, nci, blksize, blkdist, ptr, val;
-
-  /* Select number of colors for each component */
-  total_colors = select_ncolors(cinfo, cquantize->Ncolors);
-
-  /* Report selected color counts */
-  if (cinfo->out_color_components == 3)
-    TRACEMS4(cinfo, 1, JTRC_QUANT_3_NCOLORS,
-	     total_colors, cquantize->Ncolors[0],
-	     cquantize->Ncolors[1], cquantize->Ncolors[2]);
-  else
-    TRACEMS1(cinfo, 1, JTRC_QUANT_NCOLORS, total_colors);
-
-  /* Allocate and fill in the colormap. */
-  /* The colors are ordered in the map in standard row-major order, */
-  /* i.e. rightmost (highest-indexed) color changes most rapidly. */
-
-  colormap = (*cinfo->mem->alloc_sarray)
-    ((j_common_ptr) cinfo, JPOOL_IMAGE,
-     (JDIMENSION) total_colors, (JDIMENSION) cinfo->out_color_components);
-
-  /* blksize is number of adjacent repeated entries for a component */
-  /* blkdist is distance between groups of identical entries for a component */
-  blkdist = total_colors;
-
-  for (i = 0; i < cinfo->out_color_components; i++) {
-    /* fill in colormap entries for i'th color component */
-    nci = cquantize->Ncolors[i]; /* # of distinct values for this color */
-    blksize = blkdist / nci;
-    for (j = 0; j < nci; j++) {
-      /* Compute j'th output value (out of nci) for component */
-      val = output_value(cinfo, i, j, nci-1);
-      /* Fill in all colormap entries that have this value of this component */
-      for (ptr = j * blksize; ptr < total_colors; ptr += blkdist) {
-	/* fill in blksize entries beginning at ptr */
-	for (k = 0; k < blksize; k++)
-	  colormap[i][ptr+k] = (JSAMPLE) val;
-      }
-    }
-    blkdist = blksize;		/* blksize of this color is blkdist of next */
-  }
-
-  /* Save the colormap in private storage,
-   * where it will survive color quantization mode changes.
-   */
-  cquantize->sv_colormap = colormap;
-  cquantize->sv_actual = total_colors;
-}
-
-
-/*
- * Create the color index table.
- */
-
-LOCAL(void)
-create_colorindex (j_decompress_ptr cinfo)
-{
-  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
-  JSAMPROW indexptr;
-  int i,j,k, nci, blksize, val, pad;
-
-  /* For ordered dither, we pad the color index tables by MAXJSAMPLE in
-   * each direction (input index values can be -MAXJSAMPLE .. 2*MAXJSAMPLE).
-   * This is not necessary in the other dithering modes.  However, we
-   * flag whether it was done in case user changes dithering mode.
-   */
-  if (cinfo->dither_mode == JDITHER_ORDERED) {
-    pad = MAXJSAMPLE*2;
-    cquantize->is_padded = TRUE;
-  } else {
-    pad = 0;
-    cquantize->is_padded = FALSE;
-  }
-
-  cquantize->colorindex = (*cinfo->mem->alloc_sarray)
-    ((j_common_ptr) cinfo, JPOOL_IMAGE,
-     (JDIMENSION) (MAXJSAMPLE+1 + pad),
-     (JDIMENSION) cinfo->out_color_components);
-
-  /* blksize is number of adjacent repeated entries for a component */
-  blksize = cquantize->sv_actual;
-
-  for (i = 0; i < cinfo->out_color_components; i++) {
-    /* fill in colorindex entries for i'th color component */
-    nci = cquantize->Ncolors[i]; /* # of distinct values for this color */
-    blksize = blksize / nci;
-
-    /* adjust colorindex pointers to provide padding at negative indexes. */
-    if (pad)
-      cquantize->colorindex[i] += MAXJSAMPLE;
-
-    /* in loop, val = index of current output value, */
-    /* and k = largest j that maps to current val */
-    indexptr = cquantize->colorindex[i];
-    val = 0;
-    k = largest_input_value(cinfo, i, 0, nci-1);
-    for (j = 0; j <= MAXJSAMPLE; j++) {
-      while (j > k)		/* advance val if past boundary */
-	k = largest_input_value(cinfo, i, ++val, nci-1);
-      /* premultiply so that no multiplication needed in main processing */
-      indexptr[j] = (JSAMPLE) (val * blksize);
-    }
-    /* Pad at both ends if necessary */
-    if (pad)
-      for (j = 1; j <= MAXJSAMPLE; j++) {
-	indexptr[-j] = indexptr[0];
-	indexptr[MAXJSAMPLE+j] = indexptr[MAXJSAMPLE];
-      }
-  }
-}
-
-
-/*
- * Create an ordered-dither array for a component having ncolors
- * distinct output values.
- */
-
-LOCAL(ODITHER_MATRIX_PTR)
-make_odither_array (j_decompress_ptr cinfo, int ncolors)
-{
-  ODITHER_MATRIX_PTR odither;
-  int j,k;
-  INT32 num,den;
-
-  odither = (ODITHER_MATRIX_PTR)
-    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				SIZEOF(ODITHER_MATRIX));
-  /* The inter-value distance for this color is MAXJSAMPLE/(ncolors-1).
-   * Hence the dither value for the matrix cell with fill order f
-   * (f=0..N-1) should be (N-1-2*f)/(2*N) * MAXJSAMPLE/(ncolors-1).
-   * On 16-bit-int machine, be careful to avoid overflow.
-   */
-  den = 2 * ODITHER_CELLS * ((INT32) (ncolors - 1));
-  for (j = 0; j < ODITHER_SIZE; j++) {
-    for (k = 0; k < ODITHER_SIZE; k++) {
-      num = ((INT32) (ODITHER_CELLS-1 - 2*((int)base_dither_matrix[j][k])))
-	    * MAXJSAMPLE;
-      /* Ensure round towards zero despite C's lack of consistency
-       * about rounding negative values in integer division...
-       */
-      odither[j][k] = (int) (num<0 ? -((-num)/den) : num/den);
-    }
-  }
-  return odither;
-}
-
-
-/*
- * Create the ordered-dither tables.
- * Components having the same number of representative colors may 
- * share a dither table.
- */
-
-LOCAL(void)
-create_odither_tables (j_decompress_ptr cinfo)
-{
-  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
-  ODITHER_MATRIX_PTR odither;
-  int i, j, nci;
-
-  for (i = 0; i < cinfo->out_color_components; i++) {
-    nci = cquantize->Ncolors[i]; /* # of distinct values for this color */
-    odither = NULL;		/* search for matching prior component */
-    for (j = 0; j < i; j++) {
-      if (nci == cquantize->Ncolors[j]) {
-	odither = cquantize->odither[j];
-	break;
-      }
-    }
-    if (odither == NULL)	/* need a new table? */
-      odither = make_odither_array(cinfo, nci);
-    cquantize->odither[i] = odither;
-  }
-}
-
-
-/*
- * Map some rows of pixels to the output colormapped representation.
- */
-
-METHODDEF(void)
-color_quantize (j_decompress_ptr cinfo, JSAMPARRAY input_buf,
-		JSAMPARRAY output_buf, int num_rows)
-/* General case, no dithering */
-{
-  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
-  JSAMPARRAY colorindex = cquantize->colorindex;
-  register int pixcode, ci;
-  register JSAMPROW ptrin, ptrout;
-  int row;
-  JDIMENSION col;
-  JDIMENSION width = cinfo->output_width;
-  register int nc = cinfo->out_color_components;
-
-  for (row = 0; row < num_rows; row++) {
-    ptrin = input_buf[row];
-    ptrout = output_buf[row];
-    for (col = width; col > 0; col--) {
-      pixcode = 0;
-      for (ci = 0; ci < nc; ci++) {
-	pixcode += GETJSAMPLE(colorindex[ci][GETJSAMPLE(*ptrin++)]);
-      }
-      *ptrout++ = (JSAMPLE) pixcode;
-    }
-  }
-}
-
-
-METHODDEF(void)
-color_quantize3 (j_decompress_ptr cinfo, JSAMPARRAY input_buf,
-		 JSAMPARRAY output_buf, int num_rows)
-/* Fast path for out_color_components==3, no dithering */
-{
-  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
-  register int pixcode;
-  register JSAMPROW ptrin, ptrout;
-  JSAMPROW colorindex0 = cquantize->colorindex[0];
-  JSAMPROW colorindex1 = cquantize->colorindex[1];
-  JSAMPROW colorindex2 = cquantize->colorindex[2];
-  int row;
-  JDIMENSION col;
-  JDIMENSION width = cinfo->output_width;
-
-  for (row = 0; row < num_rows; row++) {
-    ptrin = input_buf[row];
-    ptrout = output_buf[row];
-    for (col = width; col > 0; col--) {
-      pixcode  = GETJSAMPLE(colorindex0[GETJSAMPLE(*ptrin++)]);
-      pixcode += GETJSAMPLE(colorindex1[GETJSAMPLE(*ptrin++)]);
-      pixcode += GETJSAMPLE(colorindex2[GETJSAMPLE(*ptrin++)]);
-      *ptrout++ = (JSAMPLE) pixcode;
-    }
-  }
-}
-
-
-METHODDEF(void)
-quantize_ord_dither (j_decompress_ptr cinfo, JSAMPARRAY input_buf,
-		     JSAMPARRAY output_buf, int num_rows)
-/* General case, with ordered dithering */
-{
-  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
-  register JSAMPROW input_ptr;
-  register JSAMPROW output_ptr;
-  JSAMPROW colorindex_ci;
-  int * dither;			/* points to active row of dither matrix */
-  int row_index, col_index;	/* current indexes into dither matrix */
-  int nc = cinfo->out_color_components;
-  int ci;
-  int row;
-  JDIMENSION col;
-  JDIMENSION width = cinfo->output_width;
-
-  for (row = 0; row < num_rows; row++) {
-    /* Initialize output values to 0 so can process components separately */
-    FMEMZERO((void FAR *) output_buf[row],
-	     (size_t) (width * SIZEOF(JSAMPLE)));
-    row_index = cquantize->row_index;
-    for (ci = 0; ci < nc; ci++) {
-      input_ptr = input_buf[row] + ci;
-      output_ptr = output_buf[row];
-      colorindex_ci = cquantize->colorindex[ci];
-      dither = cquantize->odither[ci][row_index];
-      col_index = 0;
-
-      for (col = width; col > 0; col--) {
-	/* Form pixel value + dither, range-limit to 0..MAXJSAMPLE,
-	 * select output value, accumulate into output code for this pixel.
-	 * Range-limiting need not be done explicitly, as we have extended
-	 * the colorindex table to produce the right answers for out-of-range
-	 * inputs.  The maximum dither is +- MAXJSAMPLE; this sets the
-	 * required amount of padding.
-	 */
-	*output_ptr += colorindex_ci[GETJSAMPLE(*input_ptr)+dither[col_index]];
-	input_ptr += nc;
-	output_ptr++;
-	col_index = (col_index + 1) & ODITHER_MASK;
-      }
-    }
-    /* Advance row index for next row */
-    row_index = (row_index + 1) & ODITHER_MASK;
-    cquantize->row_index = row_index;
-  }
-}
-
-
-METHODDEF(void)
-quantize3_ord_dither (j_decompress_ptr cinfo, JSAMPARRAY input_buf,
-		      JSAMPARRAY output_buf, int num_rows)
-/* Fast path for out_color_components==3, with ordered dithering */
-{
-  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
-  register int pixcode;
-  register JSAMPROW input_ptr;
-  register JSAMPROW output_ptr;
-  JSAMPROW colorindex0 = cquantize->colorindex[0];
-  JSAMPROW colorindex1 = cquantize->colorindex[1];
-  JSAMPROW colorindex2 = cquantize->colorindex[2];
-  int * dither0;		/* points to active row of dither matrix */
-  int * dither1;
-  int * dither2;
-  int row_index, col_index;	/* current indexes into dither matrix */
-  int row;
-  JDIMENSION col;
-  JDIMENSION width = cinfo->output_width;
-
-  for (row = 0; row < num_rows; row++) {
-    row_index = cquantize->row_index;
-    input_ptr = input_buf[row];
-    output_ptr = output_buf[row];
-    dither0 = cquantize->odither[0][row_index];
-    dither1 = cquantize->odither[1][row_index];
-    dither2 = cquantize->odither[2][row_index];
-    col_index = 0;
-
-    for (col = width; col > 0; col--) {
-      pixcode  = GETJSAMPLE(colorindex0[GETJSAMPLE(*input_ptr++) +
-					dither0[col_index]]);
-      pixcode += GETJSAMPLE(colorindex1[GETJSAMPLE(*input_ptr++) +
-					dither1[col_index]]);
-      pixcode += GETJSAMPLE(colorindex2[GETJSAMPLE(*input_ptr++) +
-					dither2[col_index]]);
-      *output_ptr++ = (JSAMPLE) pixcode;
-      col_index = (col_index + 1) & ODITHER_MASK;
-    }
-    row_index = (row_index + 1) & ODITHER_MASK;
-    cquantize->row_index = row_index;
-  }
-}
-
-
-METHODDEF(void)
-quantize_fs_dither (j_decompress_ptr cinfo, JSAMPARRAY input_buf,
-		    JSAMPARRAY output_buf, int num_rows)
-/* General case, with Floyd-Steinberg dithering */
-{
-  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
-  register LOCFSERROR cur;	/* current error or pixel value */
-  LOCFSERROR belowerr;		/* error for pixel below cur */
-  LOCFSERROR bpreverr;		/* error for below/prev col */
-  LOCFSERROR bnexterr;		/* error for below/next col */
-  LOCFSERROR delta;
-  register FSERRPTR errorptr;	/* => fserrors[] at column before current */
-  register JSAMPROW input_ptr;
-  register JSAMPROW output_ptr;
-  JSAMPROW colorindex_ci;
-  JSAMPROW colormap_ci;
-  int pixcode;
-  int nc = cinfo->out_color_components;
-  int dir;			/* 1 for left-to-right, -1 for right-to-left */
-  int dirnc;			/* dir * nc */
-  int ci;
-  int row;
-  JDIMENSION col;
-  JDIMENSION width = cinfo->output_width;
-  JSAMPLE *range_limit = cinfo->sample_range_limit;
-  SHIFT_TEMPS
-
-  for (row = 0; row < num_rows; row++) {
-    /* Initialize output values to 0 so can process components separately */
-    FMEMZERO((void FAR *) output_buf[row],
-	     (size_t) (width * SIZEOF(JSAMPLE)));
-    for (ci = 0; ci < nc; ci++) {
-      input_ptr = input_buf[row] + ci;
-      output_ptr = output_buf[row];
-      if (cquantize->on_odd_row) {
-	/* work right to left in this row */
-	input_ptr += (width-1) * nc; /* so point to rightmost pixel */
-	output_ptr += width-1;
-	dir = -1;
-	dirnc = -nc;
-	errorptr = cquantize->fserrors[ci] + (width+1); /* => entry after last column */
-      } else {
-	/* work left to right in this row */
-	dir = 1;
-	dirnc = nc;
-	errorptr = cquantize->fserrors[ci]; /* => entry before first column */
-      }
-      colorindex_ci = cquantize->colorindex[ci];
-      colormap_ci = cquantize->sv_colormap[ci];
-      /* Preset error values: no error propagated to first pixel from left */
-      cur = 0;
-      /* and no error propagated to row below yet */
-      belowerr = bpreverr = 0;
-
-      for (col = width; col > 0; col--) {
-	/* cur holds the error propagated from the previous pixel on the
-	 * current line.  Add the error propagated from the previous line
-	 * to form the complete error correction term for this pixel, and
-	 * round the error term (which is expressed * 16) to an integer.
-	 * RIGHT_SHIFT rounds towards minus infinity, so adding 8 is correct
-	 * for either sign of the error value.
-	 * Note: errorptr points to *previous* column's array entry.
-	 */
-	cur = RIGHT_SHIFT(cur + errorptr[dir] + 8, 4);
-	/* Form pixel value + error, and range-limit to 0..MAXJSAMPLE.
-	 * The maximum error is +- MAXJSAMPLE; this sets the required size
-	 * of the range_limit array.
-	 */
-	cur += GETJSAMPLE(*input_ptr);
-	cur = GETJSAMPLE(range_limit[cur]);
-	/* Select output value, accumulate into output code for this pixel */
-	pixcode = GETJSAMPLE(colorindex_ci[cur]);
-	*output_ptr += (JSAMPLE) pixcode;
-	/* Compute actual representation error at this pixel */
-	/* Note: we can do this even though we don't have the final */
-	/* pixel code, because the colormap is orthogonal. */
-	cur -= GETJSAMPLE(colormap_ci[pixcode]);
-	/* Compute error fractions to be propagated to adjacent pixels.
-	 * Add these into the running sums, and simultaneously shift the
-	 * next-line error sums left by 1 column.
-	 */
-	bnexterr = cur;
-	delta = cur * 2;
-	cur += delta;		/* form error * 3 */
-	errorptr[0] = (FSERROR) (bpreverr + cur);
-	cur += delta;		/* form error * 5 */
-	bpreverr = belowerr + cur;
-	belowerr = bnexterr;
-	cur += delta;		/* form error * 7 */
-	/* At this point cur contains the 7/16 error value to be propagated
-	 * to the next pixel on the current line, and all the errors for the
-	 * next line have been shifted over. We are therefore ready to move on.
-	 */
-	input_ptr += dirnc;	/* advance input ptr to next column */
-	output_ptr += dir;	/* advance output ptr to next column */
-	errorptr += dir;	/* advance errorptr to current column */
-      }
-      /* Post-loop cleanup: we must unload the final error value into the
-       * final fserrors[] entry.  Note we need not unload belowerr because
-       * it is for the dummy column before or after the actual array.
-       */
-      errorptr[0] = (FSERROR) bpreverr; /* unload prev err into array */
-    }
-    cquantize->on_odd_row = (cquantize->on_odd_row ? FALSE : TRUE);
-  }
-}
-
-
-/*
- * Allocate workspace for Floyd-Steinberg errors.
- */
-
-LOCAL(void)
-alloc_fs_workspace (j_decompress_ptr cinfo)
-{
-  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
-  size_t arraysize;
-  int i;
-
-  arraysize = (size_t) ((cinfo->output_width + 2) * SIZEOF(FSERROR));
-  for (i = 0; i < cinfo->out_color_components; i++) {
-    cquantize->fserrors[i] = (FSERRPTR)
-      (*cinfo->mem->alloc_large)((j_common_ptr) cinfo, JPOOL_IMAGE, arraysize);
-  }
-}
-
-
-/*
- * Initialize for one-pass color quantization.
- */
-
-METHODDEF(void)
-start_pass_1_quant (j_decompress_ptr cinfo, boolean is_pre_scan)
-{
-  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
-  size_t arraysize;
-  int i;
-
-  /* Install my colormap. */
-  cinfo->colormap = cquantize->sv_colormap;
-  cinfo->actual_number_of_colors = cquantize->sv_actual;
-
-  /* Initialize for desired dithering mode. */
-  switch (cinfo->dither_mode) {
-  case JDITHER_NONE:
-    if (cinfo->out_color_components == 3)
-      cquantize->pub.color_quantize = color_quantize3;
-    else
-      cquantize->pub.color_quantize = color_quantize;
-    break;
-  case JDITHER_ORDERED:
-    if (cinfo->out_color_components == 3)
-      cquantize->pub.color_quantize = quantize3_ord_dither;
-    else
-      cquantize->pub.color_quantize = quantize_ord_dither;
-    cquantize->row_index = 0;	/* initialize state for ordered dither */
-    /* If user changed to ordered dither from another mode,
-     * we must recreate the color index table with padding.
-     * This will cost extra space, but probably isn't very likely.
-     */
-    if (! cquantize->is_padded)
-      create_colorindex(cinfo);
-    /* Create ordered-dither tables if we didn't already. */
-    if (cquantize->odither[0] == NULL)
-      create_odither_tables(cinfo);
-    break;
-  case JDITHER_FS:
-    cquantize->pub.color_quantize = quantize_fs_dither;
-    cquantize->on_odd_row = FALSE; /* initialize state for F-S dither */
-    /* Allocate Floyd-Steinberg workspace if didn't already. */
-    if (cquantize->fserrors[0] == NULL)
-      alloc_fs_workspace(cinfo);
-    /* Initialize the propagated errors to zero. */
-    arraysize = (size_t) ((cinfo->output_width + 2) * SIZEOF(FSERROR));
-    for (i = 0; i < cinfo->out_color_components; i++)
-      FMEMZERO((void FAR *) cquantize->fserrors[i], arraysize);
-    break;
-  default:
-    ERREXIT(cinfo, JERR_NOT_COMPILED);
-    break;
-  }
-}
-
-
-/*
- * Finish up at the end of the pass.
- */
-
-METHODDEF(void)
-finish_pass_1_quant (j_decompress_ptr cinfo)
-{
-  /* no work in 1-pass case */
-}
-
-
-/*
- * Switch to a new external colormap between output passes.
- * Shouldn't get to this module!
- */
-
-METHODDEF(void)
-new_color_map_1_quant (j_decompress_ptr cinfo)
-{
-  ERREXIT(cinfo, JERR_MODE_CHANGE);
-}
-
-
-/*
- * Module initialization routine for 1-pass color quantization.
- */
-
-GLOBAL(void)
-jinit_1pass_quantizer (j_decompress_ptr cinfo)
-{
-  my_cquantize_ptr cquantize;
-
-  cquantize = (my_cquantize_ptr)
-    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				SIZEOF(my_cquantizer));
-  cinfo->cquantize = (struct jpeg_color_quantizer *) cquantize;
-  cquantize->pub.start_pass = start_pass_1_quant;
-  cquantize->pub.finish_pass = finish_pass_1_quant;
-  cquantize->pub.new_color_map = new_color_map_1_quant;
-  cquantize->fserrors[0] = NULL; /* Flag FS workspace not allocated */
-  cquantize->odither[0] = NULL;	/* Also flag odither arrays not allocated */
-
-  /* Make sure my internal arrays won't overflow */
-  if (cinfo->out_color_components > MAX_Q_COMPS)
-    ERREXIT1(cinfo, JERR_QUANT_COMPONENTS, MAX_Q_COMPS);
-  /* Make sure colormap indexes can be represented by JSAMPLEs */
-  if (cinfo->desired_number_of_colors > (MAXJSAMPLE+1))
-    ERREXIT1(cinfo, JERR_QUANT_MANY_COLORS, MAXJSAMPLE+1);
-
-  /* Create the colormap and color index table. */
-  create_colormap(cinfo);
-  create_colorindex(cinfo);
-
-  /* Allocate Floyd-Steinberg workspace now if requested.
-   * We do this now since it is FAR storage and may affect the memory
-   * manager's space calculations.  If the user changes to FS dither
-   * mode in a later pass, we will allocate the space then, and will
-   * possibly overrun the max_memory_to_use setting.
-   */
-  if (cinfo->dither_mode == JDITHER_FS)
-    alloc_fs_workspace(cinfo);
-}
-
-#endif /* QUANT_1PASS_SUPPORTED */
+/*
+ * jquant1.c
+ *
+ * Copyright (C) 1991-1996, Thomas G. Lane.
+ * Modified 2011 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains 1-pass color quantization (color mapping) routines.
+ * These routines provide mapping to a fixed color map using equally spaced
+ * color values.  Optional Floyd-Steinberg or ordered dithering is available.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+#ifdef QUANT_1PASS_SUPPORTED
+
+
+/*
+ * The main purpose of 1-pass quantization is to provide a fast, if not very
+ * high quality, colormapped output capability.  A 2-pass quantizer usually
+ * gives better visual quality; however, for quantized grayscale output this
+ * quantizer is perfectly adequate.  Dithering is highly recommended with this
+ * quantizer, though you can turn it off if you really want to.
+ *
+ * In 1-pass quantization the colormap must be chosen in advance of seeing the
+ * image.  We use a map consisting of all combinations of Ncolors[i] color
+ * values for the i'th component.  The Ncolors[] values are chosen so that
+ * their product, the total number of colors, is no more than that requested.
+ * (In most cases, the product will be somewhat less.)
+ *
+ * Since the colormap is orthogonal, the representative value for each color
+ * component can be determined without considering the other components;
+ * then these indexes can be combined into a colormap index by a standard
+ * N-dimensional-array-subscript calculation.  Most of the arithmetic involved
+ * can be precalculated and stored in the lookup table colorindex[].
+ * colorindex[i][j] maps pixel value j in component i to the nearest
+ * representative value (grid plane) for that component; this index is
+ * multiplied by the array stride for component i, so that the
+ * index of the colormap entry closest to a given pixel value is just
+ *    sum( colorindex[component-number][pixel-component-value] )
+ * Aside from being fast, this scheme allows for variable spacing between
+ * representative values with no additional lookup cost.
+ *
+ * If gamma correction has been applied in color conversion, it might be wise
+ * to adjust the color grid spacing so that the representative colors are
+ * equidistant in linear space.  At this writing, gamma correction is not
+ * implemented by jdcolor, so nothing is done here.
+ */
+
+
+/* Declarations for ordered dithering.
+ *
+ * We use a standard 16x16 ordered dither array.  The basic concept of ordered
+ * dithering is described in many references, for instance Dale Schumacher's
+ * chapter II.2 of Graphics Gems II (James Arvo, ed. Academic Press, 1991).
+ * In place of Schumacher's comparisons against a "threshold" value, we add a
+ * "dither" value to the input pixel and then round the result to the nearest
+ * output value.  The dither value is equivalent to (0.5 - threshold) times
+ * the distance between output values.  For ordered dithering, we assume that
+ * the output colors are equally spaced; if not, results will probably be
+ * worse, since the dither may be too much or too little at a given point.
+ *
+ * The normal calculation would be to form pixel value + dither, range-limit
+ * this to 0..MAXJSAMPLE, and then index into the colorindex table as usual.
+ * We can skip the separate range-limiting step by extending the colorindex
+ * table in both directions.
+ */
+
+#define ODITHER_SIZE  16	/* dimension of dither matrix */
+/* NB: if ODITHER_SIZE is not a power of 2, ODITHER_MASK uses will break */
+#define ODITHER_CELLS (ODITHER_SIZE*ODITHER_SIZE)	/* # cells in matrix */
+#define ODITHER_MASK  (ODITHER_SIZE-1) /* mask for wrapping around counters */
+
+typedef int ODITHER_MATRIX[ODITHER_SIZE][ODITHER_SIZE];
+typedef int (*ODITHER_MATRIX_PTR)[ODITHER_SIZE];
+
+static const UINT8 base_dither_matrix[ODITHER_SIZE][ODITHER_SIZE] = {
+  /* Bayer's order-4 dither array.  Generated by the code given in
+   * Stephen Hawley's article "Ordered Dithering" in Graphics Gems I.
+   * The values in this array must range from 0 to ODITHER_CELLS-1.
+   */
+  {   0,192, 48,240, 12,204, 60,252,  3,195, 51,243, 15,207, 63,255 },
+  { 128, 64,176,112,140, 76,188,124,131, 67,179,115,143, 79,191,127 },
+  {  32,224, 16,208, 44,236, 28,220, 35,227, 19,211, 47,239, 31,223 },
+  { 160, 96,144, 80,172,108,156, 92,163, 99,147, 83,175,111,159, 95 },
+  {   8,200, 56,248,  4,196, 52,244, 11,203, 59,251,  7,199, 55,247 },
+  { 136, 72,184,120,132, 68,180,116,139, 75,187,123,135, 71,183,119 },
+  {  40,232, 24,216, 36,228, 20,212, 43,235, 27,219, 39,231, 23,215 },
+  { 168,104,152, 88,164,100,148, 84,171,107,155, 91,167,103,151, 87 },
+  {   2,194, 50,242, 14,206, 62,254,  1,193, 49,241, 13,205, 61,253 },
+  { 130, 66,178,114,142, 78,190,126,129, 65,177,113,141, 77,189,125 },
+  {  34,226, 18,210, 46,238, 30,222, 33,225, 17,209, 45,237, 29,221 },
+  { 162, 98,146, 82,174,110,158, 94,161, 97,145, 81,173,109,157, 93 },
+  {  10,202, 58,250,  6,198, 54,246,  9,201, 57,249,  5,197, 53,245 },
+  { 138, 74,186,122,134, 70,182,118,137, 73,185,121,133, 69,181,117 },
+  {  42,234, 26,218, 38,230, 22,214, 41,233, 25,217, 37,229, 21,213 },
+  { 170,106,154, 90,166,102,150, 86,169,105,153, 89,165,101,149, 85 }
+};
+
+
+/* Declarations for Floyd-Steinberg dithering.
+ *
+ * Errors are accumulated into the array fserrors[], at a resolution of
+ * 1/16th of a pixel count.  The error at a given pixel is propagated
+ * to its not-yet-processed neighbors using the standard F-S fractions,
+ *		...	(here)	7/16
+ *		3/16	5/16	1/16
+ * We work left-to-right on even rows, right-to-left on odd rows.
+ *
+ * We can get away with a single array (holding one row's worth of errors)
+ * by using it to store the current row's errors at pixel columns not yet
+ * processed, but the next row's errors at columns already processed.  We
+ * need only a few extra variables to hold the errors immediately around the
+ * current column.  (If we are lucky, those variables are in registers, but
+ * even if not, they're probably cheaper to access than array elements are.)
+ *
+ * The fserrors[] array is indexed [component#][position].
+ * We provide (#columns + 2) entries per component; the extra entry at each
+ * end saves us from special-casing the first and last pixels.
+ *
+ * Note: on a wide image, we might not have enough room in a PC's near data
+ * segment to hold the error array; so it is allocated with alloc_large.
+ */
+
+#if BITS_IN_JSAMPLE == 8
+typedef INT16 FSERROR;		/* 16 bits should be enough */
+typedef int LOCFSERROR;		/* use 'int' for calculation temps */
+#else
+typedef INT32 FSERROR;		/* may need more than 16 bits */
+typedef INT32 LOCFSERROR;	/* be sure calculation temps are big enough */
+#endif
+
+typedef FSERROR FAR *FSERRPTR;	/* pointer to error array (in FAR storage!) */
+
+
+/* Private subobject */
+
+#define MAX_Q_COMPS 4		/* max components I can handle */
+
+typedef struct {
+  struct jpeg_color_quantizer pub; /* public fields */
+
+  /* Initially allocated colormap is saved here */
+  JSAMPARRAY sv_colormap;	/* The color map as a 2-D pixel array */
+  int sv_actual;		/* number of entries in use */
+
+  JSAMPARRAY colorindex;	/* Precomputed mapping for speed */
+  /* colorindex[i][j] = index of color closest to pixel value j in component i,
+   * premultiplied as described above.  Since colormap indexes must fit into
+   * JSAMPLEs, the entries of this array will too.
+   */
+  boolean is_padded;		/* is the colorindex padded for odither? */
+
+  int Ncolors[MAX_Q_COMPS];	/* # of values alloced to each component */
+
+  /* Variables for ordered dithering */
+  int row_index;		/* cur row's vertical index in dither matrix */
+  ODITHER_MATRIX_PTR odither[MAX_Q_COMPS]; /* one dither array per component */
+
+  /* Variables for Floyd-Steinberg dithering */
+  FSERRPTR fserrors[MAX_Q_COMPS]; /* accumulated errors */
+  boolean on_odd_row;		/* flag to remember which row we are on */
+} my_cquantizer;
+
+typedef my_cquantizer * my_cquantize_ptr;
+
+
+/*
+ * Policy-making subroutines for create_colormap and create_colorindex.
+ * These routines determine the colormap to be used.  The rest of the module
+ * only assumes that the colormap is orthogonal.
+ *
+ *  * select_ncolors decides how to divvy up the available colors
+ *    among the components.
+ *  * output_value defines the set of representative values for a component.
+ *  * largest_input_value defines the mapping from input values to
+ *    representative values for a component.
+ * Note that the latter two routines may impose different policies for
+ * different components, though this is not currently done.
+ */
+
+
+LOCAL(int)
+select_ncolors (j_decompress_ptr cinfo, int Ncolors[])
+/* Determine allocation of desired colors to components, */
+/* and fill in Ncolors[] array to indicate choice. */
+/* Return value is total number of colors (product of Ncolors[] values). */
+{
+  int nc = cinfo->out_color_components; /* number of color components */
+  int max_colors = cinfo->desired_number_of_colors;
+  int total_colors, iroot, i, j;
+  boolean changed;
+  long temp;
+  static const int RGB_order[3] = { RGB_GREEN, RGB_RED, RGB_BLUE };
+
+  /* We can allocate at least the nc'th root of max_colors per component. */
+  /* Compute floor(nc'th root of max_colors). */
+  iroot = 1;
+  do {
+    iroot++;
+    temp = iroot;		/* set temp = iroot ** nc */
+    for (i = 1; i < nc; i++)
+      temp *= iroot;
+  } while (temp <= (long) max_colors); /* repeat till iroot exceeds root */
+  iroot--;			/* now iroot = floor(root) */
+
+  /* Must have at least 2 color values per component */
+  if (iroot < 2)
+    ERREXIT1(cinfo, JERR_QUANT_FEW_COLORS, (int) temp);
+
+  /* Initialize to iroot color values for each component */
+  total_colors = 1;
+  for (i = 0; i < nc; i++) {
+    Ncolors[i] = iroot;
+    total_colors *= iroot;
+  }
+  /* We may be able to increment the count for one or more components without
+   * exceeding max_colors, though we know not all can be incremented.
+   * Sometimes, the first component can be incremented more than once!
+   * (Example: for 16 colors, we start at 2*2*2, go to 3*2*2, then 4*2*2.)
+   * In RGB colorspace, try to increment G first, then R, then B.
+   */
+  do {
+    changed = FALSE;
+    for (i = 0; i < nc; i++) {
+      j = (cinfo->out_color_space == JCS_RGB ? RGB_order[i] : i);
+      /* calculate new total_colors if Ncolors[j] is incremented */
+      temp = total_colors / Ncolors[j];
+      temp *= Ncolors[j]+1;	/* done in long arith to avoid oflo */
+      if (temp > (long) max_colors)
+	break;			/* won't fit, done with this pass */
+      Ncolors[j]++;		/* OK, apply the increment */
+      total_colors = (int) temp;
+      changed = TRUE;
+    }
+  } while (changed);
+
+  return total_colors;
+}
+
+
+LOCAL(int)
+output_value (j_decompress_ptr cinfo, int ci, int j, int maxj)
+/* Return j'th output value, where j will range from 0 to maxj */
+/* The output values must fall in 0..MAXJSAMPLE in increasing order */
+{
+  /* We always provide values 0 and MAXJSAMPLE for each component;
+   * any additional values are equally spaced between these limits.
+   * (Forcing the upper and lower values to the limits ensures that
+   * dithering can't produce a color outside the selected gamut.)
+   */
+  return (int) (((INT32) j * MAXJSAMPLE + maxj/2) / maxj);
+}
+
+
+LOCAL(int)
+largest_input_value (j_decompress_ptr cinfo, int ci, int j, int maxj)
+/* Return largest input value that should map to j'th output value */
+/* Must have largest(j=0) >= 0, and largest(j=maxj) >= MAXJSAMPLE */
+{
+  /* Breakpoints are halfway between values returned by output_value */
+  return (int) (((INT32) (2*j + 1) * MAXJSAMPLE + maxj) / (2*maxj));
+}
+
+
+/*
+ * Create the colormap.
+ */
+
+LOCAL(void)
+create_colormap (j_decompress_ptr cinfo)
+{
+  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
+  JSAMPARRAY colormap;		/* Created colormap */
+  int total_colors;		/* Number of distinct output colors */
+  int i,j,k, nci, blksize, blkdist, ptr, val;
+
+  /* Select number of colors for each component */
+  total_colors = select_ncolors(cinfo, cquantize->Ncolors);
+
+  /* Report selected color counts */
+  if (cinfo->out_color_components == 3)
+    TRACEMS4(cinfo, 1, JTRC_QUANT_3_NCOLORS,
+	     total_colors, cquantize->Ncolors[0],
+	     cquantize->Ncolors[1], cquantize->Ncolors[2]);
+  else
+    TRACEMS1(cinfo, 1, JTRC_QUANT_NCOLORS, total_colors);
+
+  /* Allocate and fill in the colormap. */
+  /* The colors are ordered in the map in standard row-major order, */
+  /* i.e. rightmost (highest-indexed) color changes most rapidly. */
+
+  colormap = (*cinfo->mem->alloc_sarray)
+    ((j_common_ptr) cinfo, JPOOL_IMAGE,
+     (JDIMENSION) total_colors, (JDIMENSION) cinfo->out_color_components);
+
+  /* blksize is number of adjacent repeated entries for a component */
+  /* blkdist is distance between groups of identical entries for a component */
+  blkdist = total_colors;
+
+  for (i = 0; i < cinfo->out_color_components; i++) {
+    /* fill in colormap entries for i'th color component */
+    nci = cquantize->Ncolors[i]; /* # of distinct values for this color */
+    blksize = blkdist / nci;
+    for (j = 0; j < nci; j++) {
+      /* Compute j'th output value (out of nci) for component */
+      val = output_value(cinfo, i, j, nci-1);
+      /* Fill in all colormap entries that have this value of this component */
+      for (ptr = j * blksize; ptr < total_colors; ptr += blkdist) {
+	/* fill in blksize entries beginning at ptr */
+	for (k = 0; k < blksize; k++)
+	  colormap[i][ptr+k] = (JSAMPLE) val;
+      }
+    }
+    blkdist = blksize;		/* blksize of this color is blkdist of next */
+  }
+
+  /* Save the colormap in private storage,
+   * where it will survive color quantization mode changes.
+   */
+  cquantize->sv_colormap = colormap;
+  cquantize->sv_actual = total_colors;
+}
+
+
+/*
+ * Create the color index table.
+ */
+
+LOCAL(void)
+create_colorindex (j_decompress_ptr cinfo)
+{
+  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
+  JSAMPROW indexptr;
+  int i,j,k, nci, blksize, val, pad;
+
+  /* For ordered dither, we pad the color index tables by MAXJSAMPLE in
+   * each direction (input index values can be -MAXJSAMPLE .. 2*MAXJSAMPLE).
+   * This is not necessary in the other dithering modes.  However, we
+   * flag whether it was done in case user changes dithering mode.
+   */
+  if (cinfo->dither_mode == JDITHER_ORDERED) {
+    pad = MAXJSAMPLE*2;
+    cquantize->is_padded = TRUE;
+  } else {
+    pad = 0;
+    cquantize->is_padded = FALSE;
+  }
+
+  cquantize->colorindex = (*cinfo->mem->alloc_sarray)
+    ((j_common_ptr) cinfo, JPOOL_IMAGE,
+     (JDIMENSION) (MAXJSAMPLE+1 + pad),
+     (JDIMENSION) cinfo->out_color_components);
+
+  /* blksize is number of adjacent repeated entries for a component */
+  blksize = cquantize->sv_actual;
+
+  for (i = 0; i < cinfo->out_color_components; i++) {
+    /* fill in colorindex entries for i'th color component */
+    nci = cquantize->Ncolors[i]; /* # of distinct values for this color */
+    blksize = blksize / nci;
+
+    /* adjust colorindex pointers to provide padding at negative indexes. */
+    if (pad)
+      cquantize->colorindex[i] += MAXJSAMPLE;
+
+    /* in loop, val = index of current output value, */
+    /* and k = largest j that maps to current val */
+    indexptr = cquantize->colorindex[i];
+    val = 0;
+    k = largest_input_value(cinfo, i, 0, nci-1);
+    for (j = 0; j <= MAXJSAMPLE; j++) {
+      while (j > k)		/* advance val if past boundary */
+	k = largest_input_value(cinfo, i, ++val, nci-1);
+      /* premultiply so that no multiplication needed in main processing */
+      indexptr[j] = (JSAMPLE) (val * blksize);
+    }
+    /* Pad at both ends if necessary */
+    if (pad)
+      for (j = 1; j <= MAXJSAMPLE; j++) {
+	indexptr[-j] = indexptr[0];
+	indexptr[MAXJSAMPLE+j] = indexptr[MAXJSAMPLE];
+      }
+  }
+}
+
+
+/*
+ * Create an ordered-dither array for a component having ncolors
+ * distinct output values.
+ */
+
+LOCAL(ODITHER_MATRIX_PTR)
+make_odither_array (j_decompress_ptr cinfo, int ncolors)
+{
+  ODITHER_MATRIX_PTR odither;
+  int j,k;
+  INT32 num,den;
+
+  odither = (ODITHER_MATRIX_PTR)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				SIZEOF(ODITHER_MATRIX));
+  /* The inter-value distance for this color is MAXJSAMPLE/(ncolors-1).
+   * Hence the dither value for the matrix cell with fill order f
+   * (f=0..N-1) should be (N-1-2*f)/(2*N) * MAXJSAMPLE/(ncolors-1).
+   * On 16-bit-int machine, be careful to avoid overflow.
+   */
+  den = 2 * ODITHER_CELLS * ((INT32) (ncolors - 1));
+  for (j = 0; j < ODITHER_SIZE; j++) {
+    for (k = 0; k < ODITHER_SIZE; k++) {
+      num = ((INT32) (ODITHER_CELLS-1 - 2*((int)base_dither_matrix[j][k])))
+	    * MAXJSAMPLE;
+      /* Ensure round towards zero despite C's lack of consistency
+       * about rounding negative values in integer division...
+       */
+      odither[j][k] = (int) (num<0 ? -((-num)/den) : num/den);
+    }
+  }
+  return odither;
+}
+
+
+/*
+ * Create the ordered-dither tables.
+ * Components having the same number of representative colors may 
+ * share a dither table.
+ */
+
+LOCAL(void)
+create_odither_tables (j_decompress_ptr cinfo)
+{
+  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
+  ODITHER_MATRIX_PTR odither;
+  int i, j, nci;
+
+  for (i = 0; i < cinfo->out_color_components; i++) {
+    nci = cquantize->Ncolors[i]; /* # of distinct values for this color */
+    odither = NULL;		/* search for matching prior component */
+    for (j = 0; j < i; j++) {
+      if (nci == cquantize->Ncolors[j]) {
+	odither = cquantize->odither[j];
+	break;
+      }
+    }
+    if (odither == NULL)	/* need a new table? */
+      odither = make_odither_array(cinfo, nci);
+    cquantize->odither[i] = odither;
+  }
+}
+
+
+/*
+ * Map some rows of pixels to the output colormapped representation.
+ */
+
+METHODDEF(void)
+color_quantize (j_decompress_ptr cinfo, JSAMPARRAY input_buf,
+		JSAMPARRAY output_buf, int num_rows)
+/* General case, no dithering */
+{
+  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
+  JSAMPARRAY colorindex = cquantize->colorindex;
+  register int pixcode, ci;
+  register JSAMPROW ptrin, ptrout;
+  int row;
+  JDIMENSION col;
+  JDIMENSION width = cinfo->output_width;
+  register int nc = cinfo->out_color_components;
+
+  for (row = 0; row < num_rows; row++) {
+    ptrin = input_buf[row];
+    ptrout = output_buf[row];
+    for (col = width; col > 0; col--) {
+      pixcode = 0;
+      for (ci = 0; ci < nc; ci++) {
+	pixcode += GETJSAMPLE(colorindex[ci][GETJSAMPLE(*ptrin++)]);
+      }
+      *ptrout++ = (JSAMPLE) pixcode;
+    }
+  }
+}
+
+
+METHODDEF(void)
+color_quantize3 (j_decompress_ptr cinfo, JSAMPARRAY input_buf,
+		 JSAMPARRAY output_buf, int num_rows)
+/* Fast path for out_color_components==3, no dithering */
+{
+  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
+  register int pixcode;
+  register JSAMPROW ptrin, ptrout;
+  JSAMPROW colorindex0 = cquantize->colorindex[0];
+  JSAMPROW colorindex1 = cquantize->colorindex[1];
+  JSAMPROW colorindex2 = cquantize->colorindex[2];
+  int row;
+  JDIMENSION col;
+  JDIMENSION width = cinfo->output_width;
+
+  for (row = 0; row < num_rows; row++) {
+    ptrin = input_buf[row];
+    ptrout = output_buf[row];
+    for (col = width; col > 0; col--) {
+      pixcode  = GETJSAMPLE(colorindex0[GETJSAMPLE(*ptrin++)]);
+      pixcode += GETJSAMPLE(colorindex1[GETJSAMPLE(*ptrin++)]);
+      pixcode += GETJSAMPLE(colorindex2[GETJSAMPLE(*ptrin++)]);
+      *ptrout++ = (JSAMPLE) pixcode;
+    }
+  }
+}
+
+
+METHODDEF(void)
+quantize_ord_dither (j_decompress_ptr cinfo, JSAMPARRAY input_buf,
+		     JSAMPARRAY output_buf, int num_rows)
+/* General case, with ordered dithering */
+{
+  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
+  register JSAMPROW input_ptr;
+  register JSAMPROW output_ptr;
+  JSAMPROW colorindex_ci;
+  int * dither;			/* points to active row of dither matrix */
+  int row_index, col_index;	/* current indexes into dither matrix */
+  int nc = cinfo->out_color_components;
+  int ci;
+  int row;
+  JDIMENSION col;
+  JDIMENSION width = cinfo->output_width;
+
+  for (row = 0; row < num_rows; row++) {
+    /* Initialize output values to 0 so can process components separately */
+    FMEMZERO((void FAR *) output_buf[row],
+	     (size_t) (width * SIZEOF(JSAMPLE)));
+    row_index = cquantize->row_index;
+    for (ci = 0; ci < nc; ci++) {
+      input_ptr = input_buf[row] + ci;
+      output_ptr = output_buf[row];
+      colorindex_ci = cquantize->colorindex[ci];
+      dither = cquantize->odither[ci][row_index];
+      col_index = 0;
+
+      for (col = width; col > 0; col--) {
+	/* Form pixel value + dither, range-limit to 0..MAXJSAMPLE,
+	 * select output value, accumulate into output code for this pixel.
+	 * Range-limiting need not be done explicitly, as we have extended
+	 * the colorindex table to produce the right answers for out-of-range
+	 * inputs.  The maximum dither is +- MAXJSAMPLE; this sets the
+	 * required amount of padding.
+	 */
+	*output_ptr += colorindex_ci[GETJSAMPLE(*input_ptr)+dither[col_index]];
+	input_ptr += nc;
+	output_ptr++;
+	col_index = (col_index + 1) & ODITHER_MASK;
+      }
+    }
+    /* Advance row index for next row */
+    row_index = (row_index + 1) & ODITHER_MASK;
+    cquantize->row_index = row_index;
+  }
+}
+
+
+METHODDEF(void)
+quantize3_ord_dither (j_decompress_ptr cinfo, JSAMPARRAY input_buf,
+		      JSAMPARRAY output_buf, int num_rows)
+/* Fast path for out_color_components==3, with ordered dithering */
+{
+  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
+  register int pixcode;
+  register JSAMPROW input_ptr;
+  register JSAMPROW output_ptr;
+  JSAMPROW colorindex0 = cquantize->colorindex[0];
+  JSAMPROW colorindex1 = cquantize->colorindex[1];
+  JSAMPROW colorindex2 = cquantize->colorindex[2];
+  int * dither0;		/* points to active row of dither matrix */
+  int * dither1;
+  int * dither2;
+  int row_index, col_index;	/* current indexes into dither matrix */
+  int row;
+  JDIMENSION col;
+  JDIMENSION width = cinfo->output_width;
+
+  for (row = 0; row < num_rows; row++) {
+    row_index = cquantize->row_index;
+    input_ptr = input_buf[row];
+    output_ptr = output_buf[row];
+    dither0 = cquantize->odither[0][row_index];
+    dither1 = cquantize->odither[1][row_index];
+    dither2 = cquantize->odither[2][row_index];
+    col_index = 0;
+
+    for (col = width; col > 0; col--) {
+      pixcode  = GETJSAMPLE(colorindex0[GETJSAMPLE(*input_ptr++) +
+					dither0[col_index]]);
+      pixcode += GETJSAMPLE(colorindex1[GETJSAMPLE(*input_ptr++) +
+					dither1[col_index]]);
+      pixcode += GETJSAMPLE(colorindex2[GETJSAMPLE(*input_ptr++) +
+					dither2[col_index]]);
+      *output_ptr++ = (JSAMPLE) pixcode;
+      col_index = (col_index + 1) & ODITHER_MASK;
+    }
+    row_index = (row_index + 1) & ODITHER_MASK;
+    cquantize->row_index = row_index;
+  }
+}
+
+
+METHODDEF(void)
+quantize_fs_dither (j_decompress_ptr cinfo, JSAMPARRAY input_buf,
+		    JSAMPARRAY output_buf, int num_rows)
+/* General case, with Floyd-Steinberg dithering */
+{
+  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
+  register LOCFSERROR cur;	/* current error or pixel value */
+  LOCFSERROR belowerr;		/* error for pixel below cur */
+  LOCFSERROR bpreverr;		/* error for below/prev col */
+  LOCFSERROR bnexterr;		/* error for below/next col */
+  LOCFSERROR delta;
+  register FSERRPTR errorptr;	/* => fserrors[] at column before current */
+  register JSAMPROW input_ptr;
+  register JSAMPROW output_ptr;
+  JSAMPROW colorindex_ci;
+  JSAMPROW colormap_ci;
+  int pixcode;
+  int nc = cinfo->out_color_components;
+  int dir;			/* 1 for left-to-right, -1 for right-to-left */
+  int dirnc;			/* dir * nc */
+  int ci;
+  int row;
+  JDIMENSION col;
+  JDIMENSION width = cinfo->output_width;
+  JSAMPLE *range_limit = cinfo->sample_range_limit;
+  SHIFT_TEMPS
+
+  for (row = 0; row < num_rows; row++) {
+    /* Initialize output values to 0 so can process components separately */
+    FMEMZERO((void FAR *) output_buf[row],
+	     (size_t) (width * SIZEOF(JSAMPLE)));
+    for (ci = 0; ci < nc; ci++) {
+      input_ptr = input_buf[row] + ci;
+      output_ptr = output_buf[row];
+      if (cquantize->on_odd_row) {
+	/* work right to left in this row */
+	input_ptr += (width-1) * nc; /* so point to rightmost pixel */
+	output_ptr += width-1;
+	dir = -1;
+	dirnc = -nc;
+	errorptr = cquantize->fserrors[ci] + (width+1); /* => entry after last column */
+      } else {
+	/* work left to right in this row */
+	dir = 1;
+	dirnc = nc;
+	errorptr = cquantize->fserrors[ci]; /* => entry before first column */
+      }
+      colorindex_ci = cquantize->colorindex[ci];
+      colormap_ci = cquantize->sv_colormap[ci];
+      /* Preset error values: no error propagated to first pixel from left */
+      cur = 0;
+      /* and no error propagated to row below yet */
+      belowerr = bpreverr = 0;
+
+      for (col = width; col > 0; col--) {
+	/* cur holds the error propagated from the previous pixel on the
+	 * current line.  Add the error propagated from the previous line
+	 * to form the complete error correction term for this pixel, and
+	 * round the error term (which is expressed * 16) to an integer.
+	 * RIGHT_SHIFT rounds towards minus infinity, so adding 8 is correct
+	 * for either sign of the error value.
+	 * Note: errorptr points to *previous* column's array entry.
+	 */
+	cur = RIGHT_SHIFT(cur + errorptr[dir] + 8, 4);
+	/* Form pixel value + error, and range-limit to 0..MAXJSAMPLE.
+	 * The maximum error is +- MAXJSAMPLE; this sets the required size
+	 * of the range_limit array.
+	 */
+	cur += GETJSAMPLE(*input_ptr);
+	cur = GETJSAMPLE(range_limit[cur]);
+	/* Select output value, accumulate into output code for this pixel */
+	pixcode = GETJSAMPLE(colorindex_ci[cur]);
+	*output_ptr += (JSAMPLE) pixcode;
+	/* Compute actual representation error at this pixel */
+	/* Note: we can do this even though we don't have the final */
+	/* pixel code, because the colormap is orthogonal. */
+	cur -= GETJSAMPLE(colormap_ci[pixcode]);
+	/* Compute error fractions to be propagated to adjacent pixels.
+	 * Add these into the running sums, and simultaneously shift the
+	 * next-line error sums left by 1 column.
+	 */
+	bnexterr = cur;
+	delta = cur * 2;
+	cur += delta;		/* form error * 3 */
+	errorptr[0] = (FSERROR) (bpreverr + cur);
+	cur += delta;		/* form error * 5 */
+	bpreverr = belowerr + cur;
+	belowerr = bnexterr;
+	cur += delta;		/* form error * 7 */
+	/* At this point cur contains the 7/16 error value to be propagated
+	 * to the next pixel on the current line, and all the errors for the
+	 * next line have been shifted over. We are therefore ready to move on.
+	 */
+	input_ptr += dirnc;	/* advance input ptr to next column */
+	output_ptr += dir;	/* advance output ptr to next column */
+	errorptr += dir;	/* advance errorptr to current column */
+      }
+      /* Post-loop cleanup: we must unload the final error value into the
+       * final fserrors[] entry.  Note we need not unload belowerr because
+       * it is for the dummy column before or after the actual array.
+       */
+      errorptr[0] = (FSERROR) bpreverr; /* unload prev err into array */
+    }
+    cquantize->on_odd_row = (cquantize->on_odd_row ? FALSE : TRUE);
+  }
+}
+
+
+/*
+ * Allocate workspace for Floyd-Steinberg errors.
+ */
+
+LOCAL(void)
+alloc_fs_workspace (j_decompress_ptr cinfo)
+{
+  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
+  size_t arraysize;
+  int i;
+
+  arraysize = (size_t) ((cinfo->output_width + 2) * SIZEOF(FSERROR));
+  for (i = 0; i < cinfo->out_color_components; i++) {
+    cquantize->fserrors[i] = (FSERRPTR)
+      (*cinfo->mem->alloc_large)((j_common_ptr) cinfo, JPOOL_IMAGE, arraysize);
+  }
+}
+
+
+/*
+ * Initialize for one-pass color quantization.
+ */
+
+METHODDEF(void)
+start_pass_1_quant (j_decompress_ptr cinfo, boolean is_pre_scan)
+{
+  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
+  size_t arraysize;
+  int i;
+
+  /* Install my colormap. */
+  cinfo->colormap = cquantize->sv_colormap;
+  cinfo->actual_number_of_colors = cquantize->sv_actual;
+
+  /* Initialize for desired dithering mode. */
+  switch (cinfo->dither_mode) {
+  case JDITHER_NONE:
+    if (cinfo->out_color_components == 3)
+      cquantize->pub.color_quantize = color_quantize3;
+    else
+      cquantize->pub.color_quantize = color_quantize;
+    break;
+  case JDITHER_ORDERED:
+    if (cinfo->out_color_components == 3)
+      cquantize->pub.color_quantize = quantize3_ord_dither;
+    else
+      cquantize->pub.color_quantize = quantize_ord_dither;
+    cquantize->row_index = 0;	/* initialize state for ordered dither */
+    /* If user changed to ordered dither from another mode,
+     * we must recreate the color index table with padding.
+     * This will cost extra space, but probably isn't very likely.
+     */
+    if (! cquantize->is_padded)
+      create_colorindex(cinfo);
+    /* Create ordered-dither tables if we didn't already. */
+    if (cquantize->odither[0] == NULL)
+      create_odither_tables(cinfo);
+    break;
+  case JDITHER_FS:
+    cquantize->pub.color_quantize = quantize_fs_dither;
+    cquantize->on_odd_row = FALSE; /* initialize state for F-S dither */
+    /* Allocate Floyd-Steinberg workspace if didn't already. */
+    if (cquantize->fserrors[0] == NULL)
+      alloc_fs_workspace(cinfo);
+    /* Initialize the propagated errors to zero. */
+    arraysize = (size_t) ((cinfo->output_width + 2) * SIZEOF(FSERROR));
+    for (i = 0; i < cinfo->out_color_components; i++)
+      FMEMZERO((void FAR *) cquantize->fserrors[i], arraysize);
+    break;
+  default:
+    ERREXIT(cinfo, JERR_NOT_COMPILED);
+    break;
+  }
+}
+
+
+/*
+ * Finish up at the end of the pass.
+ */
+
+METHODDEF(void)
+finish_pass_1_quant (j_decompress_ptr cinfo)
+{
+  /* no work in 1-pass case */
+}
+
+
+/*
+ * Switch to a new external colormap between output passes.
+ * Shouldn't get to this module!
+ */
+
+METHODDEF(void)
+new_color_map_1_quant (j_decompress_ptr cinfo)
+{
+  ERREXIT(cinfo, JERR_MODE_CHANGE);
+}
+
+
+/*
+ * Module initialization routine for 1-pass color quantization.
+ */
+
+GLOBAL(void)
+jinit_1pass_quantizer (j_decompress_ptr cinfo)
+{
+  my_cquantize_ptr cquantize;
+
+  cquantize = (my_cquantize_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				SIZEOF(my_cquantizer));
+  cinfo->cquantize = (struct jpeg_color_quantizer *) cquantize;
+  cquantize->pub.start_pass = start_pass_1_quant;
+  cquantize->pub.finish_pass = finish_pass_1_quant;
+  cquantize->pub.new_color_map = new_color_map_1_quant;
+  cquantize->fserrors[0] = NULL; /* Flag FS workspace not allocated */
+  cquantize->odither[0] = NULL;	/* Also flag odither arrays not allocated */
+
+  /* Make sure my internal arrays won't overflow */
+  if (cinfo->out_color_components > MAX_Q_COMPS)
+    ERREXIT1(cinfo, JERR_QUANT_COMPONENTS, MAX_Q_COMPS);
+  /* Make sure colormap indexes can be represented by JSAMPLEs */
+  if (cinfo->desired_number_of_colors > (MAXJSAMPLE+1))
+    ERREXIT1(cinfo, JERR_QUANT_MANY_COLORS, MAXJSAMPLE+1);
+
+  /* Create the colormap and color index table. */
+  create_colormap(cinfo);
+  create_colorindex(cinfo);
+
+  /* Allocate Floyd-Steinberg workspace now if requested.
+   * We do this now since it is FAR storage and may affect the memory
+   * manager's space calculations.  If the user changes to FS dither
+   * mode in a later pass, we will allocate the space then, and will
+   * possibly overrun the max_memory_to_use setting.
+   */
+  if (cinfo->dither_mode == JDITHER_FS)
+    alloc_fs_workspace(cinfo);
+}
+
+#endif /* QUANT_1PASS_SUPPORTED */
diff --git a/plugins/AdvaImg/src/LibJPEG/jquant2.c b/plugins/AdvaImg/src/LibJPEG/jquant2.c
index 38fc2af7a5..f7e351f2ac 100644
--- a/plugins/AdvaImg/src/LibJPEG/jquant2.c
+++ b/plugins/AdvaImg/src/LibJPEG/jquant2.c
@@ -1,1311 +1,1311 @@
-/*
- * jquant2.c
- *
- * Copyright (C) 1991-1996, Thomas G. Lane.
- * Modified 2011 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains 2-pass color quantization (color mapping) routines.
- * These routines provide selection of a custom color map for an image,
- * followed by mapping of the image to that color map, with optional
- * Floyd-Steinberg dithering.
- * It is also possible to use just the second pass to map to an arbitrary
- * externally-given color map.
- *
- * Note: ordered dithering is not supported, since there isn't any fast
- * way to compute intercolor distances; it's unclear that ordered dither's
- * fundamental assumptions even hold with an irregularly spaced color map.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-
-#ifdef QUANT_2PASS_SUPPORTED
-
-
-/*
- * This module implements the well-known Heckbert paradigm for color
- * quantization.  Most of the ideas used here can be traced back to
- * Heckbert's seminal paper
- *   Heckbert, Paul.  "Color Image Quantization for Frame Buffer Display",
- *   Proc. SIGGRAPH '82, Computer Graphics v.16 #3 (July 1982), pp 297-304.
- *
- * In the first pass over the image, we accumulate a histogram showing the
- * usage count of each possible color.  To keep the histogram to a reasonable
- * size, we reduce the precision of the input; typical practice is to retain
- * 5 or 6 bits per color, so that 8 or 4 different input values are counted
- * in the same histogram cell.
- *
- * Next, the color-selection step begins with a box representing the whole
- * color space, and repeatedly splits the "largest" remaining box until we
- * have as many boxes as desired colors.  Then the mean color in each
- * remaining box becomes one of the possible output colors.
- * 
- * The second pass over the image maps each input pixel to the closest output
- * color (optionally after applying a Floyd-Steinberg dithering correction).
- * This mapping is logically trivial, but making it go fast enough requires
- * considerable care.
- *
- * Heckbert-style quantizers vary a good deal in their policies for choosing
- * the "largest" box and deciding where to cut it.  The particular policies
- * used here have proved out well in experimental comparisons, but better ones
- * may yet be found.
- *
- * In earlier versions of the IJG code, this module quantized in YCbCr color
- * space, processing the raw upsampled data without a color conversion step.
- * This allowed the color conversion math to be done only once per colormap
- * entry, not once per pixel.  However, that optimization precluded other
- * useful optimizations (such as merging color conversion with upsampling)
- * and it also interfered with desired capabilities such as quantizing to an
- * externally-supplied colormap.  We have therefore abandoned that approach.
- * The present code works in the post-conversion color space, typically RGB.
- *
- * To improve the visual quality of the results, we actually work in scaled
- * RGB space, giving G distances more weight than R, and R in turn more than
- * B.  To do everything in integer math, we must use integer scale factors.
- * The 2/3/1 scale factors used here correspond loosely to the relative
- * weights of the colors in the NTSC grayscale equation.
- * If you want to use this code to quantize a non-RGB color space, you'll
- * probably need to change these scale factors.
- */
-
-#define R_SCALE 2		/* scale R distances by this much */
-#define G_SCALE 3		/* scale G distances by this much */
-#define B_SCALE 1		/* and B by this much */
-
-/* Relabel R/G/B as components 0/1/2, respecting the RGB ordering defined
- * in jmorecfg.h.  As the code stands, it will do the right thing for R,G,B
- * and B,G,R orders.  If you define some other weird order in jmorecfg.h,
- * you'll get compile errors until you extend this logic.  In that case
- * you'll probably want to tweak the histogram sizes too.
- */
-
-#if RGB_RED == 0
-#define C0_SCALE R_SCALE
-#endif
-#if RGB_BLUE == 0
-#define C0_SCALE B_SCALE
-#endif
-#if RGB_GREEN == 1
-#define C1_SCALE G_SCALE
-#endif
-#if RGB_RED == 2
-#define C2_SCALE R_SCALE
-#endif
-#if RGB_BLUE == 2
-#define C2_SCALE B_SCALE
-#endif
-
-
-/*
- * First we have the histogram data structure and routines for creating it.
- *
- * The number of bits of precision can be adjusted by changing these symbols.
- * We recommend keeping 6 bits for G and 5 each for R and B.
- * If you have plenty of memory and cycles, 6 bits all around gives marginally
- * better results; if you are short of memory, 5 bits all around will save
- * some space but degrade the results.
- * To maintain a fully accurate histogram, we'd need to allocate a "long"
- * (preferably unsigned long) for each cell.  In practice this is overkill;
- * we can get by with 16 bits per cell.  Few of the cell counts will overflow,
- * and clamping those that do overflow to the maximum value will give close-
- * enough results.  This reduces the recommended histogram size from 256Kb
- * to 128Kb, which is a useful savings on PC-class machines.
- * (In the second pass the histogram space is re-used for pixel mapping data;
- * in that capacity, each cell must be able to store zero to the number of
- * desired colors.  16 bits/cell is plenty for that too.)
- * Since the JPEG code is intended to run in small memory model on 80x86
- * machines, we can't just allocate the histogram in one chunk.  Instead
- * of a true 3-D array, we use a row of pointers to 2-D arrays.  Each
- * pointer corresponds to a C0 value (typically 2^5 = 32 pointers) and
- * each 2-D array has 2^6*2^5 = 2048 or 2^6*2^6 = 4096 entries.  Note that
- * on 80x86 machines, the pointer row is in near memory but the actual
- * arrays are in far memory (same arrangement as we use for image arrays).
- */
-
-#define MAXNUMCOLORS  (MAXJSAMPLE+1) /* maximum size of colormap */
-
-/* These will do the right thing for either R,G,B or B,G,R color order,
- * but you may not like the results for other color orders.
- */
-#define HIST_C0_BITS  5		/* bits of precision in R/B histogram */
-#define HIST_C1_BITS  6		/* bits of precision in G histogram */
-#define HIST_C2_BITS  5		/* bits of precision in B/R histogram */
-
-/* Number of elements along histogram axes. */
-#define HIST_C0_ELEMS  (1<<HIST_C0_BITS)
-#define HIST_C1_ELEMS  (1<<HIST_C1_BITS)
-#define HIST_C2_ELEMS  (1<<HIST_C2_BITS)
-
-/* These are the amounts to shift an input value to get a histogram index. */
-#define C0_SHIFT  (BITS_IN_JSAMPLE-HIST_C0_BITS)
-#define C1_SHIFT  (BITS_IN_JSAMPLE-HIST_C1_BITS)
-#define C2_SHIFT  (BITS_IN_JSAMPLE-HIST_C2_BITS)
-
-
-typedef UINT16 histcell;	/* histogram cell; prefer an unsigned type */
-
-typedef histcell FAR * histptr;	/* for pointers to histogram cells */
-
-typedef histcell hist1d[HIST_C2_ELEMS]; /* typedefs for the array */
-typedef hist1d FAR * hist2d;	/* type for the 2nd-level pointers */
-typedef hist2d * hist3d;	/* type for top-level pointer */
-
-
-/* Declarations for Floyd-Steinberg dithering.
- *
- * Errors are accumulated into the array fserrors[], at a resolution of
- * 1/16th of a pixel count.  The error at a given pixel is propagated
- * to its not-yet-processed neighbors using the standard F-S fractions,
- *		...	(here)	7/16
- *		3/16	5/16	1/16
- * We work left-to-right on even rows, right-to-left on odd rows.
- *
- * We can get away with a single array (holding one row's worth of errors)
- * by using it to store the current row's errors at pixel columns not yet
- * processed, but the next row's errors at columns already processed.  We
- * need only a few extra variables to hold the errors immediately around the
- * current column.  (If we are lucky, those variables are in registers, but
- * even if not, they're probably cheaper to access than array elements are.)
- *
- * The fserrors[] array has (#columns + 2) entries; the extra entry at
- * each end saves us from special-casing the first and last pixels.
- * Each entry is three values long, one value for each color component.
- *
- * Note: on a wide image, we might not have enough room in a PC's near data
- * segment to hold the error array; so it is allocated with alloc_large.
- */
-
-#if BITS_IN_JSAMPLE == 8
-typedef INT16 FSERROR;		/* 16 bits should be enough */
-typedef int LOCFSERROR;		/* use 'int' for calculation temps */
-#else
-typedef INT32 FSERROR;		/* may need more than 16 bits */
-typedef INT32 LOCFSERROR;	/* be sure calculation temps are big enough */
-#endif
-
-typedef FSERROR FAR *FSERRPTR;	/* pointer to error array (in FAR storage!) */
-
-
-/* Private subobject */
-
-typedef struct {
-  struct jpeg_color_quantizer pub; /* public fields */
-
-  /* Space for the eventually created colormap is stashed here */
-  JSAMPARRAY sv_colormap;	/* colormap allocated at init time */
-  int desired;			/* desired # of colors = size of colormap */
-
-  /* Variables for accumulating image statistics */
-  hist3d histogram;		/* pointer to the histogram */
-
-  boolean needs_zeroed;		/* TRUE if next pass must zero histogram */
-
-  /* Variables for Floyd-Steinberg dithering */
-  FSERRPTR fserrors;		/* accumulated errors */
-  boolean on_odd_row;		/* flag to remember which row we are on */
-  int * error_limiter;		/* table for clamping the applied error */
-} my_cquantizer;
-
-typedef my_cquantizer * my_cquantize_ptr;
-
-
-/*
- * Prescan some rows of pixels.
- * In this module the prescan simply updates the histogram, which has been
- * initialized to zeroes by start_pass.
- * An output_buf parameter is required by the method signature, but no data
- * is actually output (in fact the buffer controller is probably passing a
- * NULL pointer).
- */
-
-METHODDEF(void)
-prescan_quantize (j_decompress_ptr cinfo, JSAMPARRAY input_buf,
-		  JSAMPARRAY output_buf, int num_rows)
-{
-  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
-  register JSAMPROW ptr;
-  register histptr histp;
-  register hist3d histogram = cquantize->histogram;
-  int row;
-  JDIMENSION col;
-  JDIMENSION width = cinfo->output_width;
-
-  for (row = 0; row < num_rows; row++) {
-    ptr = input_buf[row];
-    for (col = width; col > 0; col--) {
-      /* get pixel value and index into the histogram */
-      histp = & histogram[GETJSAMPLE(ptr[0]) >> C0_SHIFT]
-			 [GETJSAMPLE(ptr[1]) >> C1_SHIFT]
-			 [GETJSAMPLE(ptr[2]) >> C2_SHIFT];
-      /* increment, check for overflow and undo increment if so. */
-      if (++(*histp) <= 0)
-	(*histp)--;
-      ptr += 3;
-    }
-  }
-}
-
-
-/*
- * Next we have the really interesting routines: selection of a colormap
- * given the completed histogram.
- * These routines work with a list of "boxes", each representing a rectangular
- * subset of the input color space (to histogram precision).
- */
-
-typedef struct {
-  /* The bounds of the box (inclusive); expressed as histogram indexes */
-  int c0min, c0max;
-  int c1min, c1max;
-  int c2min, c2max;
-  /* The volume (actually 2-norm) of the box */
-  INT32 volume;
-  /* The number of nonzero histogram cells within this box */
-  long colorcount;
-} box;
-
-typedef box * boxptr;
-
-
-LOCAL(boxptr)
-find_biggest_color_pop (boxptr boxlist, int numboxes)
-/* Find the splittable box with the largest color population */
-/* Returns NULL if no splittable boxes remain */
-{
-  register boxptr boxp;
-  register int i;
-  register long maxc = 0;
-  boxptr which = NULL;
-  
-  for (i = 0, boxp = boxlist; i < numboxes; i++, boxp++) {
-    if (boxp->colorcount > maxc && boxp->volume > 0) {
-      which = boxp;
-      maxc = boxp->colorcount;
-    }
-  }
-  return which;
-}
-
-
-LOCAL(boxptr)
-find_biggest_volume (boxptr boxlist, int numboxes)
-/* Find the splittable box with the largest (scaled) volume */
-/* Returns NULL if no splittable boxes remain */
-{
-  register boxptr boxp;
-  register int i;
-  register INT32 maxv = 0;
-  boxptr which = NULL;
-  
-  for (i = 0, boxp = boxlist; i < numboxes; i++, boxp++) {
-    if (boxp->volume > maxv) {
-      which = boxp;
-      maxv = boxp->volume;
-    }
-  }
-  return which;
-}
-
-
-LOCAL(void)
-update_box (j_decompress_ptr cinfo, boxptr boxp)
-/* Shrink the min/max bounds of a box to enclose only nonzero elements, */
-/* and recompute its volume and population */
-{
-  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
-  hist3d histogram = cquantize->histogram;
-  histptr histp;
-  int c0,c1,c2;
-  int c0min,c0max,c1min,c1max,c2min,c2max;
-  INT32 dist0,dist1,dist2;
-  long ccount;
-  
-  c0min = boxp->c0min;  c0max = boxp->c0max;
-  c1min = boxp->c1min;  c1max = boxp->c1max;
-  c2min = boxp->c2min;  c2max = boxp->c2max;
-  
-  if (c0max > c0min)
-    for (c0 = c0min; c0 <= c0max; c0++)
-      for (c1 = c1min; c1 <= c1max; c1++) {
-	histp = & histogram[c0][c1][c2min];
-	for (c2 = c2min; c2 <= c2max; c2++)
-	  if (*histp++ != 0) {
-	    boxp->c0min = c0min = c0;
-	    goto have_c0min;
-	  }
-      }
- have_c0min:
-  if (c0max > c0min)
-    for (c0 = c0max; c0 >= c0min; c0--)
-      for (c1 = c1min; c1 <= c1max; c1++) {
-	histp = & histogram[c0][c1][c2min];
-	for (c2 = c2min; c2 <= c2max; c2++)
-	  if (*histp++ != 0) {
-	    boxp->c0max = c0max = c0;
-	    goto have_c0max;
-	  }
-      }
- have_c0max:
-  if (c1max > c1min)
-    for (c1 = c1min; c1 <= c1max; c1++)
-      for (c0 = c0min; c0 <= c0max; c0++) {
-	histp = & histogram[c0][c1][c2min];
-	for (c2 = c2min; c2 <= c2max; c2++)
-	  if (*histp++ != 0) {
-	    boxp->c1min = c1min = c1;
-	    goto have_c1min;
-	  }
-      }
- have_c1min:
-  if (c1max > c1min)
-    for (c1 = c1max; c1 >= c1min; c1--)
-      for (c0 = c0min; c0 <= c0max; c0++) {
-	histp = & histogram[c0][c1][c2min];
-	for (c2 = c2min; c2 <= c2max; c2++)
-	  if (*histp++ != 0) {
-	    boxp->c1max = c1max = c1;
-	    goto have_c1max;
-	  }
-      }
- have_c1max:
-  if (c2max > c2min)
-    for (c2 = c2min; c2 <= c2max; c2++)
-      for (c0 = c0min; c0 <= c0max; c0++) {
-	histp = & histogram[c0][c1min][c2];
-	for (c1 = c1min; c1 <= c1max; c1++, histp += HIST_C2_ELEMS)
-	  if (*histp != 0) {
-	    boxp->c2min = c2min = c2;
-	    goto have_c2min;
-	  }
-      }
- have_c2min:
-  if (c2max > c2min)
-    for (c2 = c2max; c2 >= c2min; c2--)
-      for (c0 = c0min; c0 <= c0max; c0++) {
-	histp = & histogram[c0][c1min][c2];
-	for (c1 = c1min; c1 <= c1max; c1++, histp += HIST_C2_ELEMS)
-	  if (*histp != 0) {
-	    boxp->c2max = c2max = c2;
-	    goto have_c2max;
-	  }
-      }
- have_c2max:
-
-  /* Update box volume.
-   * We use 2-norm rather than real volume here; this biases the method
-   * against making long narrow boxes, and it has the side benefit that
-   * a box is splittable iff norm > 0.
-   * Since the differences are expressed in histogram-cell units,
-   * we have to shift back to JSAMPLE units to get consistent distances;
-   * after which, we scale according to the selected distance scale factors.
-   */
-  dist0 = ((c0max - c0min) << C0_SHIFT) * C0_SCALE;
-  dist1 = ((c1max - c1min) << C1_SHIFT) * C1_SCALE;
-  dist2 = ((c2max - c2min) << C2_SHIFT) * C2_SCALE;
-  boxp->volume = dist0*dist0 + dist1*dist1 + dist2*dist2;
-  
-  /* Now scan remaining volume of box and compute population */
-  ccount = 0;
-  for (c0 = c0min; c0 <= c0max; c0++)
-    for (c1 = c1min; c1 <= c1max; c1++) {
-      histp = & histogram[c0][c1][c2min];
-      for (c2 = c2min; c2 <= c2max; c2++, histp++)
-	if (*histp != 0) {
-	  ccount++;
-	}
-    }
-  boxp->colorcount = ccount;
-}
-
-
-LOCAL(int)
-median_cut (j_decompress_ptr cinfo, boxptr boxlist, int numboxes,
-	    int desired_colors)
-/* Repeatedly select and split the largest box until we have enough boxes */
-{
-  int n,lb;
-  int c0,c1,c2,cmax;
-  register boxptr b1,b2;
-
-  while (numboxes < desired_colors) {
-    /* Select box to split.
-     * Current algorithm: by population for first half, then by volume.
-     */
-    if (numboxes*2 <= desired_colors) {
-      b1 = find_biggest_color_pop(boxlist, numboxes);
-    } else {
-      b1 = find_biggest_volume(boxlist, numboxes);
-    }
-    if (b1 == NULL)		/* no splittable boxes left! */
-      break;
-    b2 = &boxlist[numboxes];	/* where new box will go */
-    /* Copy the color bounds to the new box. */
-    b2->c0max = b1->c0max; b2->c1max = b1->c1max; b2->c2max = b1->c2max;
-    b2->c0min = b1->c0min; b2->c1min = b1->c1min; b2->c2min = b1->c2min;
-    /* Choose which axis to split the box on.
-     * Current algorithm: longest scaled axis.
-     * See notes in update_box about scaling distances.
-     */
-    c0 = ((b1->c0max - b1->c0min) << C0_SHIFT) * C0_SCALE;
-    c1 = ((b1->c1max - b1->c1min) << C1_SHIFT) * C1_SCALE;
-    c2 = ((b1->c2max - b1->c2min) << C2_SHIFT) * C2_SCALE;
-    /* We want to break any ties in favor of green, then red, blue last.
-     * This code does the right thing for R,G,B or B,G,R color orders only.
-     */
-#if RGB_RED == 0
-    cmax = c1; n = 1;
-    if (c0 > cmax) { cmax = c0; n = 0; }
-    if (c2 > cmax) { n = 2; }
-#else
-    cmax = c1; n = 1;
-    if (c2 > cmax) { cmax = c2; n = 2; }
-    if (c0 > cmax) { n = 0; }
-#endif
-    /* Choose split point along selected axis, and update box bounds.
-     * Current algorithm: split at halfway point.
-     * (Since the box has been shrunk to minimum volume,
-     * any split will produce two nonempty subboxes.)
-     * Note that lb value is max for lower box, so must be < old max.
-     */
-    switch (n) {
-    case 0:
-      lb = (b1->c0max + b1->c0min) / 2;
-      b1->c0max = lb;
-      b2->c0min = lb+1;
-      break;
-    case 1:
-      lb = (b1->c1max + b1->c1min) / 2;
-      b1->c1max = lb;
-      b2->c1min = lb+1;
-      break;
-    case 2:
-      lb = (b1->c2max + b1->c2min) / 2;
-      b1->c2max = lb;
-      b2->c2min = lb+1;
-      break;
-    }
-    /* Update stats for boxes */
-    update_box(cinfo, b1);
-    update_box(cinfo, b2);
-    numboxes++;
-  }
-  return numboxes;
-}
-
-
-LOCAL(void)
-compute_color (j_decompress_ptr cinfo, boxptr boxp, int icolor)
-/* Compute representative color for a box, put it in colormap[icolor] */
-{
-  /* Current algorithm: mean weighted by pixels (not colors) */
-  /* Note it is important to get the rounding correct! */
-  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
-  hist3d histogram = cquantize->histogram;
-  histptr histp;
-  int c0,c1,c2;
-  int c0min,c0max,c1min,c1max,c2min,c2max;
-  long count;
-  long total = 0;
-  long c0total = 0;
-  long c1total = 0;
-  long c2total = 0;
-  
-  c0min = boxp->c0min;  c0max = boxp->c0max;
-  c1min = boxp->c1min;  c1max = boxp->c1max;
-  c2min = boxp->c2min;  c2max = boxp->c2max;
-  
-  for (c0 = c0min; c0 <= c0max; c0++)
-    for (c1 = c1min; c1 <= c1max; c1++) {
-      histp = & histogram[c0][c1][c2min];
-      for (c2 = c2min; c2 <= c2max; c2++) {
-	if ((count = *histp++) != 0) {
-	  total += count;
-	  c0total += ((c0 << C0_SHIFT) + ((1<<C0_SHIFT)>>1)) * count;
-	  c1total += ((c1 << C1_SHIFT) + ((1<<C1_SHIFT)>>1)) * count;
-	  c2total += ((c2 << C2_SHIFT) + ((1<<C2_SHIFT)>>1)) * count;
-	}
-      }
-    }
-  
-  cinfo->colormap[0][icolor] = (JSAMPLE) ((c0total + (total>>1)) / total);
-  cinfo->colormap[1][icolor] = (JSAMPLE) ((c1total + (total>>1)) / total);
-  cinfo->colormap[2][icolor] = (JSAMPLE) ((c2total + (total>>1)) / total);
-}
-
-
-LOCAL(void)
-select_colors (j_decompress_ptr cinfo, int desired_colors)
-/* Master routine for color selection */
-{
-  boxptr boxlist;
-  int numboxes;
-  int i;
-
-  /* Allocate workspace for box list */
-  boxlist = (boxptr) (*cinfo->mem->alloc_small)
-    ((j_common_ptr) cinfo, JPOOL_IMAGE, desired_colors * SIZEOF(box));
-  /* Initialize one box containing whole space */
-  numboxes = 1;
-  boxlist[0].c0min = 0;
-  boxlist[0].c0max = MAXJSAMPLE >> C0_SHIFT;
-  boxlist[0].c1min = 0;
-  boxlist[0].c1max = MAXJSAMPLE >> C1_SHIFT;
-  boxlist[0].c2min = 0;
-  boxlist[0].c2max = MAXJSAMPLE >> C2_SHIFT;
-  /* Shrink it to actually-used volume and set its statistics */
-  update_box(cinfo, & boxlist[0]);
-  /* Perform median-cut to produce final box list */
-  numboxes = median_cut(cinfo, boxlist, numboxes, desired_colors);
-  /* Compute the representative color for each box, fill colormap */
-  for (i = 0; i < numboxes; i++)
-    compute_color(cinfo, & boxlist[i], i);
-  cinfo->actual_number_of_colors = numboxes;
-  TRACEMS1(cinfo, 1, JTRC_QUANT_SELECTED, numboxes);
-}
-
-
-/*
- * These routines are concerned with the time-critical task of mapping input
- * colors to the nearest color in the selected colormap.
- *
- * We re-use the histogram space as an "inverse color map", essentially a
- * cache for the results of nearest-color searches.  All colors within a
- * histogram cell will be mapped to the same colormap entry, namely the one
- * closest to the cell's center.  This may not be quite the closest entry to
- * the actual input color, but it's almost as good.  A zero in the cache
- * indicates we haven't found the nearest color for that cell yet; the array
- * is cleared to zeroes before starting the mapping pass.  When we find the
- * nearest color for a cell, its colormap index plus one is recorded in the
- * cache for future use.  The pass2 scanning routines call fill_inverse_cmap
- * when they need to use an unfilled entry in the cache.
- *
- * Our method of efficiently finding nearest colors is based on the "locally
- * sorted search" idea described by Heckbert and on the incremental distance
- * calculation described by Spencer W. Thomas in chapter III.1 of Graphics
- * Gems II (James Arvo, ed.  Academic Press, 1991).  Thomas points out that
- * the distances from a given colormap entry to each cell of the histogram can
- * be computed quickly using an incremental method: the differences between
- * distances to adjacent cells themselves differ by a constant.  This allows a
- * fairly fast implementation of the "brute force" approach of computing the
- * distance from every colormap entry to every histogram cell.  Unfortunately,
- * it needs a work array to hold the best-distance-so-far for each histogram
- * cell (because the inner loop has to be over cells, not colormap entries).
- * The work array elements have to be INT32s, so the work array would need
- * 256Kb at our recommended precision.  This is not feasible in DOS machines.
- *
- * To get around these problems, we apply Thomas' method to compute the
- * nearest colors for only the cells within a small subbox of the histogram.
- * The work array need be only as big as the subbox, so the memory usage
- * problem is solved.  Furthermore, we need not fill subboxes that are never
- * referenced in pass2; many images use only part of the color gamut, so a
- * fair amount of work is saved.  An additional advantage of this
- * approach is that we can apply Heckbert's locality criterion to quickly
- * eliminate colormap entries that are far away from the subbox; typically
- * three-fourths of the colormap entries are rejected by Heckbert's criterion,
- * and we need not compute their distances to individual cells in the subbox.
- * The speed of this approach is heavily influenced by the subbox size: too
- * small means too much overhead, too big loses because Heckbert's criterion
- * can't eliminate as many colormap entries.  Empirically the best subbox
- * size seems to be about 1/512th of the histogram (1/8th in each direction).
- *
- * Thomas' article also describes a refined method which is asymptotically
- * faster than the brute-force method, but it is also far more complex and
- * cannot efficiently be applied to small subboxes.  It is therefore not
- * useful for programs intended to be portable to DOS machines.  On machines
- * with plenty of memory, filling the whole histogram in one shot with Thomas'
- * refined method might be faster than the present code --- but then again,
- * it might not be any faster, and it's certainly more complicated.
- */
-
-
-/* log2(histogram cells in update box) for each axis; this can be adjusted */
-#define BOX_C0_LOG  (HIST_C0_BITS-3)
-#define BOX_C1_LOG  (HIST_C1_BITS-3)
-#define BOX_C2_LOG  (HIST_C2_BITS-3)
-
-#define BOX_C0_ELEMS  (1<<BOX_C0_LOG) /* # of hist cells in update box */
-#define BOX_C1_ELEMS  (1<<BOX_C1_LOG)
-#define BOX_C2_ELEMS  (1<<BOX_C2_LOG)
-
-#define BOX_C0_SHIFT  (C0_SHIFT + BOX_C0_LOG)
-#define BOX_C1_SHIFT  (C1_SHIFT + BOX_C1_LOG)
-#define BOX_C2_SHIFT  (C2_SHIFT + BOX_C2_LOG)
-
-
-/*
- * The next three routines implement inverse colormap filling.  They could
- * all be folded into one big routine, but splitting them up this way saves
- * some stack space (the mindist[] and bestdist[] arrays need not coexist)
- * and may allow some compilers to produce better code by registerizing more
- * inner-loop variables.
- */
-
-LOCAL(int)
-find_nearby_colors (j_decompress_ptr cinfo, int minc0, int minc1, int minc2,
-		    JSAMPLE colorlist[])
-/* Locate the colormap entries close enough to an update box to be candidates
- * for the nearest entry to some cell(s) in the update box.  The update box
- * is specified by the center coordinates of its first cell.  The number of
- * candidate colormap entries is returned, and their colormap indexes are
- * placed in colorlist[].
- * This routine uses Heckbert's "locally sorted search" criterion to select
- * the colors that need further consideration.
- */
-{
-  int numcolors = cinfo->actual_number_of_colors;
-  int maxc0, maxc1, maxc2;
-  int centerc0, centerc1, centerc2;
-  int i, x, ncolors;
-  INT32 minmaxdist, min_dist, max_dist, tdist;
-  INT32 mindist[MAXNUMCOLORS];	/* min distance to colormap entry i */
-
-  /* Compute true coordinates of update box's upper corner and center.
-   * Actually we compute the coordinates of the center of the upper-corner
-   * histogram cell, which are the upper bounds of the volume we care about.
-   * Note that since ">>" rounds down, the "center" values may be closer to
-   * min than to max; hence comparisons to them must be "<=", not "<".
-   */
-  maxc0 = minc0 + ((1 << BOX_C0_SHIFT) - (1 << C0_SHIFT));
-  centerc0 = (minc0 + maxc0) >> 1;
-  maxc1 = minc1 + ((1 << BOX_C1_SHIFT) - (1 << C1_SHIFT));
-  centerc1 = (minc1 + maxc1) >> 1;
-  maxc2 = minc2 + ((1 << BOX_C2_SHIFT) - (1 << C2_SHIFT));
-  centerc2 = (minc2 + maxc2) >> 1;
-
-  /* For each color in colormap, find:
-   *  1. its minimum squared-distance to any point in the update box
-   *     (zero if color is within update box);
-   *  2. its maximum squared-distance to any point in the update box.
-   * Both of these can be found by considering only the corners of the box.
-   * We save the minimum distance for each color in mindist[];
-   * only the smallest maximum distance is of interest.
-   */
-  minmaxdist = 0x7FFFFFFFL;
-
-  for (i = 0; i < numcolors; i++) {
-    /* We compute the squared-c0-distance term, then add in the other two. */
-    x = GETJSAMPLE(cinfo->colormap[0][i]);
-    if (x < minc0) {
-      tdist = (x - minc0) * C0_SCALE;
-      min_dist = tdist*tdist;
-      tdist = (x - maxc0) * C0_SCALE;
-      max_dist = tdist*tdist;
-    } else if (x > maxc0) {
-      tdist = (x - maxc0) * C0_SCALE;
-      min_dist = tdist*tdist;
-      tdist = (x - minc0) * C0_SCALE;
-      max_dist = tdist*tdist;
-    } else {
-      /* within cell range so no contribution to min_dist */
-      min_dist = 0;
-      if (x <= centerc0) {
-	tdist = (x - maxc0) * C0_SCALE;
-	max_dist = tdist*tdist;
-      } else {
-	tdist = (x - minc0) * C0_SCALE;
-	max_dist = tdist*tdist;
-      }
-    }
-
-    x = GETJSAMPLE(cinfo->colormap[1][i]);
-    if (x < minc1) {
-      tdist = (x - minc1) * C1_SCALE;
-      min_dist += tdist*tdist;
-      tdist = (x - maxc1) * C1_SCALE;
-      max_dist += tdist*tdist;
-    } else if (x > maxc1) {
-      tdist = (x - maxc1) * C1_SCALE;
-      min_dist += tdist*tdist;
-      tdist = (x - minc1) * C1_SCALE;
-      max_dist += tdist*tdist;
-    } else {
-      /* within cell range so no contribution to min_dist */
-      if (x <= centerc1) {
-	tdist = (x - maxc1) * C1_SCALE;
-	max_dist += tdist*tdist;
-      } else {
-	tdist = (x - minc1) * C1_SCALE;
-	max_dist += tdist*tdist;
-      }
-    }
-
-    x = GETJSAMPLE(cinfo->colormap[2][i]);
-    if (x < minc2) {
-      tdist = (x - minc2) * C2_SCALE;
-      min_dist += tdist*tdist;
-      tdist = (x - maxc2) * C2_SCALE;
-      max_dist += tdist*tdist;
-    } else if (x > maxc2) {
-      tdist = (x - maxc2) * C2_SCALE;
-      min_dist += tdist*tdist;
-      tdist = (x - minc2) * C2_SCALE;
-      max_dist += tdist*tdist;
-    } else {
-      /* within cell range so no contribution to min_dist */
-      if (x <= centerc2) {
-	tdist = (x - maxc2) * C2_SCALE;
-	max_dist += tdist*tdist;
-      } else {
-	tdist = (x - minc2) * C2_SCALE;
-	max_dist += tdist*tdist;
-      }
-    }
-
-    mindist[i] = min_dist;	/* save away the results */
-    if (max_dist < minmaxdist)
-      minmaxdist = max_dist;
-  }
-
-  /* Now we know that no cell in the update box is more than minmaxdist
-   * away from some colormap entry.  Therefore, only colors that are
-   * within minmaxdist of some part of the box need be considered.
-   */
-  ncolors = 0;
-  for (i = 0; i < numcolors; i++) {
-    if (mindist[i] <= minmaxdist)
-      colorlist[ncolors++] = (JSAMPLE) i;
-  }
-  return ncolors;
-}
-
-
-LOCAL(void)
-find_best_colors (j_decompress_ptr cinfo, int minc0, int minc1, int minc2,
-		  int numcolors, JSAMPLE colorlist[], JSAMPLE bestcolor[])
-/* Find the closest colormap entry for each cell in the update box,
- * given the list of candidate colors prepared by find_nearby_colors.
- * Return the indexes of the closest entries in the bestcolor[] array.
- * This routine uses Thomas' incremental distance calculation method to
- * find the distance from a colormap entry to successive cells in the box.
- */
-{
-  int ic0, ic1, ic2;
-  int i, icolor;
-  register INT32 * bptr;	/* pointer into bestdist[] array */
-  JSAMPLE * cptr;		/* pointer into bestcolor[] array */
-  INT32 dist0, dist1;		/* initial distance values */
-  register INT32 dist2;		/* current distance in inner loop */
-  INT32 xx0, xx1;		/* distance increments */
-  register INT32 xx2;
-  INT32 inc0, inc1, inc2;	/* initial values for increments */
-  /* This array holds the distance to the nearest-so-far color for each cell */
-  INT32 bestdist[BOX_C0_ELEMS * BOX_C1_ELEMS * BOX_C2_ELEMS];
-
-  /* Initialize best-distance for each cell of the update box */
-  bptr = bestdist;
-  for (i = BOX_C0_ELEMS*BOX_C1_ELEMS*BOX_C2_ELEMS-1; i >= 0; i--)
-    *bptr++ = 0x7FFFFFFFL;
-  
-  /* For each color selected by find_nearby_colors,
-   * compute its distance to the center of each cell in the box.
-   * If that's less than best-so-far, update best distance and color number.
-   */
-  
-  /* Nominal steps between cell centers ("x" in Thomas article) */
-#define STEP_C0  ((1 << C0_SHIFT) * C0_SCALE)
-#define STEP_C1  ((1 << C1_SHIFT) * C1_SCALE)
-#define STEP_C2  ((1 << C2_SHIFT) * C2_SCALE)
-  
-  for (i = 0; i < numcolors; i++) {
-    icolor = GETJSAMPLE(colorlist[i]);
-    /* Compute (square of) distance from minc0/c1/c2 to this color */
-    inc0 = (minc0 - GETJSAMPLE(cinfo->colormap[0][icolor])) * C0_SCALE;
-    dist0 = inc0*inc0;
-    inc1 = (minc1 - GETJSAMPLE(cinfo->colormap[1][icolor])) * C1_SCALE;
-    dist0 += inc1*inc1;
-    inc2 = (minc2 - GETJSAMPLE(cinfo->colormap[2][icolor])) * C2_SCALE;
-    dist0 += inc2*inc2;
-    /* Form the initial difference increments */
-    inc0 = inc0 * (2 * STEP_C0) + STEP_C0 * STEP_C0;
-    inc1 = inc1 * (2 * STEP_C1) + STEP_C1 * STEP_C1;
-    inc2 = inc2 * (2 * STEP_C2) + STEP_C2 * STEP_C2;
-    /* Now loop over all cells in box, updating distance per Thomas method */
-    bptr = bestdist;
-    cptr = bestcolor;
-    xx0 = inc0;
-    for (ic0 = BOX_C0_ELEMS-1; ic0 >= 0; ic0--) {
-      dist1 = dist0;
-      xx1 = inc1;
-      for (ic1 = BOX_C1_ELEMS-1; ic1 >= 0; ic1--) {
-	dist2 = dist1;
-	xx2 = inc2;
-	for (ic2 = BOX_C2_ELEMS-1; ic2 >= 0; ic2--) {
-	  if (dist2 < *bptr) {
-	    *bptr = dist2;
-	    *cptr = (JSAMPLE) icolor;
-	  }
-	  dist2 += xx2;
-	  xx2 += 2 * STEP_C2 * STEP_C2;
-	  bptr++;
-	  cptr++;
-	}
-	dist1 += xx1;
-	xx1 += 2 * STEP_C1 * STEP_C1;
-      }
-      dist0 += xx0;
-      xx0 += 2 * STEP_C0 * STEP_C0;
-    }
-  }
-}
-
-
-LOCAL(void)
-fill_inverse_cmap (j_decompress_ptr cinfo, int c0, int c1, int c2)
-/* Fill the inverse-colormap entries in the update box that contains */
-/* histogram cell c0/c1/c2.  (Only that one cell MUST be filled, but */
-/* we can fill as many others as we wish.) */
-{
-  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
-  hist3d histogram = cquantize->histogram;
-  int minc0, minc1, minc2;	/* lower left corner of update box */
-  int ic0, ic1, ic2;
-  register JSAMPLE * cptr;	/* pointer into bestcolor[] array */
-  register histptr cachep;	/* pointer into main cache array */
-  /* This array lists the candidate colormap indexes. */
-  JSAMPLE colorlist[MAXNUMCOLORS];
-  int numcolors;		/* number of candidate colors */
-  /* This array holds the actually closest colormap index for each cell. */
-  JSAMPLE bestcolor[BOX_C0_ELEMS * BOX_C1_ELEMS * BOX_C2_ELEMS];
-
-  /* Convert cell coordinates to update box ID */
-  c0 >>= BOX_C0_LOG;
-  c1 >>= BOX_C1_LOG;
-  c2 >>= BOX_C2_LOG;
-
-  /* Compute true coordinates of update box's origin corner.
-   * Actually we compute the coordinates of the center of the corner
-   * histogram cell, which are the lower bounds of the volume we care about.
-   */
-  minc0 = (c0 << BOX_C0_SHIFT) + ((1 << C0_SHIFT) >> 1);
-  minc1 = (c1 << BOX_C1_SHIFT) + ((1 << C1_SHIFT) >> 1);
-  minc2 = (c2 << BOX_C2_SHIFT) + ((1 << C2_SHIFT) >> 1);
-  
-  /* Determine which colormap entries are close enough to be candidates
-   * for the nearest entry to some cell in the update box.
-   */
-  numcolors = find_nearby_colors(cinfo, minc0, minc1, minc2, colorlist);
-
-  /* Determine the actually nearest colors. */
-  find_best_colors(cinfo, minc0, minc1, minc2, numcolors, colorlist,
-		   bestcolor);
-
-  /* Save the best color numbers (plus 1) in the main cache array */
-  c0 <<= BOX_C0_LOG;		/* convert ID back to base cell indexes */
-  c1 <<= BOX_C1_LOG;
-  c2 <<= BOX_C2_LOG;
-  cptr = bestcolor;
-  for (ic0 = 0; ic0 < BOX_C0_ELEMS; ic0++) {
-    for (ic1 = 0; ic1 < BOX_C1_ELEMS; ic1++) {
-      cachep = & histogram[c0+ic0][c1+ic1][c2];
-      for (ic2 = 0; ic2 < BOX_C2_ELEMS; ic2++) {
-	*cachep++ = (histcell) (GETJSAMPLE(*cptr++) + 1);
-      }
-    }
-  }
-}
-
-
-/*
- * Map some rows of pixels to the output colormapped representation.
- */
-
-METHODDEF(void)
-pass2_no_dither (j_decompress_ptr cinfo,
-		 JSAMPARRAY input_buf, JSAMPARRAY output_buf, int num_rows)
-/* This version performs no dithering */
-{
-  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
-  hist3d histogram = cquantize->histogram;
-  register JSAMPROW inptr, outptr;
-  register histptr cachep;
-  register int c0, c1, c2;
-  int row;
-  JDIMENSION col;
-  JDIMENSION width = cinfo->output_width;
-
-  for (row = 0; row < num_rows; row++) {
-    inptr = input_buf[row];
-    outptr = output_buf[row];
-    for (col = width; col > 0; col--) {
-      /* get pixel value and index into the cache */
-      c0 = GETJSAMPLE(*inptr++) >> C0_SHIFT;
-      c1 = GETJSAMPLE(*inptr++) >> C1_SHIFT;
-      c2 = GETJSAMPLE(*inptr++) >> C2_SHIFT;
-      cachep = & histogram[c0][c1][c2];
-      /* If we have not seen this color before, find nearest colormap entry */
-      /* and update the cache */
-      if (*cachep == 0)
-	fill_inverse_cmap(cinfo, c0,c1,c2);
-      /* Now emit the colormap index for this cell */
-      *outptr++ = (JSAMPLE) (*cachep - 1);
-    }
-  }
-}
-
-
-METHODDEF(void)
-pass2_fs_dither (j_decompress_ptr cinfo,
-		 JSAMPARRAY input_buf, JSAMPARRAY output_buf, int num_rows)
-/* This version performs Floyd-Steinberg dithering */
-{
-  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
-  hist3d histogram = cquantize->histogram;
-  register LOCFSERROR cur0, cur1, cur2;	/* current error or pixel value */
-  LOCFSERROR belowerr0, belowerr1, belowerr2; /* error for pixel below cur */
-  LOCFSERROR bpreverr0, bpreverr1, bpreverr2; /* error for below/prev col */
-  register FSERRPTR errorptr;	/* => fserrors[] at column before current */
-  JSAMPROW inptr;		/* => current input pixel */
-  JSAMPROW outptr;		/* => current output pixel */
-  histptr cachep;
-  int dir;			/* +1 or -1 depending on direction */
-  int dir3;			/* 3*dir, for advancing inptr & errorptr */
-  int row;
-  JDIMENSION col;
-  JDIMENSION width = cinfo->output_width;
-  JSAMPLE *range_limit = cinfo->sample_range_limit;
-  int *error_limit = cquantize->error_limiter;
-  JSAMPROW colormap0 = cinfo->colormap[0];
-  JSAMPROW colormap1 = cinfo->colormap[1];
-  JSAMPROW colormap2 = cinfo->colormap[2];
-  SHIFT_TEMPS
-
-  for (row = 0; row < num_rows; row++) {
-    inptr = input_buf[row];
-    outptr = output_buf[row];
-    if (cquantize->on_odd_row) {
-      /* work right to left in this row */
-      inptr += (width-1) * 3;	/* so point to rightmost pixel */
-      outptr += width-1;
-      dir = -1;
-      dir3 = -3;
-      errorptr = cquantize->fserrors + (width+1)*3; /* => entry after last column */
-      cquantize->on_odd_row = FALSE; /* flip for next time */
-    } else {
-      /* work left to right in this row */
-      dir = 1;
-      dir3 = 3;
-      errorptr = cquantize->fserrors; /* => entry before first real column */
-      cquantize->on_odd_row = TRUE; /* flip for next time */
-    }
-    /* Preset error values: no error propagated to first pixel from left */
-    cur0 = cur1 = cur2 = 0;
-    /* and no error propagated to row below yet */
-    belowerr0 = belowerr1 = belowerr2 = 0;
-    bpreverr0 = bpreverr1 = bpreverr2 = 0;
-
-    for (col = width; col > 0; col--) {
-      /* curN holds the error propagated from the previous pixel on the
-       * current line.  Add the error propagated from the previous line
-       * to form the complete error correction term for this pixel, and
-       * round the error term (which is expressed * 16) to an integer.
-       * RIGHT_SHIFT rounds towards minus infinity, so adding 8 is correct
-       * for either sign of the error value.
-       * Note: errorptr points to *previous* column's array entry.
-       */
-      cur0 = RIGHT_SHIFT(cur0 + errorptr[dir3+0] + 8, 4);
-      cur1 = RIGHT_SHIFT(cur1 + errorptr[dir3+1] + 8, 4);
-      cur2 = RIGHT_SHIFT(cur2 + errorptr[dir3+2] + 8, 4);
-      /* Limit the error using transfer function set by init_error_limit.
-       * See comments with init_error_limit for rationale.
-       */
-      cur0 = error_limit[cur0];
-      cur1 = error_limit[cur1];
-      cur2 = error_limit[cur2];
-      /* Form pixel value + error, and range-limit to 0..MAXJSAMPLE.
-       * The maximum error is +- MAXJSAMPLE (or less with error limiting);
-       * this sets the required size of the range_limit array.
-       */
-      cur0 += GETJSAMPLE(inptr[0]);
-      cur1 += GETJSAMPLE(inptr[1]);
-      cur2 += GETJSAMPLE(inptr[2]);
-      cur0 = GETJSAMPLE(range_limit[cur0]);
-      cur1 = GETJSAMPLE(range_limit[cur1]);
-      cur2 = GETJSAMPLE(range_limit[cur2]);
-      /* Index into the cache with adjusted pixel value */
-      cachep = & histogram[cur0>>C0_SHIFT][cur1>>C1_SHIFT][cur2>>C2_SHIFT];
-      /* If we have not seen this color before, find nearest colormap */
-      /* entry and update the cache */
-      if (*cachep == 0)
-	fill_inverse_cmap(cinfo, cur0>>C0_SHIFT,cur1>>C1_SHIFT,cur2>>C2_SHIFT);
-      /* Now emit the colormap index for this cell */
-      { register int pixcode = *cachep - 1;
-	*outptr = (JSAMPLE) pixcode;
-	/* Compute representation error for this pixel */
-	cur0 -= GETJSAMPLE(colormap0[pixcode]);
-	cur1 -= GETJSAMPLE(colormap1[pixcode]);
-	cur2 -= GETJSAMPLE(colormap2[pixcode]);
-      }
-      /* Compute error fractions to be propagated to adjacent pixels.
-       * Add these into the running sums, and simultaneously shift the
-       * next-line error sums left by 1 column.
-       */
-      { register LOCFSERROR bnexterr, delta;
-
-	bnexterr = cur0;	/* Process component 0 */
-	delta = cur0 * 2;
-	cur0 += delta;		/* form error * 3 */
-	errorptr[0] = (FSERROR) (bpreverr0 + cur0);
-	cur0 += delta;		/* form error * 5 */
-	bpreverr0 = belowerr0 + cur0;
-	belowerr0 = bnexterr;
-	cur0 += delta;		/* form error * 7 */
-	bnexterr = cur1;	/* Process component 1 */
-	delta = cur1 * 2;
-	cur1 += delta;		/* form error * 3 */
-	errorptr[1] = (FSERROR) (bpreverr1 + cur1);
-	cur1 += delta;		/* form error * 5 */
-	bpreverr1 = belowerr1 + cur1;
-	belowerr1 = bnexterr;
-	cur1 += delta;		/* form error * 7 */
-	bnexterr = cur2;	/* Process component 2 */
-	delta = cur2 * 2;
-	cur2 += delta;		/* form error * 3 */
-	errorptr[2] = (FSERROR) (bpreverr2 + cur2);
-	cur2 += delta;		/* form error * 5 */
-	bpreverr2 = belowerr2 + cur2;
-	belowerr2 = bnexterr;
-	cur2 += delta;		/* form error * 7 */
-      }
-      /* At this point curN contains the 7/16 error value to be propagated
-       * to the next pixel on the current line, and all the errors for the
-       * next line have been shifted over.  We are therefore ready to move on.
-       */
-      inptr += dir3;		/* Advance pixel pointers to next column */
-      outptr += dir;
-      errorptr += dir3;		/* advance errorptr to current column */
-    }
-    /* Post-loop cleanup: we must unload the final error values into the
-     * final fserrors[] entry.  Note we need not unload belowerrN because
-     * it is for the dummy column before or after the actual array.
-     */
-    errorptr[0] = (FSERROR) bpreverr0; /* unload prev errs into array */
-    errorptr[1] = (FSERROR) bpreverr1;
-    errorptr[2] = (FSERROR) bpreverr2;
-  }
-}
-
-
-/*
- * Initialize the error-limiting transfer function (lookup table).
- * The raw F-S error computation can potentially compute error values of up to
- * +- MAXJSAMPLE.  But we want the maximum correction applied to a pixel to be
- * much less, otherwise obviously wrong pixels will be created.  (Typical
- * effects include weird fringes at color-area boundaries, isolated bright
- * pixels in a dark area, etc.)  The standard advice for avoiding this problem
- * is to ensure that the "corners" of the color cube are allocated as output
- * colors; then repeated errors in the same direction cannot cause cascading
- * error buildup.  However, that only prevents the error from getting
- * completely out of hand; Aaron Giles reports that error limiting improves
- * the results even with corner colors allocated.
- * A simple clamping of the error values to about +- MAXJSAMPLE/8 works pretty
- * well, but the smoother transfer function used below is even better.  Thanks
- * to Aaron Giles for this idea.
- */
-
-LOCAL(void)
-init_error_limit (j_decompress_ptr cinfo)
-/* Allocate and fill in the error_limiter table */
-{
-  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
-  int * table;
-  int in, out;
-
-  table = (int *) (*cinfo->mem->alloc_small)
-    ((j_common_ptr) cinfo, JPOOL_IMAGE, (MAXJSAMPLE*2+1) * SIZEOF(int));
-  table += MAXJSAMPLE;		/* so can index -MAXJSAMPLE .. +MAXJSAMPLE */
-  cquantize->error_limiter = table;
-
-#define STEPSIZE ((MAXJSAMPLE+1)/16)
-  /* Map errors 1:1 up to +- MAXJSAMPLE/16 */
-  out = 0;
-  for (in = 0; in < STEPSIZE; in++, out++) {
-    table[in] = out; table[-in] = -out;
-  }
-  /* Map errors 1:2 up to +- 3*MAXJSAMPLE/16 */
-  for (; in < STEPSIZE*3; in++, out += (in&1) ? 0 : 1) {
-    table[in] = out; table[-in] = -out;
-  }
-  /* Clamp the rest to final out value (which is (MAXJSAMPLE+1)/8) */
-  for (; in <= MAXJSAMPLE; in++) {
-    table[in] = out; table[-in] = -out;
-  }
-#undef STEPSIZE
-}
-
-
-/*
- * Finish up at the end of each pass.
- */
-
-METHODDEF(void)
-finish_pass1 (j_decompress_ptr cinfo)
-{
-  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
-
-  /* Select the representative colors and fill in cinfo->colormap */
-  cinfo->colormap = cquantize->sv_colormap;
-  select_colors(cinfo, cquantize->desired);
-  /* Force next pass to zero the color index table */
-  cquantize->needs_zeroed = TRUE;
-}
-
-
-METHODDEF(void)
-finish_pass2 (j_decompress_ptr cinfo)
-{
-  /* no work */
-}
-
-
-/*
- * Initialize for each processing pass.
- */
-
-METHODDEF(void)
-start_pass_2_quant (j_decompress_ptr cinfo, boolean is_pre_scan)
-{
-  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
-  hist3d histogram = cquantize->histogram;
-  int i;
-
-  /* Only F-S dithering or no dithering is supported. */
-  /* If user asks for ordered dither, give him F-S. */
-  if (cinfo->dither_mode != JDITHER_NONE)
-    cinfo->dither_mode = JDITHER_FS;
-
-  if (is_pre_scan) {
-    /* Set up method pointers */
-    cquantize->pub.color_quantize = prescan_quantize;
-    cquantize->pub.finish_pass = finish_pass1;
-    cquantize->needs_zeroed = TRUE; /* Always zero histogram */
-  } else {
-    /* Set up method pointers */
-    if (cinfo->dither_mode == JDITHER_FS)
-      cquantize->pub.color_quantize = pass2_fs_dither;
-    else
-      cquantize->pub.color_quantize = pass2_no_dither;
-    cquantize->pub.finish_pass = finish_pass2;
-
-    /* Make sure color count is acceptable */
-    i = cinfo->actual_number_of_colors;
-    if (i < 1)
-      ERREXIT1(cinfo, JERR_QUANT_FEW_COLORS, 1);
-    if (i > MAXNUMCOLORS)
-      ERREXIT1(cinfo, JERR_QUANT_MANY_COLORS, MAXNUMCOLORS);
-
-    if (cinfo->dither_mode == JDITHER_FS) {
-      size_t arraysize = (size_t) ((cinfo->output_width + 2) *
-				   (3 * SIZEOF(FSERROR)));
-      /* Allocate Floyd-Steinberg workspace if we didn't already. */
-      if (cquantize->fserrors == NULL)
-	cquantize->fserrors = (FSERRPTR) (*cinfo->mem->alloc_large)
-	  ((j_common_ptr) cinfo, JPOOL_IMAGE, arraysize);
-      /* Initialize the propagated errors to zero. */
-      FMEMZERO((void FAR *) cquantize->fserrors, arraysize);
-      /* Make the error-limit table if we didn't already. */
-      if (cquantize->error_limiter == NULL)
-	init_error_limit(cinfo);
-      cquantize->on_odd_row = FALSE;
-    }
-
-  }
-  /* Zero the histogram or inverse color map, if necessary */
-  if (cquantize->needs_zeroed) {
-    for (i = 0; i < HIST_C0_ELEMS; i++) {
-      FMEMZERO((void FAR *) histogram[i],
-	       HIST_C1_ELEMS*HIST_C2_ELEMS * SIZEOF(histcell));
-    }
-    cquantize->needs_zeroed = FALSE;
-  }
-}
-
-
-/*
- * Switch to a new external colormap between output passes.
- */
-
-METHODDEF(void)
-new_color_map_2_quant (j_decompress_ptr cinfo)
-{
-  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
-
-  /* Reset the inverse color map */
-  cquantize->needs_zeroed = TRUE;
-}
-
-
-/*
- * Module initialization routine for 2-pass color quantization.
- */
-
-GLOBAL(void)
-jinit_2pass_quantizer (j_decompress_ptr cinfo)
-{
-  my_cquantize_ptr cquantize;
-  int i;
-
-  cquantize = (my_cquantize_ptr)
-    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				SIZEOF(my_cquantizer));
-  cinfo->cquantize = (struct jpeg_color_quantizer *) cquantize;
-  cquantize->pub.start_pass = start_pass_2_quant;
-  cquantize->pub.new_color_map = new_color_map_2_quant;
-  cquantize->fserrors = NULL;	/* flag optional arrays not allocated */
-  cquantize->error_limiter = NULL;
-
-  /* Make sure jdmaster didn't give me a case I can't handle */
-  if (cinfo->out_color_components != 3)
-    ERREXIT(cinfo, JERR_NOTIMPL);
-
-  /* Allocate the histogram/inverse colormap storage */
-  cquantize->histogram = (hist3d) (*cinfo->mem->alloc_small)
-    ((j_common_ptr) cinfo, JPOOL_IMAGE, HIST_C0_ELEMS * SIZEOF(hist2d));
-  for (i = 0; i < HIST_C0_ELEMS; i++) {
-    cquantize->histogram[i] = (hist2d) (*cinfo->mem->alloc_large)
-      ((j_common_ptr) cinfo, JPOOL_IMAGE,
-       HIST_C1_ELEMS*HIST_C2_ELEMS * SIZEOF(histcell));
-  }
-  cquantize->needs_zeroed = TRUE; /* histogram is garbage now */
-
-  /* Allocate storage for the completed colormap, if required.
-   * We do this now since it is FAR storage and may affect
-   * the memory manager's space calculations.
-   */
-  if (cinfo->enable_2pass_quant) {
-    /* Make sure color count is acceptable */
-    int desired = cinfo->desired_number_of_colors;
-    /* Lower bound on # of colors ... somewhat arbitrary as long as > 0 */
-    if (desired < 8)
-      ERREXIT1(cinfo, JERR_QUANT_FEW_COLORS, 8);
-    /* Make sure colormap indexes can be represented by JSAMPLEs */
-    if (desired > MAXNUMCOLORS)
-      ERREXIT1(cinfo, JERR_QUANT_MANY_COLORS, MAXNUMCOLORS);
-    cquantize->sv_colormap = (*cinfo->mem->alloc_sarray)
-      ((j_common_ptr) cinfo,JPOOL_IMAGE, (JDIMENSION) desired, (JDIMENSION) 3);
-    cquantize->desired = desired;
-  } else
-    cquantize->sv_colormap = NULL;
-
-  /* Only F-S dithering or no dithering is supported. */
-  /* If user asks for ordered dither, give him F-S. */
-  if (cinfo->dither_mode != JDITHER_NONE)
-    cinfo->dither_mode = JDITHER_FS;
-
-  /* Allocate Floyd-Steinberg workspace if necessary.
-   * This isn't really needed until pass 2, but again it is FAR storage.
-   * Although we will cope with a later change in dither_mode,
-   * we do not promise to honor max_memory_to_use if dither_mode changes.
-   */
-  if (cinfo->dither_mode == JDITHER_FS) {
-    cquantize->fserrors = (FSERRPTR) (*cinfo->mem->alloc_large)
-      ((j_common_ptr) cinfo, JPOOL_IMAGE,
-       (size_t) ((cinfo->output_width + 2) * (3 * SIZEOF(FSERROR))));
-    /* Might as well create the error-limiting table too. */
-    init_error_limit(cinfo);
-  }
-}
-
-#endif /* QUANT_2PASS_SUPPORTED */
+/*
+ * jquant2.c
+ *
+ * Copyright (C) 1991-1996, Thomas G. Lane.
+ * Modified 2011 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains 2-pass color quantization (color mapping) routines.
+ * These routines provide selection of a custom color map for an image,
+ * followed by mapping of the image to that color map, with optional
+ * Floyd-Steinberg dithering.
+ * It is also possible to use just the second pass to map to an arbitrary
+ * externally-given color map.
+ *
+ * Note: ordered dithering is not supported, since there isn't any fast
+ * way to compute intercolor distances; it's unclear that ordered dither's
+ * fundamental assumptions even hold with an irregularly spaced color map.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+#ifdef QUANT_2PASS_SUPPORTED
+
+
+/*
+ * This module implements the well-known Heckbert paradigm for color
+ * quantization.  Most of the ideas used here can be traced back to
+ * Heckbert's seminal paper
+ *   Heckbert, Paul.  "Color Image Quantization for Frame Buffer Display",
+ *   Proc. SIGGRAPH '82, Computer Graphics v.16 #3 (July 1982), pp 297-304.
+ *
+ * In the first pass over the image, we accumulate a histogram showing the
+ * usage count of each possible color.  To keep the histogram to a reasonable
+ * size, we reduce the precision of the input; typical practice is to retain
+ * 5 or 6 bits per color, so that 8 or 4 different input values are counted
+ * in the same histogram cell.
+ *
+ * Next, the color-selection step begins with a box representing the whole
+ * color space, and repeatedly splits the "largest" remaining box until we
+ * have as many boxes as desired colors.  Then the mean color in each
+ * remaining box becomes one of the possible output colors.
+ * 
+ * The second pass over the image maps each input pixel to the closest output
+ * color (optionally after applying a Floyd-Steinberg dithering correction).
+ * This mapping is logically trivial, but making it go fast enough requires
+ * considerable care.
+ *
+ * Heckbert-style quantizers vary a good deal in their policies for choosing
+ * the "largest" box and deciding where to cut it.  The particular policies
+ * used here have proved out well in experimental comparisons, but better ones
+ * may yet be found.
+ *
+ * In earlier versions of the IJG code, this module quantized in YCbCr color
+ * space, processing the raw upsampled data without a color conversion step.
+ * This allowed the color conversion math to be done only once per colormap
+ * entry, not once per pixel.  However, that optimization precluded other
+ * useful optimizations (such as merging color conversion with upsampling)
+ * and it also interfered with desired capabilities such as quantizing to an
+ * externally-supplied colormap.  We have therefore abandoned that approach.
+ * The present code works in the post-conversion color space, typically RGB.
+ *
+ * To improve the visual quality of the results, we actually work in scaled
+ * RGB space, giving G distances more weight than R, and R in turn more than
+ * B.  To do everything in integer math, we must use integer scale factors.
+ * The 2/3/1 scale factors used here correspond loosely to the relative
+ * weights of the colors in the NTSC grayscale equation.
+ * If you want to use this code to quantize a non-RGB color space, you'll
+ * probably need to change these scale factors.
+ */
+
+#define R_SCALE 2		/* scale R distances by this much */
+#define G_SCALE 3		/* scale G distances by this much */
+#define B_SCALE 1		/* and B by this much */
+
+/* Relabel R/G/B as components 0/1/2, respecting the RGB ordering defined
+ * in jmorecfg.h.  As the code stands, it will do the right thing for R,G,B
+ * and B,G,R orders.  If you define some other weird order in jmorecfg.h,
+ * you'll get compile errors until you extend this logic.  In that case
+ * you'll probably want to tweak the histogram sizes too.
+ */
+
+#if RGB_RED == 0
+#define C0_SCALE R_SCALE
+#endif
+#if RGB_BLUE == 0
+#define C0_SCALE B_SCALE
+#endif
+#if RGB_GREEN == 1
+#define C1_SCALE G_SCALE
+#endif
+#if RGB_RED == 2
+#define C2_SCALE R_SCALE
+#endif
+#if RGB_BLUE == 2
+#define C2_SCALE B_SCALE
+#endif
+
+
+/*
+ * First we have the histogram data structure and routines for creating it.
+ *
+ * The number of bits of precision can be adjusted by changing these symbols.
+ * We recommend keeping 6 bits for G and 5 each for R and B.
+ * If you have plenty of memory and cycles, 6 bits all around gives marginally
+ * better results; if you are short of memory, 5 bits all around will save
+ * some space but degrade the results.
+ * To maintain a fully accurate histogram, we'd need to allocate a "long"
+ * (preferably unsigned long) for each cell.  In practice this is overkill;
+ * we can get by with 16 bits per cell.  Few of the cell counts will overflow,
+ * and clamping those that do overflow to the maximum value will give close-
+ * enough results.  This reduces the recommended histogram size from 256Kb
+ * to 128Kb, which is a useful savings on PC-class machines.
+ * (In the second pass the histogram space is re-used for pixel mapping data;
+ * in that capacity, each cell must be able to store zero to the number of
+ * desired colors.  16 bits/cell is plenty for that too.)
+ * Since the JPEG code is intended to run in small memory model on 80x86
+ * machines, we can't just allocate the histogram in one chunk.  Instead
+ * of a true 3-D array, we use a row of pointers to 2-D arrays.  Each
+ * pointer corresponds to a C0 value (typically 2^5 = 32 pointers) and
+ * each 2-D array has 2^6*2^5 = 2048 or 2^6*2^6 = 4096 entries.  Note that
+ * on 80x86 machines, the pointer row is in near memory but the actual
+ * arrays are in far memory (same arrangement as we use for image arrays).
+ */
+
+#define MAXNUMCOLORS  (MAXJSAMPLE+1) /* maximum size of colormap */
+
+/* These will do the right thing for either R,G,B or B,G,R color order,
+ * but you may not like the results for other color orders.
+ */
+#define HIST_C0_BITS  5		/* bits of precision in R/B histogram */
+#define HIST_C1_BITS  6		/* bits of precision in G histogram */
+#define HIST_C2_BITS  5		/* bits of precision in B/R histogram */
+
+/* Number of elements along histogram axes. */
+#define HIST_C0_ELEMS  (1<<HIST_C0_BITS)
+#define HIST_C1_ELEMS  (1<<HIST_C1_BITS)
+#define HIST_C2_ELEMS  (1<<HIST_C2_BITS)
+
+/* These are the amounts to shift an input value to get a histogram index. */
+#define C0_SHIFT  (BITS_IN_JSAMPLE-HIST_C0_BITS)
+#define C1_SHIFT  (BITS_IN_JSAMPLE-HIST_C1_BITS)
+#define C2_SHIFT  (BITS_IN_JSAMPLE-HIST_C2_BITS)
+
+
+typedef UINT16 histcell;	/* histogram cell; prefer an unsigned type */
+
+typedef histcell FAR * histptr;	/* for pointers to histogram cells */
+
+typedef histcell hist1d[HIST_C2_ELEMS]; /* typedefs for the array */
+typedef hist1d FAR * hist2d;	/* type for the 2nd-level pointers */
+typedef hist2d * hist3d;	/* type for top-level pointer */
+
+
+/* Declarations for Floyd-Steinberg dithering.
+ *
+ * Errors are accumulated into the array fserrors[], at a resolution of
+ * 1/16th of a pixel count.  The error at a given pixel is propagated
+ * to its not-yet-processed neighbors using the standard F-S fractions,
+ *		...	(here)	7/16
+ *		3/16	5/16	1/16
+ * We work left-to-right on even rows, right-to-left on odd rows.
+ *
+ * We can get away with a single array (holding one row's worth of errors)
+ * by using it to store the current row's errors at pixel columns not yet
+ * processed, but the next row's errors at columns already processed.  We
+ * need only a few extra variables to hold the errors immediately around the
+ * current column.  (If we are lucky, those variables are in registers, but
+ * even if not, they're probably cheaper to access than array elements are.)
+ *
+ * The fserrors[] array has (#columns + 2) entries; the extra entry at
+ * each end saves us from special-casing the first and last pixels.
+ * Each entry is three values long, one value for each color component.
+ *
+ * Note: on a wide image, we might not have enough room in a PC's near data
+ * segment to hold the error array; so it is allocated with alloc_large.
+ */
+
+#if BITS_IN_JSAMPLE == 8
+typedef INT16 FSERROR;		/* 16 bits should be enough */
+typedef int LOCFSERROR;		/* use 'int' for calculation temps */
+#else
+typedef INT32 FSERROR;		/* may need more than 16 bits */
+typedef INT32 LOCFSERROR;	/* be sure calculation temps are big enough */
+#endif
+
+typedef FSERROR FAR *FSERRPTR;	/* pointer to error array (in FAR storage!) */
+
+
+/* Private subobject */
+
+typedef struct {
+  struct jpeg_color_quantizer pub; /* public fields */
+
+  /* Space for the eventually created colormap is stashed here */
+  JSAMPARRAY sv_colormap;	/* colormap allocated at init time */
+  int desired;			/* desired # of colors = size of colormap */
+
+  /* Variables for accumulating image statistics */
+  hist3d histogram;		/* pointer to the histogram */
+
+  boolean needs_zeroed;		/* TRUE if next pass must zero histogram */
+
+  /* Variables for Floyd-Steinberg dithering */
+  FSERRPTR fserrors;		/* accumulated errors */
+  boolean on_odd_row;		/* flag to remember which row we are on */
+  int * error_limiter;		/* table for clamping the applied error */
+} my_cquantizer;
+
+typedef my_cquantizer * my_cquantize_ptr;
+
+
+/*
+ * Prescan some rows of pixels.
+ * In this module the prescan simply updates the histogram, which has been
+ * initialized to zeroes by start_pass.
+ * An output_buf parameter is required by the method signature, but no data
+ * is actually output (in fact the buffer controller is probably passing a
+ * NULL pointer).
+ */
+
+METHODDEF(void)
+prescan_quantize (j_decompress_ptr cinfo, JSAMPARRAY input_buf,
+		  JSAMPARRAY output_buf, int num_rows)
+{
+  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
+  register JSAMPROW ptr;
+  register histptr histp;
+  register hist3d histogram = cquantize->histogram;
+  int row;
+  JDIMENSION col;
+  JDIMENSION width = cinfo->output_width;
+
+  for (row = 0; row < num_rows; row++) {
+    ptr = input_buf[row];
+    for (col = width; col > 0; col--) {
+      /* get pixel value and index into the histogram */
+      histp = & histogram[GETJSAMPLE(ptr[0]) >> C0_SHIFT]
+			 [GETJSAMPLE(ptr[1]) >> C1_SHIFT]
+			 [GETJSAMPLE(ptr[2]) >> C2_SHIFT];
+      /* increment, check for overflow and undo increment if so. */
+      if (++(*histp) <= 0)
+	(*histp)--;
+      ptr += 3;
+    }
+  }
+}
+
+
+/*
+ * Next we have the really interesting routines: selection of a colormap
+ * given the completed histogram.
+ * These routines work with a list of "boxes", each representing a rectangular
+ * subset of the input color space (to histogram precision).
+ */
+
+typedef struct {
+  /* The bounds of the box (inclusive); expressed as histogram indexes */
+  int c0min, c0max;
+  int c1min, c1max;
+  int c2min, c2max;
+  /* The volume (actually 2-norm) of the box */
+  INT32 volume;
+  /* The number of nonzero histogram cells within this box */
+  long colorcount;
+} box;
+
+typedef box * boxptr;
+
+
+LOCAL(boxptr)
+find_biggest_color_pop (boxptr boxlist, int numboxes)
+/* Find the splittable box with the largest color population */
+/* Returns NULL if no splittable boxes remain */
+{
+  register boxptr boxp;
+  register int i;
+  register long maxc = 0;
+  boxptr which = NULL;
+  
+  for (i = 0, boxp = boxlist; i < numboxes; i++, boxp++) {
+    if (boxp->colorcount > maxc && boxp->volume > 0) {
+      which = boxp;
+      maxc = boxp->colorcount;
+    }
+  }
+  return which;
+}
+
+
+LOCAL(boxptr)
+find_biggest_volume (boxptr boxlist, int numboxes)
+/* Find the splittable box with the largest (scaled) volume */
+/* Returns NULL if no splittable boxes remain */
+{
+  register boxptr boxp;
+  register int i;
+  register INT32 maxv = 0;
+  boxptr which = NULL;
+  
+  for (i = 0, boxp = boxlist; i < numboxes; i++, boxp++) {
+    if (boxp->volume > maxv) {
+      which = boxp;
+      maxv = boxp->volume;
+    }
+  }
+  return which;
+}
+
+
+LOCAL(void)
+update_box (j_decompress_ptr cinfo, boxptr boxp)
+/* Shrink the min/max bounds of a box to enclose only nonzero elements, */
+/* and recompute its volume and population */
+{
+  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
+  hist3d histogram = cquantize->histogram;
+  histptr histp;
+  int c0,c1,c2;
+  int c0min,c0max,c1min,c1max,c2min,c2max;
+  INT32 dist0,dist1,dist2;
+  long ccount;
+  
+  c0min = boxp->c0min;  c0max = boxp->c0max;
+  c1min = boxp->c1min;  c1max = boxp->c1max;
+  c2min = boxp->c2min;  c2max = boxp->c2max;
+  
+  if (c0max > c0min)
+    for (c0 = c0min; c0 <= c0max; c0++)
+      for (c1 = c1min; c1 <= c1max; c1++) {
+	histp = & histogram[c0][c1][c2min];
+	for (c2 = c2min; c2 <= c2max; c2++)
+	  if (*histp++ != 0) {
+	    boxp->c0min = c0min = c0;
+	    goto have_c0min;
+	  }
+      }
+ have_c0min:
+  if (c0max > c0min)
+    for (c0 = c0max; c0 >= c0min; c0--)
+      for (c1 = c1min; c1 <= c1max; c1++) {
+	histp = & histogram[c0][c1][c2min];
+	for (c2 = c2min; c2 <= c2max; c2++)
+	  if (*histp++ != 0) {
+	    boxp->c0max = c0max = c0;
+	    goto have_c0max;
+	  }
+      }
+ have_c0max:
+  if (c1max > c1min)
+    for (c1 = c1min; c1 <= c1max; c1++)
+      for (c0 = c0min; c0 <= c0max; c0++) {
+	histp = & histogram[c0][c1][c2min];
+	for (c2 = c2min; c2 <= c2max; c2++)
+	  if (*histp++ != 0) {
+	    boxp->c1min = c1min = c1;
+	    goto have_c1min;
+	  }
+      }
+ have_c1min:
+  if (c1max > c1min)
+    for (c1 = c1max; c1 >= c1min; c1--)
+      for (c0 = c0min; c0 <= c0max; c0++) {
+	histp = & histogram[c0][c1][c2min];
+	for (c2 = c2min; c2 <= c2max; c2++)
+	  if (*histp++ != 0) {
+	    boxp->c1max = c1max = c1;
+	    goto have_c1max;
+	  }
+      }
+ have_c1max:
+  if (c2max > c2min)
+    for (c2 = c2min; c2 <= c2max; c2++)
+      for (c0 = c0min; c0 <= c0max; c0++) {
+	histp = & histogram[c0][c1min][c2];
+	for (c1 = c1min; c1 <= c1max; c1++, histp += HIST_C2_ELEMS)
+	  if (*histp != 0) {
+	    boxp->c2min = c2min = c2;
+	    goto have_c2min;
+	  }
+      }
+ have_c2min:
+  if (c2max > c2min)
+    for (c2 = c2max; c2 >= c2min; c2--)
+      for (c0 = c0min; c0 <= c0max; c0++) {
+	histp = & histogram[c0][c1min][c2];
+	for (c1 = c1min; c1 <= c1max; c1++, histp += HIST_C2_ELEMS)
+	  if (*histp != 0) {
+	    boxp->c2max = c2max = c2;
+	    goto have_c2max;
+	  }
+      }
+ have_c2max:
+
+  /* Update box volume.
+   * We use 2-norm rather than real volume here; this biases the method
+   * against making long narrow boxes, and it has the side benefit that
+   * a box is splittable iff norm > 0.
+   * Since the differences are expressed in histogram-cell units,
+   * we have to shift back to JSAMPLE units to get consistent distances;
+   * after which, we scale according to the selected distance scale factors.
+   */
+  dist0 = ((c0max - c0min) << C0_SHIFT) * C0_SCALE;
+  dist1 = ((c1max - c1min) << C1_SHIFT) * C1_SCALE;
+  dist2 = ((c2max - c2min) << C2_SHIFT) * C2_SCALE;
+  boxp->volume = dist0*dist0 + dist1*dist1 + dist2*dist2;
+  
+  /* Now scan remaining volume of box and compute population */
+  ccount = 0;
+  for (c0 = c0min; c0 <= c0max; c0++)
+    for (c1 = c1min; c1 <= c1max; c1++) {
+      histp = & histogram[c0][c1][c2min];
+      for (c2 = c2min; c2 <= c2max; c2++, histp++)
+	if (*histp != 0) {
+	  ccount++;
+	}
+    }
+  boxp->colorcount = ccount;
+}
+
+
+LOCAL(int)
+median_cut (j_decompress_ptr cinfo, boxptr boxlist, int numboxes,
+	    int desired_colors)
+/* Repeatedly select and split the largest box until we have enough boxes */
+{
+  int n,lb;
+  int c0,c1,c2,cmax;
+  register boxptr b1,b2;
+
+  while (numboxes < desired_colors) {
+    /* Select box to split.
+     * Current algorithm: by population for first half, then by volume.
+     */
+    if (numboxes*2 <= desired_colors) {
+      b1 = find_biggest_color_pop(boxlist, numboxes);
+    } else {
+      b1 = find_biggest_volume(boxlist, numboxes);
+    }
+    if (b1 == NULL)		/* no splittable boxes left! */
+      break;
+    b2 = &boxlist[numboxes];	/* where new box will go */
+    /* Copy the color bounds to the new box. */
+    b2->c0max = b1->c0max; b2->c1max = b1->c1max; b2->c2max = b1->c2max;
+    b2->c0min = b1->c0min; b2->c1min = b1->c1min; b2->c2min = b1->c2min;
+    /* Choose which axis to split the box on.
+     * Current algorithm: longest scaled axis.
+     * See notes in update_box about scaling distances.
+     */
+    c0 = ((b1->c0max - b1->c0min) << C0_SHIFT) * C0_SCALE;
+    c1 = ((b1->c1max - b1->c1min) << C1_SHIFT) * C1_SCALE;
+    c2 = ((b1->c2max - b1->c2min) << C2_SHIFT) * C2_SCALE;
+    /* We want to break any ties in favor of green, then red, blue last.
+     * This code does the right thing for R,G,B or B,G,R color orders only.
+     */
+#if RGB_RED == 0
+    cmax = c1; n = 1;
+    if (c0 > cmax) { cmax = c0; n = 0; }
+    if (c2 > cmax) { n = 2; }
+#else
+    cmax = c1; n = 1;
+    if (c2 > cmax) { cmax = c2; n = 2; }
+    if (c0 > cmax) { n = 0; }
+#endif
+    /* Choose split point along selected axis, and update box bounds.
+     * Current algorithm: split at halfway point.
+     * (Since the box has been shrunk to minimum volume,
+     * any split will produce two nonempty subboxes.)
+     * Note that lb value is max for lower box, so must be < old max.
+     */
+    switch (n) {
+    case 0:
+      lb = (b1->c0max + b1->c0min) / 2;
+      b1->c0max = lb;
+      b2->c0min = lb+1;
+      break;
+    case 1:
+      lb = (b1->c1max + b1->c1min) / 2;
+      b1->c1max = lb;
+      b2->c1min = lb+1;
+      break;
+    case 2:
+      lb = (b1->c2max + b1->c2min) / 2;
+      b1->c2max = lb;
+      b2->c2min = lb+1;
+      break;
+    }
+    /* Update stats for boxes */
+    update_box(cinfo, b1);
+    update_box(cinfo, b2);
+    numboxes++;
+  }
+  return numboxes;
+}
+
+
+LOCAL(void)
+compute_color (j_decompress_ptr cinfo, boxptr boxp, int icolor)
+/* Compute representative color for a box, put it in colormap[icolor] */
+{
+  /* Current algorithm: mean weighted by pixels (not colors) */
+  /* Note it is important to get the rounding correct! */
+  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
+  hist3d histogram = cquantize->histogram;
+  histptr histp;
+  int c0,c1,c2;
+  int c0min,c0max,c1min,c1max,c2min,c2max;
+  long count;
+  long total = 0;
+  long c0total = 0;
+  long c1total = 0;
+  long c2total = 0;
+  
+  c0min = boxp->c0min;  c0max = boxp->c0max;
+  c1min = boxp->c1min;  c1max = boxp->c1max;
+  c2min = boxp->c2min;  c2max = boxp->c2max;
+  
+  for (c0 = c0min; c0 <= c0max; c0++)
+    for (c1 = c1min; c1 <= c1max; c1++) {
+      histp = & histogram[c0][c1][c2min];
+      for (c2 = c2min; c2 <= c2max; c2++) {
+	if ((count = *histp++) != 0) {
+	  total += count;
+	  c0total += ((c0 << C0_SHIFT) + ((1<<C0_SHIFT)>>1)) * count;
+	  c1total += ((c1 << C1_SHIFT) + ((1<<C1_SHIFT)>>1)) * count;
+	  c2total += ((c2 << C2_SHIFT) + ((1<<C2_SHIFT)>>1)) * count;
+	}
+      }
+    }
+  
+  cinfo->colormap[0][icolor] = (JSAMPLE) ((c0total + (total>>1)) / total);
+  cinfo->colormap[1][icolor] = (JSAMPLE) ((c1total + (total>>1)) / total);
+  cinfo->colormap[2][icolor] = (JSAMPLE) ((c2total + (total>>1)) / total);
+}
+
+
+LOCAL(void)
+select_colors (j_decompress_ptr cinfo, int desired_colors)
+/* Master routine for color selection */
+{
+  boxptr boxlist;
+  int numboxes;
+  int i;
+
+  /* Allocate workspace for box list */
+  boxlist = (boxptr) (*cinfo->mem->alloc_small)
+    ((j_common_ptr) cinfo, JPOOL_IMAGE, desired_colors * SIZEOF(box));
+  /* Initialize one box containing whole space */
+  numboxes = 1;
+  boxlist[0].c0min = 0;
+  boxlist[0].c0max = MAXJSAMPLE >> C0_SHIFT;
+  boxlist[0].c1min = 0;
+  boxlist[0].c1max = MAXJSAMPLE >> C1_SHIFT;
+  boxlist[0].c2min = 0;
+  boxlist[0].c2max = MAXJSAMPLE >> C2_SHIFT;
+  /* Shrink it to actually-used volume and set its statistics */
+  update_box(cinfo, & boxlist[0]);
+  /* Perform median-cut to produce final box list */
+  numboxes = median_cut(cinfo, boxlist, numboxes, desired_colors);
+  /* Compute the representative color for each box, fill colormap */
+  for (i = 0; i < numboxes; i++)
+    compute_color(cinfo, & boxlist[i], i);
+  cinfo->actual_number_of_colors = numboxes;
+  TRACEMS1(cinfo, 1, JTRC_QUANT_SELECTED, numboxes);
+}
+
+
+/*
+ * These routines are concerned with the time-critical task of mapping input
+ * colors to the nearest color in the selected colormap.
+ *
+ * We re-use the histogram space as an "inverse color map", essentially a
+ * cache for the results of nearest-color searches.  All colors within a
+ * histogram cell will be mapped to the same colormap entry, namely the one
+ * closest to the cell's center.  This may not be quite the closest entry to
+ * the actual input color, but it's almost as good.  A zero in the cache
+ * indicates we haven't found the nearest color for that cell yet; the array
+ * is cleared to zeroes before starting the mapping pass.  When we find the
+ * nearest color for a cell, its colormap index plus one is recorded in the
+ * cache for future use.  The pass2 scanning routines call fill_inverse_cmap
+ * when they need to use an unfilled entry in the cache.
+ *
+ * Our method of efficiently finding nearest colors is based on the "locally
+ * sorted search" idea described by Heckbert and on the incremental distance
+ * calculation described by Spencer W. Thomas in chapter III.1 of Graphics
+ * Gems II (James Arvo, ed.  Academic Press, 1991).  Thomas points out that
+ * the distances from a given colormap entry to each cell of the histogram can
+ * be computed quickly using an incremental method: the differences between
+ * distances to adjacent cells themselves differ by a constant.  This allows a
+ * fairly fast implementation of the "brute force" approach of computing the
+ * distance from every colormap entry to every histogram cell.  Unfortunately,
+ * it needs a work array to hold the best-distance-so-far for each histogram
+ * cell (because the inner loop has to be over cells, not colormap entries).
+ * The work array elements have to be INT32s, so the work array would need
+ * 256Kb at our recommended precision.  This is not feasible in DOS machines.
+ *
+ * To get around these problems, we apply Thomas' method to compute the
+ * nearest colors for only the cells within a small subbox of the histogram.
+ * The work array need be only as big as the subbox, so the memory usage
+ * problem is solved.  Furthermore, we need not fill subboxes that are never
+ * referenced in pass2; many images use only part of the color gamut, so a
+ * fair amount of work is saved.  An additional advantage of this
+ * approach is that we can apply Heckbert's locality criterion to quickly
+ * eliminate colormap entries that are far away from the subbox; typically
+ * three-fourths of the colormap entries are rejected by Heckbert's criterion,
+ * and we need not compute their distances to individual cells in the subbox.
+ * The speed of this approach is heavily influenced by the subbox size: too
+ * small means too much overhead, too big loses because Heckbert's criterion
+ * can't eliminate as many colormap entries.  Empirically the best subbox
+ * size seems to be about 1/512th of the histogram (1/8th in each direction).
+ *
+ * Thomas' article also describes a refined method which is asymptotically
+ * faster than the brute-force method, but it is also far more complex and
+ * cannot efficiently be applied to small subboxes.  It is therefore not
+ * useful for programs intended to be portable to DOS machines.  On machines
+ * with plenty of memory, filling the whole histogram in one shot with Thomas'
+ * refined method might be faster than the present code --- but then again,
+ * it might not be any faster, and it's certainly more complicated.
+ */
+
+
+/* log2(histogram cells in update box) for each axis; this can be adjusted */
+#define BOX_C0_LOG  (HIST_C0_BITS-3)
+#define BOX_C1_LOG  (HIST_C1_BITS-3)
+#define BOX_C2_LOG  (HIST_C2_BITS-3)
+
+#define BOX_C0_ELEMS  (1<<BOX_C0_LOG) /* # of hist cells in update box */
+#define BOX_C1_ELEMS  (1<<BOX_C1_LOG)
+#define BOX_C2_ELEMS  (1<<BOX_C2_LOG)
+
+#define BOX_C0_SHIFT  (C0_SHIFT + BOX_C0_LOG)
+#define BOX_C1_SHIFT  (C1_SHIFT + BOX_C1_LOG)
+#define BOX_C2_SHIFT  (C2_SHIFT + BOX_C2_LOG)
+
+
+/*
+ * The next three routines implement inverse colormap filling.  They could
+ * all be folded into one big routine, but splitting them up this way saves
+ * some stack space (the mindist[] and bestdist[] arrays need not coexist)
+ * and may allow some compilers to produce better code by registerizing more
+ * inner-loop variables.
+ */
+
+LOCAL(int)
+find_nearby_colors (j_decompress_ptr cinfo, int minc0, int minc1, int minc2,
+		    JSAMPLE colorlist[])
+/* Locate the colormap entries close enough to an update box to be candidates
+ * for the nearest entry to some cell(s) in the update box.  The update box
+ * is specified by the center coordinates of its first cell.  The number of
+ * candidate colormap entries is returned, and their colormap indexes are
+ * placed in colorlist[].
+ * This routine uses Heckbert's "locally sorted search" criterion to select
+ * the colors that need further consideration.
+ */
+{
+  int numcolors = cinfo->actual_number_of_colors;
+  int maxc0, maxc1, maxc2;
+  int centerc0, centerc1, centerc2;
+  int i, x, ncolors;
+  INT32 minmaxdist, min_dist, max_dist, tdist;
+  INT32 mindist[MAXNUMCOLORS];	/* min distance to colormap entry i */
+
+  /* Compute true coordinates of update box's upper corner and center.
+   * Actually we compute the coordinates of the center of the upper-corner
+   * histogram cell, which are the upper bounds of the volume we care about.
+   * Note that since ">>" rounds down, the "center" values may be closer to
+   * min than to max; hence comparisons to them must be "<=", not "<".
+   */
+  maxc0 = minc0 + ((1 << BOX_C0_SHIFT) - (1 << C0_SHIFT));
+  centerc0 = (minc0 + maxc0) >> 1;
+  maxc1 = minc1 + ((1 << BOX_C1_SHIFT) - (1 << C1_SHIFT));
+  centerc1 = (minc1 + maxc1) >> 1;
+  maxc2 = minc2 + ((1 << BOX_C2_SHIFT) - (1 << C2_SHIFT));
+  centerc2 = (minc2 + maxc2) >> 1;
+
+  /* For each color in colormap, find:
+   *  1. its minimum squared-distance to any point in the update box
+   *     (zero if color is within update box);
+   *  2. its maximum squared-distance to any point in the update box.
+   * Both of these can be found by considering only the corners of the box.
+   * We save the minimum distance for each color in mindist[];
+   * only the smallest maximum distance is of interest.
+   */
+  minmaxdist = 0x7FFFFFFFL;
+
+  for (i = 0; i < numcolors; i++) {
+    /* We compute the squared-c0-distance term, then add in the other two. */
+    x = GETJSAMPLE(cinfo->colormap[0][i]);
+    if (x < minc0) {
+      tdist = (x - minc0) * C0_SCALE;
+      min_dist = tdist*tdist;
+      tdist = (x - maxc0) * C0_SCALE;
+      max_dist = tdist*tdist;
+    } else if (x > maxc0) {
+      tdist = (x - maxc0) * C0_SCALE;
+      min_dist = tdist*tdist;
+      tdist = (x - minc0) * C0_SCALE;
+      max_dist = tdist*tdist;
+    } else {
+      /* within cell range so no contribution to min_dist */
+      min_dist = 0;
+      if (x <= centerc0) {
+	tdist = (x - maxc0) * C0_SCALE;
+	max_dist = tdist*tdist;
+      } else {
+	tdist = (x - minc0) * C0_SCALE;
+	max_dist = tdist*tdist;
+      }
+    }
+
+    x = GETJSAMPLE(cinfo->colormap[1][i]);
+    if (x < minc1) {
+      tdist = (x - minc1) * C1_SCALE;
+      min_dist += tdist*tdist;
+      tdist = (x - maxc1) * C1_SCALE;
+      max_dist += tdist*tdist;
+    } else if (x > maxc1) {
+      tdist = (x - maxc1) * C1_SCALE;
+      min_dist += tdist*tdist;
+      tdist = (x - minc1) * C1_SCALE;
+      max_dist += tdist*tdist;
+    } else {
+      /* within cell range so no contribution to min_dist */
+      if (x <= centerc1) {
+	tdist = (x - maxc1) * C1_SCALE;
+	max_dist += tdist*tdist;
+      } else {
+	tdist = (x - minc1) * C1_SCALE;
+	max_dist += tdist*tdist;
+      }
+    }
+
+    x = GETJSAMPLE(cinfo->colormap[2][i]);
+    if (x < minc2) {
+      tdist = (x - minc2) * C2_SCALE;
+      min_dist += tdist*tdist;
+      tdist = (x - maxc2) * C2_SCALE;
+      max_dist += tdist*tdist;
+    } else if (x > maxc2) {
+      tdist = (x - maxc2) * C2_SCALE;
+      min_dist += tdist*tdist;
+      tdist = (x - minc2) * C2_SCALE;
+      max_dist += tdist*tdist;
+    } else {
+      /* within cell range so no contribution to min_dist */
+      if (x <= centerc2) {
+	tdist = (x - maxc2) * C2_SCALE;
+	max_dist += tdist*tdist;
+      } else {
+	tdist = (x - minc2) * C2_SCALE;
+	max_dist += tdist*tdist;
+      }
+    }
+
+    mindist[i] = min_dist;	/* save away the results */
+    if (max_dist < minmaxdist)
+      minmaxdist = max_dist;
+  }
+
+  /* Now we know that no cell in the update box is more than minmaxdist
+   * away from some colormap entry.  Therefore, only colors that are
+   * within minmaxdist of some part of the box need be considered.
+   */
+  ncolors = 0;
+  for (i = 0; i < numcolors; i++) {
+    if (mindist[i] <= minmaxdist)
+      colorlist[ncolors++] = (JSAMPLE) i;
+  }
+  return ncolors;
+}
+
+
+LOCAL(void)
+find_best_colors (j_decompress_ptr cinfo, int minc0, int minc1, int minc2,
+		  int numcolors, JSAMPLE colorlist[], JSAMPLE bestcolor[])
+/* Find the closest colormap entry for each cell in the update box,
+ * given the list of candidate colors prepared by find_nearby_colors.
+ * Return the indexes of the closest entries in the bestcolor[] array.
+ * This routine uses Thomas' incremental distance calculation method to
+ * find the distance from a colormap entry to successive cells in the box.
+ */
+{
+  int ic0, ic1, ic2;
+  int i, icolor;
+  register INT32 * bptr;	/* pointer into bestdist[] array */
+  JSAMPLE * cptr;		/* pointer into bestcolor[] array */
+  INT32 dist0, dist1;		/* initial distance values */
+  register INT32 dist2;		/* current distance in inner loop */
+  INT32 xx0, xx1;		/* distance increments */
+  register INT32 xx2;
+  INT32 inc0, inc1, inc2;	/* initial values for increments */
+  /* This array holds the distance to the nearest-so-far color for each cell */
+  INT32 bestdist[BOX_C0_ELEMS * BOX_C1_ELEMS * BOX_C2_ELEMS];
+
+  /* Initialize best-distance for each cell of the update box */
+  bptr = bestdist;
+  for (i = BOX_C0_ELEMS*BOX_C1_ELEMS*BOX_C2_ELEMS-1; i >= 0; i--)
+    *bptr++ = 0x7FFFFFFFL;
+  
+  /* For each color selected by find_nearby_colors,
+   * compute its distance to the center of each cell in the box.
+   * If that's less than best-so-far, update best distance and color number.
+   */
+  
+  /* Nominal steps between cell centers ("x" in Thomas article) */
+#define STEP_C0  ((1 << C0_SHIFT) * C0_SCALE)
+#define STEP_C1  ((1 << C1_SHIFT) * C1_SCALE)
+#define STEP_C2  ((1 << C2_SHIFT) * C2_SCALE)
+  
+  for (i = 0; i < numcolors; i++) {
+    icolor = GETJSAMPLE(colorlist[i]);
+    /* Compute (square of) distance from minc0/c1/c2 to this color */
+    inc0 = (minc0 - GETJSAMPLE(cinfo->colormap[0][icolor])) * C0_SCALE;
+    dist0 = inc0*inc0;
+    inc1 = (minc1 - GETJSAMPLE(cinfo->colormap[1][icolor])) * C1_SCALE;
+    dist0 += inc1*inc1;
+    inc2 = (minc2 - GETJSAMPLE(cinfo->colormap[2][icolor])) * C2_SCALE;
+    dist0 += inc2*inc2;
+    /* Form the initial difference increments */
+    inc0 = inc0 * (2 * STEP_C0) + STEP_C0 * STEP_C0;
+    inc1 = inc1 * (2 * STEP_C1) + STEP_C1 * STEP_C1;
+    inc2 = inc2 * (2 * STEP_C2) + STEP_C2 * STEP_C2;
+    /* Now loop over all cells in box, updating distance per Thomas method */
+    bptr = bestdist;
+    cptr = bestcolor;
+    xx0 = inc0;
+    for (ic0 = BOX_C0_ELEMS-1; ic0 >= 0; ic0--) {
+      dist1 = dist0;
+      xx1 = inc1;
+      for (ic1 = BOX_C1_ELEMS-1; ic1 >= 0; ic1--) {
+	dist2 = dist1;
+	xx2 = inc2;
+	for (ic2 = BOX_C2_ELEMS-1; ic2 >= 0; ic2--) {
+	  if (dist2 < *bptr) {
+	    *bptr = dist2;
+	    *cptr = (JSAMPLE) icolor;
+	  }
+	  dist2 += xx2;
+	  xx2 += 2 * STEP_C2 * STEP_C2;
+	  bptr++;
+	  cptr++;
+	}
+	dist1 += xx1;
+	xx1 += 2 * STEP_C1 * STEP_C1;
+      }
+      dist0 += xx0;
+      xx0 += 2 * STEP_C0 * STEP_C0;
+    }
+  }
+}
+
+
+LOCAL(void)
+fill_inverse_cmap (j_decompress_ptr cinfo, int c0, int c1, int c2)
+/* Fill the inverse-colormap entries in the update box that contains */
+/* histogram cell c0/c1/c2.  (Only that one cell MUST be filled, but */
+/* we can fill as many others as we wish.) */
+{
+  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
+  hist3d histogram = cquantize->histogram;
+  int minc0, minc1, minc2;	/* lower left corner of update box */
+  int ic0, ic1, ic2;
+  register JSAMPLE * cptr;	/* pointer into bestcolor[] array */
+  register histptr cachep;	/* pointer into main cache array */
+  /* This array lists the candidate colormap indexes. */
+  JSAMPLE colorlist[MAXNUMCOLORS];
+  int numcolors;		/* number of candidate colors */
+  /* This array holds the actually closest colormap index for each cell. */
+  JSAMPLE bestcolor[BOX_C0_ELEMS * BOX_C1_ELEMS * BOX_C2_ELEMS];
+
+  /* Convert cell coordinates to update box ID */
+  c0 >>= BOX_C0_LOG;
+  c1 >>= BOX_C1_LOG;
+  c2 >>= BOX_C2_LOG;
+
+  /* Compute true coordinates of update box's origin corner.
+   * Actually we compute the coordinates of the center of the corner
+   * histogram cell, which are the lower bounds of the volume we care about.
+   */
+  minc0 = (c0 << BOX_C0_SHIFT) + ((1 << C0_SHIFT) >> 1);
+  minc1 = (c1 << BOX_C1_SHIFT) + ((1 << C1_SHIFT) >> 1);
+  minc2 = (c2 << BOX_C2_SHIFT) + ((1 << C2_SHIFT) >> 1);
+  
+  /* Determine which colormap entries are close enough to be candidates
+   * for the nearest entry to some cell in the update box.
+   */
+  numcolors = find_nearby_colors(cinfo, minc0, minc1, minc2, colorlist);
+
+  /* Determine the actually nearest colors. */
+  find_best_colors(cinfo, minc0, minc1, minc2, numcolors, colorlist,
+		   bestcolor);
+
+  /* Save the best color numbers (plus 1) in the main cache array */
+  c0 <<= BOX_C0_LOG;		/* convert ID back to base cell indexes */
+  c1 <<= BOX_C1_LOG;
+  c2 <<= BOX_C2_LOG;
+  cptr = bestcolor;
+  for (ic0 = 0; ic0 < BOX_C0_ELEMS; ic0++) {
+    for (ic1 = 0; ic1 < BOX_C1_ELEMS; ic1++) {
+      cachep = & histogram[c0+ic0][c1+ic1][c2];
+      for (ic2 = 0; ic2 < BOX_C2_ELEMS; ic2++) {
+	*cachep++ = (histcell) (GETJSAMPLE(*cptr++) + 1);
+      }
+    }
+  }
+}
+
+
+/*
+ * Map some rows of pixels to the output colormapped representation.
+ */
+
+METHODDEF(void)
+pass2_no_dither (j_decompress_ptr cinfo,
+		 JSAMPARRAY input_buf, JSAMPARRAY output_buf, int num_rows)
+/* This version performs no dithering */
+{
+  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
+  hist3d histogram = cquantize->histogram;
+  register JSAMPROW inptr, outptr;
+  register histptr cachep;
+  register int c0, c1, c2;
+  int row;
+  JDIMENSION col;
+  JDIMENSION width = cinfo->output_width;
+
+  for (row = 0; row < num_rows; row++) {
+    inptr = input_buf[row];
+    outptr = output_buf[row];
+    for (col = width; col > 0; col--) {
+      /* get pixel value and index into the cache */
+      c0 = GETJSAMPLE(*inptr++) >> C0_SHIFT;
+      c1 = GETJSAMPLE(*inptr++) >> C1_SHIFT;
+      c2 = GETJSAMPLE(*inptr++) >> C2_SHIFT;
+      cachep = & histogram[c0][c1][c2];
+      /* If we have not seen this color before, find nearest colormap entry */
+      /* and update the cache */
+      if (*cachep == 0)
+	fill_inverse_cmap(cinfo, c0,c1,c2);
+      /* Now emit the colormap index for this cell */
+      *outptr++ = (JSAMPLE) (*cachep - 1);
+    }
+  }
+}
+
+
+METHODDEF(void)
+pass2_fs_dither (j_decompress_ptr cinfo,
+		 JSAMPARRAY input_buf, JSAMPARRAY output_buf, int num_rows)
+/* This version performs Floyd-Steinberg dithering */
+{
+  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
+  hist3d histogram = cquantize->histogram;
+  register LOCFSERROR cur0, cur1, cur2;	/* current error or pixel value */
+  LOCFSERROR belowerr0, belowerr1, belowerr2; /* error for pixel below cur */
+  LOCFSERROR bpreverr0, bpreverr1, bpreverr2; /* error for below/prev col */
+  register FSERRPTR errorptr;	/* => fserrors[] at column before current */
+  JSAMPROW inptr;		/* => current input pixel */
+  JSAMPROW outptr;		/* => current output pixel */
+  histptr cachep;
+  int dir;			/* +1 or -1 depending on direction */
+  int dir3;			/* 3*dir, for advancing inptr & errorptr */
+  int row;
+  JDIMENSION col;
+  JDIMENSION width = cinfo->output_width;
+  JSAMPLE *range_limit = cinfo->sample_range_limit;
+  int *error_limit = cquantize->error_limiter;
+  JSAMPROW colormap0 = cinfo->colormap[0];
+  JSAMPROW colormap1 = cinfo->colormap[1];
+  JSAMPROW colormap2 = cinfo->colormap[2];
+  SHIFT_TEMPS
+
+  for (row = 0; row < num_rows; row++) {
+    inptr = input_buf[row];
+    outptr = output_buf[row];
+    if (cquantize->on_odd_row) {
+      /* work right to left in this row */
+      inptr += (width-1) * 3;	/* so point to rightmost pixel */
+      outptr += width-1;
+      dir = -1;
+      dir3 = -3;
+      errorptr = cquantize->fserrors + (width+1)*3; /* => entry after last column */
+      cquantize->on_odd_row = FALSE; /* flip for next time */
+    } else {
+      /* work left to right in this row */
+      dir = 1;
+      dir3 = 3;
+      errorptr = cquantize->fserrors; /* => entry before first real column */
+      cquantize->on_odd_row = TRUE; /* flip for next time */
+    }
+    /* Preset error values: no error propagated to first pixel from left */
+    cur0 = cur1 = cur2 = 0;
+    /* and no error propagated to row below yet */
+    belowerr0 = belowerr1 = belowerr2 = 0;
+    bpreverr0 = bpreverr1 = bpreverr2 = 0;
+
+    for (col = width; col > 0; col--) {
+      /* curN holds the error propagated from the previous pixel on the
+       * current line.  Add the error propagated from the previous line
+       * to form the complete error correction term for this pixel, and
+       * round the error term (which is expressed * 16) to an integer.
+       * RIGHT_SHIFT rounds towards minus infinity, so adding 8 is correct
+       * for either sign of the error value.
+       * Note: errorptr points to *previous* column's array entry.
+       */
+      cur0 = RIGHT_SHIFT(cur0 + errorptr[dir3+0] + 8, 4);
+      cur1 = RIGHT_SHIFT(cur1 + errorptr[dir3+1] + 8, 4);
+      cur2 = RIGHT_SHIFT(cur2 + errorptr[dir3+2] + 8, 4);
+      /* Limit the error using transfer function set by init_error_limit.
+       * See comments with init_error_limit for rationale.
+       */
+      cur0 = error_limit[cur0];
+      cur1 = error_limit[cur1];
+      cur2 = error_limit[cur2];
+      /* Form pixel value + error, and range-limit to 0..MAXJSAMPLE.
+       * The maximum error is +- MAXJSAMPLE (or less with error limiting);
+       * this sets the required size of the range_limit array.
+       */
+      cur0 += GETJSAMPLE(inptr[0]);
+      cur1 += GETJSAMPLE(inptr[1]);
+      cur2 += GETJSAMPLE(inptr[2]);
+      cur0 = GETJSAMPLE(range_limit[cur0]);
+      cur1 = GETJSAMPLE(range_limit[cur1]);
+      cur2 = GETJSAMPLE(range_limit[cur2]);
+      /* Index into the cache with adjusted pixel value */
+      cachep = & histogram[cur0>>C0_SHIFT][cur1>>C1_SHIFT][cur2>>C2_SHIFT];
+      /* If we have not seen this color before, find nearest colormap */
+      /* entry and update the cache */
+      if (*cachep == 0)
+	fill_inverse_cmap(cinfo, cur0>>C0_SHIFT,cur1>>C1_SHIFT,cur2>>C2_SHIFT);
+      /* Now emit the colormap index for this cell */
+      { register int pixcode = *cachep - 1;
+	*outptr = (JSAMPLE) pixcode;
+	/* Compute representation error for this pixel */
+	cur0 -= GETJSAMPLE(colormap0[pixcode]);
+	cur1 -= GETJSAMPLE(colormap1[pixcode]);
+	cur2 -= GETJSAMPLE(colormap2[pixcode]);
+      }
+      /* Compute error fractions to be propagated to adjacent pixels.
+       * Add these into the running sums, and simultaneously shift the
+       * next-line error sums left by 1 column.
+       */
+      { register LOCFSERROR bnexterr, delta;
+
+	bnexterr = cur0;	/* Process component 0 */
+	delta = cur0 * 2;
+	cur0 += delta;		/* form error * 3 */
+	errorptr[0] = (FSERROR) (bpreverr0 + cur0);
+	cur0 += delta;		/* form error * 5 */
+	bpreverr0 = belowerr0 + cur0;
+	belowerr0 = bnexterr;
+	cur0 += delta;		/* form error * 7 */
+	bnexterr = cur1;	/* Process component 1 */
+	delta = cur1 * 2;
+	cur1 += delta;		/* form error * 3 */
+	errorptr[1] = (FSERROR) (bpreverr1 + cur1);
+	cur1 += delta;		/* form error * 5 */
+	bpreverr1 = belowerr1 + cur1;
+	belowerr1 = bnexterr;
+	cur1 += delta;		/* form error * 7 */
+	bnexterr = cur2;	/* Process component 2 */
+	delta = cur2 * 2;
+	cur2 += delta;		/* form error * 3 */
+	errorptr[2] = (FSERROR) (bpreverr2 + cur2);
+	cur2 += delta;		/* form error * 5 */
+	bpreverr2 = belowerr2 + cur2;
+	belowerr2 = bnexterr;
+	cur2 += delta;		/* form error * 7 */
+      }
+      /* At this point curN contains the 7/16 error value to be propagated
+       * to the next pixel on the current line, and all the errors for the
+       * next line have been shifted over.  We are therefore ready to move on.
+       */
+      inptr += dir3;		/* Advance pixel pointers to next column */
+      outptr += dir;
+      errorptr += dir3;		/* advance errorptr to current column */
+    }
+    /* Post-loop cleanup: we must unload the final error values into the
+     * final fserrors[] entry.  Note we need not unload belowerrN because
+     * it is for the dummy column before or after the actual array.
+     */
+    errorptr[0] = (FSERROR) bpreverr0; /* unload prev errs into array */
+    errorptr[1] = (FSERROR) bpreverr1;
+    errorptr[2] = (FSERROR) bpreverr2;
+  }
+}
+
+
+/*
+ * Initialize the error-limiting transfer function (lookup table).
+ * The raw F-S error computation can potentially compute error values of up to
+ * +- MAXJSAMPLE.  But we want the maximum correction applied to a pixel to be
+ * much less, otherwise obviously wrong pixels will be created.  (Typical
+ * effects include weird fringes at color-area boundaries, isolated bright
+ * pixels in a dark area, etc.)  The standard advice for avoiding this problem
+ * is to ensure that the "corners" of the color cube are allocated as output
+ * colors; then repeated errors in the same direction cannot cause cascading
+ * error buildup.  However, that only prevents the error from getting
+ * completely out of hand; Aaron Giles reports that error limiting improves
+ * the results even with corner colors allocated.
+ * A simple clamping of the error values to about +- MAXJSAMPLE/8 works pretty
+ * well, but the smoother transfer function used below is even better.  Thanks
+ * to Aaron Giles for this idea.
+ */
+
+LOCAL(void)
+init_error_limit (j_decompress_ptr cinfo)
+/* Allocate and fill in the error_limiter table */
+{
+  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
+  int * table;
+  int in, out;
+
+  table = (int *) (*cinfo->mem->alloc_small)
+    ((j_common_ptr) cinfo, JPOOL_IMAGE, (MAXJSAMPLE*2+1) * SIZEOF(int));
+  table += MAXJSAMPLE;		/* so can index -MAXJSAMPLE .. +MAXJSAMPLE */
+  cquantize->error_limiter = table;
+
+#define STEPSIZE ((MAXJSAMPLE+1)/16)
+  /* Map errors 1:1 up to +- MAXJSAMPLE/16 */
+  out = 0;
+  for (in = 0; in < STEPSIZE; in++, out++) {
+    table[in] = out; table[-in] = -out;
+  }
+  /* Map errors 1:2 up to +- 3*MAXJSAMPLE/16 */
+  for (; in < STEPSIZE*3; in++, out += (in&1) ? 0 : 1) {
+    table[in] = out; table[-in] = -out;
+  }
+  /* Clamp the rest to final out value (which is (MAXJSAMPLE+1)/8) */
+  for (; in <= MAXJSAMPLE; in++) {
+    table[in] = out; table[-in] = -out;
+  }
+#undef STEPSIZE
+}
+
+
+/*
+ * Finish up at the end of each pass.
+ */
+
+METHODDEF(void)
+finish_pass1 (j_decompress_ptr cinfo)
+{
+  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
+
+  /* Select the representative colors and fill in cinfo->colormap */
+  cinfo->colormap = cquantize->sv_colormap;
+  select_colors(cinfo, cquantize->desired);
+  /* Force next pass to zero the color index table */
+  cquantize->needs_zeroed = TRUE;
+}
+
+
+METHODDEF(void)
+finish_pass2 (j_decompress_ptr cinfo)
+{
+  /* no work */
+}
+
+
+/*
+ * Initialize for each processing pass.
+ */
+
+METHODDEF(void)
+start_pass_2_quant (j_decompress_ptr cinfo, boolean is_pre_scan)
+{
+  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
+  hist3d histogram = cquantize->histogram;
+  int i;
+
+  /* Only F-S dithering or no dithering is supported. */
+  /* If user asks for ordered dither, give him F-S. */
+  if (cinfo->dither_mode != JDITHER_NONE)
+    cinfo->dither_mode = JDITHER_FS;
+
+  if (is_pre_scan) {
+    /* Set up method pointers */
+    cquantize->pub.color_quantize = prescan_quantize;
+    cquantize->pub.finish_pass = finish_pass1;
+    cquantize->needs_zeroed = TRUE; /* Always zero histogram */
+  } else {
+    /* Set up method pointers */
+    if (cinfo->dither_mode == JDITHER_FS)
+      cquantize->pub.color_quantize = pass2_fs_dither;
+    else
+      cquantize->pub.color_quantize = pass2_no_dither;
+    cquantize->pub.finish_pass = finish_pass2;
+
+    /* Make sure color count is acceptable */
+    i = cinfo->actual_number_of_colors;
+    if (i < 1)
+      ERREXIT1(cinfo, JERR_QUANT_FEW_COLORS, 1);
+    if (i > MAXNUMCOLORS)
+      ERREXIT1(cinfo, JERR_QUANT_MANY_COLORS, MAXNUMCOLORS);
+
+    if (cinfo->dither_mode == JDITHER_FS) {
+      size_t arraysize = (size_t) ((cinfo->output_width + 2) *
+				   (3 * SIZEOF(FSERROR)));
+      /* Allocate Floyd-Steinberg workspace if we didn't already. */
+      if (cquantize->fserrors == NULL)
+	cquantize->fserrors = (FSERRPTR) (*cinfo->mem->alloc_large)
+	  ((j_common_ptr) cinfo, JPOOL_IMAGE, arraysize);
+      /* Initialize the propagated errors to zero. */
+      FMEMZERO((void FAR *) cquantize->fserrors, arraysize);
+      /* Make the error-limit table if we didn't already. */
+      if (cquantize->error_limiter == NULL)
+	init_error_limit(cinfo);
+      cquantize->on_odd_row = FALSE;
+    }
+
+  }
+  /* Zero the histogram or inverse color map, if necessary */
+  if (cquantize->needs_zeroed) {
+    for (i = 0; i < HIST_C0_ELEMS; i++) {
+      FMEMZERO((void FAR *) histogram[i],
+	       HIST_C1_ELEMS*HIST_C2_ELEMS * SIZEOF(histcell));
+    }
+    cquantize->needs_zeroed = FALSE;
+  }
+}
+
+
+/*
+ * Switch to a new external colormap between output passes.
+ */
+
+METHODDEF(void)
+new_color_map_2_quant (j_decompress_ptr cinfo)
+{
+  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
+
+  /* Reset the inverse color map */
+  cquantize->needs_zeroed = TRUE;
+}
+
+
+/*
+ * Module initialization routine for 2-pass color quantization.
+ */
+
+GLOBAL(void)
+jinit_2pass_quantizer (j_decompress_ptr cinfo)
+{
+  my_cquantize_ptr cquantize;
+  int i;
+
+  cquantize = (my_cquantize_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				SIZEOF(my_cquantizer));
+  cinfo->cquantize = (struct jpeg_color_quantizer *) cquantize;
+  cquantize->pub.start_pass = start_pass_2_quant;
+  cquantize->pub.new_color_map = new_color_map_2_quant;
+  cquantize->fserrors = NULL;	/* flag optional arrays not allocated */
+  cquantize->error_limiter = NULL;
+
+  /* Make sure jdmaster didn't give me a case I can't handle */
+  if (cinfo->out_color_components != 3)
+    ERREXIT(cinfo, JERR_NOTIMPL);
+
+  /* Allocate the histogram/inverse colormap storage */
+  cquantize->histogram = (hist3d) (*cinfo->mem->alloc_small)
+    ((j_common_ptr) cinfo, JPOOL_IMAGE, HIST_C0_ELEMS * SIZEOF(hist2d));
+  for (i = 0; i < HIST_C0_ELEMS; i++) {
+    cquantize->histogram[i] = (hist2d) (*cinfo->mem->alloc_large)
+      ((j_common_ptr) cinfo, JPOOL_IMAGE,
+       HIST_C1_ELEMS*HIST_C2_ELEMS * SIZEOF(histcell));
+  }
+  cquantize->needs_zeroed = TRUE; /* histogram is garbage now */
+
+  /* Allocate storage for the completed colormap, if required.
+   * We do this now since it is FAR storage and may affect
+   * the memory manager's space calculations.
+   */
+  if (cinfo->enable_2pass_quant) {
+    /* Make sure color count is acceptable */
+    int desired = cinfo->desired_number_of_colors;
+    /* Lower bound on # of colors ... somewhat arbitrary as long as > 0 */
+    if (desired < 8)
+      ERREXIT1(cinfo, JERR_QUANT_FEW_COLORS, 8);
+    /* Make sure colormap indexes can be represented by JSAMPLEs */
+    if (desired > MAXNUMCOLORS)
+      ERREXIT1(cinfo, JERR_QUANT_MANY_COLORS, MAXNUMCOLORS);
+    cquantize->sv_colormap = (*cinfo->mem->alloc_sarray)
+      ((j_common_ptr) cinfo,JPOOL_IMAGE, (JDIMENSION) desired, (JDIMENSION) 3);
+    cquantize->desired = desired;
+  } else
+    cquantize->sv_colormap = NULL;
+
+  /* Only F-S dithering or no dithering is supported. */
+  /* If user asks for ordered dither, give him F-S. */
+  if (cinfo->dither_mode != JDITHER_NONE)
+    cinfo->dither_mode = JDITHER_FS;
+
+  /* Allocate Floyd-Steinberg workspace if necessary.
+   * This isn't really needed until pass 2, but again it is FAR storage.
+   * Although we will cope with a later change in dither_mode,
+   * we do not promise to honor max_memory_to_use if dither_mode changes.
+   */
+  if (cinfo->dither_mode == JDITHER_FS) {
+    cquantize->fserrors = (FSERRPTR) (*cinfo->mem->alloc_large)
+      ((j_common_ptr) cinfo, JPOOL_IMAGE,
+       (size_t) ((cinfo->output_width + 2) * (3 * SIZEOF(FSERROR))));
+    /* Might as well create the error-limiting table too. */
+    init_error_limit(cinfo);
+  }
+}
+
+#endif /* QUANT_2PASS_SUPPORTED */
diff --git a/plugins/AdvaImg/src/LibJPEG/jutils.c b/plugins/AdvaImg/src/LibJPEG/jutils.c
index 5b16b6d03c..037a795290 100644
--- a/plugins/AdvaImg/src/LibJPEG/jutils.c
+++ b/plugins/AdvaImg/src/LibJPEG/jutils.c
@@ -1,227 +1,227 @@
-/*
- * jutils.c
- *
- * Copyright (C) 1991-1996, Thomas G. Lane.
- * Modified 2009-2011 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains tables and miscellaneous utility routines needed
- * for both compression and decompression.
- * Note we prefix all global names with "j" to minimize conflicts with
- * a surrounding application.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-
-
-/*
- * jpeg_zigzag_order[i] is the zigzag-order position of the i'th element
- * of a DCT block read in natural order (left to right, top to bottom).
- */
-
-#if 0				/* This table is not actually needed in v6a */
-
-const int jpeg_zigzag_order[DCTSIZE2] = {
-   0,  1,  5,  6, 14, 15, 27, 28,
-   2,  4,  7, 13, 16, 26, 29, 42,
-   3,  8, 12, 17, 25, 30, 41, 43,
-   9, 11, 18, 24, 31, 40, 44, 53,
-  10, 19, 23, 32, 39, 45, 52, 54,
-  20, 22, 33, 38, 46, 51, 55, 60,
-  21, 34, 37, 47, 50, 56, 59, 61,
-  35, 36, 48, 49, 57, 58, 62, 63
-};
-
-#endif
-
-/*
- * jpeg_natural_order[i] is the natural-order position of the i'th element
- * of zigzag order.
- *
- * When reading corrupted data, the Huffman decoders could attempt
- * to reference an entry beyond the end of this array (if the decoded
- * zero run length reaches past the end of the block).  To prevent
- * wild stores without adding an inner-loop test, we put some extra
- * "63"s after the real entries.  This will cause the extra coefficient
- * to be stored in location 63 of the block, not somewhere random.
- * The worst case would be a run-length of 15, which means we need 16
- * fake entries.
- */
-
-const int jpeg_natural_order[DCTSIZE2+16] = {
-  0,  1,  8, 16,  9,  2,  3, 10,
- 17, 24, 32, 25, 18, 11,  4,  5,
- 12, 19, 26, 33, 40, 48, 41, 34,
- 27, 20, 13,  6,  7, 14, 21, 28,
- 35, 42, 49, 56, 57, 50, 43, 36,
- 29, 22, 15, 23, 30, 37, 44, 51,
- 58, 59, 52, 45, 38, 31, 39, 46,
- 53, 60, 61, 54, 47, 55, 62, 63,
- 63, 63, 63, 63, 63, 63, 63, 63, /* extra entries for safety in decoder */
- 63, 63, 63, 63, 63, 63, 63, 63
-};
-
-const int jpeg_natural_order7[7*7+16] = {
-  0,  1,  8, 16,  9,  2,  3, 10,
- 17, 24, 32, 25, 18, 11,  4,  5,
- 12, 19, 26, 33, 40, 48, 41, 34,
- 27, 20, 13,  6, 14, 21, 28, 35,
- 42, 49, 50, 43, 36, 29, 22, 30,
- 37, 44, 51, 52, 45, 38, 46, 53,
- 54,
- 63, 63, 63, 63, 63, 63, 63, 63, /* extra entries for safety in decoder */
- 63, 63, 63, 63, 63, 63, 63, 63
-};
-
-const int jpeg_natural_order6[6*6+16] = {
-  0,  1,  8, 16,  9,  2,  3, 10,
- 17, 24, 32, 25, 18, 11,  4,  5,
- 12, 19, 26, 33, 40, 41, 34, 27,
- 20, 13, 21, 28, 35, 42, 43, 36,
- 29, 37, 44, 45,
- 63, 63, 63, 63, 63, 63, 63, 63, /* extra entries for safety in decoder */
- 63, 63, 63, 63, 63, 63, 63, 63
-};
-
-const int jpeg_natural_order5[5*5+16] = {
-  0,  1,  8, 16,  9,  2,  3, 10,
- 17, 24, 32, 25, 18, 11,  4, 12,
- 19, 26, 33, 34, 27, 20, 28, 35,
- 36,
- 63, 63, 63, 63, 63, 63, 63, 63, /* extra entries for safety in decoder */
- 63, 63, 63, 63, 63, 63, 63, 63
-};
-
-const int jpeg_natural_order4[4*4+16] = {
-  0,  1,  8, 16,  9,  2,  3, 10,
- 17, 24, 25, 18, 11, 19, 26, 27,
- 63, 63, 63, 63, 63, 63, 63, 63, /* extra entries for safety in decoder */
- 63, 63, 63, 63, 63, 63, 63, 63
-};
-
-const int jpeg_natural_order3[3*3+16] = {
-  0,  1,  8, 16,  9,  2, 10, 17,
- 18,
- 63, 63, 63, 63, 63, 63, 63, 63, /* extra entries for safety in decoder */
- 63, 63, 63, 63, 63, 63, 63, 63
-};
-
-const int jpeg_natural_order2[2*2+16] = {
-  0,  1,  8,  9,
- 63, 63, 63, 63, 63, 63, 63, 63, /* extra entries for safety in decoder */
- 63, 63, 63, 63, 63, 63, 63, 63
-};
-
-
-/*
- * Arithmetic utilities
- */
-
-GLOBAL(long)
-jdiv_round_up (long a, long b)
-/* Compute a/b rounded up to next integer, ie, ceil(a/b) */
-/* Assumes a >= 0, b > 0 */
-{
-  return (a + b - 1L) / b;
-}
-
-
-GLOBAL(long)
-jround_up (long a, long b)
-/* Compute a rounded up to next multiple of b, ie, ceil(a/b)*b */
-/* Assumes a >= 0, b > 0 */
-{
-  a += b - 1L;
-  return a - (a % b);
-}
-
-
-/* On normal machines we can apply MEMCOPY() and MEMZERO() to sample arrays
- * and coefficient-block arrays.  This won't work on 80x86 because the arrays
- * are FAR and we're assuming a small-pointer memory model.  However, some
- * DOS compilers provide far-pointer versions of memcpy() and memset() even
- * in the small-model libraries.  These will be used if USE_FMEM is defined.
- * Otherwise, the routines below do it the hard way.  (The performance cost
- * is not all that great, because these routines aren't very heavily used.)
- */
-
-#ifndef NEED_FAR_POINTERS	/* normal case, same as regular macro */
-#define FMEMCOPY(dest,src,size)	MEMCOPY(dest,src,size)
-#else				/* 80x86 case, define if we can */
-#ifdef USE_FMEM
-#define FMEMCOPY(dest,src,size)	_fmemcpy((void FAR *)(dest), (const void FAR *)(src), (size_t)(size))
-#else
-/* This function is for use by the FMEMZERO macro defined in jpegint.h.
- * Do not call this function directly, use the FMEMZERO macro instead.
- */
-GLOBAL(void)
-jzero_far (void FAR * target, size_t bytestozero)
-/* Zero out a chunk of FAR memory. */
-/* This might be sample-array data, block-array data, or alloc_large data. */
-{
-  register char FAR * ptr = (char FAR *) target;
-  register size_t count;
-
-  for (count = bytestozero; count > 0; count--) {
-    *ptr++ = 0;
-  }
-}
-#endif
-#endif
-
-
-GLOBAL(void)
-jcopy_sample_rows (JSAMPARRAY input_array, int source_row,
-		   JSAMPARRAY output_array, int dest_row,
-		   int num_rows, JDIMENSION num_cols)
-/* Copy some rows of samples from one place to another.
- * num_rows rows are copied from input_array[source_row++]
- * to output_array[dest_row++]; these areas may overlap for duplication.
- * The source and destination arrays must be at least as wide as num_cols.
- */
-{
-  register JSAMPROW inptr, outptr;
-#ifdef FMEMCOPY
-  register size_t count = (size_t) (num_cols * SIZEOF(JSAMPLE));
-#else
-  register JDIMENSION count;
-#endif
-  register int row;
-
-  input_array += source_row;
-  output_array += dest_row;
-
-  for (row = num_rows; row > 0; row--) {
-    inptr = *input_array++;
-    outptr = *output_array++;
-#ifdef FMEMCOPY
-    FMEMCOPY(outptr, inptr, count);
-#else
-    for (count = num_cols; count > 0; count--)
-      *outptr++ = *inptr++;	/* needn't bother with GETJSAMPLE() here */
-#endif
-  }
-}
-
-
-GLOBAL(void)
-jcopy_block_row (JBLOCKROW input_row, JBLOCKROW output_row,
-		 JDIMENSION num_blocks)
-/* Copy a row of coefficient blocks from one place to another. */
-{
-#ifdef FMEMCOPY
-  FMEMCOPY(output_row, input_row, num_blocks * (DCTSIZE2 * SIZEOF(JCOEF)));
-#else
-  register JCOEFPTR inptr, outptr;
-  register long count;
-
-  inptr = (JCOEFPTR) input_row;
-  outptr = (JCOEFPTR) output_row;
-  for (count = (long) num_blocks * DCTSIZE2; count > 0; count--) {
-    *outptr++ = *inptr++;
-  }
-#endif
-}
+/*
+ * jutils.c
+ *
+ * Copyright (C) 1991-1996, Thomas G. Lane.
+ * Modified 2009-2011 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains tables and miscellaneous utility routines needed
+ * for both compression and decompression.
+ * Note we prefix all global names with "j" to minimize conflicts with
+ * a surrounding application.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/*
+ * jpeg_zigzag_order[i] is the zigzag-order position of the i'th element
+ * of a DCT block read in natural order (left to right, top to bottom).
+ */
+
+#if 0				/* This table is not actually needed in v6a */
+
+const int jpeg_zigzag_order[DCTSIZE2] = {
+   0,  1,  5,  6, 14, 15, 27, 28,
+   2,  4,  7, 13, 16, 26, 29, 42,
+   3,  8, 12, 17, 25, 30, 41, 43,
+   9, 11, 18, 24, 31, 40, 44, 53,
+  10, 19, 23, 32, 39, 45, 52, 54,
+  20, 22, 33, 38, 46, 51, 55, 60,
+  21, 34, 37, 47, 50, 56, 59, 61,
+  35, 36, 48, 49, 57, 58, 62, 63
+};
+
+#endif
+
+/*
+ * jpeg_natural_order[i] is the natural-order position of the i'th element
+ * of zigzag order.
+ *
+ * When reading corrupted data, the Huffman decoders could attempt
+ * to reference an entry beyond the end of this array (if the decoded
+ * zero run length reaches past the end of the block).  To prevent
+ * wild stores without adding an inner-loop test, we put some extra
+ * "63"s after the real entries.  This will cause the extra coefficient
+ * to be stored in location 63 of the block, not somewhere random.
+ * The worst case would be a run-length of 15, which means we need 16
+ * fake entries.
+ */
+
+const int jpeg_natural_order[DCTSIZE2+16] = {
+  0,  1,  8, 16,  9,  2,  3, 10,
+ 17, 24, 32, 25, 18, 11,  4,  5,
+ 12, 19, 26, 33, 40, 48, 41, 34,
+ 27, 20, 13,  6,  7, 14, 21, 28,
+ 35, 42, 49, 56, 57, 50, 43, 36,
+ 29, 22, 15, 23, 30, 37, 44, 51,
+ 58, 59, 52, 45, 38, 31, 39, 46,
+ 53, 60, 61, 54, 47, 55, 62, 63,
+ 63, 63, 63, 63, 63, 63, 63, 63, /* extra entries for safety in decoder */
+ 63, 63, 63, 63, 63, 63, 63, 63
+};
+
+const int jpeg_natural_order7[7*7+16] = {
+  0,  1,  8, 16,  9,  2,  3, 10,
+ 17, 24, 32, 25, 18, 11,  4,  5,
+ 12, 19, 26, 33, 40, 48, 41, 34,
+ 27, 20, 13,  6, 14, 21, 28, 35,
+ 42, 49, 50, 43, 36, 29, 22, 30,
+ 37, 44, 51, 52, 45, 38, 46, 53,
+ 54,
+ 63, 63, 63, 63, 63, 63, 63, 63, /* extra entries for safety in decoder */
+ 63, 63, 63, 63, 63, 63, 63, 63
+};
+
+const int jpeg_natural_order6[6*6+16] = {
+  0,  1,  8, 16,  9,  2,  3, 10,
+ 17, 24, 32, 25, 18, 11,  4,  5,
+ 12, 19, 26, 33, 40, 41, 34, 27,
+ 20, 13, 21, 28, 35, 42, 43, 36,
+ 29, 37, 44, 45,
+ 63, 63, 63, 63, 63, 63, 63, 63, /* extra entries for safety in decoder */
+ 63, 63, 63, 63, 63, 63, 63, 63
+};
+
+const int jpeg_natural_order5[5*5+16] = {
+  0,  1,  8, 16,  9,  2,  3, 10,
+ 17, 24, 32, 25, 18, 11,  4, 12,
+ 19, 26, 33, 34, 27, 20, 28, 35,
+ 36,
+ 63, 63, 63, 63, 63, 63, 63, 63, /* extra entries for safety in decoder */
+ 63, 63, 63, 63, 63, 63, 63, 63
+};
+
+const int jpeg_natural_order4[4*4+16] = {
+  0,  1,  8, 16,  9,  2,  3, 10,
+ 17, 24, 25, 18, 11, 19, 26, 27,
+ 63, 63, 63, 63, 63, 63, 63, 63, /* extra entries for safety in decoder */
+ 63, 63, 63, 63, 63, 63, 63, 63
+};
+
+const int jpeg_natural_order3[3*3+16] = {
+  0,  1,  8, 16,  9,  2, 10, 17,
+ 18,
+ 63, 63, 63, 63, 63, 63, 63, 63, /* extra entries for safety in decoder */
+ 63, 63, 63, 63, 63, 63, 63, 63
+};
+
+const int jpeg_natural_order2[2*2+16] = {
+  0,  1,  8,  9,
+ 63, 63, 63, 63, 63, 63, 63, 63, /* extra entries for safety in decoder */
+ 63, 63, 63, 63, 63, 63, 63, 63
+};
+
+
+/*
+ * Arithmetic utilities
+ */
+
+GLOBAL(long)
+jdiv_round_up (long a, long b)
+/* Compute a/b rounded up to next integer, ie, ceil(a/b) */
+/* Assumes a >= 0, b > 0 */
+{
+  return (a + b - 1L) / b;
+}
+
+
+GLOBAL(long)
+jround_up (long a, long b)
+/* Compute a rounded up to next multiple of b, ie, ceil(a/b)*b */
+/* Assumes a >= 0, b > 0 */
+{
+  a += b - 1L;
+  return a - (a % b);
+}
+
+
+/* On normal machines we can apply MEMCOPY() and MEMZERO() to sample arrays
+ * and coefficient-block arrays.  This won't work on 80x86 because the arrays
+ * are FAR and we're assuming a small-pointer memory model.  However, some
+ * DOS compilers provide far-pointer versions of memcpy() and memset() even
+ * in the small-model libraries.  These will be used if USE_FMEM is defined.
+ * Otherwise, the routines below do it the hard way.  (The performance cost
+ * is not all that great, because these routines aren't very heavily used.)
+ */
+
+#ifndef NEED_FAR_POINTERS	/* normal case, same as regular macro */
+#define FMEMCOPY(dest,src,size)	MEMCOPY(dest,src,size)
+#else				/* 80x86 case, define if we can */
+#ifdef USE_FMEM
+#define FMEMCOPY(dest,src,size)	_fmemcpy((void FAR *)(dest), (const void FAR *)(src), (size_t)(size))
+#else
+/* This function is for use by the FMEMZERO macro defined in jpegint.h.
+ * Do not call this function directly, use the FMEMZERO macro instead.
+ */
+GLOBAL(void)
+jzero_far (void FAR * target, size_t bytestozero)
+/* Zero out a chunk of FAR memory. */
+/* This might be sample-array data, block-array data, or alloc_large data. */
+{
+  register char FAR * ptr = (char FAR *) target;
+  register size_t count;
+
+  for (count = bytestozero; count > 0; count--) {
+    *ptr++ = 0;
+  }
+}
+#endif
+#endif
+
+
+GLOBAL(void)
+jcopy_sample_rows (JSAMPARRAY input_array, int source_row,
+		   JSAMPARRAY output_array, int dest_row,
+		   int num_rows, JDIMENSION num_cols)
+/* Copy some rows of samples from one place to another.
+ * num_rows rows are copied from input_array[source_row++]
+ * to output_array[dest_row++]; these areas may overlap for duplication.
+ * The source and destination arrays must be at least as wide as num_cols.
+ */
+{
+  register JSAMPROW inptr, outptr;
+#ifdef FMEMCOPY
+  register size_t count = (size_t) (num_cols * SIZEOF(JSAMPLE));
+#else
+  register JDIMENSION count;
+#endif
+  register int row;
+
+  input_array += source_row;
+  output_array += dest_row;
+
+  for (row = num_rows; row > 0; row--) {
+    inptr = *input_array++;
+    outptr = *output_array++;
+#ifdef FMEMCOPY
+    FMEMCOPY(outptr, inptr, count);
+#else
+    for (count = num_cols; count > 0; count--)
+      *outptr++ = *inptr++;	/* needn't bother with GETJSAMPLE() here */
+#endif
+  }
+}
+
+
+GLOBAL(void)
+jcopy_block_row (JBLOCKROW input_row, JBLOCKROW output_row,
+		 JDIMENSION num_blocks)
+/* Copy a row of coefficient blocks from one place to another. */
+{
+#ifdef FMEMCOPY
+  FMEMCOPY(output_row, input_row, num_blocks * (DCTSIZE2 * SIZEOF(JCOEF)));
+#else
+  register JCOEFPTR inptr, outptr;
+  register long count;
+
+  inptr = (JCOEFPTR) input_row;
+  outptr = (JCOEFPTR) output_row;
+  for (count = (long) num_blocks * DCTSIZE2; count > 0; count--) {
+    *outptr++ = *inptr++;
+  }
+#endif
+}
diff --git a/plugins/AdvaImg/src/LibJPEG/jversion.h b/plugins/AdvaImg/src/LibJPEG/jversion.h
index 5d4915103e..41726ccbc2 100644
--- a/plugins/AdvaImg/src/LibJPEG/jversion.h
+++ b/plugins/AdvaImg/src/LibJPEG/jversion.h
@@ -1,14 +1,14 @@
-/*
- * jversion.h
- *
- * Copyright (C) 1991-2012, Thomas G. Lane, Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains software version identification.
- */
-
-
-#define JVERSION	"8d  15-Jan-2012"
-
-#define JCOPYRIGHT	"Copyright (C) 2012, Thomas G. Lane, Guido Vollbeding"
+/*
+ * jversion.h
+ *
+ * Copyright (C) 1991-2013, Thomas G. Lane, Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains software version identification.
+ */
+
+
+#define JVERSION	"9  13-Jan-2013"
+
+#define JCOPYRIGHT	"Copyright (C) 2013, Thomas G. Lane, Guido Vollbeding"
diff --git a/plugins/AdvaImg/src/LibJPEG/libjpeg.txt b/plugins/AdvaImg/src/LibJPEG/libjpeg.txt
index 98394c8e18..b4e1d886c6 100644
--- a/plugins/AdvaImg/src/LibJPEG/libjpeg.txt
+++ b/plugins/AdvaImg/src/LibJPEG/libjpeg.txt
@@ -1,6 +1,6 @@
 USING THE IJG JPEG LIBRARY
 
-Copyright (C) 1994-2011, Thomas G. Lane, Guido Vollbeding.
+Copyright (C) 1994-2013, Thomas G. Lane, Guido Vollbeding.
 This file is part of the Independent JPEG Group's software.
 For conditions of distribution and use, see the accompanying README file.
 
@@ -876,6 +876,10 @@ jpeg_simple_progression (j_compress_ptr cinfo)
 
 Compression parameters (cinfo fields) include:
 
+boolean arith_code
+	If TRUE, use arithmetic coding.
+	If FALSE, use Huffman coding.
+
 int block_size
 	Set DCT block size.  All N from 1 to 16 are possible.
 	Default is 8 (baseline format).
@@ -916,7 +920,16 @@ J_COLOR_SPACE jpeg_color_space
 int num_components
 	The JPEG color space and corresponding number of components; see
 	"Special color spaces", below, for more info.  We recommend using
-	jpeg_set_color_space() if you want to change these.
+	jpeg_set_colorspace() if you want to change these.
+
+J_COLOR_TRANSFORM color_transform
+	Internal color transform identifier, writes LSE marker if nonzero
+	(requires decoder with inverse color transform support, introduced
+	with IJG JPEG 9).
+	Two values are currently possible: JCT_NONE and JCT_SUBTRACT_GREEN.
+	Set this value for lossless RGB application *before* calling
+	jpeg_set_colorspace(), because entropy table assignment in
+	jpeg_set_colorspace() depends on color_transform.
 
 boolean optimize_coding
 	TRUE causes the compressor to compute optimal Huffman coding tables
diff --git a/plugins/AdvaImg/src/LibJPEG/transupp.c b/plugins/AdvaImg/src/LibJPEG/transupp.c
index 016f383d4f..e0af04cbd8 100644
--- a/plugins/AdvaImg/src/LibJPEG/transupp.c
+++ b/plugins/AdvaImg/src/LibJPEG/transupp.c
@@ -1,1597 +1,1597 @@
-/*
- * transupp.c
- *
- * Copyright (C) 1997-2011, Thomas G. Lane, Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains image transformation routines and other utility code
- * used by the jpegtran sample application.  These are NOT part of the core
- * JPEG library.  But we keep these routines separate from jpegtran.c to
- * ease the task of maintaining jpegtran-like programs that have other user
- * interfaces.
- */
-
-/* Although this file really shouldn't have access to the library internals,
- * it's helpful to let it call jround_up() and jcopy_block_row().
- */
-#define JPEG_INTERNALS
-
-#include "jinclude.h"
-#include "jpeglib.h"
-#include "transupp.h"		/* My own external interface */
-#include <ctype.h>		/* to declare isdigit() */
-
-
-#if TRANSFORMS_SUPPORTED
-
-/*
- * Lossless image transformation routines.  These routines work on DCT
- * coefficient arrays and thus do not require any lossy decompression
- * or recompression of the image.
- * Thanks to Guido Vollbeding for the initial design and code of this feature,
- * and to Ben Jackson for introducing the cropping feature.
- *
- * Horizontal flipping is done in-place, using a single top-to-bottom
- * pass through the virtual source array.  It will thus be much the
- * fastest option for images larger than main memory.
- *
- * The other routines require a set of destination virtual arrays, so they
- * need twice as much memory as jpegtran normally does.  The destination
- * arrays are always written in normal scan order (top to bottom) because
- * the virtual array manager expects this.  The source arrays will be scanned
- * in the corresponding order, which means multiple passes through the source
- * arrays for most of the transforms.  That could result in much thrashing
- * if the image is larger than main memory.
- *
- * If cropping or trimming is involved, the destination arrays may be smaller
- * than the source arrays.  Note it is not possible to do horizontal flip
- * in-place when a nonzero Y crop offset is specified, since we'd have to move
- * data from one block row to another but the virtual array manager doesn't
- * guarantee we can touch more than one row at a time.  So in that case,
- * we have to use a separate destination array.
- *
- * Some notes about the operating environment of the individual transform
- * routines:
- * 1. Both the source and destination virtual arrays are allocated from the
- *    source JPEG object, and therefore should be manipulated by calling the
- *    source's memory manager.
- * 2. The destination's component count should be used.  It may be smaller
- *    than the source's when forcing to grayscale.
- * 3. Likewise the destination's sampling factors should be used.  When
- *    forcing to grayscale the destination's sampling factors will be all 1,
- *    and we may as well take that as the effective iMCU size.
- * 4. When "trim" is in effect, the destination's dimensions will be the
- *    trimmed values but the source's will be untrimmed.
- * 5. When "crop" is in effect, the destination's dimensions will be the
- *    cropped values but the source's will be uncropped.  Each transform
- *    routine is responsible for picking up source data starting at the
- *    correct X and Y offset for the crop region.  (The X and Y offsets
- *    passed to the transform routines are measured in iMCU blocks of the
- *    destination.)
- * 6. All the routines assume that the source and destination buffers are
- *    padded out to a full iMCU boundary.  This is true, although for the
- *    source buffer it is an undocumented property of jdcoefct.c.
- */
-
-
-LOCAL(void)
-do_crop (j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
-	 JDIMENSION x_crop_offset, JDIMENSION y_crop_offset,
-	 jvirt_barray_ptr *src_coef_arrays,
-	 jvirt_barray_ptr *dst_coef_arrays)
-/* Crop.  This is only used when no rotate/flip is requested with the crop. */
-{
-  JDIMENSION dst_blk_y, x_crop_blocks, y_crop_blocks;
-  int ci, offset_y;
-  JBLOCKARRAY src_buffer, dst_buffer;
-  jpeg_component_info *compptr;
-
-  /* We simply have to copy the right amount of data (the destination's
-   * image size) starting at the given X and Y offsets in the source.
-   */
-  for (ci = 0; ci < dstinfo->num_components; ci++) {
-    compptr = dstinfo->comp_info + ci;
-    x_crop_blocks = x_crop_offset * compptr->h_samp_factor;
-    y_crop_blocks = y_crop_offset * compptr->v_samp_factor;
-    for (dst_blk_y = 0; dst_blk_y < compptr->height_in_blocks;
-	 dst_blk_y += compptr->v_samp_factor) {
-      dst_buffer = (*srcinfo->mem->access_virt_barray)
-	((j_common_ptr) srcinfo, dst_coef_arrays[ci], dst_blk_y,
-	 (JDIMENSION) compptr->v_samp_factor, TRUE);
-      src_buffer = (*srcinfo->mem->access_virt_barray)
-	((j_common_ptr) srcinfo, src_coef_arrays[ci],
-	 dst_blk_y + y_crop_blocks,
-	 (JDIMENSION) compptr->v_samp_factor, FALSE);
-      for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
-	jcopy_block_row(src_buffer[offset_y] + x_crop_blocks,
-			dst_buffer[offset_y],
-			compptr->width_in_blocks);
-      }
-    }
-  }
-}
-
-
-LOCAL(void)
-do_flip_h_no_crop (j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
-		   JDIMENSION x_crop_offset,
-		   jvirt_barray_ptr *src_coef_arrays)
-/* Horizontal flip; done in-place, so no separate dest array is required.
- * NB: this only works when y_crop_offset is zero.
- */
-{
-  JDIMENSION MCU_cols, comp_width, blk_x, blk_y, x_crop_blocks;
-  int ci, k, offset_y;
-  JBLOCKARRAY buffer;
-  JCOEFPTR ptr1, ptr2;
-  JCOEF temp1, temp2;
-  jpeg_component_info *compptr;
-
-  /* Horizontal mirroring of DCT blocks is accomplished by swapping
-   * pairs of blocks in-place.  Within a DCT block, we perform horizontal
-   * mirroring by changing the signs of odd-numbered columns.
-   * Partial iMCUs at the right edge are left untouched.
-   */
-  MCU_cols = srcinfo->output_width /
-    (dstinfo->max_h_samp_factor * dstinfo->min_DCT_h_scaled_size);
-
-  for (ci = 0; ci < dstinfo->num_components; ci++) {
-    compptr = dstinfo->comp_info + ci;
-    comp_width = MCU_cols * compptr->h_samp_factor;
-    x_crop_blocks = x_crop_offset * compptr->h_samp_factor;
-    for (blk_y = 0; blk_y < compptr->height_in_blocks;
-	 blk_y += compptr->v_samp_factor) {
-      buffer = (*srcinfo->mem->access_virt_barray)
-	((j_common_ptr) srcinfo, src_coef_arrays[ci], blk_y,
-	 (JDIMENSION) compptr->v_samp_factor, TRUE);
-      for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
-	/* Do the mirroring */
-	for (blk_x = 0; blk_x * 2 < comp_width; blk_x++) {
-	  ptr1 = buffer[offset_y][blk_x];
-	  ptr2 = buffer[offset_y][comp_width - blk_x - 1];
-	  /* this unrolled loop doesn't need to know which row it's on... */
-	  for (k = 0; k < DCTSIZE2; k += 2) {
-	    temp1 = *ptr1;	/* swap even column */
-	    temp2 = *ptr2;
-	    *ptr1++ = temp2;
-	    *ptr2++ = temp1;
-	    temp1 = *ptr1;	/* swap odd column with sign change */
-	    temp2 = *ptr2;
-	    *ptr1++ = -temp2;
-	    *ptr2++ = -temp1;
-	  }
-	}
-	if (x_crop_blocks > 0) {
-	  /* Now left-justify the portion of the data to be kept.
-	   * We can't use a single jcopy_block_row() call because that routine
-	   * depends on memcpy(), whose behavior is unspecified for overlapping
-	   * source and destination areas.  Sigh.
-	   */
-	  for (blk_x = 0; blk_x < compptr->width_in_blocks; blk_x++) {
-	    jcopy_block_row(buffer[offset_y] + blk_x + x_crop_blocks,
-			    buffer[offset_y] + blk_x,
-			    (JDIMENSION) 1);
-	  }
-	}
-      }
-    }
-  }
-}
-
-
-LOCAL(void)
-do_flip_h (j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
-	   JDIMENSION x_crop_offset, JDIMENSION y_crop_offset,
-	   jvirt_barray_ptr *src_coef_arrays,
-	   jvirt_barray_ptr *dst_coef_arrays)
-/* Horizontal flip in general cropping case */
-{
-  JDIMENSION MCU_cols, comp_width, dst_blk_x, dst_blk_y;
-  JDIMENSION x_crop_blocks, y_crop_blocks;
-  int ci, k, offset_y;
-  JBLOCKARRAY src_buffer, dst_buffer;
-  JBLOCKROW src_row_ptr, dst_row_ptr;
-  JCOEFPTR src_ptr, dst_ptr;
-  jpeg_component_info *compptr;
-
-  /* Here we must output into a separate array because we can't touch
-   * different rows of a single virtual array simultaneously.  Otherwise,
-   * this is essentially the same as the routine above.
-   */
-  MCU_cols = srcinfo->output_width /
-    (dstinfo->max_h_samp_factor * dstinfo->min_DCT_h_scaled_size);
-
-  for (ci = 0; ci < dstinfo->num_components; ci++) {
-    compptr = dstinfo->comp_info + ci;
-    comp_width = MCU_cols * compptr->h_samp_factor;
-    x_crop_blocks = x_crop_offset * compptr->h_samp_factor;
-    y_crop_blocks = y_crop_offset * compptr->v_samp_factor;
-    for (dst_blk_y = 0; dst_blk_y < compptr->height_in_blocks;
-	 dst_blk_y += compptr->v_samp_factor) {
-      dst_buffer = (*srcinfo->mem->access_virt_barray)
-	((j_common_ptr) srcinfo, dst_coef_arrays[ci], dst_blk_y,
-	 (JDIMENSION) compptr->v_samp_factor, TRUE);
-      src_buffer = (*srcinfo->mem->access_virt_barray)
-	((j_common_ptr) srcinfo, src_coef_arrays[ci],
-	 dst_blk_y + y_crop_blocks,
-	 (JDIMENSION) compptr->v_samp_factor, FALSE);
-      for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
-	dst_row_ptr = dst_buffer[offset_y];
-	src_row_ptr = src_buffer[offset_y];
-	for (dst_blk_x = 0; dst_blk_x < compptr->width_in_blocks; dst_blk_x++) {
-	  if (x_crop_blocks + dst_blk_x < comp_width) {
-	    /* Do the mirrorable blocks */
-	    dst_ptr = dst_row_ptr[dst_blk_x];
-	    src_ptr = src_row_ptr[comp_width - x_crop_blocks - dst_blk_x - 1];
-	    /* this unrolled loop doesn't need to know which row it's on... */
-	    for (k = 0; k < DCTSIZE2; k += 2) {
-	      *dst_ptr++ = *src_ptr++;	 /* copy even column */
-	      *dst_ptr++ = - *src_ptr++; /* copy odd column with sign change */
-	    }
-	  } else {
-	    /* Copy last partial block(s) verbatim */
-	    jcopy_block_row(src_row_ptr + dst_blk_x + x_crop_blocks,
-			    dst_row_ptr + dst_blk_x,
-			    (JDIMENSION) 1);
-	  }
-	}
-      }
-    }
-  }
-}
-
-
-LOCAL(void)
-do_flip_v (j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
-	   JDIMENSION x_crop_offset, JDIMENSION y_crop_offset,
-	   jvirt_barray_ptr *src_coef_arrays,
-	   jvirt_barray_ptr *dst_coef_arrays)
-/* Vertical flip */
-{
-  JDIMENSION MCU_rows, comp_height, dst_blk_x, dst_blk_y;
-  JDIMENSION x_crop_blocks, y_crop_blocks;
-  int ci, i, j, offset_y;
-  JBLOCKARRAY src_buffer, dst_buffer;
-  JBLOCKROW src_row_ptr, dst_row_ptr;
-  JCOEFPTR src_ptr, dst_ptr;
-  jpeg_component_info *compptr;
-
-  /* We output into a separate array because we can't touch different
-   * rows of the source virtual array simultaneously.  Otherwise, this
-   * is a pretty straightforward analog of horizontal flip.
-   * Within a DCT block, vertical mirroring is done by changing the signs
-   * of odd-numbered rows.
-   * Partial iMCUs at the bottom edge are copied verbatim.
-   */
-  MCU_rows = srcinfo->output_height /
-    (dstinfo->max_v_samp_factor * dstinfo->min_DCT_v_scaled_size);
-
-  for (ci = 0; ci < dstinfo->num_components; ci++) {
-    compptr = dstinfo->comp_info + ci;
-    comp_height = MCU_rows * compptr->v_samp_factor;
-    x_crop_blocks = x_crop_offset * compptr->h_samp_factor;
-    y_crop_blocks = y_crop_offset * compptr->v_samp_factor;
-    for (dst_blk_y = 0; dst_blk_y < compptr->height_in_blocks;
-	 dst_blk_y += compptr->v_samp_factor) {
-      dst_buffer = (*srcinfo->mem->access_virt_barray)
-	((j_common_ptr) srcinfo, dst_coef_arrays[ci], dst_blk_y,
-	 (JDIMENSION) compptr->v_samp_factor, TRUE);
-      if (y_crop_blocks + dst_blk_y < comp_height) {
-	/* Row is within the mirrorable area. */
-	src_buffer = (*srcinfo->mem->access_virt_barray)
-	  ((j_common_ptr) srcinfo, src_coef_arrays[ci],
-	   comp_height - y_crop_blocks - dst_blk_y -
-	   (JDIMENSION) compptr->v_samp_factor,
-	   (JDIMENSION) compptr->v_samp_factor, FALSE);
-      } else {
-	/* Bottom-edge blocks will be copied verbatim. */
-	src_buffer = (*srcinfo->mem->access_virt_barray)
-	  ((j_common_ptr) srcinfo, src_coef_arrays[ci],
-	   dst_blk_y + y_crop_blocks,
-	   (JDIMENSION) compptr->v_samp_factor, FALSE);
-      }
-      for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
-	if (y_crop_blocks + dst_blk_y < comp_height) {
-	  /* Row is within the mirrorable area. */
-	  dst_row_ptr = dst_buffer[offset_y];
-	  src_row_ptr = src_buffer[compptr->v_samp_factor - offset_y - 1];
-	  src_row_ptr += x_crop_blocks;
-	  for (dst_blk_x = 0; dst_blk_x < compptr->width_in_blocks;
-	       dst_blk_x++) {
-	    dst_ptr = dst_row_ptr[dst_blk_x];
-	    src_ptr = src_row_ptr[dst_blk_x];
-	    for (i = 0; i < DCTSIZE; i += 2) {
-	      /* copy even row */
-	      for (j = 0; j < DCTSIZE; j++)
-		*dst_ptr++ = *src_ptr++;
-	      /* copy odd row with sign change */
-	      for (j = 0; j < DCTSIZE; j++)
-		*dst_ptr++ = - *src_ptr++;
-	    }
-	  }
-	} else {
-	  /* Just copy row verbatim. */
-	  jcopy_block_row(src_buffer[offset_y] + x_crop_blocks,
-			  dst_buffer[offset_y],
-			  compptr->width_in_blocks);
-	}
-      }
-    }
-  }
-}
-
-
-LOCAL(void)
-do_transpose (j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
-	      JDIMENSION x_crop_offset, JDIMENSION y_crop_offset,
-	      jvirt_barray_ptr *src_coef_arrays,
-	      jvirt_barray_ptr *dst_coef_arrays)
-/* Transpose source into destination */
-{
-  JDIMENSION dst_blk_x, dst_blk_y, x_crop_blocks, y_crop_blocks;
-  int ci, i, j, offset_x, offset_y;
-  JBLOCKARRAY src_buffer, dst_buffer;
-  JCOEFPTR src_ptr, dst_ptr;
-  jpeg_component_info *compptr;
-
-  /* Transposing pixels within a block just requires transposing the
-   * DCT coefficients.
-   * Partial iMCUs at the edges require no special treatment; we simply
-   * process all the available DCT blocks for every component.
-   */
-  for (ci = 0; ci < dstinfo->num_components; ci++) {
-    compptr = dstinfo->comp_info + ci;
-    x_crop_blocks = x_crop_offset * compptr->h_samp_factor;
-    y_crop_blocks = y_crop_offset * compptr->v_samp_factor;
-    for (dst_blk_y = 0; dst_blk_y < compptr->height_in_blocks;
-	 dst_blk_y += compptr->v_samp_factor) {
-      dst_buffer = (*srcinfo->mem->access_virt_barray)
-	((j_common_ptr) srcinfo, dst_coef_arrays[ci], dst_blk_y,
-	 (JDIMENSION) compptr->v_samp_factor, TRUE);
-      for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
-	for (dst_blk_x = 0; dst_blk_x < compptr->width_in_blocks;
-	     dst_blk_x += compptr->h_samp_factor) {
-	  src_buffer = (*srcinfo->mem->access_virt_barray)
-	    ((j_common_ptr) srcinfo, src_coef_arrays[ci],
-	     dst_blk_x + x_crop_blocks,
-	     (JDIMENSION) compptr->h_samp_factor, FALSE);
-	  for (offset_x = 0; offset_x < compptr->h_samp_factor; offset_x++) {
-	    dst_ptr = dst_buffer[offset_y][dst_blk_x + offset_x];
-	    src_ptr = src_buffer[offset_x][dst_blk_y + offset_y + y_crop_blocks];
-	    for (i = 0; i < DCTSIZE; i++)
-	      for (j = 0; j < DCTSIZE; j++)
-		dst_ptr[j*DCTSIZE+i] = src_ptr[i*DCTSIZE+j];
-	  }
-	}
-      }
-    }
-  }
-}
-
-
-LOCAL(void)
-do_rot_90 (j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
-	   JDIMENSION x_crop_offset, JDIMENSION y_crop_offset,
-	   jvirt_barray_ptr *src_coef_arrays,
-	   jvirt_barray_ptr *dst_coef_arrays)
-/* 90 degree rotation is equivalent to
- *   1. Transposing the image;
- *   2. Horizontal mirroring.
- * These two steps are merged into a single processing routine.
- */
-{
-  JDIMENSION MCU_cols, comp_width, dst_blk_x, dst_blk_y;
-  JDIMENSION x_crop_blocks, y_crop_blocks;
-  int ci, i, j, offset_x, offset_y;
-  JBLOCKARRAY src_buffer, dst_buffer;
-  JCOEFPTR src_ptr, dst_ptr;
-  jpeg_component_info *compptr;
-
-  /* Because of the horizontal mirror step, we can't process partial iMCUs
-   * at the (output) right edge properly.  They just get transposed and
-   * not mirrored.
-   */
-  MCU_cols = srcinfo->output_height /
-    (dstinfo->max_h_samp_factor * dstinfo->min_DCT_h_scaled_size);
-
-  for (ci = 0; ci < dstinfo->num_components; ci++) {
-    compptr = dstinfo->comp_info + ci;
-    comp_width = MCU_cols * compptr->h_samp_factor;
-    x_crop_blocks = x_crop_offset * compptr->h_samp_factor;
-    y_crop_blocks = y_crop_offset * compptr->v_samp_factor;
-    for (dst_blk_y = 0; dst_blk_y < compptr->height_in_blocks;
-	 dst_blk_y += compptr->v_samp_factor) {
-      dst_buffer = (*srcinfo->mem->access_virt_barray)
-	((j_common_ptr) srcinfo, dst_coef_arrays[ci], dst_blk_y,
-	 (JDIMENSION) compptr->v_samp_factor, TRUE);
-      for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
-	for (dst_blk_x = 0; dst_blk_x < compptr->width_in_blocks;
-	     dst_blk_x += compptr->h_samp_factor) {
-	  if (x_crop_blocks + dst_blk_x < comp_width) {
-	    /* Block is within the mirrorable area. */
-	    src_buffer = (*srcinfo->mem->access_virt_barray)
-	      ((j_common_ptr) srcinfo, src_coef_arrays[ci],
-	       comp_width - x_crop_blocks - dst_blk_x -
-	       (JDIMENSION) compptr->h_samp_factor,
-	       (JDIMENSION) compptr->h_samp_factor, FALSE);
-	  } else {
-	    /* Edge blocks are transposed but not mirrored. */
-	    src_buffer = (*srcinfo->mem->access_virt_barray)
-	      ((j_common_ptr) srcinfo, src_coef_arrays[ci],
-	       dst_blk_x + x_crop_blocks,
-	       (JDIMENSION) compptr->h_samp_factor, FALSE);
-	  }
-	  for (offset_x = 0; offset_x < compptr->h_samp_factor; offset_x++) {
-	    dst_ptr = dst_buffer[offset_y][dst_blk_x + offset_x];
-	    if (x_crop_blocks + dst_blk_x < comp_width) {
-	      /* Block is within the mirrorable area. */
-	      src_ptr = src_buffer[compptr->h_samp_factor - offset_x - 1]
-		[dst_blk_y + offset_y + y_crop_blocks];
-	      for (i = 0; i < DCTSIZE; i++) {
-		for (j = 0; j < DCTSIZE; j++)
-		  dst_ptr[j*DCTSIZE+i] = src_ptr[i*DCTSIZE+j];
-		i++;
-		for (j = 0; j < DCTSIZE; j++)
-		  dst_ptr[j*DCTSIZE+i] = -src_ptr[i*DCTSIZE+j];
-	      }
-	    } else {
-	      /* Edge blocks are transposed but not mirrored. */
-	      src_ptr = src_buffer[offset_x]
-		[dst_blk_y + offset_y + y_crop_blocks];
-	      for (i = 0; i < DCTSIZE; i++)
-		for (j = 0; j < DCTSIZE; j++)
-		  dst_ptr[j*DCTSIZE+i] = src_ptr[i*DCTSIZE+j];
-	    }
-	  }
-	}
-      }
-    }
-  }
-}
-
-
-LOCAL(void)
-do_rot_270 (j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
-	    JDIMENSION x_crop_offset, JDIMENSION y_crop_offset,
-	    jvirt_barray_ptr *src_coef_arrays,
-	    jvirt_barray_ptr *dst_coef_arrays)
-/* 270 degree rotation is equivalent to
- *   1. Horizontal mirroring;
- *   2. Transposing the image.
- * These two steps are merged into a single processing routine.
- */
-{
-  JDIMENSION MCU_rows, comp_height, dst_blk_x, dst_blk_y;
-  JDIMENSION x_crop_blocks, y_crop_blocks;
-  int ci, i, j, offset_x, offset_y;
-  JBLOCKARRAY src_buffer, dst_buffer;
-  JCOEFPTR src_ptr, dst_ptr;
-  jpeg_component_info *compptr;
-
-  /* Because of the horizontal mirror step, we can't process partial iMCUs
-   * at the (output) bottom edge properly.  They just get transposed and
-   * not mirrored.
-   */
-  MCU_rows = srcinfo->output_width /
-    (dstinfo->max_v_samp_factor * dstinfo->min_DCT_v_scaled_size);
-
-  for (ci = 0; ci < dstinfo->num_components; ci++) {
-    compptr = dstinfo->comp_info + ci;
-    comp_height = MCU_rows * compptr->v_samp_factor;
-    x_crop_blocks = x_crop_offset * compptr->h_samp_factor;
-    y_crop_blocks = y_crop_offset * compptr->v_samp_factor;
-    for (dst_blk_y = 0; dst_blk_y < compptr->height_in_blocks;
-	 dst_blk_y += compptr->v_samp_factor) {
-      dst_buffer = (*srcinfo->mem->access_virt_barray)
-	((j_common_ptr) srcinfo, dst_coef_arrays[ci], dst_blk_y,
-	 (JDIMENSION) compptr->v_samp_factor, TRUE);
-      for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
-	for (dst_blk_x = 0; dst_blk_x < compptr->width_in_blocks;
-	     dst_blk_x += compptr->h_samp_factor) {
-	  src_buffer = (*srcinfo->mem->access_virt_barray)
-	    ((j_common_ptr) srcinfo, src_coef_arrays[ci],
-	     dst_blk_x + x_crop_blocks,
-	     (JDIMENSION) compptr->h_samp_factor, FALSE);
-	  for (offset_x = 0; offset_x < compptr->h_samp_factor; offset_x++) {
-	    dst_ptr = dst_buffer[offset_y][dst_blk_x + offset_x];
-	    if (y_crop_blocks + dst_blk_y < comp_height) {
-	      /* Block is within the mirrorable area. */
-	      src_ptr = src_buffer[offset_x]
-		[comp_height - y_crop_blocks - dst_blk_y - offset_y - 1];
-	      for (i = 0; i < DCTSIZE; i++) {
-		for (j = 0; j < DCTSIZE; j++) {
-		  dst_ptr[j*DCTSIZE+i] = src_ptr[i*DCTSIZE+j];
-		  j++;
-		  dst_ptr[j*DCTSIZE+i] = -src_ptr[i*DCTSIZE+j];
-		}
-	      }
-	    } else {
-	      /* Edge blocks are transposed but not mirrored. */
-	      src_ptr = src_buffer[offset_x]
-		[dst_blk_y + offset_y + y_crop_blocks];
-	      for (i = 0; i < DCTSIZE; i++)
-		for (j = 0; j < DCTSIZE; j++)
-		  dst_ptr[j*DCTSIZE+i] = src_ptr[i*DCTSIZE+j];
-	    }
-	  }
-	}
-      }
-    }
-  }
-}
-
-
-LOCAL(void)
-do_rot_180 (j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
-	    JDIMENSION x_crop_offset, JDIMENSION y_crop_offset,
-	    jvirt_barray_ptr *src_coef_arrays,
-	    jvirt_barray_ptr *dst_coef_arrays)
-/* 180 degree rotation is equivalent to
- *   1. Vertical mirroring;
- *   2. Horizontal mirroring.
- * These two steps are merged into a single processing routine.
- */
-{
-  JDIMENSION MCU_cols, MCU_rows, comp_width, comp_height, dst_blk_x, dst_blk_y;
-  JDIMENSION x_crop_blocks, y_crop_blocks;
-  int ci, i, j, offset_y;
-  JBLOCKARRAY src_buffer, dst_buffer;
-  JBLOCKROW src_row_ptr, dst_row_ptr;
-  JCOEFPTR src_ptr, dst_ptr;
-  jpeg_component_info *compptr;
-
-  MCU_cols = srcinfo->output_width /
-    (dstinfo->max_h_samp_factor * dstinfo->min_DCT_h_scaled_size);
-  MCU_rows = srcinfo->output_height /
-    (dstinfo->max_v_samp_factor * dstinfo->min_DCT_v_scaled_size);
-
-  for (ci = 0; ci < dstinfo->num_components; ci++) {
-    compptr = dstinfo->comp_info + ci;
-    comp_width = MCU_cols * compptr->h_samp_factor;
-    comp_height = MCU_rows * compptr->v_samp_factor;
-    x_crop_blocks = x_crop_offset * compptr->h_samp_factor;
-    y_crop_blocks = y_crop_offset * compptr->v_samp_factor;
-    for (dst_blk_y = 0; dst_blk_y < compptr->height_in_blocks;
-	 dst_blk_y += compptr->v_samp_factor) {
-      dst_buffer = (*srcinfo->mem->access_virt_barray)
-	((j_common_ptr) srcinfo, dst_coef_arrays[ci], dst_blk_y,
-	 (JDIMENSION) compptr->v_samp_factor, TRUE);
-      if (y_crop_blocks + dst_blk_y < comp_height) {
-	/* Row is within the vertically mirrorable area. */
-	src_buffer = (*srcinfo->mem->access_virt_barray)
-	  ((j_common_ptr) srcinfo, src_coef_arrays[ci],
-	   comp_height - y_crop_blocks - dst_blk_y -
-	   (JDIMENSION) compptr->v_samp_factor,
-	   (JDIMENSION) compptr->v_samp_factor, FALSE);
-      } else {
-	/* Bottom-edge rows are only mirrored horizontally. */
-	src_buffer = (*srcinfo->mem->access_virt_barray)
-	  ((j_common_ptr) srcinfo, src_coef_arrays[ci],
-	   dst_blk_y + y_crop_blocks,
-	   (JDIMENSION) compptr->v_samp_factor, FALSE);
-      }
-      for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
-	dst_row_ptr = dst_buffer[offset_y];
-	if (y_crop_blocks + dst_blk_y < comp_height) {
-	  /* Row is within the mirrorable area. */
-	  src_row_ptr = src_buffer[compptr->v_samp_factor - offset_y - 1];
-	  for (dst_blk_x = 0; dst_blk_x < compptr->width_in_blocks; dst_blk_x++) {
-	    dst_ptr = dst_row_ptr[dst_blk_x];
-	    if (x_crop_blocks + dst_blk_x < comp_width) {
-	      /* Process the blocks that can be mirrored both ways. */
-	      src_ptr = src_row_ptr[comp_width - x_crop_blocks - dst_blk_x - 1];
-	      for (i = 0; i < DCTSIZE; i += 2) {
-		/* For even row, negate every odd column. */
-		for (j = 0; j < DCTSIZE; j += 2) {
-		  *dst_ptr++ = *src_ptr++;
-		  *dst_ptr++ = - *src_ptr++;
-		}
-		/* For odd row, negate every even column. */
-		for (j = 0; j < DCTSIZE; j += 2) {
-		  *dst_ptr++ = - *src_ptr++;
-		  *dst_ptr++ = *src_ptr++;
-		}
-	      }
-	    } else {
-	      /* Any remaining right-edge blocks are only mirrored vertically. */
-	      src_ptr = src_row_ptr[x_crop_blocks + dst_blk_x];
-	      for (i = 0; i < DCTSIZE; i += 2) {
-		for (j = 0; j < DCTSIZE; j++)
-		  *dst_ptr++ = *src_ptr++;
-		for (j = 0; j < DCTSIZE; j++)
-		  *dst_ptr++ = - *src_ptr++;
-	      }
-	    }
-	  }
-	} else {
-	  /* Remaining rows are just mirrored horizontally. */
-	  src_row_ptr = src_buffer[offset_y];
-	  for (dst_blk_x = 0; dst_blk_x < compptr->width_in_blocks; dst_blk_x++) {
-	    if (x_crop_blocks + dst_blk_x < comp_width) {
-	      /* Process the blocks that can be mirrored. */
-	      dst_ptr = dst_row_ptr[dst_blk_x];
-	      src_ptr = src_row_ptr[comp_width - x_crop_blocks - dst_blk_x - 1];
-	      for (i = 0; i < DCTSIZE2; i += 2) {
-		*dst_ptr++ = *src_ptr++;
-		*dst_ptr++ = - *src_ptr++;
-	      }
-	    } else {
-	      /* Any remaining right-edge blocks are only copied. */
-	      jcopy_block_row(src_row_ptr + dst_blk_x + x_crop_blocks,
-			      dst_row_ptr + dst_blk_x,
-			      (JDIMENSION) 1);
-	    }
-	  }
-	}
-      }
-    }
-  }
-}
-
-
-LOCAL(void)
-do_transverse (j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
-	       JDIMENSION x_crop_offset, JDIMENSION y_crop_offset,
-	       jvirt_barray_ptr *src_coef_arrays,
-	       jvirt_barray_ptr *dst_coef_arrays)
-/* Transverse transpose is equivalent to
- *   1. 180 degree rotation;
- *   2. Transposition;
- * or
- *   1. Horizontal mirroring;
- *   2. Transposition;
- *   3. Horizontal mirroring.
- * These steps are merged into a single processing routine.
- */
-{
-  JDIMENSION MCU_cols, MCU_rows, comp_width, comp_height, dst_blk_x, dst_blk_y;
-  JDIMENSION x_crop_blocks, y_crop_blocks;
-  int ci, i, j, offset_x, offset_y;
-  JBLOCKARRAY src_buffer, dst_buffer;
-  JCOEFPTR src_ptr, dst_ptr;
-  jpeg_component_info *compptr;
-
-  MCU_cols = srcinfo->output_height /
-    (dstinfo->max_h_samp_factor * dstinfo->min_DCT_h_scaled_size);
-  MCU_rows = srcinfo->output_width /
-    (dstinfo->max_v_samp_factor * dstinfo->min_DCT_v_scaled_size);
-
-  for (ci = 0; ci < dstinfo->num_components; ci++) {
-    compptr = dstinfo->comp_info + ci;
-    comp_width = MCU_cols * compptr->h_samp_factor;
-    comp_height = MCU_rows * compptr->v_samp_factor;
-    x_crop_blocks = x_crop_offset * compptr->h_samp_factor;
-    y_crop_blocks = y_crop_offset * compptr->v_samp_factor;
-    for (dst_blk_y = 0; dst_blk_y < compptr->height_in_blocks;
-	 dst_blk_y += compptr->v_samp_factor) {
-      dst_buffer = (*srcinfo->mem->access_virt_barray)
-	((j_common_ptr) srcinfo, dst_coef_arrays[ci], dst_blk_y,
-	 (JDIMENSION) compptr->v_samp_factor, TRUE);
-      for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
-	for (dst_blk_x = 0; dst_blk_x < compptr->width_in_blocks;
-	     dst_blk_x += compptr->h_samp_factor) {
-	  if (x_crop_blocks + dst_blk_x < comp_width) {
-	    /* Block is within the mirrorable area. */
-	    src_buffer = (*srcinfo->mem->access_virt_barray)
-	      ((j_common_ptr) srcinfo, src_coef_arrays[ci],
-	       comp_width - x_crop_blocks - dst_blk_x -
-	       (JDIMENSION) compptr->h_samp_factor,
-	       (JDIMENSION) compptr->h_samp_factor, FALSE);
-	  } else {
-	    src_buffer = (*srcinfo->mem->access_virt_barray)
-	      ((j_common_ptr) srcinfo, src_coef_arrays[ci],
-	       dst_blk_x + x_crop_blocks,
-	       (JDIMENSION) compptr->h_samp_factor, FALSE);
-	  }
-	  for (offset_x = 0; offset_x < compptr->h_samp_factor; offset_x++) {
-	    dst_ptr = dst_buffer[offset_y][dst_blk_x + offset_x];
-	    if (y_crop_blocks + dst_blk_y < comp_height) {
-	      if (x_crop_blocks + dst_blk_x < comp_width) {
-		/* Block is within the mirrorable area. */
-		src_ptr = src_buffer[compptr->h_samp_factor - offset_x - 1]
-		  [comp_height - y_crop_blocks - dst_blk_y - offset_y - 1];
-		for (i = 0; i < DCTSIZE; i++) {
-		  for (j = 0; j < DCTSIZE; j++) {
-		    dst_ptr[j*DCTSIZE+i] = src_ptr[i*DCTSIZE+j];
-		    j++;
-		    dst_ptr[j*DCTSIZE+i] = -src_ptr[i*DCTSIZE+j];
-		  }
-		  i++;
-		  for (j = 0; j < DCTSIZE; j++) {
-		    dst_ptr[j*DCTSIZE+i] = -src_ptr[i*DCTSIZE+j];
-		    j++;
-		    dst_ptr[j*DCTSIZE+i] = src_ptr[i*DCTSIZE+j];
-		  }
-		}
-	      } else {
-		/* Right-edge blocks are mirrored in y only */
-		src_ptr = src_buffer[offset_x]
-		  [comp_height - y_crop_blocks - dst_blk_y - offset_y - 1];
-		for (i = 0; i < DCTSIZE; i++) {
-		  for (j = 0; j < DCTSIZE; j++) {
-		    dst_ptr[j*DCTSIZE+i] = src_ptr[i*DCTSIZE+j];
-		    j++;
-		    dst_ptr[j*DCTSIZE+i] = -src_ptr[i*DCTSIZE+j];
-		  }
-		}
-	      }
-	    } else {
-	      if (x_crop_blocks + dst_blk_x < comp_width) {
-		/* Bottom-edge blocks are mirrored in x only */
-		src_ptr = src_buffer[compptr->h_samp_factor - offset_x - 1]
-		  [dst_blk_y + offset_y + y_crop_blocks];
-		for (i = 0; i < DCTSIZE; i++) {
-		  for (j = 0; j < DCTSIZE; j++)
-		    dst_ptr[j*DCTSIZE+i] = src_ptr[i*DCTSIZE+j];
-		  i++;
-		  for (j = 0; j < DCTSIZE; j++)
-		    dst_ptr[j*DCTSIZE+i] = -src_ptr[i*DCTSIZE+j];
-		}
-	      } else {
-		/* At lower right corner, just transpose, no mirroring */
-		src_ptr = src_buffer[offset_x]
-		  [dst_blk_y + offset_y + y_crop_blocks];
-		for (i = 0; i < DCTSIZE; i++)
-		  for (j = 0; j < DCTSIZE; j++)
-		    dst_ptr[j*DCTSIZE+i] = src_ptr[i*DCTSIZE+j];
-	      }
-	    }
-	  }
-	}
-      }
-    }
-  }
-}
-
-
-/* Parse an unsigned integer: subroutine for jtransform_parse_crop_spec.
- * Returns TRUE if valid integer found, FALSE if not.
- * *strptr is advanced over the digit string, and *result is set to its value.
- */
-
-LOCAL(boolean)
-jt_read_integer (const char ** strptr, JDIMENSION * result)
-{
-  const char * ptr = *strptr;
-  JDIMENSION val = 0;
-
-  for (; isdigit(*ptr); ptr++) {
-    val = val * 10 + (JDIMENSION) (*ptr - '0');
-  }
-  *result = val;
-  if (ptr == *strptr)
-    return FALSE;		/* oops, no digits */
-  *strptr = ptr;
-  return TRUE;
-}
-
-
-/* Parse a crop specification (written in X11 geometry style).
- * The routine returns TRUE if the spec string is valid, FALSE if not.
- *
- * The crop spec string should have the format
- *	<width>[f]x<height>[f]{+-}<xoffset>{+-}<yoffset>
- * where width, height, xoffset, and yoffset are unsigned integers.
- * Each of the elements can be omitted to indicate a default value.
- * (A weakness of this style is that it is not possible to omit xoffset
- * while specifying yoffset, since they look alike.)
- *
- * This code is loosely based on XParseGeometry from the X11 distribution.
- */
-
-GLOBAL(boolean)
-jtransform_parse_crop_spec (jpeg_transform_info *info, const char *spec)
-{
-  info->crop = FALSE;
-  info->crop_width_set = JCROP_UNSET;
-  info->crop_height_set = JCROP_UNSET;
-  info->crop_xoffset_set = JCROP_UNSET;
-  info->crop_yoffset_set = JCROP_UNSET;
-
-  if (isdigit(*spec)) {
-    /* fetch width */
-    if (! jt_read_integer(&spec, &info->crop_width))
-      return FALSE;
-    if (*spec == 'f' || *spec == 'F') {
-      spec++;
-      info->crop_width_set = JCROP_FORCE;
-    } else
-      info->crop_width_set = JCROP_POS;
-  }
-  if (*spec == 'x' || *spec == 'X') {
-    /* fetch height */
-    spec++;
-    if (! jt_read_integer(&spec, &info->crop_height))
-      return FALSE;
-    if (*spec == 'f' || *spec == 'F') {
-      spec++;
-      info->crop_height_set = JCROP_FORCE;
-    } else
-      info->crop_height_set = JCROP_POS;
-  }
-  if (*spec == '+' || *spec == '-') {
-    /* fetch xoffset */
-    info->crop_xoffset_set = (*spec == '-') ? JCROP_NEG : JCROP_POS;
-    spec++;
-    if (! jt_read_integer(&spec, &info->crop_xoffset))
-      return FALSE;
-  }
-  if (*spec == '+' || *spec == '-') {
-    /* fetch yoffset */
-    info->crop_yoffset_set = (*spec == '-') ? JCROP_NEG : JCROP_POS;
-    spec++;
-    if (! jt_read_integer(&spec, &info->crop_yoffset))
-      return FALSE;
-  }
-  /* We had better have gotten to the end of the string. */
-  if (*spec != '\0')
-    return FALSE;
-  info->crop = TRUE;
-  return TRUE;
-}
-
-
-/* Trim off any partial iMCUs on the indicated destination edge */
-
-LOCAL(void)
-trim_right_edge (jpeg_transform_info *info, JDIMENSION full_width)
-{
-  JDIMENSION MCU_cols;
-
-  MCU_cols = info->output_width / info->iMCU_sample_width;
-  if (MCU_cols > 0 && info->x_crop_offset + MCU_cols ==
-      full_width / info->iMCU_sample_width)
-    info->output_width = MCU_cols * info->iMCU_sample_width;
-}
-
-LOCAL(void)
-trim_bottom_edge (jpeg_transform_info *info, JDIMENSION full_height)
-{
-  JDIMENSION MCU_rows;
-
-  MCU_rows = info->output_height / info->iMCU_sample_height;
-  if (MCU_rows > 0 && info->y_crop_offset + MCU_rows ==
-      full_height / info->iMCU_sample_height)
-    info->output_height = MCU_rows * info->iMCU_sample_height;
-}
-
-
-/* Request any required workspace.
- *
- * This routine figures out the size that the output image will be
- * (which implies that all the transform parameters must be set before
- * it is called).
- *
- * We allocate the workspace virtual arrays from the source decompression
- * object, so that all the arrays (both the original data and the workspace)
- * will be taken into account while making memory management decisions.
- * Hence, this routine must be called after jpeg_read_header (which reads
- * the image dimensions) and before jpeg_read_coefficients (which realizes
- * the source's virtual arrays).
- *
- * This function returns FALSE right away if -perfect is given
- * and transformation is not perfect.  Otherwise returns TRUE.
- */
-
-GLOBAL(boolean)
-jtransform_request_workspace (j_decompress_ptr srcinfo,
-			      jpeg_transform_info *info)
-{
-  jvirt_barray_ptr *coef_arrays;
-  boolean need_workspace, transpose_it;
-  jpeg_component_info *compptr;
-  JDIMENSION xoffset, yoffset;
-  JDIMENSION width_in_iMCUs, height_in_iMCUs;
-  JDIMENSION width_in_blocks, height_in_blocks;
-  int ci, h_samp_factor, v_samp_factor;
-
-  /* Determine number of components in output image */
-  if (info->force_grayscale &&
-      srcinfo->jpeg_color_space == JCS_YCbCr &&
-      srcinfo->num_components == 3)
-    /* We'll only process the first component */
-    info->num_components = 1;
-  else
-    /* Process all the components */
-    info->num_components = srcinfo->num_components;
-
-  /* Compute output image dimensions and related values. */
-  jpeg_core_output_dimensions(srcinfo);
-
-  /* Return right away if -perfect is given and transformation is not perfect.
-   */
-  if (info->perfect) {
-    if (info->num_components == 1) {
-      if (!jtransform_perfect_transform(srcinfo->output_width,
-	  srcinfo->output_height,
-	  srcinfo->min_DCT_h_scaled_size,
-	  srcinfo->min_DCT_v_scaled_size,
-	  info->transform))
-	return FALSE;
-    } else {
-      if (!jtransform_perfect_transform(srcinfo->output_width,
-	  srcinfo->output_height,
-	  srcinfo->max_h_samp_factor * srcinfo->min_DCT_h_scaled_size,
-	  srcinfo->max_v_samp_factor * srcinfo->min_DCT_v_scaled_size,
-	  info->transform))
-	return FALSE;
-    }
-  }
-
-  /* If there is only one output component, force the iMCU size to be 1;
-   * else use the source iMCU size.  (This allows us to do the right thing
-   * when reducing color to grayscale, and also provides a handy way of
-   * cleaning up "funny" grayscale images whose sampling factors are not 1x1.)
-   */
-  switch (info->transform) {
-  case JXFORM_TRANSPOSE:
-  case JXFORM_TRANSVERSE:
-  case JXFORM_ROT_90:
-  case JXFORM_ROT_270:
-    info->output_width = srcinfo->output_height;
-    info->output_height = srcinfo->output_width;
-    if (info->num_components == 1) {
-      info->iMCU_sample_width = srcinfo->min_DCT_v_scaled_size;
-      info->iMCU_sample_height = srcinfo->min_DCT_h_scaled_size;
-    } else {
-      info->iMCU_sample_width =
-	srcinfo->max_v_samp_factor * srcinfo->min_DCT_v_scaled_size;
-      info->iMCU_sample_height =
-	srcinfo->max_h_samp_factor * srcinfo->min_DCT_h_scaled_size;
-    }
-    break;
-  default:
-    info->output_width = srcinfo->output_width;
-    info->output_height = srcinfo->output_height;
-    if (info->num_components == 1) {
-      info->iMCU_sample_width = srcinfo->min_DCT_h_scaled_size;
-      info->iMCU_sample_height = srcinfo->min_DCT_v_scaled_size;
-    } else {
-      info->iMCU_sample_width =
-	srcinfo->max_h_samp_factor * srcinfo->min_DCT_h_scaled_size;
-      info->iMCU_sample_height =
-	srcinfo->max_v_samp_factor * srcinfo->min_DCT_v_scaled_size;
-    }
-    break;
-  }
-
-  /* If cropping has been requested, compute the crop area's position and
-   * dimensions, ensuring that its upper left corner falls at an iMCU boundary.
-   */
-  if (info->crop) {
-    /* Insert default values for unset crop parameters */
-    if (info->crop_xoffset_set == JCROP_UNSET)
-      info->crop_xoffset = 0;	/* default to +0 */
-    if (info->crop_yoffset_set == JCROP_UNSET)
-      info->crop_yoffset = 0;	/* default to +0 */
-    if (info->crop_xoffset >= info->output_width ||
-	info->crop_yoffset >= info->output_height)
-      ERREXIT(srcinfo, JERR_BAD_CROP_SPEC);
-    if (info->crop_width_set == JCROP_UNSET)
-      info->crop_width = info->output_width - info->crop_xoffset;
-    if (info->crop_height_set == JCROP_UNSET)
-      info->crop_height = info->output_height - info->crop_yoffset;
-    /* Ensure parameters are valid */
-    if (info->crop_width <= 0 || info->crop_width > info->output_width ||
-	info->crop_height <= 0 || info->crop_height > info->output_height ||
-	info->crop_xoffset > info->output_width - info->crop_width ||
-	info->crop_yoffset > info->output_height - info->crop_height)
-      ERREXIT(srcinfo, JERR_BAD_CROP_SPEC);
-    /* Convert negative crop offsets into regular offsets */
-    if (info->crop_xoffset_set == JCROP_NEG)
-      xoffset = info->output_width - info->crop_width - info->crop_xoffset;
-    else
-      xoffset = info->crop_xoffset;
-    if (info->crop_yoffset_set == JCROP_NEG)
-      yoffset = info->output_height - info->crop_height - info->crop_yoffset;
-    else
-      yoffset = info->crop_yoffset;
-    /* Now adjust so that upper left corner falls at an iMCU boundary */
-    if (info->crop_width_set == JCROP_FORCE)
-      info->output_width = info->crop_width;
-    else
-      info->output_width =
-        info->crop_width + (xoffset % info->iMCU_sample_width);
-    if (info->crop_height_set == JCROP_FORCE)
-      info->output_height = info->crop_height;
-    else
-      info->output_height =
-        info->crop_height + (yoffset % info->iMCU_sample_height);
-    /* Save x/y offsets measured in iMCUs */
-    info->x_crop_offset = xoffset / info->iMCU_sample_width;
-    info->y_crop_offset = yoffset / info->iMCU_sample_height;
-  } else {
-    info->x_crop_offset = 0;
-    info->y_crop_offset = 0;
-  }
-
-  /* Figure out whether we need workspace arrays,
-   * and if so whether they are transposed relative to the source.
-   */
-  need_workspace = FALSE;
-  transpose_it = FALSE;
-  switch (info->transform) {
-  case JXFORM_NONE:
-    if (info->x_crop_offset != 0 || info->y_crop_offset != 0)
-      need_workspace = TRUE;
-    /* No workspace needed if neither cropping nor transforming */
-    break;
-  case JXFORM_FLIP_H:
-    if (info->trim)
-      trim_right_edge(info, srcinfo->output_width);
-    if (info->y_crop_offset != 0)
-      need_workspace = TRUE;
-    /* do_flip_h_no_crop doesn't need a workspace array */
-    break;
-  case JXFORM_FLIP_V:
-    if (info->trim)
-      trim_bottom_edge(info, srcinfo->output_height);
-    /* Need workspace arrays having same dimensions as source image. */
-    need_workspace = TRUE;
-    break;
-  case JXFORM_TRANSPOSE:
-    /* transpose does NOT have to trim anything */
-    /* Need workspace arrays having transposed dimensions. */
-    need_workspace = TRUE;
-    transpose_it = TRUE;
-    break;
-  case JXFORM_TRANSVERSE:
-    if (info->trim) {
-      trim_right_edge(info, srcinfo->output_height);
-      trim_bottom_edge(info, srcinfo->output_width);
-    }
-    /* Need workspace arrays having transposed dimensions. */
-    need_workspace = TRUE;
-    transpose_it = TRUE;
-    break;
-  case JXFORM_ROT_90:
-    if (info->trim)
-      trim_right_edge(info, srcinfo->output_height);
-    /* Need workspace arrays having transposed dimensions. */
-    need_workspace = TRUE;
-    transpose_it = TRUE;
-    break;
-  case JXFORM_ROT_180:
-    if (info->trim) {
-      trim_right_edge(info, srcinfo->output_width);
-      trim_bottom_edge(info, srcinfo->output_height);
-    }
-    /* Need workspace arrays having same dimensions as source image. */
-    need_workspace = TRUE;
-    break;
-  case JXFORM_ROT_270:
-    if (info->trim)
-      trim_bottom_edge(info, srcinfo->output_width);
-    /* Need workspace arrays having transposed dimensions. */
-    need_workspace = TRUE;
-    transpose_it = TRUE;
-    break;
-  }
-
-  /* Allocate workspace if needed.
-   * Note that we allocate arrays padded out to the next iMCU boundary,
-   * so that transform routines need not worry about missing edge blocks.
-   */
-  if (need_workspace) {
-    coef_arrays = (jvirt_barray_ptr *)
-      (*srcinfo->mem->alloc_small) ((j_common_ptr) srcinfo, JPOOL_IMAGE,
-		SIZEOF(jvirt_barray_ptr) * info->num_components);
-    width_in_iMCUs = (JDIMENSION)
-      jdiv_round_up((long) info->output_width,
-		    (long) info->iMCU_sample_width);
-    height_in_iMCUs = (JDIMENSION)
-      jdiv_round_up((long) info->output_height,
-		    (long) info->iMCU_sample_height);
-    for (ci = 0; ci < info->num_components; ci++) {
-      compptr = srcinfo->comp_info + ci;
-      if (info->num_components == 1) {
-	/* we're going to force samp factors to 1x1 in this case */
-	h_samp_factor = v_samp_factor = 1;
-      } else if (transpose_it) {
-	h_samp_factor = compptr->v_samp_factor;
-	v_samp_factor = compptr->h_samp_factor;
-      } else {
-	h_samp_factor = compptr->h_samp_factor;
-	v_samp_factor = compptr->v_samp_factor;
-      }
-      width_in_blocks = width_in_iMCUs * h_samp_factor;
-      height_in_blocks = height_in_iMCUs * v_samp_factor;
-      coef_arrays[ci] = (*srcinfo->mem->request_virt_barray)
-	((j_common_ptr) srcinfo, JPOOL_IMAGE, FALSE,
-	 width_in_blocks, height_in_blocks, (JDIMENSION) v_samp_factor);
-    }
-    info->workspace_coef_arrays = coef_arrays;
-  } else
-    info->workspace_coef_arrays = NULL;
-
-  return TRUE;
-}
-
-
-/* Transpose destination image parameters */
-
-LOCAL(void)
-transpose_critical_parameters (j_compress_ptr dstinfo)
-{
-  int tblno, i, j, ci, itemp;
-  jpeg_component_info *compptr;
-  JQUANT_TBL *qtblptr;
-  JDIMENSION jtemp;
-  UINT16 qtemp;
-
-  /* Transpose image dimensions */
-  jtemp = dstinfo->image_width;
-  dstinfo->image_width = dstinfo->image_height;
-  dstinfo->image_height = jtemp;
-  itemp = dstinfo->min_DCT_h_scaled_size;
-  dstinfo->min_DCT_h_scaled_size = dstinfo->min_DCT_v_scaled_size;
-  dstinfo->min_DCT_v_scaled_size = itemp;
-
-  /* Transpose sampling factors */
-  for (ci = 0; ci < dstinfo->num_components; ci++) {
-    compptr = dstinfo->comp_info + ci;
-    itemp = compptr->h_samp_factor;
-    compptr->h_samp_factor = compptr->v_samp_factor;
-    compptr->v_samp_factor = itemp;
-  }
-
-  /* Transpose quantization tables */
-  for (tblno = 0; tblno < NUM_QUANT_TBLS; tblno++) {
-    qtblptr = dstinfo->quant_tbl_ptrs[tblno];
-    if (qtblptr != NULL) {
-      for (i = 0; i < DCTSIZE; i++) {
-	for (j = 0; j < i; j++) {
-	  qtemp = qtblptr->quantval[i*DCTSIZE+j];
-	  qtblptr->quantval[i*DCTSIZE+j] = qtblptr->quantval[j*DCTSIZE+i];
-	  qtblptr->quantval[j*DCTSIZE+i] = qtemp;
-	}
-      }
-    }
-  }
-}
-
-
-/* Adjust Exif image parameters.
- *
- * We try to adjust the Tags ExifImageWidth and ExifImageHeight if possible.
- */
-
-LOCAL(void)
-adjust_exif_parameters (JOCTET FAR * data, unsigned int length,
-			JDIMENSION new_width, JDIMENSION new_height)
-{
-  boolean is_motorola; /* Flag for byte order */
-  unsigned int number_of_tags, tagnum;
-  unsigned int firstoffset, offset;
-  JDIMENSION new_value;
-
-  if (length < 12) return; /* Length of an IFD entry */
-
-  /* Discover byte order */
-  if (GETJOCTET(data[0]) == 0x49 && GETJOCTET(data[1]) == 0x49)
-    is_motorola = FALSE;
-  else if (GETJOCTET(data[0]) == 0x4D && GETJOCTET(data[1]) == 0x4D)
-    is_motorola = TRUE;
-  else
-    return;
-
-  /* Check Tag Mark */
-  if (is_motorola) {
-    if (GETJOCTET(data[2]) != 0) return;
-    if (GETJOCTET(data[3]) != 0x2A) return;
-  } else {
-    if (GETJOCTET(data[3]) != 0) return;
-    if (GETJOCTET(data[2]) != 0x2A) return;
-  }
-
-  /* Get first IFD offset (offset to IFD0) */
-  if (is_motorola) {
-    if (GETJOCTET(data[4]) != 0) return;
-    if (GETJOCTET(data[5]) != 0) return;
-    firstoffset = GETJOCTET(data[6]);
-    firstoffset <<= 8;
-    firstoffset += GETJOCTET(data[7]);
-  } else {
-    if (GETJOCTET(data[7]) != 0) return;
-    if (GETJOCTET(data[6]) != 0) return;
-    firstoffset = GETJOCTET(data[5]);
-    firstoffset <<= 8;
-    firstoffset += GETJOCTET(data[4]);
-  }
-  if (firstoffset > length - 2) return; /* check end of data segment */
-
-  /* Get the number of directory entries contained in this IFD */
-  if (is_motorola) {
-    number_of_tags = GETJOCTET(data[firstoffset]);
-    number_of_tags <<= 8;
-    number_of_tags += GETJOCTET(data[firstoffset+1]);
-  } else {
-    number_of_tags = GETJOCTET(data[firstoffset+1]);
-    number_of_tags <<= 8;
-    number_of_tags += GETJOCTET(data[firstoffset]);
-  }
-  if (number_of_tags == 0) return;
-  firstoffset += 2;
-
-  /* Search for ExifSubIFD offset Tag in IFD0 */
-  for (;;) {
-    if (firstoffset > length - 12) return; /* check end of data segment */
-    /* Get Tag number */
-    if (is_motorola) {
-      tagnum = GETJOCTET(data[firstoffset]);
-      tagnum <<= 8;
-      tagnum += GETJOCTET(data[firstoffset+1]);
-    } else {
-      tagnum = GETJOCTET(data[firstoffset+1]);
-      tagnum <<= 8;
-      tagnum += GETJOCTET(data[firstoffset]);
-    }
-    if (tagnum == 0x8769) break; /* found ExifSubIFD offset Tag */
-    if (--number_of_tags == 0) return;
-    firstoffset += 12;
-  }
-
-  /* Get the ExifSubIFD offset */
-  if (is_motorola) {
-    if (GETJOCTET(data[firstoffset+8]) != 0) return;
-    if (GETJOCTET(data[firstoffset+9]) != 0) return;
-    offset = GETJOCTET(data[firstoffset+10]);
-    offset <<= 8;
-    offset += GETJOCTET(data[firstoffset+11]);
-  } else {
-    if (GETJOCTET(data[firstoffset+11]) != 0) return;
-    if (GETJOCTET(data[firstoffset+10]) != 0) return;
-    offset = GETJOCTET(data[firstoffset+9]);
-    offset <<= 8;
-    offset += GETJOCTET(data[firstoffset+8]);
-  }
-  if (offset > length - 2) return; /* check end of data segment */
-
-  /* Get the number of directory entries contained in this SubIFD */
-  if (is_motorola) {
-    number_of_tags = GETJOCTET(data[offset]);
-    number_of_tags <<= 8;
-    number_of_tags += GETJOCTET(data[offset+1]);
-  } else {
-    number_of_tags = GETJOCTET(data[offset+1]);
-    number_of_tags <<= 8;
-    number_of_tags += GETJOCTET(data[offset]);
-  }
-  if (number_of_tags < 2) return;
-  offset += 2;
-
-  /* Search for ExifImageWidth and ExifImageHeight Tags in this SubIFD */
-  do {
-    if (offset > length - 12) return; /* check end of data segment */
-    /* Get Tag number */
-    if (is_motorola) {
-      tagnum = GETJOCTET(data[offset]);
-      tagnum <<= 8;
-      tagnum += GETJOCTET(data[offset+1]);
-    } else {
-      tagnum = GETJOCTET(data[offset+1]);
-      tagnum <<= 8;
-      tagnum += GETJOCTET(data[offset]);
-    }
-    if (tagnum == 0xA002 || tagnum == 0xA003) {
-      if (tagnum == 0xA002)
-	new_value = new_width; /* ExifImageWidth Tag */
-      else
-	new_value = new_height; /* ExifImageHeight Tag */
-      if (is_motorola) {
-	data[offset+2] = 0; /* Format = unsigned long (4 octets) */
-	data[offset+3] = 4;
-	data[offset+4] = 0; /* Number Of Components = 1 */
-	data[offset+5] = 0;
-	data[offset+6] = 0;
-	data[offset+7] = 1;
-	data[offset+8] = 0;
-	data[offset+9] = 0;
-	data[offset+10] = (JOCTET)((new_value >> 8) & 0xFF);
-	data[offset+11] = (JOCTET)(new_value & 0xFF);
-      } else {
-	data[offset+2] = 4; /* Format = unsigned long (4 octets) */
-	data[offset+3] = 0;
-	data[offset+4] = 1; /* Number Of Components = 1 */
-	data[offset+5] = 0;
-	data[offset+6] = 0;
-	data[offset+7] = 0;
-	data[offset+8] = (JOCTET)(new_value & 0xFF);
-	data[offset+9] = (JOCTET)((new_value >> 8) & 0xFF);
-	data[offset+10] = 0;
-	data[offset+11] = 0;
-      }
-    }
-    offset += 12;
-  } while (--number_of_tags);
-}
-
-
-/* Adjust output image parameters as needed.
- *
- * This must be called after jpeg_copy_critical_parameters()
- * and before jpeg_write_coefficients().
- *
- * The return value is the set of virtual coefficient arrays to be written
- * (either the ones allocated by jtransform_request_workspace, or the
- * original source data arrays).  The caller will need to pass this value
- * to jpeg_write_coefficients().
- */
-
-GLOBAL(jvirt_barray_ptr *)
-jtransform_adjust_parameters (j_decompress_ptr srcinfo,
-			      j_compress_ptr dstinfo,
-			      jvirt_barray_ptr *src_coef_arrays,
-			      jpeg_transform_info *info)
-{
-  /* If force-to-grayscale is requested, adjust destination parameters */
-  if (info->force_grayscale) {
-    /* First, ensure we have YCbCr or grayscale data, and that the source's
-     * Y channel is full resolution.  (No reasonable person would make Y
-     * be less than full resolution, so actually coping with that case
-     * isn't worth extra code space.  But we check it to avoid crashing.)
-     */
-    if (((dstinfo->jpeg_color_space == JCS_YCbCr &&
-	  dstinfo->num_components == 3) ||
-	 (dstinfo->jpeg_color_space == JCS_GRAYSCALE &&
-	  dstinfo->num_components == 1)) &&
-	srcinfo->comp_info[0].h_samp_factor == srcinfo->max_h_samp_factor &&
-	srcinfo->comp_info[0].v_samp_factor == srcinfo->max_v_samp_factor) {
-      /* We use jpeg_set_colorspace to make sure subsidiary settings get fixed
-       * properly.  Among other things, it sets the target h_samp_factor &
-       * v_samp_factor to 1, which typically won't match the source.
-       * We have to preserve the source's quantization table number, however.
-       */
-      int sv_quant_tbl_no = dstinfo->comp_info[0].quant_tbl_no;
-      jpeg_set_colorspace(dstinfo, JCS_GRAYSCALE);
-      dstinfo->comp_info[0].quant_tbl_no = sv_quant_tbl_no;
-    } else {
-      /* Sorry, can't do it */
-      ERREXIT(dstinfo, JERR_CONVERSION_NOTIMPL);
-    }
-  } else if (info->num_components == 1) {
-    /* For a single-component source, we force the destination sampling factors
-     * to 1x1, with or without force_grayscale.  This is useful because some
-     * decoders choke on grayscale images with other sampling factors.
-     */
-    dstinfo->comp_info[0].h_samp_factor = 1;
-    dstinfo->comp_info[0].v_samp_factor = 1;
-  }
-
-  /* Correct the destination's image dimensions as necessary
-   * for rotate/flip, resize, and crop operations.
-   */
-  dstinfo->jpeg_width = info->output_width;
-  dstinfo->jpeg_height = info->output_height;
-
-  /* Transpose destination image parameters */
-  switch (info->transform) {
-  case JXFORM_TRANSPOSE:
-  case JXFORM_TRANSVERSE:
-  case JXFORM_ROT_90:
-  case JXFORM_ROT_270:
-    transpose_critical_parameters(dstinfo);
-    break;
-  default:
-    break;
-  }
-
-  /* Adjust Exif properties */
-  if (srcinfo->marker_list != NULL &&
-      srcinfo->marker_list->marker == JPEG_APP0+1 &&
-      srcinfo->marker_list->data_length >= 6 &&
-      GETJOCTET(srcinfo->marker_list->data[0]) == 0x45 &&
-      GETJOCTET(srcinfo->marker_list->data[1]) == 0x78 &&
-      GETJOCTET(srcinfo->marker_list->data[2]) == 0x69 &&
-      GETJOCTET(srcinfo->marker_list->data[3]) == 0x66 &&
-      GETJOCTET(srcinfo->marker_list->data[4]) == 0 &&
-      GETJOCTET(srcinfo->marker_list->data[5]) == 0) {
-    /* Suppress output of JFIF marker */
-    dstinfo->write_JFIF_header = FALSE;
-    /* Adjust Exif image parameters */
-    if (dstinfo->jpeg_width != srcinfo->image_width ||
-	dstinfo->jpeg_height != srcinfo->image_height)
-      /* Align data segment to start of TIFF structure for parsing */
-      adjust_exif_parameters(srcinfo->marker_list->data + 6,
-	srcinfo->marker_list->data_length - 6,
-	dstinfo->jpeg_width, dstinfo->jpeg_height);
-  }
-
-  /* Return the appropriate output data set */
-  if (info->workspace_coef_arrays != NULL)
-    return info->workspace_coef_arrays;
-  return src_coef_arrays;
-}
-
-
-/* Execute the actual transformation, if any.
- *
- * This must be called *after* jpeg_write_coefficients, because it depends
- * on jpeg_write_coefficients to have computed subsidiary values such as
- * the per-component width and height fields in the destination object.
- *
- * Note that some transformations will modify the source data arrays!
- */
-
-GLOBAL(void)
-jtransform_execute_transform (j_decompress_ptr srcinfo,
-			      j_compress_ptr dstinfo,
-			      jvirt_barray_ptr *src_coef_arrays,
-			      jpeg_transform_info *info)
-{
-  jvirt_barray_ptr *dst_coef_arrays = info->workspace_coef_arrays;
-
-  /* Note: conditions tested here should match those in switch statement
-   * in jtransform_request_workspace()
-   */
-  switch (info->transform) {
-  case JXFORM_NONE:
-    if (info->x_crop_offset != 0 || info->y_crop_offset != 0)
-      do_crop(srcinfo, dstinfo, info->x_crop_offset, info->y_crop_offset,
-	      src_coef_arrays, dst_coef_arrays);
-    break;
-  case JXFORM_FLIP_H:
-    if (info->y_crop_offset != 0)
-      do_flip_h(srcinfo, dstinfo, info->x_crop_offset, info->y_crop_offset,
-		src_coef_arrays, dst_coef_arrays);
-    else
-      do_flip_h_no_crop(srcinfo, dstinfo, info->x_crop_offset,
-			src_coef_arrays);
-    break;
-  case JXFORM_FLIP_V:
-    do_flip_v(srcinfo, dstinfo, info->x_crop_offset, info->y_crop_offset,
-	      src_coef_arrays, dst_coef_arrays);
-    break;
-  case JXFORM_TRANSPOSE:
-    do_transpose(srcinfo, dstinfo, info->x_crop_offset, info->y_crop_offset,
-		 src_coef_arrays, dst_coef_arrays);
-    break;
-  case JXFORM_TRANSVERSE:
-    do_transverse(srcinfo, dstinfo, info->x_crop_offset, info->y_crop_offset,
-		  src_coef_arrays, dst_coef_arrays);
-    break;
-  case JXFORM_ROT_90:
-    do_rot_90(srcinfo, dstinfo, info->x_crop_offset, info->y_crop_offset,
-	      src_coef_arrays, dst_coef_arrays);
-    break;
-  case JXFORM_ROT_180:
-    do_rot_180(srcinfo, dstinfo, info->x_crop_offset, info->y_crop_offset,
-	       src_coef_arrays, dst_coef_arrays);
-    break;
-  case JXFORM_ROT_270:
-    do_rot_270(srcinfo, dstinfo, info->x_crop_offset, info->y_crop_offset,
-	       src_coef_arrays, dst_coef_arrays);
-    break;
-  }
-}
-
-/* jtransform_perfect_transform
- *
- * Determine whether lossless transformation is perfectly
- * possible for a specified image and transformation.
- *
- * Inputs:
- *   image_width, image_height: source image dimensions.
- *   MCU_width, MCU_height: pixel dimensions of MCU.
- *   transform: transformation identifier.
- * Parameter sources from initialized jpeg_struct
- * (after reading source header):
- *   image_width = cinfo.image_width
- *   image_height = cinfo.image_height
- *   MCU_width = cinfo.max_h_samp_factor * cinfo.block_size
- *   MCU_height = cinfo.max_v_samp_factor * cinfo.block_size
- * Result:
- *   TRUE = perfect transformation possible
- *   FALSE = perfect transformation not possible
- *           (may use custom action then)
- */
-
-GLOBAL(boolean)
-jtransform_perfect_transform(JDIMENSION image_width, JDIMENSION image_height,
-			     int MCU_width, int MCU_height,
-			     JXFORM_CODE transform)
-{
-  boolean result = TRUE; /* initialize TRUE */
-
-  switch (transform) {
-  case JXFORM_FLIP_H:
-  case JXFORM_ROT_270:
-    if (image_width % (JDIMENSION) MCU_width)
-      result = FALSE;
-    break;
-  case JXFORM_FLIP_V:
-  case JXFORM_ROT_90:
-    if (image_height % (JDIMENSION) MCU_height)
-      result = FALSE;
-    break;
-  case JXFORM_TRANSVERSE:
-  case JXFORM_ROT_180:
-    if (image_width % (JDIMENSION) MCU_width)
-      result = FALSE;
-    if (image_height % (JDIMENSION) MCU_height)
-      result = FALSE;
-    break;
-  default:
-    break;
-  }
-
-  return result;
-}
-
-#endif /* TRANSFORMS_SUPPORTED */
-
-
-/* Setup decompression object to save desired markers in memory.
- * This must be called before jpeg_read_header() to have the desired effect.
- */
-
-GLOBAL(void)
-jcopy_markers_setup (j_decompress_ptr srcinfo, JCOPY_OPTION option)
-{
-#ifdef SAVE_MARKERS_SUPPORTED
-  int m;
-
-  /* Save comments except under NONE option */
-  if (option != JCOPYOPT_NONE) {
-    jpeg_save_markers(srcinfo, JPEG_COM, 0xFFFF);
-  }
-  /* Save all types of APPn markers iff ALL option */
-  if (option == JCOPYOPT_ALL) {
-    for (m = 0; m < 16; m++)
-      jpeg_save_markers(srcinfo, JPEG_APP0 + m, 0xFFFF);
-  }
-#endif /* SAVE_MARKERS_SUPPORTED */
-}
-
-/* Copy markers saved in the given source object to the destination object.
- * This should be called just after jpeg_start_compress() or
- * jpeg_write_coefficients().
- * Note that those routines will have written the SOI, and also the
- * JFIF APP0 or Adobe APP14 markers if selected.
- */
-
-GLOBAL(void)
-jcopy_markers_execute (j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
-		       JCOPY_OPTION option)
-{
-  jpeg_saved_marker_ptr marker;
-
-  /* In the current implementation, we don't actually need to examine the
-   * option flag here; we just copy everything that got saved.
-   * But to avoid confusion, we do not output JFIF and Adobe APP14 markers
-   * if the encoder library already wrote one.
-   */
-  for (marker = srcinfo->marker_list; marker != NULL; marker = marker->next) {
-    if (dstinfo->write_JFIF_header &&
-	marker->marker == JPEG_APP0 &&
-	marker->data_length >= 5 &&
-	GETJOCTET(marker->data[0]) == 0x4A &&
-	GETJOCTET(marker->data[1]) == 0x46 &&
-	GETJOCTET(marker->data[2]) == 0x49 &&
-	GETJOCTET(marker->data[3]) == 0x46 &&
-	GETJOCTET(marker->data[4]) == 0)
-      continue;			/* reject duplicate JFIF */
-    if (dstinfo->write_Adobe_marker &&
-	marker->marker == JPEG_APP0+14 &&
-	marker->data_length >= 5 &&
-	GETJOCTET(marker->data[0]) == 0x41 &&
-	GETJOCTET(marker->data[1]) == 0x64 &&
-	GETJOCTET(marker->data[2]) == 0x6F &&
-	GETJOCTET(marker->data[3]) == 0x62 &&
-	GETJOCTET(marker->data[4]) == 0x65)
-      continue;			/* reject duplicate Adobe */
-#ifdef NEED_FAR_POINTERS
-    /* We could use jpeg_write_marker if the data weren't FAR... */
-    {
-      unsigned int i;
-      jpeg_write_m_header(dstinfo, marker->marker, marker->data_length);
-      for (i = 0; i < marker->data_length; i++)
-	jpeg_write_m_byte(dstinfo, marker->data[i]);
-    }
-#else
-    jpeg_write_marker(dstinfo, marker->marker,
-		      marker->data, marker->data_length);
-#endif
-  }
-}
+/*
+ * transupp.c
+ *
+ * Copyright (C) 1997-2012, Thomas G. Lane, Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains image transformation routines and other utility code
+ * used by the jpegtran sample application.  These are NOT part of the core
+ * JPEG library.  But we keep these routines separate from jpegtran.c to
+ * ease the task of maintaining jpegtran-like programs that have other user
+ * interfaces.
+ */
+
+/* Although this file really shouldn't have access to the library internals,
+ * it's helpful to let it call jround_up() and jcopy_block_row().
+ */
+#define JPEG_INTERNALS
+
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "transupp.h"		/* My own external interface */
+#include <ctype.h>		/* to declare isdigit() */
+
+
+#if TRANSFORMS_SUPPORTED
+
+/*
+ * Lossless image transformation routines.  These routines work on DCT
+ * coefficient arrays and thus do not require any lossy decompression
+ * or recompression of the image.
+ * Thanks to Guido Vollbeding for the initial design and code of this feature,
+ * and to Ben Jackson for introducing the cropping feature.
+ *
+ * Horizontal flipping is done in-place, using a single top-to-bottom
+ * pass through the virtual source array.  It will thus be much the
+ * fastest option for images larger than main memory.
+ *
+ * The other routines require a set of destination virtual arrays, so they
+ * need twice as much memory as jpegtran normally does.  The destination
+ * arrays are always written in normal scan order (top to bottom) because
+ * the virtual array manager expects this.  The source arrays will be scanned
+ * in the corresponding order, which means multiple passes through the source
+ * arrays for most of the transforms.  That could result in much thrashing
+ * if the image is larger than main memory.
+ *
+ * If cropping or trimming is involved, the destination arrays may be smaller
+ * than the source arrays.  Note it is not possible to do horizontal flip
+ * in-place when a nonzero Y crop offset is specified, since we'd have to move
+ * data from one block row to another but the virtual array manager doesn't
+ * guarantee we can touch more than one row at a time.  So in that case,
+ * we have to use a separate destination array.
+ *
+ * Some notes about the operating environment of the individual transform
+ * routines:
+ * 1. Both the source and destination virtual arrays are allocated from the
+ *    source JPEG object, and therefore should be manipulated by calling the
+ *    source's memory manager.
+ * 2. The destination's component count should be used.  It may be smaller
+ *    than the source's when forcing to grayscale.
+ * 3. Likewise the destination's sampling factors should be used.  When
+ *    forcing to grayscale the destination's sampling factors will be all 1,
+ *    and we may as well take that as the effective iMCU size.
+ * 4. When "trim" is in effect, the destination's dimensions will be the
+ *    trimmed values but the source's will be untrimmed.
+ * 5. When "crop" is in effect, the destination's dimensions will be the
+ *    cropped values but the source's will be uncropped.  Each transform
+ *    routine is responsible for picking up source data starting at the
+ *    correct X and Y offset for the crop region.  (The X and Y offsets
+ *    passed to the transform routines are measured in iMCU blocks of the
+ *    destination.)
+ * 6. All the routines assume that the source and destination buffers are
+ *    padded out to a full iMCU boundary.  This is true, although for the
+ *    source buffer it is an undocumented property of jdcoefct.c.
+ */
+
+
+LOCAL(void)
+do_crop (j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
+	 JDIMENSION x_crop_offset, JDIMENSION y_crop_offset,
+	 jvirt_barray_ptr *src_coef_arrays,
+	 jvirt_barray_ptr *dst_coef_arrays)
+/* Crop.  This is only used when no rotate/flip is requested with the crop. */
+{
+  JDIMENSION dst_blk_y, x_crop_blocks, y_crop_blocks;
+  int ci, offset_y;
+  JBLOCKARRAY src_buffer, dst_buffer;
+  jpeg_component_info *compptr;
+
+  /* We simply have to copy the right amount of data (the destination's
+   * image size) starting at the given X and Y offsets in the source.
+   */
+  for (ci = 0; ci < dstinfo->num_components; ci++) {
+    compptr = dstinfo->comp_info + ci;
+    x_crop_blocks = x_crop_offset * compptr->h_samp_factor;
+    y_crop_blocks = y_crop_offset * compptr->v_samp_factor;
+    for (dst_blk_y = 0; dst_blk_y < compptr->height_in_blocks;
+	 dst_blk_y += compptr->v_samp_factor) {
+      dst_buffer = (*srcinfo->mem->access_virt_barray)
+	((j_common_ptr) srcinfo, dst_coef_arrays[ci], dst_blk_y,
+	 (JDIMENSION) compptr->v_samp_factor, TRUE);
+      src_buffer = (*srcinfo->mem->access_virt_barray)
+	((j_common_ptr) srcinfo, src_coef_arrays[ci],
+	 dst_blk_y + y_crop_blocks,
+	 (JDIMENSION) compptr->v_samp_factor, FALSE);
+      for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
+	jcopy_block_row(src_buffer[offset_y] + x_crop_blocks,
+			dst_buffer[offset_y],
+			compptr->width_in_blocks);
+      }
+    }
+  }
+}
+
+
+LOCAL(void)
+do_flip_h_no_crop (j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
+		   JDIMENSION x_crop_offset,
+		   jvirt_barray_ptr *src_coef_arrays)
+/* Horizontal flip; done in-place, so no separate dest array is required.
+ * NB: this only works when y_crop_offset is zero.
+ */
+{
+  JDIMENSION MCU_cols, comp_width, blk_x, blk_y, x_crop_blocks;
+  int ci, k, offset_y;
+  JBLOCKARRAY buffer;
+  JCOEFPTR ptr1, ptr2;
+  JCOEF temp1, temp2;
+  jpeg_component_info *compptr;
+
+  /* Horizontal mirroring of DCT blocks is accomplished by swapping
+   * pairs of blocks in-place.  Within a DCT block, we perform horizontal
+   * mirroring by changing the signs of odd-numbered columns.
+   * Partial iMCUs at the right edge are left untouched.
+   */
+  MCU_cols = srcinfo->output_width /
+    (dstinfo->max_h_samp_factor * dstinfo->min_DCT_h_scaled_size);
+
+  for (ci = 0; ci < dstinfo->num_components; ci++) {
+    compptr = dstinfo->comp_info + ci;
+    comp_width = MCU_cols * compptr->h_samp_factor;
+    x_crop_blocks = x_crop_offset * compptr->h_samp_factor;
+    for (blk_y = 0; blk_y < compptr->height_in_blocks;
+	 blk_y += compptr->v_samp_factor) {
+      buffer = (*srcinfo->mem->access_virt_barray)
+	((j_common_ptr) srcinfo, src_coef_arrays[ci], blk_y,
+	 (JDIMENSION) compptr->v_samp_factor, TRUE);
+      for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
+	/* Do the mirroring */
+	for (blk_x = 0; blk_x * 2 < comp_width; blk_x++) {
+	  ptr1 = buffer[offset_y][blk_x];
+	  ptr2 = buffer[offset_y][comp_width - blk_x - 1];
+	  /* this unrolled loop doesn't need to know which row it's on... */
+	  for (k = 0; k < DCTSIZE2; k += 2) {
+	    temp1 = *ptr1;	/* swap even column */
+	    temp2 = *ptr2;
+	    *ptr1++ = temp2;
+	    *ptr2++ = temp1;
+	    temp1 = *ptr1;	/* swap odd column with sign change */
+	    temp2 = *ptr2;
+	    *ptr1++ = -temp2;
+	    *ptr2++ = -temp1;
+	  }
+	}
+	if (x_crop_blocks > 0) {
+	  /* Now left-justify the portion of the data to be kept.
+	   * We can't use a single jcopy_block_row() call because that routine
+	   * depends on memcpy(), whose behavior is unspecified for overlapping
+	   * source and destination areas.  Sigh.
+	   */
+	  for (blk_x = 0; blk_x < compptr->width_in_blocks; blk_x++) {
+	    jcopy_block_row(buffer[offset_y] + blk_x + x_crop_blocks,
+			    buffer[offset_y] + blk_x,
+			    (JDIMENSION) 1);
+	  }
+	}
+      }
+    }
+  }
+}
+
+
+LOCAL(void)
+do_flip_h (j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
+	   JDIMENSION x_crop_offset, JDIMENSION y_crop_offset,
+	   jvirt_barray_ptr *src_coef_arrays,
+	   jvirt_barray_ptr *dst_coef_arrays)
+/* Horizontal flip in general cropping case */
+{
+  JDIMENSION MCU_cols, comp_width, dst_blk_x, dst_blk_y;
+  JDIMENSION x_crop_blocks, y_crop_blocks;
+  int ci, k, offset_y;
+  JBLOCKARRAY src_buffer, dst_buffer;
+  JBLOCKROW src_row_ptr, dst_row_ptr;
+  JCOEFPTR src_ptr, dst_ptr;
+  jpeg_component_info *compptr;
+
+  /* Here we must output into a separate array because we can't touch
+   * different rows of a single virtual array simultaneously.  Otherwise,
+   * this is essentially the same as the routine above.
+   */
+  MCU_cols = srcinfo->output_width /
+    (dstinfo->max_h_samp_factor * dstinfo->min_DCT_h_scaled_size);
+
+  for (ci = 0; ci < dstinfo->num_components; ci++) {
+    compptr = dstinfo->comp_info + ci;
+    comp_width = MCU_cols * compptr->h_samp_factor;
+    x_crop_blocks = x_crop_offset * compptr->h_samp_factor;
+    y_crop_blocks = y_crop_offset * compptr->v_samp_factor;
+    for (dst_blk_y = 0; dst_blk_y < compptr->height_in_blocks;
+	 dst_blk_y += compptr->v_samp_factor) {
+      dst_buffer = (*srcinfo->mem->access_virt_barray)
+	((j_common_ptr) srcinfo, dst_coef_arrays[ci], dst_blk_y,
+	 (JDIMENSION) compptr->v_samp_factor, TRUE);
+      src_buffer = (*srcinfo->mem->access_virt_barray)
+	((j_common_ptr) srcinfo, src_coef_arrays[ci],
+	 dst_blk_y + y_crop_blocks,
+	 (JDIMENSION) compptr->v_samp_factor, FALSE);
+      for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
+	dst_row_ptr = dst_buffer[offset_y];
+	src_row_ptr = src_buffer[offset_y];
+	for (dst_blk_x = 0; dst_blk_x < compptr->width_in_blocks; dst_blk_x++) {
+	  if (x_crop_blocks + dst_blk_x < comp_width) {
+	    /* Do the mirrorable blocks */
+	    dst_ptr = dst_row_ptr[dst_blk_x];
+	    src_ptr = src_row_ptr[comp_width - x_crop_blocks - dst_blk_x - 1];
+	    /* this unrolled loop doesn't need to know which row it's on... */
+	    for (k = 0; k < DCTSIZE2; k += 2) {
+	      *dst_ptr++ = *src_ptr++;	 /* copy even column */
+	      *dst_ptr++ = - *src_ptr++; /* copy odd column with sign change */
+	    }
+	  } else {
+	    /* Copy last partial block(s) verbatim */
+	    jcopy_block_row(src_row_ptr + dst_blk_x + x_crop_blocks,
+			    dst_row_ptr + dst_blk_x,
+			    (JDIMENSION) 1);
+	  }
+	}
+      }
+    }
+  }
+}
+
+
+LOCAL(void)
+do_flip_v (j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
+	   JDIMENSION x_crop_offset, JDIMENSION y_crop_offset,
+	   jvirt_barray_ptr *src_coef_arrays,
+	   jvirt_barray_ptr *dst_coef_arrays)
+/* Vertical flip */
+{
+  JDIMENSION MCU_rows, comp_height, dst_blk_x, dst_blk_y;
+  JDIMENSION x_crop_blocks, y_crop_blocks;
+  int ci, i, j, offset_y;
+  JBLOCKARRAY src_buffer, dst_buffer;
+  JBLOCKROW src_row_ptr, dst_row_ptr;
+  JCOEFPTR src_ptr, dst_ptr;
+  jpeg_component_info *compptr;
+
+  /* We output into a separate array because we can't touch different
+   * rows of the source virtual array simultaneously.  Otherwise, this
+   * is a pretty straightforward analog of horizontal flip.
+   * Within a DCT block, vertical mirroring is done by changing the signs
+   * of odd-numbered rows.
+   * Partial iMCUs at the bottom edge are copied verbatim.
+   */
+  MCU_rows = srcinfo->output_height /
+    (dstinfo->max_v_samp_factor * dstinfo->min_DCT_v_scaled_size);
+
+  for (ci = 0; ci < dstinfo->num_components; ci++) {
+    compptr = dstinfo->comp_info + ci;
+    comp_height = MCU_rows * compptr->v_samp_factor;
+    x_crop_blocks = x_crop_offset * compptr->h_samp_factor;
+    y_crop_blocks = y_crop_offset * compptr->v_samp_factor;
+    for (dst_blk_y = 0; dst_blk_y < compptr->height_in_blocks;
+	 dst_blk_y += compptr->v_samp_factor) {
+      dst_buffer = (*srcinfo->mem->access_virt_barray)
+	((j_common_ptr) srcinfo, dst_coef_arrays[ci], dst_blk_y,
+	 (JDIMENSION) compptr->v_samp_factor, TRUE);
+      if (y_crop_blocks + dst_blk_y < comp_height) {
+	/* Row is within the mirrorable area. */
+	src_buffer = (*srcinfo->mem->access_virt_barray)
+	  ((j_common_ptr) srcinfo, src_coef_arrays[ci],
+	   comp_height - y_crop_blocks - dst_blk_y -
+	   (JDIMENSION) compptr->v_samp_factor,
+	   (JDIMENSION) compptr->v_samp_factor, FALSE);
+      } else {
+	/* Bottom-edge blocks will be copied verbatim. */
+	src_buffer = (*srcinfo->mem->access_virt_barray)
+	  ((j_common_ptr) srcinfo, src_coef_arrays[ci],
+	   dst_blk_y + y_crop_blocks,
+	   (JDIMENSION) compptr->v_samp_factor, FALSE);
+      }
+      for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
+	if (y_crop_blocks + dst_blk_y < comp_height) {
+	  /* Row is within the mirrorable area. */
+	  dst_row_ptr = dst_buffer[offset_y];
+	  src_row_ptr = src_buffer[compptr->v_samp_factor - offset_y - 1];
+	  src_row_ptr += x_crop_blocks;
+	  for (dst_blk_x = 0; dst_blk_x < compptr->width_in_blocks;
+	       dst_blk_x++) {
+	    dst_ptr = dst_row_ptr[dst_blk_x];
+	    src_ptr = src_row_ptr[dst_blk_x];
+	    for (i = 0; i < DCTSIZE; i += 2) {
+	      /* copy even row */
+	      for (j = 0; j < DCTSIZE; j++)
+		*dst_ptr++ = *src_ptr++;
+	      /* copy odd row with sign change */
+	      for (j = 0; j < DCTSIZE; j++)
+		*dst_ptr++ = - *src_ptr++;
+	    }
+	  }
+	} else {
+	  /* Just copy row verbatim. */
+	  jcopy_block_row(src_buffer[offset_y] + x_crop_blocks,
+			  dst_buffer[offset_y],
+			  compptr->width_in_blocks);
+	}
+      }
+    }
+  }
+}
+
+
+LOCAL(void)
+do_transpose (j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
+	      JDIMENSION x_crop_offset, JDIMENSION y_crop_offset,
+	      jvirt_barray_ptr *src_coef_arrays,
+	      jvirt_barray_ptr *dst_coef_arrays)
+/* Transpose source into destination */
+{
+  JDIMENSION dst_blk_x, dst_blk_y, x_crop_blocks, y_crop_blocks;
+  int ci, i, j, offset_x, offset_y;
+  JBLOCKARRAY src_buffer, dst_buffer;
+  JCOEFPTR src_ptr, dst_ptr;
+  jpeg_component_info *compptr;
+
+  /* Transposing pixels within a block just requires transposing the
+   * DCT coefficients.
+   * Partial iMCUs at the edges require no special treatment; we simply
+   * process all the available DCT blocks for every component.
+   */
+  for (ci = 0; ci < dstinfo->num_components; ci++) {
+    compptr = dstinfo->comp_info + ci;
+    x_crop_blocks = x_crop_offset * compptr->h_samp_factor;
+    y_crop_blocks = y_crop_offset * compptr->v_samp_factor;
+    for (dst_blk_y = 0; dst_blk_y < compptr->height_in_blocks;
+	 dst_blk_y += compptr->v_samp_factor) {
+      dst_buffer = (*srcinfo->mem->access_virt_barray)
+	((j_common_ptr) srcinfo, dst_coef_arrays[ci], dst_blk_y,
+	 (JDIMENSION) compptr->v_samp_factor, TRUE);
+      for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
+	for (dst_blk_x = 0; dst_blk_x < compptr->width_in_blocks;
+	     dst_blk_x += compptr->h_samp_factor) {
+	  src_buffer = (*srcinfo->mem->access_virt_barray)
+	    ((j_common_ptr) srcinfo, src_coef_arrays[ci],
+	     dst_blk_x + x_crop_blocks,
+	     (JDIMENSION) compptr->h_samp_factor, FALSE);
+	  for (offset_x = 0; offset_x < compptr->h_samp_factor; offset_x++) {
+	    dst_ptr = dst_buffer[offset_y][dst_blk_x + offset_x];
+	    src_ptr = src_buffer[offset_x][dst_blk_y + offset_y + y_crop_blocks];
+	    for (i = 0; i < DCTSIZE; i++)
+	      for (j = 0; j < DCTSIZE; j++)
+		dst_ptr[j*DCTSIZE+i] = src_ptr[i*DCTSIZE+j];
+	  }
+	}
+      }
+    }
+  }
+}
+
+
+LOCAL(void)
+do_rot_90 (j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
+	   JDIMENSION x_crop_offset, JDIMENSION y_crop_offset,
+	   jvirt_barray_ptr *src_coef_arrays,
+	   jvirt_barray_ptr *dst_coef_arrays)
+/* 90 degree rotation is equivalent to
+ *   1. Transposing the image;
+ *   2. Horizontal mirroring.
+ * These two steps are merged into a single processing routine.
+ */
+{
+  JDIMENSION MCU_cols, comp_width, dst_blk_x, dst_blk_y;
+  JDIMENSION x_crop_blocks, y_crop_blocks;
+  int ci, i, j, offset_x, offset_y;
+  JBLOCKARRAY src_buffer, dst_buffer;
+  JCOEFPTR src_ptr, dst_ptr;
+  jpeg_component_info *compptr;
+
+  /* Because of the horizontal mirror step, we can't process partial iMCUs
+   * at the (output) right edge properly.  They just get transposed and
+   * not mirrored.
+   */
+  MCU_cols = srcinfo->output_height /
+    (dstinfo->max_h_samp_factor * dstinfo->min_DCT_h_scaled_size);
+
+  for (ci = 0; ci < dstinfo->num_components; ci++) {
+    compptr = dstinfo->comp_info + ci;
+    comp_width = MCU_cols * compptr->h_samp_factor;
+    x_crop_blocks = x_crop_offset * compptr->h_samp_factor;
+    y_crop_blocks = y_crop_offset * compptr->v_samp_factor;
+    for (dst_blk_y = 0; dst_blk_y < compptr->height_in_blocks;
+	 dst_blk_y += compptr->v_samp_factor) {
+      dst_buffer = (*srcinfo->mem->access_virt_barray)
+	((j_common_ptr) srcinfo, dst_coef_arrays[ci], dst_blk_y,
+	 (JDIMENSION) compptr->v_samp_factor, TRUE);
+      for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
+	for (dst_blk_x = 0; dst_blk_x < compptr->width_in_blocks;
+	     dst_blk_x += compptr->h_samp_factor) {
+	  if (x_crop_blocks + dst_blk_x < comp_width) {
+	    /* Block is within the mirrorable area. */
+	    src_buffer = (*srcinfo->mem->access_virt_barray)
+	      ((j_common_ptr) srcinfo, src_coef_arrays[ci],
+	       comp_width - x_crop_blocks - dst_blk_x -
+	       (JDIMENSION) compptr->h_samp_factor,
+	       (JDIMENSION) compptr->h_samp_factor, FALSE);
+	  } else {
+	    /* Edge blocks are transposed but not mirrored. */
+	    src_buffer = (*srcinfo->mem->access_virt_barray)
+	      ((j_common_ptr) srcinfo, src_coef_arrays[ci],
+	       dst_blk_x + x_crop_blocks,
+	       (JDIMENSION) compptr->h_samp_factor, FALSE);
+	  }
+	  for (offset_x = 0; offset_x < compptr->h_samp_factor; offset_x++) {
+	    dst_ptr = dst_buffer[offset_y][dst_blk_x + offset_x];
+	    if (x_crop_blocks + dst_blk_x < comp_width) {
+	      /* Block is within the mirrorable area. */
+	      src_ptr = src_buffer[compptr->h_samp_factor - offset_x - 1]
+		[dst_blk_y + offset_y + y_crop_blocks];
+	      for (i = 0; i < DCTSIZE; i++) {
+		for (j = 0; j < DCTSIZE; j++)
+		  dst_ptr[j*DCTSIZE+i] = src_ptr[i*DCTSIZE+j];
+		i++;
+		for (j = 0; j < DCTSIZE; j++)
+		  dst_ptr[j*DCTSIZE+i] = -src_ptr[i*DCTSIZE+j];
+	      }
+	    } else {
+	      /* Edge blocks are transposed but not mirrored. */
+	      src_ptr = src_buffer[offset_x]
+		[dst_blk_y + offset_y + y_crop_blocks];
+	      for (i = 0; i < DCTSIZE; i++)
+		for (j = 0; j < DCTSIZE; j++)
+		  dst_ptr[j*DCTSIZE+i] = src_ptr[i*DCTSIZE+j];
+	    }
+	  }
+	}
+      }
+    }
+  }
+}
+
+
+LOCAL(void)
+do_rot_270 (j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
+	    JDIMENSION x_crop_offset, JDIMENSION y_crop_offset,
+	    jvirt_barray_ptr *src_coef_arrays,
+	    jvirt_barray_ptr *dst_coef_arrays)
+/* 270 degree rotation is equivalent to
+ *   1. Horizontal mirroring;
+ *   2. Transposing the image.
+ * These two steps are merged into a single processing routine.
+ */
+{
+  JDIMENSION MCU_rows, comp_height, dst_blk_x, dst_blk_y;
+  JDIMENSION x_crop_blocks, y_crop_blocks;
+  int ci, i, j, offset_x, offset_y;
+  JBLOCKARRAY src_buffer, dst_buffer;
+  JCOEFPTR src_ptr, dst_ptr;
+  jpeg_component_info *compptr;
+
+  /* Because of the horizontal mirror step, we can't process partial iMCUs
+   * at the (output) bottom edge properly.  They just get transposed and
+   * not mirrored.
+   */
+  MCU_rows = srcinfo->output_width /
+    (dstinfo->max_v_samp_factor * dstinfo->min_DCT_v_scaled_size);
+
+  for (ci = 0; ci < dstinfo->num_components; ci++) {
+    compptr = dstinfo->comp_info + ci;
+    comp_height = MCU_rows * compptr->v_samp_factor;
+    x_crop_blocks = x_crop_offset * compptr->h_samp_factor;
+    y_crop_blocks = y_crop_offset * compptr->v_samp_factor;
+    for (dst_blk_y = 0; dst_blk_y < compptr->height_in_blocks;
+	 dst_blk_y += compptr->v_samp_factor) {
+      dst_buffer = (*srcinfo->mem->access_virt_barray)
+	((j_common_ptr) srcinfo, dst_coef_arrays[ci], dst_blk_y,
+	 (JDIMENSION) compptr->v_samp_factor, TRUE);
+      for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
+	for (dst_blk_x = 0; dst_blk_x < compptr->width_in_blocks;
+	     dst_blk_x += compptr->h_samp_factor) {
+	  src_buffer = (*srcinfo->mem->access_virt_barray)
+	    ((j_common_ptr) srcinfo, src_coef_arrays[ci],
+	     dst_blk_x + x_crop_blocks,
+	     (JDIMENSION) compptr->h_samp_factor, FALSE);
+	  for (offset_x = 0; offset_x < compptr->h_samp_factor; offset_x++) {
+	    dst_ptr = dst_buffer[offset_y][dst_blk_x + offset_x];
+	    if (y_crop_blocks + dst_blk_y < comp_height) {
+	      /* Block is within the mirrorable area. */
+	      src_ptr = src_buffer[offset_x]
+		[comp_height - y_crop_blocks - dst_blk_y - offset_y - 1];
+	      for (i = 0; i < DCTSIZE; i++) {
+		for (j = 0; j < DCTSIZE; j++) {
+		  dst_ptr[j*DCTSIZE+i] = src_ptr[i*DCTSIZE+j];
+		  j++;
+		  dst_ptr[j*DCTSIZE+i] = -src_ptr[i*DCTSIZE+j];
+		}
+	      }
+	    } else {
+	      /* Edge blocks are transposed but not mirrored. */
+	      src_ptr = src_buffer[offset_x]
+		[dst_blk_y + offset_y + y_crop_blocks];
+	      for (i = 0; i < DCTSIZE; i++)
+		for (j = 0; j < DCTSIZE; j++)
+		  dst_ptr[j*DCTSIZE+i] = src_ptr[i*DCTSIZE+j];
+	    }
+	  }
+	}
+      }
+    }
+  }
+}
+
+
+LOCAL(void)
+do_rot_180 (j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
+	    JDIMENSION x_crop_offset, JDIMENSION y_crop_offset,
+	    jvirt_barray_ptr *src_coef_arrays,
+	    jvirt_barray_ptr *dst_coef_arrays)
+/* 180 degree rotation is equivalent to
+ *   1. Vertical mirroring;
+ *   2. Horizontal mirroring.
+ * These two steps are merged into a single processing routine.
+ */
+{
+  JDIMENSION MCU_cols, MCU_rows, comp_width, comp_height, dst_blk_x, dst_blk_y;
+  JDIMENSION x_crop_blocks, y_crop_blocks;
+  int ci, i, j, offset_y;
+  JBLOCKARRAY src_buffer, dst_buffer;
+  JBLOCKROW src_row_ptr, dst_row_ptr;
+  JCOEFPTR src_ptr, dst_ptr;
+  jpeg_component_info *compptr;
+
+  MCU_cols = srcinfo->output_width /
+    (dstinfo->max_h_samp_factor * dstinfo->min_DCT_h_scaled_size);
+  MCU_rows = srcinfo->output_height /
+    (dstinfo->max_v_samp_factor * dstinfo->min_DCT_v_scaled_size);
+
+  for (ci = 0; ci < dstinfo->num_components; ci++) {
+    compptr = dstinfo->comp_info + ci;
+    comp_width = MCU_cols * compptr->h_samp_factor;
+    comp_height = MCU_rows * compptr->v_samp_factor;
+    x_crop_blocks = x_crop_offset * compptr->h_samp_factor;
+    y_crop_blocks = y_crop_offset * compptr->v_samp_factor;
+    for (dst_blk_y = 0; dst_blk_y < compptr->height_in_blocks;
+	 dst_blk_y += compptr->v_samp_factor) {
+      dst_buffer = (*srcinfo->mem->access_virt_barray)
+	((j_common_ptr) srcinfo, dst_coef_arrays[ci], dst_blk_y,
+	 (JDIMENSION) compptr->v_samp_factor, TRUE);
+      if (y_crop_blocks + dst_blk_y < comp_height) {
+	/* Row is within the vertically mirrorable area. */
+	src_buffer = (*srcinfo->mem->access_virt_barray)
+	  ((j_common_ptr) srcinfo, src_coef_arrays[ci],
+	   comp_height - y_crop_blocks - dst_blk_y -
+	   (JDIMENSION) compptr->v_samp_factor,
+	   (JDIMENSION) compptr->v_samp_factor, FALSE);
+      } else {
+	/* Bottom-edge rows are only mirrored horizontally. */
+	src_buffer = (*srcinfo->mem->access_virt_barray)
+	  ((j_common_ptr) srcinfo, src_coef_arrays[ci],
+	   dst_blk_y + y_crop_blocks,
+	   (JDIMENSION) compptr->v_samp_factor, FALSE);
+      }
+      for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
+	dst_row_ptr = dst_buffer[offset_y];
+	if (y_crop_blocks + dst_blk_y < comp_height) {
+	  /* Row is within the mirrorable area. */
+	  src_row_ptr = src_buffer[compptr->v_samp_factor - offset_y - 1];
+	  for (dst_blk_x = 0; dst_blk_x < compptr->width_in_blocks; dst_blk_x++) {
+	    dst_ptr = dst_row_ptr[dst_blk_x];
+	    if (x_crop_blocks + dst_blk_x < comp_width) {
+	      /* Process the blocks that can be mirrored both ways. */
+	      src_ptr = src_row_ptr[comp_width - x_crop_blocks - dst_blk_x - 1];
+	      for (i = 0; i < DCTSIZE; i += 2) {
+		/* For even row, negate every odd column. */
+		for (j = 0; j < DCTSIZE; j += 2) {
+		  *dst_ptr++ = *src_ptr++;
+		  *dst_ptr++ = - *src_ptr++;
+		}
+		/* For odd row, negate every even column. */
+		for (j = 0; j < DCTSIZE; j += 2) {
+		  *dst_ptr++ = - *src_ptr++;
+		  *dst_ptr++ = *src_ptr++;
+		}
+	      }
+	    } else {
+	      /* Any remaining right-edge blocks are only mirrored vertically. */
+	      src_ptr = src_row_ptr[x_crop_blocks + dst_blk_x];
+	      for (i = 0; i < DCTSIZE; i += 2) {
+		for (j = 0; j < DCTSIZE; j++)
+		  *dst_ptr++ = *src_ptr++;
+		for (j = 0; j < DCTSIZE; j++)
+		  *dst_ptr++ = - *src_ptr++;
+	      }
+	    }
+	  }
+	} else {
+	  /* Remaining rows are just mirrored horizontally. */
+	  src_row_ptr = src_buffer[offset_y];
+	  for (dst_blk_x = 0; dst_blk_x < compptr->width_in_blocks; dst_blk_x++) {
+	    if (x_crop_blocks + dst_blk_x < comp_width) {
+	      /* Process the blocks that can be mirrored. */
+	      dst_ptr = dst_row_ptr[dst_blk_x];
+	      src_ptr = src_row_ptr[comp_width - x_crop_blocks - dst_blk_x - 1];
+	      for (i = 0; i < DCTSIZE2; i += 2) {
+		*dst_ptr++ = *src_ptr++;
+		*dst_ptr++ = - *src_ptr++;
+	      }
+	    } else {
+	      /* Any remaining right-edge blocks are only copied. */
+	      jcopy_block_row(src_row_ptr + dst_blk_x + x_crop_blocks,
+			      dst_row_ptr + dst_blk_x,
+			      (JDIMENSION) 1);
+	    }
+	  }
+	}
+      }
+    }
+  }
+}
+
+
+LOCAL(void)
+do_transverse (j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
+	       JDIMENSION x_crop_offset, JDIMENSION y_crop_offset,
+	       jvirt_barray_ptr *src_coef_arrays,
+	       jvirt_barray_ptr *dst_coef_arrays)
+/* Transverse transpose is equivalent to
+ *   1. 180 degree rotation;
+ *   2. Transposition;
+ * or
+ *   1. Horizontal mirroring;
+ *   2. Transposition;
+ *   3. Horizontal mirroring.
+ * These steps are merged into a single processing routine.
+ */
+{
+  JDIMENSION MCU_cols, MCU_rows, comp_width, comp_height, dst_blk_x, dst_blk_y;
+  JDIMENSION x_crop_blocks, y_crop_blocks;
+  int ci, i, j, offset_x, offset_y;
+  JBLOCKARRAY src_buffer, dst_buffer;
+  JCOEFPTR src_ptr, dst_ptr;
+  jpeg_component_info *compptr;
+
+  MCU_cols = srcinfo->output_height /
+    (dstinfo->max_h_samp_factor * dstinfo->min_DCT_h_scaled_size);
+  MCU_rows = srcinfo->output_width /
+    (dstinfo->max_v_samp_factor * dstinfo->min_DCT_v_scaled_size);
+
+  for (ci = 0; ci < dstinfo->num_components; ci++) {
+    compptr = dstinfo->comp_info + ci;
+    comp_width = MCU_cols * compptr->h_samp_factor;
+    comp_height = MCU_rows * compptr->v_samp_factor;
+    x_crop_blocks = x_crop_offset * compptr->h_samp_factor;
+    y_crop_blocks = y_crop_offset * compptr->v_samp_factor;
+    for (dst_blk_y = 0; dst_blk_y < compptr->height_in_blocks;
+	 dst_blk_y += compptr->v_samp_factor) {
+      dst_buffer = (*srcinfo->mem->access_virt_barray)
+	((j_common_ptr) srcinfo, dst_coef_arrays[ci], dst_blk_y,
+	 (JDIMENSION) compptr->v_samp_factor, TRUE);
+      for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
+	for (dst_blk_x = 0; dst_blk_x < compptr->width_in_blocks;
+	     dst_blk_x += compptr->h_samp_factor) {
+	  if (x_crop_blocks + dst_blk_x < comp_width) {
+	    /* Block is within the mirrorable area. */
+	    src_buffer = (*srcinfo->mem->access_virt_barray)
+	      ((j_common_ptr) srcinfo, src_coef_arrays[ci],
+	       comp_width - x_crop_blocks - dst_blk_x -
+	       (JDIMENSION) compptr->h_samp_factor,
+	       (JDIMENSION) compptr->h_samp_factor, FALSE);
+	  } else {
+	    src_buffer = (*srcinfo->mem->access_virt_barray)
+	      ((j_common_ptr) srcinfo, src_coef_arrays[ci],
+	       dst_blk_x + x_crop_blocks,
+	       (JDIMENSION) compptr->h_samp_factor, FALSE);
+	  }
+	  for (offset_x = 0; offset_x < compptr->h_samp_factor; offset_x++) {
+	    dst_ptr = dst_buffer[offset_y][dst_blk_x + offset_x];
+	    if (y_crop_blocks + dst_blk_y < comp_height) {
+	      if (x_crop_blocks + dst_blk_x < comp_width) {
+		/* Block is within the mirrorable area. */
+		src_ptr = src_buffer[compptr->h_samp_factor - offset_x - 1]
+		  [comp_height - y_crop_blocks - dst_blk_y - offset_y - 1];
+		for (i = 0; i < DCTSIZE; i++) {
+		  for (j = 0; j < DCTSIZE; j++) {
+		    dst_ptr[j*DCTSIZE+i] = src_ptr[i*DCTSIZE+j];
+		    j++;
+		    dst_ptr[j*DCTSIZE+i] = -src_ptr[i*DCTSIZE+j];
+		  }
+		  i++;
+		  for (j = 0; j < DCTSIZE; j++) {
+		    dst_ptr[j*DCTSIZE+i] = -src_ptr[i*DCTSIZE+j];
+		    j++;
+		    dst_ptr[j*DCTSIZE+i] = src_ptr[i*DCTSIZE+j];
+		  }
+		}
+	      } else {
+		/* Right-edge blocks are mirrored in y only */
+		src_ptr = src_buffer[offset_x]
+		  [comp_height - y_crop_blocks - dst_blk_y - offset_y - 1];
+		for (i = 0; i < DCTSIZE; i++) {
+		  for (j = 0; j < DCTSIZE; j++) {
+		    dst_ptr[j*DCTSIZE+i] = src_ptr[i*DCTSIZE+j];
+		    j++;
+		    dst_ptr[j*DCTSIZE+i] = -src_ptr[i*DCTSIZE+j];
+		  }
+		}
+	      }
+	    } else {
+	      if (x_crop_blocks + dst_blk_x < comp_width) {
+		/* Bottom-edge blocks are mirrored in x only */
+		src_ptr = src_buffer[compptr->h_samp_factor - offset_x - 1]
+		  [dst_blk_y + offset_y + y_crop_blocks];
+		for (i = 0; i < DCTSIZE; i++) {
+		  for (j = 0; j < DCTSIZE; j++)
+		    dst_ptr[j*DCTSIZE+i] = src_ptr[i*DCTSIZE+j];
+		  i++;
+		  for (j = 0; j < DCTSIZE; j++)
+		    dst_ptr[j*DCTSIZE+i] = -src_ptr[i*DCTSIZE+j];
+		}
+	      } else {
+		/* At lower right corner, just transpose, no mirroring */
+		src_ptr = src_buffer[offset_x]
+		  [dst_blk_y + offset_y + y_crop_blocks];
+		for (i = 0; i < DCTSIZE; i++)
+		  for (j = 0; j < DCTSIZE; j++)
+		    dst_ptr[j*DCTSIZE+i] = src_ptr[i*DCTSIZE+j];
+	      }
+	    }
+	  }
+	}
+      }
+    }
+  }
+}
+
+
+/* Parse an unsigned integer: subroutine for jtransform_parse_crop_spec.
+ * Returns TRUE if valid integer found, FALSE if not.
+ * *strptr is advanced over the digit string, and *result is set to its value.
+ */
+
+LOCAL(boolean)
+jt_read_integer (const char ** strptr, JDIMENSION * result)
+{
+  const char * ptr = *strptr;
+  JDIMENSION val = 0;
+
+  for (; isdigit(*ptr); ptr++) {
+    val = val * 10 + (JDIMENSION) (*ptr - '0');
+  }
+  *result = val;
+  if (ptr == *strptr)
+    return FALSE;		/* oops, no digits */
+  *strptr = ptr;
+  return TRUE;
+}
+
+
+/* Parse a crop specification (written in X11 geometry style).
+ * The routine returns TRUE if the spec string is valid, FALSE if not.
+ *
+ * The crop spec string should have the format
+ *	<width>[f]x<height>[f]{+-}<xoffset>{+-}<yoffset>
+ * where width, height, xoffset, and yoffset are unsigned integers.
+ * Each of the elements can be omitted to indicate a default value.
+ * (A weakness of this style is that it is not possible to omit xoffset
+ * while specifying yoffset, since they look alike.)
+ *
+ * This code is loosely based on XParseGeometry from the X11 distribution.
+ */
+
+GLOBAL(boolean)
+jtransform_parse_crop_spec (jpeg_transform_info *info, const char *spec)
+{
+  info->crop = FALSE;
+  info->crop_width_set = JCROP_UNSET;
+  info->crop_height_set = JCROP_UNSET;
+  info->crop_xoffset_set = JCROP_UNSET;
+  info->crop_yoffset_set = JCROP_UNSET;
+
+  if (isdigit(*spec)) {
+    /* fetch width */
+    if (! jt_read_integer(&spec, &info->crop_width))
+      return FALSE;
+    if (*spec == 'f' || *spec == 'F') {
+      spec++;
+      info->crop_width_set = JCROP_FORCE;
+    } else
+      info->crop_width_set = JCROP_POS;
+  }
+  if (*spec == 'x' || *spec == 'X') {
+    /* fetch height */
+    spec++;
+    if (! jt_read_integer(&spec, &info->crop_height))
+      return FALSE;
+    if (*spec == 'f' || *spec == 'F') {
+      spec++;
+      info->crop_height_set = JCROP_FORCE;
+    } else
+      info->crop_height_set = JCROP_POS;
+  }
+  if (*spec == '+' || *spec == '-') {
+    /* fetch xoffset */
+    info->crop_xoffset_set = (*spec == '-') ? JCROP_NEG : JCROP_POS;
+    spec++;
+    if (! jt_read_integer(&spec, &info->crop_xoffset))
+      return FALSE;
+  }
+  if (*spec == '+' || *spec == '-') {
+    /* fetch yoffset */
+    info->crop_yoffset_set = (*spec == '-') ? JCROP_NEG : JCROP_POS;
+    spec++;
+    if (! jt_read_integer(&spec, &info->crop_yoffset))
+      return FALSE;
+  }
+  /* We had better have gotten to the end of the string. */
+  if (*spec != '\0')
+    return FALSE;
+  info->crop = TRUE;
+  return TRUE;
+}
+
+
+/* Trim off any partial iMCUs on the indicated destination edge */
+
+LOCAL(void)
+trim_right_edge (jpeg_transform_info *info, JDIMENSION full_width)
+{
+  JDIMENSION MCU_cols;
+
+  MCU_cols = info->output_width / info->iMCU_sample_width;
+  if (MCU_cols > 0 && info->x_crop_offset + MCU_cols ==
+      full_width / info->iMCU_sample_width)
+    info->output_width = MCU_cols * info->iMCU_sample_width;
+}
+
+LOCAL(void)
+trim_bottom_edge (jpeg_transform_info *info, JDIMENSION full_height)
+{
+  JDIMENSION MCU_rows;
+
+  MCU_rows = info->output_height / info->iMCU_sample_height;
+  if (MCU_rows > 0 && info->y_crop_offset + MCU_rows ==
+      full_height / info->iMCU_sample_height)
+    info->output_height = MCU_rows * info->iMCU_sample_height;
+}
+
+
+/* Request any required workspace.
+ *
+ * This routine figures out the size that the output image will be
+ * (which implies that all the transform parameters must be set before
+ * it is called).
+ *
+ * We allocate the workspace virtual arrays from the source decompression
+ * object, so that all the arrays (both the original data and the workspace)
+ * will be taken into account while making memory management decisions.
+ * Hence, this routine must be called after jpeg_read_header (which reads
+ * the image dimensions) and before jpeg_read_coefficients (which realizes
+ * the source's virtual arrays).
+ *
+ * This function returns FALSE right away if -perfect is given
+ * and transformation is not perfect.  Otherwise returns TRUE.
+ */
+
+GLOBAL(boolean)
+jtransform_request_workspace (j_decompress_ptr srcinfo,
+			      jpeg_transform_info *info)
+{
+  jvirt_barray_ptr *coef_arrays;
+  boolean need_workspace, transpose_it;
+  jpeg_component_info *compptr;
+  JDIMENSION xoffset, yoffset;
+  JDIMENSION width_in_iMCUs, height_in_iMCUs;
+  JDIMENSION width_in_blocks, height_in_blocks;
+  int ci, h_samp_factor, v_samp_factor;
+
+  /* Determine number of components in output image */
+  if (info->force_grayscale &&
+      srcinfo->jpeg_color_space == JCS_YCbCr &&
+      srcinfo->num_components == 3)
+    /* We'll only process the first component */
+    info->num_components = 1;
+  else
+    /* Process all the components */
+    info->num_components = srcinfo->num_components;
+
+  /* Compute output image dimensions and related values. */
+  jpeg_core_output_dimensions(srcinfo);
+
+  /* Return right away if -perfect is given and transformation is not perfect.
+   */
+  if (info->perfect) {
+    if (info->num_components == 1) {
+      if (!jtransform_perfect_transform(srcinfo->output_width,
+	  srcinfo->output_height,
+	  srcinfo->min_DCT_h_scaled_size,
+	  srcinfo->min_DCT_v_scaled_size,
+	  info->transform))
+	return FALSE;
+    } else {
+      if (!jtransform_perfect_transform(srcinfo->output_width,
+	  srcinfo->output_height,
+	  srcinfo->max_h_samp_factor * srcinfo->min_DCT_h_scaled_size,
+	  srcinfo->max_v_samp_factor * srcinfo->min_DCT_v_scaled_size,
+	  info->transform))
+	return FALSE;
+    }
+  }
+
+  /* If there is only one output component, force the iMCU size to be 1;
+   * else use the source iMCU size.  (This allows us to do the right thing
+   * when reducing color to grayscale, and also provides a handy way of
+   * cleaning up "funny" grayscale images whose sampling factors are not 1x1.)
+   */
+  switch (info->transform) {
+  case JXFORM_TRANSPOSE:
+  case JXFORM_TRANSVERSE:
+  case JXFORM_ROT_90:
+  case JXFORM_ROT_270:
+    info->output_width = srcinfo->output_height;
+    info->output_height = srcinfo->output_width;
+    if (info->num_components == 1) {
+      info->iMCU_sample_width = srcinfo->min_DCT_v_scaled_size;
+      info->iMCU_sample_height = srcinfo->min_DCT_h_scaled_size;
+    } else {
+      info->iMCU_sample_width =
+	srcinfo->max_v_samp_factor * srcinfo->min_DCT_v_scaled_size;
+      info->iMCU_sample_height =
+	srcinfo->max_h_samp_factor * srcinfo->min_DCT_h_scaled_size;
+    }
+    break;
+  default:
+    info->output_width = srcinfo->output_width;
+    info->output_height = srcinfo->output_height;
+    if (info->num_components == 1) {
+      info->iMCU_sample_width = srcinfo->min_DCT_h_scaled_size;
+      info->iMCU_sample_height = srcinfo->min_DCT_v_scaled_size;
+    } else {
+      info->iMCU_sample_width =
+	srcinfo->max_h_samp_factor * srcinfo->min_DCT_h_scaled_size;
+      info->iMCU_sample_height =
+	srcinfo->max_v_samp_factor * srcinfo->min_DCT_v_scaled_size;
+    }
+    break;
+  }
+
+  /* If cropping has been requested, compute the crop area's position and
+   * dimensions, ensuring that its upper left corner falls at an iMCU boundary.
+   */
+  if (info->crop) {
+    /* Insert default values for unset crop parameters */
+    if (info->crop_xoffset_set == JCROP_UNSET)
+      info->crop_xoffset = 0;	/* default to +0 */
+    if (info->crop_yoffset_set == JCROP_UNSET)
+      info->crop_yoffset = 0;	/* default to +0 */
+    if (info->crop_xoffset >= info->output_width ||
+	info->crop_yoffset >= info->output_height)
+      ERREXIT(srcinfo, JERR_BAD_CROP_SPEC);
+    if (info->crop_width_set == JCROP_UNSET)
+      info->crop_width = info->output_width - info->crop_xoffset;
+    if (info->crop_height_set == JCROP_UNSET)
+      info->crop_height = info->output_height - info->crop_yoffset;
+    /* Ensure parameters are valid */
+    if (info->crop_width <= 0 || info->crop_width > info->output_width ||
+	info->crop_height <= 0 || info->crop_height > info->output_height ||
+	info->crop_xoffset > info->output_width - info->crop_width ||
+	info->crop_yoffset > info->output_height - info->crop_height)
+      ERREXIT(srcinfo, JERR_BAD_CROP_SPEC);
+    /* Convert negative crop offsets into regular offsets */
+    if (info->crop_xoffset_set == JCROP_NEG)
+      xoffset = info->output_width - info->crop_width - info->crop_xoffset;
+    else
+      xoffset = info->crop_xoffset;
+    if (info->crop_yoffset_set == JCROP_NEG)
+      yoffset = info->output_height - info->crop_height - info->crop_yoffset;
+    else
+      yoffset = info->crop_yoffset;
+    /* Now adjust so that upper left corner falls at an iMCU boundary */
+    if (info->crop_width_set == JCROP_FORCE)
+      info->output_width = info->crop_width;
+    else
+      info->output_width =
+        info->crop_width + (xoffset % info->iMCU_sample_width);
+    if (info->crop_height_set == JCROP_FORCE)
+      info->output_height = info->crop_height;
+    else
+      info->output_height =
+        info->crop_height + (yoffset % info->iMCU_sample_height);
+    /* Save x/y offsets measured in iMCUs */
+    info->x_crop_offset = xoffset / info->iMCU_sample_width;
+    info->y_crop_offset = yoffset / info->iMCU_sample_height;
+  } else {
+    info->x_crop_offset = 0;
+    info->y_crop_offset = 0;
+  }
+
+  /* Figure out whether we need workspace arrays,
+   * and if so whether they are transposed relative to the source.
+   */
+  need_workspace = FALSE;
+  transpose_it = FALSE;
+  switch (info->transform) {
+  case JXFORM_NONE:
+    if (info->x_crop_offset != 0 || info->y_crop_offset != 0)
+      need_workspace = TRUE;
+    /* No workspace needed if neither cropping nor transforming */
+    break;
+  case JXFORM_FLIP_H:
+    if (info->trim)
+      trim_right_edge(info, srcinfo->output_width);
+    if (info->y_crop_offset != 0)
+      need_workspace = TRUE;
+    /* do_flip_h_no_crop doesn't need a workspace array */
+    break;
+  case JXFORM_FLIP_V:
+    if (info->trim)
+      trim_bottom_edge(info, srcinfo->output_height);
+    /* Need workspace arrays having same dimensions as source image. */
+    need_workspace = TRUE;
+    break;
+  case JXFORM_TRANSPOSE:
+    /* transpose does NOT have to trim anything */
+    /* Need workspace arrays having transposed dimensions. */
+    need_workspace = TRUE;
+    transpose_it = TRUE;
+    break;
+  case JXFORM_TRANSVERSE:
+    if (info->trim) {
+      trim_right_edge(info, srcinfo->output_height);
+      trim_bottom_edge(info, srcinfo->output_width);
+    }
+    /* Need workspace arrays having transposed dimensions. */
+    need_workspace = TRUE;
+    transpose_it = TRUE;
+    break;
+  case JXFORM_ROT_90:
+    if (info->trim)
+      trim_right_edge(info, srcinfo->output_height);
+    /* Need workspace arrays having transposed dimensions. */
+    need_workspace = TRUE;
+    transpose_it = TRUE;
+    break;
+  case JXFORM_ROT_180:
+    if (info->trim) {
+      trim_right_edge(info, srcinfo->output_width);
+      trim_bottom_edge(info, srcinfo->output_height);
+    }
+    /* Need workspace arrays having same dimensions as source image. */
+    need_workspace = TRUE;
+    break;
+  case JXFORM_ROT_270:
+    if (info->trim)
+      trim_bottom_edge(info, srcinfo->output_width);
+    /* Need workspace arrays having transposed dimensions. */
+    need_workspace = TRUE;
+    transpose_it = TRUE;
+    break;
+  }
+
+  /* Allocate workspace if needed.
+   * Note that we allocate arrays padded out to the next iMCU boundary,
+   * so that transform routines need not worry about missing edge blocks.
+   */
+  if (need_workspace) {
+    coef_arrays = (jvirt_barray_ptr *)
+      (*srcinfo->mem->alloc_small) ((j_common_ptr) srcinfo, JPOOL_IMAGE,
+	SIZEOF(jvirt_barray_ptr) * info->num_components);
+    width_in_iMCUs = (JDIMENSION)
+      jdiv_round_up((long) info->output_width,
+		    (long) info->iMCU_sample_width);
+    height_in_iMCUs = (JDIMENSION)
+      jdiv_round_up((long) info->output_height,
+		    (long) info->iMCU_sample_height);
+    for (ci = 0; ci < info->num_components; ci++) {
+      compptr = srcinfo->comp_info + ci;
+      if (info->num_components == 1) {
+	/* we're going to force samp factors to 1x1 in this case */
+	h_samp_factor = v_samp_factor = 1;
+      } else if (transpose_it) {
+	h_samp_factor = compptr->v_samp_factor;
+	v_samp_factor = compptr->h_samp_factor;
+      } else {
+	h_samp_factor = compptr->h_samp_factor;
+	v_samp_factor = compptr->v_samp_factor;
+      }
+      width_in_blocks = width_in_iMCUs * h_samp_factor;
+      height_in_blocks = height_in_iMCUs * v_samp_factor;
+      coef_arrays[ci] = (*srcinfo->mem->request_virt_barray)
+	((j_common_ptr) srcinfo, JPOOL_IMAGE, FALSE,
+	 width_in_blocks, height_in_blocks, (JDIMENSION) v_samp_factor);
+    }
+    info->workspace_coef_arrays = coef_arrays;
+  } else
+    info->workspace_coef_arrays = NULL;
+
+  return TRUE;
+}
+
+
+/* Transpose destination image parameters */
+
+LOCAL(void)
+transpose_critical_parameters (j_compress_ptr dstinfo)
+{
+  int tblno, i, j, ci, itemp;
+  jpeg_component_info *compptr;
+  JQUANT_TBL *qtblptr;
+  JDIMENSION jtemp;
+  UINT16 qtemp;
+
+  /* Transpose image dimensions */
+  jtemp = dstinfo->image_width;
+  dstinfo->image_width = dstinfo->image_height;
+  dstinfo->image_height = jtemp;
+  itemp = dstinfo->min_DCT_h_scaled_size;
+  dstinfo->min_DCT_h_scaled_size = dstinfo->min_DCT_v_scaled_size;
+  dstinfo->min_DCT_v_scaled_size = itemp;
+
+  /* Transpose sampling factors */
+  for (ci = 0; ci < dstinfo->num_components; ci++) {
+    compptr = dstinfo->comp_info + ci;
+    itemp = compptr->h_samp_factor;
+    compptr->h_samp_factor = compptr->v_samp_factor;
+    compptr->v_samp_factor = itemp;
+  }
+
+  /* Transpose quantization tables */
+  for (tblno = 0; tblno < NUM_QUANT_TBLS; tblno++) {
+    qtblptr = dstinfo->quant_tbl_ptrs[tblno];
+    if (qtblptr != NULL) {
+      for (i = 0; i < DCTSIZE; i++) {
+	for (j = 0; j < i; j++) {
+	  qtemp = qtblptr->quantval[i*DCTSIZE+j];
+	  qtblptr->quantval[i*DCTSIZE+j] = qtblptr->quantval[j*DCTSIZE+i];
+	  qtblptr->quantval[j*DCTSIZE+i] = qtemp;
+	}
+      }
+    }
+  }
+}
+
+
+/* Adjust Exif image parameters.
+ *
+ * We try to adjust the Tags ExifImageWidth and ExifImageHeight if possible.
+ */
+
+LOCAL(void)
+adjust_exif_parameters (JOCTET FAR * data, unsigned int length,
+			JDIMENSION new_width, JDIMENSION new_height)
+{
+  boolean is_motorola; /* Flag for byte order */
+  unsigned int number_of_tags, tagnum;
+  unsigned int firstoffset, offset;
+  JDIMENSION new_value;
+
+  if (length < 12) return; /* Length of an IFD entry */
+
+  /* Discover byte order */
+  if (GETJOCTET(data[0]) == 0x49 && GETJOCTET(data[1]) == 0x49)
+    is_motorola = FALSE;
+  else if (GETJOCTET(data[0]) == 0x4D && GETJOCTET(data[1]) == 0x4D)
+    is_motorola = TRUE;
+  else
+    return;
+
+  /* Check Tag Mark */
+  if (is_motorola) {
+    if (GETJOCTET(data[2]) != 0) return;
+    if (GETJOCTET(data[3]) != 0x2A) return;
+  } else {
+    if (GETJOCTET(data[3]) != 0) return;
+    if (GETJOCTET(data[2]) != 0x2A) return;
+  }
+
+  /* Get first IFD offset (offset to IFD0) */
+  if (is_motorola) {
+    if (GETJOCTET(data[4]) != 0) return;
+    if (GETJOCTET(data[5]) != 0) return;
+    firstoffset = GETJOCTET(data[6]);
+    firstoffset <<= 8;
+    firstoffset += GETJOCTET(data[7]);
+  } else {
+    if (GETJOCTET(data[7]) != 0) return;
+    if (GETJOCTET(data[6]) != 0) return;
+    firstoffset = GETJOCTET(data[5]);
+    firstoffset <<= 8;
+    firstoffset += GETJOCTET(data[4]);
+  }
+  if (firstoffset > length - 2) return; /* check end of data segment */
+
+  /* Get the number of directory entries contained in this IFD */
+  if (is_motorola) {
+    number_of_tags = GETJOCTET(data[firstoffset]);
+    number_of_tags <<= 8;
+    number_of_tags += GETJOCTET(data[firstoffset+1]);
+  } else {
+    number_of_tags = GETJOCTET(data[firstoffset+1]);
+    number_of_tags <<= 8;
+    number_of_tags += GETJOCTET(data[firstoffset]);
+  }
+  if (number_of_tags == 0) return;
+  firstoffset += 2;
+
+  /* Search for ExifSubIFD offset Tag in IFD0 */
+  for (;;) {
+    if (firstoffset > length - 12) return; /* check end of data segment */
+    /* Get Tag number */
+    if (is_motorola) {
+      tagnum = GETJOCTET(data[firstoffset]);
+      tagnum <<= 8;
+      tagnum += GETJOCTET(data[firstoffset+1]);
+    } else {
+      tagnum = GETJOCTET(data[firstoffset+1]);
+      tagnum <<= 8;
+      tagnum += GETJOCTET(data[firstoffset]);
+    }
+    if (tagnum == 0x8769) break; /* found ExifSubIFD offset Tag */
+    if (--number_of_tags == 0) return;
+    firstoffset += 12;
+  }
+
+  /* Get the ExifSubIFD offset */
+  if (is_motorola) {
+    if (GETJOCTET(data[firstoffset+8]) != 0) return;
+    if (GETJOCTET(data[firstoffset+9]) != 0) return;
+    offset = GETJOCTET(data[firstoffset+10]);
+    offset <<= 8;
+    offset += GETJOCTET(data[firstoffset+11]);
+  } else {
+    if (GETJOCTET(data[firstoffset+11]) != 0) return;
+    if (GETJOCTET(data[firstoffset+10]) != 0) return;
+    offset = GETJOCTET(data[firstoffset+9]);
+    offset <<= 8;
+    offset += GETJOCTET(data[firstoffset+8]);
+  }
+  if (offset > length - 2) return; /* check end of data segment */
+
+  /* Get the number of directory entries contained in this SubIFD */
+  if (is_motorola) {
+    number_of_tags = GETJOCTET(data[offset]);
+    number_of_tags <<= 8;
+    number_of_tags += GETJOCTET(data[offset+1]);
+  } else {
+    number_of_tags = GETJOCTET(data[offset+1]);
+    number_of_tags <<= 8;
+    number_of_tags += GETJOCTET(data[offset]);
+  }
+  if (number_of_tags < 2) return;
+  offset += 2;
+
+  /* Search for ExifImageWidth and ExifImageHeight Tags in this SubIFD */
+  do {
+    if (offset > length - 12) return; /* check end of data segment */
+    /* Get Tag number */
+    if (is_motorola) {
+      tagnum = GETJOCTET(data[offset]);
+      tagnum <<= 8;
+      tagnum += GETJOCTET(data[offset+1]);
+    } else {
+      tagnum = GETJOCTET(data[offset+1]);
+      tagnum <<= 8;
+      tagnum += GETJOCTET(data[offset]);
+    }
+    if (tagnum == 0xA002 || tagnum == 0xA003) {
+      if (tagnum == 0xA002)
+	new_value = new_width; /* ExifImageWidth Tag */
+      else
+	new_value = new_height; /* ExifImageHeight Tag */
+      if (is_motorola) {
+	data[offset+2] = 0; /* Format = unsigned long (4 octets) */
+	data[offset+3] = 4;
+	data[offset+4] = 0; /* Number Of Components = 1 */
+	data[offset+5] = 0;
+	data[offset+6] = 0;
+	data[offset+7] = 1;
+	data[offset+8] = 0;
+	data[offset+9] = 0;
+	data[offset+10] = (JOCTET)((new_value >> 8) & 0xFF);
+	data[offset+11] = (JOCTET)(new_value & 0xFF);
+      } else {
+	data[offset+2] = 4; /* Format = unsigned long (4 octets) */
+	data[offset+3] = 0;
+	data[offset+4] = 1; /* Number Of Components = 1 */
+	data[offset+5] = 0;
+	data[offset+6] = 0;
+	data[offset+7] = 0;
+	data[offset+8] = (JOCTET)(new_value & 0xFF);
+	data[offset+9] = (JOCTET)((new_value >> 8) & 0xFF);
+	data[offset+10] = 0;
+	data[offset+11] = 0;
+      }
+    }
+    offset += 12;
+  } while (--number_of_tags);
+}
+
+
+/* Adjust output image parameters as needed.
+ *
+ * This must be called after jpeg_copy_critical_parameters()
+ * and before jpeg_write_coefficients().
+ *
+ * The return value is the set of virtual coefficient arrays to be written
+ * (either the ones allocated by jtransform_request_workspace, or the
+ * original source data arrays).  The caller will need to pass this value
+ * to jpeg_write_coefficients().
+ */
+
+GLOBAL(jvirt_barray_ptr *)
+jtransform_adjust_parameters (j_decompress_ptr srcinfo,
+			      j_compress_ptr dstinfo,
+			      jvirt_barray_ptr *src_coef_arrays,
+			      jpeg_transform_info *info)
+{
+  /* If force-to-grayscale is requested, adjust destination parameters */
+  if (info->force_grayscale) {
+    /* First, ensure we have YCbCr or grayscale data, and that the source's
+     * Y channel is full resolution.  (No reasonable person would make Y
+     * be less than full resolution, so actually coping with that case
+     * isn't worth extra code space.  But we check it to avoid crashing.)
+     */
+    if (((dstinfo->jpeg_color_space == JCS_YCbCr &&
+	  dstinfo->num_components == 3) ||
+	 (dstinfo->jpeg_color_space == JCS_GRAYSCALE &&
+	  dstinfo->num_components == 1)) &&
+	srcinfo->comp_info[0].h_samp_factor == srcinfo->max_h_samp_factor &&
+	srcinfo->comp_info[0].v_samp_factor == srcinfo->max_v_samp_factor) {
+      /* We use jpeg_set_colorspace to make sure subsidiary settings get fixed
+       * properly.  Among other things, it sets the target h_samp_factor &
+       * v_samp_factor to 1, which typically won't match the source.
+       * We have to preserve the source's quantization table number, however.
+       */
+      int sv_quant_tbl_no = dstinfo->comp_info[0].quant_tbl_no;
+      jpeg_set_colorspace(dstinfo, JCS_GRAYSCALE);
+      dstinfo->comp_info[0].quant_tbl_no = sv_quant_tbl_no;
+    } else {
+      /* Sorry, can't do it */
+      ERREXIT(dstinfo, JERR_CONVERSION_NOTIMPL);
+    }
+  } else if (info->num_components == 1) {
+    /* For a single-component source, we force the destination sampling factors
+     * to 1x1, with or without force_grayscale.  This is useful because some
+     * decoders choke on grayscale images with other sampling factors.
+     */
+    dstinfo->comp_info[0].h_samp_factor = 1;
+    dstinfo->comp_info[0].v_samp_factor = 1;
+  }
+
+  /* Correct the destination's image dimensions as necessary
+   * for rotate/flip, resize, and crop operations.
+   */
+  dstinfo->jpeg_width = info->output_width;
+  dstinfo->jpeg_height = info->output_height;
+
+  /* Transpose destination image parameters */
+  switch (info->transform) {
+  case JXFORM_TRANSPOSE:
+  case JXFORM_TRANSVERSE:
+  case JXFORM_ROT_90:
+  case JXFORM_ROT_270:
+    transpose_critical_parameters(dstinfo);
+    break;
+  default:
+    break;
+  }
+
+  /* Adjust Exif properties */
+  if (srcinfo->marker_list != NULL &&
+      srcinfo->marker_list->marker == JPEG_APP0+1 &&
+      srcinfo->marker_list->data_length >= 6 &&
+      GETJOCTET(srcinfo->marker_list->data[0]) == 0x45 &&
+      GETJOCTET(srcinfo->marker_list->data[1]) == 0x78 &&
+      GETJOCTET(srcinfo->marker_list->data[2]) == 0x69 &&
+      GETJOCTET(srcinfo->marker_list->data[3]) == 0x66 &&
+      GETJOCTET(srcinfo->marker_list->data[4]) == 0 &&
+      GETJOCTET(srcinfo->marker_list->data[5]) == 0) {
+    /* Suppress output of JFIF marker */
+    dstinfo->write_JFIF_header = FALSE;
+    /* Adjust Exif image parameters */
+    if (dstinfo->jpeg_width != srcinfo->image_width ||
+	dstinfo->jpeg_height != srcinfo->image_height)
+      /* Align data segment to start of TIFF structure for parsing */
+      adjust_exif_parameters(srcinfo->marker_list->data + 6,
+	srcinfo->marker_list->data_length - 6,
+	dstinfo->jpeg_width, dstinfo->jpeg_height);
+  }
+
+  /* Return the appropriate output data set */
+  if (info->workspace_coef_arrays != NULL)
+    return info->workspace_coef_arrays;
+  return src_coef_arrays;
+}
+
+
+/* Execute the actual transformation, if any.
+ *
+ * This must be called *after* jpeg_write_coefficients, because it depends
+ * on jpeg_write_coefficients to have computed subsidiary values such as
+ * the per-component width and height fields in the destination object.
+ *
+ * Note that some transformations will modify the source data arrays!
+ */
+
+GLOBAL(void)
+jtransform_execute_transform (j_decompress_ptr srcinfo,
+			      j_compress_ptr dstinfo,
+			      jvirt_barray_ptr *src_coef_arrays,
+			      jpeg_transform_info *info)
+{
+  jvirt_barray_ptr *dst_coef_arrays = info->workspace_coef_arrays;
+
+  /* Note: conditions tested here should match those in switch statement
+   * in jtransform_request_workspace()
+   */
+  switch (info->transform) {
+  case JXFORM_NONE:
+    if (info->x_crop_offset != 0 || info->y_crop_offset != 0)
+      do_crop(srcinfo, dstinfo, info->x_crop_offset, info->y_crop_offset,
+	      src_coef_arrays, dst_coef_arrays);
+    break;
+  case JXFORM_FLIP_H:
+    if (info->y_crop_offset != 0)
+      do_flip_h(srcinfo, dstinfo, info->x_crop_offset, info->y_crop_offset,
+		src_coef_arrays, dst_coef_arrays);
+    else
+      do_flip_h_no_crop(srcinfo, dstinfo, info->x_crop_offset,
+			src_coef_arrays);
+    break;
+  case JXFORM_FLIP_V:
+    do_flip_v(srcinfo, dstinfo, info->x_crop_offset, info->y_crop_offset,
+	      src_coef_arrays, dst_coef_arrays);
+    break;
+  case JXFORM_TRANSPOSE:
+    do_transpose(srcinfo, dstinfo, info->x_crop_offset, info->y_crop_offset,
+		 src_coef_arrays, dst_coef_arrays);
+    break;
+  case JXFORM_TRANSVERSE:
+    do_transverse(srcinfo, dstinfo, info->x_crop_offset, info->y_crop_offset,
+		  src_coef_arrays, dst_coef_arrays);
+    break;
+  case JXFORM_ROT_90:
+    do_rot_90(srcinfo, dstinfo, info->x_crop_offset, info->y_crop_offset,
+	      src_coef_arrays, dst_coef_arrays);
+    break;
+  case JXFORM_ROT_180:
+    do_rot_180(srcinfo, dstinfo, info->x_crop_offset, info->y_crop_offset,
+	       src_coef_arrays, dst_coef_arrays);
+    break;
+  case JXFORM_ROT_270:
+    do_rot_270(srcinfo, dstinfo, info->x_crop_offset, info->y_crop_offset,
+	       src_coef_arrays, dst_coef_arrays);
+    break;
+  }
+}
+
+/* jtransform_perfect_transform
+ *
+ * Determine whether lossless transformation is perfectly
+ * possible for a specified image and transformation.
+ *
+ * Inputs:
+ *   image_width, image_height: source image dimensions.
+ *   MCU_width, MCU_height: pixel dimensions of MCU.
+ *   transform: transformation identifier.
+ * Parameter sources from initialized jpeg_struct
+ * (after reading source header):
+ *   image_width = cinfo.image_width
+ *   image_height = cinfo.image_height
+ *   MCU_width = cinfo.max_h_samp_factor * cinfo.block_size
+ *   MCU_height = cinfo.max_v_samp_factor * cinfo.block_size
+ * Result:
+ *   TRUE = perfect transformation possible
+ *   FALSE = perfect transformation not possible
+ *           (may use custom action then)
+ */
+
+GLOBAL(boolean)
+jtransform_perfect_transform(JDIMENSION image_width, JDIMENSION image_height,
+			     int MCU_width, int MCU_height,
+			     JXFORM_CODE transform)
+{
+  boolean result = TRUE; /* initialize TRUE */
+
+  switch (transform) {
+  case JXFORM_FLIP_H:
+  case JXFORM_ROT_270:
+    if (image_width % (JDIMENSION) MCU_width)
+      result = FALSE;
+    break;
+  case JXFORM_FLIP_V:
+  case JXFORM_ROT_90:
+    if (image_height % (JDIMENSION) MCU_height)
+      result = FALSE;
+    break;
+  case JXFORM_TRANSVERSE:
+  case JXFORM_ROT_180:
+    if (image_width % (JDIMENSION) MCU_width)
+      result = FALSE;
+    if (image_height % (JDIMENSION) MCU_height)
+      result = FALSE;
+    break;
+  default:
+    break;
+  }
+
+  return result;
+}
+
+#endif /* TRANSFORMS_SUPPORTED */
+
+
+/* Setup decompression object to save desired markers in memory.
+ * This must be called before jpeg_read_header() to have the desired effect.
+ */
+
+GLOBAL(void)
+jcopy_markers_setup (j_decompress_ptr srcinfo, JCOPY_OPTION option)
+{
+#ifdef SAVE_MARKERS_SUPPORTED
+  int m;
+
+  /* Save comments except under NONE option */
+  if (option != JCOPYOPT_NONE) {
+    jpeg_save_markers(srcinfo, JPEG_COM, 0xFFFF);
+  }
+  /* Save all types of APPn markers iff ALL option */
+  if (option == JCOPYOPT_ALL) {
+    for (m = 0; m < 16; m++)
+      jpeg_save_markers(srcinfo, JPEG_APP0 + m, 0xFFFF);
+  }
+#endif /* SAVE_MARKERS_SUPPORTED */
+}
+
+/* Copy markers saved in the given source object to the destination object.
+ * This should be called just after jpeg_start_compress() or
+ * jpeg_write_coefficients().
+ * Note that those routines will have written the SOI, and also the
+ * JFIF APP0 or Adobe APP14 markers if selected.
+ */
+
+GLOBAL(void)
+jcopy_markers_execute (j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
+		       JCOPY_OPTION option)
+{
+  jpeg_saved_marker_ptr marker;
+
+  /* In the current implementation, we don't actually need to examine the
+   * option flag here; we just copy everything that got saved.
+   * But to avoid confusion, we do not output JFIF and Adobe APP14 markers
+   * if the encoder library already wrote one.
+   */
+  for (marker = srcinfo->marker_list; marker != NULL; marker = marker->next) {
+    if (dstinfo->write_JFIF_header &&
+	marker->marker == JPEG_APP0 &&
+	marker->data_length >= 5 &&
+	GETJOCTET(marker->data[0]) == 0x4A &&
+	GETJOCTET(marker->data[1]) == 0x46 &&
+	GETJOCTET(marker->data[2]) == 0x49 &&
+	GETJOCTET(marker->data[3]) == 0x46 &&
+	GETJOCTET(marker->data[4]) == 0)
+      continue;			/* reject duplicate JFIF */
+    if (dstinfo->write_Adobe_marker &&
+	marker->marker == JPEG_APP0+14 &&
+	marker->data_length >= 5 &&
+	GETJOCTET(marker->data[0]) == 0x41 &&
+	GETJOCTET(marker->data[1]) == 0x64 &&
+	GETJOCTET(marker->data[2]) == 0x6F &&
+	GETJOCTET(marker->data[3]) == 0x62 &&
+	GETJOCTET(marker->data[4]) == 0x65)
+      continue;			/* reject duplicate Adobe */
+#ifdef NEED_FAR_POINTERS
+    /* We could use jpeg_write_marker if the data weren't FAR... */
+    {
+      unsigned int i;
+      jpeg_write_m_header(dstinfo, marker->marker, marker->data_length);
+      for (i = 0; i < marker->data_length; i++)
+	jpeg_write_m_byte(dstinfo, marker->data[i]);
+    }
+#else
+    jpeg_write_marker(dstinfo, marker->marker,
+		      marker->data, marker->data_length);
+#endif
+  }
+}
diff --git a/plugins/AdvaImg/src/LibJPEG/transupp.h b/plugins/AdvaImg/src/LibJPEG/transupp.h
index 9aa0af385a..6e4d65afbe 100644
--- a/plugins/AdvaImg/src/LibJPEG/transupp.h
+++ b/plugins/AdvaImg/src/LibJPEG/transupp.h
@@ -1,213 +1,213 @@
-/*
- * transupp.h
- *
- * Copyright (C) 1997-2011, Thomas G. Lane, Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains declarations for image transformation routines and
- * other utility code used by the jpegtran sample application.  These are
- * NOT part of the core JPEG library.  But we keep these routines separate
- * from jpegtran.c to ease the task of maintaining jpegtran-like programs
- * that have other user interfaces.
- *
- * NOTE: all the routines declared here have very specific requirements
- * about when they are to be executed during the reading and writing of the
- * source and destination files.  See the comments in transupp.c, or see
- * jpegtran.c for an example of correct usage.
- */
-
-/* If you happen not to want the image transform support, disable it here */
-#ifndef TRANSFORMS_SUPPORTED
-#define TRANSFORMS_SUPPORTED 1		/* 0 disables transform code */
-#endif
-
-/*
- * Although rotating and flipping data expressed as DCT coefficients is not
- * hard, there is an asymmetry in the JPEG format specification for images
- * whose dimensions aren't multiples of the iMCU size.  The right and bottom
- * image edges are padded out to the next iMCU boundary with junk data; but
- * no padding is possible at the top and left edges.  If we were to flip
- * the whole image including the pad data, then pad garbage would become
- * visible at the top and/or left, and real pixels would disappear into the
- * pad margins --- perhaps permanently, since encoders & decoders may not
- * bother to preserve DCT blocks that appear to be completely outside the
- * nominal image area.  So, we have to exclude any partial iMCUs from the
- * basic transformation.
- *
- * Transpose is the only transformation that can handle partial iMCUs at the
- * right and bottom edges completely cleanly.  flip_h can flip partial iMCUs
- * at the bottom, but leaves any partial iMCUs at the right edge untouched.
- * Similarly flip_v leaves any partial iMCUs at the bottom edge untouched.
- * The other transforms are defined as combinations of these basic transforms
- * and process edge blocks in a way that preserves the equivalence.
- *
- * The "trim" option causes untransformable partial iMCUs to be dropped;
- * this is not strictly lossless, but it usually gives the best-looking
- * result for odd-size images.  Note that when this option is active,
- * the expected mathematical equivalences between the transforms may not hold.
- * (For example, -rot 270 -trim trims only the bottom edge, but -rot 90 -trim
- * followed by -rot 180 -trim trims both edges.)
- *
- * We also offer a lossless-crop option, which discards data outside a given
- * image region but losslessly preserves what is inside.  Like the rotate and
- * flip transforms, lossless crop is restricted by the JPEG format: the upper
- * left corner of the selected region must fall on an iMCU boundary.  If this
- * does not hold for the given crop parameters, we silently move the upper left
- * corner up and/or left to make it so, simultaneously increasing the region
- * dimensions to keep the lower right crop corner unchanged.  (Thus, the
- * output image covers at least the requested region, but may cover more.)
- * The adjustment of the region dimensions may be optionally disabled.
- *
- * We also provide a lossless-resize option, which is kind of a lossless-crop
- * operation in the DCT coefficient block domain - it discards higher-order
- * coefficients and losslessly preserves lower-order coefficients of a
- * sub-block.
- *
- * Rotate/flip transform, resize, and crop can be requested together in a
- * single invocation.  The crop is applied last --- that is, the crop region
- * is specified in terms of the destination image after transform/resize.
- *
- * We also offer a "force to grayscale" option, which simply discards the
- * chrominance channels of a YCbCr image.  This is lossless in the sense that
- * the luminance channel is preserved exactly.  It's not the same kind of
- * thing as the rotate/flip transformations, but it's convenient to handle it
- * as part of this package, mainly because the transformation routines have to
- * be aware of the option to know how many components to work on.
- */
-
-
-/* Short forms of external names for systems with brain-damaged linkers. */
-
-#ifdef NEED_SHORT_EXTERNAL_NAMES
-#define jtransform_parse_crop_spec	jTrParCrop
-#define jtransform_request_workspace	jTrRequest
-#define jtransform_adjust_parameters	jTrAdjust
-#define jtransform_execute_transform	jTrExec
-#define jtransform_perfect_transform	jTrPerfect
-#define jcopy_markers_setup		jCMrkSetup
-#define jcopy_markers_execute		jCMrkExec
-#endif /* NEED_SHORT_EXTERNAL_NAMES */
-
-
-/*
- * Codes for supported types of image transformations.
- */
-
-typedef enum {
-	JXFORM_NONE,		/* no transformation */
-	JXFORM_FLIP_H,		/* horizontal flip */
-	JXFORM_FLIP_V,		/* vertical flip */
-	JXFORM_TRANSPOSE,	/* transpose across UL-to-LR axis */
-	JXFORM_TRANSVERSE,	/* transpose across UR-to-LL axis */
-	JXFORM_ROT_90,		/* 90-degree clockwise rotation */
-	JXFORM_ROT_180,		/* 180-degree rotation */
-	JXFORM_ROT_270		/* 270-degree clockwise (or 90 ccw) */
-} JXFORM_CODE;
-
-/*
- * Codes for crop parameters, which can individually be unspecified,
- * positive or negative for xoffset or yoffset,
- * positive or forced for width or height.
- */
-
-typedef enum {
-        JCROP_UNSET,
-        JCROP_POS,
-        JCROP_NEG,
-        JCROP_FORCE
-} JCROP_CODE;
-
-/*
- * Transform parameters struct.
- * NB: application must not change any elements of this struct after
- * calling jtransform_request_workspace.
- */
-
-typedef struct {
-  /* Options: set by caller */
-  JXFORM_CODE transform;	/* image transform operator */
-  boolean perfect;		/* if TRUE, fail if partial MCUs are requested */
-  boolean trim;			/* if TRUE, trim partial MCUs as needed */
-  boolean force_grayscale;	/* if TRUE, convert color image to grayscale */
-  boolean crop;			/* if TRUE, crop source image */
-
-  /* Crop parameters: application need not set these unless crop is TRUE.
-   * These can be filled in by jtransform_parse_crop_spec().
-   */
-  JDIMENSION crop_width;	/* Width of selected region */
-  JCROP_CODE crop_width_set;	/* (forced disables adjustment) */
-  JDIMENSION crop_height;	/* Height of selected region */
-  JCROP_CODE crop_height_set;	/* (forced disables adjustment) */
-  JDIMENSION crop_xoffset;	/* X offset of selected region */
-  JCROP_CODE crop_xoffset_set;	/* (negative measures from right edge) */
-  JDIMENSION crop_yoffset;	/* Y offset of selected region */
-  JCROP_CODE crop_yoffset_set;	/* (negative measures from bottom edge) */
-
-  /* Internal workspace: caller should not touch these */
-  int num_components;		/* # of components in workspace */
-  jvirt_barray_ptr * workspace_coef_arrays; /* workspace for transformations */
-  JDIMENSION output_width;	/* cropped destination dimensions */
-  JDIMENSION output_height;
-  JDIMENSION x_crop_offset;	/* destination crop offsets measured in iMCUs */
-  JDIMENSION y_crop_offset;
-  int iMCU_sample_width;	/* destination iMCU size */
-  int iMCU_sample_height;
-} jpeg_transform_info;
-
-
-#if TRANSFORMS_SUPPORTED
-
-/* Parse a crop specification (written in X11 geometry style) */
-EXTERN(boolean) jtransform_parse_crop_spec
-	JPP((jpeg_transform_info *info, const char *spec));
-/* Request any required workspace */
-EXTERN(boolean) jtransform_request_workspace
-	JPP((j_decompress_ptr srcinfo, jpeg_transform_info *info));
-/* Adjust output image parameters */
-EXTERN(jvirt_barray_ptr *) jtransform_adjust_parameters
-	JPP((j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
-	     jvirt_barray_ptr *src_coef_arrays,
-	     jpeg_transform_info *info));
-/* Execute the actual transformation, if any */
-EXTERN(void) jtransform_execute_transform
-	JPP((j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
-	     jvirt_barray_ptr *src_coef_arrays,
-	     jpeg_transform_info *info));
-/* Determine whether lossless transformation is perfectly
- * possible for a specified image and transformation.
- */
-EXTERN(boolean) jtransform_perfect_transform
-	JPP((JDIMENSION image_width, JDIMENSION image_height,
-	     int MCU_width, int MCU_height,
-	     JXFORM_CODE transform));
-
-/* jtransform_execute_transform used to be called
- * jtransform_execute_transformation, but some compilers complain about
- * routine names that long.  This macro is here to avoid breaking any
- * old source code that uses the original name...
- */
-#define jtransform_execute_transformation	jtransform_execute_transform
-
-#endif /* TRANSFORMS_SUPPORTED */
-
-
-/*
- * Support for copying optional markers from source to destination file.
- */
-
-typedef enum {
-	JCOPYOPT_NONE,		/* copy no optional markers */
-	JCOPYOPT_COMMENTS,	/* copy only comment (COM) markers */
-	JCOPYOPT_ALL		/* copy all optional markers */
-} JCOPY_OPTION;
-
-#define JCOPYOPT_DEFAULT  JCOPYOPT_COMMENTS	/* recommended default */
-
-/* Setup decompression object to save desired markers in memory */
-EXTERN(void) jcopy_markers_setup
-	JPP((j_decompress_ptr srcinfo, JCOPY_OPTION option));
-/* Copy markers saved in the given source object to the destination object */
-EXTERN(void) jcopy_markers_execute
-	JPP((j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
-	     JCOPY_OPTION option));
+/*
+ * transupp.h
+ *
+ * Copyright (C) 1997-2011, Thomas G. Lane, Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains declarations for image transformation routines and
+ * other utility code used by the jpegtran sample application.  These are
+ * NOT part of the core JPEG library.  But we keep these routines separate
+ * from jpegtran.c to ease the task of maintaining jpegtran-like programs
+ * that have other user interfaces.
+ *
+ * NOTE: all the routines declared here have very specific requirements
+ * about when they are to be executed during the reading and writing of the
+ * source and destination files.  See the comments in transupp.c, or see
+ * jpegtran.c for an example of correct usage.
+ */
+
+/* If you happen not to want the image transform support, disable it here */
+#ifndef TRANSFORMS_SUPPORTED
+#define TRANSFORMS_SUPPORTED 1		/* 0 disables transform code */
+#endif
+
+/*
+ * Although rotating and flipping data expressed as DCT coefficients is not
+ * hard, there is an asymmetry in the JPEG format specification for images
+ * whose dimensions aren't multiples of the iMCU size.  The right and bottom
+ * image edges are padded out to the next iMCU boundary with junk data; but
+ * no padding is possible at the top and left edges.  If we were to flip
+ * the whole image including the pad data, then pad garbage would become
+ * visible at the top and/or left, and real pixels would disappear into the
+ * pad margins --- perhaps permanently, since encoders & decoders may not
+ * bother to preserve DCT blocks that appear to be completely outside the
+ * nominal image area.  So, we have to exclude any partial iMCUs from the
+ * basic transformation.
+ *
+ * Transpose is the only transformation that can handle partial iMCUs at the
+ * right and bottom edges completely cleanly.  flip_h can flip partial iMCUs
+ * at the bottom, but leaves any partial iMCUs at the right edge untouched.
+ * Similarly flip_v leaves any partial iMCUs at the bottom edge untouched.
+ * The other transforms are defined as combinations of these basic transforms
+ * and process edge blocks in a way that preserves the equivalence.
+ *
+ * The "trim" option causes untransformable partial iMCUs to be dropped;
+ * this is not strictly lossless, but it usually gives the best-looking
+ * result for odd-size images.  Note that when this option is active,
+ * the expected mathematical equivalences between the transforms may not hold.
+ * (For example, -rot 270 -trim trims only the bottom edge, but -rot 90 -trim
+ * followed by -rot 180 -trim trims both edges.)
+ *
+ * We also offer a lossless-crop option, which discards data outside a given
+ * image region but losslessly preserves what is inside.  Like the rotate and
+ * flip transforms, lossless crop is restricted by the JPEG format: the upper
+ * left corner of the selected region must fall on an iMCU boundary.  If this
+ * does not hold for the given crop parameters, we silently move the upper left
+ * corner up and/or left to make it so, simultaneously increasing the region
+ * dimensions to keep the lower right crop corner unchanged.  (Thus, the
+ * output image covers at least the requested region, but may cover more.)
+ * The adjustment of the region dimensions may be optionally disabled.
+ *
+ * We also provide a lossless-resize option, which is kind of a lossless-crop
+ * operation in the DCT coefficient block domain - it discards higher-order
+ * coefficients and losslessly preserves lower-order coefficients of a
+ * sub-block.
+ *
+ * Rotate/flip transform, resize, and crop can be requested together in a
+ * single invocation.  The crop is applied last --- that is, the crop region
+ * is specified in terms of the destination image after transform/resize.
+ *
+ * We also offer a "force to grayscale" option, which simply discards the
+ * chrominance channels of a YCbCr image.  This is lossless in the sense that
+ * the luminance channel is preserved exactly.  It's not the same kind of
+ * thing as the rotate/flip transformations, but it's convenient to handle it
+ * as part of this package, mainly because the transformation routines have to
+ * be aware of the option to know how many components to work on.
+ */
+
+
+/* Short forms of external names for systems with brain-damaged linkers. */
+
+#ifdef NEED_SHORT_EXTERNAL_NAMES
+#define jtransform_parse_crop_spec	jTrParCrop
+#define jtransform_request_workspace	jTrRequest
+#define jtransform_adjust_parameters	jTrAdjust
+#define jtransform_execute_transform	jTrExec
+#define jtransform_perfect_transform	jTrPerfect
+#define jcopy_markers_setup		jCMrkSetup
+#define jcopy_markers_execute		jCMrkExec
+#endif /* NEED_SHORT_EXTERNAL_NAMES */
+
+
+/*
+ * Codes for supported types of image transformations.
+ */
+
+typedef enum {
+	JXFORM_NONE,		/* no transformation */
+	JXFORM_FLIP_H,		/* horizontal flip */
+	JXFORM_FLIP_V,		/* vertical flip */
+	JXFORM_TRANSPOSE,	/* transpose across UL-to-LR axis */
+	JXFORM_TRANSVERSE,	/* transpose across UR-to-LL axis */
+	JXFORM_ROT_90,		/* 90-degree clockwise rotation */
+	JXFORM_ROT_180,		/* 180-degree rotation */
+	JXFORM_ROT_270		/* 270-degree clockwise (or 90 ccw) */
+} JXFORM_CODE;
+
+/*
+ * Codes for crop parameters, which can individually be unspecified,
+ * positive or negative for xoffset or yoffset,
+ * positive or forced for width or height.
+ */
+
+typedef enum {
+        JCROP_UNSET,
+        JCROP_POS,
+        JCROP_NEG,
+        JCROP_FORCE
+} JCROP_CODE;
+
+/*
+ * Transform parameters struct.
+ * NB: application must not change any elements of this struct after
+ * calling jtransform_request_workspace.
+ */
+
+typedef struct {
+  /* Options: set by caller */
+  JXFORM_CODE transform;	/* image transform operator */
+  boolean perfect;		/* if TRUE, fail if partial MCUs are requested */
+  boolean trim;			/* if TRUE, trim partial MCUs as needed */
+  boolean force_grayscale;	/* if TRUE, convert color image to grayscale */
+  boolean crop;			/* if TRUE, crop source image */
+
+  /* Crop parameters: application need not set these unless crop is TRUE.
+   * These can be filled in by jtransform_parse_crop_spec().
+   */
+  JDIMENSION crop_width;	/* Width of selected region */
+  JCROP_CODE crop_width_set;	/* (forced disables adjustment) */
+  JDIMENSION crop_height;	/* Height of selected region */
+  JCROP_CODE crop_height_set;	/* (forced disables adjustment) */
+  JDIMENSION crop_xoffset;	/* X offset of selected region */
+  JCROP_CODE crop_xoffset_set;	/* (negative measures from right edge) */
+  JDIMENSION crop_yoffset;	/* Y offset of selected region */
+  JCROP_CODE crop_yoffset_set;	/* (negative measures from bottom edge) */
+
+  /* Internal workspace: caller should not touch these */
+  int num_components;		/* # of components in workspace */
+  jvirt_barray_ptr * workspace_coef_arrays; /* workspace for transformations */
+  JDIMENSION output_width;	/* cropped destination dimensions */
+  JDIMENSION output_height;
+  JDIMENSION x_crop_offset;	/* destination crop offsets measured in iMCUs */
+  JDIMENSION y_crop_offset;
+  int iMCU_sample_width;	/* destination iMCU size */
+  int iMCU_sample_height;
+} jpeg_transform_info;
+
+
+#if TRANSFORMS_SUPPORTED
+
+/* Parse a crop specification (written in X11 geometry style) */
+EXTERN(boolean) jtransform_parse_crop_spec
+	JPP((jpeg_transform_info *info, const char *spec));
+/* Request any required workspace */
+EXTERN(boolean) jtransform_request_workspace
+	JPP((j_decompress_ptr srcinfo, jpeg_transform_info *info));
+/* Adjust output image parameters */
+EXTERN(jvirt_barray_ptr *) jtransform_adjust_parameters
+	JPP((j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
+	     jvirt_barray_ptr *src_coef_arrays,
+	     jpeg_transform_info *info));
+/* Execute the actual transformation, if any */
+EXTERN(void) jtransform_execute_transform
+	JPP((j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
+	     jvirt_barray_ptr *src_coef_arrays,
+	     jpeg_transform_info *info));
+/* Determine whether lossless transformation is perfectly
+ * possible for a specified image and transformation.
+ */
+EXTERN(boolean) jtransform_perfect_transform
+	JPP((JDIMENSION image_width, JDIMENSION image_height,
+	     int MCU_width, int MCU_height,
+	     JXFORM_CODE transform));
+
+/* jtransform_execute_transform used to be called
+ * jtransform_execute_transformation, but some compilers complain about
+ * routine names that long.  This macro is here to avoid breaking any
+ * old source code that uses the original name...
+ */
+#define jtransform_execute_transformation	jtransform_execute_transform
+
+#endif /* TRANSFORMS_SUPPORTED */
+
+
+/*
+ * Support for copying optional markers from source to destination file.
+ */
+
+typedef enum {
+	JCOPYOPT_NONE,		/* copy no optional markers */
+	JCOPYOPT_COMMENTS,	/* copy only comment (COM) markers */
+	JCOPYOPT_ALL		/* copy all optional markers */
+} JCOPY_OPTION;
+
+#define JCOPYOPT_DEFAULT  JCOPYOPT_COMMENTS	/* recommended default */
+
+/* Setup decompression object to save desired markers in memory */
+EXTERN(void) jcopy_markers_setup
+	JPP((j_decompress_ptr srcinfo, JCOPY_OPTION option));
+/* Copy markers saved in the given source object to the destination object */
+EXTERN(void) jcopy_markers_execute
+	JPP((j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
+	     JCOPY_OPTION option));
diff --git a/plugins/AdvaImg/src/LibJPEG/usage.txt b/plugins/AdvaImg/src/LibJPEG/usage.txt
index c91ddff288..c8ea77cd26 100644
--- a/plugins/AdvaImg/src/LibJPEG/usage.txt
+++ b/plugins/AdvaImg/src/LibJPEG/usage.txt
@@ -80,8 +80,9 @@ The basic command line switches for cjpeg are:
 	-rgb		Create RGB JPEG file.
 			Using this switch suppresses the conversion from RGB
 			colorspace input to the default YCbCr JPEG colorspace.
-			Use this switch in combination with the -block N
-			switch (see below) for lossless JPEG coding.
+			You can use this switch in combination with the
+			-block N switch (see below) for lossless JPEG coding.
+			See also the -rgb1 switch below.
 
 	-optimize	Perform optimization of entropy encoding parameters.
 			Without this, default encoding parameters are used.
@@ -175,6 +176,18 @@ Switches for advanced users:
 			decoders will be unable to view a SmartScale extended
 			JPEG file at all.
 
+	-rgb1		Create RGB JPEG file with reversible color transform.
+			Works like the -rgb switch (see above) and inserts a
+			simple reversible color transform into the processing
+			which significantly improves the compression.
+			Use this switch in combination with the -block N
+			switch (see above) for lossless JPEG coding.
+			CAUTION: A decoder with inverse color transform
+			support is required for this feature.  Reversible
+			color transform support is not yet widely implemented,
+			so many decoders will be unable to view a reversible
+			color transformed JPEG file at all.
+
 	-dct int	Use integer DCT method (default).
 	-dct fast	Use fast integer DCT (less accurate).
 	-dct float	Use floating-point DCT method.
author	Kirill Volinsky <mataes2007@gmail.com>	2013-08-26 14:14:54 +0000
committer	Kirill Volinsky <mataes2007@gmail.com>	2013-08-26 14:14:54 +0000
commit	4906fb47f340bea7d2ba551364d1a5d8d0473861 (patch)
tree	1ca2cbed515675150be45f06f0cbb779683ca626
parent	af85ed40e26a345cc15ba7f310c92a62116865d1 (diff)